/* Copyright 2001 Matt Flax <flatmax@ieee.org>
   This file is part of MFFM Time Scale Modification for Audio.

   MFFM Time Scale Modification for Audio is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   
   MFFM Time Scale Modification for Audio is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You have received a copy of the GNU General Public License
   along with MFFM Time Scale Modification for Audio
 */
#ifndef WSOLA_H_
#define WSOLA_H_

//#define NEWFILEOUT
#ifdef NEWFILEOUT //debug
#include <fstream>
#endif

//#define FILEOUT
#ifdef FILEOUT
#include <fstream>
#endif

//Rounding helper used throughout this file whenever a double is converted to
//a sample index or sample value. It forwards to rint(); the commented-out
//variant below is an earlier hand-written round-half-up implementation.
//The #ifndef lets a project supply its own roundD before including this file.
#ifndef roundD
//#define roundD(a) ((a-(int)a>=0.5) ? ceil(a) : floor(a))
#define roundD(a) rint(a)
#endif

//Largest double value; used as the starting value when searching for a
//minimum (and, negated, for a maximum). Only defined here if not already set.
#ifndef MAXDOUBLE
#define MAXDOUBLE 1.79769e+308
#endif

//Accessors for a complex sample stored as a two element array:
//element 0 is treated as the real part, element 1 as the imaginary part.
#define c_re(c) ((c)[0])
#define c_im(c) ((c)[1])

#include <string.h>
#include <iostream>
using namespace std;
//#include <values.h>
#include "hanning.H"

//From the MFFM FFTw C++ wrapper available at sourceforge.net
#include <mffm/complexFFT.H>
#include <complex>

//From the MFFM_TimeCode library available at sourceforge.net
#include <mffm/timeCode.H>

///Define W_DEBUG to get enter/exit trace messages on stdout
//#define W_DEBUG

///The audio sample type; may be overridden before including this file
#ifndef ATYPE
#define ATYPE short int
#endif
///The default sample rate (Hz); may be overridden before including this file
#ifndef SAMPLE_FREQUENCY
#define SAMPLE_FREQUENCY 44100
#endif

///The hanning window duration in s
#define HANNING_DURATION 0.085
///The proportional hanning overlap
#define HANNING_OVERLAP 0.5
///The sample length of the hanning window for the given sample frequency
#define HANNING_LENGTH(sampFreq) (int)roundD(HANNING_DURATION*(double)sampFreq)

//The hanning window length is divided by this value to obtain the maximum
//offset (deltaMax) which is searched during matching.
//Original note: the pitch (Hz) whose period we don't want to exceed during matching.
#define DELTA_DIVISOR 6.0

/* reference :
   An Overlap Add Technique Based on Waveform Similarity (WSOLA)
   For High Quality Time-Scale Modification of Speech
   Werner Verhelst and Marc Roelands
   IEEE publication
*/

///The time code structures used within
#define TIMECODETYPE_W TimeCode<MasterCounter<field, 1>, ATYPE>

//As we are using Hanning windows, extremes of the windows aren't important.
//This is the proportion of a frame skipped at each end by findSimilarity.
#define NO_SEARCH_PERCENTAGE 0.1 //10.0/100.0 //Don't search this much of a frame

/** Waveform Similarity Overlap-Add (WSOLA) time scale modification.

    Usage: construct, call process() with the input audio and the scale
    factor tau, then read the result with getCount() and operator[].

    The class derives from Hanning and uses its window (wnd) to weight the
    frames which are overlap-added into the output.

    NOTE: the class owns raw heap buffers (output and the FFT structures) and
    has no user defined copy operations, so instances must not be copied -
    a copy would release the same buffers twice on destruction.
*/
class WSOLA : public Hanning {
  TIMECODETYPE_W sFrame, dFrame, nextSFrame;//src and dest and next src frames
  ATYPE *output; // The timescale modified audio
  int count; // The number of samples allocated in output

  int N; //The total size of the fft data structures
  ///The fft structure used to implement the convolution
  complexFFT *fft;
  ///The fft data structures
  complexFFTData *sFrameFFTData, *nextSFrameFFTData, *hanningFFTData;

  int hanningLength, deltaMax, lastNextSFrame, lastDFrame, sampleFrequency;

#ifdef NEWFILEOUT //debug
  ofstream outputBestLocs;
#endif

  /** Returns how many candidate offsets may be searched for a frame which
      starts at input location startLoc.
      The result never exceeds deltaMax and is reduced near the end of the
      input so that (startLoc + offset + hanningLength) stays inside the
      input, whose length is the end of the nextSFrame time code.
  */
  int candidateOffsets(int startLoc){
    int limit=nextSFrame.getEnd()-startLoc-hanningLength+1;
    if (limit<1) //offset zero is always a candidate
      limit=1;
    if (limit>deltaMax)
      limit=deltaMax;
    return limit;
  }

  /** Time domain version of the frame processing (legacy - process() uses
      processFrameFFT() instead).
      Windows the current source frame, searches up to deltaMax offsets after
      the next source frame location for the most similar frame using
      findSimilarity(), then overlap-adds the windowed best match to the
      output at the current destination frame location.
      \param input The input audio
  */
  void processFrame(ATYPE *input){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrame : enter"<<endl;
#endif
    int loc=sFrame.getCount();
    //Load the src frame window we are matching to
    ATYPE* tempSFW=sFrame.window->getDataPtr();
    for (int i=0;i<sFrame.window->getCount();i++)
      tempSFW[i]=(int)roundD((double)input[loc+i]*wnd[i]);

    //Search through for the closest match
    int cnt=hanningLength, initialDFrameLoc=dFrame.getCount(), initialNextSFrameLoc=nextSFrame.getCount();
    int bestLocation=0;
    ATYPE *tempNSFW;
    if (initialDFrameLoc!=0){ //We aren't at the beginning so process as normal
      double difference, minDifference=MAXDOUBLE;
      //Don't let the search run beyond the end of the input
      int maxDelta=candidateOffsets(initialNextSFrameLoc);
      for (int delta=0;delta<maxDelta;delta++){
        tempNSFW=nextSFrame.window->getDataPtr();
        loc=nextSFrame.getCount();
        for (int i=0;i<nextSFrame.window->getCount();i++)
          tempNSFW[i]=(ATYPE)roundD((double)input[loc+i]*wnd[i]);
        if ((difference=findSimilarity(sFrame.window->getDataPtr(),nextSFrame.window->getDataPtr(), cnt))<minDifference){
          minDifference=difference;
          bestLocation=delta;
        }
        nextSFrame=nextSFrame.getCount()+1;
      }
    }

    //Window the closest match
    nextSFrame=initialNextSFrameLoc+bestLocation;
    tempNSFW=nextSFrame.window->getDataPtr();
    loc=nextSFrame.getCount();
    for (int i=0;i<nextSFrame.window->getCount();i++)
      tempNSFW[i]=(ATYPE)roundD((double)input[loc+i]*wnd[i]);

    //Limit the copy to what is left in the output
    int maxBoundary=cnt;
    if (cnt>(dFrame.getEnd()-dFrame.getCount()))
      maxBoundary=(dFrame.getEnd()-dFrame.getCount());
#ifdef W_DEBUG
    cout<<initialDFrameLoc<<'\t'<<maxBoundary<<endl;
#endif
    //Overlap add the closest match to the output audio array
    tempNSFW=nextSFrame.window->getDataPtr();
    for (int i=0;i<maxBoundary;i++)
      output[i+initialDFrameLoc]+=tempNSFW[i];
#ifdef W_DEBUG
    cout<<"WSOLA::processFrame : exit"<<endl;
#endif
  }

  /** FFT version of the frame processing - this is what process() calls.
      Windows the current source frame, loads it together with
      (hanningLength+deltaMax) samples following the next source frame
      location into the FFT buffers, asks findSimilarityFFT() for the best
      offset, moves nextSFrame to that offset and overlap-adds the windowed
      frame found there to the output at the current destination location.
      Samples which would lie beyond the end of the input are loaded as zero
      and the offsets which would need them are excluded from the search.
      \param input The input audio
  */
  void processFrameFFT(ATYPE *input){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrameFFT : enter "<<endl;
#endif
    int loc=sFrame.getCount();
    //Load the src frame window we are matching to
    ATYPE* tempSFW=sFrame.window->getDataPtr();
    for (int i=0;i<sFrame.window->getCount();i++)
      tempSFW[i]=(int)roundD((double)input[loc+i]*wnd[i]);

    //Search through for the closest match
    int bestLocation=0;
    if (dFrame.getCount()!=0){ //We aren't at the beginning so process as normal
      loc=nextSFrame.getCount();
      int inputEnd=nextSFrame.getEnd(); //One past the last readable input sample
      //load the frame data : windowed source frame and raw next source data
      for (int i=0;i<hanningLength;i++){
        c_re(nextSFrameFFTData->in[i])=(loc+i<inputEnd) ? (double)input[loc+i] : 0.0;
        c_im(nextSFrameFFTData->in[i])=c_im(sFrameFFTData->in[i])=0.0;
        c_re(sFrameFFTData->in[i])=tempSFW[i];
      }
      //the search region : the source frame is zero padded here
      for (int i=hanningLength;i<hanningLength+deltaMax;i++){
        c_re(nextSFrameFFTData->in[i])=(loc+i<inputEnd) ? (double)input[loc+i] : 0.0;
        c_im(nextSFrameFFTData->in[i])=c_re(sFrameFFTData->in[i])=c_im(sFrameFFTData->in[i])=0.0;
      }
      bestLocation=findSimilarityFFT(candidateOffsets(loc));
    }

#ifdef FILEOUT
    cout<<"\nbest loc "<<bestLocation<<'\t';
    if (dFrame.getCount()!=0){//old (time domain) similarity section for comparison
      int initialNextSFrameLoc=nextSFrame.getCount();
      int oldBestLocation=0;
      double difference, minDifference=MAXDOUBLE;

      char tempBuf[128];
      string tempName("bestlocs.");
      sprintf(tempBuf, "%d", initialNextSFrameLoc); tempName+=tempBuf;
      tempName+=".txt";
      ofstream outp(tempName.c_str());

      int maxDelta=candidateOffsets(initialNextSFrameLoc);
      for (int delta=0;delta<maxDelta;delta++){
        ATYPE *tempOldNSFW=nextSFrame.window->getDataPtr();
        loc=nextSFrame.getCount();
        for (int i=0;i<nextSFrame.window->getCount();i++)
          tempOldNSFW[i]=(ATYPE)roundD((double)input[loc+i]*wnd[i]);
        if ((difference=findSimilarity(sFrame.window->getDataPtr(),nextSFrame.window->getDataPtr(), hanningLength))<minDifference){
          minDifference=difference;
          oldBestLocation=delta;
        }
        outp<<difference<<'\n';
        nextSFrame=nextSFrame.getCount()+1;
      }
      outp.close();
      //Put the time code back, the comparison must not change the result
      nextSFrame=initialNextSFrameLoc;
      cout<<oldBestLocation<<endl;
    }
#endif

    //Window the closest match
    nextSFrame=nextSFrame.getCount()+bestLocation;
    ATYPE *tempNSFW=nextSFrame.window->getDataPtr();
    loc=nextSFrame.getCount();
    for (int i=0;i<nextSFrame.window->getCount();i++)
      tempNSFW[i]=(ATYPE)roundD((double)input[loc+i]*wnd[i]);

    //Overlap add the closest match to the output audio array
    int maxBoundary=hanningLength;//Take a full window
    int tempI=dFrame.getCount();
    if (hanningLength>(dFrame.getEnd()-tempI)) //The upper limit check
      maxBoundary=(dFrame.getEnd()-dFrame.getCount());
    for (int i=0;i<maxBoundary;i++)
      output[i+tempI]+=tempNSFW[i];
#ifdef W_DEBUG
    cout<<"WSOLA::processFrameFFT : exit"<<endl;
#endif
  }

  /** Fills in the tail of the output after the main loop has finished.
      Currently unused : the call in process() is commented out.
      The second half of the last frame is re-copied without windowing, then
      depending on whether the remaining input is equal to, longer than
      (compressing) or shorter than (expanding) the remaining output, the
      rest of the output is copied / cross faded from the input.
      NOTE(review): the compressing branch calls reset() and roundOff(),
      both of which resize the inherited hanning window, so the window no
      longer has hanningLength samples afterwards - confirm before re-enabling.
      \param input The input audio
  */
  void processLastFrame(ATYPE *input){
#ifdef W_DEBUG
    cout<<"WSOLA::processLastFrame : enter"<<endl;
#endif
    //Find amount left to use ...
    int nSLeft=nextSFrame.getEnd()-nextSFrame.getCount();
    int dLeft=dFrame.getEnd()-dFrame.getCount();
    int lastPoint;
    if (dLeft!=0){
      //Reload the second half without hanning
      int halfCnt=(int)roundD((double)hanningLength/2.0);
      for (int i=0;i<halfCnt;i++)
        output[lastDFrame+halfCnt+i]=input[lastNextSFrame+halfCnt+i];
      lastPoint=lastDFrame+halfCnt+halfCnt-1;

      //The output samples remaining after the first half of the last frame
      int newDLeft=dFrame.getEnd()-lastDFrame-halfCnt;

      if (nSLeft==dLeft){//Neither compressing nor stretching time
        for (int i=0;i<dFrame.getEnd()-lastPoint;i++)
          output[lastPoint+i]=input[lastPoint+i];
      } else if (nSLeft>dLeft){//Compressing time
        int tempCnt=(int)roundD((double)newDLeft/2.0);

        //Resize the inherited hanning window for the cross fade
        reset(tempCnt);

        //Apply the hanning to the output
        int startLoc=lastPoint-halfCnt;
        nextSFrame=nextSFrame.getEnd()-newDLeft-halfCnt;
        int loc1=nextSFrame.getCount();
        for (int i=startLoc;i<=lastPoint;i++, loc1++)
          output[i]=(ATYPE)roundD((double)output[i]*wnd[lastPoint-i]+
                                  wnd[i-startLoc]*(double)input[loc1]);
        //Copy the end of the input to the end of the output
        nextSFrame=nextSFrame.getEnd()-newDLeft;
        dFrame=dFrame.getEnd()-newDLeft;
        for (int i=0;i<newDLeft;i++,nextSFrame+=1,dFrame+=1)
          output[dFrame.getCount()]=input[nextSFrame.getCount()];
        roundOff();
      }else if (nSLeft<dLeft){//Expanding time
        //We can simply copy the rest in ...
        nextSFrame=lastNextSFrame+hanningLength;
        newDLeft=nextSFrame.getEnd()-nextSFrame.getCount();
        int loc1=nextSFrame.getCount();
        dFrame=lastDFrame+hanningLength-1;
        int loc2=dFrame.getCount();
        for (int i=0;i<newDLeft;i++, loc2++, loc1++)
          output[loc2]=input[loc1];
      }
    }
#ifdef W_DEBUG
    cout<<"WSOLA::processLastFrame : exit"<<endl;
#endif
  }

  /** Fades out the end of the output to ensure no clicking.
      The inherited hanning window is reset to 0.01 s worth of samples
      (rounded up to an even count) and its second (falling) half is applied
      to the last samples of the output, finishing on the final sample.
      Only called from processLastFrame().
  */
  void roundOff(void){
    int wndSize=(int)roundD((double)sampleFrequency*0.01);
    if (wndSize%2)
      wndSize++;
    reset(wndSize);
    int halfCnt=Hanning::getCount()/2;
    int endLoc=dFrame.getEnd(); //One past the last output sample
    int loc=endLoc-halfCnt;
    int index=halfCnt;
    if (loc<0){ //Output shorter than the fade : skip the part which doesn't fit
      index-=loc;
      loc=0;
    }
    for (int i=loc;i<endLoc;i++)
      output[i]=(int)roundD((double)output[i]*wnd[index++]);
  }

  /** Inits the fft structures.
      Does nothing if structures of the requested size already exist,
      otherwise (re)allocates them, loads the zero padded hanning window into
      hanningFFTData and forward transforms it.
      Allocation failure is reported by operator new throwing std::bad_alloc.
      \param count The size of each fft data structure
  */
  void initFFTStructures(int count){
    if (sFrameFFTData){
      if (sFrameFFTData->getSize()==count)
        return; //Already set up with the correct size
      deInitFFTStructures();
    }

    //Set up the fft structures
    sFrameFFTData=new complexFFTData(count);
    nextSFrameFFTData=new complexFFTData(count);
    hanningFFTData=new complexFFTData(count);
    fft=new complexFFT(hanningFFTData);
    //set the hanning data here
    for (int i=0;i<hanningLength;i++){
      c_re(hanningFFTData->in[i])=wnd[i];
      c_im(hanningFFTData->in[i])=0.0;
    }
    //zero pad the rest
    for (int i=hanningLength;i<count;i++)
      c_re(hanningFFTData->in[i])=c_im(hanningFFTData->in[i])=0.0;
    fft->fwdTransform();
  }

  /// De-Inits the FFT structures, leaving all of their pointers NULL
  void deInitFFTStructures(void){
    delete fft;
    fft=NULL;
    delete hanningFFTData;
    hanningFFTData=NULL;
    delete sFrameFFTData;
    sFrameFFTData=NULL;
    delete nextSFrameFFTData;
    nextSFrameFFTData=NULL;
  }

  /** This function returns the maximum similarity location.
      The time data must be preloaded into sFrameFFTData->in and
      nextSFrameFFTData->in. Both are forward transformed, then for each of
      the N bins (next source * conjugate(hanning) - source) is stored in the
      source output buffer and inverse transformed. The offset with the
      largest real value in sFrameFFTData->in is returned.
      NOTE(review): reading the result from 'in' assumes invTransform()
      writes there - confirm against the complexFFT wrapper.
      \param searchLimit The number of offsets (starting at zero) to scan;
             negative or larger than deltaMax means scan deltaMax offsets
      \return The best offset, zero if nothing was scanned
  */
  int findSimilarityFFT(int searchLimit=-1){
    if (searchLimit<0 || searchLimit>deltaMax)
      searchLimit=deltaMax;

    //time data must be preloaded
    fft->switchData(sFrameFFTData);
    fft->fwdTransform();
    fft->switchData(nextSFrameFFTData);
    fft->fwdTransform();

    // apply hanning to next source frame in this domain ...
    for (int i=0;i<N;i++){
      std::complex<fftw_real> res=(std::complex<fftw_real>(c_re(nextSFrameFFTData->out[i]), c_im(nextSFrameFFTData->out[i]))*
                                   std::complex<fftw_real>(c_re(hanningFFTData->out[i]), -c_im(hanningFFTData->out[i]))-
                                   std::complex<fftw_real>(c_re(sFrameFFTData->out[i]), c_im(sFrameFFTData->out[i])));
      c_re(sFrameFFTData->out[i])=res.real();
      c_im(sFrameFFTData->out[i])=res.imag();
    }
    //Make sure we point to the correct output buffer
    fft->switchData(sFrameFFTData);
    //inverse transform ...
    fft->invTransform();

#ifdef NEWFILEOUT
    for (int i=0;i<N; i++)
      outputBestLocs<<" "<<c_re(sFrameFFTData->out[i])<<"+i*"<<
        c_im(sFrameFFTData->out[i]);
    outputBestLocs<<";\n";
#endif

    //Scan for the maximum
    double maximum=-MAXDOUBLE, tempD;
    int bestLocation=0;
#ifdef FILEOUT
    char tempBuf[128];
    string tempName("bestloc.");
    sprintf(tempBuf, "%d", nextSFrame.getCount()); tempName+=tempBuf;
    tempName+=".txt";
    ofstream outp(tempName.c_str());
#endif
    for (int i=0;i<searchLimit;i++){
      if ((tempD=c_re(sFrameFFTData->in[i]))>maximum){
        bestLocation=i;
        maximum=tempD;
      }
#ifdef FILEOUT
      outp<<c_re(sFrameFFTData->in[i])<<'\n';
#endif
    }
#ifdef FILEOUT
    outp.close();
#endif
    return bestLocation;
  }

  /** Time domain similarity measure : the sum of the absolute sample
      differences between two frames. Smaller means more similar.
      The first and last NO_SEARCH_PERCENTAGE of the frame are skipped as the
      hanning window makes them insignificant.
      \param src The (windowed) source frame
      \param nextSrc The (windowed) candidate frame
      \param cnt The number of samples in each frame
      \return The summed absolute difference
  */
  inline double findSimilarity(ATYPE *src, ATYPE *nextSrc, int cnt){
    double sim=0;
    int lower=(int)roundD((float)cnt*NO_SEARCH_PERCENTAGE); //No need to look at extremes - 'cause of hanning
    int upper=cnt-lower;
    for (int i=lower;i<upper;i++)
      sim+=(double)abs(src[i]-nextSrc[i]);
    return sim;
  }
public:
  ///The channel count given to the constructor (only reported, not otherwise used in this class)
  int channels;

  /** Constructor
      \param hl The hanning window length in samples
      \param sf The sample frequency in Hz
      \param ch The number of channels
  */
  WSOLA(int hl=HANNING_LENGTH(SAMPLE_FREQUENCY), int sf=SAMPLE_FREQUENCY, int ch=1) : Hanning(hl){

#ifdef NEWFILEOUT
    outputBestLocs.open("simMeas.m");
    outputBestLocs<<"x=[";
#endif

#ifdef W_DEBUG
    cout<<"WSOLA::WSOLA : enter"<<endl;
#endif
    channels=ch;
    cout<<"Assuming "<<channels<<" channels"<<endl;
    hanningLength=hl;
    deltaMax=(int)roundD((double)hanningLength/DELTA_DIVISOR);
    sampleFrequency=sf;
    lastNextSFrame=lastDFrame=0;
    output=NULL;
    count=0;

    //Set up null pointers before anything is allocated
    fft=NULL;
    sFrameFFTData=nextSFrameFFTData=hanningFFTData=NULL;
    //The fft must hold a full frame plus the search region
    N=deltaMax+hanningLength;
    initFFTStructures(N);

#ifdef W_DEBUG
    cout<<"hanningLenght = "<<hanningLength<<" deltaMax = "<<deltaMax<<endl;
    cout<<"WSOLA::WSOLA : exit"<<endl;
#endif
  }

  ///Destructor : releases the output array and the fft structures
  ~WSOLA(void){
#ifdef W_DEBUG
    cout<<"WSOLA::~WSOLA : enter"<<endl;
#endif
    delete [] output;
    output=NULL;
    deInitFFTStructures();
#ifdef W_DEBUG
    cout<<"WSOLA::~WSOLA : exit"<<endl;
#endif
  }

  /**This is the actual processing loop.
     The output array is (re)allocated when it is too small, zeroed, and
     then filled frame by frame using processFrameFFT(). Progress is printed
     to stdout.
     \param input The original audio; cnt samples are read
     \param cnt The sample count of the original audio
     \param tau The factor to scale the duration by; the output holds
            round(tau*cnt) samples
   */
  void process(ATYPE *input, int cnt, double tau){
#ifdef W_DEBUG
    cout<<"WSOLA::process : enter"<<endl;
#endif

    //Malloc the output audio array
#ifdef W_DEBUG
    cout<<"WSOLA::process : processing : "<<cnt<<" samples"<<endl;
    cout<<"WSOLA::process : initting output array to : "<<(int)roundD(tau*(double)cnt)<<endl;
#endif
    int outCnt=(int)roundD((double)cnt*tau); //The output sample count
    int tempCnt=(int)roundD(tau*(double)cnt);
    if (tempCnt<=1) //Always allocate at least one sample
      tempCnt=1;

    if (count<tempCnt){
      //Forget the old array before allocating, so that a throwing new
      //can't leave a dangling pointer with a stale size behind
      delete [] output;
      output=NULL;
      count=0;
      output=new ATYPE[tempCnt];
      count=tempCnt;
    }
    memset(output, 0, tempCnt*sizeof(ATYPE));

    //Set up the time codes
    sFrame.init(0,cnt);
    nextSFrame.init(0,cnt);
    dFrame.init(0,outCnt);
    //Ensure the array frame and window sizes are correct
    (*sFrame.window)=hanningLength;
    (*nextSFrame.window)=hanningLength;
    (*dFrame.window)=hanningLength;
    sFrame.window->setFrameSize(sizeof(ATYPE));
    nextSFrame.window->setFrameSize(sizeof(ATYPE));
    dFrame.window->setFrameSize(sizeof(ATYPE));

    //Process input frame by input frame ....
    double loc=0;
    sFrame=0;
    nextSFrame=0;
    dFrame=0;

    //Set up the first half output frame ...
    int halfWndCnt=(int)roundD((double)sFrame.window->getCount()/2.0);
    //... without running off the input, the output or the window
    int firstHalfCnt=halfWndCnt;
    if (firstHalfCnt>cnt)
      firstHalfCnt=cnt;
    if (firstHalfCnt>outCnt)
      firstHalfCnt=outCnt;
    if (firstHalfCnt>hanningLength-halfWndCnt)
      firstHalfCnt=hanningLength-halfWndCnt;
    for (int i=0;i<firstHalfCnt;i++)
      output[i]+=(ATYPE)roundD((double)input[i]*wnd[i+halfWndCnt]);

    int wndCnt=hanningLength;
    int howMany=0; //Count the number of loops
    while (((nextSFrame.getCount()+wndCnt)<cnt) && ((sFrame.getCount()+wndCnt)<cnt) &&
           ((dFrame.getCount()+(int)roundD((double)hanningLength/2.0))<dFrame.getEnd())){//Loop until we've hit the end of the frame
      processFrameFFT(input);

      cout<<"Start: "<<dFrame.getBeginning()<<"\tCurrent: "<<dFrame.getCount()<<"\t\tStop: "<<dFrame.getEnd()<<'\r';

      //The next frame to match is half a window after the one just used
      sFrame=(int)roundD(((double)nextSFrame.getCount()+(double)hanningLength/2.0));
      //The output advances by half a window, the input by that scaled by tau
      loc+=(double)hanningLength/2.0;
      lastNextSFrame=nextSFrame.getCount();
      nextSFrame=(int)roundD(loc/tau);
      lastDFrame=dFrame.getCount();
      dFrame=(int)roundD(loc);
      howMany++;
    }
    cout<<'\n';

#ifdef NEWFILEOUT //debug
    cout<<"rows="<<howMany<<"\ncols="<<N<<endl;
    outputBestLocs<<"];\n";
    outputBestLocs.close();
#endif

    //processLastFrame(input);
#ifdef W_DEBUG
    cout<<"WSOLA::process : exit"<<endl;
#endif
  }

  ///This is the size of the modified (output) data
  int getCount(void){return dFrame.getEnd();}

  ///This is for indexing the modified (output) data, i is not range checked
  ATYPE& operator[](int i){
    return output[i];
  }
};
#endif //WSOLA_H_
