/* Copyright 2001 Matt Flax <flatmax@ieee.org>
   This file is part of MFFM Time Scale Modification for Audio.

   MFFM Time Scale Modification for Audio is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   
   MFFM Time Scale Modification for Audio is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You have received a copy of the GNU General Public License
   along with MFFM Time Scale Modification for Audio
 */
#ifndef WSOLA_H_
#define WSOLA_H_

///Use this to debug general
//#define W_DEBUG
#ifdef W_DEBUG
#include <fstream>
#endif

#define VERSION_NUM "4.8"

#ifndef WIN32
#include <values.h>
#else
#include "win32fix.H"
#endif

//Rounding helper used throughout this file : roundD(a) rounds a to an integral
//value by deferring to rint(), which honours the current floating point
//rounding mode. It is only defined here if nobody defined roundD beforehand.
#ifndef roundD
//#define roundD(a) ((a-(int)a>=0.5) ? ceil(a) : floor(a))
#define roundD(a) rint(a)
#endif

/*
#ifndef MAXDOUBLE
//#define MAXDOUBLE 1.79769e+308
#include "win32fix.H"
#endif



#ifndef rint
#include "win32fix.H"
#endif

#ifndef MAXINT
#include "win32fix.H"
#endif

#ifndef MAXDOUBLE
#include "win32fix.H"
#endif
*/

//Accessors for a complex sample stored as a two element array :
//element 0 is treated as the real part, element 1 as the imaginary part
#define c_re(c) ((c)[0])
#define c_im(c) ((c)[1])

#include <string.h>
#include <iostream>
using namespace std;
#include "hanning.H"

//#include "estimate.H"

//From the MFFM FFTw C++ wrapper available at sourceforge.net
#include <mffm/complexFFT.H>
#include <complex>

//From the MFFM_TimeCode library available at sourceforge.net
#include <mffm/timeCode.H>

///The audio sample type, may be overridden by defining ATYPE before inclusion
#ifndef ATYPE
#define ATYPE short int
#endif

///The sample frequency (Hz), may be overridden before inclusion
#ifndef SAMPLE_FREQUENCY
#define SAMPLE_FREQUENCY 44100 ///The default sample frequency
#endif

///The hanning window duration in s
#define HANNING_DURATION 0.08
///The proportional hanning overlap
#define HANNING_OVERLAP 0.5
///The sample length of the hanning window for the given sample frequency,
///i.e. HANNING_DURATION*sampFreq rounded with roundD
#define HANNING_LENGTH(sampFreq) (int)roundD(HANNING_DURATION*(double)sampFreq)

//The pitch (Hz) which we don't want to exceed the period of during matching.
//The WSOLA constructor divides the hanning length by this value (and scales
//by the channel count) to obtain deltaMax, the length of the search range.
#define DELTA_DIVISOR 8.0

/* reference :
   An Overlap Add Technique Based on Waveform Similarity (WSOLA)
   For High Quality Time-Scale Modification of Speech
   Werner Verhelst and Marc Roelands
   IEEE publication
*/

///The time code structures used within
#define MASTERCOUNTERTYPE_W MasterCounter<field, 1>
#define TIMECODETYPE_W TimeCode<MASTERCOUNTERTYPE_W, ATYPE>

class WSOLA : public Hanning {
  //double avBestMatch; ///The average best match
  long int frameCnt; // The frame count
  TIMECODETYPE_W sFrame, dFrame; //Source and desired frames
  ATYPE *output; // The timescale modified audio

  int N; //The total size of the fft data structures
  ///The fft structure used to implement the convolution
  complexFFT *fft;
  ///The fft data structures
  complexFFTData *sFrameFFTData, *dFrameFFTData, *hanningFFTData,  *normWindowData;

  int hanningLength, deltaMax, sampleFrequency;

  ///Inits the fft structures
  void initFFTStructures(int count){
    if (sFrameFFTData)
      if (sFrameFFTData->getSize()!=count)
	deInitFFTStructures();
      else
	return;
	
    //Set up the fft structures
    if (!(sFrameFFTData=new complexFFTData(count)))
		std::cerr << "WSOLA::initFFTStructures : couldn't malloc sFrameFFTData of size "<<count<<endl;
    if (!(dFrameFFTData=new complexFFTData(count)))
      std::cerr << "WSOLA::initFFTStructures : couldn't malloc dFrameFFTData of size "<<count<<endl;
    if (!(hanningFFTData=new complexFFTData(count)))
      std::cerr << "WSOLA::initFFTStructures : couldn't malloc hanningFFTData of size "<<count<<endl;
    if (!(normWindowData=new complexFFTData(count)))
      std::cerr << "WSOLA::initFFTStructures : couldn't malloc normWindowData of size "<<count<<endl;
    if (!(fft=new complexFFT(hanningFFTData)))
      std::cerr << "WSOLA::initFFTStructures : couldn't malloc fft"<<endl;
    //set the hanning data and Norm. data here
    for (int i=0;i<hanningLength;i++){
      c_re(hanningFFTData->in[i])=wnd[i];
      c_im(hanningFFTData->in[i])=0.0;
    }
    for (int i=hanningLength;i<count;i++){
      c_re(hanningFFTData->in[i])=c_im(hanningFFTData->in[i])=0.0;
    }
    fft->fwdTransform(); //Find the hanning data DFT
  }

  /// De-Inits the FFT structures
  void deInitFFTStructures(void){
    if (fft) delete fft; fft=NULL;
    if (hanningFFTData) delete hanningFFTData; hanningFFTData=NULL;
    if (normWindowData) delete normWindowData; normWindowData=NULL;
    if (sFrameFFTData) delete sFrameFFTData; sFrameFFTData=NULL;
    if (dFrameFFTData) delete dFrameFFTData; dFrameFFTData=NULL;
  }

  /* call this to init structures which aren't FFT based */
  int newInit(void){
#ifdef W_DEBUG
    std::cout<<"WSOLA::newInit : enter "<<endl;
#endif
    if (output) delete [] output;
    if (!(output=new ATYPE[hanningLength])){
      std::cerr<<"WSOLA::process : output audio array malloc failure"<<endl;
      return -1;
    }
    bzero(output, hanningLength*sizeof(ATYPE));

    reset();//Set time code locations to zero and maximise endpoints

#ifdef W_DEBUG
    std::cout<<"WSOLA::newInit : exit "<<endl;
#endif

    return 0;
  }

  ///This function returns the maximum similarity location
  int findSimilarityFFTNEW(){
    //time data must be preloaded   
    fft->switchData(sFrameFFTData);
    fft->fwdTransform();
    fft->switchData(normWindowData);
    fft->fwdTransform();
    fft->switchData(dFrameFFTData);
    fft->fwdTransform();

    // apply hanning to next source frame in this domain ...
    //multiply the outputs
    for (int i=0;i<N;i++){
      std::complex<fftw_real> res1=(std::complex<fftw_real>(c_re(sFrameFFTData->out[i]), -c_im(sFrameFFTData->out[i])));

      //Find the Similarity
      std::complex<fftw_real> res2=(std::complex<fftw_real>(c_re(dFrameFFTData->out[i]), c_im(dFrameFFTData->out[i])));
      res2*=res1;
      c_re(dFrameFFTData->out[i])=res2.real();
      c_im(dFrameFFTData->out[i])=res2.imag();

      //Find the Norm
      std::complex<fftw_real> res3=(std::complex<fftw_real>(c_re(hanningFFTData->out[i]), c_im(hanningFFTData->out[i])));
      std::complex<fftw_real> res4=(std::complex<fftw_real>(c_re(normWindowData->out[i]), -c_im(normWindowData->out[i])));
      //      cout<<res3<<'\t'<<res4<<'\t';
      res3*=res4;
      //cout<<res3<<'\n';
      c_re(normWindowData->out[i])=res3.real();
      c_im(normWindowData->out[i])=res3.imag();
    }


      //Make sure we point to the correct output buffer
      fft->switchData(dFrameFFTData);
      //inverse transform ...
      fft->invTransform();

      fft->switchData(normWindowData);
      //inverse transform ...
      fft->invTransform();

#ifdef W_DEBUG
      ofstream outlist("bestLocsR.txt",ios::app);
      for (int i=0;i<dFrameFFTData->getSize();i++)
	outlist<<c_re(dFrameFFTData->in[i])<<'\t';
      outlist<<endl;
      outlist.close();

      outlist.open("bestLocsI.txt",ios::app);
      for (int i=0;i<dFrameFFTData->getSize();i++)
	outlist<<c_im(dFrameFFTData->in[i])<<'\t';
      outlist<<endl;
      outlist.close();
#endif

      //Scan for the maximum
      double maximum=-MAXDOUBLE, minimum=MAXDOUBLE, tempD;
      int bestLocation=0, bestMin=0, bestMax=0;
      for (int i=0;i<deltaMax;i+=channels){
	tempD=c_re(dFrameFFTData->in[i])/(c_re(normWindowData->in[i])*c_re(normWindowData->in[i]));
	//tempD=c_re(dFrameFFTData->in[i]);
	//tempD=c_re(dFrameFFTData->in[i])*c_re(normWindowData->in[i]);
	//for (int j=1;j<channels;j++)
	//  tempD+=c_re(dFrameFFTData->in[i+j])/c_re(normWindowData->in[i+j]);
	if (tempD>maximum){
	  bestMax=i;
	  maximum=tempD;
	}
	if (tempD<minimum){
	  bestMin=i;
	  minimum=tempD;
	}
      }
      bestLocation=bestMax;
      //std::cout<<"bestLocation = "<<bestLocation<<endl;
      return bestLocation;
  }

  int processFrameFFT(void){
#ifdef W_DEBUG
    std::cout<<"WSOLA::processFrameFFT : enter "<<endl;
#endif

    ATYPE* tempDFW=dFrame.window->getDataPtr();
    for (int i=0;i<hanningLength;i++){ //Load the desired range
      c_re(dFrameFFTData->in[i])=(double)tempDFW[i]*wnd[i]*wnd[i];
      c_im(dFrameFFTData->in[i])=0.0;
    }
    for (int i=hanningLength;i<hanningLength+deltaMax;i++)
      c_im(dFrameFFTData->in[i])=c_re(dFrameFFTData->in[i])=0.0;

    ATYPE* tempSFW=sFrame.window->getDataPtr();
    for (int i=0;i<hanningLength+deltaMax;i++){ //Load the search range
      c_re(sFrameFFTData->in[i])=(double)tempSFW[i];
      //c_re(normWindowData->in[i])=(double)(tempSFW[i]*tempSFW[i]);
      if (tempSFW[i]>=0)
	c_re(normWindowData->in[i])=(double)tempSFW[i];
      else
	c_re(normWindowData->in[i])=(double)-tempSFW[i];
      c_im(normWindowData->in[i])=c_im(sFrameFFTData->in[i])=0.0;
    }
    //Search through for the closest match
    int bestLocation=0;
    bestLocation=findSimilarityFFTNEW();

#ifdef W_DEBUG
    std::cout<<"WSOLA::processFrame : exit"<<endl;
#endif
    return bestLocation;
  }
  
  int findBestMatch(void){
    int ret=0;
    int wndCnt=hanningLength, bestLocation;
    //Set the source frame to its beginning location
    if ((bestLocation=processFrameFFT())<0){
      std::cout<<"error ocurred during processFrameFFT function"<<endl;
      return PROCFFT_ERR;
    }
    return bestLocation;
  }

  void copyBestMatch(ATYPE *extOutput, int bestLocation){
    ATYPE* tempDFW=sFrame.window->getDataPtr();
    for (int i=0;i<hanningLength;i++) // Copy over by windowing and adding
      output[i]+=(ATYPE)roundD((double)tempDFW[i+bestLocation]*wnd[i]);
    for (int i=0;i<hanningLength/2;i++) // Copy over to the external output
      extOutput[i]=output[i];
  }

public:
  //Labeled errors
  typedef enum _errors {DEFAULT_ERR=-1, INPUT_READ2DF_ERR=-2, INPUT_READ2SF_ERR=-3, WRITE_ERR=-4, PROCFFT_ERR=-5, FINISHED_NORMALLY=-6} errors;
  
  int channels;
  /**Constructor ...
     This constructor takes the length of the hanning window, the sample
     frequency, the channel count (default=1)
  */
  WSOLA(int hl=HANNING_LENGTH(SAMPLE_FREQUENCY), int sf=SAMPLE_FREQUENCY, int ch=1) : Hanning(hl){
    
#ifdef W_DEBUG
    std::cout<<"WSOLA::WSOLA : enter"<<endl;
#endif
    //avBestMatch=0.0;
    frameCnt=0;
    channels=ch;
    std::cout<<"Assuming "<<channels<<" channels"<<endl;
    std::cout<<"hanning length = "<<hl<<endl;
    hanningLength=hl;
    deltaMax=(int)roundD((double)hanningLength/DELTA_DIVISOR*channels); 
    std::cout<<"deltaMax="<<deltaMax<<endl;    //Make sure we are using a valid deltaMax with respect to channel count
    while (remainder((double)(hanningLength+deltaMax)/(double)channels,floor((double)(hanningLength+deltaMax)/(double)channels))!=0.0){
      std::cout<<"hanning+delta remainder = "<<remainder((double)(hanningLength+deltaMax)/(double)channels,floor((double)(hanningLength+deltaMax)/(double)channels))<<endl;
      deltaMax++;
    }
    std::cout<<"deltaMax="<<deltaMax<<endl;    //Make sure we are using a valid deltaMax with respect to channel count

    //Set up null pointers
    fft=NULL;
    dFrameFFTData=sFrameFFTData=NULL;
    N=deltaMax+hanningLength;
    initFFTStructures(N);

    sampleFrequency=sf;
    output=NULL;

    if (newInit()<0){
      std::cout<<"WSOLA:: error - couldn't init the structures - out of memory ?"<<endl;
      exit(-1);
    }
#ifdef W_DEBUG
    std::cout<<"hanningLenght = "<<hanningLength<<" deltaMax = "<<deltaMax<<endl;
    std::cout<<"WSOLA::WSOLA : exit"<<endl;
#endif
  }

  ///Destructor
  ~WSOLA(void){
#ifdef W_DEBUG
    std::cout<<"WSOLA::~WSOLA : enter"<<endl;
#endif
    if (output) delete [] output;
    deInitFFTStructures();
#ifdef W_DEBUG
    std::cout<<"WSOLA::~WSOLA : exit"<<endl;
#endif
  }

  void initProcess(const ATYPE*inputSrc, double tau){
    int halfWndCnt=(int)((double)hanningLength/2.0);
    std::cout<<"halfWndCnt "<<halfWndCnt<<endl;
    for (int i=0;i<halfWndCnt;i++)
      output[i]=(ATYPE)roundD((double)inputSrc[i]*wnd[i+halfWndCnt]);
  }
  
  /** Called by the user to process a frame */
  int processFrame(ATYPE *extOutput, double tau){
#ifdef W_DEBUG
    std::cout<<"WSOLA::processFrame(tau) : enter"<<endl;
#endif
    int ret=0;
    int bestLoc;
    if ((bestLoc=findBestMatch())<0){
      std::cerr<<"Error findBestMatch HERE"<<endl;
      return bestLoc;
    }
    frameCnt++;

    copyBestMatch(extOutput,bestLoc); //copy the best match to the output

    shiftOn(tau);
    return ret;
  }

  ///This is the size of the modified (output) data
  int getCount(void){return dFrame.getEnd();}

  ///This is the index in input stream to read from
  int getSourceIndex(void){return sFrame.getCount();}
  ///This is the length of elements required by the input stream to read
  int getSourceLength(void){return (hanningLength+deltaMax);}
  ///Loads an external memory source stream to correct locations in WSOLA
  void loadSourceInput(ATYPE *inSrc){
    // Copies memory location to memory location
    memcpy(sFrame.window->getDataPtr(), inSrc, (hanningLength+deltaMax)*sizeof(ATYPE));
  }

  ///Loads an external memory source stream to correct locations in WSOLA
  void loadDesiredInput(ATYPE *inDes){
    // Copies memory location to memory location
    memcpy(dFrame.window->getDataPtr(), inDes, hanningLength*sizeof(ATYPE));
  }
  ///This is the index in input stream to read from
  int getDesiredIndex(void){return dFrame.getCount();}
  ///This is the length of elements required by the desired frame
  int getDesiredLength(void){return dFrame.window->getCount();}

  void reset(void){
    //Set up the time codes - default to max int size
    sFrame.init(0,hanningLength); // Dumy inits ... set maximum now
    sFrame.setFinish(MAXINT-1);
    sFrame.setEnd(MAXINT-2); // Make sure you don't try to set the current count to the same as the finish count or it will loop !
    
    dFrame.init(0,hanningLength); // Dumy inits ... set maximum now
    dFrame.setFinish(MAXINT-1);
    dFrame.setEnd(MAXINT-2);

    //Ensure the array frame and window sizes are correct
    (*sFrame.window)=hanningLength;
    (*dFrame.window)=hanningLength;
    sFrame.window->setFrameSize(sizeof(ATYPE));
    dFrame.window->setFrameSize(sizeof(ATYPE));

    //Process input frame by input frame ....
    sFrame=0;
    dFrame=0;
  }

  //Returns the number of bytes in a frame
  int getFrameSize(void){
    return sFrame.window->getFrameSize();
  }

  //sets the source position - leaves the desired position
  void setPosition(int pos){
    sFrame=pos;
  }

  void checkPositions(void){
    std::cout<<"sFrame "<<sFrame<<'\n'<<"dFrame "<<dFrame<<endl;
  }

  ///This function is now public to that the audio can bypass WSOLA for tau=1.0
  void shiftOn(double tau){
    for (int i=0;i<hanningLength/2.0;i++){
      output[i]=output[i+(int)((double)hanningLength/2.0)];
      output[i+(int)((double)hanningLength/2.0)]=(ATYPE)0.0;
    }

    // Locate to the next desired frame to match to
    dFrame=(int)roundD((double)sFrame.getCount()+(double)hanningLength/2.0);
    // Locate to the corresponding search region
    sFrame=(int)roundD((double)sFrame.getCount()+(double)hanningLength/2.0/tau);
  }
};
#endif //WSOLA_H_
