/* -*- Mode: c++ -*- 
 *
 *  Copyright 1997 Massachusetts Institute of Technology
 * 
 *  Permission to use, copy, modify, distribute, and sell this software and its
 *  documentation for any purpose is hereby granted without fee, provided that
 *  the above copyright notice appear in all copies and that both that
 *  copyright notice and this permission notice appear in supporting
 *  documentation, and that the name of M.I.T. not be used in advertising or
 *  publicity pertaining to distribution of the software without specific,
 *  written prior permission.  M.I.T. makes no representations about the
 *  suitability of this software for any purpose.  It is provided "as is"
 *  without express or implied warranty.
 * 
 */


#ifndef _VRPULSECORRELATOR_H_
#define _VRPULSECORRELATOR_H_

#include <VrDecimatingSigProc.h>

#if defined (ENABLE_MMX)
#include <VrMMX.h>
#endif

/* 
  This filter is a version of the complex FIR filter that is designed
  to be "digital aware". It tracks the symbol boundaries and
  (eventually) will be able to provide any pattern of output samples
  in each symbol period.

  The "digital aware" portions only support a single output stream
  now, but it seems feasible to make this filter support multiple
  outputs.  
  */
// ********************************************************

// Number of fractional bits in the fixed-point symbol timing values
// (symbol_period, symbol_boundary, bump).  One input sample corresponds to
// (1 << PRECISION_BITS); positions are converted back to whole samples with
// ">> PRECISION_BITS".
#define   PRECISION_BITS    12

/*
 * Symbol-aware complex FIR correlator.
 *
 * Consumes iType samples and emits VrComplex results.  Symbol timing is kept
 * in fixed point with PRECISION_BITS fractional bits.  In normal operation
 * work() emits one result per symbol period; after start_oversampling() it
 * temporarily emits 2^B results per symbol into a caller-supplied buffer.
 */
template<class iType> 
class VrPulseCorrelator : public VrDecimatingSigProc<iType,VrComplex> {
protected:
  // --- filter size / timing nudge ---
  int num_taps;               // number of entries in taps[]
  int bump;                   // pending one-shot offset added to symbol_boundary by work();
                              // set by setSymbol_Timing(), cleared once applied

  // --- oversampling ("high res") state, configured by start_oversampling() ---
  int shift_bits;             // B: 2^shift_bits results per symbol while oversampling
  int max_sample_count;       // N * 2^B; oversampling stops when sample_count reaches it
  int sample_count;           // 1-based index of the current oversampled result
  int high_res;               // nonzero while work() is in oversampling mode
  int middle_offset;          // sample_count phase at which work() also writes to o[0]
  int samples_per_symbol;     // 2^shift_bits
  int *all_done;              // caller's flag: cleared on request, set to 1 when finished
  int high_res_start;         // countdown (the 'wait' argument) until oversampling starts
  int high_res_waiting;       // nonzero while an oversampling request is pending

  // --- coefficients and phase rotation ---
  VrComplex* taps;            // filter coefficients, filled in by buildFilter_complex()
  VrComplex phase_correction; // running rotation applied to results when center_freq != 0
  VrComplex phase_corr_incr;  // per-result rotation step in normal mode
  VrComplex *over_sampled_result; // caller's buffer receiving every oversampled result
  VrComplex small_corr_incr;  // per-result rotation step in oversampling mode

  long time;                  // not referenced by any code in this header

  // --- fixed-point symbol tracking (PRECISION_BITS fractional bits) ---
  int symbol_boundary;        // position of the current symbol boundary
  int symbol_period;          // symbol length in input samples
  int pointer_incr;           // whole input samples to advance after the current result
  int old_output_loc;         // whole-sample position of the previous result
  int new_output_loc;         // whole-sample position of the current result

  float center_freq;          // frequency-translation centre; 0 selects the plain window
  float gain;                 // scale factor applied to every tap

  // Recompute taps[] and phase_corr_incr from the current settings.
  void buildFilter_complex();
#if defined (ENABLE_MMX)
  mmxTaps* processedTaps; //Precomputed constants, shifted four times
#endif
public: 
  virtual const char *name() { return "VrPulseCorrelator"; }

  // Produce output.size results on o[0] from the input stream i[0].
  virtual int work(VrSampleRange output, VrComplex *o[],
                   VrSampleRange inputs[], iType *i[]);

  // Allocate and build the taps and set the required input history.
  virtual void initialize();

  // Each setter below returns 1.
  int setCenter_Freq(float);
  int setNumber_Taps(int);

  // Symbol period in seconds (get / set).
  float getSymbol_Period();
  int setSymbol_Period(float);

  // Queue a timing adjustment expressed as a fraction of the symbol period.
  int setSymbol_Timing(float);

  // Arguments, in order: N symbols, B (2^B results per symbol), wait,
  // result buffer, completion flag.
  void start_oversampling(int, int,int,VrComplex*,int*);

  // NOTE(review): no definition of this overload is visible in this header.
  VrPulseCorrelator(int n, float d, const int t[], const float f[], 
                    const float g[]);

  // d: symbol period in input samples, t: tap count, f: centre frequency, g: gain.
  VrPulseCorrelator(float d, int t,float f, float g);
  ~VrPulseCorrelator();

  int version() { return 0; }
};

/*
 * Produce output.size results on o[0].
 *
 * Every pass of the loop:
 *   1. advances the fixed-point symbol tracking and works out how many whole
 *      input samples to skip afterwards (pointer_incr),
 *   2. applies any pending timing bump,
 *   3. correlates num_taps input samples against taps[],
 *   4. rotates the result by the running phase correction when
 *      center_freq != 0,
 *   5. stores the result and advances the input pointer i[0].
 *
 * In normal mode each pass writes one result to o[0].  In oversampling mode
 * every pass writes to over_sampled_result[], but only the pass whose
 * sample_count lines up with middle_offset also writes to o[0], so several
 * passes may run per emitted output.
 *
 * Returns output.size.
 */
template<class iType> int
VrPulseCorrelator<iType>::work(VrSampleRange output, VrComplex *o[],
				VrSampleRange inputs[], iType *i[])
{
  const unsigned int size = output.size;

  cout << "    enter Work in filter" << endl;

  // The output counter must not be named `i`: that would hide the input
  // array parameter and break every `i[0]` below.
  unsigned int produced = 0;
  while (produced < size) {

    // A pending oversampling request counts down one pass at a time.  The
    // short-circuit && keeps the countdown untouched when nothing is pending.
    if (high_res_waiting && (high_res_start-- == 0)) {
      high_res = 1;
      high_res_waiting = 0;
    }

    if (high_res) {
      const int phase = sample_count % (1 << shift_bits);
      if (sample_count == 1)  symbol_boundary += symbol_period;
      if (phase == 0)  symbol_boundary += symbol_period;
      // Step through the symbol in 2^shift_bits equal fixed-point increments.
      const int output_offset = phase * (symbol_period >> shift_bits);
      new_output_loc = (symbol_boundary + output_offset) >> PRECISION_BITS;
    } else {
      // Normal mode: one result per symbol, taken half a period past the boundary.
      symbol_boundary += symbol_period;
      new_output_loc = (symbol_boundary + (symbol_period/2)) >> PRECISION_BITS;
    }

    pointer_incr = new_output_loc - old_output_loc;
    if (pointer_incr < 0) {
      // Adds the full span of a shifted 32-bit position; presumably this
      // compensates for symbol_boundary wrapping around -- confirm.
      pointer_incr += (1 << (32-PRECISION_BITS));
    }
    old_output_loc = new_output_loc;

    // One-shot timing adjustment requested through setSymbol_Timing().
    if (bump != 0) {
      symbol_boundary += bump;
      cout << "Bumping pointer " << (float) (bump) / (float)(1<<PRECISION_BITS)  << endl;
      bump = 0;
    }

    VrComplex result = 0;

    // Local pointer to the first of the num_taps samples used for this result.
    iType *inputArray = i[0]+history+(-num_taps+1);

#if defined (ENABLE_MMX)
    if (processedTaps->mmxReady())
      result = processedTaps->mmxCVDProduct(inputArray);
    else {
      for (int j = 0; j < num_taps; j++)
        result += taps[j] * inputArray[j];
    }
#else
    for (int j = 0; j < num_taps; j++)
      result += taps[j] * inputArray[j];
#endif

    // Perform phase correction (non-trivial only for freq-xlating filter)
    if (center_freq != 0.0) {
      phase_correction *= high_res ? small_corr_incr : phase_corr_incr;
      result *= phase_correction;
    }

    if (high_res) {
      over_sampled_result[sample_count-1] = result;
      if ((sample_count - middle_offset) % samples_per_symbol == 0) {
        produced++;
        *o[0]++ = result;
      }
      sample_count++;
      if (sample_count == max_sample_count) {
        // Oversampling run complete: return to normal mode and tell the caller.
        sample_count = 1;
        high_res = 0;
        *all_done = 1;
      }
    } else /* not HIGH_RES */ {
      produced++;
      *o[0]++ = result;
    }

    // Advance the input stream to the position of the next result.
    i[0] += pointer_incr;
  }

  cout << "    leave Work in filter" << endl;
  return output.size;
}

/*
 * Fill taps[0..num_taps-1] and set phase_corr_incr from the current
 * num_taps, gain, center_freq and symbol_period.
 *
 * center_freq == 0: real Hamming window scaled by gain (low-pass).
 * otherwise:        the same window multiplied by exp(-j*a*index), where
 *                   a = 2*pi*center_freq / input sampling frequency.
 *
 * phase_corr_incr becomes exp(-j*a*P) with P the symbol period in input
 * samples; when center_freq == 0, a stays 0 and the increment is (1, 0).
 *
 * Under ENABLE_MMX the precomputed mmxTaps object is rebuilt as well.
 */
template<class iType> void
VrPulseCorrelator<iType>::buildFilter_complex(){
  int inSampFreq;
  int index;
  float N = num_taps, a=0.0;
  float M = N-1; /* filter Order */

  inSampFreq = getInputSamplingFrequencyN(0); 

  if (center_freq == 0.0){

    // Build Complex Filter => 
    //            produces a low-pass filter using a real Hamming window
    for ( index=0 ; index < num_taps ; index++) {
      // A single-tap filter has M == 0; use a unit window instead of 0/0.
      const double window = (M > 0) ? (0.54-0.46*cos(2*M_PI*index/(M))) : 1.0;
      taps[index] = gain*VrComplex(window);
    }
  } else {
    // Build composite Complex Filter => adds freq-shifting part
    a = 2*M_PI*center_freq / (float)inSampFreq;
    for ( index=0 ; index < num_taps ; index++) {
      // A single-tap filter has M == 0; use a unit window instead of 0/0.
      const double window = (M > 0) ? (0.54-0.46*cos(2*M_PI*index/(M))) : 1.0;
      taps[index] = VrComplex(gain*cos(a*index)*window,
                              gain*(-1)*sin(a*index)*window);
    }
    // The per-decimation phase increment that used to be stored here was
    // always overwritten below, so it is no longer computed.
  }

  // Rotation accumulated over one symbol period (fixed point -> samples).
  a = a * ((float)(symbol_period) / (float)((int)1 << PRECISION_BITS));
  phase_corr_incr = VrComplex(cos(a),(-1)*sin(a));

#if defined (ENABLE_MMX)
  // Deleting a null pointer is a no-op, so no check is needed.
  delete processedTaps;
  processedTaps=new mmxTaps(taps,num_taps);
#endif
}

/*
 * Construct a correlator.
 *
 *   per  - symbol period in input samples (may be fractional); its integer
 *          part is passed to the VrDecimatingSigProc base constructor
 *   t    - number of filter taps
 *   freq - centre frequency for the frequency-translating filter (0 = none)
 *   g    - gain applied to every tap
 *
 * The taps themselves are allocated later, in initialize().  Every member is
 * given a defined value here so that work(), setNumber_Taps() and the
 * destructor never read an indeterminate value: taps starts out null, and
 * oversampling starts out switched off.
 */
template<class iType> 
VrPulseCorrelator<iType>::VrPulseCorrelator(float per,int t,float freq, float g)
  :VrDecimatingSigProc<iType,VrComplex>(1,(int) per)
{
  // Fixed-point symbol tracking.
  symbol_period = (int) (per * (float)(1 << PRECISION_BITS));
  symbol_boundary = 0;
  old_output_loc = (symbol_period >>1) >> PRECISION_BITS;
  new_output_loc = old_output_loc;
  pointer_incr = 0;
  bump = 0;
  time = 0;

  // Filter description; coefficient storage is created in initialize().
  num_taps = t;
  taps = 0;
  phase_correction = VrComplex(1,0);
  phase_corr_incr = VrComplex(1,0);
  small_corr_incr = VrComplex(1,0);
  center_freq = freq;
  gain = g;

  // Oversampling is off until start_oversampling() is called.
  high_res = 0;
  high_res_waiting = 0;
  high_res_start = 0;
  shift_bits = 0;
  samples_per_symbol = 1;
  max_sample_count = 0;
  sample_count = 1;
  middle_offset = 1;
  all_done = 0;
  over_sampled_result = 0;
#if defined (ENABLE_MMX)
  processedTaps=NULL;
#endif
}

/*
 * Allocate the coefficient array, build the filter, request num_taps samples
 * of input history and clear any pending oversampling request.
 */
template<class iType> 
void VrPulseCorrelator<iType>::initialize()
{
  // Coefficient storage, then the coefficients themselves.
  taps = new VrComplex[num_taps];
  buildFilter_complex();

  // History requested from the framework equals the tap count.
  int history_len = num_taps;
  setHistory(history_len);

  // Nothing is waiting to switch into oversampling mode yet.
  high_res_waiting = 0;
}

/*
 * Change the centre frequency and rebuild the taps accordingly.
 * Always returns 1.
 */
template<class iType> 
int VrPulseCorrelator<iType>::setCenter_Freq(float cf)
{
  this->center_freq = cf;

  // The taps and phase increment both depend on center_freq.
  this->buildFilter_complex();

  return 1;
}

/*
 * Change the number of filter taps: replace the coefficient array, request
 * matching input history and rebuild the filter.
 * Always returns 1.
 */
template<class iType> 
int VrPulseCorrelator<iType>::setNumber_Taps(int numT)
{
  num_taps = numT;

  // taps is allocated with new[], so it must be released with delete[].
  delete[] taps;
  taps=new VrComplex[num_taps];

  //set history
  setHistory(num_taps);
  buildFilter_complex();
  return 1;
}

/*
 * Return the symbol period in seconds: the fixed-point period is converted
 * to input samples and then divided by the input sampling frequency.
 */
template<class iType> 
float VrPulseCorrelator<iType>::getSymbol_Period() 
{
  const float period_in_samples = (float)symbol_period / (float)(1 << PRECISION_BITS);
  return period_in_samples / (float)getInputSamplingFrequencyN(0);
}

/*
 * Set the symbol period from a duration in seconds.  The value is stored as
 * a fixed-point count of input samples and echoed to cout.
 * Always returns 1.
 */
template<class iType> 
int VrPulseCorrelator<iType>::setSymbol_Period(float period)
{
  // seconds -> fixed point -> fixed-point input samples
  const float fixed_point_seconds = period * (float)(1 << PRECISION_BITS);
  const float fixed_point_samples = fixed_point_seconds * (float)getInputSamplingFrequencyN(0);
  symbol_period = (int)fixed_point_samples;

  cout << " Set period to " << (double) symbol_period / (double)(1 << PRECISION_BITS) << endl;
  return 1;
}

/*
 * Queue a symbol-timing adjustment given as a (floating-point) fraction of
 * the symbol period: positive advances the boundary relative to the sample
 * stream, negative retards it.  The offset is stored in bump, which work()
 * adds to symbol_boundary and then clears.
 * Always returns 1.
 */
template<class iType> 
int VrPulseCorrelator<iType>::setSymbol_Timing(float fraction)
{
  const float offset = fraction * (float) symbol_period;
  bump = (int)offset;
  return 1;
}

/*
 * Arrange for the filter to produce multiple outputs per symbol so that the
 * controlling (downstream) module can correlate for a sync pulse and recover
 * symbol timing.
 *
 *   N          - number of symbol periods to oversample
 *   B          - 2^B output samples per symbol period (B >= 0)
 *   wait       - start in 'wait' symbols (wait = 0 => start with next symbol)
 *   output_ptr - buffer that work() fills with every oversampled result
 *   done_flag  - cleared here; work() sets it to 1 when the run is complete
 *
 * Both pointers are dereferenced (done_flag here, both in work()), so
 * neither may be null.
 */
template<class iType> 
void VrPulseCorrelator<iType>::start_oversampling(int N, int B, int wait, VrComplex *output_ptr,
						 int *done_flag)
{
  shift_bits = B;
  max_sample_count = N * (1 << shift_bits);
  high_res_start = wait;
  high_res_waiting = 1;
  sample_count = 1;
  high_res = 0;

  over_sampled_result = output_ptr;
  // With B == 0 there is one sample per symbol and "shift_bits-1" would be a
  // negative shift count (undefined); every sample is the middle one then.
  middle_offset = (shift_bits > 0) ? (1 << (shift_bits-1))+1 : 1;
  samples_per_symbol = (1 << shift_bits);
  all_done = done_flag;
  *all_done = 0;

  // Phase rotation per oversampled result: one symbol's worth of rotation
  // divided into 2^B equal steps.
  float arg = 2*M_PI*center_freq / (float)getInputSamplingFrequencyN(0);
  const float step = arg*(float)symbol_period / (float)(1 << (PRECISION_BITS+B));
  small_corr_incr = VrComplex(cos(step), (-1)*sin(step));
}

/*
 * Release the coefficient array and, under ENABLE_MMX, the precomputed
 * mmxTaps object.
 */
template<class iType> 
VrPulseCorrelator<iType>::~VrPulseCorrelator()
{
  // taps is allocated with new[], so it must be released with delete[].
  delete[] taps;
#if defined (ENABLE_MMX)
  delete processedTaps;
#endif
}
#endif
