/* -*- Mode: c++ -*- */
/*
 * Copyright 2001 Free Software Foundation, Inc.
 * 
 * This file is part of GNU Radio
 * 
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 * 
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with GNU Radio; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
/*
 *  Copyright 1997 Massachusetts Institute of Technology
 * 
 *  Permission to use, copy, modify, distribute, and sell this software and its
 *  documentation for any purpose is hereby granted without fee, provided that
 *  the above copyright notice appear in all copies and that both that
 *  copyright notice and this permission notice appear in supporting
 *  documentation, and that the name of M.I.T. not be used in advertising or
 *  publicity pertaining to distribution of the software without specific,
 *  written prior permission.  M.I.T. makes no representations about the
 *  suitability of this software for any purpose.  It is provided "as is"
 *  without express or implied warranty.
 * 
 */

#include <VrSigProc.h>
#include <VrConnect.h>

// Rounds SIZE up, in place, to the next multiple of UNIT (integer arithmetic).
// Expands to a braced block, and evaluates both arguments more than once, so
// neither argument should have side effects.
#define ROUNDUP(SIZE,UNIT) {SIZE = ( ( (SIZE+UNIT-1)/UNIT) * UNIT ); }
// Set to non-zero to enable the printf tracing in markData() / compute().
#define DEBUG_SIGP	0
// CACHESIZE is defined outside this file; cacheSize is not referenced in this file.
unsigned int cacheSize = CACHESIZE;
float maxLatency = .2; //in seconds
// startMarkedModule is written at the end of a successful markData() and
// records the module that most recently marked data.  With THREADS it is a
// pthread key (one value per thread); otherwise a single global pointer.
#ifdef THREADS
pthread_key_t startMarkedModule;
#else
VrSigProc *startMarkedModule;
#endif 

//! Has the requested data already been computed?
/*!
 * \return true when the end of \p r (index + size) does not lie beyond
 *         the write pointer WP.
 */
bool VrSigProc::dataReady (VrSampleRange r) {
  // The range is finished once WP has reached (or passed) its end.
  //FIX parse through the link list?
  return (WP >= r.index + r.size);
}

//! Has the requested data already been scheduled for computation?
/*!
 * \return true when the end of \p r (index + size) does not lie beyond
 *         the marked write pointer markedWP.
 */
bool VrSigProc::dataMarked (VrSampleRange r) {
  // The range is fully marked once markedWP has reached (or passed) its end.
  return (markedWP >= r.index + r.size);
}
  
// Smallest write space available across all of this module's output
// buffers, given that the caller wants at least "desired" units starting
// at newWP.  Every buffer is asked about the position newWP+desired; the
// smallest answer, plus "desired", is returned.
// Output 0 is always consulted, so this must only be called on a module
// that has at least one output buffer.
int VrSigProc::minwritespace(VrSampleIndex newWP, unsigned int desired)
{
    const VrSampleIndex target = newWP + desired;

    int smallest = outBuffer[0]->writespace(target);
    for (unsigned int k = 1; k < numberOutputs; ++k) {
      const int candidate = outBuffer[k]->writespace(target);
      if (candidate < smallest)
        smallest = candidate;
    }
    return smallest + desired;
}

// Appends r to the tail of the writer linked list (first ... last).
//
// The caller must already hold `mutex` (a holdover from when markData
// could run in multiple threads); this function RELEASES it before
// returning.
//
// When PARANOID is enabled, aborts if the new entry's maxValid is lower
// than the previous tail's, or if it does not equal markedWP.
void VrSigProc::attach_writer(writerLL *r)
{
    if (first) {
      // non-empty list: chain behind the current tail
      last->next = r;

      if (PARANOID && (last->maxValid > r->maxValid)) {
        fprintf(stderr, "Out of order write threads.\n");
        abort ();
      }
    }
    else {
      // empty list: r becomes the head as well
      first = r;
    }
    last = r;

    if (PARANOID && (last->maxValid != markedWP)) {
      fprintf(stderr, "WriterLL inconsistency (another thread marked more data before the writer linked list was updated).\n");
      abort ();
    }

    MUTEX_UNLOCK(&mutex);
}

// Removes r from the writer linked list, under `mutex`.
//
// If r is the head of the list, WP is advanced to r->maxValid.  Otherwise
// r's maxValid is handed to its predecessor, so WP is only updated later,
// when that predecessor is itself detached.
//
// NOTE(review): r is assumed to be on the list; the predecessor search
// does not check for the end of the list.
void VrSigProc::detach_writer(writerLL *r)
{
    MUTEX_LOCK(&mutex); 
    if (first != r) {
#ifdef PERFMON
      cycles->WPdelayedUpdate++;
#endif
      // locate the entry immediately before r
      writerLL *prev = (writerLL *) first;
      for (; prev->next != r; prev = prev->next)
        ;

      // unlink r and let its predecessor inherit r's valid range
      prev->next = r->next;
      prev->maxValid = r->maxValid; 
      if (last == r)
        last = prev;
    }
    else {
      // r is the oldest writer: pop it from the front
      first = r->next;
      if (last == r)
        last = NULL;

      WP = r->maxValid;		// This is where WP gets updated...
    }
    MUTEX_UNLOCK(&mutex);
}

/*!
 * Computes the smallest read pointer over all of this module's output
 * buffers by calling updateMinRP() on each buffer, so the value is as
 * current as possible.
 *
 * \return the minimum; (VrSampleIndex) -1 when there are no output buffers.
 */
VrSampleIndex VrSigProc::proc_minRP() {
    if (!outBuffer)
      return (VrSampleIndex) -1; 	// huge	FIXME I think this should be 0

    VrSampleIndex lowest = outBuffer[0]->updateMinRP();
    for (unsigned int k = 1; k < numberOutputs; ++k) {
      const VrSampleIndex candidate = outBuffer[k]->updateMinRP();
      if (candidate < lowest)
        lowest = candidate;
    }
    return lowest;
}



//! figure out what data to compute next
/*!
  markData is the first part of the markData / VrSigProc::compute dyad.
  These two methods are called from VrMultiTask::schedule to schedule and
  compute the work of the signal processing modules.

  markData is called from the downstream side (starting with the
  sinks) to determine what data to schedule for future computation.

  Note that markData does NOT run in parallel (VrMultiTask is holding
  its mutex whenever it calls markData, and is the root of all calls
  to markData).
  
  This function also has many side effects, including sometimes
  setting startMarkedModule.  Really needs to be cleaned up.

  On success (MARK_READY) the following state is updated:
    - markedWP is advanced to the end of the newly marked range,
    - the marked output range and the forecasted input ranges are saved
      in the (pseudo) thread-specific slots for compute() to pick up,
    - a writerLL entry is attached to this module's writer list,
    - a readerLL entry is attached to each input connector whose
      forecasted input size is positive,
    - startMarkedModule is set to this module.

  \param r the output range the caller would like to have computed.

  \return MARK_ALREADY if all data is already computed.
  \return MARK_READY if data is ready to be computed (and has been marked).
  \return MARK_READY_NO_MARK if data is ready on this level or above
           but no further data should be marked.
  \return MARK_NO_READY if no data is ready to be computed.
  \return MARK_THREAD if data is marked by another thread.
*/

int VrSigProc::markData(VrSampleRange r)
{
  VrSampleRange inputs[numberInputs];
  VrSampleRange testrange;
  int requestSize;
  VrSampleIndex minrp = 0;
  bool dataMarkedUpstream = false;
  unsigned int space = 0;

  VrSampleIndex pendingWP = markedWP;  		// what's already marked
  VrSampleIndex desiredWP = r.index + r.size;	// goal


  if (DEBUG_SIGP)
    printf ("[%s:%d] enter name %s WP %lld pendingWP %lld r.index %lld\n",
	    __FILE__, __LINE__, name(), WP, pendingWP, r.index);

  // Check if data is already computed
  if (desiredWP <= WP)
    return MARK_ALREADY; // all data has already been finished

  /** Check if unmarked data precedes us **/

  if(pendingWP < r.index) {
    // printf ("[%s:%d] unmarked precedes\n", __FILE__, __LINE__); // FIXME -eb

    // Data before the data we want has not been marked
    // Either we can skip some data or another thread
    //   needs to mark data before we do.
    
    // Skipping is only considered when nothing is pending (WP == pendingWP)
    // and every downstream reader has moved past WP.
    if(WP==pendingWP && (minrp=proc_minRP()) > WP) {

      // we are first writing thread and all other chains are done
      // with data before VrSampleIndex minrp, therefore we can skip
      // some data

      if(uses_sync) {
	//(can't ever skip if the module uses the sync() call!)
	//so expand our data to compute everything up to index

	unsigned long new_size = r.size + (r.index - pendingWP);

#if 0 // INCOMPLETELY TESTED FIXME
	if (new_size > getMaxOutputSize () - 1){

	  // the new size won't fit in buffer, so break it up.

	  VrSampleRange	r2 = { pendingWP, getMaxOutputSize () - 1 };
	  int result = markData (r2);

	  if (DEBUG_SIGP)
	    printf ("[%s:%d] after markData name %s return %d\n",
		    __FILE__, __LINE__, name(), result);

	  switch (result){
	  case MARK_ALREADY:
	  case MARK_READY:
	  case MARK_READY_NO_MARK:
	    return MARK_READY_NO_MARK;	// keep from trying any more up this path

	  default:
	    return result;
	  }
	}
#endif
	  
	r.size = new_size;
	r.index = pendingWP;

	// FIXME there's a bug here when size > getMaxOutputSize().  The expanded range is greater
	// than the size of the buffer, and therefore can never be computed (this way).
	// The symptom is a diagnostic message from VrBuffer about VrSampleIndex too small.
	// the fix goes here.  -eb 11/1/2001
	//
	// the bug can be triggered using the VrSkippingSink with an
	// increment of 400000.  I saw it trying to refactor GrFFTSink to
	// use VrSkippingSink

	if (r.size > getMaxOutputSize () - 1){
	  printf ("[%s:%d] BUG %s (%d)\n", __FILE__, __LINE__, name (), getMaxOutputSize() - 1);
	}

      }
      else { 
	// Skip ahead, but never past the start of the requested range.
	if(minrp > r.index)
	  minrp = r.index;
#ifdef PERFMON
	cycles->skippedData += (minrp - pendingWP);
#endif
	pendingWP=minrp;
      } 
    }
    else {
      //could: find which of outBuffer[?]->connectors[?]->lastRP == minrp
#ifdef PERFMON
      cycles->blockOnUnmarkedData++;
#endif
      // FIXME -eb
      // printf ("[%s:%d] out of order minrp %lld r.index %lld\n", __FILE__, __LINE__, minrp, r.index);
      return MARK_NO_READY;
    }
  } 

  // Number of units still to be marked: from what is already pending up to
  // the goal.
  requestSize = desiredWP - pendingWP;
  if(requestSize <= 0)
    // All data has been marked by another thread.
    //
    // -eb 10/20/2001, I'm not sure about the comment above.  This code is
    // single threaded.  I believe that this branch is only taken when
    // we've skipped data above and moved pendingWP.  This may never happen,
    // and if it does, the check should be moved up into the block above.
    // Needs more investigation...
    return MARK_THREAD;	

  // NOTE: intentionally empty branch; it only documents the situation.
  if(pendingWP > WP && uses_sync) {
    //we probably won't be able to compute data since this
    //module uses the sync() function in its work procedure
  }

  //always ask for a multiple of outputSize
  ROUNDUP(requestSize, getOutputSize());

  testrange.index=pendingWP;
  testrange.size=requestSize;

  // Now we know what output range we're trying to compute...
  // figure out what input ranges this corresponds to and propagate
  // the marking upstream.
  
  if (numberInputs > 0) {

    forecast(testrange, inputs); // determine the range we need on each input

    // Anything other than MARK_continue means we stop here and report
    // that status to our own caller.
    int result = markDataUpstream (inputs, &dataMarkedUpstream);
    if (result != MARK_continue)
      return result;
  }

  // upstream marking was successful.  Now see if there's actually
  // room to write our buffer.

  if(outBuffer)
    space = minwritespace(pendingWP,requestSize);

  if (outBuffer && space < (unsigned int)requestSize) {
#ifdef PERFMON
   cycles->bufferFullOnWrite++;
#endif
    //if we are not a sink and there is
    //not enough space in the buffer for a full request
    //could: find which of outBuffer[?]->connectors[?]->lastRP == minrp
    // ?? don't bother computing ??
    if(dataMarkedUpstream)
      return MARK_READY_NO_MARK;

    return MARK_NO_READY;
  }

  /** Mark data to be computed **/
  desiredWP = pendingWP + requestSize;

  // The matching unlock is performed inside attach_writer() below.
  MUTEX_LOCK(&mutex); //hold-over from old way (see below)

  markedWP = desiredWP; //assumes single-threaded marking!!!

  /** initialize pointers to thread-specific marking data **/

  // FIXME clean up this stuff.  Rework this whole thread specific
  // mess so that it's all hidden one place instead of spread out
  // all through VrSigProc

#ifdef THREADS
  VrSampleRange *pmyrange=(VrSampleRange *) THREAD_GETSPECIFIC(myMarkedData);
  if(pmyrange == 0) {
    //init thread specific data
    // NOTE(review): the keys are created without destructors (see
    // initMarkedData), so these allocations are not released by this file.
    pthread_setspecific(myMarkedData,(void *) new VrSampleRange);
    pthread_setspecific(myWriterLL,(void *) new writerLL);
    pthread_setspecific(inputs_forecasted,new VrSampleRange[numberInputs]);
    pthread_setspecific(myReaderLLs,(void *) new readerLL[numberInputs]);
    pmyrange=(VrSampleRange *) THREAD_GETSPECIFIC(myMarkedData);
    pmyrange->index=(long long) -1;
    pmyrange->size=0;
  }    
  VrSampleRange& myrange=*pmyrange;
#else
  if(!myWriterLL) {		//init (pseudo)thread specific data
    myMarkedData.index=(long long) -1;
    myMarkedData.size=0;
    myWriterLL=new writerLL;
    inputs_forecasted=new VrSampleRange[numberInputs];
    myReaderLLs=new readerLL[numberInputs];
  }
  VrSampleRange& myrange=myMarkedData;
#endif

  VrSampleRange *myinputs=(VrSampleRange *) THREAD_GETSPECIFIC(inputs_forecasted);
  writerLL *wLL = (writerLL *) THREAD_GETSPECIFIC(myWriterLL);
  readerLL *readerLLs = (readerLL *) THREAD_GETSPECIFIC(myReaderLLs);

  // compute() resets the saved size to 0 when it finishes, so a non-zero
  // size here means the previous marking has not been computed yet.
  if (PARANOID && myrange.size) {
    fprintf(stderr,"Mark occured again before compute() completed in this thread.\n");
    abort ();
  }

  /** save which range this thread marked **/
  myrange.index = pendingWP;
  myrange.size = requestSize;

  /** save which input ranges were forecasted as needed for this marking **/
  for(unsigned int i=0;i<numberInputs;i++)
    myinputs[i]=inputs[i];

  /** create our entry in writer linked list **/
  wLL->maxValid = desiredWP;
  wLL->next = NULL;
  attach_writer(wLL);		// this call unlocks the mutex

  /** create our entries in the reader linked lists upstream **/
  if(numberInputs > 0 ) {    
    // Figure out what we'll ask for next
    //
    // This appears to be only for the benefit for VrConnect::minRP,
    // so that it is able to give a reasonable answer, even if when
    // it is asked when it has no readers currently linked to it.
    //
    // The index in the first arg to attach_reader is normally used to
    // compute minRP.
    //
    // -eb 10/20/2001
    
    VrSampleRange nextBlock[numberInputs];
    VrSampleRange nextrange = {pendingWP+requestSize, getOutputSize()};

    int retval=forecast(nextrange, nextBlock);

    for(unsigned int i=0;i<numberInputs;i++) {
      // An index of (VrSampleIndex) -1 marks "no reader attached on this
      // input"; compute() checks for it before detaching.
      if(myinputs[i].size <= 0)
	readerLLs[i].index = (VrSampleIndex) -1;	// huge
      else {
	readerLLs[i].index = myinputs[i].index;
	readerLLs[i].next = NULL;
	if(retval < 0)	//don't know next block yet, use this one
	  inputConn[i]->attach_reader(&readerLLs[i],myinputs[i].index);
	else
	  inputConn[i]->attach_reader(&readerLLs[i],nextBlock[i].index);
      }
    }
  }

  /** set the starting module for this compute block to this **/
#ifdef THREADS
  pthread_setspecific(startMarkedModule, (void *) ((VrSigProc *) this));
#else
  startMarkedModule = this;
#endif
  return MARK_READY; //continue marking downstream if possible
}


//! propagate marking upstream
/*!
  This is the embodiment of a bunch of heuristics determining which
  data to schedule when (Explain this mess.)

  Calls markData() on the upstream module of each input in turn, with
  the corresponding entry of \p inputs, and combines the results.

  \param inputs one requested range per input (numberInputs entries).
  \param dataMarkedUpstream output flag; cleared on entry and set to true
  only when one input returned MARK_READY, no earlier input had reported
  MARK_NO_READY or MARK_THREAD, and all remaining inputs already have
  their ranges marked.

  \return MARK_continue if the process was successful and
  we should continue to work on this module's data.  Otherwise return
  a more specific MARK_xxx value indicating our status.
 */
int VrSigProc::markDataUpstream (VrSampleRange *inputs,
				 bool *dataMarkedUpstream)
{
  int result = MARK_continue;
  *dataMarkedUpstream = false;

  for (unsigned int i = 0; i < numberInputs; i++) {

    int retVal = getUpstreamModuleN(i)->markData(inputs[i]);

    if (DEBUG_SIGP)
      printf ("[%s:%d] after markData name %s return %d not ready %d\n",
	      __FILE__, __LINE__, name(), retVal, result);

    switch(retVal) {
      //could add case 3 -- buffer full upstream -- compute whatever
      //  you can at highest priority

    case MARK_ALREADY:
      // data is finished upstream, look at the next input
      break;

    case MARK_READY:
      // data successfully marked upstream
      if(result != MARK_continue)
	return MARK_READY_NO_MARK;	// one of the other inputs isn't ready

      //check to see if other inputs are ready
      // NOTE: this inner loop advances the outer loop's index, so once it
      // finishes the outer loop terminates as well; the remaining inputs
      // are only checked with dataMarked(), not marked.
      for (i++; i < numberInputs; i++) {
	if(!getUpstreamModuleN(i)->dataMarked(inputs[i])) {
	  // FIXME -eb 11/30/2001 removed this printf
	  // printf ("[%s:%d] not ready\n", __FILE__, __LINE__);
	  return MARK_READY_NO_MARK; //data upstream is marked but not here
	}
      }
      //only one input marked data, so mark our data below...
      *dataMarkedUpstream = true;
      break; 

    case MARK_READY_NO_MARK:	// data upstream is marked but not ours
      return MARK_READY_NO_MARK;

    case MARK_NO_READY:		// no data can be computed upstream
      // Remembered rather than returned immediately: later inputs are
      // still visited, and MARK_NO_READY overrides an earlier MARK_THREAD.
      result = MARK_NO_READY;
      break;

    case MARK_THREAD:		// data marked by another thread and not ready yet
      // Only recorded if nothing worse (MARK_NO_READY) was seen before.
      if(result == MARK_continue)
	result = MARK_THREAD;
      break;

    default:
      // unknown status code from markData()
      abort ();
    }    
  }

  return result;
}


//! compute the data previously marked
/*!
  Compute is the second part of the markData / compute dyad.  These
  two methods are called from VrMultiTask::schedule to schedule and
  compute the work of the signal processing modules.

  Note that this can run in parallel.  All of the scheduling is done
  in markData, so there is very little overhead or contention here.

  The range to compute and the forecasted input ranges are the ones that
  markData() saved in the (pseudo) thread-specific slots.  The upstream
  modules are computed first (recursively), then work() is called, and
  finally this module's reader and writer list entries are detached.
  
  \return true if we successfully computed all marked data.  We
  return false if some module is not able to successfully compute
  the amount of data that it was expected to.
 */

bool VrSigProc::compute()
{ 
#ifdef THREADS
  VrSampleRange *rp=(VrSampleRange *) THREAD_GETSPECIFIC(myMarkedData);
  if(!rp)
    return true; //no data marked
  VrSampleRange& r=*rp;
#else
  VrSampleRange& r=myMarkedData;
  if(!myWriterLL)
    return true; //no data marked
#endif
  VrSampleRange *inputs=(VrSampleRange *) THREAD_GETSPECIFIC(inputs_forecasted);
  void *inputptrs[numberInputs];
  void *outputptrs[numberOutputs];
  bool complete_p = true;
  unsigned long complete=0; //units finished

  if(r.size == 0)	// the easy case
    return true;

  // First recursively compute our inputs...  If any of these fail to
  // complete, jump to the end and unmark the incomplete region
  
  if(numberInputs > 0 ) {
    // Every upstream module is computed even if an earlier one failed.
    for(unsigned int i=0;i<numberInputs;i++) {
      if(!getUpstreamModuleN(i)->compute())
	complete_p = false;
    }
    // On failure `complete` is still 0, so the whole range gets unmarked.
    if (!complete_p)
      goto unmark_and_return;
      
    for(unsigned int i=0;i<numberInputs;i++) {
      // Busy-wait until the upstream write pointer covers our input range.
      while(!getUpstreamModuleN(i)->dataReady(inputs[i])) {
	//Some upstream data has been marked by another thread
	//  and is not computed yet.  wait for it
#ifdef PERFMON
	cycles->blockOnMarkedData++;
#endif
	//	YIELD();
      } 
      inputptrs[i]=(void *)inputConn[i]->getReadPointer(inputs[i]);
    }
  } 

#ifdef THREADS
  //fprintf(stderr,"%s.compute(%ld): %lld %d\n", name(), pthread_self(), beginWP, size);
#endif

  // NOTE(review): this trace reads inputs[0] unconditionally, which
  // presumes at least one input whenever DEBUG_SIGP is enabled.
  if (DEBUG_SIGP){
    printf("work[%s]: in [%lld:%ld] out[%lld:%ld] ",
	   name(), inputs[0].index, inputs[0].size, r.index, r.size);
    fflush (stdout);
  }

  for(unsigned int i = 0; i < numberOutputs; i++)
    outputptrs[i]=(void *)outBuffer[i]->getWritePointer(r);

  // Now, call work to compute our output data.

#ifdef PERFMON
  cycles->startCount();		// transfer running from overhead and back
#endif

  // work() reports how many output units it actually produced.
  complete = work(r, outputptrs, inputs, inputptrs);

  if (DEBUG_SIGP)
    printf ("returns %ld\n", complete);

  if (PARANOID && complete > r.size) {
    fprintf(stderr,"Finished more work than asked for!\n");
    abort ();
  }

  if (complete < r.size) {
    // fprintf(stderr,"[%s] Warning: didn't finish all the work.\n", name());
    //couldn't finish all the work, reschedule

    // A short result is only tolerated from a module that uses sync() and
    // whose marked range is the most recently marked one.
    if (PARANOID && (!uses_sync || markedWP != r.index+r.size)) {
      fprintf(stderr,"[%s] Non-serialized module didn't finish its work.\n", name());
      abort ();
    }
    complete_p = false;
  }

#ifdef PERFMON
  cycles->stopCount();
  cycles->updateSamples(complete);
#endif

 unmark_and_return:
  if(!complete_p) {
    // Pull markedWP (and our writer entry's maxValid) back to the end of
    // what was really produced so the remainder can be marked again.
#if 0
    markedWP = r.index + complete;	// revise markedWP
#else
    writerLL *wLL = (writerLL *) THREAD_GETSPECIFIC(myWriterLL);
    MUTEX_LOCK(&mutex);
    assert (first == wLL);
    assert (markedWP == first->maxValid);
    markedWP = r.index + complete;		// revise markedWP
    first->maxValid = r.index + complete;	// revise maxValid which is looked at
						//   in detach_writer (hideous!)
    MUTEX_UNLOCK(&mutex);
#endif

#ifdef PERFMON
    cycles->uncompleteWork++;
#endif
  } 

  // We're no longer interested in these input ranges,
  // detach them from the reader list.  Detaching from the reader
  // list here is what eventually causes minRP to advance, although
  // it won't be noticed until the next time updateMinRP is called.

  if(numberInputs > 0 ) {
    readerLL *readerLLs = (readerLL *) THREAD_GETSPECIFIC(myReaderLLs);
    for(unsigned int i=0;i<numberInputs;i++) {
      // markData() stores (VrSampleIndex) -1 for inputs it did not attach.
      if(readerLLs[i].index != (VrSampleIndex) -1) 
	inputConn[i]->detach_reader(&readerLLs[i]);
    }
  }	    

  // likewise, we're done writing this piece of the output.
  // Detaching updates WP (most of the time).
  
  writerLL *wLL = (writerLL *) THREAD_GETSPECIFIC(myWriterLL);
  detach_writer(wLL); //updates WP 
  r.size = 0; 	// nothing is marked any more; markData() may mark again

  return complete_p;
}

/*!
 * Connect our next input to output \p n of \p proc.
 *
 * A new VrConnect is created between this module and the output buffer
 * outBuffer[n] of \p proc (the buffer is created on demand), and appended
 * to inputConn; numberInputs is incremented.
 *
 * \param proc the upstream module.
 * \param n    index of the output of \p proc to connect to.
 *
 * \return true on success; false if \p proc has no output \p n.
 *
 * Terminates the process with exit(1) if our input item size differs from
 * the output item size of \p proc.
 */
bool
VrSigProc::connect_proc(VrSigProc* proc, port n)
{
  // Validate before allocating anything, so the failure path cannot leak.
  if(n >= proc->numberOutputs) {
    fprintf(stderr,"This SigProc does not have that many outputs.\n");
    return false;
  }

  if (itype_size != proc->type_size){
    fprintf (stderr, "CONNECT: i/o type size mismatch: %s(%d) -> %s(%d)\n",
	     proc->name (), proc->type_size, name(), itype_size);

    // harsh, but nobody checks the return value
    exit (1);
  }

  if(!proc->outBuffer[n])
    proc->initOutputBuffer(n);
  VrBuffer *b = proc->outBuffer[n];

  // Grow the connector table by one: copy the existing entries and put
  // the new connection in the last slot.
  VrConnect **cs = new VrConnect *[numberInputs+1];
  for(unsigned int i=0; i < numberInputs; i++)
    cs[i] = inputConn[i];
  cs[numberInputs] = new VrConnect((VrSigProc *) this, b);

  // The old table was allocated with new[], so it must be released with
  // delete[] (a no-op when inputConn is still NULL).
  delete [] inputConn;
  inputConn = cs;
  numberInputs++;

  return true;
}

void
VrSigProc::initOutputBuffer(int n)
{
  outBuffer[n]=new VrBuffer(this);
}

//! Prepare the (pseudo) thread-specific marking slots.
/*!
 * With THREADS, creates the four pthread keys (no destructors); the
 * process exits with -1 if any key cannot be created.  Without THREADS,
 * only resets myWriterLL.  In both cases the values themselves are
 * initialized lazily in markData().
 */
void
VrSigProc::initMarkedData()
{
#ifdef THREADS
    pthread_key_t *const keys[] = {
      &myMarkedData, &inputs_forecasted, &myWriterLL, &myReaderLLs
    };
    const unsigned int nkeys = sizeof(keys) / sizeof(keys[0]);

    for (unsigned int k = 0; k < nkeys; k++) {
      if (pthread_key_create(keys[k], NULL) != 0) {
        fprintf(stderr,"VrBuffer: Could not create thread specific variable.\n");
        exit(-1);
      }
    }
    //these variables are initialized in markData()
#else
    //variables are initialized in markData()
    myWriterLL = NULL;
#endif

}

//! Set this module's output sampling frequency.
/*!
 * \param sf requested frequency.
 * \return 0 on success; -1 if \p sf is not positive or is rejected by
 *         checkOutputSamplingFrequency() (a warning is printed in the
 *         latter case).
 */
int
VrSigProc::setSamplingFrequency(double sf)
{
  if (sf <= 0)
    return -1;

  const bool rejected = checkOutputSamplingFrequency(sf) < 0;
  if (rejected) {
    fprintf(stderr,"Warning: %s couldn't set invalid samplingFrequency = %g\n",
	    name (), sf);
    return -1;
  }

  proc_samplingFrequency = sf;
  return 0;
}

//! Default input forecast: 1:1 ratio with no history.
/*!
 * Fills every entry of \p inputs (numberInputs of them) with the same
 * index and size as \p output.
 * \return 0 always.
 */
int VrSigProc::forecast(VrSampleRange output, VrSampleRange inputs[]) {
  VrSampleRange *const stop = inputs + numberInputs;
  for (VrSampleRange *in = inputs; in != stop; ++in) {
    in->index = output.index;	/* ! do not subtract history ! */
    in->size = output.size;	/* + history */
  }
  return 0;
}  

//! Construct a signal processing module.
/*!
 * \param outputs        number of output ports; 0 for a sink (no output
 *                       buffer table is allocated).
 * \param arg_itype_size stored as itype_size (input item size).
 * \param arg_type_size  stored as type_size (output item size).
 *
 * The output buffer table is allocated with every slot NULL; the buffers
 * themselves are created later by initOutputBuffer().
 */
VrSigProc::VrSigProc(int outputs, unsigned int arg_itype_size, unsigned int arg_type_size)
  : uses_sync(0), proc_samplingFrequency(0.0), WP(0), markedWP(0), maxOutSize(0),
	inputConn(NULL), setupCalled(0), outputSize(1), type_size(arg_type_size),
    itype_size(arg_itype_size), initializeCalled(0), first(NULL), last(NULL),
    outBuffer(NULL), maxDSReadSize(0), numberInputs(0), numberOutputs(0)
{ 
#ifdef PERFMON
  num_print_stats=0;
  cycles = new VrCycleCount();
#endif

  MUTEX_INIT(&mutex);

  if (!outputs)		// For sinks
    outBuffer = NULL;
  else {
    // No parentheses around the type: a parenthesized type-id may not
    // carry a run-time array bound.
    outBuffer = new VrBuffer *[outputs];
    numberOutputs=outputs;
    for(int i=0;i<outputs;i++)
      outBuffer[i]=NULL;
  }
  initMarkedData();
}

//! Destructor: with THREADS, deletes the four pthread keys created by
//! initMarkedData().  Nothing else is released here.
VrSigProc::~VrSigProc()
{
#ifdef THREADS
  const pthread_key_t keys[] = {
    myMarkedData, inputs_forecasted, myWriterLL, myReaderLLs
  };
  const unsigned int nkeys = sizeof(keys) / sizeof(keys[0]);

  for (unsigned int k = 0; k < nkeys; k++)
    pthread_key_delete(keys[k]);
#endif
}

//! Sampling frequency reported by the connector on input port \p p.
double VrSigProc::getInputSamplingFrequencyN(port p)
{
  VrConnect *conn = inputConn[p];
  return conn->connect_getSamplingFrequency();
}

// Setup runs in two phases:
//  (1) init() phase:  initialize() is called on all modules and the
//                     sampling rates are pinned down (top to bottom).
//  (2) setup() phase: the amount of data each module will read is set
//                     and buffer sizes are computed (bottom to top).
//
// setup() counts its own invocations.  Once it has been called at least
// as many times as there are connectors on this module's output buffers,
// it continues the setup upstream and then sets up the output buffers.
void VrSigProc::setup() { 
    // total number of connectors hanging off all of our output buffers
    unsigned int consumers = 0;
    for (unsigned int k = 0; k < numberOutputs; ++k)
      consumers += outBuffer[k]->getNumberConnectors();

    //FIX 
    const bool allChainsReported = (++setupCalled >= consumers);
    if (!allChainsReported)
      return;

    //called from all downstream chains
    //continue setup upstream
    setup_upstream(); 

    //setup the output buffers
    for (unsigned int k = 0; k < numberOutputs; ++k)
      outBuffer[k]->setup(type_size);
}

void VrSigProc::setup_upstream()
{
    //called once per setup of an entire tree

    /*** how big of blocks this module may be asked to
	 compute at one time*/
    unsigned int size=maxDSReadSize;

    size = (size/getOutputSize()) * getOutputSize(); //round down
    if(size == 0) size = getOutputSize();
    
    /*If our maxOutSize is changing propagate the setup upstream */
    if(size!=maxOutSize) {
      maxOutSize=size;
      fprintf(stderr,"%s.setup[%p] %d freq %g (%f sec)\n",
	      name(), this, maxOutSize, proc_samplingFrequency, maxOutSize/proc_samplingFrequency);
    
      for(unsigned int i=0;i<numberInputs;i++) {

	if(getUpstreamModuleN(i)->getSamplingFrequency()==0.0 ||
	   getSamplingFrequency()==0.0 ) {
	  fprintf(stderr,"SamplingFrequency can not be zero.\n");
	  exit(-1);
	} 
	unsigned int oSize = getUpstreamModuleN(i)->getOutputSize();
	unsigned int size_up = mapSizeUp(i,size); 
	ROUNDUP(size_up, oSize); 
	//add an extra block in case a read is not outputSize aligned
	size_up += oSize; 
	getUpstreamModuleN(i)->size_setup(size_up);
      }
    }
}  

//returns how big a block this module will need upstream (on input i)
//  given it has to produce size outputs
unsigned int VrSigProc::mapSizeUp(int i, unsigned int size)
{
    return (unsigned int) (size *
			   (getUpstreamModuleN(i)->getSamplingFrequency()
			    /getSamplingFrequency()));
}

// Called by a downstream module to report how much it may read at once.
// Keeps the largest size seen in maxDSReadSize, then runs the base-class
// setup().
void VrSigProc::size_setup(unsigned int size)
{ 
    if (maxDSReadSize < size)
      maxDSReadSize = size; 

    VrSigProc::setup();
}

//! Is every input path of this module connected to a source?
/*!
 * \return false if this module has no input connector table, or if any
 *         upstream module reports false; true otherwise.  Checking stops
 *         at the first upstream module that reports false.
 */
bool
VrSigProc::isConnectedToSource()
{
  bool connected = (inputConn != 0);
  for (unsigned int k = 0; connected && k < numberInputs; ++k)
    connected = getUpstreamModuleN(k)->isConnectedToSource();
  return connected;
}

// Initializes the upstream modules first (recursively), then this module.
// pre_initialize() and initialize() run at most once per module, guarded
// by initializeCalled.
void VrSigProc::init_base()
{
      for (unsigned int k = 0; k < numberInputs; ++k)
	getUpstreamModuleN(k)->init_base();

      if (initializeCalled)
	return;

      pre_initialize ();	// set sampling freq
      initialize();		// other init dependent on sampling freq
      initializeCalled++;
}

#ifdef PERFMON
  /*** Performance monitoring procedures ***/

// Total cycles, as reported by this module's cycle counter.
long long VrSigProc::getTotalCycles()
{
  return cycles->getTotalCycles();
}

// Total cycles for measurement m, as reported by the cycle counter.
long long VrSigProc::getTotalCycles(int m)
{
  return cycles->getTotalCycles(m);
}

// Total samples, as reported by the cycle counter.
long VrSigProc::getTotalSamples()
{
  return cycles->getTotalSamples();
}

// Cycles per sample; 0 when no samples have been counted.
long long VrSigProc::getCyclesPerSample()
{
    if (!getTotalSamples())
      return 0;
    return getTotalCycles()/getTotalSamples();
}

// Cycles per sample for measurement m; 0 when no samples have been counted.
long long VrSigProc::getCyclesPerSample(int m)
{
    if (!getTotalSamples())
      return 0;
    return getTotalCycles(m)/getTotalSamples();
}

// Adds every upstream module to the graph first, then this module.
void VrSigProc::addToGraph(VrPerfGraph *g)
{
    for (unsigned int k = 0; k < numberInputs; ++k)
      getUpstreamModuleN(k)->addToGraph(g);

    g->add(name(),this);
}

// Prints the statistics of the upstream modules, then our own.  Only the
// first call on a given module prints anything.
void VrSigProc::print_stats()
{
    //print only once
    const bool alreadyPrinted = (num_print_stats++ != 0);
    if (alreadyPrinted)
      return;

    //print upstream first
    for (unsigned int k = 0; k < numberInputs; ++k)
      getUpstreamModuleN(k)->print_stats();

    fprintf(stderr, "\n%s counts:\n",name());
    fprintf(stderr, "--------------\n");
    cycles->print_stats();
}
#endif

float VrSigProc::memoryTouched()
{
    float f=0;
    for(unsigned int i=0;i<numberInputs;i++) {
      float uf=getUpstreamModuleN(i)->memoryTouched();
      f+=uf+getUpstreamModuleN(i)->getSamplingFrequency()*averageInputUse(i)*itype_size;
    }
    return f; //+getSamplingFrequency()*type_size*numberOutputs;
}

//! The module feeding input port \p p, as reported by that port's connector.
VrSigProc *VrSigProc::getUpstreamModuleN(port p)
{
  VrConnect *conn = inputConn[p];
  return conn->getUpstreamModule();
} 

/*!
 * Call sync (arg_index) from work(...) to ensure the rest of the procedure
 * runs sequentially.
 *
 * Marks the module as using sync (uses_sync), then waits until
 * is_synced(arg_index) holds.  With THREADS the wait yields the
 * processor; without THREADS waiting can never succeed, so an error is
 * printed and the process exits.
 */
void VrSigProc::sync (VrSampleIndex arg_index) {
  uses_sync=1; 

  if (PARANOID && (arg_index < WP)) {
    fprintf(stderr, "out of order update to WP\n");
    abort ();
  }

  //wait for us to be first writer
  for (;;) {
    if (is_synced(arg_index))
      return;
#ifdef THREADS
#ifdef PERFMON
    cycles->blockOnSync++;
#endif
    sched_yield();
#else
    fprintf(stderr, "Sync() waiting for another thread to write when we aren't using threads!\n");
    exit(-1);
#endif
  }
}

/*!
 * Give the module a sampling frequency if it does not have one yet.
 * Derived classes may override if necessary.
 *
 * A module with itype_size == 0 (a source) must have set its frequency
 * itself; otherwise this aborts.  Any other module inherits the sampling
 * frequency of its input 0.
 */
void VrSigProc::pre_initialize ()
{
  if (getSamplingFrequency () != 0.0)
    return;			// already set, nothing to do

  if (itype_size == 0){		// source?
    fprintf (stderr,
	"Subclasses of VrSource must setSamplingFrequency explicitly in constructor or pre_initialize()\n");
    abort ();
  }

  assert (getNumberInputs () > 0);
  setSamplingFrequency (getInputSamplingFrequencyN (0));
}

//! A module is a sink iff it has no outputs.

bool 
VrSigProc::isSink ()
{
  const bool hasOutputs = (numberOutputs != 0);
  return !hasOutputs;
}
