/* ============================================================
 * File  : harvester.cpp
 * Author: Eric Giesselbach <ericgies@kabelfoon.nl>
 * Date  : 2004-12-18
 * Description : scans url for links (streams)
 *
 * Copyright 2003 by Eric Giesselbach

 * This program is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General
 * Public License as published by the Free Software Foundation;
 * either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * ============================================================ */

/*
   needs work on
     exit signal messages
     http header info usage (slotReadyRead)
     
   new streamtuned v0.14: parsing of stream lists by perl scripts. 
*/

/*
   proxy support through http_proxy? Need this?
    http_proxy=http://username:password@your_proxy:port/
   http://lists.trolltech.com/qt-interest/2004-05/thread00280-0.html
*/

#include <iostream>

#include <qnetwork.h>
#include <qtimer.h>
#include <qstringlist.h>
#include <qdict.h>
#include <qregexp.h>
#include <qurl.h>
#include <qmutex.h>
#include <qapplication.h>
#include <qdatastream.h>
#include <qdom.h>

#include <sys/stat.h>

#include "harvester.h"


using namespace std;

// File-scope lock. In this part of the file it is taken by
// StreamHarvester::goBack() and StreamHarvester::setTagOnCurrent() around
// their accesses to the history list.
QMutex mutex;

// Alias for pointer-to-int; not referenced in this part of the file.
typedef int* pint;

/*
  Users can share the download directory:
  - assign all users to a common group (e.g. users)
  - create, set group, setgid a central downloads directory:
      cd /var
      mkdir downloads
      chgrp users downloads
      chmod 2775 downloads
  - if the directory contains files created before setting gid:
      chmod -R 0775 downloads
      chgrp -R users downloads
  - symlink the new directory as SUBPATH/.streamtuned/downloads
      cd $HOME/.streamtuned
      rm -Rf downloads (don't try this at home)
      ln -s /var/downloads .

  Note:
    1. upon selection of an item with STREAM_DL handler, wget will download the
       uri to the download dir.
    2. the downloading file will be renamed as [filename]_temp
    3. when download finishes the file will be renamed as filename
    4. the process that started wget is aware of the running download,
    5. other streamtuned processes will regard the [filename]_temp as a
       failed download and will start a new download upon selection. What
       happens next is specified in the wget documentation (I guess).
*/

// Creates a downloader that stores its files below the given directory.
Downloader::Downloader(QString directory)
{
   // read/write rights for user and group on everything created from here on
   umask(2);

   downloads.clear();
   this->directory = directory;
}

// Makes sure the file behind url is present in, or being downloaded into,
// the download directory.
//
//   url          : uri of the file to download
//   canplaynow   : out, true when the file already exists, either completely
//                  or as the _temp file of a download started by this object
//   error        : out, true when no download could be set up
//   errorMessage : out, description of the failure, empty when error is false
//
// Returns the path reported by checkAvailable(): the finished file or the
// [filename]_temp file wget writes to.
QString Downloader::download(QString url, bool& canplaynow, bool& error, QString& errorMessage)
{
   Availability availability;
   bool pathFailed;

   error        = false;
   errorMessage = "";
   canplaynow   = false;

   QString path = checkAvailable(url, availability, pathFailed, true);

   // todo: when [filename]_temp file exists and no download on that file
   // is active, it should resume (wget -c). But Wget refuses to download if
   // server doesn't support resumes. Create workaround using wget feedback...
   //
   // For now: if "pending" check for running download and if none, ignore the _temp file.
   if ( availability == pending && downloads.find(url) == downloads.end() )
     availability = notavailable;

   canplaynow = ( availability != notavailable );

   if (pathFailed)
   {
     // Report the failure through the error flag as well, not only through
     // the message text.
     error = true;
     errorMessage = TARGET" error: could not create file " + path;
     return path;
   }

   if (availability != notavailable)
     return path;  // nothing to start: file is complete or already downloading

   proc = new QProcess(this);

   proc->addArgument( "wget" );
   proc->addArgument( "-O" );
   proc->addArgument( path );  // pos 2, required slotDownloadStopped
   proc->addArgument( url );   // pos 3, required slotDownloadStopped

   connect(
      proc,
      SIGNAL( processExited() ),
      this,
      SLOT( slotDownloadStopped() )
   );

   error = !proc->launch("");

   if (error)
   {
     errorMessage = "Cannot create download process";
     delete proc;
     proc = 0;  // do not keep a pointer to the deleted process
   }
   else
     downloads.append(url);

   return path;
}

//  download as path/filename_temp
//  rename to path/filename on successful exit
//  resume download if path/filename_temp exists

void Downloader::slotDownloadStopped()
{
   bool error = true;
   QString errorMessage = "";
   
   QObject *obj = (QObject*)sender();

   if (obj)
   {
      QProcess *m_proc = (QProcess*)obj;
      
      QStringList args = m_proc->arguments();
      QString path = args[2];
      QString url  = args[3];
      
      if ( m_proc->normalExit() && m_proc->exitStatus() == 0 )  // download finished...
      {
         error = false;
         // rename filename_temp --> filename
         if ( path.right(5) == "_temp" )
         {
            QString newPath = path.left( path.length() - 5 );
            QDir dir(directory);
            dir.rename(path, newPath, true);
         }
      }  // else leave file for download resume/overwrite on next select
        else
          errorMessage = "Download failed (exit " + QString::number(m_proc->exitStatus()) + "): " + url;

      downloads.remove(url);
      delete m_proc;
      
      emit downloadFinished(url, error, errorMessage);
   }  
   
   //normalExit() // true if not aborted / crashed
   //exitStatus() // process return value
}

// Maps url to a file below the download directory and reports its state.
//
//   url                   : uri of the file; everything after "://" is used
//                           as relative path below the download directory
//   availability          : out, available (file exists), pending
//                           ([file]_temp exists) or notavailable
//   errorWhileCreating    : out, true when the url cannot be mapped to a file
//                           below the download directory or when a directory
//                           for it could not be created
//   createPathIfNotExists : create the directories for the _temp file when
//                           neither the file nor its _temp version exists
//
// Returns the path of the existing file, the path of the _temp file to
// download to, or "" when there is no usable path.
QString Downloader::checkAvailable( QString url, Availability& availability,
                                    bool& errorWhileCreating, bool createPathIfNotExists )
{
   availability = notavailable;
   errorWhileCreating = false;

   QString urlPath = url.section( "://", 1, 1);  // strip protocol

   // Without a path the "file" would be the download directory itself, and
   // a ".." element would place it outside of the download directory.
   if ( urlPath.isEmpty() || QStringList::split( "/", urlPath ).contains("..") )
   {
     errorWhileCreating = true;
     return "";
   }

   QString path = directory + "/" + urlPath;
   QFile file(path);

   if ( file.exists() )
   {
     availability = available;
     return path;
   }

   path    += "_temp";
   urlPath += "_temp";
   file.setName(path);

   if ( file.exists() )
   {
     availability = pending;
     return path;
   }

   if ( !createPathIfNotExists )
     return "";

   QStringList pathElements = QStringList::split( "/", urlPath );  // get directories and filename

   QString currentDir = directory;

   for ( uint i = 0; i + 1 < pathElements.size(); i++ ) // assume last entry is file
   {
      currentDir += "/" + pathElements[i];
      QDir dir( currentDir );

      if ( !dir.exists() && !dir.mkdir(currentDir) )
      {
         // deeper directories cannot be created either; keep the failure
         errorWhileCreating = true;
         break;
      }
   }

   return path;
}

//----------------------------------------------------------------------------
// using cache is nice when working playlists, required when working rss/rdf feeds


// One cache entry: the date reported for the cached page and the file that
// holds its content.
CacheItem::CacheItem(QString lastdate, QString filename)
{
   this->lastdate = lastdate;
   this->filename = filename;
}

// Sets up the cache below $HOME/.SUBPATH/cache and reads the stored index.
Cache::Cache()
{
  // history owns its CacheItem objects
  history.resize(1000);
  history.setAutoDelete(true);

  cachePath = QString(getenv("HOME")) + "/." SUBPATH "/cache";

  // createNewFile() starts numbering new cache files after this value
  fileIndex = 10000;

  loadCache();
}

// Writes the index back to disk before the cache goes away.
Cache::~Cache()
{
  this->saveCache();
}

// Copies the text file source to dest, line by line. Every line read is
// written followed by a line end. Returns false (after reporting on stdout)
// when either file cannot be opened.
bool Cache::copyCacheFile(QString source, QString dest)
{
  QFile in(source);
  QFile out(dest);

  if ( !in.open(IO_ReadOnly) )
  {
    cout << "cannot open " << source << endl;
    return false;
  }

  if ( !out.open(IO_WriteOnly) )
  {
    cout << "cannot open " << dest << endl;
    return false;
  }

  QTextStream reader( &in );
  QTextStream writer( &out );

  for ( ; !reader.atEnd(); )
  {
     QString text = reader.readLine(); // line of text excluding '\n'
     writer << text << endl;
  }

  out.close();
  in.close();

  return true;
}


// Looks up url in the cache index. On a hit lastdate and filename receive
// the stored values and true is returned; on a miss both are set to "" and
// false is returned.
bool Cache::getCacheItem(QString url, QString& lastdate, QString& filename)
{
  CacheItem* entry = history[url];

  if ( !entry )
  {
    lastdate = "";
    filename = "";
    return false;
  }

  lastdate = entry->lastdate;
  filename = entry->filename;
  return true;
}


// Creates an empty file "cache<N>" in the cache directory, using the next
// number that is not yet taken. Gives up after five failed attempts to
// create a file. Returns the file name, or "" on failure.
QString Cache::createNewFile()
{
    QFile file;
    QString filename;
    int failures = 0;

    while (true)
    {
      fileIndex++;
      filename = cachePath + "/cache" + QString::number(fileIndex);
      file.setName( filename );

      // a name that is already taken does not count as an attempt
      if ( file.exists() )
        continue;

      bool created = file.open(IO_WriteOnly);
      if ( file.isOpen() )
        file.close();

      if (created)
        return filename;

      failures++;
      if (failures >= 5)
        return "";
    }
}


// Stores the content of tempfilename as the cached copy for url and records
// lastdate for it. An existing entry is overwritten, otherwise a new cache
// file is created. Returns false, leaving the index unchanged, when the
// content could not be stored.
bool Cache::setCacheItem(QString url, QString lastdate, QString tempfilename)
{
  CacheItem* item = history[url];

  if ( item )
  {
    // Only record the new date when the new content really made it into
    // the cache file; otherwise the date would not match the stored content.
    if ( !copyCacheFile(tempfilename, item->filename) )
      return false;

    item->lastdate = lastdate;
    return true;
  }

  QString filename = createNewFile();
  if (filename == "")
  {
     cerr << "cannot create cache file" << endl;
     return false;
  }

  if ( !copyCacheFile(tempfilename, filename) )
  {
     // do not leave the empty file from createNewFile() behind
     QFile::remove(filename);
     return false;
  }

  history.insert( url, new CacheItem(lastdate, filename) );
  return true;
}

// Reads the cache index file into history.
//
// File format, as written by saveCache(): for every item a line "[item]"
// followed by three lines: url, lastdate, filename. A blank line or the
// marker "[emptystring]" in one of these positions stands for an empty value.
void Cache::loadCache()
{
    if ( !openCacheFile(false) )
      return;

    myFile.at(0);
    QTextStream stream( &myFile );

    history.clear();

    QString m_url = "";
    QString m_lastdate = "";
    QString m_filename = "";

    int stage = 0;  // 0: no item seen yet, n > 0: n-th line of the current item
    QString line;

    while ( !stream.eof() )
    {
       line = stream.readLine();

       if (stage > 0) stage++;

       if ( line == "[item]" )
       {
         if ( stage > 1 && !m_url.isEmpty() ) // data received
            history.insert( m_url, new CacheItem(m_lastdate, m_filename) );

         // Start the new item with empty fields, so that a value missing in
         // the file is not silently taken over from the previous item.
         m_url = "";
         m_lastdate = "";
         m_filename = "";

         stage = 1;
         continue;
       }

       // Blank lines and [emptystring] both leave the field empty.
       if ( line == "" || line == "[emptystring]" )
         continue;

       switch (stage)
       {
         case 2: // url
           m_url = line;
         break;
         case 3: // lastdate
           m_lastdate = line;
         break;
         case 4: // filename
           m_filename = line;
         break;
         default:
         break;
       }
    }

    if ( stage > 1 && !m_url.isEmpty() )  // data received
      history.insert( m_url, new CacheItem(m_lastdate, m_filename) );

    closeCacheFile();
}

void Cache::saveCache()
{
    QString value;
    QTextStream stream( &myFile );
    CacheItem *item;

    //cout << "save cache" << endl;
        
    openCacheFile(true);
    
    QDictIterator<CacheItem>i(history);

    for (; i.current(); ++i )
    {
      //cout << "cache: " << i.currentKey() << endl;
      item = i.current();
      stream << endl;
      stream << "[item]" << endl;
      stream << i.currentKey() << endl;
      stream << item->lastdate << endl;
      stream << item->filename << endl;
    }

    closeCacheFile();
}

bool Cache::openCacheFile(bool overwrite)
{
   bool result = false;
   
   QString path = cachePath + "/cacheindex";
    
   myFile.setName( path );
   
   if (!overwrite)
     result = myFile.open(IO_ReadOnly);
   
   if (!result)
     result = myFile.open(IO_WriteOnly);

   if (!result)
     cerr << "cannot open file " << path << endl;
      
   return result;
}

// Closes the index file. Returns false when it was not open.
bool Cache::closeCacheFile()
{
   if ( !myFile.isOpen() )
     return false;

   myFile.close();
   return true;
}


// from storagehandlers, rewrite and reuse that file using hash

//----------------------------------------------------------------------------

// Buffer for the data of one http request. Opens fileName for writing;
// errorMessage is "" on success and describes the problem otherwise.
FetchBuffer::FetchBuffer(QString host, QString url, QString fileName, int requestId, QString& errorMessage)
{
    this->host      = host;
    this->url       = url;
    this->fileName  = fileName;
    this->requestId = requestId;

    urlEstimateCount = 0;
    responseDate     = "";
    errorMessage     = "";

    file.setName(fileName);

    if ( file.open(IO_WriteOnly) )
    {
      stream.setDevice( &file );
      return;
    }

    errorMessage = "cannot open file " + fileName;
}


// Makes sure the buffer file is closed.
FetchBuffer::~FetchBuffer()
{
   this->closeBuffer();
}


// Closes the buffer file; does nothing when it is not open.
void FetchBuffer::closeBuffer()
{
   if ( !file.isOpen() )
     return;

   file.close();
}
     
// Appends text to the buffer file; the text is dropped when the file is
// not open.
void FetchBuffer::writeData(QString& text)
{
   if ( !file.isOpen() )
     return;

   stream << text;
}


//----------------------------------------------------------------------------

// Nothing to set up here: the body is intentionally empty.
CookieBin::CookieBin()
{

}

// Nothing to release here: the body is intentionally empty.
CookieBin::~CookieBin()
{

}

// "set-cookie"-line from response header
void CookieBin::updateCookie(QString host, QString newCookie)
{
    // Stores, replaces or removes one cookie for host.
    //   host      : host the cookie was received from
    //   newCookie : cookie as received; only the leading "name=value" part
    //               is used. An empty value removes the cookie.
    //
    // For now: ignore domain (use host instead), expiration, secure flag
    QStringList newCookieParts = QStringList::split( ";", newCookie );
    if ( newCookieParts.isEmpty() )
      return;  // nothing usable in this line

    newCookie = newCookieParts[0];

    // The value is everything after the first '=': values may contain '='
    // themselves (e.g. base64 padding).
    QString name  = newCookie.section( '=', 0, 0 ).stripWhiteSpace();
    QString value = newCookie.section( '=', 1 ).stripWhiteSpace();

    if ( name.isEmpty() )
      return;

    newCookie = name + "=" + value;

    // first cookie for host
    if ( !hasCookies(host) )
    {
      if ( !value.isEmpty() )  // removal of a cookie we never had
        cookieMap.insert(host, newCookie);
      return;
    }

    QStringList cookieParts = QStringList::split( "; ", cookieMap[host] );

    const QString prefix = name + "=";
    bool processed = false;

    for ( QStringList::Iterator it = cookieParts.begin(); it != cookieParts.end(); ++it )
    {
      if ( !(*it).startsWith(prefix) )
        continue;

      // cookie update
      if ( value.isEmpty() )
        cookieParts.remove(it);
      else
        *it = newCookie;

      processed = true;
      break;  // the iterator may be invalid after remove()
    }

    if ( !processed && !value.isEmpty() )
      cookieParts.append(newCookie);

    // Without cookies left, forget the host, so that no empty cookie line
    // is offered for it.
    if ( cookieParts.isEmpty() )
      cookieMap.remove(host);
    else
      cookieMap[host] = cookieParts.join("; ");
}

// have cookie(s) for host?
bool CookieBin::hasCookies(QString host)
{
   // true when a cookie line is stored for this host
   const bool known = cookieMap.contains(host);
   return known;
}  

// "cookie"-line to send with request header
QString CookieBin::getCookieHdrLine(QString host)
{
   // no cookies stored for this host: null string
   if ( !hasCookies(host) )
     return QString::null;

   return cookieMap[host];
}

// Remembers referer together with the host it belongs to. Only one referer
// is kept at a time.
void CookieBin::setReferer(QString host, QString referer)
{
   refererHost   = host;
   this->referer = referer;
}

// Returns the stored referer when it belongs to host, "" otherwise.
QString CookieBin::getReferer(QString host)
{
   if ( refererHost == host )
     return referer;

   return "";
}

// True when the stored referer belongs to host.
bool CookieBin::hasReferer(QString host)
{
   const bool sameHost = (refererHost == host);
   return sameHost;
}

//----------------------------------------------------------------------------


// Creates the cache and the http object used for all requests and routes
// the signals of the http object to the slots of this class.
Requester::Requester() : QHttpX()
{
  fetchBuffer = 0;

  cache = new Cache();
  http  = new QHttpX();

  connect( http, SIGNAL(requestFinished(int, bool)),
           this, SLOT(slotRequestFinished(int, bool)) );

  connect( http, SIGNAL(dataReadProgress(int, int)),
           this, SLOT(slotDataReadProgress(int, int)) );

  connect( http, SIGNAL(readyRead(const QHttpXResponseHeader&)),
           this, SLOT(slotReadyRead(const QHttpXResponseHeader&)) );

  connect( http, SIGNAL( responseHeaderReceived(const QHttpXResponseHeader&) ),
           this, SLOT( slotResponseHeaderReceived(const QHttpXResponseHeader&) ) );
}


// Releases the http object, the cache and a buffer that is still around.
Requester::~Requester()
{
  // delete on a null pointer is a no-op, no checks needed
  delete http;
  delete cache;
  delete fetchBuffer;
}


// Starts fetching url into the file fileName. A request that is still
// running is aborted first.
//
//   url      : absolute http url
//   fileName : file the received data is written to
//   errorMsg : out, reason of the failure when false is returned
//
// Returns true when the request was started; the result is reported later
// through the signals of this class.
bool Requester::fetchData(QString url, QString fileName, QString& errorMsg)
{
    this->fileName = fileName;

    status = abort;
    http->abort(); // Synchr. call to slotRequestFinished w. error set

    redirectCount = 0;

    QUrl qurl( url );

    if (!qurl.isValid() || !qurl.hasHost() || qurl.protocol() != "http")
    {
      // tell the caller why nothing was started
      errorMsg = "invalid or unsupported url: " + url;
      return false;
    }

    if (fetchBuffer)
    {
      delete fetchBuffer;
      fetchBuffer = 0;
    }

    host  = qurl.host();
    hHost = host;          // value for the Host header: host[:port]
    port  = qurl.port();

    if (port != -1)
      hHost += ":" + QString::number(port);
    else
      port=80;

    hUrl = qurl.encodedPathAndQuery();

    this->url = url;

    status = sethost;

    int ident = http->setHost(host, port);

    fetchBuffer = new FetchBuffer( host, url, fileName, ident, errorMsg);

    if (errorMsg != "")
    {
       delete fetchBuffer;
       fetchBuffer = 0;
       cout << "mythstream error: " << errorMsg << endl;
       return false;
    }

    return true;
}


void Requester::slotResponseHeaderReceived(const QHttpXResponseHeader& response)
{
    QString cachefilename, cachelastdate, temp;
    
   // check for cookies
    QString cookieData = response.value("set-cookie");
    
    if (cookieData)
    {
      //cout << "COOKIES: " << cookieData << endl;

      // put multiple cookies in rawlist
      QStringList cookieRawList = QStringList::split("&&COOKIESEP&&", cookieData);

      // feed every cookie to cookieBin
      for (uint i = 0; i < cookieRawList.size(); i++)
         cookieBin.updateCookie( host, cookieRawList[i] );
    }
   
   
    if ( response.statusCode() == 304 ) // Not Modified
    {
      //cout << "response: not modified" << endl;
      fetchBuffer->setResponseDate( "CACHE" );
      status = notmodified;
      http->abort();
    }

    // patch from Andreas Gerstlauer, handles redirects
    else if ( response.statusCode() >= 300 && response.statusCode() <= 400 ) // redirect
    {
      status = sethost;
      this->url = response.value("location");
      //cout << "redirected to " << this->url << endl;
           
      QUrl *qurl = new QUrl( url );

      status = abort;
      http->abort(); // Synchr. call to slotRequestFinished w. error set
      
      if (fetchBuffer)
      {
        delete fetchBuffer;
        fetchBuffer = 0;
      }

      if (!qurl->isValid() || redirectCount > 20) // no hashost, allow relative path
      {
        
        delete qurl;

        if (redirectCount > 20)
          temp = "too much redirections, giving up";
        else
          temp = "redirected to invalid URL";
        //cout << temp << endl;
        emit fetchReady(true, temp);

        return;
      }
              
    
    if (qurl->hasHost())
    {
      hHost = qurl->host();
      port  = qurl->port();
      host = qurl->host();

      if (port != -1)
        hHost += ":" + QString::number(qurl->port());
      else
        port=80;
    }
    
    hUrl = qurl->encodedPathAndQuery();
    delete(qurl);
    
    //cout << "port " << port << " host " << host << " hhost " << hHost << " hUrl " << hUrl << endl;

    status = sethost;

    redirectCount++;

    int ident = http->setHost(host, port);
    //fetchBuffer->setRequestId(ident);
        
    QString errorMsg;
    
    fetchBuffer = new FetchBuffer( host, url, fileName, ident, errorMsg);
        
    if (errorMsg != "")
    {
       delete fetchBuffer;
       status = abort;
       http->abort();
       fetchBuffer = 0;
       cout << "MythStream error: " << errorMsg << endl;
       return;
    }
      

    }
    
    if ( response.statusCode() == 200 && fetchBuffer ) // get reported date
    {
      //cout << "response: " << response.value("Date") << endl;
      fetchBuffer->setResponseDate( response.value("Date") );
    }
}

// Progress callback of the http object. Aborts the request when more than
// 25000 units have been read and either no url was detected so far or there
// are more than 50000 units per detected url; otherwise forwards the
// progress via fetchProgress().
void Requester::slotDataReadProgress(int read, int size)
{
   if ( fetchBuffer == 0 )
   {
      cerr << "no buffer!" << endl;
      return;
   }

   if ( read > 25000 )
   {
     bool noUrls = ( fetchBuffer->urlEstimateCount == 0 );

     if ( noUrls || int(read/fetchBuffer->urlEstimateCount) > 50000 )
     {
       http->abort();
       return;
     }
   }

   emit fetchProgress(read, size);
}

void Requester::slotReadyRead(const QHttpXResponseHeader&)
{
    QString temp = QString( http->readAll() );
    //cout << "http data: " << temp << endl;

    
    if ( !fetchBuffer )
    {
       cerr << "no buffer to write to " << endl;
       return;
    }
    
    if ( fetchBuffer->isOpen() )
    {
      fetchBuffer->writeData(temp);
    }
    else
      cerr << "stream feed buffer not open " << endl;
    
   // --> prevent read of massiva amounts of binary data
   // count the number of complete url's in this chunk
   // add to total, do some math and decide to continue

    // check for protocol://server.domain
    int index = 0;
    QRegExp expr;
    expr.setPattern("\\w+://[\\w\\-]+\\.[\\w\\-]+"); // matchprobability in noise: a few 1E-6
    
    while ( index > -1)
    {
      index  = expr.search( temp, index );
      index += expr.matchedLength();
      if ( index > -1 ) fetchBuffer->urlEstimateCount++;
    }

    expr.setPattern("<a\\s+href"); // matchprobability in noise: dunno
    index = 0;
    
    while ( index > -1)
    {
      index  = expr.search( temp, index );
      index += expr.matchedLength();
      if ( index > -1 ) fetchBuffer->urlEstimateCount++;
    }

}

// Called by the http object for every finished request; drives the fetch
// state machine kept in 'status'.
//
//   id    : id of the finished request, compared with the id stored in the
//           fetch buffer
//   error : true when the request failed; ignored when the request was
//           stopped by this class (status abort / notmodified)
//
// Emits fetchReady(false, ...) when the data is complete and
// fetchReady(true, message) on errors and aborts.
void Requester::slotRequestFinished(int id, bool error)
{
    QString errorMsg;
    QString cachefilename, cachelastdate, temp;

    if ( status == abort || status == notmodified ) // our abort
      error = false;
    
    if ( http->error() != QHttpX::NoError && error ) // real http error
    {
      errorMsg = http->errorString();
    }
  
    switch ( status )
    {


      // The setHost request is done: send the GET request for hUrl.
      case sethost:  // host lookup finished
            if (!error && fetchBuffer)
            {
              status = getdata;
              cache->getCacheItem(url, cachelastdate, cachefilename);
              //cout << "last fetch for " << url << ": " << cachelastdate << endl;
              QHttpXRequestHeader header( "GET", hUrl );              

              if ( cookieBin.hasCookies(host) )
              {
                //header.setValue( "Cookie", cookieBin.getCookieHdrLine(host) + "; ns_cookietest=true; ns_session=true");
                header.setValue( "Cookie", cookieBin.getCookieHdrLine(host));
                //cout << "sending cookie: " << cookieBin.getCookieHdrLine(host) << endl;
              }
                
              if ( cookieBin.hasReferer(host) )
              {
                header.setValue( "Referer", cookieBin.getReferer(host) );
                //cout << "sending referer: " << cookieBin.getReferer(host) << endl;
              }

              // the page requested now is the referer of the next request
              // to this host
              cookieBin.setReferer(host, hHost + hUrl);

              // a cached copy exists: ask the server to send the page only
              // when it changed since then
              if (cachelastdate != "")
                header.setValue( "If-Modified-Since", cachelastdate );

// Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)

              header.setValue( "User-Agent", "Mozilla/5.0 (compatible; MythStream-v"VERSION")" );
              
              header.setValue( "Host", hHost );

              //header.setValue( "User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.4) Gecko/20060406 Firefox/1.5.0.4 (Debian-1.5.dfsg+1.5.0.4-1)");

              //header.setValue( "Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5");

              //header.setValue( "Accept-Language", "en-us,en;q=0.5");
              //header.setValue( "Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7");
              //header.setValue( "Accept-Encoding", "gzip,deflate");

              //header.setValue( "Keep-Alive", "300");
              //header.setValue( "Connection", "keep-alive");
              
              //cout << "fetching url: " << hUrl << endl;
              // from now on the buffer belongs to the GET request
              fetchBuffer->setRequestId( http->request( header ) );
            }
      break;
  
      // The GET request is done: write the remaining data, update the cache
      // and report success.
      case getdata:
            if ( !error )
            {
              if ( fetchBuffer && fetchBuffer->getRequestId()==id && fetchBuffer->isOpen() )
              {
                //cout << "go stream feed buffer" << endl;
                if ( http->bytesAvailable() )
                {
                  temp = QString( http->readAll() );
                //cout << "got: " << temp << endl;
                  fetchBuffer->writeData(temp);
                }

                fetchBuffer->closeBuffer();
                // response date "CACHE" is set on a 304 response: use the
                // cached copy instead of the (empty) received data
                if ( fetchBuffer->getResponseDate() == "CACHE" )
                {
                    cache->getCacheItem(url, cachelastdate, cachefilename);
                    cache->copyCacheFile( cachefilename, fetchBuffer->getFileName() );
                }
                    else
                      cache->setCacheItem( fetchBuffer->getUrl(), fetchBuffer->getResponseDate(),
                                          fetchBuffer->getFileName() );
              
                delete fetchBuffer;
                fetchBuffer = 0;
              }
                else
                    cerr << "no stream feed buffer" << endl;
                            
              emit fetchReady(false, errorMsg);
            }
      break;
  
      // The request was stopped after a 304 response: deliver the cached
      // copy.
      case notmodified:
            //cout << "using cached page" << endl;
            if ( !error )
            {
              if ( fetchBuffer && fetchBuffer->getRequestId()==id && fetchBuffer->isOpen() )
              {
                // NOTE(review): unlike in the getdata case the buffer is not
                // closed before the cached file is copied over it - confirm
                // this is intended.
                if ( fetchBuffer->getResponseDate() == "CACHE" )
                {
                    cache->getCacheItem(url, cachelastdate, cachefilename);
                    cache->copyCacheFile( cachefilename, fetchBuffer->getFileName() );
                }
                
                delete fetchBuffer;
                fetchBuffer = 0;
              }
                else
                    cerr << "no stream feed buffer" << endl;
                            
              emit fetchReady(false, errorMsg);
            }

      break;
      
      // The request was stopped by this class: report it as an error.
      case abort:
              error = true;
              errorMsg = "fetch aborted";
      break;
  
      default:
      break;
    }
  
    // common error exit: drop the buffer of the failed request and report
    if (error)
    {
      if ( fetchBuffer && fetchBuffer->getRequestId() == id )
      {
        delete fetchBuffer;
        fetchBuffer = 0;
      }
      emit fetchReady(true, errorMsg);
    }
}

void Requester::stop()
{
   http->abort();
}

//----------------------------------------------------------------------------

// Creates an entry from the four given values; all other fields start
// empty, with port -1 and tag 0.
NewUrl::NewUrl(QString nUrl, QString nName, QString nDescr, QString nHandler)
{
    // values supplied by the caller
    this->url     = nUrl;
    this->name    = nName;
    this->descr   = nDescr;
    this->handler = nHandler;

    // defaults
    this->proto   = "";
    this->file    = "";
    this->context = "";
    this->port    = -1;
    this->tag     = 0;
}

// Initialises the harvester state, determines the parser directories and
// creates the Requester that fetches the data.
StreamHarvester::StreamHarvester()
{
    // nothing selected yet
    currentUrl     = "";
    currentName    = "";
    currentDescr   = "";
    currentHandler = "";
    currentTag     = 0;
    rootTag        = 0;

    defaultParser = "";

    // both lists own their items
    history.setAutoDelete(true);
    history.clear();
    urlList.setAutoDelete(true);
    urlList.clear();

    // no external process or parser running
    proc    = NULL;
    eParser = NULL;

    busy    = false;
    pending = false;
    aborted = false;

    // user parsers first, installed parsers as second choice
    parsersPath = QString(getenv("HOME")) + "/." SUBPATH "/parsers/";
    parsersPath_install = QString(PREFIX "/share/mythtv/mythstream/parsers/");

    requester = new Requester();
    connect ( requester, SIGNAL(fetchReady(bool, QString&)), this, SLOT(slotfetchReady(bool, QString&)) );
    connect ( requester, SIGNAL(fetchProgress(int, int)), this, SLOT(slotFetchProgress(int, int) ) );
}

// Releases the requester and terminates and releases an external parser
// that still exists.
StreamHarvester::~StreamHarvester()
{
   delete requester;

   if (!eParser)
     return;

   externalParserTerminate();
   delete eParser;
}


// ****************************** public ******************************

// Requests url to be fetched and parsed with handler. When a fetch is
// still busy it is stopped first.
void StreamHarvester::fetchData(QString& url, QString& name, QString& descr, QString& handler)
{
    // mark the new request; processExited() acts on these flags
    pending = true;
    aborted = true;

    currentUrl     = url;
    currentName    = name;
    currentDescr   = descr;
    currentHandler = handler;
    currentTag     = 0;

    if (!busy)
    {
      startFetch();
      return;
    }

    stopFetch();
}

// Parses data that is already available: the retrieval step is skipped and
// processing continues as if a fetch had just finished. Ignored while busy.
void StreamHarvester::parseData(QString& data, QString& name, QString& descr, QString& handler)
{
    if ( busy )
      return;

    busy    = true;
    pending = false;
    aborted = false;

    currentUrl     = "";
    currentName    = name;
    currentDescr   = descr;
    currentHandler = handler;
    currentTag     = 0;

    // skip data retrieval process
    this->data = data;
    processExited();
}

void StreamHarvester::stop()
{
   requester->stop();
   externalParserTerminate();
}

bool StreamHarvester::isBusy()
{
    return busy;
}

// Url of the current request (reference to the member).
QString& StreamHarvester::getCurrentUrl()
{
   return this->currentUrl;
}

// Name of the current request (reference to the member).
QString& StreamHarvester::getCurrentName()
{
   return this->currentName;
}

// Description of the current request (reference to the member).
QString& StreamHarvester::getCurrentDescr()
{
   return this->currentDescr;
}

// Handler of the current request (reference to the member).
QString& StreamHarvester::getCurrentHandler()
{
   return this->currentHandler;
}

int StreamHarvester::getCurrentTag()
{
   return currentTag;
}

bool StreamHarvester::goBack()
{
   mutex.lock();

   history.removeLast();
   NewUrl *url = history.last();

    if ( url )
    {
      currentUrl   = url->url;
      currentName  = url->name;
      currentDescr = url->descr;
      currentHandler = url->handler;
      
      history.removeLast();

      mutex.unlock();

      fetchData(currentUrl, currentName, currentDescr, currentHandler);
      currentTag = url->tag;
      return true;
    }
      else
    {
      mutex.unlock();
      return false;
    }
}

void StreamHarvester::setTagOnCurrent(int tag)
{
   mutex.lock();

   NewUrl *url = history.last();
   if ( url )
     url->tag = tag;
   else
     rootTag = tag;

   mutex.unlock();
}

// ****************************** private ******************************

// Starts retrieval of currentUrl. Handlers starting with '*' are standalone
// parsers: nothing is fetched for them, processing continues immediately.
void StreamHarvester::startFetch()
{
    data = "";

    if ( currentHandler[0] == '*') // standalone parser, do not fetch with requester
    {
      aborted = false;
      pending = false;
      processExited();
      return;
    }

    QString errorMessage = "";
    const bool started = requester->fetchData(currentUrl, parsersPath + "list.xml", errorMessage);

    if ( !started )
    {
      emit fetchStatus(errorset, invalidurl);
      return;
    }

    busy = true;
    emit fetchStatus(receiving, 0);
}


void StreamHarvester::stopFetch()
{
   //cout << "stop fetch" << endl;
   externalParserTerminate();
   startFetch();
}


// ************ external process callback **************

// Returns the full path of the parser for handler: looked up in the user
// parser directory first, then in the installed parsers. When the parser is
// not found the default parser is tried; when that fails as well the bare
// parser name is returned.
QString StreamHarvester::checkHasParser(QString handler)
{
    QString parserName = handler;

    if ( parserName[0] == '*') parserName.remove(0,1); //change: remove standalone parser token *

    const QString candidates[2] = { parsersPath + parserName,
                                    parsersPath_install + parserName };

    for ( int i = 0; i < 2; i++ )
    {
        QFileInfo info( candidates[i] );

        if ( info.isFile() && info.isReadable() )
            return candidates[i];
    }

    cerr << "external parser not found: " << parserName << endl;

    // already looking for the default: nothing left to try
    if ( handler == defaultParser )
      return parserName;

    // fallback to default
    cerr << "falling back to " << defaultParser << endl;
    return checkHasParser( defaultParser );
}

/*
  In/out variant of the parser lookup.

  parser : in  - handler file name, optionally prefixed with '*'
           out - full path of the parser file that was found
  path   : out - directory the parser was found in

  Search order is parsersPath, then parsersPath_install. If the file is
  in neither place and is not already the default parser, parser is set
  to defaultParser and the search is repeated. When the default parser
  is missing as well, both arguments are left as they are.
*/
void StreamHarvester::getParser(QString &parser, QString &path)
{
    QString bareName = parser;

    // drop the standalone parser token
    if ( bareName[0] == '*' )
      bareName.remove(0, 1);

    QFileInfo userCopy( parsersPath + bareName );
    if ( userCopy.isFile() && userCopy.isReadable() )
    {
        parser = parsersPath + bareName;
        path   = parsersPath;
        return;
    }

    QFileInfo installedCopy( parsersPath_install + bareName );
    if ( installedCopy.isFile() && installedCopy.isReadable() )
    {
        parser = parsersPath_install + bareName;
        path   = parsersPath_install;
        return;
    }

    cerr << "external parser not found: " << bareName << endl;

    // nothing left to fall back to
    if ( bareName == defaultParser )
      return;

    cerr << "falling back to " << defaultParser << endl;
    parser = defaultParser;
    getParser( parser, path );
}


/*
  Decide what happens after the fetch stage is done.

  proc is always released first. Then, in order of priority:
    - pending : a new fetch request was posted, start fetching it
    - aborted : the fetch was stopped without a follow-up request,
                clear the aborted and busy flags
    - else    : hand currentUrl to the external parser
*/
void StreamHarvester::processExited()
{
   delete proc;
   proc = NULL;

   if (pending)
   {
     // user posted new fetch request
     startFetch();
   }
   else if (aborted)
   {
     // no new fetch, but stopFetch still called
     aborted = false;
     busy    = false;
   }
   else
   {
     externalParserStart(currentUrl, currentHandler);
   }
}

/*
  Progress callback for data read by the Requester. The first argument
  is divided by 1024 (it is assumed to be a byte count) and forwarded as
  a "receiving" status; the second argument is ignored.
*/
void StreamHarvester::slotFetchProgress(int read, int)
{
   emit fetchStatus(receiving, read / 1024);
}

/*
  Called when the requester has finished.

  error : true when the fetch failed
  data  : fetched data, or the error message when error is set

  The data is copied into the member of the same name, the pending and
  aborted flags are cleared and the requester's host is remembered in
  lastHost. On success processing continues in processExited(); on
  failure busy is cleared and an errorset status is emitted.
*/
void StreamHarvester::slotfetchReady(bool error, QString& data)
{
   this->data = data;
   pending    = false;
   aborted    = false;
   lastHost   = requester->hHost;

   if (error)
   {
     busy = false;

     // The error text itself is not shown. A "fetch stopped" caused by a
     // pending fetch yields a message that will be overwritten anyway.
     if (data == "fetch stopped")
       emit fetchStatus(errorset, nourlindata);
     else
       emit fetchStatus(errorset, nodata);

     return;
   }

   processExited();
}

/*
  Receives the events posted by send(). Both handled event kinds carry
  a heap allocated int as data, which is released here after use:
    User + ready   -> parserExited(count)
    User + parsing -> fetchStatus(parsing, count)
  Any other event type is ignored.
*/
void StreamHarvester::customEvent(QCustomEvent *ev)
{
    const int kind = ev->type();

    if ( kind == QEvent::User + StreamHarvester::ready )
    {
       int *payload = static_cast<int*>( ev->data() );
       parserExited(*payload);
       delete payload;
    }
    else if ( kind == QEvent::User + StreamHarvester::parsing )
    {
       int *payload = static_cast<int*>( ev->data() );
       emit fetchStatus(parsing, *payload);
       delete payload;
    }
}

void StreamHarvester::parserExited(int matchCount)
{
   // parser ready (and not aborted)
   // not threadsafe
   
   mutex.lock();

   if ( matchCount > 1 )
   {
     history.append(
       new NewUrl( currentUrl,
                   currentName,
                   currentDescr,
                   currentHandler ) );

     if (history.count() > 50)
       history.removeFirst();
   }

   busy = false;

   mutex.unlock();

   emit fetchStatus(ready, matchCount);
}

// outsourcing the parser factory...

/*
  Launch the external parser for a url.

  srcUrl : url the parser output belongs to (passed to the parser as its
           second argument and used to derive baseUrl)
  parser : handler name without extension; an empty name selects the
           default parser

  Nothing happens when a parser process already exists. The PARSER
  build setting selects how the parser is executed (perl script, plain
  executable or through "sh -c") and which file extension is appended.
  The parser is resolved with getParser(), the per-run state (urlCount,
  urlList, parserOutput, eParserUrl) is reset, and the process is started
  with "list.xml" and the source url as arguments. Output arrives through
  externalParserRead() and completion through externalParserExited().

  If the process cannot be started the process object is released again,
  busy is cleared and an error status is emitted.
*/
void StreamHarvester::externalParserStart(QString srcUrl, QString parser)
{
    if ( eParser ) return;

#if PARSER == PERL
    // mythstream default
    QString ext = ".pl";
    defaultParser = "default.pl";
#elif PARSER == EXEC
    QString ext = "";
    defaultParser = "default";
#elif PARSER == SHELL
    QString ext = ".sh";
    defaultParser = "default.sh";
#endif

    QString ppath = parsersPath;
    //cout << "default parser: " << defaultParser << " advised parser: " << parser << ext << endl;

    // empty parser field is implicit configuration of default parser
    if (parser == "") parser = "default";

    parser += ext;

    getParser(parser, ppath);

    cout << "resulting parser: " << parser << " workdir: " << ppath << endl;

    sourceUrl = srcUrl;

    urlCount = 0;
    eParserUrl = NULL;
    urlList.clear();
    parserOutput = "";

    eParser = new QProcess( this );
    // The parser gets the relative file name "list.xml", which startFetch()
    // stores in parsersPath - so parsersPath is the working directory, even
    // when the parser script itself was found in parsersPath_install.
    eParser->setWorkingDirectory(QDir(parsersPath));
    eParser->setCommunication( QProcess::Stdin |
                            QProcess::Stdout
    //                        |QProcess::Stderr
    //                        |QProcess::DupStderr
                            );


#if PARSER == PERL
    // execute perl script
    eParser->addArgument( "perl" );
    eParser->addArgument( parser );
    eParser->addArgument( "list.xml" );
    eParser->addArgument( sourceUrl );  // change
    //cout << "PERL PARSER" << endl;

#elif PARSER == EXEC
    // executable binary
    eParser->addArgument( parser );
    eParser->addArgument( "list.xml" );
    eParser->addArgument( sourceUrl );  // change
    //cout << "EXEC PARSER" << endl;

#elif PARSER == SHELL

    // execute any executable
    //
    // requires parser files to be executable!
    // parser and sourceUrl are external
    // trust parser: assume the perl script defines that value
    // do not trust sourceUrl provided by webserver
    //    sourceUrl is placed between double quotes on a sh command line.
    //    Inside double quotes sh still expands $, ` and \ , so those are
    //    rejected together with the quote character itself.
    if ( sourceUrl.find('"')  != -1 ||
         sourceUrl.find('$')  != -1 ||
         sourceUrl.find('`')  != -1 ||
         sourceUrl.find('\\') != -1 )
    {
      cerr << "MythStream: insecure sourceURl: " << sourceUrl << endl;
      sourceUrl = "";
    }

    eParser->addArgument( "sh" );
    eParser->addArgument( "-c" );
    eParser->addArgument( parser + " list.xml \"" + sourceUrl + "\"" );
    //cout << "SHELL PARSER" << endl;

#endif

    pending = false;

    connect( eParser, SIGNAL(readyReadStdout()),
            this, SLOT(externalParserRead()) );
    connect( eParser, SIGNAL(processExited()),
            this, SLOT(externalParserExited()) );


    if ( !eParser->start() )
    {
      // Written with a stream instead of using the text as a printf format
      // string: a '%' in the parser path must not be interpreted.
      cerr << "error starting parser " << parser << endl;

      // No process means externalParserExited() will never run. Release
      // the process object so the guard at the top does not block every
      // later parser run, and report the failure.
      delete eParser;
      eParser = NULL;
      busy    = false;
      // no dedicated "parser could not be started" status is visible here;
      // the parser error code is the closest match
      emit fetchStatus(errorset, bogusparseroutput);
      return;
    }

    // Base for resolving relative urls in the parser output: cut off the
    // last path component when it looks like a file name (has a dot after
    // the last slash) and the slash is not part of "//".
    baseUrl = sourceUrl;
    int lastSlash = baseUrl.findRev("/", -1);
    int lastDot   = baseUrl.findRev(".", -1);
    if (
         lastSlash > 0 &&
         lastSlash < lastDot &&
         baseUrl.mid(lastSlash - 1, 1) != "/"
       )
         baseUrl = baseUrl.left(lastSlash);
}

/*
  Ask the external parser to terminate, provided one exists and is
  still running.
*/
void StreamHarvester::externalParserTerminate()
{
   if ( !eParser )
     return;

   if ( !eParser->isRunning() )
     return;

   eParser->tryTerminate();
}


/*
  Post a count to this object through the event queue; it is picked up
  in customEvent().

  type : "result"    -> event User + ready
         "linkcount" -> event User + parsing
  cnt  : value carried by the event (heap allocated here, released by
         customEvent())

  Any other type is reported and dropped: customEvent() would not
  recognise the event, so the allocated value would never be released.
*/
void StreamHarvester::send(QString type, int cnt)
{
   int eventType = 0;

   if (type == "result")
     eventType = QEvent::User + StreamHarvester::ready;
   else if (type == "linkcount")
     eventType = QEvent::User + StreamHarvester::parsing;
   else
   {
     cerr << "unknown event type: " << type << endl;
     return;
   }

   QCustomEvent *ev = new QCustomEvent(eventType);
   int *i = new int(cnt);
   ev->setData(i);
   qApp->postEvent(this, ev);
}

/*
  Finish the url record that is being built in eParserUrl.

  A record whose url differs from sourceUrl is appended to urlList (an
  empty name is replaced by the url) and counted in urlCount. A record
  that points back to sourceUrl is deleted. Either way eParserUrl is
  NULL afterwards.
*/
void StreamHarvester::storeParserUrl()
{
    const bool keep = eParserUrl && eParserUrl->url != sourceUrl;

    if ( !keep )
    {
      delete eParserUrl;
      eParserUrl = NULL;
      return;
    }

    if (eParserUrl->name == "")
      eParserUrl->name = eParserUrl->url;

    urlList.append( eParserUrl );
    urlCount++;
    eParserUrl = NULL;
}


/*
  Turn the xml written by the external parser into url records.

  xmlData : complete parser output

  Every child of the document element has to be an <item> element.
  Items with both a <name> and a <url> are converted into a NewUrl;
  <descr>, <handler> and any number of <meta> blocks (name / content /
  viewer) are optional. Urls without "://" are completed with lastHost
  (when they start with '/') or with baseUrl (otherwise). Each record is
  handed to storeParserUrl().

  Returns false when the data is not well formed xml or when a child
  that is not an <item> element is met; records stored before that point
  are kept. Returns true otherwise.
*/
bool StreamHarvester::externalParserLoadUrl(QString& xmlData)
{
    QDomDocument doc;

    if ( !doc.setContent(xmlData, false) )
    {
       cerr << TARGET": could not read parser output: " << endl;
       cerr << "-------------------------------------" << endl;
       cerr << xmlData << endl;
       cerr << "-------------------------------------" << endl;

       return false;
    }

    // substrings of "file name + query" that mark a url as a stream
    // quick hack, block should be transported to streambrowser.cpp ...
    static const char *streamMarkers[] =
    {
      ".rm",  ".ra",  ".wav", ".ram", ".asx", ".avi", ".mpg",
      ".pls", ".mp3", ".ogg", ".wmf", ".smi", ".m3u", 0
    };

    for ( QDomNode item = doc.documentElement().firstChild();
          !item.isNull();
          item = item.nextSibling() )
    {
        QDomElement e = item.toElement();

        if ( e.isNull() || e.tagName() != "item" )
        {
          cerr << TARGET": invalid parser output" << endl;
          return false;
        }

        QDomNode     nName  = e.elementsByTagName("name" ).item(0);
        QDomNode     nUrl   = e.elementsByTagName("url"  ).item(0);
        QDomNode     nDescr = e.elementsByTagName("descr").item(0);
        QDomNode     nHandl = e.elementsByTagName("handler").item(0);
        QDomNodeList lMeta  = e.elementsByTagName("meta");

        // name and url are required, skip items that lack either
        if ( nName.isNull() || nUrl.isNull() )
          continue;

        QString name = nName.toElement().text();
        QString url  = nUrl.toElement().text();

        // description (single line)
        QString descr = "";
        if ( !nDescr.isNull() )
          descr = nDescr.toElement().text();

        // handler (how to handle this item)
        QString handl = "";
        if ( !nHandl.isNull() )
          handl = nHandl.toElement().text();

        // the record is created with the url exactly as the parser wrote it
        eParserUrl = new NewUrl(url, name, descr, handl);

        QString context = "absolute";

        if ( url.contains("://") == 0 )  // no scheme: url needs completing
        {
          if ( url.find('/') == 0 )  // absolute path on the last host
          {
            url = "http://" + lastHost + url;
          }
          else  // relative to the page the parser worked on
          {
            context = "relative";
            url = baseUrl + "/" + url;
            url = url.replace( QRegExp("[^/]+/\\.\\./"), "" ); // test ../../ recursive?
            url = url.replace( QRegExp("\\./"), "" );
            url = url.replace( "///", "/" );
          }
        }

        // get url properties
        QUrl    parsed( url );
        QString protocol = parsed.protocol();
        int     port     = parsed.port();
        QString file     = parsed.fileName() + parsed.query();

        // classify as stream or other
        bool looksLikeStream = false;
        for ( const char **marker = streamMarkers;
              *marker && !looksLikeStream;
              ++marker )
          looksLikeStream = file.contains(*marker) > 0;

        if ( looksLikeStream )
          file = "stream";
        else
          file = "other";

        eParserUrl->proto   = protocol;
        eParserUrl->file    = file;
        eParserUrl->context = context;
        eParserUrl->port    = port;
        eParserUrl->url     = url;
        eParserUrl->handler = handl;
        //cout << "found name: " << name << ", handler: " << handl << endl;

        // process meta information
        for (uint i = 0; i < lMeta.count(); i++)
        {
          QDomElement eMeta = lMeta.item(i).toElement();
          if ( eMeta.isNull() )
            continue;

          QString metaName    = eMeta.elementsByTagName("name"    ).item(0).toElement().text();
          QString metaContent = eMeta.elementsByTagName("content" ).item(0).toElement().text();
          QString metaViewer  = eMeta.elementsByTagName("viewer"  ).item(0).toElement().text();

          if ( metaName != "" )
            eParserUrl->meta.append(metaName + "(stsep)" + metaViewer + "(stsep)" + metaContent);
        }

        storeParserUrl();
    }

    return true;
}

void StreamHarvester::externalParserRead()
{
   QString temp = QString::null;
   do 
   {
     if (temp != QString::null) 
     { 
        parserOutput += temp;
        //cout << temp << endl;
     }
     temp = eParser->readLineStdout();
   }
     while (temp != QString::null);     
}


void StreamHarvester::externalParserExited()
{
   bool success = externalParserLoadUrl(parserOutput);
   if (eParser) delete eParser;
   eParserUrl = NULL;
   eParser = NULL;
   //cout << "tot urls: " << urlCount << endl;
   if (success)
     send("result", urlCount);
   else
     emit fetchStatus(errorset, bogusparseroutput);

}


