/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include <U2Core/LoadDocumentTask.h>
#include <U2Algorithm/FindAlgorithmTask.h>
#include <U2Core/DocumentModel.h>
#include <U2Core/AppContext.h>
#include <U2Core/DNATranslation.h>
#include <U2Core/DNASequenceObject.h>
#include <U2Core/DocumentUtils.h>
#include <U2Core/IOAdapter.h>
#include <U2Algorithm/SArrayIndex.h>
#include <U2Algorithm/SArrayBasedFindTask.h>
#include <U2Core/TextUtils.h>
#include <U2Core/Counter.h>
#include <U2Core/AppSettings.h>
#include <U2Core/AppResources.h>
#include <U2Gui/Notification.h>
#include "GenomeAlignerFindTask.h"

#include "GenomeAlignerTask.h"

namespace U2 {

// Display name of the task.
// NOTE(review): tr() is evaluated here during static initialization - confirm
// that translators are already installed at that point.
const QString GenomeAlignerTask::taskName(tr("UGENE genome aligner"));
// Keys of the custom settings that prepare() reads through settings.getCustomValue().
// Read as bool, default true.
const QString GenomeAlignerTask::OPTION_ALIGN_REVERSED("align_reversed");
// Read as bool, default true; calculateWindowSize() uses it to choose between
// the absolute mismatch count and the mismatch percentage.
const QString GenomeAlignerTask::OPTION_IF_ABS_MISMATCHES("if_absolute_mismatches_value");
// Read as int, default 0.
const QString GenomeAlignerTask::OPTION_MISMATCHES("mismatches_allowed");
// Read as int, default 0.
const QString GenomeAlignerTask::OPTION_PERCENTAGE_MISMATCHES("mismatches_percentage_allowed");
// Read as int, default 5000.
const QString GenomeAlignerTask::OPTION_MAX_BUNCH_SIZE("max_bunch_size");
// Read as bool, default false.
const QString GenomeAlignerTask::OPTION_PREBUILT_INDEX("if_prebuilt_index");
// Read as string, default ""; only consulted when the task is not in index-only mode.
const QString GenomeAlignerTask::OPTION_INDEX_URL("path_to_the_index_file");
// Read as bool, default false.
const QString GenomeAlignerTask::OPTION_IF_ONLY_FIRST_MATCH("stop_aligning_on_first_match");
// Not referenced in this file; presumably the file extension of saved index files - TODO confirm.
const QString GenomeAlignerTask::INDEX_EXTENSION("idx");
// Read as int, default 0; quality filtering is applied only when the value is above 0.
const QString GenomeAlignerTask::OPTION_QUAL_THRESHOLD("quality_threshold");

/**
 * Creates the aligner task.
 *
 * @param settings         assembly settings; the result file name and the base
 *                         name of the reference URL are handed to the writer.
 * @param _justBuildIndex  when true the task only builds the index
 *                         (see onJustBuildSubTaskFinished()).
 */
GenomeAlignerTask::GenomeAlignerTask( const DnaAssemblyToRefTaskSettings& settings, bool _justBuildIndex )
    : DnaAssemblyToReferenceTask(settings, TaskFlags_FOSCOE, _justBuildIndex),
      loadRefTask(NULL),
      createIndexTask(NULL),
      refSeqObj(NULL),
      seqWriter(settings.resultFileName, settings.refSeqUrl.baseFileName()),
      windowSize(0),
      bunchSize(0),
      justBuildIndex(_justBuildIndex)
{
    GCOUNTER(cvar,tvar, "GenomeAlignerTask");

    // Complement translation applied to reversed reads in readShortReadsBunch().
    // TODO: check every time we load
    transl = AppContext::getDNATranslationRegistry()->lookupTranslation(BaseDNATranslationIds::NUCL_DNA_DEFAULT_COMPLEMENT);

    // Allow as many parallel subtasks as the resource pool reports as ideal.
    setMaxParallelSubtasks(AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount());
}

void GenomeAlignerTask::prepare()
{
    if (!justBuildIndex) {
        bool init = seqReader.init(settings.shortReadUrls);

        if (!init) {
            setError(tr("Can not init short reads loader. %1").arg(seqReader.getErrorMessage()));
            return;
        }
    }
    
    QList<DocumentFormat*> detectedFormats = DocumentUtils::detectFormat(settings.refSeqUrl);    
    if (!detectedFormats.isEmpty()) {
        IOAdapterFactory* factory = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(BaseIOAdapters::LOCAL_FILE);
        DocumentFormat* format = detectedFormats.first();
        loadRefTask = new LoadDocumentTask(format->getFormatId(), settings.refSeqUrl, factory);
        addSubTask(loadRefTask);
    } else {
        setError(QString("Unknown format: %1").arg(settings.refSeqUrl.fileName()));
    }
    
    alignReversed = settings.getCustomValue(OPTION_ALIGN_REVERSED, true).toBool();
    absMismatches = settings.getCustomValue(OPTION_IF_ABS_MISMATCHES, true).toBool();
    nMismatches = settings.getCustomValue(OPTION_MISMATCHES, 0).toInt();
    ptMismatches = settings.getCustomValue(OPTION_PERCENTAGE_MISMATCHES, 0).toInt();
    maxBunchSize = settings.getCustomValue(OPTION_MAX_BUNCH_SIZE, 5000).toInt();
    prebuiltIdx = settings.getCustomValue(OPTION_PREBUILT_INDEX, false).toBool();
    qualityThreshold = settings.getCustomValue(OPTION_QUAL_THRESHOLD, 0).toInt();
    onlyFirstMatch = settings.getCustomValue(OPTION_IF_ONLY_FIRST_MATCH, false).toBool();

    if (justBuildIndex) {
        indexFileName = settings.resultFileName.getURLString();
    } else {
        indexFileName = settings.getCustomValue(OPTION_INDEX_URL, "").toString();
    }

    //TODO: make correct code for common option "indexFileName"
    if (!settings.indexFileName.isEmpty()) {
        indexFileName = settings.indexFileName;
    }

    calculateWindowSize();
}

/**
 * Reacts to a finished subtask and returns the subtasks to schedule next.
 *
 * - In index-only mode the call is delegated to onJustBuildSubTaskFinished().
 * - When the reference loader finishes, the sequence object is picked from the
 *   loaded document and its length and name are passed to the writer.
 * - When a find subtask finishes, the number of pending reads (bunchSize) is
 *   decremented.
 * - When no reads are pending, collected results are written, the next bunch
 *   is read, and either an index build is scheduled or find subtasks are
 *   created for every read of the bunch.
 *
 * @param subTask  the subtask that has just finished.
 * @return new subtasks to run; empty on error, cancel, or when there is
 *         nothing more to do.
 */
QList<Task*> GenomeAlignerTask::onSubTaskFinished( Task* subTask )
{
    QList<Task*> subTasks;
    if (hasErrors() || isCanceled()) {
        return subTasks;
    }

    if (justBuildIndex) {
        return onJustBuildSubTaskFinished(subTask);
    }

    if (subTask == loadRefTask) {
        Document* doc = loadRefTask->getDocument();
        QList<GObject*> seqObjects = doc->findGObjectByType(GObjectTypes::SEQUENCE);

        assert(seqObjects.count() == 1);
        if (seqObjects.count() == 0) {
            setError(QString("Ref sequence is not found in %1").arg(settings.refSeqUrl.fileName()) );
            return subTasks;
        }
        refSeqObj = qobject_cast<DNASequenceObject*>( seqObjects.first() );
        // qobject_cast yields NULL when the object is not a DNASequenceObject;
        // report it instead of dereferencing a null pointer below.
        if (refSeqObj == NULL) {
            setError(QString("Ref sequence is not found in %1").arg(settings.refSeqUrl.fileName()) );
            return subTasks;
        }
        seqWriter.setRefSeqLength(refSeqObj->getSequenceLen());
        seqWriter.setRefSeqName(refSeqObj->getGObjectName());
    } else if ( isOneOfFindSubtasks(subTask)) {
        --bunchSize;
    }

    // Nothing can be aligned until the reference sequence is available.
    if (refSeqObj == NULL) {
        return subTasks;
    }

    if (bunchSize == 0 ) {
        // All reads of the current bunch are processed: flush their results.
        writeResults();
        // Read next bunch of sequences
        readShortReadsBunch();

        if (shortReads.count() == 0) {
            // no more reads to align
            return subTasks;
        }

        initFindSubtasks = false;
        // Build the index when there is none yet, or when the current window
        // is smaller than the prefix size of the existing one.
        if ( createIndexTask == NULL ||  windowSize < createIndexTask->getPrefixSize() ) {
            setupCreateIndexTask();
            subTasks.append(createIndexTask);
        } else {
            // The existing index can be reused: create find subtasks right away.
            initFindSubtasks = true;
        }
    }

    if ( createIndexTask == subTask || initFindSubtasks == true ) {
        assert(createIndexTask != NULL);
        initFindSubtasks = false;
        SArrayIndex* index = createIndexTask->index;
        // One find subtask per read; findTasks keeps the same order as
        // shortReads, which writeResults() relies on.
        foreach(const DNASequence& seq, shortReads) {
            SArrayBasedSearchSettings s;
            s.query = seq.seq;
            s.useBitMask = false;
            s.absMismatches = absMismatches;
            s.nMismatches = nMismatches;
            s.ptMismatches = ptMismatches;
            s.unknownChar = createIndexTask->getUnknownChar();
            GenomeAlignerFindTask *findTask = new GenomeAlignerFindTask(index, s, onlyFirstMatch);
            findTasks.append(findTask);
            subTasks.append(findTask);
        }
    }

    return subTasks;
}


/**
 * Checks that every position of the sequence has a quality value that is not
 * below the threshold.
 *
 * @param dna        sequence with non-empty quality data (asserted).
 * @param threshold  minimal accepted quality value.
 * @return false as soon as one value below the threshold is met, true otherwise.
 */
static bool isDnaQualityAboveThreshold(const DNASequence& dna, int threshold) {
    assert(!dna.quality.isEmpty());

    bool allAbove = true;
    int pos = 0;
    // Stops at the first position whose quality is below the threshold.
    while (allAbove && pos < dna.length()) {
        const int score = dna.quality.getValue(pos);
        allAbove = (score >= threshold);
        ++pos;
    }
    return allAbove;
}


void GenomeAlignerTask::readShortReadsBunch()
{
    shortReads.clear();
    foreach( Task* fTask, findTasks) {
        fTask->cleanup();
    }
    findTasks.clear();
    while(seqReader.hasNext() && bunchSize < maxBunchSize) {
        const DNASequence& seq = seqReader.getNextSequenceObject() ->getDNASequence();
        if (MIN_SHORT_READ_LENGTH > seq.length()) {
            continue;
        }
        if ( qualityThreshold > 0 && seq.hasQualityScores() ) {
            // simple quality filtering
            bool ok = isDnaQualityAboveThreshold(seq, qualityThreshold);
            if (!ok) {
                continue;
            }
        }
        shortReads.append(seq);
        ++bunchSize;
    }

    if (bunchSize == 0) {
        return;
    }

    if (alignReversed) {
        foreach (const DNASequence& s, shortReads) {
            QByteArray reversed(s.seq);
            TextUtils::reverse(reversed.data(), reversed.count());
            DNASequence rDna(QString("%1 rev").arg(s.getName()), reversed, NULL);
            transl->translate(rDna.seq.data(), rDna.length());
            if (rDna.seq != s.seq) {
                shortReads.append(rDna);
                ++bunchSize;
            }
        }

    }
}


void GenomeAlignerTask::writeResults()
{
    int count = findTasks.count();
    for (int i = 0; i < count; ++i) {
        GenomeAlignerFindTask *findTask = findTasks.at(i);
        QList<int> findResults = findTask->getResults();
        const DNASequence& seq = shortReads.at(i); 
        foreach (int offset, findResults) {
            seqWriter.writeNextAlignedRead(offset - 1, seq);
        }
    }
}


/**
 * Recomputes the window size and creates a new index-building task for the
 * reference sequence. The created task is stored in createIndexTask; adding
 * it to the subtasks is left to the caller.
 */
void GenomeAlignerTask::setupCreateIndexTask()
{
    // Note: an earlier version clamped windowSize to the length of the
    // reference region here (only when not in index-only mode); that code is
    // disabled and its purpose is unclear.

    calculateWindowSize();
    createIndexTask = new CreateSArrayIndexTask(refSeqObj, windowSize, true, prebuiltIdx,
                                                indexFileName, settings.refSeqUrl.fileName());
}



/**
 * Finalizes the task.
 *
 * - On error nothing is done.
 * - In index-only mode a notification about the built index is posted, if a
 *   notification stack is available.
 * - Otherwise an error is set when no read was written; if reads were written
 *   the writer is closed and the (unused) result object is given a name and
 *   an alphabet.
 *
 * @return ReportResult_Finished in every case.
 */
Task::ReportResult GenomeAlignerTask::report()
{
    if (hasErrors()) {
        return ReportResult_Finished;
    }

    if (justBuildIndex) {
        // Do not dereference a missing main window.
        // NOTE(review): presumably there is no main window when running
        // without the GUI - confirm.
        NotificationStack *nStack = NULL;
        if (AppContext::getMainWindow() != NULL) {
            nStack = AppContext::getMainWindow()->getNotificationStack();
        }

        // Create the action and the notification only when they can be handed
        // over to a stack, so nothing is allocated and then dropped.
        if (nStack != NULL) {
            QAction *action = new QAction("action", NULL);
            QString rep = QString("Suffix array index for %1 was built")
            .arg(settings.refSeqUrl.fileName());

            Notification *t = new Notification(rep, Report_Not, action);
            nStack->addNotification(t);
        }

        return ReportResult_Finished;
    }

    if (seqWriter.getNumSeqWritten() == 0) {
        setError("Reference assembly failed - no possible alignment found");
        return ReportResult_Finished;
    }
    seqWriter.close();

    // TODO: the MAlignment object should be omitted
    result.setName("Unused object");
    result.setAlphabet(AppContext::getDNAAlphabetRegistry()->findById(BaseDNAAlphabetIds::NUCL_DNA_DEFAULT()));

    return ReportResult_Finished;

}

// Does nothing itself: in this file the work is scheduled as subtasks from
// prepare() and onSubTaskFinished().
void GenomeAlignerTask::run()
{
    // no own work
}

/**
 * Tells whether the given task is one of the find subtasks of the current bunch.
 *
 * @param subTask  task to look for (compared by pointer).
 * @return true when the task is stored in findTasks, false otherwise.
 */
bool GenomeAlignerTask::isOneOfFindSubtasks( Task* subTask )
{
    const int total = findTasks.count();
    for (int idx = 0; idx < total; ++idx) {
        Task* candidate = findTasks.at(idx);
        if (candidate == subTask) {
            return true;
        }
    }
    return false;
}

/**
 * Subtask handler for the index-only mode.
 *
 * When the reference loader finishes, the sequence object is picked from the
 * loaded document and validated; after that the index-building task is created
 * once and returned for scheduling.
 *
 * @param subTask  the subtask that has just finished.
 * @return the index-building task on its first creation, otherwise an empty
 *         list (also on error).
 */
QList<Task*> GenomeAlignerTask::onJustBuildSubTaskFinished(Task *subTask)
{
    QList<Task*> subTasks;
    if (subTask == loadRefTask) {
        Document* doc = loadRefTask->getDocument();
        QList<GObject*> seqObjects = doc->findGObjectByType(GObjectTypes::SEQUENCE);

        assert(seqObjects.count() == 1);
        if (seqObjects.count() == 0) {
            setError(QString("Ref sequence is not found in %1").arg(settings.refSeqUrl.fileName()) );
            return subTasks;
        }
        refSeqObj = qobject_cast<DNASequenceObject*>( seqObjects.first() );
        // qobject_cast yields NULL when the object is not a DNASequenceObject;
        // report it instead of dereferencing a null pointer below.
        if (refSeqObj == NULL) {
            setError(QString("Ref sequence is not found in %1").arg(settings.refSeqUrl.fileName()) );
            return subTasks;
        }
        if (MIN_SHORT_READ_LENGTH > refSeqObj->getDNASequence().length()) {
            setError(QString("Ref sequence is too short"));
            return subTasks;
        }
    }

    // Without a reference sequence there is nothing to index yet.
    if (refSeqObj == NULL) {
        return subTasks;
    }
    // The index task is created only once.
    if ( createIndexTask == NULL) {
        setupCreateIndexTask();
        subTasks.append(createIndexTask);
    }

    return subTasks;
}

/**
 * Computes windowSize from the mismatch settings.
 *
 * Absolute mode: MIN_SHORT_READ_LENGTH divided by (nMismatches + 1) when
 * nMismatches is positive, MIN_SHORT_READ_LENGTH otherwise.
 * Percentage mode: a fixed value per percentage 1..10; any other percentage
 * (including 0) keeps MIN_SHORT_READ_LENGTH.
 */
void GenomeAlignerTask::calculateWindowSize() {
    // Window sizes for mismatch percentages 1..10 (index = percentage - 1).
    static const int windowForPercent[] = { 30, 25, 17, 15, 13, 11, 10, 10, 10, 7 };

    windowSize = MIN_SHORT_READ_LENGTH;

    if (absMismatches) {
        if (nMismatches > 0) {
            windowSize /= (nMismatches + 1);
        }
        return;
    }

    if (ptMismatches >= 1 && ptMismatches <= 10) {
        windowSize = windowForPercent[ptMismatches - 1];
    }
}
} // U2
