/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "SWWorker.h"
#include "FindWorker.h"

#include <workflow/IntegralBusModel.h>
#include <workflow/WorkflowEnv.h>
#include <workflow/WorkflowRegistry.h>
#include <workflow_support/CoreDataTypes.h>
#include <workflow_library/BioDatatypes.h>
#include <workflow_library/BioActorLibrary.h>
#include <workflow_support/DelegateEditors.h>
#include <workflow_support/CoreLibConstants.h>

#include <datatype/DNASequence.h>
#include <core_api/DNATranslation.h>
#include <core_api/DNAAlphabet.h>
#include <core_api/AppContext.h>
#include <core_api/Log.h>
#include <core_api/SmithWatermanTaskFactoryRegistry.h>
#include <core_api/SubstMatrixRegistry.h>
#include <core_api/SWResultFilterRegistry.h>
#include <core_api/PluginModel.h>
#include <util_tasks/FailTask.h>
#include <util_tasks/TaskSignalMapper.h>
#include <util_tasks/SequenceWalkerTask.h>

#include "CoreLib.h"

/* TRANSLATOR GB2::LocalWorkflow::SWWorker */
/* TRANSLATOR GB2::LocalWorkflow::FindWorker */

namespace GB2 {
namespace LocalWorkflow {

// File-local log category; the code in this file reports errors, details and
// info messages through it.
static LogCategory log(ULOG_CAT_WD);

/**************************
 * SWWorkerFactory
 **************************/
// Identifiers of the Smith-Waterman element's attributes. They serve as
// descriptor ids in SWWorkerFactory::init() and as parameter lookup keys in
// SWPrompter::composeRichDoc() and SWWorker::tick().
// NOTE(review): the "a_".."j_" prefixes presumably control attribute ordering — confirm.
static const QString NAME_ATTR("a_name");        // name given to the result annotations
static const QString PATTERN_ATTR("b_pattern");  // pattern to search for
static const QString SCORE_ATTR("c_minscore");   // minimal score, in percent
static const QString MATRIX_ATTR("d_matrix");    // scoring matrix name
static const QString AMINO_ATTR("e_translate");  // search in the translated sequence
static const QString STRAND_ATTR("f_strand");    // StrandOption value
static const QString ALGO_ATTR("g_algorithm");   // Smith-Waterman task factory name
static const QString FILTER_ATTR("h_filter");    // result filter id
static const QString GAPOPEN_ATTR("i_gapopen");  // gap open score
static const QString GAPEXT_ATTR("j_gapext");    // gap extension score

// Id of the actor prototype that SWWorkerFactory::init() registers.
const QString SWWorkerFactory::ACTOR_ID("find.smithwaterman");

void SWAlgoEditor::populate() {
    QStringList algoLst = AppContext::getSmithWatermanTaskFactoryRegistry()->getListFactoryNames();
    if (algoLst.isEmpty()) {
        return;
    }
    foreach(const QString& n, algoLst) {
        items.insert(n,n);
    }
    QList<Attribute*> lst = proto->getAttributes();
    foreach(Attribute* a, lst) {
        if (a->getId() == ALGO_ATTR) {
            a->setAttributeValue(algoLst.first());
            break;
        }
    }
}

void SWWorkerFactory::init() {

    //QMap<Descriptor, DataTypePtr> m;
    //m[BioActorLibrary::SEQ_SLOT()] = BioDataTypes::DNA_SEQUENCE_TYPE();
    //m[BioActorLibrary::FEATURE_TABLE_SLOT()] = BioDataTypes::ANNOTATION_TABLE_TYPE();
    //DataTypePtr inSet(new DataTypeSet(Descriptor("regioned.sequence"), m));
    //DataTypeRegistry* dr = WorkflowEnv::getDataTypeRegistry();
    //assert(dr);
    //dr->registerEntry(inSet);

    QList<PortDescriptor*> p; QList<Attribute*> a;
    {
        Descriptor ind(CoreLibConstants::IN_PORT_ID, SWWorker::tr("Input data"), SWWorker::tr("An input sequence and set of regions to search in."));
        Descriptor oud(CoreLibConstants::OUT_PORT_ID, SWWorker::tr("Pattern annotations"), SWWorker::tr("Found regions"));
        p << new PortDescriptor(ind, BioDataTypes::DNA_SEQUENCE_TYPE(), true);
        p << new PortDescriptor(oud, BioDataTypes::ANNOTATION_TABLE_TYPE(), false, true);
    }

    QStringList filterLst = AppContext::getSWResultFilterRegistry()->getFiltersIds();
    QString defFilter = filterLst.isEmpty() ? QString() : filterLst.first();
    
    {
        Descriptor nd(NAME_ATTR, FindWorker::tr("Annotate as"), SWWorker::tr("Name of the result annotations marking found regions."));
        Descriptor pd(PATTERN_ATTR, FindWorker::tr("Pattern"), SWWorker::tr("A subsequence pattern to look for."));
        Descriptor scd(SCORE_ATTR, SWWorker::tr("Min score"), SWWorker::tr("The search stringency."));
        Descriptor ald(ALGO_ATTR, SWWorker::tr("Algorithm"), SWWorker::tr("Algorithm version."));
        Descriptor amd(AMINO_ATTR, FindWorker::tr("Search in translation"), SWWorker::tr("Translate a supplied nucleotide sequence to protein then search in the translated sequence."));
        Descriptor sd(STRAND_ATTR, FindWorker::tr("Search in"), SWWorker::tr("Which strands should be searched: direct, complement or both."));
        Descriptor mxd(MATRIX_ATTR, SWWorker::tr("Scoring matrix"), SWWorker::tr("The scoring matrix."));
        Descriptor frd(FILTER_ATTR, SWWorker::tr("Filter results"), SWWorker::tr("Result filtering strategy."));
        Descriptor god(GAPOPEN_ATTR, SWWorker::tr("Gap open score"), SWWorker::tr("Gap open score."));
        Descriptor ged(GAPEXT_ATTR, SWWorker::tr("Gap ext score"), SWWorker::tr("Gap extension score."));

        a << new Attribute(nd, CoreDataTypes::STRING_TYPE(), true, "misc_feature");
        a << new Attribute(pd, CoreDataTypes::STRING_TYPE(), true);
        a << new Attribute(mxd, CoreDataTypes::STRING_TYPE(), true, QString("---"));
        a << new Attribute(ald, CoreDataTypes::STRING_TYPE(), true);
        a << new Attribute(frd, CoreDataTypes::STRING_TYPE(), false, defFilter);
        a << new Attribute(scd, CoreDataTypes::NUM_TYPE(), false, 90);
        a << new Attribute(sd, CoreDataTypes::NUM_TYPE(), false, StrandOption_Both);
        a << new Attribute(amd, CoreDataTypes::BOOL_TYPE(), false, false);
        a << new Attribute(god, CoreDataTypes::NUM_TYPE(), false, -10.);
        a << new Attribute(ged, CoreDataTypes::NUM_TYPE(), false, -1.);
    }

    Descriptor desc(ACTOR_ID, SWWorker::tr("Smith-Waterman search"), SWWorker::tr("Finds regions of similarity to the specified pattern in each input sequence (nucleotide or protein one). "
        "<p>Under the hood is the well-known Smith-Waterman algorithm for performing local sequence alignment."));
    ActorPrototype* proto = new BusActorPrototype(desc, p, a);
    QMap<QString, PropertyDelegate*> delegates;    
    {
        QVariantMap m; m["minimum"] = 1; m["maximum"] = 100; m["suffix"] = "%";
        delegates[SCORE_ATTR] = new SpinBoxDelegate(m);
    }    
    {
        QVariantMap m; m["maximum"] = -0.; m["minimum"]=-10000000.;
        delegates[GAPOPEN_ATTR] = new DoubleSpinBoxDelegate(m);
        m["maximum"] = -1.;
        delegates[GAPEXT_ATTR] = new DoubleSpinBoxDelegate(m);
    }    
    {
        QVariantMap strandMap; 
        strandMap[FindWorker::tr("both strands")] = StrandOption_Both;
        strandMap[FindWorker::tr("direct strand")] = StrandOption_DirectOnly;
        strandMap[FindWorker::tr("complement strand")] = StrandOption_ComplementOnly;
        delegates[STRAND_ATTR] = new ComboBoxDelegate(strandMap);
    }
    {
        QVariantMap m;   
        foreach(const QString& n, filterLst) {
            m.insert(n,n);
        } 
        delegates[FILTER_ATTR] = new ComboBoxDelegate(m);
    }
    {
        QVariantMap m; m.insert(SWWorker::tr("Auto"), QString("---"));
        QStringList lst = AppContext::getSubstMatrixRegistry()->getMatrixNames();	
        foreach(const QString& n, lst) {
            m.insert(n,n);
        } 
        delegates[MATRIX_ATTR] = new ComboBoxDelegate(m);
    }
    SWAlgoEditor* aled = new SWAlgoEditor(proto);
    aled->connect(AppContext::getPluginSupport(), SIGNAL(si_allStartUpPluginsLoaded()), SLOT(populate()));
    delegates[ALGO_ATTR] = aled;
    proto->setEditor(new DelegateEditor(delegates));
    proto->setIconPath(":core/images/sw.png");
    proto->setPrompter(new SWPrompter());
    WorkflowEnv::getProtoRegistry()->registerProto(BioActorLibrary::CATEGORY_BASIC(), proto);

    DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
    localDomain->registerEntry(new SWWorkerFactory());
}

/**************************
 * SWPrompter
 **************************/
/**
 * Composes the rich-text description of the configured search: the
 * sequence producer (when one is connected), the pattern, the required
 * match quality, the strands searched and the name of the resulting
 * annotations.
 *
 * @return translated HTML-formatted description.
 */
QString SWPrompter::composeRichDoc() {
    BusPort* input = qobject_cast<BusPort*>(target->getPort(CoreLibConstants::IN_PORT_ID));
    //Actor* seqProducer = input->getProducer(BioActorLibrary::SEQ_SLOT_ID);
    //Actor* annProducer = input->getProducer(BioActorLibrary::FEATURE_TABLE_SLOT_ID);

    // qobject_cast yields 0 when the port is missing or is not a BusPort;
    // in that case the producer part of the text is simply left out.
    Actor* seqProducer = NULL;
    if (input != NULL) {
        seqProducer = input->getProducer(CoreLibConstants::IN_PORT_ID);
    }
    QString seqName = seqProducer ? tr("In each sequence from <u>%1</u>,").arg(seqProducer->getLabel()) : "";
    //QString annName = annProducer ? tr(" within a set of regions from <u>%1</u>").arg(annProducer->getLabel()) : "";

    SmithWatermanSettings cfg;
    cfg.strand = StrandOption(getParameter(STRAND_ATTR).toInt());
    cfg.percentOfScore = getParameter(SCORE_ATTR).toInt();
    //cfg.insDelAlg = getParameter(ALGO_ATTR).toBool();
    QString pattern = getRequiredParam(PATTERN_ATTR);

    // Same strand captions as the strand combo box offers.
    QString strandName;
    switch (cfg.strand) {
    case StrandOption_Both: strandName = FindWorker::tr("both strands"); break;
    case StrandOption_DirectOnly: strandName = FindWorker::tr("direct strand"); break;
    case StrandOption_ComplementOnly: strandName = FindWorker::tr("complement strand"); break;
    default: break;
    }
    if (getParameter(AMINO_ATTR).toBool()) {
        strandName += tr(" of translated sequence");
    }

    QString resultName = getRequiredParam(NAME_ATTR);

    // A score of 100% (or more) is worded as an exact match.
    QString match = cfg.percentOfScore < 100 ?
        tr("matches with <u>at least %1% score</u>").arg(cfg.percentOfScore) : tr("exact matches");

    //FIXME mention search algorithm?
    QString doc = tr("%1 find pattern <u>%2</u>."
        "<br>Look for <u>%3</u> in <u>%4</u>."
        "<br>Output the list of found regions annotated as <u>%5</u>.")
        .arg(seqName)
        .arg(pattern)
        .arg(match)
        .arg(strandName)
        .arg(resultName);

    return doc;
}

/**************************
 * SWWorker
 **************************/
/**
 * Creates a worker for the given actor. The port pointers are resolved
 * later in init() and the task factory pointer is looked up in tick(),
 * so all three start out null.
 */
SWWorker::SWWorker(Actor* a) : BaseWorker(a), input(NULL), output(NULL), algo(NULL) {
    // algo is a pointer: it is initialised with NULL like the other pointer
    // members, not with `false`, which is no longer a null pointer constant
    // in C++11 and later.
}

/**
 * Resolves the worker's input and output ports by their ids.
 */
void SWWorker::init() {
    // The two lookups are independent of each other.
    output = ports.value(CoreLibConstants::OUT_PORT_ID);
    input = ports.value(CoreLibConstants::IN_PORT_ID);
}

/**
 * @return true when the input port exists and holds a message.
 *         The task factory (algo) is deliberately not part of the
 *         condition; it is looked up in tick().
 */
bool SWWorker::isReady() {
    if (!input) {
        return false;
    }
    return input->hasMessage();
}

/**
 * Takes one message from the input port and starts a Smith-Waterman search
 * over the sequence it carries, configured from the actor's parameters.
 *
 * @return the search task on success; a FailTask when the score, the
 *         pattern, the algorithm name, the pattern alphabet or the matrix
 *         name is invalid; otherwise the result of fail() (null sequence,
 *         no usable weight matrix).
 */
Task* SWWorker::tick() {
    Message inputMessage = getMessageAndSetupScriptValues(input);

    // --- Read the current parameter values into the shared settings.
    cfg.aminoTT = NULL;
    cfg.complTT = NULL;
    cfg.strand = StrandOption(actor->getParameter(STRAND_ATTR)->getAttributeValue<int>());
    cfg.percentOfScore = actor->getParameter(SCORE_ATTR)->getAttributeValue<int>();
    cfg.gapModel.scoreGapExtd = actor->getParameter(GAPEXT_ATTR)->getAttributeValue<double>();
    cfg.gapModel.scoreGapOpen = actor->getParameter(GAPOPEN_ATTR)->getAttributeValue<double>();
    mtrx = actor->getParameter(MATRIX_ATTR)->getAttributeValue<QString>();
    // The matrix combo box stores its "Auto" entry as "---" (also the
    // attribute's default value). Treat it like an empty value so that
    // automatic matrix selection is not rejected as an unknown matrix name.
    if (mtrx.isEmpty() || mtrx == "---") {
        mtrx = "Auto";
    }
    cfg.pSm = AppContext::getSubstMatrixRegistry()->getMatrix(mtrx);
    QString filter = actor->getParameter(FILTER_ATTR)->getAttributeValue<QString>();
    cfg.resultFilter = AppContext::getSWResultFilterRegistry()->getFilter(filter);
    if (cfg.resultFilter == NULL) {
        log.error(tr("Incorrect value:  filter name incorrect, default value used")); //details level won't work
        cfg.resultFilter = AppContext::getSWResultFilterRegistry()->getFilter("none");
    }

    resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>();
    if (resultName.isEmpty()) {
        log.error(tr("Incorrect value: result name is empty, default value used")); //details level won't work
        resultName = "misc_feature";
    }
    cfg.ptrn = actor->getParameter(PATTERN_ATTR)->getAttributeValue<QString>().toAscii().toUpper();
    QString algName = actor->getParameter(ALGO_ATTR)->getAttributeValue<QString>();
    algo = AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(algName);

    // --- Validate the parameters (each condition is checked once).
    if (cfg.percentOfScore < 0 || cfg.percentOfScore > 100) {
        log.error(tr("Incorrect value: score value must lay between 0 and 100"));
        return new FailTask(tr("Incorrect value: score value must lay between 0 and 100"));
    }
    if (cfg.ptrn.isEmpty()) {
        log.error(tr("Incorrect value: search pattern, pattern is empty"));
        return new FailTask(tr("Incorrect value: search pattern, pattern is empty"));
    }

    DNASequence seq = inputMessage.getData().value<DNASequence>();

    if (!algo) {
        return new FailTask(tr("SmithWaterman algorithm not found: %1").arg(algName));
    }

    // The sequence must be checked before seq.alphabet is dereferenced below.
    if (seq.isNull() || !seq.alphabet) {
        return fail(tr("Null sequence supplied to Smith-Waterman: %1").arg(seq.getName()));
    }

    // Every pattern symbol has to belong to the sequence alphabet.
    if (!seq.alphabet->containsAll(cfg.ptrn.constData(), cfg.ptrn.length())) {
        log.error(tr("Incorrect value: pattern alphabet doesn't match sequence alphabet "));
        return new FailTask(tr("Incorrect value: pattern alphabet doesn't match sequence alphabet "));
    }

    // --- Build the per-task settings from the shared ones.
    SmithWatermanSettings config(cfg);
    config.sqnc = QByteArray(seq.constData(), seq.length());
    if (config.strand != StrandOption_DirectOnly/* && seq.alphabet->getType() == DNAAlphabet_NUCL*/) {
        QList<DNATranslation*> compTTs = AppContext::getDNATranslationRegistry()->
            lookupTranslation(seq.alphabet, DNATranslationType_NUCL_2_COMPLNUCL);
        if (!compTTs.isEmpty()) {
            config.complTT = compTTs.first();
        } else {
            // Without a complement translation only the direct strand can be searched.
            log.error(tr("Could not find complement translation for %1, searching only direct strand").arg(seq.getName()));
            config.strand = StrandOption_DirectOnly;
        }
    }
    if (actor->getParameter(AMINO_ATTR)->getAttributeValue<bool>()) {
        DNATranslationType tt = (seq.alphabet->getType() == DNAAlphabet_NUCL) ? DNATranslationType_NUCL_2_AMINO : DNATranslationType_RAW_2_AMINO;
        QList<DNATranslation*> TTs = AppContext::getDNATranslationRegistry()->lookupTranslation(seq.alphabet, tt);
        if (!TTs.isEmpty()) {
            config.aminoTT = TTs.first(); //FIXME let user choose or use hints ?
        }
    }

    // An explicitly named matrix must exist; only "auto" may come back empty.
    if (config.pSm.getName().isEmpty() && mtrx.toLower() != "auto") {
        log.details(tr("Invalid value: weight matrix with given name not exists"));
        return new FailTask(tr("Invalid value: weight matrix with given name not exists"));
    }
    if (config.pSm.isEmpty()) {
        // Automatic selection: take the first matrix registered for the alphabet.
        QString matrixName;
        QStringList lst = AppContext::getSubstMatrixRegistry()->selectMatrixNamesByAlphabet(seq.alphabet);
        if (!lst.isEmpty()) {
            matrixName = lst.first();
            config.pSm = AppContext::getSubstMatrixRegistry()->getMatrix(matrixName);
        }
        if (config.pSm.isEmpty()) {
            return fail(tr("Can't find weight matrix name: '%1'!").arg(matrixName.isEmpty() ? tr("<empty>") : matrixName));
        }
    }

    config.globalRegion.len = seq.length();
    SmithWatermanReportCallbackImpl* rcb = new SmithWatermanReportCallbackImpl(NULL,resultName,QString()); //FIXME!!! where to delete?
    config.resultCallback = rcb;
    config.resultListener = new SmithWatermanResultListener(); //FIXME: where to delete??

    // --- Create the task; the callback is re-parented to it and remembered
    // so that sl_taskFinished() can collect the annotations.
    Task* task = algo->getTaskInstance(config, tr("smith_waterman_task"));
    rcb->setParent(task);
    callbacks.insert(task, rcb);

    connect(new TaskSignalMapper(task), SIGNAL(si_taskFinished(Task*)), SLOT(sl_taskFinished(Task*)));
    return task;
}

/**
 * Reports a failure for the current input message.
 *
 * @param err error text.
 * @return a FailTask carrying err when failFast is set; otherwise NULL,
 *         after logging err, putting an annotation table message with an
 *         empty QVariant on the output and marking the output as ended if
 *         the input has ended.
 */
Task* SWWorker::fail(const QString& err) {
    if (!failFast) {
        log.error(err);
        output->put(Message(BioDataTypes::ANNOTATION_TABLE_TYPE(), QVariant()));
        if (input->isEnded()) {
            output->setEnded();
        }
        return NULL;
    }
    return new FailTask(err);
}

/**
 * Slot invoked for a finished search task. Removes the report callback
 * that tick() stored for the task, sends its annotations to the output
 * port and marks the output as ended once the input has ended.
 *
 * @param t the finished task.
 */
void SWWorker::sl_taskFinished(Task* t) {
    SmithWatermanReportCallbackImpl* callback = callbacks.take(t);
    assert(callback);

    // Nothing to publish without a callback or an output port.
    if (!callback || !output) {
        return;
    }

    const QList<SharedAnnotationData>& annotations = callback->getAnotations();
    QVariant payload = qVariantFromValue<QList<SharedAnnotationData> >(annotations);
    output->put(Message(BioDataTypes::ANNOTATION_TABLE_TYPE(), payload));
    if (input->isEnded()) {
        output->setEnded();
    }
    log.info(tr("Found %1 matches of pattern '%2'").arg(annotations.size()).arg(QString(cfg.ptrn)));
}

/**
 * @return true when there is no input port or the input port has ended.
 */
bool SWWorker::isDone() {
    if (!input) {
        return true;
    }
    return input->isEnded();
}

// No-op: this worker does nothing in cleanup().
void SWWorker::cleanup() {
}

} //namespace LocalWorkflow
} //namespace GB2
