/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "RepeatFinderTests.h"

#include "FindRepeatsTask.h"

#include <gobjects/DNASequenceObject.h>

#include <core_api/DocumentModel.h>
#include <core_api/DocumentFormats.h>

namespace GB2 {

// Names of the XML attributes of the test element read by
// GTest_FindSingleSequenceRepeatsTask::init().

// Mandatory: name under which the sequence object is looked up in the test context.
#define SEQ_ATTR    "seq"
// Optional: region to search, written as "start..end".
#define REG_ATTR    "reg"
// Mandatory: integer >= 2, passed to the search settings as the minimal repeat length.
#define W_ATTR      "w"
// Optional (default "0"): integer in [0, w), passed to the search settings as the mismatch count.
#define C_ATTR      "c"
// NOTE(review): not referenced in this file; init() reads the literal "invert" instead.
// Confirm which attribute name the test XML files actually use before unifying them.
#define INV_ATTR    "inverted"
// Optional (default "-1"): minimal distance; -1 is replaced in prepare() by -(sequence length).
#define MIND_ATTR   "mind"
// Optional (default "-1"): maximal distance; -1 is replaced in prepare() by the sequence length.
#define MAXD_ATTR   "maxd"
// Mandatory: results file name, resolved relative to the COMMON_DATA_DIR test variable in run().
#define RESULT_ATTR "expected_result"
// Same text as the literal "reflect" that init() reads (default "true").
#define REFL_ATTR   "reflect"
// Optional: comma-separated list of algorithm names ("diagonal", "suffix") to skip.
#define EXCL_ATTR   "exclude"


/**
 * Parses attribute 'n' of element 'el' as a region written "start..end"
 * (1-based positions, both ends inclusive) and converts it to an LRegion with a
 * 0-based start position and a length.
 *
 * A default-constructed LRegion is returned when the attribute is absent or empty,
 * is not of the form "start..end", has a non-numeric bound, or does not satisfy
 * 1 <= start < end. prepare() checks the result with isEmpty() and falls back to the
 * whole sequence range in that case.
 *
 * @param n  name of the attribute holding the region text
 * @param el XML element to read the attribute from
 * @return the parsed region, or a default-constructed LRegion on any failure
 */
LRegion GTest_FindSingleSequenceRepeatsTask::parseRegion(const QString& n, const QDomElement& el) {
    LRegion res;
    QString v = el.attribute(n);
    if (v.isEmpty()) {
        return res;
    }
    int idx = v.indexOf("..");
    if (idx == -1 || idx+2 >= v.length()) {
        return res;
    }
    // Use the 'ok' flag: a failed conversion yields 0, which must not be mistaken
    // for a real coordinate.
    bool startOk = false;
    bool endOk = false;
    int startPos = v.left(idx).toInt(&startOk);
    int endPos = v.mid(idx+2).toInt(&endOk);
    if (!startOk || !endOk) {
        return res;
    }
    // Positions are 1-based, so the smallest valid start is 1; accepting 0 would
    // produce a region starting at -1.
    if (startPos >= 1 && endPos > startPos) {
        res.startPos = startPos - 1;
        res.len = endPos - startPos + 1;
    }
    return res;
}

/**
 * Reads the test parameters from the XML element 'el' into the task's fields.
 *
 * Mandatory attributes: SEQ_ATTR, W_ATTR (integer >= 2) and RESULT_ATTR.
 * Optional attributes: REG_ATTR, MIND_ATTR / MAXD_ATTR (default -1), C_ATTR
 * (default 0, must be in [0, w)), "invert", "reflect" (default "true"),
 * "filterNested" (default "false") and EXCL_ATTR (comma-separated list).
 *
 * On the first missing or illegal value an error is stored in stateInfo and the
 * function returns immediately, leaving the remaining fields untouched.
 *
 * @param tf unused
 * @param el XML element describing the test
 */
void GTest_FindSingleSequenceRepeatsTask::init(XMLTestFormat *tf, const QDomElement& el) {
    Q_UNUSED(tf);

    const QString missingValueMsg("Value not found '%1'");
    const QString illegalValueMsg("Illegal value for '%1': %2");

    // Sequence object name.
    seq = el.attribute(SEQ_ATTR);
    if (seq.isEmpty()) {
        stateInfo.setError(missingValueMsg.arg(SEQ_ATTR));
        return;
    }

    // Search region and distance limits; -1 means "not specified" and is resolved in prepare().
    region = parseRegion(REG_ATTR, el);
    minD = el.attribute(MIND_ATTR, "-1").toInt();
    maxD = el.attribute(MAXD_ATTR, "-1").toInt();

    // Window size.
    const QString windowText = el.attribute(W_ATTR);
    if (windowText.isEmpty()) {
        stateInfo.setError(missingValueMsg.arg(W_ATTR));
        return;
    }
    w = windowText.toInt();
    if (w < 2) {
        stateInfo.setError(illegalValueMsg.arg(W_ATTR).arg(windowText));
        return;
    }

    // Mismatch count, must stay below the window size.
    const QString mismatchText = el.attribute(C_ATTR, "0");
    c = mismatchText.toInt();
    const bool mismatchesInRange = (0 <= c) && (c < w);
    if (!mismatchesInRange) {
        stateInfo.setError(illegalValueMsg.arg(C_ATTR).arg(mismatchText));
        return;
    }

    // Boolean switches: only the exact text "true" enables a flag.
    // NOTE(review): INV_ATTR is defined as "inverted" but the attribute read here is
    // "invert" -- confirm against the test XML files before changing either.
    const QString trueText("true");
    inverted = (el.attribute("invert") == trueText);
    reflect = (el.attribute("reflect", "true") == trueText);
    filterNested = (el.attribute("filterNested", "false") == trueText);

    // File with the expected results.
    resultFile = el.attribute(RESULT_ATTR);
    if (resultFile.isEmpty()) {
        stateInfo.setError(missingValueMsg.arg(RESULT_ATTR));
        return;
    }

    // Algorithms that must not be run for this test.
    const QString excludedText = el.attribute(EXCL_ATTR);
    excludeList = excludedText.split(',', QString::SkipEmptyParts);
}

/**
 * Maps a repeat-finder algorithm id to the name used in test attributes and
 * error messages: "diagonal", "suffix", or "UNKNOWN" for any other value.
 */
static QString getAlgName(RFAlgorithm alg) {
    if (alg == RFAlgorithm_Diagonal) {
        return QString("diagonal");
    }
    if (alg == RFAlgorithm_Suffix) {
        return QString("suffix");
    }
    return QString("UNKNOWN");
}

void GTest_FindSingleSequenceRepeatsTask::prepare() {
    if (hasErrors() || isCanceled()) {
        return;
    }
    DNASequenceObject * seqObj = getContext<DNASequenceObject>(this, seq);
	if (seqObj == NULL){
		stateInfo.setError("can't find sequence1");
		return;
	}
    if (region.isEmpty()) {
        region = seqObj->getSequenceRange();
    }
    
    int maxLen = seqObj->getSequenceLen();
    if (minD == -1) {
        minD = -maxLen;
    } 
    if (maxD == -1) {
        maxD = maxLen;
    }

    
    QList<RFAlgorithm> algos;
    algos << RFAlgorithm_Diagonal << RFAlgorithm_Suffix;

    FindRepeatsTaskSettings s;
    s.minLen = w;
    s.mismatches = c;
    s.minDist = minD;
    s.maxDist = maxD;
    s.inverted = inverted;
    s.seqRegion = region; 
    s.reportReflected = reflect;
    s.filterNested = filterNested;
    s.nThreads = 1;//todo: add to settings 
    
    foreach(RFAlgorithm algo, algos) {
        QString algName = getAlgName(algo);
        if (excludeList.contains(algName)) {
            continue;
        }
        s.algo = algo;
        Task* sub = new FindRepeatsTask(s, seqObj->getDNASequence());
        addSubTask(sub);
    }
}

/**
 * Compares the repeats found by every FindRepeatsTask subtask with the expected
 * repeats stored in the results file.
 *
 * The results file is "<COMMON_DATA_DIR>/<resultFile>". Every non-blank line must
 * hold exactly three space-separated integers "x y l"; x and y are decremented by
 * one when loaded, l is taken as is. Blank lines are ignored.
 *
 * Both the expected and the computed results are sorted before being compared
 * element by element. The first problem found (unreadable file, malformed line,
 * unexpected subtask type, different result count, different result) is stored in
 * stateInfo and ends the check. Does nothing when the task already has errors or
 * is canceled.
 */
void GTest_FindSingleSequenceRepeatsTask::run() {
    if (hasErrors() || isCanceled()) {
        return;
    }
    QVector<RFResult> expectedResults;
    // load file with results
    QString fname = env->getVar("COMMON_DATA_DIR") + "/" + resultFile;
    QFile file(fname);
    if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
        stateInfo.setError(QString("Can't open results file %1").arg(fname));
        return;
    }

    while (!file.atEnd()) {
        QString line = file.readLine();
        // A whitespace-only line (e.g. a trailing empty line) carries no result.
        if (line.trimmed().isEmpty()) {
            continue;
        }
        QStringList hit = line.split(' ', QString::SkipEmptyParts);
        if (hit.size()!=3) {
            stateInfo.setError(QString("Can't parse results line: %1").arg(line));
            return;
        }
        RFResult r;
        r.x = hit[0].toInt() - 1;
        r.y = hit[1].toInt() - 1;
        r.l = hit[2].toInt();
        // A failed conversion yields 0, so a non-numeric x or y shows up here as -1.
        if (r.x < 0 || r.y < 0 || r.l < 0) {
            stateInfo.setError(QString("Can't parse results line: %1").arg(line));
            return;
        }
        expectedResults.append(r);
    }
    file.close();

    qSort(expectedResults);

    //check all subtasks
    foreach(Task* t, getSubtasks()) {
        FindRepeatsTask* sub = qobject_cast<FindRepeatsTask*>(t);
        // qobject_cast returns 0 for an object of another type; never dereference it blindly.
        if (sub == NULL) {
            stateInfo.setError(QString("Unexpected subtask type, FindRepeatsTask expected"));
            return;
        }
        const QString algName = getAlgName(sub->getSettings().algo);
        QVector<RFResult> calcResults = sub->getResults();
        if (expectedResults.size()!=calcResults.size()) {
            stateInfo.setError(QString("Results count not matched, num = %1, expected = %2, alg = %3")
                            .arg(calcResults.size()).arg(expectedResults.size()).arg(algName));
            return;
        }
        qSort(calcResults);

        for (int i=0, n = expectedResults.size(); i < n; i++) {
            RFResult re = expectedResults[i];
            RFResult rc = calcResults[i];
            if (re!=rc) {
                stateInfo.setError(QString("Results not matched, expected(%1, %2, %3), computed(%4, %5, %6), algo = %7")
                    .arg(re.x).arg(re.y).arg(re.l).arg(rc.x).arg(rc.y).arg(rc.l).arg(algName));
                return;
            }
        }
    }
}

/**
 * Returns the XML test factories provided by this file: currently only the
 * factory of GTest_FindSingleSequenceRepeatsTask.
 */
QList<XMLTestFactory*> RepeatFinderTests::createTestFactories() {
    QList<XMLTestFactory*> factories;
    factories << GTest_FindSingleSequenceRepeatsTask::createFactory();
    return factories;
}

} //namespace

