/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "ORFFinder.h"

#include <core_api/Log.h>
#include <util_text/TextUtils.h>
#include <util_algorithm/DynTable.h>
#include <util_algorithm/RollingArray.h>

#include <core_api/DNATranslation.h>
#include <core_api/DNAAlphabet.h>
#include <document_format/DNATranslationImpl.h>

#include <assert.h>

namespace GB2 {

// True when the requested strand setting includes the direct strand,
// i.e. it is either "direct only" or "both strands".
static bool isDirect(ORFAlgorithmStrand s) {
    if (s == ORFAlgorithmStrand_Direct) {
        return true;
    }
    return s == ORFAlgorithmStrand_Both;
}

// True when the requested strand setting includes the complement strand,
// i.e. it is either "complement only" or "both strands".
static bool isComplement(ORFAlgorithmStrand s) {
    if (s == ORFAlgorithmStrand_Complement) {
        return true;
    }
    return s == ORFAlgorithmStrand_Both;
}

void ORFFindAlgorithm::find(
                            ORFFindResultsListener* rl,
                            const ORFAlgorithmSettings& cfg,
                            const char* sequence, 
                            int seqLen, 
                            int& stopFlag, 
                            int& percentsCompleted)
{
    Q_UNUSED(seqLen);
    assert(cfg.proteinTT && cfg.proteinTT->isThree2One());
    DNATranslation3to1Impl* aTT = (DNATranslation3to1Impl*)cfg.proteinTT;
    bool mustFit = cfg.mustFit;
    bool mustInit = cfg.mustInit;
    bool allowAltStart = cfg.allowAltStart;
    int minLen = qMax(cfg.minLen, 3);

    int onePercentLen = cfg.searchRegion.len/100;
    if (cfg.strand == ORFAlgorithmStrand_Both) onePercentLen /=2;
    int leftTillPercent = onePercentLen;
    percentsCompleted = 0;

    if (isDirect(cfg.strand)) {
        int start[3] = {-1,-1,-1};
        if (!mustInit) {
            for (int i=0; i<3;i++) {
                int frame = (cfg.searchRegion.startPos + i)%3;
                start[frame] = cfg.searchRegion.startPos + i;
            }
        }
        int end = cfg.searchRegion.endPos();
        for(int i = cfg.searchRegion.startPos; i < end && !stopFlag; i++, leftTillPercent--) {
            int frame = i%3;
            if (start[frame] >=0) {
                if (aTT->isStopCodon(sequence + i)) {
                    int len = i - start[frame] + 3;
                    if (len>=minLen) rl->onResult(ORFFindResult(LRegion(start[frame], len), frame + 1));
                    start[frame] = mustInit? -1 : i + 3;
                }
            } else if (mustInit) { 
                if (aTT->isStartCodon(sequence + i)) {
                    start[frame] = i;
                } else if (allowAltStart 
                    && aTT->isCodon(DNATranslationRole_Start_Alternative, sequence + i)) {
                    start[frame] = i;
                }
            }
            if (leftTillPercent == 0) {
                percentsCompleted = qMin(percentsCompleted+1,100);
                leftTillPercent = onePercentLen;
            }
        }
        if (!mustFit && !stopFlag) {
            //check if non-terminated ORFs remained
            for (int i=0; i<3;i++) {
                if (start[i] >=0) {
					int len = end - start[i] - i;
					len -= len%3;
                    if (len>=minLen) rl->onResult(ORFFindResult(LRegion(start[i], len), i + 1));
                }
            }
        }
    }

    if (isComplement(cfg.strand)) {
        assert(cfg.complementTT && cfg.complementTT->isOne2One());
        QByteArray revComplDna(cfg.searchRegion.len, 0);
        cfg.complementTT->translate(sequence + cfg.searchRegion.startPos, cfg.searchRegion.len, 
            revComplDna.data(), cfg.searchRegion.len);
        TextUtils::reverse(revComplDna.data(), revComplDna.size());
        const char* rcSeq = revComplDna.data();

        int start[3] = {-1,-1,-1};
        if (!mustInit) {
            for (int i=0; i<3;i++) {
                int frame = (cfg.searchRegion.endPos() - i)%3;
                start[frame] = cfg.searchRegion.endPos() - i;
            }
        }
        int end = cfg.searchRegion.startPos;
        for(int i = cfg.searchRegion.endPos(); i >= end && !stopFlag; rcSeq++, i--, leftTillPercent--) {
            int frame = i%3;
            if (start[frame] >=0) {
                if (aTT->isStopCodon(rcSeq)) {
                    int len = start[frame] - i + 3;
                    if (len>=minLen) rl->onResult(ORFFindResult(LRegion(i - 3, len), frame - 3));
                    start[frame] = mustInit? -1 : i - 3;
                }
            } else if (mustInit) {
                if (aTT->isStartCodon(rcSeq)) {
                    start[frame] = i;
                } else if (allowAltStart 
                    && aTT->isCodon(DNATranslationRole_Start_Alternative, rcSeq)) {
                    start[frame] = i;
                }
            }
            if (leftTillPercent == 0) {
                percentsCompleted = qMin(percentsCompleted+1,100);
                leftTillPercent = onePercentLen;
            }
        }
        if (!mustFit && !stopFlag) {
            //check if non-terminated ORFs remained
            for (int i=0; i<3;i++) {
                if (start[i] >=0) {
                    int ind = end + i%3;
                    int len = start[i] - ind;
					len -= len%3;
                    if (len>=minLen) rl->onResult(ORFFindResult(LRegion(ind, len), i - 3));
                }
            }
        }
    }

}

}//namespace

