/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include <U2Algorithm/SArrayIndex.h>
#include <U2Algorithm/SArrayBasedFindTask.h>
#include <U2Algorithm/BitsTable.h>

#include "GenomeAlignerFindTask.h"
#include "GenomeAlignerIndex.h"

namespace U2 {

// Compares the single characters that the pointers x and y refer to;
// evaluates to true when they are equal.
#define PCHAR_MATCH(x, y) (*(x) == *(y))


// Creates a search task over the index `i`.
// The settings `s` are copied onto the heap (the copy is deleted in cleanup());
// `_onlyFirstMatch` is stored as given.
GenomeAlignerFindTask::GenomeAlignerFindTask(SArrayIndex* i,
                                             const SArrayBasedSearchSettings& s,
                                             bool _onlyFirstMatch)
    : Task("GenomeAlignerFindTask", TaskFlag_None),
      index(i),
      config(new SArrayBasedSearchSettings(s)),
      onlyFirstMatch(_onlyFirstMatch)
{
    // runSearch() dereferences the index unconditionally.
    assert(index != NULL);
}

// Runs the task: all work is delegated to runSearch().
void GenomeAlignerFindTask::run() {
    runSearch();
}

// Searches the indexed sequence for occurrences of config->query with at most
// CMAX mismatches and appends the found positions to `results`.
//
// CMAX is config->nMismatches when config->absMismatches is set; otherwise it is
// computed from config->ptMismatches as a percentage of the query length W.
// The query is sampled with windows of index->getPrefixSize() characters taken
// every q = W / (CMAX + 1) positions. Each window is looked up in the index and
// every reported position is extended to the right and to the left of the window
// while counting mismatches (the window itself is not re-compared here).
//
// Sets the task error and returns if the index window is larger than q.
// When onlyFirstMatch is set, the whole search stops after the first accepted result.
void GenomeAlignerFindTask::runSearch() {
    const int W = config->query.length();
    int CMAX = 0;
    if (config->absMismatches) {
        CMAX = config->nMismatches;
    } else {
        CMAX = (W * config->ptMismatches) / MAX_PERCENTAGE;
    }
    const int q = W / (CMAX + 1);
    const int windowSize = index->getPrefixSize();

    // This depends on the query length and the mismatch settings, so it is
    // reported as a task error instead of being asserted: an assert here would
    // abort debug builds before the error could be set.
    if (windowSize > q) {
        setError( QString("Too large SArrayIndex window (%1) for %2-mismatch search").arg(windowSize).arg(CMAX) );
        return;
    }

    const char* querySeq = config->query.constData();
    const char* arraySeq = index->getIndexedSequence();
    // One-past-the-end pointers of the query and of the indexed sequence.
    const char* const endS = querySeq + W;
    const char* const endA = arraySeq + index->getSequenceLength();
    SArrayIndex::SAISearchContext context;
    //search in bitMask
    for (int i = 0; i < W - windowSize + 1; i += q) {
        const char* seq = querySeq + i;
        quint32 bitValue = index->getBitValue(seq);
        bool haveResults = index->findBit(&context, bitValue, seq);

        if (!haveResults) {
            continue;
        }

        int pos = -1;
        while ( (pos = index->nextArrSeqPos(&context)) != -1 ) {
            int c = 0; // mismatches counted so far for this candidate

            // forward collect: compare everything to the right of the window
            const char* posS = seq + windowSize;
            const char* posA = arraySeq + pos + windowSize;
            for ( ; (posS < endS) && (c <= CMAX); posS++, posA++) {
                if (posA >= endA) {
                    // out of arraySeq boundaries -> do not need to continue
                    c = CMAX + 1;
                    break;
                }
                c += PCHAR_MATCH(posS, posA) ? 0 : 1;
            }

            // backward collect: compare everything to the left of the window
            posS = seq - 1;
            posA = arraySeq + pos - 1;
            for ( ; (posS >= querySeq) && (c <= CMAX); posS--, posA--) {
                if (posA < arraySeq) {
                    // out of arraySeq boundaries -> do not need to continue
                    c = CMAX + 1;
                    break;
                }
                c += PCHAR_MATCH(posS, posA) ? 0 : 1;
            }

            // Start of the whole query in the indexed sequence, shifted by +1.
            int result = pos - i + 1;
            if ( (c <= CMAX) && (!results.contains(result)) ) {
                results.append(result);
                if (onlyFirstMatch) {
                    // Leave the window loop as well: breaking only the inner
                    // loop would let the following windows add more results.
                    return;
                }
            }
        }
    }
}

// Deletes the settings copy allocated in the constructor and resets the
// pointer, so a repeated call deletes NULL, which is a no-op.
void GenomeAlignerFindTask::cleanup()
{
    delete config;
    config = NULL;
}

// Creates a cache lookup task over the index `i`; the settings `s` are copied
// onto the heap and kept in `settings`.
FindTask::FindTask(U2::GenomeAlignerIndex* i, const SearchSettings& s)
    : Task("GenomeAlignerFindTask", TaskFlag_None),
      index(i),
      settings(new SearchSettings(s))
{
}

void FindTask::run() {
    int CMAX = 0;
    int W = 0;
    int q = 0;
    int windowSize = index->getPrefixSize();
    foreach (SearchQuery *qu, settings->queries) {
        if (settings->absMismatches) {
            CMAX = settings->nMismatches;
        } else {
            CMAX = (qu->shortRead.length() * settings->ptMismatches) / MAX_PERCENTAGE;
        }
        W = qu->shortRead.length();
        q = W / (CMAX + 1);

        assert(windowSize <= q);
        if (windowSize > q) {
            setError( QString("Too large SArrayIndex window (%1) for %2-mismatch search").arg(windowSize).arg(CMAX) );
            return;
        }

        const char* querySeq = qu->shortRead.constData();
        const char* arraySeq = index->getIndexedSequence();
        GenomeAlignerIndex::SAISearchContext context;
        //search in cache
        for (int i = 0; i < W - windowSize + 1; i+=q) {
            const char *seq = querySeq + i;
            quint64 bitValue = index->getBitValue(seq);
            context.bitValue = bitValue;
            context.query = seq;
            int numberOfPart = index->findInCache(context);

            if (numberOfPart < 0) {
                if (0 == CMAX) {
                    break;
                } else {
                    continue;
                }
            }

            CacheResult r;
            r.numberOfPart = numberOfPart;
            r.posAtShortRead = i;

            if (qu->cacheResults.isEmpty()) {
                qu->cacheResults.append(r);
            } else {
                QList<CacheResult>::iterator it = qu->cacheResults.begin();
                while (it != qu->cacheResults.end()) {
                    if ((*it).numberOfPart >= r.numberOfPart) {
                        qu->cacheResults.insert(it, r);
                        break;
                    }
                    it++;
                }
                if (qu->cacheResults.end() == it) {
                    qu->cacheResults.append(r);
                }
            }
        }
    }
}

// No-op: nothing is released here.
// NOTE(review): `settings` is allocated with new in the constructor and is not
// deleted anywhere in this file - confirm that it is freed elsewhere.
void FindTask::cleanup() {

}

} // U2
