/* This file is part of Strigi Desktop Search
 *
 * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
#define STRIGI_IMPORT_API //todo: could also define this in cmake...
#include <jstreamsconfig.h>
#include <analyzerplugin.h>
#include <streamendanalyzer.h>
#include <indexable.h>
#include <extractor.h>

using namespace jstreams;
using namespace std;

class ExtractorEndAnalyzer : public jstreams::StreamEndAnalyzer {
private:
    static int objectCount;
    static EXTRACTOR_ExtractorList *extractors;
public:
    ExtractorEndAnalyzer();
    ~ExtractorEndAnalyzer();
    bool checkHeader(const char* header, int32_t headersize) const;
    char analyze(jstreams::Indexable& idx, jstreams::InputStream* in);
    static char staticAnalyze(std::string filename, jstreams::InputStream *in,
        int depth, jstreams::StreamIndexer *indexer, jstreams::Indexable*);
    const char* getName() const { return "ExtractorEndAnalyzer"; }
};

// Shared libextractor plugin list; assigned by the first constructed
// instance. Spelled out as 0 here, which is what static storage gives anyway.
EXTRACTOR_ExtractorList* ExtractorEndAnalyzer::extractors = 0;
// Count of currently existing ExtractorEndAnalyzer objects.
int ExtractorEndAnalyzer::objectCount = 0;

/**
 * Registers a new instance and, when it is the first one alive, loads the
 * default libextractor plugins into the shared list.
 */
ExtractorEndAnalyzer::ExtractorEndAnalyzer() {
    const bool isFirstInstance = (objectCount == 0);
    ++objectCount;
    if (isFirstInstance) {
        extractors = EXTRACTOR_loadDefaultLibraries();
    }
}
/**
 * Unregisters this instance; when it was the last one alive, removes the
 * shared libextractor plugin list.
 */
ExtractorEndAnalyzer::~ExtractorEndAnalyzer() {
    if (--objectCount == 0) {
        EXTRACTOR_removeAll(extractors);
        // Do not leave the static pointing at the removed list; the next
        // first instance assigns a fresh one in the constructor.
        extractors = 0;
    }
}
/**
 * Header check for this analyzer. Neither argument is inspected: the
 * function reports true for any input.
 *
 * @return always true
 */
bool
ExtractorEndAnalyzer::checkHeader(const char* /*header*/,
                                  int32_t /*headersize*/) const
{
    return true;
}
/**
 * Reads the stream into one buffer, runs the shared libextractor plugins on
 * it and stores each reported keyword as a field on idx.
 *
 * @param idx  receives one setField() call per keyword that has both a type
 *             name and a value
 * @param in   stream to analyze; it is rewound to position 0 before every
 *             enlarged read attempt
 * @return 0 when at least one field was set, -1 otherwise. The `error`
 *         member is set when reading or rewinding the stream fails; it is
 *         left untouched when simply no keyword was found.
 */
char
ExtractorEndAnalyzer::analyze(jstreams::Indexable& idx, InputStream *in) {
    // EXTRACTOR_getKeywords2 takes a single (pointer, length) buffer, so the
    // read window is doubled until a read comes back short or the window
    // has reached the limit below.
    const int32_t initialWindow = 20000;
    const int32_t windowLimit = 200000000;

    int32_t size = initialWindow;
    const char* b = 0;
    int32_t nread = in->read(b, size, size);
    while (nread == size && size < windowLimit) {
        // The larger read must start at the beginning again. If the stream
        // cannot be rewound, the next read would return data from the middle
        // of the stream, so give up instead of analyzing the wrong bytes.
        if (in->reset(0) != 0) {
            error = "Error resetting stream.";
            return -1;
        }
        size *= 2;
        nread = in->read(b, size, size);
    }
    if (nread <= 0) {
        error = "Error reading stream.";
        return -1;
    }

    int nfields = 0;
    EXTRACTOR_KeywordList* const keywords =
        EXTRACTOR_getKeywords2(extractors, b, nread);
    for (EXTRACTOR_KeywordList* kw = keywords; kw; kw = kw->next) {
        // todo: better mapping to field names
        const char* name = EXTRACTOR_getKeywordTypeAsString(kw->keywordType);
        if (name && kw->keyword) {
            idx.setField(name, kw->keyword);
            ++nfields;
        }
    }
    EXTRACTOR_freeKeywords(keywords);

    return (nfields) ? 0 : -1;
}

/**
 * Factory producing ExtractorEndAnalyzer instances.
 *
 * Both member functions are private (class default access) and are therefore
 * only reachable through the StreamEndAnalyzerFactory base.
 */
class ExtractorEndAnalyzerFactory
    : public StreamEndAnalyzerFactory {
    /** Name of the analyzer this factory creates. */
    const char* getName() const {
        return "ExtractorEndAnalyzer";
    }
    /** Allocates a new analyzer with `new` and returns the raw pointer. */
    StreamEndAnalyzer* newInstance() const {
        StreamEndAnalyzer* const analyzer = new ExtractorEndAnalyzer();
        return analyzer;
    }
};

/**
 * Plugin-level factory: reports the stream end analyzer factories this
 * plugin provides, which is exactly one ExtractorEndAnalyzerFactory.
 */
class Factory : public AnalyzerFactoryFactory {
public:
    /**
     * @return a list holding one newly allocated ExtractorEndAnalyzerFactory;
     *         the raw pointer is handed to the caller inside the list
     */
    list<StreamEndAnalyzerFactory*>
    getStreamEndAnalyzerFactories() const {
        typedef list<StreamEndAnalyzerFactory*> FactoryList;

        StreamEndAnalyzerFactory* const extractorFactory =
            new ExtractorEndAnalyzerFactory();
        FactoryList factories;
        factories.insert(factories.end(), extractorFactory);
        return factories;
    }
};

// NOTE(review): the macro is not defined in this file; presumably it comes
// from analyzerplugin.h and exposes Factory as the plugin's entry point so
// the host can discover the analyzers above — confirm against that header.
STRIGI_ANALYZER_FACTORY(Factory)
