#ifndef SpellChecker_h
#include "SpellChecker.h"
#endif

#ifndef StringUtilities_h
#include "StringUtilities.h"
#endif

#ifndef std_algorithm
#define std_algorithm
#include <algorithm>
#endif

#ifndef std_fstream
#define std_fstream
#include <fstream>
#endif

#ifndef std_iostream
#define std_iostream
#include <iostream>
#endif

#ifndef std_vector
#define std_vector
#include <vector>
#endif

#ifndef std_string
#define std_string
#include <string>
#endif

#ifndef Log_h
#include "Log.h"
#endif

#include <ctype.h>

#include <math.h>

#include <stdlib.h>

#include <string.h>

#ifndef std_new
#define std_new
#include <new>
#endif

using namespace std;
using namespace doctorj;

// Maximum number of leading characters of each word that take part in a
// comparison; editDistance() clamps both lengths to this value.
static const size_t COMP_LEN = 20;
// Dimension of the distance table used by compare(): one row/column per
// compared character plus the row/column for the empty prefix.
static const size_t ARR_SIZE = COMP_LEN + 1;
// Per-operation costs used when filling the distance table. A change costs
// the same as one deletion plus one addition.
static const int ADDITION = 1;
static const int CHANGE   = 2;
static const int DELETION = 1;

// Returns a malloc()-allocated copy of the C string x; the caller releases it
// with free(). The argument is evaluated twice, so it must be free of side
// effects. The result of malloc() is not checked before strcpy() writes to it.
#define dupstr(x) (strcpy((char*)malloc(strlen(x) + 1), x))

/**
 * Returns the smallest of the three given integers.
 */
static int min3(int x, int y, int z)
{
    int least = (x < y) ? x : y;
    if (!(least < z)) {
        least = z;
    }
    return least;
}

/**
 * Returns a newly allocated, upper-cased copy of the given C string.
 *
 * The copy is obtained from malloc(), so the caller owns it and must release
 * it with free(). Returns NULL if the allocation fails.
 *
 * @param str  NUL-terminated string to convert; must not be NULL.
 */
static char* upcase(const char* const str)
{
    const size_t len = strlen(str);
    char* const up = (char*)malloc(len + 1);
    if (up == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < len; ++i) {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain (possibly negative) char must be converted
        // first.
        up[i] = (char)toupper((unsigned char)str[i]);
    }
    up[len] = '\0';
    return up;
}

// Largest edit distance still treated as a near match when the caller does
// not pass an explicit maximum (see the two-argument isCorrect() and
// nearMatch()).
const int SpellChecker::DEFAULT_MAX_DISTANCE = 4;

/**
 * Creates an empty checker whose word table has room for 10000 entries. The
 * table is allocated with malloc() and released by the destructor.
 */
SpellChecker::SpellChecker()
    : words_(static_cast<char**>(malloc(10000 * sizeof(char*)))),
      nWords_(0),
      maxWords_(10000)
{
    // Log::setEnabled();
}

/**
 * Releases every stored word and then the table that held them.
 *
 * NOTE(review): the object owns raw malloc()ed memory; whether copying is
 * disabled in the class declaration is not visible from this file -- confirm.
 */
SpellChecker::~SpellChecker()
{
    char** const end = words_ + nWords_;
    for (char** entry = words_; entry != end; ++entry) {
        free(*entry);
    }
    free(words_);
}

/**
 * Computes a weighted edit distance between the two words, looking at no
 * more than the first COMP_LEN characters of each.
 *
 * Before doing the full comparison, the (clamped) lengths are checked: if
 * they differ by more than a threshold, the words are considered too far
 * apart and the negated length difference is returned instead. The threshold
 * is the larger of `maximum` and floor(1 + (len1 + 2) / 4), so it grows with
 * the length of the first word.
 *
 * @return the distance computed by compare(), or a negative number when the
 *         lengths alone rule the pair out.
 */
int SpellChecker::editDistance(const char* const str1, size_t len1, const char* const str2, size_t len2, int maximum) const
{
    // Only the leading COMP_LEN characters of either word are compared.
    const size_t n1 = (len1 < COMP_LEN) ? len1 : COMP_LEN;
    const size_t n2 = (len2 < COMP_LEN) ? len2 : COMP_LEN;

    const int lengthBased = (int)floor((double)1 + (n1 + 2) / 4.0);
    const int threshold = (maximum < lengthBased) ? lengthBased : maximum;

    const int diff = abs(int(n1) - int(n2));
    if (diff <= threshold) {
        return compare(str1, n1, str2, n2);
    }

    // Too different in length: report that as a negative value.
    return -1 * diff;
}

/**
 * Compares the two characters. English words should probably be case
 * insensitive; code should not.
 */
int SpellChecker::compare(const char* const str1, int len1, const char* const str2, int len2) const
{
    int distance[ARR_SIZE][ARR_SIZE];
    distance[0][0] = 0;
    
    for (unsigned int j = 1; j < ARR_SIZE; ++j) {
        distance[0][j] = distance[0][j - 1] + ADDITION;
        // LOGF(1, "distance[0][%d] = distance[0][%d] + ADDITION = (%d)", j, j - 1, distance[0][j]);
        distance[j][0] = distance[j - 1][0] + DELETION;
        // LOGF(1, "distance[%d][0] = distance[%d][0] + DELETION = (%d)", j, j - 1, distance[j][0]);
    }
    
    for (int i = 1; i <= len1; ++i) {
        for (int j = 1; j <= len2; ++j) {
            distance[i][j] = min3(distance[i - 1][j - 1] + (str1[i - 1] == str2[j - 1] ? 0 : CHANGE),
                                  distance[i][j - 1] + ADDITION,
                                  distance[i - 1][j] + DELETION);
        }
    }
    
    return distance[len1][len2];
}

/**
 * Reports whether the word is in the dictionary and, if it is not,
 * optionally collects dictionary words that are close to it.
 *
 * @param word             the word to look up.
 * @param maxEditDistance  largest edit distance a dictionary word may have
 *                         to be reported as a near match.
 * @param nearMatches      if not NULL, receives (distance, word) pairs for
 *                         every near match found; untouched when the word
 *                         itself is in the dictionary.
 * @return true only if hasWord() finds the word; false otherwise, even when
 *         near matches were collected.
 */
bool SpellChecker::isCorrect(const string& word, int maxEditDistance, multimap<int, string>* const nearMatches) const
{
    if (hasWord(word)) {
        return true;
    }

    if (nearMatches != NULL) {
        const char* const cword = word.c_str();
        const size_t wordlen = strlen(cword);
        const char first = cword[0];

        for (int i = 0; i < nWords_; ++i) {
            const char* const candidate = words_[i];

            // Cheap pre-filter: only consider entries whose first or second
            // character equals the first character of the word. The second
            // character is read only when the entry is non-empty, so an
            // empty dictionary entry is never read past its terminator.
            const bool plausible = first == candidate[0]
                || (candidate[0] != '\0' && first == candidate[1]);
            if (!plausible) {
                continue;
            }

            // LOGF(1, "checking %s against %s", word.c_str(), s.c_str());
            const int ed = editDistance(cword, wordlen, candidate, strlen(candidate), maxEditDistance);
            if (ed >= 0 && ed <= maxEditDistance) {
                // LOGF(1, "adding '%s' as a near match", s.c_str());
                // No explicit template arguments: make_pair<int, string>(ed, ...)
                // cannot bind the lvalue `ed` under C++11 and later.
                nearMatches->insert(std::make_pair(ed, string(candidate)));
            }
        }
    }
    return false;
}

/**
 * Convenience overload: same as the three-argument isCorrect(), using
 * DEFAULT_MAX_DISTANCE as the maximum edit distance for near matches.
 */
bool SpellChecker::isCorrect(const string& word, multimap<int, string>* const nearMatches) const
{
    LOG(1, word);
    const bool correct = isCorrect(word, DEFAULT_MAX_DISTANCE, nearMatches);
    return correct;
}

void SpellChecker::addDictionary(const string& dictName)
{
    ifstream ds(dictName.c_str());
    string s;
    while (ds >> s) {
        addWord(s.c_str());
    }
}

void SpellChecker::addWord(const string& word)
{
    addWord(word.c_str());
}

/**
 * Appends a private copy of the word to the dictionary, doubling the
 * capacity of the word table when it is full.
 *
 * The table is grown with realloc(), so the previous array is released
 * rather than leaked, and the capacity is only updated once the larger
 * table actually exists.
 *
 * @param word  NUL-terminated word to add; must not be NULL.
 * @throws std::bad_alloc if memory for the table or the copy cannot be
 *         obtained; the dictionary is left unchanged in that case.
 */
void SpellChecker::addWord(const char* const word)
{
    if (nWords_ >= maxWords_) {
        // LOGF(1, "reallocating: nWords_: %d; maxWords_: %d", nWords_, maxWords_);
        char** const grown = (char**)realloc(words_, sizeof(char*) * maxWords_ * 2);
        if (grown == NULL) {
            // realloc() leaves the old table intact on failure.
            throw std::bad_alloc();
        }
        words_ = grown;
        maxWords_ *= 2;
    }

    const size_t len = strlen(word);
    char* const str = (char*)malloc(len + 1);
    if (str == NULL) {
        throw std::bad_alloc();
    }
    memcpy(str, word, len + 1);  // includes the terminating NUL

    // LOGF(1, "adding %s as words_[%d]", str, nWords_);

    words_[nWords_++] = str;
}

/**
 * Returns whether the dictionary contains the word, compared exactly
 * (case-sensitively) against every stored entry in turn.
 */
bool SpellChecker::hasWord(const string& word) const
{
    LOG(1, word);
    const char* const target = word.c_str();
    int idx = 0;
    while (idx < nWords_) {
        // LOGF(1, "comparing %s <=> %s", words_[idx], target);
        if (strcmp(words_[idx], target) == 0) {
            LOG(1, "returning true");
            return true;
        }
        ++idx;
    }
    return false;
}

/**
 * Returns whether the two strings are close to each other: their edit
 * distance must be non-negative, no greater than DEFAULT_MAX_DISTANCE, and
 * strictly smaller than the length of either string.
 */
bool SpellChecker::nearMatch(const string& str1, const string& str2) const
{
    const int edist = editDistance(str1.c_str(), str1.length(), str2.c_str(), str2.length());

    if (edist < 0 || edist > DEFAULT_MAX_DISTANCE) {
        return false;
    }

    // the edit distance is misleading for very short words, so it must also
    // be smaller than both lengths
    const int firstLen = (int)str1.length();
    const int secondLen = (int)str2.length();
    return edist < firstLen && edist < secondLen;
}


struct ncseek : public std::unary_function<bool, string>
{
    string word;
    ncseek(const string& w) : word(StringUtilities::toLower(w)) {}
    virtual ~ncseek() {}

    bool operator()(const string& w)
    {
        if (w.length() == word.length()) {
            for (int i = 0, len = w.length(); i < len; ++i) {
                if (tolower(w[i]) != word[i]) {
                    return false;
                }
            }
            return true;
        }
        else {
            return false;
        }
    }
};


/**
 * Creates a case-insensitive checker. The body is empty: nothing is set up
 * here explicitly.
 */
NoCaseSpellChecker::NoCaseSpellChecker()
{
}

/**
 * Destroys the checker. The body is empty: nothing is released here
 * explicitly.
 */
NoCaseSpellChecker::~NoCaseSpellChecker()
{
}

/**
 * Case-insensitive variant of SpellChecker::compare(): both words are
 * converted to upper case and the copies are handed to the case-sensitive
 * comparison.
 *
 * The upper-cased copies are heap-allocated by upcase(), so they are freed
 * here once the distance has been computed instead of being leaked on every
 * call.
 *
 * @return the distance reported by SpellChecker::compare() for the
 *         upper-cased words.
 */
int NoCaseSpellChecker::compare(const char* const str1, int len1, const char* const str2, int len2) const
{
    char* const upper1 = upcase(str1);
    char* const upper2 = upcase(str2);

    const int distance = SpellChecker::compare(upper1, len1, upper2, len2);

    free(upper1);
    free(upper2);
    return distance;
}

/**
 * Returns whether the dictionary contains the word, ignoring case. Every
 * stored entry is compared against the word with strcasecmp().
 */
bool NoCaseSpellChecker::hasWord(const string& word) const
{
    LOG(1, string("hasWord(") + word + ")");

    const char* const target = word.c_str();
    int idx = 0;
    while (idx < nWords_) {
        if (strcasecmp(words_[idx], target) == 0) {
            return true;
        }
        ++idx;
    }
    return false;

    // Times of the different implementations that were tried before the
    // strcasecmp() loop above:

    // time:  1:38 wallclock ( 0.00 usr  0.00 sys + 95.21 cusr  0.09 csys = 95.30 CPU)
    // return find_if(words_.begin(), words_.end(), ncseek(word)) != words_.end();

    // time:  2:41 wallclock ( 0.00 usr  0.00 sys + 152.96 cusr  0.19 csys = 153.15 CPU)
    // return find(lcwords_.begin(), lcwords_.end(), StringUtilities::toLower(word)) != lcwords_.end();

    // time:  4:52 wallclock ( 0.00 usr  0.00 sys + 270.30 cusr  0.22 csys = 270.52 CPU)
    // return find_if(words_.begin(), words_.end(), bind2nd(ptr_fun(stringncmp), word)) != words_.end();
}

void NoCaseSpellChecker::addWord(const string& word)
{
    lcwords_.push_back(StringUtilities::toLower(word));
    SpellChecker::addWord(word);
}
