//---------------------------------------------------------
// Copyright 2015 Ontario Institute for Cancer Research
// Written by Jared Simpson (jared.simpson@oicr.on.ca)
//---------------------------------------------------------
//
// nanopolish_poremodel -- Representation of the Oxford
// Nanopore sequencing model, as described in a FAST5 file
//
#include "nanopolish_poremodel.h"
#include <fstream>
#include <sstream>
#include <cstring>
#include <bits/stl_algo.h>
#include "../fast5/src/fast5.hpp"

// Recompute the per-read scaled copy of every model state.
//
// Applies the current shift/scale/var/scale_sd/var_sd members to each entry
// of `states`, stores the result in `scaled_states`, mirrors the level
// mean/stdv/log-stdv into `scaled_params`, and marks the model as scaled.
void PoreModel::bake_gaussian_parameters()
{
    const size_t num_states = states.size();
    scaled_states.resize(num_states);
    scaled_params.resize(num_states);

    for(size_t idx = 0; idx < num_states; ++idx) {
        const auto &raw = states[idx];
        auto &baked = scaled_states[idx];

        // transformation as per the ONT documents
        baked.level_mean = raw.level_mean * scale + shift;
        baked.level_stdv = raw.level_stdv * var;
        baked.sd_mean = raw.sd_mean * scale_sd;
        baked.sd_lambda = raw.sd_lambda * var_sd;

        // must run after sd_mean/sd_lambda are set
        // (presumably derives sd_stdv from them -- see PoreModelStateParams)
        baked.update_sd_stdv();

        // cache the log terms for efficiency
        baked.update_logs();

        // keep the older gaussian-parameter view in sync for compatibility
        auto &compat = scaled_params[idx];
        compat.mean = baked.level_mean;
        compat.stdv = baked.level_stdv;
        compat.log_stdv = baked.level_log_stdv;
    }

    is_scaled = true;
}

// Append to `known` every character of `kmer` that `known` does not already
// contain, in order of first appearance.
//
// `known` must be a NUL-terminated, writable buffer with room for the
// appended characters; no bounds checking is done here.
void add_found_bases(char *known, const char *kmer) {
    const size_t kmer_len = strlen(kmer);

    for (;;) {
        // length of the leading run of kmer made up only of known characters
        const unsigned first_unknown = strspn(kmer, known);
        if (first_unknown == kmer_len) {
            break;  // every character of kmer is already recorded
        }

        // record the new character as a one-character string
        const char addition[2] = { kmer[first_unknown], '\0' };
        strcat(known, addition);
    }
}

// Construct a pore model by parsing the text model file at `filename`.
//
// Lines starting with '#' are header lines; recognized keys are #model_name,
// #strand, #kit, #type, #shift_offset, #scale_offset and #alphabet. A line
// starting with "kmer" is treated as a column header and skipped. Every other
// non-empty line is read as a whitespace-separated record:
//     kmer level_mean level_stdv sd_mean sd_stdv
// (any further columns are ignored).
//
// `alphabet` may be null; in that case the alphabet named in the header is
// used, or failing that one is chosen by best_alphabet() from the characters
// seen in the k-mers. The k-mer length `k` is taken from the first record.
//
// Exits the process with EXIT_FAILURE if the file cannot be opened, contains
// no k-mer records, or names an unrecognized strand or kit.
PoreModel::PoreModel(const std::string filename, const Alphabet *alphabet) : is_scaled(false), pmalphabet(alphabet)
{
    model_filename = filename;
    std::ifstream model_reader(filename);
    if (!model_reader.is_open()) {
        // without this check a missing file would fall through with `k`
        // never assigned and an empty k-mer table
        fprintf(stderr, "Error, could not open model file %s\n", filename.c_str());
        exit(EXIT_FAILURE);
    }

    std::string model_line;

    bool model_metadata_in_header = false;
    bool firstKmer = true;
    unsigned ninserted = 0;

    // identity transformation until per-read values are supplied
    this->shift = 0.0;
    this->scale = 1.0;
    this->drift = 0.0;
    this->var = 1.0;
    this->scale_sd = 1.0;
    this->var_sd = 1.0;
    this->shift_offset = 0.0f;
    this->scale_offset = 0.0f;

    // distinct characters seen across all k-mers, used to infer the alphabet;
    // add_found_bases() does no bounds checking, so this relies on the file
    // using at most maxNucleotides distinct symbols
    const size_t maxNucleotides = 50;
    char bases[maxNucleotides+1] = "";

    std::map<std::string, PoreModelStateParams> kmers;
    while (getline(model_reader, model_line)) {

        // a blank line carries no record; parsing it would insert an
        // empty k-mer with unset parameters
        if (model_line.empty()) {
            continue;
        }

        std::stringstream parser(model_line);

        // Extract the model name from the header
        if (model_line.find("#model_name") != std::string::npos) {
            std::string dummy;
            parser >> dummy >> this->name;
        }

        // Extract the strand from the header
        if (model_line.find("#strand") != std::string::npos) {
            std::string dummy;
            std::string in_strand;
            parser >> dummy >> in_strand;

            if(in_strand == "template") {
                this->metadata.model_idx = 0;
            } else if(in_strand == "complement.pop1") {
                this->metadata.model_idx = 1;
            } else if(in_strand == "complement.pop2") {
                this->metadata.model_idx = 2;
            } else {
                fprintf(stderr, "Error, unrecognized model strand %s for input file %s\n",
                    in_strand.c_str(), filename.c_str());
                exit(EXIT_FAILURE);
            }

            model_metadata_in_header = true;
        }

        // Extract the sequencing kit version from the header
        if (model_line.find("#kit") != std::string::npos) {
            std::string dummy;
            std::string in_kit;
            parser >> dummy >> in_kit;

            // NOTE(review): "SQK006" is mapped to KV_SQK007, the same value
            // as "SQK007" -- confirm this is intentional and not a
            // copy-paste slip
            if(in_kit == "SQK006") {
                this->metadata.kit = KV_SQK007;
            } else if(in_kit == "SQK007") {
                this->metadata.kit = KV_SQK007;
            } else {
                fprintf(stderr, "Error, unrecognized model kit %s for input file %s\n",
                    in_kit.c_str(), filename.c_str());
                exit(EXIT_FAILURE);
            }
        }

        if (model_line.find("#type") != std::string::npos) {
            std::string dummy;
            parser >> dummy >> this->type;
        }

        // Extract shift/scale offset from the header
        // This will be applied to the per-read shift values
        // to allow switching between models with different averages
        if (model_line.find("#shift_offset") != std::string::npos) {
            std::string dummy;
            parser >> dummy >> this->shift_offset;
        }

        if (model_line.find("#scale_offset") != std::string::npos) {
            std::string dummy;
            parser >> dummy >> this->scale_offset;
        }

        // Use the alphabet defined in the header if available
        if (model_line.find("#alphabet") != std::string::npos) {
            std::string dummy;
            std::string alphabet_name;
            parser >> dummy >> alphabet_name;
            pmalphabet = get_alphabet_by_name(alphabet_name);
        }

        // skip the rest of the header
        if (model_line[0] == '#' || model_line.find("kmer") == 0) {
            continue;
        }

        std::string kmer;
        PoreModelStateParams params;

        // ig_lambda (R9), weight currently not read
        parser >> kmer >> params.level_mean >> params.level_stdv >> params.sd_mean >> params.sd_stdv;

        params.update_sd_lambda();
        params.update_logs();

        kmers[kmer] = params;
        add_found_bases(bases, kmer.c_str());

        // the first record fixes the k-mer length for the whole model
        if (firstKmer) {
            k = kmer.length();
            firstKmer = false;
        }
    }

    // with no records `k` was never set and the state table cannot be built
    if (kmers.empty()) {
        fprintf(stderr, "Error, no k-mers found in model file %s\n", filename.c_str());
        exit(EXIT_FAILURE);
    }

    if(!model_metadata_in_header) {
        this->metadata = get_model_metadata_from_name(this->name);
    }

    // neither the caller nor the header supplied an alphabet: infer one
    if (pmalphabet == nullptr)
        pmalphabet = best_alphabet(bases);

    assert( pmalphabet != nullptr );

    // place each k-mer's parameters at its rank within the alphabet
    states.resize(pmalphabet->get_num_strings(k));
    for (const auto &iter : kmers ) {
        ninserted++;
        states[ pmalphabet->kmer_rank(iter.first.c_str(), k) ] = iter.second;
    }
    // every possible k-mer must have been present in the file
    assert( ninserted == states.size() );

    is_scaled = false;
}

// Write this model to `filename` as tab-separated text.
//
// The output starts with the #model_name, #type, #kit, #strand,
// #shift_offset and #scale_offset header lines, followed by one line per
// state: kmer, level_mean, level_stdv, sd_mean, sd_stdv. K-mers are emitted
// in the alphabet's lexicographic order, starting from k copies of base(0).
// The unscaled `states` are written, not the per-read scaled values.
//
// `modelname` overrides the stored model name; when empty, `name` is used.
//
// Exits the process with EXIT_FAILURE if the file cannot be opened.
//
// NOTE(review): the #alphabet header understood by the file-reading
// constructor is not written here, so a re-read falls back to whatever
// alphabet the reader is given or infers -- confirm that is acceptable.
void PoreModel::write(const std::string filename, const std::string modelname) const
{
    std::string outmodelname = modelname;
    if(modelname.empty())
        outmodelname = name;

    std::ofstream writer(filename);
    if (!writer.is_open()) {
        // previously a failed open silently produced no output
        fprintf(stderr, "Error, could not open %s for writing model\n", filename.c_str());
        exit(EXIT_FAILURE);
    }

    // '\n' rather than std::endl: no need to flush after every line,
    // close() below flushes once
    writer << "#model_name\t" << outmodelname << '\n';
    writer << "#type\t" << this->type << '\n';
    writer << "#kit\t" << this->metadata.get_kit_name() << '\n';
    writer << "#strand\t" << this->metadata.get_strand_model_name() << '\n';
    writer << "#shift_offset\t" << this->shift_offset << '\n';
    writer << "#scale_offset\t" << this->scale_offset << '\n';

    // states are stored by k-mer rank, so walk the k-mers in the same order
    std::string curr_kmer(k, this->pmalphabet->base(0));
    for(size_t ki = 0; ki < this->states.size(); ++ki) {
        writer << curr_kmer << "\t" << this->states[ki].level_mean << "\t" << this->states[ki].level_stdv << "\t"
               << this->states[ki].sd_mean << "\t" << this->states[ki].sd_stdv << '\n';
        this->pmalphabet->lexicographic_next(curr_kmer);
    }
    writer.close();
}

void PoreModel::update_states( const PoreModel &other )
{
    k = other.k;
    pmalphabet = other.pmalphabet;
    shift += other.shift_offset;
    scale += other.scale_offset;
    update_states( other.states );
}

// Replace the unscaled states with a copy of `otherstates`.
//
// If scaled parameters had already been computed they are recomputed from
// the new states; otherwise nothing further is done.
void PoreModel::update_states( const std::vector<PoreModelStateParams> &otherstates )
{
    states = otherstates;

    if (!is_scaled) {
        return;
    }

    bake_gaussian_parameters();
}
