/*
 Bare-bones sequence recoding.
 Codon-recoding will require genetic codes, and so knowledge of the taxon-specific codes.
 TODO: implement 'degen' coding.
*/

#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <cstring>
#include <getopt.h>

#include "utils.h"
#include "sequence.h"
#include "seq_reader.h"
#include "recode.h"
#include "log.h"
#include "citations.h"

// Forward declarations of the file-local helpers defined below.
void print_help ();
std::string get_version_line ();

// Write the pxrecode usage/help text to standard output.
// The whole message is emitted as a single literal and flushed once at the end.
void print_help () {
    std::cout <<
        "Nucleotide sequence recoding.\n"
        "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN.\n"
        "\n"
        "Usage: pxrecode [OPTIONS]...\n"
        "\n"
        "Options:\n"
        " -s, --seqf=FILE      input sequence file, STDIN otherwise\n"
        " -r, --recode=STRING  string identifying recoding scheme (default: RY)\n"
        "  Supported recodings (use any valid combination):\n"
        "      R = A|G\n"
        "      Y = C|T\n"
        "      S = C|G\n"
        "      W = A|T\n"
        "      M = A|C\n"
        "      K = G|T\n"
        "      B = C|G|T\n"
        "      D = A|G|T\n"
        "      H = A|C|T\n"
        "      V = A|C|G\n"
        " -o, --outf=FILE      output sequence file, STDOUT otherwise\n"
        " -h, --help           display this help and exit\n"
        " -V, --version        display version and exit\n"
        " -C, --citation       display phyx citation and exit\n"
        "\n"
        "Report bugs to: <https://github.com/FePhyFoFum/phyx/issues>\n"
        "phyx home page: <https://github.com/FePhyFoFum/phyx>" << std::endl;
}

// Return the multi-line version/copyright/licence banner for pxrecode.
// The returned text has no trailing newline; the caller appends one.
std::string get_version_line () {
    return std::string(
        "pxrecode 1.3\n"
        "Copyright (C) 2013-2021 FePhyFoFum\n"
        "License GPLv3\n"
        "Written by Joseph W. Brown");
}

// Long-option table handed to getopt_long(). Each entry maps a --name to the
// short-option character returned for it; the all-null entry terminates the table.
static const struct option long_options[] = {
    { "seqf",     required_argument, nullptr, 's' },
    { "recode",   required_argument, nullptr, 'r' },
    { "outf",     required_argument, nullptr, 'o' },
    { "help",     no_argument,       nullptr, 'h' },
    { "version",  no_argument,       nullptr, 'V' },
    { "citation", no_argument,       nullptr, 'C' },
    { nullptr,    0,                 nullptr, 0   }
};

int main(int argc, char * argv[]) {
    
    log_call(argc, argv);
    
    bool outfileset = false;
    bool fileset = false;
    std::string recodescheme;
    char * outf = nullptr;
    char * seqf = nullptr;
    
    while (true) {
        int oi = -1;
        int c = getopt_long(argc, argv, "s:r:o:hVC", long_options, &oi);
        if (c == -1) {
            break;
        }
        switch(c) {
            case 's':
                fileset = true;
                seqf = strdup(optarg);
                check_file_exists(seqf);
                break;
            case 'r':
                recodescheme = strdup(optarg);
                break;
            case 'o':
                outfileset = true;
                outf = strdup(optarg);
                break;
            case 'h':
                print_help();
                exit(0);
            case 'V':
                std::cout << get_version_line() << std::endl;
                exit(0);
            case 'C':
                std::cout << get_phyx_citation() << std::endl;
                exit(0);
            default:
                print_error(*argv);
                exit(0);
        }
    }
    
    if (fileset && outfileset) {
        check_inout_streams_identical(seqf, outf);
    }
    
    // set default if arg not provided
    if (recodescheme.empty()) {
        recodescheme = "RY";
    }
    
    std::istream * pios = nullptr;
    std::ostream * poos = nullptr;
    std::ifstream * fstr = nullptr;
    std::ofstream * ofstr = nullptr;
    
    if (outfileset) {
        ofstr = new std::ofstream(outf);
        poos = ofstr;
    } else {
        poos = &std::cout;
    }
    
    if (fileset) {
        fstr = new std::ifstream(seqf);
        pios = fstr;
    } else {
        pios = &std::cin;
        if (!check_for_input_to_stream()) {
            print_help();
            exit(1);
        }
    }
    
    SequenceRecoder sr (recodescheme);
    
    Sequence seq;
    std::string retstring;
    
    int ft = test_seq_filetype_stream(*pios, retstring);
    int num_taxa, num_char; // not used, but required by some reader functions
    bool first = true; // check first seq alphabet to make sure DNA. exit otherwise
    std::string alpha;
    
    // extra stuff to deal with possible interleaved nexus
    if (ft == 0) {
        bool interleave = false;
        get_nexus_dimensions(*pios, num_taxa, num_char, interleave);
        retstring = ""; // need to do this to let seqreader know we are mid-file
        if (!interleave) {
            while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
                if (first) {
                    alpha = seq.get_alpha_name();
                    if (alpha != "DNA") {
                        std::cerr << "Error: this only works for DNA. Exiting." << std::endl;
                        exit(0);
                    }
                    first = false;
                }
                (*poos) << ">" << seq.get_id() << std::endl
                        << sr.get_recoded_seq(seq.get_sequence()) << std::endl;
            }
        } else {
            std::vector<Sequence> seqs = read_interleaved_nexus(*pios, num_taxa, num_char);
            for (const auto & sq : seqs) {
                seq = sq;
                if (first) {
                    alpha = seq.get_alpha_name();
                    if (alpha != "DNA") {
                        std::cerr << "Error: this only works for DNA. Exiting." << std::endl;
                        exit(0);
                    }
                    first = false;
                }
                (*poos) << ">" << seq.get_id() << std::endl
                        << sr.get_recoded_seq(seq.get_sequence()) << std::endl;
            }
        }
    } else {
        bool complicated_phylip = false;
        // check if we are dealing with a complicated phylip format
        if (ft == 1) {
            get_phylip_dimensions(retstring, num_taxa, num_char);
            complicated_phylip = is_complicated_phylip(*pios, num_char);
        }
        if (complicated_phylip) {
            std::vector<Sequence> seqs = read_phylip(*pios, num_taxa, num_char);
            for (const auto & sq : seqs) {
                seq = sq;
                if (first) {
                    alpha = seq.get_alpha_name();
                    if (alpha != "DNA") {
                        std::cerr << "Error: this only works for DNA. Exiting." << std::endl;
                        exit(0);
                    }
                    first = false;
                }
                (*poos) << ">" << seq.get_id() << std::endl
                        << sr.get_recoded_seq(seq.get_sequence()) << std::endl;
            }
        } else {
            // fasta, fastq, or simple phylip
            while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
                if (first) {
                    alpha = seq.get_alpha_name();
                    if (alpha != "DNA") {
                        std::cerr << "Error: this only works for DNA. Exiting." << std::endl;
                        exit(0);
                    }
                    first = false;
                }
                (*poos) << ">" << seq.get_id() << std::endl
                        << sr.get_recoded_seq(seq.get_sequence()) << std::endl;
            }
            // fasta has a trailing one
            if (ft == 2) {
                (*poos) << ">" << seq.get_id() << std::endl
                        << sr.get_recoded_seq(seq.get_sequence()) << std::endl;
            }
        }
    }
    
    if (fileset) {
        fstr->close();
        delete pios;
    }
    if (outfileset) {
        ofstr->close();
        delete poos;
    }
    return EXIT_SUCCESS;
}
