/*
  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer

  $Id: eval.cpp,v 1.6 2006/07/09 11:26:27 taku-ku Exp $;

  Copyright (C) 2001-2006 Taku Kudo <taku@chasen.org>
  Copyright (C) 2004-2006 Nippon Telegraph and Telephone Corporation

*/

#include <cstdio>
#include <iostream>
#include <map>
#include <fstream>
#include <vector>
#include "mecab.h"
#include "param.h"
#include "common.h"
#include "utils.h"

namespace MeCab {

  /*
    Scores a system output file against an answer file.

    Both files are read one sentence at a time. A sentence is a run of
    "surface<TAB>comma-separated-features" lines closed by a line that
    is exactly "EOS". Tokens of the two files are aligned by their
    offset inside the sentence and compared at every requested level.
  */
  class Eval {
  private:
    /*
      Reads one sentence (every line up to "EOS" or until getline fails).

      For each token, one key string per entry of `level` is appended to
      `r`: the surface followed by feature columns 1..m, where m is the
      level value, or the number of columns reported by tokenizeCSV when
      the level is negative.

      Returns false only when the stream is already in a failed state on
      entry; otherwise true, even if no token was read.
    */
    static bool read(std::istream &is,
                     std::vector<std::vector<std::string> > &r,
                     std::vector<int> &level)
    {
      if (!is) return false;
      char buf[BUF_SIZE];
      char *col[2];
      char *cvs[BUF_SIZE];
      // Slot 0 holds the surface, so one element fewer is available for
      // features. The limit is an element count, not a byte count.
      const size_t max_features = sizeof(cvs) / sizeof(cvs[0]) - 1;
      r.clear();
      while (is.getline(buf, sizeof(buf))) {
        if (std::strcmp(buf, "EOS") == 0) break;
        CHECK_DIE(tokenize(buf, "\t", col,  2) == 2) << "format error";
        cvs[0] = col[0];
        const size_t n = tokenizeCSV(col[1], cvs + 1, max_features);
        std::vector<std::string> keys;
        keys.reserve(level.size());
        for (size_t i = 0; i < level.size(); ++i) {
          // Features live in cvs[1..n]; a negative level selects all of them.
          const size_t m = level[i] < 0 ? n : static_cast<size_t>(level[i]);
          CHECK_DIE(m <= n) << " out of range " << level[i];
          std::string key;
          for (size_t j = 0; j <= m; ++j) {
            key += cvs[j];
            // The key is only compared for equality, so the separator
            // placement (after each feature, none after the surface)
            // does not matter as long as both files use the same rule.
            if (j != 0) key += "\t";
          }
          keys.push_back(key);
        }
        r.push_back(keys);
      }

      return true;
    }

    /*
      Splits `level_str` on tabs and spaces and stores each field,
      converted with atoi, into `level`. Always returns true.
    */
    static bool parseLevel(const char *level_str,
                           std::vector<int> &level)
    {
      char buf[BUF_SIZE];
      char *col[512];
      // strncpy does not terminate the destination when the source
      // fills it completely, so terminate explicitly.
      std::strncpy(buf, level_str, sizeof(buf) - 1);
      buf[sizeof(buf) - 1] = '\0';
      level.clear();
      const size_t n = tokenize2(buf, "\t ", col,
                                 sizeof(col) / sizeof(col[0]));
      for (size_t i = 0; i < n; ++i) {
        level.push_back(std::atoi(col[i]));
      }
      return true;
    }

    /*
      Prints "precision(c/p) recall(c/r) F" on one line.
      c: matching tokens, p: precision denominator, r: recall denominator.
      A zero denominator yields 0 for the corresponding score.
    */
    static void printeval(size_t c, size_t p, size_t r)
    {
      const double pr = (p == 0) ? 0 : 100.0 * c/p;
      const double re = (r == 0) ? 0 : 100.0 * c/r;
      const double F = ((pr + re) == 0.0) ? 0 : 2 * pr * re / (pr + re);
      // size_t must not be printed with %d; cast to a type that has a
      // portable printf conversion.
      std::printf("%4.4f(%lu/%lu) %4.4f(%lu/%lu) %4.4f\n",
                  pr,
                  static_cast<unsigned long>(c),
                  static_cast<unsigned long>(p),
                  re,
                  static_cast<unsigned long>(c),
                  static_cast<unsigned long>(r),
                  F);
    }

  public:
    /*
      Command line entry point: "<program> [-l LEVELS] output answer".

      Return value (kept as it has always been): true after an option or
      usage error and after a completed evaluation, false when
      help_version() reports false.

      NOTE(review): mecab_system_eval() returns this bool converted to
      int, so a completed evaluation produces 1. The "-1"/"0" literals
      that used to be returned here suggest an int status was intended;
      fixing that needs a coordinated change with the wrapper.
    */
    static bool eval (int argc, char **argv)
    {
      static const MeCab::Option long_options[] =
        {
          { "level",  'l',  "0 -1",    "STR",    "set level of evaluations" },
          { "version",  'v',  0,   0,    "show the version and exit"   },
          { "help",  'h',  0,   0,    "show this help and exit."   },
          { 0, 0, 0, 0 }
        };

      MeCab::Param param;

      if (! param.open (argc, argv, long_options)) {
        std::cout << param.what () << "\n\n" <<  COPYRIGHT
                  << "\ntry '--help' for more information." << std::endl;
        return true;   // formerly "return -1", which converts to true
      }

      if (!param.help_version(long_options)) return false;

      std::vector<std::string> files = param.rest_args();
      if (files.size() < 2) {
        std::cout << "Usage: " <<
          param.program_name() << " output answer" << std::endl;
        return true;   // formerly "return -1", which converts to true
      }

      const std::string system = files[0];
      const std::string answer = files[1];

      // Keep an owned copy: taking c_str() of the call result directly
      // would dangle if getProfileString returns by value.
      const std::string level_str = param.getProfileString("level");

      std::ifstream ifs1 (system.c_str());
      std::ifstream ifs2 (answer.c_str());

      CHECK_DIE(ifs1) << "no such file or directory: " << system.c_str();
      CHECK_DIE(ifs2) << "no such file or directory: " << answer.c_str();

      std::vector<int> level;
      parseLevel(level_str.c_str(), level);
      CHECK_DIE(!level.empty()) << "level_str is empty: " << level_str;

      // One match counter per level; the elements start at zero.
      std::vector<size_t> result_tbl(level.size());

      size_t prec = 0;     // tokens in the system output
      size_t recall = 0;   // tokens in the answer

      std::vector<std::vector<std::string> > r1;   // system sentence
      std::vector<std::vector<std::string> > r2;   // answer sentence

      while (true) {

        if (! read(ifs1, r1, level) ||
            ! read(ifs2, r2, level))
          break;

        size_t i1 = 0;
        size_t i2 = 0;
        // Offsets of the current tokens inside the sentence. They are
        // advanced by the length of the first level's key, which is the
        // bare surface only when the first level is 0.
        size_t p1 = 0;
        size_t p2 = 0;

        while (i1 < r1.size() && i2 < r2.size()) {
          if (p1 == p2) {
            // Both tokens start at the same offset: compare every level.
            for (size_t i = 0; i < result_tbl.size(); ++i) {
              if (r1[i1][i] == r2[i2][i]) {
                result_tbl[i]++;
              }
            }
            p1 += r1[i1][0].size();
            p2 += r2[i2][0].size();
            ++i1;
            ++i2;
            ++prec;
            ++recall;
          } else if (p1 < p2) {
            // The system side is behind: consume a system token.
            p1 += r1[i1][0].size();
            ++i1;
            ++prec;
          } else {
            // The answer side is behind: consume an answer token.
            p2 += r2[i2][0].size();
            ++i2;
            ++recall;
          }
        }

        // Whatever is left on either side still counts as a token.
        prec += r1.size() - i1;
        recall += r2.size() - i2;
      }

      std::cout <<  "              precision          recall         F" << std::endl;
      for (size_t i = 0; i < result_tbl.size(); ++i) {
        // read() treats every negative level as "all features".
        if (level[i] < 0) {
          std::cout << "LEVEL ALL: ";
        } else {
          std::cout << "LEVEL " << level[i] << ":    ";
        }
        printeval(result_tbl[i], prec, recall);
      }

      return true;
    }
  };
}

// export
int mecab_system_eval(int argc, char **argv)
{
  // Forwards the command line to MeCab::Eval::eval and hands its bool
  // result back as an int (false -> 0, true -> 1).
  const bool result = MeCab::Eval::eval(argc, argv);
  return static_cast<int>(result);
}
