///###////////////////////////////////////////////////////////////////////////
//
// Burton Computer Corporation
// http://www.burton-computer.com
// $Id: MimeMessageReader.cc,v 1.22 2003/08/30 21:42:24 bburton Exp $
//
// Copyright (C) 2000 Burton Computer Corporation
// ALL RIGHTS RESERVED
//
// This program is open source software; you can redistribute it
// and/or modify it under the terms of the Q Public License (QPL)
// version 1.0. Use of this software in whole or in part, including
// linking it (modified or unmodified) into other programs is
// subject to the terms of the QPL.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// Q Public License for more details.
//
// You should have received a copy of the Q Public License
// along with this program; see the file LICENSE.txt.  If not, visit
// the Burton Computer Corporation or CoolDevTools web site
// QPL pages at:
//
//    http://www.burton-computer.com/qpl.html
//

#include "util.h"
#include "RegularExpression.h"
#include "MimeMessageReader.h"

// Builds a reader over the stream `in`.
//
//   ignore_from            - stored in m_ignoreFrom; when set, the body
//                            readers never call MimeHeader::isFromLine().
//   ignore_content_length  - stored in m_ignoreContentLength; when set,
//                            readNextHeader() does not look up the
//                            content-length field.
//
// The header stack starts out holding one default-constructed MimeHeader,
// which is the slot outer() refers to.
MimeMessageReader::MimeMessageReader(istream &in,
                                     bool ignore_from,
                                     bool ignore_content_length)
  : m_reader(in),
    m_ignoreFrom(ignore_from),
    m_ignoreContentLength(ignore_content_length),
    m_atEnd(false),
    m_haveContentLength(false),
    m_contentLength(0)
{
  MimeHeader outermost;
  m_headers.push_back(outermost);
}

// Destructor.  The body is intentionally empty: nothing is released
// explicitly here, the members clean up through their own destructors.
MimeMessageReader::~MimeMessageReader()
{
}

bool MimeMessageReader::readNextHeader()
{
  if (m_headers.size() != 1) {
    if (is_debug) {
      cerr << "WARNING: mime reader confused: found "
           << m_headers.size()
           << " headers when expecting only 1"
           << endl;
    }
    if (m_headers.size() > 1) {
      m_headers.erase(m_headers.begin() + 1, m_headers.end());
    }
  }

  assert(m_headers.size() == 1);
  if (!outer().read(m_reader, false, true)) {
    return false;
  }

  m_atEnd = false;

  if (is_debug) {
    cerr << "** MIME? " << outer().isMime()
         << " MULTI? " << outer().isMultiPart()
         << endl;
  }

  string content_length;
  if (!m_ignoreContentLength && outer().getField("content-length", content_length)) {
    m_haveContentLength = true;
    m_contentLength = max(0, atoi(content_length.c_str()) - 10);
  } else {
    m_haveContentLength = false;
    m_contentLength = 0;
  }
  m_reader.resetCounter();

  if (is_debug && m_haveContentLength) {
    cerr << "USING CONTENT-LENGTH " << m_contentLength << endl;
  }

  m_md5.start();

  if (outer().isMultiPart()) {
    return readToBoundary();
  }

  return true;
}

// Copies the character set reported by the outermost header into `value`.
// Returns true when the resulting string is non-empty.
bool MimeMessageReader::getCharSet(string &value)
{
  MimeHeader &top = outer();
  value = top.getCharSet();
  return !value.empty();
}

bool MimeMessageReader::getField(const string &name,
                                 string &value)
{
  return outer().getField(name, value);
}

// Reports the field count of the outermost header.
int MimeMessageReader::getFieldCount()
{
  const int count = outer().getFieldCount();
  return count;
}

bool MimeMessageReader::getField(int index,
                                 string &value)
{
  return outer().getField(index, value);
}

bool MimeMessageReader::getFieldName(int index,
                                     string &name)
{
  return outer().getFieldName(index, name);
}

bool MimeMessageReader::getField(const string &name,
                                 vector<string> &value)
{
  return outer().getField(name, value);
}

void MimeMessageReader::addStringToDigest(const string &value)
{
  if (value.length() > 0) {
    if (is_debug) {
      cerr << "DIGEST: " << value << endl;
    }
    m_md5.add(value);
  }
}

// Reads the remaining lines of the current message and appends each one,
// followed by a newline, to `text`.  Every line kept is also added to the
// MD5 digest.
//
// Reading stops when the reader has no more lines, or when a line is
// recognised by MimeHeader::isFromLine().  The From check is only made when
// From lines are not being ignored and content-length tracking is off;
// tracking switches itself off once the reader's count reaches
// m_contentLength.
//
// Always marks the message as finished (m_atEnd) and returns true.
bool MimeMessageReader::readNonMimeBody(string &text)
{
  assert(m_headers.size() == 1);

  bool after_blank = true;
  while (m_reader.readLine()) {
    if (m_haveContentLength && m_reader.getCount() >= m_contentLength) {
      m_haveContentLength = false;
    }

    const string &line = m_reader.getLine();

    const bool may_end_here = !m_haveContentLength && !m_ignoreFrom;
    if (may_end_here && MimeHeader::isFromLine(after_blank, line)) {
      break;
    }

    addStringToDigest(line);

    text += line;
    text += '\n';
    after_blank = line.empty();
  }

  m_atEnd = true;
  return true;
}

// Advances the reader until a line starting with the current header's
// boundary string is found, or until the message ends.
//
//   textp - optional; when non-null every line passed over is appended to
//           *textp followed by a newline.  When null the lines are skipped
//           (and echoed to cerr in debug mode).
//
// Each line, including the boundary line itself, is added to the MD5 digest
// before the boundary test.  The message is considered ended (m_atEnd set)
// when the reader runs out of lines or when MimeHeader::isFromLine()
// recognises a line; that check is only made while From lines are not
// ignored and content-length tracking is off.
//
// Always returns true.
bool MimeMessageReader::readToBoundary(string *textp)
{
  assert(m_headers.size() > 0);
  assert(current().isMultiPart());

  bool after_blank = true;
  while (m_reader.readLine()) {
    if (m_haveContentLength && m_reader.getCount() >= m_contentLength) {
      m_haveContentLength = false;
    }

    const string &line = m_reader.getLine();

    const bool may_end_here = !m_haveContentLength && !m_ignoreFrom;
    if (may_end_here && MimeHeader::isFromLine(after_blank, line)) {
      // start of the next message: fall out to the shared at-end exit
      break;
    }

    addStringToDigest(line);

    if (starts_with(line, current().getBoundary())) {
      if (is_debug) {
        cerr << "** FOUND " << line << endl;
      }
      return true;
    }

    if (textp) {
      *textp += line;
      *textp += '\n';
    } else if (is_debug) {
      cerr << "** SKIPPING " << line << endl;
    }

    after_blank = line.empty();
  }

  m_atEnd = true;
  return true;
}

void MimeMessageReader::addFieldToDigest(const string &field_name)
{
  vector<string> values;
  if (outer().getField(field_name, values)) {
    for (vector<string>::const_iterator i = values.begin(); i != values.end(); ++i) {
      addStringToDigest(*i);
    }
  }
}

// Returns the MD5 digest string for the current message.
//
// If the digest is still running it is finalised first: the values of the
// date, from, to and subject fields of the outermost header are added (in
// that order) and the digest is stopped.  Later calls simply return the
// already computed string.
const string &MimeMessageReader::getMD5Digest()
{
  if (m_md5.isRunning()) {
    static const char *const DIGEST_FIELDS[] = { "date", "from", "to", "subject" };
    const int field_count = sizeof(DIGEST_FIELDS) / sizeof(DIGEST_FIELDS[0]);
    for (int idx = 0; idx < field_count; ++idx) {
      addFieldToDigest(DIGEST_FIELDS[idx]);
    }
    m_md5.stop();
  }
  return m_md5.asString();
}

// Reads the next chunk of message text.
//
//   text          - cleared on entry; receives the decoded text of the part
//                   (left empty for parts that are skipped).
//   content_type  - cleared on entry; receives the content-type field of the
//                   header that describes the returned part.
//
// Returns true when a part was consumed (even if its text was skipped) and
// false when the message is exhausted or a part header could not be read.
//
// For a non-multipart message the whole remaining body is returned in one
// call.  For multipart messages each call consumes one part; nested
// multiparts are tracked by pushing their headers onto m_headers and popping
// them again when their terminator line is reached.
bool MimeMessageReader::readText(string &text,
                                 string &content_type)
{
  assert(m_headers.size() > 0);

  text.erase();
  content_type.erase();

  // Nothing left in this message.
  if (m_atEnd) {
    return false;
  }

  // Simple case: the current header is not multipart, so the rest of the
  // message is a single body.
  if (!current().isMultiPart()) {
    if (!readNonMimeBody(text)) {
      return false;
    }
    current().getField("content-type", content_type);
    if (is_debug) {
      cerr << "CONTENT-TYPE: " << content_type << endl;
    }
    // Undo the transfer encoding reported by the header, if any.
    if (current().isQuoted()) {
      unquoteText(text, false);
    } else if (current().isBase64()) {
      decodeText(text);
    }
    return true;
  }

  // The current line in the reader should be a boundary of the current
  // header that we skipped to previously.
  assert(starts_with(m_reader.getLine(), current().getBoundary()));

  // The boundary line is the terminator of the current multipart.
  if (starts_with(m_reader.getLine(), current().getTerminator())) {
    if (m_headers.size() == 1) {
      // skip junk after closing boundary
      string ignored;
      readNonMimeBody(ignored);
      return false;
    }
    // A nested multipart ended: drop its header, move on to the next
    // boundary of the enclosing multipart and continue from there.
    m_headers.pop_back();
    return readToBoundary() && readText(text, content_type);
  }

  // Read the header of the part that follows the boundary line.
  MimeHeader part_header;
  if (!part_header.read(m_reader, true, m_haveContentLength || m_ignoreFrom)) {
    return false;
  }

  if (part_header.isMultiPart()) {
    // a nested multipart, push it onto the stack and then read its first part
    m_headers.push_back(part_header);
    return readToBoundary() && readText(text, content_type);
  }

  if (part_header.isMessage()) {
    // a nested message - is it mime or text?
    MimeHeader message_header;
    if (!message_header.read(m_reader, true, m_haveContentLength || m_ignoreFrom)) {
      return false;
    }
    if (message_header.isMultiPart()) {
      m_headers.push_back(message_header);
      return readToBoundary() && readText(text, content_type);
    }
    // Otherwise fall through: the nested message is handled below using
    // part_header (not message_header) for content type and encoding.
  }

  part_header.getField("content-type", content_type);
  if (is_debug) {
    cerr << "CONTENT-TYPE: " << content_type << endl;
  }

  if (part_header.isMime() && !part_header.isText()) {
    // skipping over non-text mime part but return its content-type so
    // that it can be added as terms
    return readToBoundary();
  }

  // Text part: collect everything up to the next boundary.  The return
  // value is not checked; readToBoundary() always returns true.
  readToBoundary(&text);

  // Undo the transfer encoding reported by the part header, if any.
  if (part_header.isQuoted()) {
    unquoteText(text, false);
  } else if (part_header.isBase64()) {
    decodeText(text);
  }

  return true;
}

// Gives access to the outermost header, i.e. the first entry of the header
// stack.  The stack must not be empty.
MimeHeader &MimeMessageReader::outer()
{
  assert(m_headers.size() > 0);
  return *m_headers.begin();
}

// Gives access to the innermost header, i.e. the last entry of the header
// stack.  The stack must not be empty.
MimeHeader &MimeMessageReader::current()
{
  assert(m_headers.size() > 0);
  return *(m_headers.end() - 1);
}

// Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to
// its numeric value 0..15.  Any other character yields 0.
static unsigned char char_value(unsigned char ch)
{
  unsigned char digit = 0;
  if ('0' <= ch && ch <= '9') {
    digit = static_cast<unsigned char>(ch - '0');
  } else if ('a' <= ch && ch <= 'f') {
    digit = static_cast<unsigned char>(ch - 'a' + 10);
  } else if ('A' <= ch && ch <= 'F') {
    digit = static_cast<unsigned char>(ch - 'A' + 10);
  }
  return digit;
}

// Decodes quoted-printable data in place.
//
//   text       - the encoded text; replaced by the decoded result.
//   is_header  - when true, '_' is translated to a space (the convention
//                used by "Q" encoded header words).
//
// Handling of '=':
//   "=\n"   soft line break; both characters are removed.
//   "=XY"   with X and Y hex digits; replaced by the byte 0xXY (passed
//           through safe_char()).
//   other   malformed escape.  The '=' is copied to the output unchanged
//           and decoding continues with the following character, as
//           suggested for robust decoders by RFC 2045 section 6.7.
//           Dropping it would glue the surrounding text together
//           (e.g. "href=http" becoming "hrefhttp").
//
// Decoding stops at the first NUL character in `text`.
void MimeMessageReader::unquoteText(string &text,
                                    bool is_header)
{
  string new_text;
  for (const char *s = text.c_str(); *s; ++s) {
    if (*s == '=') {
      if (s[1] == '\n') {
        // soft line break - ignore the newline
        ++s;
      } else if (is_xdigit(s[1]) && is_xdigit(s[2])) {
        // s[2] is only examined when s[1] is a hex digit, so the terminating
        // NUL is never stepped over
        new_text += safe_char(char_value(s[1]) << 4 | char_value(s[2]));
        s += 2;
      } else {
        // not a valid escape - keep the '=' literally
        new_text += *s;
      }
    } else if (*s == '_' && is_header) {
      new_text += ' ';
    } else {
      new_text += *s;
    }
  }
  text = new_text;
}

// Lookup table for base64 decoding, indexed by byte value (0..255).
// Entries hold the 6 bit value of the characters A-Z, a-z, 0-9, '+' and '/'
// and -1 for every byte that is not part of the base64 alphabet.
static const int BASE64_CHARS[256] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};

// Scans forward from `s` for the next significant base64 character,
// silently skipping every byte that is neither '=' nor in the base64
// alphabet (whitespace, line breaks, stray 8 bit bytes, ...).
//
//   s      - in/out cursor into a NUL terminated string; left pointing just
//            past the character that was returned, or at the terminating
//            NUL when nothing was found.
//   ch     - receives the character found ('=' or an alphabet character).
//   value  - receives the 6 bit value of `ch`; set to 0 for the padding
//            character '=' so that callers never see a stale or
//            uninitialized value.
//
// Returns true when a character was found, false at end of string.
inline bool next_char64(const char *&s,
                        char &ch,
                        unsigned char &value)
{
  while (*s) {
    if (*s == '=') {
      ch = *s++;
      value = 0;
      return true;
    }

    // Go through unsigned char: where plain char is signed, bytes >= 0x80
    // are negative and would otherwise index far outside the table.
    const unsigned char index = static_cast<unsigned char>(*s);
    const int sextet = BASE64_CHARS[index];
    if (sextet >= 0) {
      ch = *s++;
      value = static_cast<unsigned char>(sextet);
      return true;
    }

    ++s;
  }

  return false;
}

// Decodes base64 data in place.
//
// The input is consumed four significant characters at a time (see
// next_char64(), which skips anything outside the base64 alphabet).  Each
// group yields:
//   3 octets  when neither the third nor the fourth character is '=',
//   2 octets  when only the fourth character is '=',
//   1 octet   when the third character is '='.
// Every octet is passed through safe_char() before being appended.  A
// trailing group of fewer than four characters is discarded.  Decoding
// does not stop at padding; any further complete groups are decoded too.
//
// The 6 bit values start out as zero so that malformed input with padding
// in the first or second position of a group (e.g. "=AAA") can never cause
// an uninitialized value to be read.
void MimeMessageReader::decodeText(string &text)
{
  string new_text;
  char c1, c2, c3, c4;
  unsigned char u1 = 0, u2 = 0, u3 = 0, u4 = 0;
  const char *s = text.c_str();
  while (next_char64(s, c1, u1) && next_char64(s, c2, u2) &&
         next_char64(s, c3, u3) && next_char64(s, c4, u4))
  {
    // first octet is always present
    new_text += safe_char((u1 << 2) | (u2 >> 4));               // 6 + 2
    if (c3 != '=') {
      new_text += safe_char(((u2 & 0x0f) << 4) | (u3 >> 2));    // 4 + 4
      if (c4 != '=') {
        new_text += safe_char(((u3 & 0x03) << 6) | u4);         // 2 + 6
      }
    }
  }
  text = new_text;
}


// Decodes encoded words of the form "=?charset?method?data?=" inside a
// header value, in place.
//
// Each match found by the regular expression is replaced as follows:
//   method "q"  - data is decoded with unquoteText() in header mode,
//   method "b"  - data is decoded with decodeText(),
//   otherwise   - the word is rewritten as "charset method data" (space
//                 separated), which no longer matches the expression, so
//                 the loop cannot find it again.
// The method is lower-cased and trimmed before being compared.  Matching is
// repeated on the modified text until no encoded word is left.  The
// charset is not used to convert the decoded bytes.
//
// In debug mode the text is echoed to cerr before and after decoding.
void MimeMessageReader::decodeHeader(string &text)
{
  static const string token_expr("[^] \t()<>@,;:\"/[?.=]+");
  static const string encoded_word_expr = string("=\\?(") + token_expr + ")\\?(" + token_expr + ")\\?([^\\?]+)\\?=";

  if (is_debug) {
    cerr << "decodeHeader: before: " << text << endl;
  }

  bool changed = false;
  RegularExpression::MatchData whole_match;
  string charset, method, encoded_word;
  RegularExpression encoded_word_regex(encoded_word_expr, 4, false, true);
  while (encoded_word_regex.match(text.c_str())) {
    // group 0: whole encoded word, 1: charset, 2: method, 3: data
    encoded_word_regex.getMatch(0, whole_match);
    encoded_word_regex.getMatch(2, method);
    encoded_word_regex.getMatch(3, encoded_word);

    method = trim(to_lower(method));
    if (method == "q") {
      unquoteText(encoded_word, true);
    } else if (method == "b") {
      decodeText(encoded_word);
    } else {
      // have to modify string to prevent regex from matching again
      encoded_word_regex.getMatch(1, charset);
      string new_string = charset + " " + method + " " + encoded_word;
      encoded_word = new_string;
    }
    text.replace(whole_match.start_pos, whole_match.end_pos - whole_match.start_pos,
                 encoded_word);
    changed = true;
  }

  if (is_debug) {
    if (changed) {
      cerr << "decodeHeader: after: " << text << endl;
    } else {
      cerr << "decodeHeader: no change" << endl;
    }
  }
}
