/***************************************************************************
                          codeparser.cpp  -  description
                             -------------------
    begin                : Die Jul 9 2002
    copyright            : (C) 2002 by Andre Simon
    email                : andre.simon1@gmx.de
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "codegenerator.h"

#include "htmlgenerator.h"
#include "xhtmlgenerator.h"
#include "rtfgenerator.h"
#include "latexgenerator.h"
#include "texgenerator.h"
#include "xslfogenerator.h"
#ifndef USE_WX_LIBRARY
  #include "ansigenerator.h"
#endif


using namespace std;

namespace highlight {

// Shared instance handed out by getInstance(); NULL until the first call to
// getInstance() and again after deleteInstance().
CodeGenerator* CodeGenerator::generator=NULL;

/** Returns the shared generator object, creating it if none exists yet.

    The concrete generator class is selected from \a type only when no
    instance exists; on later calls the existing instance is reused and only
    the per-run settings below are applied again.

    \param type            Output format used to pick the generator class
    \param styleInfoPath   Passed to the constructor of every generator
    \param styleInPath     Passed to the HTML and XHTML generators
    \param styleOutPath    Passed to the HTML and XHTML generators
    \param includeStyle    Passed to the HTML and XHTML generators
    \param attachAnchors   Passed to the HTML and XHTML generators
    \param replaceQuotes   Passed to the LaTeX generator
    \param fopCompatible   Passed to the XSL-FO generator
    \param numSpaces       Forwarded to setNumberSpaces()
    \param lineWrappingMode Forwarded to setLineWrapping()
    \param ln              Forwarded to setPrintLineNumbers()
    \param lnz             Forwarded to setPrintZeroes()
    \param fragment        Forwarded to setFragmentCode()
    \return The shared generator instance
*/
CodeGenerator* CodeGenerator::getInstance(OutputType type,
                                          const string& styleInfoPath,
                                          const string& styleInPath,
                                          const string& styleOutPath,
                                          bool includeStyle,
                                          bool attachAnchors,
                                          bool replaceQuotes,
                                          bool fopCompatible,
                                          int numSpaces,
                                          WrapMode lineWrappingMode,
                                          bool ln,
                                          bool lnz,
                                          bool fragment
                                          ) {
  if (!generator) {
    if (type == TEX) {
      generator = new TexGenerator(styleInfoPath);
    }
    else if (type == LATEX) {
      generator = new LatexGenerator(styleInfoPath, replaceQuotes);
    }
    else if (type == RTF) {
      generator = new RtfGenerator(styleInfoPath);
    }
    else if (type == XSLFO) {
      generator = new XslFoGenerator(styleInfoPath, fopCompatible);
    }
    else if (type == XHTML) {
      generator = new XHtmlGenerator(styleInfoPath, styleInPath, styleOutPath,
                                     includeStyle, attachAnchors);
    }
#ifndef USE_WX_LIBRARY
    else if (type == ANSI) {
      generator = new AnsiGenerator(styleInfoPath);
    }
#endif
    else {
      // Every other output type falls back to plain HTML.
      generator = new HtmlGenerator(styleInfoPath, styleInPath, styleOutPath,
                                    includeStyle, attachAnchors);
    }
  }

  // Per-run settings are applied on every call, also for a reused instance.
  generator->setNumberSpaces(numSpaces);
  generator->setPrintLineNumbers(ln);
  generator->setPrintZeroes(lnz);
  generator->setFragmentCode(fragment);

  // Wrap at column 80; the line number column is subtracted when shown.
  if (generator->getPrintLineNumbers()) {
    generator->setLineWrapping(lineWrappingMode, 80-LINE_NUMBER_WIDTH);
  } else {
    generator->setLineWrapping(lineWrappingMode, 80);
  }
  return generator;
}

void CodeGenerator::deleteInstance(){
  delete generator;
  generator=NULL;
}


/** Default constructor: all stream, formatter and wrapper pointers start as
    NULL, all flags as false and all counters as 0. The body is empty, so no
    style is loaded here. */
CodeGenerator::CodeGenerator():
    in(NULL),
    out(NULL),
    maskWs(false),
    excludeWs(false),
    fragmentOutput(false),
    showLineNumbers (false),
    lineNumberFillZeroes(false),
    lineNumber(0),
    numberSpaces(0),
    lineIndex(0),
    formatter(NULL),
    lineWrapper(NULL),
    formattingEnabled(false),
    formattingPossible(false)
{}

/** Constructs a generator for a given colour theme.
    Initialises the members like the default constructor and additionally
    stores the theme path in stylePath.
    \param colourTheme Path that is stored in stylePath and handed to
                       docStyle.load() */
CodeGenerator::CodeGenerator(const string &colourTheme)
   :in(NULL),
    out(NULL),
    maskWs(false),
    excludeWs(false),
    fragmentOutput(false),
    showLineNumbers (false),
    lineNumberFillZeroes(false),
    lineNumber(0),
    stylePath(colourTheme),
    numberSpaces(0),
    lineIndex(0),
    formatter(NULL),
    lineWrapper(NULL),
    formattingEnabled(false),
    formattingPossible(false)
{
  // Pre-allocate the line buffer for 100 characters.
  line.reserve(100);
  // Load the style; the result can be queried with styleFound().
  docStyle.load(stylePath);
}

/** Destructor: releases the line wrapper and the formatter owned by this
    object (both may be NULL). */
CodeGenerator::~CodeGenerator()
{
  if (lineWrapper != NULL) {
    delete lineWrapper;
  }
  if (formatter != NULL) {
    delete formatter;
  }
}


/** Getters and setters */

/** Enables or disables the output of line numbers.
    \param flag true if line numbers should be printed */
void CodeGenerator::setPrintLineNumbers(bool flag)
{
  showLineNumbers = flag;
}

/** \return true if line numbers are printed */
bool CodeGenerator::getPrintLineNumbers()
{
  return (showLineNumbers);
}

/** Chooses whether line numbers are padded with zeroes.
    \param flag true if '0' should be used as fill character */
void CodeGenerator::setPrintZeroes(bool flag)
{
  lineNumberFillZeroes = flag;
}

/** \return true if line numbers are padded with zeroes */
bool CodeGenerator::getPrintZeroes()
{
  return (lineNumberFillZeroes);
}


/** Chooses whether the document header and footer are omitted.
    \param flag true if only the body should be written */
void CodeGenerator::setFragmentCode(bool flag)
{
  fragmentOutput = flag;
}

/** \return true if the document header and footer are omitted */
bool CodeGenerator::getFragmentCode()
{
  return (fragmentOutput);
}

/** Sets the number of spaces that replace a tab character.
    \param num Number of spaces; 0 leaves tabs unchanged */
void CodeGenerator::setNumberSpaces(int num)
{
  numberSpaces = num;
}

/** \return Number of spaces that replace a tab character */
int CodeGenerator::getNumberSpaces()
{
  return (numberSpaces);
}

/** Stores a new style path. The style itself is not reloaded here.
    \param s New value of the style path */
void CodeGenerator::setStyleName(const string& s)
{
  stylePath = s;
}

/** \return Reference to the stored style path */
const string& CodeGenerator::getStyleName()
{
  return (stylePath);
}

/** \return true if no indentation scheme has been enabled */
bool CodeGenerator::formattingDisabled()
{
  if (formattingEnabled) {
    return false;
  }
  return true;
}

/** \return Value of the flag set by initLanguage() from the language
            definition's reformatting setting */
bool CodeGenerator::formattingIsPossible()
{
  return (formattingPossible);
}

/** Configures line wrapping.

    Any wrapper created by an earlier call is released first. getInstance()
    calls this method every time it is invoked on the shared instance, so
    without the release each call would leak the previous wrapper, and
    WRAP_DISABLED could never switch wrapping off again.

    \param lineWrappingStyle WRAP_DISABLED removes the wrapper; WRAP_DEFAULT
                             creates one with indentation enabled; any other
                             value creates one without indentation
    \param lineLength        Maximum line length passed to the wrapper
*/
void CodeGenerator::setLineWrapping(WrapMode lineWrappingStyle,
                                    unsigned int lineLength){
  delete lineWrapper;
  lineWrapper=NULL;

  if (lineWrappingStyle!=WRAP_DISABLED)  {
    lineWrapper=new LineWrapper(lineLength, lineWrappingStyle==WRAP_DEFAULT);
  }
}

/** \return WRAP_DISABLED if no line wrapper exists, WRAP_DEFAULT if the
            wrapper indents wrapped code, WRAP_SIMPLE otherwise */
WrapMode CodeGenerator::getLineWrapping()
{
  if (lineWrapper != NULL) {
    if (lineWrapper->indentCode()) {
      return WRAP_DEFAULT;
    }
    return WRAP_SIMPLE;
  }
  return WRAP_DISABLED;
}

/** \return Reference to the language definition held by this generator */
LanguageDefinition &CodeGenerator::getLanguage()
{
  return (langInfo);
}

/** Resets the parsing position: line number, line index and line buffer.
    showLineNumbers, lineNumberFillZeroes and fragmentOutput are not touched
    here. */
void CodeGenerator::reset()
{
  lineNumber = 0;
  lineIndex = 0;
  line.clear();
}


/** Searches forward from position searchPos for a number in s and returns
    its integer value as a State.
    In the symbol string, the constant assigned to a symbol always stands
    BEHIND that symbol.
    \param s         Symbol string to scan
    \param searchPos Position of the symbol; scanning starts two characters
                     after it
    \return The number found converted to State, or _UNKNOWN if the value is 0
            (also when no digit was found) */
State CodeGenerator::getState(const string &s, unsigned int searchPos)
{
  const string::size_type len = s.length();
  unsigned int pos = searchPos + 2;

  // skip everything up to the first digit
  while (pos < len && !isdigit(s[pos])) {
    ++pos;
  }

  // accumulate the decimal number
  unsigned int value = 0;
  for (; pos < len && isdigit(s[pos]); ++pos) {
    value = value * 10 + (s[pos] - '0');
  }

  if (value == 0) {
    return _UNKNOWN;
  }
  return (State)value;
}

/** Reads an identifier from the current line.

    Steps lineIndex back by one (the first character has already been
    consumed by the caller, so lineIndex must be greater than 0) and then
    advances it over all characters that are alphabetic, digits or allowed by
    the language definition.

    The end-of-line check now guards all three character tests. In the
    original the allowed-character test was outside the bound check because of
    a misplaced parenthesis. Digits are tested through unsigned char, since
    isdigit() is undefined for negative char values.

    \return The identifier found; lineIndex points behind it afterwards */
string CodeGenerator::getIdentifier()
{
  --lineIndex;
  const unsigned int startPos=lineIndex;

  while (lineIndex < line.length()) {
    const char c = line[lineIndex];
    const bool partOfIdentifier =
           StringTools::isAlpha(c)
        || isdigit(static_cast<unsigned char>(c))
        || isAllowedChar(c);
    if (!partOfIdentifier) {
      break;
    }
    ++lineIndex;
  }
  return string(line, startPos, lineIndex - startPos);
}

/** Reads a number literal from the current line.

    Steps lineIndex back by one (the first character has already been
    consumed by the caller, so lineIndex must be greater than 0) and then
    advances it while the characters can belong to a number.

    Character classification goes through unsigned char because isdigit() and
    isxdigit() are undefined for negative char values. The look-behind and
    look-ahead accesses are bounds-checked.

    \return The number found; lineIndex points behind it afterwards */
string CodeGenerator::getNumber()
{
  --lineIndex;
  const unsigned int startPos=lineIndex;

  while (lineIndex < line.length()) {
    const char c = line[lineIndex];
    const unsigned char uc = static_cast<unsigned char>(c);

    const bool nextIsDigit =
           (lineIndex + 1 < line.length())
        && isdigit(static_cast<unsigned char>(line[lineIndex+1]));

    const bool partOfNumber =
           isdigit(uc)
        // don't highlight methods applied on numbers as part of the number
        // i.e. Ruby: 3.xxx()
        || (c == '.' && nextIsDigit)
        // '-' is accepted as first character
        || (c == '-' && lineIndex == startPos)
        // a letter directly after '0' (prefixes such as 0x)
        || (lineIndex > 0 && StringTools::isAlpha(c) && line[lineIndex-1]=='0')
        // hex digits and integer suffixes
        || isxdigit(uc) || c=='L' || c=='U' || c=='l' || c=='u';

    if (!partOfNumber) {
      break;
    }
    ++lineIndex;
  }
  return string(line,startPos, lineIndex-startPos);
}

/** \return Current value of the line counter */
unsigned int CodeGenerator::getLineNumber()
{
  return (lineNumber);
}

/** Fetches the next input line, either from the formatter (if reformatting
    is possible and enabled) or directly from the input stream.

    Before reading, the last character consumed from the old content of
    newLine is stored in terminatingChar. If nothing has been consumed yet
    (lineIndex is 0, e.g. on the very first read or after an empty line), or
    lineIndex is out of range, '\0' is stored instead of indexing outside the
    string.

    \param newLine Holds the previous line on entry and receives the new line
    \return true if no further line could be read */
bool CodeGenerator::readNewLine(string &newLine){
  bool eof;

  if (lineIndex > 0 && lineIndex <= newLine.length()) {
    terminatingChar=newLine[lineIndex-1];
  } else {
    terminatingChar='\0';
  }

  if (formattingPossible && formattingEnabled)
   {
     eof=!formatter->hasMoreLines();
      if (!eof)
       {
         newLine = formatter->nextLine();
       }
   }
   else  // reformatting not enabled
    {
      eof = ! getline( *in, newLine);
    }
   return eof;
}

/** Returns the next character of the input.

    When the current line is exhausted, a new line is fetched (through the
    line wrapper if one is set), lineIndex is reset, the line counter is
    incremented and trailing whitespace is trimmed from the new line.

    The end-of-line test uses >= so that a lineIndex that has moved past the
    end of the line triggers a reload instead of reading outside the string.

    \return '\n' when a new line was fetched, '\0' at end of input, otherwise
            the next character of the current line */
unsigned char CodeGenerator::getInputChar()
{
  const bool eol = lineIndex >= line.length();

  if (eol) {
    bool eof=false;
    if (lineWrapper!=NULL){
       // only read a new source line once the wrapper has no more pieces
       if (!lineWrapper->hasMoreLines()) {
          eof=readNewLine(line);
          lineWrapper->setLine(line);
       }
       line = lineWrapper->getNextLine();
    } else {
      eof=readNewLine(line);
    }
    lineIndex=0;
    ++lineNumber;
    line=StringTools::trimRight(line);
    return (eof)?'\0':'\n';
  }
  return line[lineIndex++];
}

/** Determines the state of the next token and stores its text in token.

    If token is empty, the next character is read with getInputChar().
    Otherwise the pending token is re-examined: lineIndex is moved back by
    token.length()-1 and the first character of token is used.

    The checks run in this order: end of line, end of file, whitespace,
    number, entry of the language's symbol string, identifier/keyword. A
    character that matches none of them becomes a one-character STANDARD
    token.

    \param lastStateWasNumber true if the previous token was a number; a '-'
           is then not treated as the sign of a following number
    \return State of the token found */
State CodeGenerator::getCurrentState (bool lastStateWasNumber)
{
  unsigned char c;

  if (token.length()==0) {
    c=getInputChar();
  }  else {
    // re-evaluate the token left over by the caller
    lineIndex-= (token.length()-1);
    c=token[0];
  }

  if (c=='\n'){
    return _EOL;   // End of line
  }

  if (c=='\0') {
    return _EOF;   // End of file
  }
  // replace tabs by spaces
  if (isspace(c))
    {
      if (c=='\t'){
         if (numberSpaces){
            // a tab becomes numberSpaces copies of spacer
            for (int space=0; space < numberSpaces; space++){
              token += spacer;
            }
         } else {
           token=c;
         }
       } else {
          token=spacer;
       }
      return _WS;
    }

  // numbers have to be searched before using the symbolstring,
  // as numbers are part of this string
  if (isdigit(c)
      // recognize floats like .5
      || (c=='.' && isdigit(line[lineIndex]))
      // test if '-' belongs to a term like "1-2"
      || ((c == '-')
          && (!lastStateWasNumber)
          && isdigit(StringTools::getNextNonWs(line, lineIndex))) )
    {
      token = getNumber();
      return NUMBER;
    }

  unsigned int symbolLength;
  size_t symbolPos;
  bool found=false;
  string symbols=langInfo.getSymbolString();

  symbolPos = symbols.find(c);
  // search symbols (comment delimiters, directives etc.)
  // before keywords, because alphabetic chars may be part of symbols, too
  while ((symbolPos!= string::npos) && (!found))
    {
      // candidate = text from symbolPos up to the next blank
      symbolLength=symbols.find(' ', symbolPos)-symbolPos;
      token = symbols.substr(symbolPos, symbolLength);

      // TODO Ruby =ende, =end bugfix (whitespace after symbol needs to be checked)

      // Requiring whitespace in front of the entry in the symbol string
      // prevents a fragment of a longer symbol from being matched:
      if (lineIndex && token == line.substr(lineIndex-1, symbolLength)
                    //&& symbols[symbolPos-1] == ' '
                    && isspace(symbols[symbolPos-1])
                //    && isspace(line[lineIndex + symbolLength+1])

          ) {
         found = true;
         // consume the rest of the symbol (its first char is already read)
         lineIndex += (symbolLength-1);
      } else {
        // no match: continue with the next blank-separated entry
        symbolPos = symbols.find_first_not_of(' ',symbols.find(' ',symbolPos));
      }
    }

  // dirty workaround stuff in here
  if (found) {
    // the state constant follows the symbol in the symbol string
    State foundState = getState(symbols, symbolPos);

    // get the current keyword class id to apply the corresponding formatting style
    if (foundState==KEYWORD_BEGIN || foundState==TAG_BEGIN ) {
      currentKeywordClass=langInfo.getDelimPrefixClassID(token);
    }

    // Full line comments must start in column 1 (Fortran 77)
    if (langInfo.isFullLineComment() && foundState==SL_COMMENT){
      if (lineIndex==1) {
        return SL_COMMENT;
      }
      // otherwise fall through to the identifier/standard handling below
    }
    // VHDL Workaround: distinguish string delimiters and event markers
    // (same symbol: ')
    else if (langInfo.isVHDL()
             && foundState==STRING && currentState!=STRING
             && lineIndex > 1
             &&(isdigit(line[lineIndex-2]) || isalpha(line[lineIndex-2]))){
        c=line[lineIndex-1];
        // do not return, continue search...
    }  else {
        return foundState;
    }
  }

  // Parse an alphanumeric token and classify it as keyword or standard text
  if (StringTools::isAlpha(c) || langInfo.isPrefix(c))
    {
      if (langInfo.isPrefix(c)){
        // keep the prefix character and append the identifier behind it
        token = c;
        ++lineIndex;
        token += getIdentifier();
      } else {
        token = getIdentifier();
      }
      string reservedWord=(langInfo.isIgnoreCase()) ?
                          StringTools::lowerCase(token):token;
      // a non-zero class id marks the token as keyword
      currentKeywordClass=langInfo.isKeyword(reservedWord);
      return (currentKeywordClass) ? KEYWORD : STANDARD;
    }

  // Character was not recognised: fall back to the standard state
  token = c;
  return STANDARD;
}

/** Applies maskCharacter() to every character of a string.
    \param s Input string
    \return Concatenation of the masked characters */
string CodeGenerator::maskString(const string & s)
{
  ostringstream masked;
  for (string::const_iterator ch = s.begin(); ch != s.end(); ++ch) {
    masked << maskCharacter(*ch);
  }
  return masked.str();
}

void CodeGenerator::printMaskedToken(bool flushWhiteSpace)
{
  if(flushWhiteSpace) flushWs();
  *out << maskString(token);
  token.clear();
}

bool CodeGenerator::isAllowedChar(char c)
{
  string allowedChars=langInfo.getAllowedChars();
  return ( allowedChars.find(c)!= string::npos);
}

/** \return Result of docStyle.found() */
bool CodeGenerator::styleFound()
{
  return (docStyle.found());
}

/** Base implementation: writes nothing.
    \param outFile Unused here
    \return always true */
bool CodeGenerator::printExternalStyle(const string &outFile)
{
  return true;
}

/** Base implementation: writes nothing.
    \param fileList Unused here
    \param outPath  Unused here
    \return always true */
bool CodeGenerator::printIndexFile(const vector<string> &fileList,
                                   const string &outPath)
{
  return true;
}

bool CodeGenerator::initIndentationScheme(const string &schemePath){

  if (formatter!=NULL){
    return true;
  }

  ConfigurationReader indentScheme(schemePath);
  if (indentScheme.found()){
    if (formatter==NULL) {
       formatter=new astyle::ASFormatter();

       string brackets=indentScheme.getParameter("brackets");
       if (!brackets.empty()){
         // Break brackets from pre-block code (i.e. ANSI C/C++ style).
         if (brackets=="break"){
           formatter->setBracketFormatMode(astyle::BREAK_MODE);
         }
         //Attach brackets to pre-block code (i.e. Java/K&R style).
         else if (brackets=="attach"){
           formatter->setBracketFormatMode(astyle::ATTACH_MODE);
         }
         // Break definition-block brackets and attach command-block brackets.
         else if (brackets=="linux"){
           formatter->setBracketFormatMode(astyle::BDAC_MODE);
         }
         // Break brackets before closing headers (e.g. 'else', 'catch', ..).
         // Should be appended to --brackets=attach or --brackets=linux.
         else if (brackets=="break-closing-headers"){
           formatter->setBreakClosingHeaderBracketsMode(true);
         }
       }

       string pad=indentScheme.getParameter("pad");
       if (!pad.empty()){
         //Insert space paddings around parenthesies only.
         if (pad=="paren"){
           formatter->setParenthesisPaddingMode(true);
         }
         // Insert space paddings around operators only.
         else if (pad=="oper"){
           formatter->setOperatorPaddingMode(true);
         }
         //Insert space paddings around operators AND parenthesies.
         else if (pad=="all"){
           formatter->setOperatorPaddingMode(true);
           formatter->setParenthesisPaddingMode(true);
         }
       }

       string oneLine=indentScheme.getParameter("one-line");
       if (!oneLine.empty()){
         // Don't break one-line blocks.
         if (oneLine=="keep-blocks"){
           formatter->setBreakOneLineBlocksMode(false);
         }
         // Don't break complex statements and multiple statements residing in a
         // single line.
         else if (oneLine=="keep-statements"){
           formatter->setSingleStatementsMode(false);
         }
       }

       // Insert empty lines around unrelated blocks, labels, classes, ...
       string breakBlocks=indentScheme.getParameter("break-blocks");
       if (!breakBlocks.empty()){
         if (breakBlocks=="all"){
           //Like --break-blocks, except also insert empty lines around closing
           //headers (e.g. 'else', 'catch', ...).
           formatter->setBreakClosingHeaderBlocksMode(true);
         }
         formatter->setBreakBlocksMode(true);
       }
       string trueVal="true";

       // Other options...

       //Indent using # spaces per indent. Not specifying # will result in a
       //default of 4 spaces per indent.
       string indentSpaces=indentScheme.getParameter("indent-spaces");

       // Indent a minimal # spaces in a continuous conditional belonging to a
       //conditional header.
       string minConditionalIndent=indentScheme.getParameter("min-conditional-indent");

       // Indent a maximal # spaces in a continuous statement, relatively to the
       // previous line.
       string maxInStatementIndent=indentScheme.getParameter("max-instatement-indent");

       // Add extra indentation to '{' and '}' block brackets.
       string indentBrackets=indentScheme.getParameter("indent-brackets");

       // Add extra indentation entire blocks (including brackets).
       string indentBlocks=indentScheme.getParameter("indent-blocks");

       // Indent the contents of namespace blocks.
       string indentNamespaces=indentScheme.getParameter("indent-namespaces");

       // Indent 'class' blocks, so that the inner 'public:','protected:' and
       // 'private: headers are indented inrelation to the class block.
       string indentClasses=indentScheme.getParameter("indent-classes");

       // Indent 'switch' blocks, so that the inner 'case XXX:' headers are
       // indented in relation to the switch block.
       string indentSwitches=indentScheme.getParameter("indent-switches");

       // Indent 'case XXX:' lines, so that they are flush with their bodies..
       string indentCases=indentScheme.getParameter("indent-cases");

       // Indent labels so that they appear one indent less than the current
       // indentation level, rather than being    flushed completely to the left
       // (which is the default).
       string indentLabels=indentScheme.getParameter("indent-labels");

       // Indent multi-line #define statements
       string indentPreprocessor=indentScheme.getParameter("indent-preprocessor");

       // Break 'else if()' statements into two different lines.
       string breakElseIfs = indentScheme.getParameter("break-elseifs");

       string javaStyle = indentScheme.getParameter("java-style");

       // default values in ASBeautifier are false, it is ok to set them false
       // if parameter does not exist in scheme file
       formatter->setBracketIndent(indentBrackets==trueVal);
       formatter->setBlockIndent(indentBlocks==trueVal);
       formatter->setNamespaceIndent(indentNamespaces==trueVal);
       formatter->setClassIndent(indentClasses==trueVal);
       formatter->setSwitchIndent(indentSwitches==trueVal);
       formatter->setCaseIndent(indentCases==trueVal);
       formatter->setLabelIndent(indentLabels==trueVal);
       formatter->setPreprocessorIndent(indentPreprocessor==trueVal);
       formatter->setBreakElseIfsMode(breakElseIfs==trueVal);

       if (javaStyle==trueVal){
         formatter->setJavaStyle();
       }

       if (!indentSpaces.empty()){
         formatter->setSpaceIndentation(StringTools::str2int(indentSpaces));
       }
       if (!minConditionalIndent.empty()){
         formatter->setMinConditionalIndentLength(
                      StringTools::str2int(minConditionalIndent));
       }
       if (!maxInStatementIndent.empty()){
         formatter->setMinConditionalIndentLength(
                      StringTools::str2int(maxInStatementIndent));
       }
    }
    formattingEnabled=(formatter != NULL);
    return true;
  } else {
    return false;
  }
}

/** Loads a language definition if it differs from the current one and
    rebuilds the keyword style tags.
    \param langDefPath Path of the language definition
    \return LOAD_NONE if no reload was necessary, LOAD_FAILED if loading
            failed, LOAD_NEW if a new definition was loaded */
LoadResult CodeGenerator::initLanguage(const string& langDefPath)
{
  if (!langInfo.needsReload(langDefPath)) {
    return LOAD_NONE;
  }

  if (!langInfo.load(langDefPath)) {
    return LOAD_FAILED;
  }

  formattingPossible = langInfo.enableReformatting();

  // drop the dynamic keyword tag delimiters of the old language definition,
  // keeping only the built-in styles
  if (styleTagOpen.size() > NUMBER_BUILTIN_STYLES) {
    styleTagOpen.erase(styleTagOpen.begin() + NUMBER_BUILTIN_STYLES,
                       styleTagOpen.end());
    styleTagClose.erase(styleTagClose.begin() + NUMBER_BUILTIN_STYLES,
                        styleTagClose.end());
  }

  // append one open/close tag pair per keyword class of the new definition
  for (unsigned int classIdx = 0;
       classIdx < langInfo.getKeywordClasses().size();
       ++classIdx) {
    styleTagOpen.push_back(getMatchingOpenTag(classIdx));
    styleTagClose.push_back(getMatchingCloseTag(classIdx));
  }

  return LOAD_NEW;
}

/** Converts one input to highlighted output.

    An empty file name selects cin or cout. File streams opened here are
    deleted before returning. The output stream is only deleted if this call
    created it: when opening the input fails, out still holds the value left
    by an earlier call (possibly &cout) and must not be deleted.

    \param inFileName  Path of the input file, or empty for cin
    \param outFileName Path of the output file, or empty for cout
    \return BAD_STYLE if no style was loaded, BAD_INPUT or BAD_OUTPUT if a
            stream could not be opened, PARSE_OK otherwise */
ParseError CodeGenerator::printOutput (const string & inFileName,
                                       const string &outFileName)
{
  if (!docStyle.found()){
    return BAD_STYLE;
  }
  reset();

  ParseError error=PARSE_OK;
  bool outCreatedHere=false;

  if (inFileName.empty()){
    in = &cin;
  } else {
    in = new ifstream (inFileName.c_str());
  }

  if (in->fail()){
    error=BAD_INPUT;
  } else {
    if (outFileName.empty()){
      out = &cout;
    } else {
      out = new ofstream (outFileName.c_str());
      outCreatedHere=true;
    }
    if ( out->fail()){
      error=BAD_OUTPUT;
    }
  }

  if (error==PARSE_OK) {
    if (formatter != NULL){
       formatter->init(new astyle::ASStreamIterator(in));
    }
    if (! fragmentOutput){
      *out << getHeader(inFileName);
    }
    printBody();
    if (! fragmentOutput){
      *out << getFooter();
    }
  }

  if (outCreatedHere){
    delete out;
  }
  if (!outFileName.empty()){
    out=NULL;
  }
  if (!inFileName.empty()) {
    delete in; in=NULL;
  }
  return error;
}

/** Maps a state to its index in the style tag vectors.
    \param s State to map
    \return For KEYWORD the index derived from the current keyword class
            (placed behind the built-in styles), otherwise the numeric value
            of the state */
unsigned int CodeGenerator::getStyleID(State s)
{
  if (s != KEYWORD) {
    return (unsigned int) s ;
  }
  return NUMBER_BUILTIN_STYLES + currentKeywordClass-1;
}

/** Writes the closing tag of a state, flushes buffered whitespace and marks
    the current state as unknown.
    \param s State whose closing tag is written */
void CodeGenerator::closeTag(State s)
{
  const unsigned int tagIndex = getStyleID(s);
  *out << styleTagClose[tagIndex];
  flushWs();
  currentState = _UNKNOWN;
}

/** Writes the opening tag of a state and makes it the current state.
    \param s State whose opening tag is written */
void CodeGenerator::openTag(State s)
{
  const unsigned int tagIndex = getStyleID(s);
  *out << styleTagOpen[tagIndex];
  currentState = s;
}

///////////////////////////////////////////////////////////////////////////////

/** Top-level loop of the highlighter.

    If highlighting is disabled for the language, the input is copied line by
    line with masked characters only. Otherwise tokens are read in STANDARD
    state until end of file; for every token that starts another state, the
    STANDARD tag is closed, the matching process...State() method is called
    and the STANDARD tag is reopened afterwards. */
void CodeGenerator::processRootState()
{
  if (langInfo.highlightingDisabled()){
     // plain copy: mask each line and append the newline tag
     string line;
     while (getline(*in, line)){
       *out << maskString(line) << getNewLine();
     }
     *out << flush;
     return;
  }

  State state=STANDARD;

  bool eof=false,
       firstLine=true; // avoid newline before printing the first output line
  openTag(STANDARD);
  do {
    // determine next state
    state= getCurrentState(state==NUMBER);
    // handle current state
    switch(state)
      {
      case KEYWORD:
      case KEYWORD_BEGIN:
        closeTag(STANDARD);
        eof=processKeywordState(state);
        openTag(STANDARD);
        break;
      case NUMBER:
        closeTag(STANDARD);
        eof=processNumberState();
        openTag(STANDARD);
        break;
      case ML_COMMENT_BEGIN:
        closeTag(STANDARD);
        eof=processMultiLineCommentState();
        openTag(STANDARD);
        break;
      case SL_COMMENT:
        closeTag(STANDARD);
        eof=processSingleLineCommentState();
        openTag(STANDARD);
        break;
      case STRING:
        closeTag(STANDARD);
        eof=processStringState(STANDARD);
        openTag(STANDARD);
        break;
      case DIRECTIVE_LINE:
        closeTag(STANDARD);
        eof=processDirectiveState();
        openTag(STANDARD);
        break;
      case TAG_BEGIN:
        closeTag(STANDARD);
        eof=processTagState();
        openTag(STANDARD);
        break;
      case ESC_CHAR_EXT:
        closeTag(STANDARD);
        eof=processEscapeCharState();
        openTag(STANDARD);
        break;
      case SYMBOL:
        closeTag(STANDARD);
        eof=processSymbolState();
        openTag(STANDARD);
        break;
      case _EOL:
        // no newline tag in front of the very first line
        insertLineNumber(!firstLine);
        firstLine=false;
        break;
      case _EOF:
        eof=true;
        break;
      case _WS:
        processWsState();
        break;
      default:
        // everything else is printed as standard text
        printMaskedToken();
        break;
      }
    }
  while (!eof);
  closeTag(STANDARD);
  *out << getNewLine();
  *out << flush;
}

/** Prints a keyword, or a sequence delimited by KEYWORD_BEGIN and
    KEYWORD_END.
    \param myState KEYWORD or KEYWORD_BEGIN, the state that triggered the call
    \return true if the end of the input was reached */
bool CodeGenerator::processKeywordState(State myState)
{
  const unsigned int enteredClassID = currentKeywordClass;
  State latest = STANDARD;
  bool reachedEof = false;

  openTag(KEYWORD);
  for (;;) {
    printMaskedToken(latest != _WS);
    latest = getCurrentState();

    if (latest == _WS) {
      processWsState();
      continue;
    }
    if (latest == _EOL) {
      insertLineNumber();
      continue;
    }
    if (latest == _EOF) {
      reachedEof = true;
      break;
    }
    if (latest == KEYWORD_END) {
      // the end delimiter belongs to a delimited keyword sequence
      if (myState == KEYWORD_BEGIN) {
        printMaskedToken();
      }
      break;
    }
    // a plain keyword ends as soon as the class or the state changes;
    // a delimited sequence only ends at KEYWORD_END
    if (myState != KEYWORD_BEGIN
        && ((enteredClassID != currentKeywordClass) || (myState != latest))) {
      break;
    }
  }

  closeTag(KEYWORD);
  currentKeywordClass = 0;
  return reachedEof;
}

bool CodeGenerator::processNumberState(){
  State newState=STANDARD;
  bool eof=false,
       exitState=false;

  openTag(NUMBER);
  do {
    printMaskedToken(newState!=_WS);
    newState= getCurrentState(true);
    switch(newState)
      {
      case _WS:
        processWsState();
        break;
      case _EOL:
        insertLineNumber();
        break;
      case _EOF:
        eof = true;
        break;
      default:
        exitState=newState!=NUMBER;
        break;
      }
  } while ((!exitState) && (!eof));

  closeTag(NUMBER);
  return eof;
}

bool CodeGenerator::processMultiLineCommentState()
{
  int commentCount=1;
  State newState=STANDARD;
  bool eof=false, exitState=false;

  openTag(ML_COMMENT_BEGIN);
  do {
    printMaskedToken(newState!=_WS);
    newState= getCurrentState();

    switch(newState)
      {
      case _WS:
        processWsState();
        break;
      case _EOL:
        insertLineNumber();
        break;
      case _EOF:
        eof = true;
        break;
      case ML_COMMENT_BEGIN:
        if (langInfo.allowNestedMLComments()) {
              ++commentCount;
        }
        break;
      case ML_COMMENT_END:
        commentCount--;
        if (!commentCount){
            printMaskedToken();
            exitState=true;
        }
        break;
      default:
        break;
      }
  } while ((!exitState) && (!eof));

  closeTag(ML_COMMENT_BEGIN);
  return eof;
}

bool CodeGenerator::processSingleLineCommentState()
{
  State newState=STANDARD;
  bool eof=false, exitState=false;

  openTag(SL_COMMENT);
  do {
    printMaskedToken(newState!=_WS);
    newState= getCurrentState();

    switch(newState)
      {
      case _WS:
        processWsState();
        break;
     case _EOL:
        printMaskedToken();
        exitState=true;
        insertLineNumber();
        break;
      case _EOF:
        eof = true;
        break;
      default:
        break;
      }
  } while ((!exitState) && (!eof));

  closeTag(SL_COMMENT);
  return eof;
}

/** Prints a directive line.

    The directive ends at DIRECTIVE_LINE_END, after a single-line comment, or
    at the end of a line whose last consumed character (terminatingChar) is
    not the language's continuation character. Comments and strings inside
    the directive are handled by their own state methods; the DIRECTIVE_LINE
    tag is closed before and reopened after each of them.
    \return true if the end of the input was reached */
bool CodeGenerator::processDirectiveState()
{
  State  newState=STANDARD;
  bool eof=false, exitState=false;

  openTag(DIRECTIVE_LINE);
  do {
    printMaskedToken(newState!=_WS);
    newState= getCurrentState();
    switch(newState)
      {
      case _WS:
        processWsState();
        break;
      case DIRECTIVE_LINE_END:
        printMaskedToken();
        exitState=true;
        break;
      case _EOL:
        printMaskedToken();
        // stay in the directive if the line ended with the continuation char
        exitState=(terminatingChar!=langInfo.getContinuationChar());
        insertLineNumber();
        break;
      case ML_COMMENT_BEGIN:
        closeTag(DIRECTIVE_LINE);
        eof= processMultiLineCommentState();
        openTag(DIRECTIVE_LINE);
        break;
      case SL_COMMENT:
        closeTag(DIRECTIVE_LINE);
        eof= processSingleLineCommentState();
        openTag(DIRECTIVE_LINE);
        // the single-line comment consumed the rest of the line
        exitState=true;
        break;
      case STRING:
        closeTag(DIRECTIVE_LINE);
        eof=processStringState(DIRECTIVE_LINE);
        openTag(DIRECTIVE_LINE);
        break;
      case _EOF:
        eof = true;
        break;
      default:
        break;
      }
  } while ((!exitState) && (!eof));

  closeTag(DIRECTIVE_LINE);
  return eof;
}

/** Prints a string literal up to the delimiter that matches its opening
    delimiter.

    Escape sequences are handled by processEscapeCharState() unless the
    string is a raw string, i.e. the character in front of the opening
    delimiter equals the language's raw string prefix (Example: r" ",
    r""" """ in Python).

    Fix: the prefix lookup is only done if a character exists in front of the
    delimiter. A string starting in the first column used to index the line
    at position -1.

    \param oldState State that was active when the string started;
                    DIRECTIVE_LINE selects the DIRECTIVE_STRING style
    \return true if the end of the input was reached */
bool CodeGenerator::processStringState(State oldState)
{
  State newState=STANDARD;
  bool eof=false, exitState=false;
  bool returnedFromOtherState=false;

  // Offset from lineIndex back to the character before the open delimiter
  const string::size_type prefixOffset=token.length()+1;
  bool isRawString=false;
  if (lineIndex>=prefixOffset && (lineIndex-prefixOffset)<line.length()){
    isRawString=
         line[lineIndex-prefixOffset]==langInfo.getRawStringPrefix();
  }

  string openStringDelimiter=token;

  State myState= (oldState==DIRECTIVE_LINE) ? DIRECTIVE_STRING : STRING;
  openTag(myState);
  do {
    // true if last token was an escape char
    if (!returnedFromOtherState) {
         printMaskedToken(newState!=_WS);
    }
    returnedFromOtherState=false;
    newState= getCurrentState();

    switch(newState)
      {
      case _WS:
        processWsState();
        break;
      case _EOL:
        insertLineNumber();
        break;
      case ML_COMMENT_END:
        printMaskedToken();
        break;
      case STRING:
        // only the same delimiter that opened the string closes it
        exitState= openStringDelimiter==token;
        printMaskedToken();
        break;
      case ESC_CHAR:
        if (!isRawString){
           closeTag(myState);
           eof=processEscapeCharState();
           openTag(myState);
           returnedFromOtherState=true;
        }
        break;
      case _EOF:
        eof = true;
        break;
      default:
        printMaskedToken();
        break;
      }
  } while ((!exitState) && (!eof));

  closeTag(myState);
  return eof;
}

/** Prints a tag, from TAG_BEGIN up to TAG_END, in the KEYWORD style.

    Strings, escape characters and numbers inside the tag are handled by
    their own state methods. Because the keyword class is needed to reopen
    the tag with the same style, it is saved on entry and restored after each
    nested state.
    \return true if the end of the input was reached */
bool CodeGenerator::processTagState()
{
  State  newState=STANDARD;
  bool eof=false, exitState=false, returnedFromOtherState=false;
  unsigned int myKeywordClass=currentKeywordClass;

  openTag(KEYWORD);
  do {
    // the pending token is not printed here right after a nested state
    if (!returnedFromOtherState) {
       printMaskedToken(newState!=_WS);
    }
    returnedFromOtherState = false;
    newState= getCurrentState();

    switch(newState)
      {
      case _WS:
        processWsState();
        break;
      case _EOL:
        insertLineNumber();
        break;
      case TAG_END:
        printMaskedToken();
        exitState=true;
        break;
      case STRING:
        closeTag(KEYWORD);
        eof=processStringState(KEYWORD);
        currentKeywordClass=myKeywordClass;
        openTag(KEYWORD);
        returnedFromOtherState = true;
        break;
      case ESC_CHAR:
        closeTag(KEYWORD);
        eof=processEscapeCharState();
        currentKeywordClass=myKeywordClass;
        openTag(KEYWORD);
        returnedFromOtherState = true;
        break;
      case NUMBER:
        closeTag(KEYWORD);
        eof=processNumberState();
        currentKeywordClass=myKeywordClass;
        openTag(KEYWORD);
        returnedFromOtherState = true;
        break;
      case _EOF:
        eof = true;
        break;
      default:
        printMaskedToken();
        break;
      }
  } while ((!exitState) && (!eof));

  closeTag(KEYWORD);
  currentKeywordClass=0;

  return eof;
}

bool CodeGenerator::processSymbolState(){

  State newState=STANDARD;
  bool eof=false,
       exitState=false;

  openTag(SYMBOL);
  do {
    printMaskedToken(newState!=_WS);
    newState= getCurrentState(true);
    switch(newState)
      {
     case _WS:
        processWsState();
        break;
      case _EOL:
        insertLineNumber();
        break;
      case _EOF:
        eof = true;
        break;
      default:
        exitState=newState!=SYMBOL;
        break;
      }
  } while ((!exitState) && (!eof));

  closeTag(SYMBOL);
  return eof;
}

/** Prints one or more consecutive escape sequences inside the ESC_CHAR tag.

    After the escape token itself, skipEscapeSequence() prints the characters
    that belong to the sequence. The state is left at the end of a line or as
    soon as the next token is neither ESC_CHAR nor ESC_CHAR_EXT.
    \return true if the end of the input was reached */
bool CodeGenerator::processEscapeCharState()
{
  State newState=STANDARD;
  bool eof=false, exitState=false;

  openTag(ESC_CHAR);
  do {
    printMaskedToken(newState!=_WS);
    skipEscapeSequence();
    newState= getCurrentState();
    switch(newState)
      {
      case _EOL:
        insertLineNumber();
        exitState=true;
        break;
      case _WS:
        processWsState();
        // NOTE(review): steps back one character after the whitespace was
        // handled; the reason is not visible here - confirm before changing
        lineIndex--;
        break;
      case _EOF:
        eof = true;
        break;
      default:
        exitState=newState!=ESC_CHAR && newState!=ESC_CHAR_EXT;
        break;
      }
  } while ((!exitState) && (!eof));

  closeTag(ESC_CHAR);
  return eof;
}

/** Prints the characters that belong to the escape sequence starting at
    lineIndex and moves lineIndex behind them.

    Sequence lengths: up to 4 consecutive digits; 'x'/'X' plus following
    characters, 3 in total (4 for Java); 'u'/'U' plus following characters, 5
    in total; 1 character otherwise.

    Fixes: lineIndex is no longer incremented when the end of the line has
    been reached (it used to end up one past the line length), the digit
    look-ahead is bounds-checked, and character classification goes through
    unsigned char because isdigit()/tolower() are undefined for negative char
    values. */
void  CodeGenerator::skipEscapeSequence(){
  if (lineIndex<line.length()){
    const unsigned char c=static_cast<unsigned char>(line[lineIndex]);
    int charsToSkip=1;
    // Escape sequences: \ooo octal, \x000 hex, \u00xx Java unicode
    if (isdigit(c) ){
      // also covers \0
      while ( charsToSkip<4
              && lineIndex+charsToSkip<line.length()
              && isdigit(static_cast<unsigned char>(
                           line[lineIndex+charsToSkip])) ) {
        ++charsToSkip;
      }
    } else if (tolower(c)=='x'){
      charsToSkip=langInfo.isJava() ? 4 : 3;
    } else if (tolower(c)=='u'){
      charsToSkip=5;
    }
    while (charsToSkip-- > 0 && lineIndex<line.length()){
       *out <<maskCharacter(line[lineIndex]);
       ++lineIndex;
    }
  }
}


/** Handles a whitespace token.

    Without whitespace masking the token is appended to wsBuffer and printed
    later by flushWs(). With masking enabled, the whole run of whitespace
    starting at the character just read is counted and written immediately as
    spacer strings; runs longer than one are enclosed in maskWsBegin and
    maskWsEnd. If excludeWs is set, the tag of the current state is closed
    before and reopened after the whitespace. */
void CodeGenerator::processWsState()
{
  if (!maskWs) {
    // buffer the whitespace; it is written with the next flushWs()
    wsBuffer += token;
    token.clear();
    return;
  }

  // TODO : Move block to start of method;
  // avoid escaping of single whitespace in LaTeX/TeX
  int wscnt=0;
  // step back to the whitespace character that was just read
  lineIndex--;
  while (isspace(line[lineIndex])) {
    // a blank counts once, any other whitespace counts numberSpaces times
    wscnt+=(line[lineIndex]==' ') ? 1 : numberSpaces;
    ++lineIndex;
  }
  // TODO end

  unsigned int styleID=getStyleID(currentState);
  if (excludeWs && styleID!=_UNKNOWN) {
     *out << styleTagClose[styleID];
  }
  if (wscnt>1){
    *out << maskWsBegin;
  }
  for (int i=0;i<wscnt;i++){
    *out << spacer;
  }
  if (wscnt>1){
    *out << maskWsEnd;
  }
  if (excludeWs && styleID!=_UNKNOWN){
     *out << styleTagOpen[styleID];
  }
  token.clear();
}

/** Writes the buffered whitespace to the output stream and empties the
    buffer. */
void CodeGenerator::flushWs()
{
  *out << wsBuffer;
  wsBuffer.clear();
}

/** Tests whether only whitespace precedes the character read last, i.e. the
    characters at positions 0 .. lineIndex-2 of the current line.

    Fix: if fewer than two characters have been consumed there is nothing to
    inspect and true is returned. With lineIndex == 0 the unsigned counter
    used to wrap around and the loop read far outside the line. Positions
    beyond the end of the line are not inspected either.

    \return true if no non-whitespace character precedes the current one */
bool CodeGenerator::isFirstNonWsChar() {
  if (lineIndex < 2) {
    return true;
  }
  unsigned int i=lineIndex-1;
  while (i--){
    if (i < line.length()
        && !isspace(static_cast<unsigned char>(line[i]))){
      return false;
    }
  }
  return true;
}

/** \return Copy of the newline tag of the output format */
string CodeGenerator::getNewLine()
{
  return (newLineTag);
}

void CodeGenerator::insertLineNumber(bool insertNewLine) {
  if (insertNewLine){
    *out << getNewLine();
  }
  if (showLineNumbers) {
    ostringstream os;
    if (lineNumberFillZeroes) {
      os.fill('0');
    }
    os <<setw(LINE_NUMBER_WIDTH) << right << lineNumber;

    unsigned int styleID=getStyleID(currentState);

    if (styleID!=_UNKNOWN){
      *out << styleTagClose[styleID];
    }
    *out << styleTagOpen[LINENUMBER]
         << maskString(os.str()) << spacer
         << styleTagClose[LINENUMBER];

    if (styleID!=_UNKNOWN){
      *out << styleTagOpen[styleID];
    }
  }
}

/** \return Current read position within the current line */
unsigned int  CodeGenerator::getLineIndex()
{
  return (lineIndex);
}

}
