/* Token-reader for Bison's input parser,
   Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.

This file is part of Bison, the GNU Compiler Compiler.

Bison is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

Bison is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Bison; see the file COPYING.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */


/* 
   lex() is the entry point.  It is called from reader.c.
   It returns one of the token-type codes defined in lex.h.
   When an identifier is seen, the code IDENTIFIER is returned
   and the name is looked up in the symbol table using symtab.c;
   symval is set to a pointer to the entry found.  */

#include <ctype.h>
#include "system.h"

#include "new.h"
#include "declarations.h"

using namespace std;

namespace {

/* Allocated size of token_buffer, not including space for terminator.  */
int maxtoken;

/* These two describe a token to be reread by the next call to lex.
   Both are assigned by unlex().  */
int unlexed;              /* the token code handed to unlex() */
bucket *unlexed_symval;   /* the value symval had when unlex() was called */

/* The brace that used to close this namespace lives in the commented-out
   region further down, which left the namespace open to the end of the file
   and swallowed unlex() and lex().  Close it here; if that region is ever
   re-enabled, its own closing brace must go.  */
} // anonymous namespace ends

//typedef pair<char const *, int> Element;
//
//class Comparator
//{
//    string const &d_target;
//    public:
//        Comparator (string const &target)
//        :
//            d_target(target)
//        {}
//        bool operator()(Element const &element)
//        {
//            return d_target == element.first;
//        }
//};
//
//Element defineSymbols[] =
//{
//    Element("baseclass-header", BASECLASS_HEADER),
//    Element("baseclass-preinclude", BASECLASS_PREINCLUDE),
//    Element("class-header", CLASS_HEADER),
//    Element("debug", DEBUGFLAG),
//    Element("error-verbose", ERROR_VERBOSE),
//    Element("expect", EXPECT),
//    Element("filenames", FILENAMES),
//    Element("implementation-header", IMPLEMENTATION_HEADER),
//    Element("left", LEFT),
//    Element("locationstruct", LOCATIONSTRUCT),
//    Element("lsp-needed", LSP_NEEDED),
//    Element("ltype", LTYPE),
//    Element("namespace", NAMESPACE),
//    Element("nonassoc", NONASSOC),
//    Element("binary", NONASSOC),
//    Element("nterm", NTERM),
//    Element("parsefun-source", PARSEFUN_SOURCE),
//    Element("class-name", CLASS_NAME),
//    Element("prec", PREC),
//    Element("right", RIGHT),
//    Element("start", START),
//    Element("stype", STYPE),
//    Element("term", TOKEN),
//    Element("token", TOKEN ),
//    Element("type", TYPE),
//    Element("union", UNION),
//};
//size_t/*unsigned*/ const nDefineSymbols = sizeof(defineSymbols) / sizeof(Element);
//
//char *grow_token_buffer(char* p)
//{
//  int offset = p - token_buffer;
//  maxtoken *= 2;
//    char *tmp = strcpy(new char[maxtoken + 1], token_buffer);
//    delete [] token_buffer;
//    return (token_buffer = tmp) + offset;
//}
//
//
//} // anonymous namespace ends
//
//
//
//int skip_white_space()
//{
//  register int c;
//  register int inside;
//
//  c = getc(finput);
//
//  for (;;)
//    {
//      int cplus_comment;
//
//      switch (c)
//    {
//    case '/':
//      c = getc(finput);
//      if (c != '*' && c != '/')
//        fatals("unexpected `/%c' found", (void*) c);
//      cplus_comment = (c == '/');
//
//      c = getc(finput);
//
//      inside = 1;
//      while (inside)
//        {
//          if (!cplus_comment && c == '*')
//        {
//          while (c == '*')
//            c = getc(finput);
//
//          if (c == '/')
//            {
//              inside = 0;
//              c = getc(finput);
//            }
//        }
//          else if (c == '\n')
//        {
//          lineno++;
//          if (cplus_comment)
//            inside = 0;
//          c = getc(finput);
//        }
//          else if (c == EOF)
//        fatal("unterminated comment");
//          else
//        c = getc(finput);
//        }
//
//      break;
//
//    case '\n':
//      lineno++;
//
//    case ' ':
//    case '\t':
//    case '\f':
//      c = getc(finput);
//      break;
//
//    default:
//      return (c);
//    }
//    }
//}
//
/* Record TOKEN, together with the current value of symval, as the token
   to be reread by the next call to lex.

   NOTE(review): the check at the top of lex() that would hand this token
   back is currently commented out -- confirm where the pushed-back token
   is meant to be consumed.  */
void unlex(int token)
{
  unlexed_symval = symval;   /* remember the symbol that goes with the token */
  unlexed = token;
}

/* Read the next token from finput and return its token-type code.

   With the pushed-back-token check and the identifier and number cases
   commented out (see below), the tokens recognized here are:

     end of file        -> ENDFILE
     'c' (char literal) -> IDENTIFIER; symval points to the symbol-table
                           entry obtained from getsym() for the literal's
                           canonical name, which is left in token_buffer
     ,  :  ;  |         -> COMMA, COLON, SEMICOLON, BAR
     {                  -> LEFT_CURLY
     = followed by {    -> LEFT_CURLY (blanks, tabs and newlines between
                           the two are skipped); = followed by anything
                           else -> ILLEGAL
     <name>             -> TYPENAME; the name is left in token_buffer
     %...               -> whatever parse_percent_token() returns

   Every other character yields ILLEGAL.  Malformed literals and
   unterminated type names are reported through fatal()/fatals().  */
int lex()
{
  int c;
  char *p;

  /* NOTE(review): while this check is disabled, a token recorded by
     unlex() is not handed back by this function.  */
//  if (unlexed >= 0)
//    {
//      symval = unlexed_symval;
//      c = unlexed;
//      unlexed = -1;
//      return (c);
//    }

  c = skip_white_space();

  switch (c)
    {
    case EOF:
      return (ENDFILE);

//    case 'A':  case 'B':  case 'C':  case 'D':  case 'E':
//    case 'F':  case 'G':  case 'H':  case 'I':  case 'J':
//    case 'K':  case 'L':  case 'M':  case 'N':  case 'O':
//    case 'P':  case 'Q':  case 'R':  case 'S':  case 'T':
//    case 'U':  case 'V':  case 'W':  case 'X':  case 'Y':
//    case 'Z':
//    case 'a':  case 'b':  case 'c':  case 'd':  case 'e':
//    case 'f':  case 'g':  case 'h':  case 'i':  case 'j':
//    case 'k':  case 'l':  case 'm':  case 'n':  case 'o':
//    case 'p':  case 'q':  case 'r':  case 's':  case 't':
//    case 'u':  case 'v':  case 'w':  case 'x':  case 'y':
//    case 'z':
//    case '.':  case '_':
//      p = token_buffer;
//      while (isalnum(c) || c == '_' || c == '.')
//    {
//      if (p == token_buffer + maxtoken)
//        p = grow_token_buffer(p);
//
//      *p++ = c;
//      c = getc(finput);
//    }
//
//      *p = 0;
//      ungetc(c, finput);
//      symval = getsym(token_buffer);
//      return (IDENTIFIER);

//    case '0':  case '1':  case '2':  case '3':  case '4':
//    case '5':  case '6':  case '7':  case '8':  case '9':
//      {
//    numval = 0;
//
//    while (isdigit(c))
//      {
//        numval = numval*10 + c - '0';
//        c = getc(finput);
//      }
//    ungetc(c, finput);
//    return (NUMBER);
//      }

    case '\'':
      translations = -1;

      /* parse the literal token and compute character code in  code  */

      c = getc(finput);
      {
        int code = 0;

        if (c == '\\')
          {
            c = getc(finput);

            if (c <= '7' && c >= '0')
              {
                /* Octal escape.  When the loop ends, c already holds the
                   character that follows the last octal digit.  */
                while (c <= '7' && c >= '0')
                  {
                    code = (code * 8) + (c - '0');
                    c = getc(finput);
                    if (code >= 256 || code < 0)
                      fatals("malformatted literal token `\\%03o'", (void*) code);
                  }
              }
            else
              {
                if (c == 't')
                  code = '\t';
                else if (c == 'n')
                  code = '\n';
                else if (c == 'a')
                  code = '\007';
                else if (c == 'r')
                  code = '\r';
                else if (c == 'f')
                  code = '\f';
                else if (c == 'b')
                  code = '\b';
                else if (c == 'v')
                  code = 013;
                else if (c == 'x')
                  {
                    /* Hexadecimal escape.  Only 0-9, a-f and A-F are hex
                       digits; any other letter ends the digit sequence
                       instead of being folded into the value.  */
                    c = getc(finput);
                    while ((c <= '9' && c >= '0')
                           || (c >= 'a' && c <= 'f')
                           || (c >= 'A' && c <= 'F'))
                      {
                        code *= 16;
                        if (c <= '9' && c >= '0')
                          code += c - '0';
                        else if (c >= 'a' && c <= 'f')
                          code += c - 'a' + 10;
                        else if (c >= 'A' && c <= 'F')
                          code += c - 'A' + 10;
                        if (code >= 256 || code<0)/* JF this said if(c>=128) */
                          fatals("malformatted literal token `\\x%x'",(void*) code);
                        c = getc(finput);
                      }
                    /* Give back the character that ended the digits; the
                       getc() shared by all escapes below reads it again.  */
                    ungetc(c, finput);
                  }
                else if (c == '\\')
                  code = '\\';
                else if (c == '\'')
                  code = '\'';
                else if (c == '\"') /* JF this is a good idea */
                  code = '\"';
                else
                  {
                    if (c >= 040 && c <= 0177)
                      fatals ("unknown escape sequence `\\%c'", (void*) c);
                    else
                      fatals ("unknown escape sequence: `\\' followed by char code 0x%x", (void*) c);
                  }

                /* Fetch the character following the escape sequence.  */
                c = getc(finput);
              }
          }
        else    /* plain character, not introduced by a backslash */
          {
            code = c;
            c = getc(finput);
          }

        /* In every branch above c now holds the character after the
           literal's single character; it has to be the closing quote.  */
        if (c != '\'')
          fatal("multicharacter literal tokens not supported");

        /* now fill token_buffer with the canonical name for this character
           as a literal token.  Do not use what the user typed,
           so that '\012' and '\n' can be interchangeable.  */

        p = token_buffer;
        *p++ = '\'';
        if (code == '\\')
          {
            *p++ = '\\';
            *p++ = '\\';
          }
        else if (code == '\'')
          {
            *p++ = '\\';
            *p++ = '\'';
          }
        else if (code >= 040 && code != 0177)
          *p++ = code;
        else if (code == '\t')
          {
            *p++ = '\\';
            *p++ = 't';
          }
        else if (code == '\n')
          {
            *p++ = '\\';
            *p++ = 'n';
          }
        else if (code == '\r')
          {
            *p++ = '\\';
            *p++ = 'r';
          }
        else if (code == '\v')
          {
            *p++ = '\\';
            *p++ = 'v';
          }
        else if (code == '\b')
          {
            *p++ = '\\';
            *p++ = 'b';
          }
        else if (code == '\f')
          {
            *p++ = '\\';
            *p++ = 'f';
          }
        else
          {
            /* Every remaining code is written as three octal digits
               (no backslash is emitted in front of them).  */
            *p++ = code / 0100 + '0';
            *p++ = ((code / 010) & 07) + '0';
            *p++ = (code & 07) + '0';
          }
        *p++ = '\'';
        *p = 0;

        symval = getsym(token_buffer);
        symval->internalClass = STOKEN;
        /* Keep a user token number that is already set (non-zero).  */
        if (! symval->user_token_number)
          symval->user_token_number = code;
        return (IDENTIFIER);
      }

    /* Single-character punctuation tokens.  */
    case ',':
      return (COMMA);

    case ':':
      return (COLON);

    case ';':
      return (SEMICOLON);

    case '|':
      return (BAR);

    case '{':
      return (LEFT_CURLY);

    case '=':
      /* Skip blanks, tabs and newlines after the `=', counting lines.  */
      do
        {
          c = getc(finput);
          if (c == '\n') lineno++;
        }
      while(c==' ' || c=='\n' || c=='\t');

      if (c == '{')
        return(LEFT_CURLY);
      else
        {
          ungetc(c, finput);
          return(ILLEGAL);
        }

    case '<':
      /* Collect everything up to the closing `>' into token_buffer.  */
      p = token_buffer;
      c = getc(finput);
      while (c != '>')
        {
          if (c == '\n' || c == EOF)
            fatal("unterminated type name");

          /* token_buffer holds maxtoken characters plus the terminator.  */
          if (p == token_buffer + maxtoken)
            p = grow_token_buffer(p);

          *p++ = c;
          c = getc(finput);
        }
      *p = 0;
      return (TYPENAME);


    case '%':
      return (parse_percent_token());

    default:
      return (ILLEGAL);
    }
}



/* parse a token which starts with %.  Assumes the % has already been read and discarded.  */

//int parse_percent_token ()
//{
//  register int c;
//  register char *p;
//
//  p = token_buffer;
//  c = getc(finput);
//
//  switch (c)
//    {
//    case '%':
//      return (TWO_PERCENTS);
//
//    case '{':
//      return (PERCENT_LEFT_CURLY);
//
//    case '<':
//      return (LEFT);
//
//    case '>':
//      return (RIGHT);
//
//    case '2':
//      return (NONASSOC);
//
//    case '0':
//      return (TOKEN);
//
//    case '=':
//      return (PREC);
//    }
//  if (!isalpha(c))
//    return (ILLEGAL);
//
//  while (isalpha(c) || c == '-')
//  {
//      if (p == token_buffer + maxtoken)
//        p = grow_token_buffer(p);
//
//      *p++ = c;
//      c = getc(finput);
//  }
//  ungetc(c, finput);
//  *p = 0;
//
//  Element *ep = find_if(defineSymbols, defineSymbols + nDefineSymbols, 
//                          Comparator(token_buffer));
//
//  return ep != defineSymbols + nDefineSymbols ? ep->second : ILLEGAL;
//}
//

//void init_lex()
//{
//  maxtoken = 100;
//  *(token_buffer = new char[maxtoken + 1]) = 0;
//  unlexed = -1;
//}
