// ---------------------------------------------------------------------------
// - Lexer.cpp                                                               -
// - aleph engine - lexical analyzer class implementation                    -
// ---------------------------------------------------------------------------
// - This program is free software;  you can redistribute it  and/or  modify -
// - it provided that this copyright notice is kept intact.                  -
// -                                                                         -
// - This program  is  distributed in  the hope  that it will be useful, but -
// - without  any  warranty;  without  even   the   implied    warranty   of -
// - merchantability or fitness for a particular purpose.  In no event shall -
// - the copyright holder be liable for any  direct, indirect, incidental or -
// - special damages arising in any way out of the use of this software.     -
// ---------------------------------------------------------------------------
// - copyright (c) 1999-2001 amaury darsch                                   -
// ---------------------------------------------------------------------------

#include "Lexer.hpp"
#include "Lexical.hpp"

namespace aleph {

  // this procedure discards characters from the input stream up to the
  // end of the current line - an eol character is consumed, while an eof
  // character is pushed back on the stream so that the caller can read it
  static inline void flusheol (Input* is) {
    for (;;) {
      const char c = is->read ();
      // end of stream: give the marker back and stop
      if (c == eofc) {
        is->pushback (c);
        break;
      }
      // end of line: the marker is consumed
      if (c == eolc) break;
    }
  }
  
  // this procedure returns true if the character is an ascii letter,
  // either lower case ('a' to 'z') or upper case ('A' to 'Z')
  static inline bool isalpha (const char c) {
    const bool lower = (c >= 'a') && (c <= 'z');
    const bool upper = (c >= 'A') && (c <= 'Z');
    return lower || upper;
  }

  // this procedure returns true if the character is a decimal digit,
  // that is a character in the range '0' to '9'
  static inline bool isdigit (const char c) {
    return (c >= '0') && (c <= '9');
  }

  // this procedure returns true if the character is a hexadecimal digit,
  // that is '0' to '9', 'a' to 'f' or 'A' to 'F'
  static inline bool ishexa (const char c) {
    const bool decimal = (c >= '0') && (c <= '9');
    const bool lower   = (c >= 'a') && (c <= 'f');
    const bool upper   = (c >= 'A') && (c <= 'F');
    return decimal || lower || upper;
  }

  // create a new lexer bound to an input stream
  // the stream is passed to Object::iref and the line counter starts at 0

  Lexer::Lexer (Input* is) {
    // bind the stream first
    p_is = is;
    Object::iref (is);
    // no line has been read yet
    d_lnum = 0;
  }

  // destroy this lexer
  // the bound input stream is passed to Object::dref, which balances the
  // Object::iref call made by the constructor

  Lexer::~Lexer (void) {
    Object::dref (p_is);
  }

  // return the next available token
  //
  // the lexer is a hand written state machine: each s_xxx label is a state
  // and each goto is a transition. The characters which are part of the
  // token are accumulated in d_buffer, and the first character which is not
  // part of the token is pushed back on the input stream.
  //
  // the token is built with the line counter d_lnum. The counter is
  // incremented when an end of line is read, when a comment ends and when
  // an error token is produced.
  //
  // an error token is returned when no input stream is bound, when the
  // first character cannot start a token, when a qualified name is
  // malformed, or when a regex, a string or a character literal is still
  // open at the end of the stream. Except for the unbound stream case, the
  // rest of the current line is discarded before the error token is returned.

  Token Lexer::get (void) {
    // exponent tracking - used by the real number state only
    bool eflag  = false; // an exponent marker 'e' or 'E' has been read
    bool esign  = false; // an exponent sign has been read
    bool eonly  = false; // the exponent marker is the last character added
    // last character added while reading a qualified name
    char clast  = nilc;
    // bracket nesting depth while reading a regex
    long rcount = 0;

    // check for nil stream and reset buffer
    if (p_is == nilp) return Token (Token::ERROR,d_lnum);
    d_buffer.reset ();

    // initial state: skip the blanks and dispatch on the first character
  s_begin:
    char c = p_is->read ();
    switch (c) {
    case ' ':
    case tabc:
      goto s_begin;
    case eofc:
      return Token (Token::EOF,d_lnum);
    case eolc:
      return Token (Token::EOL,++d_lnum);
    case '(':
      return Token (Token::RFB, d_lnum);
    case ')':
      return Token (Token::RFE, d_lnum);
    case '{':
      return Token (Token::BFB, d_lnum);
    case '}':
      return Token (Token::BFE, d_lnum);
    case '#':
      goto s_comment;
    case '\'':
      goto s_character;
    case '"':
      goto s_string;
    case '[':
      rcount++;
      d_buffer.add (c);
      goto s_regex;
    case '+':
    case '-':
      d_buffer.add (c);
      goto s_number;
    case '0':
      d_buffer.add (c);
      goto s_numfmt;
    default:
      d_buffer.add (c);
      if (isdigit (c) == true) goto s_integer;
      if (Lexical::valid (c) == true) goto s_lexical;
      break;
    }
    goto s_error;

    // comment state: everything up to the end of line or stream is ignored
  s_comment:
    c = p_is->read ();
    if (c == eolc) return Token (Token::EOL,++d_lnum);
    if (c == eofc) return Token (Token::EOF,++d_lnum);
    goto s_comment;

    // a sign has been read: it starts a number or a lexical name
  s_number:
    c = p_is->read ();
    if (c == '0') {
      d_buffer.add (c);
      goto s_numfmt;
    }
    if (isdigit (c) == true) {
      d_buffer.add (c);
      goto s_integer;
    }
    if (Lexical::valid (c) == true) {
      d_buffer.add (c);
      goto s_lexical;
    }
    p_is->pushback (c);
    return Token (Token::LEXICAL, d_buffer.tostring (), d_lnum);

    // lexical name state: a ':' turns the name into a qualified name
  s_lexical:
    c = p_is->read ();
    if (c == ':') {
      d_buffer.add (clast = c);
      goto s_qualified;
    }
    if (Lexical::valid (c) == true) {
      d_buffer.add (c);
      goto s_lexical;
    }
    p_is->pushback (c);
    return Token (Token::LEXICAL, d_buffer.tostring (), d_lnum);

    // qualified name state: two consecutive ':' or a trailing ':' is an error
  s_qualified:
    c = p_is->read ();
    if ((Lexical::valid (c) == true) || (c == ':')) {
      if ((clast == ':') && (c == ':')) {
        d_buffer.add (c);
        goto s_error;
      }
      d_buffer.add (clast = c);
      goto s_qualified;
    }
    if (clast == ':') goto s_error;
    p_is->pushback (c);
    return Token (Token::QUALIFIED, d_buffer.tostring (), d_lnum);

    // a leading '0' has been read: the next character selects the format
  s_numfmt:
    c = p_is->read ();
    if (isdigit (c) == true) {
      d_buffer.add (c);
      goto s_integer;
    }
    // the relatif suffix is consumed but not added to the buffer
    if ((c == 'r') || (c == 'R')) {
      goto s_relatif;
    }
    if (c == '.') {
      d_buffer.add (c);
      goto s_real;
    }
    if ((c == 'x') || (c == 'X')) {
      d_buffer.add (c);
      goto s_hexa;
    }
    if ((c == 'b') || (c == 'B')) {
      d_buffer.add (c);
      goto s_binary;
    }
    if (Lexical::valid (c) == true) {
      d_buffer.add (c);
      goto s_lexical;
    }
    p_is->pushback (c);
    return Token (Token::INTEGER, d_buffer.tostring (), d_lnum);

    // decimal integer state
  s_integer:
    c = p_is->read ();
    if (isdigit (c) == true) {
      d_buffer.add (c);
      goto s_integer;
    }
    if ((c == 'r') || (c == 'R')) {
      goto s_relatif;
    }
    if (c == '.') {
      d_buffer.add (c);
      goto s_real;
    }
    if (Lexical::valid (c) == true) {
      d_buffer.add (c);
      goto s_lexical;
    }
    p_is->pushback (c);
    return Token (Token::INTEGER, d_buffer.tostring (), d_lnum);

    // hexadecimal integer state: a '_' separator is read and dropped
  s_hexa:
    c = p_is->read ();
    if (c == '_') {
      goto s_hexa;
    }
    if ((c == 'r') || (c == 'R')) {
      goto s_relatif;
    }
    if (ishexa (c) == true) {
      d_buffer.add (c);
      goto s_hexa;
    }
    if (Lexical::valid (c) == true) {
      d_buffer.add (c);
      goto s_lexical;
    }
    p_is->pushback (c);
    return Token (Token::INTEGER, d_buffer.tostring (), d_lnum);

    // binary integer state: a '_' separator is read and dropped
  s_binary:
    c = p_is->read ();
    if (c == '_') {
      goto s_binary;
    }
    if ((c == 'r') || (c == 'R')) {
      goto s_relatif;
    }
    if ((c == '0') || (c == '1')) {
      d_buffer.add (c);
      goto s_binary;
    }
    if (Lexical::valid (c) == true) {
      d_buffer.add (c);
      goto s_lexical;
    }
    p_is->pushback (c);
    return Token (Token::INTEGER, d_buffer.tostring (), d_lnum);

    // relatif state: the suffix has been read, the number must end here
  s_relatif:
    c = p_is->read ();
    if (Lexical::valid (c) == true) {
      d_buffer.add (c);
      goto s_lexical;
    }
    p_is->pushback (c);
    return Token (Token::RELATIF, d_buffer.tostring (), d_lnum);

    // real number state: digits, one optional exponent marker and one
    // optional sign placed right after the exponent marker
  s_real:
    c = p_is->read ();
    if (isdigit (c) == true) {
      if ((eflag == true) && (eonly == true)) eonly = false;
      d_buffer.add (c);
      goto s_real;
    }
    // accept the exponent marker once - the test must be a comparison, an
    // assignment here is always false and disables the exponent notation
    if (((c == 'e') || (c == 'E')) && (eflag == false)) {
      d_buffer.add ('e');
      eflag = true;
      eonly = true;
      goto s_real;
    }
    if (((c == '+') || (c == '-')) && (eflag == true) && (esign == false) &&
        (eonly == true)) {
      d_buffer.add (c);
      esign = true;
      eonly = false;
      goto s_real;
    }
    if (Lexical::valid (c) == true) {
      d_buffer.add (c);
      goto s_lexical;
    }
    p_is->pushback (c);
    return Token (Token::REAL, d_buffer.tostring (), d_lnum);

    // character literal state: read up to the closing quote
  s_character:
    c = p_is->read ();
    // an unterminated literal is an error - without this test the state
    // keeps adding characters for as long as the stream returns eof
    if (c == eofc) goto s_error;
    if (c == '\'')
      return Token (Token::CHARACTER, d_buffer.tostring (), d_lnum);
    if (c == '\\') goto s_charesc;
    d_buffer.add (c);
    goto s_character;

    // escape sequence inside a character literal
  s_charesc:
    c = p_is->read ();
    switch (c) {
    case 'n':
      d_buffer.add ('\n');
      break;
    case 't':
      d_buffer.add ('\t');
      break;
    case '\\':
      d_buffer.add ('\\');
      break;
    case '\'':
      d_buffer.add ('\'');
      break;
    default:
      // unknown escape: keep the backslash and read the character again
      d_buffer.add ('\\');
      p_is->pushback (c);
      break;
    }
    goto s_character;

    // string literal state: read up to the closing double quote
  s_string:
    c = p_is->read ();
    // an unterminated literal is an error - without this test the state
    // keeps adding characters for as long as the stream returns eof
    if (c == eofc) goto s_error;
    if (c == '"') return Token (Token::STRING,d_buffer.tostring (), d_lnum);
    if (c == '\\') goto s_stresc;
    d_buffer.add (c);
    goto s_string;

    // escape sequence inside a string literal
  s_stresc:
    c = p_is->read ();
    switch (c) {
    case 'n':
      d_buffer.add ('\n');
      break;
    case 't':
      d_buffer.add ('\t');
      break;
    case '\\':
      d_buffer.add ('\\');
      break;
    case '"':
      d_buffer.add ('"');
      break;
    default:
      // unknown escape: keep the backslash and read the character again
      d_buffer.add ('\\');
      p_is->pushback (c);
      break;
    }
    goto s_string;

    // regex state: read up to the ']' which closes the first '['
    // rcount is at least 1 here, so it cannot become negative
  s_regex:
    c = p_is->read ();
    switch (c) {
    case eofc:
      goto s_error;
    case '[':
      d_buffer.add (c);
      rcount++;
      goto s_regex;
    case ']':
      d_buffer.add (c);
      if (--rcount == 0)
        return Token (Token::REGEX, d_buffer.tostring (), d_lnum);
      goto s_regex;
    default:
      d_buffer.add (c);
      goto s_regex;
    }

    // error state: discard the rest of the line and report what was read
  s_error:
    flusheol (p_is);
    return Token (Token::ERROR, d_buffer.tostring (), ++d_lnum);
  }
}
