//
// NAME:
//   flex.h
// TITLE:
//   Lexical Parser.
// FUNCTION:
//   Flex is a lexical parser that breaks an input stream, either a
//   null-terminated string or a file, into a sequence of tokens.
//   These tokens may be used to drive an LL(1) parser.
//
//   Flex is derived from the April 1992 Lex parser.  Its enhancements
//   are documented with the Flex object declaration.
//
// OBJECTS:
//   Flex.
//   flex_token_class.
// METHODS:
//   Flex::open_file()    Open a file for parsing.
//   Flex::open_string()  Open a null-terminated string for parsing.
//   Flex::close()        Close the parser.
//   Flex::peek*()        Peek at the next token in the input.
//   Flex::get*()         Get the next token from the input.
// PREREQUISITES:
//   Common.
//
// AUTHOR:
//   Brendan Jones. (Contact through www.kdef.com/geek/vic)
// RIGHTS:
//   (c) Copyright Brendan Jones, 1995-2006.  All Rights Reserved.
// LEGAL NOTICE:
//   See legal.txt before viewing, modifying or using this software.
// CONTACT:
//   Web:	http://www.kdef.com/geek/vic
//   Email:	See www.kdef.com/geek/vic
// DATE:
//   April 9, 1992.
// RIGHTS:
//  This file is part of The Quetzalcoatl Compiler.
//  
//  The Quetzalcoatl Compiler is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//   the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//  
//  The Quetzalcoatl Compiler is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//  
//  You should have received a copy of the GNU General Public License
//  along with The Quetzalcoatl Compiler; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
//
//
// MODIFICATIONS:
//   NAME  MODNO  DATE       DESCRIPTION
//   bj        1   7/Sep/92  C++ conversion.
//   bj        2  29/Dec/95  Adapted from Lex.
//   bj        3  18/Mar/96  Added Lex compatibility.
//                           Integrated with Scilex.
//   bj        4    10apr96  Added hex.
//   bj        5    22may97  peek_numeric() was looking for type id.
//			     It should have been looking for int!
//   bj        6    20jul97  Added literal char (and prior to this
//                           assign op and no real).
//   bj        7     6aug97  Added parse_warning() and string_convert().
//			     check() now rejects strings.
//   bj        8     4jan98  Added string_enclose(), whitespace_before(),
//			     and HTML escape token.
//   bj        9     7jan98  Add support for no_comment and linestrings.
//   bj       10    15apr99  Added set_comment_char().
//
// TODO:
//   Unicode support (do this using a template).
//
//
#ifndef FLEX_H
#define FLEX_H
#include "common.h"




// CONSTANT Flex Source Size.
//
// Size, in chars, of the fixed array Flex::source that holds the
// description of the parsing source (eg. the filename when parsing
// a file).  This is the largest source description allowed.
//
// NOTE(review): whether the terminating null counts against this
// limit is decided by the implementation -- confirm before relying
// on 127 versus 128 usable characters.
//
//
#define flex_source_size 128




// TYPE Flex Syntax.
//
// A bit set selecting which syntax-specific symbols Flex recognises
// as single tokens.  Each flex_syntax_* flag enables one feature;
// several may be supported at once by combining the flags with a
// bitwise OR.
//
// Equal:       <= >= == !=
// Goal:        :-
// Assign:      :=
// Assign Op:   += -= *= /= %=
// No real:     Doesn't return real numbers as a single token.
//              eg. "2.05" is returned "2", ".", "05".
// Lit Char:    Literal character. eg. 'a'
// Amp Escape:  Accepts `&'symbolname[`;'] as a HTML escape token.
// No Comment:  Don't support C/C++ style comments.
// Linestrings: Allow multiline strings.
//
// NOTE(review): <ushort> is not declared in this file; presumably it
// comes from common.h -- confirm it is at least 16 bits wide, since
// the highest flag in use is 0x0100.
//
//
typedef ushort flex_syntax;
// Individual syntax flags.  Each occupies one bit so that any set of
// them can be combined with a bitwise OR.
//
#define flex_syntax_default      0          // No special symbols.
#define flex_syntax_equal        (1 << 0)   // <= >= == !=
#define flex_syntax_goal         (1 << 1)   // :-
#define flex_syntax_assign       (1 << 2)   // :=
#define flex_syntax_assign_op    (1 << 3)   // += -= *= /= %=
#define flex_syntax_no_real      (1 << 4)   // Reals are not one token.
#define flex_syntax_lit_char     (1 << 5)   // Literal character, eg. 'a'
#define flex_syntax_amp_escape   (1 << 6)   // HTML escape token.
#define flex_syntax_nocomment    (1 << 7)   // No C/C++ style comments.
#define flex_syntax_linestrings  (1 << 8)   // Multiline strings.


// Some predefined syntaxes, spelt out as the flags they combine.
//
#define flex_syntax_c     (flex_syntax_equal | flex_syntax_assign_op)
#define flex_syntax_red   (flex_syntax_equal     | flex_syntax_assign  | \
                           flex_syntax_assign_op | flex_syntax_no_real | \
                           flex_syntax_lit_char)
#define flex_syntax_html  (flex_syntax_amp_escape | flex_syntax_nocomment | \
                           flex_syntax_linestrings)




// TYPE Flex Token Class.
//
// Different types of tokens.  The *_lex names are aliases for the
// corresponding flex_* values, kept for compatibility with Lex;
// flex_hex, flex_char and flex_escape have no Lex alias.
//
// The enumerator list deliberately has no trailing comma: one is
// only permitted from C99 / C++11 onwards.
//
typedef enum
	{
	flex_int,               // A positive integer.
	flex_float,             // A positive real number.
	flex_id,                // An Identifier.
	flex_hex,               // A hexadecimal number.
	flex_punc,              // A punctuation character.
	flex_string,            // A string.
	flex_char,		// A literal character
	flex_escape,		// A HTML escape token.
	flex_null,              // A null token (eg. EOF).

	// Compatibility enumerations for Lex.
	//
	int_lex    = flex_int,
	float_lex  = flex_float,
	id_lex     = flex_id,
	punc_lex   = flex_punc,
	string_lex = flex_string,
	null_lex   = flex_null
	}
	flex_token_class;




// OBJECT Flex.
//
// Flex is a lexical parsing object.  It can take an input stream from
// either a file or a null-terminated string in memory and break it into
// a sequence of tokens.  Flex automatically skips white space and C++
// style comments.  Flex may be used to drive an LL(1) parser.
//
// Flex is based on Lex, written April 1992.  Flex has the following
// improvements:
//
// + Code Cleaned up.
// + Doesn't use scanf libraries.
// + Can read either from a file or an in memory null-terminated string.
// + Choose either to abort or merely flag errors.
// + Keeps track of source line and column number for easier debugging.
// + Recognises symbols unique to some syntaxes; eg. ":=", "<=".
//
//
class Flex
{
  protected:
    // GROUP Lookahead.
    //

    // The type of the token in the lookahead buffer.  eg. <flex_int>
    //
    flex_token_class lookahead_type;

    // This contains the next token in the input stream.  The very
    // last token is the EOF token, signified by the first byte in
    // <lookahead> being set to zero.
    //
    char     *lookahead;

    // Temporary buffer sometimes used by get() to return a token.
    //
    char    *lookahead_copy;


    // The maximum size of the <lookahead> and <lookahead_copy> buffers.
    //
    long     flex_token_size;


    // Data Source.
    //
    char    *buffer;            // If non-NULL the string we are parsing.
    long     index;             // Our current position within that string.

    // File Source.
    //
    FILE    *file;              // If non-NULL the file we are parsing.

    char     source[flex_source_size];  // Description of our parsing source.
                                        // eg. Filename if a file source.

    char     ch;                // The next character in the input stream.
    boolean  ch_available;      // True iff <ch> is currently loaded.  If
                                // not we must fetch it from the input stream.

    boolean  exhausted;         // True iff we have hit the end of the input
                                // stream.  Because we always look one token
                                // ahead, this condition occurs one token
                                // before we start returning true to <eof()>.

    flex_syntax syntax;         // What syntax are we parsing?  This tells
                                // us what special character sequences
                                // should be returned as a single token.

    boolean  string_conversion; // If true, convert any slash codes (eg. "\n")
                                // in strings into the corresponding
                                // character.  Otherwise treat slashes as
                                // any other character.

    boolean  string_enclosure;  // Return strings as a single token.

    boolean  space_before;      // Whitespace appears before token.
    boolean  space_after;       // Whitespace appears after  token.

    char     appl_comment_char; // Application defined comment character (or 0).


  public:
    long     lines;             // Number of lines read (starting at 0)
    long     columns;           // Position in current line (starting at 0)
    long     token_lines,       // Position of current token (starting at 0)
             token_columns;     // Position of current token (starting at 0)

    boolean  abort;             // If true a parsing error will cause the
                                // entire program to abend.  If false the
                                // error is only recorded in <error>.
    long     error;             // How many errors have occurred?
    long     warning;           // How many warnings have occurred?


  public:
    // Constructor.
    //
    // NOTE(review): <Max_token_size> presumably becomes
    // <flex_token_size>, the size of the token buffers -- confirm
    // against the implementation.
    //
    Flex(long Max_token_size = 1024);

    // Destructor.
    //
    // NOTE(review): this class holds raw pointers and declares a
    // destructor but no copy constructor or assignment operator.  If
    // the destructor releases the buffers, copying a Flex object is
    // unsafe -- confirm and consider disabling copying.
    //
   ~Flex(void);


    // GROUP Source Control.
    //

    // Parse the file <Filename>.
    //
    void open_file(
        const char  *Filename,
        flex_syntax  Syntax = flex_syntax_default);


    // Parse the null-terminated string <Data>.
    //
    // NOTE(review): <Source> is presumably a description of the
    // input, recorded in <source> for error reporting -- confirm
    // against the implementation.
    //
    void open_string(
        char        *Data,
        char        *Source = NULL,
        flex_syntax  Syntax = flex_syntax_default);


    // Finish parsing.
    //
    void close(void);




    // GROUP Options.
    //

    // Convert slash sequences in strings (eg. '\n') into their control
    // character equivalents if true.  Treat slashes like any other
    // character if false.
    //
    // CAUTION:
    //   While this may be changed at anytime, due to lookahead the
    //   next token is unaffected by any change.
    //
    void string_convert(boolean Convert)
      {
      string_conversion = Convert;
      }

    // When <Enclose> is true, string delimiters (' and ") have special
    // meaning.  All characters between two identical string delimiters
    // are returned as a single token.  The delimiters do not appear
    // within the string.  When false, the string delimiters have no
    // special meaning.
    //
    // CAUTION:
    //   While this may be changed at anytime, due to lookahead the
    //   next token is unaffected by any change.
    //
    void string_enclose(boolean Enclose)
      {
      string_enclosure = Enclose;
      }

    // Treat character <Comment> as single-line comment.
    // The C/C++ comment styles are not affected by this option.
    //
    void set_comment_char(char Comment)
      {
      appl_comment_char = Comment;
      }




    // GROUP Source Parsing.
    //

    // Peek at the next token without accepting it.  The pointer
    // returned is the parser's own <lookahead> buffer, not a copy.
    //
    char   *peek(void)
      {
      return lookahead;
      }


    // Peek at the next token and return true if it matches <S>
    // exactly.  Returns false, rather than handing a NULL pointer to
    // strcmp(), if either <S> or the lookahead buffer is NULL.
    //
    boolean peek_matches(const char *S) const
      {
      return lookahead != NULL && S != NULL && strcmp(lookahead, S) == 0;
      }


    // Peek at the token in the <lookahead_copy> buffer.  The pointer
    // returned is the parser's own buffer, not a copy.
    //
    char   *peek_last(void)
      {
      return lookahead_copy;
      }


    // Peek at the type of the next token.
    //
    flex_token_class peek_type(void) const
      {
      return lookahead_type;
      }


    // Is this a number?  True for integer, real and hex tokens.
    //
    boolean     peek_numeric(void) const
      {
      return lookahead_type == flex_int   ||    // MOD005
             lookahead_type == flex_float ||
             lookahead_type == flex_hex;
      }

    // Is this an integer?  True for integer and hex tokens.
    //
    boolean     peek_int(void) const
      {
      return lookahead_type == flex_int ||      // MOD005
             lookahead_type == flex_hex;
      }


    // Have we reached the end of the input stream?  This tests the
    // class of the lookahead token, not the first byte of <lookahead>.
    //
    boolean eof(void) const
      {
      return lookahead_type == flex_null;
      }


    // Does whitespace appear before the next token?
    //
    boolean whitespace_before(void) const
      {
      return space_before;
      }

    // Does whitespace appear after the next token?
    //
    boolean whitespace_after(void) const
      {
      return space_after;
      }


    // Get the next token.  If a buffer <S> is provided it will be
    // copied into it.
    //
    // NOTE(review): <Size> is presumably the capacity of <S>, with -1
    // meaning "not specified" -- confirm against the implementation.
    //
    char   *get(char *S = NULL, int Size = -1);


    // Get an integer.
    //
    long    get_int(void);


    // Get a double-precision real number.
    //
    double  get_double(void);


    // Get a single-precision real number.  This is get_double()
    // narrowed to float.
    //
    float   get_float(void)
      {
      return (float)get_double();
      }


    // Get an identifier.  <S> and <Size> are as for get().
    //
    char   *get_id(char *S = NULL, int Size = -1);


    // Get a string.  <S> and <Size> are as for get().
    //
    char   *get_string(char *S = NULL, int Size = -1);


    // Get a char.
    //
    char   get_char(void);


    // Check that the next token matches the string <S> to at least
    // the first <Minimum> characters.
    //
    void    check(char *S, byte Minimum);


    // Same but checks for a complete match.
    //
    void    check(char *S);


    // Return true and accept the next token iff it matches the
    // string <S> to the first <Minimum> characters.
    //
    boolean matches(char *S, byte Minimum);


    // Same but tests for a complete match.
    //
    boolean matches(char *S);


    // Skip to the end of the current line.
    //
    void skip_eol(void);




    // GROUP Error Handling.
    //

    // Register a parsing error.  If <abort> is true then the error
    // message is printed and the application abends.  If <abort>
    // is false then the error is recorded in <error> for processing
    // by the application.
    //
    void    parse_error(const char *Message, char *Token = NULL);


    // Register a parsing warning.  Same as parse_error(),
    // but doesn't abort.
    //
    void    parse_warning(const char *Message, char *Token = NULL);


    // GROUP Compatibility.
    //

    // Get a single-precision real number.  Same as get_float().
    //
    float   get_real(void)
      {
      return (float)get_double();
      }


    // Get an integer.  The argument is ignored; this overload only
    // forwards to get_int(void).  The parameter is left unnamed so
    // that it does not raise an unused-parameter warning.
    //
    long    get_int(long /* Value */)
      {
      return get_int();
      }



  protected:
    // Initialise variables common to <open_file> and <open_string>
    //
    void    init(void);


    // Read the next token into <lookahead>, classifying it in
    // <lookahead_type>.
    //
    void    read_next(void);


    // Return the next character in the input stream.
    //
    char    next(void);


    // Reject the character just returned by <next>.  <Read_next> calls
    // <reject_next> when it realises the next character isn't part of
    // the current token.  The rejected character is thus saved up until
    // the next call to <next>.  A character may be rejected any number
    // of times, since rejecting only marks <ch> as available again.
    //
    void    reject_next(void)
      {
      ch_available = true;
      }
};
#endif
