//
// NAME:
//   upl.h
// TITLE:
//   UPL/Quetzalcoatl.
// FUNCTION:
//   UPL/Quetzalcoatl is a tiny C compiler and 6502 assembler and linker.
//   I originally wrote UPL/Quetzalcoatl Mid-1998 to gain experience in
//   LL(N) compiler development, and as a test-bed for optimisation.
//   Unfortunately real-work intervened, and I never had an opportunity
//   to finish it.  As of September 1999 a year has passed, and I still
//   haven't had time to return to Quetzalcoatl.
//
//   Harry Dodgson improved Quetzalcoatl in 2005, chasing down many bugs
//   and improving portability.
//
//   I had wanted to comment Quetzalcoatl to a text-book standard before
//   releasing the code under GNU, but unfortunately the time just isn't there.
//   Accordingly I've decided to proceed with the release as-is.
//   The source headers are mostly commented, but the body is largely uncommented. 
//   To find your way around, see the "Quetzalcoatl Compiler Hacker's Guide:"
//
//	http://www.kdef.com/geek/vic/quetz/develop/chg.html
//
//   There's an HTML version of the language guide here.
//   Quetzalcoatl is best described as Tiny-C:
// 
// 	http://www.kdef.com/geek/vic/quetz/upl_pg.html
// 
//   For general information on Quetzalcoatl see its home page:
// 
// 	http://www.kdef.com/geek/vic/quetzal.html
//
//   See the file readme.txt contained in this distribution.
//
// OBJECTS:
//   upl_Compiler       	UPL/Quetzalcoatl Compiler.
//   upl_Assembler		UPL/Quetzalcoatl 6502 Assembler.
// METHODS:
//   upl_Compiler::program()	Compile a UPL/Quetzalcoatl/C/C++ Program.
//   upl_Assembler::program()   Assemble a Quetzalcoatl 6502 Assembly Program.
// PREREQUISITES:
//   Common.
//
// AUTHOR:
//   Brendan Jones. (Contact through www.kdef.com/geek/vic)
// RIGHTS:
//   (c) Copyright Brendan Jones, 1998.  All Rights Reserved.
// SECURITY:
//   Unclassified.  
// LEGAL NOTICE:
//   See legal.txt before viewing, modifying or using this software.
// CONTACT:
//   Web:	http://www.kdef.com/geek/vic
//   Email:	See www.kdef.com/geek/vic
// DATE:
//   July 6, 1998.
// RIGHTS:
//  This file is part of The Quetzalcoatl Compiler.
//  
//  The Quetzalcoatl Compiler is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//   the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//  
//  The Quetzalcoatl Compiler is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//  
//  You should have received a copy of the GNU General Public License
//  along with The Quetzalcoatl Compiler; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
//
//
// TODO:
//   Here are some suggested improvements in order of usefulness.
//   See the "Compiler Hacker's Guide" (above) for more detail.
//
//   0. Quetzalcoatl needs to support data structures so that it
//	can compile more than just Tiny-C programs;
//      That means support for arrays/pointers/structures/classes.
//   1. Rewrite arrays/pointers/indirection. Implementation began in 1998,
//	but was never finished. Existing code is inelegant and suspect;
//	suggest a complete rewrite. Begin at the symbol table; with a 
//	robust symbol table definition, implementation will be much easier.
//      *PRIORITY*
//   2. Add structures. Perhaps classes. These were partially implemented,
//      but would probably be best with a rewrite. *PRIORITY*
//   3. Test. *PRIORITY*
//   4. More Peephole optimisations in UPL_COPT.CPP
//   5. A Flow Optimiser during UPL_LINK.CPP.
//   6. Tree Optimisations (an on-the-fly rollback optimiser).
//   7. Relocatable assembler. Currently does absolute code, 
//	but not hard to change.
//   8. Support C-isms like &&, || c.f. only and, or, etc.
//   9. Granular Runtime linking. (Flow optimiser may make this a non-issue)
//  10. Integrated debugger (would need a DLL hookup to VICE, etc.)
//  [bj 09oct2006]
//
//
// MODIFICATIONS:
//   NAME  MOD  DATE     DESCRIPTION
//
//
#ifndef UPL_H
#define UPL_H
#include <ctype.h>
#include "common.h"
#include "implemen.h"
#ifndef	COMMON_POSIX
#include <io.h>
#endif
#include "rays.h"
#include "btree.h"
#include "flex.h"
#include "upl_asm.h"
#include "upl_runt.h"
#include "upl_util.h"
#include "co_keys.h"
#ifndef COMMON_MSVC
#include "fstream.h"
#endif



// Constants.
//


// CONSTANT UPL_OBJ_MAGIC.
//
// The <UPL_OBJ_MAGIC> magic number is stored in the header of object files.
// We can use this to distinguish UPL object files from other files.
//

// FIXEND builds use the byte-reversed spelling of the magic number.
#ifdef FIXEND
#  define UPL_OBJ_MAGIC  0x01ac459eu
#else
#  define UPL_OBJ_MAGIC  0x9e45ac01u
#endif

// CONSTANT UPL_OBJ_VERSION.
//
// The <UPL_OBJ_VERSION> version number is stored in the header of
// object files.  We use this to distinguish between different
// versions of UPL object files, so we can tell what version of
// the compiler produced them and thus if they are compatible.
//

// FIXEND builds use the byte-reversed spelling of the version number.
#ifdef FIXEND
#  define UPL_OBJ_VERSION  0x03000002
#else
#  define UPL_OBJ_VERSION  0x02000003
#endif

// CONSTANT UPL_OBJ_COMPAT_VERSION.
//
// The oldest object file version that this release of the
// compiler/linker is able to read.  A release that is not backwards
// compatible simply sets it to <UPL_OBJ_VERSION>, as is done here.
//
#define UPL_OBJ_COMPAT_VERSION  (UPL_OBJ_VERSION)




// CONSTANT UPL_TOKEN_LENGTH.
//
// The largest token we can read from a source file.
//
// Also used as the size of the <upl_symbol_token> character buffer.
#define	UPL_TOKEN_LENGTH	(64)






// Enumerations.
//


// ENUMERATION upl_value_type.
//
// The value types of expressions.
//
// DEVELOPER NOTE:
//   Defining pointers in this manner is somewhat flawed, since it
//   doesn't allow complicated constructs such as eg. a pointer to
//   an array of pointers.  [bj 15sep1999]
//
typedef enum {
    upl_value_none = 0,     // ie. Invalid.

    // Scalar types.
    upl_void,
    upl_byte,
    upl_char,
    upl_boolean,
    upl_ushort,
    upl_short,

    // One pointer type for each of byte..short above.
    upl_pointer_byte,
    upl_pointer_char,
    upl_pointer_boolean,
    upl_pointer_ushort,
    upl_pointer_short,

    // Sentinel: cast to an int, the number of value types above.
    // Must remain the last enumerator.
    upl_value_types
} upl_value_type;




// ENUMERATION upl_symbol_type.
//
// The symbol types.
//
//
typedef enum {
    upl_symbol_none = 0,    // ie. Invalid.

    upl_constant,
    upl_variable,
    upl_function,           // A subroutine returning a result.
    upl_procedure,          // A subroutine returning no result.
    upl_label,              // Label: Assembler only.
    upl_type,               // A type declaration.

    // Sentinel: cast to an int, the number of symbol types above.
    // Must remain the last enumerator.
    upl_symbol_types
} upl_symbol_type;


// ENUMERATION upl_op.
//
// The operators (for arithmetic and logical calculations).
//
//
typedef enum {
    upl_op_none = 0,        // ie. Invalid.

    // Logical.
    upl_xor,
    upl_and,
    upl_or,

    // Comparison.
    upl_eq,
    upl_ne,
    upl_gt,
    upl_ge,
    upl_lt,
    upl_le,

    // Arithmetic.
    upl_mul,
    upl_div,
    upl_mod,
    upl_add,
    upl_sub,

    // Unary.
    upl_not,
    upl_neg,

    // Sentinel: cast to an int, the number of operators above.
    // Must remain the last enumerator.
    upl_ops
} upl_op;





// TYPE upl_addr.
//
// An address in target memory.
//
// DEVELOPER NOTE:
//   Even though the 6502 is restricted to 64Kb, we use a 32-bit long
//   since it lets us test for overflow of the 64Kb database.
//   If we were using a 16-bit integer, this would wrap around and
//   thus we could never detect overflow.  [bj 15sep1999]
//
// Kept wider than 16 bits so that running past 64Kb can be detected
// instead of silently wrapping around.
typedef	long		upl_addr;






// SECTION Relocation.
//

// TYPE upl_reloc_type.
//
// Relocation Type.
//
// Most object modules have relative addressing.  These are converted
// into absolute addressing when all the object modules are finally
// linked together.
//
// Each byte in an object module (data or code) has a relocation type.
// This describes how that byte needs to be transformed when the
// module's relative addressing is converted to absolute addressing.
//
// One value of this type is kept for every byte of object code/data.
// It is built from the upl_reloc_* segment and datatype bits.
typedef	byte		upl_reloc_type;
// The value doesn't need to be relocated.
#define upl_reloc_none      (0x00)

// Segment bits: is this against the data or the code segment?
//
#define upl_reloc_segment   (0x30)  /* mask */
#define upl_reloc_data      (0x10)
#define upl_reloc_code      (0x20)

// Datatype bits: is this a word, a low byte or a high byte?
//
#define upl_reloc_datatype  (0x07)  /* mask */
#define upl_reloc_word      (0x01)
#define upl_reloc_byte_lo   (0x02)
#define upl_reloc_byte_hi   (0x04)

// Predefined relocation types: one segment bit combined with one
// datatype bit.
//
#define upl_data_word       (upl_reloc_data | upl_reloc_word)     /* 0x11 */
#define upl_data_byte_lo    (upl_reloc_data | upl_reloc_byte_lo)  /* 0x12 */
#define upl_data_byte_hi    (upl_reloc_data | upl_reloc_byte_hi)  /* 0x14 */
#define upl_code_word       (upl_reloc_code | upl_reloc_word)     /* 0x21 */
#define upl_code_byte_lo    (upl_reloc_code | upl_reloc_byte_lo)  /* 0x22 */
#define upl_code_byte_hi    (upl_reloc_code | upl_reloc_byte_hi)  /* 0x24 */




// SECTION Patching.
//
// An object module will almost always contain external references,
// accessing memory locations (eg. variables) or calling subroutines
// in other modules or the runtime library.
//
// Each object module contains a list of patches.  Each describes where
// an external reference is made, what external symbol is accessed,
// and how the object code should be modified to reference this.
//
// Patches are external symbols that shall be imported from other
// object module.
//
//

// TYPE upl_patch_method.
//
// How an external symbol's value is applied to a patched location.
// Takes one of the upl_patch_method_* values below.
typedef	byte	upl_patch_method;

// No method given.  This is the default argument of
// upl_Object::add_patch() and upl_Object::out_word_patch().
#define	upl_patch_method_none		0

// The external symbol's value should be added to the location's current value.
//
#define	upl_patch_method_add		1

// The location should be set to the high byte of the external symbol's value.
//
#define	upl_patch_method_set_high	2

// The location should be set to the low byte of the external symbol's value.
//
#define	upl_patch_method_set_low 	3




// TYPE upl_external_id.
//
// This identifies an external symbol used in an object module.
//
// upl_Patch::external_id documents a value of this type as an index
// into <upl_Object::external_names>.
typedef	ushort		upl_external_id;



// OBJECT upl_Patch.
//
// This describes a patch that is required to the object module.
//
class upl_Patch
{
  public:
    // External symbol to be applied.
    //
    // This is actually an index into <upl_Object::external_names>.
    //
    upl_external_id	external_id;

    // The address within the object module to apply it.
    //
    upl_addr		addr;

    // How the external symbol should be applied.
    //
    // NOTE(review): this is declared <boolean>, yet upl_Object::add_patch()
    //   takes a <upl_patch_method>, whose values range over 0..3
    //   (none/add/set_high/set_low).  If <boolean> is ever a true
    //   C++ bool, set_high (2) and set_low (3) would collapse to 1.
    //   The type probably ought to be <upl_patch_method>, but changing it
    //   may alter the in-memory (and possibly on-disk) layout -- confirm
    //   against upl_Patches::write()/read() before changing.
    //
    boolean             patch_method;

  public:
    // Stream output of a patch <P> on <Str>.
    //
    friend ostream& operator << (ostream& Str, const upl_Patch& P);
};



// OBJECT upl_Patches.
//
// A list of all patches required by an object module.
//
class upl_Patches : public Mantaray_long<upl_Patch>
{
  public:
    // Append this patch list to the object file <f>.
    //
    // NOTE(review): <Patches> is presumably the number of entries
    //   in use (cf. upl_Object::patches) -- confirm in the implementation.
    //
    void write(FILE *f, long Patches) const;

    // Read this patch list from the object file <f>.
    //
    // NOTE(review): the result is presumably the number of patches
    //   read -- confirm in the implementation.
    //
    long read(FILE *f);

    // Sort the patch list.
    //
    // The major key is <external_id>; the minor key is <addr>.
    //
    void sort(long Patches);
};






// OBJECT upl_Object_state.
//
// This describes the number of patches and objects (bytes)
// in an object module.
//
class upl_Object_state
{
  public:
    long patches;   // Number of patches in the object module.
    long objects;   // Number of objects (bytes) in the object module.
};




// TYPE upl_symbol_token.
//
// A symbol token.  (variable name, function name, etc)
//
//
// A fixed-size buffer of UPL_TOKEN_LENGTH characters.
// NOTE(review): whether the terminating NUL is counted in that length
//   is not visible here -- confirm where tokens are copied in.
typedef	char	upl_symbol_token[UPL_TOKEN_LENGTH];






// SECTION Names.
//
// Names are the names of symbols being exported/imported by an object module.
//
// The linker takes the names being exported by each object module,
// and offers them to every other object module for patching.
// This resolves all external symbol references in the linked executable.
//


// OBJECT upl_Name.
//
// The name of a symbol.
//
//
class upl_Name
{
  public:
    // The name of the symbol.
    //
    // DEVELOPER NOTE:
    //   Win32 and Linux don't have the memory limitations that DOS
    //   does, so outside DOS we allow longer symbol names.  The object
    //   files remain compatible, but a long symbol name generated by
    //   the Win32/Linux compiler may not be linkable with the DOS
    //   linker.  Why bother?  Function names are "mangled": an encoding
    //   of the parameters is added.  For example a 16 character
    //   function name with 4 parameters could be 24 characters long.
    //   Allowing up to 128 characters provides a margin of error.  In
    //   extreme cases (subroutines with a lot of complex parameters)
    //   you might want this even longer.  If you don't mind the
    //   consequent memory fragmentation, this could be changed to use
    //   dynamically allocated names.  For now though, the current
    //   implementation has worked fine.  [bj 15sep1999]
    //
    #ifndef COMMON_DOS
    char                name[128];
    #else
    upl_symbol_token    name;
    #endif

    // Address or value of the symbol.
    //
    upl_addr            addr;

  public:
    // Write this name to the file <f>.
    //
    void write(FILE *f) const;

    // Read this name from the file <f>.
    //
    void read(FILE *f);
};




// OBJECT upl_Names.
//
// A list of all names in an object module.
//
// DEVELOPER NOTE:
//   Two <upl_Names> lists are used in each object module.
//   One is the name of exported symbols.  The other is the
//   name of imported symbols, indexed by <upl_Patches>.
//   [bj 15sep1999]
//
class upl_Names : public Mantaray_long<upl_Name>
{
  public:
    // Append this name list to the object file <f>.
    //
    // NOTE(review): <Names> is presumably the number of entries
    //   in use -- confirm in the implementation.
    //
    void write(FILE *f, long Names) const;

    // Read this name list from the object file <f>.
    //
    // NOTE(review): the result is presumably the number of names
    //   read -- confirm in the implementation.
    //
    long read(FILE *f);
};






// FORWARD upl_Context.
//
// Declared ahead of its definition because upl_Object::out_string()
// takes a <const upl_Context&> parameter.
class upl_Context;






// SECTION Object Module.
//
// Each separately compiled part of a UPL program (each C file,
// Assembler file and the runtime library) are compiled into
// their own object module.  These are relocatable.  Ultimately
// they're combined by the linker to produce the executable.
//


// OBJECT upl_Object.
//
// An Object Module.
//
class upl_Object
{
  public:
    // SECTION: Object code/data.
    //

    // Number of objects (bytes) used.  This is also what
    // current_addr() reports as the next address to be used.
    //
    long                            objects;

    // Array of object code/data.
    //
    Mantaray_long<byte>             object;

    // Relocation type of each byte of object code/data.
    //
    Mantaray_long<upl_reloc_type>   reloc;

    // Relocation Partner.
    //
    // When relocating a high or a low byte we need the full 16-bit
    // value, but within the object code/data there is only room to
    // store one byte.  The other (low/high) byte is therefore kept in
    // <reloc_partner>.
    //
    Mantaray_long<byte>             reloc_partner;
    long                            reloc_partners;


    // Patches: A list of symbols we want to import.
    //
    upl_Patches                     patch;
    long                            patches;


    // SECTION: Names.
    //

    // Internal Names: A list of internal symbols we are exporting.
    //
    upl_Names                       names;

    // External Names: A list of external symbols we are importing.
    //
    // List of external application names we want to import.
    // First entry corresponds to external_id RUNTIME_END.
    //
    upl_Names                       external_names;


    // SECTION: Alignment.
    //

    // Page Aligned?
    //
    // When linked, must this module be aligned with a page boundary?
    // On a 6502 each page is 256 bytes long.  The first page is at 0.
    //
    boolean                         page_aligned;

    // Absolute Alignment?
    //
    // Must this module be aligned at a specific address?
    //
    // DEVELOPER NOTE:
    //   Currently the linker only allows one absolutely aligned
    //   object module, and appends all relocatable object modules
    //   following it.  Since the assembler currently produces only
    //   absolute modules, this is always it.  A useful change would
    //   be to modify the assembler to produce relocatable modules,
    //   thus allowing multiple assembler modules in an executable.
    //   [bj 15sep1999]
    //
    boolean                         absolute_aligned;

    // If <absolute_aligned>, the address at which it must be aligned.
    //
    long                            alignment_addr;


    // SECTION: Miscellaneous Attributes.
    //

    // Runtime Target.
    //
    // Describes the target machine/configuration on which the object
    // module must run.  At the moment this is for information only,
    // and should only be necessary with the runtime library (the only
    // platform dependent part of the program).  For a list of
    // enumerations of <runtime_target> see <uplrtime.asm>.  The
    // generic value for platform-independent modules is
    // <target_generic> = 0.
    //
    short                           runtime_target;

    // Flags.
    //
    // DEVELOPER NOTE:
    //   Flags is not currently used. [bj 15sep1999]
    //
    ulong                           flags;


  public:
    upl_Object(void);


    // Return the current address (the next address to be used) in
    // this module.
    //
    // This is a pure accessor, so it is const and may be called
    // through a const reference to the module.
    //
    upl_addr current_addr(void) const {return objects;}


    // SECTION: Out. (Append code/data to the end of the object module)
    //
    // NOTE(review): each out*() method returns a <upl_addr>, presumably
    //   the address at which the value was written -- confirm in the
    //   implementation.
    //

    // Write the byte <Value> of relocation type <Reloc>.
    // <upl_reloc_none> means the value doesn't need to be relocated.
    //
    upl_addr out(byte Value, upl_reloc_type Reloc = upl_reloc_none);

    // Write the word <Value> of relocation type <Reloc>.
    // <upl_reloc_none> means the value doesn't need to be relocated.
    //
    upl_addr out_word(word Value, upl_reloc_type Reloc = upl_reloc_none);

    // Write the string <String>.
    //
    // The context <C> is used solely to let us know if the characters
    // in the string need to be converted into another character set.
    // Note the context <C> is not changed by this operation.
    //
    upl_addr out_string(const char *String, const upl_Context& C);

    // Write a code segment address <Addr>.
    //
    upl_addr out_addr_code(ushort Addr);

    // Write a data segment address <Addr>.
    //
    upl_addr out_addr_data(ushort Addr);

    // Shorten: Drop <Bytes> off the end of the object module.
    //
    // Asserts that the module has not been shortened below zero bytes.
    //
    // DEVELOPER NOTE:
    //   Be very careful when calling this;  If there might be any
    //   patches or names in the code being dropped then you should
    //   use context state mark/rollback instead. [bj 15sep1999]
    //
    void shorten(int Bytes)
      {objects -= Bytes;  assert(objects >= 0);}


    // SECTION: Set. (Change code/data within the object module)
    //

    // Change the byte at address <Object_i> to <Value/Reloc>.
    //
    void set_byte(
        long            Object_i,
        byte            Value,
        upl_reloc_type  Reloc = upl_reloc_none);

    // Change the word at address <Object_i> to <Value/Reloc>.
    //
    void set_word(
        long            Object_i,
        word            Value,
        upl_reloc_type  Reloc = upl_reloc_none);


    // SECTION: Patch.
    //

    // Add a request that the current address be patched
    // with <External_id> using <Patch_method>.
    //
    void add_patch(
        upl_external_id  External_id,
        upl_patch_method Patch_method = upl_patch_method_none);

    // Write a word that shall be patched with <External_id>
    // using <Patch_method>.  If the <Patch_method> is <*add*>,
    // then <Content> is the value to which the external value
    // shall be added.
    //
    void out_word_patch(
        upl_external_id  External_id,
        upl_patch_method Patch_method   = upl_patch_method_none,
        ushort           Content        = 0);


    // SECTION: Name.
    //

    // Add an internal, exportable <Name> with the value/address <Addr>.
    // <L> is only used if we need to abend and report an error.
    //
    void add_name(Flex& L, const char *Name, upl_addr Addr);

    // Add an external <Name> with <External_id> that shall be imported.
    // <L> is only used if we need to abend and report an error.
    //
    // CHECK.
    //
    void add_external_name(
        Flex&           L,
        const char     *Name,
        upl_external_id External_id);


    // Generate code to push <Value> on the stack.
    //
    // If <Force_word> is true and <Value> is only one byte wide,
    // then we generate the code to push a whole word (not just
    // the low byte).
    //
    void push_value(long Value, boolean Force_word = false);


    // SECTION: Rollbacks.
    //
    // The UPL compiler uses this technique for rollback optimisation:
    // it remembers its state before generating a new section of code,
    // using the mark() method.  If it later realises that this code
    // is redundant, or can be replaced by shorter, more efficient
    // code, then it calls rollback().  This throws away those changes
    // that were made since mark().
    //
    // More than one mark() may be active at once.  Just use a
    // different <State> for each.
    //
    // DEVELOPER NOTE:
    //   This works pretty well, but some situations are difficult
    //   to rollback.  Suppose we have the code A; B; C where we
    //   find that B is redundant and can be deleted.  Unfortunately
    //   calling a rollback after C has been generated would throw
    //   away C as well as B.  Thus in this case we can't rollback,
    //   and the redundant code remains in place.  Aside from these
    //   situations, the rollback mechanism actually works quite well.
    //   Point is it's just one optimisation technique, which should
    //   be used alongside other optimisation techniques too.
    //   [bj 15sep1999]
    //

    // Mark our current state, saving it in <State>.
    //
    void mark(upl_Object_state& State) const;

    // Rollback to a previous state, that was saved in <State>.
    //
    void rollback(const upl_Object_state& State);


    // SECTION: I/O.
    //

    // Save to an object file <f>.
    //
    void write(FILE *f) const;

    // Read from an object file <f>.
    //
    void read(FILE *f);

    // Stream output of the object module <U> on <Str>.
    //
    friend
    ostream& operator << (ostream& Str, const upl_Object& U);
};







// SECTION Symbol Table.
//


// OBJECT upl_Symbol.
//
// This holds a single symbol in the symbol table.
//
//
class upl_Symbol : public Btree_node<upl_Symbol>
{
  public:
    // The name of this symbol.  Doubles as the btree key: see key().
    //
    upl_symbol_token                    token;

    // The value type.  eg. an integer.
    //
    upl_value_type                      value_type;

    // The symbol type.  eg. a constant, a variable, a function.
    //
    upl_symbol_type                     symbol_type;

    // Value.
    //
    // For a variable or subroutine this is its address.
    // For a constant this is its value.
    //
    long                                value;

    // For an array, the number of items within it.
    // For non-arrays, <count> is set to zero.
    //
    ushort                              count;

    // The size of a single item of <value_type>.
    //
    ushort                              value_bytes;

    // The total size.
    // NOTE(review): presumably max(count, 1) * value_bytes -- confirm
    //   in the constructor.
    //
    ushort                              size_bytes;

    // If a subroutine, a list of the subroutine parameters.
    //
    Mantaray_short<upl_value_type>      parms;

    // If an external reference, the external id.
    //
    upl_external_id                     external_id;

    // INCOMPLETE: DEVELOPER NOTE:
    //
    // I started adding indirection and structs when I ran out of
    // development time.  I've since realised the symbols need a
    // far more flexible approach anyway.  This code remains
    // incomplete.  [bj 15sep1999]
    //
    ushort                              indirection;
    boolean                             within_page;
    Stingray_short<upl_Symbol>          fields;


    // A pointer to the <type> declaration of this symbol.
    //
    const upl_Symbol                   *type;


    // Is this variable local to a subroutine?
    //
    // If 0/false, it is not a local variable.
    // If > 0,     this is the local address of this variable relative
    //             to the subroutine stack frame.
    //
    // UNSURE: Relationship between this and <in_local_storage>.
    //         [bj 15sep1999]
    //
    ushort                              is_local;

    // Is this variable in local storage?
    //
    // Local variables may either be automatic (in the stack frame)
    // or static (stored like a global variable, but only accessible
    // from within the subroutine).  If <in_local_storage> is true,
    // then this is an automatic variable stored in local (stack frame)
    // storage.
    //
    boolean                             in_local_storage;


    // Fast Array?
    //
    // On a 6502, if an array lies entirely within a page we may use
    // fast register indexing to access it.  Trouble is, because we're
    // generating relocatable code we don't know where the array will
    // end up in absolute memory.  However if we request that the
    // object module be aligned on a page boundary, then even though
    // the code is still relocatable we can tell whether this fast
    // accessing is usable.  If it is, <fast> is set true.  If false
    // (the default) we assume we must use normal slow accessing.
    // That involves using registers in the zero page to add the base
    // address of the array to the index, etc.  This requires many
    // more processor cycles and results in larger, slower code.
    //
    boolean                             fast;


  public:
    // CTOR.
    //
    // Construct a Symbol:
    //
    // IN:
    //  Token           The name of this symbol.
    //  Token_type      The value type. eg. Integer.
    //  Symbol_type     The symbol type. eg. Variable.
    //  Value           NOTE(review): presumably stored in <value> (address
    //                  of a variable/subroutine, or value of a constant).
    //  Count           If an array, the number of elements.  Otherwise 0.
    //  External_id     If an external symbol, its external id.
    //  Type            If a special type, a pointer to its type declaration.
    //                  Leave as NULL for standard types (eg. Integer).
    //
    upl_Symbol(
        const char       *Token,
        upl_value_type    Token_type,
        upl_symbol_type   Symbol_type,
        long              Value,
        ushort            Count,
        long              External_id   = 0,
        const upl_Symbol *Type          = NULL);

    // GROUP Btree methods.
    //
    // These methods are used to store this object in a btree.
    // Symbols are ordered by strcmp() on their <token>.
    //
    const char *key(void) const
    {
        return token;
    }

    boolean operator < (const char *Key) const
    {
        return strcmp(key(), Key) < 0;
    }

    boolean operator > (const char *Key) const
    {
        return strcmp(key(), Key) > 0;
    }

    boolean operator == (const char *Key) const
    {
        return strcmp(key(), Key) == 0;
    }

    void print_recursive(ostream& Str, const boolean Newline) const;
};




// OBJECT upl_Symbols.
//
// A list of all symbols.
//
class upl_Symbols
{
  protected:
    Btree<upl_Symbol, const char *>     symbols;
    Stingray_long<upl_Symbol>           local;
    long                                locals;     // Number of locals currently declared.

  public:
    upl_Symbols(void);

    // Get the symbol <Token>.
    //
    // If <Mandatory> and the symbol is missing, we abend using <F>.
    // If not <Mandatory> and missing, we return NULL.
    // <F> is only used for error reporting.
    //
    const upl_Symbol *get(
        Flex&           F,
        const char     *Token,
        const boolean   Mandatory) const;

    // Declare a new symbol.
    //
    // IN:
    //  List_stream     An optional list stream on which we will write
    //                  details about the declaration.
    //  L               Used for error reporting only.
    //  Token           The name of this symbol.
    //  Token_type      The value type. eg. Integer.
    //  Symbol_type     The symbol type. eg. Variable.
    //  Value           NOTE(review): presumably the symbol's address (variable
    //                  or subroutine) or value (constant) -- confirm.
    //  Count           If an array, the number of elements.  Otherwise 0.
    //  Local           True iff this is a local variable.
    //  External_id     If an external symbol, its external id.
    //  Type            If a special type, a pointer to its type declaration.
    //                  Leave as NULL for standard types (eg. Integer).
    //
    upl_Symbol *declare(
        ostream          *List_stream,
        Flex&             L,
        const char       *Token,
        upl_value_type    Token_type,
        upl_symbol_type   Symbol_type,
        long              Value,
        ushort            Count         = 0,
        boolean           Local         = false,
        long              External_id   = 0,
        const upl_Symbol *Type          = NULL);


    // Reset (discard) any local variable declarations.
    //
    void reset_locals(void);

    // Rollback.
    //
    // CAUTION: Don't confuse local symbol rollback with object rollback.
    //          They're two different things. [bj 15sep1999]
    //

    // Mark.  Return the number of locals currently declared.
    //
    // This is a pure accessor, so it is const (like get()) and may be
    // called through a const reference to the symbol table.
    //
    long mark_locals(void) const {return locals;}

    // Release.  Use only <Locals> of the locals currently declared.
    //           Setting this to zero is the same as reset_locals().
    //
    // NOTE(review): the meaning of the returned long is not visible
    //   here -- confirm in the implementation.
    //
    long release_locals(long Locals);
};




// SECTION Segment.
//
// An executable is made when a number of modules are linked.
// The <upl_Segments> object holds the code and data segments
// of each module.  (Note that either the code or the data
// segment may be empty).
//

// OBJECT upl_Segments.
//
// Code and data segments for a particular module.
//
class upl_Segments
{
  public:
    // The name of these segments.
    //
    char            segment_name[32];

    // The code segment and the data segment.
    //
    upl_Object      code;
    upl_Object      data;

    // The number of bytes in the code segment.
    //
    upl_addr        code_addr;

    // The number of bytes in the data segment.
    //
    upl_addr        data_addr;


    // Are these segments relocatable?
    //
    boolean         relocatable;

    // Are these segments absolute?
    //
    boolean         absolute;


  public:
    upl_Segments(void);


    // SECTION: I/O on an already open file.
    //

    // Append these segments to file <f>.
    //
    void write(FILE *f) const;

    // Read these segments from file <f>.
    //
    void read(FILE *f);


    // SECTION: I/O on a named object file.
    //

    // Save these segments in object file <fn>.
    //
    void write(const char *fn) const;

    // Read these segments from object file <fn>.
    //
    void read(const char *fn);
};






// ENUMERATION upl_char_conversion.
//
// This enumeration describes the type of conversion to be applied
// to strings being stored in an object module.
//
typedef enum {
    upl_char_conversion_none = 0,   // No conversion.
    upl_char_CBM_upper,             // Conversion to Commodore upper case.
    upl_char_CBM_lower              // Conversion to Commodore lower case.
} upl_char_conversion;







// SECTION Context.
//


// OBJECT upl_Context_state.
//
// Context states are used to perform on-the-fly optimisation,
// allowing the compiler to rollback and remove or replace
// sections of redundant or inefficient code.
//
// This object is simply a composite of the context states
// of the code and data segments.
//
//
class upl_Context_state
{
  public:
    upl_Object_state code;  // Saved state of the code segment.
    upl_Object_state data;  // Saved state of the data segment.
};




// OBJECT upl_Context.
//
//
//
//
class upl_Context : public upl_Segments
{
  public:
    // Symbol table.
    //
    upl_Symbols                 symbols;

    // Counts.
    //
    long                        errors;           // Error count.
    long                        warnings;         // Warning count.

    // Next type enumeration to be allocated.
    //
    upl_value_type              types;

    // Next external id to be allocated: see get_next_external_id().
    //
    long                        next_external_id;

    // Verbosity.
    //
    // The larger this number, the more (increasingly detailed) messages
    // shall be displayed by the compiler during compilation.
    //
    int                         verbose;


    // SECTION: Subroutine.
    //

    // Are we currently compiling a subroutine?
    //
    boolean                     in_subroutine;

    // A list of addresses to be patched with a jump to the code
    // fragment that returns from this subroutine.
    //
    Mantaray_long<upl_addr>     return_patch;
    long                        return_patches;

    // Value this subroutine returns (if a function).
    //
    upl_value_type              return_value;

    // Ok to return immediately?
    //
    // If true, this subroutine has no stack frame and so we may
    // return immediately using an RTS, rather than by jumping
    // to the return code fragment (via <return_patch>).
    //
    boolean                     return_immediate;


    // Character Conversion to be applied (if any).
    //
    upl_char_conversion         char_conversion;        // To characters.
    upl_char_conversion         string_conversion;      // To strings.


    // SECTION: Alignment.
    //

    // Do we know how this object module shall be aligned?
    //
    boolean                     know_alignment;

    // The compiler sets <needed_alignment> to true if we use the
    // fact we <know_alignment> for <fast> array access.
    //
    boolean                     needed_alignment;

    // The alignment of this module within a page.
    // eg. 0 aligns the module on a page boundary.
    //
    upl_addr                    page_align_bytes;


    // SECTION: Local Variables.
    //

    // Address of next variable in subroutine.
    //
    long                        subroutine_local_addr;

    // The maximum local address allocated within a subroutine.
    //
    long                        max_subroutine_local_addr;

    // Number of bytes allocated for local parameter storage.
    //
    long                        local_parm_bytes;

    // True iff this subroutine has automatic local variables.
    //
    boolean                     has_auto_variables;

    // Address at which to store the number of automatic variable bytes.
    //
    upl_addr                    auto_variable_addr;  // Where to store #.


    // SECTION: Optimisation.
    //

    // Optimisation Level.
    //
    // The larger this number, the more optimisations shall be applied.
    //
    int                         optimise_level;

    // Peephole optimisation.
    //
    // If true, perform on-the-fly peephole optimisation.
    //
    boolean                     optimise_peephole;

    // Flow optimisation.
    //
    // DEVELOPER NOTE: Not currently used. [bj 15sep1999]
    //
    boolean                     optimise_flow;

    // NOTE(review): presumably the optional listing stream handed to
    //   upl_Symbols::declare() as <List_stream> -- confirm at the callers.
    //
    ostream                    *list_file;


  public:
    upl_Context(void);


    // Predefine standard types (eg. "int").
    // <L> is only used if needed for error reporting.
    //
    void predefine_types(Flex& L);

    // SECTION: Mark/Rollback.
    //
    // The UPL compiler uses this technique for rollback optimisation:
    // it remembers its state before generating a new section of code,
    // using the mark() method.  If it later realises that this code
    // is redundant, or can be replaced by shorter, more efficient
    // code, then it calls rollback().  This throws away those changes
    // that were made since mark().
    //
    // More than one mark() may be active at once.
    // Just use a different <Mark> for each.
    //
    void mark(upl_Context_state& Mark) const;
    void rollback(const upl_Context_state& Mark);


    // SECTION: Allocation.
    //

    // Allocate a new value type, returning its enumeration.
    //
    upl_value_type allocate_type(void);

    // Allocate a new external id: hand out the current
    // <next_external_id>, then advance it by one.
    //
    long get_next_external_id(void)
    {
        const long Allocated = next_external_id;
        next_external_id = Allocated + 1;
        return Allocated;
    }


    // SECTION: Subroutine.
    //

    // Declare that we are beginning a new subroutine, returning <Result>.
    // If <Return_immediate>, then this subroutine does not need a stack
    // frame (ie. does not have parameters or local automatic variables).
    //
    void subroutine_begin(upl_value_type Result, boolean Return_immediate);

    // Arrange for object code address <Return_from_addr> to be patched
    // with the address of the subroutine return code fragment.
    // (Only needed if not <Return_immediate>.)
    //
    void subroutine_return(upl_addr Return_from_addr);

    // Declare that we have finished the subroutine.
    //
    // The address of the return code fragment is <Return_addr>.
    // Any local variables shall be deallocated.
    //
    void subroutine_end(Flex& L, upl_addr Return_addr);
};





// SECTION Expression.
//


// ENUMERATION upl_transient_type.
//
// This enumeration defines where the transient results of a computation
// are currently stored.
//
typedef	enum
	{
	upl_transient_none,	// No transient representation available.
	upl_transient_a,	// Held in register <A>.
	upl_transient_ax,	// Held in the register pair <A/X>.
	upl_transient_x,	// Presumably held in register <X> -- confirm in code generator.
	upl_transient_y,	// Presumably held in register <Y> -- confirm in code generator.
	upl_transient_znc	// Z, N, C status register flags.
	}
	upl_transient_type;



// OBJECT upl_Expr_result.
//
// This object defines the result of an expression.
//
//
class upl_Expr_result
{
  public:
    //
    // Numeric value of the expression.
    // Only meaningful when the value is known (see <constant>).
    //
    long		value;

    // Value type of the expression.
    //
    upl_value_type	value_type;

    // True when the expression is a constant, ie. <value> is known.
    //
    boolean		constant;

    // Width of the value, in bytes.
    //
    byte		value_bytes;

    // The variable whose contents are the expression result,
    // or NULL when the result is not simply a variable.
    //
    upl_Symbol const   *variable;


    // Transient representation.
    //
    // Compiling an expression finishes by pushing its value onto the
    // expression stack.  Just before that push the value usually sits
    // somewhere temporary: a register such as <A> or <A/X>, or even
    // a status flag such as <Z>.  That temporary home is the "transient".
    //
    // Frequently the very next code compiled pops the value straight
    // back off the stack (eg. to store it), so the push/pop pair is
    // wasted cycles.  Rolling the context back to the transient state
    // lets us generate code that never touches the expression stack:
    //
    //    ; Before optimisation		; After optimisation
    //    LDA #2			LDA #2
    //    JSR push_b		->
    //    JSR pop_b
    //    STA somevariable		STA somevariable
    //
    // Here the transient is the accumulator <A>.  <transient_state> is
    // a context state recorded right after LDA #2 but before JSR push_b,
    // so using the transient only requires a rollback to that state.
    //

    // Location of the transient expression result (if any).
    //
    upl_transient_type	transient;

    // Context state captured immediately before the transient result
    // is pushed on the expression stack.
    //
    upl_Context_state	transient_state;

    // When the transient is a boolean held in the Z, N or C flags,
    // the operation that produced it; needed to interpret the flags.
    //
    upl_op		transient_op;	// To interpret ZNC


    // Levels of indirection.
    //
    // One level of indirection corresponds to one pointer.
    //
    // eg.    char is indirection level 0 (ie. no indirection)
    //       *char is indirection level 1.
    //      **char is indirection level 2.
    //
    // DEVELOPER NOTE:
    //   It has been recognised that there are better ways to manage
    //   pointers, since this representation cannot describe eg.
    //   an array of pointers to pointers.  [bj 15sep1999]
    //
    ushort		indirection;


    // True once this expression has been pushed on the expression stack.
    //
    boolean		is_pushed;


  public:
    //
    // Make this expression the constant <Value>.
    // <L> is consulted for error reporting only.
    //
    void set_constant(Flex& L, long Value);

    // Make this expression a pointer to a <Value_type> located at
    // address <Value>.  <L> is consulted for error reporting only.
    //
    // CHECK: why is this called <*constant_*>? [bj 15sep1999]
    //
    void set_constant_pointer(Flex& L, upl_value_type Value_type, long Value);

    // Make this expression refer to <Variable>.
    //
    void set_variable(const upl_Symbol& Variable);

    // Make this expression a <Value_type> reached through <Indirection>
    // pointers.  The optional <Transient*> arguments describe the
    // transient representation, when there is one.
    //
    void set_value_type(
	upl_value_type	    Value_type,
	ushort		    Indirection,
	upl_transient_type  Transient	    = upl_transient_none,
	upl_Context_state  *Transient_state = NULL,
	upl_op		    Transient_op    = upl_op_none);

    // Change the transient representation only.
    //
    void set_transient_only(
	upl_transient_type  Transient	    = upl_transient_none,
	upl_Context_state  *Transient_state = NULL,
	upl_op		    Transient_op    = upl_op_none);

    // True iff this expression may be optimisable, ie. it can be
    // obtained some way other than from the expression stack:
    // as a constant, as a variable, or from a transient.
    //
    boolean	is_optimal(void) const
      {
      return is_constant() || is_variable() || is_transient();
      }

    // True iff this expression is the value of a variable.
    //
    boolean	is_variable(void) const
      {
      return NULL != variable;
      }

    // True iff this expression is a constant.
    //
    boolean	is_constant(void) const
      {
      return constant;
      }

    // True iff a transient representation is available.
    //
    boolean	is_transient(void) const
      {
      return !(transient == upl_transient_none);
      }

    // Forget the transient representation from now on.
    //
    void ignore_transient(void)
      {
      transient = upl_transient_none;
      }

    // Forget the variable representation from now on.
    //
    void ignore_variable(void)
      {
      variable = NULL;
      }
};




// OBJECT upl_Subscript.
//
// This object holds a subscript used to access an array.
//
//
class upl_Subscript
{
  public:
    //
    // The expression result holding the index.
    //
    upl_Expr_result	index;

    // Set true if we're using a fast indexing to access the array.
    //
    // DEVELOPER NOTE:
    //   While the array must be <fast> (all in one page), there
    //   are other conditions that must be satisfied too.
    //   [bj 15sep1999]
    //
    boolean		use_fast_variable;

    // Set true if we're using a constant to index the array.
    //
    boolean		use_constant;

    // Set true if we must get the index from the stack.
    //
    boolean		use_stack;


  public:
    //
    // Read a subscript. eg. "[2*x]"
    //
    // IN:
    //   L		The lexical parser supplying the tokens.
    //   C		Context.
    //   S		NOTE(review): presumably the symbol (array/pointer)
    //			being subscripted -- confirm in the implementation.
    //
    // NOTE(review): presumably fills in <index> and the <use_*> flags
    // above; the implementation is not visible from this header.
    //
    void read(Flex& 		L,
	      upl_Context&	C,
	const upl_Symbol& 	S);
};






// SECTION Compiler.
//
// This is where it all happens.
//


// OBJECT upl_Compiler.
//
// This is a static object.  That is, it contains no variables itself.
// All the contextual information is stored in the <upl_Context> object
// passed in each call.  <Flex> is the lexical parser.  Use <Flex> to
// open the source file before the first call to program().
//
// Most methods in <upl_Compiler> simply parse the named statement.
// eg. if_statement() parses an if-then-else statement.  As the
// compiler parses the statement new tokens are extracted as
// necessary from <Flex>, and the <upl_Context> fed the generated
// code, changes to the symbol table, etc.
//
// The entry point to this module is program().
//
// DEVELOPER NOTE:
//   Quetzalcoatl/UPL is an LL(N) compiler.  These compilers are easy
//   (and believe it or not, fun!) to write.  They're also educational;
//   It's quite easy to peer within and work out exactly what the compiler
//   is doing.  But LL(N) compilers can only interpret certain languages,
//   which C isn't!  To do a proper C compiler, you need an LR(N) compiler.
//   These are so difficult to write that compiler authors must use a
//   Compiler Compiler (such as YACC or Bison).  Their internal operations
//   are complicated (and boring to look at).  But if you're doing a
//   heavy-duty, production compiler, LR(N) is definitely the way to go.
//   So why did I use LL(N)?  Because Quetzalcoatl/UPL was written to
//   be educational, and a test bed for optimisation.  In order to
//   implement a C compiler in LL(N) I had to make some changes to the
//   base language.  When I ceased Quetzalcoatl/UPL development in
//   September 1998, I was in the process of refining the LL(N) grammar
//   to handle something more closely resembling ANSI C/C++.
//   Quetzalcoatl/UPL will never be a full ANSI C/C++ compiler; that's
//   not the point!  But it can still go some way towards compiling
//   more C programs and remain an LL(N) compiler.  [bj 15sep1999]

//
class upl_Compiler
{
  public:
    //
    // Read/compile a UPL/C Program.
    //
    // They are similar, so we can read both at the same time.
    //
    // IN:
    //   L		The lexical parser, with the source file opened.
    //   C		Context.
    //   Top_level      True when called from the top level.
    //			False when called as an include file.
    //
    static void program(
	Flex& 		L,
	upl_Context& 	C,
	boolean 	Top_level);

    // Parse an {} statement.
    //
    // NOTE(review): <Unreachable_code> (optional, may be NULL) and
    // <Initialise_auto> are not described in this header.  Presumably
    // <Unreachable_code> reports whether code following the statement
    // cannot be reached -- confirm in the implementation.
    //
    static void compound_statement(
	Flex& 		 L,
	upl_Context& 	 C,
	boolean 	*Unreachable_code = NULL,
	boolean		 Initialise_auto  = false);

    // Parse an if-then[-else] statement.
    //
    static void if_statement(Flex& L, 	    upl_Context& C);

    // Parse a while() statement.
    //
    static void while_statement(Flex& L,    upl_Context& C);

    // Parse a do-while() statement.
    //
    static void repeat_statement(Flex& L,   upl_Context& C);

    // Parse a for() statement.
    //
    static void for_statement(Flex& L,      upl_Context& C);

    // Parse a subroutine call statement.
    //
    // <S> is the symbol of the subroutine.  <Result> is optional (may be
    // NULL).  NOTE(review): presumably <Result> receives the expression
    // result of the call when it is used as a value -- confirm.
    //
    static void subroutine_statement(
	Flex& 		   L,
	upl_Context& 	   C,
	const upl_Symbol&  S,
	upl_Expr_result   *Result = NULL);

    // Parse an assignment statement.
    //
    // NOTE(review): <S> is presumably the symbol being assigned to.
    //
    static void assign_statement(Flex& L,   upl_Context& C, const upl_Symbol& S);

    // Parse a statement to poke memory.
    //
    static void assign_memory_statement(Flex& L,   upl_Context& C);

    // Parse a C ioctl statement.
    //
    static void ioctl_statement(Flex& L,    upl_Context& C);

    // Parse a UPL subroutine call statement.
    //
    static void call_statement(Flex& L,     upl_Context& C);


    // Parse any statement.
    //
    // NOTE(review): the meaning of the boolean return value, of
    // <Unreachable_code> (optional, may be NULL) and of <Semicheck>
    // is not described in this header.  Presumably <Semicheck> controls
    // whether the terminating semicolon is checked -- confirm.
    //
    static boolean statement(
	Flex& 		L,
	upl_Context& 	C,
	boolean        *Unreachable_code = NULL,
	boolean		Semicheck	 = true);


    // Parse the return statement.
    //
    static void return_statement(Flex& L,   upl_Context& C);


    // Parse an output statement.
    //
    // Use the UPL put/putln statement or C++ cout/cerr.
    //
    static void put_statement(Flex& L, 	    upl_Context& C);  // or putln

    // Parse a simple increment/decrement statement.
    //
    // <Prefix> presumably distinguishes the prefix form from the postfix
    // form, and <S>/<Subscript> identify the variable (and optional array
    // element) being changed -- NOTE(review): confirm in implementation.
    //
    // TODO: This should be done in expr() to be compatible with C.
    //
    static void inc_statement(  // or dec
	Flex& 			L,
	upl_Context& 		C,
	boolean			Prefix,
	const upl_Symbol       *S,
	upl_Subscript	       *Subscript);


    // Reads a symbol from <L> and returns its details.
    // If the symbol is not in the table, return NULL 
    // if Mandatory=false or abend if Mandatory=true
    //
    static const upl_Symbol *type(
	Flex& 		L,
	upl_Context& 	C,
	boolean		Mandatory);


    // Generates code to skip over a 3 byte JMP statement
    // iff the condition <Clause> is true.
    //
    // NOTE(review): <Loop_if_true> is not described in this header.
    //
    static void condition_flow(
	Flex&			L,
	upl_Context& 		C,
	upl_Expr_result&        Clause,
	boolean			Loop_if_true);


    // Read in an expression
    //
    // This is called recursively, incrementing Depth each time.
    // Doing this reduces the order of precedence.
    // This way we use the same expr() routine to process operands
    // in the correct order, e.g. and, or, ==, *, etc...
    //
    // The details of the expression are returned through <Result>.
    // NOTE(review): the returned <upl_value_type> is presumably the type
    // of the expression, and <Mandatory_id> presumably controls whether
    // an unknown identifier is an error -- confirm in implementation.
    //
    static upl_value_type expr(
	Flex& 			L,
	upl_Context& 		C,
	upl_Expr_result& 	Result,
	int 			Depth,
	boolean			Mandatory_id = true);

    // Read a term; a term is the atomic element around which expressions are built.
    // Terms include numbers, variables and function calls.
    //
    static upl_value_type term(
	Flex& 			L,
	upl_Context& 		C,
	upl_Expr_result& 	Result,
	boolean			Mandatory_id = true);


    // Read in and return a simple value type (ie. without subscripts, etc.)
    //
    static upl_value_type get_value_type(Flex& 	L);


    // How many bytes long is this <Value_type>?
    //
    // NOTE(review): <Mandatory> is not described in this header;
    // presumably it selects whether an unsized type is an error.
    //
    static int value_bytes(
	upl_value_type	Value_type,
	boolean         Mandatory	= true);


    // Is this a pointer type? (e.g. <*pointer_short>.)
    //
    // TODO: This isn't a great way to manage pointers,
    //	     because it only measures one level of indirection.
    //	     [bj 09oct2006]
    //
    static boolean is_pointer(upl_value_type Value_type);


    // Convert a pointer type into what it is pointing to.
    // e.g. <pointer_short> becomes <short>.
    // e.g. <pointer_byte>  becomes <byte>.
    //
    // TODO: This isn't a great way to manage pointers,
    //	     because it only measures one level of indirection.
    //	     [bj 09oct2006]
    //
    static upl_value_type convert_pointer(Flex& L, upl_value_type Value_type);

    // Convert a data type into a pointer.
    // e.g. <short> becomes <pointer_short>.
    //
    // TODO: This isn't a great way to manage pointers,
    //	     because it only measures one level of indirection.
    //	     [bj 09oct2006]
    //
    static upl_value_type pointer_value_type(upl_value_type Value_type);


    // Take two values on the stack of types <Top> and <Underneath>.
    // Internally work out what the result of <Op/Unsigned> these
    // two values should be, and return the result.
    // They are automatically cast to the appropriate types.
    //
    // NOTE(review): <parm_bytes> is optional (may be NULL) and is not
    // described in this header.
    //
    static upl_value_type normalise_binary(
	Flex& 			L,
	upl_Context& 		C,
	upl_op			Op,
	upl_value_type		Underneath,
	upl_value_type		Top,
	boolean&		Unsigned,
	ushort		       *parm_bytes = NULL);

    // Convert/cast the top of the stack from type <Have> to type <Want>.
    //
    // NOTE(review): the meaning of the boolean return value is not
    // described in this header.
    //
    static boolean normalise_unary(
	Flex& 			L,
	upl_Context& 		C,
	upl_value_type		Have,
	upl_value_type		Want);


    // Make sure <Result> has been pushed on the stack.
    //
    static void ensure_pushed(
	Flex& 			L,
	upl_Context& C,
	upl_Expr_result&  Result);


    // Load/store variable into <A/X>.
    //
    // This is a generalised subroutine for accessing a variable.
    //
    // It loads or stores (or otherwise accesses) the variable <D> or <D[Subscript]> 
    // into registers <A/X>.  We can also do other operations through this, 
    // such as increment, decrement, etc.
    //
    // IN:
    //    C	    	Context 
    //    D	    	The variable <D> we are accessing.
    //    Subscript 	Optional.  If the value we are accessing is an element indexed
    //		    	from a pointer/array, the subscript used to access it is 
    //		    	passed here.
    //    Load	    	True  iff we are loading the value.
    //	            	False iff we are storing it.
    //	            	(We use <Other_opcode> if neither of these.)
    //    Have_value    True iff a <Value> has been passed.
    //		    	If not, we use the value in <A> or <A/X>.
    //    Value	    	The <Value> if specified.  (Ignored if we don't <Have_value>).
    //    Other_opcode  If neither loading or storing, we do this instead.
    //			Only certain opcodes are allowed here.
    //    Address_value True iff <Value> is actually an address.
    //    
    //
    static void reg_var(
		upl_Context&	C,
	const 	upl_Symbol& 	D,
		upl_Subscript  *Subscript,
		boolean		Load,
		boolean		Have_value	= false,
		long		Value		=  0,
		byte		Other_opcode	=  0,
		boolean		Address_value   = false);


    // Push the value of variable <D> (with optional <Subscript>) on the stack.
    //
    // NOTE(review): <Result> is not described in this header; presumably
    // it is updated to describe the pushed value -- confirm.
    //
    static void push_var(
	upl_Context& C,
	const upl_Symbol& D,
	upl_Subscript  	 *Subscript,
	upl_Expr_result	 *Result);


    // Push the address of variable <D> (with optional <Subscript>) on the stack.
    //
    static void push_var_addr(
	Flex&		 L,
	upl_Context& 	 C,
  const	upl_Symbol&  	 D,
	upl_Subscript   *Subscript,
	upl_Expr_result& Result);


    // Pops the top of the stack and stores it in the variable <D> (with optional <Subscript>)
    //
    static void pop_var(
	upl_Context&  	  C,
	const upl_Symbol& D,
	upl_Subscript  *Subscript);


    // This generates a 6502 Opcode addressing the given byte Offset into the variable D. 
    //
    static void asm_var(
		upl_Context&	C,
	const 	upl_Symbol& 	D,
		byte 		Offset,
		asm_opcode 	Opcode);

    // Take the transient expression result <Clause> 
    // (transient means we know it is held in registers <A/X>),
    // and store it in the variable <Target>.
    //
    static void store_transient(
		upl_Context&	  C,
	const	upl_Symbol& 	  Target,
	const 	upl_Expr_result&  Clause);


    // Try and Optimise expression <A> <Op> <B>; e.g. "myvariable * 2"
    //
    // NOTE(review): presumably returns true iff the optimisation was
    // applied, using <Rollback_state> as the state to roll back to --
    // confirm in the implementation.
    //
    static boolean optimise(
	upl_Context&  		C,
	upl_op 			Op,
	upl_Expr_result&	A,
	upl_Expr_result&	B,
	upl_Context_state&	Rollback_state);



    // Take the expression result <Clause> and load it into registers <A/X>.
    //
    static void load_reg(
		upl_Context&	  C,
	const 	upl_Expr_result&  Clause);


    // What happens when we <A> <Op> <B>?
    // This function returns the resulting type,
    // and sets <Unsigned> to reflect if it is unsigned or not.
    // We may have to cast <A> and <B>,
    // in which case they will be <new_A> and <new_B>.
    //
    static upl_value_type op_result(
		upl_op		Op,
		upl_value_type	A,
		upl_value_type	B,
		upl_value_type&	new_A,
		upl_value_type&	new_B,
		boolean&	Unsigned);


    // Name Mangling.
    //
    // Name Mangling is used to store parameter type information with
    // a subroutine name.  This prevents an external subroutine being
    // called with the wrong parameters.  For example, the subroutines
    // foo(i) and foo(i, c) would not be accidentally confused.
    //
    // DEVELOPER NOTE:
    //   Name mangling can also be used for overloading.
    //   However Quetzalcoatl/UPL doesn't currently support this.
    //   [bj 15sep1999]
    //

    // Append Mangling.
    //
    // UPDATED:
    //   Mangling       The string to append the mangling to.
    //			NOTE(review): no buffer size is passed, so the
    //			caller must presumably supply a large enough buffer.
    // IN:
    //   Value_type	The value to mangle and append to <Mangling>.
    //
    static
    void append_mangling(
	char 	       Mangling[],
	upl_value_type Value_type);


    // Character Conversion.
    //
    // Convert a character using <Conversion>.
    // Returns the converted character.
    //
    static
    char convert_char(char ch, upl_char_conversion Conversion);
};






// SECTION Assembler.
//

// OBJECT upl_Assembler.
//
// All the contextual information is stored in the <upl_Context> object
// passed in each call.  <Flex> is the lexical parser.  Use <Flex> to
// open the source file before the first call to program().
//
// <upl_Assembler> operates just like <upl_Compiler>, but assembly
// language is much simpler than Quetzalcoatl/UPL!  The basic element
// is the line(), which can contain an assembly language instruction
// or assembler pragma/command.
//
// The entry point to this module is program().
//
class upl_Assembler
{
  public:
    //
    // List of opcodes, stored in a B-tree for optimal access.
    //
    common_Keys	opcode;

    // Next address to be allocated.
    //
    upl_addr	current_addr;


  public:
    // Construct the assembler.
    // NOTE(review): presumably this populates <opcode> -- confirm in
    // the implementation.
    //
    upl_Assembler(void);

    //
    // Read/assemble an Assembler Program.
    //
    // IN:
    //   Filename	Names the assembly source to process.
    //			NOTE(review): no <Flex> is passed in, so presumably
    //			the file is opened internally -- confirm.
    //   C		Context.
    //   Verbose        Set true for verbose messages.
    //
    void     program(const char *Filename, upl_Context& C, boolean Verbose);

    // Takes the same arguments as program() plus a pass number <pass>.
    // NOTE(review): presumably processes one source file for one
    // assembler pass (eg. for included files) -- confirm in the
    // implementation.
    //
    void     recurse(const char *Filename, upl_Context& C, boolean Verbose, int pass);


    // Read an assembler statement (directive or actual opcode, etc.)
    //
    // NOTE(review): the meaning of the boolean return value is not
    // described in this header.  <Pass> is the current pass number.
    //
    boolean  line(Flex& L, upl_Context& C, boolean Verbose, int Pass);

    // Read an expression (we can only use a constant result for the assembler).
    // Returns the value of the expression.
    //
    long     expr(Flex& L, upl_Context& C, int Pass);


    // Return addressing mode suffix.
    //
    // The opcodes in the <opcode> key object consist of the instruction
    // type (eg. LDA), appended with an addressing mode suffix (eg. _IX).
    // This method returns the addressing mode suffix for <Addr_mode>.
    //
    static
    const char *addr_mode_suffix(asm_addr_mode Addr_mode);
};


#ifdef COMMON_MSW32
#include <io.h>
#endif
#endif
