//
// NAME:
//   upl_as.cpp
// TITLE:
//   UPL/Quetzalcoatl: Assembler.
// FUNCTION:
//   See header.
//
// AUTHOR:
//   Brendan Jones. (Contact through www.kdef.com/geek/vic)
// RIGHTS:
//   (c) Copyright Brendan Jones, 1998.  All Rights Reserved.
// SECURITY:
//   Unclassified.  
// LEGAL NOTICE:
//   See legal.txt before viewing, modifying or using this software.
// CONTACT:
//   Web:	http://www.kdef.com/geek/vic
//   Email:	See www.kdef.com/geek/vic
// DATE:
//   July 20, 1998.
// RIGHTS:
//  This file is part of The Quetzalcoatl Compiler.
//  
//  The Quetzalcoatl Compiler is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//   the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//  
//  The Quetzalcoatl Compiler is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//  
//  You should have received a copy of the GNU General Public License
//  along with The Quetzalcoatl Compiler; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
//
//
// MODIFICATIONS:
//   NAME  MOD  DATE     DESCRIPTION
//
//
#ifdef DJGPP
#include <unistd.h>
#endif
#include "upl.h"
#include <ctype.h>



// Construct the assembler and fill the <opcode> lookup tree.
//
// Each entry maps one of our own textual opcode names (bare mnemonic plus
// an optional addressing-mode suffix such as "_IMM", "_Z", "_ZX") to its
// enumerated opcode number (e.g. ASM_BRK).
//
upl_Assembler::upl_Assembler(void)
{
  // We use <opcode> to convert from our own textual opcodes to the actual opcode numbers.
  // The opcode numbers are enumerated (e.g. ASM_BRK is the BRK opcode which is 0).
  // The textual opcodes we use here are our own notation; they're consistent and 
  // easier to process than the Motorola addressing modes which use all manner
  // of brackets and commas. When we read the opcodes in, we will convert from
  // Motorola mode to our own mode to look up the opcode in this <Btree>.
  //
  // The keys must be exactly <mnemonic> + <suffix> as produced by
  // addr_mode_suffix(), because line() builds its lookup key that way.
  //
  opcode.insert("BRK", ASM_BRK);
  opcode.insert("ORA_IX", ASM_ORA_IX);
  opcode.insert("ORA_Z", ASM_ORA_Z);
  opcode.insert("ASL_Z", ASM_ASL_Z);
  opcode.insert("PHP", ASM_PHP);
  opcode.insert("PLP", ASM_PLP);
  opcode.insert("ORA_IMM", ASM_ORA_IMM);
  opcode.insert("ASL_A", ASM_ASL_A);
  opcode.insert("ORA", ASM_ORA);
  opcode.insert("ASL", ASM_ASL);
  opcode.insert("BPL", ASM_BPL);
  opcode.insert("ORA_IY", ASM_ORA_IY);
  opcode.insert("ORA_ZX", ASM_ORA_ZX);
  opcode.insert("ASL_ZX", ASM_ASL_ZX);
  opcode.insert("CLC", ASM_CLC);
  opcode.insert("ORA_Y", ASM_ORA_Y);
  opcode.insert("ORA_X", ASM_ORA_X);
  opcode.insert("ASL_X", ASM_ASL_X);
  opcode.insert("JSR", ASM_JSR);
  opcode.insert("AND_IX", ASM_AND_IX);
  opcode.insert("BIT_Z", ASM_BIT_Z);
  opcode.insert("AND_Z", ASM_AND_Z);
  opcode.insert("ROL_Z", ASM_ROL_Z);
  opcode.insert("AND_IMM", ASM_AND_IMM);
  opcode.insert("BIT", ASM_BIT);
  opcode.insert("AND", ASM_AND);
  opcode.insert("ROL", ASM_ROL);
  opcode.insert("BMI", ASM_BMI);
  opcode.insert("AND_IY", ASM_AND_IY);
  opcode.insert("AND_ZX", ASM_AND_ZX);
  opcode.insert("ROL_ZX", ASM_ROL_ZX);
  opcode.insert("SEC", ASM_SEC);
  opcode.insert("AND_Y", ASM_AND_Y);
  opcode.insert("AND_X", ASM_AND_X);
  opcode.insert("ROL_X", ASM_ROL_X);
  opcode.insert("RTI", ASM_RTI);
  opcode.insert("EOR_IX", ASM_EOR_IX);
  opcode.insert("EOR_Z", ASM_EOR_Z);
  opcode.insert("LSR_Z", ASM_LSR_Z);
  opcode.insert("PHA", ASM_PHA);
  opcode.insert("EOR_IMM", ASM_EOR_IMM);
  opcode.insert("LSR_A", ASM_LSR_A);
  opcode.insert("JMP", ASM_JMP);
  opcode.insert("EOR", ASM_EOR);
  opcode.insert("LSR", ASM_LSR);
  opcode.insert("BVC", ASM_BVC);
  opcode.insert("EOR_IY", ASM_EOR_IY);
  opcode.insert("EOR_ZX", ASM_EOR_ZX);
  opcode.insert("LSR_ZX", ASM_LSR_ZX);
  opcode.insert("CLI", ASM_CLI);
  opcode.insert("EOR_Y", ASM_EOR_Y);
  opcode.insert("EOR_X", ASM_EOR_X);
  opcode.insert("LSR_X", ASM_LSR_X);
  opcode.insert("RTS", ASM_RTS);
  opcode.insert("ADC_IX", ASM_ADC_IX);
  opcode.insert("ADC_Z", ASM_ADC_Z);

  // The rotate-right zero-page and accumulator forms are historically named
  // "RCR_*", unlike the other rotate-right entries ("ROR", "ROR_ZX", "ROR_X").
  // line() looks up "ROR" + "_Z", so without the "ROR_Z" alias the zero-page
  // form can never be selected for ROR. The old names are kept so existing
  // sources that spell them literally continue to assemble.
  //
  opcode.insert("RCR_Z", ASM_RCR_Z);
  opcode.insert("ROR_Z", ASM_RCR_Z);
  opcode.insert("PLA", ASM_PLA);
  opcode.insert("ADC_IMM", ASM_ADC_IMM);
  opcode.insert("RCR_A", ASM_RCR_A);
  opcode.insert("ROR_A", ASM_RCR_A);
  opcode.insert("JMP_I", ASM_JMP_I);
  opcode.insert("ADC", ASM_ADC);
  opcode.insert("ROR", ASM_ROR);
  opcode.insert("BVS", ASM_BVS);
  opcode.insert("ADC_IY", ASM_ADC_IY);
  opcode.insert("ADC_ZX", ASM_ADC_ZX);
  opcode.insert("ROR_ZX", ASM_ROR_ZX);
  opcode.insert("SEI", ASM_SEI);
  opcode.insert("ADC_Y", ASM_ADC_Y);
  opcode.insert("ADC_X", ASM_ADC_X);
  opcode.insert("ROR_X", ASM_ROR_X);
  opcode.insert("STA_IX", ASM_STA_IX);
  opcode.insert("STY_Z", ASM_STY_Z);
  opcode.insert("STA_Z", ASM_STA_Z);
  opcode.insert("STX_Z", ASM_STX_Z);
  opcode.insert("DEY", ASM_DEY);
  opcode.insert("TXA", ASM_TXA);
  opcode.insert("STY", ASM_STY);
  opcode.insert("STA", ASM_STA);
  opcode.insert("STX", ASM_STX);
  opcode.insert("BCC", ASM_BCC);
  opcode.insert("STA_IY", ASM_STA_IY);
  opcode.insert("STY_ZX", ASM_STY_ZX);
  opcode.insert("STA_ZX", ASM_STA_ZX);
  opcode.insert("STX_ZY", ASM_STX_ZY);
  opcode.insert("TYA", ASM_TYA);
  opcode.insert("STA_Y", ASM_STA_Y);
  opcode.insert("TXS", ASM_TXS);
  opcode.insert("STA_X", ASM_STA_X);
  opcode.insert("LDY_IMM", ASM_LDY_IMM);
  opcode.insert("LDA_IX", ASM_LDA_IX);
  opcode.insert("LDX_IMM", ASM_LDX_IMM);
  opcode.insert("LDY_Z", ASM_LDY_Z);
  opcode.insert("LDA_Z", ASM_LDA_Z);
  opcode.insert("LDX_Z", ASM_LDX_Z);
  opcode.insert("TAY", ASM_TAY);
  opcode.insert("LDA_IMM", ASM_LDA_IMM);
  opcode.insert("TAX", ASM_TAX);
  opcode.insert("LDY", ASM_LDY);
  opcode.insert("LDA", ASM_LDA);
  opcode.insert("LDX", ASM_LDX);
  opcode.insert("BCS", ASM_BCS);
  opcode.insert("LDA_IY", ASM_LDA_IY);
  opcode.insert("LDY_ZX", ASM_LDY_ZX);
  opcode.insert("LDA_ZX", ASM_LDA_ZX);
  opcode.insert("LDX_ZY", ASM_LDX_ZY);
  opcode.insert("CLV", ASM_CLV);
  opcode.insert("LDA_Y", ASM_LDA_Y);
  opcode.insert("TSX", ASM_TSX);
  opcode.insert("LDY_X", ASM_LDY_X);
  opcode.insert("LDA_X", ASM_LDA_X);
  opcode.insert("LDX_Y", ASM_LDX_Y);
  opcode.insert("CPY_IMM", ASM_CPY_IMM);
  opcode.insert("CMP_IX", ASM_CMP_IX);
  opcode.insert("CPY_Z", ASM_CPY_Z);
  opcode.insert("CMP_Z", ASM_CMP_Z);
  opcode.insert("DEC_Z", ASM_DEC_Z);
  opcode.insert("INY", ASM_INY);
  opcode.insert("CMP_IMM", ASM_CMP_IMM);
  opcode.insert("DEX", ASM_DEX);
  opcode.insert("CPY", ASM_CPY);
  opcode.insert("CMP", ASM_CMP);
  opcode.insert("DEC", ASM_DEC);
  opcode.insert("BNE", ASM_BNE);
  opcode.insert("CMP_IY", ASM_CMP_IY);
  opcode.insert("CMP_ZX", ASM_CMP_ZX);
  opcode.insert("DEC_ZX", ASM_DEC_ZX);
  opcode.insert("CLD", ASM_CLD);
  opcode.insert("CMP_Y", ASM_CMP_Y);
  opcode.insert("CMP_X", ASM_CMP_X);
  opcode.insert("DEC_X", ASM_DEC_X);
  opcode.insert("CPX_IMM", ASM_CPX_IMM);
  opcode.insert("SBC_IX", ASM_SBC_IX);
  opcode.insert("CPX_Z", ASM_CPX_Z);
  opcode.insert("SBC_Z", ASM_SBC_Z);
  opcode.insert("INC_Z", ASM_INC_Z);
  opcode.insert("INX", ASM_INX);
  opcode.insert("SBC_IMM", ASM_SBC_IMM);
  opcode.insert("NOP", ASM_NOP);
  opcode.insert("CPX", ASM_CPX);
  opcode.insert("SBC", ASM_SBC);
  opcode.insert("INC", ASM_INC);
  opcode.insert("BEQ", ASM_BEQ);
  opcode.insert("SBC_IY", ASM_SBC_IY);
  opcode.insert("SBC_ZX", ASM_SBC_ZX);
  opcode.insert("INC_ZX", ASM_INC_ZX);
  opcode.insert("SED", ASM_SED);
  opcode.insert("SBC_Y", ASM_SBC_Y);
  opcode.insert("SBC_X", ASM_SBC_X);
  opcode.insert("INC_X", ASM_INC_X);
}



// Read a line of assembler statements from <L>.
//
// Parse and assemble a single assembler statement from <L>.
//
// Parameters:
//   L        The lexer positioned at the start of the statement.
//   C        The compile context (symbols, code/data segments, listing file).
//   Verbose  Not used by this function (its only reference is commented out).
//   Pass     1 or 2. Labels and constants are declared on pass 1; branch
//            offsets are resolved and the listing is written on pass 2.
//
// Returns:
//   true  if the caller should keep reading statements,
//   false once the "end" statement has been read.
//
// Statement forms handled, in this order: comment (;), label (name:),
// constant (name = expr), db/dw data, org, error, message, align, end,
// declare_target, declare_flags, and finally a 6502 instruction.
//
boolean upl_Assembler::line(
        Flex&           L,
        upl_Context&    C,
        boolean         Verbose,
        int             Pass)       // 1 or 2 
{
  upl_symbol_token      token, op_token;

  // <value> is always initialised because the zero-page test below reads it
  // even for implied-mode opcodes, which never assign it.
  //
  long                  value = 0;

  // <size_bytes> holds 1 (db) or 2 (dw). It must be an integer type:
  // stored in a true boolean the value 2 would collapse to 1.
  //
  int                   size_bytes = 0;

  long                  count;
  long                  op;
  long                  op_zero;
  boolean               result = true;
  flex_token_class      token_type;
  boolean               in_brackets;


//HFD
//if (Verbose)
//  cout << current_addr << ":" << endl;


  // If the line begins with a semicolon,
  // then the rest of the line is a comment.
  //
  if (equal(L.peek(), ";"))
    {
    // Skip it and return true.
    //
    if (L.lines == L.token_lines)
      //
      // Skip the rest of this line.
      //
      L.skip_eol();
    else
      //
      // If the semicolon is the last token on this line,
      // then throw away only this token. If we called
      // skip_eol() it would throw away the *next* line,
      // which would be a bad thing.
      //
      L.get(NULL);

    return true;
    }


  // Get the first token on this assembler statement.
  // (Usually there is one statement per line,
  //  but they are allowed to be split over multiple lines).
  //
  token_type = L.peek_type();
  L.get(token);



  // Label.
  //
  // If a colon follows, then this is a label.
  //
  Select(L.matches(":") and
        (token_type == flex_id or token_type == flex_string))
      {
      if (Pass == 1)
        {
        // If first pass, declare the label at this address.
        //
        C.symbols.declare(NULL, L, token, upl_ushort, upl_label, current_addr);

        // Labels starting with an underscore are not added as names
        // to the code segment.
        //
        if (token[0] != '_')
          C.code.add_name(L, token, current_addr);
        }

      // If second pass, print this in the listing file.
      //
      if (C.list_file and Pass == 2)
        (*C.list_file) << current_addr << ":\t  \"" << token << "\":" << endl;
      }


    // A constant.
    //
    // If an equal sign follows, this is a constant declaration.
    //
    when(L.matches("="))
      {
      // Find the <value> of this constant.
      //
      value = expr(L, C, Pass);

      // We only need to declare it on the first pass.
      //
      if (Pass == 1)
        {
        // Find the symbol <S> recording this constant's value.
        //
        upl_Symbol *S = (upl_Symbol *)C.symbols.get(L, token, false);

        // If <S == NULL>, then this symbol has not been defined.
        //
        if (S == NULL)
          {
          // Declare this constant value.
          //
          C.symbols.declare(NULL, L, token, upl_ushort, upl_constant, value);
          }
        else 
          {
          // Otherwise if it has been declared but is a constant,
          // then change the value to the new value.
          //
          // NOTE: Seems a bit strange letting them change a constant value,
          //       but it won't break anything compiler-side by doing so.
          //       [bj 09oct2006]
          //
          if (S->symbol_type == upl_constant)
            {
            S->value = value;
            }
          else
            {
            // They're not allowed to assign a value to a non-constant symbol.
            //
            L.parse_error("This identifier has already been declared");
            }
          }
        }
      }


    // dw (data word), db (data byte).
    //
    // Any two-character token starting with 'd' lands here; anything
    // other than db or dw is then rejected.
    //
    when(strlen(token) == 2 and token[0] == 'd')
      //
      // Find the <size_bytes> of the data item we are about to add.
      //
      switch (token[1])
        {
        case 'b': size_bytes = 1; break;
        case 'w': size_bytes = 2; break;
        default:
          L.parse_error("db or dw expected");
        }

      // Keep adding data in this loop.
      //
      do
        {
        // If the data is a string...
        //
        if (L.peek_type() == flex_string)
          {
          // Then copy it character by character...
          // (Strings are written as bytes for both db and dw.)
          //
          for (char const *p=L.get_string(); *p != 0; p++)
            {
            C.code.out(*p);
            current_addr++; // Advance the current address over the character.
            }

          // Add a terminating null.
          //
          C.code.out(0);
          current_addr++; // Advance the current address over the terminating null.
          }
        else
          {
          // Otherwise it should be a numeric expression.
          //
          value = expr(L, C, Pass);

          // If there is a bracket, they will want to repeat it.
          // e.g. db 0(256)
          // will declare a 256-byte block of the value zero.
          //
          if (L.matches("("))
            {
            count = expr(L, C, Pass);
            L.check(")");

            // A negative count would write nothing yet move the
            // current address backwards; reject it.
            //
            if (count < 0)
              {
              L.parse_error("Repeat count must not be negative");
              count = 0;
              }
            }
          else
            // Otherwise they just want one data word or byte,
            //
            count = 1;

          // Create the block of 1 or more data items.
          //
          for (long i=0; i<count; i++)
            if (size_bytes == 1)        // If a byte
              C.code.out(value);        // Write <value> as a byte
            else
              C.code.out_word(value);   // Otherwise write the word.

          // Advance the current address <current_addr> by the block size.
          //
          current_addr += (size_bytes * count);
          }

        // Read the next data item until there are no more commas.
        //
        }
      while (L.matches(","));


    // Specify the origin of the code.
    //
    // Subsequent code/data will be written from this new current address.
    //
    when(equal(token, "org"))
      current_addr = C.code_addr = expr(L, C, Pass);


    // Terminate with a compile-time error.
    //
    when(equal(token, "error"))
      {
      L.parse_error(
        L.peek_type() == flex_string ? L.peek()
        : "Unspecified compile-time error declared by programmer");

      /*NOTREACHED*/

      L.get(NULL);
      }


    // Print a compile-time message.
    //
    // An optional integer before the string restricts the message to that
    // pass; without it the message is printed on every pass, prefixed
    // with the pass number.
    //
    when(equal(token, "message"))
      int pass = -1;

      if (L.peek_type() == flex_int)
        pass = L.get_int();

      if (L.peek_type() != flex_string)
        L.parse_error("String expected");

      if (pass < 0 or pass == Pass)
        {
        cerr << "I: ";

        if (pass == -1)
          cerr << "Pass " << Pass << ": ";

        cerr << L.peek() << endl;
        }

      L.get(NULL);


    // Align the current address.
    // 
    // e.g. align 2 makes sure the current address is word-aligned (2 byte).
    // e.g. align 256 would make sure it is page-aligned (256 byte).
    //
    when(equal(token, "align"))
      value = L.get_int();

      // A zero alignment would divide by zero below, and a negative one
      // is meaningless, so both are rejected.
      //
      if (value <= 0)
        L.parse_error("Alignment must be a positive integer");
      else if (current_addr % value != 0)
        //
        // Round up to the next multiple of <value>.
        //
        current_addr = (current_addr / value + 1) * value;


    // End.
    //
    // We've reached the last compilable statement in the assembler file.
    //
    when(equal(token, "end"))
      if (C.list_file and Pass == 2)
        (*C.list_file) << current_addr << ":\t\t\t.end\n";

      // Return false so the caller knows not to ask for any more statements.
      //
      result    = false;


    // Declare Target.
    //
    // This is only used by the Quetzalcoatl Runtime Library (e.g. UPLRTIME.ASM);
    // Certain numbers are reserved for certain runtime targets.
    // e.g. Commodore 64 effectively has declare_target(4) in UPLRTIME.ASM.
    // We record this as a reference in the code/data segments.
    // It lets the linker check to make sure we're not mixing targets.
    // e.g. using the runtime library of an C-64 with an Atari's OBJ file.
    // It lets us check at linking time that everything is ok.
    //
    when(equal(token, "declare_target"))
      L.check("(");
      C.data.runtime_target =
      C.code.runtime_target = expr(L, C, Pass);
      L.check(")");


    // Declare Flags.
    //
    // This is not currently used. It could be used to remember properties
    // for special processing at link time. 
    //
    when(equal(token, "declare_flags"))
      L.check("(");
      C.data.flags =
      C.code.flags = expr(L, C, Pass);
      L.check(")");


    // Otherwise if the token at the start of the line was an identifier,
    // then we are expecting an assembly code statement to follow;
    // e.g. LDA #65
    //
    when(token_type == flex_id)
      asm_addr_mode     addr_mode = asm_imp,
                        new_addr_mode;

      // Convert token to upper case.
      //
      for (char *p=token; *p != 0; p++)
        *p = toupper(*p);

      
      // A hash means immediate mode.
      // e.g.  LDA #65
      //
      Select(L.matches("#"))
          value = expr(L, C, Pass);   // This will be the immediate mode value to load.
          addr_mode = asm_imm;

        // A bracket means a "fancy" mode.
        // We will have to process it more to work out what it is. 
        //
        when(L.matches("("))
          //
          // Get the first value...
          //
          value = expr(L, C, Pass);

          // If a comma, this is indirect-X mode.
          // e.g. LDA (200,x)
          // 
          if (L.matches(","))
            {
            L.check("x");
            L.check(")");
            addr_mode   = asm_ix;
            }
          else
            {
            L.check(")");

            // If a comma here, this is indirect y mode.
            // e.g. LDA (200),y
            //
            if (L.matches(","))
              {
              L.check("y");
              addr_mode = asm_iy;
              }
            else
              // Otherwise it's regular indirect mode.
              // e.g. JMP (200)
              //
              addr_mode = asm_ind;
            }


        // Implied/Absolute Mode.
        //
        otherwise
          // Could be abs, doesn't matter. Will correct below 
          // once we know the whole instruction type.
          //
          addr_mode = asm_imp;  
      endsel


      // Get Addressing Mode Suffix.
      //
      // Inside Quetzalcoatl we use our own opcode names which is 
      // easier to process because it is more consistent.
      //
      // For example, LDA #65     becomes LDA_IMM 65
      // For example, LDA (200),Y becomes LDA_IY 200
      // 
      // Set <mode> to the addressing mode suffix for this opcode.
      //
      char const *mode = addr_mode_suffix(addr_mode);


      // Now we join the bare opcode (e.g. LDA) and the mode (e.g. _IMM) together.
      //
      strcpy(op_token, token);          // e.g. "LDA"
      if (mode)
        strcat(op_token, mode);         // e.g. "_IMM"


      // Look up this opcode.
      //
      // The function will return the corresponding opcode number <op>,
      // or a negative number if it cannot be found in our <opcode> tree.
      //
      if ((op = opcode.get(op_token)) >= 0)
        {
        // A bare mnemonic was found. Decide whether it is really implied,
        // or an absolute/relative instruction whose operand follows.
        //
        if (addr_mode == asm_imp)
          switch (op)
            {
            case ASM_ADC: case ASM_AND: case ASM_ASL: case ASM_BIT:
            case ASM_CMP: case ASM_CPX: case ASM_CPY: case ASM_DEC:
            case ASM_EOR: case ASM_INC: case ASM_JMP: case ASM_JSR:
            case ASM_LDA: case ASM_LDX: case ASM_LDY: case ASM_LSR:
            case ASM_ORA: case ASM_ROL: case ASM_ROR: case ASM_SBC:
            case ASM_STA: case ASM_STX: case ASM_STY:
              {
              // These are all absolute mode. 
              //
              // Some assembler coders like to put square brackets
              // around absolute mode addresses. We check for and 
              // discard them without any special processing.
              // Either is ok.
              //
              // NOTE(review): the closing bracket is consumed with matches(),
              // so a missing `]' appears to be tolerated silently.
              //
              in_brackets = L.matches("[");
              value = expr(L, C, Pass);       // The memory location we are addressing.
              addr_mode = asm_abs;
              if (in_brackets)
                L.matches("]");
              }
              break;


            // A relative mode branch.
            //
            case ASM_BEQ: case ASM_BNE: case ASM_BCS: case ASM_BCC:
            case ASM_BPL: case ASM_BMI: case ASM_BVC: case ASM_BVS:
              in_brackets = L.matches("["); // Brackets are optional
              value = expr(L, C, Pass);
              addr_mode = asm_rel;
              if (in_brackets)
                L.matches("]");

              switch (Pass)
                {
                // On the first pass we don't need to do anything.
                // We may not have even processed the label we will
                // be jumping to yet.
                //
                case 1:
                  value = 0;
                  break;

                // On second pass we find how far we are jumping,
                // and put that amount. It is a relative jump,
                // so we can only jump between -128..127 bytes
                // within the current program counter.
                //
                case 2:
                  //
                  // The offset is relative to the address of the
                  // instruction following this 2-byte branch.
                  //
                  value = value - (current_addr+2);

                  // The relative jump amount must fit in a 2's complement signed byte.
                  // If it won't, the branch is invalid. They will need to rewrite
                  // it using a smaller negated branch over a jump.
                  // However we are a mere assembler; the coder will have to do that.
                  //
                  if (value < -128 or value > 127)
                    L.parse_error("A branch too far");
                  break;

                default:
                  abend(WHERE0, "Bad case");
                }

              break;

            default:
              ;
            }



         // If it was an absolute mode address,
         // see if they're indexing by the X or Y registers.
         //
         if (addr_mode == asm_abs)
           if (L.matches(","))  
             {
             Select(L.matches("x"))       // e.g. LDA 200,x
                 addr_mode = asm_x;       // Indexed X
               when(L.matches("y"))       // e.g. LDA 200,y
                 addr_mode = asm_y;       // Indexed Y
               otherwise
                 L.parse_error("x or y expected");
             endsel

             // The <mode> has changed. Find the indexed opcode.
             //
             mode = addr_mode_suffix(addr_mode);
             strcpy(op_token, token);
             if (mode)
               strcat(op_token, mode);

              // Get the new opcode and make sure it is legal.
              //
              if ((op = opcode.get(op_token)) < 0)
                L.parse_error("Bad opcode or Invalid addressing mode");
              }


        // Zero Page?
        //
        // If the memory address <value> is less than 256 bytes,
        // then it falls within the first page of memory.
        // There are special, compact instructions we can use
        // to access this area of memory. Check to see if we 
        // can use those here and change accordingly.
        //
        if (value < 256 )
          {
          switch (addr_mode)
            {
            case asm_x:   new_addr_mode = asm_zx; break;
            case asm_y:   new_addr_mode = asm_zy; break;
            case asm_abs: new_addr_mode = asm_z;  break;
            default:
              new_addr_mode = addr_mode;
            }


          // If the address mode changed (to zero-page address mode),
          // then fetch and compute the new opcode.
          //
          if (new_addr_mode != addr_mode)
            {
            mode = addr_mode_suffix(new_addr_mode);
            strcpy(op_token, token);
            strcat(op_token, mode);

            // Not every instruction has a zero-page form; if the lookup
            // fails we simply keep the two-byte address form.
            //
            if ((op_zero = opcode.get(op_token)) >= 0)
              {
              addr_mode = new_addr_mode;
              op        = op_zero;
              }
            }
          }


        // For the Listing file we use the traditional Motorola style
        // e.g. LDA # rather than our own easier LDA_IMM style.
        // Find the corresponding prefix and suffix for the Motorola style.
        //
        char const *prefix = "", *suffix = "";

        if (C.list_file and Pass == 2)
          {
          (*C.list_file) << current_addr << ":\t  " << op;

          switch (addr_mode)
            {
            case asm_imp:       break;
            case asm_imm: prefix = " #";              break;
            case asm_ix:  prefix = " ("; suffix = ",x)"; break;
            case asm_iy:  prefix = " ("; suffix = "),y"; break;
            case asm_zx:  prefix = " "; suffix = ",x"; break;   // Zero page,X is not indirect.
            case asm_zy:  prefix = " "; suffix = ",y"; break;
            case asm_x:   prefix = " "; suffix = ",x"; break;
            case asm_y:   prefix = " "; suffix = ",y"; break;
            case asm_z:   prefix = " "; suffix = ""; break;
            case asm_abs: prefix = " "; suffix = ""; break;
            case asm_ind: prefix = " ("; suffix = ")"; break;
            case asm_rel: prefix = " "; suffix = ""; break;
            }
          }


        // Write the opcode.
        //
        C.code.out(op);


        switch (addr_mode)
          {
          // Implied Mode (no arguments).
          //
          case asm_imp:
            current_addr += 1;  // The opcode only.

            // Print to listing file on second pass.
            //
            if (C.list_file and Pass == 2)
                (*C.list_file) << "         ";
            break;


          // Absolute, Indexed, Indirect Mode.
          //
          // Two byte argument.
          //
          case asm_abs:
          case asm_x:
          case asm_y:
          case asm_ind:
            C.code.out_word(value); // Write the two-byte value.
            current_addr += 3;      // The opcode and two bytes for the address.

            // Print to listing file on second pass (low byte, then high byte).
            //
            if (C.list_file and Pass == 2)
              {
              (*C.list_file) << ' ';
              (*C.list_file).width(2);
              (*C.list_file) << (ushort)( value     & 0xff);
              (*C.list_file) << ' ';
              (*C.list_file).width(2);
              (*C.list_file) << (ushort)((value>>8) & 0xff);
              (*C.list_file) << "    ";
              }
            break;


          // Immediate, Zero Page, Relative, Indexed-Indirect mode.
          //
          // One byte argument.
          //
          case asm_imm:
          case asm_z:
          case asm_zx:
          case asm_zy:
          case asm_rel:
          case asm_ix:
          case asm_iy:
            C.code.out(value);
            current_addr += 2;    // The opcode and one byte for the address/offset/value.
        
            // Print to listing file on second pass.
            //
            if (C.list_file and Pass == 2)
              {
              (*C.list_file) << ' ';
              (*C.list_file).width(2);

              ushort b = (value & 0xff);

              (*C.list_file) << b;

              (*C.list_file) << "    ";
              }
            break;

          default:
            abend(WHERE0, "Bad case");
          }


        // Print to listing file on second pass.
        //
        if (C.list_file and Pass == 2)
          {
          // Print the opcode (first three characters only of our opcode).
          // e.g. LDA_IMM is printed lowercase as "lda"; consistent with Motorola.
          //
          (*C.list_file) << '\t'
          << (char)tolower(op_token[0])
          << (char)tolower(op_token[1])
          << (char)tolower(op_token[2])
          << prefix;                                  // Print the prefix we set above.


          // <current_addr> has already been advanced past this branch,
          // so adding the relative offset gives the branch target.
          //
          if (addr_mode == asm_rel)                   // If a relative branch
            (*C.list_file) << (current_addr+value);   // Print the calculated branch address.
          else if (addr_mode != asm_imp)              // Otherwise if anything but implied
            (*C.list_file) << value;                  // Print the value (address, immediate value, etc.)

          // Close with the suffix.
          //
          (*C.list_file) << suffix << endl;
          }
        }
      else
        //
        // The opcode constructed is not recognised.
        // They have probably used a non-existent opcode or incompatible addressing mode.
        //
        L.parse_error("Bad opcode or Invalid addressing mode");


    // The "end" statement must be used to finish a statement.
    // If they got here, the next token in the input isn't recognised.
    //
    otherwise
      L.parse_error("Identifier or `;' expected");
  endsel


  return result;
}



// Read an assembler program from the source file <Filename>.
//
// Assemble the source file <Filename> into the context <C>.
//
// Parameters:
//   Filename  Path of the assembler source file to read.
//   C         The compile context that receives symbols and code.
//   Verbose   Forwarded to line() (only on pass 2).
//
// The file is read twice (see the comment on the pass loop below). Each
// pass ends when line() returns false, i.e. at the "end" statement.
//
void upl_Assembler::program(
	const char     *Filename,
	upl_Context& 	C,
	boolean 	Verbose)
{
  Flex L;
  upl_Context_state	CS;

  // Remember our starting situation.
  //
  C.mark(CS);


  // We process the assembler source in two passes:
  //
  // Pass 1: Find the address of all labels, data, etc.
  // Pass 2: Generate code with the address we learnt in the first pass.
  //
  // We do this in two passes because we probably have code that accesses
  // an address *before* the address is declared in the source file.
  // For example, it could be branching to a label we haven't read yet.
  // So on the first pass, we find the addresses. At the end of that pass,
  // we will know all the addresses. On the second pass, we use them.
  //
  for (int pass=1; pass<=2; pass++)
    {
    // On each pass we start at the beginning of memory.
    // 
    // This would be location 0; in zero page memory.
    // Usually though they will use the "org" statement
    // to start writing to another memory location.
    //
    current_addr = 0;


    // On second pass, move back to the start of our starting situation.
    //
    if (pass == 2)
      C.rollback(CS);

    // Open the file.
    //
    if (C.verbose >= 2)
      cerr << "I: Opening file " << Filename << endl;

    // The arguments here say to open the file as if it were a 'C' syntax
    // (which is similar to assembler), not to look for real numbers
    // (with decimal points and exponents) but to look for literal characters;
    // e.g. 'A'.
    //
    L.open_file((char *)Filename,
	flex_syntax_c|flex_syntax_no_real|flex_syntax_lit_char);

    loop
      //
      // If Statement.
      //
      // if (expression) statements .... { (elif(expression) | else) statements } endif
      //
      // A conditional statement. Nested "if" statements are not handled
      // here: inside a block that is being read, statements go to line().
      //
      if (L.matches("if"))
	{
	boolean reading;	    // Shall we read this next block of conditional statements?
	boolean branched = false;   // Have we branched yet (chosen one of the conditional statements)?

	L.check("(");
	reading = branched = expr(L, C, pass);	  // if-expression is true or false.
	L.check(")");


	loop
	  //
	  // "endif" finishes the if statement.
	  //
	  Select(L.matches("endif"))
	      break;

	    // an "else if" construct.
	    //
	    when(L.matches("elif"))
	       if (not branched)	      // If we haven't already branched...
		 {
		 L.check("(");
		 branched = reading = expr(L, C, pass);	// If true, we will use this next block.
		 L.check(")");
		 }
	       else
		 reading = false;	      // We have already taken the successful branch.

	    // an "else" construct.
	    //
	    when(L.matches("else"))
	      if (not branched)		      // If we haven't already branched...
		reading = branched = true;    // Then take this one! It's the last!
	      else
		reading = false;	      // We have already taken the successful branch.

	    otherwise
	      //
	      // Process a statement block...
	      //
	      if (reading)    // If we are reading this block...
		{
		// Try to read the statement.
		//
		if (not line(L, C, Verbose and pass == 2, pass))
		  {
		  // If they returned false, we hit an "end" statement.
		  // But we can't end without first getting an "endif".
		  //
		  L.parse_error("Unmatched \"if\"");
		  }
		}
	      else
		//
		// We're not reading this block.
		// Skip over the statements, one token at a time.
		//
		L.get(NULL);
	  endsel
	}
      //
      // Otherwise... 
      //
      else {

      // An "Include" Statement.
      //
      // This includes other files, just like a preprocessor.
      //
      // NOTE: This is new code by Harry. If it works, we can remove 
      //       the #ifdef and make it part of Q-proper. [bj 09oct2006]
      //
      // NOTE(review): this branch re-opens <L> on the included file, calls
      //       recurse() (which opens its own lexer), closes <L> and then
      //       `break's out of the statement loop, so nothing after the
      //       include statement is read in this pass. Confirm against the
      //       Flex open_file()/close() semantics before enabling NEW_CODE.
      //
#ifdef NEW_CODE
      if (L.matches("include")) {
	  if (L.peek_type() == flex_string)
	    {
	    //Flex I;
	    char * tmp;

	    if (access(L.peek(), 0) == 0)
	      {
	      if (C.verbose >= 2)
		cerr << "I: Including file " << L.peek() << endl;
	      tmp = (char *)L.get(NULL);
	      L.open_file(tmp,
		  flex_syntax_c|flex_syntax_no_real|flex_syntax_lit_char);

	      recurse(tmp, C, C.verbose, pass);

	      L.close();
	      }
	    else
	      L.parse_error("Could not open #include file");
	    }
	  else
	    L.parse_error("String expected; eg. include \"header.h\"");
	break;
	}
#endif

	// Otherwise a regular assembler statement...
        // 
	// e.g. "LDA #65"
	//
	// Read this assembler statement in.
	//
	if (not line(L, C, Verbose and pass == 2, pass))
	  {
	  // if false, we reached the "end" statement.
	  //
	  break;
	  }
	}

    L.close();
    }
}




// NOTE: This is new code by Harry. If it works, we can remove 
//       the #ifdef and make it part of Q-proper. [bj 09oct2006]
//
#ifdef NEW_CODE
// Assemble the statements of an included file <Filename> for one <pass>.
//
// This mirrors the statement loop of program(), but runs for a single
// pass with its own lexer. It returns once line() reports the "end"
// statement.
//
void upl_Assembler::recurse(
	const char     *Filename,
	upl_Context& 	C,
	boolean 	Verbose,
	int		pass)
{
  Flex L;
  upl_Context_state	CS;

  // Mark the context on entry, and on the second pass roll straight
  // back to that mark.
  //
  C.mark(CS);

  if (pass == 2)
    C.rollback(CS);

  if (C.verbose >= 2)
    cerr << "I: Opening file " << Filename << endl;

  L.open_file((char *)Filename,
      flex_syntax_c|flex_syntax_no_real|flex_syntax_lit_char);

  loop
    if (L.matches("if"))
      {
      // <active>: are we assembling the block we are currently inside?
      // <taken>:  has any branch of this conditional been chosen yet?
      //
      boolean active;
      boolean taken = false;

      L.check("(");
      taken = expr(L, C, pass);
      active = taken;
      L.check(")");

      loop
	Select(L.matches("endif"))
	    //
	    // The conditional is finished.
	    //
	    break;

	  when(L.matches("elif"))
	    if (taken)
	      //
	      // An earlier branch already won; skip this one.
	      //
	      active = false;
	    else
	      {
	      L.check("(");
	      active = expr(L, C, pass);
	      taken = active;
	      L.check(")");
	      }

	  when(L.matches("else"))
	    if (taken)
	      active = false;
	    else
	      {
	      taken = true;
	      active = true;
	      }

	  otherwise
	    if (not active)
	      //
	      // Skipped block: discard one token at a time.
	      //
	      L.get(NULL);
	    else if (not line(L, C, Verbose and pass == 2, pass))
	      //
	      // "end" was reached before "endif".
	      //
	      L.parse_error("Unmatched \"if\"");
	endsel
      }
    else
      {
      // A regular statement; stop at the "end" statement.
      //
      if (not line(L, C, Verbose and pass == 2, pass))
	break;
      }

  L.close();
}
#endif



// Read an expression.
//
// The UPL Compiler has full-blown expression processing code.
// It can read a mathematical expression, complete with constants
// and numbers, and return the <result>.
//
// We the assembler call this code to process our expressions.
// This includes expressions using labels or memory locations.
//
long upl_Assembler::expr(
	Flex& 		L,
	upl_Context& 	C,
	int		Pass)
{
  upl_Expr_result	evaluated;

  // Delegate the parsing to the compiler's expression reader,
  // which fills in <evaluated>.
  //
  upl_Compiler::expr(L, C, evaluated, 0, Pass == 2);

  // Constant results (which include labels, i.e. memory addresses)
  // are handed straight back.
  //
  if (evaluated.constant)
    return evaluated.value;

  // Anything else (variables, etc.) is not allowed in assembler source.
  //
  L.parse_error("Constant expression expected");

  return evaluated.value;
}




// Convert an addressing mode <Addr_mode> enumeration into our
// own rationalised addressing mode suffix. This is easier for
// us to look up than the more esoteric Motorola addressing notations
// used in the source code.
//
// e.g. Motorola LDA #65 uses immediate mode, so we return "_IMM".
//
// On return, we can append that to the opcode to get "LDA_IMM".
//
const char *upl_Assembler::addr_mode_suffix(asm_addr_mode Addr_mode)
{
  switch (Addr_mode)
    {
    // Modes whose opcode name carries a suffix.
    //
    case asm_imm: return "_IMM";
    case asm_ix:  return "_IX";
    case asm_iy:  return "_IY";
    case asm_zx:  return "_ZX";
    case asm_zy:  return "_ZY";
    case asm_x:   return "_X";
    case asm_y:   return "_Y";
    case asm_z:   return "_Z";
    case asm_ind: return "_I";

    // Implied, absolute and relative opcodes use the bare mnemonic,
    // so there is no suffix to append.
    //
    case asm_imp:
    case asm_abs:
    case asm_rel:
      return NULL;

    default:
      abend(WHERE0, "Bad case");
    }

  // Only reached if abend() returns after an unknown mode.
  //
  return NULL;
}
