//
// NAME:
//   upl_copt.cpp
// TITLE:
//   UPL/Quetzalcoatl: Compiler: Optimiser.
// FUNCTION:
//   See header.
//
// AUTHOR:
//   Brendan Jones. (Contact through www.kdef.com/geek/vic)
// RIGHTS:
//   (c) Copyright Brendan Jones, 1998.  All Rights Reserved.
// SECURITY:
//   Unclassified.  
// LEGAL NOTICE:
//   See legal.txt before viewing, modifying or using this software.
// CONTACT:
//   Web:	http://www.kdef.com/geek/vic
//   Email:	See www.kdef.com/geek/vic
// DATE:
//   July 17, 1998.
// RIGHTS:
//  This file is part of The Quetzalcoatl Compiler.
//  
//  The Quetzalcoatl Compiler is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//   the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//  
//  The Quetzalcoatl Compiler is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//  
//  You should have received a copy of the GNU General Public License
//  along with The Quetzalcoatl Compiler; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
//
//
// TODO:
//   Currently this code only optimises 8 bit arithmetic, for constants and non-array variables.
//   You can add other optimisations here.  [bj 16may2005]
//
// CAUTION/UNSURE/DUBIOUS:
//   This optimiser was originally written when UPL/Quetzalcoatl only had simple variables;
//   i.e. no arrays and no pointers.  I've since added these, but I can't guarantee the
//   following code handles them.  I've already excluded arrays.  It may be necessary to
//   add further code to stop inappropriate optimisation (e.g. of pointer variables).
//   [bj 16may2005]
//
// MODIFICATIONS:
//   NAME  MOD  DATE       DESCRIPTION
//   bj    1    16may2005  Comments.  
//			   Added code to stop optimising array variables code 
//			   in the 1 byte segment.
//
//
#include "upl.h"


// Optimise Expressions.
//
// Try to optimise the expression <A> <Op> <B>; e.g. "myvariable * 2".
//
// Only one group of optimisations is implemented here: both operands are
// 1 byte wide, one of them is a non-array variable or a value left in the
// accumulator, and the other is a constant or a non-array variable.  Within
// that group we handle addition, equal / not-equal comparison, and
// multiplication or division by a constant power of two (done by shifting).
//
// IN:
//   Op                 What is the operator? e.g. multiply, equal comparison, etc.
//   B                  Second expression.
//   Rollback_state     A code state we roll back to before loading a variable
//                      operand into the accumulator.  This procedure only
//                      passes it to C.rollback(); it never assigns it.
//                      (Presumably the state from before the code for <A> and
//                      <B> was generated -- confirm against the caller.)
// UPDATED:
//   C                  Context containing the generated code, symbol tables, etc.
//   A                  First expression.  If we optimise, <A> is updated (via
//                      set_value_type) to describe the result: which registers
//                      or flags held it, and the code state at that moment.
//                      The optimised code always pushes its result on the
//                      runtime stack, but a later optimisation may prefer to
//                      take the value straight from the registers; the state
//                      recorded on <A> is what lets it roll back to that point.
// RESULT:
//   True iff we optimised the code, in which case the replacement code has
//   been written to <C> and <A> describes the result.
//   False if the operands did not match any optimisation group; no code is
//   emitted in that case.
//
//
boolean upl_Compiler::optimise(
        upl_Context&            C,
        upl_op                  Op,
        upl_Expr_result&        A,
        upl_Expr_result&        B,
        upl_Context_state&      Rollback_state)
  {
  // Did we succeed in optimising the code?
  //
  boolean               optimised       = false;    // Assume no

  // We are operating on two expressions; <A> and <B>.
  // We'll assign them to pointers <a> and <c>.
  //
  upl_Expr_result      *a;
  upl_Expr_result      *c;

  // Can we do a fast (bit-shifted) multiplication or division?
  //
  boolean               fast_mul_div    = false;

  // When we compute an expression, before the results are pushed on the runtime stack
  // they are held in registers; A, X if it's 16-bit, and the ZNC flags if it's a comparison.
  // A future optimisation (made after any optimisation we do here) may be able
  // to roll back to the point the values are in the registers.  We use the <current_state>
  // object to take a snapshot so we can declare this.
  //
  upl_Context_state     current_state;

  // At the end of the optimisation, the results can be in various registers and flags
  // (described above).  We need to get these onto the runtime stack.  Rather than
  // duplicate that in every optimisation fragment, we merely set one of these flags
  // and do it at the end of this procedure.  [bj 16may2005]
  //
  // NOTE: <push_word_ax> is never set by the optimisations below; it is there
  //       for 16-bit optimisations that may be added later.
  //
  boolean               push_byte_a     = false,
                        push_word_ax    = false,
                        push_byte_a_zcn = false;


  // Assign <A, B> to <a, c>.
  //
  // To simplify things, we make the constant the second expression <c>.
  // However division isn't commutative (e.g. 2*3=3*2, but 2/3 != 3/2),
  // so if we're dividing we must always preserve the order.
  //
  if (B.is_constant() || Op == upl_div)
    {
    a = &A;
    c = &B;
    }
  else
    {
    a = &B;
    c = &A;
    }


  // If <c> is a non-zero constant with a single bit (e.g. 2, 4, 8, 16, etc.),
  // then we can perform fast multiplication or division using bit shifting.
  // This is much faster than calling the runtime libraries routines.
  //
  // We check that <c> is a constant *before* looking at its value, so the
  // value field is only ever read for expressions that really are constants.
  //
  if (Op == upl_mul || Op == upl_div)
    if (c->is_constant() && c->value > 0)
      if (upl_Utility::count_bits(c->value) == 1)
        fast_mul_div = true;



  // Attempt Optimisations.
  //
  // Optimisations are grouped by conditions.  For example, the first group
  // handles the condition where the expressions are both 1 byte long,
  // and constants or non-subscripted variables.
  //
  // A second group could handle the addition of 16-bit values, etc.
  //
  // NOTE:
  //   I've done these merely as an example to get other developers started.
  //   I've only optimised a few cases.  You can add your own optimisation groups
  //   and conditions here too.  [bj 16may2005]
  //

  // Handle 1 byte optimisations.
  //
  // If  <a> is a 1 byte non-array variable or a value stored in the accumulator,
  // and <c> is a 1 byte constant or non-array variable,
  // and the operator is one we know how to handle...
  //
  Select(a->value_bytes == 1 &&
        ((a->is_variable() && a->variable->count == 0) || a->transient == upl_transient_a) &&
         c->value_bytes == 1 &&
        ((c->is_variable() && c->variable->count == 0) || c->is_constant()) &&
         (Op == upl_add || Op == upl_eq || Op == upl_ne || fast_mul_div))
      {
      // Get the value of <a> into the accumulator.
      //
      // NOTE:
      //   <a> is one expression we're working with.  <c> is the other.
      //   Don't confuse <a> with the Accumulator, or <c> with the carry bit.
      //   [bj 16may2005]
      //

      // The code to generate the value <a> and store it on the stack
      // has already been generated.  If <transient> is <upl_transient_a>
      // then it means at some point the value for <a> was in the accumulator.
      // We can thus roll back to that state.
      //
      // NOTE(review): when the operands were swapped above (a == &B), this
      //   rollback keeps whatever code was already generated for <A>.
      //   Confirm that this cannot leave a stale push on the runtime stack.
      //
      if (a->transient == upl_transient_a)
        C.rollback(a->transient_state);
      else if (a->is_variable())
        {
        // The guard on this group only admits a variable <a> whose <count>
        // is 0, so we load it from offset 0.
        //
        // DUBIOUS/UNSURE: What if this is subscripted? [bj 16may2005]
        //

        // Otherwise we want to load a variable's value into the accumulator.
        // Roll back to the caller-supplied state first, then emit the load.
        //
        C.rollback(Rollback_state);
        asm_var(C, *a->variable, 0, ASM_LDA);
        }
      else
        abend(WHERE0, "Bad case");


      switch (Op)
        {
        // Optimise 1 byte Equal, Not Equals.
        //
        case upl_eq:
        case upl_ne:
          {
          // Assertion: expression <a> now sits in the accumulator.
          //

          // If we're doing a comparison to the variable <c>,
          // then we can just use a 6502 CMP Absolute instruction.
          //
          if (c->is_variable())
            asm_var(C, *c->variable, 0, ASM_CMP);
          else if (c->is_constant())
            {
            // Otherwise if we're comparing it to a constant
            // we can do a 6502 CMP Immediate instruction.
            //
            C.code.out(ASM_CMP_IMM);
            C.code.out(c->value);
            }
          else
            abend(WHERE0, "Bad case");

          // The result of this comparison is now held in the 6502 ZNC flags.
          // Record that (and the operator) on <A> before we emit anything
          // that turns the flags into a boolean.
          //
          C.mark(current_state);
          A.set_value_type(A.value_type, A.indirection,
                           upl_transient_znc,
                          &current_state, Op);

          // Branch if the condition was NOT met.  The offset of 4 skips the
          // four bytes emitted next (LDA #$ff, BNE +2) and lands on LDA #0.
          //
          switch (Op)
            {
            case upl_eq:
              C.code.out(ASM_BNE);
              C.code.out(4);
              break;

            case upl_ne:
              C.code.out(ASM_BEQ);
              C.code.out(4);
              break;

            default:
              abend(WHERE0, "Bad case");
            }

          // If the condition was met...
          //
          // HFD fixed true for 0xff to be consistent and fixed BNE for 2 instead of 3
          C.code.out(ASM_LDA_IMM);      // Then load the accumulator with <true; 0xff>
          C.code.out(0xffu);
          C.code.out(ASM_BNE);          // BNE is always taken after loading non-zero,
          C.code.out(2);                // so we can use BNE to skip the LDA below
          C.code.out(ASM_LDA_IMM);      // Else load the accumulator with <false; 0>
          C.code.out(0);

          // The result of the comparison was recorded as being in the ZNC
          // status flags, and is also a boolean value now held in the accumulator.
          //
          push_byte_a_zcn = true;
          }
          break;


        // Optimise 1 byte Addition.
        //
        case upl_add:
          {
          // Assertion: expression <a> now sits in the accumulator.
          //

          // Clear the carry flag ready for the addition.
          //
          C.code.out(ASM_CLC);

          // If <c> is a variable, add directly from it.
          //
          if (c->is_variable())
            asm_var(C, *c->variable, 0, ASM_ADC);
          else
            {
            // Otherwise do an add immediate.
            //
            C.code.out(ASM_ADC_IMM);
            C.code.out(c->value);
            }

          // The result of the addition is held in the accumulator.
          //
          push_byte_a = true;
          }
          break;


        // Optimise 1 byte Multiplication / Division by a power of two.
        // (We only get here when <fast_mul_div> is set, so <c> is a
        // positive constant with exactly one bit set.)
        //
        case upl_mul:
        case upl_div:
          {
          // Assertion: expression <a> now sits in the accumulator.
          //

          // How many times we have to shift the accumulator depends
          // on the value held in the constant <c>.
          //
          int shift_bits = upl_Utility::log2(c->value);


          // No CLC is needed first: ASL A and LSR A always shift a zero bit
          // in and only write the carry flag.  (It is ROL / ROR that shift
          // the carry in.)
          //
          // NOTE(review): LSR is a logical shift, so the division is only
          //   right for unsigned values -- confirm 1 byte values are unsigned.

          // Generate a shift instruction for each bit we have to shift the accumulator.
          // If we're multiplying, shift left.  If we're dividing, shift right.
          //
          for (int i=0; i<shift_bits; i++)
            C.code.out(Op == upl_mul ? ASM_ASL_A : ASM_LSR_A);

          // The result of the shift is held in the accumulator.
          //
          push_byte_a = true;
          }
          break;

        default:
          abend(WHERE0, "Bad case");
        }
      }


    // ADD YOUR OWN OPTIMISATION CONDITIONS AND STRATEGIES HERE:
    //
    // when(...) {}


    otherwise
      ;
  endsel



  // Push results on the runtime stack.
  //
  Select(push_byte_a)
      {
      // The result is a byte in the accumulator.
      // Declare the rollback point.
      //
      C.mark(current_state);
      A.set_value_type(A.value_type, A.indirection,
                       upl_transient_a, &current_state);

      // Push the accumulator onto the runtime stack.
      //
      C.code.out(ASM_JSR);
      C.code.out_word_patch(RUNTIME_PUSH_B);
      optimised = true;
      }

    when(push_byte_a_zcn)
      {
      // The result of the comparison is held in the ZNC flags,
      // and this same result as a boolean in the accumulator A.
      // We've already set the transient point above, so all
      // we need do now is push the boolean result on the stack.
      //
      // NOTE: We could combine this fragment with <push_byte_a>,
      //       but I'm trying to make this as clear as possible.
      //       [bj 16may2005]
      //
      C.code.out(ASM_JSR);
      C.code.out_word_patch(RUNTIME_PUSH_B);
      optimised = true;
      }

    // The result is a 16-bit value held in low accumulator, high x-register.
    //
    when(push_word_ax)
      {
      // Declare a rollback to this point using the AX registers.
      //
      C.mark(current_state);
      A.set_value_type(A.value_type, A.indirection,
                       upl_transient_ax, &current_state);

      // And push this 16-bit result on the runtime stack.
      //
      C.code.out(ASM_JSR);
      C.code.out_word_patch(RUNTIME_PUSH_W);
      optimised = true;
      }

    otherwise
      ;
  endsel



  // Did we optimise expressions <A> and <B>?
  //
  return optimised;
  }

