/* This file is part of Malaga, a system for Natural Language Analysis.
 * Copyright (C) 1995-1999 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* description ==============================================================*/

/* This module compiles Malaga rule files. */

/* includes =================================================================*/

#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "scanner.h"
#include "rule_type.h"
#include "rule_code.h"
#include "rule_parser.h"
#include "rule_symbols.h"
#include "files.h"

#undef GLOBAL
#define GLOBAL

#include "rule_compiler.h"

/* types ====================================================================*/

typedef struct /* a record to remember a rule's first instruction */
{
  int_t number; /* the rule number (index of the rule in <code.rule_pool>) */
  int_t first_instr; /* the index of the rule's first instruction in
		      * <code.instr_pool>; sort key in check_rule_calls */
} rule_instr_t;

typedef struct RULE_REFERENCE_T /* a reference to a rule, 
				 * element of a singly linked list. */
{
  struct RULE_REFERENCE_T *next; /* next reference in this list,
				  * NULL for the last element */
  int_t rule_number; /* number of referenced rule */
} rule_reference_t;

/* functions ================================================================*/

LOCAL void mark_reachable (bool_t reachable[],
                           rule_reference_t **new_rules,
                           int_t rule)
/* Flag <rule> in the vector <reachable>.
 * A rule that is flagged for the first time is also put in front of the
 * list <*new_rules>; a rule that is already flagged is left untouched. */
{
  rule_reference_t *node;

  /* A rule that is already flagged must not be queued a second time. */
  if (reachable[rule])
    return;

  reachable[rule] = TRUE;

  /* Put a new list element for <rule> in front of <*new_rules>. */
  node = new_mem (sizeof (rule_reference_t));
  node->rule_number = rule;
  node->next = *new_rules;
  *new_rules = node;
}

/*---------------------------------------------------------------------------*/

LOCAL void add_reference (rule_reference_t **references,
                          int_t rule)
/* Make sure that the list <*references> contains an entry for <rule>.
 * If there is none yet, a new entry is put in front of the list. */
{
  rule_reference_t *entry = *references;

  /* Walk the list; there is nothing to do if <rule> is already in it. */
  while (entry != NULL)
  {
    if (entry->rule_number == rule)
      return;
    entry = entry->next;
  }

  /* <rule> is not in the list yet, so create an entry at its front. */
  entry = new_mem (sizeof (rule_reference_t));
  entry->rule_number = rule;
  entry->next = *references;
  *references = entry;
}

/*---------------------------------------------------------------------------*/

LOCAL void remove_reference (rule_reference_t **references)
/* Unlink the first reference of the list <*references> and release it
 * with "free_mem". <*references> is set to the rest of the list.
 * Nothing happens if <references> is NULL or if the list is empty. */
{
  rule_reference_t *reference;

  /* Test the list head as well as the list handle: an empty list has no
   * first element whose <next> field could be read. */
  if (references == NULL || *references == NULL)
    return;

  reference = *references;
  *references = reference->next;
  free_mem (&reference);
}

/*---------------------------------------------------------------------------*/

LOCAL void check_all_rules_reachable (int_t num_rules,
                                      rule_reference_t *references[])
/* Print a warning on <stderr> for every rule that can't be reached.
 * The search starts at the pruning rule, the robust rule, the allo rule,
 * the input filter and the output filter (as far as they are set in <code>)
 * and at the rules of the initial rule set.
 * <references[i]> is the reference list for rule <i>,
 * i.e. the list of all rules that may be called from rule <i>. */
{
  bool_t *reachable;
  rule_reference_t *pending = NULL; /* reachable rules not yet expanded */
  int_t special_rules[5];
  int_t num_special_rules;
  int_t i;

  /* Create a vector initialised with FALSEs. */
  reachable = new_vector (sizeof (bool_t), num_rules);

  /* Start with the special rules; -1 means that a rule is not set. */
  special_rules[0] = code.pruning_rule;
  special_rules[1] = code.robust_rule;
  special_rules[2] = code.allo_rule;
  special_rules[3] = code.input_filter;
  special_rules[4] = code.output_filter;
  num_special_rules = (int_t) (sizeof (special_rules)
                               / sizeof (special_rules[0]));
  for (i = 0; i < num_special_rules; i++)
  {
    if (special_rules[i] != -1)
      mark_reachable (reachable, &pending, special_rules[i]);
  }

  /* Add the rules of the initial rule set. The set ends with -1;
   * other negative entries are skipped. */
  if (code.initial_rule_set != -1)
  {
    int_t *member = (int_t *) pool_item (code.rule_set_pool,
                                         code.initial_rule_set);

    while (*member != -1)
    {
      if (*member >= 0)
        mark_reachable (reachable, &pending, *member);
      member++;
    }
  }

  /* Expand one pending rule at a time: everything it references becomes
   * reachable, and rules that are new are appended to <pending>. */
  while (pending != NULL)
  {
    int_t current = pending->rule_number;
    rule_reference_t *target;

    remove_reference (&pending);

    for (target = references[current]; target != NULL; target = target->next)
      mark_reachable (reachable, &pending, target->rule_number);
  }

  /* Report every rule that has never been marked. */
  for (i = 0; i < num_rules; i++)
  {
    rule_t *unreached;

    if (reachable[i])
      continue;

    unreached = (rule_t *) pool_item (code.rule_pool, i);
    fprintf (stderr, "warning: rule \"%s\" can't be reached\n",
             (string_t) pool_item (code.string_pool, unreached->name));
  }

  free_mem (&reachable);
}

/*---------------------------------------------------------------------------*/

LOCAL void check_no_circular_calls (int_t num_rules,
                                    rule_reference_t *calls[])
/* Print a warning on <stderr> for every rule that may call itself,
 * either directly or via a chain of subrule calls.
 * <calls[i]> is the list of subrules that may be called by rule <i>. */
{
  bool_t *visited = new_vector (sizeof (bool_t), num_rules);
  int_t start;

  /* Examine each rule on its own: which subrules can <start> end up in? */
  for (start = 0; start < num_rules; start++)
  {
    rule_reference_t *pending = NULL; /* marked subrules not yet expanded */
    rule_reference_t *callee;
    int_t k;

    /* Forget the marks of the previous rule. */
    for (k = 0; k < num_rules; k++)
      visited[k] = FALSE;

    /* Mark the subrules that are called by <start> itself. */
    for (callee = calls[start]; callee != NULL; callee = callee->next)
      mark_reachable (visited, &pending, callee->rule_number);

    /* Mark the subrules that are called by subrules already marked,
     * until no new subrule turns up. */
    while (pending != NULL)
    {
      int_t caller = pending->rule_number;

      remove_reference (&pending);

      for (callee = calls[caller]; callee != NULL; callee = callee->next)
        mark_reachable (visited, &pending, callee->rule_number);
    }

    /* <start> is recursive if its own calls lead back to it. */
    if (visited[start])
    {
      rule_t *recursive = (rule_t *) pool_item (code.rule_pool, start);

      fprintf (stderr, "warning: rule \"%s\" is recursive\n",
               (string_t) pool_item (code.string_pool, recursive->name));
    }
  }

  free_mem (&visited);
}

/*---------------------------------------------------------------------------*/

LOCAL int compare_first_instr (const void *key1, const void *key2)
/* Comparison function for "qsort": <key1> and <key2> point to records of
 * type <rule_instr_t> that are ordered by their <first_instr> field.
 * Return -1, 0 or 1 if the first instruction of <key1> is
 * lower than, the same as or higher than the first instruction of <key2>. */
{
  const rule_instr_t *left = (const rule_instr_t *) key1;
  const rule_instr_t *right = (const rule_instr_t *) key2;

  /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
  return ((left->first_instr > right->first_instr)
          - (left->first_instr < right->first_instr));
}

/*---------------------------------------------------------------------------*/

LOCAL void check_rule_calls (void)
/* Report a warning if the rules in <code> contain a circular call chain.
 * Report a warning if a rule can't be reached.
 * Nothing is checked if <code> contains no rules at all. */
{
  int_t num_rules = pool_items (code.rule_pool);
  int_t num_instrs = pool_items (code.instr_pool);
  rule_instr_t *rules;
  int_t i, rule_index;
  rule_reference_t **references, **calls;

  /* Without rules there is nothing to warn about, and the instruction scan
   * below needs <rules[0]>, <references[0]> and <calls[0]> to exist. */
  if (num_rules <= 0)
    return;

  /* Initialise <rules> and sort them for <first_instr>, so the rule an
   * instruction belongs to can be found in a single pass. */
  rules = new_vector (sizeof (rule_instr_t), num_rules);
  for (i = 0; i < num_rules; i++)
  {
    rule_t *rule = (rule_t *) pool_item (code.rule_pool, i);

    rules[i].number = i;
    rules[i].first_instr = rule->first_instr;
  }
  qsort (rules, num_rules, sizeof (rule_instr_t), compare_first_instr);

  /* Allocate a vector of rule references and a vector of calls, one list
   * per rule. The lists are expected to start out empty
   * (NOTE: this presumes that "new_vector" clears the memory). */
  references = new_vector (sizeof (rule_reference_t *), num_rules);
  calls = new_vector (sizeof (rule_reference_t *), num_rules);

  /* Fill <references> and <calls> by scanning all instructions. */
  rule_index = 0;
  for (i = 0; i < num_instrs; i++)
  {
    instr_t instr;
    int_t rule;

    /* Increment <rule_index> if instruction belongs to next rule.
     * Instructions in front of the first rule are counted for that rule. */
    while (rule_index + 1 < num_rules
           && rules[rule_index + 1].first_instr <= i)
      rule_index++;
    rule = rules[rule_index].number;

    instr = *((instr_t *) pool_item (code.instr_pool, i));

    /* Check each instruction: is it an add_state instruction
     * that has got a rule set? */
    if (OPCODE (instr) == INS_ADD_STATE && INSTR_INFO (instr) != -1)
    {
      /* Examine the rule set of this result statement. The set ends
       * with -1; other negative entries are skipped. */
      int_t *rule_set;

      for (rule_set = (int_t *) pool_item (code.rule_set_pool,
                                           INSTR_INFO (instr));
           *rule_set != -1;
           rule_set++)
      {
        if (*rule_set >= 0)
          add_reference (&references[rule], *rule_set);
      }
    }
    else if (OPCODE (instr) == INS_JUMP_SUBRULE)
    {
      /* A subrule call makes the subrule reachable
       * and is an edge in the call graph. */
      add_reference (&references[rule], INSTR_INFO (instr));
      add_reference (&calls[rule], INSTR_INFO (instr));
    }
  }

  check_all_rules_reachable (num_rules, references);
  check_no_circular_calls (num_rules, calls);

  free_mem (&rules);

  /* Free all list elements before freeing the vectors that hold them. */
  for (i = 0; i < num_rules; i++)
  {
    while (references[i] != NULL)
      remove_reference (&references[i]);

    while (calls[i] != NULL)
      remove_reference (&calls[i]);
  }
  free_mem (&references);
  free_mem (&calls);
}

/*---------------------------------------------------------------------------*/

GLOBAL void compile_rule_file (string_t source_file_name,
                               string_t object_file_name,
                               int_t file_type)
/* Compile the rule file <source_file_name> as a file of type <file_type>
 * and save the compiled data in a file with name <object_file_name>. */
{
  /* The standard functions, in the order in which they are entered. */
  static const struct
  {
    string_t name; /* name of the function in rule files */
    int_t function; /* the corresponding FUNC_... constant */
  } standard_functions[] =
  {
    {"atoms", FUNC_TO_ATOMS},
    {"capital", FUNC_IS_CAPITAL},
    {"length", FUNC_GET_LENGTH},
    {"multi", FUNC_TO_MULTI},
    {"set", FUNC_TO_SET},
    {"switch", FUNC_GET_SWITCH},
    {"value_type", FUNC_GET_VALUE_TYPE},
    {"value_string", FUNC_GET_VALUE_STRING},
    {"transmit", FUNC_TRANSMIT},
    {"floor", FUNC_FLOOR}
  };
  string_t file_name;
  int_t file_name_index; /* only needed as result parameter */
  int_t num_functions, i;

  init_rule_code (file_type);

  /* Enter the standard functions. */
  num_functions = (int_t) (sizeof (standard_functions)
                           / sizeof (standard_functions[0]));
  for (i = 0; i < num_functions; i++)
    enter_function (standard_functions[i].name,
                    standard_functions[i].function);

  /* The name of the source file is kept in the string pool of <code>. */
  file_name = copy_string_to_pool (code.string_pool, source_file_name,
                                   &file_name_index);

  /* Parse the rule file (and all included files). */
  begin_include (file_name);
  parse_rule_file ();
  end_include ();

  /* Check if every rule that is called is also defined. */
  check_rules_defined ();

  /* Check for reachability of each rule and for circular rule chains. */
  check_rule_calls ();

  /* Finish: dump the variables, free the symbols, write the code to
   * <object_file_name> and terminate the rule code. */
  dump_variables ();
  free_symbols ();
  write_code (object_file_name);
  term_rule_code ();
}

/* end of file ==============================================================*/
