/* This file is part of Malaga, a system for Natural Language Analysis.
 * Copyright (C) 1995-1999 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* description ==============================================================*/

/* This module contains structures and functions for the run-time lexicon. */

/* includes =================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "tries.h"
#include "files.h"
#include "malaga_files.h"

#undef GLOBAL
#define GLOBAL

#include "lexicon.h"

/* variables ================================================================*/

/* Address of the lexicon file mapped into memory. It is filled in by
 * "map_file" in "init_lexicon" and handed to "unmap_file" in
 * "terminate_lexicon". */
LOCAL void *lexicon_data;
/* Length of *<lexicon_data> as reported by "map_file"
 * (NOTE(review): presumably in bytes -- confirm against "map_file"). */
LOCAL int_t lexicon_length;

/* The run time lexicon.
 * All three tables point into the memory-mapped lexicon file; "init_lexicon"
 * places <trie> directly behind the file header, <cat_lists> directly behind
 * <trie>, and <values> directly behind <cat_lists>. */
LOCAL struct
{
  int_t *trie; /* a trie with indices to <cat_lists> */
  int_t trie_size; /* number of "int_t" elements in <trie> */
  int_t trie_root; /* index of root node in <trie>. */

  int_t *cat_lists; /* lists of categories, stored in <values>.
                     * Each element is an index into <values>; the last
                     * index <i> of each list is stored negative, its
                     * real index is abs(<i>) - 1. */
  int_t cat_lists_size; /* number of "int_t" elements in <cat_lists> */

  cell_t *values; /* categories of lexicon entries */
  int_t values_size; /* size of <values> as given in the file header
                      * (NOTE(review): presumably counted in cells --
                      * confirm against the lexicon file format). */
} lexicon;

/* State of the current prefix search, reset by "search_for_prefix" and
 * advanced by "get_next_prefix":
 * <cat_list_index> is the index in <lexicon.cat_lists> of the next category
 * to deliver, or -1 if no category list is pending.
 * <trie_node> is the trie node handed to "lookup_trie" on the next lookup. */
LOCAL int_t cat_list_index, trie_node;
/* Position in the searched string that is handed to "lookup_trie"; it is
 * reported to the caller as the remainder behind the prefix found. */
LOCAL string_t prefix_end;

/* functions ================================================================*/

GLOBAL void search_for_prefix (string_t string)
/* Start a new search for lexicon entries that are prefixes of <string>.
 * This only resets the search state; the entries themselves are delivered,
 * in increasing prefix length, by subsequent calls of "get_next_prefix". */
{
  /* No category list is pending yet. */
  cat_list_index = -1;

  /* Begin at the first character of <string> and at the trie root. */
  prefix_end = string;
  trie_node = lexicon.trie_root;
}

/*---------------------------------------------------------------------------*/

GLOBAL bool_t get_next_prefix (string_t *string_ptr, value_t *cat)
/* Deliver the next lexicon entry that is a prefix of the string given to
 * "search_for_prefix", which must have been called before.
 * If there is such an entry, return TRUE, set <*string_ptr> to the part of
 * the string that follows the prefix and set <*cat> to the category that
 * the lexicon assigns to the entry.
 * If there are no more entries, return FALSE and leave <*string_ptr> and
 * <*cat> untouched. */
{
  int_t value_index;

  if (cat_list_index == -1)
  {
    /* The previous category list is used up (or the search has just
     * started), so ask the trie for the next prefix. */
    lookup_trie (lexicon.trie, &trie_node, &prefix_end, &cat_list_index);

    /* Still no category list: nothing more to deliver. */
    if (cat_list_index == -1)
      return FALSE;
  }

  value_index = lexicon.cat_lists[cat_list_index];
  if (value_index >= 0)
  {
    /* More categories follow in this list; continue there next time. */
    cat_list_index++;
  }
  else
  {
    /* A negative entry marks the last category of the list.
     * Decode its real index and force a new trie lookup on the next call. */
    value_index = - value_index - 1;
    cat_list_index = -1;
  }

  *string_ptr = prefix_end;
  *cat = lexicon.values + value_index;
  return TRUE;
}

/*---------------------------------------------------------------------------*/

GLOBAL void init_lexicon (string_t file_name)
/* Initialise this module with the lexicon stored in file <file_name>.
 * The file is mapped into memory and its header is checked; afterwards the
 * tables of <lexicon> point into the mapped file. */
{
  lexicon_header_t *file_header; /* header at the start of the mapped file */

  /* Map the file instead of copying its contents. */
  map_file (file_name, &lexicon_data, &lexicon_length);

  /* The mapping starts with the lexicon header; verify it. */
  file_header = (lexicon_header_t *) lexicon_data;
  check_header (&file_header->common_header, file_name, LEXICON_FILE,
                LEXICON_CODE_VERSION);

  /* Take over the scalar values from the header. */
  lexicon.trie_root = file_header->trie_root;
  lexicon.trie_size = file_header->trie_size;
  lexicon.cat_lists_size = file_header->cat_lists_size;
  lexicon.values_size = file_header->values_size;

  /* The tables follow the header back to back:
   * first the trie, then the category lists, then the values. */
  lexicon.trie = (int_t *) &file_header[1];
  lexicon.cat_lists = lexicon.trie + lexicon.trie_size;
  lexicon.values = (cell_t *) (lexicon.cat_lists + lexicon.cat_lists_size);
}

/*---------------------------------------------------------------------------*/

GLOBAL void terminate_lexicon (void)
/* Terminate this module: hand the lexicon file mapping that was set up by
 * "init_lexicon" to "unmap_file". */
{
  /* NOTE(review): <lexicon.trie>, <lexicon.cat_lists> and <lexicon.values>
   * are not reset here although they point into the mapping; presumably
   * they must not be used after this call -- confirm. */
  unmap_file (&lexicon_data, lexicon_length);
}

/* end of file ==============================================================*/

