/* C Mode */

/* portahash.c
   Implements a portable hash function for DType objects.
   Originally implemented by Ken Haase in the Machine Understanding Group
     at the MIT Media Laboratory.

   Copyright (C) 1994-2001 Massachusetts Institute of Technology
   Copyright (C) 2001-2002 beingmeta, inc. (A Delaware Corporation)

   This program comes with absolutely NO WARRANTY, including implied
   warranties of merchantability or fitness for any particular purpose.

    Use, modification, and redistribution of this program is permitted
    under the terms of either (at the developer's discretion) the GNU
    General Public License (GPL) Version 2, the GNU Lesser General Public
    License.

    This program is based on the FramerD library released in Fall 2001 by
    MIT under both the GPL and the LGPL licenses, both of which accompany
    this distribution.  Subsequent modifications by beingmeta, inc. are
    also released under both the GPL and LGPL licenses (at the developer's
    discretion).
*/ 

static char vcid[] = "$Id: portahash.c,v 1.7 2002/04/19 13:19:52 haase Exp $";

#define FD_INLINE_STRING_STREAMS 1

#include "framerd.h"

/* Used to generate hash codes */
#define MAGIC_MODULUS 16777213 /* 256000001 */

/** Hashing lisp objects **/

/* hash_string_dtype: (static)
     Arguments: a lisp pointer (to a string)
     Returns: a hash value (an unsigned int)

  Computes an iterative hash over the characters in the string.
*/
FASTOP unsigned int hash_string_dtype (lisp x)
{
  char *ptr=STRING_DATA(x), *limit=ptr+STRING_LENGTH(x);
  unsigned int sum=0;
  while (ptr < limit) {
    sum=(sum<<8)+(*ptr++); 
    sum=sum%(MAGIC_MODULUS);}
  return sum;
}

/* hash_unicode_string_dtype: (static)
     Arguments: a lisp pointer (to a string)
     Returns: a hash value (an unsigned int)

  Computes an iterative hash over the characters in the string.
*/
static unsigned int hash_unicode_string_dtype (lisp x)
{
  unsigned char *ptr=STRING_DATA(x), *limit=ptr+STRING_LENGTH(x);
  unsigned int sum=0;
  while (ptr < limit) {
    int c=fd_sgetc(&ptr);
    sum=(sum<<8)+(c);
    sum=sum%(MAGIC_MODULUS);}
  return sum;
}

/* hash_symbol_dtype: (static)
     Arguments: a lisp pointer (to a symbol)
     Returns: a hash value (an unsigned int)

  Computes an iterative hash over the characters in the symbol's name.
*/
FASTOP unsigned int hash_symbol_dtype(lisp x)
{
  unsigned char *s = SYMBOL_NAME (x);
  unsigned int sum=0;
  while (*s != '\0') {
    unichar_t c;
    if (*s < 0x80) c=*s++;
    else if (*s < 0xe0) {
      c=(unichar_t)((*s++)&(0x3f)<<6);
      c=c|((unichar_t)((*s++)&(0x3f)));}
    else {
      c=(unichar_t)((*s++)&(0x1f)<<12);
      c=c|(unichar_t)(((*s++)&(0x3f))<<6);
      c=c|(unichar_t)((*s++)&(0x3f));}
    sum=(sum<<8)+(c); sum=sum%(MAGIC_MODULUS);}
  return sum;
}

static unsigned int hash_pair_dtype(lisp string);
static unsigned int hash_record_dtype(lisp string);

/*
   hash_dtype: (static inline)
     Arguments: a "lisp" pointer
     Returns: an unsigned int computed from the pointer

     Basic strategy:
      integers become themselves, immediates get special codes,
       strings and symbols get characters scanned, objects use their
       low order bytes, vectors and pairs get their elements combined
       with an ordering (to keep it asymmetric).

     Notes: we assume 24 bit hash values so that any LISP implementation
      can compute hashes using just fixnum arithmetic.
*/
FASTOP unsigned int hash_dtype(lisp x)
{
  if (FD_IMMEDIATEP(x))
    if ((FD_EMPTY_LISTP(x)) || (FD_FALSEP(x))) return 37;
    else if (FD_TRUEP(x)) return 17;
    else if (FD_EMPTYP(x)) return 13;
    else {
      fd_fprintf(stderr,_("Strange immediate: %q"),x);
      return 19;}
  else if (FIXNUMP(x))
    return (FIXLISP(x))%(MAGIC_MODULUS);
  else if (ASCII_STRINGP(x))
    return hash_string_dtype(x);
  else if (UNICODE_STRINGP(x))
    return hash_unicode_string_dtype(x);
  else if (PAIRP(x))
    return hash_pair_dtype(x);
  else if (SYMBOLP(x))
    return hash_symbol_dtype(x);
  else if (OIDP(x)) {
    FD_OID id=OID_ADDR(x);
#if FD_OIDS_ARE_LONGS
    return id%(MAGIC_MODULUS);
#else
    unsigned int hi=FD_OID_HIGH(id), lo=FD_OID_LOW(id);
    int i=0; while (i++ < 4)
      {hi=((hi<<8)|(lo>>24))%(MAGIC_MODULUS); lo=lo<<8;}
    return hi;
#endif
      }
  else if (CHOICEP(x)) {
    unsigned int sum=0;
    DO_CHOICES(elt,x)
      sum=((sum+hash_dtype(elt))%MAGIC_MODULUS);
    END_DO_CHOICES;
    return sum;}
  else if (CHARACTERP(x)) return (CHAR_CODE(x))%(MAGIC_MODULUS);
  else if (FLONUMP(x)) {
    unsigned int as_int; float *f=(float *)(&as_int);
    *f=FLOATLISP(x);
    return (as_int)%(MAGIC_MODULUS);}
  else if (VECTORP(x)) {
    int size=VECTOR_LENGTH(x); unsigned int sum=0;
    DOTIMES(i,size)
      sum=(((sum<<4)+(hash_dtype(VECTOR_REF(x,i))))%MAGIC_MODULUS);
    return sum;}
  else if (SLOTMAPP(x)) {
    unsigned int sum=0;
    DO_SLOTS(k,v,x) {
      sum=(sum+hash_dtype(k))%MAGIC_MODULUS;
      sum=(sum+(flip_word(hash_dtype(v))%MAGIC_MODULUS))%MAGIC_MODULUS;}
    return sum;}
  else if (FD_RATIONALP(x)) {
    unsigned int sum=hash_dtype(FD_NUMERATOR(x))%MAGIC_MODULUS;
    sum=(sum<<4)+hash_dtype(FD_DENOMINATOR(x))%MAGIC_MODULUS;
    return sum%MAGIC_MODULUS;}
  else if (FD_COMPLEXP(x)) {
    unsigned int sum=hash_dtype(FD_REALPART(x))%MAGIC_MODULUS;
    sum=(sum<<4)+hash_dtype(FD_IMAGPART(x))%MAGIC_MODULUS;
    return sum%MAGIC_MODULUS;}
  else if (LRECORDP(x)) {
    unsigned int sum=hash_dtype(LRECORD_TAG(x))%MAGIC_MODULUS;
    sum=(sum<<4)+hash_dtype(LRECORD_DATA(x))%MAGIC_MODULUS;
    return sum%MAGIC_MODULUS;}
  else if (RECORDP(x)) return hash_record_dtype(x);
  else {
    struct FD_TYPE_REGISTRY *r=fd_lookup_typecode(PTR_TYPE(x));
    if ((r) && (r->hash_fcn)) return r->hash_fcn(x,hash_dtype);
    else fd_raise_exception(fd_NoHashMethod);}
}

FRAMERD_EXPORT
unsigned int fd_hash_dtype(lisp x) { return hash_dtype(x); }

/* hash_pair_dtype: (static)
     Arguments: a lisp pointer (to a pair)
     Returns: a hash value (an unsigned int)

     Adds the hash of the list's elements together, shifting each
     value by 4 bits and ending with the final CDR.  The shift is
     done to have the hash function be sensitive to element permuations.
*/
static unsigned int hash_pair_dtype(lisp x)
{
  lisp ptr=x; unsigned int sum=0;
  /* The shift here is introduced to make the hash function asymmetric */
  while (PAIRP(ptr)) {
    sum=((sum*2)+hash_dtype(CAR(ptr)))%(MAGIC_MODULUS);
    ptr=CDR(ptr);}
  if (!(FD_EMPTY_LISTP(ptr))) {
    unsigned int cdr_hash=hash_dtype(ptr);
    sum=(sum+(flip_word(cdr_hash)>>8))%(MAGIC_MODULUS);}
  return sum;
}

/** Hashing record dtypes **/

/* hash_record_dtype: (static)
     Arguments: a lisp pointer (to a record)
     Returns: a hash value (an unsigned int)

   Uses a variety of methods to compute a hash function
*/
static unsigned int hash_record_dtype(lisp x)
{
  struct FD_TYPE_REGISTRY *entry=fd_lookup_record(RECORD_TAG(x));
  if (entry) {
    if (entry->compound_dump_fcn) {
      lisp dump=entry->compound_dump_fcn(x);
      unsigned int hash=hash_dtype(dump);
      decref(dump);
      return hash;}
    else if ((entry->package_data_fcn) && ((entry->subcode) & 0x80)) {
      lisp *data;
      unsigned int size=entry->package_data_fcn(x,(void **)&data);
      lisp *ptr=data, *limit=ptr+size;
      unsigned int sum=0;
      while (ptr < limit) {
	sum=((sum<<4)+(hash_dtype(*ptr)))%MAGIC_MODULUS; ptr++;}
      if (entry->package_data_done_fcn)
	entry->package_data_done_fcn(x,size,(void **)&data);
      return sum;}
    else {
      unsigned char *data;
      unsigned int size=entry->package_data_fcn(x,(void **)&data);
      unsigned char *ptr=data, *limit=ptr+size;
      unsigned int sum=0;
      while (ptr < limit) sum=((sum<<4)+(*ptr++))%MAGIC_MODULUS;
      if (entry->package_data_done_fcn)
	entry->package_data_done_fcn(x,size,(void **)&data);
      return sum;}}
  else fd_raise_lisp_exception
	 ("Can't store in index file","unknown type",x);
}





/* File specific stuff */

/* The CVS log for this file
   $Log: portahash.c,v $
   Revision 1.7  2002/04/19 13:19:52  haase
   Fixed bugs involving NULs in UTF-8 strings

   Revision 1.6  2002/04/02 21:39:33  haase
   Added log and emacs init entries to C source files

*/

/* Emacs local variables
;;;  Local variables: ***
;;;  compile-command: "cd ../..; make" ***
;;;  End: ***
*/
