/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**---------------------------------------------------------
** Added addStopList to support printing of common words
** G. Hill 4/7/97  ghill@library.berkeley.edu
**
** change sprintf to snprintf to avoid corruption
** SRE 11/17/99
**
** 04/00 - Jose Ruiz
** changed hash to bighash in mergeresultlists for better performance
** on big searches (a* or b* or c*)
**
*/

#include "swish.h"
#include "hash.h"
#include "search.h"
#include "mem.h"
#include "string.h"

/* hash: folds the bytes of the NUL-terminated string s into a bucket
** number by reducing a base-31 polynomial modulo HASHSIZE.
** Every byte is read as an unsigned char; the accumulator is unsigned,
** so overflow simply wraps.
*/

unsigned hash(s)
char *s;
{
	const unsigned char *p = (const unsigned char *) s;
	unsigned acc = 0;

	while (*p != '\0') {
		acc = 31 * acc + (int) *p;
		p++;
	}
	return acc % HASHSIZE;
}

/* bighash: same base-31 byte fold as the small string hash, but the
** result is reduced modulo BIGHASHSIZE for the larger tables.
** s must be a NUL-terminated string.
*/

unsigned bighash(s)
char *s;
{
	const unsigned char *p = (const unsigned char *) s;
	unsigned acc = 0;

	while (*p != '\0') {
		acc = 31 * acc + (int) *p;
		p++;
	}
	return acc % BIGHASHSIZE;
}

/* numhash: maps an integer key to a bucket number modulo HASHSIZE.
**
** The key is converted to unsigned before taking the remainder. With a
** signed operand a negative key produces a negative remainder, which
** becomes a huge out-of-range value once returned as unsigned.
** Non-negative keys still map to i % HASHSIZE.
*/

unsigned numhash(i)
int i;
{
	return (unsigned) i % HASHSIZE;
}

/* bignumhash: maps an integer key to a bucket number modulo BIGHASHSIZE
** (for the larger tables).
**
** The key is converted to unsigned before taking the remainder. With a
** signed operand a negative key produces a negative remainder, which
** becomes a huge out-of-range value once returned as unsigned.
** Non-negative keys still map to i % BIGHASHSIZE.
*/

unsigned bignumhash(i)
int i;
{
	return (unsigned) i % BIGHASHSIZE;
}
/* searchhash: base-31 byte fold of the NUL-terminated string s,
** reduced modulo SEARCHHASHSIZE (table used while searching).
*/

unsigned searchhash(s)
char *s;
{
	const unsigned char *p = (const unsigned char *) s;
	unsigned acc = 0;

	while (*p != '\0') {
		acc = 31 * acc + (int) *p;
		p++;
	}
	return acc % SEARCHHASHSIZE;
}

/* readdefaultstopwords: walks the NULL-terminated defaultstopwords
** array and passes every entry to addstophash().
*/

void readdefaultstopwords()
{
	int idx = 0;

	while (defaultstopwords[idx] != NULL) {
		addstophash(defaultstopwords[idx]);
		idx++;
	}
}

/* addStopList: appends a private copy of word to the global stopList
** array (the list of removed common words), unless isstopword()
** already reports the word.
**
** word: NUL-terminated string. It is duplicated with estrdup(), so the
**       caller keeps ownership of its own buffer.
**
** The array grows in steps of 100 slots. Its capacity lives in a
** function-static counter, separate from the global stopList pointer,
** so the array is also allocated whenever stopList is NULL. Checking
** only the counter would store through a NULL pointer if the list had
** been cleared while the counter was non-zero.
*/
void addStopList(word)
char *word;
{
	static int stopMaxSize = 0;	/* capacity of stopList, in slots */

	if (isstopword(word))
		return;

	if (stopList == NULL) {
		/* No array yet: make sure the capacity leaves room past stopPos. */
		if (stopMaxSize <= stopPos)
			stopMaxSize = stopPos + 100;
		stopList = (char **) emalloc(stopMaxSize * sizeof(char *));
	} else if (stopPos == stopMaxSize) {
		/* Array is full: extend it by another 100 slots. */
		stopMaxSize += 100;
		stopList = (char **) erealloc(stopList, stopMaxSize * sizeof(char *));
	}
	stopList[stopPos++] = (char *) estrdup(word);
}


/* addstophash: inserts word into the hashstoplist table unless
** isstopword() already finds it. A new swline node holding an
** estrdup() copy of word is pushed at the head of its bucket chain.
*/

void addstophash(word)
char *word;
{
	struct swline *entry;
	unsigned bucket;

	if (isstopword(word))
		return;

	bucket = hash(word);

	entry = (struct swline *) emalloc(sizeof(struct swline));
	entry->line = (char *) estrdup(word);
	entry->next = hashstoplist[bucket];
	hashstoplist[bucket] = entry;
}

/* isstopword: looks word up in the hashstoplist table.
** Returns 1 when an entry in the word's bucket compares equal with
** strcmp(), 0 otherwise.
*/

int isstopword(word)
char *word;
{
	struct swline *entry;

	for (entry = hashstoplist[hash(word)]; entry != NULL; entry = entry->next) {
		if (strcmp(entry->line, word) == 0)
			return 1;
	}
	return 0;
}

/* addtofilehashlist: records the pair (fileshort, filelong) in the
** filehashlist table. The new node is pushed at the head of the bucket
** selected by bignumhash(fileshort); no check for an existing entry
** with the same file number is made.
*/

void addtofilehashlist(fileshort, filelong)
int fileshort;
long filelong;
{
	unsigned bucket = bignumhash(fileshort);
	struct filenum *node;

	node = (struct filenum *) emalloc(sizeof(struct filenum));
	node->fileshort = fileshort;
	node->filelong = filelong;
	node->next = filehashlist[bucket];
	filehashlist[bucket] = node;
}

/* getfilenum: searches the filehashlist table for the file number
** filenum and returns the filelong value stored with it.
** Returns 0 when no entry matches.
*/

long getfilenum(filenum)
int filenum;
{
	struct filenum *node;

	for (node = filehashlist[bignumhash(filenum)]; node != NULL; node = node->next) {
		if (node->fileshort == filenum)
			return node->filelong;
	}
	return 0;
}

/* addtofwordtotals: records the pair (filenum, ftotalwords) in the
** fwordtotals table. The new node is pushed at the head of the bucket
** selected by bignumhash(filenum).
*/

void addtofwordtotals(filenum, ftotalwords)
int filenum;
int ftotalwords;
{
	unsigned bucket = bignumhash(filenum);
	struct fwordtotal *node;

	node = (struct fwordtotal *) emalloc(sizeof(struct fwordtotal));
	node->filenum = filenum;
	node->totalwords = ftotalwords;
	node->next = fwordtotals[bucket];
	fwordtotals[bucket] = node;
}

/* gettotalwords: searches the fwordtotals table for filenum and
** returns the totalwords value stored with it.
** Returns 0 when no entry matches.
*/

int gettotalwords(filenum)
int filenum;
{
	struct fwordtotal *node;

	for (node = fwordtotals[bignumhash(filenum)]; node != NULL; node = node->next) {
		if (node->filenum == filenum)
			return node->totalwords;
	}
	return 0;
}

/* mergeresulthashlist: adds the result r to the resulthashlist table.
**
** If the bucket already holds an entry for the same file number, r is
** folded into it: the ranks are added, the structure bits are OR-ed
** and, when r carries positions, a new position array is built with
** r's positions first followed by the existing ones. r is then
** released with freeresult().
**
** Otherwise r itself is linked into the chain, in front of the first
** entry whose file number is larger, so each chain stays ordered by
** ascending file number (Jose Ruiz 04/00: keeps large "or" queries
** fast, since the scan can stop early).
*/
void mergeresulthashlist(r)
struct result *r;
{
	unsigned bucket;
	struct result *cur, *prev;
	int *merged;

	bucket = bignumhash(r->filenum);
	prev = NULL;

	for (cur = resulthashlist[bucket]; cur != NULL; prev = cur, cur = cur->next) {
		if (cur->filenum == r->filenum) {
			cur->rank += r->rank;
			cur->structure |= r->structure;
			if (r->frequency) {
				if (cur->frequency) {
					/* Both sides have positions: r's go first. */
					merged = (int *) emalloc((cur->frequency + r->frequency) * sizeof(int));
					CopyPositions(merged, 0, r->position, 0, r->frequency);
					CopyPositions(merged, r->frequency, cur->position, 0, cur->frequency);
				} else {
					/* Only r has positions: plain copy. */
					merged = (int *) emalloc(r->frequency * sizeof(int));
					CopyPositions(merged, 0, r->position, 0, r->frequency);
				}
				cur->frequency += r->frequency;
				efree(cur->position);
				cur->position = merged;
			}
			freeresult(r);
			return;
		}
		if (r->filenum < cur->filenum)
			break;	/* insertion point found: r belongs before cur */
	}

	/* cur is the successor (NULL at the end of the chain) and prev the
	** predecessor (NULL when r becomes the new head of the bucket). */
	r->next = cur;
	if (prev != NULL)
		prev->next = r;
	else
		resulthashlist[bucket] = r;
}

/* initresulthashlist: marks every one of the BIGHASHSIZE buckets of
** resulthashlist as empty by storing NULL in it. Existing chains are
** not freed.
*/

void initresulthashlist()
{
	int bucket = 0;

	while (bucket < BIGHASHSIZE) {
		resulthashlist[bucket] = NULL;
		bucket++;
	}
}

/* freefilehashlist: releases every node of every chain in the
** filehashlist table.
**
** Each bucket head is reset to NULL after its chain is freed, so the
** table holds no dangling pointers: a later lookup sees an empty
** table and a repeated call is harmless.
*/
void freefilehashlist()
{
	int i;
	struct filenum *fp, *next;

	for (i = 0; i < BIGHASHSIZE; i++) {
		for (fp = filehashlist[i]; fp != NULL; fp = next) {
			next = fp->next;	/* save the link before the node is freed */
			efree(fp);
		}
		filehashlist[i] = NULL;
	}
}

/* freestophash: releases every node of the hashstoplist table together
** with the word string each node owns.
**
** Each bucket head is reset to NULL after its chain is freed, so the
** table holds no dangling pointers: a later lookup sees an empty
** table and a repeated call is harmless.
*/
void freestophash()
{
	int i;
	struct swline *sp, *next;

	for (i = 0; i < HASHSIZE; i++) {
		for (sp = hashstoplist[i]; sp != NULL; sp = next) {
			next = sp->next;	/* save the link before the node is freed */
			efree(sp->line);
			efree(sp);
		}
		hashstoplist[i] = NULL;
	}
}

/* freeStopList: releases the first stopPos strings stored in the global
** stopList array and then the array itself.
**
** stopList is reset to NULL and stopPos to 0 afterwards, so the globals
** describe an empty list rather than freed memory, and a second call
** does not free anything twice.
*/
void freeStopList()
{
	int i;

	if (stopList != NULL) {
		for (i = 0; i < stopPos; i++)
			efree(stopList[i]);
		efree(stopList);
		stopList = NULL;
	}
	stopPos = 0;
}
