#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <ctype.h>
#include <errno.h>

#include "udm_config.h"
#include "udm_common.h"
#include "udm_stopwords.h"
#include "udm_utils.h"
#include "udm_xmalloc.h"
#include "udm_agent.h"

/*
 * Look a word up in the stop list using a binary search.
 *
 * The search compares with strcmp(), so Conf->stoplist must already be
 * ordered the same way (see UdmSortStopList).
 *
 * Returns a pointer to the matching entry inside Conf->stoplist,
 * or NULL when the list is not allocated or the word is not present.
 */
UDM_STOPWORD * UdmIsStopWord(UDM_ENV * Conf,char *word){
	int first = 0;
	int last = Conf->nstoplist - 1;

	/* Nothing loaded: nothing can match */
	if(Conf->stoplist == NULL)
		return(NULL);

	while(first <= last){
		int probe = (first + last) / 2;
		UDM_STOPWORD *entry = &Conf->stoplist[probe];
		int cmp = strcmp(entry->word, word);

		if(cmp == 0)
			return(entry);

		if(cmp < 0){
			/* Entry sorts before the word: continue in the upper half */
			first = probe + 1;
		}else{
			/* Entry sorts after the word: continue in the lower half */
			last = probe - 1;
		}
	}
	return(NULL);
}

static int cmpstop(const void *s1,const void *s2){
	return(strcmp(((const UDM_STOPWORD*)s1)->word,((const UDM_STOPWORD*)s2)->word));
}

/*
 * Sort the stop list in place by word (strcmp order, via cmpstop)
 * so that it can be binary-searched afterwards.
 */
void UdmSortStopList(UDM_ENV * Conf){
	UDM_STOPWORD *base = Conf->stoplist;
	size_t count = Conf->nstoplist;

	qsort((void*)base, count, sizeof(*base), cmpstop);
}

/*
 * Append a stop word to Conf->stoplist.
 *
 * stopword->word is duplicated, so the caller keeps ownership of its
 * own string; only the first two characters of stopword->lang are kept.
 * The new entry is appended at the end: the list is NOT kept sorted
 * here, call UdmSortStopList before searching it.
 *
 * Returns 1 when a new entry was appended.
 * Returns 0 when nothing was appended: either the word was already in
 * the list (its language is then cleared), or the word could not be
 * duplicated because memory ran out.
 */
int UdmAddStopWord(UDM_ENV * Conf,UDM_STOPWORD * stopword){
	size_t j;
	char *word_copy;
	UDM_STOPWORD *slot;

	/* If the word is already in list     */
	/* We will not add it again           */
	/* But mark it as "international word"*/
	/* i.e. the word without language     */
	/* It will allow to avoid troubles    */
	/* with language guesser              */
	for(j=0;j<Conf->nstoplist;j++){
		if(!strcmp(Conf->stoplist[j].word,stopword->word)){
			Conf->stoplist[j].lang[0]=0;
			return 0;
		}
	}

	/* Duplicate the word before touching the array: a NULL word  */
	/* stored in the list would be dereferenced by strcmp() in    */
	/* every later search, sort and duplicate check.              */
	word_copy=strdup(stopword->word);
	if(!word_copy)return(0);

	/* NOTE(review): UdmXmalloc/UdmXrealloc results are used unchecked, */
	/* as before; presumably these wrappers do not return NULL - confirm */
	if (!Conf->nstoplist) {
		Conf->stoplist=(UDM_STOPWORD *)UdmXmalloc((Conf->nstoplist+1)*sizeof(UDM_STOPWORD));
	}else{
		Conf->stoplist=(UDM_STOPWORD *)UdmXrealloc(Conf->stoplist,(Conf->nstoplist+1)*sizeof(UDM_STOPWORD));
	}

	slot=&Conf->stoplist[Conf->nstoplist];
	slot->word=word_copy;
	/* strncpy() does not terminate when the source has 2+ chars, */
	/* so the terminator is written explicitly                    */
	/* (assumes lang holds at least 3 bytes - TODO confirm)       */
	strncpy(slot->lang,stopword->lang,2);
	slot->lang[2]=0;
	Conf->nstoplist++;

	return(1);
}

/*
 * Release every word in the stop list and the list array itself,
 * leaving Env with an empty list.
 *
 * The entry count is reset together with the pointer: otherwise a
 * later UdmAddStopWord or a second UdmFreeStopList would iterate over
 * nstoplist entries of storage that has already been released.
 */
void UdmFreeStopList(UDM_ENV * Env){
	size_t i;
	for(i=0;i<Env->nstoplist;i++){
		UDM_FREE(Env->stoplist[i].word);
	}
	UDM_FREE(Env->stoplist);
	/* Explicit reset, in case UDM_FREE does not clear its argument */
	Env->stoplist=NULL;
	Env->nstoplist=0;
}


/*
 * Load stop words from a text file into Conf and sort the resulting list.
 *
 * stoplist_file_name: path of the file to read; when NULL the file
 *                     "stopwords.txt" inside UDM_CONF_DIR is used.
 *
 * Each line is split on tab / CR / LF: the first token is the word,
 * the optional second token is its language, of which only the first
 * two characters are kept. Lines without any token are skipped.
 *
 * Returns 0 on success. Returns 1 when the file cannot be opened; in
 * that case Conf->errstr receives a message and Conf->errcode is set to 1.
 */
__INDLIB__ int UdmFileLoadStopList(UDM_ENV * Conf,char * stoplist_file_name){
	char fname[UDMSTRSIZ];
	char str[UDMSTRSIZ];
	char * lasttok;
	FILE * stopfile;

	if (!stoplist_file_name)
	{
		sprintf(fname,"%s%c%s",UDM_CONF_DIR,UDMSLASH,"stopwords.txt");
	} else
	{
		/* strncpy() leaves the buffer unterminated when the source */
		/* fills it, and fname is uninitialized stack memory, so    */
		/* the terminator must be written explicitly.               */
		strncpy(fname,stoplist_file_name,sizeof(fname)-1);
		fname[sizeof(fname)-1]='\0';
	}

	if (!(stopfile=fopen(fname,"r")))
	{
		/* NOTE(review): unbounded sprintf; the size of Conf->errstr */
		/* is not visible here - confirm it can hold this message    */
		sprintf(Conf->errstr,"Can't open stopwords file '%s' (%s)", fname, strerror(errno));
		Conf->errcode=1;
		return(1);
	}

	while(fgets(str,sizeof(str),stopfile)){
		UDM_STOPWORD stopword;

		/* stopword.word points into str; UdmAddStopWord makes its own copy */
		if((stopword.word=UdmGetToken(str,"\t\n\r",&lasttok))){
			char *newlang;

			/* Second token, if any, is the language; default is empty */
			newlang=UdmGetToken(NULL,"\t\n\r",&lasttok);
			strncpy(stopword.lang,newlang?newlang:"",2);
			stopword.lang[2]='\0';
			UdmAddStopWord(Conf,&stopword);
		}
	}
	fclose(stopfile);

	/* Words were appended in file order; sort them for binary search */
	UdmSortStopList(Conf);
	return(0);
}
