#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#include "udmsearch.h"
#include "udm_signals.h"
#include "udm_xmalloc.h"

#if (WIN32|WINNT)
#define udm_mutex_t		CRITICAL_SECTION
#define InitMutex(x)		InitializeCriticalSection(x)
#define DestroyMutex(x)		DeleteCriticalSection(x)
#define UDM_MUTEX_LOCK(x)	EnterCriticalSection(x)
#define UDM_MUTEX_UNLOCK(x)	LeaveCriticalSection(x)
#else
#include <unistd.h>
#ifdef HAVE_PTHREAD
#include <pthread.h>
#define udm_mutex_t		pthread_mutex_t
#define InitMutex(x)		pthread_mutex_init(x,NULL)
#define DestroyMutex(x)		pthread_mutex_destroy(x)
#define UDM_MUTEX_LOCK(x)	pthread_mutex_lock(x)
#define UDM_MUTEX_UNLOCK(x)	pthread_mutex_unlock(x)
#else
#define udm_mutex_t		int
#define InitMutex(x)		*(x)=0
#define DestroyMutex(x)
#define UDM_MUTEX_LOCK(x)
#define UDM_MUTEX_UNLOCK(x)
#endif
#endif

/* File-scope state shared between main() and the worker routine thread_main(). */
static unsigned int seconds =0; /* Seconds to sleep between documents (-p option)	*/
static int flags	=0; /* Flags passed to UdmIndexNextURL()	*/
static int started	=0; /* Set once the UDM_FLAG_INIT call has been made	*/
static int total_threads=0; /* Number of worker threads still running	*/
static int sleep_threads=0; /* Number of sleeping threads (got IND_NO_TARGET) */
static int next_thread  =1; /* Handle number given to the next allocated agent	*/
static int max_index_time=-1; /* Indexing time limit in seconds (-c); negative = no limit */
static time_t start_index_time=0; /* Time at which indexing started (set in main) */
static int max_url_number=-1; /* Limit on indexing steps (-n); negative = no limit */
static int cur_url_number=0; /* Indexing steps done so far, summed over all threads */

/* Environment allocated in main() and used by every agent in this file */
static UDM_ENV * Conf=NULL;

/* Mutex table indexed by lock type; entry 0 also guards the thread counters above */
static udm_mutex_t mutex[UDM_LOCK_MAX];

/* Initialise every entry of the file-scope mutex[] table. */
static void InitMutexes(void){
	int idx=0;

	while(idx<UDM_LOCK_MAX){
		InitMutex(&mutex[idx]);
		idx++;
	}
}
/* Release every entry of the file-scope mutex[] table. */
static void DestroyMutexes(void){
	int idx=0;

	while(idx<UDM_LOCK_MAX){
		DestroyMutex(&mutex[idx]);
		idx++;
	}
}

/*
 * Locking call-back registered with UdmSetLockProc().
 *
 * command - UDM_LOCK to acquire, UDM_UNLOCK to release; any other
 *           value is ignored.
 * type    - index into the file-scope mutex[] table.
 *
 * When DEBUG_LOCK is defined, a trace line is written to stderr
 * before and after the operation.
 */
static void UdmLockProc(int command,int type){
	const int locking=(command==UDM_LOCK);

#ifdef DEBUG_LOCK
	fprintf(stderr,"Try %s %d\n",locking?"lock":"unlock",type);
#endif
	if(locking){
		UDM_MUTEX_LOCK(&mutex[type]);
	}else if(command==UDM_UNLOCK){
		UDM_MUTEX_UNLOCK(&mutex[type]);
	}
#ifdef DEBUG_LOCK
	fprintf(stderr,"%s %d\n",locking?"locked":"unlocked",type);
#endif

}

/*
 * Statistics call-back registered with UdmSetStatProc().
 *
 * A non-negative code prints one table row: code, expired, total, str.
 * A negative code prints the closing separator and the "Total" row.
 * The handle argument is not used.
 */
static void UdmStatProc(int handle, int code, int expired, int total, const char* str){
	(void)handle;

	if(code<0){
		/* Footer: separator line followed by the totals */
		printf("   -----------------------------\n");
		printf("%10s %10d %10d\n\n\n","Total",expired,total);
		return;
	}
	printf("%10d %10d %10d %s\n",code,expired,total,str);
}

/*
 * Print the header of the statistics table to stdout, then call
 * UdmGetStatistics() on the given agent.
 * Returns whatever UdmGetStatistics() returns.
 */
static int ShowStatistics(UDM_AGENT * Indexer){
	fputs("\n          Database statistics\n\n",stdout);
	printf("%10s %10s %10s\n","Status","Expired","Total");
	fputs("   -----------------------------\n",stdout);
	return UdmGetStatistics(Indexer);
}

/*
 * Referers call-back registered with UdmSetRefProc():
 * prints one "code url referer" line to stdout.
 */
static void UdmRefProc(int code, const char *url, const char * ref){
	fprintf(stdout,"%d %s %s\n",code,url,ref);
}

/*
 * Print the referers heading to stdout, then call UdmGetReferers()
 * on the given agent. Returns whatever UdmGetReferers() returns.
 */
static int ShowReferers(UDM_AGENT * Indexer){
	fputs("\n          URLs and referers \n\n",stdout);
	return UdmGetReferers(Indexer);
}

/*
 * Optional thread-information call-back. It is compiled out because
 * THINFO_TEST is explicitly undefined just below; remove the #undef
 * (or define THINFO_TEST after it) to enable the call-back.
 */
#undef THINFO_TEST
#ifdef THINFO_TEST
/* Prints one "handle state text" line to stdout */
void UdmThreadProc(int handle,char *state, char* str){
	fprintf(stdout,"%d %s %s\n",handle,state,str);
}
#endif



/*
 * Print the command-line help text to stderr.
 *
 * The text is a single format string assembled from adjacent string
 * literals; sections are included or left out at compile time depending
 * on HAVE_SQL, HAVE_PTHREAD and LOG_PERROR. The only format argument is
 * the version string returned by UdmVersion(); literal percent signs in
 * the text are therefore written as "%%".
 *
 * Always returns 0.
 */
static int usage(void){

	fprintf(stderr,
"\n\
indexer from %s\n\
http://search.mnogo.ru (C) 1998-2001, LavTech Corp.\n\
\n\
Usage: indexer [OPTIONS]  [configfile]\n\
\n\
Indexing options:\
"
#ifdef HAVE_SQL
"\n\
  -a            reindex all documents even if not expired (may be\n\
                limited using -t, -u, -s, -c and -f options)\n\
  -m            reindex expired documents even if not modified (may\n\
                be limited using -t, -u, -c and -s options)\n\
  -e            index 'most expired' (oldest) documents first\n\
  -o            index documents with less depth (hops value) first\n\
  -n n          index only n documents and exit\n\
  -c n          index only n seconds and exit\n\
  -q            quick startup (do not add Server URLs)\n\
  -k            skip locking (affects MySQL and PostgreSQL only)\n\
"
#endif
"\n\
  -i            insert new URLs (URLs to insert must be given using -u or -f)\n\
  -p n          sleep n seconds after each URL\n\
  -w            do not warn before clearing documents from database\n\
"
#ifdef HAVE_PTHREAD
"  -N n          run N threads\n\
"
#endif

#ifdef HAVE_SQL
"\n\
Subsection control options (may be combined):\n\
  -s status     limit indexer to documents matching status (HTTP Status code)\n\
  -t tag        limit indexer to documents matching tag\n\
  -g category   limit indexer to documents matching category\n\
  -u pattern    limit indexer to documents with URLs matching pattern\n\
                (supports SQL LIKE wildcard '%%')\n\
  -f filename   read URLs to be indexed/inserted/cleared from file (with -a\n\
                or -C option, supports SQL LIKE wildcard '%%'; has no effect\n\
                when combined with -m option)\n\
  -f -          Use STDIN instead of file as URL list\n\
"
#else
"\n\
URL options:\n\
  -u URL        insert URL at startup\n\
  -f filename   read URLs to be inserted from file\n\
"
#endif
"\n\
Logging options:\n\
"
#ifdef LOG_PERROR
"  -l            do not log to stdout/stderr\n\
"
#endif
"  -v n          verbose level, 0-5\n\
\n\
Ispell import options:\n\
  -L language   Two letters Language code (en, ru, de etc.)\n\
  -A filename   ispell Affix file\n\
  -D filename   ispell Dictionary file\n\
  -d            dump to stdout instead of storing to database\n\
\n\
Misc. options:\n\
"
#ifdef HAVE_SQL

"  -C            clear database and exit\n\
  -S            print statistics and exit\n\
  -I            print referers and exit\n\
"
#endif
"  -h,-?         print this help page and exit\n\
\n\
\n\
Please mail bug reports and suggestions to <general@mnogosearch.org>.\n",
	UdmVersion());

	return(0);
}



/*
 * Worker routine. It is started once per thread by main(), or called
 * directly when the program is built without thread support.
 *
 * It allocates its own agent, performs the one-time UDM_FLAG_INIT call
 * if no other worker has done so yet, and then repeatedly calls
 * UdmIndexNextURL() until one of the following happens:
 *   - the time limit max_index_time is exceeded,
 *   - the step limit max_url_number is reached,
 *   - UdmIndexNextURL() reports IND_ERROR or an unknown code,
 *   - IND_NO_TARGET is returned and (in threaded builds) every
 *     remaining worker is idle as well.
 *
 * arg is not used. Returns 0 (Windows) or NULL (elsewhere).
 */
#if  (WIN32|WINNT)
DWORD WINAPI thread_main(void *arg){
#else
static void * thread_main(void *arg){
#endif
	UDM_AGENT * Indexer;
	int res=IND_OK;
	int done=0;
	int i_sleep=0;	/* Non-zero while this thread is counted in sleep_threads */

	(void)arg;

	/* Agent allocation and one-time initialisation are serialised */
	UDM_MUTEX_LOCK(&mutex[0]);
	Indexer=UdmAllocAgent(Conf, next_thread++, UDM_OPEN_MODE_WRITE);
	if(!started){
		res=UdmIndexNextURL(Indexer,flags|UDM_FLAG_INIT);
		started=1;
	}
	UDM_MUTEX_UNLOCK(&mutex[0]);

	while(!done){

		/* Stop when the time limit (if any) has been exceeded */
		if(max_index_time>=0){
			time_t now;

			time(&now);
			if((now-start_index_time)>max_index_time)
				break;
		}

		/* Stop when the step limit (if any) has been reached */
		UDM_MUTEX_LOCK(&mutex[UDM_LOCK_TARGET]);
		if((max_url_number>=0)&&(cur_url_number>=max_url_number)){
			done=1;
		}
		UDM_MUTEX_UNLOCK(&mutex[UDM_LOCK_TARGET]);
		if(done)break;

		if(res!=IND_ERROR) /* Possible after bad startup */
			res=UdmIndexNextURL(Indexer,flags);

		UDM_MUTEX_LOCK(&mutex[UDM_LOCK_TARGET]);
		cur_url_number++;
		UDM_MUTEX_UNLOCK(&mutex[UDM_LOCK_TARGET]);

		switch(res){
			case IND_OK:
				/* Work was found again: leave the "sleeping" set */
				if(i_sleep){
					UDM_MUTEX_LOCK(&mutex[0]);
					sleep_threads--;
					UDM_MUTEX_UNLOCK(&mutex[0]);
					i_sleep=0;
				}
				break;

			case IND_ERROR:
				UdmLog(Indexer,UDM_LOG_ERROR,"Error: '%s'",UdmAgentErrorMsg(Indexer));
				done=1;
				break;

			case IND_NO_TARGET:
#ifdef HAVE_PTHREAD
			/* in multi-threaded environment we		*/
			/* should wait for a moment when every thread	*/
			/* has nothing to do				*/

				if(!i_sleep){
					UDM_MUTEX_LOCK(&mutex[0]);
					sleep_threads++;
					UDM_MUTEX_UNLOCK(&mutex[0]);
					i_sleep=1;
				}

				UDM_MUTEX_LOCK(&mutex[0]);
				done=(sleep_threads>=total_threads);
				UDM_MUTEX_UNLOCK(&mutex[0]);

				break;
#else
				done=1;
				break;
#endif
			default:
				done=1;
				break;
		}
		if((seconds)&&(!done)){
			/* seconds is unsigned, so it is printed with %u */
			UdmLog(Indexer,UDM_LOG_DEBUG,"Sleeping %u second(s)",seconds);
			UDMSLEEP(seconds);
		}

	}

	if(res!=IND_ERROR){
		time_t now;
		time(&now);
		/* time_t need not be an int: convert explicitly to match %d */
		UDM_MUTEX_LOCK(&mutex[0]);
		UdmLog(Indexer,UDM_LOG_ERROR,"Done (%d seconds)",(int)(now-start_index_time));
		UDM_MUTEX_UNLOCK(&mutex[0]);
	}

	/*
	 * Free the agent BEFORE announcing termination: main() frees the
	 * environment as soon as it sees total_threads reach zero, so the
	 * counter must only drop once this thread no longer uses its agent.
	 */
	UdmFreeAgent(Indexer);

	UDM_MUTEX_LOCK(&mutex[0]);
	total_threads--;
	UDM_MUTEX_UNLOCK(&mutex[0]);

#if     (WIN32|WINNT)
	return(0);
#else
	return(NULL);
#endif
}


/*
 * Entry point of the indexer.
 *
 * Parses the command line, loads the configuration file (the default
 * path, or the single non-option argument) and then performs exactly one
 * of the following, in this order of precedence:
 *   - ispell affix/dictionary import (-A / -D, requires -L),
 *   - extended index build (-M),
 *   - database clearing (-C), optionally confirmed on stdin,
 *   - statistics report (-S),
 *   - referers report (-I),
 *   - indexing, with one worker or (in threaded builds) -N workers.
 *
 * Returns 1 after printing the usage text for bad options or too many
 * arguments, the import result for -A / -D, and 0 otherwise. Fatal
 * start-up errors terminate the process with exit(1).
 */
int main(int argc, char **argv) {

char *language=NULL,*affix=NULL,*dictionary=NULL;
int npages=-1;
int nseconds=-1;
int clear=0,stat=0,integrity=0,mkind=0;
int log2stderr=1,dump=0;
int add_servers=UDM_FLAG_ADD_SERV; /* Add servers by default */
/* This is in standard headers
extern char *optarg;
extern int optind;
*/
int ch;
int maxthreads=1;
int warnings=1;
char * url_fname=NULL;
const char *config_name= UDM_CONF_DIR UDMSLASHSTR "indexer.conf";

#if (WIN32|WINNT)
{
	WSADATA wsaData;

	if(WSAStartup(0x101,&wsaData)!=0){
		/* WSAGetLastError must be CALLED; passing the function itself is a bug */
		fprintf(stderr,"WSAStartup() error %d\n",WSAGetLastError());
		exit(1);
	}
}
#endif


	UdmInit(); /* Initialize library */
	
	Conf=UdmAllocEnv();
	UdmSetLockProc(Conf,UdmLockProc);
	UdmSetStatProc(Conf,UdmStatProc);
	UdmSetRefProc(Conf,UdmRefProc);
#ifdef THINFO_TEST
	UdmSetThreadProc(Conf,UdmThreadProc);
#endif


	/* Options listed in the string but without a case (e.g. 'U') end in usage() */
	while ((ch = getopt(argc, argv, "UCSIMaheolmdqiwk?t:u:s:n:v:L:A:D:p:N:f:c:g:")) != -1){
		switch (ch) {
		case 'C': clear++;add_servers=0;break;
		case 'S': stat++;add_servers=0;break;
		case 'I': integrity++;add_servers=0;break;
		case 'L': language=optarg;break;
		case 'A': affix=optarg;add_servers=0;break;
		case 'D': dictionary=optarg;add_servers=0;break;
		case 'M': mkind=1;break;
		case 'q': add_servers=0;break;
		case 'l': log2stderr=0;break;
		case 'a': flags|=UDM_FLAG_MARK;break;
		case 'e': flags|=UDM_FLAG_EXP_FIRST;break;
		case 'o': flags|=UDM_FLAG_SORT_HOPS;break;
		case 'm': flags|=UDM_FLAG_REINDEX;break;
		case 'k': flags|=UDM_FLAG_SKIP_LOCKING;break;
		case 'n': npages=atoi(optarg);break;
		case 'c': nseconds=atoi(optarg);break;
		case 'v': UdmSetLogLevel(Conf, atoi(optarg));break;
		case 'p': seconds=atoi(optarg);break;
		case 'd': dump=1;break;
		case 't': UdmAddTagLimit(Conf,optarg);break;
		case 'g': UdmAddCatLimit(Conf,optarg);break;
		case 's': UdmAddStatusLimit(Conf,atoi(optarg));break;
		case 'u': 
			UdmAddURLLimit(Conf,optarg);
			/* The URL is only queued for insertion if -i was given BEFORE this -u */
			if(flags&UDM_FLAG_INSERT)
				UdmAddHref(Conf,optarg,0,0,0,NULL,NULL);
			break;
		case 'N': maxthreads=atoi(optarg);break;
		case 'f': url_fname=optarg;break;
		case 'i': flags|=UDM_FLAG_INSERT;break;
		case 'w': warnings=0;break;
		case '?':
		case 'h':
		default:  
			usage();
			return(1);
		}
	}
	flags|=add_servers;

	argc -= optind;argv += optind;

	/* At most one positional argument: the configuration file name */
	if(argc>1){
		usage();
		return(1);
	}
	if(argc==1)config_name=argv[0];

	/* UDM_FLAG_SPELL is requested unless only -I, -S or -C was asked for */
	UdmLoadConfig(Conf,config_name,0,add_servers+
		((!integrity&&!stat&&!clear)*UDM_FLAG_SPELL));

	if(UdmEnvErrCode(Conf)){
		fprintf(stderr,"%s\n",UdmEnvErrMsg(Conf));
		UdmFreeEnv(Conf);
		exit(1);
	}

#if (WIN32|WINNT)
#else
	UdmOpenLog(Conf, log2stderr);
	UdmInitSigHandlers(Conf);
#endif

	InitMutexes();

	/* Make sure URL file is readable if not STDIN */
	if(url_fname) {
		Conf->url_file_name=strdup(url_fname);
		if(!Conf->url_file_name){
			fprintf(stderr,"Error: out of memory\n");
			exit(1);
		}
		if(strcmp(url_fname,"-")){
			FILE *url_file;
			if(!(url_file=fopen(url_fname,"r"))){
				UdmLog_noagent(Conf,UDM_LOG_ERROR,"Error: can't open url file '%s': %s",url_fname, strerror(errno));
				exit(1);
			}
			fclose(url_file);
		}
	}

	/* Make sure all URLs to be inserted are OK */
	if(flags&UDM_FLAG_INSERT) {
		if(url_fname) {
			if(strcmp(url_fname,"-")){
				UDM_AGENT * Loader;
				Loader=UdmAllocAgent(Conf, 0, UDM_OPEN_MODE_WRITE);
				if(IND_OK!=UdmURLFile(Loader, UDM_URL_FILE_PARSE)){
					UdmLog(Loader,UDM_LOG_ERROR,"Error: Invalid URL in '%s'",url_fname);
					exit(1);
				}
				UdmFreeAgent(Loader);
			}
		}
	}

	/* Ispell import mode: import and leave */
	if(affix||dictionary){
		UDM_AGENT * Importer;
		int res=0;
		Importer=UdmAllocAgent(Conf, 0, UDM_OPEN_MODE_READ);

		if(!language){
			UdmLog(Importer,UDM_LOG_ERROR,"Error: Language is not specified for import!");
			exit(1);
		}
		if(strlen(language)!=2){
			UdmLog(Importer,UDM_LOG_ERROR,"Error: Language should be 2 letters!");
			exit(1);
		}

		/* NOTE(review): when both are given, the dictionary result overwrites the affix result */
		if(affix)res=UdmImportAffixes(Importer->Conf,language,affix,Importer,dump);
		if(dictionary)res=UdmDBImportDictionary(Importer,language,dictionary,dump);

		total_threads=0;
		DestroyMutexes();
		UdmFreeAgent(Importer);
		UdmFreeEnv(Conf);

		return(res);
	}

	/* Extended index build mode: build and leave */
	if(mkind){
		UDM_AGENT * Indexer;
		int res=0;
		Indexer=UdmAllocAgent(Conf, 0, UDM_OPEN_MODE_READ);
		UdmBuildExtIndex(Indexer);	
		UdmFreeAgent(Indexer);
		UdmFreeEnv(Conf);
		DestroyMutexes();
		return(res);
	}

	if(clear){
		UDM_AGENT * Indexer;
		int clear_confirmed=0;
		if(warnings) {
			char str[5]="";
			printf("You are going to delete database '%s' content\n",Conf->DBName?Conf->DBName:"");
			printf("Are you sure?(YES/no)");
			/* The prompt has no newline: flush so it is visible before we block */
			fflush(stdout);
			if(fgets(str,sizeof(str),stdin))
				if(!strncmp(str,"YES",3))
					clear_confirmed=1;
		}
		else
			clear_confirmed=1;

		if(clear_confirmed) {
			Indexer=UdmAllocAgent(Conf, 0, UDM_OPEN_MODE_WRITE);
			if(url_fname) {
				if(IND_OK!=UdmURLFile(Indexer,UDM_URL_FILE_CLEAR)){
					UdmLog(Indexer,UDM_LOG_ERROR,"Error: '%s'",UdmAgentErrorMsg(Indexer));
				}
			}
			else {
				printf("Deleting...");
				/* Show the progress message before the clearing call starts */
				fflush(stdout);
				if(IND_OK!=UdmClearDatabase(Indexer)){
					UdmLog(Indexer,UDM_LOG_ERROR,"Error: '%s'",UdmAgentErrorMsg(Indexer));
				}
				printf("Done\n");
			}
			UdmFreeAgent(Indexer);
		}else{
			printf("Canceled\n");
		}
	}else
	if(stat){
		UDM_AGENT * Indexer;
		Indexer=UdmAllocAgent(Conf, 0, UDM_OPEN_MODE_READ);
		if(IND_OK!=ShowStatistics(Indexer)){
			UdmLog(Indexer,UDM_LOG_ERROR,"Error: '%s'",UdmAgentErrorMsg(Indexer));
		}
		UdmFreeAgent(Indexer);
	}else
	if(integrity){
		UDM_AGENT * Indexer;
		Indexer=UdmAllocAgent(Conf, 0, UDM_OPEN_MODE_READ);
		if(IND_OK!=ShowReferers(Indexer)){
			UdmLog(Indexer,UDM_LOG_ERROR,"Error: '%s'",UdmAgentErrorMsg(Indexer));
		}
		UdmFreeAgent(Indexer);
	}else
	{
		/* Indexing mode */
#if (WIN32|WINNT)
#else
		UdmLog_noagent(Conf,UDM_LOG_ERROR, "indexer from %s started with '%s'", UdmVersion(), config_name);
#endif
		time(&start_index_time);
		max_url_number=npages;
		max_index_time=nseconds;

#ifdef HAVE_PTHREAD
		{
#if !(WIN32|WINNT)
			pthread_t *threads;
#endif
			int i;

			/*
			 * A zero or negative -N value would leave total_threads at a
			 * value the wait loop below can never see drop to zero from
			 * above, so run at least one worker.
			 */
			if(maxthreads<1)maxthreads=1;
			total_threads=maxthreads;
#if !(WIN32|WINNT)
			threads=malloc((size_t)maxthreads*sizeof(*threads));
			if(!threads){
				fprintf(stderr,"Error: out of memory\n");
				exit(1);
			}
#endif
			/* Workers block on mutex[0] at start-up until all have been created */
			UDM_MUTEX_LOCK(&mutex[0]);
			for(i=0;i<maxthreads;i++){
#if (WIN32|WINNT)
				if(!CreateThread(NULL, 0, &thread_main, NULL, 0, NULL)){
					fprintf(stderr,"CreateThread() error %lu\n",(unsigned long)GetLastError());
					/* This worker will never run: do not wait for it */
					total_threads--;
				}
#else
				{
					pthread_attr_t attr;
					size_t stksize=UDMSTRSIZ*30+4*UDM_MAXTEXTSIZE;
					int rc;

					pthread_attr_init(&attr);
					pthread_attr_setstacksize(&attr, stksize);
					rc=pthread_create(&threads[i],&attr,&thread_main,NULL);
					pthread_attr_destroy(&attr);
					if(rc){
						/* pthread_create() returns the error number directly */
						fprintf(stderr,"Error: can't create thread %d: %s\n",i+1,strerror(rc));
						/* This worker will never run: do not wait for it */
						total_threads--;
					}
				}
#endif
				
			}
			UDM_MUTEX_UNLOCK(&mutex[0]);

			/* Poll once per second until every worker has signed off */
			while(1){
				int num;
				UDM_MUTEX_LOCK(&mutex[0]);
				num=total_threads;
				UDM_MUTEX_UNLOCK(&mutex[0]);
				if(!num)break;
				UDMSLEEP(1);
			}
			
#if !(WIN32|WINNT)
			free(threads);
#endif
		}
#else
		thread_main(NULL);
#endif
	}
	total_threads=0;
	UdmFreeEnv(Conf);
	DestroyMutexes();

#if (WIN32|WINNT)
	WSACleanup();
#endif

	return(0);
}
