#ifndef lint
static char *rcs_makeindex_c = "$Header: /mit/jik/sipbsrc/src/webster/src/misc/RCS/makeindex.c,v 1.11 1992/03/17 23:29:29 jik Exp $";
#endif

/*
 * makeindex - make an index of the words in the dictionary
 *
 * We read in all the words from the dictionary files, and write them out
 * to wordfileindex.  We also create the DBM files wordfileindex.{dir,pag}
 * to hold the data about each word.  For each word we store the file it is
 * in and the seek position to get to the start of the first definition for
 * that word.  We also create wordfilehdr, which contains the header info,
 * namely a set of seek positions into wordfileindex for the start of each
 * letter (wordfileindex is sorted).
 *
 * David A. Curry
 * Purdue University
 * Engineering Computer Network
 * Original: April, 1986
 * Revised: December, 1986
 *
 * $Log: makeindex.c,v $
 * Revision 1.11  1992/03/17  23:29:29  jik
 * Don't use dbm.
 * more accurately, using dbm is dependent on USE_DBM being defined, and I'm
 * not going to define it when compiling.
 *
 * Revision 1.10  1992/03/17  23:13:30  jik
 * Woops.
 *
 * Revision 1.9  1992/03/17  23:05:54  jik
 * Fix to Ambar's ndbm -> dbm conversion, because she made a stupid mistake
 * which caused only the last definition in any string of multiple definitions
 * of a word to be printed.
 *
 * Revision 1.8  1992/03/17  04:31:24  jik
 * Added -h, -d and -D options for specifying header file name, output
 * directory name, and input directory name.
 *
 * Revision 1.7  1992/03/17  04:01:29  jik
 * Get rid of a FILE * leak.
 *
 * Revision 1.6  1992/03/16  16:14:53  jik
 * Allow an alternate Index file name to be generated (so that we can generate
 * a new index without overwriting the old one).
 *
 * Revision 1.5  1990/10/21  18:47:01  ambar
 * changed from ndbm to dbm for lusing 4.2 based ultrix
 *
 * Revision 1.4  90/04/04  12:32:41  jik
 * Added indexing of words on 'V' lines as well.  Also, use DBM_INSERT rather
 * than DBM_REPLACE to put words into the database.
 * 
 * Revision 1.3  88/02/29  05:58:50  ambar
 * *** empty log message ***
 * 
 * Revision 1.2  86/12/26  22:04:18  davy
 * Changed to create a DBM file index.
 * 
 */
#include <sys/types.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <ctype.h>
#ifndef USE_DBM
#include <ndbm.h>
#endif

extern char *optarg;
extern int optind;

#ifdef USE_DBM
/* feh */
#ifdef NULL
#undef NULL
#endif
#include <dbm.h>
#undef NULL
#endif

#include <stdio.h>

#include <index.h>
#include <wordfiles.h>

/*
 * Index record for the word currently being processed.  main() fills in
 * i_file and i_filepos and stores the whole structure as the database
 * content for the word.
 */
struct index idx;
/*
 * Header record: h_nwords is counted in main(), h_starts[] and h_idxsize
 * are filled in by setstarts(); main() then writes it to wordfilehdr.
 */
struct header hdr;

/* Current word, lower-cased by saveword(); also used as the database key. */
char word[BUFSIZ];

/*
 * Directory the word files are read from (-D) and directory the output
 * files are written to (-d).  Both start out as wordfiledir.
 */
char *indir, *outdir;

/*
 * main - build the word index, the index database and the header file.
 *
 * Options:
 *	-o name		index file name (replaces wordfileindex)
 *	-h name		header file name (replaces wordfilehdr)
 *	-d dir		directory the output files are written to
 *	-D dir		directory the word files are read from
 *
 * Both directories start out as wordfiledir.  Exits with status 0 on
 * success, or with status 1 after printing a message on any failure.
 */
int
main(argc, argv)
int argc;
char **argv;
{
#ifndef USE_DBM
	DBM *db;
#endif
	register int i;
	char buf[BUFSIZ];
	struct rlimit rlim;
	datum key, content;
#ifdef USE_DBM
	datum old;
#endif
	register FILE *fp, *gp;
	register daddr_t addr, len, last_defn;
	int opt;

	indir = outdir = wordfiledir;

	while ((opt = getopt(argc, argv, "o:h:d:D:")) != EOF) {
		switch (opt) {
		case 'o':
			wordfileindex = optarg;
			break;
		case 'h':
			wordfilehdr = optarg;
			break;
		case 'd':
			outdir = optarg;
			break;
		case 'D':
			indir = optarg;
			break;
		default:
			fprintf(stderr,
				"Usage: %s [-o indexfile] [-h hdrfile] [-d outdir] [-D indir]\n",
				argv[0]);
			exit(1);
		}
	}

	/*
	 * Kick our limits.  This is best effort: the results are
	 * deliberately ignored, so a refusal to raise a limit is not fatal.
	 * setrlimit() takes exactly two arguments.
	 */
	rlim.rlim_max = rlim.rlim_cur = RLIM_INFINITY;
	setrlimit(RLIMIT_FSIZE, &rlim);
	setrlimit(RLIMIT_STACK, &rlim);
	setrlimit(RLIMIT_DATA, &rlim);

	/*
	 * Create wordfileindex and the database files.
	 */

#ifdef USE_DBM
	/*
	 * The .dir and .pag files are created empty here, before
	 * dbminit() is called on them.
	 */
	sprintf(buf, "%s/%s%s", outdir, wordfileindex, ".dir");

	if ((gp = fopen(buf, "w")) == NULL) {
		fprintf(stderr, "makeindex: cannot create \"%s\".\n", buf);
		exit(1);
	}
	fclose(gp);

	sprintf(buf, "%s/%s%s", outdir, wordfileindex, ".pag");

	if ((gp = fopen(buf, "w")) == NULL) {
		fprintf(stderr, "makeindex: cannot create \"%s\".\n", buf);
		exit(1);
	}
	fclose(gp);
#endif

	sprintf(buf, "%s/%s", outdir, wordfileindex);

	if ((gp = fopen(buf, "w")) == NULL) {
		fprintf(stderr, "makeindex: cannot create \"%s\".\n", buf);
		exit(1);
	}

	/*
	 * The database is opened under the same base name as the text
	 * index that gp now refers to.
	 */
	if
#ifdef USE_DBM
	    (dbminit(buf) < 0)
#else
	    ((db = dbm_open(buf, O_WRONLY | O_CREAT, 0644)) == 0)
#endif
	{
		fprintf(stderr, "makeindex: cannot create index database.\n");
		exit(1);
	}

	/* No current word yet; '\0' (not the pointer constant NULL). */
	word[0] = '\0';
	hdr.h_nwords = 0;

	/*
	 * For each word file...
	 */
	for (i = 0; i < NWORDFILES; i++) {
		/*
		 * Open the word file.
		 */
		sprintf(buf, "%s/%s", indir, wordfiles[i]);

		if ((fp = fopen(buf, "r")) == NULL) {
			fprintf(stderr, "makeindex: cannot open \"%s\".\n", buf);
			exit(1);
		}

		/*
		 * Read lines; words are in "F:" lines.
		 *
		 * Well, actually, words are also in "V:" lines and
		 * should be indexed as such.  Lines starting with 'R'
		 * are accepted by the test below as well.
		 *
		 * addr is the offset of the line about to be read;
		 * last_defn is the offset of the most recent 'F' line,
		 * which is what gets recorded for every word found.
		 */
		last_defn = addr = 0L;
		while (fgets(buf, BUFSIZ, fp) != NULL) {
			len = strlen(buf);

			/*
			 * Not a word line.
			 */
			if ((*buf != 'F') && (*buf != 'V') && (*buf != 'R')) {
				addr += len;
				continue;
			}
			if (*buf == 'F')
				last_defn = addr;
			addr += len;

			/*
			 * If this is another definition of the same word,
			 * keep going.
			 */
			if (sameword(word, buf))
				continue;

			/*
			 * Save file number and file position, then make
			 * this line's word the current word.
			 */
			idx.i_file = i;
			idx.i_filepos = last_defn;
			saveword(word, buf);

			/*
			 * Store the info in the database.  The key is the
			 * word without its terminating NUL.
			 */
			key.dptr = word;
			key.dsize = strlen(word);
			content.dptr = (char *) &idx;
			content.dsize = sizeof(struct index);

#ifdef USE_DBM
			/*
			 * Only the first entry for a word is kept: a word
			 * that is already in the database is skipped.
			 */
			old = fetch(key);
			if (old.dptr && old.dsize) {
				continue;
			}

			if (store(key, content) < 0)
#else
			/*
			 * NOTE(review): only a negative return is treated
			 * as failure here.  If dbm_store() reports an
			 * already-present key with a non-negative value
			 * under DBM_INSERT, such a word is still appended
			 * to the index file below, unlike the USE_DBM
			 * branch, which skips it.  Confirm which behaviour
			 * is intended.
			 */
			if (dbm_store(db, key, content, DBM_INSERT) < 0)
#endif
			{
				fprintf(stderr, "makeindex: cannot store to database.\n");
				exit(1);
			}

			/*
			 * Put the word into the file.
			 */
			fprintf(gp, "%s\n", word);
			hdr.h_nwords++;
		}

		fclose(fp);
	}

	/*
	 * fclose() flushes the buffered index; a failure here means the
	 * index file is incomplete, so do not go on to sort it.
	 */
	if (fclose(gp) == EOF) {
		fprintf(stderr, "makeindex: error writing index file.\n");
		exit(1);
	}

	/*
	 * Mark start of each letter.
	 */
	setstarts();

	/*
	 * Write the header.
	 */
	sprintf(buf, "%s/%s", outdir, wordfilehdr);

	if ((fp = fopen(buf, "w")) == NULL) {
		fprintf(stderr, "makeindex: cannot create \"%s\".\n", buf);
		exit(1);
	}

	if (fwrite(&hdr, sizeof(struct header), 1, fp) != 1) {
		fprintf(stderr, "makeindex: header write failed.\n");
		exit(1);
	}

#ifndef USE_DBM
	dbm_close(db);
#endif

	/* The header is only on disk once the close has succeeded. */
	if (fclose(fp) == EOF) {
		fprintf(stderr, "makeindex: header write failed.\n");
		exit(1);
	}
	exit(0);
}

/*
 * saveword - save the word from buf into word.
 */
/*
 * saveword - copy the word from the dictionary line in buf into word,
 * folding upper case to lower case.
 *
 * The word starts after the two-character line tag and runs up to the
 * first ';'.  If the line has no ';', the word ends at the newline or at
 * the end of the string instead, so a malformed line no longer causes a
 * NULL dereference.  A line shorter than its tag yields an empty word.
 *
 * word must have room for the whole word plus a terminating NUL.  As
 * before, buf is cut off at the ';' that ends the word.  Always
 * returns 0.
 */
int
saveword(word, buf)
register char *word;
register char *buf;
{
	register char *s;
	register int c;

	/*
	 * Too short to hold a tag and a word: do not step past the NUL.
	 */
	if (buf[0] == '\0' || buf[1] == '\0') {
		*word = '\0';
		return(0);
	}

	/*
	 * Copy the word.  The character is widened through unsigned char
	 * because the <ctype.h> routines are undefined for negative values.
	 */
	s = buf + 2;
	while (*s != '\0' && *s != ';' && *s != '\n') {
		c = (unsigned char) *s++;
		*word++ = isupper(c) ? tolower(c) : c;
	}
	*word = '\0';

	/*
	 * Terminate buf where the word ended, as callers have always seen.
	 */
	if (*s == ';')
		*s = '\0';

	return(0);
}

/*
 * setstarts - find starts of words.
 */
/*
 * setstarts - sort the index file and record where each letter starts.
 *
 * Sorts outdir/wordfileindex in place, then reads it back and stores in
 * hdr.h_starts[] the byte offset of the first line beginning with each
 * distinct first character, and in hdr.h_idxsize the total size read.
 * An empty index leaves h_starts[] untouched and sets h_idxsize to 0.
 *
 * Exits with status 1 if the sort fails or the file cannot be reopened.
 * Always returns 0 otherwise.
 */
int
setstarts()
{
	FILE *fp;
	daddr_t addr;
	char buf[BUFSIZ];
	register char lastc;

	/*
	 * The command holds the path twice; refuse to overrun buf.  The
	 * literal below is the fixed part of the command, NUL included.
	 */
	if (sizeof("LC_ALL=C sort -T /tmp -o / /") +
	    2 * (strlen(outdir) + strlen(wordfileindex)) > sizeof(buf)) {
		fprintf(stderr, "makeindex: index file name too long.\n");
		exit(1);
	}

	/*
	 * Sort from dictionary order to ASCII collating sequence.
	 * LC_ALL=C keeps sort(1) from using the locale's collation, which
	 * would not keep words with the same first byte together.  The
	 * paths are passed to the shell unquoted.
	 */
	sprintf(buf, "LC_ALL=C sort -T /tmp -o %s/%s %s/%s", outdir, wordfileindex, outdir, wordfileindex);

	if (system(buf) != 0) {
		fprintf(stderr, "makeindex: cannot sort index file.\n");
		exit(1);
	}

	/*
	 * Open the index.
	 */
	sprintf(buf, "%s/%s", outdir, wordfileindex);

	if ((fp = fopen(buf, "r")) == NULL) {
		fprintf(stderr, "makeindex: cannot open \"%s\".\n", buf);
		exit(1);
	}

	addr = 0;

	/*
	 * The first line starts the first letter at offset 0.  On an empty
	 * file fgets() leaves buf alone, so it must not be looked at.
	 */
	if (fgets(buf, BUFSIZ, fp) != NULL) {
		lastc = *buf;
		addr = strlen(buf);
		hdr.h_starts[SUBSCRIPT(lastc)] = 0;

		/*
		 * Save the address of the start of each new letter in
		 * the file.
		 */
		while (fgets(buf, BUFSIZ, fp) != NULL) {
			if (*buf != lastc) {
				lastc = *buf;
				hdr.h_starts[SUBSCRIPT(lastc)] = addr;
			}
			addr += strlen(buf);
		}
	}

	hdr.h_idxsize = addr;

	fclose(fp);
	return(0);
}

/*
 * sameword - return non-zero if word and buf have the same word in them.
 */
/*
 * sameword - return non-zero if word and buf have the same word in them.
 *
 * word is an already lower-cased word; buf is a dictionary line whose
 * word starts after the two-character line tag and runs up to the first
 * ';'.  The line's word is folded to lower case before comparing.
 *
 * The comparison is done in place, so a word of any length is handled
 * without a fixed-size scratch buffer.  A line with no ';' ends its word
 * at the newline or at the end of the string; a line shorter than its
 * tag is treated as holding an empty word.
 */
int
sameword(word, buf)
register char *word;
register char *buf;
{
	register char *s;
	register int c;

	/*
	 * Too short to hold a tag and a word: do not step past the NUL.
	 */
	if (buf[0] == '\0' || buf[1] == '\0')
		return(*word == '\0');

	for (s = buf + 2; *s != '\0' && *s != ';' && *s != '\n'; s++, word++) {
		/*
		 * Widen through unsigned char: the <ctype.h> routines are
		 * undefined for negative values.
		 */
		c = (unsigned char) *s;
		if (isupper(c))
			c = tolower(c);

		/*
		 * This also catches word ending first, since c is never
		 * NUL inside the loop.
		 */
		if ((unsigned char) *word != c)
			return(0);
	}

	/*
	 * The line's word is used up; equal only if word is used up too.
	 */
	return(*word == '\0');
}
