#include <sys/param.h>
#include <sys/dir.h>
#include <sys/file.h>
#include <stdio.h>
#include <ndbm.h>
#include <dictionary.h>
#include <errno.h>
#include <strings.h>
#include <ctype.h>
#include <dicterrors.h>
#include <dict_errs.h>


/*
 * makeindex -- build the index for the dictionary
 *
 * Options are:
 *
 * -d level	set debugging level
 * -i dir	directory dictionary is in
 * -n name	name to save index in
 * -l name	filename to read file list from
 * -s n		start at file number n, rather than at 1 (files are numbered from 1)
 * -f n		end at file number n, rather than at the last one
 * -a		append to the index (rather than clearing before starting)
 * -h		help
 */

/*
 * Option state.  Everything except num_words and size_words is set by
 * parseArgs() from the command line.
 */

/* -d: debugging level, stored with atoi(); doEntry() tests both */
/* "debugging" and "debugging > 1"				  */
boolean_t 	debugging 	= 0;
/* -a: when zero, main() clears the database before indexing */
boolean_t 	append 		= 0;
/* -i, -n, -l: passed to dictInitializePaths() by main() */
char *		directory	= DICTIONARYDIR;
char *		index_name	= INDEXNAME;
char *		list_name	= FILELIST;
/* -s, -f: first and last file numbers handled by doIndex(), which  */
/* uses file_number - 1 as the subscript into the file name table   */
/* and lowers finish to the number of files if it is larger	     */
short int	start		= 1;
short int	finish		= 10000;
/* last path component of argv[0]; printed by usage() */
char *		whoami;
/* number of lines doEntry() has written to the index file */
int		num_words	= 0;
/* running total of key.dsize + SOUNDEX_LENGTH + 2 for those lines; */
/* sortIndex() uses it as the size of its line buffer		     */
int		size_words	= 0;


/*
 * Entry point: parse the options, prepare the database, build the
 * index, sort it, close the database, and exit with
 * dict_error_occurred as the status.
 */
main(argc, argv)
int argc;
char *argv[];
{
     char *failed_step = NULL;

     initialize_dict_error_table();

     /*
      * Parse the argument list (doesn't return if there's a bad
      * argument or if help is requested)
      */
     parseArgs(argc, argv);

     /*
      * Setup steps, tried in order; the first one that fails is
      * fatal.  The index is only erased when we're not appending, and
      * the new index is opened for writing last.
      */
     if (dictInitializePaths(directory, list_name, index_name))
	  failed_step = "initializing dictionary paths";
     else if ((! append) && (dictClearDatabase()))
	  failed_step = "clearing database";
     else if (dictOpenDatabaseWrite())
	  failed_step = "opening database for write";

     if (failed_step) {
	  dict_error(failed_step);
	  exit(dict_error_occurred);
     }

     /*
      * From here on a failure is reported but the remaining steps
      * are still attempted.
      */
     if (doIndex())
	  dict_error("creating the index");

     if (sortIndex())
	  dict_error("sorting the index");

     if (dictCloseDatabase())
	  dict_error("closing the database");

     exit(dict_error_occurred);
}


/*
 * Parse the command-line arguments and set global variables to
 * indicate what options have been selected.
 *
 * Does not return if an argument is bad or if -h is given: the usage
 * message is printed and the program exits (status 1 for a bad
 * argument, 0 for -h).  The arguments to -s and -f must be at least 1;
 * doIndex() subtracts one from the file number to subscript the file
 * name table, so 0 is rejected along with negative values.
 *
 * Always returns 0 when it does return.
 */
int
parseArgs(argc, argv)
int argc;
char *argv[];
{
     extern char *	optarg;
     int		c;

     /* whoami is whatever follows the last '/' in argv[0] */
     whoami = rindex(argv[0], '/');
     whoami = whoami ? whoami + 1 : argv[0];
     
     while ((c = getopt(argc, argv, "d:i:n:l:s:f:ah")) != EOF) {
	  switch (c) {
	  case 'd':
	       debugging = atoi(optarg);
	       break;
	  case 'i':
	       directory = optarg;
	       break;
	  case 'n':
	       index_name = optarg;
	       break;
	  case 'l':
	       list_name = optarg;
	       break;
	  case 's':
	       start = atoi(optarg);
	       if (start < 1) {
		    fprintf(stderr,
			    "Argument to -s must be a positive integer.\n");
		    usage();
		    exit(1);
	       }
	       break;
	  case 'f':
	       finish = atoi(optarg);
	       if (finish < 1) {
		    fprintf(stderr,
			    "Argument to -f must be a positive integer.\n");
		    usage();
		    exit(1);
	       }
	       break;
	  case 'a':
	       append = 1;
	       break;
	  case 'h':
	       usage();
	       exit(0);
	       /* NOTREACHED */
	  default:
	       usage();
	       exit(1);
	  }
     }

     return(0);
}


/*
 * Print the option summary on stderr, prefixed with the program name
 * held in whoami.
 */
usage()
{
     static char format[] =
	  "Usage: %s [-d level] [-i directory] [-n name] [-l name]\n\t[-s start] [-f finish] [-a] [-h]\n";

     fprintf(stderr, format, whoami);
}


doIndex()
{
     char file_path[MAXPATHLEN];
     fileNumber_t file_number = 0;
     int retval;
     int result = 0;
     
     /*
      * We're stealing variables from the library.
      */
     extern char **		_dict_file_names;
     extern fileNumber_t	_dict_num_files;
     extern char		_dict_directory[];

     if (finish > _dict_num_files)
	  finish = _dict_num_files;
     file_number = start;
     
     while (file_number <= finish) {
	  sprintf(file_path, "%s%s.d", _dict_directory,
		  _dict_file_names[file_number - 1]);
	  retval = doFile(file_path, file_number);
	  file_number++;
	  if (retval) {
	       dict_error(file_path);
	       result = retval;
	  }
     }

     return(result);
}


/*
 * Open the specified dictionary file, and index the definitions in it
 * in database.  Each word that is indexed is also output to the index file.
 *
 * Only lines whose first character is 'F', 'V' or 'R' are indexed.
 * The word starts at the third character of the line and runs up to
 * a ';'; it is lower-cased and handed to doEntry() together with the
 * line's first character as the entry type.  An 'F' line also records
 * its own byte offset in the file as the definition location, and the
 * 'V' and 'R' lines that follow reuse that location.
 *
 * Every line must end in a newline.  A line that does not, or an
 * indexed line with no ';', stops processing with DICT_LINE_TOO_LONG
 * (the line filled the buffer) or DICT_CORRUPT_FILE.
 *
 * Returns 0 on success, otherwise the dict error code.
 */
int
doFile(file_path, file_number)
char *file_path;
fileNumber_t file_number;
{
     FILE *input;
     char line[DICTIONARYLINELENGTH];
     fileLocation_t definition_location = 0;
     fileLocation_t current_location = 0;
     int retval = 0;
     char word[DICTIONARYLINELENGTH];
     char *ptr1, *ptr2;
     definition_t definition;
     int len;
     int c;
     
     input = fopen(file_path, "r");
     if (! input) {
	  dict_set_error(errno ? errno : -1);
	  return(dict_error_code);
     }

     while (fgets(line, DICTIONARYLINELENGTH, input) != NULL) {
	  len = strlen(line);
	  /* len is 0 if the line starts with a null byte */
	  if ((len == 0) || (line[len - 1] != '\n')) {
	       if (len == DICTIONARYLINELENGTH - 1) {
		    dict_set_error(DICT_LINE_TOO_LONG);
	       }
	       else {
		    dict_set_error(DICT_CORRUPT_FILE);
	       }
	       retval = dict_error_code;
	       dict_error_string = file_path;
	       goto done;
	  }
	  switch (*line) {
	  case 'F':
	       definition_location = current_location;
	       /* FALLTHROUGH */
	  case 'V':
	  case 'R':
	       /*
		* Copy the word, folding upper case to lower.  The
		* line ends in "\n\0", so stopping at the null keeps
		* the scan inside line and the copy inside word (which
		* is the same size) when the ';' is missing.
		*/
	       ptr1 = word;
	       ptr2 = &line[2];
	       while ((*ptr2 != ';') && (*ptr2 != '\0')) {
		    c = (unsigned char) *ptr2;
		    *ptr1 = (isupper(c) ? tolower(c) : *ptr2);
		    ptr1++, ptr2++;
	       }
	       if (*ptr2 != ';') {
		    dict_set_error(DICT_CORRUPT_FILE);
		    retval = dict_error_code;
		    dict_error_string = file_path;
		    goto done;
	       }
	       *ptr1 = '\0';
	       definition.file = file_number;
	       definition.file_position = definition_location;
	       retval = doEntry(word, (entryType_t) *line, definition);
	       if (retval)
		    goto done;
	       /* FALLTHROUGH */
	  default:
	       /* every line read advances the running file offset */
	       current_location += len;
	  }
     }

done:
     if (fclose(input) == EOF) {
	  dict_set_error(errno ? errno : -1);
	  retval = dict_error_code;
     }
     return(retval);
}


/*
 * Rank of an entry type: 'F' is 0 (first priority), 'V' is 1, 'R' is
 * 2.  Any other character gets -1.
 */
static int
priorityRank(type)
char type;
{
     switch (type) {
     case 'F': return(0);
     case 'V': return(1);
     case 'R': return(2);
     default:  return(-1);
     }
}


/*
 * Compare the priorities of two entry types.
 *
 * 'F' type is first priority; 'V' is second; 'R' is third.  The
 * result is positive when pri1 has higher priority than pri2, zero
 * when they are the same and negative when pri1 has lower priority;
 * its size is the distance between the two ranks (so 'F' against 'R'
 * gives 2).  If either argument is not one of the three types the
 * result is 1.
 */
int
priorityCmp(pri1, pri2)
char pri1, pri2;
{
     int rank1 = priorityRank(pri1);
     int rank2 = priorityRank(pri2);

     if ((rank1 < 0) || (rank2 < 0))
	  return(1);

     return(rank2 - rank1);
}

	       
/*
 * Look through index_entry for the first entry whose type has lower
 * priority than the type of entry, and overwrite it with entry.
 * Returns 0 if such an entry was found, 1 if there was none.
 *
 * NOTE(review): index_entry is a by-value parameter, so the overwrite
 * below changes only this function's copy.  The caller learns whether
 * a slot was available but does not receive the modified index entry.
 */
findGullibleEntry(index_entry, entry)
indexEntry_t index_entry;
entry_t entry;
{
     int slot = 0;

     while (slot < index_entry.num_entries) {
	  if (priorityCmp(index_entry.entry[slot].type, entry.type) < 0) {
	       index_entry.entry[slot] = entry;
	       return(0);
	  }
	  slot++;
     }

     return(1);
}

     
/*
 * Add entry to index_entry.
 *
 * If an existing entry refers to the same definition (same file and
 * file position), it is overwritten when its type has lower priority
 * than the new one; otherwise DICT_DUPLICATE_INDEX_ENTRY is set as
 * the status and dict_error_code is returned.  If no entry refers to
 * that definition, entry is stored after the last one and num_entries
 * is incremented.  Returns 0 when entry was stored.
 *
 * NOTE(review): index_entry is a by-value parameter, so both the
 * overwrite and the append change only this function's copy; the
 * caller's index entry is left as it was.
 */
insertNewEntry(index_entry, entry)
indexEntry_t index_entry;
entry_t entry;
{
     int slot;

     for (slot = 0; slot < index_entry.num_entries; slot++) {
	  if (index_entry.entry[slot].definition.file !=
	      entry.definition.file)
	       continue;
	  if (index_entry.entry[slot].definition.file_position !=
	      entry.definition.file_position)
	       continue;

	  /* this definition is already listed for the word */
	  if (priorityCmp(index_entry.entry[slot].type, entry.type) >= 0) {
	       dict_set_status(DICT_DUPLICATE_INDEX_ENTRY);
	       return(dict_error_code);
	  }
	  index_entry.entry[slot] = entry;
	  return(0);
     }

     index_entry.entry[index_entry.num_entries] = entry;
     index_entry.num_entries++;
     return(0);
}


/*
 * Overwrite the first entry in *index_entry whose type has lower
 * priority than the type of *entry.  Returns 0 if an entry was
 * replaced, 1 if no existing entry has lower priority.
 */
static int
replaceWeakerEntry(index_entry, entry)
indexEntry_t *index_entry;
entry_t *entry;
{
     int i;

     for (i = 0; i < index_entry->num_entries; i++) {
	  if (priorityCmp(index_entry->entry[i].type, entry->type) < 0) {
	       index_entry->entry[i] = *entry;
	       return(0);
	  }
     }
     return(1);
}


/*
 * Merge *entry into *index_entry, which must have room for one more
 * entry.  An existing entry for the same definition (same file and
 * file position) is overwritten if its type has lower priority;
 * if it does not, DICT_DUPLICATE_INDEX_ENTRY is set as the status and
 * dict_error_code is returned.  With no entry for that definition,
 * *entry is appended.  Returns 0 when *index_entry was changed.
 */
static int
mergeEntry(index_entry, entry)
indexEntry_t *index_entry;
entry_t *entry;
{
     int i;

     for (i = 0; i < index_entry->num_entries; i++) {
	  if ((index_entry->entry[i].definition.file ==
	       entry->definition.file) &&
	      (index_entry->entry[i].definition.file_position ==
	       entry->definition.file_position)) {
	       if (priorityCmp(index_entry->entry[i].type,
			       entry->type) < 0) {
		    index_entry->entry[i] = *entry;
		    return(0);
	       }
	       else {
		    dict_set_status(DICT_DUPLICATE_INDEX_ENTRY);
		    return(dict_error_code);
	       }
	  }
     }

     index_entry->entry[index_entry->num_entries] = *entry;
     index_entry->num_entries++;
     return(0);
}


/*
 * Record one (word, type, definition) triple in the database.
 *
 * A word seen for the first time is stored with a single entry and a
 * "<soundex> <word>" line is written to the index file; num_words and
 * size_words are updated to account for that line.
 *
 * A word that is already in the database has its stored entry list
 * fetched and updated:
 *   - with fewer than MAXENTRIES entries, the new entry is appended,
 *     or replaces a lower-priority entry for the same definition; an
 *     entry for the same definition of equal or higher priority makes
 *     the new one a duplicate, which is silently dropped;
 *   - with exactly MAXENTRIES entries, the new entry replaces the
 *     first lower-priority entry, or is silently dropped if there is
 *     none;
 *   - more than MAXENTRIES entries, or a stored record larger than an
 *     indexEntry_t, is reported as DICT_CORRUPT_INDEX.
 *
 * The update is done on the local index entry through pointers.  The
 * by-value findGullibleEntry() and insertNewEntry() are not used
 * here because their changes never reach the caller.
 *
 * Returns 0 on success (including the dropped cases), otherwise the
 * dict error code.
 */
int
doEntry(word, type, definition)
char *word;
entryType_t type;
definition_t definition;
{
     datum key, content;
     indexEntry_t index_entry;
     entry_t entry;
     int retval;

     /*
      * More stealing from the libraries
      */
     extern DBM *_dict_database;
     extern FILE *_dict_index_file;
     
     key.dptr = word;
     key.dsize = strlen(word);

     entry.type = type;
     entry.definition = definition;
     
     index_entry.num_entries = 1;
     index_entry.entry[0] = entry;

     /* only the part of index_entry that is in use gets stored */
     content.dptr = (char *) &index_entry;
     content.dsize = (char *) &index_entry.entry[1] - (char *) &index_entry;

     /*
      * The overwhelmingly more common occurrence is storing the word
      * for the first time, so we try that first, and if that fails,
      * we do the fetch and add the word to an already existing entry.
      */

     retval = dbm_store(_dict_database, key, content, DBM_INSERT);
     if (retval < 0) {
	  dict_set_error(errno ? errno : -1);
	  return(dict_error_code);
     }
     if (retval == 1) {
	  /* the key was already present */
	  if (debugging > 1) {
	       dict_set_warning(DICT_MULT_ENTRIES);
	       dict_error(word);
	  }
	  content = dbm_fetch(_dict_database, key);
	  if (! content.dptr) {
	       dict_set_error(errno ? errno : -1);
	       return(dict_error_code);
	  }
	  /* don't let a bad record overrun index_entry */
	  if ((unsigned) content.dsize > sizeof(index_entry)) {
	       dict_set_error(DICT_CORRUPT_INDEX);
	       return(dict_error_code);
	  }
	  bcopy(content.dptr, (char *) &index_entry, content.dsize);
	  if (index_entry.num_entries > MAXENTRIES) {
	       dict_set_error(DICT_CORRUPT_INDEX);
	       return(dict_error_code);
	  }
	  else if (index_entry.num_entries == MAXENTRIES) {
	       if (debugging) {
		    dict_set_warning(DICT_MAX_ENTRIES);
		    dict_error(word);
	       }
	       if (replaceWeakerEntry(&index_entry, &entry))
		    /* not really an error */
		    return(0);
	  }
	  else {
	       retval = mergeEntry(&index_entry, &entry);
	       if (retval == DICT_DUPLICATE_INDEX_ENTRY)
		    return(0);
	       else if (retval)
		    return(retval);
	  }
	  content.dptr = (char *) &index_entry;
	  content.dsize =
	       (char *) &index_entry.entry[index_entry.num_entries] -
		    (char *) &index_entry;
	  retval = dbm_store(_dict_database, key, content, DBM_REPLACE);
	  if (retval < 0) {
	       dict_set_error(errno ? errno : -1);
	       return(dict_error_code);
	  }
	  /* the word is already in the index file */
	  return(0);
     }

     fprintf(_dict_index_file, "%s %s\n", dictSoundex(word), word);
     if (ferror(_dict_index_file)) {
	  dict_set_error(errno ? errno : -1);
	  return(dict_error_code);
     }

     num_words++;
     /* The 2 is one for the space between the soundex and the word, */
     /* and one for the newline (null later on)			     */
     size_words += key.dsize + SOUNDEX_LENGTH + 2;
     
     return(0);
}


/*
 * Comparison function handed to qsort() by sortIndex(): each argument
 * points at an element of an array of strings, and the result is that
 * of strcmp() on the two strings.
 */
compare(str1, str2)
char **str1, **str2;
{
     char *left = *str1;
     char *right = *str2;

     return(strcmp(left, right));
}

/*
 * Sort the index file in place.
 *
 * The first num_words lines of _dict_index_file are read into a
 * single buffer, sorted with qsort() and compare(), and written back
 * from the start of the file, skipping any line equal to the one
 * written just before it.
 *
 * The buffer is sized from size_words.  If the file does not hold
 * num_words newline-terminated lines that fit in size_words bytes,
 * DICT_FILES_CHANGED is set.
 *
 * Returns 0 on success, otherwise the dict error code.  Both buffers
 * are released on every path.
 */
int
sortIndex()
{
     char *string_space = NULL, **strings = NULL;
     extern FILE *_dict_index_file;
     extern char *malloc();
     int i, real_size = 0;
     char *ptr;
     int result = 0;
     char garbage;
     char word[DICTIONARYLINELENGTH+SOUNDEX_LENGTH+1];
     int len, room, limit;

     /* I would initialize this in the declaration, except that saber */
     /* complains (ARGH!)					      */
     *word = '\0';
     
     /*
      * size_words counts each line including its newline, which
      * becomes the null once the line is read, but fgets() adds a null
      * of its own after the newline, so the last line needs one byte
      * more.  The pointer array gets at least one element so that an
      * empty index does not ask malloc() for zero bytes.
      */
     string_space = malloc((unsigned) (size_words + 1));
     strings = (char **) malloc((unsigned) (sizeof(char *) *
					    (num_words ? num_words : 1)));
     if (! (string_space && strings)) {
	  dict_set_error(errno ? errno : -1);
	  result = dict_error_code;
	  goto done;
     }
     
     if (fseek(_dict_index_file, 0L, 0) == -1) {
	  dict_set_error(errno ? errno : -1);
	  result = dict_error_code;
	  goto done;
     }

     ptr = string_space;
     for (i = 0; i < num_words; i++) {
	  /* never let fgets() write past the end of string_space */
	  room = size_words - real_size;
	  if (room <= 0) {
	       dict_set_error(DICT_FILES_CHANGED);
	       result = dict_error_code;
	       goto done;
	  }
	  limit = room + 1;
	  if (limit > (int) sizeof(word))
	       limit = sizeof(word);

	  if (fgets(ptr, limit, _dict_index_file) == NULL) {
	       dict_set_error(errno ? errno : -1);
	       result = dict_error_code;
	       goto done;
	  }
	  
	  len = strlen(ptr);
	  real_size += len;

	  /* a line cut short by the limit has no newline */
	  if ((len == 0) || (ptr[len - 1] != '\n')) {
	       dict_set_error(DICT_FILES_CHANGED);
	       result = dict_error_code;
	       goto done;
	  }

	  ptr[len - 1] = '\0';
	  strings[i] = ptr;
	  ptr += len;
     }

     /*
      * NOTE(review): as written this reports an error only when
      * fgets() succeeds and the stream is already flagged at end of
      * file.  The intent looks like "complain unless we are at end of
      * file".  It is left alone because tightening it changes what
      * happens when the file holds more than num_words lines --
      * confirm against the -a case before changing.
      */
     if (! (fgets(&garbage, 1, _dict_index_file) == NULL) &&
	 (feof(_dict_index_file))) {
	  dict_set_error(DICT_FILES_CHANGED);
	  result = dict_error_code;
	  goto done;
     }

     qsort(strings, num_words, sizeof(char *), compare);

     if (fseek(_dict_index_file, 0L, 0) == -1) {
	  dict_set_error(errno ? errno : -1);
	  result = dict_error_code;
	  goto done;
     }

     /* word holds the last line written, to skip adjacent repeats */
     for (i = 0; i < num_words; i++) {
	  if (strcmp(word, strings[i])) {
	       fprintf(_dict_index_file, "%s\n", strings[i]);
	       strcpy(word, strings[i]);
	  }
	  if (ferror(_dict_index_file)) {
	       dict_set_error(errno ? errno : -1);
	       result = dict_error_code;
	       goto done;
	  }
     }

done:
     free(strings);
     free(string_space);

     return(result);
}