

#include <ctype.h>
#include <fcntl.h>
#include <ndbm.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/file.h>
#include "xthesaurus.h"

/* Forward declarations of the two search back ends called by lookup_word(). */
char *glob_search(), *literal_search();
/* ndbm handle for the offsets database; assigned by openlib(). */
DBM* db;
DBM* openlib();
long pdb_fetch();
/* Number of halving steps get_entries() performs before its linear scan. */
#define DEPTH 12


/******* DATABASE LOOKUP STUFF *********/

/*
 * Look up `word` and return the text to show for it.
 *
 * `word` is first passed through lower(), which modifies it in place.
 * Words without '*' or '?' (see isliteral()) are handed to
 * literal_search(); anything else goes to glob_search().
 *
 * Returns whatever string the chosen search function returned.
 */
char *lookup_word(word)
char *word;
{
  lower(word);

  if ( isliteral (word) )
    return literal_search(word);

  return glob_search(word);
}

/*
 * Collect the thesaurus text for every entry indexed under `word`.
 *
 * get_entries() supplies the entry numbers.  For each one, pdb_fetch()
 * gives the byte offset of that entry and of the following entry in
 * ROGETFILE; the bytes in between are appended to a growing heap buffer.
 *
 * Returns a NUL-terminated heap buffer with the concatenated text on
 * success, or a string literal describing the failure.  The caller cannot
 * tell the two apart from the pointer alone, so it must not free() the
 * result unconditionally.
 *
 * NOTE(review): `entries` holds 20 ints and get_entries() is not told that
 * capacity, so an index line listing more than 20 numbers would overflow it.
 */
char* literal_search(word)
char* word;
{
  int entries[20];
  int count, i;
  int roget;

  char* ret_str = NULL;
  char* grown;

  int cur_ptr = 0;
  int str_size = 1;   /* room for the terminating NUL */

  count = get_entries(word, entries);

  /* Nothing is allocated yet, so this early return leaks nothing. */
  if(!count) return("No matches found");

  roget = open(ROGETFILE, O_RDONLY, 0);
  if(roget < 0) return("Error trying to read  roget.txt");

  for(i = 0; i < count; i++) {
    long offset;
    long nextoffset;
    int ent_size;
    int recs, tries = 0;

    offset = pdb_fetch(db,entries[i]);
    nextoffset = pdb_fetch(db, entries[i]+1);

    /* No offset recorded for this entry: there is nothing valid to seek to. */
    if(offset < 0) goto read_error;

    /* kludge.  we're at the end, hope the last entry fits in 10k*/
    if(nextoffset == -1) nextoffset = offset+10240;
    ent_size = nextoffset - offset;
    if(ent_size <= 0) goto read_error;

    str_size += ent_size;
    /* Keep the old pointer until realloc succeeds so it can still be freed. */
    grown = (char*)realloc(ret_str, str_size);
    if(!grown) {
      free(ret_str);
      close(roget);
      return("Out of memory while reading roget.txt");
    }
    ret_str = grown;

    lseek(roget, offset, L_SET);
    /* Retry a few times if read() reports zero bytes. */
    while((recs = read(roget, &ret_str[cur_ptr], ent_size)) == 0  && tries < 10)
      tries++;

    if(recs < 0 || tries == 10) goto read_error;

    /* A short read is accepted; only the bytes actually read are kept. */
    cur_ptr += (recs);
    ret_str[cur_ptr] = 0;
  }

  close(roget);
  return ret_str;

read_error:
  free(ret_str);
  close(roget);
  return("Error trying to read  roget.txt");
}


/* dummies */
/*
 * Report whether `word` is free of the wildcard characters '*' and '?'.
 * Returns 1 when neither character occurs in `word`, 0 otherwise.
 */
int isliteral (word)
char *word;
{
  if (index(word, '*'))
    return 0;
  if (index(word, '?'))
    return 0;
  return 1;
}

/*
 * Placeholder for wildcard searching: takes no parameters and always
 * returns the same fixed message.
 */
char* glob_search()
{
  char *message = "Glob searching not implemented yet\n";

  return message;
}

/*
 * Fetch the value stored in `db` under the decimal text of `entry` and
 * return it converted with atol().
 *
 * Returns -1 when the database has no (or an empty) value for that key.
 *
 * The fetched bytes are copied into a local buffer and NUL-terminated
 * before conversion, because the stored datum carries a length and is not
 * assumed to end in a NUL.
 */
long pdb_fetch(db, entry)
    DBM* db;
    int entry;
{
  datum data;
#if defined(SOLARIS) || defined(linux)
  datum key;
#endif
  char keybuf[20];
  char valbuf[32];
  size_t len;

  sprintf(keybuf, "%d", entry);
#if defined(SOLARIS) || defined(linux)
  key.dptr = keybuf;
  key.dsize = strlen (keybuf);
  data = dbm_fetch(db, key);
#else
  data = dbm_fetch(db, keybuf, strlen(keybuf));
#endif
  if(data.dptr == NULL || data.dsize == 0) return -1;

  /* Never copy more than the buffer holds, and always leave room for NUL. */
  len = (size_t)data.dsize;
  if(len >= sizeof valbuf) len = sizeof valbuf - 1;
  memcpy(valbuf, data.dptr, len);
  valbuf[len] = '\0';

  return atol(valbuf);
}


/*
 * Find `word` in the WORDINDEX file and store its entry numbers.
 *
 * Each index line is read as "key:numbers".  The search assumes lines are
 * ordered by lower-cased key: it first narrows the file position with DEPTH
 * halving steps, then backs up a little and scans forward line by line.
 *
 * `word` is passed through lower() in place.  On a match the numbers after
 * the colon are written to `entries` by chars2ints().
 *
 * Returns the number of entries stored, or 0 when the word is not found or
 * the index file cannot be opened.
 *
 * NOTE(review): the capacity of `entries` is not known here, so nothing
 * limits how many numbers chars2ints() writes.
 */
int get_entries(word, entries)
     char* word;
     int *entries;
{
  FILE* fptr;
  char buffer[1024];
  char *colon;
  long filesize, partsize, location;
  int i, cmp;
  int found = 0;

  if(!(fptr = fopen(WORDINDEX, "r"))) {
    perror("fopen");
    return 0;
  }

  lower(word);
  fseek(fptr, 0L, SEEK_END);
  filesize = ftell(fptr);
  if(filesize < 0) {
    fclose(fptr);
    return 0;
  }
  partsize = filesize/2;
  location = partsize;
  fseek(fptr, location, SEEK_SET);

  /* narrow down the search by binary splitting*/
  for(i = 0; i < DEPTH; i++) {

    /*
     * Away from the start of the file the probe usually lands mid-line, so
     * one line is discarded before the line that is actually compared.
     */
    if ((location != 0 && !fgets(buffer, sizeof buffer, fptr)) ||
        !fgets(buffer, sizeof buffer, fptr)) {
      /* Ran off the end of the file: the probe is past every key, so
         step backward instead of giving up. */
      cmp = -1;
    } else {
      colon = index(buffer, ':');
      if(colon) *colon = 0;
      lower(buffer);
      cmp = strcmp(word, buffer);
    }

    partsize /= 2;
    if(cmp > 0)
      location += partsize;
    else
      location -= partsize;
    fseek(fptr, location, SEEK_SET);
  }

  /* Back up one more step so the linear scan starts before the target. */
  location -= partsize;
  if(location < partsize || location < 0) location = 0;
  fseek(fptr, location, SEEK_SET);

  /* linear search*/
  if (location != 0) {
    /* if it's not at the beginning, eat a line first*/
    if(!fgets(buffer, sizeof buffer, fptr))
      goto done;
  }

  while(fgets(buffer, sizeof buffer, fptr)) {
    colon = index(buffer, ':');
    if(!colon) continue;   /* line without a key separator: skip it */
    *colon = 0;
    lower(buffer);
    cmp = strcmp(word, buffer);
    if(cmp < 0) break; /* overpassed where it would be*/
    if(cmp == 0) { /* found it*/
      found = chars2ints(colon + 1, entries);
      break;
    }
  }

done:
  fclose(fptr);
  return found;
}

/*
 * Parse whitespace-separated numbers from `str` into `entries`,
 * e.g. "324 435" becomes {324, 435}.
 *
 * Each run of non-whitespace characters is converted with atoi() and
 * stored in the next slot of `entries`.
 *
 * Returns how many values were stored.  The caller must supply an array
 * large enough for every token in `str`; no limit is applied here.
 */
int chars2ints(str, entries)
char* str;
int* entries;
{
  int count = 0;

  for(;;) {
    /* ctype functions need an unsigned char value; plain char may be negative. */
    while(*str && isspace((unsigned char)*str)) str++;
    if(!*str) return count;

    entries[count++] = atoi(str);

    /* Skip the rest of the token just converted. */
    while(*str && !isspace((unsigned char)*str)) str++;
  }
}

/*
 * Convert every upper-case character of `word` to lower case, in place.
 * Characters that are not upper case are left untouched.
 * Always returns 0; callers in this file ignore the result.
 */
int lower(word)
     char* word;
{
  for(; *word; word++) {
    /* ctype functions need an unsigned char value; plain char may be negative. */
    unsigned char c = (unsigned char)*word;

    if(isupper(c))
      *word = (char)tolower(c);
  }
  return 0;
}


/*
 * Open OFFSETSFILE read-only with dbm_open(), store the handle in the
 * global `db`, and return that same handle.
 */
DBM* openlib()
{
  DBM *handle = dbm_open(OFFSETSFILE, O_RDONLY, 0);

  db = handle;
  return handle;
}
