/* WIDE AREA INFORMATION SERVER SOFTWARE:
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.

   This is part of the X user-interface for the WAIS software.  Do with it
   as you please.

   Version 0.82
   Wed Apr 24 1991

   jonathan@Think.COM

*/

#include "xwais.h"
#include "document.h"

/* Reformat a date: the six leading characters of source, taken as three
 * two-character groups g0 g1 g2, are written to date as "g1/g2/g0"
 * followed by a terminating NUL (nine bytes in total).
 *
 * NOTE(review): source[0..5] are read unconditionally, so the caller must
 * pass a string of at least six characters.
 */
static void
setdate(date, source)
char *date, *source;
{
  /* from[k] is the index in source that supplies date[k];
     a negative entry marks a '/' separator. */
  static int from[8] = { 2, 3, -1, 4, 5, -1, 0, 1 };
  int k;

  for(k = 0; k < 8; k++)
    date[k] = (from[k] < 0) ? '/' : source[from[k]];
  date[8] = 0;
}

/* Release a NULL-terminated array of strings: every element up to (not
 * including) the terminating NULL is passed to s_free, then the array
 * itself is.  A NULL list is accepted and ignored.
 */
void
freeItemList(list)
char **list;
{
  char **item;

  /* Nothing to release; also keeps the loop below from dereferencing NULL. */
  if(list == NULL)
    return;

  for(item = list; *item != NULL; item++) {
    s_free(*item);
  }

  s_free(list);
}

char **
buildDocumentItemList(doclist, scorep)
DocList doclist;
Boolean scorep;
{
  char **result, date[9];
  int num, i;
  DocList doc;

  /* find the length of the doclist in the question */

  for(num = 0, doc = doclist; doc != NULL; num++, doc = doc->nextDoc);

  result = (char**) s_malloc(1+num*sizeof(char*));
  result[num] = NULL;
  if(num > 0)
    for(i = 0, doc = doclist; i<num; i++, doc = doc->nextDoc) {
      if(scorep == TRUE) {
	result[i] = s_malloc(strlen(doc->thisDoc->doc->headline)+26);
	if ((doc->thisDoc->doc->date == NULL) ||
	    (strcmp(doc->thisDoc->doc->date, "0") == 0)) {
	  if (doc->thisDoc->doc->numChars > 1024)
	    sprintf(result[i], "%5d %4.1fK %s", 
		    doc->thisDoc->rawScore,
		    ((float)doc->thisDoc->doc->numChars/1024.0),
		    doc->thisDoc->doc->headline);
	  else
	    sprintf(result[i], "%5d %5d %s", 
		    doc->thisDoc->rawScore,
		    doc->thisDoc->doc->numChars,
		    doc->thisDoc->doc->headline);
	}
	else {
	  setdate(date, doc->thisDoc->doc->date);
	  if (doc->thisDoc->doc->numChars > 1024)
	    sprintf(result[i], "%5d %4.1fK (%s) %s",
		    doc->thisDoc->rawScore, 
		    ((float)doc->thisDoc->doc->numChars/1024.0),
		    date,
		    doc->thisDoc->doc->headline);
	  else
	    sprintf(result[i], "%5d %4d (%s) %s",
		    doc->thisDoc->rawScore, 
		    doc->thisDoc->doc->numChars,
		    date,
		    doc->thisDoc->doc->headline);
	    
	}
      }
      else {
	result[i] = s_malloc(strlen(doc->thisDoc->doc->headline)+30);
	if(doc->thisDoc->start > 0) {
	  if ((doc->thisDoc->doc->date != NULL) &&
	      (strcmp(doc->thisDoc->doc->date, "0") != 0)) {
	    setdate(date, doc->thisDoc->doc->date);
	    sprintf(result[i], "[%d,%d] (%s) %s", 
		    doc->thisDoc->start, doc->thisDoc->end, date,
		    doc->thisDoc->doc->headline);
	  }
	  else {
	    sprintf(result[i], "[%d,%d] %s", 
		    doc->thisDoc->start, doc->thisDoc->end,
		    doc->thisDoc->doc->headline);
	  }
	}
	else
	  if ((doc->thisDoc->doc->date != NULL) &&
	      (strcmp(doc->thisDoc->doc->date, "0") != 0)) {
	    setdate(date, doc->thisDoc->doc->date);
	    sprintf(result[i], "(%s) %s",
		    date, doc->thisDoc->doc->headline);
	  }
	  else {
	    sprintf(result[i], "%s", doc->thisDoc->doc->headline);
	  }
      }
      result[i] = trim_junk(result[i]);
    }
  return(result);
}

/* Read a list of document-id structures from file and return them as a
 * DocList in file order.
 *
 * Returns NULL when the start of a list cannot be read.  Reading stops at
 * the end of the list or at the first entry that fails to load; in both
 * cases the entries read so far are returned (possibly NULL).
 */
DocList ReadListOfDocuments(file)
FILE *file;
{
  short check_result;
  DocumentID documentid = NULL;
  DocList result, node, last;

  /* initialize */
  node = last = result = NULL;

  if(ReadStartOfList(file) == FALSE)
    return(NULL);

  while(TRUE) {
    /* start/end default to -1, the "no fragment" value */
    documentid = (DocumentID)s_malloc(sizeof(_DocumentID));
    documentid->start = -1;
    documentid->end = -1;
    check_result = ReadDocument(documentid, file);

    if(check_result == TRUE) {
      /* append a new cell holding this entry to the tail of the list */
      node = (DocList) s_malloc(sizeof(_DocList));
      node->thisDoc = documentid;
      node->nextDoc = NULL;
      if(last != NULL)
        last->nextDoc = node;
      else
        result = node;
      last = node;
      continue;
    }

    /* The entry was not linked into the list, so release it here rather
       than leaking it on the error and end-of-list paths. */
    s_free(documentid);

    if(check_result == END_OF_STRUCT_OR_LIST || check_result == FALSE)
      return(result);
  }
}

short
ReadFragment(file, dest)
FILE *file;
long *dest;
{
  char temp_string[MAX_SYMBOL_SIZE];
  short check_result;

  /* initialize */
  check_result = CheckStartOfStruct("fragment", file);

  if(FALSE == check_result){ 
    return(false);
  }
  if(END_OF_STRUCT_OR_LIST == check_result)
    {
      return(FALSE);
    }
    
  /* read the slots: */
  while(check_result != END_OF_STRUCT_OR_LIST){
    long val;
    short check_result = ReadSymbol(temp_string, file, MAX_SYMBOL_SIZE);
    if(END_OF_STRUCT_OR_LIST == check_result) {
      return(true);
    }
    if(0 == strcmp(temp_string, ":byte-pos")){
      if(FALSE == ReadLong(file, dest)){
	return(false);
      }
    }
    else if(0 == strcmp(temp_string, ":line-pos")){
      if(FALSE == ReadLong(file, dest)){
	return(false);
      }
    }
    else if(0 == strcmp(temp_string, ":para-id")){
      if(FALSE == ReadLong(file, dest)){
	return(false);
      }
    }
    else			/* we don't know what this is */
      SkipObject(file);
  }
  return(true);
}

/* Read a document from a file.  If it is the end of a list instead of
 * at a document, then return END_OF_STRUCT_OR_LIST, 
 * if it hits an error on loading, return FALSE,
 * otherwise return TRUE.
 */
short
ReadDoc(file, doc)
FILE *file;
CRetDocument doc;
{
  char temp_string[MAX_SYMBOL_SIZE];
  short check_result;
  long lines, chars, best, val;
  DocID* docid = NULL;
  SourceID aSid;
  char headline[MAX_SYMBOL_SIZE];
  char dateStr[MAX_SYMBOL_SIZE];
          
  /* initialize */
  check_result = CheckStartOfStruct("document", file);
  headline[0] = '\0';
  dateStr[0] = '\0';

  if(FALSE == check_result){ 
    return(false);
  }
  if(END_OF_STRUCT_OR_LIST == check_result)
    {
      return(FALSE);
    }
    
  /* read the slots: */
  while(check_result != END_OF_STRUCT_OR_LIST){
    long val;
    short check_result = ReadSymbol(temp_string, file, MAX_SYMBOL_SIZE);
    if(END_OF_STRUCT_OR_LIST == check_result) {
      if(*headline != 0)
	if(doc->headline != NULL) s_free(doc->headline);
	doc->headline = s_strdup(headline);
      return(true);
    }
    if(FALSE == check_result){
      return(false);
    } 
    if(0 == strcmp(temp_string, ":number-of-lines")) {
      ReadLong(file,&lines);
      doc->numLines = lines;
    }
    else if ((0 == strcmp(temp_string, ":number-of-bytes")) ||
	     (0 == strcmp(temp_string, ":number-of-characters"))){
      ReadLong(file,&chars);
      doc->numChars = chars;
    }
    else if(0 == strcmp(temp_string, ":best-line")){
      ReadLong(file,&best);
      doc->best = best;
    }
    else if(0 == strcmp(temp_string, ":date")){
      if(FALSE == ReadString(dateStr, file, MAX_SYMBOL_SIZE)){
	return(false);
      }
      if(doc->date != NULL) s_free(doc->date);
      doc->date = s_strdup(dateStr);
    }
    else if(0 == strcmp(temp_string, ":headline")){
      if(FALSE == ReadString(headline, file, MAX_SYMBOL_SIZE))
	return(false);
      if(doc->headline != NULL) s_free(doc->headline);
      doc->headline = s_strdup(headline);
    }
    else if(0 == strcmp(temp_string, ":doc-id")){
      docid = (DocID*)s_malloc(sizeof(DocID));
      if(FALSE == ReadDocID(docid, file)){
	return(false);
      }
      doc->id = docid;
    }
    else if(0 == strcmp(temp_string, ":source")){
      long return_value;
      aSid = (SourceID) s_malloc(sizeof(_SourceID));
      return_value = ReadSourceID(file, aSid);
      if (return_value == END_OF_STRUCT_OR_LIST || return_value == false)
	{ s_free(aSid);
	  return(false);
	}
      doc->sourceID = aSid;
    }
    else if(strcmp(temp_string, ":type") == 0) {
      if(FALSE == ReadString(temp_string, file, MAX_SYMBOL_SIZE))
	return(false);
      if(doc->type != NULL) s_free(doc->type);
      doc->type = s_strdup(temp_string);
    }
    else			/* we don't know what this is */
      SkipObject(file);
  }
  return(true);
}

/* Read a "document-id" structure from file into doc.
 *
 * Returns END_OF_STRUCT_OR_LIST when the end of the enclosing list is
 * found instead of a structure, false when the structure cannot be
 * started or a slot fails to load, and true once the end of the
 * structure is reached.
 *
 * rawScore and normalScore are reset to -1 before anything is read.
 * :score sets rawScore, :document reads a nested document into a freshly
 * made CRetDocument, :start and :end read a fragment position.  Unknown
 * slots are skipped.  A failure inside the nested document is reported on
 * stderr but does not stop the read; the partly filled document is still
 * attached to doc.
 */
short ReadDocument(doc, file)
DocumentID doc;
FILE *file;
{
  CRetDocument document = NULL;
  short status;
  short check_result;
  long val;
  char temp_string[MAX_SYMBOL_SIZE];
  short result = CheckStartOfStruct("document-id",file);

  doc->rawScore = doc->normalScore = -1;

  if (result == false)
    return(false);
  if (result == END_OF_STRUCT_OR_LIST)
    return(END_OF_STRUCT_OR_LIST);

  while (TRUE) {
    check_result = ReadSymbol(temp_string,file,MAX_SYMBOL_SIZE);

    if (check_result == false)
      return(false);
    if (check_result == END_OF_STRUCT_OR_LIST)
      return(true);

    if (strcmp(temp_string,":score") == 0) {
      if (ReadLong(file,&val) == false)
        return(false);
      doc->rawScore = (short)val;
    }
    else if (strcmp(temp_string,":document") == 0) {
      document = MakeNewDocument();
      status = ReadDoc(file, document);
      if(status == false) {
        fprintf(stderr, "error reading document structure.\n");
      }
      doc->doc = document;
    }
    else if(0 == strcmp(temp_string, ":start")) {
      /* ReadFragment can succeed without writing a position (a fragment
         with no recognised slot), so seed val with -1, the "not set"
         value, rather than storing an indeterminate one. */
      val = -1;
      if(FALSE == ReadFragment(file, &val))
        return(false);
      doc->start = val;
    }
    else if(0 == strcmp(temp_string, ":end")) {
      val = -1;
      if(FALSE == ReadFragment(file, &val))
        return(false);
      doc->end = val;
    }
    else
      SkipObject(file);
  }
}

CRetDocument
MakeNewDocument()
{
  CRetDocument result;

  result = (CRetDocument)s_malloc(sizeof(_CRetDocument));
  result->id = NULL;
  result->sourceID = NULL;
  result->myConnection= NULL;
  result->numLines = 0;
  result->numChars = 0;
  result->blocks = result->pendingBlocks = NULL;
  result->best = 0;
  result->source = result->headline = result->city = NULL;
  result->stock = result->company = result->industry = NULL;
  result->type = result->date = NULL;
  result->next = result->prev = NULL;
  result->paraStarts = NULL; /* should be array[1] = -1; */
  
  return(result);
}


DocList
makeDocList(doc, rest)
DocumentID doc;
DocList rest;
{
  DocList result;
  if((result = (DocList)s_malloc(sizeof(_DocList))) != NULL) {
    result->thisDoc = doc;
    result->nextDoc = rest;
  }
  return(result);
}

/* Return a newly allocated DocumentID holding the same id, scores,
 * start/end positions and document pointer as doc.  The copy is shallow:
 * the CRetDocument that doc->doc points to is shared with the original,
 * not duplicated.
 */
DocumentID
copy_docid(doc)
DocumentID doc;
{
  DocumentID result;

  result = (DocumentID)s_malloc(sizeof(_DocumentID));
  result->id = doc->id;
  result->rawScore = doc->rawScore;
  /* carried over as well so the copy does not hold an unset score */
  result->normalScore = doc->normalScore;
  result->start = doc->start;
  result->end = doc->end;
  result->doc = doc->doc;

  return result;
}

void WriteDocument(doc, fp)
DocumentID doc;
FILE *fp;
{
  WriteStartOfStruct("document-id", fp);
  WriteNewline(fp);
  WriteSymbol(":score", fp);
  WriteLong(doc->rawScore, fp);
  WriteNewline(fp);
  if(doc->start >= 0) {
    WriteSymbol(":start", fp);
    WriteNewline(fp);
    WriteStartOfStruct("fragment", fp);
    WriteNewline(fp);
    WriteSymbol(":line-pos", fp);
    WriteLong(doc->start, fp);
    WriteEndOfStruct(fp);
    WriteNewline(fp);
  }
  if(doc->end > 0) {
    WriteSymbol(":end", fp);
    WriteNewline(fp);
    WriteStartOfStruct("fragment", fp);
    WriteNewline(fp);
    WriteSymbol(":line-pos", fp);
    WriteLong(doc->end, fp);
    WriteEndOfStruct(fp);
    WriteNewline(fp);
  }
  if(doc->doc != NULL) {
    WriteSymbol(":document", fp);
    WriteNewline(fp);
    WriteStartOfStruct("document", fp);
    WriteNewline(fp);
    if(doc->doc->headline != NULL) {
      WriteSymbol(":headline", fp);
      WriteString(doc->doc->headline, fp);
    }
    if (doc->doc->id != NULL) {
      WriteNewline(fp);
      WriteSymbol(":doc-id", fp);
      WriteDocID(doc->doc->id, fp);
      WriteNewline(fp);
    }
    if (doc->doc->sourceID != NULL) {
      WriteSymbol(":source", fp);
      WriteNewline(fp);
      WriteStartOfStruct("source-id", fp);
      WriteNewline(fp);
      WriteSymbol(":filename", fp);
      WriteString(doc->doc->sourceID->filename, fp);
      WriteNewline(fp);
      WriteEndOfStruct(fp);
      WriteNewline(fp);
    }
    WriteSymbol(":number-of-lines", fp);
    WriteLong(doc->doc->numLines, fp);
    WriteNewline(fp);
    WriteSymbol(":number-of-bytes", fp);
    WriteLong(doc->doc->numChars, fp);
    WriteNewline(fp);
    if(doc->doc->type != NULL) {
      WriteSymbol(":type", fp);
      WriteString(doc->doc->type, fp);
      WriteNewline(fp);
    }
    WriteSymbol(":best-line", fp);
    WriteLong(doc->doc->best, fp);
    WriteNewline(fp);
    if(doc->doc->date != NULL) {
      WriteSymbol(":date", fp);
      WriteString(doc->doc->date, fp);
      WriteNewline(fp);
    }
    else {
      WriteSymbol(":date", fp);
      WriteString("0", fp);
      WriteNewline(fp);
    }
    WriteNewline(fp);
    WriteEndOfStruct(fp);
  }
  WriteNewline(fp);
  WriteEndOfStruct(fp);
  WriteNewline(fp);
}

/* Order dlist by descending rawScore, in place.  The list cells stay
 * where they are; only their thisDoc pointers are exchanged.  Passes over
 * the list are repeated until one completes without an exchange.
 */
void sort_document_list(dlist)
DocList dlist;
{
  DocList cur, after;
  DocumentID held;
  Boolean swapped;

  do {
    swapped = FALSE;
    for(cur = dlist; cur != NULL && cur->nextDoc != NULL; cur = cur->nextDoc) {
      after = cur->nextDoc;
      if(cur->thisDoc->rawScore < after->thisDoc->rawScore) {
        /* the lower score is ahead of the higher one: exchange them */
        held = cur->thisDoc;
        cur->thisDoc = after->thisDoc;
        after->thisDoc = held;
        swapped = TRUE;
      }
    }
  } while(swapped);
}
      
/* Return the last cell of dlist, i.e. the one whose nextDoc is NULL.
 * An empty list (NULL) is returned unchanged.
 */
DocList
findLast(dlist)
DocList dlist;
{
  DocList tail = dlist;

  if(tail != NULL) {
    while(tail->nextDoc != NULL)
      tail = tail->nextDoc;
  }
  return tail;
}

/* Return the document held by the cell at zero-based position number in
 * dlist, or NULL when the list has no such cell.  A number of zero or
 * less selects the first cell.
 */
DocumentID
findDoc(dlist, number)
DocList dlist;
int number;
{
  DocList node = dlist;
  int remaining = number;

  /* step forward once per position still to skip */
  while((node != NULL) && (remaining > 0)) {
    node = node->nextDoc;
    remaining--;
  }

  if (node == NULL)
    return(NULL);
  return(node->thisDoc);
}

DocumentID
fillDocumentID(info, source, i)
WAISSearchResponse  *info;
SourceID source;
int i;
{
  DocumentID docID;


  if((docID = (DocumentID) s_malloc(sizeof(_DocumentID))) != NULL) {
    docID->rawScore = info->DocHeaders[i]->Score;
    docID->start = docID->end = -1;
    if((docID->doc = (CRetDocument) s_malloc(sizeof(_CRetDocument))) != NULL) {
      docID->doc->sourceID = source;
      /*	  docID->doc->version = info->DocHeaders[i]->VersionNumber; */
      docID->doc->numLines = info->DocHeaders[i]->Lines;
      docID->doc->numChars = info->DocHeaders[i]->DocumentLength;
      docID->doc->best = info->DocHeaders[i]->BestMatch;

      if(info->DocHeaders[i]->Headline != NULL) {
	long length;
	if (docID->doc->headline != NULL) s_free(docID->doc->headline);
	docID->doc->headline = s_strdup(info->DocHeaders[i]->Headline);
	length = strlen(docID->doc->headline);
/*	delete_seeker_codes(docID->doc->headline, &length); */
      }
      if(info->DocHeaders[i]->Source != NULL) {
	if (docID->doc->source != NULL) s_free(docID->doc->source);
	docID->doc->source = s_strdup(info->DocHeaders[i]->Source);
      }
      if(info->DocHeaders[i]->OriginCity != NULL) {
	if(docID->doc->city != NULL) s_free(docID->doc->city);
	docID->doc->city = s_strdup(info->DocHeaders[i]->OriginCity);
      }
      if(info->DocHeaders[i]->Date != NULL) {
	if(docID->doc->date != NULL) s_free(docID->doc->date);
	docID->doc->date = s_strdup(info->DocHeaders[i]->Date);
      }
      if(info->DocHeaders[i]->Types != NULL) {
	if(docID->doc->type != NULL) s_free(docID->doc->type);
	docID->doc->type = s_strdup(info->DocHeaders[i]->Types[0]);
      }
      else {
	if(docID->doc->type != NULL) s_free(docID->doc->type);
	docID->doc->type = s_strdup("TEXT");
      }
      if((docID->doc->id = docIDFromAny(info->DocHeaders[i]->DocumentID))
	== NULL) {
	docID->doc->id = (DocID*)s_malloc(sizeof(DocID));
	docID->doc->id->originalLocalID =
	  copy_any(info->DocHeaders[i]->DocumentID);
      }
/*
      if(info->DocHeaders[i]->DocumentID != NULL)
	docID->doc->id->originalLocalID =
	  copy_any(info->DocHeaders[i]->DocumentID);
*/
    }
  }
  return docID;
}
