/* WIDE AREA INFORMATION SERVER SOFTWARE	
   No guarantees or restrictions.  See the readme file for the full standard 
   disclaimer.
   5.29.90	Harry Morris, morris@think.com
*/

/* this file is a server process for a unix machine that takes input from 
   standard in or from a socket and searches the local search engine on the 
   unix box.
   originally written by harry morris.
   modified by brewster kahle. 7/90
   6.xx.90	Brewster - initial implementation of stdio interface
   7.xx.90	Patrick Bray - support for headers and forking processes
   90.07.31	Ephraim - support for logging 

   91.03.03     Jonathan - set searchLog to log_out.
   91.05.23	Jonathan - added fork process for indexer.
                           Fixed version display so it exits.
   91.05.25     Jonathan - added setuid.
   
   Tue Jul  9 12:11:02 1991 -- Michael Haberler mah@wu-wien.ac.at

                Added semi-intelligent INFO database indexing (only done if
		any of the .src files is newer than INFO.dct)
		
		Locking against multiple concurrent INFO rebuilds if 
		running under inetd

		Use scandir() for directory operations

		Works under inetd as well as standalone. Here are my inetd.conf
   		entries (note the missing userid in the Ultrix inetd.conf!):

   hpux 7.0/800, Interactive/386 2.2.1:
	z3950 stream tcp nowait root /usr/local/etc/waisserver waisserver -s \
		-d /usr/logins/mah/wais-sources

   Ultrix 4.1:
	z3950 stream tcp nowait /usr/local/etc/waisserver waisserver -s \
		-d /usr/logins/mah/wais-sources

   Also, add the next line to /etc/services, and tickle your YP server:
	z3950           210/tcp         # wide area information server (wais)

 to do:
   - limit all requests to that directory if the switch is set?
   - make it so it can be a daemon in rc.local and inetd and a login process
 */

#define SERVER_DATE "Fri Sep 13 1991"

#ifndef lint
static char *RCSid = "$Header: /afs/athena.mit.edu/astaff/project/wais/src/wais-8-b3/ir/RCS/server.c,v 1.2 91/09/20 15:14:56 epeisach Exp $";
#endif

#define INFO_DICT    "INFO.dct"
#define LOCKFILE    "/tmp/INFO.lock" /* while re-indexing INFO */
#define NAPTIME     1		     /* seconds */
#define MAXNAPTIME  60		/* wait up to a minute for indexer to finish */

#include "sockets.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#ifdef ultrix
#include <sys/file.h>
#else
#ifdef USG
#include <fcntl.h>
#else
#include <sys/file.h>
#endif
#endif /* else ultrix */
#ifdef SYSV
#define SIGCHLD SIGCLD
#endif
#include <signal.h>
#include <string.h>
#include "irdirent.h"
#include "panic.h"
#include "ustubs.h"
#include "transprt.h"
#include "wmessage.h"
#include "ir.h"
#include "wprot.h"
#include "cutil.h"
#include "futil.h"
#include "irext.h"

/* to create the INFO index */
#include "irtfiles.h"
#include "irfiles.h"
#include "irhash.h"
#include "version.h"

#define BUFSZ 30000     /* size of our buffer */
static long bufferSize = BUFSZ; /* how much we are using
                                   (we get one of these per process) */

char *log_file_name = NULL;

FILE *logfile; /* the logfile */

/*---------------------------------------------------------------------------*/

#define TIMEOUT_LENGTH 36000 /* ten hour timeout. */
#define IDLE_TIME "10 hours"

void
serve_client(in,out, index_directory)
FILE* in;
FILE* out;
char *index_directory;
{ 
  char buf[BUFSZ];		/* contains the message and header */
  char *bufPtr ;		/* points at the begining of the z3950 */
  long size;			/* bytes in the z3950 message */
  WAISMessage header;		/* for storing the header */
  long i;
  long bytesLeft;
  struct itimerval new, old;
  long nextChar;

  new.it_interval.tv_sec = 0;
  new.it_interval.tv_usec = 0;
  new.it_value.tv_sec = TIMEOUT_LENGTH;
  new.it_value.tv_usec = 0;

  getitimer(ITIMER_REAL, &old);
  while (TRUE)
    {
      /* try to read the header */
      for (i = 0; i < HEADER_LENGTH; i++)
	{ 
	  setitimer(ITIMER_REAL, &new, NULL);
	  nextChar = fgetc(in);
	  if (nextChar == EOF)	/* my connection exited, so will I */
	    { 
	      return;
	    }
	  else
	    buf[i] = (char)nextChar;
	}

      setitimer(ITIMER_REAL, &old, NULL);
      /* parse the header */
      readWAISPacketHeader(buf,&header);

      /* make sure we have the right version.  
	 If we dont, we dont know what to do. */
      if (header.hdr_vers > HEADER_VERSION)
	panic("Incompatable header versions (Current version: %d, supplied version: %d.", 
	      HEADER_VERSION, header.hdr_vers) ;

      /* determine the size of the z3950 message */
      {
	char length_array[11];
	strncpy(length_array, header.msg_len, 10);
	length_array[10] = '\0';
	size = atol(length_array);
      }

      /* set bufPtr to start the z3950 message */
      bufPtr = buf + HEADER_LENGTH ;

      /* read the z3950 message */
      for (i = 0; i < size ; i++) {
	setitimer(ITIMER_REAL, &new, NULL);
	buf[i + HEADER_LENGTH] = (char)fgetc(in) ;
      }

      rewind(in);

      /* decode the z3950 if necessary */
      transportDecode((long)header.encoding,bufPtr,&size);
     
      /* XXX handle compression options */

      /* process it the z3950 */
      bytesLeft = bufferSize;

      size = interpret_buffer(bufPtr,size,bufPtr,bytesLeft,
			      &bufferSize,(long)header.hdr_vers,
			      index_directory); 

      /* re-encode the message if necessary */
      transportCode((long)header.encoding,bufPtr,&size); 

      /* XXX handle compression options */

      /* write the new header */
      writeWAISPacketHeader(buf,size,
			    (long)header.msg_type,header.server,
			    (long)header.compression,(long)header.encoding,
			    (long)header.hdr_vers);

      /* write the whole response to the output file */
      for (i = 0; i < size + HEADER_LENGTH; i++)
	fputc(buf[i],out) ;

      fflush(out);		/* flush any file buffers */
      rewind(out);		/* reset the file for read */

    }
}

/*---------------------------------------------------------------------------*/

#ifndef ISC
static void breakKey _AP((long s1,long s2,struct sigcontext* s3,char* s4));
#endif

/* Handler installed for SIGINT by main(): reports the interrupt through
 * panic().  None of the arguments are examined.
 */
static void
breakKey (s1,s2,s3,s4)
long s1, s2;
struct sigcontext *s3;
char *s4;
{
  panic ("got a ^c");
}

/*---------------------------------------------------------------------------*/

/* Handler installed for SIGCHLD by main(): collects the exit status of
 * every child that has already terminated.  None of the arguments are
 * examined.
 *
 * Several children can exit while a single SIGCHLD is pending, so one
 * wait() per signal can leave zombies behind.  Looping with WNOHANG reaps
 * all of them and never blocks when no child has exited.
 */
void
childhandler(sig, code, scp, addr)
long sig, code;
struct sigcontext *scp;
char *addr;
{
  while (waitpid(-1, NULL, WNOHANG) > 0)
    ;				/* give each kid a decent burial */
}

/*---------------------------------------------------------------------------*/

/* Handler installed for SIGALRM by main().  serve_client() arms the
 * real-time timer with TIMEOUT_LENGTH seconds before each read, so this
 * runs when the client has been silent that long: log it and exit with
 * status 0.  None of the arguments are examined.
 */
void
alarmhandler(sig, code, scp, addr)
long sig;
long code;
struct sigcontext *scp;
char *addr;
{
  waislog(WLOG_HIGH, WLOG_CLOSE,
          "Server idle longer %s. Closing server and exiting.",
          IDLE_TIME);

  exit(0);
}

/*---------------------------------------------------------------------------*/

/* Handler installed for SIGSEGV by main(): writes a log entry and ends the
 * process with exit status 0.  None of the arguments are examined.
 */
void
seghandler(sig, code, scp, addr)
long sig;
long code;
struct sigcontext *scp;
char *addr;
{
  waislog(WLOG_HIGH, WLOG_CLOSE,
          "Segmentation violation.  Bummer. Closing server and exiting.");

  exit(0);
}

/*---------------------------------------------------------------------------*/

/* Handler installed for SIGBUS by main(): writes a log entry and ends the
 * process with exit status 0.  None of the arguments are examined.
 */
void
bushandler(sig, code, scp, addr)
long sig;
long code;
struct sigcontext *scp;
char *addr;
{
  waislog(WLOG_HIGH, WLOG_CLOSE,
          "Bus error.  Bummer. Closing server and exiting.");

  exit(0);
}

/*---------------------------------------------------------------------------*/

#include <pwd.h>

/* Translate a login name into its numeric user id via getpwnam().
 * Returns the pw_uid of the matching password entry, or -1 when
 * getpwnam() finds no entry for `name`.
 */
int finduid(name)
char *name;
{
  struct passwd *entry = getpwnam(name);

  if (entry != NULL)
    return(entry->pw_uid);

  return -1;
}

static  char *index_dir = NULL;
static  time_t info_change_time;
static  int indexing_needed = 0;
static  char *info_dict = INFO_DICT;

extern int alphasort();

/* Selection function for scandir().
 *
 * Accepts a directory entry (returns 1) when all of these hold:
 *   - its name ends in source_ext,
 *   - it is not the INFO source file itself ("INFO" followed by source_ext),
 *   - it is a regular file inside index_dir.
 * Every other entry is rejected (returns 0).
 *
 * Side effect: indexing_needed is set when an accepted file was modified
 * more recently than info_change_time (the timestamp of INFO.dct).
 *
 * The INFO source file used to be compared against info_dict, which names
 * the dictionary and not the source file, so it was never excluded.
 */
static int
srcfiles(e)
	struct dirent *e;
{
	static char info_name[] = "INFO";
	struct stat sb;
	char *lastdot = strrchr(e->d_name,'.');

	/* cheap name tests first, so stat() only runs on real candidates */
	if (lastdot == NULL || strcmp(lastdot,source_ext) != 0)
		return 0;

	if ((lastdot - e->d_name) == (sizeof(info_name) - 1) &&
	    strncmp(e->d_name, info_name, sizeof(info_name) - 1) == 0)
		return 0;		/* the INFO source file itself */

	if (stat(merge_pathnames(e->d_name,index_dir), &sb) < 0)
		return 0;

	if ((sb.st_mode & S_IFMT) != S_IFREG)
		return 0;

	if (sb.st_mtime > info_change_time)
		indexing_needed = 1;

	return 1;
}


/*---------------------------------------------------------------------------*/

extern char *inet_ntoa _AP((struct in_addr));

void
main(argc,argv)
int argc;
char* argv[];
{ FILE *file;
  long socket;
  char *next_argument = next_arg(&argc, &argv), *command_name;
  boolean use_stdio = TRUE;		/* default is true */
  long tcp_port = 210;			/* tcp_port to use */
  /* char *log_file_name = NULL; */	/* name of file for error output */
  int child_proc;		/* for the child process id */
  char *uid_name = "root";	/* user id so setuid if root */
  int uid = 0;		/* if not specified, leave as root. */
  int child,lockfd;
  struct stat statbuf;
  struct dirent **list;
  int n_files,fd;    
  int naptime = 0;
  extern int errno;
  extern char *sys_errlist[];
  char host_name[255];

  command_name = next_argument;
  host_name[0] = 0;

  if (!strcmp(command_name, "waisserver.d")) {
    struct sockaddr_in source;
    int sourcelen;
#ifdef BSD
  struct in_addr {
    union {
      struct { u_char s_b1,s_b2,s_b3,s_b4; } S_un_b;
      u_long S_addr;
    } S_un;
  } addr_p;
#endif /* BSD */

  sourcelen = sizeof(struct sockaddr_in);

    if (!getpeername(fileno(stdout),&source,&sourcelen)) {
#ifdef BSD
      addr_p.S_un.S_addr = source.sin_addr.s_addr;
#endif				/* BSD */
  
      if(source.sin_family == AF_INET) {
#ifdef NOINETNTOA
	sprintf(host_name, "%d.%d.%d.%d",
#ifdef BSD
		addr_p.S_un.S_un_b.s_b1,
		addr_p.S_un.S_un_b.s_b2,
		addr_p.S_un.S_un_b.s_b3,
		addr_p.S_un.S_un_b.s_b4
#else
		source.sin_addr.S_un.S_un_b.s_b1,
		source.sin_addr.S_un.S_un_b.s_b2,
		source.sin_addr.S_un.S_un_b.s_b3,
		source.sin_addr.S_un.S_un_b.s_b4

#endif /* BSD */
		);
#else
 	sprintf(host_name, "%s", inet_ntoa(source.sin_addr));
#endif /* NOINETNTOA */
      }
    }
    else sprintf(host_name, "Error getting socket: %d, %s.", errno, sys_errlist[errno]);

    use_stdio = TRUE;
  }

  if (argc == 0){
    printf("Usage: %s [-p [port_number]] [-s] [-d directory] [-u user] [-v]\n",
	   command_name);
    printf(" -p [port] listen to the port.  If the port is supplied, then\n");
    printf("    that tcp_port number is used.  If it is not supplied \n");
    printf("    then the Z39.50 port (210) is used.\n");
    printf(" -d directory: means to use the directory as the source of databases.\n");
    printf("    Defaults to the current directory.\n");
    printf(" -e [file]: set log output to file, or /dev/null if not specified.\n");
    printf(" -s means listen to standard I/O for queries.  This is the default\n");
    printf(" -u user: if started as root, setuid to user after startup.\n");
    printf(" -v prints the version.\n");
    exit(1);
  }
  if(NULL == (next_argument = next_arg(&argc, &argv))){
    printf("No arguments specified\n");
    exit(0);
  }
  while((next_argument != NULL) &&
	('-' == next_argument[0])){
    /* then we have an argument to process */
    if (0 == strcmp("-p", next_argument)){
      char *peek_argument = peek_arg(&argc, &argv);
      use_stdio = FALSE;
      if ((NULL != peek_argument) && /* if we are not out of args */
	  ('-' != peek_argument[0])){ { /* and the next isn't an option... */
	    /* get the port number */
	    tcp_port = atoi(next_arg(&argc, &argv));
	  }			/* end if (explicit tcp_port) */
				    }
    }				/* end if (-p) */
    else if (0 == strcmp("-s", next_argument)){
      use_stdio = TRUE;
    }				/* end if (-s) */

    else if (0 == strcmp("-e", next_argument)) {
      char *peek_argument = peek_arg(&argc, &argv);
      log_file_name = "/dev/null"; /* default to /dev/null */
      if ((peek_argument != NULL) &&
	  ('-' != peek_argument[0])) {
	log_file_name = next_arg(&argc, &argv);
      }				/* end if (explicit log file) */
    }				/* end if (-e) */
    else if (0 == strcmp("-d", next_argument)) {
      index_dir = next_arg(&argc, &argv);
    }
    else if (0 == strcmp("-v", next_argument)) {
      printf("%s: %s, %s\n", command_name, VERSION, SERVER_DATE);
    }
    else if (0 == strcmp("-u", next_argument)) {
      uid_name = next_arg(&argc, &argv);
      if((uid = finduid(uid_name)) < 0)
	panic("Couldn't find user %s.", uid_name);
    }
    else{
      panic("Don't recognize the %s option", next_argument);
    }
    next_argument = next_arg(&argc, &argv);
  }				/* end while (more arguments) */

  if (use_stdio && log_file_name == NULL) 
    log_file_name = "/dev/null";

  if (log_file_name == NULL) 
    logfile = stderr;
  else logfile = NULL;
  
  index_dir = index_dir ? index_dir : ".";  
  info_dict = s_strdup(merge_pathnames(info_dict,index_dir));
  
  /* remember timestamp on INFO.dct if rebuilding needed 
   * If it doesnt exist, it's assumed to be *very* old, to force
   * re-indexing
   */
  info_change_time = (stat(info_dict,&statbuf) == -1) ? 0 : statbuf.st_mtime;
  
  /* compare with candidates */

  if ((n_files = scandir(index_dir, &list, srcfiles, alphasort)) < 0) {
      waislog(WLOG_HIGH, WLOG_ERROR, 
	      "Error: reading directory %s, %s", 
	      index_dir, sys_errlist[errno]);
      indexing_needed = FALSE;
  }
  
  /* ok. we know if we need indexing, 
   * and have all the filenames. 
   */
  
  if (info_change_time == 0) indexing_needed = TRUE;
  if (indexing_needed) {

    /* Time to re-index,
     * aquire the lock 
     */
    waislog(WLOG_MEDIUM, WLOG_INDEX,
	    "re-indexing needed, info_change_time=%d",info_change_time); 

    if (( fd = open(LOCKFILE, O_WRONLY|O_CREAT|O_EXCL,0666)) == -1) {
	  
      /* already locked by somebody else
       * spin  till she finishes
       */
      while (!(stat(LOCKFILE,&statbuf) == -1)) {
	sleep(NAPTIME);
	naptime += NAPTIME;
	waislog(WLOG_MEDIUM, WLOG_INFO,
		"INFO locked, waiting since %d seconds", naptime);
	if (naptime  > MAXNAPTIME)  {

	  waislog(WLOG_HIGH, WLOG_ERROR,
		  "Warning - lockfile %s won't go away after %d seconds, not reindexing.", 
		  LOCKFILE, naptime);
	  break;
	}
      }
      /* if lockfile went away, assume INFO.* build finished
       * so just use it
       */
    } else {			/* we aquired the lock, so rebuild database  */
	  
      if (!(child = fork())) {
	database *db;
	struct dirent **s = list;
	char filename[MAX_FILENAME_LEN];
	      
	waislog(WLOG_MEDIUM, WLOG_INDEX,
		"Creating INFO database, pid=%d",getpid());
	db = openDatabase(merge_pathnames("INFO",	index_dir),
			  true, /* maybe this should append XXX */
			  false);
	db->the_word_memory_hashtable =
	  init_word_memory_hashtable(1L<<16, 100000L, db->the_word_memory_hashtable);
	      
	while (*s) {		/* index it */
	  strncpy(filename, index_dir, MAX_FILENAME_LEN);
	  if(index_dir[strlen(index_dir) -1] != '/')
	    strncat(filename, "/", MAX_FILENAME_LEN);
	  strncat(filename, (*s)->d_name, MAX_FILENAME_LEN);
	  waislog(WLOG_MEDIUM, WLOG_INDEX,
		  "Indexing %s", filename);
	  index_text_file(filename, NULL, NULL, NULL, 
			  NULL, "WSRC", db, true, false);
	  s++;
	}
	freedir(list);		/* array of filenames */
	      
	if(!probe_file(source_filename(filename, db)))
	  write_src_structure(source_filename(filename, db),
			      "INFO", "WSRC", NULL, 0L, true, tcp_port);
	finished_add_word(db);
	closeDatabase(db);
	if (unlink(LOCKFILE))
	  panic("Indexer: cant unlink lockfile!\n");
	waislog(WLOG_MEDIUM, WLOG_INDEX,
		"Indexer pid=%d done", getpid());
	      
	exit(0);		/* indexing child */

      }  else if (child == -1) {
	waislog(WLOG_HIGH, WLOG_ERROR,
		"Unable to fork for indexer.");
	exit(1);
      }
      /* wait for child process */
      else while (wait(0) != child) ; /* do nothing */
    }
  }


  if (use_stdio == TRUE) {
    if(host_name[0] != 0) {
      waislog(WLOG_MEDIUM, WLOG_CONNECT,
	      "Accepted connection from: %s. %s", host_name, VERSION);
    }
    else {
      waislog(WLOG_MEDIUM, WLOG_CONNECT,
	      "Couldn't determine peer connection. %s", VERSION);
    }
  }
  else {waislog(WLOG_MEDIUM, WLOG_INFO, "Running server %s", VERSION);}

  signal(SIGINT, breakKey);

  signal(SIGCHLD, childhandler);  	/* XXX dont really need this any more */
  signal(SIGALRM, alarmhandler);

  signal(SIGSEGV, seghandler);

  signal(SIGBUS, bushandler);

  if(use_stdio == FALSE)
   { open_server(tcp_port,&socket,BUFSZ);

#ifdef SECURE_SERVER
     /* if root, setuid to user specified id. */
     if (uid > 0 && getuid() == 0)  {
       waislog(WLOG_MEDIUM, WLOG_INFO,
	       "Setting uid to %s.", uid_name);
       if ( 0 > setuid(uid)) {
	 waislog(WLOG_HIGH, WLOG_ERROR,
		 "Unable to setuid to %s!  Exiting.", uid_name);
	 exit(-1);
       }
     }
#endif
     while (TRUE) { /* be a server for several connections */
       accept_client_connection(socket,&file);
	  
       if ((child_proc = fork()) == 0) {
	      
	      /* grandson handles this connection
	       * double-fork takes care of zombies 
	       */
	      if ((child_proc = fork()) == 0) { 
		  serve_client(file, file, index_dir);
		  /* but leaves server up */
		  close_client_connection(file);
		  close_server(socket);
		  /* just exits this child */
		  waislog(WLOG_MEDIUM, WLOG_CLOSE,
			  "Done handling client");
		  exit(0);
	      } else {
		  /* son: orphans the grandchild, so init picks up 
		   * the exit status
		   */
		  exit(0);
	      }
          } else {
	      waislog(WLOG_MEDIUM, WLOG_INFO,
		      "Child PID = %d", child_proc);
	      close_client_connection(file);     /* parent shouldn't keep the file */
	  }
      }
   }
  else if(use_stdio == TRUE)
   { /* connections on stdio don't use child processes yet */
     serve_client(stdin, stdout, index_dir);
     waislog(WLOG_MEDIUM, WLOG_CLOSE,
	     "Done handling client");
      /* close the whole thing */
     exit(0);
   }
}

/*---------------------------------------------------------------------------*/

