/**
 ** scanner.c
 **
 ** Copyright 1990, 1991 by Randy Sargent.
 **
 ** The author hereby grants to MIT permission to use this software.
 ** The author also grants to MIT permission to distribute this software
 ** to schools for non-commercial educational use only.
 **
 ** The author hereby grants to other individuals or organizations
 ** permission to use this software for non-commercial
 ** educational use only.  This software may not be distributed to others
 ** except by MIT, under the conditions above.
 **
 ** Other than these cases, no part of this software may be used or
 ** distributed without written permission of the author.
 **
 ** Neither the author nor MIT make any representations about the 
 ** suitability of this software for any purpose.  It is provided 
 ** "as is" without express or implied warranty.
 **
 ** Randy Sargent
 ** Research Specialist
 ** MIT Media Lab
 ** 20 Ames St.  E15-301
 ** Cambridge, MA  02139
 ** E-mail:  rsargent@athena.mit.edu
 **
 **/


#define SCANNER_MODULE

#include "util.h"
#include "scanner.h"

/*-------------------------------------------------------------------------*/
/* Private Constants                                                       */

/* #define STAND_ALONE */

/* Value scanner_get_char returns when no more input is available.  It lies
   just above the 8-bit character codes, and the 257-entry character-class
   tables leave room for it as an index. */
#define SCANNER_EOF 256
/* Initial size of the shared token buffer, and capacity of the read buffer
   that scanner_init allocates for a stream scanner. */
#define SCANNER_BUFFER_SIZE 1024
/* Passed to queue_init for each scanner's unget queue of Tokens;
   presumably the queue's capacity -- confirm against queue_init. */
#define SCANNER_MAX_UNGET_TOKENS 10

/*-------------------------------------------------------------------------*/
/* Private Variables                                                       */

/* Each of the following tables describes a class of characters.  A character
   is a member of a class if its entry in the table is TRUE.  Each table has
   257 entries so that SCANNER_EOF (256) is a valid index as well.

   NOTE(review): nothing in this file stores into these tables, so they keep
   their static zero initialization, while scanner_init builds a separate
   set of tables inside each Scanner. */

static Bool ch_white_space[257];   /* Whitespace.  Delimits all tokens */
static Bool ch_singles[257];       /* Single character tokens.  Delimits
                                      numbers and symbols */
static Bool ch_start_sym[257];     /* Characters which can start a symbol */
static Bool ch_continue_sym[257];  /* Characters which can continue a symbol */
static Bool ch_digits[257];        /* The digits (0-9) */

/* The token buffer.  This buffer holds the text of the token currently being
   read; scanner_get_token null-terminates it before returning.  The buffer
   is a single global shared by all scanners: scanner__init_globals allocates
   it and scanner_put_char appends to it. */

/* Start of the buffer; NULL until scanner__init_globals has run. */
char *token_buffer;
/* Index at which scanner_put_char stores the next character. */
Int  token_buffer_loc;
/* Number of bytes currently allocated for token_buffer. */
Int  token_buffer_max_size;

/*-------------------------------------------------------------------------*/
/* Private Functions                                                       */

/* Build a character-class table.

   Allocates a 257-byte table (one slot for each unsigned char value plus one
   for SCANNER_EOF), stores its address in *TABLE, clears every slot to FALSE
   and then sets to TRUE the slot of each character in the null-terminated
   string CHARACTERS.  A NULL CHARACTERS is treated as an empty class.

   Returns 0 if successful, 1 if the table could not be allocated (in which
   case *TABLE is NULL). */
Int scanner__install(char **table, char *characters)
{
    Int  i;
    char *members;

    members= malloc(257);
    *table= members;
    if (!members) return 1;

    /* Clear all 257 slots, including the one reserved for end-of-file */
    for (i= 0; i < 257; i++) members[i]= FALSE;

    /* Index through unsigned char so that characters with the high bit set
       do not turn into negative subscripts where plain char is signed */
    if (characters) {
        for (i= 0; characters[i]; i++)
            members[(unsigned char) characters[i]]= TRUE;
    }
    return 0;
}

/* Allocate the shared token buffer the first time through; later calls find
   token_buffer already set and do nothing.
   NOTE(review): the result of malloc is not checked here, so after a failed
   allocation token_buffer stays NULL while token_buffer_max_size is set. */
void scanner__init_globals(void)
{
    if (token_buffer) return;

    token_buffer_max_size= SCANNER_BUFFER_SIZE;
    token_buffer= malloc(SCANNER_BUFFER_SIZE);
}

/* Fetch the next input character.  While the cursor points at a non-NUL
   byte, that byte is consumed directly; otherwise the work is handed to
   scanner_get_char.  The byte is converted through unsigned char so that
   the result is never negative and can safely index the 257-entry
   character-class tables.  SCANNER is evaluated more than once, so it must
   not have side effects. */
#define SCANNER_GET_CHAR(scanner) \
  ( *((scanner)->buffer_ptr) ?    \
    (unsigned char) *((scanner)->buffer_ptr++) : scanner_get_char(scanner) )

/* Push back the character CH that was just read from SCANNER by moving the
   cursor back one position.  Implements only one character of backup.
   End-of-file consumed no input, so ungetting SCANNER_EOF does nothing.
   Both arguments are parenthesized and the body is wrapped in
   do/while(0) so the macro behaves as one ordinary statement; follow it
   with a semicolon. */

#define SCANNER_UNGET_CHAR(scanner, ch) \
    do { if ((ch) != SCANNER_EOF) (scanner)->buffer_ptr--; } while (0)
  
/* Return the next input character as a value in 0..255, or SCANNER_EOF when
   the input is exhausted.

   The cursor is scanner->buffer_ptr, the same field scanner_init sets up
   and the SCANNER_GET_CHAR / SCANNER_UNGET_CHAR macros move.  The end of the
   buffered data is marked by a NUL byte: a string source carries its own
   terminator, and scanner_init allocates one byte beyond buffer_max_size
   for a stream buffer so that a terminator fits after a full read.

   When the cursor reaches the terminator, a stream scanner refills its
   buffer from the stream and a string scanner reports end-of-file.

   Limitation: because NUL marks the end of the buffered data, a NUL byte
   inside stream input cannot be told apart from the end of the buffer. */
static INLINE Int scanner_get_char(Scanner *scanner)
{
    Int n_read;

    /* Buffered data remains: consume one byte.  Going through unsigned char
       keeps the result non-negative so it can index the class tables. */
    if (*scanner->buffer_ptr)
        return (unsigned char) *scanner->buffer_ptr++;

    /* A scanner reading from a string has nothing to refill from */
    if (!scanner->stream) return SCANNER_EOF;

    n_read= fread(scanner->buffer, 1,
                  scanner->buffer_max_size,
                  scanner->stream);

    /* Terminate the fresh data and restart the cursor at its beginning.  A
       read of zero bytes leaves an empty buffer, so later calls simply try
       the stream again and keep returning SCANNER_EOF. */
    scanner->buffer[n_read]= 0;
    scanner->buffer_ptr= scanner->buffer;
    if (n_read == 0) return SCANNER_EOF;

    return (unsigned char) *scanner->buffer_ptr++;
}

/* Push back CH, the character most recently returned by scanner_get_char,
   so that the next read returns it again.  Only one character of backup is
   supported.  SCANNER_EOF consumed nothing and is therefore ignored, and
   the cursor is never moved in front of the start of the buffer. */
static INLINE void scanner_unget_char(Scanner *scanner, Int ch)
{
    if (ch != SCANNER_EOF && scanner->buffer_ptr > scanner->buffer)
        scanner->buffer_ptr--;
}

/* Append CH to the shared token buffer, enlarging the buffer first when it
   is full or has not been allocated yet.  SCANNER is not used.

   If the buffer cannot be enlarged, the existing buffer is kept and the
   token is truncated: ordinary characters are dropped, and a terminating 0
   is written over the last slot so the token still ends up
   null-terminated. */
void INLINE scanner_put_char(Scanner *scanner, Int ch)
{   /* scanner not used */
    if (!token_buffer || token_buffer_loc >= token_buffer_max_size) {
        Int  new_size= token_buffer_max_size * 2;
        char *new_buffer;

        /* Doubling a size of 0 gets nowhere; always leave room for the
           slot that is about to be written */
        if (new_size <= token_buffer_loc) new_size= token_buffer_loc + 1;

        /* Take the result in a temporary so the old buffer is not lost if
           realloc fails */
        new_buffer= realloc(token_buffer, new_size);
        if (!new_buffer) {
            if (ch == 0 && token_buffer && token_buffer_max_size > 0)
                token_buffer[token_buffer_max_size - 1]= 0;
            return;
        }
        token_buffer= new_buffer;
        token_buffer_max_size= new_size;
    }
    token_buffer[token_buffer_loc++]= (char) ch;
}

/*-------------------------------------------------------------------------*/
/* Public Functions                                                        */

/* Initialize scanner S to read from exactly one of STREAM or STRING; die()
   is invoked if both or neither are supplied.

   A string source is scanned in place (S keeps pointing into STRING, so the
   caller must keep it alive).  A stream source gets a freshly allocated
   read buffer of SCANNER_BUFFER_SIZE bytes plus one for a terminator,
   which starts out empty.

   WHITE_SPACE, SINGLES, START_SYM and CONTINUE_SYM are null-terminated
   strings listing the members of each character class; a table for the
   digits 0-9 is installed as well.

   Returns 0 if successful, 1 if an allocation or queue_init fails.
   NOTE(review): on a failure part-way through, whatever was already
   allocated is not released here. */
Int scanner_init(Scanner *s, FILE *stream, char *string, 
		    char *white_space, char *singles,
		    char *start_sym, char *continue_sym)
{
    char *digits= "0123456789";

    scanner__init_globals();

    if (stream && string) die(("Both stream and string set in scanner_init"));
    if (!stream && !string) die(("Neither stream nor string set in scanner_init"));

    s->stream= stream;
    if (string) {
        s->buffer_max_size= strlen(string);
        s->buffer= string;
        s->buffer_ptr= string;
    }
    else {
        s->buffer_max_size= SCANNER_BUFFER_SIZE;
        s->buffer= malloc(SCANNER_BUFFER_SIZE + 1);
        s->buffer_ptr= s->buffer;
        /* Check the allocation before touching the buffer */
        if (!s->buffer) return 1;
        s->buffer[0]= 0;
    }
    if (queue_init(&s->unget, SCANNER_MAX_UNGET_TOKENS, sizeof(Token))) return 1;
    if (scanner__install(&s->white_space,  white_space  )) return 1;
    if (scanner__install(&s->singles,      singles      )) return 1;
    if (scanner__install(&s->start_sym,    start_sym    )) return 1;
    if (scanner__install(&s->continue_sym, continue_sym )) return 1;
    if (scanner__install(&s->digits,       digits       )) return 1;
    return 0;
}

/* 0 if successful, EOF if end-of-file */
Token *scanner_get_token(Scanner *scanner)
{
    static Token t;
    Int c;
    
    token_buffer_loc= 0;
    
    /* Skip whitespace */
    while (ch_white_space[ c= scanner_get_char(scanner) ]);
    
    scanner_put_char(scanner, c);
    
    /* Determine type of token */
    
    if (ch_start_sym[c]) {        /* Symbol */
	token_id= token_symbol_id;
	while (ch_continue_sym[ c= scanner_get_char(scanner) ])
	  scanner_put_char(scanner, c);
	scanner_unget_char(scanner, c);
    }
    
    else if (ch_singles[c]) {    /* Single */
	token_id= token_single_id;
    }
    
    else if (ch_digits[c]) {     /* Digits */
	token_id= token_integer_id;
	while (ch_digits[ c= scanner_get_char(scanner) ])
	  scanner_put_char(scanner, c);
	scanner_unget_char(scanner, c);
    }
    
    else if (c == SCANNER_EOF) {
	token_id= token_eof_id;
    }
    
    else {
	token_id= token_error_id;
    }
    
    scanner_put_char(scanner, 0);
    *ret= token_buffer;
    return token_id;
}

/*-------------------------------------------------------------------------*/
/* Stand-alone test code                                                   */

#ifdef STAND_ALONE
/* Stand-alone driver: read tokens from FOO and print one line per token,
   labelled with its kind, until the end-of-file token arrives.  A token id
   outside the known set is reported through die().
   NOTE(review): scanner_get_token is defined above with a single Scanner *
   parameter; the two-argument call here presumably predates that. */
void scanner_test(Scanner *foo)
{
    for (;;) {
        char     *text;
        Token_id kind= scanner_get_token(foo, &text);

        if (kind == token_eof_id) {
            printf("End of scanner\n");
            return;
        }

        if (kind == token_symbol_id)
            printf("Symbol >%s<\n", text);
        else if (kind == token_single_id)
            printf("Single >%s<\n", text);
        else if (kind == token_integer_id)
            printf("Integer >%s<\n", text);
        else if (kind == token_error_id)
            printf("Error >%s<\n", text);
        else
            die(("Illegal token type %d", kind));
    }
}

/* Stand-alone entry point: scan a fixed sample string and print its tokens.
   The scanner is set up through scanner_init as defined in this file
   (scanner, stream, string, then the four character-class strings), reading
   from the string with no stream. */
NOPROTO main() /*__ no proto */
{
    char *to_parse= "a b c def ghijkl mnopqr 12~34 5768,foo<bar";
    Scanner foo;
    
    if (scanner_init(&foo, NULL, to_parse,
                     /* whitespace */
                     " \t\r\n",
                     /* single char token */
                     "<>,()",
                     /* begin symbol */
                     "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_",
                     /* continue symbol */
                     "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
                     ))
        die(("scanner_init failed"));
    
    scanner_test(&foo);
}
#endif
