/*
 *[C] The Regents of the University of Michigan and Merit Network, Inc.1993 
 *All Rights Reserved 
 *  
 *  Permission to use, copy, modify, and distribute this software and its 
 *  documentation for any purpose and without fee is hereby granted, provided 
 *  that the above copyright notice and this permission notice appear in all 
 *  copies of the software and derivative works or modified versions thereof, 
 *  and that both the copyright notice and this permission and disclaimer 
 *  notice appear in supporting documentation. 
 *   
 *   THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER 
 *   EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF 
 *   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE REGENTS OF THE 
 *   UNIVERSITY OF MICHIGAN AND MERIT NETWORK, INC. DO NOT WARRANT THAT THE 
 *   FUNCTIONS CONTAINED IN THE SOFTWARE WILL MEET LICENSEE'S REQUIREMENTS OR 
 *   THAT OPERATION WILL BE UNINTERRUPTED OR ERROR FREE. The Regents of the 
 *   University of Michigan and Merit Network, Inc. shall not be liable for any 
 *   special, indirect, incidental or consequential damages with respect to any 
 *   claim by Licensee or any third party arising from use of the software. 
 */

extern "C" {
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
}
#include "Node.h"
#include "typedefs.h"
#include "parser.tab.h"

/*
 * Debug tracing.  DPRINTF(...) behaves like printf(...) when `debug` is
 * non-zero and does nothing otherwise.
 *
 * The macro is written as "if (!debug) ; else printf" rather than
 * "if (debug) printf" so that the hidden `if` already owns an `else`.
 * With the shorter form, code such as
 *
 *     if (cond) DPRINTF("x"); else other();
 *
 * would silently attach the `else` to the macro's `if`.
 */
#define DPRINTF if (!debug) ; else printf

static int debug = 0;   /* Set non-zero to trace the scanner on stdout. */
char *lex_buf;          /* Text scanned by yylex(); not assigned in this file. */

/* One entry of the reserved-word table: the spelling and its token number. */
struct _Word {
	char *val;   /* Spelling of the word; NULL marks the end of the table. */
	int   num;   /* Token number returned for this word.                   */
};
typedef struct _Word Word;

/*
 * Reserved words recognised by get_tok_num().  Matching is exact
 * (strcmp), so only the spellings listed here are accepted.  The final
 * { NULL, -1 } entry terminates the table and supplies the "not found"
 * result.
 */
static Word words[] = {
	{ "AND",    AND_TOK    },
	{ "and",    AND_TOK    },
	{ "OR",     OR_TOK     },
	{ "or",     OR_TOK     },
	{ "NOT",    NOT_TOK    },
	{ "not",    NOT_TOK    },
	{ "from",   FROM_TOK   },
	{ "accept", ACCEPT_TOK },
	{ "ANY",    ANY_TOK    },
	// Disabled entries:
	//   "to",      TO_TOK
	//   "transit", TRANSIT_TOK
	//   "exclude", EXCLUDE_TOK
	{ NULL,     -1         }
};

/*
 * NOTE: It is interesting that the StateTransition table below needs to have
 * a column indicating whether or not another character should be read
 * after the current character.  The problem is that some tokens can be
 * recognized as soon as their last character is seen; i.e. parentheses,
 * commas, etc. In order to recognize other tokens, one must read one character
 * past the last one comprising the token. If we increment our
 * pointer to look at a heretofore unseen character after every state change,
 * then we can have situations where we miss the beginning of a token.
 */
/* One row (rule) of the scanner's transition table. */
struct _State {
    int current_state;        // State this rule belongs to.
    int next_state;           // State entered when the rule fires.
    int increment;            // Read the next char? See NOTE above.
    int token_type;           // -1's indicate 'don't care'.
    char *input;              // Null ptr indicates default action.
};
typedef struct _State State;
typedef struct _State *StatePtr;

/* Pseudo-state: yylex() stops scanning as soon as the machine reaches it. */
#define END  255
/* Token type for end of input; yylex() resets lex_pos when it returns 0. */
#define end_of_input 0
/*
 * Provisional token types used only inside yylex(): the scanned word is
 * looked up with get_tok_num() and replaced by the real token number.
 * An unknown cn_or_key word becomes CNAME_TOK; an unknown rsvd_word
 * becomes ERR_TOK.
 */
#define cn_or_key    500
#define rsvd_word    501

/****************************************************************************
 * Global variables.  This time I'm afraid that they're a necessary evil.   *
 ****************************************************************************/
/* Index into lex_buf of the next character to scan.  yylex() sets it back
 * to 0 whenever it returns 0 (end of input). */
int lex_pos = 0;              /* Position in strings being parsed. */
/* Index into lex_buf at which the token currently being scanned begins. */
int start_token = 0;          /* Start of current token.           */

/*
 * Scanner transition table.  Each row is
 *
 *     current_state, next_state, increment, token_type, input
 *
 * Rows for one state are contiguous and are tried in order by yylex();
 * the first row whose input set contains the current character fires.
 * A NULL input set is the default rule and matches anything.  A '+' in
 * an input set matches the terminating '\0' of the buffer (see
 * my_strchr()).  token_type is only meaningful on rows whose next_state
 * is END.  The row of -1's terminates the table.
 */
State TransitionTable[] = {
 /* State 0: start.  Skip blanks/tabs, then dispatch on the first char.
  * Single-character tokens use their own character code as token type. */
 0, 0, 1,  -1,           " \t",
 0, 1, 1,  -1,           "BCDEFGHIJKLMOPQRSTUVWXYZ",
 0, 2, 1,  -1,           "A",
 0, 6, 1,  -1,           "0123456789",
 0, 7, 1,  -1,           "abcdefghijklmnopqrstuvwxyz",
 0, 8, 1,  -1,           "N",
 0,29, 1,  -1,           "<",
 0,END,1,  (int)'(',     "(",
 0,END,1,  (int)')',     ")",
 0,END,1,  (int)'{',     "{",
 0,END,1,  (int)'}',     "}",
 0,END,1,  (int)',',     ",",
 0,END,0,  end_of_input, "+",   // Remember, + codes for '\0'.
 0,END,1,  ERR_TOK,       NULL,
 /* State 1: inside an upper-case word (letters, '_' and '-'). */
 1, 1, 1,  -1,           "ABCDEFGHIJKLMNOPQRSTUVWXYZ_-",
 1,END,0,  cn_or_key,    "+(){}\t ",
 1,END,1,  ERR_TOK,       NULL,
 /* State 2: seen a leading 'A'.  'S' may start "AS..."; anything else in
  * the word set is re-examined in state 1 (increment is 0). */
 2, 3, 1,  -1,           "S",
 2, 1, 0,  -1,           "ABCDEFGHIJKLMNOPQRTUVWXYZ-_+",
 2,END,0,  cn_or_key,    " ",
 2,END,1,  ERR_TOK,       NULL,
 /* State 3: seen "AS".  '-' leads to an AS macro, a digit to an AS number. */
 3, 4, 1,  -1,           "-",
 3, 5, 1,  -1,           "0123456789",
 3,END,1,  ERR_TOK,       NULL,
 /* State 4: "AS-" followed by upper-case letters. */
 4, 4, 1,  -1,           "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
 4,END,0,  ASMACRO_TOK,  " (){}\t+",
 4,END,1,  ERR_TOK,       NULL,
 /* State 5: "AS" followed by digits. */
 5, 5, 1,  -1,           "0123456789",
 5,END,0,  ASNUM_TOK,    " (){}\t+",
 5,END,1,  ERR_TOK,       NULL,
 /* State 6: digits.  '.' or '/' continue towards a prefix/mask. */
 6, 6, 1,  -1,           "0123456789",
 6,23, 1,  -1,           ".",
 6,25, 1,  -1,           "/",
 6,END,0,  NUM_TOK,      " (){}\t+",
 6,END,1,  ERR_TOK,       NULL,
 /* State 7: inside a lower-case word. */
 7, 7, 1,  -1,           "abcdefghijklmnopqrstuvwxyz",
 7,END,0,  rsvd_word,    " (){}\t+",
 7,END,1,  ERR_TOK,       NULL,
 /* States 8-13: match "NSF_DB{" one character at a time.  States 8-10
  * hand an ordinary upper-case word back to state 1 without consuming. */
 8, 9, 1,  -1,           "S",
 8, 1, 0,  -1,           "ABCDEFGHIJKLMNOPQRTUVWXYZ+",
 8,END,1,  ERR_TOK,       NULL,
 9,10, 1,  -1,           "F",
 9, 1, 0,  -1,           "ABCDEGHIJKLMNOPQRSTUVWXYZ+",
 9,END,1,  ERR_TOK,       NULL,
10,11, 1,  -1,           "_",
10, 1, 0,  -1,           "ABCDEFGHIJKLMNOPQRSTUVWXYZ+",
10,END,1,  ERR_TOK,       NULL,
11,12, 1,  -1,           "D",
11,END,1,  ERR_TOK,       NULL,
12,13, 1,  -1,           "B",
12,END,1,  ERR_TOK,       NULL,
13,14, 1,  -1,           "{",
13,END,1,  ERR_TOK,       NULL,
 /* States 14-19: optional blanks/tabs, then the literal "aslist". */
14,14, 1,  -1,           " \t",
14,15, 1,  -1,           "a",
14,END,1,  ERR_TOK,       NULL,
15,16, 1,  -1,           "s",
15,END,1,  ERR_TOK,       NULL,
16,17, 1,  -1,           "l",
16,END,1,  ERR_TOK,       NULL,
17,18, 1,  -1,           "i",
17,END,1,  ERR_TOK,       NULL,
18,19, 1,  -1,           "s",
18,END,1,  ERR_TOK,       NULL,
19,20, 1,  -1,           "t",
19,END,1,  ERR_TOK,       NULL,
 /* States 20-22: optional blanks/tabs, "==", optional blanks/tabs, then
  * the value: a digit (state 27) or '*' (state 28). */
20,20, 1,  -1,           " \t",
20,21, 1,  -1,           "=",
20,END,1,  ERR_TOK,       NULL,
21,22, 1,  -1,           "=",
21,END,1,  ERR_TOK,       NULL,
22,22, 1,  -1,           " \t",
22,27, 1,  -1,           "0123456789",
22,28, 1,  -1,           "*",
22,END,1,  ERR_TOK,       NULL,
 /* States 23-26: dotted number followed by "/digits" (prefix/mask).
  * 23 = just after '.', 24 = digits after a '.', 25 = just after '/',
  * 26 = digits after '/'. */
23,24, 1,  -1,           "0123456789",
23,END,1,  ERR_TOK,       NULL,
24,23, 1,  -1,           ".",
24,24, 1,  -1,           "0123456789",
24,25, 1,  -1,           "/",
24,END,1,  ERR_TOK,       NULL,
25,26, 1,  -1,           "0123456789",
25,END,1,  ERR_TOK,       NULL,
26,26, 1,  -1,           "0123456789",
26,END,0,  PRFMSK_TOK,   " ,\t}+",
26,END,1,  ERR_TOK,       NULL,
 /* State 27: selector value made of digits, ':', '(', ')' and blanks,
  * closed by '}' (which is consumed). */
27,27, 1,  -1,           "0123456789:() ",
27,END,1,  DBSEL_TOK,    "}",
27,END,1,  ERR_TOK,       NULL,
 /* State 28: after a '*' value; only blanks/tabs may precede the '}'. */
28,28, 1,  -1,           " \t",
28,END,1,  DBSEL_TOK,    "}",
28,END,1,  ERR_TOK,       NULL,
 /* States 29-39: AS path clause in angle brackets.  Blank, '_' and tab
  * are all treated as separators here.
  * State 29: just after '<'; 'A' starts "AS", '^' leads to state 36. */
29,29, 1,  -1,           " _\t",
29,30, 1,  -1,           "A",
29,36, 1,  -1,           "^",
29,END,1,  ERR_TOK,       NULL,
 /* States 30-32: "AS" followed by one or more digits, then a separator. */
30,31, 1,  -1,           "S",
30,END,1,  ERR_TOK,       NULL,
31,32, 1,  -1,           "0123456789",
31,END,1,  ERR_TOK,       NULL,
32,32, 1,  -1,           "0123456789",
32,33, 1,  -1,           " _\t",
32,END,1,  ERR_TOK,       NULL,
 /* State 33: separators, then a '.' is required. */
33,33, 1,  -1,           " _\t",
33,34, 1,  -1,           ".",
33,END,1,  ERR_TOK,       NULL,
 /* State 34: after '.'; an optional '*' or the closing '>'. */
34,34, 1,  -1,           " _\t",
34,35, 1,  -1,           "*",
34,END,1,  ASPATH_TOK,   ">",
34,END,1,  ERR_TOK,       NULL,
 /* State 35: only separators may precede the closing '>'. */
35,35, 1,  -1,           " _\t",
35,END,1,  ASPATH_TOK,   ">",
35,END,1,  ERR_TOK,       NULL,
 /* States 36-39: after '^': "AS", optional digits, then '>' directly or
  * separators followed by '>' (via state 35). */
36,36, 1,  -1,           " _\t",
36,37, 1,  -1,           "A",
36,END,1,  ERR_TOK,       NULL,
37,38, 1,  -1,           "S",
37,END,1,  ERR_TOK,       NULL,
38,39, 1,  -1,           "0123456789",
38,END,1,  ASPATH_TOK,   ">",
38,END,1,  ERR_TOK,       NULL,
39,39, 1,  -1,           "0123456789",
39,35, 1,  -1,           " _\t",
39,END,1,  ASPATH_TOK,   ">",
39,END,1,  ERR_TOK,       NULL,
-1, -1, -1, -1,       NULL    /* End of Table Marker */
};

// Local strndup: returns a freshly allocated, NUL-terminated copy of at most
// `len` characters of `s`.  The copy is obtained with new[] (not malloc) so
// that callers can release it uniformly with delete[].
static char *strndup(char *s, int len)
{
	char *copy = new char[len+1];

	// strncpy stops at the first NUL in `s` and zero-fills the remainder up
	// to `len`; the final byte is terminated explicitly.
	strncpy(copy, s, len);
	copy[len] = '\0';

	return copy;
}

/*
 * my_strchr -- membership test for the input sets of TransitionTable.
 *
 * Returns 1 if `c` is a member of the set `string`, 0 otherwise.
 *
 * A '+' in the set is a placeholder for the terminating '\0' of the input
 * (a literal NUL cannot be stored inside the set string).  It therefore
 * matches c == '\0' ONLY.  Previously a literal '+' in the input also
 * matched the placeholder, so a stray '+' was taken for end of input and
 * everything after it was silently dropped.
 */
static int my_strchr(const char *string, char c) 
{
	for (; *string; string++) {
		if ( *string == '+' ) {
			/* Placeholder for end of input; never a literal plus sign. */
			if ( c == '\0' )
				return( 1 );
		}
		else if ( *string == c )
			return( 1 );
	}

	return( 0 );
}

// normalize -- tidy up the text of an AS path clause.
//
// Everything up to and including the first "AS" in str1 is skipped (if
// there is no "AS", the whole string is used).  In what remains:
//   - underscores are treated as spaces,
//   - leading and trailing spaces are removed,
//   - runs of spaces are compressed to a single space,
//   - '^' characters seen before the first kept character are dropped.
//
// NOTE: str1 is rewritten in place as a side effect.  The result is
// returned in a fresh new[] allocation that the caller must delete[].

static char *normalize( char *str1 )
{
	int s, t, state = 0;
	char *str;

	str = strstr( str1, "AS" );
	if ( !str )
		str = str1;
	else
		str += 2;

	// s is the write index, t the read index; s never overtakes t, so the
	// string can be compacted in place.
	s = t = 0;

	while( str[ t ] == ' ' || str[ t ] == '_' )
		t++;

	// state 0: nothing kept yet; 1: last kept char was not a space;
	// state 2: last kept char was a space.
	while ( str[ t ] ) {
		switch ( state ) {
			case 0:
				if ( str[ t ] != '_' && str[ t ] != ' ' && str[ t ] != '^' ) {
					str[ s++ ] = str[ t ];
					state = 1;
				}	
				break;

			case 1:
				if ( str[ t ] == ' ' || str[ t ] == '_' ) {
					str[ s++ ] = ' ';
					state = 2;
				}
				else
					str[ s++ ] = str[ t ];
				break;

			case 2:
				if ( str[ t ] != '_' && str[ t ] != ' ' ) {
					str[ s++ ] = str[ t ];
					state = 1;
				}	
				break;

			default:
				cerr << "Error:" << __FILE__ << "(" << __LINE__ << "):";
				cerr << "Unknown state!" << endl;
				abort(); 
		}

		t++;
	}

	// Drop the single trailing space that state 2 may have left behind.
	// The s > 0 guard keeps us from reading str[-1] when nothing was kept
	// (empty input, or input consisting only of separators and '^').
	if ( s > 0 && str[ s-1 ] == ' ' )
		str[ s-1 ] = '\0';
	else
		str[ s ] = '\0';

	// Hand back a copy that starts at the beginning of its own allocation.
	// `str` may point into the middle of the caller's new[] block, and
	// such a pointer must never be passed to delete[].
	return( strndup( str, (int) strlen(str) ) );
}

int get_tok_num( char *string )
{
	int i;

	for( i = 0; words[i].val; i++)
		if ( !strcmp( words[i].val, string ) )
			break;

	return( words[i].num );
}

/*
 * yylex -- table-driven scanner called by the parser.
 *
 * Scans lex_buf starting at lex_pos, driving the state machine described
 * by TransitionTable until it reaches END, and returns the token type.
 * For tokens that carry text (ASPATH_TOK, PRFMSK_TOK, NUM_TOK, ASNUM_TOK,
 * ASMACRO_TOK, CNAME_TOK, DBSEL_TOK) yylval.val is set to a new[]-allocated
 * string.  Returns 0 at end of input, in which case lex_pos is reset to 0.
 * Scanning errors are reported as ERR_TOK.
 */
int yylex( void )
{
    int i,
        token_type = -1,
        next_state =  0,
        state      =  0;
    char c;

	DPRINTF("   lex_pos:%d lex_buf:'%s'\n", lex_pos, lex_buf);
	
	if ( lex_buf[lex_pos] == '\0' )
	{
		lex_pos = 0;
		return( 0 );  /* Done parsing all input. */
	}

	start_token = lex_pos;

	while ( state != END ) {
		DPRINTF("   State: %d lex_pos:%d char:'%c' ", state, lex_pos, lex_buf[lex_pos]);

		/*
		 * State 0 is the state in which all parsing begins.  If we parse some
		 * garbage and get sent back to state 0, then we need to reset
		 * start_token to point to the beginning of the next thing we will try to
		 * parse.
		 */
		if ( state == 0 )
			start_token = lex_pos;

		/* Find current state in TransitionTable */
		for ( i = 0; TransitionTable[i].current_state >= 0; i++ )
			if ( TransitionTable[i].current_state == state )
				break;

		if ( TransitionTable[i].current_state < 0 ) {
			fprintf(stderr,"Unknown state: %i\n", state); 
			abort();
		}

		// Loop through the rules for this particular state, looking for the
		// rule that applies to our current input, c.
		c = lex_buf[lex_pos];
		for (; TransitionTable[i].current_state == state; i++)
			if ( !TransitionTable[i].input || 
			                          my_strchr(TransitionTable[i].input, c))
				break;

		// Every state is expected to end with a default (NULL input) rule.
		// If one is missing, row i belongs to some other state and must not
		// be used; treat it as a broken table, like an unknown state.
		if ( TransitionTable[i].current_state != state ) {
			fprintf(stderr,"No rule for state: %i\n", state);
			abort();
		}

		token_type = TransitionTable[i].token_type;
		next_state = TransitionTable[i].next_state;

		DPRINTF("Next State: %d Token:%d\n", next_state, token_type);
		state = next_state;

		// Never step past the terminating NUL.  The default error rules have
		// increment 1, which used to push lex_pos beyond the end of lex_buf
		// when the input ended in the middle of a token.
		if ( c != '\0' )
			lex_pos += TransitionTable[i].increment;
	}


	// Note that the state machine accepts prefix/masks which aren't exactly
	// correct; it will accept 777.333/2500.  No range checking is done here.

	switch ( token_type ) {
		case ASPATH_TOK: {
			// The '+1' and '-2' below get rid of the angle brackets that
			// enclose the AS path clause.
			char *tmp = strndup(&(lex_buf[start_token+1]), lex_pos-start_token-2);
			yylval.val = normalize( tmp );
			delete[] tmp;
			break;
		}

		case PRFMSK_TOK:
		case NUM_TOK:
		case ASNUM_TOK:
		case ASMACRO_TOK:
			yylval.val = strndup(&(lex_buf[start_token]), lex_pos - start_token );
			break;

		case cn_or_key: {
			// Upper-case word: either a keyword or a name.  The word is
			// copied to the heap rather than into a fixed-size stack buffer,
			// so arbitrarily long words cannot overflow anything.
			char *word = strndup(&(lex_buf[start_token]), lex_pos - start_token );
			token_type = get_tok_num( word );

			if ( token_type < 0 ) {
				token_type = CNAME_TOK;
				yylval.val = word;      // Ownership passes to the parser.
			}
			else
				delete[] word;

			break;
		}

		case rsvd_word: {
			// Lower-case word: must be a keyword, otherwise it is an error.
			char *word = strndup(&(lex_buf[start_token]), lex_pos - start_token );
			token_type = get_tok_num( word );
			delete[] word;

			if ( token_type < 0 ) {
				token_type = ERR_TOK;
				yylval.val = (char*) NULL;
			}
			break;
		}

		case DBSEL_TOK: {
			// start_token points to string which looks like 
			// "NSF_DB{ aslist == <value> }"
			// yylval for this token should be "<value>".
			// Scan this string and make the beginning the first non-whitespace char
			// after the '=='.  Chop off the final '}' and any whitespace before it.
			// The state machine only yields DBSEL_TOK after it has seen both
			// the "==" and the '}', so the two searches below cannot fail.
			char *p, *start;

			// Position start right after '=='.
			start = strstr( &(lex_buf[start_token]), "==") + 2;
			// <ctype.h> functions require an unsigned char value.
			for(; isspace((unsigned char) *start); start++)
				continue;

			p = strchr( start, '}') - 1;
			for(; isspace((unsigned char) *p); p--)
				continue;

			p++;
			yylval.val = strndup( start, (int) (p - start) );
			break;
		}

		case ERR_TOK:
		case end_of_input:
		default:
			break;
	}

	if ( token_type == 0 )
		lex_pos = 0;

	return( token_type );
}
