/*
 * Copyright 1990 by Baylor College of Medicine ALL RIGHTS RESERVED. 
 *
 * This program is subject to a license agreement between 
 * Baylor College of Medicine and MIT. Any use inconsistent with
 * said license and any use by persons other than the faculty, 
 * students and staff at MIT or any use on a computer not operated 
 * as part of the Athena Computing Environment (ACE) is expressly 
 * prohibited.
 */
/*
 *		This file contains the routines necessary to construct Determinstic
 *		finite automata using simple algebraic operations such
 *		as union, concatenation, complementation, etc.
 *
 *		Author:
 *			Eric Taylor
 *			Baylor College of Medicine
 *
 */
#include <stdio.h>
#include <etrealloc.h>
#include <dfa.h>

/*
 *	For concatenation and closure, a state list represents a state in the new dfa.
 */
typedef struct hash *Hash ;
struct hash {
	int q ;
	Int_List sl ;
	Hash next ;
} ;

#define MAX_HASH 5003
static Hash entries_sl[MAX_HASH] ;

static
Dfa
make_dfa(label)
{
	Dfa d = (Dfa)etmalloc(sizeof *d) ;
	d->label = label ;
	d->n_states = 0 ;
	d->states = NULL ;
	return d ;
}

static
add_state_to_dfa(d,is_final)
	Dfa d ;
{
	int q = d->n_states++ ;
	d->states = (State *)etrealloc(d->states,d->n_states * sizeof(State)) ;
	d->states[q].is_final = is_final ;
	return q ;
}

/*
 *	Uses quick sort to sort a list of integers
 */
static
qsort_sl(f,l)
	int *f ;
	int *l ;
{
	if (f < l)
	{
		int *i = f ;
		int *j = l ;
		int a = 0 ;
		int n = 0 ;

		/* Use average as approximation for median */
		while(i < j)
		{
			a += *i++ ;
			n++ ;
		}
		a /= n ;

		/* Now paritions into < avg and >= avg */
		i = f ;
		while(i <= j)
		{
			while(*j >= a && j >= f) j-- ;
			while(*i < a && i <= l) i++ ;
			if (i < j)
			{
				int t = *i ;
				*i = *j ;
				*j = t ;
				i++ ;
				j-- ;
			}
		}

		/* Now parition into < avg, avg, and > avg */
		{
			int *k ;
			for(k = j + 1; k <= l; k++)
			{
				if (*k == a)
				{
					j++ ;
					*k = *j ;
					*j = a ;
				}
			}
		}

		/* Sort < avg */
		qsort_sl(f,i - 1) ;

		/* Sort > average */
		qsort_sl(j + 1,l) ;
	}
}

/*
 *	Sorts a state list
 */
static
qsort_SL(sl)
	Int_List *sl ;
{
	qsort_sl(sl->lst,sl->lst + sl->n - 1) ;
}

/*
 *	Removes any states that are duplicated in a state list.
 */
static
remove_same(sl)
	Int_List *sl ;
{
	if (sl->n)
	{
		int i ;
		int j = 0 ;
		sl->lst[j++] = sl->lst[0] ;
		for(i = 1; i < sl->n; i++)
		{
			if (sl->lst[j - 1] != sl->lst[i])
			{
				sl->lst[j++] = sl->lst[i] ;
			}
		}
		sl->n = j ;
	}
}

/*
 *		Empties state pair hash table.
 */
static
empty_hash()
{
	int i ;
	for(i = 0; i < MAX_HASH; i++)
	{
		Hash h = entries_sl[i] ;
		while(h != NULL)
		{
			Hash next = h->next ;
			etfree((char *)h->sl.lst) ;
			etfree((char *)h) ;
			h = next ;
		}
		entries_sl[i] = NULL ;
	}
}

/*
 *	Hashes a state list.
 */
static
Hash *
hash(sl)
	Int_List *sl ;
{
	unsigned sum = 0 ;
	int i ;
	for(i = 0; i < sl->n; i++)
	{
		sum = sum * 3 + sl->lst[i] ;
	}
	return entries_sl + sum % MAX_HASH ;
}

/*
 *		Finds a state list.
 */
static
find_hash_sl(sl)
	Int_List *sl ;
{
	Hash h = *(hash(sl)) ;
	while(h != NULL)
	{
		if (sl->n == h->sl.n)
		{
			int i ;
			for(i = 0; i < sl->n; i++)
			{
				if (sl->lst[i] != h->sl.lst[i])
				{
					goto nxt ;
				}
			}
		}
		return h->q ;
		nxt:
		h = h->next ;
	}
	return -1 ;
}

/*
 *		Finds a state tuple.
 */
static
find_hash(q1,q2)
{
	static int qs[2] ;
	static Int_List il = {qs,2} ;
	qs[0] = q1 ;
	qs[1] = q2 ;
	return find_hash_sl(&il) ;
}

/*
 *		Adds a state list to hash table.
 */
static
add_hash_sl(sl,d,is_final)
	Int_List *sl ;
	Dfa d ;
{
	int q = add_state_to_dfa(d,is_final) ;
	Hash *hsh = hash(sl) ;
	Hash h = (Hash)etmalloc(sizeof *h) ;
	copy_sl(&h->sl,sl) ;
	h->q = q ;
	h->next = *hsh ;
	*hsh = h ;
	return q ;
}

/*
 *		Adds a state pair to hash table.
 */
static
add_hash(q1,q2,d,is_final)
	Dfa d ;
{
	static int qs[2] ;
	static Int_List sl = {qs,2} ;
	qs[0] = q1 ;
	qs[1] = q2 ;
	return add_hash_sl(&sl,d,is_final) ;
}

static
apply_bool_op(a,b,bool_op)
	unsigned long bool_op ;
{
	return (1 << ((a ? 2 : 0) | (b ? 1 : 0))) & bool_op ;
}

/*
 *	Makes a copy of a state, negating is_final flag.
 */
static
copy_state(d1,q1,d)
	Dfa d1 ;
	Dfa d ;
{
	int q = find_hash(q1,-1) ;
	if (q < 0)
	{
		int i ;
		q = add_hash(q1,-1,d,!d1->states[q1].is_final) ;
		for(i = 0; i < 256; i++)
		{
			d->states[q].transitions[i] = copy_state(d1,d1->states[q1].transitions[i],d) ;
		}
	}
	return q ;
}

/*
 *	Makes a copy of a state, negating is_final flag.
 */
static
copy_state2(d1,q1,d)
	Dfa d1 ;
	Dfa d ;
{
	int q = find_hash(q1,-1) ;
	if (q < 0)
	{
		int i ;
		q = add_hash(q1,-1,d,d1->states[q1].is_final) ;
		for(i = 0; i < 256; i++)
		{
			d->states[q].transitions[i] = copy_state2(d1,d1->states[q1].transitions[i],d) ;
		}
	}
	return q ;
}

/*
 *	Merges two dfas beginning each beginning at a certain state
 *	into one.
 */
static
merge_states(d1,q1,d2,q2,d,bool_op)
	Dfa d1 ;
	Dfa d2 ;
	Dfa d ;
	unsigned long bool_op ;
{
	int q = find_hash(q1,q2) ;

	/* If it already exists then return already existing state */
	if (q < 0)
	{

		/* Otherwise create it and fill in transitions */
		/* If either state is final, new state is final */
		q = add_hash(q1,q2,apply_bool_op(d1->states[q1].is_final,d2->states[q2].is_final,bool_op)) ;

		/* Fill in transitions to other states */
		{
			int i ;
			for(i = 0; i < 256; i++)
			{
				d->states[q].transitions[i] =
					merge_states(
						d1,d1->states[q1].transitions[i],
						d2,d2->states[q2].transitions[i],
						d,bool_op) ;
			}
		}
	}
	return q ;
}

/*
 *	Concatentates dfa from a list of states.
 *	From a list of states, a new list of states is computed by
 *	applying the transition table to each one of them given any letter.
 *	If a final state of the first dfa is reached, then the start state
 *	of the second is automatically added to the beginning of the list.
 */
static
concat_states(d1,d2,sl,d)
	Dfa d1 ;
	Dfa d2 ;
	Int_List *sl ;
	Dfa d ;
{
	/* If already there then do no more */
	int q = find_hash_sl(sl) ;
	if (q < 0)
	{
		int i ;

		/* Create space for new state list */
		int *new = (int *)etmalloc((sl->n + 1) * sizeof(int)) ;

		/* Create new state */
		q = add_hash_sl(sl,d,0) ;

		/* Check to see if the final state of the second dfa is in it */
		/* If so, then new state is final */
		for(i = 0; i < sl->n; i++)
		{
			int tq = sl->lst[i] - d1->n_states ;
			if (tq >= 0 && d2->states[tq].is_final)
			{
				d->states[q].is_final = 1 ;
				break ;
			}
		}

		/* Now add in transitions */
		for(i = 0; i < 256; i++)
		{
			int j ;
			Int_List nsl ;
			int has_final = 0 ;
			for(j = 0; j < sl->n; j++)
			{
				int tq = sl->lst[j] ;
				if (tq >= d1->n_states)
				{
					new[j] = d2->states[tq - d1->n_states].transitions[i] + d1->n_states ;
				}
				else
				{
					new[j] = d1->states[tq].transitions[i] ;
					if (d1->states[new[j]].is_final)
					{
						has_final = 1 ;
					}
				}
			}
			nsl.n = j ;
			nsl.lst = new ;
			if (has_final)
			{
				nsl.lst[nsl.n++] = d2->start_state + d1->n_states ;
			}
			qsort_SL(&nsl) ;
			remove_same(&nsl) ;
			d->states[q].transitions[i] = concat_states(d1,d2,&nsl,d) ;
		}
		etfree((char *)new) ;
	}
	return q ;
}

/*
 *	Copies one state list into another
 */
static
copy_sl(sl1,sl)
	Int_List *sl1 ;
	Int_List *sl ;
{
	int i ;
	sl1->lst = (int *)etmalloc(sl->n * sizeof(int)) ;
	sl1->n = sl->n ;
	for(i = 0; i < sl->n; i++)
	{
		sl1->lst[i] = sl->lst[i] ;
	}
}

/*
 *	Computes the closure of a dfa from a state list.
 *	The transitions of the states are applied.  If any state
 *	is a final state, the start state of the dfa is automaticlly added
 *	to the list.
 */
static
closure_states(d,sl,new_d)
	Dfa d ;
	Int_List *sl ;
	Dfa new_d ;
{
	int q = find_hash_sl(sl) ;
	if (q < 0)
	{
		int i ;
		int *new = (int *)etmalloc((sl->n + 1) * sizeof(int)) ;
		q = add_hash_sl(sl,new_d,sl_has_final(d,sl)) ;
		for(i = 0; i < 256; i++)
		{
			int j ;
			Int_List nsl ;
			for(j = 0; j < sl->n; j++)
			{
				new[j] = d->states[sl->lst[j]].transitions[i] ;
			}
			nsl.n = j ;
			nsl.lst = new ;
			if (sl_has_final(d,&nsl))
			{
				nsl.lst[nsl.n++] = d->start_state ;
			}
			qsort_SL(&nsl) ;
			remove_same(&nsl) ;
			new_d->states[q].transitions[i] = closure_states(d,&nsl,new_d) ;
		}
		etfree((char *)new) ;
	}
	return q ;
}

/*
 *	Checks to see of a state list has a final state in it
 */
static
sl_has_final(d,sl)
	Dfa d ;
	Int_List *sl ;
{
	int i ;
	for(i = 0; i < sl->n; i++)
	{
		if (d->states[sl->lst[i]].is_final)
		{
			return 1 ;
		}
	}
	return 0 ;
}

/*
 *		Executes a dfa given a string.
 */
dfa_execute(d,s)
	Dfa d ;
	char *s ;
{
	int q = d->start_state ;
	while(*s)
	{
		q = d->states[q].transitions[*s++] ;
	}
	return d->states[q].is_final ;
}

/*
 *		Returns the concatenation of 2 dfas.
 */
Dfa
dfa_concat(label,d1,d2)
	Dfa d1 ;
	Dfa d2 ;
{
	/* Allocate dfa */
	Dfa d = make_dfa(label) ;
	Int_List sl ;
	sl.lst = (int *)etmalloc(2 * sizeof(int)) ;
	sl.lst[0] = d1->start_state ;
	sl.n = 1 ;
	if (d1->states[d1->start_state].is_final)
	{
		sl.lst[sl.n++] = d2->start_state + d1->n_states ;
	}

	/* Allocate states */
	d->start_state = concat_states(
		d1,d2,&sl,
		d) ;
	empty_hash() ;
	return d ;
}

/*
 *	Returns the dfa generated by a boolean set operator.
 */
Dfa
dfa_set_op(label,d1,d2,op)
	Dfa d1 ;
	Dfa d2 ;
	unsigned long op ;
{
	/* Allocate dfa */
	Dfa d = make_dfa(label) ;

	d->start_state = merge_states(
		d1,d1->start_state,
		d2,d2->start_state,
		d,op) ;
	empty_hash() ;
	return d ;
}

static
convert_zero(d)
	Dfa d ;
{
	if (!d->states[d->start_state].is_final)
	{
		int q = add_state_to_dfa(d,1) ;
		int i ;
		for(i = 0; i < 256; i++)
		{
			d->states[q].transitions[i] =
				d->states[d->start_state].transitions[i] ;
		}
		d->start_state = q ;
	}
}

/*
 *	Returns the p-closure of a dfa.
 */
Dfa
dfa_closure(label,d)
	Dfa d ;
{
	Dfa new_d = dfa_pclosure(label,d) ;
	convert_zero(new_d) ;
	return new_d ;
}

/*
 *	Returns the closure of a dfa.
 */
Dfa
dfa_pclosure(label,d)
	Dfa d ;
{
	/* Allocate dfa */
	Dfa new_d = make_dfa(label) ;
	Int_List sl ;
	sl.lst = (int *)etmalloc(sizeof(int)) ;
	sl.lst[0] = d->start_state ;
	sl.n = 1 ;

	new_d->start_state = closure_states(d,&sl,new_d) ;
	empty_hash() ;
	return new_d ;
}

Dfa
dfa_pattern_string(label,patterns,l)
	char **patterns ;
{
	/* Allocate dfa */
	Dfa d = make_dfa(label) ;
	int err = add_state_to_dfa(d,0) ;
	int final = add_state_to_dfa(d,1) ;
	int last_state = final ;

	/* Create single final state */
	{
		int i ;
		for(i = 0; i < 256; i++)
		{
			d->states[final].transitions[i] = err ;
		}
	}

	/* Create error state */
	{
		int i ;
		for(i = 0; i < 256; i++)
		{
			d->states[err].transitions[i] = err ;
		}
	}

	/* Create states for each letter */
	{
		int j ;
		for(j = l - 1; j >= 0; j--)
		{
			int i ;
			int q = add_state_to_dfa(d,0) ;
			for(i = 0; i < 256; i++)
			{
				d->states[q].transitions[i] = err ;
			}
			for(i = 0; patterns[j][i]; i++)
			{
				d->states[q].transitions[patterns[j][i]] = last_state ;
			}
			last_state = q ;
		}
	}

	d->start_state = last_state ;

	return d ;
}

/*
 *	Returns the dfa represented by a string.
 */
Dfa
dfa_nstring(label,s,l,case_sensitive)
	char *s ;
{
	int i ;
	char **patterns = (char **)etmalloc(l * sizeof(char *)) ;
	Dfa d ;
	for(i = 0; i < l; i++)
	{
		int c = s[i] ;
		if (case_sensitive)
		{
			if (c >= 'A' && c <= 'Z')
			{
				c += 'a' - 'A' ;
			}
			if (c >= 'a' && c <= 'z')
			{
				patterns[i] = etmalloc(3) ;
				patterns[i][0] = c ;
				patterns[i][1] = c - 'a' + 'A' ;
				patterns[i][2] = 0 ;
				continue ;
			}
		}
		patterns[i] = etmalloc(2) ;
		patterns[i][0] = c ;
		patterns[i][1] = 0 ;
	}
	d = dfa_pattern_string(label,patterns,l) ;
	for(i = 0; i < l; i++)
	{
		etfree(patterns[i]) ;
	}
	etfree((char *)patterns) ;
	return d ;
}

Dfa
dfa_string(label,s,case_sensitive)
	char *s ;
{
	return dfa_nstring(label,s,strlen(s),case_sensitive) ;
}

/*
 *	Returns the dfa represented by a string of any length.
 */
Dfa
dfa_wild_string(label)
{
	/* Allocate dfa */
	Dfa d = make_dfa(label) ;
	int q = add_state_to_dfa(d,1) ;

	{
		int i ;
		for(i = 0; i < 256; i++)
		{
			d->states[q].transitions[i] = q ;
		}
	}

	d->start_state = q ;
	return d ;
}

/*
 *	Returns the dfa represented by a string of fixed length.
 */
Dfa
dfa_wild_nchar(label,n)
{
	static char s[256] = {
		1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
		21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
		41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,50,
		61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,
		81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,
		101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,
		121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,130,
		141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,
		161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,
		181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,
		201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
		221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,230,
		241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,0} ;
	char **t = (char **)etmalloc(n * sizeof(char *)) ;
	int i ;
	Dfa d ;
	for(i = 0; i < n; i++)
	{
		t[i] = s ;
	}
	d = dfa_pattern_string(label,t,n) ;
	free((char *)t) ;
	return d ;
}

/*
 *	Returns the complement of a dfa.
 */
Dfa
dfa_compl(label,d1)
	Dfa d1 ;
{
	Dfa d = make_dfa(label) ;
	d->start_state = copy_state(d1,d1->start_state,d) ;
	empty_hash() ;
	return d ;
}

Dfa
dfa_zero(label,d1)
	Dfa d1 ;
{
	Dfa d = make_dfa(label) ;
	d->start_state = copy_state2(d1,d1->start_state,d) ;
	empty_hash() ;
	convert_zero(d) ;
	return d ;
}

write_dfa(fname,d)
	char *fname ;
	Dfa d ;
{
	int i ;
	FILE *fp = fopen(fname,"w") ;
	char name[256] ;
	if (fp == NULL)
	{
		return -1 ;
	}
	for(i = 0; fname[i] && fname[i] != '.'; i++)
	{
		name[i] = fname[i] ;
	}
	name[i] = 0 ;
	fprintf(fp,"#define START_STATE_%s %d\n",name,d->start_state) ;
	fprintf(fp,"static struct {\n\tint is_final ;\n\tint transitions[256] ;\n} %s_states[%d] = {\n",name,d->n_states) ;
	for(i = 0; i < d->n_states; i++)
	{
		int j ;
		fprintf(fp,"\t{%d,{",d->states[i].is_final) ;
		for(j = 0; j < 256; j++)
		{
			fprintf(fp,"%3d,",d->states[i].transitions[j]) ;
			if (j % 20 == 19)
			{
				fprintf(fp,"\n\t    ") ;
			}
		}
		fprintf(fp,"}},\n") ;
	}
	fprintf(fp,"} ;\n") ;
	fclose(fp) ;
	return 0 ;
}

/*
 *		Frees up all memory used by a dfa.
 */
dfa_destroy(d)
	Dfa d ;
{
	etfree((char *)d->states) ;
	etfree((char *)d) ;
}
