Received: from MIT.MIT.EDU by po5.MIT.EDU (5.61/4.7) id AA00928; Thu, 20 Jan 94 10:06:23 EST
Received: from ro.doe.carleton.ca by MIT.EDU with SMTP
	id AA19541; Thu, 20 Jan 94 10:06:17 EST
Received: from emma.doe.carleton.ca by doe.carleton.ca (4.1/SMI-4.0)
	id AA11182; Thu, 20 Jan 94 10:05:39 EST
From: jac@doe.carleton.ca (James A. Cherry)
Received: by emma.doe.carleton.ca (4.1/Sun-Client)
	id AA25950; Thu, 20 Jan 94 10:05:38 EST
Message-Id: <9401201505.AA25950@emma.doe.carleton.ca>
Subject: Anagram program
To: mkgray@MIT.EDU
Date: Thu, 20 Jan 1994 10:05:38 -0500 (EST)
X-Mailer: ELM [version 2.4 PL22]
Mime-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Content-Length: 9917      

Matthew,

    To compile the C file, I suggest using

  gcc -O2 -o anagram anagram.c

Now, the format of a command is

  anagram letters [numbers] [suggested words]

where letters is a string of lower case letters only.

The optional [numbers] fields force the program to look for anagrams that
contain words of each length.  Up to ten word lengths may be specified.  This
is useful when you have a very large number of letters (e.g., more than 20)
that you wish to permute, and you wish for the program to skip over all the
anagrams containing only short (three and four letter) words.  For example,
when you are looking for anagrams of 30 letters, you might include the lengths
"8 8 8" to force three of the words to be eight letters long each.

The optional [suggested words] fields are taken one at a time, and they consist
of lower case letters (a-z) only too; each set of these letters is removed from
the original letters, and anagrams of the remaining letters are found.  This
can be useful when you wish to find anagrams involving particular words or a
proper name.

Both optional fields can be specified at the same time.

If no command line arguments are specified, you are prompted for the arguments;
after the program has found all anagrams, it returns you to the prompt.

Examples:
  anagram jamescherry
    (find all anagrams of "jamescherry")
  anagram jamescherry 5
    (forces at least one of the words to be 5 letters long)
  anagram jamescherry 6 5
    (forces one word to be 6 letters long and the other to be 5 long)
  anagram jamescherry charm
    (find all anagrams where "charm" is one of the words)
  anagram
    (enters interactive mode; then typing "jamescherry" at the prompt is the
     same as example 1, above.  Likewise "jamescherry 5" at the prompt is the
     same as example 2.)

NOTE: You must supply an English dictionary; I do NOT recommend /usr/dict/words
as it is very small!  The key problem with /usr/dict/words is that it does not
have many plural forms and verb forms.  For example, my local copy of
/usr/dict/words has "abate" and "abater", but not "abates", "abated", or
"abating".  As a result, many potential anagrams will not be found if you use
/usr/dict/words.  I highly recommend using the dictionary that is supplied in
the tar file.  It's big, but it's good.

Regards,

James.

/*
 *
 * anagram.c - finds anagrams of a given set of letters
 *
 * by James Cherry, December 2, 1992
 *
 */

#include <stdio.h>
#include <string.h>
#include <malloc.h>

typedef struct dictent {
  struct dictent *alph;
  struct dictent *len;
  struct dictent *gl;
  char *word;
  int le;
} dent;

#define DE_SIZE		( sizeof( dent ) )

dent *sptr[26][40], *cptr[26][40], *aptr[26], *gptr[40];
char my_argv[10][50];
int tl, nlets[26], rl, reql[10], trl, stack[50];

void init_prog() { 
	int i, j;

	fprintf( stderr, "Initializing...\n" );
	for( i = 0; i < 26; i++ ) {
		aptr[i] = NULL;
		for( j = 1; j < 40; j++ ) {
			sptr[i][j] = NULL;
			cptr[i][j] = NULL;
		}
	}
}

void abort_prog( msg )
  char *msg;
{
	fprintf( stderr, msg );
	exit( 1 );
}

void read_dict() {
	FILE *fp;
	char word[40], *tword;
	dent *tdent, *wptr;
	int i, le, ai, wi;

	fp = fopen( "words", "r" );
	if( fp == NULL ) abort_prog( "Could not find dictionary file\n" );
	fprintf( stderr, "Reading dictionary...\n" );

	wptr = NULL;
	ai = 0;
	while( fscanf( fp, "%s", word ) != EOF ) {
		le = strlen( word );
		for( i = 0; i < le; i++ ) {
			if( word[i] >= 'A' && word[i] <= 'Z' ) word[i] += ( 'a' - 'A' );
			if( word[i] < 'a' || word[i] > 'z' ) break;
		}
		if( i != le ) continue;
		tdent = (dent *) malloc( DE_SIZE );
		tword = (char *) malloc( le + 1 );
		if( tdent == NULL || tword == NULL )
			abort_prog( "malloc() failed\n" );

		wi = word[0] - 'a';
		strcpy( tword, word );
		tdent->word = tword;
		tdent->le = le;
		tdent->alph = NULL;
		tdent->len = NULL;
		if( wptr == NULL ) aptr[ai++] = tdent;
		else {
			if( wptr->word[0] != word[0] ) aptr[ai++] = tdent;
			wptr->alph = tdent;
		}
		if( cptr[wi][le] == NULL ) {
			sptr[wi][le] = tdent;
			cptr[wi][le] = tdent;
		} else {
			cptr[wi][le]->len = tdent;
			cptr[wi][le] = tdent;
		}
		wptr = tdent;
	}
}

void check_dict() {
	int i;
	dent *pt;
	
	for( i = 0; i < 26; i++ ) printf( "%s ", aptr[i]->word );
	printf( "\n" );
	for( i = 1; i < 39; i++ )
		if( sptr[0][i] != NULL ) printf( "%s ", sptr[0][i]->word );
	printf( "\n" );
	pt = sptr[3][12];
	while( pt != NULL ) {
		printf( "%s ", pt->word );
		pt = pt->len;
	}
	printf( "\n" );
}

int get_args() {
	char inp;
	int i, j;

	printf( "Type in a list of letters, optionally followed by word lengths,\n" );
	printf( "optionally followed by word suggestions.  A blank line exits.\n" );
	printf( "-> " );

	i = 0;
	j = 0;
	do {
		scanf( "%c", &inp );
		if( inp > ' ' ) {
			if( j == 0 ) i++;
			if( inp >= 'A' && inp <= 'Z' ) inp += 'a' - 'A';
			my_argv[i][j++] = inp;
		} else {
			if( i > 0 ) my_argv[i][j] = '\0';
			j = 0;
		}
	} while( inp != '\n' );
	return( i - 1 );
}

int parse_args( nargs )
  int nargs;
{
	int i, j, c, t;

	for( i = 0; i < 26; i++ ) nlets[i] = 0;
	tl = strlen( my_argv[1] );
	for( i = 0; i < tl; i++ ) {
		c = my_argv[1][i];
		if( c < 'a' || c > 'z' ) break;
		nlets[c - 'a']++;
	}
	if( i != tl ) abort_prog( "Non-alphabetic characters in input set\n" );
	i = 2;
	rl = 0;
	while( i < nargs ) {
		if( my_argv[i][0] < '1' || my_argv[i][0] > '9' ) break;
		reql[rl++] = atoi( my_argv[i++] );
	}
	for( j = rl - 1; j > 0; j-- )
		for( c = 0; c < j; c++ )
			if( reql[c] < reql[c + 1] ) {
				t = reql[c];
				reql[c] = reql[c + 1];
				reql[c + 1] = t;
			}
	for( j = 0, trl = 0; j < rl; j++ ) trl += reql[j];
/*	for( i = 0; i < rl; i++ ) printf( "%2d ", reql[i] );
	printf( "\n" ); */
	return( i );
}

void find_valid_words() {
	int i, j, k, c, tn[26];
	dent *dptr, *tp[40];
	long checked, found, perc;

	checked = 0;
	found = 0;
	for( i = 0; i < 40; i++ ) gptr[i] = NULL;
	for( i = 0; i < 26; i++ ) {
		for( j = 1; j < 40; j++ ) {
			dptr = sptr[i][j];
			while( dptr != NULL ) {
				for( k = 0; k < 26; k++ ) tn[k] = nlets[k];
				for( k = 0; k < j; k++ ) {
					c = dptr->word[k] - 'a';
					if( --tn[c] < 0 ) break;
				}
				if( k == j ) {
					if( gptr[j] == NULL ) {
						gptr[j] = dptr;
						tp[j] = dptr;
					} else {
						tp[j]->gl = dptr;
						tp[j] = dptr;
					}
					dptr->gl = NULL;
					found++;
				}
				dptr = dptr->len;
				checked++;
			}
		}
	}
	perc = 1000 - ( ( found * 10000 + 5 ) / 10 ) / checked;
	fprintf( stderr, "eliminated %2d.%d%% of words\n", perc / 10, perc % 10 );
/*	for( i = 1; i < 40; i++ ) {
		if( gptr[i] != NULL ) printf( "%s ", gptr[i]->word );
	}
	printf( "\n" ); */
}

void do_search( es, sw )
  char *es, *sw;
{
	int i, j, slev, tn[26], cl, co, cp, rs, rs2;
	long na;
	dent *sp[50];

	na = 0;
	co = 0;
	fprintf( stderr, "%sGoing through dictionary...", es );
	find_valid_words();
	fprintf( stderr, "%sStarting search...\n", es );
	for( i = 0; i < 50; i++ ) {
		stack[i] = 0;
		sp[i] = NULL;
	}
	cl = tl;
	slev = 0;
	stack[0] = 1;
	sp[0] = gptr[1];
	for( ;; ) {
		for( i = 0, j = 0, rs = tl, rs2 = trl; i < rl && j <= slev; ) {
			if( stack[j] >= reql[i] ) {
				rs -= stack[j];
				if( stack[j] == reql[i] ) {
					rs2 -= stack[j];
					i++;
				}
				if( rs < rs2 ) {
					j = -1;
					break;
				}
				j++;
			} else {
				j = -1;
				break;
			}
		}
		if( j == -1 ) sp[slev] = NULL;
		while( sp[slev] != NULL ) {
			for( i = 0; i < 26; i++ ) tn[i] = nlets[i];
			for( i = 0; i < stack[slev]; i++ ) {
				if( --tn[( (int) sp[slev]->word[i] ) - 'a'] < 0 ) break;
			}
			if( i == stack[slev] ) break;
			else sp[slev] = sp[slev]->gl;
		}
		if( sp[slev] == NULL ) {
			if( slev != 0 ) {
				if( stack[slev] < stack[slev - 1] && cl - stack[slev] > 0 ) {
					stack[slev]++;
					if( stack[slev] != stack[slev - 1] )
						sp[slev] = gptr[stack[slev]];
					else sp[slev] = sp[slev - 1];
				} else {
					stack[slev] = 0;
					sp[slev] = NULL;
					slev--;
					for( i = 0; i < stack[slev]; i++ )
						nlets[( (int) sp[slev]->word[i] ) - 'a']++;
					sp[slev] = sp[slev]->gl;
					cl += stack[slev];
				}
			} else {
				if( stack[0] < tl ) {
					stack[0]++;
					sp[0] = gptr[stack[0]];
				} else break;
			}
		} else {
			if( cl > stack[slev] ) {
				for( i = 0; i < 26; i++ ) nlets[i] = tn[i];
				cl -= stack[slev];
				slev++;
				stack[slev] = 1;
				sp[slev] = gptr[1];
			} else {
				cp = tl + slev + 3;
				if( sw != NULL ) cp += 1 + strlen( sw );
				if( co + cp > 79 ) {
					co = cp;
					printf( "\n" );
				} else co += cp;
				printf( "(" );
				if( sw != NULL ) printf( "%s ", sw );
				for( i = 0; i <= slev; i++ ) {
					printf( "%s", sp[i]->word );
					if( i != slev ) printf( " " );
					else printf( ") " );
				}
				na++;
				sp[slev] = sp[slev]->gl;
			}
		}
	}
	printf( "\n*** Total number of anagrams found: %d\n", na );
}

void search( suggp, sugg )
  int suggp, sugg;
{
	int i, j, c, tn[26];

	if( suggp == sugg ) {
		do_search( "", NULL );
		return;
	}
	for( i = suggp; i < sugg; i++ ) {
		printf( "Target letters: %s\n", my_argv[i] );
		for( j = 0; j < 26; j++ ) tn[j] = nlets[j];
		for( j = 0; j < strlen( my_argv[i] ); j++ ) {
			c = my_argv[i][j];
			if( c < 'a' || c > 'z' ) break;
			if( --tn[c - 'a'] < 0 ) break;
		}
		if( j == strlen( my_argv[i] ) ) {
			for( j = 0; j < 26; j++ ) nlets[j] = tn[j];
			tl -= strlen( my_argv[i] );
			do_search( "*** ", my_argv[i] );
			for( j = 0; j < strlen( my_argv[i] ); j++ )
				nlets[( (int) my_argv[i][j] ) - 'a']++;
			tl += strlen( my_argv[i] );
		}
	}
}

void main( argc, argv )
  int argc;
  char **argv;
{
	int nargs;

	init_prog();
	read_dict();
	if( argc == 1 ) {
		for( ;; ) {
			nargs = get_args();
			if( nargs == -1 ) exit( 0 );
			search( parse_args( nargs + 2 ), nargs + 2 );
		}
	} else {
		for( nargs = 1; nargs < argc; nargs++ )
			strcpy( my_argv[nargs], argv[nargs] );
		search( parse_args( argc ), argc );
		exit( 0 );
	}
}
