#!/afs/athena/contrib/perl5/p -w

use strict;
use FileHandle;

# Load the keyword table.  `require` itself dies when the file cannot be
# found, fails to compile, or does not return a true value, so no extra
# `|| die` is attached: written as `require 'tokens.list' || die ...` the
# `||` would bind to the (always true) filename string and never fire.
require 'tokens.list';

# tokens.list is expected to fill @ZwgcDesc::tokens; refuse to continue
# with an empty keyword list.
@ZwgcDesc::tokens or die "no tokens";

# The following code deals with turning a file into a list of tokens.

# Accumulates every token produced by tok() and str().
my @tokens;

# Maps a lower-cased source word to the token emitted for it.  The word
# operators become their symbolic forms; each entry of @ZwgcDesc::tokens is
# keyed by its lower-case spelling and maps to its upper-case spelling.
# The keyword entries are stored last, so they override an operator alias
# that happens to share the same lower-case name.
my %tok_map = ( 'and' => '&', 'or' => '|', 'not' => '!' );
@tok_map{ map(lc($_), @ZwgcDesc::tokens) } = map(uc($_), @ZwgcDesc::tokens);

# Replacement text for single-character backslash escapes inside strings.
# Backslash-newline maps to nothing; "\010" is the backspace character.
my %esc_map = (
  "\n" => '',
  'n'  => "\n",
  't'  => "\t",
  'b'  => "\010",
);

# tok($item) appends one token to @tokens and returns ''.
#
# A plain string is lower-cased and, when %tok_map holds a true entry for
# it, replaced by that entry.  A reference is pushed unchanged.  The empty
# return value lets tok() serve as the replacement expression of an s///e
# that consumes the matched text.
sub tok {
  my $item = $_[0];
  my $key = ref($item) ? $item : lc($item);
  push @tokens, ($tok_map{$key} || $key);
  return '';
}

# escape($sequence) returns the text that replaces '\$sequence' inside a
# string literal:
#   - a run of digits is passed to chr() as a decimal character code;
#   - a sequence listed in %esc_map yields its mapped text, including the
#     empty string mapped for backslash-newline;
#   - anything else stands for itself (so '\"' yields '"').
sub escape {
  my ($sequence) = @_;
  return chr($sequence) if $sequence =~ /^\d+$/;
  # Look the key up with exists() rather than testing the value for truth:
  # the mapping for "\n" is '' (false), and a `||` fallback would hand the
  # newline back instead of deleting it.
  return exists $esc_map{$sequence} ? $esc_map{$sequence} : $sequence;
}

# str($str) decodes the backslash escapes in the body of a string literal
# (the text between the double quotes), pushes the result wrapped in double
# quotes onto @tokens, and returns '' so it can be used as the replacement
# expression of an s///e.  An undefined argument is treated as the empty
# string.
sub str {
  my ($str) = @_;
  # Test definedness, not truth, so the one-character body "0" survives.
  $str = '' unless defined $str;
  # /s lets '.' match a newline, so backslash-newline reaches escape().
  $str =~ s@\\(\d+|.)@escape($1)@egs;
  push(@tokens, '"'.$str.'"');
  return '';
}

# tokenize($fh) reads everything from the filehandle object $fh, closes it,
# and splits the text into tokens by calling tok() and str().
#
# Returns the contents of the file-level @tokens list (which tok() and str()
# append to), or nothing when $fh is false or has no file descriptor.
# Newlines and comments are pushed as array references, so callers can tell
# layout from real tokens with ref(); other whitespace is dropped.
# Dies on text that matches no rule; warns and stops at a C-style comment
# that is never closed.  Progress messages are printed to STDERR.
sub tokenize {
  my ($fh) = @_;
  ($fh && defined($fh->fileno)) || return;

  local ($_);

  # The whole input is held in memory and consumed from the front.
  print STDERR "reading data...\n";
  $_ = join('', <$fh>);
  close $fh;

  print STDERR "tokenizing...\n";
  # Every rule is anchored with \A (never ^ under /m) so that it can only
  # consume text at the very start of the remaining buffer; otherwise a
  # comment further down the file would be extracted out of order.
  # The loop tests length, not truth: a buffer holding just "0" is false.
 TOKEN:
  while (length($_)) {
    s@\A\n@tok(["\n"])@e                and next TOKEN;  # newline, kept as layout
    s@\A\s+@@                           and next TOKEN;  # other whitespace
    s@\A(\#[^\n]*)@tok([$1])@e          and next TOKEN;  # '#' comment to end of line

    # C-style comment: non-greedy so it ends at the first '*/', and /s so
    # that it may span several lines.
    s@\A(/\*.*?\*/)@tok([$1])@es        and next TOKEN;
    if (m@\A/\*@) {
      # Separate statements: written as `warn "...", last TOKEN` the `last`
      # would run while building warn's argument list and the warning
      # would never be printed.
      warn "C-style comment never gets closed";
      last TOKEN;
    }

    s@\A([!=][~=])@tok($1)@e            and next TOKEN;  # (reg)?n?eq

    s@\A([\Q+|&().,!=\E])@tok($1)@e     and next TOKEN;  # self-delimited chars

    s@\A([A-Za-z0-9_]+)@tok($1)@e       and next TOKEN;  # identifiers
    s@\A(\$[A-Za-z0-9_]+)@tok($1)@e     and next TOKEN;  # variable lookups

    # String literal: runs of ordinary characters separated by
    # backslash-plus-any-character pairs.  Consuming escapes as pairs means
    # a body ending in an escaped backslash ("a\\") closes at the right
    # quote; /s allows backslash-newline inside the literal.
    s@\A"([^"\\]*(?:\\.[^"\\]*)*)"@str($1)@es and next TOKEN;

    die "unexpected token (" . substr($_, 0, 5) . "...)";
  }
  return @tokens;
}

# Main program: tokenize a zwgc description file and print one token per
# line on STDOUT.  The file defaults to the historical hard-coded path; a
# different file may be named as the first command-line argument.
my $desc_file = @ARGV ? $ARGV[0] : "/mit/bert/.zwgc.desc";

# Open explicitly for reading and fail loudly: tokenize() silently returns
# nothing for an unusable handle, which would otherwise look like success.
my $desc_fh = FileHandle->new($desc_file, "r")
  or die "cannot open $desc_file: $!";

my @tok = tokenize($desc_fh);

print STDERR "... done.\n";

# Reference tokens (newlines and comments) are layout only; skip them here.
print join('', map("$_\n", grep(!ref($_), @tok)));
exit 0;

# NOTE(review): everything below is unreachable because of the `exit 0`
# above.  It looks like an alternative output mode that rebuilds the text,
# emitting layout tokens verbatim and separating other tokens with single
# spaces.  Kept unchanged; confirm whether it should be enabled or removed.
my ($msg) = '';
my ($space) = '';
my ($t);

for $t (@tok) {
  if (ref($t)) {
    $msg .= join('', @$t);
    $space = '';
  } else {
    $msg .= $space . $t;
    $space = ' ';
  }
}
print $msg;
