# $Id: Desc.pm,v 1.1 1996/08/30 01:31:42 bert Exp $
### The following code deals with turning a .desc file into a list of tokens.

package Zwgc::Desc;

use vars qw( @ISA @EXPORT_OK $VERSION $DATE );

use Exporter;
@ISA = qw( Exporter );
@EXPORT_OK = qw( tokenize );

$VERSION = '1.00';   $DATE = '<22aug1996>';

use FileHandle;

use vars qw( @tokens %tok_map %esc_map );

%tok_map = (	'and' => '&',
		'or'  => '|',
		'not' => '!',
	      );
%esc_map = ( "\n" => '', "b" => '\010' ); #\b=bs

# tok('string') pushes the TOKEN corresponding to 'string' onto the token list
sub tok {
  my ($raw) = @_;
  $raw = "\L$raw" unless ref($raw);
  push(@tokens, $tok_map{$raw} || $raw);
  '';
}

# escape('foo') deals with unmangling '\foo' inside a string.
sub escape {
  my ($sequence) = @_;
  $esc_map{$sequence} || "\\$sequence";
}

# str('string') pushes the string corresponding to "string" onto the token list
sub str {
  my ($str) = shift || '';
  $str =~ s@\\(\d+|.)@escape($1)@eg;
  push(@tokens, '"'.$str.'"');
  '';
}

# show('string') is generated by 'show string endshow', because show is special
sub show {
  tok('show');
  my ($str) = shift || '';
  $str =~ s@\\(\d+|.)@escape($1)@eg;
  push(@tokens, $str);
}

# tokenize($handle) reads in all data from FH $handle and returns tokens
sub tokenize {
  my ($fh) = @_;
  ($fh && defined($fh->fileno)) || return;

  local ($_, @tokens);
  my ($start);

  # this 8K buffer stuff is for kids.  >=)
  print STDERR "reading data...\n";
  $_ = join('', <$fh>);
  close $fh;

  print STDERR "tokenizing...\n";
 TOKEN:
  while ($_) {
    s@^\n@tok(["\n"])@e 		&& next TOKEN;	# skip whitespace
    s@^\s+@@				&& next TOKEN;	# skip whitespace
    s@^(#[^\n]*\n)@tok([$1])@e		&& next TOKEN;	# skip '#' comments
    s@^(#[^\n]*$)@tok([$1])@e		&& next TOKEN;	# skip '#' comments

    s@^/\*@@ &&
      ( m@\*/@ || die "unterminated C-style comment" ) &&
	( tok(['/*'.$`.'*/']), $_ = $',	next TOKEN);	# skip C-stl comments'

    s@^([!=][~=])@tok($1)@e		&& next TOKEN;	# (reg)?n?eq

    s@^([\Q+|&().,!=\E])@tok($1)@e	&& next TOKEN;	# self-delimited chars

    s@^show\s+@@ &&
      ( m@\s+endshow\b@ || die "unterminated show" ) &&
	( show($`), $_ = $', 		next TOKEN);	# "show" is special'
						

    s@^([A-Za-z0-9_]+)@tok($1)@e	&& next TOKEN;	# identifiers
    s@^(\$[A-Za-z0-9_]+)@tok($1)@e	&& next TOKEN;	# variable lookups

    s@^"((?:[^"]|\\")*[^\\])?"@str($1)@e && next TOKEN;	# strings

    $start = substr($_, 0, 10);
    $start =~ s/\n/\\n/;
    die "unexpected token ($start...)";
  }
  @tokens;
}

1;
