#!/afs/athena/contrib/perl5/p -w
# $Id: zwgc2zion.pl,v 1.7 1996/09/09 00:09:18 bert Exp $

use strict;
use FileHandle;
use lib '/afs/sipb/user/bert/dev/zion/dev';
use Zwgc::Desc      1.00;
use zwgc2zion_rules 0.20;
use Carp;

use vars qw( @saved_comments $debug );

### This section takes the token list and rewrites it in Perl.  (We hope.)

package main;

use vars qw( $indent );
$indent = '';

sub translate {
  my (@tokens) = @_;
  print STDERR "translating to Perl...\n";

  my $desc = { 'tokens' => \@tokens, 'state' => [] };

  while (@tokens) {
    print command($desc), pop_comments($desc);
  }
}

sub comments {
  my $gt = shift;
  my $tr = $gt->{'tokens'};
  my ($d, @data);

  while (ref($tr->[0])) {
    COMMENT_FRAG:
    foreach $d (@{shift(@$tr)}) {
      ($d eq "\n") && ( push(@data, $d), next COMMENT_FRAG );
      ($d =~ /^\#/) && ( push(@data, $d), next COMMENT_FRAG );
      # $d contains a C-style comment
      $d =~ s@^\s*/\*\s*@@gm;
      $d =~ s@^@\# @gm;
      $d =~ s@\s*\*/\s*$@@gm;
      $d =~ s@([^\n])$@$1\n@gs;
      push(@data, $d);
    }
  } 
  @data;
}

sub push_comments {
  push @saved_comments, comments(@_);
}
sub pop_comments {
  push_comments(@_) if @_;
  my @data = @saved_comments;
  @saved_comments = ();
  @data;
}

sub assert {
  my $gt = shift;
  my $tr = $gt->{'tokens'};
  my ($assertion, $value, @result);

  croak "internal error: confirmed *what*??? (@$tr)" if $gt->{'confirmed'};

 ASSERTION:
  for $assertion (@_) {
    @$tr || croak("unexpected end of input while looking for \U$assertion\E".
		  "\n  aborting");

    push_comments($gt);

    $value = shift(@$tr);
    ("\L$value" eq $assertion)
      || croak("ran into \U$value\E while looking for \U$assertion\E".
	       "\n  aborting");
  }
  @result;  # these are all the comments we picked up
}

sub flatten {
  map( (ref($_) ? flatten(@$_) : $_), @_ );
}

sub expect_magic {
  my ($gt, $magic, $token) = @_;
  my $tr = $gt->{'tokens'};

  my $states = $gt->{'state'};

  my (@comments, $dummy, @magic);

  @magic = split(/\s*([\?<>@])\s*/, $magic);
  ($dummy = shift(@magic))
    && croak("unexpected symbol at the beginning of magic: $dummy".
	     "\n  aborting");

  while (@magic) {
    my $op  = shift(@magic);
    my $arg = shift(@magic) || croak("unexpected end of expect magic at $op".
				     "\n  aborting");
    if ($op eq '>') {		# push state onto state stack
      unshift(@$states, $arg);
    } elsif ($op eq '<') {	# pop state off the state stack
      # first, make sure whe have the right state.
      @$states
	|| croak("trying to end the \U$arg\E while at top level".
		 "\n  aborting");
      ($states->[0] eq $arg)
	|| croak("trying to end the \U$arg\E while inside \U$states->[0]\E".
		 "\n  aborting");

      shift(@$states);
    } elsif ($op eq '?') {	# assert the current state
      @$states
	|| croak("\U$token\E found while at top level, not inside \U$arg\E".
		 "\n  aborting");
      ($states->[0] eq $arg)
	|| croak("\U$token\E found inside \U$states->[0]\E, not \U$arg\E".
		 "\n  aborting");
    } elsif ($op eq '@') {	# assert the next token
      my (@allowed) = split(m@/@, $arg);
      my (%allowed) = map( ($_,1), @allowed );

      @allowed
	|| croak("no arguments supplied to \@ in expect magic".
		 "\n  aborting");

      # check whether this is one of the allowed tokens...
      @$tr || croak("unexpected end of input while looking for \U$arg\E".
		    "\n  aborting");
      push_comments($gt);

      $allowed{"\L$tr->[0]"}
	||croak("ran into \U$tr->[0]\E while looking for one of: \U@allowed\E".
		"\n  aborting");

      # mark the next token as already marked OK
      $gt->{'confirmed'} = 1;
    } else {
      croak("unknown magic symbol: $op\n  aborting");
    }
  }
  @comments;
}


sub build_from_template {
  local ($_);  my (@frags);
  ($_, @frags) = @_;

  (ref($_) eq 'CODE')	&& return &$_(@frags);
  ref($_)		&& croak("internal error: reference inside template".
				 "\n  aborting");
  /^\$(\d+)$/		&& return @{$frags[$1]};
  /^\$\$(\d+)$/		&& return map( "\$$_", @{$frags[$1]} );
  return $_;
}

sub expect {
  my ($gt, $ident) = @_;
  my $tr = $gt->{'tokens'};
  my (@frags, $plan, @extras);
  @frags = ($gt->{'state'});

  $zwgc2zion_rules::expect{$ident}
    || croak "\U$ident\E is unsupported by the translator\n  aborting";
  ref($zwgc2zion_rules::expect{$ident})
    || croak "$zwgc2zion_rules::expect{$ident}\n  aborting";

 PLAN:
  foreach $plan (@{$zwgc2zion_rules::expect{$ident}}) {
    ($plan eq '{token}')  && ( push(@frags,[token($gt)]),	next PLAN );
    ($plan eq '{tokens}') && ( push(@frags,[tokenS($gt)]),	next PLAN );
    ($plan eq '{expr}')   && ( push(@frags,[expression($gt)]),	next PLAN );
    ($plan eq '{expr s}') && ( push(@frags,[exprS_space($gt)]),	next PLAN );
    ($plan eq '{expr,s}') && ( push(@frags,[exprS_comma($gt)]),	next PLAN );
    ($plan eq '{cmd}')    && ( push(@frags,[command($gt)]),	next PLAN );
    ($plan eq '{cmds}')   && ( push(@frags,[commandS($gt)]),	next PLAN );

    # Any token in the expect list starting with "::" is considered special
    ($plan =~ /^::\s+/)   && ( expect_magic($gt, $', $ident), #'
			       next PLAN );
    assert($gt, $plan);
  }

  @frags;
}

sub assemble {
  my ($ident, @frags) = @_;
  map( build_from_template($_, @frags), @{$zwgc2zion_rules::perlify{$ident}} );
}

sub command {
  my $gt = shift;
  my $tr = $gt->{'tokens'};

  print STDERR "CMD: $tr->[0] $tr->[1] $tr->[2] $tr->[3]...\n" if $debug;

  my @result = pop_comments($gt);
  my $cmd = shift @$tr;

  exists $zwgc2zion_rules::command{$cmd}
    || croak "\U$cmd\E is not a command\n  aborting";
  ($gt->{'confirmed'} || exists $zwgc2zion_rules::primary{$cmd})
    || croak("\U$cmd\E is a part of a command, but was found alone".
	     "\n  aborting");
  delete $gt->{'confirmed'};

  push(@result, assemble($cmd, expect($gt, $cmd)));
  push(@result, command($gt)) while ($gt->{'confirmed'});

  @result;
}

sub commandS {
  my $gt = shift;
  my $tr = $gt->{'tokens'};
  my $at_least_one = shift;
  my (@result);

  push(@result, command($gt)) if ($at_least_one);
  push(@result, pop_comments($gt));

  while (exists $zwgc2zion_rules::primary{$tr->[0]}) {
    push(@result, command($gt), pop_comments($gt));
  }
  @result;
}

sub regexpify {
  my (@strexp) = @_;
  if (($#strexp == 0) && ($strexp[0] =~ /^[""]/)) {
    # if it's a string, we use " as pattern delimiter and output m"pattern"
    # (because the string may contain unquoted /'s, but not "'s)
    ( ' m' . $strexp[0] );
  } elsif (($#strexp == 0) && ($strexp[0] =~ /^\$/)) {
    # if it's a variable, we interpolate it into a regexp
    ( '/' . $strexp[0] . '/' );
  } else {
    # if it's neither, we commit gruesome, unnatural acts
    ( '/@{[', @strexp, ']}/' );
  }
}

sub expression {
  my $gt = shift;
  my $tr = $gt->{'tokens'};
  my ($done);
  my @expr = pop_comments($gt);

  croak "internal error: confirmed *what*??? (@$tr)" if $gt->{'confirmed'};

  while (! $done) {
    push_comments($gt);

    my $token = shift(@$tr)
      || croak "end of input while looking for an expression\n  aborting";

    if ( $token eq '(' ) {
      # NB: assert() pushes comments that occur before assertion
      push( @expr,  '( ', expression($gt),  ' )' );
      assert( $gt, ')' );

    } elsif ( $token eq '!' ) {
      push( @expr,  ' !', expression($gt) );

    } elsif (exists $zwgc2zion_rules::function{"\L$token"}) {
      $token = "\L$token";
      push( @expr, assemble($token, expect($gt, $token)));

    } elsif ( $token =~ /^\$(\d+)$/ ) {
      push( @expr, '$fields[' . ($1-1) . ']');

    } elsif ( $token =~ /^\$/ ) {
      push( @expr,  $token );

    } elsif ( $token =~ /^\"/ ) {	# "
      # NB: only variables starting with $ or @ are interpolated into strings
      $token =~ s/\@/\\\@/g;	# quote interpolated variable names
      $token =~ s/\$/\\\$/g;	# quote interpolated variable names
      push( @expr,  $token );

    } else {
      croak "I don't know what to do with token \U$token\E\n  aborting";
    }

    push_comments($gt);

    if (@$tr && $zwgc2zion_rules::regop{$tr->[0]}) {
      # for =~ and !~, we need to turn a string into a regexp
      push( @expr, $zwgc2zion_rules::operator{shift(@$tr)},
	    regexpify( expression($gt) ));
      $done = 1;
    } elsif (@$tr && $zwgc2zion_rules::operator{$tr->[0]}) {
      push( @expr, $zwgc2zion_rules::operator{shift(@$tr)} );
    } else {
      $done = 1;
    }
  }

  @expr;
}

sub exprS_comma {
  my $gt = shift;
  my $tr = $gt->{'tokens'};

  my (@result) = expression($gt);
  push_comments($gt);

  while ($tr->[0] eq ',') {
    shift(@$tr);
    push(@result, ',', pop_comments($gt), expression($gt));
    push_comments($gt);
  }
  @result;
}

sub exprS_space {
  my $gt = shift;
  my $tr = $gt->{'tokens'};
  my $at_least_one = shift;
  my (@separator, @result);
  push_comments($gt);

  while (@$tr &&
	 ! exists $zwgc2zion_rules::command{$tr->[0]}) {
    push(@result, @separator, pop_comments($gt), expression($gt));
    push_comments($gt);
    @separator = ( ',' );
  }

  ($at_least_one && !@result)
    && croak("no expressions (at least one expected) in $at_least_one".
	     "\n  aborting");
  @result;
}

sub token {
  my $gt = shift;
  my $ident = shift;

  @{$gt->{'tokens'}}
    || croak("no tokens (one expected) in $ident".
	     "\n  aborting");
  push_comments($gt);
  my($result) = shift(@{$gt->{'tokens'}});
  exists $zwgc2zion_rules::command{$result}
    && croak("token expected, \U$result\L found".
	     "\n  aborting");

  $result;
}

sub tokenS {
  my $gt = shift;
  my $tr = $gt->{'tokens'};
  my $at_least_one = shift;
  my (@separator, @result);

  push_comments($gt);

  while (@$tr &&
	 ! exists $zwgc2zion_rules::command{$tr->[0]}) {
    push(@result, @separator, pop_comments($gt), shift(@$tr));
    push_comments($gt);
    @separator = ( ',' );
  }

  ($at_least_one && !@result)
    && croak("no tokens (at least one expected) in $at_least_one".
	     "\n  aborting");

  @result;
}

### Version information

package main;

use vars qw( $VERSION $DATE );
$VERSION = '0.967';
$DATE = '<08sep1996>';
# $Revision: 1.7 $

### What follows is the main body of the program.

$zwgc2zion_rules::VERSION   ||= 'unspecified';
$zwgc2zion_rules::DATE      ||= '';
$Zwgc::Desc::VERSION ||= 'unspecified';
$Zwgc::Desc::DATE    ||= '';
print STDERR <<"EndOfBanner";

zwgc2\zion translator,  version $VERSION $DATE
 Zwgc\::Desc tokenizer, version $Zwgc::Desc::VERSION $Zwgc::Desc::DATE
 tran\slation rules,    version $zwgc2zion_rules::VERSION $zwgc2zion_rules::DATE

please send bug reports and comments to <bert\@mit.edu>

EndOfBanner

my $file = $ARGV[0] || "$ENV{'HOME'}/.zwgc.desc";
print STDERR "reading $file...\n";
my $output = $ARGV[1];
#$output ||= "$ENV{'HOME'}/.zion.desc.pl"
#  unless ( -r "$ENV{'HOME'}/.zion.desc.pl" );
if ($output && ($output ne '-')) {
  print STDERR "writing $output...\n";
  open(STDOUT, "> $output");
} else {
  print STDERR "writing stdout...\n";
}

print <<"EndOfVersion", $zwgc2zion_rules::header;
## NOTE: this file has been machine-generated.  It's ugly.  So?  =)
##
## zwgc2\zion translator,  version $VERSION $DATE
##  Zwgc\::Desc tokenizer, version $Zwgc::Desc::VERSION $Zwgc::Desc::DATE
##  tran\slation rules,    version $zwgc2zion_rules::VERSION $zwgc2zion_rules::DATE

EndOfVersion

translate( Zwgc::Desc::tokenize(new FileHandle $file) );
print $zwgc2zion_rules::footer;

print STDERR "... done.\n";
