#!/afs/athena/contrib/perl5/perl
# a movemail replacement which runs SpamAssassin on each message
# convert mail in movemail format back to UNIX mail-spool form.
use warnings;
use strict;

my      $SAINST="/mit/bert/project/spam";
my  $spamd_port=10000 + $<;

my       $spamf="$SAINST/bin/spamc";
my $spamf_flags="-f -d 127.0.0.1 -p $spamd_port -s 100000";

my       $spamd="$SAINST/bin/spamd";
my $spamd_flags="--syslog=null -A 127.0.0.1 -p $spamd_port";

my  $check_read="$ENV{HOME}/.spamassassin/user_prefs";

my    $movemail="/usr/athena/bin/movemail";

my    $DEBUG=1;
sub DEBUG {
  warn join("", @_, "\n") if $DEBUG;
}

### input

sub read_line ($) {
  my ($fh) = @_;
  local ($/) = "\n";
  scalar <$fh>;
}

sub read_body ($) {
  my ($fh) = @_;
  local ($/) = "\037\cL";
  my $body = scalar <$fh>;
  $body =~ s/\037(?:\cL|\n)?\z//;
  $body;
}

sub read_end ($) {
  my ($fh) = @_;
  while (my $line = read_line $fh) {
    $line =~ /\S/ and die "Extraneous data at the end of file!\n";
  }
}

sub read_msg ($) {
  my ($fh) = @_;
  my $babyl = read_line $fh;
  defined $babyl or die "Message doesn't contain a BABYL header!\n";
  $babyl =~ /^[01],/ or die "BABYL message header is invalid!\n";
  read_body $fh;
}

sub read_next ($) {
  my ($fh) = @_;
  my $start = read_line $fh;
  defined $start or return;
  $start =~ /^$/ or read_end($fh), return;
  1;
}

sub read_first ($) {
  my ($fh) = @_;
  my $start = read_line $fh;
  defined $start or return;
  $start =~ /^\cL/ and return 1;  # if no BABYL header, done!
  /^BABYL OPTIONS:/ or die "Unable to interpret file format.\n";
  read_body $fh;
  read_next $fh;
}

sub read_write_first ($) {
  my ($fh) = @_;
  my $start = read_line $fh;
  defined $start or return;
  $start =~ /^\cL/ and return 1;  # if no BABYL header, done!
  $start =~ /^BABYL OPTIONS:/ or die "Unable to interpret file format.\n";
  print $start;
  print read_body($fh);
  read_next $fh;
}

### checking for previous SpamAssasination

sub copy_headers ($) {
  my ($msg) = @_;
  my $pos = index $msg, "\n\n";
  $pos < 0 and return $msg;
  substr $msg, 0, $pos+1;
}

sub already_filtered ($) {
  my ($hdr) = @_;
  $hdr =~ /^X-Spam-Status:/mi;
}

### helper: a baby-sitter class for spamd

{
  package Spamd;

  use warnings;
  use strict;

  sub _start_spamd () {
    my $pid = fork;
    defined $pid or die "fork() for spamd: $!";
    if (! $pid) {
      # child: run spamd
      exec "$spamd $spamd_flags";
      die "exec() for spamd: $!";
    }
    # parent: baby-sit
    $pid;
  }

  sub new ($) {
    my ($class) = @_;
    my $pid = _start_spamd();
    bless [$pid, time], $class;
  }

  sub wait ($;$) {
    my ($self, $delay) = @_;
    my ($pid, $start) = @$self;
    $delay = 5 unless defined $delay;
    $delay -= (time - $start);

    # wait for spamd to come fully up (don't know how to check!)
    sleep $delay if $delay > 0;
    # might as well make sure it didn't bail
    kill 0, $pid or die "spamd exited prematurely!\n";
  }


  sub kill ($) {
    my ($self) = @_;
    defined $self->[0]     or return;
    kill 'HUP', $self->[0] or return;
    $self->[0] = undef;
    1;
  }

  sub DESTROY ($) {
    my ($self) = @_;
    $self->kill;
  }
}    

### mail washing

sub wash ($@) {
  local ($|) = 1;
  print "\cL\n0, unseen,,\n";

  open my $SPAM, '|-', "$spamf $spamf_flags"
    or die "Can't start SpamAssassin filter\n";
  print $SPAM @_;
  close $SPAM;

  print "\037";
}

### helper: locally redirect a file handle

{
  package HandleSaver;
  use warnings;
  use strict;

  sub new ($$;$) {
    my ($class, $handle, $local) = @_;

    local (*HANDLE) = $handle;
    open my $copy, '>& HANDLE'
      or die "HandleSaver::new: dup(HANDLE): $!";
    my $self = bless [$handle, $copy], $class;

    # if $local is missing, just leave the old value
    defined $local or return $self;

    if (ref $local) {
      local (*LOCAL);  # split up, or Perl complains about unique use
      *LOCAL = $local;
      open $handle, '>& LOCAL'
	or die "HandleSaver::new: dup(LOCAL): $!";
    } else {
      open $handle, '>', $local
	or die "HandleSaver::new: open($local): $!";
    }

    $self;
  }

  sub DESTROY ($) {
    my ($self) = @_;
    local (*HANDLE, *COPY);  # split up, or Perl complains about unique use
    (*HANDLE, *COPY) = @$self;
    open HANDLE, '>& COPY'
      or die "HandleSaver::DESTROY: dup(): $!";
  }
}

### spool processing

sub movemail (@) {
  my (@args) = @_;
  my $ret = system "$movemail @args";
  return unless $ret;
  my $status = ($ret >> 8);
  warn "movemail exited with status $status!\n";
  exit $status;
}

sub process ($$) {
  my ($in, $out) = @_;

  open my $SPOOL, '<', $in
    or die "process: open(spool): $!\n";

  my $save = HandleSaver->new(\*STDOUT, $out);

  read_write_first($SPOOL) or return;
  my $first = 1;

  while (1) {
    my $msg = read_msg $SPOOL
      or last;
    #$first and already_filtered(copy_headers $msg)
    #  and die "The spool has already been filtered with SpamAssasin!\n";
    $first = 0;
    wash $msg;
    read_next $SPOOL
      or last;
  }
}

### run it all

#my $err = HandleSaver->new(\*STDERR, '/tmp/washmail.debug.bert');
#warn "[logging stderr for '$0 @ARGV']\n";

-r $check_read or die "Can't read $check_read, quitting.\n";

@ARGV == 2 or die "usage: washmail [po:]source dest\n";
my ($pobox, $destfile) = @ARGV;
my $tempfile = "$destfile.dirty";

-s $destfile and warn("$destfile already exists!\n"), exit(0);

my $sd = Spamd->new;

if (-e $tempfile) {
  warn("$tempfile already exists!\n");
} else {
  movemail $pobox, $tempfile;
}
if (-s $tempfile) {
  $sd->wait;
  process $tempfile, $destfile;
}
unlink $tempfile;
