#!/usr/athena/bin/perl

push(@INC,
     '/afs/sipb.mit.edu/project/periodic-postings/faq_server',
     '/afs/sipb.mit.edu/project/foo-server/picayune/mail-server',
     '/afs/gza.com/development/mail-server');

require 'header.pl';

# Consume leading command-line switches.  Scanning stops at the first
# argument that is empty, false, or does not begin with "-"; that argument
# is left in @ARGV for the message loop.  Each "-debug" bumps $debug by one;
# any other switch is fatal.
while (1) {
    $_ = $ARGV[0];
    last unless $_ && /^-/;

    shift(@ARGV);

    die "Unknown option $_.\n" unless /^-debug$/;
    $debug++;
}

# Main pass: examine each message file named on the command line, using the
# value of its Date header as the duplicate key.  The first file seen with a
# given Date is remembered in %dates; any later file carrying the same Date
# is handed to check_dup() for a body comparison and is not remembered.
my %dates;

# defined() rather than truth, so a file literally named "0" (or an empty
# argument) does not silently end the scan of the remaining arguments.
while (defined(my $msg = shift @ARGV)) {
    if ($debug > 1) {
	print STDERR "Doing $msg.\n";
    }

    # Three-argument open: the file name is never interpreted as a mode or
    # a pipe (e.g. ">file" or "cmd |").  The bareword handle is kept because
    # its name is passed as a string to header::parse below.
    if (! open(MSG, '<', $msg)) {
	warn "Opening $msg: $!.\n";
	next;
    }

    my $nextline = <MSG>;

    # Discard a first line starting with "From " (presumably an mbox-style
    # separator rather than a real header field).
    $nextline = undef if defined($nextline) && $nextline =~ /^From /;

    # NOTE(review): header::parse and header::field_value come from
    # header.pl, which is not visible here.  parse is given the pending
    # line (or undef) plus the handle name and returns a line followed by
    # the header table -- confirm the details against header.pl.
    my %headers;
    ($nextline, %headers) = header::parse($nextline, 'MSG');

    close(MSG);

    my $date = header::field_value('date', %headers);
    if (! $date) {
	warn "No Date field in $msg.\n";
	next;
    }

    # defined() so a remembered file named "0" still counts as "seen".
    if (defined $dates{$date}) {
	check_dup($dates{$date}, $msg);
	next;
    }

    $dates{$date} = $msg;
}

# check_dup(OLD, NEW)
#
# Compare the bodies of message files OLD and NEW by checksum.
#
#   * If either checksum cannot be computed (body_checksum returned undef,
#     having already warned), do nothing.
#   * If the checksums are equal, NEW is treated as a duplicate of OLD: it
#     is unlinked, or, when $debug is set, only reported as "Would remove".
#   * Otherwise the pair is reported as "Possible duplicates".
#
# Reads the global $debug.  Output goes to STDOUT; unlink failures are
# reported with warn.
sub check_dup {
    my ($old, $new) = @_;

    my $sum1 = body_checksum($old);
    my $sum2 = body_checksum($new);

    # Test for undef, not truth: the checksum is a byte sum modulo 65536,
    # so 0 is a legitimate value and must not be mistaken for a failure.
    return unless defined($sum1) && defined($sum2);

    if ($sum1 == $sum2) {
	if (! $debug) {
	    print "Removing $new, duplicate of $old.\n";
	    if (! unlink($new)) {
		warn "Error removing $new: $!.\n";
	    }
	}
	else {
	    print "Would remove $new, duplicate of $old.\n";
	}
	return;
    }

    print "Possible duplicates: $old and $new.\n";
}
    
# body_checksum(FILE)
#
# Return a 16-bit additive checksum of the body of the message stored in
# FILE, or undef (after a warning) when the file cannot be opened, contains
# no empty line ending its header, or has an empty body.
#
# The body is everything after the first empty line, with leading newlines
# removed and a run of two or more trailing newlines collapsed to one, so
# copies differing only in surrounding blank lines checksum identically.
#
# NOTE: the checksum is a plain byte sum modulo 65536.  Different bodies can
# therefore share a value, and 0 is a valid result -- test the return value
# with defined(), not for truth.
sub body_checksum {
    my ($msg) = @_;

    # Three-argument open on a lexical handle: the name is never parsed as
    # a mode or pipe, and no global handle is shared with other code.
    my $fh;
    if (! open($fh, '<', $msg)) {
	warn "Error opening $msg: $!.\n";
	return undef;
    }

    # Skip the header: read up to and including the first empty line.
    my $header_ended = 0;
    while (defined(my $line = <$fh>)) {
	if ($line eq "\n") {
	    $header_ended = 1;
	    last;
	}
    }

    if (! $header_ended) {
	warn "EOF before end of header in $msg.\n";
	close($fh);
	return undef;
    }

    my $text = join('', <$fh>);

    close($fh);

    # \A and \z anchor to the real start/end of the string regardless of
    # multi-line matching, replacing the old reliance on resetting $*
    # (which modern Perl rejects outright).
    $text =~ s/\A\n+//;
    $text =~ s/\n\n+\z/\n/;

    # Compare against '' so a body consisting of just "0" is not rejected.
    if ($text eq '') {
	warn "Empty body in $msg.\n";
	return undef;
    }

    return unpack("%16C*", $text);
}

