#!/usr/athena/bin/perl

$root = '/afs/sipb.mit.edu/project/periodic-postings/news.answers';

$ENV{'HOME'} = $root;
$ENV{'PATH'} = "$root:$ENV{'PATH'}";

$folder = 'archive';
$outfile = '-';
$save_size = 1024;
$warn_size = 10240;

$usage = "Usage: $0 [ -d ] [ -v ] [ -f folder ] [ -o outfile ] [ message [ ... ] ]";

$from_line_regexp = "From .*\n";
$header_field_regexp = "[^ ]+:.*\n([ \t]+.*\n)*";
$newsgroups_line_regexp = "Newsgroups: .*\n";
$answers_newsgroups_line_regexp = "Newsgroups: .*\\.answers\\b.*\n";

$begin_inclusion_regexp = "\\s*(\\[begin reference inclusion(\\.|)\\]|\\(Begin form letter(\\.|)\\))\\s*\n";
$end_inclusion_regexp = "\\s*(\\[end reference inclusion(\\.|)\\]|\\(End form letter(\\.|)\\))\\s*\n";

while (($_ = $ARGV[0]) && /^-/) {
    shift;
    if (/^-d$/) {
	$debug++;
	next;
    }
    if (/^-v$/) {
	$verbose++;
    }
    elsif (/^-root$/) {
	if (! ($root = shift)) {
	    die "Missing argument to $_ option.\n$usage\n";
	}
	next;
    }
    elsif (/^-f$/) {
	if (! ($folder = shift)) {
	    die "Missing argument to $_ option.\n$usage\n";
	}
	next;
    }
    elsif (/^-o/) {
	if (! ($outfile = shift)) {
	    die "Missing argument to $_ option.\n$usage\n";
	}
	next;
    }
    else {
	die "Unknown option $_.\n$usage\n";
    }
}

if (@ARGV) {
    @files = @ARGV;
}
else {
    opendir(FOLDER, "$root/Mail/$folder") || 
	die "Opening $root/Mail/$folder: $!.\n";

    @files = readdir(FOLDER);

    closedir(FOLDER);
}

while ($file = shift @files) {
    next if ($file !~ /^[0-9]+$/);
    print STDERR "Doing $file\n" if (($debug > 1) || ($verbose > 1));
    $backed_up = undef;
    &maybe_delete_body($file);
    &maybe_strip_inclusions($file);
    if (($size = (-s "$root/Mail/$folder/$file")) > $warn_size) {
	warn "File $file has size $size.\n";
    }
}

sub maybe_delete_body {
    local($msg) = @_;
    local($*) = 0;
    local($/);
    local($text, $end_text, $header);
    local($file) = "$root/Mail/$folder/$msg";
    local($delfile) = "$root/Mail/$folder/.#$msg";
    local($mainheader, $auxheader);
    local($doit);

    open(MSG, $file) ||
	die "Opening $msg: $!.\n";

    $/ = "\n\n";

    $mainheader = <MSG>;
    $auxheader = <MSG>;

    $/ = undef;

    $text = <MSG>;

    close(MSG) || die "Error closing $file after read: $!.\n";

    if ($mainheader =~ /(^|\n)$answers_newsgroups_line_regexp/i) {
	$header = $mainheader;
	$text = $auxheader . $text;
	$doit++;
    }
    elsif (($auxheader =~ /^$header_field_regexp/) &&
	   ($auxheader =~ /(^|\n)$answers_newsgroups_line_regexp/i)) {
	$header = $mainheader . $auxheader;
	$doit++;
    }

    if ($doit) {
	$end_text = substr($text, $save_size);
	if ($end_text) {
	    $text = substr($text, 0, $save_size);
	    if (($text !~ /\n$/) && ($end_text =~ /.*\n/)) {
		$text .= $&;
	    }
	    ($text .= "\n") if ($text !~ /\n$/);
	    $text .= "[remainder of text deleted by $0]\n";

	    if ($debug) {
		print STDERR "Would trim $msg.\n";
	    }
	    else {
		open(MSG, ">$file.new") ||
		    die "Error opening $file.new for write: $!.\n";
		print(MSG $header, $text) || 
		    die "Error writing to $file.new: $!.\n";
		close(MSG) || 
		    die "Error closing $file.new: $!.\n";
		if (! $backed_up++) {
		    rename($file, $delfile) || 
			die "Error renaming $file: $!.\n";
		}
		rename("$file.new", $file) ||
		    die "Error renaming $file.new: $!.\n";
		if ($verbose) {
		    print STDERR "Trimmed $msg.\n";
		}
	    }
	}
    }
}

sub maybe_strip_inclusions {
    local($msg) = @_;
    local($file) = "$root/Mail/$folder/$msg";
    local($delfile) = "$root/Mail/$folder/.#$msg";
    local($text);
    local($deleted) = 0;
    local($replace_string) = "[inclusion deleted by $0]";
    local($flag_string) = "xx$$xx";
    local($in_inclusion);

    open(MSG, $file) ||
	die "Opening $msg: $!.\n";

    while (<MSG>) {
	# I'm not using the ".." operator here because I'm not sure
 	# what it'll do if the second half of it never matches in a
 	# file.
	if (/$begin_inclusion_regexp/io) {
	    $in_inclusion++;
	}
	if ($in_inclusion) {
	    $deleted++;
	    $text .= "$flag_string\n";
	    if (/$end_inclusion_regexp/io) {
		$in_inclusion = undef;
	    }
	    next;
	}
	$text .= $_;
    }

    close(MSG) || die "Error closing $file after read: $!.\n";

    if ($deleted) {
	$text =~ s/(^|\n)($flag_string\n)+/$1$replace_string\n/gio;
	if ($debug) {
	    print STDERR "Would remove inclusions from $msg.\n";
	}
	else {
	    open(MSG, ">$file.new") ||
		die "Error opening $file.new for write: $!.\n";
	    print(MSG $text) || 
		die "Error writing to $file.new: $!.\n";
	    close(MSG) || 
		die "Error closing $file.new: $!.\n";
	    if (! $backed_up++) {
		rename($file, $delfile) || 
		    die "Error renaming $file: $!.\n";
	    }
	    rename("$file.new", $file) ||
		die "Error renaming $file.new: $!.\n";
	    if ($verbose) {
		print STDERR "Removed inclusions from $msg.\n";
	    }
	}
    }
}
