#!/usr/athena/bin/perl

push(@INC, '/rtfm/mserv', 
     '/afs/sipb.mit.edu/project/periodic-postings/faq_server',
     '/afs/gza.com/development/mail-server');

require "lib.pl";
require 'header.pl';

$dirs = "/rtfm/ftp/pub/usenet-by-group /rtfm/ftp/pub/usenet-by-hierarchy";
$dirprefixpat = "^/rtfm/ftp/pub/usenet-";
$debug = 0;
$domail = 0;
$report = 0;
$prompt = 0;
$check_multiples = 0;
$modified = 0;
$cache = 1;
$new = 0;
$read = 1;
$save = 1;
$sort = 1;
$reformat = 1;
$dbdir = "../data";
$delete = 1;
$checksub = 1;
$checkfrom = 1;
$checkreply = 1;
$checkng = 1;
$checkarc = 1;
$checkmiss = 1;
$checkfreq = 1;
$valid = 1;

foreach $dir ('/usr/ucb', '/usr/bin') {
    if ($ENV{'PATH'} !~ m,(^|:)$dir($|:),) {
	$ENV{'PATH'} .= ":$dir";
    }
}

$usage = "Usage: $0 [ -new ... ] [ -days num ] [ -nocache ] [ -noread ] 
        [ -nosave ] [ -nosort ] [ -noreformat ] [ -nodelete ] [ -report ] 
        [ -nochecksub ] [ -nocheckfrom ] [ -nocheckreply ] [ - nocheckng ]
        [ -nocheckarc ] [ -nocheckmiss ] [ -nocheckfreq ] [ -novalid ]
        [ -repfile filename ] [ -mail ] [ -prompt ] [ -start num ] [
	-end num ] [ -num num ] [ -debug ] [ -db filename ] [ -dbdir
	directory ] [ -faqdirs directories ]\n";

while ($_ = $ARGV[0], /^-/) {
    shift;
    if (/^-new$/) {
	$new++;
	next;
    }
    elsif (/^-days$/) {
        (@ARGV > 0) || die "Missing argument to \"-days\".\n$usage";
        $new += shift;
        next;
    }
    elsif (/^-nocache$/) {
	$cache = 0;
	next;
    }
    elsif (/^-noread$/) {
	$read = 0;
	next;
    }
    elsif (/^-nosave$/) {
	$save = undef;
	next;
    }
    elsif (/^-nosort$/) {
	$sort = undef;
	next;
    }
    elsif (/^-noreformat$/) {
	$reformat = undef;
	next;
    }
    elsif (/^-nodelete$/) {
	$delete = undef;
	next;
    }
    elsif (/^-nochecksub$/) {
	$checksub = undef;
	next;
    }
    elsif (/^-nocheckfrom$/) {
	$checkfrom = undef;
	next;
    }
    elsif (/^-nocheckreply$/) {
	$checkreply = undef;
	next;
    }
    elsif (/^-nocheckng$/) {
	$checkng = undef;
	next;
    }
    elsif (/^-nocheckarc$/) {
	$checkarc = undef;
	next;
    }
    elsif (/^-nocheckmiss$/) {
	$checkmiss = undef;
	next;
    }
    elsif (/^-nocheckfreq$/) {
	$checkfreq = undef;
	next;
    }
    elsif (/^-novalid$/) {
	$valid = undef;
	next;
    }
    elsif (/^-report$/) {
	$report++;
	next;
    }
    elsif (/^-repfile$/) {
	(@ARGV > 0) || die "Missing argument to \"-repfile\".\n$usage";
	$repfile = shift;
	next;
    }
    elsif (/^-mail$/) {
	$domail++;
	next;
    }
    elsif (/^-prompt$/) {
	$prompt++;
	next;
    }
    elsif (/^-multiples$/) {
	$check_multiples++;
	next;
    }
    elsif (/^-start$/) {
	(@ARGV > 0) || die "Missing argument to \"-start\".\n$usage";
	$minrecord = shift;
	next;
    }
    elsif (/^-end$/) {
	(@ARGV > 0) || die "Missing argument to \"-end\".\n$usage";
	$maxrecord = shift;
	next;
    }
    elsif (/^-num$/) {
	(@ARGV > 0) || die "Missing argument to \"-num\".\n$usage";
	$minrecord = $maxrecord = shift;
    }
    elsif (/^-debug$/) {
	$debug++;
	next;
    }
    elsif (/^-db$/) {
	(@ARGV > 0) || die "Missing argument to \"-db\".\n$usage";
	$dbfile = shift;
	next;
    }
    elsif (/^-dbdir$/) {
	(@ARGV > 0) || die "Missing argument to \"-dbdir\".\n$usage";
	$dbdir = shift;
	next;
    }
    elsif (/^-faqdirs$/) {
	(@ARGV > 0) || die "Missing argument to \"-faqdirs\".\n$usage";
	$dirs = shift;
	next;
    }
    else {
	die "Unknown argument \"$_\".\n$usage";
    }
}

$dirpat = "^(" . join('|', split(' ', $dirs)) . ")";

if ($prompt || $check_multiples) {
    local($oldselect);

    open(PROMPTIN, "/dev/tty") || die "Opening /dev/tty: $!.\n";
    open(PROMPTOUT, ">/dev/tty") || die "Opening /dev/tty for write: $!.\n";
    $oldselect = select(PROMPTOUT); $| = 1; select($oldselect);
}
else {
    open(PROMPTIN, "/dev/null") || die "Opening /dev/null: $!.\n";
    open(PROMPTOUT, ">&STDERR") || die "Dup'ing PROMPTOUT: $!.\n";
}

if ($debug) {
    $oldselect = select(STDOUT);
    $| = 1;
    select($oldselect);
}

$dbdir =~ s,(.)/$,$1,;

($dbfile = "$dbdir/list") if (! $dbfile);

(@foo = stat($dbfile)) || die "stat'ing \"$dbfile\": $!.\n";
$listtime = $foo[9];

# check all the files you need to open to make sure you
# can open them before you put in several hours of work.

$find_file = "$dbdir/list";
open (FIND, $find_file) || die "Cannot open list\n";
close (FIND);

$find_file = "$dbdir/list.aux";
open (FIND, $find_file) || die "Cannot open list.aux\n";
close (FIND);

$find_file = "$dbdir/list.old";
open (FIND, $find_file) || die "Cannot open list.old\n";
close (FIND);

$find_file = "$dbdir/list.aux.old";
open (FIND, $find_file) || die "Cannot open list.aux.old\n";
close (FIND);

$find_file = "$dbdir/find.new";
open (FIND, $find_file) || die "Cannot open find.new\n";
close (FIND);

$find_file = "$dbdir/find.cache";
open (FIND, $find_file) || die "Cannot open find.cache\n";
close (FIND);

if ($repfile) {
    open(REPORT, ">$repfile") || die "Opening $repfile for write: $!.\n";
    close(REPORT);
}

print "Reading... " if ($debug);

@records = &lopip'read_data($dbfile);

($minrecord = 0) if (! defined($minrecord));
($maxrecord = $#records) if (! defined($maxrecord));
$goodones = $maxrecord - $minrecord + 1;

# Reformat all records to make sure that all are correctly formatted.
# 
# Remember that reformat_entry dies on a malformatted entry, so we want that
# to happen now, if necessary, rather than after a whole lot of work
# (interactive or otherwise) has been done on other entries in the list).

if ($reformat) {
    print "reformatting... " if ($debug);

    for ($i = $minrecord; $i <= $maxrecord; $i++) {
	($records[$i] = &lopip'reformat_entry($records[$i])) ||
	    die "Reformat failed.\n";
    }
}

printf("%d records.\n", @records + 0) if ($debug);

@wrongsub = ();
@wrongfrom = ();
@wrongng = ();
@no_archive_name = ();
@missing = ();
@rightsub = ();
@rightfrom = ();
@rightng = ();

&get_file_list();

print "Checking...\n" if ($debug);

$SIG{'INT'} = 'sigint';

sub sigint {
    warn "SIGINT (interrupt) received.  Stopping check after current entry.\n";
    $signal_received++;
}

for ($i = $minrecord; $i <= $maxrecord; $i++) {
    last if ($signal_received);

    print "$i " if ($debug && ($i % 10 == 0));

    $foundfile = &find_file($i);

    if (! $foundfile) {
	printf("[%s] Missing %s/%s\n", 
	       &lopip'field_value($records[$i], 'id'),
	       &lopip'field_value($records[$i], 'newsgroups'),
	       &lopip'field_value($records[$i], 'subject'))
	    if ($debug > 1);
	$goodones--;
	push(@missing, $i);
	&error($i, "\t(a) The posting does not exist, or
\t(b) The Subject: field in the entry is incorrect, or
\t(c) The entry is new enough in the list that my archiver
\t    hasn't caught the posting yet (i.e. there's really
\t    nothing wrong), or
\t(d) The posting is in a distribution that my site does
\t    not receive and therefore cannot be archived by my
\t    workstation.");
    }
    if ($read) {
	&check($i, $foundfile);
    }
}

$SIG{'INT'} = 'DEFAULT';

print "\n" if ($debug);

 if ($report) {
     if ($repfile) {
	 open(REPORT, ">$repfile") || die "Opening $repfile for write: $!.\n";
     }
     else {
	 $repfile = "-";
	 open(REPORT, ">&STDOUT") || die "Dup'ing stdout: $!.\n";
     }

     printf(REPORT "%d entries total, %d valid.\n\n",
	    $maxrecord - $minrecord + 1, $goodones)
	 || (unlink($repfile), die "Writing to $repfile: $!.\n");

     if (@wrongsub > 0) {
	 printf(REPORT "Wrong subject (%d):\n\n", @wrongsub + 0)
	     || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 for ($i = 0; $i < @wrongsub; $i++) {
	     print(REPORT "[", 
		   &lopip'field_value($records[$wrongsub[$i]], 'id'),
		   "] $records[$wrongsub[$i]]")
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	     print REPORT "Actual Subject: $rightsub[$i]\n\n"
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 }
     }

     if (@wrongfrom > 0) {
	 printf(REPORT "Wrong from (%d):\n\n", @wrongfrom + 0)
	     || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 for ($i = 0; $i < @wrongfrom; $i++) {
	     print(REPORT "[",
		   &lopip'field_value($records[$wrongfrom[$i]], 'id'),
		   "] $records[$wrongfrom[$i]]")
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	     print REPORT "Actual From: $rightfrom[$i]\n\n"
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 }
     }

     if (@wrongreply > 0) {
	 printf(REPORT "Wrong Reply-To (%d):\n\n", @wrongreply + 0)
	     || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 for ($i = 0; $i < @wrongreply; $i++) {
	     print(REPORT "[",
		   &lopip'field_value($records[$wrongreply[$i]], 'id'),
		   "] $records[$wrongreply[$i]]")
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	     print REPORT "Actual Reply-To: $rightreply[$i]\n\n"
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 }
     }

     if (@wrongng > 0) {
	 printf(REPORT "Wrong newsgroups (%d):\n\n", @wrongng + 0)
	     || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 for ($i = 0; $i < @wrongng; $i++) {
	     print(REPORT "[",
		   &lopip'field_value($records[$wrongng[$i]], 'id'),
		   "] $records[$wrongng[$i]]")
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	     print REPORT "Actual Newsgroups: $rightng[$i]\n\n"
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 }
     }

     if (@no_archive_name > 0) {
	 printf(REPORT "No Archive-name line (%d):\n\n", @no_archive_name + 0)
	     || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 for ($i = 0; $i < @no_archive_name; $i++) {
	     print(REPORT "[",
		   &lopip'field_value($records[$no_archive_name[$i]], 'id'),
		   "] $records[$no_archive_name[$i]]\n")
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 }
     }

     if (@missing > 0) {
	 printf(REPORT "Missing (%d):\n\n", @missing + 0)
	     || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 for ($i = 0; $i < @missing; $i++) {
	     print(REPORT "[",
		   &lopip'field_value($records[$missing[$i]], 'id'),
		   "] $records[$missing[$i]]\n")
		 || (unlink($repfile), die "Writing to $repfile: $!.\n");
	 }
     }

     close(REPORT);
 }

if ($save) {
    if ($sort) {
	print "Sorting...\n" if ($debug);
	&lopip'sort_data(*records);
    }

    print "Saving...\n" if ($debug);
    $outfile = $dbfile;
    (@foo = stat($dbfile)) || die "stat'ing \"$dbfile\": $!.\n";
    $newtime = $foo[9];
    if ($newtime != $listtime) {
	warn "Database file has been modified.\n
Writing to $dbfile.conflict instead.\n";
	$outfile = "$dbfile.conflict";
    }
    if (! &lopip'write_data($outfile, *records)) {
	if (! &lopip'write_data("/tmp/lopip.$$", *records)) {
	    die "Database write to $outfile failed.\n";
	}
	else {
	    die "Database write to $outfile failed; saved in /tmp/lopip.$$.\n";
	}
    }
    exit 1 if ($outfile eq "$dbfile.conflict");
}

sub check {
    local($num, $file) = @_;
    local($f, $n, @s, $an, $naan, @s_regexp);
    local($summary);
    local($f2, $n2, $s2);
    local($errors) = "";
    local($date);
    local($archive_name, $news_answers_archive_name);
    local($next_line);
    local(%headers, %auxheaders);
    local($fhandle);
    local($posting_reply_to, $lopip_reply_to);
    local($freq, $lopip_freq);

    if (&lopip'is_special($records[$num], 'subject')) {
	print "Skipping special $num\n" if ($debug);
	return;
    }

    $f = &lopip'field_value($records[$num], 'from');
    $n = &lopip'field_value($records[$num], 'newsgroups');
    @s_regexp = @s = &lopip'field_values($records[$num], 'subject');
    $an = &lopip'field_value($records[$num], 'archive-name');
    $naan = &lopip'field_value($records[$num], 'news-answers-archive-name');
    $lopip_freq = &lopip'field_value($records[$num], 'frequency');

    for (@s_regexp) {
	s/(\W)/\\$1/g;
	s/\\\*/\.\*/g;
	$_ = "^" . $_ . "\$"; # was "$"
    }

    if ($file) {
	if ($file =~ /\.Z$/) {
	    if (! ($fhandle = &do_zcat($file))) {
		warn "Zcat of \"$file\" failed: $!.\n";
		return;
	    }
	}
	else {
	    if (! open(ARTICLE, $file)) {
		warn "Opening \"$file\": $!.\n";
		return;
	    }
	    $fhandle = "ARTICLE";
	}

	$last_lf = 0;

	($next_line, %headers) = &header'parse(undef, "main'$fhandle");
	($next_line, %auxheaders) = &header'parse($next_line, "main'$fhandle");
	close($fhandle);

	$f2 = &header'field_value('from', %headers);
	$s2 = &header'field_value('subject', %headers);
	$n2 = &header'field_value('newsgroups', %headers);
	$date = &header'field_value('date', %headers);
	$archive_name = &header'field_value('archive-name', %auxheaders) ||
	    &header'field_value('archive-name', %headers);
	$news_answers_archive_name = 
	    &header'field_value('news-answers-archive-name', %auxheaders) ||
	    &header'field_value('news-answers-archive-name', %headers);
	$summary = &header'field_value('summary', %headers);
	$freq =
	    &header'field_value('posting-frequency', %auxheaders) ||
	    &header'field_value('posting-frequency', %headers);

	if (! $f2) {
	    warn "Missing From: - field in \"$file\".\n";
	    return;
	}
	if (! $n2) {
	    warn "Missing Newsgroups: - field in \"$file\".\n";
	    return;
        }
	if (! $s2) {
	    warn "Missing Subject: - field in \"$file\".\n";
            return;
        }

	if ($summary && 
	    &lopip'field_value($records[$num], "Summary") ne $summary) {
	    ($records[$num] = &lopip'set_field($records[$num], "Summary", 
					       $summary, 1)) ||
		die "set_field of Summary failed.\n";
	}

	if ($date && (&lopip'field_value($records[$num], "Date") ne $date)) {
	    ($records[$num] = &lopip'set_field($records[$num], "Date", 
					       $date, 1)) ||
		die "set_field of Date failed.\n";
	    &posting_record($num, $date);
	}

	if ($archive_name && ($an ne $archive_name)) {
	    ($records[$num] = &lopip'set_field($records[$num], "Archive-name",
					       $archive_name, 1))
		|| die "set_field of Archive-name failed.\n";
	    $an = $archive_name;
	}

	if ($news_answers_archive_name &&
	    ($naan ne $news_answers_archive_name)) {
	    ($records[$num] = &lopip'set_field($records[$num],
					       "News-answers-archive-name",
					       $news_answers_archive_name, 1)) ||
			die "set_field of News-answers-archive-name failed.\n";
	    $naan = $news_answers_archive_name;
	}

	$records[$num] = 
	    &set_secondary_archive_names($records[$num], *headers,
					 *auxheaders);

	if ($freq && ($freq ne $lopip_freq) &&
            $checkfreq &&
	    !&lopip'is_valid($records[$num], 'frequency')) {
	    print "Wrong Frequency in $num\n" if ($debug);
	    if (&confirm('Frequency', $lopip_freq, $freq, $num, 0)) {
		$lopip_freq = $freq;
	    }
	}
	    
	if (($lopip_reply_to = &lopip'field_value($records[$num], 
						  'reply-to')) &&
             $checkreply && 
	    ($posting_reply_to = &header'field_value('reply-to', %headers)) &&
	    ($lopip_reply_to ne $posting_reply_to) &&
	    (! &lopip'is_valid($records[$num], 'reply-to'))) {
	    print "Wrong Reply-To in $num\n" if ($debug);
	    if (! &confirm('Reply-To', $lopip_reply_to, $posting_reply_to,
			   $num, 0)) {
		$errors .= "\t\"Reply-To:\" line in entry does not match \"Reply-To:\" line
\tin posting (\"$lopip_reply_to\" vs. \"$posting_reply_to\").\n";
		push(@wrongreply, $num);
		push(@rightreply, $posting_reply_to);
	    }
	    else {
		$lopip_reply_to = $posting_reply_to;
	    }
	}

	if (($f ne $f2) && 
            $checkfrom &&  
            (&namestrip($f) ne &namestrip($f2)) && 
            !&lopip'is_valid($records[$num], "From")) {
	    print "Wrong From in $num\n" if ($debug);
	    if (! &confirm("From", $f, $f2, $num, 0)) {
		$errors .= "\t\"From:\" line in entry does not match \"From:\" line
\tin posting (\"$f\" vs. \"$f2\").\n";
		push(@wrongfrom, $num);
		push(@rightfrom, $f2);
	    }
	    else {
		$f = $f2;
	    }
	}
	if (($n ne $n2) && 
            $checkng && !&lopip'is_valid($records[$num], "Newsgroups")) {
	    print("Wrong Newsgroups in $num (", 
		  &changed_ngs($n, $n2), ")\n") if ($debug);
	    if (! &confirm("Newsgroups", $n, $n2, $num, 0)) {
		$errors .= "\t\"Newsgroups:\" line in entry does not match \"Newsgroups:\" line
\tin posting (\"$n\" vs. \"$n2\").\n";
		push(@wrongng, $num);
		push(@rightng, $n2);
	    }
	    else {
		$n = $n2;
	    }
	}
	if (!&matches_oneof($s2, @s_regexp) && $checksub && 
	    !&lopip'is_valid($records[$num], "Subject")) {
	    print "Wrong Subject in $num\n" if ($debug);
	    if (! &multi_confirm("Subject", $s2, $num, 0, @s)) {
		$errors .= "\t\"Subject:\" lines in entry does not match \"Subject:\" line
\tin posting (\"$s[0]\" vs. \"$s2\").\n";
		push(@wrongsub, $num);
		push(@rightsub, $s2);
	    }
	}
    }
    if (($n =~ /news\.answers/) && (! ($an || $naan)) &&
        $checkarc && 
	!&lopip'is_valid($records[$num], "Newsgroups")) {
	print "No Archive-name in news.answers posting $num\n"
	    if ($debug);
	if (! &confirm("Archive-name", $num, 1)) {
	    $n2 = $n;
	    $n2 =~ s/(^|,)news\.answers($|,)/$1$2/;
	    $n2 =~ s/,,/,/;
	    $n2 =~ s/,$//;
	    if (! &confirm("Newsgroups", $n, $n2, $num, 0)) {
		$errors .= "\tNo \"Archive-name:\" line in posting that is supposed to appear\n\tin news.answers.\n";
		push(@no_archive_name, $num);
	    }
	}
    }
    if ($errors ne "") {
	&error($num, $errors);
	$goodones--;
    }
}

sub confirm {
    local($field, $old, $new, $number, $inaux);
    local($do);
    local($replace);

    if (@_ == 5) {
	($field, $old, $new, $number, $inaux) = @_;
	$replace++;
    }
    elsif (@_ == 3) {
	($field, $number, $inaux) = @_;
    }
    else {
	die "confirm called with wrong number of arguments";
    }

    return 0 if (! $prompt);

    if ($replace) {
	print PROMPTOUT "Replace \"$old\" \nwith \"$new\" in\n$records[$number]\n? ";
	$do = <PROMPTIN> =~ /^y/i;
    }
    else {
	print PROMPTOUT "Set \"$field\" field in\n$records[$number]\n(give new value or [CR])? ";
	$do = <PROMPTIN>;
	chop $do;
    }

    if ($replace) {
	if ($do) {
	    ($records[$number] = &lopip'set_field($records[$number], $field, 
						  $new, $inaux)) ||
		die "set_field of $field failed.\n";
	    print PROMPTOUT "Mark it important? ";
	    if (<PROMPTIN> =~ /^y/i) {
		$records[$number] = 
		    &lopip'mark_important($records[$number], $field);
	    }
	    return 1;
	}
	elsif ($valid) {
	    print PROMPTOUT "Mark it valid? ";
	    if (<PROMPTIN> =~ /^y/i) {
		$records[$number] = &lopip'mark_valid($records[$number], 
						      $field);
		return 1;
	    }
	}
    }
    else {
	if ($do =~ /./) {
	    ($records[$number] = &lopip'set_field($records[$number], $field, 
						  $do, $inaux)) ||
		die "set_field of $field failed.\n";
	    print PROMPTOUT "Mark it important? ";
	    if (<PROMPTIN> =~ /^y/i) {
		$records[$number] = 
		    &lopip'mark_important($records[$number], $field);
	    }
	    return 1;
	}
    }

    0;
}

sub multi_confirm {
    local($field, $new, $number, $inaux, @old) = @_;

    return 0 if (! $prompt);

    print PROMPTOUT "Replace one of these:\n";
    for (0..$#old) {
	 print PROMPTOUT " ", $_ + 1, ". ", $old[$_], "\n";
     }
    print PROMPTOUT "with \"$new\" in\n$records[$number]\n(enter number to replace, 'a' to add it as a new one, anything else to\nleave things the same)? ";
    $do = <PROMPTIN>;

    if ($do =~ /^a/i) {
	($records[$number] = &lopip'new_field($records[$number], $field, $new))
	    || die "net_field of $field failed.\n";
	print PROMPTOUT "Mark it important? ";
	if (<PROMPTIN> =~ /^y/i) {
	    $records[$number] = &lopip'mark_important($records[$number],
						      $field, @old + 1);
	}
	return 1;
    }
    elsif ($do =~ /^Q$/) {
        $signal_received++;        
    }
    elsif ($do =~ /^[0-9]+$/) {
	if (($do > 0) && ($do <= @old)) {
	    ($records[$number] = 
	     &lopip'set_field_n($records[$number], $field, $new, 0, $do)) ||
		 die "set_field_n of $field occurrence $do failed.\n";
	    print PROMPTOUT "Mark it important? ";
	    if (<PROMPTIN> =~ /^y/i) {
		$records[$number] = &lopip'mark_important($records[$number],
							  $field, $do);
	    }
	    return 1;
	}
	else {
	    warn "Doing nothing because the number you entered was out of range.\n";
	}
    }
    
    if ($valid) {
        print PROMPTOUT "Mark it valid (actually, this prompt is a placebo to make this look like\nall the other times you're prompted for stuff)? ";
        $do = <PROMPTIN>;
    }

    0;
}

sub error {
    local($num, $error) = @_;
    local($rec) = $records[$num];
    local($*) = 1;
    local($addr) = &lopip'field_value($records[$num], "Reply-To") || 
	&lopip'field_value($records[$num], 'from');
    local($subject);

    return if (! $domail);

    $rec =~ s/^/\t/;
    $rec =~ s/\n/\n\t/g;

    if ($debug > 1) {
	open(MAIL, "|cat");
    }
    else {
	open(MAIL, "|/usr/lib/sendmail -oi -t");
    }

    $subject = sprintf("\"%s\" in %s", 
		       &lopip'field_value($records[$num], 'subject'),
		       &lopip'field_value($records[$num], 'newsgroups'));

    print MAIL <<EOF;
From: \"Jonathan I. Kamens\" <jik@athena.mit.edu>
To: $addr
Subject: \"List of Periodic Informational Postings\" problem -- $subject

Greetings.  I have taken over from Rich Kulawiec the maintenance of
the \"List of Periodic Informational Postings\" that is posted
regularly in the Usenet newsgroups news.lists and news.answers.
As the title implies, his posting contains a list of the various
informational postings that appear periodically on the USENET.

As part of the maintenance of the list, I am trying to confirm the
validity of each entry in the list.  I am doing so by comparing the
entries in the list to the contents of the automatically generated
posting archive on rtfm.mit.edu [18.172.1.27], which is
accessible either via anonymous ftp in /pub/usenet or by mail, through
requests sent to mail-server@rtfm.mit.edu.

As far as I can determine, this entry:

$rec
is incorrect for the following reason:

$error

I would appreciate it if you would respond to this mail and indicate
to me whether or not there really is an error in the list, and if
there is, what I should do in order to correct it.  Note that it is
extremely important for the entry in the list to be EXACTLY correct,
including the From line exactly as it appears in the posting, and the
Subject line with correct capitalization.  Please let me know of ALL
corrections that need to be made.

Please be sure that I can tell which posting you are talking about
when you respond to this mail.

Thank you for your assistance.

Note that this message was generated automatically.

-- 
Jonathan Kamens						jik@MIT.Edu
MIT Information Systems/Athena		    Moderator, news.answers
	      (Send news.answers-related correspondence
		  to news-answers-request@MIT.Edu.)
EOF

    close(MAIL);
}

sub get_file_list {
    local($prefix, $newsgroup, $rest);
    local($start_time);
    local($find_file);
    local($new_opt);
    local($what);

    $start_time = time();
    if ($cache && (! $new) && (-f "$dbdir/find.cache") &&
	(-M "$dbdir/find.cache" < 1)) {
	print "Parsing find cache..." if ($debug);
	$find_file = "$dbdir/find.cache";
	open(FIND, $find_file) || die "Opening $find_file: $!.\n";
	$what = $find_file;
    }
    else {
	print "Running \"find\" on archive and parsing..." if ($debug);
	if ($new) {
	    $find_file = "$dbdir/find.new";
	    $new_opt = "-mtime -$new";
	}
	else {
	    $find_file = "$dbdir/find.cache";
	}
	unlink($find_file);
	$what = "find $dirs -type f \! -name '.#*' $new_opt -print | tee $find_file";
	open(FIND, "$what|") || die "Running $what: $!.\n";
    }
    while (<FIND>) {
	if (! m,$dirpat/([^/]+)/(.+\n),) {
	    warn "File doesn't match expected pattern:\n\t$_";
	    next;
	}
	$prefix = $1;
	$newsgroup = $2;
	$rest = $3;
	$newsgroup =~ s,\.,/,g;
	($rest =~ m,(.*)/,) && ($newsgroup .= "/" . $1);
	$filelist{$newsgroup} .= $_;
    }
    close(FIND) || die "Closing $what: $!.\n";
    printf("done (%d seconds).\n", time() - $start_time)
	if ($debug);
}

sub subject_to_file_regexp {
    local($subject) = @_;
    local($during, $before, $after);

    $subject = &lopip'protect($subject);
    $subject =~ s/([^A-Za-z0-9*])/\\$1/g;
    $subject =~ s/(^|[^*])\*($|[^*])/$1.*$2/g;

    while ($subject =~ /\*{2,}/) {
	$during = "";
	$before = $`;
	$after = $';
	for (1..length($&)) {
	    $during .= "\\*";
	}
	$subject = $before . $during . $after;
    }
    $subject = '/' . $subject . '(\.Z|$)';
}

sub short_name {
    local($file) = @_;
    local($dir, $name);

    if (! (($dir, $name) = ($file =~ m,(.*/)([^/]+)$,))) {
	$dir = "";
	$name = $file;
    }

    $name =~ s/([A-Za-z])[A-Za-z]+/$1/g;

    $dir . $name;
}

sub filename_of {
    local($file) = @_;

    $file =~ m,.*/([^/]+)$,;

    $1;
}

sub subjects_to_file_regexps {
    local(@subjects) = @_;
    local(@short_subjects);
    local($s, $i);

    @short_subjects = ();
    for (@subjects) {
	($s = $_) =~ s/([A-Za-z])[A-Za-z]+/$1/g;
	push(@short_subjects, $s);
    }
    push(@subjects, @short_subjects);
    for ($i = 0; $i < @subjects; $i++) {
	$subjects[$i] = &subject_to_file_regexp($subjects[$i]);
    }
    @subjects;
}

sub matches_oneof {
    local($_, @regexps) = @_;
    local($regexp);

    for $regexp (@regexps) {
	if (m,$regexp,) {
	    return 1;
	}
    }
    0;
}

sub locate_in_file_list {
    local($newsgroup, $filename) = @_;
    local($ngprefix, $ngpat);
    local($*);
    local(@found);

    $* = 0;

    $filename =~ s,^/,,;

    ($ngprefix = $newsgroup) =~ s,\.,/,g;
    ($ngpat = $newsgroup) =~ s,([^\w.]),\\$1,g;

    if ($filename =~ s,(.*)/,,) {
	local($tmp) = $1;

	$ngprefix .= "/" . $tmp;

	$tmp =~ s/(\W)/\\$1/g;
	$ngpat .= "/" . $tmp;
    }

    $ngpat =~ s,\.,[/.],g;

    $* = 1;

    if (wantarray) {
	while ($filelist{$ngprefix} =~ m,$dirpat/$ngpat/$filename,ig) {
	    push(@found, $&);
	}
	@found;
    }
    else {
	if ($filelist{$ngprefix} =~ m,$dirpat/$ngpat/$filename,i) {
	    $&;
	}
	else {
	    undef;
	}
    }
}

sub remove_from_file_list {
    local($file) = @_;
    local($newsgroup);
    local($*) = 1;

    ($file =~ m,(.*)/,) || return;

    $newsgroup = $1;

    $newsgroup =~ s,$dirpat/,,;

    while ($newsgroup =~ s,^([^/]+)\.([^./]+)(/|$),$1/$2$3,) {}

    $file =~ s/(\W)/\\$1/g;

    $filelist{$newsgroup} =~ s/^$file\n//g;
}

# Takes a record number in the database and a newsgroup, and tries to
# find a file matching the record in the newsgroup by archive name.
# Returns a single match or undef.

sub find_file_by_archive_name {
    local($recnum, $newsgroup) = @_;
    local($id) = &lopip'field_value($records[$recnum], 'id');
    local($archive_name_dir, $archive_name, $foundfile);

    foreach $archive_name (&newsgroup_archive_names($newsgroup)) {
	next if (! ($archive_name = &lopip'field_value($records[$recnum],
						       $archive_name)));
	# We need to protect regular expression special characters in
 	# the archive name, so they won't screw up the search in
 	# locate_in_file_list.  However, if we just backslash all
 	# special characters in the entire archive name, we could run
 	# into a problem, because locate_in_file_list backslashes
 	# special characters up to the last slash in file names it's
 	# searching for; the net result of this is that archive names
 	# with slashes in them will have everything before the last
 	# slash quoted twice -- once here, and once again by
 	# locate_in_file_list.  To get around this problem, we split
 	# the archive name at the last slash and, quote the stuff
 	# after it, and then put it back together.
	if ($archive_name =~ s,(.*/),,) {
	    $archive_name_dir = $1;
	    $archive_name =~ s/(\W)/\\$1/g;
	    $archive_name = $archive_name_dir . $archive_name . '(\.Z|$)';
	}
	return $foundfile
	    if ($foundfile = &locate_in_file_list($newsgroup, $archive_name));
    }
    return undef;
}

# Takes a record number and does the following:
# 
# I. For each newsgroup it's posted in:
#    A. Tries to find the article by archive name, unless a
#       by-archive-name file has already been found.  If it succeeds
#       and there are no Single-Subjects, skips to step II.
#    B. Tries to find the article by all Subject lines.
#    C. Checks each Single-Subject.
#          a. Prints a warning for those that match when none of the
#             Subjects did.
#          b. Prints a warning for those that don't match at all when
#             a Subject did. 
#          c. Calls multiple_matches (which either prints a warning or
#             prompts to delete the extras) for those that match more
#             than one when oen of the Subjects matched.
# II. Returns the first by-archive-name file found, or the first
#     by-subject file found, or undef.

# Variables that are capitalized are initialized at the top of the
# function and never changed.

sub find_file {
    local($RECNUM) = @_;
    local(@NEWSGROUPS);
    local(@REGEXP_SUBJECT);
    local(@SINGLE_SUBJECTS, %SINGLE_SUBJECTS, @DONT_SUBJECTS);
    local($ID) = &lopip'field_value($records[$RECNUM], 'id');
    local($re_sub);
    local($by_archive_name, $by_subject, $retval);
    local(@subject_matches, @single_matches, @short_single_matches);
    local($i);
    local(%match_names);

    @NEWSGROUPS = split(/,/, 
			&lopip'field_value($records[$RECNUM], 'newsgroups'));

    @REGEXP_SUBJECT = 
	&subjects_to_file_regexps(&lopip'field_values($records[$RECNUM], 
						      'subject'),
				  &lopip'field_values($records[$RECNUM],
						      'archive-subject'));

    for (@SINGLE_SUBJECTS = &lopip'single_subjects($records[$RECNUM])) {
	$SINGLE_SUBJECTS{$_} = &subject_to_file_regexp($_);
	$SINGLE_SUBJECTS{$_,'short'} = 
	    &subject_to_file_regexp(&short_name($_));
    }

    @DONT_SUBJECTS = 
	&subjects_to_file_regexps(&lopip'dont_subjects($records[$RECNUM]));

    # End of initialization section

    newsgroup: foreach $newsgroup (@NEWSGROUPS) {
	if (! $by_archive_name) {
	    $by_archive_name = &find_file_by_archive_name($RECNUM, $newsgroup);
	    last newsgroup if ($by_archive_name &&
			       (! scalar(keys %SINGLE_SUBJECTS)));
	}

	@subject_matches = ();
	for $re_sub (@REGEXP_SUBJECT) {
	    for (&locate_in_file_list($newsgroup, $re_sub)) {
		push(@subject_matches, $_)
		    if (! &matches_oneof($_, @DONT_SUBJECTS));
	    }
	}
	$by_subject = $subject_matches[0] 
	    if (@subject_matches && (! $by_subject));

	$i = 1;
	for (@SINGLE_SUBJECTS) {
	    @single_matches = ();
	    @short_single_matches = ();
	    for (&locate_in_file_list($newsgroup, $SINGLE_SUBJECTS{$_})) {
		push(@single_matches, $_)
		    if (! &matches_oneof($_, @DONT_SUBJECTS));
	    }
	    for (&locate_in_file_list($newsgroup, 
				      $SINGLE_SUBJECTS{$_,'short'})) {
		push(@short_single_matches, $_)
		    if (! &matches_oneof($_, @DONT_SUBJECTS));
	    }
	    if ((! @subject_matches) && 
		(@single_matches || @short_single_matches)) {
		print "[$ID] Single-Subject \"$_\" matched when Subject(s) didn't (in $newsgroup).\n";
	    }
	    elsif (@subject_matches &&
		   (! (@single_matches || @short_single_matches))) {
		print "[$ID] Single-Subject \"$_\" didn't match when Subject(s) did (in $newsgroup).\n";
	    }
	    else {
		if (@short_single_matches && (! @single_matches)) {
		    print "[$ID] Single-Subject \"$_\" had short-name matches but no long-name matches (in $newsgroup).\n";
		}
		&multiple_matches($newsgroup,
				  &lopip'is_special($records[$RECNUM],
						    'Single-Subject', $i),
				  undef, @single_matches);
		&multiple_matches($newsgroup,
				  &lopip'is_special($records[$RECNUM],
						    'Single-Subject', $i),
				  1, @short_single_matches);
	    }
	}
	continue {
	    $i++;
	}
    }

    if ($retval = ($by_archive_name || $by_subject)) {
	print "[$ID] Found $retval.\n" if ($debug > 1);
    }
    $retval;
}

sub set_secondary_archive_names {
    local($record, *headers, *auxheaders) = @_;
    local(@ngs, $ngs, $ng);
    local($first, $rest);
    local($an);

    $ngs = &header'field_value('newsgroups', %headers);

    return if (! $ngs);

    @ngs = split(/,/, $ngs);

    return if (@ngs == 0);

    foreach $ng (@ngs) {
	# Replace periods with dashes
	$ng =~ s/\./-/g;
	# Capitalize first letter and add suffix
	if ($ng =~ /./) {
	    $first = $&;
	    $rest = $';
	    $first =~ tr/a-z/A-Z/;
	    $ng = $first . $rest . "-archive-name";
	}
	$an = &header'field_value($ng, %auxheaders) ||
	    &header'field_value($ng, %headers);

	if ($an) {
	    ($record = &lopip'set_field($record, $ng, $an, 1)) ||
		die "set_field of $ng failed.\n";
	}
    }

    $record;
}

sub newsgroup_archive_names {
    local($newsgroup) = @_;
    local($cname) = $newsgroup;

    $cname =~ s/\./-/g;
    $cname .= "-archive-name";

    if ($newsgroup =~ /\.answers$/) {
	return($cname, 'archive-name');
    }
    else {
	return($cname);
    }
}

sub do_zcat {
    local($file) = @_;
    local($pid);
    
    if (! defined($pid = open(ZCATREAD, "-|"))) {
	undef;
    }
    elsif ($pid) {
	"ZCATREAD";
    }
    else {
	open(STDERR, ">/dev/null");
	exec("/usr/bin/zcat", $file);
	die "Exec'ing \"zcat\": $!.\n";
    }
}

sub delete {
    local($file) = @_;
    local($dir, $name);

    return if (! $delete);

    ($dir, $name) = ($file =~ m,(.*)/([^/]+)$,);

    rename($file, "$dir/.#$name") ||
	warn "Renaming $file to $dir/.#$name: $!.\n";
}

# Newsgroup is the newsgroup we're searching in.
# Interactive is whether or prompt interactively for the save regexp
#   (if it's false, the newest file determines the save regexp).
# Is_short_names should be set if the matches are short-name matches.
# Matches is the list of matches.  It should be either all full-name
#   matches or all short-name matches.
# 
# The short-name matches for a subject should not be passed into
# multiple_matches until the long-name matches have been passed in in
# in a previous invocation.

sub multiple_matches {
    local($newsgroup, $interactive, $is_short_names, @matches) = @_;
    local(@todelete, $tokeep, @tokeep);
    local($stderr);
    local($name);
    local(@newmatches);
    local(%names);
    local($is_multiple);

    return if (! ($debug || $check_multiples));

    if ($prompt) {
	$stderr = "PROMPTOUT";
    }
    else {
	$stderr = "STDOUT";
    }

# This code could be much simpler if we assumed that the short-name
# files in the archive always had corresponding long-name files.
# Unfortunately, although that's correct most of the time, it isn't
# correct all of the time, so we have to jump through some hoops to
# make sure that short-name files that don't have associated long-name
# files are properly deleted.

# This code here is just to set $is_multiple as a hint, so that we can
# be a little bit less verbose below about what' kept, when possible.

    for (@matches) {
	$names{&filename_of($_)}++;
    }
    $is_multiple++ if (scalar(keys %names) > 1);

    if ($check_multiples && ($is_multiple || $is_short_names)) {
	for (@matches) {
	    $name = &filename_of($_);

	    if ($mkept{$name}) {
		m,$dirprefixpat(.*),;
		print PROMPTOUT "Kept $1\n" if ($is_multiple);
	    }
	    elsif ($mdeleted{$name} || $is_short_names) {
		m,$dirprefixpat(.*),;
		print PROMPTOUT "Removed $1\n";
		&delete($_);
		&remove_from_file_list($_);
	    }
	    else {
		push(@newmatches, $_);
	    }
	}
	@matches = @newmatches;
    }

    %names = ();
    for (@matches) {
	$names{&filename_of($_)}++;
    }

    if (scalar(keys %names) < 2) {

# We add all the ones we're keeping to %mkept, even though they're not
# multiple matches, because this function may get called with
# short-name matches for them, and the code in the "if
# ($check_multiples)" block above will delete any short-name matches
# that don't appear in %mkept.
# 
# This is, unfortunately, a bit of a memory hog, because it means that
# all the matches for any posting which has Single-Subject lines must
# appear in the %mkept array.  We might have to figure out a better
# way to do this later, if it turns out that it's too much of a hog.

	for (@matches) {
	    $mkept{&filename_of($_)}++;
	    $mkept{&filename_of(&short_name($_))}++;
	}
	return;
    }

    if ($debug || $check_multiples) {
	print $stderr "Found multiple matches:\n";
	for (@matches) {
	    m,$dirprefixpat(.*),;
	    print " ", $1, "\n";
	}
    }

    if ($check_multiples) {
	if ($interactive) {
	    if ($prompt) {
		print PROMPTOUT "Keep which one(s) (regexp) (\".\" for all, return for \"$default_tokeep\")\n(doesn't have to match short names corresponding to listed long names)? ";
		$tokeep = <PROMPTIN>;
		chop $tokeep;
		($tokeep = $default_tokeep) if ($tokeep eq "");
		$default_tokeep = $tokeep;
	    }
	    else {
		warn "Keeping all because running non-interactively.\n";
		$tokeep = ".";
	    }
	}
	else {
	    local($time) = 10000; # a really big number
	    local($name) = undef;
	    for (@matches) {
		if ((-f $_) && ((-M _) < $time)) {
		    $time = (-M _);
		    $name = $_;
		}
	    }
	    if ($name) {
		$name =~ s,.*/,/,;
		$name =~ s/(\W)/\\$1/g;
		$tokeep = $name;
	    }
	    else {
		$tokeep = ".";
	    }
	}
	if ($tokeep ne ".") {
	    for (@matches) {
		if (! ($mkept{&filename_of($_)} || eval "/$tokeep/")) {
		    &delete($_);
		    &remove_from_file_list($_);
		    $mdeleted{&filename_of($_)}++;
		    $mdeleted{&filename_of(&short_name($_))}++;
		}
		else {
		    m,$dirprefixpat(.*),;
		    push(@tokeep, $1);
		    $mkept{&filename_of($_)}++;
		    $mkept{&filename_of(&short_name($_))}++;
		}
		if ($@) {
		    warn "Malformed regexp $tokeep: $@.\n";
		}
	    }
	    print PROMPTOUT "Kept @tokeep\n";
	}
    }
}

sub changed_ngs {
    local($n1, $n2) = @_;
    local(%n1, %n2);
    local(@changes);

    %n1 = &ng_array($n1);
    %n2 = &ng_array($n2);

    for (keys %n1) {
	if ($n2{$_} < $n1{$_}) {
	    push(@changes, "deleted $_");
	}
	elsif ($n2{$_} > $n1{$_}) {
	    push(@changes, "added $_");
	}
	delete $n1{$_};
	delete $n2{$_};
    }

    for (keys %n2) {
	push(@changes, "added $_");
    }

    join(", ", @changes);
}

sub ng_array {
    local(@ngs) = split(/,/, $_[0]);
    local(%n);

    for (@ngs) {
	$n{$_}++;
    }

    %n;
}

sub posting_record {
    local($num, $date) = @_;
    local($old);
    local($regdate);

    $old = &lopip'field_value($records[$num], 'Posting-Record');

    if ($old) {
	($regdate = $date) =~ s/(\W)/\\$1/g;
	return if ($old =~ /$regdate/);
	$old .= "\n\t" . $date;
	# Keep only the twelve most recent dates
	$old =~ s/(.*\n)*(([^\n]*\n){11})/$2/;
    }
    else {
	$old = $date;
    }

    $records[$num] = &lopip'set_field($records[$num], 'Posting-Record', $old,
				      1);
    $records[$num] = &lopip'mark_hidden($records[$num], 'Posting-Record');
}

sub namestrip {
    local($raw) = @_;
    local ($clean);

    $clean = $raw;
    $clean =~ s/\(.*\(.*\).*\)//;
    $clean =~ s/\(.*\)//;
    $clean =~ s/^.*\<//;
    $clean =~ s/\>.*$//;
    $clean =~ s/^ //g;
    $clean =~ s/ $//g;
    $clean =~ tr/A-Z/a-z/;
    
    return $clean;
}

