#!/usr/athena/bin/perl

#
# Scans an index file of the FTP archive (by default,
# $PPDIR/find.cache) and compares the names of those entries which
# contain ".answers" or "/answers/" to an index of approved Archive-names
# (by default, $NADIR/index), reporting files which are stored under
# invalid archive names.
#
# Currently of rather limited use, since save-faq no longer saves
# files with unapproved (e.g., misspelled) Archive-names.
#

# Process umask.  Nothing visible in this script creates files, so this
# is presumably boilerplate shared with sibling scripts -- TODO confirm.
umask(002);

# Layout of the FTP archive: the per-group and per-hierarchy trees are
# both addressed relative to $archive_loc.
$archive_loc = "/rtfm/ftp/pub/";
$by_group = "usenet-by-group";
$by_hierarchy = "usenet-by-hierarchy";

# Default input files; the -archive and -index options override them.
$archive_index = "/afs/sipb.mit.edu/project/periodic-postings/data/find.cache";
$news_answers_index_name = "/afs/sipb.mit.edu/project/periodic-postings/news.answers/index";

# Program name with any leading directory stripped, for the usage text.
($whoami = $0) =~ s,.*/,,;

$usage = "Usage: $whoami [ -index index_file ] [ -archive archive_list] [ -help]";

# Consume leading command-line options.  Parsing stops at the first
# argument that does not begin with "-" (or when @ARGV is exhausted);
# anything left in @ARGV after that is not looked at.
for (;;) {
    $_ = $ARGV[0];
    last unless (/^-/);
    shift;

    if (/^-help$/) {
        print "$usage\n";
        exit 0;
    }

    if (/^-i(|ndex)$/) {
        # The option's value is the next argument; an absent (or false)
        # value is treated as a usage error.
        die "Missing argument to $_ option.\n$usage\n"
            unless ($news_answers_index_name = shift);
        next;
    }

    if (/^-a(|rchive)$/) {
        die "Missing argument to $_ option.\n$usage\n"
            unless ($archive_index = shift);
        next;
    }

    die "Unknown option \"$_\".\n$usage\n";
}

#
# load_news_answers_index()
#
# Reads the index file named by the global $news_answers_index_name and
# records the first whitespace-separated field of each entry as a key of
# the global %news_answers_index (the value counts how many times that
# field was seen).  Lines whose first non-blank character is "#" are
# comments and are skipped, as are blank lines.
#
# Prints a "Loading ..." progress line to STDOUT.  Dies if the index
# file cannot be opened.  Takes no arguments; $_ is preserved for the
# caller.
#
sub load_news_answers_index {
    local($_);
    my @fields;

    print "Loading $news_answers_index_name...\n";
    # NOTE(review): two-argument open interprets mode characters and
    # surrounding whitespace in the file name (e.g. "-" reads STDIN).
    # Kept as-is so existing command lines behave the same.
    open(NAI, $news_answers_index_name) || die "Opening $news_answers_index_name: $!";
    while (<NAI>) {
        next if (/^\s*\#/);     # skip comments
        # Skip blank lines: split would return an empty list for them
        # and an empty-string name would be registered as approved.
        next unless (/\S/);
        @fields = split;
        $news_answers_index{$fields[0]}++;
    }
    close(NAI);
}

#
# trim_archive_index()
#
# Operates in place on $_, which the caller sets to one line of the
# archive index.  Removes, wherever it occurs in the line, a prefix of
# the form
#     $archive_loc$by_group/<letters>.answers/
# and then one of the form
#     $archive_loc$by_hierarchy/<letters>/answers/
# (<letters> being a possibly empty run of lowercase a-z), and finally
# strips the trailing newline, if there is one.  Reads the globals
# $archive_loc, $by_group and $by_hierarchy.  Takes no arguments; the
# return value is not meaningful.
#
sub trim_archive_index {
    # Lexical scratch variables, so nothing leaks into the package
    # namespace.
    my $group_root = $archive_loc . $by_group;
    my $hierarchy_root = $archive_loc . $by_hierarchy;

    # \Q...\E keeps the directory names literal, and the "." before
    # "answers" is escaped so that only a real "<name>.answers"
    # directory is stripped (not, say, "xanswers").
    s{\Q$group_root\E/[a-z]*\.answers/}{};
    s{\Q$hierarchy_root\E/[a-z]*/answers/}{};

    # chomp rather than chop: a final line without a newline must not
    # lose the last character of its name.
    chomp;
}


# Load the approved-name index, unless %news_answers_index has already
# been populated.
&load_news_answers_index() unless (%news_answers_index);

open(ARI, $archive_index) || die "Opening $archive_index: $!";
print "The following *.answers archive files are not found in $news_answers_index_name:\n";

# Walk the archive listing.  Only lines mentioning ".answers" or
# "/answers/" are of interest; each is reduced to its bare name by
# trim_archive_index (which edits $_ in place) and the untouched
# original line is echoed when that name is not in the index.
while (<ARI>) {
    next unless (/\.answers/ || /\/answers\//);
    $orig_an = $_;
    &trim_archive_index();
    print $orig_an unless ($news_answers_index{$_});
}

exit;
