#!/usr/athena/bin/perl
# $Id: save-faq,v 1.105 2006/09/28 01:51:27 arolfe Exp $

umask(002);

push(@INC,
     '/rtfm/faq_server',
     '/rtfm/mserv');

require 'header.pl';
require 'sys/socket.ph';

@groupinfo = getgrnam('faqs');
@groupid = ($<,$groupinfo[2]);

%unchecked_hier = ('alt', 1);
%known_answers_hier = ('alt', 1, 'comp', 2, 'de', 3, 'misc', 4,
			'news', 5, 'rec', 6, 'sci', 7, 'soc', 8, 'talk', 9,
		        'humanities', 10, 'uk', 11);
%german_hier = ('de', 1);
%german_lang_hier = ('de', 1, 'at', 2);

$savedir = "/rtfm/ftp/pub";
$by_group = "usenet-by-group";
$by_hierarchy = "usenet-by-hierarchy";
$news_host = 'news.mit.edu';

$moderator_address = "news-answers-request\@mit.edu";
$checker_address = "news-answers-submit\@rtfm.mit.edu";
$news_answers_index = "/afs/sipb.mit.edu/project/periodic-postings/news.answers/index";
$prefix_length = length($savedir);
$toobig_length = 70;
#$max_length = $toobig_length + $prefix_length;
$max_length = undef;
$ERRORCODE = 255;

$verbose_link = 0;
$verbose_rename = 0;
$verbose_save = 0;
$exit_status = 0;
$compress_size = undef;
$unlink = undef;
$unlinkdirs = undef;
$default_archive_name = undef;
$submitted = undef;
$trim_headers = undef;
$quiet = undef;
$noop = undef;
$mail_reply = undef;
$other_moderated = undef;

if ($ENV{'PATH'} !~ m,(^|:)/usr/ucb($|:),) {
    $ENV{'PATH'} .= ":/usr/ucb";
}

($whoami = $0) =~ s,.*/,,;

$usage = "Usage: $whoami [ -n ] [ -verbose ] [ -nocompress ] [ -unlink ]
	[ -unlinkdirs ] [ -archive_name default_archive_name ]
	[-other_moderated] [-mail_reply] [-submitted] [-trim_mail_headers]
        [-quiet] [ -help ]
  The \"-archive_name\" option can be used to specify an archive name to
  override the contents of the \"Archive-name:\" field in the posting.";



while ($_ = $ARGV[0], /^-/) {
    shift;
    if (/^-v(|erbose)$/) {
	$verbose_save = $verbose_link = 1;
    }
    elsif (/^-n$/) {
	$noop++;
    }
    elsif (/^-nocompress$/) {
	undef $compress_size;
    }
    elsif (/^-unlink$/) {
	$unlink++;
    }
    elsif (/^-unlinkdirs$/) {
	$unlinkdirs++;
    }
    elsif (/^-(archive_name|an)$/) {
	($default_archive_name = shift) ||
	    die "Missing argument to $_ option.\n$usage\n";
    }
    elsif (/^-other_moderated$/) {
	$other_moderated ++;
    }
    elsif (/^-help$/) {
	print "$usage\n";
	exit 0;
    }
    elsif (/^-mail_reply$/) {
	$mail_reply ++;
    }
    elsif (/^-submitted$/) {
	$submitted ++;
	$mail_reply ++;
	$noop ++;
    }
    elsif (/^-t(|rim_mail_headers)$/) {
	$trim_headers ++;
    }
    elsif (/^-quiet$/) {
	$quiet ++;
    }
    else {
	die "Unknown option \"$_\".\n$usage\n";
    }
}

# convert [spc] [tab] [cr] ' " ` / \ to _ 
sub protect {
    local($str) = @_;

    $str =~ tr/ \n\t\'\`\"\\\//_/;

    return($str);
}

# removes filename and trailing slash from full pathname
sub directory_of {
    local($name) = @_[0];
    $name =~ s,/[^/]*$,,;
    $name;
}

# just filename of full path
sub filename_of {
    local($name) = @_[0];
    $name =~ s,.*/,,;
    $name;
}

# make the whole directory hierarchy for a filename
sub make_directory {
    local($file, $perms) = @_;
    local(@dirs, $dir);

    while (! -d $file) {
	push(@dirs, &filename_of($file));
	$file = &directory_of($file);
    }

    while ($dir = pop(@dirs)) {
	$file .= "/$dir";
	mkdir($file, $perms) || die "Making $file: $!.\n";
	&setgroup("$file");
    }
}
	     
# trim off... something...
sub no_prefix {
    local($name) = @_;
    local($dir) = "$savedir/";
    $dir =~ s/(\W)/$1/g;
    $name =~ s/^$dir//;
    $name;
}

# unlinks file, file.Z, and optionally all dirs
sub safeify_file {
    local($fullpath) = $_[0];

    $fullpath =~ s/\.Z$//;

    unlink("$fullpath.Z");
    unlink($fullpath);

    if ($unlinkdirs) {
	do {
	    $fullpath =~ s,/.*,,;
	} while (rmdir($fullpath));
    }
}

# does the linking or unlinking
sub make_links {
    local($fullpath) = shift @_;
    local($filename) = &filename_of($fullpath);
    local($directory) = &directory_of($fullpath);
    foreach $name (@_) {
	if ($unlink) {
	    &safeify_file($name) if (! $noop);
	    print($noop ? " + Would remove " : " + Removed ",
		  &no_prefix($name), ".\n")
		if ($verbose_link);
	}
	else {
	    if (! $noop) {
		&safeify_file($name);
		&make_directory(&directory_of($name), 0775);
# Links should be created with the same owner and group as the original file
		link($fullpath, $name) || 
		    die "Linking $name to $fullpath: $!.\n";
	    }
	    print($noop ? " + Would link " : " + Linked ",
		  &no_prefix($name), "\n   to ", &no_prefix($fullpath), "\n")
		if ($verbose_link);
	}
    }
}

sub shortname {
    local($fullpath) = $_[0];
    local($directory) = &directory_of($fullpath);
    local($filename) = &filename_of($fullpath);
    $filename =~ s/([A-Za-z])[A-Za-z]+/$1/g;
    "$directory/$filename";
}

sub save {
    local($archive_name);
    local(@bad, @linkfiles, $firstoutput, $slashoutput);
    local($fallback);
    local($suffix) = "";
    local($slashgroup);

    while ($newsgroup = shift(@newsgroups)) {
	$fallback = ($newsgroup =~ /\.answers$/);
	($slashgroup = $newsgroup) =~ tr,.,/,;
	$archive_name = &find_archive_name($newsgroup, $fallback);
	if ($archive_name) {
	    
	    $firstoutput = "$savedir/$by_group/$newsgroup/$archive_name";
	    $slashoutput = "$savedir/$by_hierarchy/$slashgroup/$archive_name";
	    last;
	}
	elsif ($fallback) {
	    push(@bad, $newsgroup);
	    next;
	}
	else {
	    $firstoutput = "$savedir/$by_group/$newsgroup/$subject";
	    $slashoutput = "$savedir/$by_hierarchy/$slashgroup/$subject";
	    last;
	}
    }
    
    if (@bad) {
	@newsgroups = (@bad, @newsgroups);
    }
    if (! $firstoutput) {
	warn "Couldn't find file name for $file_name.\n" unless $quiet;
	$exit_status++;
	return;
    }
    
    if ($unlink) {
	&safeify_file($firstoutput) if (! $noop);
	print($noop ? "Would remove " : "Removing ",
	      &no_prefix($firstoutput), ".\n") if ($verbose_save);
    }
    else {
	if (! $noop) {
	    &safeify_file($firstoutput);
	    &make_directory(&directory_of($firstoutput), 0775);
	}
	print($noop ? "Would save " : "Saving ", &no_prefix($firstoutput),
	      ".\n") if ($verbose_save);
	if (! $noop) {
	    open(OUTFILE, ">$firstoutput") || 
		die "Opening $firstoutput for write: $!.\n";
	    print OUTFILE $saved;
	    while (<STDIN>) {
		print OUTFILE || die "Writing $firstoutput: $!.\n";
	    }
	    close(OUTFILE) || die "Closing $firstoutput";
	    &setgroup("$firstoutput");
	    if (defined($compress_size) && 
		(-s $firstoutput >= $compress_size) &&
		&compress($firstoutput)) {
		$suffix = ".Z";
		$firstoutput .= $suffix;
		$slashoutput .= $suffix;
		print " + Compressed it.\n" if ($verbose_save);
	    }
	}
	elsif (defined($compress_size)) {
	    local($count);
	    local($_);
	    while (<STDIN>) {
		$count += length($_);
	    }
	    if ($count >= $compress_size) {
		$suffix = ".Z";
		$firstoutput .= $suffix;
		$slashoutput .= $suffix;
		print " + Would compress it.\n" if ($verbose_save);
	    }
	}
    }
    @linkfiles = ();
    push(@linkfiles, $slashoutput);
    if (defined($max_length) &&
	(length($firstoutput) > $max_length) && (! $archive_name)) {
	push(@linkfiles, &shortname($firstoutput))
	    if ($firstoutput ne &shortname($firstoutput));
	push(@linkfiles, &shortname($slashoutput))
	    if ($slashoutput ne &shortname($slashoutput));
    }
    
    foreach $newsgroup (@newsgroups) {
	$fallback = ($newsgroup =~ /\.answers$/);
	$archive_name = &find_archive_name($newsgroup, $fallback);
	($slashgroup = $newsgroup) =~ tr,.,/,;
	if ($archive_name) {
	    push(@linkfiles, "$savedir/$by_group/$newsgroup/$archive_name$suffix");
	    push(@linkfiles, "$savedir/$by_hierarchy/$slashgroup/$archive_name$suffix");
	}
	elsif ($error_message{"Whitespace in archive name"}) {
	    $exit_status++;
	}
	elsif ($fallback) {
	    &missing_archive_name();
	    next;
	}
	else {
	    push(@linkfiles, "$savedir/$by_group/$newsgroup/$subject$suffix");
	    push(@linkfiles, "$savedir/$by_hierarchy/$slashgroup/$subject$suffix");
	}
    }
    &make_links($firstoutput, @linkfiles);
    &index($suffix);
}

sub compress {
    local($file) = @_;
    local($data);
    
    # Use a pipe to get exit status of compress
    
    if (! (open(COMPRESS, "compress < \'$file\'|") &&
	   open(COMPRESSOUT, ">$file.Z"))) {
	undef;
    }
    else {
	while (read(COMPRESS, $data, 1024)) { # arbitrary block size
	    print COMPRESSOUT $data || return undef;
	}
	close(COMPRESSOUT);
	close(COMPRESS);
	if (! $?) {
	    unlink($file);
	    1;
	}
	else {
	    undef;
	}
    }
}

sub parse_newsgroups {
    local($group_str) = @_;
    local(@groups, %groups, @newgroups);
    local($_);
    
    $group_str =~ s/\s*$//;
    $group_str =~ s/^Newsgroups:\s*//;
    
    @groups = split(/,/, $group_str);
    for (@groups) {
	if (! $_) {
	    warn "$file_name contains null newsgroup on Newsgroups line.\n" unless $quiet;
	    $error_message{"null newsgroup"} =
		"* There is an empty newsgroup name (two commas in a row) in the\n     Newsgroups line of your article.\n";
	    $exit_status++;
	    next;
	}
	if (! /\./) {
	    warn "$file_name contains a newsgroup without dots (\"$_\") on Newsgroups line.\n" unless $quiet;
	    $error_message{"dotless newsgroup \"$_\""} =
		"* Your article contains a newsgroup without dots in its Newsgroups\n     line: $_.\n";
	    $exit_status++;
	    next;
	}

	if (/[\s]/) {
	    warn "$file_name has whitespace in its Newsgroups line: $_.\n" unless $quiet;
	    $error_message{"whitespace in Newsgroups"} =
		"* Your article has an improperly formatted Newsgroups line which\n     contains whitespace.  Newsgroups should be separated by commas,\n     but no spaces or tabs.  There should be exactly one\n     space in the Newsgroups line, right after the colon.\n";
	    $exit_status++;
	    s/\s//g;
	}

	push(@newgroups, $_) if (! $groups{$_}++);
    }
    if ($mail_reply) {
	local ($answers_seen) = undef;
	for $group (@newgroups) {
	    if ($group =~ /answers$/) {
		$answers_seen ++;
	    }
	    else {
		if ($answers_seen) {
		    $error_message{"not eol"} =
			"* The *.answers groups are not listed last in the Newsgroups line.\n";
		    $exit_status++;
		}
	    }
	}
    }
    return(@newgroups);
}

sub parse_subject {
    local($sub_str) = @_;
    local($*) = 1;
    
    $sub_str =~ s/\s*$//;
    $sub_str =~ s/^Subject:\s*//;
    
    return(&protect($sub_str));
}

sub index {
    local($suffix) = @_;
    local($subject, $newsgroups);
    local($archive_name);
    local($fallback);
    local($slashgroup);
    local($dir, $_);
    
    $subject = &header'field_value('subject', %headers);
    $newsgroups = &header'field_value('newsgroups', %headers);
    
    return if (! ($subject && $newsgroups));
    
    foreach $newsgroup (&parse_newsgroups($newsgroups)) {
	($slashgroup = $newsgroup) =~ tr,.,/,;
	$fallback = ($newsgroup =~ /\.answers$/);
	$archive_name = &find_archive_name($newsgroup, $fallback);
	if ($unlink) {
	    $archive_name =~ s/(\W)/\\$1/g;
	}

	if ($archive_name) {
	    for $dir ("$by_group/$newsgroup", "$by_hierarchy/$slashgroup") {
		if ($unlink) {
		    if (! $noop) {
			open(INDEXIN, "$savedir/$dir/index") || next;
			open(INDEXOUT, ">$savedir/$dir/index.new") ||
			    die "Opening $dir/index.new: $!.\n";
			while (<INDEXIN>) {
			    if (/^$archive_name(\.Z|)\t/) {
				$_ = <INDEXIN>;
				next;
			    }
			    print INDEXOUT || 
				die "Writing to $dir/index.new: $!.\n";
			}
			close(INDEXIN);
			close(INDEXOUT) || die "Closing $dir/index.new: $!.\n";
			&setgroup("$savedir/$dir/index");
			rename("$savedir/$dir/index",
			       "$savedir/$dir/.#index");
			rename("$savedir/$dir/index.new",
			       "$savedir/$dir/index") ||
				   die "Renaming $dir/index.new to $dir/index: $!.\n";
		    }
		    print($noop ? " + Would unindex from " : 
			  " + Unindexed from ", $dir, "/index.\n")
			if ($verbose_save);
		}
		else {
		    if (! $noop) {
			open(INDEX, ">>$savedir/$dir/index") ||
			    die "Opening $dir/index: $!.\n";
			print INDEX
			    "$archive_name$suffix\t$subject\n\t$newsgroups\n"
				|| die "Writing $dir/index: $!.\n";
			close(INDEX) || die "Closing $dir/index: $!.\n";
			&setgroup("$savedir/$dir/index");
		    }
		    print($noop ? " + Would index in " : " + Indexed in ",
			  $dir, "/index.\n") if ($verbose_save);
		}
	    }
	}
    }
}

unshift(@ARGV, "-") if (@ARGV == 0);

open(SAVESTDIN, "<&STDIN") || die "Dup\'ing stdin";

while ($file_name = shift) {
    $newsgroups = undef;
    @newsgroups = undef;
    $subject = undef;
    %error_message = undef;
    $missing_required_field = undef;
    
    close(STDIN);
    open(STDIN, ($file_name eq "-" ? "<&SAVESTDIN" : $file_name)) ||
	die "Opening $file_name: $!.\n";
    
    $next_line = undef;

    # remove SMTP From line from start, if there is one
    while (<STDIN>) {
	$next_line = $_ unless (/^>?\s*From /);
	last;
    }
    
    if ($trim_headers) {
	($next_line, %mailheaders) = &header'parse($next_line, "main'STDIN");
    }    

    ($next_line, %headers) = &header'parse($next_line, "main'STDIN");
    $saved = $header'text_read;
    ($next_line, %auxheaders) = &header'parse($next_line, "main'STDIN");
    $saved .= $header'text_read . $next_line;

    $newsgroups = &header'field_value('newsgroups', %headers);
    $subject = &header'field_value('subject', %headers);
    $from = (&header'field_value('from',%headers) || &header'field_value('reply-to',%headers));
    $followup_to = &header'field_value('followup-to', %headers);

# check from; according to RFC 1036, space through tilde (except ( ) < > )
# are valid in name fields, and I'm allowing the same in addresses (except
# for space)
#if (($from !~ /^\s*[\!-\'\*-\;\=\?-\~]+\@[\!-\'\*-\-\/-\;\=\?A-\~]+\.[\!-\'\*-\;\=\?A-\~]+(\s+\([ -\'\*-\;\=\?-\~]*\))?\s*$/) && ($from !~ /^\s*[ -\'\*-\;\=\?-\~]*\s+\<[\!-\'\*-\;\=\?-\~]+\@[\!-\'\*-\-\/-\;\=\?A-\~]+\.[\!-\'\*-\;\=\?A-\~]+\>\s*$/)) {

# Too many addresses come in with non-RFC 1036-compliant addresses; do
# more basic checking instead (allow anything in names)
if (($from !~ /^\s*[\!-\'\*-\;\=\?-\~]+\@[\!-\'\*-\-\/-\;\=\?A-\~]+\.[\!-\'\*-\;\=\?A-\~]+(\s+\(.*\))?\s*$/) && ($from !~ /^.*\<[\!-\'\*-\;\=\?-\~]+\@[\!-\'\*-\-\/-\;\=\?A-\~]+\.[\!-\'\*-\;\=\?A-\~]+\>\s*$/)) {


        warn "$file_name has no From or Reply-To with a valid address.\n" unless $quiet;
        $error_message{"no From"} = "* Your article does not contain a From or Reply-To header with a valid \n     address.  This might be because you didn't provide a complete\n     email address, or because the header is improperly formatted or\n     contains illegal characters.\n";
	$missing_required_field ++;
	$exit_status++;
    }
    
# Check supersedes
    if (&header'field_value('supercedes', %headers)) {
	# This is a relatively common error, so why not check for it?
	# Checking is cheap.
	
	warn "$file_name contains misspelled \"Supercedes\" header.\n" unless $quiet;
	$error_message{"misspelled Supersedes"} =
	    "* Your article contains a \"Supercedes\" header.  The correct spelling\n     is \"Supersedes:\"\n";
	$exit_status++;
    }
    
# Check newsgroups
    # The logic here is really gross and needs to be cleaned up,
    # but I don't have time to do that right now. -- jik

    # Cleaned it up 06/13/97 - pgreene

    if (! $newsgroups) {
	warn "Couldn't find Newsgroups in header of $file_name!\n" unless $quiet;
	$error_message{"No Newsgroups"} =
	    "* Your article seems to lack a Newsgroups header.  This might be because\n     the header was split over several lines, either by you or by your\n     mail software.\n";
	$missing_required_field ++;
	$exit_status++;
    }


else {    
    @newsgroups = &parse_newsgroups($newsgroups);
    %groups = ();
    $news_answers = undef;
    $any_other_groups = undef;
    $checked_groups = undef;
    $german_lang = undef;

    for (@newsgroups) {
	# ignore leading whitespace since we deal with that error elsewhere
	($hierarchy, $rest) = /^\s*([^\.]+)\.(.*)$/;

	$checked_groups++ if ((! $unchecked_hier{$hierarchy}) &&
			      $known_answers_hier{$hierarchy});
	if ($rest eq "answers") {
	    if ($hierarchy eq "news") {
		$news_answers++;
	    }
	    if ($known_answers_hier{$hierarchy}) {
		$answers_groups{$hierarchy}++;
		$checked_answers_groups++ if (! $unchecked_hier{$hierarchy});
	    }
	}
	else {
	    $any_other_groups++;
	    $groups{$hierarchy} .= $_ . ', ';
	    $german_lang++ if $german_lang_hier{$hierarchy};
	}
    }
    
# Any *.answers needs a corresponding *.{other}, except
#     o for de.answers, we need a German-language group
#     o for alt.answers, need alt.* only if (any checked answers groups
#         or it's a new submission)
#     o for our own postings

    foreach $hierarchy (keys %answers_groups) {
	$missing_home = 0;

# skip our postings

	last if (&header'field_value('archive-name', %auxheaders) =~ m,^(news|de)-answers/,);

	if ($german_hier{$hierarchy}) {
	    $missing_home++ if (! $german_lang);
	}
	elsif ($unchecked_hier{"$hierarchy"}) {
            $missing_home++ if ((! $groups{$hierarchy}) &&
                             (($checked_answers_groups) || ($submitted)));
	}
	elsif (($hierarchy ne "news") && (! $groups{$hierarchy})) {
	    $missing_home++;
	}

	if ($missing_home) {

            $text = (($german_hier{$hierarchy}) ?
                 "german-language" : "\"$hierarchy.*\"");

	    warn "$file_name posted to $hierarchy.answers, but not to any other $text groups.\n" unless $quiet;
	    $error_message{"only to $hierarchy.answers"} =			"* Your article was posted to $hierarchy.answers, but not to any other $text\n     group.  Only those *.answers newsgroups which match\n     main-hierarchy groups you are posting to may be included.\n";
	    $exit_status++;
	}
    }

# If posted to any checked *.answers, needs to be posted to a home group too

    if ((! $any_other_groups) && ($checked_groups)) {

        local($an) = (&header'field_value('news-answers-archive-name',
                                          %auxheaders) ||
		     &header'field_value('archive-name', %auxheaders));

# bofh.* hierarchy prohibits external cross-posting
        if ($an !~ /bofh/) {
	     warn "$file_name posted only to *.answers groups\n" unless $quiet;
	     $error_message{"only *.answers"} =
	           "* The Newsgroups line of your article contains only *.answers\n     newsgroups.  It must also be posted to at least one \"home\"\n     newsgroup.\n";
	     $exit_status++;
        }
    }

# If posted to {hier}.*, needs {hier}.answers too, unless
#     o {hier} isn't a known answers hierarchy
#     o {hier} is German-language (posting might be in English)
#     o {hier} is unchecked and no other checked answers groups are included
#     o posting is one of a special set
#     o posting isn't posted to any known answers groups - plg added 12/07/98

    foreach $hierarchy (keys %groups) {

	if (($known_answers_hier{$hierarchy}) &&
	    (! $german_lang_hier{$hierarchy}) &&
	    ((! $unchecked_hier{$hierarchy}) || ($checked_groups)) &&
	    ($checked_answers_groups) &&
	    (! $answers_groups{$hierarchy})) {

	    local($an) = (&header'field_value('news-answers-archive-name',
                                      %auxheaders) ||
		     &header'field_value('archive-name', %auxheaders));

# See the note with these postings in the LoPIP.  Maintainer refuses 
# to post to comp.answers, and it's not worth fighting about.

   	    next if (($an eq 'newprod') || 
		     ($an eq 'sco/xenix') ||
		     ($an eq 'sco/xenix-diff'));

# remove trailing ', '

	    chop($groups{$hierarchy});
	    chop($groups{$hierarchy});

	    warn "$file_name posted to $groups{$hierarchy} but not to $hierarchy.answers\n" unless $quiet;
	    $error_message{"not to $hierarchy.answers"} =
		"* The Newsgroups line does not contain all relevant *.answers\n     newsgroups.  Since you are posting to\n     $groups{$hierarchy}\n     you must also include $hierarchy.answers in your Newsgroups\n     header.\n";
	    $exit_status++;
	}
    }

# If any checked answers groups, or newly submitted, needs news.answers

# Note: this passes articles posted to (e.g.) regional groups and
# already approved for *.answers which don't have news.answers, but it's
# not critical and not easy to check.

    if ((! $news_answers) && 
	(($checked_answers_groups) || $submitted)) {

	warn "$file_name not posted to news.answers.\n" unless $quiet;
	$error_message{"not to news.answers"} =
	    "* The Newsgroups line of your article does not contain news.answers" . ($submitted ? ".\n" : ",\n     although it was posted to other *.answers newsgroups.\n");
	$exit_status++;
    }
}

# Check Followup-To
    if ($news_answers || scalar(keys %answers_groups) || $submitted) {

	if (!$followup_to) {
	    warn "$file_name has no Followup-To: line\n" unless $quiet;
	    $error_message{"no followup-To"} =
		"* Your article seems not to have a Followup-To: line.\n";
	    $missing_required_field ++;
	    $exit_status++;
	}

	$followup_to =~ s/\s*$//;
	$followup_to =~ s/^Followup-To:\s*//; 
	@followupto_groups = split(/,/, $followup_to);
	for (@followupto_groups) {
	    if (/@/) {
		warn "$file_name has illegal E-Mail address $_ in Followup-To: field\n" unless $quiet;
		$error_message{"E-mail in Followup-To"} =
		    "* Your Followup-To line contains the email address\n     $_,\n     which is not valid.  It should have either one or more newsgroups\n     (separated by commas), or the single word \"poster\".  It may not\n     contain an email address or any *.answers newsgroups.\n";
		$exit_status++;
	    }
	    if ($_ eq "junk") {
		warn "$file_name has Followup-To: set to junk; this is not acceptable.\n" unless $quiet;
		$error_message{"Followup-To: junk"} =
		    "* Your article has the Followup-To: line set to \"junk\", which is not\n     valid.  It should have either one or more newsgroups (separated\n     by commas), or the single word \"poster\".  It may not contain an\n     email address or any *.answers newsgroups.\n";
		$exit_status++;
	    }
	    if ($_ eq "Poster") {
		warn "$file_name has Followup-To: set to 'Poster'; should be 'poster'.\n" unless $quiet;
		$error_message{"Followup-To: Poster"} =
		    "* Your article has the Followup-To: line set to \"Poster\", which is not\n     valid.  It should be lower-case, \"poster\".\n";
		$exit_status++;
	    }
	    ($hierarchy, $rest) = /^([^\.]+)\.(.*)$/;
	    if ($rest eq "answers" && $known_answers_hier{$hierarchy}) {
		warn "$file_name has Followup-To: set to $_\n" unless $quiet;
		$error_message{"Followup-To to $_"} =
		    "* Your article's Followup-To line includes $_, which is not\n     acceptable.  It should have either one or more newsgroups\n     (separated by commas), or the single word \"poster\".  It may not\n     contain an email address or any *.answers newsgroups.\n";
		$exit_status++;
	    }
	}
    }

# Check other moderated groups, if requested
    if ($other_moderated) {
	&check_moderated(@newsgroups);
    }

# Check Subject
    if ($subject) {
	$subject = &parse_subject($subject);
    }
    else {
	warn "Couldn't find Subject in header of $file_name!\n" unless $quiet;
	$error_message{"No Subject"} =
	    "* Your article seems to lack a Subject header.\n";
	$missing_required_field ++;
	$exit_status++;
    }
    
# If submitted file, just check for *.answers Archive-name
    if ($submitted) {
	$an = &find_archive_name("news.answers",1);

	if ($error_message{"Whitespace in archive name"}) {
	    $exit_status++;
	}
	elsif (! $an) {
	    &missing_archive_name();
	}
    }
# Otherwise try to save, checking all archive names along the way
    else {
	&save if ($newsgroups && $subject);
    }

    
    if ($mail_reply) {
	if ((! $exit_status) && $submitted) {
            if (&header'field_value('subject', %mailheaders) =~ /[iI]gnore/) {
                 print <<EOS;
Our script did not find any straightforward errors in your submission.
However, because you specified "ignore" in the Subject of your email
to this FAQ-checker, your submission has NOT been forwarded to the
*.answers moderators for final approval.  You will need to submit it
again if you wish to have it approved.  Please do not begin posting
your article to *.answers, or try to submit it to the faq-server,
until you have requested, and received, approval from the moderation
team.

EOS
                 $exit_status++;
	    }
            else {
                 print <<EOS;
Our script did not find any straightforward errors in your submission.
It has been forwarded to the *.answers moderators for final approval.
You do not need to send it again.  The moderators may find other
problems, or they may ask you to change your Subject or Archive-name.
Please do not begin posting your article to *.answers, or try to
submit it to the faq-server, until you have received approval from the
moderation team.

Although we do give priority to submissions which have passed this
automated check, our queue is often several weeks long, so you should
continue posting your FAQ or periodic informational posting to all
other newsgroups besides *.answers while approval is pending.

EOS
                }
print <<EOS;
If you need to send an additional comment, or if this automated
FAQ-checker has made an error, you can contact the *.answers
moderators directly at <$moderator_address>.

EOS
        }
	else {
	    $action = ($submitted ? "resubmitting your article" : "using the faq-server");
	    
	    print <<EOS;
Our automated testing script found some points where your article does
not appear to conform to the *.answers article guidelines.  Please
resolve these problems before $action.  

For more information, consult the guidelines, available by FTP from
<ftp://rtfm.mit.edu/pub/usenet/news.answers/news-answers/guidelines>.

The problem(s) are the following:

EOS
				    
            for $ind (keys %error_message) {
   	        print "$error_message{$ind}\n";
            }
	    if ($missing_required_field) {
		print <<EOS;
Headers which are described as missing might actually be in the wrong
place (e.g., in the auxiliary header when they belong in the main
header) or misspelled, or they might be repeated.  They might also
have a space before the colon, or be missing the necessary space after
it.

EOS
}
# look for case where main headers and submitted headers got intermingled:
# submitted with Newsgroups in the mail headers but not the submitted ones
           if (($submitted) && ($trim_headers) && 
              ($error_message{"No Newsgroups"}) && 
              (&header'field_value('newsgroups', %mailheaders))) {
              warn "$file_name has mail and news headers intermingled\n" 
                    unless $quiet;
	    print <<EOS;

Your submission appears to be in an incorrect format, with the
submitted headers intermingled with the email's own headers.  You
should include all the headers in the body of your message, separated
by a blank line from the email headers.  Most mail software inserts
this blank line automatically, but perhaps yours didn't do so.  Try
adding a blank line to the start of your submission message, or use a
different mail program.  If you continue to have problems, you'll have
to submit your file to the *.answers moderators directly.

EOS
            }

# Check for MIME
            if (&header'field_value('=46rom', %headers) || 
          	 &header'field_value('=46ollowup-to', %headers))
                 {
                 warn "$file_name appears to have been MIME-encoded\n" 
                    unless $quiet;
                 print <<EOS;
Your submission appears to have been MIME-encoded, which may be
causing some of these apparent problems.  Most likely, this was done
by your own mail software, though it could have been done by some
mailer between you and the FAQ-checker.  If you can, turn off any MIME
options in your mail program (perhaps called "quoted-printable").
Also look through your file and remove any 8-bit characters (accented
letters, "smart" quotes, bullets, em dashes, and so on), which often
cause automatic MIME conversion.  If that doesn't help, try using a
different mail program.  As a last option, you can send your file to
us directly at <$moderator_address>.

EOS
             }

           if ($submitted) { 
		print <<EOS;
Resubmissions should be directed to the automated FAQ-checker again,
at <$checker_address>.  More detailed instructions
can be found in the submission guidelines (see above).  If you have 
questions which are not adequately answered in the guidelines, or if
this automated FAQ-checker has made an error, you can contact the
*.answers moderators directly at <$moderator_address>.

EOS
           }
    }
	
	print "\nReference:\n\n";
	print "  [begin reference inclusion]\n\n";
	
	if ($trim_headers) {
	    for $ind (keys %mailheaders) {
                chop ($mailheaders{$ind});
                $mailheaders{$ind} =~ s/\n/\n> /g;
		print "> $mailheaders{$ind}\n";
	    }
	    print "> \n";
	}
	for $ind (keys %headers) {
            chop ($headers{$ind});
            $headers{$ind} =~ s/\n/\n> /g;
	    print "> $headers{$ind}\n";
	}
	print "> \n";
	for $ind (keys %auxheaders) {
            chop ($auxheaders{$ind});
            $auxheaders{$ind} =~ s/\n/\n> /g;
	    print "> $auxheaders{$ind}\n";
	}
	
	print "\n[body of submission deleted for brevity]\n";
	print "  [end reference inclusion]\n";
	
    }
}

sub setgroup {
    local ($filename) = @_;

    if (-o $filename) {
	chown (@groupid, "$filename") || 
	    die "Setting group ID for $filename: $!.\n";
    }
}

sub missing_archive_name {
    warn "Couldn't find Archive-name in $file_name, not storing in $newsgroup.\n" unless $quiet;
    $error_message{"no archive name"} =
	"* No valid Archive-name line could be found in your article.  This might\n     be because you don't have a single completely blank line between\n     your main header and the Archive-name.\n";
    $missing_required_field ++;
    $exit_status++;
}

sub find_archive_name {
    local($newsgroup, $fallback) = @_;
    local($an, $group);
    
    $group = $newsgroup;
    
    $newsgroup =~ s/\./-/g;
    $newsgroup .= "-archive-name";
    
    $an = &header'field_value($newsgroup, %auxheaders) ||
	&header'field_value($newsgroup, %headers);
    
    if ((! $an) && $fallback) {
	$an = $default_archive_name ||
	    &header'field_value('archive-name', %auxheaders) ||
		&header'field_value('archive-name', %headers);
    }
    
    if (($group =~ /\.answers$/) && (! $submitted)) {
	if (! %news_answers_index) {
	    &load_news_answers_index();
	}
	if (! ($news_answers_index{$an} || $unchecked_groups{$group})) {
	    warn "Invalid $group archive name $an in $file_name.\n" unless $quiet;
	    $error_message{"Invalid archive name $an"} =
		"* Your article has the archive name $an,\n     which appears not to be approved by the *.answers moderators\n";
	    $exit_status++;
            if (! $unlink) {
                $an = undef;
            }
	}
    }
    
    if ($an =~ /[\s]/) {
	warn "Whitespace in archive name $an in $file_name.\n" unless $quiet;
	$error_message{"Whitespace in archive name"} =
	    "* Your article contains whitespace in the archive name,\n     $an .\n     This might be because you don't have a completely blank line\n     immediately after your auxiliary header.\n";
	$an = undef;
    }
    $an;
}

if ($exit_status) {
    exit($ERRORCODE);
}
else {
    exit(0);
}

sub load_news_answers_index {
    local(@_, $_);
    
    open(NAI, $news_answers_index) || die "Opening $news_answers_index: $!";
    while (<NAI>) {
	next if (/^\s*\#/);	# skip comments
	@_ = split;
	$news_answers_index{$_[0]}++;
    }
    close(NAI);
}


sub check_moderated {
    local(@groups) = @_;
    local($port, $sockaddr, $aliases, $proto, $name,
	  $port, $type, $len, $thataddr, $sockaddr,
	  $that, $hier, %newsgroups, $oldselect, $_);
    local($group, $from, $to, $flag);
    
    for (@groups) {
	$newsgroups{$_} ++;
    }
    
    $port = 119;
    
    $sockaddr = 'S n a4 x8';
    ($name, $aliases, $proto) = getprotobyname('tcp');
    ($name, $aliases, $port) = getservbyname($port, 'tcp')
	unless $port =~ /^\d+$/;
    ($name, $aliases, $type, $len, $thataddr) = gethostbyname($news_host);
    
    $that = pack($sockaddr, &AF_INET, $port, $thataddr);
    
    socket(S, &PF_INET, &SOCK_STREAM, $proto) || die "socket: $!";
    connect(S, $that) || die "connect to $news_host: $!.\n";
    
    $oldselect = select(S); $| = 1; select($oldselect);
    
    # Read banner line
    ($_ = <S>) || die "reading from $news_host: $!.\n";
    die "unexpected banner code from $news_host: $_" if ($_ !~ /^200\s+/);
    
    print S "list\r\n";
    
    # Read the intro line to the listing
    ($_ = <S>) || die "reading from $news_host: $!.\n";
    die "unexpected response code from $news_host: $_" if ($_ !~ /^215\s+/);
    
  wloop:
    while(<S>) {
	chop;
	if (/^\./) { last wloop; }
	($group, $from, $to, $flag) = split;
	if ($newsgroups{$group} && ($flag eq "m")) {
	    delete $newsgroups{$group};
	    foreach $hier (keys %known_answers_hier) {
		if ($hier . ".answers" eq $group) {
		    next wloop;
		}
	    }
	    warn "$file_name also posted to other moderated group $group\n" unless $quiet;
	    $error_message{"Moderated group $group"} =
		"* Your article is posted to the moderated group\n     $group.\n     ".($submitted ? "You will need to obtain approval from the moderator of that group\n     before submitting your article to us.  Once you have done so, and\n     fixed any other problems reported by this automated FAQ-checker,\n     please send your article directly to us at\n     <$moderator_address>.  You may include \"[submitted]\" in\n     the Subject of your submission to bring it to our immediate\n     attention." : "This script cannot know whether you have approval from its\n     moderator.") . "\n";
	    $exit_status ++;
	}
	else {
	    delete $newsgroups{$group};
	}
	last if (scalar(keys %newsgroups) == 0);
    }
    
    print S "quit\r\n";
    
    close(S);
}
