#!/usr/local/bin/perl

###############################################################################
# NAME: digestnews
#
# PURPOSE: digestify and transmit a newsgroup.
#
# Digestnews gets all the articles from the specified newsgroup, digestifies
# them, and mails the digest(s) out.
#
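# This program was written for a machine whose MTA is MMDF: it hands each
# finished digest directly to the program named by $Submit (originally the
# MMDF program "submit") in order to retain control over the outgoing
# headers.  Note that $Submit below is currently set to /usr/lib/sendmail.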
#
# BUGS:
#    Messages which exceed $Maxsize are not broken into parts, so there's really
#    no hard limit on the size of a digest.
#
# AUTHOR:
#    Marc Rouleau, University of Virginia Academic Computing, marc@Virginia.EDU
#
# DATE: 9/5/90
#
# COPYRIGHT:
#    You may use, modify, or distribute this in any way, as long as you retain
#    this notice and do not try to make any money from it.
###############################################################################

###############################################################################
# System-Wide parameters, changeable here only.
###############################################################################

# Where things live on this system.
$Sysnewsdir         = '/usr/spool/news';          # root of the news spool
$Submit             = '/usr/lib/sendmail';        # program each digest is piped to
$Mailhost           = 'bloom-picayune.mit.edu';   # host part of the list addresses

# File and directory names.  The script chdir()s to the list's home
# directory before using them, so they are relative to that directory.
$Digestdir          = 'digests';       # archive of generated digests
$Articledir         = 'articles';      # reformatted articles awaiting a digest
$Digest             = 'digest';        # digest file name prefix (number appended)
$Body               = 'body';          # scratch file holding the digest body
$Closing            = 'closing';       # text appended to the end of every digest
$LastArticleF       = 'lastarticle';   # state: highest article number collected
$ThenDigestF        = 'thendigest';    # state: time() of the previous digest
$LastDigestF        = 'lastdigest';    # state: number of the previous digest
$VolumeF            = 'volume';        # state: current volume number

# Numbering and layout.
$Digests_per_volume = 999;   # digest number wraps to 1 (and volume bumps) past this
$Titlelen           = 76;    # width of the title line and of the rule before articles
$ArticleSepLen      = 30;    # width of the dashed rule written after each article

###############################################################################
# Input parameter defaults.
###############################################################################

# Each default can be overridden by the command-line switch named in its
# comment.
$Newsgroup     = 'mit.test';                    # -g  newsgroup to digestify
$Listname      = 'Test-Digest';                 # -l  list name used in addresses and titles
$Homedir       = '/usr/lib/news/digests/test';  # -d  working directory for this list
$Minsize       = 20000;                         # -s  pending size below which a digest may be held back
$Maxsize       = 30000;                         # -S  size at which a digest stops taking more articles
$Archiveperiod = 28;                            # -a  days a saved digest is kept
$Frequency     = 24;                            # -f  hours after which a digest is sent regardless of size
$Maxage        = 21;                            # -A  accepted, but not referenced elsewhere in this file
$Expandtabs    = 1;                             # -x  true: replace tabs with spaces

###############################################################################
# Input parameter processing.
###############################################################################

# The command line is glued back into a single string (with a leading blank)
# and cut apart at every "<whitespace>-", so each piece is an option letter
# followed by its value: "-g mit.test" and "-gmit.test" both yield the piece
# "g mit.test" / "gmit.test".
#
# Limitations of this scheme, kept for compatibility: a value may not contain
# whitespace (only the first word is used) nor the sequence " -", and only
# the first letter of a switch is examined.
@args = split(/\s-/," " . join(' ',@ARGV));
foreach $_ (@args) {
    # Skip blank pieces.  The text in front of the first switch is "" when
    # switches are present, but it is " " when no arguments were given at
    # all; a plain truth test would let that " " through and fall into the
    # usage branch, making the defaults unusable.
    next if (/^\s*$/);

    if (/^g\s*(\S*)/) {
        $Newsgroup = $1;
    } elsif (/^l\s*(\S*)/) {
        $Listname = $1;
    } elsif (/^d\s*(\S*)/) {
        $Homedir = $1;
    } elsif (/^s\s*(\S*)/) {
        $Minsize = $1;
    } elsif (/^S\s*(\S*)/) {
        $Maxsize = $1;
    } elsif (/^a\s*(\S*)/) {
        $Archiveperiod = $1;
    } elsif (/^f\s*(\S*)/) {
        $Frequency = $1;
    } elsif (/^A\s*(\S*)/) {
        $Maxage = $1;
    } elsif (/^x\s*(\S*)/) {
        $Expandtabs = $1;
    } else {
        # Unknown switch, or stray text before the first switch.
        print <<EOF;

Usage: digestnews [-g newsgroup] [-l listname] [-d homedir] [-s minsize]
                  [-S maxsize] [-a archiveperiod] [-f frequency]
                  [-A max_article_age] [-x 0|1]
EOF
        exit 1;
    }
}

###############################################################################
# Initialize other global variables.
###############################################################################

$Now = time;

# Everything below uses file names relative to the list's home directory.
chdir($Homedir) || die "Can't cd to $Homedir";

# The owner of the home directory becomes the owner of the files we create.
# Stat "." rather than $Homedir: after the chdir a relative $Homedir would no
# longer resolve and $Listowner would silently end up undefined.
@s = stat('.');
$Listowner = $s[4];

# Newsgroup "a.b.c" is spooled under $Sysnewsdir/a/b/c.
($groupdir = $Newsgroup) =~ tr|.|/|;
$Newsdir = "$Sysnewsdir/$groupdir";

# The "@" must be escaped: Perl 5 reads "@$Mailhost" inside a double-quoted
# string as an array dereference, which interpolates to nothing and drops
# the host part of every address.
$Listaddr    = "$Listname\@$Mailhost";
$Requestaddr = "$Listname-Request\@$Mailhost";
$Distaddr    = "$Listname-Dist\@$Mailhost";
$ArticleSep  = "\n" . '-' x $ArticleSepLen . "\n\n";

# Persistent state from the previous run.  Strip trailing whitespace instead
# of blindly chopping one character, so a state file written without a final
# newline does not lose its last digit.
$LastArticle = `cat $LastArticleF`; $LastArticle =~ s/\s+$//;
$ThenDigest  = `cat $ThenDigestF`;  $ThenDigest  =~ s/\s+$//;
$LastDigest  = `cat $LastDigestF`;  $LastDigest  =~ s/\s+$//;
$Volume      = `cat $VolumeF`;      $Volume      =~ s/\s+$//;

# Number of the next digest; numbering restarts with each new volume.
$Digestnum = $LastDigest + 1;
if ($Digestnum > $Digests_per_volume) {
    $Digestnum = 1;
    $Volume++;
}

# Build the date header from date(1) output, which is assumed to look like
# "Wed Sep  5 12:00:00 EDT 1990" (weekday month day time zone year).
# NOTE(review): this depends on the locale and date(1) format -- confirm.
@d = split(' ', `date`);
$d[5] =~ s/^..//;               # two-digit year
$Rfc822Date = sprintf("%s, %s %s %s %s %s",$d[0],$d[2],$d[1],$d[5],$d[3],$d[4]);
($Hour,$x,$x) = split(/:/,$d[3]);

# Hours since the previous digest, rounded to the nearest hour's worth.
$ThenDigestHours = ($Now - $ThenDigest + 1800) / 3600;

# Header lines that are identical for every digest of this list.
$ConstantHeader = "From:     Digestifier <$Requestaddr>\n"
                . "To:       $Listaddr\n"
                . "Reply-To: $Listaddr\n";

$DigestDate   = "Date:     $Rfc822Date\n";
$SubjectIntro = "\nContents:\n";
$ArticleStart = "\n" . '-' x $Titlelen . "\n\n";

# Size of the closing text, needed for the digest size estimate.
@s = stat($Closing);
$ClosingSize = $s[7];

###############################################################################
# Delete saved digests created more than $Archiveperiod ago.
###############################################################################

# Walk the digest archive and remove every file whose modification time lies
# more than $Archiveperiod days before the start of this run.
foreach $saved (<$Digestdir/*>) {
    $mtime    = (stat($saved))[9];
    $days_old = ($Now - $mtime) / 86400;        # 86400 seconds per day
    unlink($saved) if ($days_old > $Archiveperiod);
}

###############################################################################
# Collect articles in a directory.  Each article will be reformatted to look
# exactly as it will be when it is digestified with the exception that the
# first line of each file will contain the line which will appear in the
# digest's table of contents.  This bit of weirdness simplifies size
# computations.  The format is "  subject (author)".
###############################################################################

#$days{'Jan'} = 0;
#$days{'Feb'} = 31;
#$days{'Mar'} = 59;
#$days{'Apr'} = 90;
#$days{'May'} = 120;
#$days{'Jun'} = 151;
#$days{'Jul'} = 181;
#$days{'Aug'} = 212;
#$days{'Sep'} = 243;
#$days{'Oct'} = 273;
#$days{'Nov'} = 304;
#$days{'Dec'} = 334;
#@x = localtime($Now);
#$nowdays = $x[7];

# Copy every article newer than $LastArticle from the spool into
# $Articledir, reformatted the way it will appear in the digest.  The first
# line of each copy is the table-of-contents entry "  subject (author)".

# Newsgroup name with regex metacharacters escaped, so that the "." in
# "mit.test" matches only a literal dot when we edit the Newsgroups header.
($NewsgroupPat = $Newsgroup) =~ s/(\W)/\\$1/g;

$NextLastArticle = $LastArticle;
ARTICLE: foreach $article (<$Newsdir/*>) {
    # NOTE(review): this looks like a leftover debugging print; it is kept
    # because something may depend on the output -- confirm and remove.
    print "$article\n";

    # The article number is the last path component.  Anything that is not a
    # plain number (e.g. the directory of a sub-newsgroup) is not an article.
    ($artnum) = $article =~ m|([^/]+)$|;
    next ARTICLE unless ($artnum =~ /^\d+$/);
    next ARTICLE if ($artnum <= $LastArticle);
    $NextLastArticle = $artnum if ($artnum > $NextLastArticle);

    # First pass: pull subject and author out of the header.
    open(IN,"< $article") || die "Can't open $article";
    $subj = "[no subject]";
    $name = "";
    while (<IN>) {
        s/\n$//;
        if (/^subject:/i) {
            s/subject:[ \t]*(.*)/  $1/i;
            $subj = $_;
        } elsif (/^from:/i) {
            s/from:[ \t]*//i;
            # Keep the human-readable name: either the text before
            # "<address>" or the text inside "(comment)".
            s/([^<]*)<[^>]*>/$1/ || s/[^\(]*\(([^\)]*)\)/$1/;
            s/[ \t]*$//;
            $name = $_;
        } elsif (/^date:[^m]*may/i) {
            # NOTE(review): skips articles whose Date header mentions "may"
            # (with no other "m" before it).  This looks like a stopgap for
            # the unused $Maxage setting -- confirm whether it is still wanted.
            close(IN);
            next ARTICLE;
        } elsif (/^$/) {
            last;               # blank line ends the header
        }
    }
    close(IN);

    # Table-of-contents line for this article.
    $_ = $subj . ($name ? " ($name)" : "") . "\n";

    # Replace tabs with spaces to appease Big Blue, who likes to
    # replace them with semicolons!
    if ($Expandtabs) {
        while (($i = index($_,"\t")) >= 0) {
            substr($_,$i,1) = ' ' x (8 - ($i % 8));
        }
    }

    open(OUT,">$Articledir/$artnum") ||
        die "Can't open $Articledir/$artnum for writing";
    print OUT $_;

    # Second pass: process the whole article from scratch.  (The original
    # had lost the "die" here, so a failed open went unnoticed.)
    open(IN,"< $article") || die "Can't open $article";
    $header = 1;
    while (<IN>) {
        # Some mailers interpret a ^.$ as end-of-message.
        s/^\.$/. /;

        # Change ^----- to avoid screwing up undigestifiers.
        tr/-/=/ if (/^-----/);

        # Same tab expansion as for the contents line.
        if ($Expandtabs) {
            while (($i = index($_,"\t")) >= 0) {
                substr($_,$i,1) = ' ' x (8 - ($i % 8));
            }
        }

        if (!$header) {
            print OUT $_;
        } elsif (/^from:/i || /^date:/i || /^reply-to:/i || /^subject:/i) {
            print OUT $_;
        } elsif (s/^newsgroups:/Crossposted-To:/i && /,/) {
            # Cross-posted: list the other groups.  Remove our own group
            # only where it is a complete list element, so that
            # "mit.test" does not mangle "mit.test.foo".
            s/,$NewsgroupPat([,\s])/$1/i ||
                s/^(Crossposted-To:\s*)$NewsgroupPat,/$1/i;
            print OUT $_;
        } elsif (/^$/) {
            print OUT $_;
            $header = 0;        # everything after this is body text
        }
    }
    close(IN);

    print OUT $ArticleSep;
    close(OUT);
}

# Remember how far we got, and hand the collected articles to the list owner.
# NOTE(review): group id 100 is hard-coded -- confirm it is correct here.
system "echo '$NextLastArticle' > $LastArticleF";
chown $Listowner,100,<$Articledir/*>;

###############################################################################

# Input size information.

###############################################################################

# Record the size in bytes of every collected article, keyed by its path
# ("$Articledir/<number>").
foreach $pending (<$Articledir/*>) {
    $Size{$pending} = (stat($pending))[7];
}

###############################################################################

# Digestify the articles if there are enough of them.

###############################################################################

# Sort comparator: order article paths by their numeric article number (the
# last path component), so that article 9 precedes article 10.  A plain
# string sort would put "articles/10" first.
sub by_article_number {
    local($x, $y) = ($a, $b);
    $x =~ s|.*/||;
    $y =~ s|.*/||;
    ($x <=> $y) || ($a cmp $b);
}

# Build and mail digests until the pending articles no longer justify one.
while (1) {

    $DigestFile = "$Digestdir/$Digest$Digestnum";

    # Figure out the total size of everything except the actual articles,
    # subjects, and separators.
    $DigestSubject = "Subject:  $Listname Digest #$Digestnum\n";
    $t = "$Listname Digest #$Digestnum, Volume #$Volume";

    # Right-align the date on the title line, but always keep at least one
    # blank between title and date when the title is too long to fit.
    $pad = $Titlelen - length($t) - length($Rfc822Date);
    $pad = 1 if ($pad < 1);
    $spaces = ' ' x $pad;

    $DigestTitle = "\n$t$spaces$Rfc822Date\n";
    $Size = length($ConstantHeader) + length($DigestDate) +
            length($DigestSubject) + length($DigestTitle) +
            length($SubjectIntro) + length($ArticleStart) + $ClosingSize;

    # Add up all the article sizes.
    $tsize = 0;
    foreach $size (values %Size) {
        $tsize += $size;
    }

    # Stop when what is left is below the minimum size and either the
    # previous digest is recent enough or this run already produced one.
    last if (($tsize + $Size < $Minsize) &&
             (($ThenDigestHours < $Frequency) || $got_one));

    # Initialize a digest: header and contents go to the digest file, the
    # article texts to a scratch body file that is appended afterwards.
    $got_one = 0;
    open(OUT,">$DigestFile") ||
        die "Can't open $Digestdir/$Digest$Digestnum for writing";
    print OUT $ConstantHeader,$DigestDate,$DigestSubject,$DigestTitle,
              $SubjectIntro;
    open(BODY,">$Body") || die "Can't open $Body for writing";

    foreach $article (sort by_article_number keys %Size) {
        # Always take at least one article, even if it alone exceeds
        # $Maxsize; after that, stop before the limit is crossed.
        last if (($Size{$article} + $Size > $Maxsize) && $got_one);
        $Size += $Size{$article};
        $got_one = 1;
        delete $Size{$article};

        open(IN,"< $article") || die "Can't open $article";
        # The first line is the contents entry; the rest is the article.
        $contents = <IN>;
        print OUT $contents if (defined($contents));
        while (<IN>) {
            print BODY $_;
        }
        close(IN);
        unlink $article;
    }
    print OUT $ArticleStart;
    close(OUT);
    close(BODY);

    # Nothing was pending: do not leave a header-only digest or the scratch
    # file behind.
    if (!$got_one) {
        unlink($Body, $DigestFile);
        exit;
    }

    system "cat $Body $Closing >> $DigestFile";
    unlink $Body;
    # NOTE(review): group id 100 is hard-coded -- confirm it is correct here.
    chown $Listowner,100,$DigestFile;

    # Mail the digest.  The digest stays in $Digestdir, so a failed
    # submission can be re-sent by hand; just make the failure visible.
    if (system("cat $DigestFile | $Submit -f$Requestaddr $Distaddr") != 0) {
        warn "digestnews: $Submit exited with status $? for $DigestFile\n";
    }

    # Record what was just sent, then move on to the next digest number.
    system "echo '$Now' > $ThenDigestF";
    system "echo '$Digestnum' > $LastDigestF";
    system "echo '$Volume' > $VolumeF";
    $Digestnum++;
    if ($Digestnum > $Digests_per_volume) {
        $Digestnum = 1;
        $Volume++;
    }
}

