From Marc Rouleau <mer6g@fuggles.acc.Virginia.EDU> Wed Jan  1 14:52:48 1992
From: Marc Rouleau <mer6g@fuggles.acc.Virginia.EDU>
Date: Wed, 1 Jan 1992 09:33:43 EST
X-Mailer: Mail User's Shell (7.2.3 5/22/91)
To: tytso@Athena.MIT.EDU
Subject: Re: Mailing list software

On Dec 31,  5:33pm, Theodore Ts'o wrote:
> Hi!  Can you give me a pointer to the digestifying software which you're
> using for the Perl-User's Digest?  Thanks!
> 
> 						- Ted


Below are the script called by cron, the script that prunes the log of
incoming postings, the digestifier, and the script that posts mail messages
to newsgroups.  This stuff has some MMDF dependencies, and it's oriented
toward digestification of newsgroups rather than arbitrary collections of
messages.  I have another setup for non-USENET digests which I'd be happy
to mail to you.  The post script gets incoming messages on stdin with special
arguments.  Here are some example entries from our MMDF's alias.sys file:

dead-flames:mmdf|"/usr/bin/perl /usr/mmdf/bin/do_post.pl rec.music.gdead world"
highway61:mmdf|"/usr/bin/perl /usr/mmdf/bin/do_post.pl rec.music.dylan world"
perl-users:mmdf|"/usr/bin/perl /usr/mmdf/bin/do_post.pl comp.lang.perl world"

The post script is used more generally here; for example, it can post to
moderated groups.  This can be used to make a gateway between a mailing
list and a local newsgroup.  Make the newsgroup moderated so that all
postings are mailed to the mailing list, and subscribe an address that
pipes into the post script:

uva-edtech:mmdf|"/usr/bin/perl /usr/mmdf/lib/post -m schools.ml.edtech schools"

    -- Marc Rouleau

-------------------------- cron script ----------------------------
#!/bin/sh

# Cron driver for the newsgroup digests: builds and mails a digest for each
# gatewayed newsgroup, then prunes the log of incoming postings.  Nothing is
# done while the machine is busy; the next cron run simply tries again.

#exit 0

PATH=$PATH:/usr/mmdf/lib
cd /usr/mmdf/digests

# Integer part of the first load average reported by uptime.  Both the
# "load average:" and "load averages:" spellings are accepted, and an empty
# result (uptime missing or in an unexpected format) counts as 0 so that the
# numeric test below never sees a malformed operand.
LOAD=`uptime | perl -ne 'print $1 if /averages?: *([0-9]+)/;'`
if [ "${LOAD:-0}" -gt 5 ]; then
    exit 0
fi

# digestnews options: -g newsgroup, -l list name, -d home directory,
# -s minimum size, -S maximum size, -a archive period, -f frequency,
# -A maximum article age, -x expand tabs (0|1).
# Each command is ONE logical line; keep the trailing backslashes.
digestnews -g rec.music.gdead -l Dead-Flames -d dead-flames \
    -s 20000 -S 31500 -a 28 -f 24 -A 21 -x 1
digestnews -g comp.lang.perl -l Perl-Users -d perl-users \
    -s 20000 -S 31500 -a 14 -f 24 -A 21 -x 0
digestnews -g rec.music.dylan -l Highway61 -d highway61 \
    -s 20000 -S 31500 -a 14 -f 24 -A 21 -x 1

prune-folder -f /usr/mmdf/lib/postlog

exit 0
------------------------ end cron script --------------------------
------------------------- prune-folder ----------------------------
#!/uva/bin/mush -F!
# Delete all mail older than 30 days from the specified folder.
# It should be invoked this way:
#
#     prune-folder -f folder-name
#
# The cron script runs it as "prune-folder -f /usr/mmdf/lib/postlog", i.e.
# on the log that the post script appends every incoming message to.

# Select messages by age and pipe the selection to "d".
# NOTE(review): "d" is presumably mush's delete command and "-ago -30d" the
# 30-day cutoff described above -- confirm against the mush manual.
pick -ago -30d | d
# Leave mush.  NOTE(review): presumably this is what commits the deletions
# to the folder -- confirm.
quit
------------------------ end prune-folder -------------------------
--------------------------- digestnews ----------------------------
#!/usr/bin/perl

###############################################################################
# NAME: digestnews
#
# PURPOSE: digestify and transmit a newsgroup.
#
# Digestnews gets all the articles from the specified newsgroup, digestifies
# them, and mails the digest(s) out.
#
# This program will work in its current form only on a machine whose MTA is
# MMDF, since it uses the MMDF program "submit" directly in order to retain
# control over the outgoing headers. 
#
# BUGS:
#    Messages which exceed $Maxsize are not broken into parts, so there's really
#    no hard limit on the size of a digest.
#
# AUTHOR:
#    Marc Rouleau, University of Virginia Academic Computing, marc@Virginia.EDU
#
# DATE: 9/5/90
#
# COPYRIGHT:
#    You may use, modify, or distribute this in any way, as long as you retain
#    this notice and do not try to make any money from it.
###############################################################################

###############################################################################
# System-Wide parameters, changeable here only.
###############################################################################

# Root of the news spool.  The newsgroup name, with dots turned into
# slashes, is appended to this to find the articles.
$Sysnewsdir	    =    "/usr/spool/news";
# Command that the finished digest file is piped into for delivery.
$Submit		    =    "/usr/mmdf/lib/submit -mlnt";
# Host part of the list, -Request and -Dist addresses.
$Mailhost	    =    "fuggles.acc.Virginia.EDU";
# The names below are used after the script has changed directory to
# $Homedir, so they are relative to the list's home directory.
# Directory holding saved digests (pruned after $Archiveperiod days).
$Digestdir	    =    "digests";
# Directory where reformatted articles wait until they go into a digest.
$Articledir	    =    "articles";
# Filename prefix of a saved digest; the digest number is appended.
$Digest		    =    "digest";
# Scratch file that collects the article bodies of the digest being built.
$Body		    =    "body";
# File whose contents are appended to the end of every digest.
$Closing	    =    "closing";
# One-line state files: highest article number already collected, time
# (seconds since the epoch) the last digest was sent, number of the last
# digest, and the current volume number.
$LastArticleF	    =    "lastarticle";
$ThenDigestF	    =    "thendigest";
$LastDigestF	    =    "lastdigest";
$VolumeF	    =    "volume";
# When the digest number exceeds this it restarts at 1 and the volume
# number is incremented.
$Digests_per_volume =    999;
# Width of the digest title line and of the dashed rule that precedes the
# first article.
$Titlelen	    =    76;
# Number of dashes in the separator written after each article.
$ArticleSepLen	    =    30;

###############################################################################
# Input parameter defaults.
###############################################################################

# -g: newsgroup to digestify.
$Newsgroup	= "rec.music.gdead";
# -l: list name, used in the addresses, subject and title of each digest.
$Listname	= "Dead-Flames";
# -d: home directory of the list; the script changes into it at startup.
$Homedir	= "/usr/mmdf/digests/dead-flames";
# -s: no digest is built while the pending material is smaller than this
# many bytes, unless $Frequency hours have passed since the last digest.
$Minsize	= 20000;
# -S: an article that would push a digest past this many bytes is left for
# the next digest (a digest always takes at least one article).
$Maxsize	= 30000;
# -a: saved digests older than this many days are deleted.
$Archiveperiod	= 28;
# -f: hours after which a digest is sent even if it is below $Minsize.
$Frequency	= 24;
# -A: maximum article age.  NOTE(review): this value is set but not used
# anywhere in the visible script.
$Maxage		= 21;
# -x: when non-zero, tabs in articles are expanded to spaces.
$Expandtabs     = 1;

###############################################################################
# Input parameter processing.
###############################################################################

# The command line is glued back into one string and cut apart again at
# every " -", so each element is an option letter followed by its value
# (with or without a space in between).  The leading blank makes the very
# first option split like the others; it also yields one empty element,
# which is skipped.
@args = split(/\s-/, " " . join(' ', @ARGV));
foreach $arg (@args) {
    next unless ($arg);

    # Anything not starting with a known option letter is a usage error.
    unless ($arg =~ /^([gldsSafAx])\s*(\S*)/) {
	print <<EOF;

Usage: digestnews [-g newsgroup] [-l listname] [-d homedir] [-s minsize]
                  [-S maxsize] [-a archiveperiod] [-f frequency]
                  [-A max_article_age] [-x 0|1]
EOF
	exit 1;
    }

    ($flag, $value) = ($1, $2);
    $Newsgroup     = $value if ($flag eq 'g');
    $Listname      = $value if ($flag eq 'l');
    $Homedir       = $value if ($flag eq 'd');
    $Minsize       = $value if ($flag eq 's');
    $Maxsize       = $value if ($flag eq 'S');
    $Archiveperiod = $value if ($flag eq 'a');
    $Frequency     = $value if ($flag eq 'f');
    $Maxage        = $value if ($flag eq 'A');
    $Expandtabs    = $value if ($flag eq 'x');
}

###############################################################################
# Initialize other global variables.
###############################################################################

# Return the first line of a state file without trailing whitespace.  A
# missing or empty file yields the empty string, which counts as 0 in the
# arithmetic below, so a new list can start without any state files.
sub read_state {
    local($file) = @_;
    local($value) = '';
    if (open(STATE, $file)) {
	$value = <STATE>;
	close(STATE);
	$value = '' unless defined($value);
	$value =~ s/\s+$//;
    }
    $value;
}

$Now = time;

# Everything below uses paths relative to the list's home directory.
chdir($Homedir) || die "Can't cd to $Homedir: $!";

# The owner of the home directory becomes the owner of the files created
# later.  We are already inside the directory, so stat "." -- stat'ing
# $Homedir again would fail whenever it was given as a relative path.
@s = stat('.');
$Listowner = $s[4];

# rec.music.gdead -> /usr/spool/news/rec/music/gdead
($newspath = $Newsgroup) =~ s|\.|/|g;
$Newsdir = "$Sysnewsdir/$newspath";

# Built by concatenation on purpose: inside a double-quoted string Perl 5
# reads "@$Mailhost" as an array dereference and drops the host part.
$Listaddr    = $Listname . '@' . $Mailhost;
$Requestaddr = $Listname . '-Request@' . $Mailhost;
$Distaddr    = $Listname . '-Dist@' . $Mailhost;
$ArticleSep  = "\n" . '-' x $ArticleSepLen . "\n\n";

# State left behind by the previous run.
$LastArticle = &read_state($LastArticleF);
$ThenDigest  = &read_state($ThenDigestF);
$LastDigest  = &read_state($LastDigestF);
$Volume      = &read_state($VolumeF);

# Number of the next digest; roll over into a new volume when needed.
$Digestnum = $LastDigest + 1;
if ($Digestnum > $Digests_per_volume) {
    $Digestnum = 1;
    $Volume++;
}

# Rearrange the fields of date(1) output ("Wed Jan  1 14:52:48 EST 1992")
# into an RFC 822 style date with a two-digit year
# ("Wed, 1 Jan 92 14:52:48 EST").
$_ = `date`;
chop;
@d = split(/\s+/);
$d[5] =~ s/^..//;
$Rfc822Date = sprintf("%s, %s %s %s %s %s",$d[0],$d[2],$d[1],$d[5],$d[3],$d[4]);
($Hour,$x,$x) = split(/:/,$d[3]);

# Hours since the last digest was sent, offset by half an hour.
$ThenDigestHours = ($Now - $ThenDigest + 1800) / 3600;

# Fixed start of every digest file: two address lines and a blank line,
# followed by the constant message headers.
# NOTE(review): the two leading lines are presumably the sender and
# recipient that MMDF's submit expects ahead of the message -- confirm.
$ConstantHeader	= "$Requestaddr
$Distaddr

From:     Digestifier <$Requestaddr>
To:       $Listaddr
Reply-To: $Listaddr
";

$DigestDate   = "Date:     $Rfc822Date\n";
$SubjectIntro = "\nContents:\n";
$ArticleStart = "\n" . '-' x $Titlelen . "\n\n";

# Size of the closing text, needed for the digest size computation.
@s = stat($Closing);
$ClosingSize = $s[7];

###############################################################################
# Delete saved digests created more than $Archiveperiod ago.
###############################################################################

# Walk the saved digests and remove every one whose modification time lies
# more than $Archiveperiod days before the start of this run.
foreach $saved (<$Digestdir/*>) {
    $mtime    = (stat($saved))[9];
    $days_old = ($Now - $mtime) / 86400;
    unlink $saved if ($days_old > $Archiveperiod);
}

###############################################################################
# Collect articles in a directory.  Each article will be reformatted to look
# exactly as it will be when it is digestified with the exception that the
# first line of each file will contain the line which will appear in the
# digest's table of contents.  This bit of weirdness simplifies size
# computations.  The format is "  subject (author)".
###############################################################################

#$days{'Jan'} = 0;
#$days{'Feb'} = 31;
#$days{'Mar'} = 59;
#$days{'Apr'} = 90;
#$days{'May'} = 120;
#$days{'Jun'} = 151;
#$days{'Jul'} = 181;
#$days{'Aug'} = 212;
#$days{'Sep'} = 243;
#$days{'Oct'} = 273;
#$days{'Nov'} = 304;
#$days{'Dec'} = 334;
#@x = localtime($Now);
#$nowdays = $x[7];

# Return $line with every tab replaced by enough blanks to reach the next
# multiple-of-8 column.  (Big Blue likes to replace tabs with semicolons!)
sub expand_tabs {
    local($line) = @_;
    1 while $line =~ s/^([^\t]*)\t/$1 . ' ' x (8 - length($1) % 8)/e;
    $line;
}

# The newsgroup name with regex metacharacters escaped, so that the dots
# in it match only literal dots when it is removed from a Newsgroups: line.
($ngpat = $Newsgroup) =~ s/(\W)/\\$1/g;

# Copy every article newer than $LastArticle from the news spool into
# $Articledir, reformatted the way it will appear in a digest.  The first
# line of each copy is the article's table-of-contents entry.
$NextLastArticle = $LastArticle;
article: foreach $article (<$Newsdir/*>) {
    $article =~ m|([^/]+)$|;
    $artnum = $1;

    # Articles are the purely numeric entries; anything else in the spool
    # directory is not an article.
    next article unless ($artnum =~ /^\d+$/);
    next article if ($artnum <= $LastArticle);
    $NextLastArticle = $artnum if ($artnum > $NextLastArticle);

    # First pass: scan the header for the table-of-contents line,
    # "  subject (author)".
    open(IN,$article) || die "Can't open $article";

    $subj = "[no subject]";
    $name = "";
    while (<IN>) {
	chop;
	if (/^subject:/i) {
	    s/subject:[ \t]*(.*)/  $1/i;
	    $subj = $_;
	} elsif (/^from:/i) {
	    # Keep the human-readable name: the text before "<addr>", or
	    # else the text inside "(...)".
	    s/from:[ \t]*//i;
	    s/([^<]*)<[^>]*>/$1/ || s/[^\(]*\(([^\)]*)\)/$1/;
	    s/[ \t]*$//;
	    $name = $_;
	} elsif (/^date:[^m]*may/i) {
	    # NOTE(review): this drops every article whose Date: header
	    # contains "may".  Behaviour kept as found; it looks like a
	    # leftover hack -- confirm whether it is still wanted.
	    close(IN);
	    next article;
	} elsif (/^$/) {
	    last;
	}
    }
    close(IN);

    $toc = $subj . ($name ? " ($name)" : "") . "\n";
    $toc = &expand_tabs($toc) if ($Expandtabs);

    open(OUT,">$Articledir/$artnum") ||
	die "Can't open $Articledir/$artnum for writing";
    print OUT $toc;

    # Second pass: process the whole article from scratch.
    open(IN,$article) || die "Can't open $article";

    $header = 1;
    while (<IN>) {

	# Some mailers interpret a ^.$ as end-of-message.
	s/^\.$/. /;

	# Change ^----- to avoid screwing up undigestifiers.
	tr/-/=/ if (/^-----/);

	$_ = &expand_tabs($_) if ($Expandtabs);

	if (!$header) {
	    print OUT $_;
	} elsif (/^from:/i || /^date:/i || /^reply-to:/i || /^subject:/i) {
	    print OUT $_;
	} elsif (s/^newsgroups:/Crossposted-To:/i && /,/) {
	    # Crossposted article: list the other groups only.  The name
	    # must be followed (or preceded) by a delimiter so that a
	    # longer group name sharing our prefix is left intact.
	    s/,$ngpat([,\s])/$1/i || s/([:\s])$ngpat,/$1/i;
	    print OUT $_;
	} elsif (/^$/) {
	    print OUT $_;
	    $header = 0;
	}
    }
    close(IN);

    print OUT $ArticleSep;
    close(OUT);
}

# Remember how far we got, so the next run starts after these articles.
open(STATE,">$LastArticleF") || die "Can't write $LastArticleF: $!";
print STATE "$NextLastArticle\n";
close(STATE);

chown $Listowner,100,<$Articledir/*>;

###############################################################################

# Input size information.

###############################################################################

# Record the byte size of every staged article, keyed by its path.
foreach $staged (<$Articledir/*>) {
    $Size{$staged} = (stat($staged))[7];
}

###############################################################################

# Digestify the articles if there are enough of them.

###############################################################################

# Sort comparator: order staged article paths by their trailing article
# number.  A plain string sort would put articles/1000 before articles/999.
sub by_article_number {
    local($left, $right) = ($a, $b);
    $left  =~ s|.*/||;
    $right =~ s|.*/||;
    ($left <=> $right) || ($a cmp $b);
}

# Overwrite a one-line state file.  A failure is reported but not fatal:
# by the time this is called the digest has already been handed to submit.
sub save_state {
    local($file, $value) = @_;
    if (open(STATE, ">$file")) {
	print STATE "$value\n";
	close(STATE);
    } else {
	warn "Can't write $file: $!\n";
    }
}

# Build and send digests until the remaining articles no longer justify
# another one.
while (1) {

    # Figure out the total size of everything except the actual articles,
    # subjects, and separators.

    $DigestSubject = "Subject:  $Listname Digest #$Digestnum\n";
    $t = "$Listname Digest #$Digestnum, Volume #$Volume";
    # Pad so that the date ends at column $Titlelen.
    $spaces = ' ' x ($Titlelen - length($t) - length($Rfc822Date));

    $DigestTitle = "\n$t$spaces$Rfc822Date\n";
    $Size = length($ConstantHeader) + length($DigestDate) +
	    length($DigestSubject) + length($DigestTitle) +
	    length($SubjectIntro) + length($ArticleStart) + $ClosingSize;

    # Add up all the article sizes.

    $tsize = 0;
    foreach $size (values %Size) {
	$tsize += $size;
    }

    # Too little material for a digest: stop, unless the last digest is
    # older than $Frequency hours and nothing has gone out in this run yet.
    last if (($tsize + $Size < $Minsize) &&
	     (($ThenDigestHours < $Frequency) || $got_one));

    # Initialize a digest.  The header and table of contents go straight
    # into the digest file; the article bodies are collected in $Body and
    # appended once the table of contents is complete.

    $got_one = 0;
    $digestfile = "$Digestdir/$Digest$Digestnum";
    open(OUT,">$digestfile") ||
	die "Can't open $digestfile for writing";
    print OUT $ConstantHeader,$DigestDate,$DigestSubject,$DigestTitle,
	      $SubjectIntro;
    open(BODY,">$Body") || die "Can't open $Body for writing";

    foreach $article (sort by_article_number keys %Size) {
	# Stop at the first article that would overflow the digest, but
	# always take at least one.
	last if (($Size{$article} + $Size > $Maxsize) && $got_one);
	$Size += $Size{$article};
	$got_one = 1;
	delete $Size{$article};

	# The first line of a staged article is its table-of-contents
	# entry; everything after it is the article itself.
	open(IN,$article) || die "Can't open $article";
	$toc_line = <IN>;
	print OUT $toc_line if (defined($toc_line));
	while (<IN>) {
	    print BODY $_;
	}
	close(IN);
	unlink $article;
    }
    print OUT $ArticleStart;
    close(OUT);
    close(BODY);

    # Nothing to send: remove the empty digest and the scratch file
    # instead of leaving them behind, then stop.
    if (!$got_one) {
	unlink $digestfile, $Body;
	exit;
    }

    system "cat $Body $Closing >> $digestfile";
    unlink $Body;
    chown $Listowner,100,$digestfile;
    if (system("cat $digestfile | $Submit") != 0) {
	warn "submit failed for $digestfile (status $?)\n";
    }

    &save_state($ThenDigestF, $Now);
    &save_state($LastDigestF, $Digestnum);
    &save_state($VolumeF, $Volume);

    $Digestnum++;
    if ($Digestnum > $Digests_per_volume) {
	$Digestnum = 1;
	$Volume++;
    }
}

------------------------- end digestnews --------------------------
----------------------------- post -----------------------------
#!/usr/bin/perl

# post -- read one mail message on stdin and post it to a newsgroup.
#
# Usage: post [-m] newsgroup distribution
#
#   -m   add an Approved: header to the posting (for moderated groups)
#
# Every line read is appended to the post log.  The article handed to
# inews carries only From:, Subject: and Reply-To: (plus Approved: with
# -m), followed by the message body.  The exit status is 1 when the message
# is refused because of its sender, otherwise the exit status of inews.

$ENV{"ORGANIZATION"} = "The Internet";

# Only a first argument that is exactly "-m" selects moderated mode; a
# newsgroup name that merely contains "-m" must not.
if ($ARGV[0] eq '-m') {
    $moderated = 1;
    shift;
}
die "Not enuf args" unless ($ng = $ARGV[0]);
die "Not enuf args" unless ($dist = $ARGV[1]);
shift;shift;
$inews = "/usr/mmdf/lib/inews.post";
die "Where's inews?" unless (-x $inews);

# should lock this file, but it's just a log ...
open(LOG,">>/usr/mmdf/lib/postlog") || die "Can't open log";
print LOG "\n";

$header = 1;
$posting = 0;		# set once the pipe to inews is open
while (<>) {
    print LOG $_;
    if ($header) {
	chop;
	if (/^$/) {
	    # End of the header; the next line is the first body line.
	    $header = 0;
	    $first_line = 1;
	    if (!$from) {
		print LOG "\n";
		die "No From:";
	    }
	} elsif (!$from && /^From\s+(\S+)/) {
	    # "From " envelope line: used only until a From: header shows up.
	    $from = $1;
	} elsif (/^From:\s*(\S.*)/i) {
	    $from = $1;

	    # this is a hack to hold possible bounced digests in the queue
	    # to await human examination.

	    if ($from =~ /postmaster/i || $from =~ /^uucp/i ||
		    $from =~ /mail_3_server/i ||
		    $from =~ /dead-flames/i || $from =~ /ansp\.br/i) {
		print LOG "\n";
		exit 1;
	    }
	} elsif (/^Reply-To:\s*(\S.*)/i) {
	    $rp = $1;
	} elsif (/^Subject:\s*(\S.*)/i) {
	    $subj = $1;

	    # this is a hack to hold possible bounced digests in the queue
	    # to await human examination.
	    
	    #$_ = $ng;
	    #if (/gdead/ || /perl/) {
		#$_ = $subj;
		#if (/undeliverable/i || /failed/i || /uucp/i) {
		#if (/undeliverable/i || /failed/i || /warning/i) {
		#if (/undeliverable/i || /warning/i || /uucp/i) {
		#if (/undeliverable/i || /failed/i || /warning/i || /uucp/i) {
		#    print LOG "\n";
		#    exit 1;
		#}
	    #}
	}

    } elsif ($first_line) {

	# this is for the folks at rohvm1.bitnet, whose subj headers
	# always say "BITNET Mail Enclosed" or somesuch.

	$first_line = 0;
	if (/^Subject:\s*(\S.*)/i) {
	    $dont_print = 1;
	    $subj = $1;
	} elsif (!$subj) {
	    $subj = "(none)";
	}

	# Without a Reply-To:, replies go to the bare address taken from
	# the From: value.
	if (!$rp) {
	    $rp = $from;
	    $rp =~ s/[^<]*<([^>]*)>.*/$1/;
	}

	# and this is for the folks at data general, whose messages
	# always start with "CEO summary:"

	$dont_print = 1 if (/^CEO summary:/);

	# Fork a child with its stdin connected to INEWS and run inews in
	# it.  open() returns undef when the fork fails and 0 in the child;
	# the two must be told apart, or a failed fork would turn the
	# parent itself into inews.
	$pid = open(INEWS,"|-");
	if (!defined($pid)) {
	    print LOG "\n";
	    die "Can't fork: $!";
	}
	if ($pid == 0) {
	    exec $inews,'-h','-n',$ng,'-d',$dist;
	    die "Can't exec $inews: $!";
	}
	$posting = 1;

	if ($moderated) {
	    # The "@" is escaped so that Perl 5 does not try to interpolate
	    # an array named @Virginia.
	    print INEWS "Approved: postmaster\@Virginia.EDU\n";
	}
	print INEWS "From: $from\nSubject: $subj\nReply-To: $rp\n\n";

	if (!$dont_print) {
	    s/^>/|/;
	    print INEWS $_;
	}

    } else {
	s/^>/|/;
	print INEWS $_;
    }
}

# Closing the pipe waits for inews and leaves its wait status in $?.
$error = 0;
if ($posting) {
    close(INEWS);
    $error = $? >> 8;
    $error = 1 if ($? && !$error);	# inews was killed by a signal
    if ($error) {
	printf LOG "ERROR: inews exited with status %d\n",$error;
    }
}

print LOG "\n";
close(LOG);

exit $error;
--------------------------- end post ---------------------------

