#!/usr/bin/perl
eval "exec /usr/bin/perl -S $0 $*"
    if $running_under_some_shell;
			# this emulates #! processing on NIH machines.
			# (remove #! line above if indigestible)

eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_]+=)(.*)/ && shift;
			# process any FOO=bar switches

#  USAGE: awk -f report_awk /usr/lib/news/log
#
#  AWK script which eats netnews log files and produces a summary of USENET
#  traffic and errors over the period of time that the log was collected.
#
#  December 9, 1986
#
#  Erik E. Fair <dual!fair>
#  Original Author, May 22, 1984
#
#  Brad Eacker <onyx!brad>
#  Modified to simplify the record processing and to sort the output.
#
#  Erik E. Fair <dual!fair>
#  Modifed to provide information about control messages.
#
#  Erik E. Fair <dual!fair>
#  Bug in system name extraction fixed. It was assumed that the forth field
#  (system name) always had a dot. local is one that doesn't. Some others
#  (including 2.9 sites) don't either.
#
#  Earl Wallace <pesnta!earlw>
#  The "sent" field was changed from $5 to $6 in 2.10.2 (beta)
#  named "newstats" and called with no arguments.
#
#  Erik E. Fair <dual!fair>
#  Remove support for 2.10.1, revise for 2.10.2 to provide information
#  about junked articles, garbled articles, and bad newsgroups
#
#  Erik E. Fair <ucbvax!fair>
#  Minor bug fix to bad newsgroup reporting, also now counting ``old''
#  articles as junked, with counter for number that are `old'.
#
#  Erik E. Fair <ucbvax!fair>
#  Fix up the domain & local hosts support
#
#  Erik E. Fair <ucbvax!fair>
#  Fix up the counting of gatewayed material, add counting of "linecount"
#  problems. Additional cleanup to make things faster.
#
#  Ron Heiby <cuae2!rwh>
#  Added counting of some more errors and fixed one bug.
#
#  Joe Buck <epimass!jbuck>
#  Report on group creation.  Mention whether bad newsgroups were created
#  later on.
#
#  Greg Earle <smeagol!earle>
#  Add support for added features of 2.11 that didn't make it into the
#  released version.  These include:
#  - Field 5 = "Expired" when a control cancel invocation tries to 
#    cancel an already expired article.  Added new variable to count
#    these, and are displayed in `Control Invocations' area.
#  - Field 5 = "Aliased",
#    Field 5 = "queued" - ignored (added to `Skip some things we won't
#    bother with' area)
#  - Field 7 = "Unknown" when a bad newsgroup reported.
#	2.11 does not quote these. 2.10.3 did.
#	Handles both cases.
#
$[ = 1;			# set array base to 1
$, = ' ';		# set output field separator
$\ = "\n";		# set output record separator

#
#	this is the prefix that your site uses in hostnames to identify your
#	hosts (e.g. ucbarpa, ucbvax, su-score, mit-mc, mit-ai)
#	You will probably want to change (or add to) the following line
#
$lprefix = '';
$lplen = length($lprefix);
#
#	If you do bi-directional USENET gatewaying (e.g. mailing list
#	to newsgroup where the material flows both ways freely), this
#	should be the name in the sys file that you use to mail stuff
#	to the mailing lists.
#
$pseudo = 'internet';
$rptname = '(GATEWAY)';
#
#	Top level domain names and what network they represent
#	(for use in counting stuff that is gatewayed)
#
$domains{'ARPA'} = $rptname;
$domains{'arpa'} = $rptname;
$domains{'EDU'} = $rptname;
$domains{'edu'} = $rptname;
$domains{'GOV'} = $rptname;
$domains{'gov'} = $rptname;
$domains{'COM'} = $rptname;
$domains{'com'} = $rptname;
$domains{'MIL'} = $rptname;
$domains{'mil'} = $rptname;
$domains{'ORG'} = $rptname;
$domains{'org'} = $rptname;
$domains{'NET'} = $rptname;
$domains{'net'} = $rptname;
$domains{'DEC'} = $rptname;
$domains{'dec'} = $rptname;
$domains{'CSNET'} = $rptname;
$domains{'csnet'} = $rptname;
$domains{'BITNET'} = $rptname;
$domains{'bitnet'} = $rptname;
$domains{'MAILNET'} = $rptname;
$domains{'mailnet'} = $rptname;
$domains{'UUCP'} = $rptname;
$domains{'uucp'} = $rptname;
$domains{'OZ'} = $rptname;
$domains{'oz'} = $rptname;
$domains{'AU'} = $rptname;
$domains{'au'} = $rptname;
$domains{'UK'} = $rptname;
$domains{'uk'} = $rptname;
$domains{'NL'} = $rptname;
$domains{'nl'} = $rptname;
#
#	tilde chosen because it is ASCII 126 (don't change this)
#
$invalid = '~~~~~~';
#
$accept{$invalid} = 0;
$reject{$invalid} = 0;
$xmited{$invalid} = 0;
$control{$invalid} = 0;
$junked{$invalid} = 0;
$neighbor{$invalid} = 0;
$contention = 0;
$badgrp = 0;
$newgrp = 0;
$garbled = 0;
$orphans = 0;
$notsys = 0;
$lcount = 0;
$canfail = 0;
$candup = 0;
$canexp = 0;
$insfail = 0;
$old = 0;
#
#	Skip some things that we won't bother with
#

line: while (<>) {
    chop;	# strip record separator
    @Fld = split(' ');
    if ($_ eq "" || $Fld[5] eq 'from' || $Fld[5] eq 'Cancelling'  || 
	$Fld[5] eq 'Aliased' || $Fld[5] eq 'queued') {
	next line;
    }
    #
    #	Or that we just count
    #
    if ($Fld[5] eq 'Inbound') {
	$garbled++; next line;
    }
    if ($Fld[5] eq 'Orphaned') {
	$orphans++; next line;
    }
    if ($Fld[5] eq 'Newsgroups') {
	$notsys++; next line;
    }
    if ($Fld[5] eq 'waiting') {
	$contention++; next line;
    }
    if ($Fld[5] eq 'Expired') {
	$canexp++; next line;
    }
    if ($Fld[6] eq 'cancel') {
	$canfail++; next line;
    }
    if ($Fld[6] eq 'Cancelled') {
	$candup++; next line;
    }
    if ($Fld[6] eq 'install') {
	$insfail++; next line;
    }
    #
    #	Groups newly created
    #
    if ($Fld[5] eq 'make') {
	$ng = $Fld[7];
	$newng{$ng}++;
	$newgrp++;
	next line;
    }
    #
    #	Articles sent to remote systems (this is what 2.10.2 (beta) says)
    #
    if ($Fld[6] eq 'sent') {
	for ($j = 8; $j <= $#Fld; $j++) {
	    $Fld[$j] =~ s/,.*//;
	    if ($Fld[$j] eq $pseudo) {	
		$Fld[$j] = $rptname;
	    }
	    else {
		$neighbor{$Fld[$j]} = 1;
	    }
	    $xmited{$Fld[$j]}++;
	}
	next line;
    }
    #
    #	Articles sent to remote systems (this is what 2.11 says)
    #
    if ($Fld[5] eq 'sent') {
	for ($j = 7; $j <= $#Fld; $j++) {
	    $Fld[$j] =~ s/,.*//;
	    if ($Fld[$j] eq $pseudo) {
		$Fld[$j] = $rptname;
	    }
	    else {
		$neighbor{$Fld[$j]} = 1;
	    }
	    $xmited{$Fld[$j]}++;
	}
	next line;
    }
    #
    #	Get the name of the system that did this,
    #	taking into account that not everyone believes in domains.
    #

    #	if we get a route addr (we shouldn't, but...), take the last one
    #
     ($hostname = $Fld[4]) =~ s/.*@//;
    #
    #	get the root domain name, and the hostname
    #
    ($sys = $hostname) =~ s/\..*//;
    ($domain = $hostname) =~ s/.*\.//;
    #
    #	check for local system, and if not that, then internet sites.
    #	special case the network name replacement of specific host names,
    #	such that the network name is there only on a `local' posting
    #	(which is really gatewaying in disguise)
    #
    if ($Fld[5] eq 'posted') {
	$prefix = substr($sys, 1, $lplen);
	if ($prefix eq $lprefix) {
	    $sys = 'local';
	}
	else {
	    $dom = $domains{$domain};
	    if ($dom) {
		$sys = $dom;
	    }
	}
    }
    #  
    #	Duplicates & receiveds/posted & control messages
    # 
    if ($Fld[5] eq 'posted' || $Fld[5] eq 'received') {
	$accept{$sys}++;
	if ($Fld[5] eq 'received') {
	    $neighbor{$sys} = 1;
	}
	$nng = (@ngl = split(/,/, $Fld[8]));
	for ($i = 1; $i <= $nng; $i++) {
	    ($ng = $ngl[$i]) =~ s/\..*//;
	    if ($ng) {
		$newsgcnt{$ng}++;
	    }
	}
	next line;
    }
    if ($Fld[5] eq 'Duplicate') {
	if ($Fld[6] eq 'article') {
	    $reject{$hostname}++;
	}
	next line;
    }
    if ($Fld[6] eq 'valid') {
	$junked{$sys}++; next line;
    }
    if ($Fld[6] eq 'too') {
	$junked{$sys}++; $old++; next line;
    }
    if ($Fld[5] eq 'Unknown') {
	$X = (@ngl = split("'", $Fld[7]));
	if ($X == 1) {
	    $ng = $ngl[1];
	}
	elsif ($X == 3) {
	    $ng = $ngl[2];
	}
	$badng{$ng}++;
	$badgrp++;
	next line;
    }
    #
    #	articles who actual line count differs from the Line: header count
    #
    if ($Fld[5] eq 'linecount') {
	$expect = $Fld[7];
	# awk does very strange things with non-numeric characters in numbers
	$expect =~ s/,.*//;	# Strip anything after the comma
	$got = $Fld[9];
	$diff = $got - $expect;
	$lcount++;
	$alc_host{$sys} = 1;
	$neighbor{$sys} = 1;
	if ($diff < 0) {
	    $diff = 0 - $diff;
	    $a_nshort{$sys}++;
	    $a_short{$sys} += $diff;
	    if ($a_smax{$sys} < $diff) {
		$a_smax{$sys} = $diff;
	    }
	}
	else {
	    $a_nlong{$sys}++;
	    $a_long{$sys} += $diff;
	    if ($a_lmax{$sys} < $diff) {
		$a_lmax{$sys} = $diff;
	    }
	}
	next line;
    }
    #
    #	articles who actual line count is Zero
    #
    if ($Fld[7] eq 'linecount') {
	$lcount++;
	$a_zero{$sys}++;
	$reject{$sys}++;
	next line;
    }
    #
    #	Control messages
    #
    if ($Fld[5] eq 'Ctl') {
	$ctot++;
	$control{$sys}++;
	$ctlcnt{$Fld[10]}++;
	next line;
    }
    #
    #	Print anything we didn't recognize, it's probably an error message.
    #	For the submitted report to USENET, do sed -e '1,/^$/d' file | inews
    #	so that this cruft doesn't get out the door.
    #

    print join($,,@Fld);
}

#
#	Summarize and print the report
#

#	special processing for Duplicates, because we can't tell if
#	they came from a netnews neighbor or from the gatewaying
#	activities until we have processed the entire log.
#
foreach $hostname (keys(reject)) {
    #
    #	get the root domain name, and the hostname
    #
    ($sys = $hostname) =~ s/\..*//;
    ($domain = $hostname) =~ s/.*\.//;
    if (!$neighbor{$sys}) {
	$prefix = substr($sys, 1, $lplen);
	if ($prefix eq $lprefix) {	#???
	    $sys = 'local';
	}
	else {
	    $dom = $domains{$domain};
	    if ($dom) {
		$sys = $dom;
	    }
	}
    }
    $i = $reject{$hostname};
    $reject{$hostname} = 0;
    $reject{$sys} += $i;
}

#
#	Same processing for "junked" articles
#	(god I wish AWK had subroutines)
#
foreach $hostname (keys(junked)) {
    #
    #	get the root domain name, and the hostname
    #
    ($sys = $hostname) =~ s/\..*//;
    ($domain = $hostname) =~ s/.*\.//;
    if (!$neighbor{$sys}) {
	$prefix = substr($sys, 1, $lplen);
	if ($prefix eq $lprefix) {	#???
	    $sys = 'local';
	}
	else {
	    $dom = $domains{$domain};
	    if ($dom) {
		$sys = $dom;
	    }
	}
    }
    $i = $junked{$hostname};
    $junked{$hostname} = 0;
    $junked{$sys} += $i;
}

$rtot = 0;
foreach $i (keys(reject)) {
    if ($reject{$i} > 0) {
	$list{$i} = 1;
	$rtot += $reject{$i};
    }
}

$atot = 0;
foreach $i (keys(accept)) {
    $list{$i} = 1;
    $atot += $accept{$i};
}

$xtot = 0;
foreach $i (keys(xmited)) {
    $list{$i} = 1;
    $xtot += $xmited{$i};
}

$ctot = 0;
foreach $i (keys(control)) {
    $list{$i} = 1;
    $ctot += $control{$i};
}

$jtot = 0;
foreach $i (keys(junked)) {
    $list{$i} = 1;
    $jtot += $junked{$i};
}
#
# ctot is part of rtot, so we don't add it in to the grand total.
#
$totarticles = $atot + $rtot;
if ($totarticles == 0) {
    $totarticles = 1;

}
printf
  ("\nSystem\t\tAccept\tReject\tJunked\tXmit to\tControl\t% total\t% rejct\n");
for (; ; ) {
    keys = sort(keys);
    # (WAS) selection sort
#    $i = $invalid;
    foreach $j (sort(keys(list))) {
#	if ($j lt $i) {
#	    $i = $j;
#	}
#    }
#    if ($i eq $invalid) {
#	last;
#    }
#    delete $list{$i};
    #
    #	control & junked are counted under accept.
    #
    $sitetot = $accept{$i} + $reject{$i};
    if ($sitetot == 0) {
	$sitetot = 1;
    }
    $articles{$i} = $sitetot;
    #
    # What an 'orrible printf spec
    #
    printf "%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", $i, $accept{$i},
      $reject{$i}, $junked{$i}, $xmited{$i}, $control{$i},
      ($sitetot * 100) / $totarticles, ($reject{$i} * 100) / $sitetot;
    #
     ;
}
printf "\nTOTALS        \t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", $atot,
  $rtot, $jtot, $xtot, $ctot, 100, ($rtot * 100) / $totarticles;
printf "\nTotal Articles processed %d", $totarticles;
if ($old) {
    printf ', old %d', $old;
}
if ($garbled) {
    printf ', garbled %d', $garbled;
}
if ($orphans) {
    printf ', orphans %d', $orphans;
}
if ($notsys) {
    printf ', not in sys %d', $notsys;
}
if ($insfail) {
    printf ', uninstallable %d', $insfail;
}
if ($contention) {
    printf ', deadlock %d', $contention;
}
printf ("\n");

if ($ctot) {
    printf ("\nControl	Invocations\n");
    foreach $i (keys(ctlcnt)) {
	if ($i eq 'cancel') {
	    printf '%-12s %6d', $i, $ctlcnt{$i};
	    if ($canfail) {
		printf ', %d failed', $canfail;
	    }
	    if ($candup) {
		printf ', %d duplicate', $candup;
	    }
	    if ($canexp) {
		printf ', %d expired', $canexp;
	    }
	    printf ("\n");
	}
	else {
	    printf "%-12s %6d\n", $i, $ctlcnt{$i};
	}
    }
}

if ($lcount) {
    printf ("\nReceived Article Length Problems\n");
    printf
      ("System          Zero Short  Smax  Savg  Long  Lmax  Lavg Total % Tot\n");
    foreach $i (keys(alc_host)) {
	$nlong = $a_nlong{$i};
	$nshort = $a_nshort{$i};
	if ($nlong == 0) {
	    $nlong = 1;
	}
	if ($nshort == 0) {
	    $nshort = 1;
	}
	$lavg = $a_long{$i} / $nlong;
	$savg = $a_short{$i} / $nshort;
	$sitetot = ($a_zero{$i} + $a_nshort{$i} + $a_nlong{$i});
	printf "%-14s %5d %5d %5d %5d %5d %5d %5d %5d %4d%%\n", $i,
	  $a_zero{$i}, $a_nshort{$i}, $a_smax{$i}, $savg, $a_nlong{$i},
	  $a_lmax{$i}, $lavg, $sitetot, ($sitetot * 100) / $articles{$i};
    }
}

if ($atot) {
    printf ("\nNetnews Categories Received\n");
    $l = 0;
    foreach $i (keys(newsgcnt)) {
	if ($l < length($i)) {
	    $l = length($i);
	}
    }
    $fmt = sprintf("%%-%ds %%6d\n", $l);
    for (; ; ) {
	# selection sort
	$max = 0;
	foreach $j (keys(newsgcnt)) {
	    if ($newsgcnt{$j} > $max) {
		$i = $j;
		$max = $newsgcnt{$j};
	    }
	}
	if ($max == 0) {
	    last;
	}
	printf $fmt, $i, $newsgcnt{$i};
	delete $newsgcnt{$i};
    }
}

if ($badgrp) {
    printf ("\nBad Newsgroups Received\n");
    $l = 0;
    foreach $i (keys(badng)) {
	if ($l < length($i)) {
	    $l = length($i);
	}
    }
    $fmt = sprintf("%%-%ds %%5d%%s\n", $l);
    for (; ; ) {
	# selection sort
	$i = $invalid;
	foreach $j (keys(badng)) {
	    if ($j lt $i) {
		$i = $j;
	    }
	}
	if ($i eq $invalid) {
	    last;
	}
	$note = '';
	if ($newng{$i}) {
	    $note = ' (created later)';
	}
	printf $fmt, $i, $badng{$i}, $note;
	delete $badng{$i};
    }
}
if ($newgrp) {
    printf ("\nNewsgroups Created\n");
    $l = 0;
    for (; ; ) {
	# another selection sort
	$i = $invalid;
	foreach $j (keys(newng)) {
	    if ($j lt $i) {	#???
		$i = $j;
	    }
	}
	if ($i eq $invalid) {	#???
	    last;
	}
	print $i;
	delete $newng{$i};
    }
}
