#!/usr/local/bin/perl
#$Id: topfaq.pl,v 1.2 1995/07/25 21:44:56 pshuang Exp $
require 'find.pl';

# Local copy of the FTP transfer log
# (copy from rtfm.mit.edu:/usr/adm/xferlog*).
$xferlog = '/afs/sipb.mit.edu/project/periodic-postings/news.answers/xfer-logs/xferlog';

# Local copy of the mail-server log
# (copy from rtfm.mit.edu:/usr/local/mserv/logs).
$maillog = '/afs/sipb.mit.edu/project/periodic-postings/news.answers/xfer-logs/archive.log';

# Non-zero: report progress on stderr via warn().
$verbose = 1;

# The later glob, lstat and find calls all use paths relative to this
# directory, so nothing below is meaningful unless the chdir succeeds.
# Include $! so a failure says why (missing directory, permissions, ...).
chdir('/u1/ftp') || die "cannot chdir to /u1/ftp: $!";

# Read the ftp log and store the access counts into %fnames, keyed by
# the transferred path with its leading slash removed.

warn("reading ftplog") if $verbose;

open(FTPLOG, '<', $xferlog) || die("Could not open $xferlog: $!");
while (<FTPLOG>) {
    @fields = split;

    # The path is the ninth whitespace-separated field.  Blank or
    # truncated lines have no such field; skip them instead of counting
    # an access against the empty file name.
    next if @fields < 9;

    $fields[8] =~ s/^\///;
    next if $fields[8] eq '';
    $fnames{$fields[8]}++;
}
close(FTPLOG);

# Check the mail log, keep adding to %fnames
open(MAILLOG, '<', $maillog) || die ("Could not open $maillog: $!");

warn("reading maillog") if $verbose;

mailline: while (<MAILLOG>) {
    @fields = split;

    # Only lines whose sixth field matches "sent" are counted.
    if (!defined($fields[5]) || $fields[5] !~ 'sent') {
        next mailline;
    }

    # chomp, not chop: an unterminated final line must not lose the last
    # character of its file name.
    chomp;

    # The file name is whatever follows "//" plus blanks on the line.
    ($junk,$fname) = split(/\/\/[ \t]+/,$_);
    if (!defined($fname) || $fname !~ /usenet/ || $fname =~ /usenet-addr/) {
        next mailline;
    }
    $fname =~ s/ +//g;

    # Munge $fname until it's in the same format as the ftp logfiles,
    # i.e. pub/usenet/mumble.foo/foo-faq

    $fname =~ s/^\///;
    if ($fname =~ /^usenet/) {
        $fname = 'pub/' . $fname;
    }

    # special case: wildcards sent out.  Every file the pattern matches
    # (relative to the current directory) is counted once.
    # NOTE(review): $fname is taken verbatim from the log, so the
    # pattern globbed here was chosen by whoever made the request --
    # confirm that expanding it on this host is acceptable.
    if ($fname =~ /\*/) {
        foreach $expanded_group (glob($fname)) {
            $fnames{$expanded_group} ++;
        }
    }
    else {
        $fnames{$fname} ++;
    }
}
close(MAILLOG);

# Now we've got every access stored in %fnames.
# What we now do is re-store it in %acc_count, but fold
# everything in pub/usenet/comp.foo.mumble (the $fname index
# for the %fnames array) into pub/usenet-by-hierarchy/comp/foo/mumble
# (i.e. $nname).

# This is to reduce the number of later file accesses.

# Map one logged path to its usenet-by-hierarchy form.
#   $fname  - path as stored in %fnames, e.g. pub/usenet/comp.foo/faq
# Returns the rewritten path, or nothing (undef in scalar context) when
# the second path component is not usenet, usenet-by-group or
# usenet-by-hierarchy.
sub hierarchy_name {
    my ($fname) = @_;
    my @pathparts = split('/', $fname);
    return unless defined $pathparts[1];

    # Anchored on purpose: an unanchored match would also rewrite
    # usenet-by-hierarchy and any other usenet-* directory.
    if ($pathparts[1] =~ /^usenet(-by-group)?$/) {
        $pathparts[1] = 'usenet-by-hierarchy';
        # comp.foo.mumble -> comp/foo/mumble
        $pathparts[2] =~ s/\./\//g if defined $pathparts[2];
        return join('/', @pathparts);
    }

    # String comparison (eq).  A numeric == on two non-numeric strings
    # compares 0 with 0 and is always true, which let every unrelated
    # path through.
    return $fname if $pathparts[1] eq 'usenet-by-hierarchy';

    return;
}

warn("resorting filenames") if $verbose;

foreach $fname (keys %fnames) {
    $nname = hierarchy_name($fname);
    next unless defined $nname;
    $acc_count{$nname} += $fnames{$fname};
}

undef %fnames ;

if ($verbose) {
    warn("building database");
}

# Walk the news/answers tree.  find() is presumably the one supplied by
# the find.pl required at the top of this file; it is expected to call
# wanted() per entry, which is what fills in %found.
find('pub/usenet-by-hierarchy/news/answers');

warn("translating filenames") if $verbose;

# Now translate the file names in %acc_count into archive names.
# For *.answers, that's easy, just cut off the
# pub/usenet-by-hierarchy/foo/answers/ part.  For the other articles,
# look up their inodes in the %found table.  At present, %found contains
# only files from pub/usenet-by-hierarchy/news/answers, so anything
# not archived to *.answers will not be counted.

# Translate one usenet-by-hierarchy path into its archive name.
#   $nname  - key of %acc_count
# Returns the archive name, or nothing (undef in scalar context) when
# the access should not be counted: the file cannot be lstat'ed, its
# inode is not in %found, or it is an index, "-listing-" or ".#" file
# inside an answers directory.
sub archive_name {
    my ($nname) = @_;
    my @pathparts = split('/', $nname);
    my $aux_name;

    if (!defined($pathparts[3]) || $pathparts[3] !~ /answers/) {
        # Not in an answers directory: find the answers-tree name that
        # shares this file's inode.
        my ($dev, $ino) = lstat($nname);
        return unless defined $ino;        # lstat failed
        $aux_name = $found{$ino};
        return unless $aux_name;
        $aux_name =~ s,^pub/usenet-by-hierarchy/[^/]*/answers/,,;
        return $aux_name;
    }

    $aux_name = $nname;
    $aux_name =~ s,^pub/usenet-by-hierarchy/[^/]*/answers/,,;

    # The dot is escaped so that only ".#" names are dropped, matching
    # the test in wanted(); an unescaped dot would also drop any name
    # that merely contains a "#", such as "c#-faq".
    return if $aux_name =~ /\.#/;
    return if $aux_name eq 'index';
    return if $aux_name =~ '-listing-$';
    return $aux_name;
}

foreach $nname (keys %acc_count) {
    $aux_name = archive_name($nname);
    next unless defined $aux_name;
    $acc_count_2{$aux_name} += $acc_count{$nname};
}

# finally, we're done... write things out, one "name count" pair per
# line, piped through sort(1): descending numeric count (field 2) first,
# then name (field 1).
#
# The keys are given in -k form; it selects the same fields as the
# obsolete zero-based "+1nr -2 +0 -1" spelling, which current sort
# implementations no longer accept.

open(OUTPUT,"|sort -k2,2nr -k1,1") || die("Could not start sort: $!");

foreach $aux_name (keys %acc_count_2) {
    print OUTPUT "$aux_name $acc_count_2{$aux_name}\n";
}

# Closing a pipe waits for the child, so this is where a failing sort
# shows up ($! for a system error, otherwise $? holds its wait status).
close(OUTPUT) || die("sort failed: " . ($! ? $! : "wait status $?"));

# Per-entry callback used to build %found, a table mapping inode number
# to the preferred path name of that file.
#
# Reads:  $_ (entry name), $name and $dir.  None of these are set in
#         this file -- presumably they are supplied by find.pl's find();
#         verify against the find.pl in use.
# Writes: %found, the counters $found (distinct inodes recorded) and
#         $clashes (inodes seen more than once), and the globals
#         $dev, $ino, $mode, $nlink, $uid, $gid from lstat.
#
# NOTE(review): this sub contains no loop of its own, so each "next"
# below leaves the sub and acts on a loop in the caller (presumably the
# per-entry loop inside find.pl).  Replacing them with "return" may
# change what the caller does afterwards -- confirm before changing.
sub wanted {
    # Drop a leading "./" from the entry name.
    s/^\.\///;
    # Only $ino is used in this sub; the other fields are assigned to
    # globals.  NOTE(review): find.pl may read $nlink after the call --
    # confirm before narrowing this list.
    ($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_);
    # Entries that are never recorded: names ending in ".old", names
    # containing ".#", the news-answers/index file, and anything whose
    # directory ends in /comp.mail.maps or /news.lists.ps-maps.
    next if (/\.old$/);
    next if (/\.\#/);
    next if ($name =~ m,news-answers/index$,);
    next if ($dir =~ m,/comp\.mail\.maps$,);
    next if ($dir =~ m,/news\.lists\.ps-maps$,);
    if ($found{$ino}) {
	# This inode already has a name.  Replace it when the new name
	# has a "<something>.answers/" path component, or when the new
	# name is longer and the stored one has no such component.
	# NOTE(review): the only find() call in this file starts at
	# pub/usenet-by-hierarchy/news/answers, whose names would contain
	# "/answers/" rather than ".answers/" -- confirm this dotted
	# pattern still matches anything.
	$clashes++;
	if (($name =~ m,/[^/]*\.answers/,) ||
	    ((length($name) > length($found{$ino})) &&
	     ($found{$ino} !~ m,/[^/]*\.answers/,))) {
	    $found{$ino} = $name;
	}
    }
    else {
	# First time this inode is seen: record its name.
	$found++;
	$found{$ino} = $name;
    }
}