#!/usr/athena/bin/perl

# Usage: <script> SAMPLERATE [logfile ...]
#
# SAMPLERATE is the sampling interval N: every Nth input line is analyzed
# and the report multiplies the sampled counts by N.  The remaining
# arguments (or STDIN when there are none) are read through <>.
$samplerate = shift(@ARGV);

# A missing, zero or non-numeric rate would make every scaled total in the
# report come out as 0, so refuse to run instead of printing nonsense.
unless (defined($samplerate) && $samplerate =~ /^\d+$/ && $samplerate > 0) {
    die "usage: $0 samplerate [logfile ...]\n";
}

# $_ is deliberately left as the current line: analyze() is handed the line
# as an argument, but keeping $_ set costs nothing.
while (<>) {
    next if ++$ct < $samplerate;

    analyze($_);
    $hits++;
    $ct = 0;

    # $time is the timestamp of the line just analyzed.  Only remember it
    # when the line actually yielded one, so the reported end of the period
    # is the last timestamp seen rather than an empty string.
    $lasttime = $time if defined($time) && $time ne '';
}

# Strip any stray '[' left over from the log's "[dd/Mon/yyyy:..." field.
$firsttime =~ s/\[//g;
$lasttime =~ s/\[//g;

do_out();

# analyze(LINE)
#
# Parse one access-log line of the form
#   host ident user [dd/Mon/yyyy:hh:mm:ss zone] "METHOD path version" code size
# and fold it into the running totals kept in package globals:
#   $firsttime             timestamp of the first line parsed successfully
#   $time                  timestamp of the most recent line parsed
#   $mit                   requests whose host field starts with "18."
#                          (presumably MIT's 18.0.0.0/8 network - confirm)
#   %hitshour, %byteshour  requests / bytes keyed by hour of day
#   %hitsuri,  %bytesuri   requests / bytes keyed by path bucket
#   $bytestot              total bytes
# The individual fields of the line ($host, $path, $code, ...) are left in
# package globals as well.
#
# LINE defaults to $_ when no argument is passed.  A line that does not
# match the expected format is ignored: no counter is touched and the
# field globals keep the values from the previous successful parse.
sub analyze {
    my ($line) = @_;
    $line = $_ unless defined $line;

    # The size field may be "-" when no body was sent (e.g. 304 responses).
    my @fields = ($line =~ /([^ ]+) ([^ ]+) ([^\[]+)\[([^ ]+) ([^\]]+)\] \"([^ ]+) ([^ ]+) ([^\"]+)\" (\d+) (\d+|-)/);
    return unless @fields;

    ($host, $crap, $from, $time, $zone, $method, $path, $version, $code, $size) = @fields;
    $from =~ s/ +$//;               # capture runs up to '[', drop the separator
    $size = 0 if $size eq '-';
    $firsttime = $time unless $firsttime;

    $mit++ if $host =~ /^18\./;

    # $time looks like dd/Mon/yyyy:hh:mm:ss; int() turns "07" into 7.
    ($day, $month, $year, $hour, $minute, $second) = split(/[\/:]/, $time);
    $hitshour{int($hour)}++;
    $byteshour{int($hour)} += $size;

    # Collapse all images into one bucket and all misses into another so
    # they do not crowd the per-document ranking.  404 takes precedence.
    if ($path =~ /\.(gif|jpg)$/i) {
        $path = "All Pictures";
    }
    if ($code == 404) {
        $path = "404 Not Found";
    }

    $hitsuri{$path}++;
    $bytesuri{$path} += $size;

    $bytestot += $size;
}

# do_out()
#
# Print the report to the currently selected output handle: the period
# covered, the total files and bytes, one statistics row per hour of the
# day (0-23), and up to 100 paths ranked by request count.
#
# Reads the package globals $hits, $mit, $bytestot, $samplerate,
# $firsttime, $lasttime, %hitshour, %byteshour and %hitsuri.  Sampled
# counts are multiplied by $samplerate to estimate the real totals; the
# scaled totals are also stored in $hitsSC, $mitSC and $bytestotSC.
sub do_out {
    $hitsSC     = $hits * $samplerate;
    $mitSC      = $mit * $samplerate;
    $bytestotSC = $bytestot * $samplerate;

    print("Summary Period Covers $firsttime to $lasttime\n");
    print("Files Transmitted During Summary Period: $hitsSC\n");
    print("Bytes Transmitted During Summary Period: $bytestotSC\n");

    print("\nHourly Transmission Statistics\n");
    print("\n%Reqs\t%Byte\tBytes\tRequests\tTime\n");
    foreach my $h (0..23) {
        my $reqs  = $hitshour{$h}  || 0;
        my $bytes = $byteshour{$h} || 0;

        # Percentages truncated to two decimals.  With nothing sampled (or
        # no bytes transferred) the share is reported as 0 rather than
        # dividing by zero.
        my $rpct = $hits     ? int(10000 * $reqs / $hits) / 100      : 0;
        my $bpct = $bytestot ? int(10000 * $bytes / $bytestot) / 100 : 0;

        # %.2f, not %.2g: %g keeps two *significant* digits, which drops
        # the decimals above 10 and prints 100 as "1e+02".  The counts are
        # passed as arguments so they can never be read as format directives.
        printf("%.2f\t%.2f\t%s\t%s\t%s\n",
               $rpct, $bpct, $bytes * $samplerate, $reqs * $samplerate, $h);
    }

    print("Top Documents\n");
    my $shown = 0;
    # Most requested first; ties are broken by path so the output is stable.
    foreach my $d (sort { $hitsuri{$b} <=> $hitsuri{$a} || $a cmp $b } keys %hitsuri) {
        last if ++$shown > 100;
        my $dhits = $hitsuri{$d} * $samplerate;
        print("$dhits $d\n");
    }

}
