#!/usr/local/bin/perl

# Main driver: feed every input line (files named on the command line, or
# STDIN) to analyze(), keep the running count of accepted hits, and print the
# HTML report once the input is exhausted.
#
# analyze() decrements $hits for a line it cannot parse, so after the
# increment below $hits is unchanged for a rejected line and one larger for
# an accepted one.  That is how rejected lines are detected here.
$hits = 0;
while (<>) {
    my $hits_before = $hits;
    analyze($_);
    $hits++;

    # Rejected line: do not repeat the progress message for a count that was
    # already reported, and do not let the line overwrite the timestamp of
    # the last good hit.
    next if $hits == $hits_before;

    if ($hits % 5000 == 0) {
        print(STDERR "Proccessed $hits hits\n");
    }
    $lasttime = $time;
}

# Strip any stray opening bracket from the period boundaries before they are
# printed in the report heading.
$firsttime =~ s/\[//g;
$lasttime =~ s/\[//g;

do_out();

# analyze(LINE)
#
# Parse one access-log LINE and fold it into the global tallies that the
# report is built from:
#   %hitshour / %byteshour  - requests and bytes per hour of day
#   %hitsuri  / %bytesuri   - requests and bytes per document
#   %hosthits               - requests per client host
#   %browsers               - requests per browser name
#   %referers               - requests per referer (mit.edu referers skipped)
#   $bytestot, $mit         - total bytes, requests from hosts in 18.*
#
# The expected line layout is
#   host ident user [time zone] "method path version" status size referer [agent]
#
# The parsed fields are left in package globals ($host, $time, $path, ...);
# the caller reads $time.  A line that does not match is ignored: $hits is
# decremented (the caller increments it unconditionally afterwards) and every
# global, including $time, keeps the value from the last good line.
sub analyze {
    my ($line) = @_;

    # Match into a temporary list so that a failed match cannot wipe the
    # globals that still describe the previous good line.
    my @fields = ($line =~ m{
        ^ ([^ ]+)                                 # host
        [ ] ([^ ]+)                               # ident
        [ ] ([^\[]+?) [ ]* \[                     # user (whole field, not just its last char)
        ([^ ]+) [ ] ([^\]]+) \]                   # time and zone
        [ ] " ([^ ]+) [ ] ([^ ]+) [ ] ([^"]+) "   # method, path, version
        [ ] (\d+)                                 # status code
        [ ] ([\d-]+)                              # size, "-" when nothing was sent
        [ ] ([^ ]+)                               # referer
        [ ] \[ ([^\]]+) \]                        # agent
    }x);

    unless (@fields) {
        $hits--;
        return;
    }

    # Whatever follows the agent field; must be read before any other
    # pattern match replaces $'.
    $suplemental = $';

    ($host, $crap, $from, $time, $zone, $method, $path, $version, $code,
     $size, $referer, $agent) = @fields;

    $firsttime = $time unless $firsttime;

    # A size of "-" means no body was sent; count it as zero bytes.
    $size = 0 if $size eq "-";

    # Tally documents without their query strings.
    $path =~ s/\?.*//;

    # Hosts logged by address inside net 18 are counted as MIT hosts.
    if (substr($host, 0, 3) eq "18.") {
        $mit++;
    }

    ($day, $month, $year, $hour, $minute, $second) = split(/[\/:]/, $time);
    $hitshour{int($hour)}++;
    $byteshour{int($hour)} += $size;

    # Lump all images together, and all failed lookups together, so they do
    # not crowd real documents out of the top-documents table.
    if ($path =~ /\.gif$/i || $path =~ /\.jpe?g$/i) {
        $path = "All Pictures";
    }
    if ($code == 404) {
        $path = "404 Not Found";
    }

    $hitsuri{$path}++;
    $bytesuri{$path} += $size;
    $hosthits{$host}++;
    $bytestot += $size;

    # Reduce the agent string to a browser name: drop library and proxy
    # decorations, then split "Name/version" or "Name 1.2...".
    $agent =~ s/libwww\S+//ig;
    $agent =~ s/via proxy gateway +[\S]+//g;
    $agent =~ s/[\s]+$//g;
    $browser_type = $agent;

    if ($browser_type =~ /^([^\/]+)\/(.*)$/) {
        $browser_name    = $1;
        $browser_version = $2;

        $browser_name =~ s/Version [\S]+//gi;
        $browser_name =~ s/\(tm\)//gi;
    }
    elsif ($browser_type =~ /^(.+) ([\d]+\.[\d]+.*)/) {
        $browser_name    = $1;
        $browser_version = $2;
    }
    elsif ($browser_type eq "(null)") {
        $browser_name = "Unknown";
    }
    else {
        $browser_name = $browser_type;
    }

    $browsers{$browser_name}++;

    # The dot is escaped so that only a literal "mit.edu" is skipped.
    $referers{$referer}++ unless $referer =~ /mit\.edu/i;
}

# html_escape(TEXT)
#
# Return TEXT with the characters that are special in HTML text and in
# double-quoted attribute values replaced by entities.  An undefined TEXT is
# treated as the empty string.
sub html_escape {
    my ($text) = @_;
    $text = "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# do_out()
#
# Print the HTML report to STDOUT from the global tallies: the summary
# heading and totals, the per-hour table, the 100 most requested documents,
# up to 22 hosts with more than 50 hits each, and up to 22 browsers.
#
# Reads $hits, $mit, $bytestot, $firsttime, $lasttime, %hitshour,
# %byteshour, %hitsuri, %hosthits and %browsers; modifies none of them.
# Hosts, paths, browser names and timestamps all come from the log and
# are therefore client-controlled, so they are escaped before being
# written into the page.
sub do_out {
    # Totals may be undefined or zero for an empty log; every division below
    # is guarded so the report is still produced.
    my $total_hits  = $hits     || 0;
    my $total_bytes = $bytestot || 0;
    my $mit_hits    = $mit      || 0;
    my $max_rows    = 22;    # row limit for the host and browser tables

    my $period_start = html_escape($firsttime);
    my $period_end   = html_escape($lasttime);

    print("<h1>Summary Period Covers $period_start to $period_end</h1>\n");
    print("<hr>Files Transmitted During Summary Period: $total_hits<br>\n");
    print("Bytes Transmitted During Summary Period: $total_bytes<br>\n");
    my $mitperc = $total_hits > 0 ? int(100 * $mit_hits / $total_hits) : 0;
    print("Percentage of documents served to MIT hosts: $mitperc<br><hr>\n");

    print("<table border>\n");
    print("<tr><th colspan=5>Hourly Transmission Statistics</th></tr>\n");
    print("<tr><th>%Reqs</th><th>%Byte</th><th>Bytes</th><th>Requests</th><th>Time</th></tr>\n");
    # Hours of the day run 0..23; there is no hour 24.
    foreach my $hour (0 .. 23) {
        my $requests = $hitshour{$hour}  || 0;
        my $bytes    = $byteshour{$hour} || 0;
        my $rpct = $total_hits > 0
            ? int(10000 * $requests / $total_hits) / 100 : 0;
        my $bpct = $total_bytes > 0
            ? int(10000 * $bytes / $total_bytes) / 100 : 0;
        # %.2f keeps the two decimals computed above (%.2g would print 100
        # as "1e+02").  Counts go through %s so large byte totals are not
        # squeezed into a native integer.
        printf("<tr><td>%.2f</td><td>%.2f</td><td>%s</td><td>%s</td><td>%d</td></tr>\n",
               $rpct, $bpct, $bytes, $requests, $hour);
    }
    print("\n</table>\n");
    print("<hr>\n");

    print("<table border><tr><th colspan=2>Top Documents</th></tr>\n");
    print("<tr><th>Hits</th><th align=left>Document</th></tr>\n");
    my $shown = 0;
    # Ties are broken by name so the report is the same from run to run.
    foreach my $doc (sort { $hitsuri{$b} <=> $hitsuri{$a} || $a cmp $b }
                     keys %hitsuri) {
        last if ++$shown > 100;
        my $label = html_escape($doc);
        # Only real paths are linked; keys that do not start with "/" are
        # not valid URLs on the server.
        my $cell = $doc =~ m{^/}
            ? "<a href=\"http://www.mit.edu$label\">$label</a>"
            : $label;
        print("<tr><td>$hitsuri{$doc}</td><td>$cell</td></tr>\n");
    }
    print("</table>\n");

    my @tophosts = sort { $hosthits{$b} <=> $hosthits{$a} || $a cmp $b }
                   grep { $hosthits{$_} > 50 } keys %hosthits;
    print("<hr><table border><tr><th colspan=2>Top Hosts</th></tr>\n");
    print("<tr><th>Hits from</th><th>Host</th></tr>\n");
    $shown = 0;
    foreach my $client (@tophosts) {
        last if ++$shown > $max_rows;
        my $label = html_escape($client);
        print "<tr><td>$hosthits{$client}</td><td><a href=\"http://www.mit.edu/machine?$label\">$label</a></td></tr>\n";
    }
    print("</table>\n");

    my @topbrowsers = sort { $browsers{$b} <=> $browsers{$a} || $a cmp $b }
                      keys %browsers;
    print("<hr><table border><tr><th colspan=2>Top Browsers</th></tr>\n");
    print("<tr><th>Hits from</th><th>Browser</th></tr>\n");
    $shown = 0;
    foreach my $browser (@topbrowsers) {
        last if ++$shown > $max_rows;
        my $pct = $total_hits > 0
            ? int(100 * $browsers{$browser} / $total_hits) : 0;
        my $label = html_escape($browser);
        print "<tr><td>$pct ($browsers{$browser})</td><td>$label</td></tr>\n";
    }
    print("</table>\n");
}
