#!/usr/athena/bin/perl

# Load the baseline ("norm") table into %norm.  Each line of the file is
# expected to hold a URI and its norm separated by whitespace.  The rest of
# the script compares these norms against hit rates scaled to hits per
# 10000 counted requests.
my $norm_file = "/mit/mkgray/project/weblognorm/normal";

# Fail loudly if the table is missing: continuing with an empty %norm would
# make every busy URI look anomalously "high".
open(my $norm_fh, '<', $norm_file)
    or die "Cannot open $norm_file: $!\n";

while (my $line = <$norm_fh>) {
    my ($uri, $value) = split ' ', $line;

    # Skip blank or truncated lines instead of storing an undefined norm.
    next unless defined $value;

    $norm{$uri} = $value;
}
close($norm_fh);
print "Read norms, analyzing log\n";
# Tally requests per URI from the log supplied via <> (files named on the
# command line, or standard input).  Results go to %hits (count per URI) and
# $tct (total number of counted requests).
#
# To bound memory, every time more than $purge_interval requests have been
# counted since the last purge, URIs seen at most once so far are dropped
# from %hits.  Counts are therefore approximate for rarely requested URIs.
my $purge_interval = 20000;
my $since_purge    = 0;

while (my $line = <>) {
    # Split into an explicit array: a bare "split;" no longer fills @_ on
    # modern Perl, which left the URI field undefined for every line.
    my @fields = split ' ', $line;

    # The seventh whitespace-separated field is taken as the request URI
    # (presumably Common Log Format -- confirm against the actual logs).
    # Lines too short to contain it are ignored rather than tallied under "".
    next if @fields < 7;

    my $uri = $fields[6];
    $uri =~ s/\?.+//g;          # drop the query string

    # Inline images are not counted.
    next if $uri =~ /\.(?:gif|jpe?g|xbm)$/i;

    $hits{$uri}++;
    $tct++;

    if (++$since_purge > $purge_interval) {
        print "Purging...\n";
        delete @hits{ grep { $hits{$_} <= 1 } keys %hits };
        $since_purge = 0;
    }
}
print "Done, reporting anomolies\n";

# Convert raw counts into hits per 10000 counted requests (presumably the
# unit the stored norms use -- confirm against whatever generates them).
my $scale = $tct / 10000;

for my $path (keys %hits) {
    my $rate     = $hits{$path} / $scale;
    my $expected = $norm{$path};

    # URIs with a scaled rate of 9 or less are never reported here.
    next unless $rate > 9;

    if ($rate > 13 && $rate > 1.4 * $expected) {
        # More than 40% above the norm, and busy enough to matter.
        print "high: $path ($rate v. $expected)\n";
    }
    elsif ($expected > 12 && $rate < .6 * $expected) {
        # More than 40% below a norm that is itself above 12.
        print "low: $path ($rate v. $expected)\n";
    }
}

# Report URIs that have a norm above 12 but no surviving hit count at all.
for my $path (grep { !$hits{$_} } keys %norm) {
    my $expected = $norm{$path};
    print "VERY low: $path (0 v. $expected)\n" if $expected > 12;
}
