#!/usr/athena/bin/perl

# Update the per-URI "norm" table from a web server access log.
#
# Usage: <this script> WEIGHT [LOGFILE ...]
#
#   WEIGHT   non-negative number; how strongly this log counts against the
#            stored norms when the two are averaged
#   LOGFILE  log file(s) to analyze; STDIN is read when none are given
#
# The norm table ($NORM_FILE) holds one "URI VALUE" pair per line.  For every
# URI the script computes its hit rate per $SCALE counted requests and folds
# it into the stored value as
#     new = (old + WEIGHT * rate) / (1 + WEIGHT)
# where a URI missing from the log has rate 0 and a URI missing from the
# table has old value 0.  The previous table is kept as "$NORM_FILE.old".

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

my $NORM_FILE   = "/mit/mkgray/project/weblognorm/normal";
my $PURGE_EVERY = 20000;    # counted requests between purges of rare URIs
my $SCALE       = 10000;    # rates are expressed as hits per this many requests
my $MIN_RATE    = 9;        # rates/norms below this are considered noise

my $weight = shift;
die "usage: $0 weight [logfile ...]\n"
    unless defined $weight && looks_like_number($weight) && $weight >= 0;

# Load the existing norms.  A missing table is not an error: it simply means
# this is the first run and every URI starts from zero.
my %norm;
if (-e $NORM_FILE) {
    open(my $old_fh, '<', $NORM_FILE)
        or die "cannot read $NORM_FILE: $!\n";
    while (my $line = <$old_fh>) {
        my ($uri, $value) = split ' ', $line;
        # Skip blank or malformed lines rather than creating bogus keys.
        next unless defined $value && looks_like_number($value);
        $norm{$uri} = $value;
    }
    close($old_fh);
}
else {
    warn "$NORM_FILE does not exist, starting with an empty table\n";
}
print "Read old norms, analyzing log\n";

my %hits;
my $total       = 0;    # every counted request
my $since_purge = 0;    # counted requests since the last purge
while (my $line = <>) {
    # The seventh whitespace-separated field is taken to be the request URI
    # (as in Common Log Format -- verify against the actual log layout).
    my @fields = split ' ', $line;
    my $uri = $fields[6];
    next unless defined $uri;

    $uri =~ s/\?.*//s;                              # drop any query string
    next if $uri =~ /\.(?:gif|jpe?g|xbm)$/i;       # ignore inline images

    $hits{$uri}++;
    $total++;
    $since_purge++;
    if ($since_purge > $PURGE_EVERY) {
        # Bound memory use: forget URIs seen only once so far.
        print "Purging...\n";
        for my $u (keys %hits) {
            delete $hits{$u} unless $hits{$u} > 1;
        }
        $since_purge = 0;
    }
}
print "Done, reporting anomolies\n";

# %hits can only be non-empty when $total > 0, so $factor is never zero
# inside the loop below.
my $factor = $total / $SCALE;

my %newnorm;
for my $u (keys %hits) {
    my $actual = $hits{$u} / $factor;
    if ($actual > $MIN_RATE) {
        print "$u has a norm of $actual\n";
    }
    elsif (!exists $norm{$u}) {
        # Rare and previously unknown: not worth tracking.
        next;
    }
    # A known URI is always updated, even when its current rate is low, so
    # that it fades gradually instead of vanishing from the table.
    my $old = exists $norm{$u} ? $norm{$u} : 0;
    $newnorm{$u} = ($old + ($weight * $actual)) / (1 + $weight);
}

# Known URIs that did not survive in %hits decay as if their rate were 0.
for my $u (keys %norm) {
    next if exists $newnorm{$u};
    $newnorm{$u} = $norm{$u} / (1 + $weight);
}

# Keep the previous table as a backup.  Refuse to continue if that fails,
# because the open below would otherwise truncate the only copy.
if (-e $NORM_FILE) {
    rename($NORM_FILE, "$NORM_FILE.old")
        or die "cannot rename $NORM_FILE to $NORM_FILE.old: $!\n";
}
open(my $new_fh, '>', $NORM_FILE)
    or die "cannot write $NORM_FILE: $!\n";

for my $u (keys %newnorm) {
    print {$new_fh} "$u $newnorm{$u}\n" unless $newnorm{$u} < $MIN_RATE;
}
# Buffered write errors only surface at close.
close($new_fh)
    or die "error writing $NORM_FILE: $!\n";
