#!/usr/athena/bin/perl

# Streaming "unusual words" report.
#
# Reads text from the files named on the command line (or STDIN), counts
# words, and works in two phases:
#
#   1. Baseline: word counts accumulate until more than $BASELINE_WORDS words
#      have been seen; those counts are then frozen as the baseline.
#   2. Reporting: each time more than $CHUNK_WORDS new words have been read,
#      the words of the current window are ranked by how much more frequent
#      they are in the window than in the baseline, and the top $TOP_N are
#      printed.  The window is cleared once it exceeds $WINDOW_WORDS words.
use strict;
use warnings;

my $CHUNK_WORDS    = 1000;     # evaluate once more than this many new words arrive
my $WINDOW_WORDS   = 5000;     # clear the reporting window above this size
my $BASELINE_WORDS = 20000;    # freeze the baseline above this size
my $TOP_N          = 25;       # maximum number of words listed per report

my %wct;                       # word => count in the current window
my %bline;                     # word => count in the frozen baseline
my $rct      = 0;              # words read since the last evaluation
my $tct      = 0;              # total words in the current window
my $bct      = 0;              # total words in the baseline
my $baseline = 0;              # true once the baseline has been frozen

# Print the (at most $TOP_N) words of the current window with the highest
# ratio of window frequency to baseline frequency.
sub _print_top_words {
    # Score each word once instead of recomputing it inside the comparator.
    # The baseline count is incremented by one so that words absent from the
    # baseline do not cause a division by zero.
    my %score;
    for my $word (keys %wct) {
        $score{$word}
            = ($wct{$word} / $tct) / ((1 + ($bline{$word} || 0)) / $bct);
    }

    my @ranked = sort { $score{$b} <=> $score{$a} } keys %wct;

    # Clamp the slice: slicing past the end of an array yields undef
    # elements, which would show up as empty " 0" rows in the report.
    my $last = $#ranked < $TOP_N - 1 ? $#ranked : $TOP_N - 1;

    print "\nTop words\n---------\n";
    for my $word (@ranked[0 .. $last]) {
        # NOTE(review): int(1000*x)/100 prints ten times the ratio, truncated
        # to two decimals -- kept as is; confirm the scaling is intended.
        print "$word ", int(1000 * $score{$word}) / 100, "\n";
    }
    return;
}

while (my $line = <>) {
    # Header-like lines are not counted.
    next if $line =~ /^From: /;
    next if $line =~ /^Auth: /;

    # Drop everything except word characters and whitespace, then count.
    $line =~ s/[^\w\s]//g;
    for my $word (split ' ', $line) {
        $wct{$word}++;
        $rct++;
    }

    # Only evaluate once enough new words have accumulated.
    next unless $rct > $CHUNK_WORDS;

    $tct += $rct;
    $rct = 0;

    if ($baseline) {
        _print_top_words();
        if ($tct > $WINDOW_WORDS) {
            $tct = 0;
            %wct = ();
        }
    }
    else {
        print "Taking baseline... $tct\n";
        if ($tct > $BASELINE_WORDS) {
            # Freeze what has been counted so far as the baseline and start
            # a fresh reporting window.
            %bline    = %wct;
            $bct      = $tct;
            $baseline = 1;
            %wct      = ();
            $tct      = 0;
        }
    }
}