#!/usr/local/bin/perl
#--------------------------------------------------------------------
#
#  PROGRAM NAME: Browser Breakdown
#
#  >FILE NAME: browser-breakdown.pl
#  FILE NAME: browser.pl
#
#  DESCRIPTION: 
#   >Takes the 'extra' log file from the HTTP server and tallies the
#   >number of hits from each client.
#  Uses the Apache agent log to do the same.
#
#  FUNCTION LIST: 
#
#  AUTHOR(S): Matthew Gray and Eric Richard
#  MODIFICATIONS: Jered Floyd
#
#  ORIGINAL DATE: Feb 13 1995
#
#  LAST MODIFIED DATE: Jan 8 1996
#
#  NOTES: 
#--------------------------------------------------------------------
# This software is the property of net.Genesis Corp.
# Copyright (c) net.Genesis 1994
#
# It may not be copied, distributed or modified, in part or in whole, by
# any means whatsoever, without the explicit written permission of
# net.Genesis Corp. This copyright notice MUST be included in all
# copies or portions of the software.
#
# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL NET.GENESIS CORP. BE HELD LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
#---------------------------------------------------------------------

# =========================== REQUIRES =============================== #

# ======================= GLOBAL VARIABLES =========================== #
# Log file whose most recent lines are examined.
$log_file = "/var/local/www/logs/access_log";

# Browser names that are counted as robots.  Every listed name maps to 1,
# so membership can be tested with a plain truth check on $robots{$name}.
%robots = map { $_ => 1 } (
    "MOMspider",
    "w3new",
    "ArchitextSpider",
    "DIRFAgent",
    "vdkwebi",
    "Scoutget",
    "InfoSeek Robot 1.16",
    "CyberScan",
    "Lycos",
    "CaliforniaBrownSpider",
    "Wobot",
    "Scooter",
    "OMWRobot",
    "WebCrawler",
    "HEAD",
    "Web Robot",
    "Crab",
    "Folio_Retriever",
    "roots",
    "Jobot",
    "MetaCrawler",
    "Charlotte",
);

# ========================== FUNCTIONS =============================== #

# Tally the user agent found on each of the last $n lines of $log_file.
#
# Inputs:  $ENV{'QUERY_STRING'} (optional line count), $log_file, %robots.
# Outputs: $n, %browser (hits per browser name), %mozver (hits per Mozilla
#          version), %mozplat (hits per Mozilla platform), %browser_version
#          (hits per "name\tversion" pair) and $total_browsers.
# Also prints the CGI Content-Type header once the log pipe is open.

# Return the number of log lines to examine for the given query string.
# Only a positive decimal integer of at most nine significant digits is
# accepted; anything else (undefined, empty, zero, non-numeric) yields the
# default of 2500.  The query string is supplied by the remote client, so it
# must never reach the command line unchecked.
sub _line_count {
    my ($query) = @_;
    if (defined $query && $query =~ /\A0*([1-9]\d{0,8})\z/) {
        my $count = $1;
        return $count;
    }
    return 2500;
}

# Split a cleaned-up agent string into (name, version).  The version is
# undef when the agent carries none.
sub _split_agent {
    my ($agent) = @_;
    my ($name, $version);

    if ($agent =~ /^([^\/]+)\/(.*)$/) {
        # "Name/version ..." form.
        ($name, $version) = ($1, $2);
        $name =~ s/Version [\S]+//gi;
        $name =~ s/\(tm\)//gi;
    }
    elsif ($agent =~ /^(.+) ([\d]+\.[\d]+.*)/) {
        # "Name 1.2 ..." form.
        ($name, $version) = ($1, $2);
    }
    elsif ($agent eq "(null)" || $agent eq "") {
        # No usable agent (also the case when stripping left nothing).
        $name = "Unknown";
    }
    else {
        $name = $agent;
    }

    $name =~ s/ +/ /g;
    $name =~ s/\/$//;
    return ($name, $version);
}

# Map a Mozilla agent string to its platform label, or return nothing when
# no known platform token is present.  The tests run in a fixed order, so an
# agent carrying several tokens is classified by the first one that matches.
sub _mozilla_platform {
    my ($agent) = @_;

    if ($agent =~ /Windows/) {
        return ($agent =~ /16bit/) ? "Windows3.1" : "Windows32bit";
    }
    return "Windows32bit" if $agent =~ /Win95/;
    return "Windows3.1"   if $agent =~ /Win16/;
    return "Windows32bit" if $agent =~ /Win32/;
    return "Windows32bit" if $agent =~ /WinNT/;
    return "Macintosh"    if $agent =~ /Macintosh/;
    return "UNIX"         if $agent =~ /X11/;
    return;
}

$n = _line_count($ENV{'QUERY_STRING'});

# List-form pipe open: tail is executed directly, with no shell involved.
open(my $log_fh, '-|', 'tail', '-n', $n, $log_file)
    or die "Could not open $log_file\n";
print("Content-Type: text/html\n\n");

while (my $line = <$log_fh>)
{
    # The agent is the bracketed field that ends the line.
    next unless $line =~ /\[([^\]]+)\]$/;
    my $agent = $1;

    $agent =~ s/libwww\S+//ig;
    $agent =~ s/via proxy gateway +[\S]+//g;

    # Diagnostics go to STDERR so that raw agent text never lands in the page.
    warn "AHHHHHH!!!!! $agent\n" if $agent =~ /libwww/;

    $agent =~ s/[\s]+$//g;

    # Name and version are fresh for every line; nothing carries over from
    # the previous one.
    my ($name, $version) = _split_agent($agent);

    warn "AHHHHHH!!!!! $name\n" if $name =~ /libwww/;

    $name .= " --robot--" if $robots{$name};

    # Any agent mentioning MSIE is reported as Internet Explorer, whatever
    # name it leads with.
    $name = "Microsoft Internet Explorer" if $agent =~ /MSIE/;

    $browser{$name}++;

    if ($name eq "Mozilla")
    {
        my $platform = _mozilla_platform($agent);
        $mozplat{$platform}++ if defined $platform;

        # The version number is the first whitespace-separated word.
        my ($ver) = split(' ', defined $version ? $version : '');
        $ver = "Unknown" unless defined $ver;
        $mozver{$ver}++;
    }

    $total_browsers++;

    # Keyed on the parsed values rather than on $1/$2, which later pattern
    # matches have overwritten by this point.
    $browser_version{ join("\t", $name, defined $version ? $version : '') }++;
}

# For a pipe, close() reports a non-zero exit from tail.
close($log_fh) or warn "tail on $log_file did not exit cleanly (status $?)\n";

# Render the tallies as an HTML page: one table row per browser, ordered by
# descending hit count, with the Mozilla row expanded into per-version and
# per-platform sub-tables, followed by the share of hits made by robots.
#
# Reads $n, %browser, %mozver, %mozplat and $total_browsers.

# Escape the characters that are special in HTML text.  Browser names and
# versions come from client-supplied agent strings, so they are escaped
# before being written into the page.
sub _html_escape {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Return $part as a percentage of $whole, or 0 when $whole is zero or
# undefined (which happens when no log line was tallied).
sub _percent {
    my ($part, $whole) = @_;
    return 0 unless $whole;
    return 100 * ($part || 0) / $whole;
}

print("<h1>Browser Usage on <a href=\"/\">www.mit.edu</a></h1>\n<strong>[the last ",
      _html_escape($n), " requests]</strong><p>\n");
print("<table border>\n");
print("<tr><th>Browser Name</th><th width=75>Number of Accesses</th><th width=75>Percentage of Accesses</th><th width=75>Cumulative Percentage</th></tr>\n");

my $row_count    = 0;
my $cum_browsers = 0;
my $robot_tot    = 0;

# Ties are broken by name so that the row order is the same on every run.
for my $name (sort { $browser{$b} <=> $browser{$a} || $a cmp $b } keys %browser)
{
    # A blank line every five rows; it only affects the HTML source layout.
    print "\n" if ($row_count % 5) == 0;
    $row_count++;

    my $hits = $browser{$name};

    # Robots were tagged with this suffix when the log was tallied.
    $robot_tot += $hits if $name =~ /--robot--/;

    $cum_browsers += $hits;
    my $browser_pct = _percent($hits, $total_browsers);
    my $browser_cum = _percent($cum_browsers, $total_browsers);

    if ($name eq "Mozilla")
    {
        # The first cell holds a nested table: a title row, then one row
        # containing the version table and the platform table.
        print("<tr><td><table><tr><td>Mozilla</td></tr>\n");
        print("<tr><td>");
        print("<table border>");
        print("<tr><th>Mozilla Version</th><th>Number of Accesses</th><th>Percentage of Accesses</th><th>Cumulative Percentage</th></tr>");

        my $cum_ver = 0;
        for my $mver (sort { $mozver{$b} <=> $mozver{$a} || $a cmp $b } keys %mozver)
        {
            $cum_ver += $mozver{$mver};
            printf("<tr><td>%s</td><td align=center>%5d</td><td align=center>%5.1f</td><td align=center>%5.1f</td></tr>\n",
                   _html_escape($mver), $mozver{$mver},
                   _percent($mozver{$mver}, $hits), _percent($cum_ver, $hits));
        }
        print("</table><hr>\n");

        print("<table border><tr><th>Platform</th><th>Percent</th></tr>\n");
        for my $mplat (sort { $mozplat{$b} <=> $mozplat{$a} || $a cmp $b } keys %mozplat)
        {
            # Platform shares are truncated to whole percents.
            my $mozplatperc = int(_percent($mozplat{$mplat}, $hits));
            print("<tr><td>", _html_escape($mplat), "</td><td>$mozplatperc</td></tr>\n");
        }
        # Close the platform table, then the nested table's row and cell,
        # the nested table itself, and finally the outer cell.
        print("</table></td></tr></table></td>\n");

        printf("<td align=center valign=top>%5d</td><td align=center valign=top>%5.1f</td><td align=center valign=top>%5.1f</td></tr>\n",
               $hits, $browser_pct, $browser_cum);
    }
    else
    {
        printf("<tr><td>%s</td><td align=center>%5d</td><td align=center>%5.1f</td><td align=center>%5.1f</td></tr>\n",
               _html_escape($name), $hits, $browser_pct, $browser_cum);
    }
}
print("</table>\n");

# Rounded to the nearest whole percent; 0 when nothing was tallied.
print("<hr>Robots make up ", int(.5 + _percent($robot_tot, $cum_browsers)), "% of hits\n");
