#!/usr/athena/bin/perl
# $Header: /mit/ops/services/olc.new/scripts/stats-digest/RCS/olc-report.pl,v 1.2 1998/03/27 22:15:31 bert Exp $

# Merged 3/98 from report.pl (which sends stuff to the olc_usage meeting)
# and cprivate-report.pl (cprivate meeting).  The two original scripts were
#    Written by Derrick Kong, starflt@mit.edu, 11/93
#    Written by Derrick Kong, starflt@mit.edu, 12/93
# Additional changes by bert Dvornik.

# Usage: 
# One of the following:
#	-w = process last 7 days of data (week)
#	-m = process last 30 days of data (month)
#	-q = process last 90 days of data (quarter)
#	-y = process last 365 days of data (year)
# OR both of 
#	-s startdate = Earliest day of data to process
#	-e enddate   = Last day of data to process
# 	Dates must be in form YYMMDD
# Optional arg:
#	-c           = Use the "cprivate" format
#	-a address   = Mail address to send output to
#       -d           = Debugging (don't send mail, print to stdout)

require 'ctime.pl';
require 'getopts.pl';

# Parse the command line.  Each switch found sets $opt_<letter>; the letters
# followed by ':' (a, s, e) take a value, the others are plain flags.
&Getopts('wmqya:s:e:cd');

# Fixed settings:
#   $wdir     - directory where the summaries live
#   $sendmail - place for the sendmail binary
#   $host     - hostname of this machine
($wdir, $sendmail, $host) =
    ('/var/athena/olc/stats/summaries', '/usr/lib/sendmail', 'matisse.mit.edu');

# KLUDGE: 'EST' looks much better as the timezone than 'US/Eastern'...
if ($ENV{'TZ'} =~ m@^us/eastern$@i) {
    $ENV{'TZ'} = 'EST5EDT';
}

# Definitions for later use.
# @hourlist holds the two-digit hours in display order; the hash slice
# below initializes %hour_name, mapping each hour to its printed label.
@hourlist = ('00' .. '23');

@hour_name{@hourlist} = (
    'Mid - 1am',   '1am - 2am',   '2am - 3am',
    '3am - 4am',   '4am - 5am',   '5am - 6am',
    '6am - 7am',   '7am - 8am',   '8am - 9am',
    '9am - 10am',  '10am - 11am', '11am - Noon',
    'Noon - 1pm',  '1pm - 2pm',   '2pm - 3pm',
    '3pm - 4pm',   '4pm - 5pm',   '5pm - 6pm',
    '6pm - 7pm',   '7pm - 8pm',   '8pm - 9pm',
    '9pm - 10pm',  '10pm - 11pm', '11pm - Mid',
);

# Check arguments.
# Either both -s and -e are given (an explicit date range), or one of the
# period switches must be present.  For a period, $factor is set to one less
# than the number of summary files to use; for an explicit range it is left
# unset.
if ($opt_s && $opt_e) {
    # Explicit range: both dates must be exactly six digits (YYMMDD).
    if (($opt_s !~ /^\d\d\d\d\d\d$/) || ($opt_e !~ /^\d\d\d\d\d\d$/)) {
        &usage_message;
    }
}
else {
    &usage_message unless ($opt_w || $opt_m || $opt_q || $opt_y);
    $factor = $opt_w ? 6
            : $opt_m ? 29
            : $opt_q ? 89
            :          364;
}

# Get all summary files (ie, all files in $wdir whose names don't start with .)
# readdir() returns entries in no particular order, so the list is sorted by
# name once here.  Both selection branches below then hand the files to the
# process loop in name order; the report takes its From/To dates from the
# first and last file processed, so the order matters.
opendir(STATSDIR, $wdir) || die "Cannot open directory $wdir: $!\n";
@filelist = grep(!/^\./, readdir(STATSDIR));
closedir(STATSDIR);
@filelist = sort @filelist;

if ($factor != 0) {
    # Period switch: take the last ($factor + 1) files, or every file when
    # there are fewer than that.  (An empty directory yields an empty slice.)
    if ($factor > $#filelist) {
        $factor = $#filelist;
    }
    @process = @filelist[$#filelist - $factor .. $#filelist];
}
else {

    # Extract starting and ending day, month, year
    ($s_yr, $s_mo, $s_da) = (substr($opt_s,-6,2),substr($opt_s,-4,2),substr($opt_s,-2,2));
    ($e_yr, $e_mo, $e_da) = (substr($opt_e,-6,2),substr($opt_e,-4,2),substr($opt_e,-2,2));

    foreach $file (@filelist) {
        # The date is taken from the last six characters of the file name.
        ($f_yr, $f_mo, $f_da) = (substr($file,-6,2), substr($file,-4,2), substr($file,-2,2) );

        # Check if date of file falls in range between start and end date
        if (&is_before_or_on($s_yr, $s_mo, $s_da,  $f_yr, $f_mo, $f_da)
            && &is_before_or_on($f_yr, $f_mo, $f_da,  $e_yr, $e_mo, $e_da)) {
            push (@process,$file);
        }
    }
}

# Sanity check to make sure we have some files to process
die "No files to process!\n" unless @process;

# Discuss meeting to send to.
# Unless debugging (-d), everything printed from here on is piped into
# sendmail; with -d it goes to stdout instead.
#
# The mail addresses are built from single-quoted pieces on purpose: inside a
# double-quoted string Perl 5 treats "@$host" as an array dereference and
# "@menelaus" as an array, which silently drops the domain part.
if (! $opt_d) {
    open(OUT, '| ' . $sendmail . ' -froot@' . $host . ' -t')
        || die "Cannot start $sendmail: $!\n";
    select(OUT);
}
if (defined $opt_a) {
    print "To: $opt_a\n";
} elsif ($opt_c) {
    print 'To: cprivate-mtg@menelaus.local' . "\n";
} else {
    print 'To: olc_usage@menelaus.local' . "\n";
}
print 'From: OLC periodic digest <root@' . $host . ">\n";
# &ctime returns its string with a trailing newline, which ends the header line.
print 'Subject: OLC Usage statistics, generated on '.&ctime($^T);

# Process loop.
# Read every selected summary file line by line and fold its figures into
# the running totals used by the output section.  $mode names the section of
# the summary currently being read; it is undefined while reading the
# single-line statistics and is reset by every blank line.
foreach $file (@process) {

    # Reset parameters/counters
    undef $mode;

    # Parse through the file
    open(IN, "$wdir/$file") || print "Error: cannot open $file: $!\n";
    while(<IN>) {

        # Replace multiple spaces with a single space globally
        # (this also turns the trailing newline into a single space)
        s/\s+/ /g;
        # Skip empty lines, and reset the mode.
        /^\s*$/ && do {
            undef $mode;
            next;
        };

        # Other lines to get rid of (header and info lines)
        ( (/^-+/) || (/^Time period .+/) || (/^# cons .+/) || (/^ Name .+/) ) && next;

        # The following lines will only occur outside any mode
        if (! $mode) {

            # Ignore mismatches
            /^Mismatch/ && next;

            # Get start date and individual date
            /^From: (.+)/ && do {
                $thisdate[$i] = substr($1,0,11) . substr($1,-5,4);
                if (! $startdate) { $startdate = $thisdate[$i]; }
                $i++;
                next;};
            # Get enddate continuously (it will be last enddate at end)
            /^To : (.+)/ && do {
                $enddate = substr($1,0,11) . substr($1,-5,4);
                next;};

            # Get number totals and save individual numbers for next section
            /^Number of questions asked: (\d+)/ && do {
                $n_asked = $1;
                $date_count{$thisdate[$i-1]} = $n_asked;
                $t_asked += $n_asked;
                next;};
            /^Number asked during on-duty hours: (\d+)/ && do {
                $n_asked_od = $1;
                $t_asked_od += $n_asked_od;
                next;};
            /^Number of questions doned: (\d+)/ && do {
                $n_done = $1;
                $t_done += $n_done;
                next;};
            /^Number doned \(asked during on-duty hours\): (\d+)/ && do {
                $n_done_od = $1;
                $t_done_od += $n_done_od;
                next;};
            /^Number of questions grabbed: (\d+)/ && do {
                $n_grab = $1;
                $t_grab += $n_grab;
                next;};
            /^Number grabbed \(asked during on-duty hours\): (\d+)/ && do {
                $n_grab_od = $1;
                $t_grab_od += $n_grab_od;
                next;};

            # Get average and medians and weight them appropriately:
            # each figure is multiplied by this file's matching count so the
            # output section can divide the sum by the overall total.
            /^Avg\. # of questions asked\/hour: (\d+\.\d+)/ && do {
                $avg_ask += $1 * $n_asked;
                next;};
            /^Med\. # questions asked\/hour: (\d+\.\d+)/ && do {
                $med_ask += $1 * $n_asked;
                next;};
            /^Avg\. # of questions cancel\/hour: (\d+\.\d+)/ && do {
                $avg_cancel += $1 * $n_done;
                next;};
            /^Med\. # questions cancel\/hour: (\d+\.\d+)/ && do {
                $med_cancel += $1 * $n_done;
                next;};
            /^Avg\. # of questions resolved\/hour: (\d+\.\d+)/ && do {
                $avg_res += $1 * $n_done;
                next;};
            /^Med\. # questions resolved\/hour: (\d+\.\d+)/ && do {
                $med_res += $1 * $n_done;
                next;};
            /^Avg\. minutes to first response: (\d+\.\d+)/ && do {
                $avg_fr += $1 * $n_done;
                next;};
            /^Med\. minutes to first response: (\d+\.\d+)/ && do {
                $med_fr += $1 * $n_done;
                next;};
            /^Avg\. minutes to resolve: (\d+\.\d+)/ && do {
                $avg_ttr += $1 * $n_done;
                next;};
            /^Med\. minutes to resolve: (\d+\.\d+)/ && do {
                $med_ttr += $1 * $n_done;
                next;};
            /^Avg\. \# of consultant mail msgs: (\d+\.\d+)/ && do {
                $avg_mail += $1 * $n_done;
                next;};
            /^Avg\. \# of consultant OLC msgs: (\d+\.\d+)/ && do {
                $avg_colc += $1 * $n_done;
                next;};
            /^Avg\. \# of user OLC msgs: (\d+\.\d+)/ && do {
                $avg_uolc += $1 * $n_done;
                next;};
            /^Avg\. \# consultants connected: (\d+\.\d+)/ && do {
                $avg_con += $1 * $n_grab;
                next;};
        }

        # Set modes (a section heading switches the mode for the lines
        # that follow it, up to the next blank line)
        /^Stats adjusted for on-duty hours .+/ && ($mode = "adj-stats");
        /^Number of questions asked by topic/ && ($mode = "topic");
        /^Number of questions asked and resolved .+/ && ($mode = "hour");
        /^Number of consultants .+/ && ($mode = "connected");
        /^Stats by consultant/ && ($mode = "consultant");
        /^Top ten resolvers/ && ($mode = "ignore");
        /^Top ten grabbers/ && ($mode = "ignore");
        /^Top twenty askers/ && ($mode = "askers");

        # Count stats adjusted for on-duty hours
        if ($mode eq "adj-stats") {
            /^Avg\. minutes to first response: (\d+\.\d+)/ && do {
                $avg_fr_od += $1 * $n_grab_od;
                next;};
            /^Med\. minutes to first response: (\d+\.\d+)/ && do {
                $med_fr_od += $1 * $n_grab_od;
                next;};
            /^Avg\. minutes to resolve: (\d+\.\d+)/ && do {
                $avg_ttr_od += $1 * $n_done_od;
                next;};
            /^Med\. minutes to resolve: (\d+\.\d+)/ && do {
                $med_ttr_od += $1 * $n_done_od;
                next;};
        }
        # Count number asked by topic
        elsif ($mode eq "topic") {
            /^ (\w+) (\d+) .+/ && do {
                $topic_count{$1} += $2;
                next;};
        }
        # Count number asked and resolved by hour
        elsif ($mode eq "hour") {
            /^ (.+) : (\d+) \(.+\) (\d+) \(.+\)/ && do {
                $ask_hour{$1} += $2;
                $res_hour{$1} += $3;
                next;};
        }
        # Count consultants connected/question
        elsif ($mode eq "connected") {
            /^ (\d+) : (\d+) \(.+\)/ && do {
                $num_cons{$1} += $2;
                next;};
        }
        # Count number grabbed and resolved per consultant
        elsif ($mode eq "consultant") {
            /^ (\w+) : (\d+) (\d+)/ && do {
                $grab{$1} += $2;
                $res{$1} += $3;
                next;};
        }
        # Ignore top ten resolvers and grabbers list; redo from totals data
        elsif ($mode eq "ignore") {
            /^.+/ && next;}
        # Count top 20 askers.  This must be "+=": the former "=+" was a
        # plain assignment of +$2, so each file overwrote the user's count
        # instead of adding to it.
        elsif ($mode eq "askers") {
            /^ (\w+) : (\d+) \(.+\)/ && do {
                $asker{$1} += $2;
                next;};
        }
    }
    close(IN);
}

# Output section: period covered, overall totals and weighted averages.
# The "WA Med." figures need no separate calculation step: the process loop
# has already summed each daily median multiplied by that day's count, so
# dividing by the overall count here gives the weighted average.

# Return $num / $den, or 0 when $den is zero or unset, so that a period
# with no questions (or no resolved/grabbed questions) still produces a
# complete report instead of dying with "Illegal division by zero".
sub safe_ratio {
    local($num, $den) = @_;
    ($den != 0) ? $num / $den : 0;
}

print "\nFrom: $startdate\n";
print   "To  : $enddate\n\n";

print "Number of questions asked:   $t_asked\n";
printf "Number asked during on-duty hours: %4d  (%5.2f %%)\n",
    $t_asked_od, &safe_ratio($t_asked_od * 100.0, $t_asked);
print "Number of questions doned:   $t_done\n";
print "Number doned (asked during on-duty hours): $t_done_od\n";

# Everything down to the matching "else" is printed only when at least one
# question was grabbed.
if ($t_grab != 0) {
    printf "Number of questions grabbed: %d (%5.2f %%)\n",
        $t_grab, &safe_ratio($t_grab * 100.0, $t_done);
    print "Number grabbed (asked during on-duty hours): $t_grab_od\n\n";

    printf "Avg. \# of questions asked/hour:    %.2f\n", &safe_ratio($avg_ask, $t_asked);
    printf "WA Med. \# questions asked/hour:    %.2f\n\n", &safe_ratio($med_ask, $t_asked);

    printf "Avg. \# of questions cancel/hour:   %.2f\n", &safe_ratio($avg_cancel, $t_done);
    printf "WA Med. \# questions cancel/hour:   %.2f\n\n", &safe_ratio($med_cancel, $t_done);

    printf "Avg. \# of questions resolved/hour: %.2f\n", &safe_ratio($avg_res, $t_done);
    printf "WA Med. \# questions resolved/hour: %.2f\n\n", &safe_ratio($med_res, $t_done);

    printf "Avg. minutes to first response:    %.2f\n", &safe_ratio($avg_fr, $t_done);
    printf "WA Med. minutes to first response: %.2f\n\n", &safe_ratio($med_fr, $t_done);

    printf "Avg. minutes to resolve:           %.2f\n", &safe_ratio($avg_ttr, $t_done);
    printf "WA Med. minutes to resolve:        %.2f\n\n", &safe_ratio($med_ttr, $t_done);

    print  "Stats adjusted for on-duty hours (9am - midnight):\n";
    print  "--------------------------------------------------\n";
    printf "Avg. minutes to first response:    %.2f\n", &safe_ratio($avg_fr_od, $t_grab_od);
    printf "WA Med. minutes to first response: %.2f\n", &safe_ratio($med_fr_od, $t_grab_od);
    printf "Avg. minutes to resolve:           %.2f\n", &safe_ratio($avg_ttr_od, $t_done_od);
    printf "WA Med. minutes to resolve:        %.2f\n", &safe_ratio($med_ttr_od, $t_done_od);
    print  "--------------------------------------------------\n\n";

    printf "Avg. \# of consultant mail msgs:  %.2f\n", &safe_ratio($avg_mail, $t_done);
    printf "Avg. \# of consultant OLC msgs:   %.2f\n", &safe_ratio($avg_colc, $t_done);
    printf "Avg. \# of user OLC msgs:         %.2f\n\n", &safe_ratio($avg_uolc, $t_done);

    printf "Avg. \# consultants connected:    %.2f\n\n", &safe_ratio($avg_con, $t_grab);
} else {
    printf "No Questions grabbed\n";
}

# Detail tables: per topic, per date, per hour, per number of consultants
# and (unless -c was given) per consultant / per asker.

# Return $part as a percentage of $whole, or 0 when $whole is zero or unset,
# so an empty total cannot abort the report with a division by zero.
sub percent_of {
    local($part, $whole) = @_;
    ($whole != 0) ? $part * 100.0 / $whole : 0;
}

print "Note: WA Med. = Weighted Average Daily Median\n\n";

print "Number of questions asked by topic:\n";
foreach $topic (sort by_topic_val (keys %topic_count)) {
    printf " %-13.13s %4d (%5.2f %%)\n",
        $topic, scalar($topic_count{$topic}), &percent_of($topic_count{$topic}, $t_asked);
}

print "\nNumber of questions asked by date:\n";
foreach $date (@thisdate) {
    printf " $date : %4d (%5.2f %%)\n",
        $date_count{$date}, &percent_of($date_count{$date}, $t_asked);
}

# The per-hour hashes are keyed by the printed hour label, not by '00'..'23'.
print "\nNumber of questions asked and resolved by hour:\n";
print "Time period    # ask  (%ask)   # res  (%res)\n";
foreach $hour (@hourlist) {
    printf " %-11.11s : %3d  (%5.2f %%) %4d  (%5.2f %%)\n",
        $hour_name{$hour},
        $ask_hour{$hour_name{$hour}}, &percent_of($ask_hour{$hour_name{$hour}}, $t_asked),
        $res_hour{$hour_name{$hour}}, &percent_of($res_hour{$hour_name{$hour}}, $t_done);
}

print "\nNumber of consultants connected per question:\n";
print "# cons         Questions\n";
foreach $n_cons (sort numerically (keys %num_cons)) {
    printf " %-8.8s : %4d (%5.2f %%)\n",
        $n_cons, $num_cons{$n_cons}, &percent_of($num_cons{$n_cons}, $t_asked);
}

if (! $opt_c) {
  print "\nStats by consultant:\n";
  print " Name      # grab  # resolve\n";
  foreach $name (sort (keys %grab)) {
    printf " %-8.8s : %4d    %4d\n",
        $name, $grab{$name}, $res{$name};
  }

  # $max_id is the last index to print: 9 gives ten entries, 19 gives twenty.
  # A shorter list is printed whole; an empty one yields an empty slice.
  print "\nTop ten resolvers:\n";
  @sorted_ids = sort by_res_val (keys %res);
  $max_id = ($#sorted_ids >= 9) ? 9 : $#sorted_ids;
  foreach $name (@sorted_ids[0..$max_id]) {
    printf " %-8.8s : %4d (%5.2f %%)\n",
        $name, $res{$name}, &percent_of($res{$name}, $t_grab);
  }

  print "\nTop ten grabbers:\n";
  @sorted_ids = sort by_grab_val (keys %grab);
  $max_id = ($#sorted_ids >= 9) ? 9 : $#sorted_ids;
  foreach $name (@sorted_ids[0..$max_id]) {
    printf " %-8.8s : %4d\n", $name, $grab{$name};
  }

  print "\nTop twenty askers:\n";
  @sorted_ids = sort by_asker_val (keys %asker);
  $max_id = ($#sorted_ids >= 19) ? 19 : $#sorted_ids;
  foreach $user (@sorted_ids[0..$max_id]) {
    printf " %-8.8s : %4d (%5.2f %%)\n",
        $user, $asker{$user}, &percent_of($asker{$user}, $t_asked);
  }
}

# OUT is only a pipe to sendmail when not debugging; closing a pipe reports
# a failing command, which is worth a warning on stderr.
if (! $opt_d) {
    close(OUT) || warn "$sendmail did not exit cleanly (status $?)\n";
}

#----------------------------------------------------------------------------
# Comparison routines for sort().  The by_*_val routines order hash keys by
# their count, largest first (written as the negated ascending comparison);
# numerically orders plain numbers, smallest first.
sub by_topic_val {
    0 - ($topic_count{$a} <=> $topic_count{$b});
}

sub by_res_val {
    0 - ($res{$a} <=> $res{$b});
}

sub by_grab_val {
    0 - ($grab{$a} <=> $grab{$b});
}

sub by_asker_val {
    0 - ($asker{$a} <=> $asker{$b});
}

sub numerically {
    $a <=> $b;
}
#----------------------------------------------------------------------------
# Usage message.
# Abort with a one-line synopsis of the accepted switches.  The trailing
# newline keeps die from appending the script name and line number.
# -d (debugging) is listed too, since the option parser accepts it.
sub usage_message {
    die "Usage: $0 {-w/-m/-q/-y / -s startdate -e enddate} [-c] [-d] [-a mail address]\n"
      . "Dates must be in form YYMMDD\n";
}

# Return true iff the date ($yr1,$mo1,$da1) is before or on ($yr2,$mo2,$da2).
# The fields are compared numerically, most significant first: the first
# pair that differs decides, and two identical dates count as "on".
sub is_before_or_on {
  local($yr1,$mo1,$da1, $yr2,$mo2,$da2) = @_;

  # -1, 0 or 1 depending on whether the first date is earlier than, equal to
  # or later than the second.
  local($order) = ($yr1 <=> $yr2) || ($mo1 <=> $mo2) || ($da1 <=> $da2);

  $order <= 0;
}
