#!/tools/perl/bin/perl

#############################################################################
#
#  Usage:  stattyCSE  report-filename  input-files
#
#  'stattyCSE' reads data files in 'ccount' output format and gives a
#  statistical report about control structures and expression of C 
#  source files.
#  See 'README.1' and 'README.2' for more details!
#
#  Author: Joerg Lawrenz, Universitaet Karlsruhe
#  Date:   93/12/1
#
#############################################################################

#
#  First get the report-filename
#

$file = shift;

#
#  Exit if there are not enough arguments in the command line.
#

if (scalar(@ARGV) == 0) {
 print STDERR "Usage: stattyCSE  report-filename  input-files\n";
 exit;
}

#
#  Get the names of the first and the last file for later report information.
#

$FirstFile = $ARGV[0];
$LastFile = $ARGV[$#ARGV];

#
#  Now read all input files line by line, count the objects (expression, 
#  'for', 'if', 'switch', 'while'), accumulates for their attribute field 
#  values the according histograms and count the modules.
#

while (<>) {
  if (/^e/) {
    split;
    $ExprOps  {$_[1]}++;
    $ExprDepth{$_[2]}++;
    $expr++;
    next;
   }
  if (/^f/) {
    split;
    $ForLines    {$_[1]}++;
    $ForSemis    {$_[2]}++;
    $ForDepth    {$_[3]}++;
    $ForOps      {$_[4]}++;
    $ForBreakCont{$_[5]}++;
    $for++;
    next;
   }
  if (/^i/) {
    split;
    $IfLines      {$_[1]}++;
    $IfOps        {$_[2]}++;
    $IfSemis{$_[4]+$_[6]}++;
    $IfPartLines  {$_[3]}++;
    $IfPartSemis  {$_[4]}++;
    $ElsePartLines{$_[5]}++;
    $ElsePartSemis{$_[6]}++;
    $IfDepth      {$_[7]}++;
    $if++;
    next;
   }
  if (/^s/) {
    split;
    $SwitchLines{$_[1]}++;
    $SwitchSemis{$_[2]}++;
    $SwitchDepth{$_[3]}++;
    $SwitchOps  {$_[4]}++;
    $SwitchCase {$_[5]}++;
    $SwitchBreak{$_[6]}++;
    $switch++;
    next;
   }
  if (/^w/) {
    split;
    $WhileLines    {$_[1]}++;
    $WhileSemis    {$_[2]}++;
    $WhileDepth    {$_[3]}++;
    $WhileOps      {$_[4]}++;
    $WhileBreakCont{$_[5]}++;
    $while++;
    next;
   }
  $module++ if /^M/;
}

#
#  If no modules found, quit.
#

if (!$module) {
  print STDERR "No modules found!\n";
  exit;
}

#
#  Now write the report by inserting for each attribute histogram 
#  the statistical results of the subroutine 'stat'.
#

open(FILE, "> $file");

chop($cwd = `pwd`);

print  FILE "CONTROL STRUCTURES & EXPRESSION REPORT  (from $cwd: $FirstFile ... $LastFile):\n\n";

print  FILE "               Average Std.dev.  Max.    25q    [%]     50q    [%]     75q    [%]     90q    [%]     95q    [%]     98q    [%]     99q    [%]\n";
print  FILE "               ------- -------- ------ -------------- -------------- -------------- -------------- -------------- -------------- --------------";
print FILE "\n----- IF -----\n";
&printd('lines',          &stat(*IfLines));
&printd('";"',            &stat(*IfSemis));
&printd('lines if-part',  &stat(*IfPartLines));
&printd('";" if-part',    &stat(*IfPartSemis));
&printd('lines else-part',&stat(*ElsePartLines));
&printd('";" else-part',  &stat(*ElsePartSemis));
&printd('depth',          &stat(*IfDepth));
&printd('operators/1',    &stat(*IfOps));

print FILE "\n--- WHILE ----\n";
&printd('lines',          &stat(*WhileLines));
&printd('";"',            &stat(*WhileSemis));
&printd('"break","cont."',&stat(*WhileBreakCont));
&printd('depth',          &stat(*WhileDepth));
&printd('operators/1',    &stat(*WhileOps));

print FILE "\n---- FOR -----\n";
&printd('lines',          &stat(*ForLines));
&printd('";"',            &stat(*ForSemis));
&printd('"break","cont."',&stat(*ForBreakCont));
&printd('depth',          &stat(*ForDepth));
&printd('operators/2',    &stat(*ForOps));

print FILE "\n--- SWITCH ---\n";
&printd('lines',     &stat(*SwitchLines));
&printd('";"',       &stat(*SwitchSemis));
&printd('"case"',    &stat(*SwitchCase));
&printd('"break"',   &stat(*SwitchBreak));
&printd('depth',     &stat(*SwitchDepth));
&printd('operators/3',&stat(*SwitchOps));

print FILE "\n- EXPRESSION/4 -\n";
&printd('depth',     &stat(*ExprDepth));
&printd('operators', &stat(*ExprOps));

printf FILE "\n\n# Modules:     %6d\n", $module;
printf FILE "# If:          %6d\n", $if;
printf FILE "# While:       %6d\n", $while;
printf FILE "# For:         %6d\n", $for;
printf FILE "# Switch:      %6d\n", $switch;
printf FILE "# Expressions: %6d\n", $expr;

print FILE "\n/1  operators in head condition";
print FILE "\n/2  operators in second expression of head";
print FILE "\n/3  operators in head expression\n";
print FILE "\n/4  statement followed by ';'";

close(FILE);


#############################################################################
#
#  Here is the subroutine 'stat', which calculates statistical values from 
#  a given histogram.
#
#############################################################################

sub stat {
  local(*Array) = @_;
  local($i, $val, $sum, $num, $Average, $StdDev, $Max);
  local(@Quantil_25, @Quantil_50, @Quantil_75);
  local(@Quantil_90, @Quantil_95, @Quantil_98, @Quantil_99);

#  Average:

  while (($i,$val) = each %Array) {
    $sum += $val * $i;
    $num += $val;
    $Max = $i > $Max ? $i : $Max;
   }
  $Average = $num > 0 ? $sum/$num : 0;

#  Standard deviation:

  $sum = 0;
  while (($i,$val) = each %Array) {
    $sum += $val * ($i - $Average)**2;
   }
  $StdDev = $num > 1 ? sqrt($sum/($num-1)) : 0;

#  Quantils:

  $sum = 0;
  foreach $i (sort numerically keys %Array) {
    $sum += 100 * $Array{$i} / $num;

    if ($sum >= 25 && !defined @Quantil_25) {
      @Quantil_25 = ($i,$sum);
     }
    if ($sum >= 50 && !defined @Quantil_50) {
      @Quantil_50 = ($i,$sum);
     }
    if ($sum >= 75 && !defined @Quantil_75) {
      @Quantil_75 = ($i,$sum);
     }
    if ($sum >= 90 && !defined @Quantil_90) {
      @Quantil_90 = ($i,$sum);
     }
    if ($sum >= 95 && !defined @Quantil_95) {
      @Quantil_95 = ($i,$sum);
     }
    if ($sum >= 98 && !defined @Quantil_98) {
      @Quantil_98 = ($i,$sum);
     }
    if ($sum >= 99) {
      @Quantil_99 = ($i,$sum);
      last;
     }
   }

#  Now return all results.

  ($Average, $StdDev, $Max, @Quantil_25, @Quantil_50, @Quantil_75,
   @Quantil_90, @Quantil_95, @Quantil_98, @Quantil_99);
}


#############################################################################
#
#  'printd' is the right output subroutine for the combination of decimal 
#  and floating point values.
#
#############################################################################

sub printd {
   local($object,@list) = @_;

   printf FILE "%-15s%7.1f  %7.1f %6d %6d (%5.1f) %6d (%5.1f) %6d (%5.1f) %6d (%5.1f) %6d (%5.1f) %6d (%5.1f) %6d (%5.1f)\n",
                $object, @list;
}


#############################################################################
#
#  'printp' is the right output subroutine for floating point values only.
#
#############################################################################

sub printp {
   local($object,@list) = @_;

   printf FILE "%-15s%7.1f  %7.1f %6.1f %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f)\n", $object, @list;
}


#############################################################################
#
#  This subroutine compares two values. It is used by a sorting routine 
#  in 'stat'.
#
#############################################################################

sub numerically { $a <=> $b; }
