#!/tools/perl/bin/perl

#############################################################################
#
#  Usage:  stattyM  report-filename  [ + exclude-files - ]  input-files
#
#  'stattyM' reads data files in 'ccount' output format and gives a
#  statistical report about the structure of modules in C source files
#  (only global results are considered here).
#  See 'README.1' and 'README.2' for more details!
#
#  Author: Joerg Lawrenz, Universitaet Karlsruhe
#  Date:   93/12/1
#
#############################################################################

#
#  First get the report-filename
#

$file = shift;

#
#  Now save the names of the exclude-files and count them.
#

if ($ARGV[0] eq '+') {
   shift;
   $FirstExcludeFile = $ARGV[0];
   while ($ARGV[0] ne '-') {
      $filename = shift;
      $EXclude{$filename}++;
      $Mexclude++;
      if (scalar(@ARGV) == 0) {
         print STDERR "Usage: stattyM  report-filename  [ + exclude-files - ]  input-files\n";
         exit;
       }
    }
   shift;
   $LastExcludeFile = $filename;
}

#
# Exit if there are not enough arguments in the command line.
#

if (scalar(@ARGV) == 0) {
 print STDERR "Usage: stattyM  report-filename  [ + exclude-files - ]  input-files\n";
 exit;
}

#
#  Get the names of the first and the last file for later report information.
#

$FirstFile = $ARGV[0];
$LastFile = $ARGV[$#ARGV];

#
#  Now read all input files line by line, accumulates for the module
#  attribute field values and for calculations from them the according
#  histograms, and count the modules.
#

while (<>) {
  if (/^M/) {
    split;

#  For the first 12 attributes, accumulate the histograms only if the
#  current module results are not in a exclude-file.

    if (!$EXclude{$ARGV}) {
        $BYTES  {$_[1]}++;
        $LINES  {$_[2]}++;
        $ByteLineQuotient { sprintf("%.1f", $_[2] ? $_[1]/$_[2] : 0) } ++;
        $Comment{$_[3]}++;
        $PPcomment{$_[4]}++;
        $relevBytes = $_[6]+$_[8]+$_[10];
        $RelevBytes{$relevBytes}++;
        $BLANKS{ $_[1]-$_[3]-$_[4]-$relevBytes }++;

        $CommentPortion  { sprintf("%.1f", 100*$_[3]/$_[1]) } ++;
        $PPCommentPortion{ sprintf("%.1f", 100*$_[4]/$_[1]) } ++;
        $RelevBytePortion{ sprintf("%.1f", 100*$relevBytes/$_[1]) } ++;
        $BlankPortion    { sprintf("%.1f", 
                  100 * (1 - ($_[3]+$_[4]+$relevBytes)/$_[1]) ) } ++;
        
       } 
    $NumId    {$_[5]}++;
    $ByteId   {$_[6]}++;
    $NumSym   {$_[7]}++;
    $ByteSym  {$_[8]}++;
    $NumPP    {$_[9]}++;
    $BytePP   {$_[10]}++;
    $Semi     {$_[11]}++;
    $Functions{$_[12]}++;
    $AveInblockLen{$_[13]}++;
    $DevInblockLen{$_[14]}++;
    $MaxInblockLen{$_[15]}++;
    $AveAveDepth{$_[16]}++;
    $DevAveDepth{$_[17]}++;
    $MaxAveDepth{$_[18]}++;
    $module++;
   }
}

#
#  If no modules found, quit.
#

if (!$module) {
  print STDERR "No modules found!\n";
  exit;
}

#
#  Now write the report by inserting for each attribute histogram
#  the statistical results of the subroutine 'stat'.
#

open(FILE, "> $file");

chop($cwd = `pwd`);

print  FILE "MODULE REPORT  (from $cwd: $FirstFile ... $LastFile, excluded(~): $FirstExcludeFile ... $LastExcludeFile):\n\n";

print  FILE "               Average Std.dev.  Max.    25q    [%]     50q    [%]     75q    [%]     90q    [%]     95q    [%]     98q    [%]     99q    [%]\n";
print  FILE "               ------- -------- ------ -------------- -------------- -------------- -------------- -------------- -------------- --------------\n";
&printd('bytes~',        &stat(*BYTES));
&printd('lines~',        &stat(*LINES));
&printp('bytes/line~',   &stat(*ByteLineQuotient));

print FILE "\nBytes~:\n";
&printd('Comment',       &stat(*Comment));
&printd('PPcomment',     &stat(*PPcomment));
&printd('Relev. symb.',  &stat(*RelevBytes));
&printd('"Blanks"',      &stat(*BLANKS));

print FILE "\nPortions[%]~:\n";
&printp('Comment',       &stat(*CommentPortion));
&printp('PPcomment',     &stat(*PPCommentPortion));
&printp('Relev. symb.',  &stat(*RelevBytePortion));
&printp('"Blanks"',      &stat(*BlankPortion));

print FILE "\n";
&printd('#identifier',   &stat(*NumId));
&printd('bytes id',      &stat(*ByteId));
print FILE "\n";
&printd('#symbols',      &stat(*NumSym));
&printd('bytes sym',     &stat(*ByteSym));
print FILE "\n";
&printd('#PP-directives',&stat(*NumPP));
&printd('bytes PP-dir',  &stat(*BytePP));

print FILE "\n";
&printd('";"',           &stat(*Semi));
&printd('functions',     &stat(*Functions));

print FILE "\nInnermost block length in ';' (*):\n";
&printp('Average',       &stat(*AveInblockLen));
&printp('Std.dev.',      &stat(*DevInblockLen));
&printd('Maximum',       &stat(*MaxInblockLen));

print FILE "\nAverage block nesting depth per function:\n";
&printp('Average',       &stat(*AveAveDepth));
&printp('Std.dev',       &stat(*DevAveDepth));
&printp('Maximum',       &stat(*MaxAveDepth));

printf FILE "\n\nModules total:   %5d\n", $module;
printf FILE "~ Excluded:      %5d\n", $Mexclude;
print FILE "\n";
print FILE "(*) + blockless functions\n";
close(FILE);


#############################################################################
#
#  Here is the subroutine 'stat', which calculates statistical values from
#  a given histogram.
#
#############################################################################

sub stat {
  local(*Array) = @_;
  local($i, $val, $sum, $num, $Average, $StdDev, $Max);
  local(@Quantil_25, @Quantil_50, @Quantil_75);
  local(@Quantil_90, @Quantil_95, @Quantil_98, @Quantil_99);

#  Average:

  while (($i,$val) = each %Array) {
    $sum += $val * $i;
    $num += $val;
    $Max = $i > $Max ? $i : $Max;
   }
  $Average = $num > 0 ? $sum/$num : 0;

#  Standard Deviation:

  $sum = 0;
  while (($i,$val) = each %Array) {
    $sum += $val * ($i - $Average)**2;
   }
  $StdDev = $num > 1 ? sqrt($sum/($num-1)) : 0;

#  Quantils:

  $sum = 0;
  foreach $i (sort numerically keys %Array) {
    $sum += 100 * $Array{$i} / $num;

    if ($sum >= 25 && !defined @Quantil_25) {
      @Quantil_25 = ($i,$sum);
     }
    if ($sum >= 50 && !defined @Quantil_50) {
      @Quantil_50 = ($i,$sum);
     }
    if ($sum >= 75 && !defined @Quantil_75) {
      @Quantil_75 = ($i,$sum);
     }
    if ($sum >= 90 && !defined @Quantil_90) {
      @Quantil_90 = ($i,$sum);
     }
    if ($sum >= 95 && !defined @Quantil_95) {
      @Quantil_95 = ($i,$sum);
     }
    if ($sum >= 98 && !defined @Quantil_98) {
      @Quantil_98 = ($i,$sum);
     }
    if ($sum >= 99) {
      @Quantil_99 = ($i,$sum);
      last;
     }
   }

# Now return all results.

  ($Average, $StdDev, $Max, @Quantil_25, @Quantil_50, @Quantil_75,
   @Quantil_90, @Quantil_95, @Quantil_98, @Quantil_99);
}


#############################################################################
#
#  'printd' is the right output subroutine for the combination of decimal
#  and floating point values.
#
#############################################################################

sub printd {
   local($object,@list) = @_;

   printf FILE "%-15s%7.1f  %7.1f %6d %6d (%5.1f) %6d (%5.1f) %6d (%5.1f) %6d (%5.1f) %6d (%5.1f) %6d (%5.1f) %6d (%5.1f)\n",
                $object, @list;
}


#############################################################################
#
#  'printp' is the right output subroutine for floating point values only.
#
#############################################################################

sub printp {
   local($object,@list) = @_;

   printf FILE "%-15s%7.1f  %7.1f %6.1f %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f) %6.1f (%5.1f)\n", $object, @list;
}


#############################################################################
#
#  This subroutine compares two values. It is used by a sorting routine
#  in 'stat'.
#
#############################################################################

sub numerically { $a <=> $b; }
