#!/usr/bin/perl -w
#
# htt to html converter.
#
# Creates multipage documents from a template and data file.
#
# Copyright (C) 1999-2000 S Morphet <smorphet@iee.org>
#  
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software 
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#

# Modules
use FileHandle;

# Print the program banner and the option summary to standard output,
# then terminate the program.  Takes no arguments and never returns.
# Reads the globals $id_name, $id_version and $id_tag for the banner.
sub DieUsage {
  # The whole help text, one entry per output line (a continuation
  # line is a separate entry), emitted by a single print below.
  my @usage_text = (
    "\n",
    "$id_name $id_version ($id_tag)\n",
    "Usage: $id_name [options] inputfile\n\n",
    "Options: -k \t\t Keep intermediate files\n",
    "\t -v \t\t Be verbose.\n",
    "\t -nocont \t Don't produce a contents page.\n",
    "\t -M \t\t Generate dependency information for make(1).\n",
    "\t -MG \t\t Treat missing files as generated files.  Use with -M.\n",
    "\t -Mp \t\t Output all files as targets in the dependency output.\n",
    "\t\t\t Use with -M.\n",
    "\t -d \t\t Replace output file only if it is different.\n",
    "\t -p \t\t Do not use line control.\n",
    "\t -hm tag\t The header marker tag (default <HDR%PAGE%>).\n",
    "\t -hpm tag\t The page name insert marker for the header marker tag.\n",
    "\t -dm tag\t The data marker tag (default <DATA>).\n",
    "\t -cm tag\t The contents marker tag (default <CONTENTS>).\n",
    "\t -tm tag\t The template data marker tag (default <PAGE>).\n",
    "\t -def string\t The define string.\n",
    "\t -undef string\t The undefine string.\n",
    "\t -defnmark mk\t The name marker in the (un)define string.\n",
    "\t -defdmark mk\t The definition marker in the define string.\n",
    "\t -cpre string\t The prefix (default \"_C_\") for contents macros.\n",
    "\t -o name\t The name of the output file (default is \"index\").\n",
    "\t -op name\t The name to use for generated pages (default \"out\").\n",
    "\t -x name\t The name to use for the cross reference file.\n",
    "\t\t\t (default \"index.xref\").\n",
    "\t -pname name\t Name for next page (deprecated).\n",
    "\t -intext ext\t The extension for intermediate files (default .hss).\n",
    "\t -outext ext\t The extension for output files (default .html).\n",
    "\t -hssprog name\t The hss to html converter (default hss2html).\n",
    "\t -hssopts opts\t Options to pass to the hss2html program.\n",
    "\t -Idir \t\t Add dir to the list to be searched by #include.\n",
    "\t -Dmacro[=defn]\t Define a macro, to 1, or defn, just like gcc.\n",
    "\t -version \t Print version info and exit.\n",
    "\n",
  );

  print @usage_text;
  exit;
}


# Remove the working files created so far.  Called before the program
# dies on an error.  Does nothing when intermediate files are being
# kept ($keepfiles set).  Otherwise deletes the cross reference file,
# but only if $madexref says it has been created, and every
# intermediate file recorded in @hsslist.
sub CleanUpFiles {
  # Keeping files: nothing may be deleted.
  return if( $keepfiles != 0 );

  # The xref file only exists once the main program has opened it.
  unlink $xreffile if( $madexref );

  # Delete each intermediate file made so far.
  foreach my $stale (@hsslist) {
    unlink $stale;
  }
}


# Look up the names of the pages before and after a given page in the
# global @pagenames array.
# Arguments:
#    pagename base (accepted for compatibility; not used)
#    current page number
#    is-last-page flag
# Returns a two element list: ( previous name, next name ).
# The previous name is 0 when the page number is 0, and the next name
# is 0 when the is-last-page flag is set.  A previous name is always
# returned otherwise; the caller decides whether it is meaningful.
sub PrevNextNames {
  my ($pagename, $pagenum, $islast) = @_;

  # Page 0 has nothing before it: use the 'no prev page' code.
  my $prev = ( $pagenum == 0 ) ? 0 : $pagenames[$pagenum - 1];

  # The last page has nothing after it: use the 'no next page' code.
  my $next = $islast ? 0 : $pagenames[$pagenum + 1];

  return ( $prev, $next );
}


# Function to make a definition line by replacing markers.
# (MakeUndef, below, does the same job for undefinitions.)
#
# A copy of the global $definition string has the first occurrence of
# the name marker ($defnamemark) replaced by the name, and the first
# occurrence of the definition marker ($defdefmark) replaced by the
# definition.  The markers are interpolated into the match as
# patterns, not as literal strings.
#
# Arguments:
#    name
#    definition
#    spaces (optional flag: when true, every space in the result is
#            replaced by \s+ so that the result can be used as a
#            search pattern that tolerates variable whitespace)
# Returns: defline
sub MakeDef {
  my ($name, $def, $spaces ) = @_;
  my $defline = $definition;
  $defline =~ s/$defnamemark/$name/;
  $defline =~ s/$defdefmark/$def/;
  if( $spaces ) {
    # This used to read "$defline = s/ /.../", which ran the
    # substitution on $_ and overwrote $defline with the substitution
    # count.  Bind to $defline, replace every space, and insert a bare
    # \s+ (no quote characters).
    $defline =~ s/ /\\s+/g;
  }
  return $defline;
}
# Make an undefinition line: a copy of the global $undefinition string
# with the first occurrence of the name marker ($defnamemark) replaced
# by the given name.
# Arguments:
#    name
# Returns: the undefinition line.
sub MakeUndef {
  my $name = shift;
  ( my $undefline = $undefinition ) =~ s/$defnamemark/$name/;
  return $undefline;
}


# Test whether a line is a definition of a contents macro, i.e. a
# macro whose name starts with the contents prefix ($c_prefix).
# The search pattern is built with MakeDef, using capture groups in
# place of the name and the definition, and must match at the start
# of the line (leading white space is allowed).
# Arguments:
#    line to search.
# Returns:
#    On failure, the single value 0.
#    On success, a list:
#           Element[0] is 1.
#           Element[1] is the name, with the contents prefix.
#           Element[2] is the value.
sub LineIsContentsDef {
  my ($line) = @_;

  my $pattern = "^\\s*" . MakeDef( "$c_prefix(.+?)", "(.*)", 0 );

  # No match found.
  return 0 unless( $line =~ /$pattern/ );

  # Match found: $1 is the name without the prefix, $2 the value.
  return ( 1, "$c_prefix$1", $2 );
}


# Function to process a page.
#
# The page lines are split at the data marker into a header part and a
# data part.  When contents are being generated the first page is the
# contents page: it is written to the (already open) CONTENTS handle up
# to the contents marker, and the lines after the marker are saved in
# the global @data_post_cont for the main program to write later.  Any
# other page is written to its own intermediate file, which is added to
# @hsslist, and is then passed to ProcessContents.  Cross reference
# entries are written to the (already open) XREF handle in both cases.
#
# Arguments:
#    pagenumber (counted from 1; reduced by one when contents are
#                being generated, so the contents page becomes page 0)
#    ref-Template pre-data part.
#    ref-Template post-data part.
#    ref-Page lines.
#    is-last-page flag
#
# Dies, after calling CleanUpFiles, if a required marker is missing or
# the page file cannot be written.
sub ProcessPage {

  my ( $pagenum, $r_template_pre_part, $r_template_post_part,
       $r_pagelines, $islast ) = @_;

  # Split the page data into header and data parts.  Lines matching
  # the data marker are dropped.
  my $got_data_marker = 0;
  my @header_lines;
  my @data_lines;

  foreach my $pageline (@$r_pagelines) {
    if( $pageline =~ /^\s*$datamarker\s*$/ ) {
      $got_data_marker = 1;
      next;
    }
    if( not $got_data_marker ) {
      push( @header_lines, $pageline );
    }else{
      push( @data_lines, $pageline );
    }
  }

  # Check that the data marker was found.
  if( not $got_data_marker ) {
    CleanUpFiles();
    die "$id_name: Data marker not found on page $pagenum\n";
  }

  my $defline;

  # The contents page is the first page in the data section of the file,
  # _if_ contents are being processed.

  # Correct page number for contents.
  $pagenum -= 1 if( $do_contents );

  # Page zero is the contents page.
  if( $pagenum == 0 )
    {
      # Split the contents page at the contmarker.  The before part is
      # written now; the after part is needed when all the pages have
      # been processed, so both lists are globals.
      my $got_cont_marker = 0;
      foreach my $dataline (@data_lines) {
        if( $dataline =~ /^\s*$contmarker\s*$/ ) {
          $got_cont_marker = 1;
          next;
        }
        if( not $got_cont_marker ) {
          push( @data_pre_cont, $dataline );
        }else{
          push( @data_post_cont, $dataline );
        }
      }

      if( not $got_cont_marker ) {
        CleanUpFiles();
        die "$id_name: Contents marker not found on the contents page.\n";
      }

      # Contents page processing.  Contents file has already been
      # opened and should be left open.
      print "Processing contents page.\n" if $verbose;

      print CONTENTS @header_lines;  # Header for contents page.

      # Write line control.  See the comment in the ordinary page
      # section below.
      if( $linecontrol ) {
        print CONTENTS "#line ",
          (($#header_lines + 1) + 2), " \"$contfilename$hssext\"\n";
      }

      $defline = MakeDef( "PAGE","0" );
      print CONTENTS "$defline\n";

      $defline = MakeDef( "_THIS_", "$contfilename$htmlext" );
      print CONTENTS "$defline\n";

      $defline = MakeDef( "_CONT_", "$contfilename$htmlext" );
      print CONTENTS "$defline\n";

      # There's never a previous page for the contents.
      # The first page might have been renamed by -pname though.
      # The defined() test avoids an 'uninitialized' warning when no
      # name was recorded for page 1.
      if( defined $pagenames[1] and $pagenames[1] ne "" ) {
        $defline = MakeDef( "_NEXT_", "$pagenames[1]$htmlext" );
      }else{
        $defline = MakeDef( "_NEXT_", "$outfile" . "01$htmlext" );
      }
      # Must be print, not printf: the definition is data, and a '%'
      # in a page name would otherwise be taken as a format directive.
      print CONTENTS "$defline\n";

      print CONTENTS @$r_template_pre_part;  # Template part 1
      print CONTENTS @data_pre_cont;         # First part of page.

      # The per-page contents entries (undefs, defs, and _CONTENTS_)
      # will be written after this line, so we need to add line control
      # to indicate that we are back in the automatically generated
      # part of the file.
      if( $linecontrol ) {
        # Number of header lines.
        # + four sysdefs (constant for contents page) + line control.
        # + number of template_pre_part lines
        # + number of data_pre_cont lines.
        # + 2.
        my $lc_linenum = ( ($#header_lines + 1) +
                           (4 + 1) +
                           ($#$r_template_pre_part + 1) +
                           ($#data_pre_cont + 1) +
                           2 );
        print CONTENTS "#line ", $lc_linenum, " \"$contfilename$hssext\"\n";
      }

      # Output some xref entries for the contents page.
      print XREF "<PAGE>\n";

      # Look through the header lines for definitions of macros whose
      # names begin with the contents prefix; they go into the XREF file.
      foreach my $hline (@header_lines)
        {
          my @hwords = LineIsContentsDef( $hline );
          if ( $hwords[0] ) {
            print XREF "$hwords[1] $hwords[2]\n";
          }
        }

      print XREF "$c_prefix"."LINK $contfilename$htmlext\n";
      print XREF "$c_prefix"."NUM 0\n";

    }
  # Process a page other than the contents page.
  else
    {
      my $filebase;
      my $filename;

      print "Processing $outfile, page $pagenum.\n" if $verbose;

      # If an alternate page name has been supplied, use that instead
      # of the obvious one.
      if( defined $pagenames[$pagenum] and $pagenames[$pagenum] ne "" ){
        # Use the replacement name.
        $filebase = $pagenames[$pagenum];
      }else{
        # Ordinary page processing.  Make a file name.  The base name
        # is kept out of the format string so that a '%' in it is
        # harmless.
        $filebase = $outfile . sprintf( "%.2d", $pagenum );
      }

      # Create the file name with extension
      $filename = "$filebase$hssext";

      # Three-argument open: the file name is never interpreted as
      # part of the open mode.
      my $outfh;
      if( not open( $outfh, '>', $filename ) ) {
        CleanUpFiles();
        die "$id_name: Could not open $filename for output.\n";
      }

      # Add the name of the page file to the list of files to convert to html.
      push( @hsslist, "$filename" );

      # Get names for prev and next files.
      my ($prevname, $nextname) = PrevNextNames($outfile, $pagenum, $islast);

      # Write header data.
      print $outfh @header_lines;

      # Write line control for the beginning of the system defs.  There
      # isn't a corresponding line in the source files, but it's important
      # to write something, so that we don't report these lines as being
      # part of the source file containing the header data.  We just
      # use the name of the output file, and the current line number, which
      # is the number of lines in the header array, plus two (because we have
      # to count this line control line as one).
      if( $linecontrol ) {
        print $outfh "#line " .
          ($#header_lines + 3) . " \"$filename\"\n";
      }

      # Write system definitions.
      $defline = MakeDef( "PAGE", "$pagenum" );
      print $outfh "$defline\n";
      $defline = MakeDef( "_THIS_", "$filebase$htmlext" );
      print $outfh "$defline\n";
      if( $do_contents ) {
        $defline = MakeDef( "_CONT_", "$contfilename$htmlext" );
        print $outfh "$defline\n";
      }
      # Previous file name will not be valid for the first page if
      # there is no contents page.
      if( ($pagenum > 1) or $do_contents ) {
        $defline = MakeDef( "_PREV_", "$prevname$htmlext" );
        print $outfh "$defline\n";
      }
      # Next file name will be valid for all but the last page.
      if( $nextname ) {
        $defline = MakeDef( "_NEXT_", "$nextname$htmlext" );
        print $outfh "$defline\n";
      }

      # Write template, body, template parts.
      print $outfh @$r_template_pre_part;
      print $outfh @data_lines;
      print $outfh @$r_template_post_part;

      # Buffered write errors only show up at close, so check it.
      if( not close( $outfh ) ) {
        CleanUpFiles();
        die "$id_name: Error writing $filename: $!\n";
      }

      # Process page to extract contents file information.
      # We have to do this even if contents are turned off, because
      # this function also generates the x-ref info.
      ProcessContents( $filebase, $pagenum, \@header_lines );
    }
}


# Write the contents entry and the cross reference entries for one
# page.  This must be called for every ordinary page even when
# contents generation is turned off, because it is also what writes
# the page's x-ref entries; output to the contents file is suppressed
# by the $do_contents variable.  Both the CONTENTS and XREF handles
# are expected to be open already.
#
# Arguments:
#   The filename base for the page being processed.
#   The page number.
#   Reference to the list of header lines for the page.
sub ProcessContents {
  my ( $pagename, $pagenum, $r_header_lines ) = @_;

  print "Processing contents for $pagename.\n" if $verbose;

  # Each page's entries in the xref file start with a separator.
  print XREF "<PAGE>\n";

  # Pass on every header line that defines a contents macro.
  foreach my $header_line (@$r_header_lines)
    {
      my ( $is_def, $macro, $macro_value ) = LineIsContentsDef( $header_line );
      next if( not $is_def );

      # In the contents file the macro is undefined first, then the
      # original defining line is repeated.
      if( $do_contents ) {
        my $undef_line = MakeUndef( "$macro" );
        print CONTENTS "$undef_line\n";
        print CONTENTS "$header_line\n";
      }

      # The name/def pair goes to the xref file.
      print XREF "$macro $macro_value\n";
    }

  # The link and page number macros are generated for every page.
  my $link_macro = "$c_prefix"."LINK";
  my $num_macro  = "$c_prefix"."NUM";
  my $link_value = "$pagename$htmlext";

  if( $do_contents ) {
    foreach my $pair ( [ $link_macro, $link_value ],
                       [ $num_macro,  "$pagenum"  ] ) {
      my $undef_line = MakeUndef( $pair->[0] );
      print CONTENTS "$undef_line\n";
      my $def_line = MakeDef( $pair->[0], $pair->[1] );
      print CONTENTS "$def_line\n";
    }

    # Emit a line to generate contents.
    print CONTENTS "_CONTENTS_\n";
  }

  # The same two values go to the XREF file.
  print XREF "$link_macro $link_value\n";
  print XREF "$num_macro $pagenum\n";
}


# Scan the input files for page name overrides, dependencies, etc.
#
# Reads the named file line by line, following #include directives
# recursively through the directories in @includes.  Every header
# marker line found adds an entry to the global @pagenames (either the
# override given in the marker or an automatically generated name) and
# increments $numpagenames.  When $makedep is set, every file scanned
# and every IMPORTed file is added to @deplist.
#
# Arguments:
#    name of the file to scan.
#
# Dies, after calling CleanUpFiles, if a file cannot be opened or an
# included file cannot be found, unless $makedepgen is set, in which
# case missing files are recorded as dependencies instead.
sub ScanInputFiles {
  my ($filename) = @_;

  # A lexical handle, so each level of recursion has its own, opened
  # with three-argument open so the name is never taken as a mode.
  my $infile;
  my $openok = open( $infile, '<', $filename );

  # Failure to open the file is not an error _if_ we're doing makedepgen.
  if( (not $openok) and (not $makedepgen) ) {
    CleanUpFiles();
    die "$id_name: Unable to open $filename.\n";
  }

  if( $makedep ) {
    # Add the file to the dependency list.
    push @deplist, $filename;
  }

  # If we couldn't open the file, and we've got this far, it means
  # we're doing makedepgen.  We can't read the file, so we just return.
  return if( not $openok );

  while( my $line = <$infile> ) {

    # Check for include.  Space is allowed before and after the #.
    # Note that this doesn't check that the opening and closing quotes
    # match.  If making dependencies check for IMPORT too.
    if( $line =~ /^\s*(\#\s*include)\s+[<\"]\s*(.*?)\s*[>\"]/ or
        ($makedep and
         $line =~ /<\s*(IMPORT)\s*=\s*[<\"]\s*(.*?)\s*[>\"]>/) ) {

      # Save the captures straight away, before anything else can
      # run another match.
      my ( $directive, $target ) = ( $1, $2 );

      # We need to know if we're dealing with include or import, because
      # we mustn't open the imported file.
      my $isimport = ( $directive eq "IMPORT" ) ? 1 : 0;

      # Look for the file in each of the dirs in @includes.
      my $incname = "";
      foreach my $idir (@includes) {
        if( -f "$idir/$target" ) {
          $incname = "$idir/$target";
          last;
        }
      }

      if( $incname eq "" ) {
        # The file wasn't found on the include path.

        if( not $makedepgen ) {

          # If not doing makedepgen, it's an error...
          CleanUpFiles();
          my $ipath = join (":", @includes);
          die "$id_name:\n" .
            "\tUnable to find included file $target in include path:\n" .
            "\t$ipath\n";
        }else{

          # It must be makedepgen.  Add the file name to the
          # dependency list without trying to open it.
          push @deplist, $target;
        }
      }else{
        # The file was found.

        if( not $isimport ) {
          # Open the included file and process it.
          ScanInputFiles( "$incname" );
        }else{
          # Don't open imported files, just add them to the dependency
          # list.
          push @deplist, $incname;
        }
      }
    } else {
      # Something other than include or import.

      # Check for header marker lines.
      if( $line =~ /^\s*$hdrmarkerA(\s*=\s*(.*?)){0,1}$hdrmarkerB\s*$/ ) {

        my $override = $2;

        if( defined $override ) {
          # Page name override detected.
          $pagenames[$numpagenames] = $override;
          print "Page name override = $override on page $numpagenames\n"
            if $verbose;
        }else{
          # Automatically generated page name.
          if( $numpagenames == 0 ) {
            $pagenames[$numpagenames] = $contfilename;
          }else{
            # The base name is kept out of the format string so that
            # a '%' in it is harmless.
            $pagenames[$numpagenames] =
              $outfile . sprintf( "%.2d", $numpagenames );
          }
        }

        $numpagenames += 1;
      }
    }
  }

  # Finished processing lines.
  close $infile;
}


# This is the main function that gets called recursively to process
# the input file.
#
# Lines read before the first header marker are collected in the
# global @templatelines.  On reaching the first header marker the
# template is split at the template data marker into
# @template_pre_data and @template_post_data and $gottemplate is set.
# After that, lines are collected in @pagelines, and each subsequent
# header marker causes the page collected so far to be passed to
# ProcessPage.  The lines after the last header marker are left in
# @pagelines for the caller to process.  #include directives are
# followed through the directories in @includes.  When $linecontrol is
# set, "#line" directives naming the original file and line are
# inserted wherever the source position changes.
#
# Arguments:
#    name of the file to open.
#    name of the file as it should appear in messages and line control.
#
# Dies, after calling CleanUpFiles, if a file cannot be opened, an
# included file cannot be found, or the template has no data marker.
sub ReadDataWithIncludes {
  my ($filename, $printablefilename) = @_;
  my $linecount = 1;

  # If the line following the template data marker is an #include,
  # we'll write line control for the return from the include, followed
  # by line control (the same line) for the return point from the
  # inserted page data.  That's harmless.
  my $templateneedslinectrl = 0;

  # A lexical handle, so each level of recursion has its own, opened
  # with three-argument open so the name is never taken as a mode.
  my $infile;
  if( not open( $infile, '<', $filename ) ) {
    CleanUpFiles();
    die "$id_name: Unable to open $filename.\n";
  }

  # Reading an ordinary file.
  # If using cpp line control, output the file name and line number
  # for the file that has just been opened.
  if( $linecontrol ) {
    if( $gottemplate ) {
      push( @pagelines,     "#line $linecount \"$printablefilename\"\n" );
    }else{
      push( @templatelines, "#line $linecount \"$printablefilename\"\n" );
    }
  }

  # Each line is held in a lexical, so neither the recursive call nor
  # ProcessPage can disturb it.
  while( my $line = <$infile> ) {

    # Check for include.  Space is allowed before and after the #.
    # Note that this doesn't check that the opening and closing quotes
    # match.
    if( $line =~ /^\s*\#\s*include\s+[<\"]\s*(.*?)\s*[>\"]/ ) {

      # Save the capture before anything else can run another match.
      my $included = $1;

      # Look for the file in each of the dirs in @includes.
      my $incname = "";
      foreach my $idir (@includes) {
        if( -f "$idir/$included" ) {
          $incname = "$idir/$included";
          last;
        }
      }

      if( $incname eq "" ) {
        CleanUpFiles();
        my $ipath = join (":", @includes);
        die "$id_name: $printablefilename:$linecount\n" .
          "\tUnable to find included file $included in include path:\n" .
          "\t$ipath\n";
      }

      # The name as written in the directive is used for messages.
      ReadDataWithIncludes( "$incname", $included );

      # Increment the line counter for the #include line.
      $linecount += 1;

      # If using cpp line control, output the file name and line number
      # for the file that we've just returned to.
      if( $linecontrol ) {
        if( $gottemplate ) {
          push( @pagelines,     "#line $linecount \"$printablefilename\"\n" );
        } else {
          push( @templatelines, "#line $linecount \"$printablefilename\"\n" );
        }
      }

    } else {

      if( $gottemplate ) {

        # If the template has already been read, process data lines.

        # Look for the page separator.
        # We don't need to extract the (optional) page name here.
        if( $line =~ /^\s*$hdrmarkerA(\s*=\s*(.*?)){0,1}$hdrmarkerB\s*$/ ) {

          # The HDR tag precedes the first page and was detected as
          # the end of the template, so on finding the next we know that
          # a whole page has been read.  At the end of the file, there
          # won't be another marker, so the caller has to call
          # ProcessPage again to process the last lot of text.
          if( $pagenum >= 1 ) {

            # Process the page now.
            ProcessPage( $pagenum,
                         \@template_pre_data, \@template_post_data,
                         \@pagelines, 0 );
          }

          # Reset the stored page.
          @pagelines = ();
          $pagenum++;
          $pageneedslinectrl = 1;

          # Count the line that we found the HDR tag on.
          $linecount += 1;

          # Advance directly to the next line.
          next;
        }

        # At the start of the page we need some line control.
        if( $linecontrol and $pageneedslinectrl ) {
          push( @pagelines, "#line $linecount \"$printablefilename\"\n" );
        }

        # Check that the line ends in newline.  If it doesn't it must
        # be the last line in a file.
        if( $line !~ /\n/ ) {
          print "$id_name: $printablefilename:$linecount\n" .
            "\tWarning - File does not end in newline.\n";
          $line = "$line\n";
        }

        # Add the line to pagelines.
        push( @pagelines, $line );

        # If the line just added was the <DATA> or <CONTENTS> (if
        # we're processing the contents page) marker, we need to add
        # line control before the next line, so leave the flag set.
        # Otherwise, clear it.
        if( $line =~ /^\s*$contmarker\s*$/ and
            $do_contents and ($pagenum == 1) ) {
          $pageneedslinectrl = 1;
        }elsif( $line =~ /^\s*$datamarker\s*$/ ) {
          $pageneedslinectrl = 1;
        } else {
          $pageneedslinectrl = 0;
        }

      } else {
        # Process template lines.

        if( $line =~ /^\s*$hdrmarkerA(\s*=\s*(.*?)){0,1}$hdrmarkerB\s*$/ ) {
          # This line gets thrown away, so the first part of the data section
          # is a page of data.

          # When we reach the first header marker we've reached the
          # end of the template part of the file, so create lists from
          # the lines read so far, before and after the <PAGE> marker.
          my $got_tempdata_marker = 0;
          foreach my $templine (@templatelines) {
            if( $templine =~ /^\s*$tempdatamarker\s*$/ ) {
              $got_tempdata_marker = 1;
              next;
            }
            if( not $got_tempdata_marker ) {
              push( @template_pre_data, $templine );
            }else{
              push( @template_post_data, $templine );
            }
          }

          # If the template data marker was not found, abort.
          if( not $got_tempdata_marker ) {
            CleanUpFiles();
            die "$id_name: $printablefilename:$linecount\n" .
              "\tTemplate data marker not found.\n";
          }

          $gottemplate = 1;

        } else {

          # If we just wrote the <PAGE> marker ($tempdatamarker) on
          # the previous line then we need to add line control before
          # this line because we will always return from the inserted
          # page data to this point.
          if( $linecontrol and $templateneedslinectrl ) {
            push( @templatelines,
                  "#line $linecount \"$printablefilename\"\n" );
          }

          # Check that the line ends in newline.  If it doesn't it must
          # be the last line in a file.
          if( $line !~ /\n/ ) {
            print "$id_name: $printablefilename:$linecount\n" .
              "\tWarning - File does not end in newline.\n";
            $line = "$line\n";
          }

          # Add the line to the template.
          push( @templatelines, $line );

          # If we've just seen the <PAGE> marker, set the flag to add
          # line control to the next template line.  Otherwise, clear
          # the flag.
          if( $line =~ /^\s*$tempdatamarker\s*$/ ) {
            $templateneedslinectrl = 1;
          }else{
            $templateneedslinectrl = 0;
          }
        }
        # End of processing template lines.
      }
      # Increment the line counter for the line just processed.
      $linecount += 1;

      # End of processing non-include line.
    }
    # End of while loop over the input lines.
  }

  close $infile;
}


#########################
## Main program start. ##
#########################

# Defaults for all arguments...
# Everything set here is a package global that the subroutines above
# and the rest of the main program read; most can be overridden from
# the command line.

# Misc.
$keepfiles = 0;      # -k: keep intermediate files.
$verbose = 0;        # -v: each occurrence adds one.
$diffout = 0;        # -d: passed through to the hss converter.
$linecontrol = 1;    # -p turns line control off.

# Field markers for data file
$hdrpagemarker = "%PAGE%";
$hdrmarker  = "<HDR$hdrpagemarker>";
# The parts of $hdrmarker before and after the page marker; filled in
# once the command line has been read.
$hdrmarkerA = "";
$hdrmarkerB = "";

$datamarker = "<DATA>";

# Marker to show where contents list should go in contents page.
$contmarker = "<CONTENTS>";

# Template data marker
$tempdatamarker = "<PAGE>";

# How to identify definitions
$defnamemark = "%NAME%";
$defdefmark = "%DEF%";
$definition = "#define $defnamemark $defdefmark";
$undefinition = "#undef $defnamemark";

$c_prefix = "_C_";

$hssprog = "hss2html";
$hssopts = "";

# The name of the contents, xref, and page output files.
$contfilename = "index";
$outfile = "out";
$xreffile = "index.xref"; # This is a full name, with extension.
$madexref = 0;            # Set once the xref file has been created.

# Filename extensions.
$hssext = ".hss";
$htmlext = ".html";

# Command line defines will be passed through to the hss2html program.
%defines = ();
$defstring = "";

# Include search dirs, because cpp never gets the chance...
@includes = ();
$incstring = "";

# Get information from rcs.
# Spaces before the closing quotes are preserved by RCS and stop
# emacs syntax highlighting thinking there's a variable called $'
$programid = '$Id: htt2html,v 1.30 2000/06/17 15:51:05 sdm Exp $ ';
if( $programid =~ /\$Id: (.*?),v (.*?) (.*?) / ) {
  ( $id_name, $id_version, $id_date ) = ( $1, $2, $3 );
}else{
  # The keyword has not been expanded.  Fall back to fixed values
  # rather than leaving the identification variables undefined.
  ( $id_name, $id_version, $id_date ) = ( "htt2html", "unknown", "unknown" );
}

# $1 is only looked at if the match succeeded; after a failed match
# it would still hold the capture from the match above.
$tagname   = '$Name: Guava-1_0_3 $ ';
if( ($tagname =~ /\$Name: (.*?) /) and ($1 ne "") ) {
  $id_tag = $1;
}else{
  $id_tag = "none";
}

# Create a contents file if this variable is set.  Default is to
# create a contents file.  If the pages being created are not part of
# a single document, but share the same template, you may not need a
# contents file.  You can turn contents generation off with -nocont,
# and the pages will remain independent.
$do_contents = 1;

# Replacement names for page files:

# Found on the command line.
@cmdpagenames = ();
$numcmdpagenames = 0;

# Scanned in from <HDR> markers.
@pagenames = ();
$numpagenames = 0;

# Dependency mode.
$makedep = 0;        # -M
$makedepstd = 0;     # -M without -Mp; derived after option parsing.
$makedepgen = 0;     # -MG
$makedepfile = 0;    # -Mp

# Dependencies.
@deplist = ();

# Check arguments.  Options are consumed from the front of @ARGV
# until the first argument that does not start with '-'; exactly one
# argument, the input file, must remain.

# Options that stand alone: option => action.
my %switch_action = (
  "-k"       => sub { $keepfiles   = 1; },
  "-v"       => sub { $verbose    += 1; },
  "-d"       => sub { $diffout     = 1; },
  "-p"       => sub { $linecontrol = 0; },
  "-M"       => sub { $makedep     = 1; },
  "-MG"      => sub { $makedepgen  = 1; },
  "-Mp"      => sub { $makedepfile = 1; },
  "-nocont"  => sub { $do_contents = 0; },
  "-version" => sub {
    print "$id_name, version $id_version, release $id_tag, $id_date.\n";
    exit;
  },
);

# Options whose following argument is stored as it stands:
# option => reference to the variable that receives it.
my %value_target = (
  "-hm"       => \$hdrmarker,
  "-hpm"      => \$hdrpagemarker,
  "-dm"       => \$datamarker,
  "-cm"       => \$contmarker,
  "-tm"       => \$tempdatamarker,
  "-def"      => \$definition,
  "-defnmark" => \$defnamemark,
  "-defdmark" => \$defdefmark,
  "-undef"    => \$undefinition,
  "-cpre"     => \$c_prefix,
  "-o"        => \$contfilename,
  "-x"        => \$xreffile,
  "-op"       => \$outfile,
);

# Options whose following argument needs some processing:
# option => action, called with the argument.
my %value_action = (
  "-pname"   => sub { push( @cmdpagenames, $_[0] ); $numcmdpagenames += 1; },
  "-intext"  => sub { $hssext  = ".$_[0]"; },
  "-outext"  => sub { $htmlext = ".$_[0]"; },
  "-hssprog" => sub { $hssprog = "$_[0]"; },
  "-hssopts" => sub { $hssopts = "$_[0]"; },
);

# $numargs tracks the index of the last argument still in @ARGV.
$numargs = $#ARGV;
while( ($numargs >= 0) and $ARGV[0] =~ /^-/ )
  {
    my $opt = $ARGV[0];

    if( exists $switch_action{$opt} ) {
      $switch_action{$opt}->();
    }
    elsif( exists $value_target{$opt} ) {
      # Step over the option to reach its value.
      shift;
      $numargs -= 1;
      ${ $value_target{$opt} } = $ARGV[0];
    }
    elsif( exists $value_action{$opt} ) {
      # Step over the option to reach its value.
      shift;
      $numargs -= 1;
      $value_action{$opt}->( $ARGV[0] );
    }
    elsif( $opt =~ "^-I" ) {
      # The option is passed on to the hss converter unchanged.
      $incstring .= " $opt";

      # Strip trailing slash, then the -I itself.
      ( my $incdir = $opt ) =~ s/(.*)\/$/$1/;
      $incdir =~ /^-I(.*)/;
      if($1 eq ""){
        die "$id_name: Include directory is blank.  Use -Idir " .
          "without a space, perhaps?\n";
      }
      push( @includes, $1 );
    }
    elsif( $opt =~ /^-D/ ) {
      # The option is passed on to the hss converter unchanged.
      $defstring .= " $opt";

      # Definitions might have a value or not.
      if( $opt =~ "=" ) {
        $opt =~ /^-D(.*?)=(.*)/;
        $name = $1;
        $value = $2;
      }else{
        $opt =~ /^-D(.*)/;
        $name = $1;
        $value = "1";
      }
      # Add the name and value to a hash.
      $defines{$name} = $value;
    }
    else {
      print "$id_name: Unknown option $opt\n";
      DieUsage();
    }

    # Step over the argument just dealt with.
    $numargs -= 1;
    shift;
  }

# Exactly one argument must be left: the input file.
DieUsage() if( $numargs != 0 );
$templatefile = $ARGV[0];

# Are we using line control?
if( $verbose ) {
  if( $linecontrol ) {
    print "Using line control.\n";
  } else {
    print "Not using line control.\n";
  }
}

# Check the markers are present in the define string
unless( $definition =~ /$defnamemark/ ) {
  die "$id_name: Name marker \"$defnamemark\" not found in " .
    "definition string.\n";
}
unless( $definition =~ /$defdefmark/ ) {
  die "$id_name: Definition marker \"$defdefmark\" not found in " .
    "definition string.\n";
}
unless( $undefinition =~ /$defnamemark/ ) {
  die "$id_name: Name marker \"$defnamemark\" not found in " .
    "undefinition string.\n";
}

# Check the marker is present in the header marker tag.
unless( $hdrmarker =~ /$hdrpagemarker/ ) {
  die "$id_name: Page marker \"$hdrpagemarker\" not found in " .
    "header marker string.\n";
}

# Split the header marker into two, before and after the page marker.
( $hdrmarkerA, $hdrmarkerB ) = split /$hdrpagemarker/, $hdrmarker, 2;

# If the contents file name has been supplied with an extension,
# remove it.  It will be added back on when the file is created.
# The extension is everything after the last dot of the last path
# component.  The pattern used to allow only a single character after
# the dot, so "index.html" was left alone.  A leading dot (a hidden
# file, or "./" and "../" in a path) is not treated as the start of an
# extension.
if( $contfilename =~ /^(.*[^\/])\.[^.\/]+$/ ) {
  print "Removing extension from contents " .
    "file name $contfilename > " if $verbose;
  $contfilename = $1;
  print "$contfilename.\n" if $verbose;
}

# If not doing makedep, make sure makedepgen and makedepfile aren't
# used either.
if( (not $makedep) && $makedepgen ) {
  die "$id_name: -MG cannot be used without -M.\n";
}
if( (not $makedep) && $makedepfile ) {
  die "$id_name: -Mp cannot be used without -M.\n";
}

# Set a flag for 'ordinary' makedep, so we can just test one flag
# each time.
$makedepstd = 1 if( $makedep and not $makedepfile );

# Add the directory of the input file name to the include path.
if( $templatefile =~ /^(.*)\/(.*)/ ) {
  # If the input file isn't in the current dir, add the path to @includes.
  push( @includes, "$1" );

  # Add the source directory to the incstring so that hss2html will
  # see it.  hss2html doesn't see #includes when it's called from
  # htt2html, but it will need to search for IMPORTed files.
  $incstring .= " -I$1";

}else{
  # Else, put "." into the include path.
  push( @includes, "." );
}


# Fill the @pagenames array by scanning the input files for header
# markers and their page name overrides.
unless( $do_contents ) {
  # Without a contents page slot 0 is only padding, so that page name
  # numbering works nicely.
  $pagenames[0] = "no_contents_page";
  $numpagenames += 1;
}
ScanInputFiles( $templatefile );
print "Completed page name scan.\n" if $verbose;

# Merge command line page name overrides into the @pagenames array.
# With a contents page, the contents page is $pagenames[0] and the
# first -pname corresponds to it.  Without one, the first real page is
# $pagenames[1] and the first -pname corresponds to that page.  Either
# way the n-th -pname belongs to the n-th real entry of @pagenames.
my $firstrealpage = $do_contents ? 0 : 1;
for( $p = $firstrealpage; $p < $numpagenames; $p++ ) {
  my $cmdindex = $p - $firstrealpage;
  if( $cmdindex < $numcmdpagenames ) {
    print "Page name overriden by -pname option.\n" if $verbose;
    $pagenames[$p] = $cmdpagenames[$cmdindex];
  }
  print "Page name for page $p = $pagenames[$p]\n" if $verbose;
}


# Dependency mode (-M): write a single make(1) rule to standard
# output, "targets: dependencies", and exit without generating pages.
if( $makedep ) {

  # Remove duplicates from the list, preserving the order.
  # From the Perl Cookbook, pp102.
  my %seen = ();
  @uniquedeps = grep { ! $seen{$_} ++ } @deplist;

  # The first real entry in @pagenames.
  my $firstpage = $do_contents ? 0 : 1;

  # Gcc removes the pathnames from the target file names, so we should too.

  # Output the target file name(s).
  if( $makedepstd ) {
    # Standard make dep just outputs the name of the contents page,
    # or the first page, if there is no contents page.
    ($targ = "$pagenames[$firstpage]$htmlext: ") =~ s/^.*\///;
    print $targ;
  } else {
    # Multipage makedep outputs the names of all the output pages.
    # All but the last are followed by a space...
    if( $#pagenames > $firstpage ) {
      foreach my $page ( @pagenames[ $firstpage .. $#pagenames-1] ) {
        # Work on a copy, so that @pagenames itself is left alone.
        ( $targ = $page ) =~ s/^.*\///;
        print "$targ$htmlext ";
      }
    }
    # ...and the last one by the colon that ends the target list.
    $targ = "$pagenames[$#pagenames]$htmlext: ";
    $targ =~ s/^.*\///;
    print $targ;
  }

  # Write out each dependency.
  foreach $dep (@uniquedeps) {

    # Strip leading './' from names.  The dot must be escaped:
    # unescaped it matches any character, so a name such as
    # "a/file" lost its one-letter directory.
    if( $dep =~ /^\.\/(.*)/ ){
      $dep = $1;
    }
    print "$dep ";
  }
  print "\n";

  exit;
}


if( $verbose ){
  print "Looking for input files...\n";
  print "$templatefile\n";

  print "Output filename base = $outfile\n";
  if( $do_contents ) {
    print "Contents file will be called $contfilename.\n";
  }else{
    print "Contents file will not be produced.\n";
  }
}

# Open the contents file for output.  The CONTENTS and XREF handles
# are written to by ProcessPage and ProcessContents, so they stay as
# named handles.  The three-argument form of open is used so that a
# file name from the command line is never interpreted as part of the
# open mode.
if( $do_contents ) {
  if( not open( CONTENTS, '>', "$contfilename$hssext" )) {
    CleanUpFiles();
    die "$id_name: Unable to open contents file: $contfilename$hssext\n";
  }

  # Add the name of the contents file to the list of files to convert
  # to html.
  push( @hsslist, "$contfilename$hssext" );
}

# Open the crossref file for output.
if( not open( XREF, '>', $xreffile )) {
  CleanUpFiles();
  die "$id_name: Unable to open x-ref file: $xreffile\n";
}
# From now on CleanUpFiles must remove the xref file as well.
$madexref = 1;


# Real processing starts now...

# Start with an empty template and an empty page, at page one, with
# the template still to be found.  The first line of a page always
# needs line control.
@templatelines = ();
@pagelines = ();
$pageneedslinectrl = 1;
$pagenum = 1;
$gottemplate = 0;

# Read the template file, upto the first <HDR> marker, which is the
# start of the data section. Then read data, splitting the contents
# into pages.  Process included files all the way.
ReadDataWithIncludes( $templatefile, $templatefile );

# Reaching the end of the input without ever seeing a header marker
# means there is no data section at all.
unless( $gottemplate ) {
  CleanUpFiles();
  die "$id_name: Unable to find a header marker for the first page.\n";
}

# The text after the last <HDR> has not been processed yet.  Do that
# now, with the last-page flag set.
ProcessPage( $pagenum,
             \@template_pre_data, \@template_post_data, \@pagelines, 1 );

# Finish the contents file with the part of the contents page after
# the contents marker, then the rest of the template, and close it.
if( $do_contents ) {
  print CONTENTS @data_post_cont, @template_post_data;
  close( CONTENTS );
}

close( XREF );

# Some on/off options passed through to hss2html.  Each one that
# applies contributes its flag followed by a space.
$opts = join( "",
              ( $verbose     ? "-v " : "" ),
              ( $keepfiles   ? "-k " : "" ),
              ( $diffout     ? "-d " : "" ),
              ( $linecontrol ? ""    : "-p " ) );

# Run the hss to html converter on all the files in turn.
foreach $convertfile (@hsslist) {

  print "Converting $convertfile to HTML\n" if $verbose;

  # Flush verbose output before running an external program.
  STDOUT->flush();

  my $command = "$hssprog $hssopts $defstring $incstring $opts " .
    "-x $xreffile $convertfile";
  $rc = system $command;

  # Anything but a zero status is a failure.
  if( $rc ) {
    CleanUpFiles();
    die "$id_name: Conversion to HTML failed for $convertfile.\n";
  }

  # The intermediate file is no longer needed once converted.
  next if( $keepfiles != 0 );
  print "Removing intermediate file $convertfile\n" if $verbose;
  unlink $convertfile;
}

# The xref file is an intermediate file too.
if( $keepfiles == 0 ) {
  print "Removing xref file $xreffile\n" if $verbose;
  unlink $xreffile;
}
