#!/usr/bin/perl -w

use strict;
use utf8;
use Encode;
use charnames ":full";
use HTML::Entities;

# balance($s, $open, $close)
#
# Returns $s with at most one $open prepended and/or one $close appended, so
# that a fragment cut out of a longer marked-up text neither starts with a
# dangling $close nor ends with a dangling $open.
#
#   $s     - the string to repair
#   $open  - opening delimiter, e.g. '<em>'
#   $close - closing delimiter, e.g. '</em>'
#
# Only the first and last occurrences of each delimiter are examined;
# delimiters are not counted, so mismatches in the middle of $s are left alone.
sub balance {
    my ($s, $open, $close) = @_;

    # A $close that shows up before any $open (or with no $open at all) is
    # dangling.  "Found" is tested as != -1 rather than > 0 so that a $close
    # sitting at the very start of the string is repaired too.
    my $first_open  = index($s, $open);
    my $first_close = index($s, $close);
    if ($first_close != -1
        && ($first_open == -1 || $first_close < $first_open)) {
        $s = $open . $s;
    }

    # An $open located after the last $close is dangling.  rindex returns -1
    # when $close is absent, so this one comparison also covers "there is an
    # $open but no $close at all".  The positions are taken from $s as it
    # stands after the possible prepend above.
    if (rindex($s, $open) > rindex($s, $close)) {
        $s .= $close;
    }

    return $s;
}

    # Reflow the text of the "dead" file into roughly one sentence per output
    # line, rewrite <em>/<strong> markup as @i{...}/@b{...}, drop every other
    # tag, and print the non-empty results to STDOUT as UTF-8 bytes.
    #
    # The input is decoded as UTF-8 on read.  Without decoding, the curly-quote
    # literals below (this file is "use utf8") can never match the raw input
    # bytes, and the utf8::encode before printing would encode non-ASCII input
    # a second time.
    # NOTE(review): this assumes the file is UTF-8 encoded -- confirm.
    open(my $dead_fh, '<:encoding(UTF-8)', "/mit/xavid/.zsigs/dead")
        or die "Can't open dead! ($!)";

    # Input lines collected since the last chunk was emitted.  Whatever is
    # still in here when the file ends is never printed.
    my $fish = '';
    while (my $input = <$dead_fh>) {
        chomp $input;
        $fish = "$fish $input";

        # Keep collecting until the chunk ends in sentence punctuation
        # (optionally followed by spaces) or contains "Act".
        next unless ($fish =~ /[.!?][ ]*$/ || $fish =~ /Act/);

        my $text = $fish;
        $fish = '';

        for ($text) {
            # Glue honorifics to the following name so the sentence splitter
            # below cannot break right after them.
            s/(Mr[.]|Mrs[.]|Dr[.]|Ms[.])[ ]/$1\N{NO-BREAK SPACE}/g;
            # Break after sentence-ending punctuation once 20-80 characters
            # have gone by and the next word starts with a capital letter.
            s/(.{20,80})([.!?]|[.]”)[ \t\n]+([A-Z])/$1$2\n$3/g;
            s|<br */?>|\n|g;
            # Tidy spacing before punctuation and collapse runs of spaces.
            s/ [.]/./g;
            s/ [:]/:/g;
            s/[ ][ ]+/ /g;
            # Drop a chapter heading that trails the chunk.
            s/CHAPTER.*$//g;
        }

        foreach my $line (split /\n/, $text) {
            # Splitting can separate a tag from its partner; re-pair them
            # per line before converting.
            $line = balance($line, '<em>', '</em>');
            $line = balance($line, '<strong>', '</strong>');

            for ($line) {
                # Single-quote delimiters keep "@i{" / "@b{" literal.
                s'<em>'@i{'g;
                # /g here as well: every </strong> becomes "}" below, so
                # every <strong> has to become "@b{" to keep braces paired.
                s'<strong>'@b{'g;
                s#</(em|strong)>#}#g;
                # Any remaining tag is removed outright.
                s#</?\w+[^>]*>##g;
                s/^\s*//;
                s/\s*$//;
            }

            # Balance quotes
            $line = balance($line, '“', '”');

            # Encode first, then test, so the \w check sees the same bytes
            # that get printed.
            utf8::encode($line);
            if ($line =~ /\w/) {
                print "$line\n";
            }
        }
    }

    close($dead_fh);
