#!/afs/athena/contrib/perl/perl

# Turn on autoflush for the currently selected output handle so that the
# progress messages printed below appear as soon as they are written.
$|=1;
# load_dict(DICT)
#
# Reads the word list in the file DICT (one word per line) and appends
# every word made up solely of \w characters to the global list that
# matches its length: @flist2 for two-letter words up to @flist9 for
# nine-letter words.  Words of any other length are ignored.  Words are
# appended, so calling this more than once accumulates entries.
#
# A "." is printed for every 1000 words stored as a progress indicator.
#
# Returns the number of words stored.  If DICT cannot be opened a warning
# is issued and 0 is returned; the lists are left untouched.
sub load_dict {
    my ($dict) = @_;

    # Word length => the global list that collects words of that length.
    my %list_for = (
        2 => \@flist2, 3 => \@flist3, 4 => \@flist4, 5 => \@flist5,
        6 => \@flist6, 7 => \@flist7, 8 => \@flist8, 9 => \@flist9,
    );
    my $stored = 0;

    print("Loading dictionary...($dict)\n");
    my $in;
    unless (open($in, '<', $dict)) {
        # A missing word list is not fatal: the caller simply gets no
        # new words, as it did when the failure went unnoticed.
        warn("Cannot open dictionary $dict: $!\n");
        return 0;
    }
    while (<$in>) {
        # chomp, not chop: the last line may lack a trailing newline.
        chomp;
        next unless /^\w+$/;
        my $list = $list_for{length($_)};
        next unless $list;
        push(@$list, $_);
        print(".") if ++$stored % 1000 == 0;
    }
    print("Loaded.\n");
    close($in);
    return $stored;
}
# Load the primary word list, then read the ciphertext from the files
# named on the command line (or standard input) and tally the letters.
#
# Globals set here:
#   @cipher  - the letters 'a'..'z'
#   $ctext   - the whole input, folded to lowercase
#   %noc     - occurrence count of every letter that appears in $ctext
#   @toplets - the keys of %noc as ordered by the byfreq comparator
&load_dict("/afs/sipb/user/mkgray/words");
@cipher = ('a'..'z');
print("Getting frequencies...\n");
# Announce the case folding once instead of once per input line.
print("Translating to lowercase...");
while (<>) {
    tr/A-Z/a-z/;
    $ctext .= $_;
    # A single pass over the line; only the letters a-z are counted.
    while (/([a-z])/g) {
        $noc{$1}++;
    }
}
# No close here: the input was read through <>, and no other handle was
# ever opened by this part of the script.
print("Got em.\n");

foreach $x (@cipher) {
    # Letters that never occurred have no entry in %noc; show them as 0.
    printf("%d %s, ", $noc{$x} || 0, $x);
}
print("Sorting...");
@toplets = sort byfreq keys %noc;
print("done.\n");
print("Top 5 characters:\n");
print("$toplets[0], $toplets[1], $toplets[2], $toplets[3], $toplets[4]\n");
foreach $l (@toplets) {
    print "$l $noc{$l}\n";
}
# Guess E, T and H.  The most frequent ciphertext letter is taken to be E.
# The second most frequent is taken to be T, and the most common trigram
# of the form T?E then supplies H.
#
# Globals set here: $the1m/$the2m/$the3m, $kct, $the1/$the2/$the3, %thea,
# @thes, @thel, $ntext1 (working copy with guessed letters in uppercase),
# $e, $h, $t, $cracked and $ntext2.
print("\nOk.  Let's try substituting E for $toplets[0] and T for either $toplets[1] or $toplets[2] or $toplets[3]\n");

print("First, let's see if we can find any THE's.\n");
# The old "$* = 1" multi-line switch is deliberately not set: it is a
# fatal error on Perl 5.30 and later, and nothing below anchors a pattern
# with ^ or $ against multi-line text.
$the1m = "$toplets[1].$toplets[0]";
$the2m = "$toplets[2].$toplets[0]";
$the3m = "$toplets[3].$toplets[0]";
print "$the1m $the2m $the3m\n";

# Count non-overlapping matches without rewriting $ctext.
$kct  = () = $ctext =~ /k/g;
$the1 = () = $ctext =~ /$the1m/g;
$the2 = () = $ctext =~ /$the2m/g;
$the3 = () = $ctext =~ /$the3m/g;

print "$the1 $the2 $the3 ($kct)\n";

# NOTE(review): only the second most frequent letter is tried as T, even
# though counts are gathered for three candidates above - confirm whether
# the best of the three was meant to be chosen.
if (@toplets >= 2) {
    my ($e_guess, $t_guess) = @toplets[0, 1];
    # The middle character must be a letter, so a "t" and an "e" that face
    # each other across a word gap are not mistaken for THE.
    while ($ctext =~ /$t_guess([a-z])$e_guess/g) {
        # H has to be a third, distinct letter.
        next if $1 eq $t_guess || $1 eq $e_guess;
        $w = $&;
        print "Match $w\n";
        $thea{$w}++;
    }
}
# Sort once, after all trigrams have been counted.
@thes = sort byvalue keys %thea;

$ntext1 = $ctext;
$e = $toplets[0];
$ntext1 =~ s/$e/E/g if defined $e;
if (@thes) {
    print("$thes[0] == THE.\n");
    @thel = split(//, $thes[0]);
    $h = $thel[1];
    $ntext1 =~ s/$h/H/g;
    $t = $thel[0];
    $ntext1 =~ s/$t/T/g;
    $cracked = 3;
}
else {
    # Without a candidate there is nothing to substitute.  An empty
    # pattern in s/// would silently reuse the last successful regex.
    print("Could not find a candidate for THE.\n");
    $cracked = defined $e ? 1 : 0;
}
$ntext2 = $ctext;

# Use two-letter words to guess O and F.  The most frequent two-letter
# word that starts with the letter taken for T is assumed to be "TO", and
# the most frequent one that starts with the resulting O is assumed to be
# "OF".
#
# Globals read: $ctext, $t, $e, $h.  Globals set: %twoa, @twos, $o, $to,
# $f, $of; $ntext1 and $cracked are updated for each letter found.
print("Ok, now we are guessing at some two letter words...\n");

# Tally whitespace-delimited two-letter words.  Lookarounds are used so
# that the separator is not consumed and adjacent words are both counted.
while ($ctext =~ /(?<!\S)([a-z][a-z])(?!\S)/g) {
    $twoa{$1}++;
}
@twos = sort byvaluet keys %twoa;

# Skip the search when T is unknown: an empty $t would match every word.
if (defined $t && length $t) {
    foreach my $pair (@twos) {
        my ($first, $second) = split(//, $pair);
        next unless $first eq $t;
        # A letter already assigned to T, H or E cannot also be O.
        next if $second eq $t || $second eq $h || $second eq $e;
        $o = $second;
        $to = 1;
        last;
    }
}
if ($o) {
    print("$o == O.\n");
    $cracked++;
    $ntext1 =~ s/$o/O/g;
    foreach my $pair (@twos) {
        my ($first, $second) = split(//, $pair);
        next unless $first eq $o;
        # F must be a letter that has not been assigned yet.
        next if $second eq $o || $second eq $t
             || $second eq $h || $second eq $e;
        $f = $second;
        $of = 1;
        last;
    }
    if ($f) {
        print("$f == F.\n");
        $ntext1 =~ s/$f/F/g;
        $cracked++;
    }
}

# High-frequency word attack.
#
# $ntext1 holds the text with deciphered letters in uppercase and letters
# still unknown in lowercase.  For each word length from 2 to 9 the most
# frequent words of that length are turned into a pattern and looked up in
# the dictionary list for that length.  When exactly one dictionary word
# fits, its letters are substituted into $ntext1.  Up to seven passes are
# made, and a second word list is loaded at the end of the third pass.

# Word length => the dictionary list (filled by load_dict) for that length.
%flist_for = (
    2 => \@flist2, 3 => \@flist3, 4 => \@flist4, 5 => \@flist5,
    6 => \@flist6, 7 => \@flist7, 8 => \@flist8, 9 => \@flist9,
);

# crack_words_of_length(LEN)
#
# Runs one round of the attack for words of exactly LEN letters.
# Reads and updates the globals $ntext1, %ntwoa, @ntwos and the dictionary
# list for LEN.  Returns 1 if at least one dictionary word was matched
# (so the text or the dictionary changed), 0 otherwise.
sub crack_words_of_length {
    my ($len) = @_;
    my $dict = $flist_for{$len};
    my $progress = 0;

    # Start from a clean tally for every length.  %ntwoa stays a global
    # because the byvaluetn comparator reads it.
    %ntwoa = ();
    @ntwos = ();

    # Count whitespace-delimited words of LEN letters.  Lookarounds leave
    # the separators unconsumed, so adjacent words and words at line
    # boundaries are counted, and no delimiter ends up inside the word.
    while ($ntext1 =~ /(?<!\S)([A-Za-z]{$len})(?!\S)/g) {
        $ntwoa{$1}++;
    }
    @ntwos = sort byvaluetn keys %ntwoa;
    return 0 unless @ntwos;

    # Letters deciphered during this call: cipher letter => plain letter.
    my %plain_for;

    # Only the more frequent half of the words is tried.
    foreach my $tn (0 .. int($#ntwos / 2)) {
        my $word = $ntwos[$tn];

        # The list was gathered before any substitution made in this call,
        # so bring the word up to date with what has been learned since.
        $word =~ s/([a-z])/exists $plain_for{$1} ? $plain_for{$1} : $1/ge;

        # Unknown (lowercase) letters become wildcards; known (uppercase)
        # letters must match themselves in lowercase.
        # NOTE(review): the pattern does not require a repeated unknown
        # letter to match the same dictionary letter every time.
        (my $search = $word) =~ s/[a-z]/./g;
        $search =~ tr/A-Z/a-z/;

        # The same word may have been loaded from more than one word
        # list, so count distinct words rather than list entries.
        my %seen;
        my @match = grep(!$seen{lc($_)}++, grep(/^$search$/, @$dict));
        next unless @match == 1;

        (my $plain = $match[0]) =~ tr/a-z/A-Z/;
        print("Aha!  $word is $plain.\n");

        my @lets  = split(//, $word);
        my @clets = split(//, $plain);
        foreach my $cnt (0 .. $#lets) {
            my ($c, $p) = ($lets[$cnt], $clets[$cnt]);
            # Only letters that are still unknown need substituting; the
            # first mapping found for a letter wins.
            next unless $c =~ /^[a-z]$/;
            next if exists $plain_for{$c};
            print("$c == $p\n");
            $plain_for{$c} = $p;
            $ntext1 =~ s/$c/$p/g;
        }

        # A plaintext word that has been placed cannot stand for another
        # ciphertext word, so drop it from the dictionary.
        @$dict = grep(!/^\Q$plain\E$/i, @$dict);
        $progress = 1;
    }
    return $progress;
}

$continue = 1;
for $count (1..7) {

    # Set to 1 when any word length makes progress during this pass.
    $continue = 0;
    foreach my $len (2 .. 9) {
        $continue = 1 if &crack_words_of_length($len);
    }

    if ($count == 3) {
        # Add a second word list for the remaining passes.
        &load_dict('/usr/dict/words');
    }
    elsif ($count > 3 && !$continue) {
        # Nothing changed and no new words are coming: every further pass
        # would repeat this one exactly.
        last;
    }
}
# Show the text as far as it has been worked out, then show it again with
# every remaining lowercase letter replaced by an underscore.
print "Final:\n", $ntext1, "\n";
$ntext1 =~ tr/a-z/_/;
print "With blanks:\n", $ntext1, "\n";

# Sort comparator: orders keys of %twoa by descending count.  Keys with
# equal counts are ordered alphabetically so that the result does not
# depend on the order in which the hash happens to return its keys.
sub byvaluet {
    return $twoa{$b} <=> $twoa{$a} || $a cmp $b;
}

# Sort comparator: orders keys of %ntwoa by descending count.  Keys with
# equal counts are ordered alphabetically so that the result does not
# depend on the order in which the hash happens to return its keys.
sub byvaluetn {
    return $ntwoa{$b} <=> $ntwoa{$a} || $a cmp $b;
}
# Sort comparator: orders keys of %thea by descending count.  Keys with
# equal counts are ordered alphabetically so that the result does not
# depend on the order in which the hash happens to return its keys.
sub byvalue {
    return $thea{$b} <=> $thea{$a} || $a cmp $b;
}

# Sort comparator: orders keys of %noc by descending count.  Keys with
# equal counts are ordered alphabetically so that the result does not
# depend on the order in which the hash happens to return its keys.
sub byfreq {
    return $noc{$b} <=> $noc{$a} || $a cmp $b;
}


