From eichin@athena.mit.edu Mon Nov 25 03:09:17 1991
To: Perl-Users@fuggles.acc.Virginia.EDU
From: "Mark W. Eichin" <eichin@athena.mit.edu>
Subject: zcat (LZ uncompress) in perl...
Reply-To: eichin@athena.mit.edu
Date: Sat, 23 Nov 1991 15:33:43 GMT

I've had some "obfuscated" zcat implementations lying around for 4 or
5 years, particularly one written by James Woods back in 1987. It
occurred to me that since they were so short, they shouldn't take very
long to port to perl. I've been using it for a few weeks and some
other postings have spurred me to share it.
	Some of the original headers from the C version:
# From: jaw@aurora.UUCP (James A. Woods)
# Newsgroups: comp.sources.misc
# Subject: Submission for comp-sources-misc
# Message-ID: <681@aurora.UUCP>
# Date: 21 May 87 09:00:59 GMT
	One interesting "perl" twist is that the original routine had
a loop which read through an array backwards and sent each character
to output. The "literal" translation to perl was
	    do {
		--$s;
		&putchar($ss[$s]);
	    } while($s>0);
However, further analysis of the code indicated that $ss was a
complete, independent array, and all of it was getting emitted. A
*vast* speedup was obtained by rewriting it as
	print pack("c*",reverse @ss);
Invoking &zcat'main will uncompress stdin onto stdout; look at the
text of the "main" routine to see how to call it from code directly.
(Then think about the DATA filehandle, and unpack("u")... :-)
	I haven't really looked into the memory usage since it works
for the one example I care about - I have a star-pattern root window
bitmap which is around 7K but expands to 130K, and it takes about 40
seconds on a DECMIPS to unpack it. (With the do-loop above, it took 40
*minutes* :-) Consider further optimization a challenge.
				_Mark_ <eichin@athena.mit.edu>
				MIT Student Information Processing Board
				Cygnus Support <eichin@cygnus.com>

#!perl
# Contents: zcat (LZ uncompress)
# Author: Mark Eichin <eichin@athena.mit.edu> <eichin@cygnus.com>
# Usage: zcat'main runs from STDIN to STDOUT. Read it.
package zcat;

sub init {
    $i=$q=$b=$n=$j=$O=$S=$o=$v=$c=$m=$f=0; $k=16; $e=128; $M=(1<<(16));
}

sub getchar {
    local($CH);
    if ($source eq "STDIN") { read(STDIN,$CH,1); }
    else { $CH = substr($indata,$idx++,1); }
    return ord($CH) if (defined $CH);
    -1;
}

sub putchar {
    if ($dest eq "STDOUT") { print pack("c",$_[0]); } 
    else { $outdata .= pack("c",$_[0]); }
}

sub g {
    if($j>0||$O>=$S||$f>$m) {
	if($f>$m) { $m= ++$n==$k?$M:(1<<($n))-1; }
	if($j>0) { $m=(1<<($n=9))-1;$j=0; }
	if ($source eq "STDIN") { $S=read(STDIN,$D,$n); } 
	else {
	    $S = length($indata)-$idx;
	    if ($S > $n) { $S = $n; }
	    $D = substr($indata,$idx,$S); $idx+=$S;
	}
	return -1 if(!defined($S) || $S == 0);
	$O=0; $S=($S<<3)-$n+1;
    }
    $q=$O; $b=$n; $p=$q>>3; $q&=7;
    $c=(ord(substr($D,$p,1))>>$q&((1<<(8-$q))-1));$p++;
    $q=8-$q; $b-=$q;
    if($b>=8) { $c|=(ord(substr($D,$p,1))&255)<<$q,$q+=8,$b-=8;$p++; }
    $c|=( ord(substr($D,$p,1))&((1<<($b))-1))<<$q;
    $O+=$n;
    $c;
}
sub main {
    &init;
    if ($slurptest) {
	$source = "var"; undef $/; $indata = <>; $idx = 0;
	
	$outdata = ""; $dest = "var";
    } else {
	$source = "STDIN";
	$dest = "STDOUT";
    }

    &uncompress;
    if ($slurptest) {
	print $outdata;
    }
}
sub uncompress {
    die "Bad COMPRESS file format\n" if(&getchar!=31||&getchar!=157);
    $k=&getchar;
    $e=$k&128; $k&=31; $M=(1<<($k));
    die "Bad COMPRESS file format\n" if($k>16);
    &z;
    0;
}

sub z {
    local($w)=0;
    local($s)=0;
    $m=(1<<($n=9))-1;

    @t = (0 x 256); @h = 0..255;

    $f=$e?257:256; $i=$o=&g;
    return if($o==-1);
    &putchar($i);
    undef @ss; $s = 0; 
    while(($w=&g)>-1) {
	if($w==256&&$e) { @t = (0 x 256); $j=1; $f=256; last if(($w=&g)==-1); }
	$v=$w;
	if($w>=$f) { $ss[$s]=$i; $s++; $w=$o; }
	while($w>=256) { $ss[$s]=$h[$w]; $s++; $w=$t[$w]; }
	$ss[$s]=$i=$h[$w]; $s++;

	if ($dest eq "STDOUT") { print pack("c*",reverse @ss);	} 
	else { $outdata .= pack("c*",reverse @ss); }
	undef @ss; $s = 0; 
	if(($w=$f)<$M) { $t[$w]=$o; $h[$w]=$i; $f=$w+1; }
	$o=$v;
    }
}

1;


