#!/usr/bin/perl -w
#
# $Id: analyze_tiff,v 1.3 1999/01/21 20:17:01 ejb Exp $
# $Source: /home/ejb/scripts/RCS/analyze_tiff,v $
# $Author: ejb $
#

require 5.002;
use strict;
use POSIX;

my $whoami = ($0 =~ m,([^/\\]*)$,) ? $1 : $0;
#my $dirname = ($0 =~ m,(.*)[/\\][^/\\]+$,) ? $1 : ".";

my $debug = 1;

my $little_endian = 1;
my $big_endian = 2;

my @data_types_names = (qw(notype byte ascii short long rational sbyte),
			qw(undefined sshort slong srational float double));

my $tiff_notype = 0;	# placeholder
my $tiff_byte = 1;	# 8-bit unsigned integer
my $tiff_ascii = 2;	# 8-bit bytes w/ last byte null
my $tiff_short = 3;	# 16-bit unsigned integer
my $tiff_long = 4;	# 32-bit unsigned integer
my $tiff_rational = 5;	# 64-bit unsigned fraction
my $tiff_sbyte = 6;	# 8-bit signed integer
my $tiff_undefined = 7;	# 8-bit untyped data
my $tiff_sshort = 8;	# 16-bit signed integer
my $tiff_slong = 9;	# 32-bit signed integer
my $tiff_srational = 10; # 64-bit signed fraction
my $tiff_float = 11;	# 32-bit IEEE floating point
my $tiff_double = 12;	# 64-bit IEEE floating point

if ((@ARGV == 1) && ($ARGV[0] eq "--version"))
{
    print "$whoami version 1.0\n";
    exit 0;
}

my @files = @ARGV;

&usage() unless @files;

while (@files)
{
    my $file = shift(@files);
    eval
    {
	&process($file);
    };
    warn $@ if $@;
}

sub process
{
    my $file = shift;
    my @range = ();
    local(*F);
    &dprint("File $file\n");
    open(F, "<$file") or die "$whoami: can't open $file: $!\n";
    my $buf;
    &read_bytes(\@range, \*F, \$buf, 2);
    my $byte_order = 0;
    if ($buf eq "II")
    {
	$byte_order = $little_endian;
	&dprint("Byte order: little-endian\n");
    }
    elsif ($buf eq "MM")
    {
	$byte_order = $big_endian;
	&dprint("Byte order: big-endian\n");
    }
    else
    {
	die "$whoami: $file: is not a tiff file\n";
    }
    my $version = (&read_short(\@range, $file, \*F, $byte_order, 1, 0))[0];
    if ($version != 42)
    {
	die "$whoami: $file: is not a tiff file\n";
    }

    my $offset = (&read_long(\@range, $file, \*F, $byte_order, 1, 0))[0];
    &safe_seek($file, \*F, $offset);

    my $dir = 0;
    while ($offset)
    {
	++$dir;
	&dprint("Directory $dir, offset $offset\n");
	my $n_entries =
	    (&read_short(\@range, $file, \*F, $byte_order, 1, 0))[0];
	
	my $strips = undef;
	my $width = undef;
	my $length = undef;
	my $rows_per_strip = undef;
	my @offsets = ();
	my @bytecounts = ();

	my $i = 0;
	my $last_tag = 0;
	for ($i = 0; $i < $n_entries; ++$i)
	{
	    my $pos = tell(F);
	    my $tag =
		(&read_short(\@range, $file, \*F, $byte_order, 1, 0))[0];
	    if ($tag <= $last_tag)
	    {
		die "$whoami: $file: tags are not in increasing order\n";
	    }
	    $last_tag = $tag;
	    my $data_type =
		(&read_short(\@range, $file, \*F, $byte_order, 1, 0))[0];
	    my $count =
		(&read_long(\@range, $file, \*F, $byte_order, 1, 0))[0];
	    
	    my $type_name = $data_types_names[$data_type];

	    my $reader = undef;
	    if (($data_type == $tiff_byte) ||
		($data_type == $tiff_sbyte))
	    {
		$reader = \&read_byte;
	    }
	    elsif ($data_type == $tiff_ascii)
	    {
		$reader = \&read_string;
	    }
	    elsif (($data_type == $tiff_short) ||
		   ($data_type == $tiff_sshort))
	    {
		$reader = \&read_short;
	    }
	    elsif (($data_type == $tiff_long) ||
		   ($data_type == $tiff_slong))
	    {
		$reader = \&read_long;
	    }
	    elsif (($data_type == $tiff_rational) ||
		   ($data_type == $tiff_srational))
	    {
		$reader = \&read_rational;
	    }
	    elsif ($data_type == $tiff_float)
	    {
		$count *= 4;
		$reader = \&read_discard;
	    }
	    elsif ($data_type == $tiff_double)
	    {
		$count *= 8;
		$reader = \&read_discard;
	    }
	    elsif ($data_type == $tiff_undefined)
	    {
		$reader = \&read_discard;
	    }
	    else
	    {
		# Don't know how to interpret data.  It would be
		# unsafe to ignore it in case the data referenced by
		# this tag appears at the end of the file.
		die "$whoami: $file: unhandled field data type $data_type\n";
	    }

	    my @data = &$reader(\@range, $file, \*F, $byte_order, $count, 1);
	    my $data = $data[0];

	    my $tag_name = undef;
	    if ($tag == 256)
	    {
		$width = $data;
		$tag_name = "width";
	    }
	    elsif ($tag == 257)
	    {
		$length = $data;
		$tag_name = "length";
	    }
	    elsif ($tag == 273)
	    {
		@offsets = @data;
		$tag_name = "strip offsets";
	    }
	    elsif ($tag == 279)
	    {
		@bytecounts = @data;
		$tag_name = "strip byte counts";
	    }
	    elsif ($tag == 278)
	    {
		$rows_per_strip = $data;
		$tag_name = "rows per strip";
	    }
	    else
	    {
		$tag_name = "tag $tag ($type_name)";
	    }

	    &dprint("  $tag_name ($count)");
	    if (defined $data)
	    {
		&dprint(": " . join(', ', @data));
	    }
	    &dprint("\n");

	    &safe_seek($file, \*F, $pos + 12);
	}

	$offset =
	    (&read_long(\@range, $file, \*F, $byte_order, 1, 0))[0];

	# Check data

	die "$whoami: some required values are missing\n"
	    unless ((defined $width) &&
		    (defined $length) &&
		    (defined $rows_per_strip) &&
		    scalar(@offsets) &&
		    scalar(@bytecounts));

	$strips = int(($length + ($rows_per_strip - 1)) / $rows_per_strip);
	&dprint("  strips: $strips\n");
	if (! (($strips == scalar(@offsets)) &&
	       ($strips == scalar(@bytecounts))))
	{
	    die "$whoami: $file: number of strips is inconsistent with " .
		"number of bytecounts and/or offsets\n";
	}

	for ($i = 0; $i < $strips; ++$i)
	{
	    &mark_range(\@range, $offsets[$i],
			$offsets[$i] + $bytecounts[$i] - 1);
	}

    }

    close(F);

    @range = sort { $a->[0] <=> $b->[0] } @range;
    my @new = (shift(@range));
    while (@range)
    {
	my $item = shift(@range);
	my ($start, $end) = @$item;
	my $oldend = $new[-1]->[1];
	if ($start <= $oldend)
	{
	    die "$whoami: overlapping bytes read\n";
	}
	elsif ($start == $oldend + 1)
	{
	    $new[-1]->[1] = $end;
	}
	else
	{
	    push(@new, [$start, $end]);
	}
    }
    my $last_used_byte = $new[-1]->[1];
    my $desired_size = $last_used_byte + 1;
    my $size = (stat($file))[7];
    my $junk_bytes = $size - $desired_size;
    map { print "  ", $_->[0], "--", $_->[1], "\n" } @new;
    &dprint("  junk bytes: $junk_bytes\n");
}

sub safe_seek
{
    my ($file, $fh, $offset) = @_;
    seek($fh, $offset, SEEK_SET) or
	die "$whoami: $file: seek $offset failed\n";
}

sub read_n_items
{
    my ($range, $file, $fh, $byte_order, $count, $field_data, $item_size) = @_;
    my $size = $item_size * $count;
    my $extra_bytes = 0;
    if ($field_data)
    {
	if ($size > 4)
	{
	    # We must read an offset and seek to that offset
	    my $offset = (&read_long($range, $file, $fh, $byte_order, 1, 0))[0];
	    &safe_seek($file, $fh, $offset);
	}
	else
	{
	    # Some extra bytes have to be marked as read after we
	    # finish reading what we want.
	    $extra_bytes = 4 - $size;
	}
    }
    my @result = ();
    my $i;
    for ($i = 0; $i < $count; ++$i)
    {
	my $buf;
	&read_bytes($range, $fh, \$buf, $item_size);
	push(@result, $buf);
    }
    if ($extra_bytes > 0)
    {
	&mark_range($range, tell($fh), tell($fh) + $extra_bytes - 1);
    }
    @result;
}

sub read_byte
{
    my ($range, $file, $fh, $byte_order, $count, $field_data) = @_;
    my @data = &read_n_items($range, $file, $fh, $byte_order,
			     $count, $field_data, 1);
    map { ord($_) } @data;
}

sub read_short
{
    my ($range, $file, $fh, $byte_order, $count, $field_data) = @_;
    my $format = (($byte_order == $little_endian) ? "v" : "n");
    my @data = &read_n_items($range, $file, $fh, $byte_order,
			     $count, $field_data, 2);
    map { unpack($format, $_) } @data;
}

sub read_long
{
    my ($range, $file, $fh, $byte_order, $count, $field_data) = @_;
    my $format = (($byte_order == $little_endian) ? "V" : "N");
    my @data = &read_n_items($range, $file, $fh, $byte_order,
			     $count, $field_data, 4);
    map { unpack($format, $_) } @data;
}

sub read_rational
{
    my ($range, $file, $fh, $byte_order, $count, $field_data) = @_;
    my @data = &read_long($range, $file, $fh, $byte_order,
			  2 * $count, $field_data);
    my @result = ();
    while (@data)
    {
	my $numerator = shift(@data);
	my $denominator = shift(@data);
	push(@result, sprintf("%d/%d (%f)", $numerator, $denominator,
			      $numerator / $denominator));
    }
    @result;
}

sub read_string
{
    my ($range, $file, $fh, $byte_order, $length, $field_data) = @_;
    my @data = &read_n_items($range, $file, $fh, $byte_order,
			     1, $field_data, $length);
    my $result = $data[0];
    chop($result);		# ignore NULL terminator
    $result;
}

sub read_discard
{
    my ($range, $file, $fh, $byte_order, $count, $field_data) = @_;
    my @data = &read_n_items($range, $file, $fh, $byte_order,
			     1, $field_data, $count);
    undef;
}

sub read_bytes
{
    my ($range, $fh, $bufp, $count) = @_;
    &mark_range($range, tell($fh), tell($fh) + $count - 1);
    read($fh, $$bufp, $count);
}

sub mark_range
{
    my ($range, $start, $end) = @_;
    push(@$range, [$start, $end]);
}

sub dprint
{
    print @_ if $debug;
}

sub usage
{
    die "Usage: $whoami tiff [ tiff ... ]\n";
}