#-*-perl-*-
# Parse "ls -lR" type listings
# use lsparse'reset( dirname ) repeately
# By Lee McLoughlin <lmjm@doc.ic.ac.uk>
#
# $Id: lsparse.pl,v 2.1 1993/06/28 15:03:08 lmjm Exp lmjm $
# $Log: lsparse.pl,v $
# Revision 2.1  1993/06/28  15:03:08  lmjm
# Full 2.1 release
#
#

# This has better be available via your PERLLIB environment variable
require 'dateconv.pl';

package lsparse;

# The current directory is stripped off the
# start of the returned pathname
# $match is a pattern that matches this
local( $match );

# The filestore type being scanned
$lsparse'fstype = 'unix';

# Keep whatever case is on the remote system.  Otherwise lowercase it.
$lsparse'vms_keep_case = '';

# A name to report when errors occur
$lsparse'name = 'unknown';

# Name of routine to call to parse incoming listing lines
$ls_line = '';

# Set the directory that is being scanned and
# check that the scan routing for this fstype exists
# returns false if the fstype is unknown.
sub lsparse'reset
{
	$here = $currdir = @_[0];
	$now = time;
	# Vms tends to give FULL pathnames reguardless of where
	# you generate the dir listing from.
	$vms_strip = $currdir;
	$vms_strip =~ s,^/+,,;
	$vms_strip =~ s,/+$,,;

	$ls_line = "lsparse'line_$fstype";
	return( defined( &$ls_line ) );
}

# See line_unix following routine for call/return details.
# This calls the filestore specific parser.
sub lsparse'line
{
	# ls_line is setup in lsparse'reset to the name of the function
	local( $path, $size, $time, $type, $mode ) =
		eval "&$ls_line( @_ )";

	# Zap any leading ./  (Somehow they still creep thru.)
	$path =~ s:^(\./)+::;
	return ($path, $size, $time, $type, $mode);
}

# --------------------- parse standard Unix ls output
# for each file or directory line found return a tuple of
# (pathname, size, time, type, mode)
# pathname is a full pathname relative to the directory set by reset()
# size is the size in bytes (this is always 0 for directories)
# time is a Un*x time value for the file
# type is "f" for a file, "d" for a directory and
#         "l linkname" for a symlink
sub lsparse'line_unix
{
	local( $fh ) = @_;
	local( $non_crud, $perm_denied );

	if( eof( $fh ) ){
		return( "", 0, 0, 0 );
	}

	while( <$fh> ){
		# Stomp on carriage returns
		s/\015//g;

		# I'm about to look at this at lot
		study;

		# Try and spot crud in the line and avoid it
		# You can get:
		# -rw-r--r-ls: navn/internett/RCS/nsc,v: Permission denied
		# ls: navn/internett/RCS/bih,v: Permission denied
		# -  1 43       daemon       1350 Oct 28 14:03 sognhs
		# -rwcannot access .stuff/incoming
		# cannot access .stuff/.cshrc
		if( m%^(.*)/bin/ls:.*Permission denied% ||
		   m%^(.*)ls:.*Permission denied% ||
		   m%^(.*)(cannot|can not) access % ){
			if( ! $non_crud ){
				$non_crud = $1;
			}
			next;
		}
		# Also try and spot non ls "Permission denied" messages.  These
		# are a LOT harder to handle as the key part is at the end
		# of the message.  For now just zap any line containing it
		# and the first line following (as it will PROBABLY have been broken).
		#
		if( /.:\s*Permission denied/ ){
			$perm_denied = 1;
			next;
		}
		if( $perm_denied ){
			$perm_denied = "";
			warn "Warning: input corrupted by 'Permission denied'",
				"errors, about line $. of $lsparse'name\n";
			next;
		}
		# Not found's are like Permission denied's.  They can start part
		# way through a line but with no way of spotting where they begin
		if( /not found/ ){
			$not_found = 1;
			next;
		}
		if( $not_found ){
			$not_found = "";
			warn "Warning: input corrupted by 'not found' errors",
				" about line $. of $lsparse'name\n";
			next;
		}
		
		if( $non_crud ){
			$_ = $non_crud . $_;
			$non_crud = "";
		}
		
		if( /^([\-lrwxsSt]{10}).*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)\n/ ){
			local( $kind, $size, $lsdate, $file ) = ($1, $2, $3, $5);
			
			if( $file eq '.' || $file eq '..' ){
				next;
			}

			local( $time ) = &main'lstime_to_time( $lsdate );
			local( $type ) = '?';
			local( $mode ) = 0;

			# This should be a symlink
			if( $kind =~ /^l/ && $file =~ /(.*) -> (.*)/ ){
				$file = $1;
				$type = "l $2";
			}
			elsif( $kind =~ /^-/ ){
				# (hopefully) a regular file
				$type = 'f';
			}
			
			$mode = &chars_to_mode( $kind );

			$currdir =~ s,/+,/,g;
			$file =~ s,^/$match,,;
			$file = "/$currdir/$file";
			$file =~ s,/+,/,g;
			return( substr( $file, 1 ), $size, $time, $type, $mode );
		}
		# Match starts of directories.  Try not to match
		# directories whose naes ending in :
		elsif( /^([\.\/]*.*):$/ && ! /^[dcbsp].*\s.*\s.*:$/ ){
			if( $1 eq '.' ){
				next;
			}
			elsif( $1 !~ /^\// ){
				$currdir = "$here/$1";
			}
			else {
				$currdir = "$1";
			}
			$currdir =~ s,/+,/,g;
			$match = $currdir;
			$match =~ s/([\+\(\)\[\]\*\?])/\\$1/g;
			return( substr( $currdir, 1 ), 0, 0, 'd', 0 );
		}
		elsif( /^[dcbsp].*[^:]$/ || /^\s*$/ || /^[Tt]otal.*/ || /[Uu]nreadable$/ ){
			;
		}
		elsif( /^.*[Uu]pdated.*:/ ){
			# Probably some line like:
			# Last Updated:  Tue Oct  8 04:30:50 EDT 1991
			# skip it
			next;
		}
		elsif( /^([\.\/]*[^\s]*)/ ){
			# Just for the export.lcs.mit.edu ls listing
			$match = $currdir = "$1/";
			$match =~ s/[\+\(\[\*\?]/\\$1/g;
		}		
		else {
			printf( "Unmatched line: %s", $_ );
		}
	}
	return( '', 0, 0, 0, 0 );
}

# Convert the mode chars at the start of an ls-l entry into a number
sub chars_to_mode
{
	local( $chars ) = @_;
	local( @kind, $c );

	# Split and remove first char
	@kind = split( //, $kind );
	shift( @kind );

	foreach $c ( @kind ){
		$mode <<= 1;
		if( $c ne '-' && $c ne 'S' && $c ne 'T' ){
			$mode |= 1;
		}
	}

	# check for "special" bits

	# uid bit
	if( /^...s....../i ){
	    $mode |= 04000;
	}

	# gid bit
	if( /^......s.../i ){
	    $mode |= 02000;
	}

	# sticky bit
	if( /^.........t/i ){
	    $mode |= 01000;
	}

	return $mode;
}

# --------------------- parse dls output

# dls is a descriptive ls that some sites use.
# this parses the output of dls -dtR

# for each file or directory line found return a tuple of
# (pathname, size, time, type, mode)
# pathname is a full pathname relative to the directory set by reset()
# size is the size in bytes (this is always 0 for directories)
# time is a Un*x time value for the file
# type is "f" for a file, "d" for a directory and
#         "l linkname" for a symlink
sub lsparse'line_dls
{
	local( $fh ) = @_;
	local( $non_crud, $perm_denied );

	if( eof( $fh ) ){
		return( "", 0, 0, 0 );
	}

	while( <$fh> ){
		# Stomp on carriage returns
		s/\015//g;

		# I'm about to look at this at lot
		study;

		if( /^(\S*)\s+(\-|\=|\d+)\s+((\w\w\w\s+\d+|\d+\s+\w\w\w)\s+(\d+:\d+|\d\d\d\d))\s+(.+)\n/ ){
			local( $file, $size, $lsdate, $description ) =
				($1, $2, $3, $6);
			$file =~ s/\s+$//;
			local( $time, $type, $mode );
			
			if( $file =~ m|/$| ){
				# a directory
				$file =~ s,/$,,;
				$time = 0;
				$type = 'd';
				$mode = 0555;
			}
			else {
				# a file
				$time = &main'lstime_to_time( $lsdate );
				$type = 'f';
				$mode = 0444;
			}

			# Handle wrapped long filenames
			if( $filename ne '' ){
				$file = $filename;
			}
			$filename = '';

			$file =~ s/\s*$//;
			$file = "$currdir/$file";
			$file =~ s,/+,/,g;
			return( substr( $file, 1 ), $size, $time, $type, $mode );
		}
		elsif( /^(.*):$/ ){
			if( $1 eq '.' ){
				next;
			}
			elsif( $1 !~ /^\// ){
				$currdir = "$here/$1/";
			}
			else {
				$currdir = "$1/";
			}
			$filename = '';
			$currdir =~ s,/+,/,g;
			$match = $currdir;
			$match =~ s/([\+\(\)\[\]\*\?])/\\$1/g;
			return( substr( $currdir, 1 ), 0, 0, 'd', 0 );
		}
		else {
			# If a filename is long then it is on a line by itself
			# with the details on the next line
			chop( $filename = $_ );
		}
	}
	return( '', 0, 0, 0, 0 );
}

# --------------------- parse netware output

# For each file or directory line found return a tuple of
# (pathname, size, time, type, mode)
# pathname is a full pathname relative to the directory set by reset()
# size is the size in bytes (this is always 0 for directories)
# time is a Un*x time value for the file
# type is "f" for a file, "d" for a directory and
#         "l linkname" for a symlink
sub lsparse'line_netware
{
	local( $fh ) = @_;

	if( eof( $fh ) ){
		return( "", 0, 0, 0 );
	}

	while( <$fh> ){
		# Stomp on carriage returns
		s/\015//g;
# Unix vs NetWare:
#1234567890 __________.*_____________ d+  www dd  dddd (.*)\n
#drwxr-xr-x   2 jrd      other        512 Feb 29  1992 vt100
#   kind     			      size lsdate       file
#123456789012sw+ ____.*_______\s+(\d+)   \s+  wwwsddsdd:dd\s+ (.*)\n  
#- [R----F--] jrd                197928       Sep 25 15:19    kermit.exe
#d [R----F--] jrd                   512       Oct 06 09:31    source
#d [RWCEAFMS] jrd                   512       Sep 04 14:38    lwp

		if( /^([d|l|\-]\s\[[RWCEAFMS\-]{8}\])\s\w+\s+(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)\n/) {
			local( $kind, $size, $lsdate, $file ) =
						 ( $1, $2, $3, $5);
			if( $file eq '.' || $file eq '..' ){
				next;
			}
			local( $time ) = &main'lstime_to_time( $lsdate );
			local( $type ) = '?';
			local( $mode ) = 0;

			# This should be a symlink
			if( $kind =~ /^l/ && $file =~ /(.*) -> (.*)/ ){
				$file = $1;
				$type = "l $2";
			}
			elsif( $kind =~ /^-/ ){
				# (hopefully) a regular file
				$type = 'f';
			}
			
			$mode = &netware_to_mode( $kind );

			if( $kind =~ /^d/ ) {
				# a directory
				$type = 'd';
				$size = 0;   # Don't believe the report size
			}
			$currdir =~ s,/+,/,g;
			$file =~ s,^/$match,,;
			$file = "/$currdir/$file";
			$file =~ s,/+,/,g;
			return( substr( $file, 1 ), $size, $time, $type, $mode );
		}

		elsif( /^[dcbsp].*[^:]$/ || /^\s*$/ || /^[Tt]otal.*/ || /[Uu]nreadable$/ ){
			;
		}
		elsif( /^.*[Uu]pdated.*:/ ){
			# Probably some line like:
			# Last Updated:  Tue Oct  8 04:30:50 EDT 1991
			# skip it
			next;
		}
		else {
			printf( "Unmatched line: %s", $_ );
			return( '', 0, 0, 0, 0 );
		}
	}
	return( '', 0, 0, 0, 0 );
}

# Convert NetWare file access mode chars at the start of a DIR entry 
# into a Unix access number.
sub netware_to_mode
{
	local( $chars ) = @_;
	local( @kind, $c );

	# Split and remove first three characters
	@kind = split( //, $kind );
	shift( @kind );		# omit directory "d" field
	shift( @kind );		# omit space separator
	shift( @kind );		# omit left square bracket
	$mode = 0;		# init $mode to no access

	foreach $c ( @kind ){
		if( $c eq 'R' )	{$mode |= 0x644;}	## r/w r r
		if( $c eq 'W' ) {$mode |= 0x222;}	## w   w w
		if( $c eq 'F' ) {$mode |= 0x444;}	## r   r r
		}
	return $mode;
}
# --------------------- parse VMS dir output
# for each file or directory line found return a tuple of
# (pathname, size, time, type, mode)
# pathname is a full pathname relative to the directory set by reset()
# size is the size in bytes (this is always 0 for directories)
# time is a Un*x time value for the file
# type is "f" for a file, "d" for a directory and
#         "l linkname" for a symlink
sub lsparse'line_vms
{
	local( $fh ) = @_;
	local( $non_crud, $perm_denied );

	if( eof( $fh ) ){
		return( "", 0, 0, 0 );
	}

	while( <$fh> ){
		# Stomp on carriage returns
		s/\015//g;

		# I'm about to look at this at lot
		study;

		if( /^\s*$/ ){
			next;
		}

		if( /^\s*Total of/i ){
			# Just a size report ignore
			next;
		}

		if( /\%RMS-E-PRV|insufficient privilege/i ){
			# A permissions error - skip the line
			next;
		}

		# Upper case is so ugly
		if( ! $lsparse'vms_keep_case ){
			tr/A-Z/a-z/;
		}

		# DISK$ANON:[ANONYMOUS.UNIX]
		if( /^([^:]+):\[([^\]+]+)\]\s*$/ ){
			# The directory name
			# Use the Unix convention of /'s in filenames not
			# .'s
			$currdir = '/' . $2;
			$currdir =~ s,\.,/,g;
			$currdir =~ s,/+,/,g;
			$currdir =~ s,^/$vms_strip,,;
			if( $currdir eq '' ){
				next;
			}
			$match = $currdir;
			$match =~ s/([\+\(\)\[\]\*\?])/\\$1/g;
#print ">>>match=$match currdir=$currdir\n";
			return( substr( $currdir, 1 ), 0, 0, 'd', 0 );
		}
		
	# MultiNet FTP
	# DSPD.MAN;1  9   1-APR-1991 12:55 [SG,ROSENBLUM] (RWED,RWED,RE,RE)
	# CMU/VMS-IP FTP
	# [VMSSERV.FILES]ALARM.DIR;1      1/3          5-MAR-1993 18:09
		local( $dir, $file, $vers, $size, $lsdate, $got );
		$got = 0;
		# For now ignore user and mode
		if( /^((\S+);(\d+))?\s+(\d+)\s+(\d+-\S+-\d+\s+\d+:\d+)/ ){
			($file, $vers, $size, $lsdate) = ($2,$3,$4,$5);
			$got = 1;
		}
		elsif( /^(\[([^\]]+)\](\S+);(\d+))?\s+\d+\/\d+\s+(\d+-\S+-\d+\s+\d+:\d+)\s*$/ ){
			($dir,$file,$vers,$lsdate) = ($2,$3,$4,$5);
			$got = 1;
		}
		# The sizes mean nothing under unix...
		$size = 0;
		
		if( $got ){
			local( $time ) = &main'lstime_to_time( $lsdate );
			local( $type ) = 'f';
			local( $mode ) = 0444;

			# Handle wrapped long filenames
			if( $filename ne '' ){
				$file = $filename;
				$vers = $version;
				if( $directory ){
					$dir = $directory;
				}
			}
			if( defined( $dir ) ){
				$dir =~ s/\./\//g;
				$file = $dir . '/' . $file;
			}
			$filename = '';

			if( $file =~ /^(.*)\.dir(;\d+)?$/ ){
				if( ! $vms_keep_dotdir ){
					$file = $1 . $2;
				}
				$type = 'd';
				$mode = 0555;
			}

			$lsparse'vers = $vers;

#print "file=|$file| match=|$match| vms_strip=|$vms_strip|\n";
			$file =~ s,^,/,;
			$file =~ s,^/$match,,;
			if( ! defined( $dir ) ){
				$file = "$currdir/$file";
			}
			$file =~ s,^$vms_strip,,;
			$file =~ s,/+,/,g;
#print  "file=|$file|\n";
			return( substr( $file, 1 ), $size, $time, $type, $mode );
		}
		elsif( /^\[([^\]]+)\](\S+);(\d+)\s*$/ ){
			# If a filename is long then it is on a line by itself
			# with the details on the next line
			local( $d, $f, $v ) = ($1, $2, $3);
			$d =~ s/\./\//g;
			$directory = $d;
			$filename = $f;
			$version = $v;
		}
		elsif( /^(\S+);(\d+)\s*$/ ){
			# If a filename is long then it is on a line by itself
			# with the details on the next line
			$filename = $1;
			$version = $2;
		}
		else {
			printf( "Unmatched line: %s", $_ );
		}
	}
	return( '', 0, 0, 0, 0 );
}

# -----
1;
