#! /usr/local/bin/perl
#
# @(#)url_get.pl	1.7 7/19/94
# @(#)url_get.pl	1.7 /tmp_mnt/home/ccix/u16/graphics/staff/zippy/perl/url_get/SCCS/s.url_get.pl
#
# url_get.pl      --- get a document given a WWW URL
#
# Modified by Jack Lund 7/19/94 to add functionality and deal with HTTP
# 1.0 headers
#
# Hacked by Stephane Bortzmeyer <bortzmeyer@cnam.cnam.fr> to add "ftp" URLs.
# 22 Jan 1994
#
# Jack Lund 9/3/93 zippy@ccwf.cc.utexas.edu
#
# from hget by:
# Oscar Nierstrasz 26/8/93 oscar@cui.unige.ch
#
# Syntax:
#
# &url_get($url, [$file])
#
# $url - URL of document you want
#
# $file - optional file you want it put into. Specify "&STDOUT" if you
#         want it to go to stdout; Leave this off if you want url_get to
#         return the document as one (possibly VERY LARGE) string
########################################################################

$home = $ENV{"HOME"};

require "our-chat2.pl";
require "URL.pl";
require "ftplib.pl";

sub url_get {
    local($url, $file) = @_;
    local($loseheader) = ($opt_h ? 0 : 1);
    local($debug) = ($opt_d ? 1 : 0);

    ($protocol, $host, $port, $rest1, $rest2, $rest3) = &url'parse_url($url);

# Convert any characters in the string specified in hex by "%xx" to
# the correct character. Note we do this *after* parsing the URL!

    $rest1 =~ s/%(\w\w)/sprintf("%c", hex($1))/ge;

    if ($protocol eq "http") {
	return &url_get'http_get($host,$port,$rest1,$loseheader,$debug,$file);
    }

    if ($protocol eq "gopher") {

# Convert from hex. See above.

	$rest2 =~ s/%(\w\w)/sprintf("%c", hex($1))/ge if ($rest2);
	$rest3 =~ s/%(\w\w)/sprintf("%c", hex($1))/ge if ($rest3);

	return &url_get'gopher_get($host, $port, $rest1, $rest2, $rest3, $file);
    }

    if ($protocol eq "file" || $protocol eq "ftp") {
	return &url_get'file_get($host, $port, $rest1, $file);
    }

    if ($protocol eq "news") {
	return &url_get'news_get($host, $port, $rest1, $file);
    }

    die "Protocol $protocol not supported!\n";
}

package url_get;     # Everything after this is "private"

sub http_get {
    local($host,$port,$request,$loseheader,$debug,$file) = @_;
    local($output) = "";

    if ($file) { open(OUT, ">$file") || die "Error opening output file $file: $!\n"; }

    $handle = &chat'open_port($host, $port);
    if (!defined($handle)) {
        if ($! && $! != "") {
            die "Error opening port $port on $host: $!\n";
        } else {
            die "Host not found: $host\n";
        }
    }
    &chat'print($handle,"GET $request HTTP/1.0\r\n\r\n")
        || die "Error sending GET request: $!\n";
    *S = *chat'S;
    $_ = <S>;
    if (m#^HTTP/\d+\.\d+ (\d\d\d) (.+)$#) {
        $status = $1;
        $reason = $2;
        if (! $debug && $status > 299) {
            warn "Error returned from server: $status $reason\n";
        }
        if ($debug) {
            warn "$_";
            while (<S>) {
                last if (/^\s*$/);
                warn "$_";
            }
        }
        elsif ($loseheader) {
            while (<S>) {
                last if (/^\s*$/);
                if (! /^[a-zA-Z\-]+: /) {
                    warn "Bad MIME header line: $_";
                }
            }
        }
        else {
	    if ($file) { print OUT $_; }
            else { $output .= $_; }
        }
    }
    else {
	if ($file) { print OUT $_; }
        else { $output .= $_; }
    }
    while (<S>) {
	if ($file) { print OUT $_; }
        else { $output .= $_; }
    }
    &chat'close($handle);
    close(OUT) if ($file);
    return($output) unless ($file);
}

sub gopher_get {
    local($host,$port,$gtype,$selector,$search,$file) = @_;
    local($bintypes) = "59sgI";       # Binary gopher types
    local($goodtypes) = "01579sghI";  # types we can handle
    local($output) = "";

    if ($file) { open(OUT, ">$file") || die "Error opening output file $file: $!\n"; }
    $request = ($search ? "$selector\t$search\t$" : $selector);
    ($handle = &chat'open_port($host, $port))
        || die "chat'open($host,$port): $!\n";
    &chat'print($handle,"$request\n")
        || die "chat'print($request): $!\n";
	*S = *chat'S;

    if (index($goodtypes, $gtype) == -1) {
	die "Can't retrieve gopher type $gtype\n";
    }

# If this is a binary document, retreive it using sysreads rather
# than <S>

    if (index($bintypes, $gtype) > -1) {
	$done = 0;
	$rmask = "";
	vec($rmask,fileno(S),1) = 1;
	do {
	    ($nfound, $rmask) =
		select($rmask, undef, undef, $timeout);
	    if ($nfound) {
		$nread = sysread(S, $thisbuf, 1024);
		if ($nread > 0) {
                    $output .= $thisbuf;
                    if ($file)
                    {
		        syswrite(OUT, $thisbuf, $nread)
                            || die "Syswrite: $!\n";
                    } else {
                        $output .= $thisbuf;
                    }
		} else {
		    $done++;
		}
	    } else {
		warn "Timeout\n"; $done++;
	    }
	} until $done;
    }

# This is an ASCII document, and we can get it line-by-line using <S>

    else {
	while (<S>) {
	    last if (/^\.\r\n$/);
	    chop; chop;
            if ($file) { print OUT "$_\n"; }
            else { $output .= "$_\n"; }
	}
    }
    &chat'close($handle);
    close(OUT) if ($file);
    return($output) unless ($file);
}

sub file_get {
    local($host, $port, $path, $file) = @_;
    local($error);
    local($output) = "";

    $localhost = `hostname`;
    if ($host eq $localhost && !defined($port)) {
	open(IN, $path) || die "$path: $!\n";
	$binary = (-B IN ? 1 : 0);
	if ($file) { open(OUT, ">$file") || die "Error opening output file $file: $!\n"; }
	if ($binary)
	{
	    $done = 0;
	    $rmask = "";
	    vec($rmask,fileno(S),1) = 1;
	    do {
		($nfound, $rmask) =
		    select($rmask, undef, undef, $timeout);
		if ($nfound) {
		    $nread = sysread(S, $thisbuf, 1024);
		    if ($nread > 0) {
		        if ($file)
                        {
                            syswrite(OUT, $thisbuf, $nread)
                                || die "Syswrite: $!\n";
                        } else { $output .= $thisbuf; }
		    } else {
			$done++;
		    }
		} else {
		    warn "Timeout\n"; $done++;
		}
	    } until $done;
	}
	else
	{
	    while (<IN>) {
		if ($file) { print OUT "$_\n"; }
		else { $output .= "$_\n"; }
	    }
	}
	close(IN);
	close(OUT) if ($file);
    }
    else {
	&ftp'open($host) || die "Unable to open ftp connection to $host\n";
        if ($opt_b && ! &ftp'type("I")) {
            $error=&ftp'error;
            die "$error\n";
        }
        if ($file) { &ftp'get_file($path, $file)
	    || die "Unable to get file $path from $host\n"; }
	else { $output = &ftp'get($path)
	    || die "Unable to get file $path from $host\n"; }
	&ftp'close;
    }
    return($output) unless ($file);
}

sub news_get {
    local($host, $port, $article) = @_;
    local($output) = "";

    if ($file) { open(OUT, ">$file") || die "Error opening output file $file: $!\n"; }
    ($handle = &chat'open_port($host, $port))
        || die "chat'open($host,$port): $!\n";

    if ($article =~ /^[^<].+@.+[^>]$/) {
	$request = "article <$article>";
    }
    elsif ($article =~ /^<.+@.+>$/) {
	$request = "article $article";
    }
    elsif ($article =~ /^\*$/) {
	die "Only support URLs of the form: news:article\n";
    }
    elsif ($article) {
	die "Only support URLs of the form: news:article\n";
    }
    else {
	die "Bad url\n";
    }

# Read NNTP Connect message

    *S = *chat'S;
    $string = <S>;
    $string =~ /^(\d*) (.*)$/;
    die "NNTP Error: $2\n" unless ($1 eq '200');

# Send request

    &chat'print($handle,"$request\r\n")
        || die "chat'print($request): $!\n";

# Read reply message

    $string = <S>;
    $string =~ /^(\d*) (.*)$/;
    die "NNTP Error: $2\n" unless ($1 eq '220');

# Get article

    while (<S>) {
	last if (/^\.\r\n$/);
	chop; chop;
        if ($file) { print OUT "$_\n"; }
        else { $output .= "$_\n"; }
    }
    &chat'print($handle,"quit\n")
        || die "chat'print(quit): $!\n";
    &chat'close($handle);
    close(OUT) if ($file);
    return($output) unless ($file);
}
