#
# URL.pl - package to parse WWW URLs
#
# @(#)URL.pl	1.4 3/24/94
# @(#)URL.pl	1.4 /tmp_mnt/home/ccix/u16/graphics/staff/zippy/perl/SCCS/s.URL.pl
#
# Hacked by Stephane Bortzmeyer <bortzmeyer@cnam.cnam.fr> to add support
# for empty paths in URLs and to accept dashes in host names. 22 Jan 1994
#
# Jack Lund 9/3/93 zippy@ccwf.cc.utexas.edu
#

package url;

# Default port numbers for URL services

$ftp_port = 21;
$http_port = 80;
$gopher_port = 70;
$telnet_port = 23;
$wais_port = 210;
$news_port = 119;

# syntax: &url'parse_url(URL)
# returns array containing following:
# 	protocol	protocol string from url. ex: "gopher", "http".
#	host		host that specified protocol server is running on
#	port		port that server answers on
# the rest of the array is protocol-dependant. See code for details.
#

sub parse_url {
    local($url) = @_;

    if ($url =~ m#^(\w+):#) {
	$1 =~ tr/A-Z/a-z/;
	$protocol = $1;
    } else {
	return undef;
    }

    if ($protocol eq "file" || $protocol eq "ftp") {

# URL of type: file://hostname[:port]/path

	if ($url =~ m#^\s*\w+://([^ \t/:]+):?(\d*)(/.*)$#) {
	    $1 =~ tr/A-Z/a-z/;
	    $host = $1;
	    $port = ($2 ne "" ? $2 : $ftp_port);
	    $path = $3;
	    return ($protocol, $host, $port, $path);
	}

# URL of type: file:/path

	if ($url =~ m#^\s*\w+:(/.*)$#) {
	    $host = `hostname`;  # Current host
	    $port = undef;
	    return ($protocol, $host, $port, $1);
	}
	return undef;
    }

    if ($protocol eq "news") {

# URL of type: news://host[:port]/article

	if ($url =~ m#^\s*\w+://([^ \t:/]):?(\d*)/(.*)$#) {
	    $host = $1;
	    $port = ($2 ne "" ? $2 : $news_port);
	    $selector = $3;
	}

# URL of type: news:article

	elsif ($url =~ m#^\s*\w+:(.*)$#) {
	    $host = $ENV{"NNTPSERVER"};
	    unless ($host) {
		warn "Couldn't get NNTP server name\n";
		return undef;
	    }
	    $port = $news_port;
	    $selector = $1;
	}
	else {
	    return undef;
	}
	return ($protocol, $host, $port, $selector);
    }

# URL of type: http://host[:port]/path[?search-string]

    if ($protocol eq "http") {
	if ($url =~ m#^\s*\w+://([\w-\.]+):?(\d*)(/[^ \t\?]*)\??(.)*$#) {
	    $1 =~ tr/A-Z/a-z/;
	    $server = $1;
	    $port = ($2 ne "" ? $2 : $http_port);
	    $path = $3;
	    $search = $4;
	    return ($protocol, $server, $port, $path, $search);
	}
	return undef;
    }

# URL of type: telnet://user@host[:port]

    if ($protocol eq "telnet") {
	if ($url =~ m#^\s*\w+://([^@]+)@([^: \t]+):?(\d*)$#) {
	    $user = $1;
	    $2 =~ tr/A-Z/a-z/;
	    $host = $2;
	    $port = (defined($3) ? $3 : $telnet_port);
	    return($protocol, $host, $port, $user);
	}

# URL of type: telnet://host[:port]

	if ($url =~ m#^\s*\w+://([^: \t]+):?(\d*)$#) {
	    $1 =~ tr/A-Z/a-z/;
	    $host = $1;
	    $port = (defined($2) ? $2 : $telnet_port);
	    return($protocol, $host, $port);
	}
	return undef;
    }

# URL of type: gopher://host[:port]/[gtype]selector-string[?search-string]

    if ($protocol eq "gopher") {
	if ($url =~ m#^\s*\w+://([\w-\.]+):?(\d*)/(\w?)([^ \t\?]*)\??(.*)$#) {
	    $1 =~ tr/A-Z/a-z/;
	    $server = $1;
	    $port = ($2 ne "" ? $2 : $gopher_port);
	    $gtype = ($3 ne "" ? $3 : 1);
	    $selector = $4;
	    $search = $5;
	    return ($protocol, $server, $port, $gtype, $selector, $search);
	}
	return undef;
    }

# URL of type: wais://host[:port]/database?search-string

    if ($protocol eq "wais") {
	if ($url =~ m#^\s\w+://([\w-\.]+):?(\d*)/([^\?]+)\??(.*)$#) {
	    $1 =~ tr/A-Z/a-z/;
	    $server = $1;
	    $port = (defined($2) ? $2 : $wais_port);
	    $database = $3;
	    $search = $4;
	    return ($protocol, $server, $port, $database, $search);
	}
	return undef;
    }
}
