#!/mit/perl5/arch/sun4x_56/bin/perl5.00503
#
# /usr/athena/bin/perl
#
# 40foo finder.
#
# checks for dead links.
#
# 
use strict;
use HTML::Parser;
use HTML::LinkExtor;  
use LWP::UserAgent;
use HTTP::Request;
use URI::URL;
#
my $startpage = $ARGV[0];
print "looking at.. $startpage\n";
#
# install the callback:
my @links = ();
sub callback {
    my($tag, %attr) = @_;
# we only look closer at <img ...> and <a >
    return if (($tag ne 'img') &&
	       ($tag ne 'a') &&
	       ($tag ne 'IMG') && $tag ne 'A');  
    push(@links, values %attr);
}
# (it might be diffent from $url)
my $p = HTML::LinkExtor->new(\&callback);
$p->parse_file($startpage);
#
my $foo;
foreach $foo (@links) {
    print " ... $foo\n";
}
# still need to correct to absolute URLs.
my $base = $ENV{"PWD"};
$base =~ /\b.(\w+)$/;
$base = $&;
$base =~ s/\///;
$base = "http://www.mit.edu/" . $base . "/";
print "base $base $&\n";
@links = map { $_ = url($_, $base)->abs; } @links;
foreach $foo (@links) {
    print " ___ $foo\n";
}
my $anchor = " ";
my @bad_links = ();
foreach $anchor  (@links) {
    my $ua = new LWP::UserAgent;

    my $request = new HTTP::Request("GET",
				    $anchor);
    my $response = $ua->request($request);

    if (!$response->is_success) {
	push(@bad_links, $anchor);
	print "$anchor\n";
    }

}


 
