#!/usr/bin/perl

unshift(@INC, "/netgen/www/comprehensive/quick250");
require 'should.pl';            # Should and auxiliary routines
require 'log.pl';               # Logging routines & badlink
require 'analyze.pl';           # Analyze document.  get_* & is_real...
require 'queue.pl';             # Queueing routines
require 'url.pl';               # get_url and auxiliary stuff
require '../src/util.pl';

# Crawl state shared (as package globals) with the code below:
#   @mylist - URLs read from the queue files and not yet examined
#   $myn    - number of the next queue file load_list will be asked for
#   $addn   - count of URLs accepted so far; process_doc also uses it as
#             the name of the queue file a document's links go into
@mylist = ();
$myn = $addn = 0;
$| = 1;                         # unbuffered STDOUT

# Without a start URL the seed file would hold a single empty line and the
# crawl would start on an empty URL, so refuse to run instead.
if ($#ARGV < 0 || $ARGV[0] eq '') {
    die "usage: $0 start-url\n";
}

# Per-run scratch directory, named after this process.  An already
# existing directory is tolerated; any other mkdir failure is fatal
# because every later step reads or writes files in here.
mkdir("/usr/tmp/quick.$$", 0777)
    || -d "/usr/tmp/quick.$$"
    || die "can't create directory /usr/tmp/quick.$$ ($!)\n";

# Queue file 0 seeds the crawl with the URL given on the command line.
open(URL, ">/usr/tmp/quick.$$/0")
    || die "can't write seed file /usr/tmp/quick.$$/0 ($!)\n";
print URL $ARGV[0]."\n";
close(URL)
    || die "can't finish seed file /usr/tmp/quick.$$/0 ($!)\n";

# Initialisation routines supplied by the required library files.
&logger'init;
&Init;

# Main crawl loop: runs until 110000 URLs have been accepted ($addn).
#
# Each pass either refills @mylist from the next numbered queue file or
# takes one URL off @mylist, fetches it in this process and forks a child
# that analyses the document via process_doc.
while ($addn < 110000) {
    if ($#mylist < 0) {
        # Nothing queued in memory: load the next queue file.
        $waited = 0;
        &load_list($myn++, 0);
        next;
    }

    $url = shift(@mylist);
    print("checking memory for $url\n");
    # The URL comes from fetched pages, so it must never reach a shell;
    # the lookup is done in Perl instead of through `grep`.
    next if &quick_url_in_memory($url);
    print("Is it a hash?\n");
    next if $url =~ /[#\$]/;
    $addn++;
    print("Getting $url\n");
    ($hdrs, $doc) = &get_url($url);

    # Throttle: once more than 6 children are outstanding, reap up to 3.
    if ($nkids > 6) {
        for $i (1..3) {
            print("Waiting for a child to exit...\n");
            $reaped = wait();
            print("...done\n");
            if ($reaped == -1) {
                # No children left at all; resynchronise the counter.
                $nkids = 0;
                last;
            }
            $nkids--;
        }
    }

    print("Forking...\n");
    # fork() returns undef on failure.  That must not be mistaken for the
    # child's 0, or the parent would process the document and exit.
    $fork_tries = 0;
    until (defined($pid = fork())) {
        die "fork failed for $url ($!)\n" if ++$fork_tries >= 5;
        warn "fork failed ($!), retrying\n";
        if ($nkids > 0) {
            wait();             # free a process slot before retrying
            $nkids--;
        }
        else {
            sleep(5);
        }
    }
    if ($pid == 0) {
        # Child: analyse the document, then terminate.
        &process_doc($url, $hdrs, $doc);
        print("Done processing $url\n");
        exit;
    }
    $nkids++;
}

# quick_url_in_memory(URL)
# Returns 1 if some line of /usr/tmp/quick.$$/memory is exactly URL,
# 0 otherwise (including when the file cannot be opened).  This is the
# same whole-line, fixed-string match that `grep -F -x` performs.
# NOTE(review): the memory file is written elsewhere (not in this file);
# presumably it records URLs already visited - confirm.
sub quick_url_in_memory {
    local($wanted) = @_;
    local($line, $found);

    $found = 0;
    open(MEMORY, "</usr/tmp/quick.$$/memory") || return 0;
    while (defined($line = <MEMORY>)) {
        $line =~ s/\n$//;
        if ($line eq $wanted) {
            $found = 1;
            last;
        }
    }
    close(MEMORY);
    $found;
}

# load_list(FILE, WAITED)
#
# Appends every non-empty line of queue file /usr/tmp/quick.$$/FILE to the
# global @mylist, then returns.
#
#   FILE   - number of the queue file to load
#   WAITED - initial value of the wait counter (callers pass 0)
#
# If the file does not exist yet, sleeps 5 seconds and tries again.  Once
# the wait counter has passed 5, the counter is reset and the previous
# queue file is loaded instead; the global $myn is decremented with it so
# that the caller asks for the missing file again next time.  File 0 has
# no predecessor, so for it the sub just keeps waiting.
#
# This is a loop rather than recursion so that a long wait cannot grow the
# call stack, and so that the wait counter really advances.
sub load_list {
    local($file, $waited) = @_;
    local($_);                  # keep the caller's $_ intact

    while (1) {
        print "Loading $file\n";

        if (-e "/usr/tmp/quick.$$/$file") {
            if (open(FILE, "</usr/tmp/quick.$$/$file")) {
                while (<FILE>) {
                    s/\n$//;    # strip only a newline, never a real character
                    next if $_ eq '';
                    print("Putting $_ on list\n");
                    push(@mylist, $_);
                }
                close(FILE);
            }
            else {
                # Exists but unreadable: report it and treat it as empty
                # rather than waiting on it forever.
                warn "can't read /usr/tmp/quick.$$/$file ($!)\n";
            }
            print("Done.\n");
            return;
        }

        print "Waiting...(for the $waited th time) to load $file\n";
        sleep(5);
        if ($waited > 5) {
            $waited = 0;
            if ($file > 0) {
                $myn--;
                $file--;
            }
        }
        else {
            $waited++;
        }
    }
}

# process_doc(URL, HDRS, DOC)
#
# Analyses one fetched document: extracts its links, title, headers,
# beginning, types and markup with the helper routines from the required
# files, records the visit with log_visit, and appends every link
# (resolved against URL) to the queue file /usr/tmp/quick.<ppid>/<$addn>.
# For each site not yet flagged in %didsite, AddHost is called once.
#
#   URL  - address the document was fetched from
#   HDRS - headers returned by get_url (not used in this sub)
#   DOC  - document text
#
# The main loop calls this in a forked child, which is why the queue
# directory is named after the parent's pid.  The results are left in
# package globals (@local_links, $title, ...), as in the original code.
# If the queue file cannot be opened a warning is issued and the links
# are not written, but the host bookkeeping is still done.
sub process_doc {
    local($url, $hdrs, $doc) = @_;
    local($qfile, $q_open);

    print("Processing $url\n");
    @local_links = &get_links($doc);
    $title = &get_title($doc); $headers = '';
    $headers = &get_headers($doc);
    $beginning = &get_beginning($doc, $beg_def{$ancestors[0]});
    $types = &get_types($doc);
    $markup = &get_markup($doc);
    # $#local_links and $#ancestors are last indexes (count - 1); they are
    # passed on unchanged so the values handed to log_visit stay the same.
    &log_visit($url, $title, $headers, $beginning, $types,
               $#local_links, $#ancestors, $markup);

    print("Writing links ($#local_links)\n");
    $ppid = getppid();
    $qfile = "/usr/tmp/quick.$ppid/$addn";
    $q_open = open(Q, ">>$qfile");
    warn "Hey, can't write $addn ($qfile): $!\n" unless $q_open;

    foreach $ll (@local_links) {
        $fullurl = &url'ConstructURL($url, $ll);
#       $fullurl =~ y/A-Z/a-z/;
        print Q $fullurl."\n" if $q_open;
        ($site, $path, $port) = &url'ParseURL($fullurl);
        if (!$didsite{$site}) {
            print("Adding $fullurl\n");
            &AddHost($site, $fullurl);
        }
        $didsite{$site} = 1;
    }

    if ($q_open) {
        # Buffered write errors only show up at close time.
        close(Q) || warn "error closing $qfile ($!)\n";
    }
}
