#!/usr/bin/perl

# Put the crawler's library directory at the front of the require search path.
unshift(@INC, "/netgen/www/comprehensive/quick250");
# Debug switch: when true, the "warn(...) if $debug" diagnostics in this file
# are emitted.
$debug = 0;
# The system header translations are not loaded; AF_INET and SOCK_STREAM are
# defined by hand below instead.
#require 'sys/socket.ph';
#require "fcntl.ph";

# Hand-written socket constants, used by generate_socket and do_urls in
# place of the (commented-out) sys/socket.ph definitions.
# NOTE(review): the numeric values are hard-coded; confirm they match the
# <sys/socket.h> of the machine this runs on.
sub AF_INET     { return 2; }
sub SOCK_STREAM { return 2; }

require 'should.pl';            # Should and auxiliary routines
require 'log.pl';               # Logging routines & badlink
require 'analyze.pl';           # Analyze document.  get_* & is_real...
require 'queue.pl';             # Queuing routines
require 'url.pl';               # get_url and auxiliary stuff
require '../src/util.pl';
# ---------------------------------------------------------------------------
# Run-time setup.
#
# Usage: <this script> START-URL
#
# Creates the per-process scratch directory /usr/tmp/quick.<pid>, seeds list
# file "0" in it with the start URL (the main loop loads that file first),
# then initialises the logger and the routines pulled in by require.
# ---------------------------------------------------------------------------
$proto = ((getprotobyname("tcp"))[2] || 0);
$| = 1;                         # unbuffered STDOUT
$addhowmanypages = 25000;       # main loop stops once $addn reaches this

@mylist = ();                   # URLs waiting to be fetched
$myn = $addn = 0;               # next list file to load / documents handled

# Without a start URL the seed file would hold just an empty line.
die "Usage: $0 start-url\n" unless defined($ARGV[0]) && $ARGV[0] ne '';

# NOTE(review): world-writable directory with a predictable name in a shared
# tmp area; mode left as it was, tighten if other users share this host.
$quickdir = "/usr/tmp/quick.$$";
-d $quickdir || mkdir($quickdir, 0777) ||
    die "Can't create $quickdir: $!\n";

# If the seed file cannot be written, load_list would poll for it forever,
# so fail loudly here instead.
open(URL, ">$quickdir/0") || die "Can't write $quickdir/0: $!\n";
print URL $ARGV[0]."\n";
close(URL) || die "Can't finish writing $quickdir/0: $!\n";

&logger'init;
&Init;



# Main crawl loop: until $addhowmanypages documents have been handled,
# fetch whatever is queued in @mylist and has not been seen yet, then load
# the next numbered list file of harvested links.
while($addn < $addhowmanypages){
    print("MAINLOOP: Added $addn pages, proceeding\n");
    if($#mylist >= 0){
        $quick_count = $#mylist + 1;
        print("MAINLOOP: Starting cleanup of list (Currently $quick_count items)\n");

        # Drop every URL that already appears as a whole line in the memory
        # file.  The file is read once into a hash instead of running one
        # shell "grep -F -x" per URL: same exact-line match, but harvested
        # URLs are never handed to a shell (no injection, and no hang on an
        # empty URL, which made grep read stdin).
        # NOTE(review): the memory file is not written in this file;
        # presumably one of the required libraries maintains it.
        %quick_inmemory = ();
        if(open(MEMORY, "/usr/tmp/quick.$$/memory")){
            while(<MEMORY>){
                s/\n$//;
                $quick_inmemory{$_} = 1;
            }
            close(MEMORY);
        }
        @newlist = grep(!$quick_inmemory{$_}, @mylist);
        %quick_inmemory = ();

        $quick_count = $#newlist + 1;
        print("MAINLOOP: Done with list cleanup (Now $quick_count items)\n");
        &do_urls(@newlist);
        print("MAINLOOP: Done fetching and logging URLs\n");
    }
    print("MAINLOOP: Fetching list $myn\n");
    &load_list($myn);
    print("MAINLOOP: Done fetching list $myn (Done with $addn pages)\n");
    # Advance only after reporting, so both messages name the same list.
    $myn++;
}
 
# do_doc($url, $hdrs, $doc)
#
# Hand one fetched document to a forked child, which runs &process_doc on it
# and exits.  $addn is bumped first so the child inherits the document's
# number (process_doc uses it to name the list file it writes).
#
# Once more than 6 children have been started without being reaped, three
# of them are waited for before forking again.
#
# If fork fails the document is dropped with a warning and $addn is put
# back, so no list number is reserved for a file that will never appear.
sub do_doc {
    local($url, $hdrs, $doc) = @_;
    $addn++;
    if($nkids > 6){
        for $i (1..3){
            print("Waiting for a child to exit...\n");
            wait();
            print("...done\n");
            $nkids--;
        }
    }

    print("Forking...\n");
    $pid = fork();
    if(!defined($pid)){
        # fork returns undef on failure; treating that like 0 would make the
        # parent run the child branch and exit the whole crawler.
        warn("Can't fork to process $url: $!\n");
        $addn--;
        return;
    }
    if(!$pid){
        # Child: process the document, then terminate.
        &process_doc($url, $hdrs, $doc);
        print("Done processing $url\n");
        exit;
    }
    $nkids++;
}

# load_list($file)
#
# Append the URLs stored one per line in /usr/tmp/quick.<pid>/$file to
# @mylist.  Polls (sleeping between checks) until the file exists; the wait
# is a loop rather than recursion so a long wait cannot pile up stack frames.
#
# NOTE(review): the file is read as soon as it exists; if its writer is
# still appending, later lines are missed -- confirm against process_doc.
sub load_list {
    local($file) = @_;
    local($listpath) = "/usr/tmp/quick.$$/$file";

    print "Waiting to load $file\n";
    sleep(5);
    print "Loading $file\n";
    until(-e $listpath){
        print "Waiting...\n";
        sleep(5);
        print "Waiting to load $file\n";
        sleep(5);
        print "Loading $file\n";
    }

    if(!open(FILE, $listpath)){
        warn("Can't read $listpath: $!\n");
        return;
    }
    while(<FILE>){
        # Strip only a trailing newline; chop would eat a real character
        # from a final line that has none.
        s/\n$//;
        next if $_ eq '';           # an empty line is not a URL
        print("Putting $_ on list\n");
        push(@mylist, $_);
    }
    close(FILE);
    print("Done.\n");
}

# process_doc($url, $hdrs, $doc)
#
# Runs in the forked child.  Extracts links, title, headers, beginning,
# types and markup from $doc with the get_* helpers, logs the visit, then
# appends every acceptable link, resolved against $url and lower-cased, to
# list file number $addn in the parent's scratch directory.  The first URL
# seen for each site is also passed to &AddHost.
#
# The results are deliberately left in package globals, exactly as before.
sub process_doc {
    local($url, $hdrs, $doc) = @_;

    print("Processing $url\n");

    @local_links = &get_links($doc);
    $title       = &get_title($doc);
    $headers     = '';
    $headers     = &get_headers($doc);
    $beginning   = &get_beginning($doc, $beg_def{$ancestors[0]});
    $types       = &get_types($doc);
    $markup      = &get_markup($doc);

    &log_visit($url, $title, $headers, $beginning, $types,
               $#local_links, $#ancestors, $markup);

    print("Writing links ($#local_links)\n");
    $ppid = getppid();
    warn "Hey, can't write $addn"
        unless open(Q, ">>/usr/tmp/quick.$ppid/$addn");

    foreach $ll (@local_links){
        # Only links made up purely of URL-safe characters are kept.
        if($ll !~ /[^\w\_\d\-\.\/\:\~]/){
            $fullurl = &url'ConstructURL($url, $ll);
            $fullurl =~ tr/A-Z/a-z/;
            print Q $fullurl."\n";
            ($site, $path, $port) = &url'ParseURL($fullurl);
            if(!$didsite{$site}){
                print("Adding $fullurl\n");
                &AddHost($site, $fullurl);
            }
            $didsite{$site} = 1;
        }
    }
    close(Q);
}


# generate_socket($fhandle)
#
# Create an AF_INET / SOCK_STREAM socket, using protocol $proto, on the
# filehandle whose name is passed in.  Dies with "socket: <error>" if the
# socket cannot be created; otherwise returns socket()'s true result.
sub generate_socket {
    local($fhandle) = @_;
    local($made) = socket($fhandle, &AF_INET, &SOCK_STREAM, $proto);

    die "socket: $!\n" unless $made;

    # Switching the socket to non-blocking mode is disabled:
#    fcntl($fhandle, &F_GETFL, $tmp='') ||
#	die "fcntl(\&F_GETFL) failed: $!";
#    fcntl($fhandle, &F_SETFL, $tmp | &FNDELAY) ||
#	die "fcntl(\&F_SETFL) failed: $!";
    $made;
}
# do_urls(@urls)
#
# Fetch every URL in @urls with an HTTP/1.0 GET, keeping at most $allsockets
# connections open at once and multiplexing them with select().  A response
# is complete when a read returns nothing; it is then handed to
# &close_connection, which passes it on for processing and advances
# $donecount.  URLs whose host cannot be resolved or connected to are
# skipped with a warning and counted as done.  Returns when all URLs have
# been fetched or given up on.
#
# Per-connection state is kept in package globals:
#   %urlsock %hostsock %portsock %pathsock   keyed by socket handle name
#   %sockfileno %wrote %output               keyed by file descriptor number
#   $win / $rin                              select() bit vectors of sockets
#                                            still to be written to / read
sub do_urls {
    local(@urls)=@_;
    local($hostname);

    $donenum = $#urls+1;
    # $donecount is advanced per finished document; it must start from zero
    # for every run or later runs stop early (or never start).
    $donecount = 0;
    $sockaddr = 'S n a4 x8';
    $allsockets = 40;
    $usedsockets = 0;
    $sockname = "SOCKAAA";
    $win = $rin = '';

    print("Doing run of $donenum urls\n");
    while($donecount < $donenum){

        # First, make sure all available sockets are open
        while(($usedsockets < $allsockets) && ($#urls >=0)){
            $url = shift(@urls);
            ($host, $path, $port) = &url'ParseURL($url);
            $hostname = $host;
            ($host) = (gethostbyname($host))[4];
            if(!defined($host)){
                # Unresolvable host: there is no address to pack below.
                warn("Can't resolve $hostname for $url, skipping\n");
                $donecount++;
                next;
            }
            &generate_socket($sockname);
            print("Connecting $sockname\n");
            $result=connect($sockname, pack($sockaddr, &AF_INET, $port, $host));
            warn("Connect: $result ($!)\n") if $debug;
            if(!$result){
                warn("Can't connect to $hostname for $url: $!\n");
                close($sockname);
                $donecount++;
                next;
            }
            print("Connected $sockname\n");

            $fn = fileno($sockname);
            $urlsock{$sockname} = $url;
            $hostsock{$sockname} = $host;
            $portsock{$sockname} = $port;
            $pathsock{$sockname} = $path;
            $sockfileno{$fn} = $sockname;
            # Descriptor numbers get reused; make sure no stale state is left.
            delete $wrote{$fn};
            delete $output{$fn};
            warn("Socket ($sockname)[$fn] for $url generated\n") if $debug;
            vec($win, $fn, 1) = 1;
            vec($rin, $fn, 1) = 1;

            $sockname++;
            $usedsockets++;
        }

        # See who is ready for writing and send them the request.
        if(select(undef, $wout=$win, undef, 0)){
            $woutbits = unpack("b*", $wout);
            $winbits = unpack("b*", $win);
            $idx = -1;          # index() is called with $idx+1, so scan from 0
            while(($idx = index($woutbits, "1", $idx+1)) !=-1){
                $fh = $sockfileno{$idx};
                next if $wrote{$idx};
                if(!getpeername($fh)){
                    # The connection went away before the request was sent.
                    warn("Getpeername failed on $fh: $!\n") if $debug;
                    &abandon_connection($idx);
                    next;
                }
                warn("Ready to write: $woutbits ($winbits)\n") if $debug;
                # Lines end in CRLF, and the @ is escaped so that Perl 5 does
                # not interpolate an (empty) array into the address.
                $sent = send($fh,"GET $pathsock{$fh} HTTP/1.0\r\nUser-Agent: Wander by Matthew Gray <mkgray\@netgen.com>  quickwww/1.0\r\n\r\n",0);
                warn("Sent: $sent ($!)\n") if $debug;
                if(!defined($sent)){
                    warn("Can't send request for $urlsock{$fh}: $!\n");
                    &abandon_connection($idx);
                    next;
                }
                $wrote{$idx}=1;
                # Nothing more to write: stop polling it for writability.
                vec($win, $idx, 1) = 0;
                warn("Sent request on $fh ($idx);\n") if $debug;
            }
        }

        # Now, read anything there is to read.  The short timeout keeps the
        # outer loop from spinning at full speed while responses trickle in.
        if(select($rout=$rin, undef, undef, 0.1)){
            $routbits = unpack("b*", $rout);
            $rinbits = unpack("b*", $rin);
            $idx = -1;
            while(($idx = index($routbits, "1", $idx+1)) !=-1){
                warn("Reading ($routbits) [$rinbits]\n") if $debug;
                $fh = $sockfileno{$idx};
                # sysread, not read: buffered reads do not mix with select()
                # and would block until a full 512 bytes had arrived.
                $outtemp = '';
                $nread = sysread($fh, $outtemp, 512);
                if($nread){
                    $output{$idx} .= $outtemp;
                }
                else{
                    # End of data (or a read error): the response is complete.
                    &close_connection($idx);
                }
            }
        }
    }
}

# abandon_connection($fno)
#
# Private helper for do_urls: give up on the socket with file descriptor
# $fno without processing any document.  Forgets all per-connection state,
# closes the socket, frees its slot and counts the URL as done.
sub abandon_connection {
    local($fno) = @_;
    local($sock) = $sockfileno{$fno};

    delete $urlsock{$sock};
    delete $hostsock{$sock};
    delete $portsock{$sock};
    delete $pathsock{$sock};
    delete $sockfileno{$fno};
    delete $wrote{$fno};
    delete $output{$fno};
    vec($win, $fno, 1) = 0;
    vec($rin, $fno, 1) = 0;
    close($sock);
    $usedsockets--;
    $donecount++;
}


# close_connection($fno)
#
# Finish the connection whose socket has file descriptor $fno: forget its
# per-connection state (hostsock, portsock, pathsock, urlsock, sockfileno,
# wrote, output and its bits in $rin / $win), close the socket, split the
# collected response into header block and body, pass both to &do_doc and
# count the document as done.
sub close_connection {
    local($fno) = @_;

    $fh = $sockfileno{$fno};
    $usedsockets--;
    $host = $hostsock{$fh};
    $port = $portsock{$fh};
    $path = $pathsock{$fh};
    $url = $urlsock{$fh};
    # delete rather than undef, so no dead keys pile up in the hashes.
    delete $hostsock{$fh};
    delete $portsock{$fh};
    delete $pathsock{$fh};
    delete $urlsock{$fh};
    delete $wrote{$fno};
    vec($win, $fno, 1) = 0;
    vec($rin, $fno, 1) = 0;
    delete $sockfileno{$fno};

    # Close before do_doc forks, so the child does not inherit the socket.
    close($fh);

    # Headers end at the first blank line, whether lines end in CRLF or LF.
    # The limit of 2 leaves the body untouched (no re-joining, and trailing
    # blank lines are kept).
    ($head, $body) = split(/\r?\n\r?\n/, $output{$fno}, 2);
    $body = '' unless defined($body);
    delete $output{$fno};
    &do_doc($url, $head, $body);
    $donecount++;
    print("Done with $donecount documents (out of $donenum)\n");
}
