#!/usr/bin/perl

# Root directory for everything this script keeps on disk: the "save"
# directory of default URLs and one cache subdirectory per site searched.
# URLs saved in past terms are read from sibling directories "$path-<term>".
$path = "/mit/course-search/web_scripts/search/urls";
# Alternate root (presumably for local testing); uncomment to use it.
#$path = "/tmp/course/";

use CGI qw(:standard :cgi-lib);
use URI::Escape;
use Tie::IxHash;
use File::Spec::Functions;
use File::Path qw(make_path);
use File::Basename;

# "coursenumber" is either a bare query ("6.002") or a query behind a
# one-segment prefix ("a/6.002").
($arg1, $arg2) = split('/', param('coursenumber'));
$type = param('type');

if($arg2 ne "") {
    # Prefixed form: the query is the second segment.  The prefix "a" forces
    # the full list of results even when a default URL has been saved.
    $num = $arg2;
    $type = 'Course Search' if($arg1 eq 'a');
} else {
    $num = $arg1;
}

LOG($num);
LOG($ENV{'QUERY_STRING'});

# An explicit search always shows the list; display_results() exits when done.
display_results($num) if($type eq 'Course Search');

# Otherwise go straight to the saved default when there is one
# (redirect_if_known() exits after redirecting), else show the list.
redirect_if_known($num);
display_results($num);

# Start a background download of $url into the cache.
#
# Forks a child that first probes the URL with "wget --spider" and, if the
# probe succeeds, downloads the page to "$file.html" with wget's log written
# to "$file.error".  If either step fails, an existing "$file.html" is removed
# and the child exits with status 17; otherwise it exits with status 0.  The
# child arms a five second alarm() before doing any work.
#
# The parent returns right after recording the child's pid in the global
# %children, keyed by $url, so that callers can waitpid() on it later.  If the
# fork itself fails, an error message is printed and the script exits.
#
# Arguments:
#   $url  - address to fetch
#   $file - cache path without extension
sub get {
    my ($url, $file) = @_;
    LOG($file);

    my $pid = fork;

    # fork returns undef (not -1) when it fails, e.g. because the process
    # limit has been reached.  This has to be tested before comparing with 0:
    # undef == 0 is true, so the parent would otherwise run the child's code.
    if(!defined($pid)) {
	print "ERROR. Try again in a minute.";
	LOG('ERROR forking ' . $url);
	exit;
    }

    if($pid == 0) {
	alarm(5);
	open(STDERR, '>', '/dev/null') or die $!;
	make_path(dirname($file));
	my $error = system(qw(wget --load-cookies cookie --timeout=2 --spider -nv), $url);
	if(!$error) {
	    $error = system(qw(wget --load-cookies cookie --timeout=2), $url, "-O", "$file.html", "-o", "$file.error");
	}
	if($error) {
	    # A cached copy left over from an earlier run must be outdated.
	    unlink("$file.html") if(-e "$file.html");
	    exit 17;
	}
	exit;
    }

    $children{$url} = $pid;
}

# Logging hook, currently a deliberate no-op: writing the log file was
# switched off because it was taking up space.  The former body is kept
# below so that logging can be turned back on.
sub LOG {
#    my ($string) = @_;
#    open(LOG, ">>", catfile($path, "LOG"));
#    print LOG "$string\n";
#    close(LOG);
    return;
}
# Redirect the browser to the default URL saved for $query, if there is one.
#
# The saved URL is the first line of the file <$path>/save/<escaped query>.
# When such a file exists and holds a URL, a 303 redirect is printed and the
# script exits.  In every other case the sub simply returns so that the caller
# can fall back to showing the list of results.
sub redirect_if_known {
    my ($query) = @_;
    LOG('redirect_if_know ' . $query);
    my $file = catfile($path, 'save', uri_escape(lc($query)));
    LOG($file);

    # -f rather than -e: with an empty query the path is the "save" directory
    # itself, which exists but cannot supply a URL.
    return unless(-f $file);

    open(my $saved, '<', $file) or return;
    my $line = <$saved>;
    close($saved);

    # An unreadable or empty file must not produce a redirect to nowhere.
    return unless(defined($line));
    chomp($line);
    return if($line eq '');

    print redirect(-uri => $line, -status => 303);
    exit;
}

# Print the complete results page for the query $num, then exit.
#
# The page lists, in this order:
#   1. the URL currently saved as the default for the query, if any;
#   2. URLs that were saved for the query in past terms;
#   3. pages found on a fixed set of MIT sites, tried for the query itself and
#      for every equivalent subject number the course catalog mentions, each
#      with and without a trailing "J";
#   4. URLs printed in the catalog itself (tried last; they are often wrong);
#   5. Stellar sites of old terms;
#   6. matching OpenCourseWare links.
# Entries 2-4 are printed inside a form with a "Save" button per URL.
#
# Downloads are started in batches with get() and only then waited for, so
# the fetches within one batch run in parallel.
#
# Resets the globals $numresults, %numbers, %numbersJ, %urls, @last_resort
# and %children.  Never returns: the script exits once the page is printed.
sub display_results {
    my ($num) = @_;
    $numresults = 0;
    %numbers = ();
    %numbersJ = ();
    %urls = ();
    @last_resort = (); # reset together with the other accumulators

    print header();
    print start_html(-title => 'course.mit.edu');
    print '<form action="http://course.mit.edu/save.pl"
      method="post">';
    list_saved_current_if_exists($num);
    # The below list is appended to by a semesterly cron job. Be careful if editing it!
    propose_old_by_term_if_exists($num, 'sp11', 'Spring 2011');
    propose_old_by_term_if_exists($num, 'fa10', 'Fall 2010');
    propose_old_by_term_if_exists($num, 'sp10', 'Spring 2010');
    propose_old_by_term_if_exists($num, 'fa09', 'Fall 2009');
    propose_old_by_term_if_exists($num, 'sp09', 'Spring 2009');
    propose_old_by_term_if_exists($num, 'fa08', 'Fall 2008');
    propose_old_by_term_if_exists($num, 'sp08', 'Spring 2008');
    propose_old_by_term_if_exists($num, 'fa07', 'Fall 2007');
    propose_old_by_term_if_exists($num, 'sp07', 'Spring 2007');

    %children = ();

    # Download the catalog's search results for the query (done synchronously,
    # because everything below depends on what the catalog says).
    my $url = "http://student.mit.edu/catalog/search.cgi?search=" . uri_escape(lc($num));
    my $file = catfile($path, "cat/" . uri_escape(lc($num)));
    make_path(dirname($file));
    my $error = system(qw(wget --load-cookies cookie -nv), $url, "-O", "$file.html", "-o", "$file.error");

    #first, look in the course catalog for any possible equivalent numbers
    if(!$error && open(my $catalog, '<', "$file.html")) {
	while(my $line = <$catalog>) {
	    if($line =~ m/<p><b(.*)\/b>/) {
		my $full = $1;
		if($full =~ m/>(.*?)J?</) {#strips off rest of tags, and any trailing J
		    # Copy the capture before the next match can disturb $1.
		    my $found = $1;
		    #sometimes a range of special class numbers.
		    $numbers{$found} = 1 unless($found =~ m/.*\..*\..*/);
		}
	    }
	    if($line =~ m/<br>\(Same subject as (.*)\)|<br>\(Subject meets with (.*)\)|<br>\(Offered under: (.*)\)/) {
		# Only the capture of the alternative that matched is defined.
		my $list = join('', grep { defined($_) } ($1, $2, $3));
		foreach my $link (split(/\, |<br>/, $list)) { #splits on either commas or "<br>"
		    if($link =~ m/>(.*?)J?</) {#strips off the link, and any trailing J
			$numbers{$1} = 1;
		    }
		}
	    }
	    if($line =~ m/<br>URL: <a href\=\"(.*)\">/) {#urls in the course catalog are often wrong, so we try them as a last resort.
		push(@last_resort, $1);
		LOG("last resort: $1");
	    }
	}
	close($catalog);
    }

    # Always search for the number that was asked for, even when the catalog
    # could not be downloaded; otherwise a catalog outage would leave nothing
    # to search for at all.
    $numbers{$num} = 1;

    #some websites include J after the course number
    foreach my $known (keys %numbers) {
	$numbersJ{$known} = 1;
	$numbersJ{$known . "J"} = 1;
    }
    foreach my $number (keys %numbersJ) {
	# $number[0] is the part before the first dot (the Stellar course directory).
	my @number = split(/\./, $number);
	tie %sites, "Tie::IxHash"; #keeps %sites in the order I put them
	%sites = ("web" => "http://web.mit.edu/$number", 
		  "www" => "http://web.mit.edu/$number/www", 
		  # The below line is modified by a semesterly cron job. Be careful if editing it!
		  "stellar/fa11" => "http://stellar.mit.edu/S/course/$number[0]/fa11/$number/index.html",
		  "theory.csail" => "http://theory.csail.mit.edu/classes/$number", 
		  "courses.csail" => "http://courses.csail.mit.edu/$number", 
		  "math" => "http://www-math.mit.edu/$number");
	# Start all downloads for this number first, then collect the results.
	foreach my $site (keys(%sites)) {
	    get($sites{$site}, catfile($path, "$site/" . uri_escape(lc($number))));
	}
	foreach my $site (keys(%sites)) {
	    propose_url_if_good($sites{$site}, catfile($path, "$site/" . uri_escape(lc($number))));
	}
    }
    for my $resort (@last_resort) {
	get($resort, catfile($path, "catalog/" . uri_escape(lc($num))));
    }
    for my $resort (@last_resort) {
	propose_url_if_good($resort, catfile($path, "catalog/" . uri_escape(lc($num))));
    }
    print '</form>';

    # The below line is modified by a semesterly cron job. Be careful if editing it!
    @old_terms = ("sp11", "fa10", "sp10", "fa09", "sp09", "fa08", "sp08", "fa07", "sp07", "fa06", "sp06", "fa05", "sp05", "fa04", "sp04", "fa03", "sp03", "fa02");
    # Old Stellar sites are only listed, not offered for saving.  As above,
    # every download is started before any of them is waited for.
    for my $term (@old_terms) {
	for my $number (keys %numbersJ) {
	    my @number = split(/\./, $number);
	    get("http://stellar.mit.edu/S/course/$number[0]/$term/$number/index.html", catfile($path, "stellar/$term/" . uri_escape(lc($number))));
	}
    }
    for my $term (@old_terms) {
	for my $number (keys %numbersJ) {
	    my @number = split(/\./, $number);
	    list_url_if_good("http://stellar.mit.edu/S/course/$number[0]/$term/$number/index.html", catfile($path, "stellar/$term/" . uri_escape(lc($number))));
	}
    }

    list_ocw_if_exists();

    print "<p>$numresults Results Found</p>";
    print '<p>If the correct webpage for the class was not found in the search results, click <a href="https://scripts-cert.mit.edu/~course-search/search/auth/save.pl">here</a> to add a new link using MIT Certificates.</p>';
    print '<hr />
<p>Send questions and comments to course@mit.edu</p>
<p><a href="http://scripts.mit.edu">
<img alt="powered by scripts.mit.edu"
src="http://scripts.mit.edu/media/powered_by.gif" /></a></p>';
    print end_html();
    exit();
}

# List OpenCourseWare pages for every number in the global %numbersJ.
#
# Downloads OCW's list of all courses into <$path>/ocw.html, waits for the
# download to finish and then scans the page line by line.  A link is listed
# when its target contains "/<number>-", where <number> is the lower-cased
# subject number with its first dot replaced by a dash.  Nothing is listed
# when the page is not available.
sub list_ocw_if_exists {
    my $url = 'http://ocw.mit.edu/OcwWeb/Global/all-courses.htm';
    my $file = catfile($path, 'ocw');

    get($url, $file);
    # Flags 0: block until the download child has exited.
    waitpid($children{$url}, 0);

    open(my $page, '<', "$file.html") or return;

    # Build one pattern per number up front instead of once per line.  The
    # numbers derive from user input, so they are quoted with \Q...\E; an
    # unquoted "(" or "[" would otherwise abort the script with a regex
    # compilation error, and "." would match any character.
    my @patterns;
    for my $number (keys %numbersJ) {
	my $slug = lc($number);
	$slug =~ s/\./-/;
	push(@patterns, qr/.*href=\"(.*\/\Q$slug\E-.*)\">.*/);
    }

    while(my $line = <$page>) {
	for my $pattern (@patterns) {
	    if($line =~ $pattern) {
		list_url("http://ocw.mit.edu$1");
	    }
	}
    }
    close($page);
}

# Propose the URL that was saved for $query in a past term, if there is one.
#
# Arguments:
#   $query     - the course number searched for
#   $term      - short term code, e.g. 'sp11'; selects the directory
#                "<$path>-<term>/save"
#   $long_name - human-readable term name used in the note after the URL
#
# The saved URL is the first line of the file named after the escaped,
# lower-cased query.  Nothing is printed when no such file exists or when it
# holds no URL.
sub propose_old_by_term_if_exists {
    my ($query, $term, $long_name) = @_;
    LOG('propose_old_by_term_if_exists ' . $term . " " . $query);
    my $file = catfile("$path-$term", "save", uri_escape(lc($query)));

    # -f rather than -e: with an empty query the path is the "save" directory
    # itself, which exists but cannot supply a URL.
    return unless(-f $file);

    open(my $saved, '<', $file) or return;
    my $line = <$saved>;
    close($saved);

    # Do not print an empty link with a Save button for an empty file.
    return unless(defined($line));
    chomp($line);
    return if($line eq '');

    propose_url($line, ' This URL was saved in ' . $long_name . '.');
}

# List the URL currently saved as the default for $query, if there is one.
#
# The saved URL is the first line of <$path>/save/<escaped query>.  It is
# listed without a Save button and marked as the current default.  Nothing is
# printed when no such file exists or when it holds no URL.
sub list_saved_current_if_exists {
    my ($query) = @_;
    LOG('list_saved_current_if_exists ' . $query);
    my $file = catfile($path, 'save', uri_escape(lc($query)));

    # -f rather than -e: with an empty query the path is the "save" directory
    # itself, which exists but cannot supply a URL.
    return unless(-f $file);

    open(my $saved, '<', $file) or return;
    my $line = <$saved>;
    close($saved);

    # Do not print an empty link for an empty file.
    return unless(defined($line));
    chomp($line);
    return if($line eq '');

    list_url($line, ' This URL is currently the default.');
}

#Proposing a URL means that the URL is listed, and a button allowing it to be saved is added.
#
# Waits for the download of $url that get() started, then proposes the URL if
# a usable page was fetched.
#
# Arguments:
#   $url  - the address passed to get()
#   $file - the same cache path passed to get(), without extension
#
# The URL is skipped when "$file.html" does not exist, or when the fourth line
# of the page carries the title of a directory index or of a Stellar error
# page (only checked for pages of at least five lines).  If wget's log in
# "$file.error" shows that redirects were followed, the last redirect target
# is proposed instead of $url.
sub propose_url_if_good { 
    my ($url, $file) = @_;

    # Flags 0: block until the child has exited.  Without a recorded child
    # there is nothing to wait for, and waitpid(undef, ...) would act like
    # waitpid(0, ...) and reap some unrelated child instead.
    my $pid = $children{$url};
    waitpid($pid, 0) if(defined($pid));

    return unless(-e "$file.html");

    if(open(my $page, '<', "$file.html")) {
	my @lines = <$page>;
	close($page);
	if($#lines > 3) {
	    return if($lines[3] =~ m/  <TITLE>Index of .*<\/TITLE>/);
	    return if($lines[3] =~ m/.*<title>stellar error.*<\/title>.*/);
	}
    }

    # Follow the redirects recorded in wget's log; the last one wins.
    if(open(my $log, '<', "$file.error")) {
	while(my $line = <$log>) {
	    $url = $1 if($line =~ m/Location: (.*) \[following\]/);
	}
	close($log);
    }

    propose_url($url);
}

#Listing a URL just shows the URL, but does not allow it to be saved (used for old Stellar websites).
#
# Waits for the download of $url that get() started, then lists the URL if a
# usable page was fetched.
#
# Arguments:
#   $url  - the address passed to get()
#   $file - the same cache path passed to get(), without extension
#
# The URL is skipped when "$file.html" does not exist, or when the fourth line
# of the page carries the title of a directory index or of a Stellar error
# page (only checked for pages of at least five lines).
sub list_url_if_good {
    my ($url, $file) = @_;

    # Flags 0: block until the child has exited.  Without a recorded child
    # there is nothing to wait for, and waitpid(undef, ...) would act like
    # waitpid(0, ...) and reap some unrelated child instead.
    my $pid = $children{$url};
    waitpid($pid, 0) if(defined($pid));

    return unless(-e "$file.html");

    if(open(my $page, '<', "$file.html")) {
	my @lines = <$page>;
	close($page);
	if($#lines > 3) {
	    return if($lines[3] =~ m/  <TITLE>Index of .*<\/TITLE>/);
	    return if($lines[3] =~ m/.*<title>stellar error.*<\/title>.*/);
	}
    }

    list_url($url);
}

# List $location followed by a submit button that saves it, then by the
# optional note $text.  The button is named after the global query $num and
# labelled "Save <location>".
sub propose_url {
    my ($location, $text) = @_;
    my $save_button = input({-type => 'submit', -name => $num, -value => "Save $location"});
    list_url($location, $save_button . $text);
}

# Print one result paragraph: a link to $location followed by $text, which is
# emitted as given (callers pass HTML such as the Save button).  Every
# location is printed at most once per page; the global %urls remembers what
# has been shown and the global $numresults counts the printed results.
sub list_url {
    my ($location, $text) = @_;

    # Already on the page: nothing to do.
    if($urls{$location}) {
	return;
    }
    $urls{$location} = 1;
    LOG($location);

    my @link = a({-href => $location}, escapeHTML($location));
    print p(@link, ' ', $text);
    $numresults++;
}
