#!/usr/bin/perl

# Root directory for this script's on-disk data: the subs below build the
# saved-link paths ("save/...") and the per-site download cache paths from
# it with catfile().  Links saved in earlier terms are looked up under
# "$path-<term>".
$path = "/mit/course-search/web_scripts/search/urls";
# Alternate root, currently disabled (presumably for local testing).
#$path = "/tmp/course/";

use CGI qw(:standard :cgi-lib);
use URI::Escape;
use Tie::IxHash;
use File::Spec::Functions;
use File::Path qw(make_path);
use File::Basename;

# Request dispatch.
#
# The "coursenumber" parameter is either a bare number ("6.002") or
# "<prefix>/<number>".  The prefix "a" (as in "a/6.002") forces the full
# result list even when a link has already been saved for the number.
#
# $num and $type are deliberately package globals: propose_url() reads
# $num when it builds the "Save" button.
($arg1, $arg2) = split('/', param('coursenumber'));

$type = param('type');
if (defined $arg2 && $arg2 ne "") {
    $num = $arg2;
    if ($arg1 eq 'a') {
        # For searches like a/6.002, return a list of all results, even if
        # one has been saved.
        $type = 'Course Search';
    }
}
else {
    $num = $arg1;
}
LOG($num);
LOG($ENV{'QUERY_STRING'});

# Accept only a non-empty run of word characters and dots.  \A and \z are
# used instead of ^ and $ so that a value ending in a newline is rejected,
# and "+" instead of "*" so that a missing or empty number is rejected too
# (an empty number would otherwise resolve to the "save" directory itself).
if (defined $num && $num =~ /\A[\w.]+\z/) {
    if ($type eq 'Course Search') {
        display_results($num);
    }
    else {
        redirect_or_link_to_search($num);
    }
}
else {
    print header();
    print start_html(-title => 'course.mit.edu');
    print '<p>Invalid course number.</p>';
    print '<a href="http://course.mit.edu">Return to search</a>';
    print end_html();
}

# Start downloading $url in a background child process.
#
# Arguments:
#   $url  - page to fetch.
#   $file - cache path prefix; the page is written to "$file.html" and
#           wget's log to "$file.error".
#
# The child first probes the URL with "wget --spider" and only downloads it
# when the probe succeeds.  If either step fails it removes any existing
# "$file.html" (it must be outdated) and exits with status 17; otherwise it
# exits with status 0.
#
# The parent records the child's pid in the global %children, keyed by
# $url, so callers can waitpid() on it before reading the cache files.
# If the fork itself fails, an error message is printed and the whole
# request exits.
sub get {
    my ($url, $file) = @_;
    LOG($file);

    my $pid = fork;

    # fork returns undef (never -1) when it fails, e.g. because there are
    # already too many processes.  This must be tested before the
    # "$pid == 0" child check: undef compares equal to 0, which would send
    # the parent down the child branch.
    if (!defined $pid) {
        print "ERROR. Try again in a minute.";
        LOG('ERROR forking ' . $url);
        exit;
    }

    if ($pid == 0) {
        alarm(5);    # SIGALRM ends this child if it is still running after 5s
        open(STDERR, '>', '/dev/null') or die $!;
        make_path(dirname($file));
        my @common = (qw"wget --load-cookies cookie -U", "Wget/1.16.1 (course.mit.edu)", qw"--timeout=2");
        push @common, "--header", "X-Forwarded-For: " . remote_addr();
        my $error = system(@common, qw"--spider -nv", $url);
        if (!$error) {
            $error = system(@common, $url, "-O", "$file.html", "-o", "$file.error");
        }
        if ($error) {
            # File must be outdated.
            unlink("$file.html") if -e "$file.html";
            exit 17;
        }
        exit;
    }

    $children{$url} = $pid;
}

# Diagnostic logging hook.  Logging is switched off because the log file
# was taking up space, so this sub intentionally does nothing with its
# argument.
sub LOG {
    # Previous implementation, kept so logging can be switched back on:
    #    my ($string) = @_;
    #    open(LOG, ">>", catfile($path, "LOG"));
    #    print LOG "$string\n";
    #    close(LOG);
    return;
}

# Return the known term codes, newest first.  Each code is "fa" (fall) or
# "sp" (spring) followed by a two-digit year; the first entry is treated by
# display_results() as the current term.
sub get_terms {
    return qw(
        fa25 sp25
        fa24 sp24
        fa23 sp23
        fa22 sp22
        fa21 sp21
        fa20 sp20
        fa19 sp19
        fa18 sp18
        fa17 sp17
        fa16 sp16
        fa15 sp15
        fa14 sp14
        fa13 sp13
        fa12 sp12
        fa11 sp11
        fa10 sp10
        fa09 sp09
        fa08 sp08
        fa07 sp07
        fa06 sp06
        fa05 sp05
        fa04 sp04
        fa03 sp03
        fa02
    );
}

# Expand a short term code into its display name, e.g. "fa25" becomes
# "Fall 2025" and "sp09" becomes "Spring 2009".  Every "fa" / "sp" in the
# argument is replaced; anything else is left untouched.
sub to_long_term {
    my ($term) = @_;
    my %season_for = (
        fa => 'Fall 20',
        sp => 'Spring 20',
    );
    $term =~ s/(fa|sp)/$season_for{$1}/g;
    return $term;
}

# Handle a plain lookup: redirect to the saved page when there is one
# (redirect_if_known exits in that case); otherwise show the page that
# links to the full search.
sub redirect_or_link_to_search {
    my @query = @_;
    redirect_if_known(@query);
    link_to_search(@query);
}

# Redirect to the link saved for $query, if any.
#
# The saved link is the first line of "$path/save/<escaped lower-case
# query>".  When such a link exists, a 303 redirect is printed and the
# script exits.  When there is no saved file, or it cannot be read or is
# empty, the sub simply returns so the caller can fall back to the search
# link page.
sub redirect_if_known {
    my ($query) = @_;
    LOG('redirect_if_know ' . $query);
    my $file = catfile($path, 'save', uri_escape(lc($query)));
    LOG($file);

    # -f rather than -e: a directory must not be mistaken for a saved link.
    return unless -f $file;

    open(my $saved, '<', $file) or return;
    my $line = <$saved>;
    close($saved);

    # An empty file has no target to redirect to.
    return unless defined $line;
    chomp($line);
    return if $line eq '';

    print redirect(-uri => $line, -status => 303);
    exit;
}

# Print the "no result saved" page for $query and exit.
#
# The page links to the full search for the number
# (http://course.mit.edu/a/<query>) and to the form for saving a new link.
#
# The sub used to be declared with an empty prototype "()", which
# contradicts the one-argument call in redirect_or_link_to_search and only
# worked because that call is compiled before this definition is seen.
sub link_to_search {
    my ($query) = @_;
    print header();
    print start_html(-title => 'course.mit.edu');
    # Escaped because the value is placed inside an HTML attribute.
    my $search_url = escapeHTML("http://course.mit.edu/a/$query");
    print '<p>No result saved. To search a number of MIT websites, click <a href="' . $search_url . '">here</a>.</p>';
    print '<p>If you already know the correct page for this semester, click <a href="https://scripts-cert.mit.edu/~course-search/search/auth/save.pl">here</a> to add a new link using MIT Certificates.</p>';
    print '<hr />
<p>Send questions and comments to course@mit.edu</p>
<p><a href="http://scripts.mit.edu">
<img alt="powered by scripts.mit.edu"
src="http://scripts.mit.edu/media/powered_by.gif" /></a></p>';
    print end_html();
    exit();
}

# Print the full search-results page for course number $num and exit.
#
# Output order:
#   1. the currently saved link, then links saved in earlier terms;
#   2. candidate pages on a fixed set of MIT sites for $num and for every
#      equivalent number found in the course catalog (each tried with and
#      without a trailing "J"), offered with a "Save" button;
#   3. URLs advertised by the catalog entry itself, tried last because they
#      are often wrong;
#   4. Stellar pages from earlier terms and matching OCW pages, listed
#      without a "Save" button.
#
# Resets the globals $numresults, %numbers, %numbersJ, %urls, %children and
# @last_resort; several of them are shared with the helper subs (get,
# list_url, list_ocw_if_exists, ...), so they must stay package globals.
sub display_results {
    my ($num) = @_;
    $numresults  = 0;
    %numbers     = ();
    %numbersJ    = ();
    %urls        = ();
    @last_resort = ();

    print header();
    print start_html(-title => 'course.mit.edu');
    print '<form action="http://course.mit.edu/save.pl"
      method="post">';
    list_saved_current_if_exists($num);
    my @terms = get_terms();
    foreach my $term (@terms) {
        propose_old_by_term_if_exists($num, $term, to_long_term($term));
    }

    %children = ();

    my $url  = "http://student.mit.edu/catalog/search.cgi?search=" . uri_escape(lc($num));
    my $file = catfile($path, "cat/" . uri_escape(lc($num)));
    make_path(dirname($file));
    my $error = system(qw"wget --load-cookies cookie -nv", $url, "-O", "$file.html", "-o", "$file.error");

    # First, look in the course catalog for any possible equivalent numbers.
    if (!$error && open(my $catalog, '<', "$file.html")) {
        my @data = <$catalog>;
        close($catalog);
        foreach my $line (@data) {
            if ($line =~ m/<p><b(.*)\/b>/) {
                my $full = $1;
                # Strips off the rest of the tags, and any trailing J.
                if ($full =~ m/>(.*?)J?</) {
                    my $found = $1;
                    # Two dots: sometimes a range of special class numbers.
                    $numbers{$found} = 1 if $found !~ m/.*\..*\..*/;
                }
            }
            if ($line =~ m/<br>\(Same subject as (.*)\)|<br>\(Subject meets with (.*)\)|<br>\(Offered under: (.*)\)/) {
                # Only one alternative matched; the other captures are undef.
                my $related = join('', grep { defined } ($1, $2, $3));
                # Splits on either commas or "<br>".
                foreach my $link (split(/\, |<br>/, $related)) {
                    # Strips off the link, and any trailing J.
                    if ($link =~ m/>(.*?)J?</) {
                        $numbers{$1} = 1;
                    }
                }
            }
            # URLs in the course catalog are often wrong, so we try them as
            # a last resort.
            if ($line =~ m/<br>URL: <a href\=\"(.*)\">/) {
                push(@last_resort, $1);
                LOG("last resort: $1");
            }
        }
    }

    # Always search for the requested number itself, even when the catalog
    # could not be downloaded; otherwise a catalog outage would suppress
    # every site probe below.
    $numbers{$num} = 1;

    # Some websites include J after the course number.
    foreach my $number (keys %numbers) {
        $numbersJ{$number} = 1;
        $numbersJ{$number . "J"} = 1;
    }

    foreach my $number (keys %numbersJ) {
        my @parts  = split(/\./, $number);
        my $cached = uri_escape(lc($number));

        # [ cache sub-directory, URL ] pairs, in the order they are shown.
        my @sites = (
            [ "web"               => "http://web.mit.edu/$number" ],
            [ "www"               => "http://web.mit.edu/$number/www" ],
            [ "stellar/$terms[0]" => "http://stellar.mit.edu/S/course/$parts[0]/$terms[0]/$number/index.html" ],
            [ "theory.csail"      => "http://theory.csail.mit.edu/classes/$number" ],
            [ "courses.csail"     => "http://courses.csail.mit.edu/$number" ],
            [ "math"              => "http://www-math.mit.edu/$number" ],
        );

        # Start every download first, then collect the results, so the
        # fetches for one number run concurrently.
        foreach my $site (@sites) {
            get($site->[1], catfile($path, $site->[0] . "/" . $cached));
        }
        foreach my $site (@sites) {
            propose_url_if_good($site->[1], catfile($path, $site->[0] . "/" . $cached));
        }
    }

    my $last_resort_file = catfile($path, "catalog/" . uri_escape(lc($num)));
    for my $last_url (@last_resort) {
        get($last_url, $last_resort_file);
    }
    for my $last_url (@last_resort) {
        propose_url_if_good($last_url, $last_resort_file);
    }
    print '</form>';

    # Stellar pages from earlier terms: start all downloads, then list the
    # good ones in the same order.
    my @old_terms = @terms[1 .. $#terms];
    my @old_pages;
    for my $term (@old_terms) {
        for my $number (keys %numbersJ) {
            my @parts = split(/\./, $number);
            push @old_pages, [
                "http://stellar.mit.edu/S/course/$parts[0]/$term/$number/index.html",
                catfile($path, "stellar/$term/" . uri_escape(lc($number))),
            ];
        }
    }
    for my $page (@old_pages) {
        get($page->[0], $page->[1]);
    }
    for my $page (@old_pages) {
        list_url_if_good($page->[0], $page->[1]);
    }

    list_ocw_if_exists();

    print "<p>$numresults Results Found</p>";
    print '<p>If the correct webpage for the class was not found in the search results, click <a href="https://scripts-cert.mit.edu/~course-search/search/auth/save.pl">here</a> to add a new link using MIT Certificates.</p>';
    print '<hr />
<p>Send questions and comments to course@mit.edu</p>
<p><a href="http://scripts.mit.edu">
<img alt="powered by scripts.mit.edu"
src="http://scripts.mit.edu/media/powered_by.gif" /></a></p>';
    print end_html();
    exit();
}

# List OCW course pages that match any of the numbers in the global
# %numbersJ.
#
# Downloads the OCW course index into "$path/ocw.html" (via get), waits for
# that download, then scans the page for links whose path contains
# "/<number>-", where <number> is lower-cased and has its first "." turned
# into "-".  Each match is listed with list_url.
sub list_ocw_if_exists {
    my $url  = 'http://ocw.mit.edu/OcwWeb/Global/all-courses.htm';
    my $file = catfile($path, 'ocw');

    get($url, $file);
    my $pid = $children{$url};
    # Flags 0 means a plain blocking wait.  The code used to pass the
    # bareword WEXITSTATUS, which only worked because it numified to 0.
    waitpid($pid, 0);

    return unless -e "$file.html";
    open(my $page, '<', "$file.html") or return;
    my @data = <$page>;
    close($page);

    # Build each number's pattern once instead of once per line.  \Q...\E
    # keeps characters in the number (e.g. a second ".") from acting as
    # regex metacharacters.
    my @patterns;
    for my $number (keys %numbersJ) {
        my $slug = lc($number);
        $slug =~ s/\./-/;
        push @patterns, qr/.*href=\"(.*\/\Q$slug\E-.*)\">.*/;
    }

    for my $line (@data) {
        for my $pattern (@patterns) {
            if ($line =~ $pattern) {
                list_url("http://ocw.mit.edu$1");
            }
        }
    }
}

# Propose the link that was saved for $query in an earlier term, if any.
#
# Arguments:
#   $query     - course number being searched.
#   $term      - short term code (e.g. "fa24"); the saved link is the first
#                line of "$path-$term/save/<escaped lower-case query>".
#   $long_name - display name of the term, used in the note shown next to
#                the link.
#
# Nothing is printed when the file is missing, unreadable or empty.
sub propose_old_by_term_if_exists {
    my ($query, $term, $long_name) = @_;
    LOG('propose_old_by_term_if_exists ' . $term . " " . $query);
    my $file = catfile("$path-$term", "save", uri_escape(lc($query)));

    # -f rather than -e: a directory must not be mistaken for a saved link.
    return unless -f $file;

    open(my $saved, '<', $file) or return;
    my $line = <$saved>;
    close($saved);

    # An empty file would otherwise produce an empty proposal.
    return unless defined $line;
    chomp($line);
    return if $line eq '';

    propose_url($line, ' This URL was saved in ' . $long_name . '.');
}

# List the link currently saved for $query, if any, marked as the default.
#
# The saved link is the first line of "$path/save/<escaped lower-case
# query>".  It is listed without a "Save" button.  Nothing is printed when
# the file is missing, unreadable or empty.
sub list_saved_current_if_exists {
    my ($query) = @_;
    LOG('list_saved_current_if_exists ' . $query);
    my $file = catfile($path, 'save', uri_escape(lc($query)));

    # -f rather than -e: a directory must not be mistaken for a saved link.
    return unless -f $file;

    open(my $saved, '<', $file) or return;
    my $line = <$saved>;
    close($saved);

    # An empty file would otherwise produce an empty list entry.
    return unless defined $line;
    chomp($line);
    return if $line eq '';

    list_url($line, ' This URL is currently the default.');
}

# Proposing a URL means that the URL is listed, and a button allowing it to
# be saved is added.
#
# Arguments:
#   $url  - URL whose download was started earlier with get().
#   $file - the cache path prefix that was passed to get().
#
# Waits for the download of $url, then proposes the page unless
# "$file.html" is missing or looks like an error page (a directory index, a
# Stellar error page, or a "not found" page).  If wget's log "$file.error"
# shows that the request was redirected, the final redirect target is
# proposed instead of $url.
sub propose_url_if_good {
    my ($url, $file) = @_;

    my $pid = $children{$url};
    # Flags 0 means a plain blocking wait.  The code used to pass the
    # bareword WEXITSTATUS, which only worked because it numified to 0.
    waitpid($pid, 0) if defined $pid;

    return unless -e "$file.html";

    my @data;
    if (open(my $page, '<', "$file.html")) {
        @data = <$page>;
        close($page);
    }

    my $error = 0;
    # The title is expected on the fourth line, so four lines are enough.
    # (The guard used to be "$#data > 3", which needlessly required five.)
    if (@data > 3) {
        if ($data[3] =~ m/  <TITLE>Index of .*<\/TITLE>/) {
            $error = 17;
        }
        if ($data[3] =~ m/.*<title>stellar error.*<\/title>.*/) {
            $error = 17;
        }
    }
    foreach my $line (@data) {
        # New stellar error.
        if (index($line, "The page you are looking for was not found") != -1) {
            $error = 17;
        }
    }
    return if $error;

    # Follow redirects recorded in wget's log; the last one wins.
    if (open(my $log, '<', "$file.error")) {
        while (my $line = <$log>) {
            if ($line =~ m/Location: (.*) \[following\]/) {
                $url = $1;
            }
        }
        close($log);
    }

    propose_url($url);
}

# Listing a URL just shows the URL, but does not allow it to be saved (used
# for old Stellar websites).
#
# Arguments:
#   $url  - URL whose download was started earlier with get().
#   $file - the cache path prefix that was passed to get().
#
# Waits for the download of $url, then lists the URL unless "$file.html" is
# missing or looks like an error page (a directory index, a Stellar error
# page, or a "not found" page).
sub list_url_if_good {
    my ($url, $file) = @_;

    my $pid = $children{$url};
    # Flags 0 means a plain blocking wait.  The code used to pass the
    # bareword WEXITSTATUS, which only worked because it numified to 0.
    waitpid($pid, 0) if defined $pid;

    return unless -e "$file.html";

    my @data;
    if (open(my $page, '<', "$file.html")) {
        @data = <$page>;
        close($page);
    }

    my $error = 0;
    # The title is expected on the fourth line, so four lines are enough.
    # (The guard used to be "$#data > 3", which needlessly required five.)
    if (@data > 3) {
        if ($data[3] =~ m/  <TITLE>Index of .*<\/TITLE>/) {
            $error = 17;
        }
        if ($data[3] =~ m/.*<title>stellar error.*<\/title>.*/) {
            $error = 17;
        }
    }
    foreach my $line (@data) {
        # New stellar error.
        if (index($line, "The page you are looking for was not found") != -1) {
            $error = 17;
        }
    }

    list_url($url) if !$error;
}

# List $location together with a submit button that lets the user save it
# for the course number held in the global $num.  $text, when given, is
# shown after the button.
sub propose_url {
    my ($location, $text) = @_;
    my $save_button = input({
        -type  => 'submit',
        -name  => $num,
        -value => "Save $location",
    });
    list_url($location, $save_button . $text);
}

# Print one result paragraph: a link to $location followed by $text.
# Each location is printed at most once per request (tracked in the global
# %urls), and every printed result bumps the global $numresults.
sub list_url {
    my ($location, $text) = @_;

    # Already shown earlier on this page.
    if ($urls{$location}) {
        return;
    }
    $urls{$location} = 1;

    LOG($location);
    my $anchor = a({ -href => $location }, escapeHTML($location));
    print p($anchor, ' ', $text);
    $numresults++;
}
