#  A simple interface to search the MITSFS pinkdex.
# 
#  $Id: pinkdex.pl,v 1.11 2010/02/17 22:40:24 root Exp $

# If the dex format ever changes there are (potentially) two places
# that need to change, both marked with "### DEX format parsing":
# 
# - In main::do_pinkdex where the search regexp is constructed
# - In main::do_pinkdex_verbose where the line is split apart
#
# In addition, if the set of shelfcodes changes, the shelfcodes file
# needs to be updated (MITSFS doesn't appear to have a public list, so
# you'll need to ask them for it).

use strict;
use warnings;

package pinkdex;

use CGI qw(:standard start_pre end_pre keywords);
use CGI::Carp qw(fatalsToBrowser);
use HTML::Entities;
use URI::Escape;

use constant PINKDEX_FILE => "/afs/athena.mit.edu/activity/m/mitsfs/Public/pinkdex.txt";
use constant SHELFCODES_FILE => "/afs/sipb.mit.edu/project/www/root/Pinkdex_shelfcodes.txt";

sub search_form() {
  return (
	  start_form(-method=>"GET"),
	  radio_group(-name => "type",
		      -values => [qw/title author/],
		      -labels => {title => "Title Search",
				  author => "Author Search"}),
	  br(),
	  textfield(-name => "rregexp",
		    -size => 80),
	  submit(-label => "Do Search"),
	  end_form(),
	 );
}

sub info() {
  print (header(),
	 start_html("SIPB MITSFS Library Pinkdex Search Engine"),
	 h1("SIPB MITSFS Library Pinkdex Search Engine"),
	 p("This is a simple interface developed by SIPB to search the ",
	   a({href=>"/activities/mitsfs/homepage.html"}, "MIT Science Fiction Library"),"'s catalog."
	  ),
	 h2("MITSFS now has their own ",
	    a({href=>"http://mitsfs.mit.edu/pinkdex/"}, "online pindex"),
	    " which you may find better than this search interface."),
	 p(["You can search by author or by title. Matches are case insensitive. Bug reports and comments should be e-mailed to stuffmaster\@mit.edu.",
	    "Examples:"]),
	 dl(
	    dt("An author search:"),
	    dd(code("zelazny")),

	    dt("A title search: Don't try to quote any spaces or
            characters; everything after title or author is passed as
            a single token."),
	    dd(code("foundation and empire")),

	    dt("A more specific author search:"),
	    dd(code("adams, douglas")),

	    dt("A regular expression title search:"),
	    dd(code("start trek [IVX]+"))
	   ),
	 search_form(),
	 end_html()
	);
}


sub main::do_pinkdex {
    my $query = param("rregexp") or join(" ", keywords);

    $query =~ tr/\+/ /;

    unless ($query) {
      info();
      return;
    }

    my ($type, $regexp) = (param("type"), param("rregexp"));
    unless ($regexp) {
      ($type, $regexp) = split(" ", $query, 2);
      # N.B. Splitting on a space actually splits on any whitespace.
    }

    eval { /$regexp/ };
    if ($@ ne "") {
	error("error in regular expression: $@");
    }

    $type = lc($type);

    my ($regexp_author, $regexp_title);

    if ($type eq "author") {
	$regexp_author = ".*$regexp.*";
	$regexp_title = ".*";
    }
    elsif ($type eq "title") {
      $regexp_author = ".*";
      $regexp_title = ".*$regexp.*";
    }
    else {
      error ('you must specify author or title search');
    }

    open (PINKDEX, "<", PINKDEX_FILE)
	or error ("error opening ".PINKDEX_FILE.": $!");
    print (
	   header,
	   start_html("SIPB MITSFS Pinkdex search results"),
	   p(a({-href=>"/pinkdex"},i("back to pinkdex help"))),
	   h1("results of ", $type, " search for: ", encode_entities($regexp)),
	   search_form()
	  );

    my $current_author;
    while (my $line = <PINKDEX>) {
	chomp $line;

	### DEX format parsing
	if ($line =~ /^($regexp_author)<($regexp_title)<(.*)<(.*)/oi) {
	    if ($current_author ne $1) {
		$current_author = $1;
		print h2($current_author, ":");
	    }
	    my $title = $2;
	    my $series = ($3 ? " [$3]" : "");
	    print a({-href=>"/pinkdex_v?".uri_escape($line)}, "$title$series"), br();
	}
    }
    unless($current_author) {
	print p(b("No matches found."));
    }

    print end_html;
    close (PINKDEX);
}

open(SHELFCODES, "<", SHELFCODES_FILE) or die "Couldn't read ".SHELFCODES_FILE;
my @shelf_codes = <SHELFCODES>;
chomp @shelf_codes;
close(SHELFCODES);
my @shelf_codes = map { split /\t/, $_, 2 } @shelf_codes;
our %shelf_codes = @shelf_codes;

sub parse_shelf_code {
	my ($rawcode) = @_;
	my (@entries) = split (",", $rawcode);

	print (
	       h2("shelf code description"),
	       p("raw shelf code: ", $rawcode),
	       start_pre
	      );
	foreach (@entries) {
		# delete trailing :'s and numbers
		s/[:0-9]+$//;
		my ($description) = $shelf_codes{$_};
		if ($description) {
			printf ("%10s: %s\n", $_, $description);
		} else {
			printf ("%10s: UNKNOWN CODE!\n", $_);		
		}
	}
	print (
	       end_pre,
	       p(a({-href => "/Pinkdex_shelfcodes.txt"}, "list of possible shelf codes"))
	      );
}

sub main::do_pinkdex_verbose {
  my $query = uri_unescape(join(" ", keywords));

  ### DEX format parsing
  my ($author, $title, $series, $shelfcode) = split (/</, $query);
  $series = ($series ? " [$series]" : "");
  print (
	 header,
	 start_html("SIPB MITSFS Library Pinkdex: ",
		    encode_entities("$title$series")),
	 h1("Title: ", encode_entities("$title$series")),
	 h1("Author(s): ", encode_entities($author)),
	 p(a({-href=>"/pinkdex?type=author;rregexp=".uri_escape($author)},
	     "Select to retrieve other titles by this author"))
	);
  parse_shelf_code($shelfcode);
  print (
	 p(a({-href=>"/pinkdex"},i("back to pinkdex help"))),
	 end_html
	);
}

sub error {
    my($msg) = join(" ", @_);
    print (
	   header(-status=>500),
	   start_html("Server Error"),
	   h1("Server Error"),
	   p(strong($msg)),
	   p(<<EOM),
If you feel this is a server problem and wish to report it, please
include the error code, the requested URL, which and what version
browser you are using, and any other facts that might be relevant to: <P>
<a href="/comment">webmaster\@mit.edu</a>
EOM
	   end_html
	  );
    exit 1;
}

1;
