#!/usr/bin/perl -w
# $HeadURL: file:///afs/sipb.mit.edu/contrib/weather/SVNRep/tools/WeatherDB.pm $
# $Id: WeatherDB.pm 13 2007-09-11 17:40:18Z foley $
# Perl module for loading the dumped weather information (from extract-station.pl)
# By Joe Foley<foley@mit.edu>

package WeatherDB;
use strict;

use Data::Dumper;
use Carp;
use IO::File;


# new(%args)
#
# Constructor.  Builds the object from the defaults below, lets any
# key => value pairs supplied by the caller override them, blesses the
# result into $class and then runs init() on it before handing it back.
#
# Recognised keys:
#   weatherfile - file that init() reads when it is not given one
#   annotate    - true => filter() writes a header line at the top
#   DB          - filled in by load()
#   headers     - filled in by init() / load()
#   path        - stored as given; not used by the methods in this file
sub new {
    my ($class, @overrides) = @_;

    my %attr = (
        DB          => undef,
        headers     => undef,
        weatherfile => undef,
        annotate    => 1,
        # A "def_field" default (e.g. "WBAN Number") is deliberately not
        # offered: only YearMonthDay + Time together are unique.
        path        => undef,
        @overrides,    # caller-supplied pairs come last so they win
    );

    my $self = bless \%attr, $class;
    $self->init();
    return $self;
}

# init([$weatherfile])
#
# Reads only the first line of the weather file and stores its
# comma-separated column names (as an array ref) in $self->{headers}.
# The names are also echoed to STDOUT.  The rest of the file is not read.
#
# Arguments:
#   $weatherfile - optional; falls back to $self->{weatherfile}.
# Returns: 1.
# Dies:    confess() when no file name is available,
#          croak()   when the file cannot be opened.
# An empty file is not fatal: a warning is issued via carp() and the
# header list is left empty.
sub init {
    my $self = shift;
    my $weatherfile = shift || $self->{weatherfile} ||
        confess "WeatherDB:init(): Error, \$self->{weatherfile} not set.";

    print "Loading weather information from $weatherfile\n";
    my $WH = IO::File->new($weatherfile, "r")
        or croak "Error:  Unable to read weatherfile \"$weatherfile\": $!\n";

    # Read into a lexical rather than the global $_: when the caller is
    # inside "for (@files) { ... }", $_ is an alias for the caller's array
    # element and assigning to it would overwrite the caller's data.
    my $first = $WH->getline();

    # We only want the headers for inspection, so there is no point in
    # reading any further.
    $WH->close();

    my @headers;
    if (defined $first) {
        chomp $first;
        $first =~ s/\r\z//;    # tolerate CRLF line endings
        @headers = split(/\,\s*/, $first);
    }
    else {
        carp "WeatherDB:init(): Warning, weatherfile \"$weatherfile\" is empty";
    }
    $self->{headers} = \@headers;

    print "Data found: ";
    foreach my $name (@headers) { print "$name, "; }
    print "\n";

    return 1;
}

# filter(\@wantheaders, [$outputfile], [$weatherfile])
#
# Writes a reduced CSV containing, for every data row of the weather file,
# the key YearMonthDay.Time followed by the requested columns.
#
# Arguments:
#   \@wantheaders - names of the columns to extract; each must appear in
#                   $self->{headers}
#   $outputfile   - optional, defaults to "filtered.csv"
#   $weatherfile  - optional, defaults to $self->{weatherfile}
#
# Output format: when $self->{annotate} is true the first line is
# "YearMonthDayTime, <name>, <name>..."; every data line is
# "<key>,<value>,<value>...".  Lines whose first field equals the first
# header name are treated as repeated header lines and skipped, as are
# blank lines.  Fields missing from a short row are written as empty
# strings.
#
# The column names are echoed to STDOUT and a "*" is written to STDERR for
# the first row and every 50th row after it as a progress indicator.
#
# Returns: 1.
# Dies (croak): a requested column or one of the key columns
# (YearMonthDay, Time) is not among the headers; either file cannot be
# opened; closing the output file fails.
sub filter {
    my $self = shift;
    my $wantheaders_ref = shift;
    my $outputfile = shift || "filtered.csv";
    my $weatherfile = shift || $self->{weatherfile};

    # Map header name -> column index.
    my @headers = @{$self->{headers}};
    my %hrev;
    @hrev{@headers} = (0 .. $#headers);

    # Validate every column up front.  An unknown name would otherwise
    # yield an undef index, which Perl silently treats as column 0.
    foreach my $need (qw(YearMonthDay Time)) {
        exists $hrev{$need}
            or croak "Error:  Required column \"$need\" not found in weatherfile headers\n";
    }
    my @wantindex;
    foreach my $want (@{$wantheaders_ref}) {
        exists $hrev{$want}
            or croak "Error:  Requested column \"$want\" not found in weatherfile headers\n";
        push @wantindex, $hrev{$want};
    }
    my @pick = ($hrev{YearMonthDay}, $hrev{Time}, @wantindex);

    # Open the input first so that an unreadable weatherfile does not
    # leave behind a freshly truncated output file.
    my $WH = IO::File->new($weatherfile, "r")
        or croak "Error:  Unable to read weatherfile \"$weatherfile\": $!\n";
    my $FH = IO::File->new($outputfile, "w")
        or croak "Error:  Unable to open outputfile \"$outputfile\": $!\n";

    if ($self->{annotate}) {
        print $FH join(", ", "YearMonthDayTime", @{$wantheaders_ref}), "\n";
    }
    print "Extracting: ";
    foreach my $want (@{$wantheaders_ref}) { print "$want, "; }
    print "\n";

    my $cnt = 0;
    while (defined(my $row = $WH->getline())) {
        # Strip the line terminator; otherwise it stays glued to the last
        # field and shows up as a blank line whenever that field is chosen.
        chomp $row;
        $row =~ s/\r\z//;
        next unless length $row;

        my @line = split(/\,\s*/, $row);
        next unless @line;

        # check if this is a header line, if so skip it
        $line[0] eq $headers[0] and next;

        my ($day, $time, @vals) =
            map { defined $_ ? $_ : "" } @line[@pick];
        print $FH join(",", $day . $time, @vals), "\n";

        $cnt++ % 50 == 0 and print STDERR "*";
    }
    print STDERR "\n";

    $WH->close();
    # Buffered write errors only surface at close time.
    $FH->close()
        or croak "Error:  Unable to finish writing outputfile \"$outputfile\": $!\n";

    return 1;
}
    
# load([$weatherfile])
#
# This is the memory hog: reads the whole weather file into memory.
#
# The first line supplies the column names (stored in $self->{headers}).
# Every following data line becomes a hash ref { header => value, ... },
# stored in $self->{DB} under the key YearMonthDay.Time.  When two rows
# share a key the later one wins.  Lines whose first field equals the
# first header name are treated as repeated header lines and skipped, as
# are blank lines.  Columns missing from a short row are stored as undef.
#
# Arguments:
#   $weatherfile - optional, defaults to $self->{weatherfile}
# Returns: the hash ref that was stored in $self->{DB}.
# Dies (croak): the file cannot be opened, or it has a header line that
# lacks the YearMonthDay or Time column.  An empty file yields an empty DB.
sub load {
    my $self = shift;
    my $weatherfile = shift || $self->{weatherfile};

    my $DB = {};
    my $WH = IO::File->new($weatherfile, "r")
        or croak "Error:  Unable to read weatherfile \"$weatherfile\": $!\n";

    # getline() is an ordinary method call: its result has to be captured
    # explicitly, nothing is placed in $_ on our behalf.
    my $first = $WH->getline();
    my @headers;
    if (defined $first) {
        chomp $first;
        $first =~ s/\r\z//;    # tolerate CRLF line endings
        @headers = split(/\,\s*/, $first);

        my %seen = map { $_ => 1 } @headers;
        foreach my $need (qw(YearMonthDay Time)) {
            $seen{$need}
                or croak "Error:  Required column \"$need\" not found in weatherfile \"$weatherfile\"\n";
        }
    }
    $self->{headers} = \@headers;

    while (defined(my $row = $WH->getline())) {
        chomp $row;
        $row =~ s/\r\z//;
        next unless length $row;

        my @line = split(/\,\s*/, $row);
        next unless @line;

        # skip header lines repeated inside the data
        next if $line[0] eq $headers[0];

        # Pair each header with the field in the same position; headers
        # without a matching field get undef.
        my %rec;
        @rec{@headers} = @line;

        my $day  = defined $rec{YearMonthDay} ? $rec{YearMonthDay} : "";
        my $time = defined $rec{Time}         ? $rec{Time}         : "";
        $DB->{$day . $time} = \%rec;
    }
    $WH->close();

    $self->{DB} = $DB;
    return $DB;
}

1;
