#!/usr/bin/perl -w
# $HeadURL: file:///afs/sipb.mit.edu/contrib/weather/SVNRep/tools/NOAAFilter.pm $
# $Id: NOAAFilter.pm 17 2007-09-18 21:57:29Z foley $
# Perl module for locating NOAA data files and extracting one station's records
# By Joe Foley <foley@mit.edu>
# Based upon extract-station.pl by Joseph Sokol-Margolis <seph@mit.edu>

package NOAAFilter;
use strict;

use Data::Dumper;
use Carp;
use File::Find;
use IO::File;
use Search::Dict;  # allows us to look for a line in a file fast


# Constructor.  Accepts key => value pairs that override the defaults
# below, then runs init() on the freshly blessed object and returns it.
sub new {
    my ($class, @overrides) = @_;

    my %defaults = (
        DB          => undef,
        stationfile => undef,
        def_field   => "WBAN Number",
        file_base   => "",
        filelist    => undef,
        path        => undef,
    );

    # Caller-supplied pairs come last so they win over the defaults.
    my $self = bless { %defaults, @overrides }, $class;
    $self->init();

    return $self;
}

# Find the data files this object will read and remember them.
#
# The search root is "<basedir>/uncompressed", where <basedir> is the
# current working directory, or the longest leading part of it that ends
# in "/weather" when the path contains such a component.
#
# Arguments:
#   $f - optional pattern for the file names to collect; falls back to
#        $self->{file_base}, then to "hourly.txt".  It is used as a
#        regular expression against each entry name, so "." matches any
#        character.
#
# Side effects: stores the pattern in $self->{file_base} and an array
# reference of the matching paths in $self->{filelist}; reports progress
# on STDERR.
#
# Croaks when no matching file is found.
sub init {
    my $self = shift;

    # lets see where we're at
    my $pwd = `pwd`;
    chomp($pwd);
    my $basedir = "$pwd";
    if ($pwd =~ m|^/afs/sipb(.mit.edu)?/contrib/weather|) {
        $basedir = "/afs/sipb.mit.edu/contrib/weather";
    }
    # Any path containing ".../weather" (including the AFS one above)
    # is trimmed back to that component.
    if ($pwd =~ m|(.*/weather)|) {
        $basedir = $1;
    }

    my $uncompdir = "$basedir/uncompressed";
    my @filelist;
    my $f = shift || $self->{file_base} || "hourly.txt";
    $self->{file_base} = $f;
    print STDERR "Looking in $uncompdir for $f\n";

    # This must be an anonymous sub: a named sub nested in init() would
    # bind @filelist and $f from the first call only, so every later
    # object would get an empty list (and croak below).
    my $wanted = sub {
        no warnings 'File::Find';
        push @filelist, "$File::Find::name" if $_ =~ /$f/;
    };

    finddepth($wanted, $uncompdir);

    $self->{filelist} = \@filelist;
    print STDERR "Found " . scalar(@filelist) . " suitable files.\n";
    croak "Error:  No suitable files!" unless @filelist;

    return;
}

# Copy every record of one station from the known data files into a
# dump file.
#
# Arguments:
#   $stationid - station id, exactly five characters long; compared as a
#                string with the first comma-separated field of each line.
#   $dumpfile  - optional output path; falls back to $self->{dumpfile},
#                then to "dump-<stationid>.txt".  The path used is stored
#                in $self->{dumpfile}.
#
# Every file in $self->{filelist} is processed in sorted order.  Its
# first line is treated as a header and copied to the dump, so the dump
# contains one header line per input file, each followed by that file's
# matching records.  Records of one station are assumed to be contiguous
# and the file sorted by station id, which is what lets Search::Dict
# jump close to the right place instead of scanning the whole file.
#
# Input files that cannot be opened or searched are reported on STDERR
# and skipped.  Progress is reported on STDERR.
#
# Dies (confess) on a missing or malformed station id; croaks if the
# dump file cannot be opened or completely written, or if an input file
# has no header line at all.
sub extract {
    my $self      = shift;
    my $stationid = shift;

    # Validate first: the default dump name below interpolates the id.
    confess "Must specify a station id" unless ($stationid);
    confess "That doesn't look like a good station id"
        unless (length $stationid == 5);

    my $dumpfile = shift || $self->{dumpfile} || "dump-$stationid.txt";
    $self->{dumpfile} = $dumpfile;

    my $DUMPH = IO::File->new($dumpfile, "w")
        or croak "Error: Couldn't open dump file $dumpfile for writing: $!";

    my $extracted = 0;
    foreach my $file (sort @{ $self->{filelist} }) {

        print STDERR "Pulling station information for $stationid from $file";

        my $FILEH = IO::File->new($file, "r");
        unless (defined $FILEH) {
            print STDERR "Couldn't open file $file\n";
            next;
        }

        # The first line contains the headers
        my $headerline = $FILEH->getline;
        defined $headerline
            or croak "Error: $file is empty (no header line).";
        print $DUMPH $headerline;

        unless (_seek_to_station($FILEH, $stationid)) {
            print STDERR "Couldn't search file $file\n";
            $FILEH->close();
            next;
        }

        # we're going to go through a lot of data, so this better be fast
        my $count = 0;
        while (my $line = <$FILEH>) {
            # Only the first field matters, so stop splitting after it.
            my ($id) = split(/,/, $line, 2);

            # the numbers are contiguous, so we can skip the rest
            last unless defined $id && $id eq $stationid;

            print $DUMPH $line;
            print STDERR "." if $count++ % 10 == 1;
        }
        $extracted += $count;
        $FILEH->close();
        print STDERR "Done.\n";
    }

    # Buffered write errors only surface when the handle is closed.
    $DUMPH->close()
        or croak "Error: Couldn't finish writing $dumpfile: $!";

    print STDERR "Extracted $extracted lines and dumped to $dumpfile\n";
    return 1;
}

# Position $fh at the first data line that sorts at or after $stationid.
# Returns true on success, false if the file could not be searched.
sub _seek_to_station {
    my ($fh, $stationid) = @_;

    # look() returns the new file position, or -1 on error; it does not
    # say whether the key was actually present.
    my $pos = look($fh, $stationid);
    return 0 if $pos < 0;
    return 1 if $pos > 0;

    # Position 0 means look() stopped on the very first line, i.e. the
    # header sorts at or after the id.  Step over the header and walk
    # forward to the first data line that is not smaller than the id.
    my $header = <$fh>;
    while (1) {
        my $here = tell($fh);
        my $line = <$fh>;
        last unless defined $line;
        if ($line ge $stationid) {
            seek($fh, $here, 0) or return 0;
            last;
        }
    }
    return 1;
}


# Smoke test: build a filter with all defaults and hand it back.
sub test {
    my $filter = NOAAFilter->new;
    return $filter;
}

1;
