#!/afs/athena/contrib/perl/p
#
# File: nntpfetch - retrieve news articles from an NNTP server
# $Header: /afs/sipb.mit.edu/contrib/perl/nntp/RCS/nntpfetch,v 1.5 1992/11/21 00:22:38 ckclark Exp $
# 
# KOPYKNOT (K) 1991 Free Knoware Foundation, Ink.
# 
# This file is part of Randall's NNTP grep-n'-fetch-it utilities.
# 
# NNTP grep-n'-fetch-it is free software.  You can redistribute it and
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation or not.  That's what makes
# this a Kopyknot.  You can use this code for any purpose whatsoever.
# If you can make a dime off it, great.  I'm not out to stop you.  If
# you want to claim you wrote it, great.  It ain't up to me to blow your
# cover.  Go for it.
# 
# NNTP grep-n'-fetch-it is distributed in the hope that it will make
# Randall a famous net.personality.  It includes ABSOLUTELY NO WARRANTY.
# In fact, you should know that this code will probably fail in lots
# of different ways on lots of different machines.  If you want to fix
# the problems, great.  If you want to tell me what you changed, great.
# If you want me to fix something for you, go screw yourself.  I'm busy.
# 
# This Kopyknot was inspired by the GNU Copyleft.  I think that Richard
# Stallman is a mensch.  If I weren't so obsessed by the almighty buck,
# I'd try to get a job working for him.  If you want more information on
# the GNU General Public License, get GNU Emacs, or write to the Free
# Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
# 
# I hope you don't feel insulted by my parody, Richard.

unshift(@INC, "/afs/sipb.mit.edu/contrib/perl");
unshift(@INC, "/afs/sipb.mit.edu/contrib/perl/nntp");

require('getopts.pl');
require('nntp.pl');
#require('Dates.pl');		# Alan Stebbens needs to add a fix.
require('junkDates.pl');	# use this temporarily.

# Built-in defaults.  The option handling further down may replace them.
$opt_o = 'xxyy';		# base name of the output files
$field = 'subject';		# part of the article that the pattern is applied to
$flg   = 'o';			# match modifiers: compile the pattern only once
$opp   = '';			# empty means a normal (non-inverted) search

# Reference values used by dateval().  Str2Date hands back two leading
# values that are not needed here, followed by the date components that
# Date2Abs consumes.
$secs_per_day = 86400.0;	# 24 hours * 60 minutes * 60 seconds
($dummy1, $dummy2, @jan1_1970) = &Str2Date('1 Jan 1970');
$jan1_1970 = &Date2Abs(@jan1_1970);

# dateval - turn the text of an article's date header into a number that
# can be compared with <=>.
#
# Argument: the date string.  It must contain a clock time that starts
#   with HH:MM; the text before the time is given to Str2Date and the
#   time through the end of the string is given to Str2Time.
# Returns: Date2Abs of the date minus Date2Abs of 1 January 1970, plus
#   Time2GMSecs of the time divided by the number of seconds in a day
#   (presumably days since 1970 with the time of day as a fraction).
#   Returns 0 when the string contains no clock time.
sub dateval
{
    local($header) = $_[0];

    # Without a successful match, $1 and $` would still hold whatever an
    # earlier, unrelated match left behind; use a fixed value instead so
    # that unparseable dates always sort the same way.
    return 0 unless $header =~ /([0-9][0-9]:[0-9][0-9].*)$/;

    local($timestr) = $1;	# the clock time and whatever follows it
    local($datestr) = $`;	# everything in front of the clock time
    local($dfmt, $dstr, @mdy) = &Str2Date($datestr);
    local($tfmt, $tstr, @hmsz) = &Str2Time($timestr);
    # conserve precision by calibrating to 1 January 1970
    (&Date2Abs(@mdy) - $jan1_1970) + &Time2GMSecs(@hmsz) / $secs_per_day;
}

# bydate - comparison routine for sort().
#
# $a and $b are keys of %arts (date header text) and %nums ("group: number"
# strings).  Entries are ordered by their dateval(); when the dates compare
# equal, the field after ": " in the %nums entries decides, numerically.
sub bydate
{
    local($order) = (&dateval($arts{$a}) <=> &dateval($arts{$b}));
    local(@left, @right);

    if (! $order)
    {
	# same posting time: fall back to the article numbers
	@left = split(/: /, $nums{$a});
	@right = split(/: /, $nums{$b});
	$order = ($left[1] <=> $right[1]);
    }
    $order;
}

# usage - terminate the program with a summary of the command line syntax.
# The text goes through die; it ends in a newline, so no file name or line
# number is appended to it.
sub usage
{
    local($text) = <<"EndoUsage";
usage: $0 [-<options>] <pattern> <newsgroup> ...
    options:	-s: 		search subjects (default)
		-f: 		search from lines
		-b: 		search article bodies
		-a: 		search entire articles
		-h <field>:	search specified header field
		-i:		case insensitive search
		-v:		match inverse of pattern
		-n:		no search; get all articles
		-u:		limit search to unread articles
		-r <range>:	limit search to range
		-o <name>:	set output file base name
		-F <newsrc>:	use alternate newsrc file
		-U:		update the newsrc file
		-S <server>:	use alternate news server
EndoUsage

    die $text;
}

# process command line arguments
&usage if ! &Getopts('sfbah:ivnur:o:F:S:U');

# Set the part of the article that is to be matched.  The assignments run
# from lowest to highest priority, so when several selectors are given the
# last assignment wins: -s over -f over -h over -b over -a.
$field = "article" if $opt_a;
$field = "body" if $opt_b;
$field = $opt_h if $opt_h;
$field = "from" if $opt_f;
$field = "subject" if $opt_s;
$flg = $opt_i ? "io" : "o";

$opp = "!" if $opt_v;		# match the opposite of pattern

$opt_u = 1 if $opt_F;		# always use .newsrc if specified
$opt_F = ($ENV{"HOME"} . "/.newsrc") if ! $opt_F;

# don't let the user specify a messy range
$opt_r = &nntp'canon_artlist($opt_r) if $opt_r;

# Check the remaining arguments before touching the network, so that a
# malformed command line produces the usage text rather than a connection
# attempt.  With -n there is no pattern and every argument is a news group.
# The pattern is tested against the empty string, not for truth, so that
# a pattern of "0" is accepted.
if (! $opt_n)
{
    &usage if ! @ARGV;
    $pat = shift(@ARGV);
    &usage if $pat eq "";
}
&usage if ! @ARGV;

# open a connection to the news server.  program will die if bad connection
$server = $opt_S ? &nntp'connect($opt_S) : &nntp'connect;

# Make a list of each group to scan.  The loop is driven by the argument
# count rather than by the truth of each argument, so an argument such as
# "0" does not cut the list short; empty arguments are skipped.
while (@ARGV)
{
    $grouparg = shift(@ARGV);
    next if $grouparg eq "";
    push(@groups, &nntp'list($server, $grouparg));
}
die "No groups matched.\n" if (! @groups);

# fix pattern to match article number at beginning
$pat =~ s/^\^/^[0-9]+ / unless $field =~ /body|article/;

# Scan every selected news group and record the articles of interest:
#   $nums{<message-id>} = "<news group>: <article number>"
#   $arts{<message-id>} = value of the article's date header
foreach $ng (@groups)
{
    # with -u, groups for which the newsrc yields nothing are skipped
    next if $opt_u && ! &nntp'newsrc_get($opt_F, $ng, 0);

    # @lim: element 0 is compared against "0" to detect a group with
    # nothing in it; elements 1 and 2 are the first and last article numbers
    @lim = &nntp'setgroup($server, $ng);
    next if ! @lim;		# this should never ever happen
    next if $lim[0] eq "0";

    # generate an article range list for this news group
    $rangelist = "$lim[1]-$lim[2]";
    if ($opt_u)
    {
	# user wants to look at unread messages only
	local ($read) = &nntp'newsrc_get($opt_F, $ng, 0);
	$rangelist = $read &&
	    &nntp'canon_inverse(&nntp'canon_artlist($read),
				 $lim[1], $lim[2]);
    }
    # intersect with the user's specified range.
    $rangelist = &nntp'canon_isect($rangelist, $opt_r) if $opt_r;

    # now do the hard stuff
    foreach $rng (&nntp'canon_expand($rangelist))
    {
	local ($first, $last) = split(/-/, $rng);

	if ($opt_n)
	{
	    # get the message-id's then format the array elements
	    #  like "<nnnn>: <message-id>" as idgrepfield returns them.
	    &nntp'fields($server, *arts, "message-id", $first, $last);
	    foreach $line (@arts)
	    {
		$line =~ s/ /: /;	# add article number separator
	    }
	}
	elsif ($field !~ /body|article/)
	{
	    @arts = &nntp'idgrepfield($server, $field, "$opp/$pat/$flg",
				       $first, $last);
	}
	else
	{
	    @arts = &nntp'idgreptext($server, $field, "$opp/$pat/$flg",
				      $first, $last);
	}

	# A named loop variable and an explicit two-way split are used here
	# instead of $_ and the implicit split into @_, so that nothing
	# called in the loop can disturb the values and the code does not
	# depend on the "??"-delimiter form of split.
	foreach $hit (@arts)
	{
	    next if ! $hit;
	    local ($artnum, $msgid) = split(/: /, $hit, 2);
	    next if $msgid eq "";	# no message-id, nothing to file it under
	    $nums{$msgid} = "$ng: $artnum"; # news group and article number
	    $arts{$msgid} = &nntp'field($server, "date", $msgid);
	}
    }
}

# save articles in order of posting time
@ids = sort(bydate keys(%arts));
die("No articles matched.\n") if ! @ids;

# Pad the sequence number with zeros to the number of digits in the article
# count.  The digits are counted directly: log(n)/log(10) is not exact in
# floating point and comes out one too low at some powers of ten (1000).
$width = length(@ids + 0);

$i = 1;
foreach $mid (@ids)
{
    # $opt_o is passed as an argument, never as part of the format, so a
    # "%" in the base name is copied literally
    $fname = sprintf("%s.%0${width}d", $opt_o, $i);
    $i++;
    print "Saving $nums{$mid} in file: $fname\n";

    # $nums{$mid} is "<news group>: <article number>"
    @ngnum = split(/: /, $nums{$mid});
    &nntp'setgroup($server, $ngnum[0]);
    $article = &nntp'article($server, $ngnum[1]);

    # if lookup in newsgroup fails, try lookup by message-id
    if (! $article)
    {
	$article = &nntp'article($server, $mid);
    }

    # the article is fetched before the file is opened, so a failed
    # retrieval is reported instead of leaving an empty file behind
    if (! $article)
    {
	print(STDERR "Error retrieving $nums{$mid} ($mid)\n");
	next;
    }

    $article =~ s/\r\n/\n/g;		# network line ends to local ones
    $article =~ s/\n\.\./\n./g;		# drop the dot doubled at line starts

    if (open(ARTFILE, ">$fname"))
    {
	print(ARTFILE $article);
	# buffered write errors only show up when the file is closed
	close(ARTFILE) || print(STDERR "Error writing $fname: $!\n");
    }
    else
    {
	print(STDERR "Error writing $fname: $!\n");
    }
}

# update the user's newsrc file
if ($opt_U)
{
    # Collect the recorded article numbers per news group as a
    # comma-terminated list.  Each %nums value is "<news group>: <number>".
    # Named variables and an explicit split replace $_ and the implicit
    # split into @_, so the calls into the nntp package cannot disturb them.
    foreach $entry (values(%nums))
    {
	local ($rgroup, $rnum) = split(/: /, $entry, 2); # separate news group from article
	$readinglist{$rgroup} .= "$rnum,";
    }

    # now update the newsrc entries.
    foreach $rgroup (keys(%readinglist))
    {
	chop($readinglist{$rgroup});	# one too many commas
	&nntp'newsrc_merge($opt_F, $rgroup,
			    &nntp'canon_artlist($readinglist{$rgroup}));
    }

    &nntp'newsrc_write($opt_F);
}
