# File: nntp.pl - utility functions to talk to an NNTP server
# 
# KOPYKNOT (K) 1991 Free Knoware Foundation, Ink.
# $Header: /afs/athena.mit.edu/user/c/k/ckclark/RCS/nntp.pl,v 1.3 1992/09/17 08:02:50 ckclark Exp $
# 
# This file is part of Randall's NNTP grep-n'-fetch-it utilites.
# 
# NNTP grep-n'-fetch-it is free software.  You can redistribute it and
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation or not.  That's what makes
# this a Kopyknot.  You can use this code for any purpose whatsoever.
# If you can make a dime off it, great.  I'm not out to stop you.  If
# you want to claim you wrote it, great.  It ain't up to me to blow your
# cover.  Go for it.
# 
# NNTP grep-n'-fetch-it is distributed in the hope that it will make
# Randall a famous net.personality.  It includes ABSOLUTELY NO WARRANTY.
# In fact, you should know that this code will probably fail in lots
# of different ways on lots of different machines.  If you want to fix
# the problems, great.  If you want to tell me what you changed, great.
# If you want me to fix something for you, go screw yourself.  I'm busy.
# 
# This Kopyknot was inspired by the GNU Copyleft.  I think that Richard
# Stallman is a mensch.  If I weren't so obsessed by the almighty buck,
# I'd try to get a job working for him.  If you want more information on
# the GNU General Public License, get GNU Emacs, or write to the Free
# Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
# 
# I hope you don't feel insulted by my parody, Richard.

package nntp;

require('/afs/athena/user/m/k/mkgray/perl/chat2.pl'); # because the one in the
				# distribution doesn't work under solaris
				# marc says he'll apply the fix sometime
				# soon.  Change made on (8/18/94)

# file in which to find the name of the nntp server
$servfile = "/afs/sipb.mit.edu/contrib/perl/nntp/newsserver";

# end of line pattern for responses
$cr = '\r?\n';

# timeout for most commands
$default_timeout = 200;
# timeout during connect
$connect_timeout = 100;
# timeout for xhdr command
$xhdr_timeout = 500;

# pattern to match a complete group name
$groupmatchpat = '[a-zA-Z0-9][a-zA-Z.+_-]*';

# error strings
$noservermsg =
    "No NNTP server specified.  Check NNTPSERVER environment variable\n";
$badservermsg = "No connection to NNTP server %s: %s\n";

@groups = ();			# start with an empty list
%newsrc = ();			# newsrc entries
%subscribed = ();		# entry will be true if subscribed
@ngorder = ();			# to remember order of entries in newsrc

#***********************************************************************
# $hndl = &nntp'connect($server);
#
#   attempt a connection to the nntp server.  the argument $server is
#   optional and will be looked up in the environment or the nntpserver
#   file if not supplied.  $server may be specified as a host name or
#   the internet address of the host.
#
#   a chat-handle will be returned if successful.  otherwise, a fatal
#   error occurs.
#***********************************************************************
sub connect
{
    local($server) = $_[0];
    local($conn);

    $server || ($server = $ENV{"NNTPSERVER"}) ||
	(open(servfile) ? chop($server = <servfile>) : "");

    die $noservermsg if ! $server;
    ($conn = &chat'open_port($server, 119)) ||
	die sprintf($badservermsg, $server, $!);

    &chat'expect($conn, $connect_timeout,
		 "^20[01] .*$cr", '$conn',
		 "^5.. .*$cr", 'die "Server error: " . $&',
		 "TIMEOUT", 'die "Server connection timeout: $server\n"',
		 "EOF", 'die "I/O error\n"');
}

#***********************************************************************
# $perl_pattern = &nntp'grouppat($generic_pattern)
#
#   try to be smart about whether the user is smart.  changes input
#   patterns of the form 'comp.all' to 'comp\..*'
#***********************************************************************
sub grouppat
{
    local($pat) = $_[0];

    if (($pat =~ /^$groupmatchpat$/o) && !($pat =~ /\.\./))
    {
	# not a complex pattern
	$pat =~ s/\./\\./g;
	$pat =~ s/^/^/;		# caret on the front
	$pat =~ s/$/$/;		# dollar on the end
    }
    $pat =~ s/(\.all)+$/.*/;
    $pat =~ s/^(all\.)+/.*/;
    $pat =~ s/\ball\b/.*/g;
    $pat;
}

#***********************************************************************
# &nntp'getgroups
#
#   @groups is the list of groups known by the nntp server.
#   %groups is an associative array of the max articles numbers
#   used for quick lookup.  These variables are private to the
#   nntp package.
#***********************************************************************
sub getgroups
{
    local($conn) = $_[0];
    local($ng);
    local($groups);
    local($cmdstat);
    local($save_multi) = $*;
    
    if (! @groups)
    {
	&chat'print($conn, "list\n");
	
	# get a list of all the groups, one line at a time
	$cmdstat = &chat'expect($conn, $default_timeout,
				"^215 .*$cr", '1',
				"^5.. .*$cr", 'warn "$&"; 0',
				"^\\.$cr", '0',
				"^.*$cr", '0',  # cleanup garbage
				"TIMEOUT", 'die "Server timeout\n"',
				"EOF", 'die "I/O error\n"');

	if ($cmdstat)
	{
	    $* = 1;		# multi line searching
	    $groups = &chat'expect($conn, $default_timeout,
				   "^\\.$cr", '$`',
				   "TIMEOUT", 'die "Server timeout\n"',
				   "EOF", 'die "I/O error\n"');
	    $* = $save_multi;
	    @groups = split(/$cr/, $groups);
	}

	# build associative array for quick lookups
	foreach (@groups)
	{
	    ($ng = $_) =~ s/ .*//;
	    $groups{$ng} = $_;
	}
    }
}

#***********************************************************************
# @matching_groups = &nntp'list($conn, $group_pattern)
#
#   return a list of the groups which match the group pattern.
#***********************************************************************
sub list
{
    local($conn) = $_[0];
    local($pat) = $_[1] ? &grouppat($_[1]) : '.*';
    local(@grouplist);

    # get an array of all the groups known at the server
    &getgroups($conn) if ! @groups;

    @grouplist = @groups;	# copy list of groups
    
    # use eval to avoid repeated pattern evaluations
    eval("grep(s/ .*// && /$pat/, \@grouplist);");
}

sub setgroup
{
    local($conn) = $_[0];
    local($ng) = $_[1];
    local($artcls);

    &getgroups($conn) if ! @groups;

    defined($groups{$ng}) || return ();

    &chat'print($conn, "group $ng\n");
    ($artcls = &chat'expect($conn, $default_timeout,
			    "^211 .*$cr", '$&',
			    "^4.. .*$cr", '()',
			    "TIMEOUT", 'die "Server timeout\n"',
			    "EOF", 'die "I/O error\n"')) || return ();
    (split(/ /, $artcls))[1..3];
}

# return the article contents in a (potentially large) string.
# 
# $msg = &msgtext($conn, $num-or-id, "article" | "body" | "head")
#
sub msgtext
{
    local($conn) = $_[0];
    local($num) = $_[1];
    local($cmd) = $_[2];	# body or article
    local($done) = 0;
    local($msg);
    local($save_multi) = $*;

    &chat'print($conn, "$cmd $num\n");
    $msgok = &chat'expect($conn, $default_timeout,
			  "^220 .*$cr", '1',
			  "^222 .*$cr", '1',
			  "^4.. .*$cr", '0',
			  "TIMEOUT", 'die "Server timeout\n"',
			  "EOF", 'die "I/O error\n"');
    $msgok || return "";

    # gather the lines of the text into an array.  stop when you see
    # a dot on a line by itself.
    $* = 1;
    $msg = &chat'expect($conn, $default_timeout,
			 "^\\.$cr", '$`',
			"TIMEOUT", 'die "Server timeout\n"',
			"EOF", 'die "I/O error\n"');
    $* = $save_multi;
    $msg;
}

sub article
{
    &msgtext($_[0], $_[1], "article");
}

sub body
{
    &msgtext($_[0], $_[1], "body");
}

sub articles
{
    local($conn) = shift;
    local(*msgs) = shift;
    local($n) = 0;

    @msgs = ();
    foreach (@_)
    {
	$msgs[$n++] = &msgtext($conn, $_, "article");
    }
}

sub bodies
{
    local($conn) = shift;
    local(*msgs) = shift;
    local($n) = 0;

    @msgs = ();
    foreach (@_)
    {
	$msgs[$n++] = &msgtext($conn, $_, "body");
    }
}

sub field
{
    local($conn) = $_[0];
    local($flname) = $_[1];
    local($num) = $_[2];	# may be a message id
    local($ret) = "";
    local($done) = 0;

    @flds = ();

    &chat'print($conn, "xhdr $flname $num\n");
    &chat'expect($conn, $xhdr_timeout,
		 "^221 .*$cr", '1',
		 "^4.. .*$cr", '0',
		 "TIMEOUT", 'die "Server timeout\n"',
		 "EOF", 'die "I/O error\n"') || return($ret);

    while (! $done)
    {
	&chat'expect($conn, $default_timeout,
		     "^\\.$cr", '$done = 1',
		     "^..*$cr", 'chop($&); chop($&); $ret = $&',
		     "TIMEOUT", 'die "Server timeout\n"',
		     "EOF", 'die "I/O error\n"');
    }
    $ret =~ s/^[^ ]* //;		# get rid of article number or message id
    $ret;
}

sub fields
{
    local($conn) = $_[0];
    local(*flds) = $_[1];
    local($flname) = $_[2];
    local($first) = $_[3];
    local($last) = $_[4];
    local($start, $end);
    local($lines);
    local($save_multi) = $*;

    @flds = ();

    $start = $first;
    $end = $start + 49;		# get 50 at a time
    while ($start < $last + 1)
    {
	$end = $last if $end > $last;

	&chat'print($conn, "xhdr $flname $start-$end\n");
	&chat'expect($conn, $default_timeout,
		     "^221 .*$cr", '1',
		     "^4.. .*$cr", '0',
		     "TIMEOUT", 'die "Server timeout\n"',
		     "EOF", 'die "I/O error\n"') || return;

	$#flds = $end - $first;	# pre-extend for efficiency

	$* = 1;
	$lines = &chat'expect($conn, $default_timeout,
			      "^\\.$cr", '$`',
			      "TIMEOUT", 'die "Server timeout\n"',
			      "EOF", 'die "I/O error\n"');
	$* = 0;
	grep(/^([0-9]+) .*/ && ($flds[$1 - $first] = $_),
	     split(/$cr/, $lines));
	$* = $save_multi;
	$start = $end + 1;	# ready for next loop
	$end = $start + 49;
    }
}

sub grepfield
{
    local($conn) = $_[0];
    local($flname) = $_[1];
    local($expr) = $_[2];
    local($first) = $_[3];
    local($last) = $_[4];
    local(@flds);
    local(@rtn) = ();

    &fields($conn, *flds, $flname, $first, $last);

    foreach (eval("grep($expr, \@flds)"))
    {
	$_ =~ /^([0-9]+) /;
	push(@rtn, $_) if $1 >= $first;
    }
    @rtn;
}

sub idgrepfield
{
    local($conn) = $_[0];
    local($flname) = $_[1];
    local($expr) = $_[2];
    local($first) = $_[3];
    local($last) = $_[4];
    local(@flds, @ids);
    local(@rtn) = ();

    &fields($conn, *flds, $flname, $first, $last);
    &fields($conn, *ids, "message-id", $first, $last);

    foreach (eval("grep($expr, \@flds)"))
    {
	$_ =~ /^([0-9]+) /;
	if ($1 >= $first)
	{
	    local($idx) = $1 - $first;
	    $ids[$idx] =~ s/^([0-9]+) /\1: /;
	    push(@rtn, $ids[$idx]);
	}
    }
    @rtn;
}

sub greptext
{
    local($conn) = $_[0];
    local($cmd) = $_[1];
    local($expr) = $_[2];
    local($first) = $_[3];
    local($last) = $_[4];
    local(@msg);
    local(@rtn) = ();
    local($art);

    foreach $art ($first..$last)
    {
	@msg = split(/$cr/, &msgtext($conn, $art, $cmd));
	@msg = eval("grep($expr, \@msg)");
	push(@rtn, grep($_ = "$art $_", @msg));	# tack on article number
    }
    @rtn;
}

sub idgreptext
{
    local($conn) = $_[0];
    local($all) = $_[1] == "article";
    local($expr) = $_[2];
    local($first) = $_[3];
    local($last) = $_[4];
    local(@msgs, @lines, @match);
    local(@rtn) = ();
    local(@ids);

    if ($all)
    {
	&articles($conn, *msgs, $first..$last);
    }
    else
    {
	&bodies($conn, *msgs, $first..$last);
    }
    &fields($conn, *ids, "message-id", $first, $last);

    $first = 0;			# now use $first as loop counter
    foreach (@msgs)
    {
	@lines = split(/\r\n/, $_);
	if (eval("grep($expr, \@lines)"))
	{
	    $ids[$first] =~ s/^([0-9]+) /\1: /;
	    push(@rtn, $ids[$first]);
	}
	$first++;
    }

    @rtn;
}

# get the read messages information from a newsrc file
sub getrc
{
    local ($fname) = $_[0];
    local ($ng, $artcls);

    if (! open(RC, "<$fname"))
    {
	warn("Can't open $fname: $!\n");
	$newsrc{$fname, 0} = 1;	# only scream once
	return;
    }

    while (<RC>)
    {
	chop;
	($ng, $artcls) = split(/[!: \t]+/, $_, 2);
	$newsrc{$fname, $ng} = $artcls ? $artcls : "0-0";
	$subscribed{$fname, $ng} = /^$groupmatchpat:/o;
	push(@ngorder, "$fname $ng");
    }
    $newsrc{$fname, 0} = 1;	# don't do this very often
    close (RC);
}

# return the list of already-read articles in the user's newsrc file for
# the specified news group.  if the $force flag is true, get the
# information whether subscribed or not.  returns false if not subscribed
# and not # $force'd.  returns "0-0" if the group exists in the newsrc file,
# but has no already-read list was encountered.
sub newsrc_get
{
    local ($fname, $ng, $force) = @_;

    &getrc($fname) if ! $newsrc{$fname, 0};
    $force ? $newsrc{$fname, $ng}
        : $subscribed{$fname, $ng} && $newsrc{$fname, $ng};
}

# replace or create a newsrc entry
sub newsrc_put
{
    local($fname, $ng, $ranges) = @_;

    &getrc($fname) if ! $newsrc{$fname, 0};
    if (! defined($newsrc{$fname, $ng}))
    {
	push(@ngorder, "$fname $ng");
	$subscribed{$fname, $ng} = 1;
    }
    $newsrc{$fname, $ng} = $ranges;
}

# merge newly read articles into the newsrc entry
sub newsrc_merge
{
    local($fname, $ng, $ranges) = @_;

    &getrc($fname) if ! $newsrc{$fname, 0};
    if (! defined($newsrc{$fname, $ng}))
    {
	push(@ngorder, "$fname $ng");
	$subscribed{$fname, $ng} = 1;
    }
    $newsrc{$fname, $ng} = &canon_union($newsrc{$fname, $ng}, $ranges);
}

# subscribe or unsubscribe to the indicated news group
sub newsrc_subscribe
{
    local($fname, $ng, $onoff) = @_;

    &getrc($fname) if ! $newsrc{$fname, 0};
    if (! defined($newsrc{$fname, $ng}))
    {
	push(@ngorder, "$fname $ng");
	# ok to leave $newsrc{$fname, $ng} undefined
    }
    $subscribed{$fname, $ng} = $onoff ? 1 : 0;
}

# return an array of newsgroups in the order of the newsrc file
sub newsrc_list
{
    local($fname) = $_[0];

    &getrc($fname) if ! $newsrc{$fname, 0};

    local(@ngs) = grep(/^$fname /, @ngorder);

    grep(s/^$fname //, @ngs);
    @ngs;
}

# update the newsrc file
sub newsrc_write
{
    local($fname) = $_[0];	# newsrc file name to write
    local(@ngs);
    local($rlist);

    open(FH, ">$fname") || die("Can't write to $fname: $!");
    @ngs = grep(/^$fname /, @ngorder);
    foreach (@ngs)
    {
	split(? ?);		# seperate file from newsgroup
	($rlist = $newsrc{$_[0], $_[1]}) =~ s/^0-0$//;
	printf(FH "%s%s %s\n", $_[1], $subscribed{$_[0], $_[1]} ? ":" : "!",
	       $rlist);
    }
    close(FH);
}

# comparison function used in canon_artlist
sub cmp_range
{
    $a <=> $b;			# #force numeric comparison
}

# take a list of article ranges and return a canonical list.  the
# list returned will be sorted in ascending order with no overlapping
# ranges.  the input and output are in string format.
sub canon_artlist
{
    local($l) = $_[0];
    local(@l);
    local(@can);

    $l =~ s/\s+//;		# annihilate spaces
    @l = split(/,/, $l);	# get ranges into array

    # flip any descending ranges
    grep(/(\d+)-(\d+)/ && ($2 < $1) && ($_ = "$2-$1"), @l);

    # expand single numbers to (redundant) range format
    grep(/^(\d+)$/ && ($_ = "$1-$1"), @l);

    # sort the array by first elements
    @l = sort(cmp_range @l);

    # build a canonical list by combining overlapping adjacent ranges
    ($min, $max) = ($l[0] =~ /(\d+)-(\d+)/);
    $min = 1 if $min <= 0;
    $max = 1 if $max <= 0;
    foreach $el (@l)
    {
	local($beg, $end) = ($el =~ /(\d+)-(\d+)/);

	if ($beg > $max + 1)
	{
	    push(@can, "$min-$max");
	    $min = $beg > 0 ? $beg : 1;
	    $max = $end;
	}
	else
	{
	    $max = $end if $end > $max;
	}
    }
    push(@can, "$min-$max");	# don't forget the last range

    # handle single number ranges
    grep(/(\d+)-(\d+)/ && $1 == $2 && ($_ = "$1"), @can);

    # return a string delimited by commas
    join(',', @can);
}

# return the article list of articles not included in the input list,
# but bound by a low and high value.  this is used to determine unread
# articles given the entry in the .newsrc file and the range of
# articles held by the nntpserver.
sub canon_inverse
{
    local($l) = shift;
    local($lo, $hi) = @_;
    local(@l) = ();

    # build an array of article ranges
    @l = split(/,/, $l);
    grep(/^(\d+)$/ && ($_ = "$1-$1"), @l);

    # find overlapping articles
    @l = grep(/(\d+)-(\d+)/ && ($1 <= $hi && $2 >= $lo), @l);
    return ("$lo-$hi") if ! @l;

    if (@l[0] <= $lo)
    {
	shift(@l) =~ /(\d+)-(\d+)/;
	$lo = $2 + 1;
    }

    grep(/(\d+)-(\d+)/ && ($_ = ($lo . "-" . ($1 - 1))) && ($lo = $2 + 1), @l);
    push(@l, "$lo-$hi") if ($lo <= $hi);

    # handle single number ranges
    grep(/(\d+)-(\d+)/ && $1 == $2 && ($_ = "$1"), @l);

    # return a string delimited by commas
    join(',', @l);
}

# intersect two canonical article lists and return the result
sub canon_isect
{
    local(@first) = split(/,/, $_[0]);
    local(@second) = split(/,/, $_[1]);
    local(@can) = ();

    return("") if (!@first || !@second);

    # expand single numbers to (redundant) range format
    grep(/^(\d+)$/ && ($_ = "$1-$1"), @first);
    grep(/^(\d+)$/ && ($_ = "$1-$1"), @second);

    local($lo, $hi, @overlap);
    foreach $el (@first)
    {
	($lo, $hi) = split(/-/, $el);
	@overlap = grep(/(\d+)-(\d+)/ && ($1 <= $hi && $2 >= $lo), @second);
	next if ! @overlap;	# save time if no overlapping ranges

	# fix up the first and last overlapping ranges
        $overlap[0] = "$lo-$2"
	    if ($overlap[0] =~ /(\d+)-(\d+)/) && ($1 < $lo);
	$overlap[$#overlap] = "$1-$hi"
	    if ($overlap[$#overlap] =~ /(\d+)-(\d+)/) && ($2 > $hi);
	push(@can, @overlap);
    }

    # handle single number ranges
    grep(/(\d+)-(\d+)/ && $1 == $2 && ($_ = "$1"), @can);

    # return a string delimited by commas
    join(',', @can);
}

# return the canonical union of two canonical article lists
sub canon_union
{
    &canon_artlist($_[0] . "," . $_[1]); # Hah!
}

# return an array of ranges that is the expansion of the canonical
# article list.
sub canon_expand
{
    local (@ret) = split(/,/, $_[0]);
    grep(/^(\d+)$/ && ($_ = "$1-$1"), @ret);
    @ret;
}

1;