#!/usr/local/bin/perl 
#/****************************************************
#*****************************************************
#**
#** SOURCE NAME | pgnews, (Perl Get News)
#**             | 
#**    SYNOPSIS | pgnews [-h hostname]
#**             | 
#** DESCRIPTION | pgnews goes to a specified NNTP server
#**             | and retrieves news articles by newsgroup
#**             | and saves them to a specified file in
#**             | mailbox format.
#**             | Please see the NOTES section.
#**             | 
#**     CHANGES | Programmer:         Date:     Reason/Comments
#**             | Jeffrey B. McGough  09-05-91  initial
#**             | Jeffrey B. McGough  09-06-91  Added select (see FIXES)
#**             | Jeffrey B. McGough  10-06-91  Fixed erronious end of article
#**             | Jeffrey B. McGough  07-09-92  Fixed dup article bug
#**             | Jeffrey B. McGough  07-13-92  (See FIXES)
#**             | Jeffrey B. McGough  07-27-92  VERSION 2.0 (See FIXES)
#**             | 
#**       NOTES | Pgnews needs a file named .pgnews to read
#**             | its newsgroup, last message, and savefile from.
#**             | .pgnews format is:
#**             | newsgroup number savefile
#**             | Example:
#**             | comp.unix.wizards 7800 cuw
#**             | comp.unix.shell 3203 cus
#**             | comp.unix.questions 546 cuq
#**             | 
#**             | comp.unix.wizards will be saved to file cuw in
#**             | mailbox format starting at article 7800 etc.
#**             | 
#**       FIXES | 09-06-91: added select on S to keep the client
#**             | from getting out of sync.
#**             | Jeffrey B. McGough  mcgough@wrdis01.af.mil
#**             | 
#**             | 10-06-91: Fixed an overlooked END of ARTICLE
#**             | bug... Thanks to a member(s) of the issos
#**             | group at Ohio State.
#**             | Jeffrey B. McGough  mcgough@wrdis01.af.mil
#**             | 
#**             | 07-09-92: Fixed a duplicate article bug
#**             | pointed out to me by kenr@bridge.cc.rochester.edu
#**             | and gort@bridge.cc.rochester.edu. Thanks
#**             | for the help with the fix.
#**             | Jeffrey B. McGough  mcgough@wrdis01.af.mil
#**             | 
#**             | 07-13-92: Added code to take a -h option
#**             | for a host to use as a server...
#**             | Thanks to Barry Hassler...
#**             | Added code written by sherman@unx.sas.com
#**             | to make the header From_ line more RFC976
#**             | compatable for the ELM mailer...
#**             | Jeffrey B. McGough  mcgough@wrdis01.af.mil
#**             | 
#**             | 07-27-92: Added 15 minute timeout to the
#**             | select stuff. Just in case the server goes
#**             | away we won't sit around forever listening
#**             | to a dead connection...
#**             | Exception to the time out:
#**             | While in the loop where we slurp up the
#**             | article from the server, the select will not
#**             | work...
#**             | A friend of mine (Barry Hassler) seems to think
#**             | that at that point Perl has already sucked
#**             | the whole thing into its own buffers...
#**             | Care to comment Larry?
#**             | Anyway I commented out that select. If anyone
#**             | has any ideas please let me know...
#**             | Went ahead and built VERSION 2.0
#**             | Jeffrey B. McGough  mcgough@wrdis01.af.mil
#**             | 
#****************************************************/

require 'sys/socket.ph'; # The way I coded the sockets is this necessary?

$VERSION = '2.0';

$port = 119; # For NNTP
# HOSTNAME for the server...
#$host = 'localhost';
$host = 'news.mit.edu';
# Pack format...
$sockaddr = 'S n a4 x8';

$DOMAIN = 2;
$STYLE = 1;

while ($arg = shift(@ARGV))
{
	if ($arg =~ /-.*h/)
	{
		$host=shift(@ARGV);
		if ($host eq "")
		{
			printf ("Need host name after -h\n");
			exit 1;
		}
		next;
	}
	if ($arg =~ /-group/)
	{
		$group = shift;
		print "Group: $group\n";
	}
	if ($arg =~ /-articles/)
	{
		$articles=1;
		# from <tchrist>'s linerange program
		($_ = '('. join(",", @ARGV) .')' ) =~ s/-/../g;
		s/.*/"join(' ',$&)"/ee;
		@articlenums=split;
		print "Articles: ",join(',',@articlenums),"\n";
	}

	printf "Unknown option: '%s'\n", $arg;
	exit 1;
}


$newsfile = '.pgnews';
$nnewsfile = '.pgnews.new';

$rin = $rout = '';

($name, $aliases, $proto) = getprotobyname('tcp');
($name, $aliases, $type, $len, $hostaddr) = gethostbyname($host);

$sock = pack($sockaddr, $DOMAIN, $port, $hostaddr);

socket(S, $DOMAIN, $STYLE, $proto) || die $!;
connect(S, $sock) || die $!;
select(S); $| = 1; select(STDOUT);
#set up for select
vec($rin, fileno(S), 1) = 1;
#this select will block until the server gives us something.
$nfound = select($rout=$rin, undef, undef, 900);
if ($nfound == 0)
{
	print "Socket timed out...";
	exit 1;
}
$_ = <S>; #Read one line to see if we got a good connection.
if ($_ !~ /^2../)
{
	print;
	die "Service unavailable";
}
open(GRP, "< $newsfile") || die "Could not open $newsfile: $!";
open(NGRP, "> $nnewsfile") || die "Could not open $nnewsfile: $!";
select(NGRP); $| = 1; select(STDOUT);
while(<GRP>)
{
	chop;
	($grp, $lgot, $file) = split;
	print(S "group $grp\n");
	#this select will block until the server gives us something.
	$nfound = select($rout=$rin, undef, undef, 900);
	if ($nfound == 0)
	{
		print "Socket timed out...";
		exit 1;
	}
	$_ = <S>; #Make sure the group change worked...
	($stat, $num, $first, $last) = split;
	if( $stat !~ /^2../ )
	{
		print;
		warn "Bad group";
		print(NGRP "$grp $lgot $file\n");
		next;
	}
	# good group open output file...
	open(OUTFILE, ">>$file") || die "Could not open $file";

	if ( $first > $lgot )
	{
		$lgot = $first;
	}
	if ( $lgot <= $last )
	{
		foreach $art ($lgot..$last)
		{
			print(S "article $art\n");
			#this select will block until the server gives us something.
			$nfound = select($rout=$rin, undef, undef, 900);
			if ($nfound == 0)
			{
				print "Socket timed out...";
				exit 1;
			}
			$_ = <S>; #get error if one exists
			if($_ !~ /^2../)
			{
				print;
				warn "No article by that number";
			}
			else
			{
# We now slurp the whole article into the array article...
# HMMM is this good or bad...
# It gives me the WILLIES   [:^)    Jeffrey B. McGough
				@article = ();
				do
				{
# The next few lines have been commented out because they don't work
# JBM 07-27-92
#					$nfound = select($rout=$rin, undef, undef, 900);
#					if ($nfound == 0)
#					{
#						print "Socket timed out...";
#						exit 1;
#					}
					$lgot = $art;
					$_ = <S>;
					s/\r//;
					if( $_ ne ".\n")
					{
						s/^\.//;
						push(@article,$_);
						s/^\./../;
					}	
					else
					{
						push(@article,"\n");
					}
				} until $_ eq ".\n";
				# replace the Path: with a from line
				splice(@article, 0, 1, &from_line(@article));
				print OUTFILE @article;
			}
		}	
	}
	else
	{
		$lgot -= 1;
	}
	close(OUTFILE);
	$lgot += 1;
	print(NGRP "$grp $lgot $file\n");
}
close(NGRP);
close(GRP);
system("mv $nnewsfile $newsfile");
print( S "quit\n");
close(S);

# We parse through @article to build a more proper From_ line
sub from_line
{

	local(@article) = @_;

	local($header) = $true;		# we are in the header of the mail
	local($date,$month,$year,$time,$day);

	for (@article)
	{
		if ($header == $true)
		{
			if (/^Path: ([^ \n]+)/)
			{
				$path = $1;
			}
			elsif (/^Date: /)
			{
				if (/^Date: (\d*) (\D*) (\d*) (\S*)/) 
				{
					$date = $1;
					$month = $2;
					$year = $3; 
					$time = $4;
				}
				elsif (/^Date: (\D*), (\d*) (\D*) (\d*) (\S*)/)
				{
					$day = $1;
					$date = $2;
					$month = $3;
					$year = $4; 
					$time = $5;
				}
				$year =~ s/^([0-9])([0-9])$/19$1$2/;	# convert 2 digit year to 4 
				if ($day eq "")
				{
					$day = &day_of_week($month,$date,$year);
				}
			}
			$header = $false if /^\n$/;
		}
	}
	$from_line = sprintf("From %s %s %s %2s %s %s %s\n",
		$path, $day, $month, $date, $time, $year);
	return($from_line);
}

# This gives us the day of week from the date...
sub day_of_week
{
	local($month,$date,$year) = @_;
	local($day);


	if ($month <= 2)
	{ 
		$month += 12;
		$year--;
	}

	$day = ($date + $month * 2 + int(($month + 1) * 6 / 10) + $year + 
		int($year / 4) - int($year / 100) + int($year / 400) + 2) % 7;

	if ($day == 0)
	{
 		$day = 7;
	}

	return (NULL, Sun, Mon, Tue, Wed, Thu, Fri, Sat, Sun)[$day];
}
