#!/bin/sh
# -*- perl -*-
# This code allows us to start perl from our path or an environment variable
# rather than hardcoding a path into the #! line.  It works from sh or csh.
(exit $?0) && eval 'exec ${QPERLQ-perl} -x $0 ${1+"$@"}'
if (! $?QPERLQ) setenv QPERLQ perl
exec $QPERLQ -x $0 $argv:q

#!/usr/local/bin/perl -w
#
# $Id: split_rmail,v 1.11 1999/04/08 21:09:04 ejb Exp $
# $Source: /home/ejb/scripts/RCS/split_rmail,v $
# $Author: ejb $
#
#
# This script splits an rmail file into individual files with one
# message per file.  The files are named yyyy-mm-dd-hh:mm:ss-nn based on 
# parsing the Date: header.
#

require 5.002;
use strict;

my $whoami = ($0 =~ m,([^/\\]*)$,) ? $1 : $0;
#my $dirname = ($0 =~ m,(.*)[/\\][^/\\]+$,) ? $1 : ".";
    
my $daynames = '(?:sun|mon|tue|wed|thu|fri|sat)';
my @monthnames = qw(jan feb mar apr may jun jul aug sep oct nov dec);
my $monthnames = '(?:' . join('|', @monthnames) . ')';
my $i;
my %monthnames = ();
for ($i = 1; $i <= 12; $i++)
{
    $monthnames{$monthnames[$i-1]} = $i;
}

my $check = 0;
my $force = 0;
my $rmail = undef;
while (@ARGV)
{
    my $arg = shift(@ARGV);
    if ($arg eq "-check")
    {
	$check = 1;
    }
    elsif ($arg eq "-force")
    {
	$force = 1;
    }
    else
    {
	$rmail = $arg;
	last;
    }
}
&usage() if ((@ARGV > 0) || (! defined($rmail)));
my $dir = ($rmail =~ m,^(.*)/[^/]+$,) ? $1 : ".";

my ($month, $year) = (localtime(time))[4,5];
my $this_ym = sprintf("%04d-%02d", $year + 1900, $month + 1);

die "$whoami: $rmail must exist and be a plain file\n"
    unless (-f $rmail && (! -l $rmail));

open(RMAIL, "<$rmail") || die "$whoami: can't open $rmail to read: $!\n";
my $header = scalar(<RMAIL>);
die "$whoami: $rmail does not look like an RMAIL file\n"
    unless ($header =~ m/^BABYL OPTIONS:/);
close(RMAIL);

opendir(DIR, "$dir") or die "$whoami: can't opendir $dir: $!\n";
my @entries = readdir(DIR) or die "$whoami: can't readdir $dir: $!\n";
closedir(DIR);

# Dir entries is used to keep track of the maximum sequence number of
# messages for a given timestamp in $dir.
my %dir_entries = ();
for (@entries)
{
    if (m/^(\d{4}-\d{2}-\d{2}-\d{2}:\d{2}:\d{2})-(\d+)$/)
    {
	my $date = $1;
	my $seqno = $2;
	my $old_seqno = 0;
	if (exists $dir_entries{$date})
	{
	    $old_seqno = $dir_entries{$date};
	}
	if ($seqno > $old_seqno)
	{
	    $dir_entries{$date} = $seqno;
	}
    }
}

$/ = "\037";
open(RMAIL, "<$rmail") || die "$whoami: can't open $rmail to read: $!\n";

$header = scalar(<RMAIL>); # throw away previously verfied first record
my $msg;
my $msgno = 0;
my $old = 0;
my $new = 0;
my $newfile = "$rmail.tmp";
open(NEW, ">$newfile") or die "$whoami: can't open $newfile: $!\n";
print NEW $header or die "$whoami: can't write to $newfile: !$\n";
while (defined ($msg = <RMAIL>))
{
    $msgno++;
    $msg =~ s/^\014\n//;
    $msg =~ s/\037$//;
    my @lines = split("\n", $msg);
    my $datestring = "";

  line:
    for (@lines)
    {
	if (m/^\s*$/)
	{
	    die "$whoami: $rmail: message $msgno: no Date header found\n";
	}
	elsif (m/^Date:(.*)$/)
	{
	    $datestring = &parse_date($1);
	    last line;
	}
    }

    if ($datestring eq "")
    {
	die "$whoami: INTERNAL ERROR: " .
	    "$rmail/$msgno: failed to compute datestring";
    }

    my $seqno = &get_seqno($datestring);
    my $name = "$datestring-$seqno";

    next if $check;

    if ($force || ($datestring lt $this_ym))
    {
	$old++;
	if (&duplicate($datestring, $seqno, $msg))
	{
	    print "$whoami: message $msgno is a duplicate.\n";
	}
	else
	{
	    open(MSG, ">$dir/$name") or
		die "$whoami: can't create $dir/$name: $!\n";
	    print MSG $msg or
		die "$whoami: saving message $dir/$name failed: $!\n";
	    close(MSG) or die "$whoami: closing $dir/$name failed: $!\n";
	    chmod 0444, "$dir/$name";
	}
    }
    else
    {
	print NEW "\014\n$msg\037" or
	    die "$whoami: can't append message to $newfile: $!\n";
	$new++;
    }
}

close(RMAIL);
close(NEW) or die "$whoami: close $newfile failed: $!\n";

if (! $check)
{
    if ($old == 0)
    {
	print "No old messages found in $rmail\n";
	unlink $newfile;
    }
    else
    {
	my $bak = "$rmail.bak";
	unlink $bak;
	if (rename $rmail, $bak)
	{
	    print "$whoami: renamed $rmail to $bak\n";
	}
	if ($new)
	{
	    if (rename $newfile, $rmail)
	    {
		print "$whoami: created new $rmail file\n";
	    }
	}
	else
	{
	    if (! $force)
	    {
		print "$whoami: no messages from this month found\n";
	    }
	    unlink $newfile;
	}
    }
}

sub parse_date
{
    my $date = shift;
    my $odate = $date;
    my $year = undef;
    my $month = undef;
    my $day = undef;
    my $hour = undef;
    my $minute = undef;
    my $second = undef;

    # Kill day of week, if any
    $date =~ s/($daynames)(, ?)?//i;

    if ($date =~ s/(\d{1,2}) +($monthnames) +(\d{2}(?:\d{2})?)//i)
    {
	$year = $3;
	$month = $2;
	$month =~ tr/A-Z/a-z/;
	$month = $monthnames{$month};
	$day = $1;
	if ($year =~ m/^\d{2}$/)
	{
	    $year += 1900;
	}
    }
    if ($date =~ s/(\d{1,2}):(\d{1,2})(:(\d{1,2}))?//)
    {
	$hour = $1;
	$minute = $2;
	$second = ((defined($3)) ? $4 : 0);
    }

    $date =~ s/[\+\-]?\d{4}//g;
    $date =~ s/[\+\-]([\d\+\-]+(:\d{1,2})?)//;
    $date =~ s/\([A-Z\s]+[A-Z]{3}\)//;
    $date =~ s/\(?[A-Z]{3}( [A-Z]{3})*([\+\-]([\d\+\-]+))?\)?//i;
    $date =~ s/(\w+\s+)+Time//i;
    $date =~ s/ [A-Z]{1,2}$//;
    $date =~ s/\"//g;
    $date =~ s/^\s+//;
    $date =~ s/\s*$//;

    if ($date ne "")
    {
	die "$whoami: $rmail: message $msgno: leftover from $odate: $date\n";
    }
    if (! ((defined($year) && defined($month) && defined($day) &&
	    defined($hour) && defined($minute) && defined($second))))
    {
	die "$whoami: $rmail: message $msgno: missting stuff from $odate\n";
    }
    sprintf("%04d-%02d-%02d-%02d:%02d:%02d",
	    $year, $month, $day, $hour, $minute, $second);
}

sub get_seqno
{
    my $datestring = shift;
    if (! exists $dir_entries{$datestring})
    {
	$dir_entries{$datestring} = 0;
    }
    ++$dir_entries{$datestring};
}

sub usage
{
    die "Usage: $whoami [ -check | -force ] rmailfile\n";
}

sub duplicate
{
    my ($datestring, $seqno, $msg) = @_;
    return 0 if $seqno == 1;

    my $i;
    for ($i = 1; $i < $seqno; $i++)
    {
	if (open(MSG, "<$dir/$datestring-$i"))
	{
	    local($/) = undef;
	    my $othermsg = <MSG>;
	    close(MSG);
	    return 1 if ($msg eq $othermsg);
	}
    }

    0;
}
