#!/usr/athena/bin/perl 
# $Header: /afs/sipb.mit.edu/machine/charon2/src/RCS/nightly_jobs.pl,v 1.3 1999/10/01 05:26:07 jweiss Exp $

# require 'syslog.pl';
use Sys::Syslog;
require 'errno.ph';
eval "require 'sys/resource.ph';";

# Takes a list of jobs and runs them one after the other, in a subshell,
# with a separate AFS process group for each one.  Each job also has an
# owner, and if the job has any output, it is sent to the owner.

# ---- Built-in defaults.  $jobfile, $pidfile, $starting_job and $debug
# ---- can be overridden by the command-line options named in $usage.
$debug = 0;
$starting_job = 1;
$jobfile = '/etc/nightly_jobs.conf';
$pidfile = '/etc/nightly_jobs.pid';
$tf = "/var/tmp/nightly.$$";	# for output of each job
$usage = "Usage: $0 [ -conf conf-file ] [ -pidfile pid-file ]
	[ -start job-number] [ -debug ]";

# ---- Time units, in seconds.
$ONE_MINUTE = 60;
$ONE_HOUR = 60 * $ONE_MINUTE;
$ONE_DAY = 24 * $ONE_HOUR;

# ---- Back-off policy used while waiting for a previous run to finish:
# ---- first sleep, ceiling for a single sleep, and total time allowed.
$wait_interval = $ONE_MINUTE;
$max_wait_interval = $ONE_HOUR;
$wait_maximum = $ONE_DAY;

# ---- Mailer used for job reports and error reports; prefer the
# ---- /usr/sbin copy when it exists as a plain file.
$sendmail = (-f "/usr/sbin/sendmail")
    ? "/usr/sbin/sendmail"
    : "/usr/lib/sendmail";

# Log under our own program name, with the PID, to facility local1.
&openlog($0, 'pid', 'local1');

# Request nice value 10 for this process.  This is a string eval so that
# any failure (for example PRIO_PROCESS not being defined) lands in $@
# and is merely logged instead of aborting the run.
eval "setpriority(&PRIO_PROCESS, 0, 10);";
if ($@) {
    &syslog('warning', "Couldn't setpriority: %s", $@);
}

# Parse leading command-line options.  Each recognised option is shifted
# off @ARGV; "--" ends option processing.  Any problem is reported through
# &die together with the usage text.
#
# An option argument counts as missing only when it is absent or empty,
# so a value such as "0" is a legal file name.  -start must be a positive
# integer: job numbers are compared numerically, so a non-numeric value
# would otherwise be treated as 0 and silently run every job.
while (($_ = $ARGV[0]) && /^-/) {
    shift;
    if (/^-conf$/) {
        $jobfile = shift;
        if (!defined($jobfile) || $jobfile eq '') {
            &die("Missing argument to -conf command-line option.\n$usage\n");
        }
    }
    elsif (/^-pidfile$/) {
        $pidfile = shift;
        if (!defined($pidfile) || $pidfile eq '') {
            &die("Missing argument to -pidfile command-line option.\n$usage\n");
        }
    }
    elsif (/^-start$/) {
        $starting_job = shift;
        if (!defined($starting_job) || $starting_job eq '') {
            &die("Missing argument to $_ command-line option.\n$usage\n");
        }
        if ($starting_job !~ /^\d+$/ || $starting_job < 1) {
            &die("Argument to -start must be a positive job number, not \"$starting_job\".\n$usage\n");
        }
    }
    elsif (/^-debug$/) {
        $debug++;
    }
    elsif (/^--$/) {
        last;
    }
    else {
        &die("Unknown command-line option $_.\n$usage\n");
    }
}

# No positional arguments are accepted.
&die($usage) if (@ARGV > 0);

# Acquire the run lock.
#
# Our PID is written to a private file "$pidfile.$$", which is then
# hard-linked to $pidfile.  link() fails with EEXIST while another run
# holds the lock, and the holder's PID is then read back from $pidfile:
#   - holder is gone (ESRCH): remove the stale lock and retry the link;
#   - holder is alive: sleep with a doubling back-off, and remember (via
#     $important_only) that this run started late.
# The wait is abandoned once $wait_total exceeds $wait_maximum.
open(NEWPID, ">$pidfile.$$") || &die("Opening $pidfile.$$ for write: $!");
# Parenthesised so that || tests print's result, not the string argument.
print(NEWPID "$$\n") || &die("Writing to $pidfile.$$: $!");
close(NEWPID) || &die("Closing $pidfile.$$: $!");

$wait_total = 0;

while (! link("$pidfile.$$", $pidfile)) {
    &die("Link to $pidfile.$$ from $pidfile failed: $!") if ($! != &EEXIST);

    # Read the holder's PID as data.  (Evaluating the file with "do"
    # would execute its contents and would search @INC for a relative
    # path.)
    $oldpid = 0;
    if (open(OLDPID, "< $pidfile")) {
        $pid_line = <OLDPID>;
        close(OLDPID);
        if (defined($pid_line) && $pid_line =~ /^\s*(\d+)\s*$/) {
            $oldpid = $1;
        }
    }
    if (! $oldpid) {
        if (-f $pidfile) {
            &die("$pidfile is corrupt or unreadable (can't get PID from it)");
        }
        # The lock vanished between link() and the read; just try again.
        next;
    }

    if (! kill(0, $oldpid)) {
        &die("Couldn't signal $oldpid: $!") if ($! != &ESRCH);
        &syslog('warning', "Old run (pid %d) appears to have died.  Removing %s and continuing.", $oldpid, $pidfile);
        if (! unlink($pidfile)) {
            $unlink_error = "$!";
            # A lock we cannot remove would make the retry spin forever.
            &die("Couldn't remove stale $pidfile: $unlink_error")
                if (-e $pidfile);
        }
        # Retry the link so that we really do hold the lock; leaving the
        # loop here would let the run proceed with no pidfile in place.
        next;
    }

    $important_only++;
    &die("Waited $wait_total seconds for PID $oldpid to die.  Giving up and exiting.")
        if ($wait_total > $wait_maximum);
    &syslog('notice',
            "Old run (pid %d) is still running.  Sleeping %d seconds.",
            $oldpid, $wait_interval);
    sleep $wait_interval;
    $wait_total += $wait_interval;
    $wait_interval *= 2 if ($wait_interval < $max_wait_interval);
}

# The link succeeded: $pidfile is ours and the private name can go.
unlink("$pidfile.$$");
$we_own_the_lock++;

# Main loop: run every job from $jobfile in turn.
#
# For each job returned by &next_job:
#   - skip it if its number is below $starting_job, or if this run is
#     restricted to important jobs and the job is not marked important;
#   - feed $job_text to $shell with stdout and stderr captured in $tf;
#   - if the job exited non-zero, was killed by a signal, or produced any
#     output, mail a report to $owner.
# Every print is parenthesised so that "|| ..." tests the result of print
# itself rather than the (always true) string being printed.
open(JOBS, $jobfile) || &die("Can't open $jobfile: $!");

while (&next_job()) {
    unlink($tf);

    if ($job_number < $starting_job) {
        &syslog('info', "Skipping job %d (%s) because it's lower than requested starting job %d",
                $job_number, $name, $starting_job);
        if ($debug) {
            print STDERR "Skipping job $job_number ($name) because it's lower than requested starting job $starting_job\n";
        }
        next;
    }

    if ($important_only && !$important) {
        &syslog('notice', "Skipping unimportant job %d (%s)",
                $job_number, $name);
        next;
    }

    &syslog('info', "Starting job %d (%s)", $job_number, $name);

    open(SHELL, "|$shell > $tf 2>&1") || &die("Running $shell > $tf: $!");

    print(SHELL $job_text) || &die("Writing to $shell: $!");

    close(SHELL);

    # $? holds the exit code in the high byte and the terminating signal
    # (if any) in the low seven bits; a signal death has exit code 0.
    $status = $? >> 8;
    $signal = $? & 127;
    $output = (! -z $tf);

    if ($status || $signal || $output) {
        if ($debug) {
            print STDERR
                "Job $job_number ($name) exited with status $status.\n"
                if ($status);
            print STDERR
                "Job $job_number ($name) was killed by signal $signal.\n"
                if ($signal);
            if ($output) {
                print STDERR
                    "Job $job_number ($name) had output, starting with:\n";
                # Preview at most the first ten lines that actually exist.
                if (open(HEAD, "< $tf")) {
                    $shown = 0;
                    while ($shown++ < 10 && defined($line = <HEAD>)) {
                        print STDERR "\t$line";
                    }
                    close(HEAD);
                }
            }
        }

        open(MAIL, "| $sendmail -t") || &die("Running $sendmail: $!");

        # Failures inside the eval use the built-in die with a complete
        # message; &die is called exactly once afterwards, so an error is
        # mailed and logged a single time with the right description.
        eval {
            print(MAIL
                  "To: $owner\n",
                  "Subject: Report on your nightly job",
                  $name ? " ($name)\n\n" : "\n\n")
                || die "Writing to $sendmail: $!\n";
            print(MAIL "Report on your job:\n\n$job_text\n")
                || die "Writing to $sendmail: $!\n";
            if ($status) {
                print(MAIL "The job exited with status $status.\n\n")
                    || die "Writing to $sendmail: $!\n";
            }
            if ($signal) {
                print(MAIL "The job was killed by signal $signal.\n\n")
                    || die "Writing to $sendmail: $!\n";
            }
            if ($output) {
                print(MAIL "The job produced the following output:\n\n")
                    || die "Writing to $sendmail: $!\n";
                open(OUTPUT, $tf) || die "Opening $tf: $!\n";
                while (<OUTPUT>) {
                    print(MAIL $_) || die "Writing to $sendmail: $!\n";
                }
                close(OUTPUT);
            }
        };
        if ($@) {
            chomp($mail_error = $@);
            &die($mail_error);
        }
        close(MAIL);
        &die("$sendmail exited with non-zero status (job number $job_number ($name))")
            if ($?);
    }

    &syslog('info', "Finished job %d (%s)", $job_number, $name);
}

close(JOBS);

# Normal completion: release the lock and remove the scratch file.
unlink($pidfile, $tf);

# next_job -- read the next job description from the JOBS filehandle.
#
# Resets the per-job globals to their defaults ($owner "root", $shell
# "/bin/sh", $prefix '/^/', $terminator '/^$/', $important 0, $name
# undefined), increments $job_number, and then:
#   1. skips comment (#) and blank lines, and evaluates the first other
#      line as Perl code -- presumably so that it can override the
#      defaults above (confirm against the configuration file format);
#   2. reads following lines until one satisfies the $terminator
#      expression, appending to $job_text each line that satisfies the
#      $prefix expression (other lines are dropped).
# Both expressions are evaluated as Perl against the current line in $_.
#
# Returns $job_text; an empty value tells the caller there are no more
# jobs.  Calls &die on any evaluation error.
#
# NOTE(review): the configuration file is executed as Perl code, so it
# must only be writable by users trusted to run code as this script.
sub next_job {
    $owner = "root";
    $job_text = "";
    $name = undef;
    $job_number++;
    $prefix = '/^/';
    $terminator = '/^$/';
    $shell = "/bin/sh";
    $important = 0;
    local($_);

    while (<JOBS>) {
        next if (/^\#/ || /^\s*$/);
        # chomp only strips a trailing newline, so a final line without
        # one does not lose its last character.
        chomp;
        eval;
        &die("Error in Perl configuration line $_: $@") if ($@);
        last;
    }

    while (<JOBS>) {
        $ret = eval $terminator;
        &die("Error evaluating terminator \"$terminator\" for job $job_number ($name): $@") if ($@);
        last if ($ret);

        $ret = eval $prefix;
        &die("Error evaluating prefix \"$prefix\" for job $job_number ($name): $@") if ($@);
        $job_text .= $_ if ($ret);
    }

    if ($debug && $job_text) {
        local($text) = $job_text;
        # Indent every line of the job text.  /m makes ^ match at each
        # line start; the old $* variable is gone from current Perls.
        $text =~ s/^/\t/gm;

        print STDERR
            "owner\t\t$owner\n" .
            "name\t\t$name\n" .
            "job number\t$job_number\n" .
            "prefix\t\t$prefix\n" .
            "terminator\t$terminator\n" .
            "shell\t\t$shell\n" .
            "important\t$important\n" .
            "job text\n" .
            "$text\n";
    }

    return $job_text;
}

# die -- fatal-error handler, invoked as &die(MESSAGE).
#
# Cleans up the scratch file and our private pidfile (plus the shared
# pidfile when this process holds the lock), logs MESSAGE to syslog at
# level 'err', mails it to root through $sendmail, and finally terminates
# the script with the built-in die.  Never returns.
sub die {
    my ($message) = @_;

    # Remove whatever this run may have left on disk.
    unlink($tf);
    unlink("$pidfile.$$");
    if ($we_own_the_lock) {
        unlink($pidfile);
    }

    &syslog('err', "%s", $message);

    # If the mailer itself cannot be started, fall straight through to
    # the built-in die with that failure instead.
    unless (open(MAIL, "|$sendmail -t")) {
        die "Running $sendmail: $!\n";
    }
    print MAIL "Subject: Error from $0\nTo: root\n\n" . $message . "\n";
    close(MAIL);

    die "$0: $message\n";
}
