package lopip;



#############
#           #
# CONSTANTS #
#           #
#############

$valid_char = '*';		# marks field that should be considered valid
$hidden_char = '+';		# marks field that should not be displayed on
				# clean output
$special_char = '&';		# marks field that is "special"
$important_char = '!';		# marks a field whose change is
				# important and should show up in the
				# diff posting

$special_chars = "$valid_char$hidden_char$special_char$important_char";
				# all special characters that can appear at the
				# beginning of fields, for use in regular
				# expressions

$start_time = time();		# used to generate new IDs

@aux_fields = (".*archive-name", "date", "summary", "posting-record");

$debug = 0;


################
#              #
# INPUT/OUTPUT #
#              #
################

# Read data into an array of records from a file (whose name is passed
# in) and an auxiliary file (whose name is derived from the main
# file).  The array of records is returned.

sub new_id {
    "$$." . $start_time++;
}

sub read_data {
    local($file) = $_[0];
    local($/) = "\n\n";
    local($*) = 0;
    local(@records);
    local(%main);
    local($id);

    open(MAIN, $file) || die "Opening $file: $!.\n";

    while (<MAIN>) {
	# Strip all blank lines from the beginning of the record, and
 	# make sure that there's a single newline at the end of the
 	# record.  Then, toss any records that consist of only a
 	# single newline.  This prevents there from being problems
 	# when there are extra newlines in between records or at the
 	# end of the file.
	s/^\n*//;
	s/\n*$/\n/;
	next if ($_ eq "\n");
	die "Multiple ID fields in file $file, record:\n$_" if (&is_multiple('id', $_));
	if (! ($id = &field_value($_, 'id'))) {
	    $id = &new_id;
	    $_ = &set_field_internal($_, 'ID', $id, 0, 0);
	    $_ = &mark_hidden($_, 'ID');
	}
	push(@records, $_);
	$main{$id} = @records - 1;
	if ($debug && ((@records % 100) == 0)) {
	    print STDERR @records + 0, "\n";
	}
    }

    close(MAIN);

    open(AUX, "$file.aux") || die "Opening $file.aux: $!.\n";

    while (<AUX>) {
	s/^\n*//;
	s/\n*$/\n/;
	next if ($_ eq "\n");
	if (! ($id = &field_value($_, 'id'))) {
	    die "Missing ID in auxiliary file $file.aux entry:\n$_";
	}
	next if (! defined($main{$id}));
	($_) = &strip_field_lines($_, 'id');
	$records[$main{$id}] = &merge_record($records[$main{$id}], $_);
    }

    close(AUX);

    @records;
}

# Write to the specified file, and to its associated auxiliary file,
# the specified array of records.  The data being written is PASSED BY
# REFERENCE.

sub write_data {
    local($file, *records) = @_;
    local($main, $aux, $id);
    local(%ids);
    local($*) = 1;
    local($newmain) = "$file.new$$";
    local($newaux) = "$file.aux.new$$";
    local($_);

    open(MAIN, ">$newmain") || (warn("Opening $newmain for write: $!.\n"),
			     goto bad);
    open(AUX, ">$newaux") || 
	(warn("Opening $newaux for write: $!.\n"),
	 goto bad);

    for (@records) {
	if (&is_multiple('id', $_)) {
	    warn "Multiple ID fields in file $file, record:\n$_";
	    goto bad;
	}

	(($_, $id) = &check_id($_)) || goto bad;

	if ($ids{$id}++) {
	    warn "Duplicate ID $id in data in file $file!\n";
	    goto bad;
	}

	($main, $aux) = &split_record($_);
	return undef if (! $main);

	print MAIN $main, "\n" || (warn("Writing $newmain: $!.\n"),
				   goto bad);
	if ($aux) {
	    $aux = &set_field_internal($aux, 'ID', $id, 0, 0);
	    $aux = &mark_hidden($aux, 'ID');
	    print AUX $aux, "\n" ||
		(warn("Writing $newaux: $!.\n"), goto bad);
	}
    }

    close(MAIN) || (warn("Closing $newmain: $!.\n"), goto bad);
    close(AUX) || (warn("Closing $newaux: $!.\n"), goto bad);

    unlink("$file.old", "$file.aux.old");
    link($file, "$file.old");
    link("$file.aux", "$file.aux.old");

    rename($newmain, $file) ||
	(warn("Renaming $newmain to $file: $!.\n"), goto bad);
    rename($newaux, "$file.aux") ||
	(warn("Renaming $newaux to $file.aux: $!.\n"), goto bad);

    return 1;

  bad:
    close(MAIN); close(AUX); # may already be closed; we don't care
    unlink($newmain, $newaux);
    undef;
}

# Write to the specified file handle the specified records, in a format
# suitable for the List of Periodic Informational Postings.
# 
# Data is PASSED BY REFERENCE.

sub posting_write {
    local($fhandle, *records) = @_;
    local($*) = 0;
    local($_, $i);
    
    foreach $i (@records) {
	$_ = $i;
	s/\n--\n/\n/;
	next if (&is_special($_, 'subject'));
	while ($_ ne "") {
	    if (/^[$special_chars]*[$hidden_char]/) {
		s/.*\n//;
		while (s/^[ \t].*\n//) { }
	    }
	    else {
		s/^[$special_chars]+//;
		s/(.*\n)//;
		print($fhandle " $1");
		print($fhandle " $1") while (s/^([ \t].*\n)//);
	    }
	}
	print($fhandle "\n");
    }
}



#####################
#                   #
# DATA REFORMATTING #
#                   #
#####################

# Takes an array of records and sorts them in place.
# The program prints an error if the same sort key is encountered
# more than once.
# 
# The array is PASSED BY REFERENCE.

sub sort_data {
    local(*records) = @_;
    local(%records);
    local($key);
    local($unique) = 0;

    while ($_ = pop(@records)) {
	$key = &sort_key($_);
	if ($records{$key}) {
	    warn "Duplicate sort key encountered in file $file, record:\n$_\n";
	    $key .= $unique++;
	}
	$records{$key} = $_;
    }

    foreach $key (sort keys(%records)) {
	push(@records, $records{$key});
    }
}

# Return the sort key of a specified entry.

sub sort_key {
    &field_value($_, "newsgroups") . "\n" . &field_value($_, "subject");
}

sub split_record {
    local(@split) = split(/^--\n/, $_[0]);

    if (@split > 2) {
	warn "Too many \"--\" separators in:\n$_[0]";
	return ();
    }
    else {
	@split;
    }
}

sub merge_record {
    local($main, $aux) = @_;

    if ($aux) {
	$main . "--\n" . $aux;
    }
    else {
	$main;
    }
}

# Check if there's an ID, add one if there needs to be one, return it

sub check_id {
    local($record) = @_;
    local($main, $aux) = &split_record($record);
    local($id);

    return undef if (! $main);

    $id = &field_value($main, 'id');
    $auxid = &field_value($aux, 'id');

    if (! $id) {
	if ($auxid) {
	    warn "ID field in auxiliary part instead of main part:\n$record";
	    return ();
	}
	$id = &new_id;
	$record = &set_field_internal($record, 'ID', $id, 0, 0);
	$record = &mark_hidden($record, 'ID');
	($record, $id);
    }
    elsif ($auxid) {
	warn "Duplicate ID field in auxiliary part:\n$record";
	return ();
    }
    else {
	($record, $id);
    }
}
    
# Reformat a series of records, passed in an array by reference.

sub reformat_data {
    local(*records) = @_;
    local($i);

    for ($i = 0; $i < @records; $i++) {
	($records[$i] = &reformat_entry($records[$i])) || return undef;
    }
    1;
}

# Reformat an entry in place, sorting and adding new fields as necessary.
# 
# Dies if the four required fields (from, newsgroups, subject, frequency)
# are not present.

sub reformat_entry {
    local($entry) = @_;
    local(@from, @newsgroups, @subject, @frequency, $id);
    local($main, $aux);
    local($*) = 1;
    local($_, $af,$newsgroups);

    (($entry, $id) = &check_id($entry)) || return undef;

    ($main, $aux) = &split_record($entry);

    return undef if (! $main);

    for (&lopip'field_names($main)) {
	for $af (@aux_fields) {
	    if (/^$af$/) {
		warn "Auxiliary data field \"$_\" in main part of:\n$entry";
		return undef;
	    }
	}
    }

    aux_field: for (&lopip'field_names($aux)) {
	for $af (@aux_fields) {
	    next aux_field if (/^$af$/);
	}
	warn "Non-auxiliary data field \"$_\" in auxiliary part of:\n$entry";
	return undef;
    }

    for ('from', 'newsgroups', 'subject', 'frequency') {
	if (&is_hidden($main, $_)) {
	    warn "Required $_ field should not be hidden in record:\n$main\n";
	    return undef;
	}
    }
    $newsgroups = &field_value($main, 'newsgroups');
    if ($newsgroups =~ /\s/) {
	warn "Newsgroups line contains whitespace in:\n$entry";
	return undef;
    }
    if ($newsgroups =~ /,$/) {
	warn "Newsgroups line hs comma at end in:\n$entry";
	return undef;
    }
    ($main, @from) = &strip_fields_lines($main, 'from');
    ($main, @newsgroups) = &strip_fields_lines($main, 'newsgroups');
    ($main, @subject) = &strip_fields_lines($main, 'subject');
    ($main, @frequency) = &strip_fields_lines($main, 'frequency');

    # Place the four required field in the proper order, and make sure
    # they exist.

    if (! (@from && @newsgroups && @subject && @frequency)) {
	warn "Missing required field in main part of:\n$entry";
	return undef;
    }

    $main = join("\n", @newsgroups, @subject, @from, @frequency) . "\n" .
	$main;

    $entry = &merge_record($main, $aux);

    # Make sure there is exactly one space after colons, and no extra
    # whitespace at line endings.  However, don't remove whitespace
    # from lines that are completely blank other than the whitespace.

    $entry =~ s/^([^ \t:]+:)[ \t]*/$1 /g;
    $entry =~ s/([^ \t\n])[ \t]+$/$1/g;

    $entry;
}



###################
#                 #
# MISC. UTILITIES #
#                 #
###################

sub protect {
    local($str) = @_;

    $str =~ tr/ \n\011\140\047\042\134\057/_/;

    return($str);
}

sub strip_special {
    local($record) = @_;
    local($*) = 1;

    $record =~ s/^[$special_chars]+//g;
    $record;
}

    

###################
#                 #
# FIELD FUNCTIONS #
#                 #
###################

# This function has to be a bit hairy to deal with multi-line fields.
# It returns the field value, without its final newline, with or without
# the field name and flag characters associated with it, and with or
# without stripping the field from the record before returning it.
# 
# If the "multiple" flag is true, then a list of matches is returned
# (i.e., the return is ($record, $match1, $match2, ..., $matchn).
# Otherwise, only a single match is returned (and possibly stripped),
# even if there are multiple matches.

sub field_value_internal {
    local($record, $field, $strip, $leave_tag, $multiple, $which) = @_;
    local($*);
    local($after, $match, @matches);
    local($newrecord);

    if ($multiple && $which) {
	die "Internal error!\n\tfield_value_internal called with both \$multiple and \$which set.\n";
    }
    if ($which) {
	$which--;
    }
    else {
	$which = undef; # in case it was 0 instead of undef coming in
    }

    $field =~ s/(\W)/\\$1/g;

    while (# perl -c -w warning can be ignored, we really do want to
	   # perform an assignment in this conditional predicate....
	   ($* = 1) &&
	   ($record =~ /^([$special_chars]+|)($field:[ \t]*)(.*\n)/i)) {
	$newrecord .= $`; #`
	$match = $&;
	$record = $'; #'

	$* = 0;
	while ($record =~ s/^([ \t]+.*\n)//) {
	    $match .= $&;
	}

	if (!$strip || (defined($which) && ($which != @matches))) {
	    $newrecord .= $match;
	}

	($match =~ s/^[^:]+:\s*//) if (! $leave_tag);
	chop $match if ($match);

	push(@matches, $match);

	last if (! ($multiple || defined($which)));
    }

    if (defined($which)) {
	($newrecord . $record, $matches[$which]);
    }
    else {
	($newrecord . $record, @matches);
    }
}

# All of these functions DO NOT INCLUDE the field's trailing newline in
# their return value.

# Return the field value in the specified entry of the specified field.
# The return value has its field name stripped off.

sub field_value {
    local($record, $field) = @_;
    local(@foo) = &field_value_internal($record, $field);
    $foo[1];
}

sub field_values {
    local($record, $field) = @_;
    local(@foo) = &field_value_internal($record, $field, undef, undef,
					"multiple");
    shift @foo;
    @foo;
}

sub field_value_n {
    local($record, $field, $which) = @_;
    local(@foo) = &field_value_internal($record, $field, undef, undef, undef,
					$which);
    $foo[1];
}

# Return the field as above, but strip it from the entry when doing so.

sub strip_field {
    local($record, $field) = @_;
    &field_value_internal($record, $field, "strip");
}

sub strip_fields {
    local($record, $field) = @_;
    &field_value_internal($record, $field, "strip", undef, "multiple");
}

sub strip_field_n {
    local($record, $field, $which) = @_;
    &field_value_internal($record, $field, "strip", undef, undef, $which);
}

# Return the field, without stripping, but include the special characters
# and field name.

sub field_lines {
    local($record, $field) = @_;
    local(@foo) = &field_value_internal($record, $field, undef, "leave_tag");
    $foo[1];
}

sub fields_lines {
    local($record, $field) = @_;
    local(@foo) = &field_value_internal($record, $field, undef, "leave_tag",
					"multiple");
}

sub field_lines_n {
    local($record, $field, $which) = @_;
    local(@foo) = &field_value_internal($record, $field, undef, "leave_tag",
					undef, $which);
    $foo[1];
}

# Strip as above, but include the special characters and field name 
# in the field value that is returned.

sub strip_field_lines {
    local($record, $field) = @_;
    &field_value_internal($record, $field, "strip", "leave_tag");
}

sub strip_fields_lines {
    local($record, $field) = @_;
    &field_value_internal($record, $field, "strip", "leave_tag", "multiple");
}

sub strip_field_lines_n {
    local($record, $field, $which) = @_;
    &field_value_internal($record, $field, "strip", "leave_tag", undef, 
			  $which);
}




# Returns the Single-Subject fields, without their trailing newlines

sub single_subjects {
    local($record) = @_;
    &field_values($record, 'Single-Subject');
}

# Returns the Dont-Match fields, without their trailing newlines

sub dont_subjects {
    local($record) = @_;
    &field_values($record, 'Dont-Match');
}




# Field marking and unmarking

sub mark_or_unmark_field {
    local($record, $field, $char, $which, $mark) = @_;
    local($*) = 1;
    local($chars);
    local($rest);
    local($before, $after);
    local($newrecord);

    $field =~ s/(\W)/\\$1/g;

    $which = 1 if (! $which);

    # This would be much cleaner if we could use /.../gi.
    # Unfortunately, we can't because of a bug in perl 4.036. XXX

    while ($which) {
	if ($record =~ /^([$special_chars]*)($field:.*\n)/i) {
	    if ($which == 1) {
		$before = $`; #`
		$after = $'; #'
		$chars = $1;
		$rest = $2;
		
		$chars =~ s/[$char]//g;
		$chars .= $char if $mark;

		$record = $before . $chars . $rest . $after;
		last;
	    }
	    else {
		local($*) = 0;
		$newrecord .= $` . $&; #`
		$record = $';
		while ($record =~ s/^[ \t]+.*\n//) {
		    $newrecord .= $&;
		}
		$which--;
	    }
	}
	else {
	    last;
	}
    }

    warn "Error " . ($mark ? "" : "un") . "marking \"$field\" occurrence $which with $char in:\n$record"
	if (! $which);

    $newrecord . $record;
}

sub mark_field {
    local($record, $field, $char, $which) = @_;
    &mark_or_unmark_field($record, $field, $char, $which, 1);
}

sub unmark_field {
    local($record, $field, $char, $which) = @_;
    &mark_or_unmark_field($record, $field, $char, $which, undef);
}

sub check_field {
    local($record, $field, $char, $which) = @_;
    local($*) = 1;

    if ($which) {
	$record = &field_lines_n($record, $field);
    }

    $field =~ s/(\W)/\\$1/g;

    $record =~ /^[$special_chars]*[${char}][$special_chars]*${field}:/i;
}



sub mark_valid {
    local($record, $field, $which) = @_;
    &mark_field($record, $field, $valid_char, $which);
}

sub mark_invalid {
    local($record, $field, $which) = @_;
    &unmark_field($record, $field, $valid_char, $which);
}

sub is_valid {
    local($record, $field, $which) = @_;
    &check_field($record, $field, $valid_char, $which);
}

sub mark_hidden {
    local($record, $field, $which) = @_;
    &mark_field($record, $field, $hidden_char, $which);
}

sub mark_unhidden {
    local($record, $field, $which) = @_;
    &unmark_field($record, $field, $hidden_char, $which);
}

sub is_hidden {
    local($record, $field, $which) = @_;
    &check_field($record, $field, $hidden_char, $which);
}

sub mark_special {
    local($record, $field, $which) = @_;
    &mark_field($record, $field, $special_char, $which);
}

sub mark_normal {
    local($record, $field, $which) = @_;
    &unmark_field($record, $field, $special_char, $which);
}

sub is_special {
    local($record, $field, $which) = @_;
    &check_field($record, $field, $special_char, $which);
}

sub mark_important {
    local($record, $field, $which) = @_;
    &mark_field($record, $field, $important_char, $which);
}

sub mark_unimportant {
    local($record, $field, $which) = @_;
    &unmark_field($record, $field, $important_char, $which);
}

sub is_important {
    local($record, $field, $which) = @_;
    &check_field($record, $field, $important_char, $which);
}




sub set_field_internal {
    local($record, $name, $value, $inaux, $reformat, $which) = @_;
    local($*) = 1;
    local($old_val);
    local($flags);
    local($main, $aux);

    if ((! $which) && (&is_multiple($record, $name))) {
	die "set_field_internal called without \$which on multiple field!\n";
    }

    ($record, $old_val) = &strip_field_lines_n($record, $name, 
					       $which ? $which : 1);

    $flags = $1 if ($old_val =~ /^([$special_chars]+)/);

    ($main, $aux) = split(/^--\n/, $record, 2);

    if ($inaux) {
	$aux .= "$flags$name: $value\n";
    }
    else {
	$main .= "$flags$name: $value\n";
    }

    if ($aux) {
	$record = $main . "--\n" . $aux;
    }
    else {
	$record = $main;
    }

    (($record = &reformat_entry($record)) || return undef) if ($reformat);
    $record;
}

sub set_field {
    &set_field_internal(@_, 1);
}

# Args: $record, $name, $value, $inaux, $which

sub set_field_n {
    &set_field_internal($_[0], $_[1], $_[2], $_[3], 1, $_[4]);
}

sub new_field {
    local($record, $name, $value, $inaux, $reformat) = @_;

    if ($inaux) {
	$record .= "$name: $value\n";
    }
    else {
	local($*) = 1;
	local($main, $aux) = split(/^--\n/, $record, 2);
	$main .= "$name: $value\n";
	$record = $main . "--\n" . $aux;
    }

    (($record = &reformat_entry($record)) || return undef) if ($reformat);
    $record;
}




sub archive_names {
    local($*) = 1;
    local($_) = @_;

    (/^[$special_chars]*([A-Za-z][^:]*-archive-name):/gi,
     /^[$special_chars]*(archive-name):/gi);
}

sub field_names {
    local($*) = 1;
    local($_) = @_;
    local(@fields);

    while (/^[$special_chars]*([^\s]+):/g) {
	push(@fields, $1);
	$fields[$#fields] =~ tr/A-Z/a-z/;
    }

    @fields;
}

sub is_multiple {
    local($field, $record) = @_;
    local($val);

    ($record, $val) = &strip_field_lines($record, $field);
    if ($val) {
	($record, $val) = &strip_field_lines($record, $field);
	if ($val) {
	    return 1;
	}
    }
    undef;
}
			

###########
#         #
# TESTING #
#         #
###########

#@records = &read_data("../data/list");

1;
