) { # Strip all blank lines from the beginning of the record, and # make sure that there's a single newline at the end of the # record. Then, toss any records that consist of only a # single newline. This prevents there from being problems # when there are extra newlines in between records or at the # end of the file. s/^\n*//; s/\n*$/\n/; next if ($_ eq "\n"); die "Multiple ID fields in file $file, record:\n$_" if (&is_multiple('id', $_)); if (! ($id = &field_value($_, 'id'))) { $id = &new_id; $_ = &set_field_internal($_, 'ID', $id, 0, 0); $_ = &mark_hidden($_, 'ID'); } push(@records, $_); $main{$id} = @records - 1; if ($debug && ((@records % 100) == 0)) { print STDERR @records + 0, "\n"; } } close(MAIN); open(AUX, "$file.aux") || die "Opening $file.aux: $!.\n"; while () { s/^\n*//; s/\n*$/\n/; next if ($_ eq "\n"); if (! ($id = &field_value($_, 'id'))) { die "Missing ID in auxiliary file $file.aux entry:\n$_"; } next if (! defined($main{$id})); ($_) = &strip_field_lines($_, 'id'); $records[$main{$id}] = &merge_record($records[$main{$id}], $_); } close(AUX); @records; } # Write to the specified file, and to its associated auxiliary file, # the specified array of records. The data being written is PASSED BY # REFERENCE. sub write_data { local($file, *records) = @_; local($main, $aux, $id); local(%ids); local($*) = 1; local($newmain) = "$file.new$$"; local($newaux) = "$file.aux.new$$"; local($_); open(MAIN, ">$newmain") || (warn("Opening $newmain for write: $!.\n"), goto bad); open(AUX, ">$newaux") || (warn("Opening $newaux for write: $!.\n"), goto bad); for (@records) { if (&is_multiple('id', $_)) { warn "Multiple ID fields in file $file, record:\n$_"; goto bad; } (($_, $id) = &check_id($_)) || goto bad; if ($ids{$id}++) { warn "Duplicate ID $id in data in file $file!\n"; goto bad; } ($main, $aux) = &split_record($_); return undef if (! $main); print MAIN $main, "\n" || (warn("Writing $newmain: $!.\n"), goto bad); if ($aux) { $aux = &set_field_internal($aux, 'ID', $id, 0, 0); $aux = &mark_hidden($aux, 'ID'); print AUX $aux, "\n" || (warn("Writing $newaux: $!.\n"), goto bad); } } close(MAIN) || (warn("Closing $newmain: $!.\n"), goto bad); close(AUX) || (warn("Closing $newaux: $!.\n"), goto bad); unlink("$file.old", "$file.aux.old"); link($file, "$file.old"); link("$file.aux", "$file.aux.old"); rename($newmain, $file) || (warn("Renaming $newmain to $file: $!.\n"), goto bad); rename($newaux, "$file.aux") || (warn("Renaming $newaux to $file.aux: $!.\n"), goto bad); return 1; bad: close(MAIN); close(AUX); # may already be closed; we don't care unlink($newmain, $newaux); undef; } # Write to the specified file handle the specified records, in a format # suitable for the List of Periodic Informational Postings. # # Data is PASSED BY REFERENCE. sub posting_write { local($fhandle, *records) = @_; local($*) = 0; local($_, $i); foreach $i (@records) { $_ = $i; s/\n--\n/\n/; next if (&is_special($_, 'subject')); while ($_ ne "") { if (/^[$special_chars]*[$hidden_char]/) { s/.*\n//; while (s/^[ \t].*\n//) { } } else { s/^[$special_chars]+//; s/(.*\n)//; print($fhandle " $1"); print($fhandle " $1") while (s/^([ \t].*\n)//); } } print($fhandle "\n"); } } ##################### # # # DATA REFORMATTING # # # ##################### # Takes an array of records and sorts them in place. # The program prints an error if the same sort key is encountered # more than once. # # The array is PASSED BY REFERENCE. sub sort_data { local(*records) = @_; local(%records); local($key); local($unique) = 0; while ($_ = pop(@records)) { $key = &sort_key($_); if ($records{$key}) { warn "Duplicate sort key encountered in file $file, record:\n$_\n"; $key .= $unique++; } $records{$key} = $_; } foreach $key (sort keys(%records)) { push(@records, $records{$key}); } } # Return the sort key of a specified entry. sub sort_key { &field_value($_, "newsgroups") . "\n" . &field_value($_, "subject"); } sub split_record { local(@split) = split(/^--\n/, $_[0]); if (@split > 2) { warn "Too many \"--\" separators in:\n$_[0]"; return (); } else { @split; } } sub merge_record { local($main, $aux) = @_; if ($aux) { $main . "--\n" . $aux; } else { $main; } } # Check if there's an ID, add one if there needs to be one, return it sub check_id { local($record) = @_; local($main, $aux) = &split_record($record); local($id); return undef if (! $main); $id = &field_value($main, 'id'); $auxid = &field_value($aux, 'id'); if (! $id) { if ($auxid) { warn "ID field in auxiliary part instead of main part:\n$record"; return (); } $id = &new_id; $record = &set_field_internal($record, 'ID', $id, 0, 0); $record = &mark_hidden($record, 'ID'); ($record, $id); } elsif ($auxid) { warn "Duplicate ID field in auxiliary part:\n$record"; return (); } else { ($record, $id); } } # Reformat a series of records, passed in an array by reference. sub reformat_data { local(*records) = @_; local($i); for ($i = 0; $i < @records; $i++) { ($records[$i] = &reformat_entry($records[$i])) || return undef; } 1; } # Reformat an entry in place, sorting and adding new fields as necessary. # # Dies if the four required fields (from, newsgroups, subject, frequency) # are not present. sub reformat_entry { local($entry) = @_; local(@from, @newsgroups, @subject, @frequency, $id); local($main, $aux); local($*) = 1; local($_, $af,$newsgroups); (($entry, $id) = &check_id($entry)) || return undef; ($main, $aux) = &split_record($entry); return undef if (! $main); for (&lopip'field_names($main)) { for $af (@aux_fields) { if (/^$af$/) { warn "Auxiliary data field \"$_\" in main part of:\n$entry"; return undef; } } } aux_field: for (&lopip'field_names($aux)) { for $af (@aux_fields) { next aux_field if (/^$af$/); } warn "Non-auxiliary data field \"$_\" in auxiliary part of:\n$entry"; return undef; } for ('from', 'newsgroups', 'subject', 'frequency') { if (&is_hidden($main, $_)) { warn "Required $_ field should not be hidden in record:\n$main\n"; return undef; } } $newsgroups = &field_value($main, 'newsgroups'); if ($newsgroups =~ /\s/) { warn "Newsgroups line contains whitespace in:\n$entry"; return undef; } if ($newsgroups =~ /,$/) { warn "Newsgroups line hs comma at end in:\n$entry"; return undef; } ($main, @from) = &strip_fields_lines($main, 'from'); ($main, @newsgroups) = &strip_fields_lines($main, 'newsgroups'); ($main, @subject) = &strip_fields_lines($main, 'subject'); ($main, @frequency) = &strip_fields_lines($main, 'frequency'); # Place the four required field in the proper order, and make sure # they exist. if (! (@from && @newsgroups && @subject && @frequency)) { warn "Missing required field in main part of:\n$entry"; return undef; } $main = join("\n", @newsgroups, @subject, @from, @frequency) . "\n" . $main; $entry = &merge_record($main, $aux); # Make sure there is exactly one space after colons, and no extra # whitespace at line endings. However, don't remove whitespace # from lines that are completely blank other than the whitespace. $entry =~ s/^([^ \t:]+:)[ \t]*/$1 /g; $entry =~ s/([^ \t\n])[ \t]+$/$1/g; $entry; } ################### # # # MISC. UTILITIES # # # ################### sub protect { local($str) = @_; $str =~ tr/ \n\011\140\047\042\134\057/_/; return($str); } sub strip_special { local($record) = @_; local($*) = 1; $record =~ s/^[$special_chars]+//g; $record; } ################### # # # FIELD FUNCTIONS # # # ################### # This function has to be a bit hairy to deal with multi-line fields. # It returns the field value, without its final newline, with or without # the field name and flag characters associated with it, and with or # without stripping the field from the record before returning it. # # If the "multiple" flag is true, then a list of matches is returned # (i.e., the return is ($record, $match1, $match2, ..., $matchn). # Otherwise, only a single match is returned (and possibly stripped), # even if there are multiple matches. sub field_value_internal { local($record, $field, $strip, $leave_tag, $multiple, $which) = @_; local($*); local($after, $match, @matches); local($newrecord); if ($multiple && $which) { die "Internal error!\n\tfield_value_internal called with both \$multiple and \$which set.\n"; } if ($which) { $which--; } else { $which = undef; # in case it was 0 instead of undef coming in } $field =~ s/(\W)/\\$1/g; while (# perl -c -w warning can be ignored, we really do want to # perform an assignment in this conditional predicate.... ($* = 1) && ($record =~ /^([$special_chars]+|)($field:[ \t]*)(.*\n)/i)) { $newrecord .= $`; #` $match = $&; $record = $'; #' $* = 0; while ($record =~ s/^([ \t]+.*\n)//) { $match .= $&; } if (!$strip || (defined($which) && ($which != @matches))) { $newrecord .= $match; } ($match =~ s/^[^:]+:\s*//) if (! $leave_tag); chop $match if ($match); push(@matches, $match); last if (! ($multiple || defined($which))); } if (defined($which)) { ($newrecord . $record, $matches[$which]); } else { ($newrecord . $record, @matches); } } # All of these functions DO NOT INCLUDE the field's trailing newline in # their return value. # Return the field value in the specified entry of the specified field. # The return value has its field name stripped off. sub field_value { local($record, $field) = @_; local(@foo) = &field_value_internal($record, $field); $foo[1]; } sub field_values { local($record, $field) = @_; local(@foo) = &field_value_internal($record, $field, undef, undef, "multiple"); shift @foo; @foo; } sub field_value_n { local($record, $field, $which) = @_; local(@foo) = &field_value_internal($record, $field, undef, undef, undef, $which); $foo[1]; } # Return the field as above, but strip it from the entry when doing so. sub strip_field { local($record, $field) = @_; &field_value_internal($record, $field, "strip"); } sub strip_fields { local($record, $field) = @_; &field_value_internal($record, $field, "strip", undef, "multiple"); } sub strip_field_n { local($record, $field, $which) = @_; &field_value_internal($record, $field, "strip", undef, undef, $which); } # Return the field, without stripping, but include the special characters # and field name. sub field_lines { local($record, $field) = @_; local(@foo) = &field_value_internal($record, $field, undef, "leave_tag"); $foo[1]; } sub fields_lines { local($record, $field) = @_; local(@foo) = &field_value_internal($record, $field, undef, "leave_tag", "multiple"); } sub field_lines_n { local($record, $field, $which) = @_; local(@foo) = &field_value_internal($record, $field, undef, "leave_tag", undef, $which); $foo[1]; } # Strip as above, but include the special characters and field name # in the field value that is returned. sub strip_field_lines { local($record, $field) = @_; &field_value_internal($record, $field, "strip", "leave_tag"); } sub strip_fields_lines { local($record, $field) = @_; &field_value_internal($record, $field, "strip", "leave_tag", "multiple"); } sub strip_field_lines_n { local($record, $field, $which) = @_; &field_value_internal($record, $field, "strip", "leave_tag", undef, $which); } # Returns the Single-Subject fields, without their trailing newlines sub single_subjects { local($record) = @_; &field_values($record, 'Single-Subject'); } # Returns the Dont-Match fields, without their trailing newlines sub dont_subjects { local($record) = @_; &field_values($record, 'Dont-Match'); } # Field marking and unmarking sub mark_or_unmark_field { local($record, $field, $char, $which, $mark) = @_; local($*) = 1; local($chars); local($rest); local($before, $after); local($newrecord); $field =~ s/(\W)/\\$1/g; $which = 1 if (! $which); # This would be much cleaner if we could use /.../gi. # Unfortunately, we can't because of a bug in perl 4.036. XXX while ($which) { if ($record =~ /^([$special_chars]*)($field:.*\n)/i) { if ($which == 1) { $before = $`; #` $after = $'; #' $chars = $1; $rest = $2; $chars =~ s/[$char]//g; $chars .= $char if $mark; $record = $before . $chars . $rest . $after; last; } else { local($*) = 0; $newrecord .= $` . $&; #` $record = $'; while ($record =~ s/^[ \t]+.*\n//) { $newrecord .= $&; } $which--; } } else { last; } } warn "Error " . ($mark ? "" : "un") . "marking \"$field\" occurrence $which with $char in:\n$record" if (! $which); $newrecord . $record; } sub mark_field { local($record, $field, $char, $which) = @_; &mark_or_unmark_field($record, $field, $char, $which, 1); } sub unmark_field { local($record, $field, $char, $which) = @_; &mark_or_unmark_field($record, $field, $char, $which, undef); } sub check_field { local($record, $field, $char, $which) = @_; local($*) = 1; if ($which) { $record = &field_lines_n($record, $field); } $field =~ s/(\W)/\\$1/g; $record =~ /^[$special_chars]*[${char}][$special_chars]*${field}:/i; } sub mark_valid { local($record, $field, $which) = @_; &mark_field($record, $field, $valid_char, $which); } sub mark_invalid { local($record, $field, $which) = @_; &unmark_field($record, $field, $valid_char, $which); } sub is_valid { local($record, $field, $which) = @_; &check_field($record, $field, $valid_char, $which); } sub mark_hidden { local($record, $field, $which) = @_; &mark_field($record, $field, $hidden_char, $which); } sub mark_unhidden { local($record, $field, $which) = @_; &unmark_field($record, $field, $hidden_char, $which); } sub is_hidden { local($record, $field, $which) = @_; &check_field($record, $field, $hidden_char, $which); } sub mark_special { local($record, $field, $which) = @_; &mark_field($record, $field, $special_char, $which); } sub mark_normal { local($record, $field, $which) = @_; &unmark_field($record, $field, $special_char, $which); } sub is_special { local($record, $field, $which) = @_; &check_field($record, $field, $special_char, $which); } sub mark_important { local($record, $field, $which) = @_; &mark_field($record, $field, $important_char, $which); } sub mark_unimportant { local($record, $field, $which) = @_; &unmark_field($record, $field, $important_char, $which); } sub is_important { local($record, $field, $which) = @_; &check_field($record, $field, $important_char, $which); } sub set_field_internal { local($record, $name, $value, $inaux, $reformat, $which) = @_; local($*) = 1; local($old_val); local($flags); local($main, $aux); if ((! $which) && (&is_multiple($record, $name))) { die "set_field_internal called without \$which on multiple field!\n"; } ($record, $old_val) = &strip_field_lines_n($record, $name, $which ? $which : 1); $flags = $1 if ($old_val =~ /^([$special_chars]+)/); ($main, $aux) = split(/^--\n/, $record, 2); if ($inaux) { $aux .= "$flags$name: $value\n"; } else { $main .= "$flags$name: $value\n"; } if ($aux) { $record = $main . "--\n" . $aux; } else { $record = $main; } (($record = &reformat_entry($record)) || return undef) if ($reformat); $record; } sub set_field { &set_field_internal(@_, 1); } # Args: $record, $name, $value, $inaux, $which sub set_field_n { &set_field_internal($_[0], $_[1], $_[2], $_[3], 1, $_[4]); } sub new_field { local($record, $name, $value, $inaux, $reformat) = @_; if ($inaux) { $record .= "$name: $value\n"; } else { local($*) = 1; local($main, $aux) = split(/^--\n/, $record, 2); $main .= "$name: $value\n"; $record = $main . "--\n" . $aux; } (($record = &reformat_entry($record)) || return undef) if ($reformat); $record; } sub archive_names { local($*) = 1; local($_) = @_; (/^[$special_chars]*([A-Za-z][^:]*-archive-name):/gi, /^[$special_chars]*(archive-name):/gi); } sub field_names { local($*) = 1; local($_) = @_; local(@fields); while (/^[$special_chars]*([^\s]+):/g) { push(@fields, $1); $fields[$#fields] =~ tr/A-Z/a-z/; } @fields; } sub is_multiple { local($field, $record) = @_; local($val); ($record, $val) = &strip_field_lines($record, $field); if ($val) { ($record, $val) = &strip_field_lines($record, $field); if ($val) { return 1; } } undef; } ########### # # # TESTING # # # ########### #@records = &read_data("../data/list"); 1;