#!/usr/bin/perl # close-books.pl - Archive old GnuCash transactions # Copyright (C) 2005, 2008 Jonathan Kamens. # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. # You may have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., 59 Temple # Place, Suite 330, Boston, MA 02111-1307 USA, or visit # http://www.gnu.org/licenses/licenses.html#GPL. # Please send enhancements or bug fixes to jik@kamens.brookline.ma.us. # This is $Revision: 1.11 $. # XXX I assume that all the transactions are currency transactions in the same # currency. That is, I don't deal with multiple currencies in the same file # and I don't deal with non-currency transactions, e.g., stocks. I also don't # do any error-checking in this area, so the wrong thing will probably happen # if you try to archive a file with different currencies and/or non-currency # transactions in it. So don't do that! I also assume that the commodity-scu # and denominator are 100 everywhere (I do a little bit of error-checking about # this, but not much). I also assume that split:value and split:quantity are # the same everywhere, because I don't know what it means when they're # different. Patches to fix all this are welcome. # # 2009-01-15 - spork - added (or so I think) the ability to deal with # stock transactions. (It works for me anyway.) I haven't tested with # multiple currencies because I didn't have a need for it. I also # added the '--immutable' option. Very little error checking is done # on the account names listed in the immutable file. If you specified them # in a goofy manner, then the goofy account name is the one you get. # # I also found that this script craps hard if the transaction description # is blank. Don't ask why mine had blank ones -- I think it was some 15 # year old imported transactions from quicken. Instead of fixing this # script (since the error may be in my data) I opened the gnucash xml # file with vi and did: # :1,$s/X \$help, 'verbose+' => \$verbose, 'dryrun' => \$dryrun, 'start-time=s' => \$start_str, 'end-time=s' => \$end_str, 'archive-file=s' => \$archive_file, 'output-file=s' => \$output_file, 'immutable=s' => \$immutable_file, "archive-all=s\@" => \@archive_all, 'reconcile-all' => \$reconcile_all)); map($_ = qr/$_/, @archive_all); if ($help) { print $usage; exit; } $input_file = shift @ARGV || '-'; die "$whoami: Too many arguments\n$usage" if (@ARGV); if ($start_str && ! ($start_time = str2time($start_str))) { die "$whoami: Couldn't parse start time \"$start_str\"\n$usage"; } if ($end_str && ! ($end_time = str2time($end_str))) { die "$whoami: Couldn't parse end time \"$end_str\"\n$usage"; } $end_time -= 1; my (%immutable_account_name, %immutable_account_guid); if ($immutable_file) { &verbose("Reading immutable file...\n"); open(IMFILE,"$immutable_file") or die "$whoami: Couldn't open immutable file $immutable_file: $!\n"; while () { chomp; &verbose("loaded immutable account name: $_\n", 2); $immutable_account_name{$_} = 1; } close IMFILE; } my $parser; if ($libxml) { $parser = XML::LibXML->new(); } else { $parser = new XML::DOM::Parser; } # We have two copies of the file we're archiving. The first is the # one we'll write out as the pruned file, and the second is the one # we'll write out as the archive file. We iterate through all # transactions in the pruned file. For each one we want to archive, # we check if we've created archive transactions in the pruned file # for each of the accounts in the transaction, create the ones that # are missing, add the transaction to all the archive transactions, # then remove the transaction from the pruned file but not from the # archive file. For each transaction we do *not* want to archive, we # remove it from the archive file but not from the pruned file. When # we're done, we should be left with the correct pruned file and # archive file and we can simply save them. &verbose("Parsing..."); my $doc; if ($libxml) { $doc = $parser->parse_file($input_file); } else { $doc = $parser->parsefile($input_file); } &verbose(" done.\n"); &verbose("Cloning..."); my $archive_doc = $doc->cloneNode(1); &verbose(" done.\n"); # We need the transaction count nodes for both files so that we can # adjust them as appropriate when we add or remove transactions. my($gnc_v2) = $doc->getElementsByTagName("gnc-v2", 0) or die; my($gnc_book) = $gnc_v2->getElementsByTagName("gnc:book", 0) or die; sub get_transaction_count_node { my($book) = @_; my(@counts) = $book->getElementsByTagName("gnc:count-data", 0); foreach my $count (@counts) { my $type_str; if ($libxml) { $type_str = $count->getAttribute("cd:type"); } else { my $attributes = $count->getAttributes; my($type) = $attributes->getNamedItem("cd:type"); $type_str = &node_value($type->getFirstChild); } if ($type_str eq "transaction") { return $count->getFirstChild; } } die "Could not find transaction count node"; } my $pruned_count_node = &get_transaction_count_node($gnc_book); my($archive_gnc_v2) = $archive_doc->getElementsByTagName("gnc-v2", 0) or die; my($archive_gnc_book) = $archive_gnc_v2->getElementsByTagName("gnc:book", 0) or die; my $archive_count_node = &get_transaction_count_node($archive_gnc_book); my $total = &node_value($pruned_count_node); &verbose("Transaction count is $total\n"); # Find the opening balances account and immutable accounts (if any) my $ob_account; my %account_parent_map; my %account_fullname; foreach my $account ($gnc_book->getElementsByTagName("gnc:account", 0)) { my($name_node) = $account->getElementsByTagName("act:name"); my($name_text_node) = $name_node->getFirstChild; next if (! $name_text_node); my $acct_name = &node_value($name_text_node); my $acct_guid = &node_value(($account->getElementsByTagName("act:id"))[0]->getFirstChild); my $parent_node = $account->getElementsByTagName("act:parent"); if ($parent_node) { $account_parent_map{$acct_guid} = $parent_node; my $parent_name = $account_fullname{$parent_node}; if ($parent_name eq 'Root Account') { $account_fullname{$acct_guid} = $acct_name; } else { $account_fullname{$acct_guid} = $account_fullname{$parent_node} . ':' . $acct_name; } } else { $account_fullname{$acct_guid} = $acct_name; } if (check_immutable($account_fullname{$acct_guid} )) { &verbose("Found immutable guid for $account_fullname{$acct_guid}\n", 2); $immutable_account_guid{$acct_guid} = $account_fullname{$acct_guid}; } if (&node_value($name_text_node) eq 'Opening Balances') { $ob_account = $acct_guid; } } die "Could not find Opening Balances account ID" if (! $ob_account); &verbose("Archiving..."); my $pct = 0; my $done = 0; my(@transactions) = $gnc_book->getElementsByTagName("gnc:transaction", 0) or die; foreach my $transaction (@transactions) { &do_transaction($transaction); print STDERR "."; my $new_pct = int($done++ / $total * 100); if ($new_pct != $pct) { print STDERR "$new_pct%\n"; $pct = $new_pct; } } &verbose(" done.\n"); if (! $dryrun) { if ($output_file) { if ($libxml) { $doc->toFile($output_file); } else { $doc->printToFile($output_file); } } else { if ($libxml) { $doc->toFH(\*STDOUT); } else { $doc->printToFileHandle(\*STDOUT); } } if ($archive_file) { if ($libxml) { $archive_doc->toFile($archive_file); } else { $archive_doc->printToFile($archive_file); } } } # Process a transaction. Ignore the return value. sub do_transaction { my($transaction) = @_; my $id = &get_child_value($transaction, 'trn:id'); if (&archive_transaction($id, $transaction)) { &remove_transaction($gnc_book, $pruned_count_node, $id); } else { &remove_transaction($archive_gnc_book, $archive_count_node, $id); } } # Try to archive a transaction. Returns true if it was archived. sub archive_transaction { my($id, $transaction) = @_; my($posted); # Confirm that it's in the valid date range if ($start_time || $end_time) { my($tmp1) = $transaction->getElementsByTagName("trn:date-posted", 0); my $tmp = &get_child_value($tmp1, 'ts:date'); if (! defined($posted = str2time($tmp))) { warn "$whoami: Couldn't parse timestamp \"$tmp\" in transaction $id\n"; return undef; } if ($start_time && $posted < $start_time) { &verbose("Skipping too-early transaction $id\n", 2); return undef; } if ($end_time && $posted > $end_time) { &verbose("Skipping too-late transaction $id\n", 2); return undef; } } # Confirm that all splits are reconciled or don't need to be, and that # none of the splits are in the Opening Balances account. my($splits) = $transaction->getElementsByTagName("trn:splits", 0) or die; my(@splits) = $splits->getElementsByTagName("trn:split", 0); foreach my $split (@splits) { my $split_account = &get_child_value($split, 'split:account'); if ($immutable_account_guid{$split_account}) { &verbose("Skipping immutable transaction $id\n", 2); return undef; } my $reconciled_state = &get_child_value($split, 'split:reconciled-state'); if (($reconciled_state ne 'y') && ! &split_is_all_account($split)) { &verbose("Skipping unreconciled transaction $id\n", 2); return undef; } if (&get_child_value($split, 'split:account') eq $ob_account) { &verbose("Skippping openine balances transaction $id\n", 2); return undef; } } &verbose("Archiving transaction $id.\n"); foreach my $split (@splits) { &archive_split($split); } 1; } # Archive the specified split in an archive transaction for its account, # creating an appropriate archive transaction if one does not already exist. my %archive_transactions; sub archive_split { my($split) = @_; my($acct, $rs, $rs_key, $at, $ot); # We first check if an archive transactions exists for the account ID and # reconciled state in the split we're archiving. If not, we create one as # follows: (1) clone the parent transaction of the split, give it a new id, # set its parent, set its description to "Archived transactions [start] - # [end]", "Archived transactions starting [start]", "Archived transactions # through [end]", or "Archived transactions" as appropriate, and clear its # check number if it has one; (2) remove from the clone all the splits # except the one we're archiving; (3) remove the memo from the split, if # it's set; (4) clone the split we're archiving, set its parent, give it a # new id, set its account to the opening balances account, and set its # value to the opposite of the value of the split we're archiving; (5) # increment the transaction account for the pruned file; (6) save it in # %archive_transactions. $ot = $split->getParentNode->getParentNode; $acct = &get_child_value($split, 'split:account'); $rs = &get_child_value($split, 'split:reconciled-state'); $rs_key = $reconcile_all ? 'y' : $rs; if (! ($at = $archive_transactions{$acct}{$rs_key})) { my $split_id = &get_child_value($split, 'split:id'); &verbose("New archive transaction from split ID $split_id.\n", 2); $at = $ot->cloneNode(1); &set_id($at, 'trn:id'); # Remove notes, if any my($slots) = $at->getElementsByTagName("trn:slots", 0); if ($slots and (my(@slots) = $slots->getElementsByTagName("slot", 0))) { foreach my $slot (@slots) { my($key) = $slot->getElementsByTagName("slot:key", 0); my($text) = $key->getFirstChild; if ($text eq 'notes') { $slots->removeChild($slot); } } if (! $slots->getFirstChild) { $at->removeChild($slots); } } $ot->getParentNode->appendChild($at); &set_child_value($at, 'trn:description', ($start_str ? ($end_str ? "Archived transactions $start_str - $end_str" : "Archived transactions starting $start_str") : ($end_str ? "Archived transactions through $end_str" : "Archived transactions"))); if (my($number) = $at->getElementsByTagName("trn:num", 0)) { $at->removeChild($number); } my($splits) = $at->getElementsByTagName("trn:splits", 0) or die; my(@splits) = $splits->getElementsByTagName("trn:split", 0) or die; foreach my $split2 (@splits) { my($id) = &get_child_value($split2, "split:id"); if ($id eq $split_id) { &make_reconciled($split2); if (my($memo) = $split2->getElementsByTagName("split:memo", 0)) { $split2->removeChild($memo); } $split = $split2; } else { $splits->removeChild($split2); } } my $ob_split = $split->cloneNode(1); $split->getParentNode->appendChild($ob_split); &set_id($ob_split, 'split:id'); &set_child_value($ob_split, 'split:account', $ob_account); my $val = &get_child_value($ob_split, 'split:value'); my $quan = &get_child_value($ob_split, 'split:quantity'); if ($val !~ s/^-//) { $val = '-' . $val; } if ($quan !~ s/^-//) { $quan = '-' . $quan; } &set_child_value($ob_split, 'split:value', $val); &set_child_value($ob_split, 'split:quantity', $quan); if ($libxml) { $pruned_count_node->setData(&node_value($pruned_count_node) + 1); } else { $pruned_count_node->setNodeValue(&node_value($pruned_count_node) + 1); } $archive_transactions{$acct}{$rs_key} = $at; return; } # If the archive transaction for the account ID we're reconciling *does* # exist, then add the split we're archiving to it as follows: (1) if its # date-posted is earlier than the date-posted of the new transaction, reset # it; (2) if its date-entered is earlier than the date-entered of the new # transaction, reset it; (3) if our split has its reconcile-date set, and # the archive transaction doesn't or it's earlier, reset it; (4) adjust the # amounts of the dates in the archive transaction to include the # transaction we're archiving. &verbose("Existing archive transaction.\n", 2); my($old_node, $new_node, $old_val, $new_val, $new_quan, $old_quan); my($old_node) = $at->getElementsByTagName('trn:date-posted', 0) or die; my($new_node) = $ot->getElementsByTagName('trn:date-posted', 0) or die; my($old_val) = &get_child_value($old_node, 'ts:date'); my($new_val) = &get_child_value($new_node, 'ts:date'); if ($old_val lt $new_val) { &set_child_value($old_node, 'ts:date', $new_val); } my($old_node) = $at->getElementsByTagName('trn:date-entered', 0) or die; my($new_node) = $ot->getElementsByTagName('trn:date-entered', 0) or die; my($old_val) = &get_child_value($old_node, 'ts:date'); my($new_val) = &get_child_value($new_node, 'ts:date'); if ($old_val lt $new_val) { &set_child_value($old_node, 'ts:date', $new_val); } my($this_acct_node, $ob_acct_node); my($splits) = $at->getElementsByTagName("trn:splits", 0); foreach my $split ($splits->getElementsByTagName("trn:split", 0)) { my $acct_text = &get_child_value($split, "split:account"); if ($acct_text eq $acct) { $this_acct_node = $split; } elsif ($acct_text eq $ob_account) { $ob_acct_node = $split; } } die if (! ($this_acct_node && $ob_acct_node)); if (my($new_node) = $split->getElementsByTagName("split:reconcile-date", 0)) { my($at_splits) = $at->getElementsByTagName("trn:splits", 0) or die; my($at_split) = $at_splits->getElementsByTagName("trn:split") or die; my($old_node) = $at_split->getElementsByTagName("split:reconcile-date", 0); if (! $old_node) { $new_node = $new_node->cloneNode(1); $this_acct_node->appendChild($new_node); $new_node = $new_node->cloneNode(1); $ob_acct_node->appendChild($new_node); } else { $new_val = &get_child_value($new_node, 'ts:date'); $old_val = &get_child_value($old_node, 'ts:date'); if ($old_val lt $new_val) { my($tmp) = $this_acct_node->getElementsByTagName("split:reconcile-date", 0); &set_child_value($tmp, 'ts:date', $new_val); ($tmp) = $ob_acct_node->getElementsByTagName("split:reconcile-date", 0); &set_child_value($tmp, 'ts:date', $new_val); } } } $new_val = &get_child_value($split, 'split:value'); $old_val = &get_child_value($this_acct_node, 'split:value'); $new_val =~ s,(/.*)$,,; my $denom = $1; $old_val =~ s,(/.*)$,,; if ($1 ne $denom) { die("Incompatible value denominators $1 and $denom:\n" . $split->toString . "\n" . $this_acct_node->toString . "\n"); } $new_val += $old_val; $new_val .= $denom; $new_quan = &get_child_value($split, 'split:quantity'); $old_quan = &get_child_value($this_acct_node, 'split:quantity'); $new_quan =~ s,(/.*)$,,; my $denom_quan = $1; $old_quan =~ s,(/.*)$,,; if ($1 ne $denom_quan) { die("Incompatible quantity denominators $1 and $denom_quan:\n" . $split->toString . "\n" . $this_acct_node->toString . "\n"); } $new_quan += $old_quan; $new_quan .= $denom_quan; &set_child_value($this_acct_node, 'split:value', $new_val); &set_child_value($this_acct_node, 'split:quantity', $new_quan); if ($new_val !~ s/^-//) { $new_val = '-' . $new_val; } if ($new_quan !~ s/^-//) { $new_quan = '-' . $new_quan; } &set_child_value($ob_acct_node, 'split:value', $new_val); &set_child_value($ob_acct_node, 'split:quantity', $new_quan); } # Mark a split reconciled and give it a reconcile date if it isn't already # marked reconciled. sub make_reconciled { my($split) = @_; if (&get_child_value($split, 'split:reconciled-state') eq 'y') { return; } &set_child_value($split, 'split:reconciled-state', 'y'); if ($split->getElementsByTagName('split:reconcile-date', 0)) { return; } my $reconcile_date = $doc->createElement('split:reconcile-date'); $split->appendChild($reconcile_date); my $ts_date = $doc->createElement('ts:date'); $reconcile_date->appendChild($ts_date); if ($libxml) { $ts_date->appendText(&gnc_date(time())); } else { $ts_date->addText(&gnc_date(time())); } } # Format a timestamp in gnc timestamp format sub gnc_date { my($t) = @_; strftime("%Y-%m-%d %H:%M:%S %z", localtime($t)); } # Give a node a new ID. sub set_id { my($node, $id_tag) = @_; my($new_id); $new_id = sprintf("%x", time()); while (length($new_id) < $id_length) { $new_id .= sprintf("%x", int(rand(16))); } &set_child_value($node, $id_tag, $new_id); } # Set the text value of the specified (by tag) child node of the specified node. sub set_child_value { my($node, $tag, $value) = @_; if (! $node) { die; } my($child) = $node->getElementsByTagName($tag, 0) or die "No child named $tag"; my($value_node) = $child->getFirstChild; die("$whoami: Couldn't find tag $tag for node:\n" . $node->toString . "\n") if (! $value_node); if ($libxml) { $value_node->setData($value); } else { $value_node->setNodeValue($value); } } # Get the text vlaue of the specified (by tag) child node of the specified node. sub get_child_value { my($node, $tag) = @_; confess "Undefined node" if (! $node); my($child) = $node->getElementsByTagName($tag, 0) or die "No child named $tag"; my($value_node) = $child->getFirstChild; die("$whoami: Couldn't find tag $tag for node:\n" . $node->toString . "\n") if (! $value_node); &node_value($value_node); } # Determine if a particular split is from an account all of whose transactions # are being archived. sub split_is_all_account { return undef if (! @archive_all); my($split) = @_; my $account_id = &get_child_value($split, 'split:account'); my($account_name) = &account_name_from_id($account_id); return scalar grep($account_name =~ /$_/, @archive_all); } # Convert an account ID to its name. my %account_name_from_id; sub account_name_from_id { my($id) = @_; my($name); if (! %account_name_from_id) { my(%names, %parents); foreach my $account ($gnc_book->getElementsByTagName("gnc:account")) { my $id = &node_value(($account->getElementsByTagName("act:id", 0))[0]->getFirstChild); my($name) = $account->getElementsByTagName("act:name", 0); next if (! ($name = $name->getFirstChild)); $names{$id} = &node_value($name); my($parent) = $account->getElementsByTagName("act:parent", 0); if ($parent) { $parents{$id} = &node_value($parent->getFirstChild); } } foreach my $id (keys %names) { $account_name_from_id{$id} = $names{$id}; my $parent = $id; while ($parent = $parents{$parent}) { last if ($names{$parent} eq 'Root Account'); $account_name_from_id{$id} = $names{$parent} . ":" . $account_name_from_id{$id}; } &verbose("Converted account ID $id to $account_name_from_id{$id}.\n", 2); } } return $account_name_from_id{$id}; } # Remove transaction from indicated document and decrease indicated count node my %transactions; sub remove_transaction { my($book, $count, $id) = @_; if (! $transactions{$book}) { foreach my $transaction ($book->getElementsByTagName("gnc:transaction", 0)) { my $id = &node_value(($transaction->getElementsByTagName("trn:id", 0))[0]->getFirstChild); $transactions{$book}{$id} = $transaction; } } my($transaction) = $transactions{$book}{$id}; die "$whoami: Couldn't find transaction $id\n" if (! $transaction); $transaction->getParentNode->removeChild($transaction); if ($libxml) { $count->setData(&node_value($count) - 1); } else { $count->setNodeValue(&node_value($count) - 1); } } # Display a message to stderr if the verbose level is at least the specified # level (or 1 if no level is specified). sub verbose { my($msg, $level) = @_; $level = 1 if (! $level); if ($verbose >= $level) { print(STDERR $msg); } } sub node_value { my($node) = @_; if ($libxml) { $node->data; } else { $node->getNodeValue; } } sub check_immutable { # return 1 if the account name is among the immutables. One should # check the current account and all of the parent accounts # this is done by checking against the (global variable) # %immutable_account_name my ($act_name) = @_; while ($act_name) { my $level = split(/:/,$act_name); if ($immutable_account_name{$act_name}) { return 1; } $act_name =~ s/:[^:]+$//; last if ($level <= 1); } return 0; }