#!/usr/bin/perl # Copyright (c) 2009 Jonathan Kamens . # # May be distributed under the terms of the GNU General Public # License, Version 3 or any newer version at your discretion. See # http://www.gnu.org/licenses/gpl.html for license terms. # Files messages from your Sent Items folder into your other IMAP # folders automatically based on the messages already in those folders # (i.e., once part of a conversation is in a particular folder, the # rest should go there as well). Uses the "In-Reply-To" and # "References" headers in the messages you've sent, matched up against # the "Message-ID" and "Resent-Message-ID" headers in the messages in # other folders, to figure out where to file messages (which means, # obviously, that it won't be able to auto-file messages that aren't # replies to other messages you've already filed). Run with "--help" # for more information. # $Id: file-sent-items.pl,v 1.9 2010/10/28 18:42:44 jik Exp $ use strict; use warnings; use Date::Parse; use File::Basename; use Getopt::Long; use Mail::IMAPClient; use Term::ReadPassword; my $whoami = basename $0; my $usage = "Usage: $whoami [--help] [--verbose] [--quiet] [--dryrun] [--server=hostname] [--user=username] [--password=password] [--sent-items-folder=folder-name [...]] [--folder-include-regexp=regexp] [--folder-exclude-regexp=regexp] Default sent items folder is 'Sent Items'. Ignores (case-insensitive) Inbox, Trash, Drafts, and Outbox folders as well as folders starting with \"Junk \" and \"Deleted \" by default. To override, specify a different regexp with --folder-exclude-regexp, or specify --folder-exclude-regexp=\"\" to ignore no folders.\n"; # 1. Open connection to IMAP server. # 2. Fetch Date, Subject, Message-ID, Resent-Message-ID, References, # In-Reply-To headers from all messages in sent folder. # 3. Find earliest message in folder. # 4. Fetch list of all other folders. # 5. Fetch Message-ID and Resent-Message-ID headers of messages in # other folders since earliest message in sent folder (minus a # month's grace period). # 6. Index messages in other folders by Message ID, References, In-Reply-To. # 7. For each message in sent folder: # a. Find message IDs in In-Reply-To and References (reverse order) # headers. # b. See if any of them can be located in another folder. # c. If so, move the message into that folder and add it to the # index. # d. Otherwise, see if there's exactly one other folder with # responses to this message. # e. If so, move the message into that folder and add it to the # index. my($help, $verbose, $quiet, $dryrun, $server, $user, $password, @sent_folders, $folder_include_regexp, $folder_exclude_regexp); die $usage if (! GetOptions("help" => sub { print $usage; exit; }, "verbose" => \$verbose, "quiet" => \$quiet, "dryrun" => \$dryrun, "server=s" => \$server, "user=s" => \$user, "password=s" => \$password, "sent-items-folder=s@" => \@sent_folders, "folder-include-regexp=s" => \$folder_include_regexp, "folder-exclude-regexp=s" => \$folder_exclude_regexp, )); $server = "localhost" if (! $server); if (! $user) { $user = $ENV{"USER"} || $ENV{"LOGNAME"}; die "Could not determine username from \$USER or \$LOGNAME\n" if (! $user); } @sent_folders = ("Sent Items") if (! @sent_folders); $folder_exclude_regexp = '^(?i:inbox$|trash$|drafts$|outbox$|junk |deleted )' if (! defined($folder_exclude_regexp)); if (! $password) { $password = read_password("Enter password for $user\@$server: "); } if ($verbose) { &verbose("Connecting..."); } my $imap = Mail::IMAPClient->new ( Server => $server, User => $user, Password => $password, ) or die "$whoami: IMAP connect failed: $@\n"; &verbose(" done\n"); my($folder_sent_msgs, $folder_sent_headers, $earliest); foreach my $sent_folder (@sent_folders) { &verbose("Selecting $sent_folder..."); $imap->select($sent_folder) or die "$whoami: Can't select folder $sent_folder: ", $imap->LastError, "\n"; &verbose(" done\nFetching sent messages list..."); my $sent_msgs = $imap->sort("DATE", "US-ASCII", "NOT DELETED") or die "$whoami: IMAP search error: ", $imap->LastError, "\n"; &verbose(" done\n"); if (! ($sent_msgs && @$sent_msgs)) { next; } &verbose("Fetching sent messages headers..."); my $sent_headers = $imap->parse_headers ($sent_msgs, "Date", "Subject", "Message-ID", "Resent-Message-ID", "References", "In-Reply-To" ) or die "$whoami: Header fetch error: ", $imap->LastError, "\n"; &verbose(" done\n"); my $this_earliest = str2time($sent_headers->{$sent_msgs->[0]}->{"Date"}->[0]); if ($this_earliest && (! $earliest || ($this_earliest < $earliest))) { $earliest = $this_earliest; } $folder_sent_msgs->{$sent_folder} = $sent_msgs; $folder_sent_headers->{$sent_folder} = $sent_headers; } if (! $folder_sent_msgs) { print "No messages to file!\n" if (! $quiet); exit; } die "$whoami: Couldn't determine date of earliest message in @sent_folders\n" if (! $earliest); $earliest = $imap->Rfc2060_date($earliest - 30 * 24 * 60 * 60); &verbose("Fetching folder list..."); my(@folders) = @{$imap->folders()}; if ($folder_exclude_regexp) { @folders = grep(! /$folder_exclude_regexp/o, @folders); } if ($folder_include_regexp) { @folders = grep(/$folder_include_regexp/o, @folders); } @folders = grep(! &in($_, @sent_folders), @folders); &verbose(" done: @folders\n"); my(%index, %ref_index); sub index_add { my($folder, $id, $reply, $refs) = @_; return if (! $id); $index{$id} = $folder; if ($reply) { $ref_index{$id}->{$folder} = 1; } if ($refs) { foreach my $ref (split(' ', $refs)) { $ref_index{$ref}->{$folder} = 1; } } } foreach my $folder (@folders) { &verbose("Indexing $folder..."); $imap->select($folder) or die "$whoami: Can't select folder $folder: ", $imap->LastError, "\n"; my $msgs = $imap->search("SINCE" => $earliest, "NOT", "DELETED"); if ($msgs && @$msgs) { my $headers = $imap->parse_headers($msgs, "Message-ID", "Resent-Message-ID", "In-Reply-To", "References"); next if (! ($headers && %$headers)); foreach my $message (keys %$headers) { my $id = ($headers->{$message}->{"Resent-Message-ID"}->[0] || $headers->{$message}->{"Message-ID"}->[0]); &index_add($folder, $id, $headers->{$message}->{"In-Reply-To"}->[0], $headers->{$message}->{"References"}->[0]); &verbose("."); } } &verbose(" done\n"); } foreach my $sent_folder (@sent_folders) { next if (! $folder_sent_msgs->{$sent_folder}); &verbose("Returning to $sent_folder..."); my $sent_msgs = $folder_sent_msgs->{$sent_folder}; my $sent_headers = $folder_sent_headers->{$sent_folder}; $imap->select($sent_folder) or die "$whoami: Can't select folder $sent_folder: ", $imap->LastError, "\n"; &verbose(" done\nFiling..."); message: foreach my $message (@$sent_msgs) { my $h = $sent_headers->{$message}; my(@ids); if ($h->{"In-Reply-To"}->[0]) { push(@ids, $h->{"In-Reply-To"}->[0]); } if ($h->{"References"}->[0]) { push(@ids, reverse(split(' ', $h->{"References"}->[0]))); } my $subject = $h->{"Subject"}->[0]; my $date = $h->{"Date"}->[0]; foreach my $id (@ids) { my $folder; if ($folder = $index{$id}) { print "$subject ($date) -> $folder (found predecessor)\n" if (! $quiet); } elsif ($ref_index{$id} and scalar keys %{$ref_index{$id}} == 1) { ($folder) = keys %{$ref_index{$id}}; print "$subject ($date) -> $folder (found reply)\n" if (! $quiet); } if ($folder) { if (! $dryrun) { $imap->see($message); $imap->move($folder, $message) or die("$whoami: moving message to $folder: ", $imap->LastError, "\n"); } &index_add($folder, ($h->{"Resent-Message-ID"}->[0] || $h->{"Message-ID"}->[0]), $h->{"In-Reply-To"}->[0], $h->{"References"}->[0]); next message; } } print "Could not move $subject ($date)\n" if (! $quiet); } &verbose(" done\n"); } sub verbose { return if (! $verbose); print STDERR @_; } sub in { my($member, @set) = @_; scalar grep($member eq $_, @set); } # $Log: file-sent-items.pl,v $ # Revision 1.9 2010/10/28 18:42:44 jik # Add support for indexing based on replies received from others. # Mark moved messages seen. # # Revision 1.8 2009/12/30 20:49:24 jik # Don't set default $folder_exclude_regexp if it's set on the command # line! # Fix some minor nits in the formatting of verbose messages. # When looking for prior messages in other folders, don't look at # deleted messages. # Add change log to bottom of script. #