#!/bin/bash

# Copyright (C) 2010 Jonathan Kamens
#
# The current version of this script is available from
# <URL missing from this copy of the file>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.

# Purpose: build /tmp/notspam and /tmp/bogospam from the mbox archives in
# ~/Mail, run them through bogofilter, and then run bogotune on the result.
#
# Command-line options:
#
# --help      Print help message and exit
# --verbose   Pass -v to bogotune
# --keep      Keep existing /tmp/notspam and /tmp/bogospam files instead of
#             regenerating them from ~/Mail/notspam* and ~/Mail/bogospam*
# --spamok    Ignore the fact that some of the messages in /tmp/bogospam were
#             classified by bogofilter as ham (i.e., bogofilter is wrong
#             about these messages).
# --hamok     Ignore the fact that some of the messages in /tmp/notspam were
#             classified by bogofilter as spam.
# --wordlist  Pass -D to bogotune on the first attempt (the same flag this
#             script uses when it retries "with internal word list").
#             Takes no argument.

# How to use the script:
#
# 1. Edit the "for file in" code block below as necessary to tell the script
#    where your ham and spam archives are and how they're compressed.
# 2. Run the script.
# 3. If it complains about spam messages in /tmp/notspam, load the file into an
#    editor, look for "X-Bogosity: Spam" lines, and confirm that the messages
#    that contain them are all in fact ham.  If not, then move them from your
#    ham mbox archive to your spam mbox archive and run the script again.
# 4. Ditto for ham messages in /tmp/bogospam, but this time look for
#    "X-Bogosity: Ham"
# 5. When /tmp/notspam and /tmp/bogospam are correct, run with "--hamok
#    --spamok --keep" to tell the script not to regenerate them and to go ahead
#    and do the tuning without worrying about seemingly misclassified messages.
# 6. When it's done, copy the results into your .bogofilter.cf.

USAGE="Usage: $(basename "$0") [--help] [--verbose] [--keep] [--spamok] [--hamok] [--wordlist]"

# Option parsing.  Every option is a plain flag; none takes a value.
while [ $# -gt 0 ]; do
  case "$1" in
    --verbose)
      shift
      VERBOSE=-v
      ;;
    --help)
      shift
      echo "$USAGE"
      exit
      ;;
    --spamok)
      shift
      SPAMOK=yes
      ;;
    --hamok)
      shift
      HAMOK=yes
      ;;
    --keep)
      shift
      KEEP=yes
      ;;
    --wordlist)
      shift
      WORDLIST=-D
      ;;
    *)
      echo "Unrecognized option: $1" 1>&2
      echo "$USAGE" 1>&2
      exit 1
      ;;
  esac
done

set -x

# The archive globs below are relative, so a failed cd must stop the script
# instead of letting it read whatever happens to be in the current directory.
cd ~/Mail || exit 1

if [ ! "$KEEP" ]; then
  for file in notspam bogospam; do
    rm -f "/tmp/$file"
    # Concatenate the compressed and uncompressed mbox archives, drop messages
    # whose Subject matches a milter subject filter, and let bogofilter
    # annotate what remains (the checks below look for its X-Bogosity header).
    (bzcat "$file".*.bz2; cat "$file".? "$file") |
    perl -e '
      # Subject filters: one regex per line; blank lines and lines starting
      # with "#" are skipped.  A missing file simply yields no filters.
      open(MSF, "$ENV{HOME}/.bogofilter/milter-subject-filters");
      while (<MSF>) {
        s/^\s+//;
        s/\s+$//;
        next if (/^\#/);
        next if (/^$/);
        push(@sres, $_);
      }
      if (@sres) {
        $sre = "(?:" . join("|", @sres) . ")";
        $sre = qr/$sre/;
      }
      # Read one mbox message per record.  Each record ends with the
      # "\nFrom " that begins the following message.  (Do not spell this
      # with a backslash before the F: on Perl 5.16 and later that is the
      # fold-case escape and would swallow the F.)
      $/ = "\nFrom ";
      while (<>) {
        ($header, $body) = split(/\n\n/);
        if (($sre and $header =~ /\nSubject:\s*(.*\S)/ and $1 =~ /$sre/o)
            # or (($header =~ /\nFrom: SpamQuarantine\@mit\.edu/) and
            #     ($header =~ /\nSubject: Spam Quarantine Summary/))
            ) {
          # Message dropped.  If it was the very first record, re-emit the
          # leading "From " so the next message still has its separator.
          print "From " if ($. == 1);
        }
        else {
          print;
        }
      }
    ' |
    bogofilter -M -p > "/tmp/$file"
  done
fi

if [ ! "$SPAMOK" ]; then
  if grep -q -s '^X-Bogosity: Ham' /tmp/bogospam; then
    echo "WARNING: Ham in /tmp/bogospam; specify --spamok (and maybe --keep) to override" 1>&2
    exit 1
  fi
fi

if [ ! "$HAMOK" ]; then
  if grep -q -s '^X-Bogosity: Spam' /tmp/notspam; then
    echo "WARNING: Spam in /tmp/notspam; specify --hamok (and maybe --keep) to override" 1>&2
    exit 1
  fi
fi

# $WORDLIST and $VERBOSE are intentionally unquoted: each is either empty (and
# must disappear from the command line) or a single flag.
bogotune $WORDLIST $VERBOSE -T 0 -n /tmp/notspam -s /tmp/bogospam
tune_status=$?

# Retry with -D only if the first attempt was not already using it.
if [ "$tune_status" -ne 0 ] && [ -z "$WORDLIST" ]; then
  echo ""
  echo "Trying again with internal word list."
  echo ""
  bogotune -D $VERBOSE -T 0 -n /tmp/notspam -s /tmp/bogospam
  tune_status=$?
fi

# Remove the work files only when tuning really succeeded.  The status is
# tracked explicitly because an "if" whose condition is false and which has no
# "else" returns 0, which used to make a failed --wordlist run look successful
# and delete the files.
if [ "$tune_status" -eq 0 ]; then
  rm -f /tmp/notspam /tmp/bogospam
fi

exit "$tune_status"