#!/usr/bin/ruby
#
# Rule-based mail filter for mbox files.
#
# For every account listed in 'accounts.dat' the script reads the account's
# /Inbox mbox, tests each non-deleted message against the rules loaded from
# '*.rules' plus rules generated from 'lists.dat', appends matching messages
# to the rule's target folder and finally rewrites /Inbox without the
# messages that were moved.
#
# Input files (all read from the current directory, '|'-separated, blank
# lines and lines starting with '#' are ignored):
#
#   accounts.dat   name|dir|type
#   *.rules        date|name|BOOL (sel,pred,arg) BOOL (sel,pred,arg) ...||account|folderpath
#   lists.dat      date|name[,alias...]|mlm|addr|account
#
# Sub-folders are stored on disk as '<parent>.sbd/<child>' (see Mbox).

require 'fileutils'

# Messages with a level above this value are not printed by #debug.
DEBUG_LEVEL = 3

# Prints a timestamped diagnostic line when DEBUG_LEVEL is at least +minlevel+.
def debug(minlevel, text)
  return unless DEBUG_LEVEL >= minlevel
  puts "[#{Time.now}] [#{minlevel}]: #{text}"
end

# One mbox file belonging to an account.
#
# A logical folder path such as "/lists/r/ruby" is mapped to the file
# "<account dir>/lists.sbd/r.sbd/ruby": every path component except the last
# gets an ".sbd" suffix.
class Mbox
  # Absolute or relative filesystem path of the mbox file.
  attr_reader :fspath

  # acc        - account name; must be a key of $accounts.
  # folderpath - logical folder path; must start with "/".
  #
  # Raises RuntimeError for an unknown account or a path without leading "/".
  def initialize(acc, folderpath)
    @folderpath = folderpath
    raise "unknown account #{acc}" unless $accounts.has_key?(acc)
    raise "invalid folderpath #{folderpath}" unless folderpath =~ %r{^/}

    fp = folderpath.gsub(%r{//+}, '/')   # collapse repeated slashes
    fp.sub!(%r{/$}, '')                  # drop trailing slash
    fp.sub!(%r{^/}, '')                  # drop leading slash
    fp.gsub!(%r{(.)/}, "\\1.sbd/")       # parent components become *.sbd dirs
    @fspath = $accounts[acc]['dir'] + "/" + fp
    @acc = acc
    @content = nil
  end

  # True when the mbox file is present on disk.
  def exists?
    File.exist?(@fspath)
  end

  # Creates the mbox file, its parent directories, and an (empty) mbox file
  # for every parent folder that does not have one yet. Existing files are
  # left untouched.
  def create
    path = @fspath.clone
    FileUtils.mkdir_p(File.dirname(path))
    loop do
      # Block form closes the handle immediately; we only want the file to exist.
      File.open(path, "w") {} unless File.exist?(path)
      # Step up one level: "a.sbd/b" -> "a". Stop once no ".sbd/x" tail is left.
      break unless path.sub!(%r{\.sbd/[^/]+$}, '')
    end
  end

  # Empties the mbox file, creating it first when necessary.
  def truncate
    if exists?
      File.open(@fspath, "w") {}
    else
      create
    end
    @content = nil
  end

  # Appends the raw text of each given Msg to the mbox file.
  def append(*msgs)
    File.open(@fspath, "a") do |f|
      msgs.each { |msg| f.write(msg.raw_content) }
    end
    @content = nil   # cached text no longer reflects the file
  end

  # Returns the whole file as a string; the result is cached until the next
  # #append or #truncate on this object.
  def content
    @content ||= File.open(@fspath, "r") { |f| f.read }
  end

  # Yields a Msg for every message in the mbox. Messages are delimited by
  # lines starting with "From ". Headers are separated from the body by the
  # first blank line; a message without a blank line is treated as
  # headers-only with an empty body.
  def each_message
    content.scan(/^(From [^\n]+\n)(.+?)(?=^From |\Z)/m) do |from_line, rest|
      msg = Msg.new
      msg.raw_content = from_line + rest
      if rest =~ /(.+?)\015?\012\015?\012(.*)/m
        header, body = $1, $2
      else
        header, body = rest, ''
      end
      # A header value may continue on following lines that start with whitespace.
      header.scan(/^(\S+):\s*(.*(?:\n\s+.*)*)/) { |hn, hv| msg.add_header(hn, hv) }
      msg.body = body
      yield msg
    end
  end

  # Number of lines starting with "From " in the file.
  def size
    content.scan(/^From /).size
  end
end

# A single parsed mail message.
class Msg
  # headers     - Hash of lower-cased header name => value.
  # body        - message body text.
  # raw_content - the message exactly as found in the mbox, "From " line included.
  attr_accessor :headers, :body, :raw_content
  # True when the first X-Mozilla-Status header has bit 0x8 set.
  attr_reader :deleted

  def initialize
    @body = nil
    @headers = {}
    @deleted = false
  end

  # Stores a header under its lower-cased name. Repeated headers are joined
  # with a newline. The deleted flag is evaluated only for the first
  # X-Mozilla-Status header seen.
  def add_header(name, value)
    dname = name.downcase
    if @headers.has_key?(dname)
      @headers[dname] += "\n" + value
    else
      @headers[dname] = value
      @deleted = true if dname == 'x-mozilla-status' && (value.hex & 8) > 0
    end
  end
end

# A filter rule: a list of conditions combined with AND or OR, plus the
# account and folder that matching messages are delivered to.
class Rule
  # cond is an Array of [selector, predicate, argument] triples. The argument
  # is a lower-case String, or a Regexp for the "matches" predicates.
  attr_accessor :name, :bool, :cond, :acc, :folderpath

  def initialize(name, bool, cond, acc, folderpath)
    @name = name
    @bool = bool
    @cond = cond
    @acc = acc
    @folderpath = folderpath
  end

  # Returns true when +msg+ satisfies the rule, false otherwise.
  #
  # With bool 'AND' every condition must hold; with 'OR' the first condition
  # that holds decides. A rule without conditions never matches.
  def match(msg)
    debug 9, "starting rule #{@name}, bool=#{bool}"
    m = false
    @cond.each do |sel, pred, arg|
      b = false   # does the selected part exist?
      s = ''      # text of the selected part (nil for a missing standard header)
      case sel
      when 'to or cc'
        b = msg.headers.has_key?('to') || msg.headers.has_key?('cc')
        s = (msg.headers['to'] || '') + (msg.headers['cc'] || '')
      when /^(to|subject|from|reply-to|cc|bcc|date)$/
        s = msg.headers[$1]
        b = !s.nil?
      when /^header (.+)$/
        key = $1.downcase   # Msg stores header names lower-cased
        b = msg.headers.has_key?(key)
        s = b ? msg.headers[key] : ''
      when 'body'
        s = msg.body
        b = !(s.nil? || s.empty?)
      end

      # String predicates are case-insensitive: arguments are stored
      # lower-cased, so compare against a lower-cased copy. Regexp predicates
      # see the original text and carry their own flags.
      ds = s ? s.downcase : ''
      m = case pred
          when "contains"           then !ds.index(arg).nil?
          when "doesn't contain"    then ds.index(arg).nil?
          when "begins with"        then ds.index(arg) == 0
          when "doesn't begin with" then ds.index(arg) != 0
          when "matches"            then !(s =~ arg).nil?
          when "doesn't match"      then (s =~ arg).nil?
          when "exists"             then b
          when "doesn't exist"      then !b
          else false
          end
      debug 9, "testing `#{ds}' #{pred} `#{arg}' ? #{m}"
      break if @bool == 'AND' && !m
      return true if @bool == 'OR' && m
    end
    return true if m
    debug 9, "ending rule #{name}, no match"
    false
  end
end

debug 1, "start"

# get accounts data from 'accounts.dat'
$accounts = {}
File.open("accounts.dat", "r") do |f|
  f.each do |line|
    line.gsub!(/\r|\n/, '')
    next unless line =~ /\S/
    next if line =~ /^#/
    unless line =~ /^([^|]+)\|([^|]+)\|(.+)$/
      raise "accounts.dat:#{f.lineno}: syntax error: #{line}"
    end
    name, dir, type = $1, $2, $3
    unless File.exist?(dir)
      raise "accounts.dat:#{f.lineno}: #{dir} doesn't exist"
    end
    $accounts[name] = {'dir' => dir, 'type' => type}
  end
end

# get rules in '*.rules'
rules = []
rules_memory = {}   # rule names seen so far, for duplicate detection
Dir.glob('*.rules').each do |filename|
  File.open(filename, "r") do |f|
    f.each do |line|
      line.gsub!(/\r|\n/, '')
      next unless line =~ /\S/
      next if line =~ /^#/
      unless line =~ /^(\d+)\|([^|]+)\|((?:([A-Z]+) \(.+?\) ?)+)\|\|([^|]+)\|(.+)$/
        raise "#{filename}:#{f.lineno}: syntax error: #{line}"
      end
      date, name, cond, bool, acc, folderpath = $1, $2, $3, $4, $5, $6
      # YYYYMMDD; the month's first digit is 0 or 1.
      unless date =~ /^(19|20)\d\d[01][0-9][0-3][0-9]$/
        raise "#{filename}:#{f.lineno}: invalid date #{date}"
      end
      if rules_memory.has_key?(name)
        raise "#{filename}:#{f.lineno}: duplicate rule #{name}"
      end
      unless ['AND', 'OR'].include?(bool)
        raise "#{filename}:#{f.lineno}: bool must be AND/OR"
      end
      # Normalise to "/path" without trailing slash.
      folderpath.chop! if folderpath[-1, 1] == "/"
      folderpath = "/" + folderpath if folderpath[0, 1] != "/"
      if acc != "*" && !$accounts.has_key?(acc)
        raise "#{filename}:#{f.lineno}: unknown account #{acc}"
      end

      i = 0
      conds = []
      cond.scan(/(\S+) \((.+?)\)(?: |$)/) do |l, c|
        i += 1
        unless l == bool
          raise "#{filename}:#{f.lineno}: bool must be all #{bool}"
        end
        unless c =~ /^([^,]+),([^,]+),?(.*)$/
          raise "#{filename}:#{f.lineno}:cond #{i}: syntax error: #{c}"
        end
        sel, pred, arg = $1, $2, $3
        # Keep this list in sync with the selectors handled by Rule#match.
        unless sel =~ /^(from|body|date|to|cc|bcc|reply-to|to or cc|subject|header \S+)$/
          raise "#{filename}:#{f.lineno}:cond #{i}: unknown selector #{sel}"
        end
        unless pred =~ /^(contains|doesn't contain|begins with|doesn't begin with|matches|doesn't match|exists|doesn't exist)$/
          raise "#{filename}:#{f.lineno}:cond #{i}: unknown predicate #{pred}"
        end
        if pred =~ /exist/ && !arg.empty?
          raise "#{filename}:#{f.lineno}:cond #{i}: unwanted arg #{arg}"
        end
        if pred =~ /match/
          # Argument is written as /pattern/ or /pattern/i.
          unless arg =~ %r{^/(.+)/(i?)}
            raise "#{filename}:#{f.lineno}:cond #{i}: syntax error: #{arg}"
          end
          pattern, ignore_case = $1, !$2.empty?
          flags = Regexp::MULTILINE
          flags |= Regexp::IGNORECASE if ignore_case
          arg = Regexp.new(pattern, flags)
        else
          arg.downcase!
        end
        conds.push [sel, pred, arg]
      end

      rules.push Rule.new(name, bool, conds, acc, folderpath)
      rules_memory[name] = true
    end
  end
end

# get lists data from 'lists.dat'
$lists = {}
File.open("lists.dat", "r") do |f|
  f.each do |line|
    line.gsub!(/\r|\n/, '')
    next unless line =~ /\S/
    next if line =~ /^#/
    unless line =~ /^(\d+)\|([^|]+)\|([^|]+)\|([^|]+)\|(.+)$/
      raise "lists.dat:#{f.lineno}: syntax error: #{line}"
    end
    names, mlm, addr, acc = $2, $3, $4, $5
    names = names.split(",")
    name = names[0]
    aliases = names[1..-1]
    unless $accounts.has_key?(acc)
      raise "lists.dat:#{f.lineno}: invalid account #{acc}"
    end
    unless ['EZMLM', 'MAILMAN', 'MAJORDOMO', 'PETIDOMO', 'LISTSERV', 'fml', '?'].include?(mlm)
      raise "lists.dat:#{f.lineno}: unknown mlm #{mlm}"
    end
    $lists[name] = {'mlm' => mlm, 'addr' => addr, 'acc' => acc,
                    'aliases' => aliases}
  end
end

# insert list rules: one OR-rule per list, filing into
# /lists/<first letter>/<list name>/<YYYYMM>.mbox
yymm = Time.now.strftime("%Y%m")
$lists.each_pair do |name, list|
  firstletter = (name =~ /^([A-Za-z])/) ? $1.downcase : "_"
  rule = Rule.new("list #{name}", 'OR', [], list['acc'],
                  "/lists/#{firstletter}/#{name}/#{yymm}.mbox")
  ([name] + list['aliases']).each do |n|
    # Rule#match compares against lower-cased header text.
    needle = n.downcase
    rule.cond.push ['to or cc', 'contains', needle]
    rule.cond.push ['reply-to', 'contains', needle]
    # Also accept the address with the "list."/"lists." host prefix removed.
    if needle =~ /(.+)@lists?\.(.+)/i
      short = "#{$1}@#{$2}"
      rule.cond.push ['to or cc', 'contains', short]
      rule.cond.push ['reply-to', 'contains', short]
    end
  end
  rules.push rule
end

debug 1, "there are #{rules.size} rule(s)"

# now process all messages in all account's inbox
nmatches = Hash.new(0)   # rule name => number of messages it matched
totmoved = 0             # messages actually appended to another folder
$accounts.each_key do |name|
  debug 1, "processing Inbox for account #{name}"
  inbox = Mbox.new(name, '/Inbox')
  unless inbox.exists?
    debug 1, "no /Inbox for account #{name}, skipping"
    next
  end
  i = 0
  debug 2, "there are #{inbox.size} message(s) in /Inbox"
  inbox_msgs = []   # messages that stay in /Inbox
  inbox.each_message do |msg|
    debug 5, "processing message #{i}, deleted=#{msg.deleted}"
    i += 1
    moved = false
    # Deleted messages are neither filtered nor kept, so they disappear
    # from /Inbox when it is rewritten below.
    next if msg.deleted
    rules.each do |rule|
      next unless rule.match(msg)
      nmatches[rule.name] += 1
      uidl = ''
      if msg.headers['x-uidl']
        msg.headers['x-uidl'] =~ /(\S+)/
        uidl = " (uidl #{$1})"
      end
      debug 3, "msg #{i}#{uidl} matches #{rule.name}"
      if rule.folderpath != '/Inbox'
        # Account "*" means: deliver inside the account being processed.
        target = Mbox.new(rule.acc == '*' ? name : rule.acc, rule.folderpath)
        target.create unless target.exists?
        debug 4, "appending message to #{target.fspath}"
        target.append(msg)
        moved = true
        totmoved += 1
      end
      break   # first matching rule wins
    end
    inbox_msgs.push(msg) unless moved
  end

  # Counts moved messages as well as deleted ones that were dropped.
  nmoved = inbox.size - inbox_msgs.size
  if nmoved > 0
    debug 3, "we moved #{nmoved} messages from /Inbox"
    debug 1, "rewriting /Inbox, #{inbox_msgs.size} message(s)"
    # Write the survivors to a temporary mbox, then rename it over /Inbox.
    new_inbox = Mbox.new(name, "/Inbox.tmp")
    new_inbox.truncate
    new_inbox.append(*inbox_msgs)
    File.rename(new_inbox.fspath, inbox.fspath)
  end
end

# statistics
debug 2, "filter report:"
totspam = 0
nmatches.sort { |a, b| b[1] <=> a[1] }.each do |rule_name, count|
  debug 2, sprintf(" %-68s: %3d", rule_name, count)
  totspam += count if rule_name =~ /^spam/
end
debug 2, "we moved #{totmoved} message(s) in total" if totmoved > 0
debug 2, "we caught #{totspam} spam(s) in total" if totspam > 0

debug 1, "end"