Release 0.7.0

Release 0.6.9
2005-05-31 18:08:20 +00:00 · 2005-05-31 18:07:19 +00:00
14 changed files with 929 additions and 272 deletions
@@ -8,8 +8,10 @@ include testsample.py
 include testmime.py
 include testbms.py
 include testdspam.py
+include rejects.py
 include bms.py
 include spf.py
+include spfquery.py
 include test.py
 include sample.py
 include test/*
@@ -1,5 +1,18 @@
 Here is a history of user visible changes to Python milter.

+0.7.0	SPF check hello name
+	Move pythonsock to /var/run/milter
+	Move milter.cfg to /etc/mail/pymilter.cfg
+	Check M$ style XML CID records by converting to SPF
+	Recognize, but never match ip6 - until we properly support it.
+	Option to reject when no PTR and no SPF
+0.6.9	Reject invalid SRS immediately for benefit of callback verifiers
+	Fix include bug in spf.py
+	Fix check_header bug
+	Fix setup.py to work with python < 2.2.3, thanks to Eric S. Johansson
+	Test driver for SPF test suite.  Fix bugs and add features to
+	pass most of test suite.
+	Use best_guess() and get_header() in bms.py for SPF support
 0.6.8	Defang message/rfc822 content_type with boundary 
 	Support SPF delegation
 	Reject neutral SPF result for selected domains
@@ -7,6 +20,7 @@ Here is a history of user visible changes to Python milter.
 	Don't report "spoofed" unless rcpt looks like SRS
 	Check for bounce with multiple rcpts
 	Make dspam see Received-SPF headers
+	Fix sysv init for Redhat 9 and other single ps line per process systems
 0.6.7	Fix failure to remove explicit unix socket thanks to Alexander again.
 	Support SRS forgery detection.
 	Detect thread resource starvation in Milter.py.
@@ -1,13 +1,36 @@
+Message not saved for following traceback:
+Traceback (most recent call last):
+  File "/usr/lib/python2.3/site-packages/Milter.py", line 188, in <lambda>
+    milter.set_eom_callback(lambda ctx: ctx.getpriv().eom())
+  File "bms.py", line 935, in eom
+    msg.dump(out)
+  File "/usr/lib/python2.3/site-packages/mime.py", line 347, in dump
+    g.flatten(self,unixfrom=unixfrom)
+  File "/var/tmp/python2.3-2.3.3-root/usr/lib/python2.3/email/Generator.py", line 102, in flatten
+  File "/var/tmp/python2.3-2.3.3-root/usr/lib/python2.3/email/Generator.py", line 130, in _write
+  File "/var/tmp/python2.3-2.3.3-root/usr/lib/python2.3/email/Generator.py", line 156, in _dispatch
+  File "/var/tmp/python2.3-2.3.3-root/usr/lib/python2.3/email/Generator.py", line 199, in _handle_text
+TypeError: string payload expected: <type 'list'>
+------------
+spf.py has no recursion bound on CNAME lookup
+Support SMTP AUTH and disable SPF checks when connection is authorized.
+Web admin interface
+RHSBL
+Check valid domains allowed by internal senders to detect PCs infected
+with spam trojans.
+Do CBV (callback verification) for mail with no published SPF record.
+message log for automated stats and blacklisting
+Skip dspam when SPF pass?
+Report 551 with rcpt on SPF fail?
+check spam keywords with character classes, e.g.
+	{a}=[a@ãä], {i}=[i1í], {e}=[eë], {o}=[o0ö]
+
 Implement RRS - a backdoor for non-SRS forwarders.  User lists non-SRS 
 forwarder accounts, and a util provides a special local alias for the
 user to give to the forwarder.  Alias only works for mail from that
 forwarder.  Milter gets forwarder domain from alias and uses it to
 SPF check forwarder.

-adapt init script to work on RH9
-Skip dspam when SPF pass?
-Report 551 with rcpt on SPF fail?
-
 Another special dspam user, 'honeypot', can be listed in innoculations.
 All email to those addresses is treated as known spam.

@@ -1,6 +1,64 @@
 #!/usr/bin/env python
 # A simple milter.
 # $Log$
+# Revision 1.114  2004/07/27 00:40:12  stuart
+# Make reject on no PTR optional.
+#
+# Revision 1.113  2004/07/23 23:11:14  stuart
+# Log known malformed messages differently than general processing exceptions.
+#
+# Revision 1.112  2004/07/21 19:18:33  stuart
+# Punt on UnicodeDecodeError when decoding headers.
+# Accept a pass with default SPF for missing reverse IP.
+#
+# Revision 1.111  2004/07/18 13:13:31  stuart
+# Reject invalid SRS only for SRS domain (which is the only one we
+# know the key for).
+# Reject senders that have neither reverse IP nor SPF.
+#
+# Revision 1.110  2004/06/12 03:13:18  stuart
+# Block bounces only for SRS domain.  Also treat mail from
+# postmaster or mailer-daemon as DSN for SRS/SES checking purposes.
+#
+# Revision 1.109  2004/05/01 02:56:55  stuart
+# Let multiple screeners share work.
+#
+# Revision 1.108  2004/04/29 20:36:23  stuart
+# Require HELO name
+#
+# Revision 1.107  2004/04/24 22:55:29  stuart
+# Move some files to make the RPM more standard.
+#
+# Revision 1.106  2004/04/21 18:29:08  stuart
+# Validate hello name with SPF.
+#
+# Revision 1.105  2004/04/20 15:16:00  stuart
+# Release 0.6.9
+#
+# Revision 1.104  2004/04/19 21:56:26  stuart
+# Support SPF best_guess and get_header
+#
+# Revision 1.103  2004/04/10 02:31:01  stuart
+# Fix timeout config
+#
+# Revision 1.102  2004/04/08 20:25:11  stuart
+# Make libmilter timeout a config option
+#
+# Revision 1.101  2004/04/08 19:18:16  stuart
+# Preserve case of local part in sender
+#
+# Revision 1.100  2004/04/08 18:41:15  stuart
+# Reject numeric hello names
+#
+# Revision 1.99  2004/04/06 19:46:39  stuart
+# Reject invalid SRS immediately for benefit of CallBack Verifiers.
+#
+# Revision 1.98  2004/04/06 15:28:20  stuart
+# Release 0.6.8-2
+#
+# Revision 1.97  2004/04/06 13:07:43  stuart
+# Pass original header name to check_header
+#
 # Revision 1.96  2004/04/06 03:27:03  stuart
 # bugs from Redhat 9 testing
 #
@@ -154,90 +212,6 @@
 # Revision 1.47  2003/08/26 05:01:38  stuart
 # Release 0.6.0
 #
-# Revision 1.46  2003/08/26 04:45:16  stuart
-# Modest dspam control
-#
-# Revision 1.43  2003/06/25 17:00:02  stuart
-# fix hostaddr test
-#
-# Revision 1.42  2003/06/25 16:45:59  stuart
-# Not using checking hostaddr properly
-#
-# Revision 1.41  2003/06/25 15:57:54  stuart
-# Ready for 5.5 release.
-#
-# Revision 1.40  2003/06/25 15:41:41  stuart
-# recognize internal connections.
-# Give legitimate users a clue about banned subject keywords.
-#
-# Revision 1.39  2002/12/14 00:36:59  stuart
-# Smart alias feature
-#
-# Revision 1.38  2002/11/14 17:52:53  stuart
-# Redirection feature for wiretap
-#
-# Revision 1.37  2002/11/07 23:52:09  stuart
-# config fixes
-#
-# Revision 1.36  2002/10/04 05:27:38  stuart
-# Add get_submsg to allow modifying rfc822 attachment.
-#
-# Revision 1.35  2002/10/03 01:31:18  stuart
-# Test encoded rfc822 attachment
-#
-# Revision 1.34  2002/10/03 00:55:42  stuart
-# Decode rfc822 attachments
-#
-# Revision 1.33  2002/10/02 18:49:02  stuart
-# Save and log messages which cause an exception while parsing attachments.
-#
-# Revision 1.32  2002/09/24 01:38:05  stuart
-# Doc updates.
-#
-# Revision 1.31  2002/09/13 22:14:06  stuart
-# Release 0.5.0 wrapup
-#
-# Revision 1.30  2002/09/13 20:22:37  stuart
-# Additional config items
-#
-# Revision 1.29  2002/08/20 04:40:46  stuart
-# Use config file
-#
-# Revision 1.28  2002/07/12 19:40:38  stuart
-# Update docs, minor bugs.
-#
-# Revision 1.27  2002/06/16 02:06:24  stuart
-# SPAM tweaks
-#
-# Revision 1.26  2002/06/07 22:07:30  stuart
-# Isolate local hacks to configuration data.
-#
-# Revision 1.25  2002/05/02 20:41:00  stuart
-# Top level virus needs top level header change.
-#
-# Revision 1.24  2002/05/02 20:31:43  stuart
-# Handle quoted-printable HTML attachments.
-# Remove entire attachment when HTML can't be parsed by sgmllib.
-#
-# Revision 1.23  2002/05/02 03:42:31  stuart
-# base64 no longer needed
-#
-# Revision 1.22  2002/05/02 03:12:39  stuart
-# Move check_html to mime module.
-#
-# Revision 1.21  2002/05/02 02:48:22  stuart
-# Remove scripts from HTML even with base64 encoding.
-#
-# Revision 1.20  2002/05/02 00:21:01  stuart
-# Test filtering HTML attachments.
-#
-# Revision 1.19  2002/05/01 22:12:41  stuart
-# Remove scripts from HTML attachments.
-#
-# Revision 1.18  2002/03/01 20:29:00  stuart
-# Ready for release.
-#
-
 # Author: Stuart D. Gathman <stuart@bmsi.com>
 # Copyright 2001 Business Management Systems, Inc.
 # This code is under GPL.  See COPYING for details.
@@ -252,17 +226,22 @@ import Milter
 import tempfile
 import ConfigParser
 import time
+import re
+
 from fnmatch import fnmatchcase
 from email.Header import decode_header

 # Import pysrs if available
 try:
  import SRS
-  import re
  srsre = re.compile(r'^SRS[01][+-=]',re.IGNORECASE)
 except: SRS = None
+
+# Import spf if available
 try: import spf
 except: spf = None
+
+ip4re = re.compile(r'^[1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*$')
 #import syslog
 #syslog.openlog('milter')

@@ -294,13 +273,17 @@ dspam_users = {}
 dspam_userdir = None
 dspam_exempt = {}
 dspam_whitelist = {}
-dspam_screener = None
+dspam_screener = ()
 dspam_internal = True	# True if internal mail should be dspammed
 dspam_reject = ()
-dspam_sizelimit = 80000
+dspam_sizelimit = 180000
 srs = None
 srs_reject_spoofed = False
+srs_fwdomain = None
 spf_reject_neutral = ()
+spf_best_guess = False
+spf_reject_noptr = False
+timeout = 600

 class MilterConfigParser(ConfigParser.ConfigParser):

@@ -350,7 +333,8 @@ class MilterConfigParser(ConfigParser.ConfigParser):
 def read_config(list):
  cp = MilterConfigParser({
    'tempdir': "/var/log/milter/save",
-    'socket': "/var/log/milter/pythonsock",
+    'socket': "/var/run/milter/pythonsock",
+    'timeout': '600',
    'scan_html': 'no',
    'scan_rfc822': 'yes',
    'block_chinese': 'no',
@@ -358,12 +342,15 @@ def read_config(list):
    'blind_wiretap': 'yes',
    'maxage': '8',
    'hashlength': '8',
-    'reject_spoofed': 'no'
+    'reject_spoofed': 'no',
+    'reject_noptr': 'no',
+    'best_guess': 'no'
  })
  cp.read(list)
  tempfile.tempdir = cp.get('milter','tempdir')
-  global socketname, scan_rfc822, scan_html, block_chinese
+  global socketname, scan_rfc822, scan_html, block_chinese, timeout
  socketname = cp.get('milter','socket')
+  timeout = cp.getint('milter','timeout')
  scan_rfc822 = cp.getboolean('milter','scan_rfc822')
  scan_html = cp.getboolean('milter','scan_html')
  block_chinese = cp.getboolean('milter','block_chinese')
@@ -402,13 +389,13 @@ def read_config(list):

  global dspam_dict, dspam_users, dspam_userdir, dspam_exempt
  global dspam_screener,dspam_whitelist,dspam_reject,dspam_sizelimit
-  global spf_reject_neutral,SRS
+  global spf_reject_neutral,spf_best_guess,SRS,spf_reject_noptr
  dspam_dict = cp.getdefault('dspam','dspam_dict')
  dspam_exempt = cp.getaddrset('dspam','dspam_exempt')
  dspam_whitelist = cp.getaddrset('dspam','dspam_whitelist')
  dspam_users = cp.getaddrdict('dspam','dspam_users')
  dspam_userdir = cp.getdefault('dspam','dspam_userdir')
-  dspam_screener = cp.getdefault('dspam','dspam_screener')
+  dspam_screener = cp.getlist('dspam','dspam_screener')
  dspam_reject = cp.getlist('dspam','dspam_reject')
  if cp.has_option('dspam','dspam_sizelimit'):
    dspam_sizelimit = cp.getint('dspam','dspam_sizelimit')
@@ -416,11 +403,13 @@ def read_config(list):
  if spf:
    spf.DELEGATE = cp.getdefault('spf','delegate')
    spf_reject_neutral = cp.getlist('spf','reject_neutral')
+    spf_best_guess = cp.getboolean('spf','best_guess')
+    spf_reject_noptr = cp.getboolean('spf','reject_noptr')
  srs_config = cp.getdefault('srs','config')
  if srs_config: cp.read([srs_config])
  srs_secret = cp.getdefault('srs','secret')
  if SRS and srs_secret:
-    global srs,srs_reject_spoofed
+    global srs,srs_reject_spoofed,srs_fwdomain
    database = cp.getdefault('srs','database')
    srs_reject_spoofed = cp.getboolean('srs','reject_spoofed')
    maxage = cp.getint('srs','maxage')
@@ -433,7 +422,7 @@ def read_config(list):
    else:
      srs = SRS.Guarded.Guarded(secret=srs_secret,
        maxage=maxage,hashlength=hashlength,separator=separator)
-
+    srs_fwdomain = cp.getdefault('srs','fwdomain')

 def parse_addr(t):
  if t.startswith('<') and t.endswith('>'): t = t[1:-1]
@@ -454,6 +443,8 @@ def parse_header(val):
      try:
 	return u.encode(enc)
      except UnicodeError: continue
+  except UnicodeDecodeError:
+    return val
  except LookupError:
    return val

@@ -494,6 +485,7 @@ class bmsMilter(Milter.Milter):
    self.log('%s: %s' % (name,val))

  def connect(self,hostname,unused,hostaddr):
+    self.missing_ptr = hostname.startswith('[') and hostname.endswith(']')
    self.internal_connection = False
    self.trusted_relay = False
    self.receiver = self.getsymval('j')
@@ -521,11 +513,17 @@ class bmsMilter(Milter.Milter):
    if self.trusted_relay:
      connecttype += ' TRUSTED'
    self.log("connect from %s at %s %s" % (hostname,hostaddr,connecttype))
+    self.hello_name = None
+    self.connecthost = hostname
    return Milter.CONTINUE

  def hello(self,hostname):
    self.hello_name = hostname
    self.log("hello from %s" % hostname)
+    if ip4re.match(hostname):
+      self.log("REJECT: numeric hello name:",hostname)
+      self.setreply('550','5.7.1','hello name cannot be numeric ip')
+      return Milter.REJECT
    if not self.internal_connection and hostname in hello_blacklist:
      self.log("REJECT: spam from self:",hostname)
      self.setreply('550','5.7.1','I hate talking to myself.')
@@ -573,79 +571,57 @@ class bmsMilter(Milter.Milter):
 	self.dspam = False
    else:
      self.rejectvirus = False
+    if not self.hello_name:
+      self.log("REJECT: missing HELO")
+      self.setreply('550','5.7.1',"It's polite to say HELO first.")
+      return Milter.REJECT
    if not (self.internal_connection or self.trusted_relay)	\
    	and self.connectip and spf:
      return self.check_spf()
    return Milter.CONTINUE

  def check_spf(self):
-    user,host = spf.split_email(self.canon_from,self.hello_name)
-    self.sender = '@'.join((user,host))
-    res,code,txt = spf.check(self.connectip,self.canon_from,self.hello_name)
-    if res in ('deny', 'fail'):
-      self.log('REJECT: SPF %s %i %s' % (res,code,txt))
-      # improve default explanation, but don't wipe out text from SPF record
-      if txt == 'access denied':	
-        txt = 'SPF fail: see http://spf.pobox.com/why.html'
-      self.setreply(str(code),'5.7.1',txt)
+    t = parse_addr(self.mailfrom)
+    if len(t) == 2: t[1] = t[1].lower()
+    q = spf.query(self.connectip,'@'.join(t),self.hello_name)
+    q.set_default_explanation('SPF fail: see http://spf.pobox.com/why.html')
+    res,code,txt = q.check()
+    receiver = self.receiver
+    if res == 'none':
+      if self.mailfrom != '<>':
+	# check hello name via spf
+	hres,hcode,htxt = spf.check(self.connectip,'',self.hello_name)
+	if hres in ('deny','fail','neutral','softfail'):
+	  self.log('REJECT: hello SPF: %s %i %s' % (hres,hcode,htxt))
+	  self.setreply('550','5.7.1',htxt)
 	  return Milter.REJECT
-    if res == 'pass':
-#       Received-SPF: pass (mybox.example.org: domain of
-#                           myname@example.com designates 192.0.2.1 as
-#                           permitted sender);
-#                           receiver=mybox.example.org;
-#                           client_ip=192.0.2.1;
-#                           envelope-from=myname@example.com;
-      self.add_header('Received-SPF',"""pass (%(receiver)s: domain of
-      %(sender)s designates %(connectip)s as permitted sender);
-      receiver=%(receiver)s; client_ip=%(connectip)s;
-      envelope-from=%(canon_from)s;""" % self.__dict__)
-    elif res == 'none' or res == 'unknown' and txt == 'no SPF record':
-#       Received-SPF: none (mybox.example.org: myname@example.com does
-#                           not designated permitted sender hosts)
-      self.add_header('Received-SPF',"""none (%(receiver)s: %(sender)s does
-      	not designate permitted sender hosts)""" % self.__dict__)
-    elif res == 'softfail':
-#       Received-SPF: softfail (mybox.example.org: domain of transitioning
-#                              myname@example.com does not designate
-#                              192.0.2.1 as permitted sender)
-      self.add_header('Received-SPF',
-      	"""softfail (%(receiver)s: domain of transitioning
-	%(sender)s does not designate
-	%(connectip)s as permitted sender)""" % self.__dict__)
-    elif res == 'neutral':
-      if host in spf_reject_neutral:
-        self.log('REJECT: SPF neutral for',self.sender)
+      if spf_best_guess:
+	#self.log('SPF: no record published, guessing')
+	q.set_default_explanation(
+		'SPF guess: see http://spf.pobox.com/why.html')
+	# best_guess should not result in fail
+	res,code,txt = q.best_guess()
+	receiver += ': guessing'
+      if self.missing_ptr and res in ('neutral', 'none') and spf_reject_noptr:
+        self.log('REJECT: no PTR or SPF')
 	self.setreply('550','5.7.1',
-	  'mail from %s must pass SPF: http://spf.pobox.com/why.html' % host
+  'You must have a reverse lookup or publish SPF: http://spf.pobox.com'
 	)
 	return Milter.REJECT
-#       Received-SPF: neutral (mybox.example.org: 192.0.2.1 is neither
-#                             permitted nor denied by domain of
-#                             myname@example.com)
-      self.add_header('Received-SPF',
-      	"""neutral (%(receiver)s: %(connectip)s is neither
-	permitted nor denied by domain of %(sender)s)""" % self.__dict__)
-    elif res == 'unknown':
-#       Received-SPF: unknown -extension:foo (mybox.example.org: domain
-#                      of myname@example.com uses mechanism
-#			not recognized by this client)
-      self.spf_mech = txt
-      self.add_header('Received-SPF',
-      	"""unknown %(spf_mech)s (%(receiver)s: domain
-	of %(sender)s uses mechanism not recognized by this client)"""
-	% self.__dict__)
-    elif res == 'error':
-#   	Received-SPF: error (mybox.example.org: error in processing
-#                           during lookup of myname@example.com: DNS
-#                           timeout)
-      self.add_header('Received-SPF',
-      	"""error (%s: error in processing
-	during lookup of %s: %s)""" % (self.receiver,self.sender,txt))
+    if res in ('deny', 'fail'):
+      self.log('REJECT: SPF %s %i %s' % (res,code,txt))
+      self.setreply(str(code),'5.7.1',txt)
+      return Milter.REJECT
+    if res == 'neutral' and q.o in spf_reject_neutral:
+      self.log('REJECT: SPF neutral for',q.s)
+      self.setreply('550','5.7.1',
+	'mail from %s must pass SPF: http://spf.pobox.com/why.html' % q.o
+      )
+      return Milter.REJECT
+    if res == 'error':
      self.setreply(str(code),'4.3.0',txt)
      return Milter.TEMPFAIL
-    else:
-      self.log('SPF: %s %i %s' % (res,code,txt))
+    self.add_header('Received-SPF',q.get_header(res,receiver))
    return Milter.CONTINUE

  # hide_path causes a copy of the message to be saved - until we
@@ -659,27 +635,33 @@ class bmsMilter(Milter.Milter):
    self.log("rcpt to",to,str)
    t = parse_addr(to.lower())
    if len(t) == 2:
-      if self.mailfrom == '<>':
+      user,domain = t
+      if self.mailfrom == '<>' or self.canon_from.startswith('postmaster@') \
+      	or self.canon_from.startswith('mailer-daemon@'):
        if self.recipients:
 	  self.log('REJECT: Multiple bounce recipients')
 	  self.setreply('550','5.7.1','Multiple bounce recipients')
 	  return Milter.REJECT
-        if srs and not (self.internal_connection or self.trusted_relay):
+        if srs and not (self.internal_connection or self.trusted_relay) \
+		and domain == srs_fwdomain:
 	  oldaddr = '@'.join(parse_addr(to))
 	  try:
 	    newaddr = srs.reverse(oldaddr)
+	    # Currently, a sendmail map reverses SRS.  We just log it here.
 	    self.log("srs rcpt:",newaddr)
 	  except:
 	    if srsre.match(oldaddr):
-	      self.log("srs spoofed:",oldaddr)
+	      self.log("REJECT: srs spoofed:",oldaddr)
+	      self.setreply('550','5.7.1','Invalid SRS signature')
+	      return Milter.REJECT
 	    self.data_allowed = not srs_reject_spoofed
+      # non DSN mail to SRS address will bounce due to invalid local part
      self.recipients.append('@'.join(t))
-      user,domain = t
      users = check_user.get(domain)
      if self.discard:
        self.del_recipient(to)
      if users and not user in users:
-        self.log('REJECT: RCPT TO:',to,str)
+        self.log('REJECT: RCPT TO:',to)
 	return Milter.REJECT
      if user in block_forward.get(domain,()):
        self.forward = False
@@ -708,7 +690,8 @@ class bmsMilter(Milter.Milter):
    return Milter.CONTINUE

  # Heuristic checks for spam headers
-  def check_header(self,lname,val):
+  def check_header(self,name,val):
+    lname = name.lower()
    # val is decoded header value
    if lname == 'subject':

@@ -743,6 +726,7 @@ class bmsMilter(Milter.Milter):
      if not self.forward:
 	if lval.startswith("fwd:") or lval.startswith("[fw"):
 	  self.log('REJECT: %s: %s' % (name,val))
+	  self.setreply('550','5.7.1','I find unedited forwards annoying')
 	  return Milter.REJECT

    # check for invalid message id
@@ -765,7 +749,7 @@ class bmsMilter(Milter.Milter):
  def header(self,name,hval):
    if not self.data_allowed:
      self.log('REJECT: bounce with no SRS encoding')
-      self.setreply('550','5.7.1',"spoofed reply address")
+      self.setreply('550','5.7.1',"I did not send you this message.")
      return Milter.REJECT
    lname = name.lower()
    # decode near ascii text to unobfuscate
@@ -777,7 +761,7 @@ class bmsMilter(Milter.Milter):
 	  self.log('REJECT: %s: %s' % (name,hval))
 	  self.setreply('550','5.7.1',"We don't understand chinese")
 	  return Milter.REJECT
-      rc = self.check_header(lname,val)
+      rc = self.check_header(name,val)
      if rc != Milter.CONTINUE: return rc
    # log selected headers
    if log_headers or lname in ('subject','x-mailer'):
@@ -911,21 +895,22 @@ class bmsMilter(Milter.Milter):
 	    print x
    # screen if no recipients are dspam_users
    if not modified and dspam_screener and not self.internal_connection \
-    	and (self.dspam or self.reject_spam):
+    	and self.dspam:
      self.fp.seek(0)
      txt = self.fp.read()
      if len(txt) > dspam_sizelimit:
 	self.log("Large message:",len(txt))
 	return False
-      if not ds.check_spam(dspam_screener,txt,self.recipients,
+      screener = dspam_screener[self.id % len(dspam_screener)]
+      if not ds.check_spam(screener,txt,self.recipients,
      	classify=True,quarantine=not self.reject_spam):
 	self.fp = None
 	if self.reject_spam:
-	  self.log("DSPAM:",dspam_screener,
+	  self.log("DSPAM:",screener,
 	  	'REJECT: X-DSpam-Score: %f' % ds.probability)
 	  self.setreply('550','5.7.1','Your Message looks spammy')
 	  return True
-	self.log("DSPAM:",dspam_screener,"SCREENED")
+	self.log("DSPAM:",screener,"SCREENED")
    return modified

  def eom(self):
@@ -960,16 +945,18 @@ class bmsMilter(Milter.Milter):
      fname = tempfile.mktemp(".fail")	# save message that caused crash
      os.rename(self.tempname,fname)
      self.tempname = None
-      self.log("FAIL: %s" % fname)	# log filename
      if exc_type == email.Errors.BoundaryError:
+	self.log("MALFORMED: %s" % fname)	# log filename
 	self.setreply('554','5.7.7',
 		'Boundary error in your message, are you a spammer?')
        return Milter.REJECT
      if exc_type == email.Errors.HeaderParseError:
+	self.log("MALFORMED: %s" % fname)	# log filename
 	self.setreply('554','5.7.7',
 		'Header parse error in your message, are you a spammer?')
        return Milter.REJECT
      # let default exception handler print traceback and return 451 code
+      self.log("FAIL: %s" % fname)	# log filename
      raise
    if rc == Milter.REJECT: return rc;
    if rc == Milter.DISCARD: return rc;
@@ -1031,6 +1018,7 @@ class bmsMilter(Milter.Milter):
      os.remove(self.tempname)	# remove in case session aborted
    if self.fp:
      self.fp.close()
+    sys.stdout.flush()
    return Milter.CONTINUE

  def abort(self):
@@ -1045,13 +1033,13 @@ def main():
  if srs or len(discard_users) > 0 or smart_alias or dspam_userdir:
    flags = flags + Milter.DELRCPT
  Milter.set_flags(flags)
-  print "bms milter startup"
+  print "%s bms milter startup" % time.strftime('%Y%b%d %H:%M:%S')
  sys.stdout.flush()
-  Milter.runmilter("pythonfilter",socketname,600)
-  print "bms milter shutdown"
+  Milter.runmilter("pythonfilter",socketname,timeout)
+  print "%s bms milter shutdown" % time.strftime('%Y%b%d %H:%M:%S')

 if __name__ == "__main__":
-  read_config(["milter.cfg"])
+  read_config(["/etc/mail/pymilter.cfg","milter.cfg"])
  if dspam_dict:
    import dspam	# low level spam check
  if dspam_userdir:
@@ -0,0 +1,153 @@
+#!/usr/bin/python2.3
+
+# Convert a MS Caller-ID entry (XML) to a SPF entry
+#
+# (c) 2004 by Ernesto Baschny
+# (c) 2004 Python version by Stuart Gathman
+#
+# Date: 2004-02-25
+# Version: 1.0
+#
+# Usage:
+#  ./cid2spf.pl "<ep xmlns='http://ms.net/1'>...</ep>"
+#
+# Note that the 'include' directives will also have to be checked and
+# "translated". Future versions of this script might be able to get a
+# domain name as an argument and "crawl" the DNS for the necessary
+# information.
+#
+# A complete reverse translation (SPF -> CID) might be impossible, since
+# there is no way to handle:
+# - PTR and EXISTS mechanism 
+# - MX mechanism with a different domain as argument
+# - macros
+# 
+# References:
+# http://www.microsoft.com/mscorp/twc/privacy/spam_callerid.mspx
+# http://spf.pobox.com/
+#
+# Known bugs:
+# - Currently it won't handle the exclusions provided in the A and R
+#   tags (prefix '!'). They will show up "as-is" in the SPF record
+# - I really haven't read the MS-CID specs in-depth, so there are probably
+#   other bugs too :)
+#
+# Ernesto Baschny <ernst@baschny.de>
+#
+
+import xml.sax
+import spf
+
+# -------------------------------------------------------------------------
+class CIDParser(xml.sax.ContentHandler):
+  "Convert a MS Caller-ID entry (XML) to a SPF entry"
+
+  def __init__(self,q=None):
+    self.spf = []
+    self.action = '-all'
+    self.has_servers = None
+    self.spf_entry = None
+    if q:
+      self.spf_query = q
+    else:
+      self.spf_query = spf.query(i='127.0.0.1', s='localhost', h='unknown')
+
+  def startElement(self,tag,attr):
+      if tag == 'm':
+	if self.has_servers != None and not self.has_servers:
+	  raise ValueError(
+    "Declared <noMailServers\> and later <m>, this CID entry is not valid."
+	  )
+	self.has_servers = True
+      elif tag == 'noMailServers':
+	if self.has_servers:
+	  raise ValueError(
+    "Declared <m> and later <noMailServers\>, this CID entry is not valid."
+	  )
+	self.has_servers = False
+      elif tag == 'ep':
+	if attr.has_key('testing') and attr.getValue('testing') == 'true':
+	  # A CID with 'testing' found:
+	  # From the MS-specs:
+	  #  "Documents in which such attribute is present with a true
+	  #  value SHOULD be entirely ignored (one should act as if the
+	  #  document were absent)"
+	  # From the SPF-specs:
+	  #  "Neutral (?): The SPF client MUST proceed as if a domain did
+	  #  not publish SPF data."
+	  # So we set SPF action to "neutral":
+	  self.action = '?all'
+      elif tag == 'mx':
+	  # The empty MX-tag, same as SPF's MX-mechanism
+	  self.spf.append('mx')
+      self.tag = tag
+
+  def characters(self,text):
+	tag = self.tag
+	# Remove starting and trailing spaces from text:
+	text = text.strip()
+
+	if tag == 'a' or tag == 'r':
+	    # The A and R tags from MS-CID are both handled by the 
+	    # ipv4/6-mechanisms from SPF:
+	    if text.find(':') < 0:
+	      mechanism = 'ip4'
+	    else:
+	      mechanism = 'ip6'
+	    self.spf.append(mechanism + ':' + text)
+	elif tag == 'indirect':
+	    # MS-CID's indirect is "sort of" the include from SPF:
+	    # Not really true, because the <indirect> tag from MS-CID also 
+	    # provides a fallback in case the included domain doesn't provide
+	    # _ep-records: The inbound MX-servers of the included domains
+	    # are added to the list of allowed outgoing mailservers for the
+	    # domain that declared the _ep-record with the <indirect> tag.
+	    # In SPF you would use the 'mx:domain' to handle this, but this
+	    # wouldn't depend on referred domain having or not SPF-records.
+	    cid_xml = self.cid_txt(text)
+	    if cid_xml:
+	      p = CIDParser()
+	      xml.sax.parseString(cid_xml,p)
+	      if p.has_servers != False:
+		self.spf += p.spf
+	    else:
+	      self.spf.append('mx:' + text)
+
+  def cid_txt(self,domain):
+    q = self.spf_query
+    domain='_ep.' + domain
+    a = q.dns_txt(domain)
+    if not a: return None
+    if a[0].lower().startswith('<ep ') and a[-1].lower().endswith('</ep>'):
+      return ''.join(a)
+    return None
+
+  def endElement(self,tag):
+      if tag == 'ep':
+	# This is the end... assemble what we've got
+	spf_entry = ['v=spf1']
+	if self.has_servers != False:
+	  spf_entry += self.spf
+	spf_entry.append(self.action)
+	self.spf_entry = ' '.join(spf_entry)
+
+  def spf_txt(self,cid_xml):
+    if not cid_xml.startswith('<'):
+      cid_xml = self.cid_txt(cid_xml)
+      if not cid_xml: return None
+    # Parse the beast. Any XML-problem will be reported by xlm.sax
+    self.spf_entry = None
+    xml.sax.parseString(cid_xml,self)
+    return self.spf_entry
+
+# Command line use: convert the Caller-ID record (XML text, or a domain
+# name to look up) given as the first argument and print the SPF record.
+if __name__ == '__main__':
+  import sys
+  if len(sys.argv) < 2:
+    print >>sys.stderr, \
+      """Usage: %s "<ep xmlns='http://ms.net/1'>...</ep>" """ % sys.argv[0]
+    sys.exit(1)
+
+  cid_xml = sys.argv[1]
+
+  p = CIDParser()
+  print p.spf_txt(cid_xml)
@@ -134,5 +134,26 @@ is a milter declaration for sendmail.cf with all timeouts specified:
 Xpythonfilter, S=local:/var/log/milter/pythonsock, F=T, T=C:5m;S:20s;R:60s;E:5m
 </pre>

+<a name="spf"></a>
+<li> Q. So how do I use the SPF support?  The sample.py milter doesn't seem
+        to use it.
+<p>  A. The bms.py milter supports spf.  The RedHat RPMs will set almost
+everything up for you.  For other systems:
+<ol type=i>
+<li> Arrange to run bms.py in the background (as a service perhaps) and
+     redirect output and errors to a logfile.  For instance, on AIX you'll want
+     to use SRC (System Resource Controller).  
+<li> Copy milter.cfg to the directory you run bms.py in, and edit it.  The
+     comments should explain the options. 
+<li> Start bms.py in the background as arranged.
+<li> Add Xpythonfilter to sendmail.cf or add an INPUT_MAIL_FILTER to
+     sendmail.mc.  Regen sendmail.cf if you use sendmail.mc and restart 
+     sendmail.
+<li> Arrange to rotate log files and remove old defang files in 
+     <code>tempdir</code>.  The RedHat RPM uses <code>logrotate</code> for
+     logfiles and a simple cron script using <code>find</code> to clean
+     <code>tempdir</code>.
+</ol>
+
 </ol>
 </html>
@@ -1,10 +1,15 @@
 # features intended to filter or block incoming mail
 [milter]
+;socket=/var/run/milter/pythonsock
 tempdir = /var/log/milter/save
+;timeout=600
+
 scan_rfc822 = 1
 # can be CPU intensive
 scan_html = 0
+# reject asian fonts because we can't read them
 block_chinese = 1
+# users who hate forwarded mail
 ;block_forward = egghead@mycorp.com, busybee@mycorp.com
 log_headers = 0
 # Reject mail for domains mentioned unless user is mentioned here also
@@ -12,7 +17,9 @@ log_headers = 0
 # porn words are case insensitive
 porn_words = penis, breast, pussy, horse cock, porn, xenical, diet pill, d1ck,
 	vi*gra, vi-a-gra, viag, tits, p0rn, hunza, horny, sexy, c0ck,
-	p-e-n-i-s, hydrocodone, vicodin, xanax, vicod1n, x@nax
+	p-e-n-i-s, hydrocodone, vicodin, xanax, vicod1n, x@nax, diazepam,
+	v1@gra, xan@x, cialis, ci@lis, frëe, xãnax, valíum, vãlium, via-gra,
+	x@n3x, vicod3n, penís, c0d1n, phentermine, en1arge, dip1oma, v1codin
 # spam words are case sensitive
 spam_words = $$$, !!!, XXX, FREE, HGH

@@ -43,6 +50,10 @@ reject_spoofed = 0
 ;delegate = domain.com
 # domains where a neutral SPF result should cause mail to be rejected
 ;reject_neutral = aol.com
+# use a default (v=spf1 a/24 mx/24 ptr) when no SPF records are published
+;best_guess = 0
+# reject senders that have neither PTR nor SPF records
+;reject_noptr = 0

 # features intended to clean up outgoing mail
 [scrub]
@@ -93,6 +104,8 @@ blind = 1
 # defining this activates the dspam application
 # dspam and dspam-python must be installed
 ;dspam_userdir=/var/lib/dspam
+# do not dspam messages larger than this
+;dspam_sizelimit=180000

 # Map email addresses and aliases to dspam users
 ;dspam_users=david,goliath,spam,falsepositive
@@ -24,7 +24,7 @@ ALT="Viewable With Any Browser" BORDER="0"></A>
  Stuart D. Gathman</a><br>
 This web page is written by Stuart D. Gathman<br>and<br>sponsored by
 <a href="http://www.bmsi.com">Business Management Systems, Inc.</a> <br>
-Last updated Apr 05, 2004</h4>
+Last updated Jun 08, 2004</h4>

 See the <a href="faq.html">FAQ</a> | <a href="#download">Download now</a> |
 <a href="/mailman/listinfo/pymilter">Subscribe to mailing list</a>
@@ -40,20 +40,24 @@ Version 8.12 seems to be more robust, and includes new privilege
 separation features to enhance security.
 I recommend upgrading.

-<h2> <a name=dspam>Bayesian Filtering</a> </h2>
+<h2> Recent Changes </h2>

+Release 0.6.6 adds support for <a href="http://spf.pobox.com/">SPF</a>,
+a protocol to prevent forging of the envelope from address.  
+SPF support requires <a href="http://pydns.sourceforge.net/">pydns</a>.
+The included spf.py module is an updated version of the original 1.6
+version at <a href="http://www.wayforward.net/spf/">wayforward.net</a>.
+The updated version tracks the draft RFC and test suite.
+<p>
+The FAQ addresses <a href="faq.html#spf">how to get started with SPF</a>.
+<p>
+Release 0.6.1 adds a full milter based dspam application.
+<p>
 I have selected the <a href="http://www.nuclearelephant.com/projects/dspam/">
 dspam bayes filter project</a> and <a href="dspam.html">
 packaged it for python</a>.
 Release 0.6.0 offers a simple application of dspam I call "header triage",
-which rejects messages with spammy headers.  Since sendmail has to
-read the entire message anyway once we start reading headers, it
-would probably be better to scan the whole message - except that 
-we replace dangerous attachments elsewhere in the milter  - which screws up the
-body statistics for messages with dangerous attachments.
-<p>
-Release 0.6.1 adds a full milter based dspam application.
-<p>
+which rejects messages with spammy headers.  
 To use header triage, you must have <a href="dspam.html">DSPAM</a> installed,
 and select a dictionary that is well moderated by someone who gets
 lots of spam.  That dictionary can be used to block spam that is 
@@ -140,14 +144,43 @@ wiretapping, and Win32 virus protection milter.

 <h3><a name=download>Downloading</a></h3>

-The latest stable release is <a href="#stable">0.6.6</a>. A stable
+The latest stable release is <a href="#stable">0.6.9</a>. A stable
 release is one which has been installed (and working correctly) on
 production systems long enough to convince me that it is stable.  As
 the package gains more features and complexity, stable will mean no
 bug reports from outside users either.
 <p>
-The latest version is 0.6.7.  See the <a href=NEWS>Change Log</a>.
-
+The latest version is 0.6.9-1.  See the <a href=NEWS>Change Log</a>.
+<p>
+<a name="stable"><b>Stable</b></a>
+<a href="http://bmsi.com/python/milter-0.6.9.tar.gz">
+milter-0.6.9.tar.gz</a> Add SPF test suite driver, and validate
+spf.py against test suite.  Add best_guess and get_header to spf.py.
+Libmilter timeout option in config.
+<br>
+<a href="http://bmsi.com/linux/rh72/milter-0.6.9-1.i386.rpm">
+milter-0.6.9-1.i386.rpm</a> Binary RPM for Redhat 7.x, now requires 
+	sendmail-8.12 and <a href="http://www.python.org/2.3.3/rpms.html">
+	python2.3</a>.
+<br>
+<a href="http://bmsi.com/linux/rh9/milter-0.6.9-1.src.rpm">
+milter-0.6.9-1.src.rpm</a> Source RPM for Redhat 9,7.x.  
+<p>
+<a href="http://bmsi.com/python/milter-0.6.8.tar.gz">
+milter-0.6.8.tar.gz</a> Include Received-SPF headers in Dspam analysis.
+Fix sysv init for Redhat 9 and later.  Reject bounces with multiple
+recipients.
+<br>
+<a href="http://bmsi.com/python/milter-0.6.8.patch">milter-0.6.8.patch</a>
+Last-minute fixes from production testing.
+<p>
+<a href="http://bmsi.com/linux/rh72/milter-0.6.8-3.i386.rpm">
+milter-0.6.8-3.i386.rpm</a> Binary RPM for Redhat 7.x, now requires 
+	sendmail-8.12 and <a href="http://www.python.org/2.3.3/rpms.html">
+	python2.3</a>. 
+<br>
+<a href="http://bmsi.com/linux/rh9/milter-0.6.8-3.src.rpm">
+milter-0.6.8-3.src.rpm</a> Source RPM for Redhat 9,7.x.  
 <p>
 <a href="http://bmsi.com/python/milter-0.6.7.tar.gz">
 milter-0.6.7.tar.gz</a> Explicit local socket bug,
@@ -169,7 +202,6 @@ Release 0.6.7-3 patches:
 <li> Reject neutral SPF result for selected domains
 </ul>
 <p>
-<a name="stable"><b>Stable</b></a>
 <a href="http://bmsi.com/python/milter-0.6.6.tar.gz">
 milter-0.6.6.tar.gz</a> Plug another memory leak, 
 <a href="http://spf.pobox.com/">SPF</a> support, hello blacklist.
@@ -1,10 +1,10 @@
 %define name milter
-%define version 0.6.8
+%define version 0.7.0
 %define release 1
 # Redhat 7.x and earlier (multiple ps lines per thread)
-#%define sysvinit rc7
+%define sysvinit milter.rc7
 # RH9, other systems (single ps line per process)
-%define sysvinit rc
+#define sysvinit milter.rc
 %ifos Linux
 %define python python2.3
 %else
@@ -16,7 +16,7 @@ Name: %{name}
 Version: %{version}
 Release: %{release}
 Source: %{name}-%{version}.tar.gz
-#Patch: %{name}.patch
+#Patch: %{name}-%{version}.patch
 Copyright: GPL
 Group: Development/Libraries
 BuildRoot: %{_tmppath}/%{name}-buildroot
@@ -43,8 +43,10 @@ env CFLAGS="$RPM_OPT_FLAGS" %{python} setup.py build
 rm -rf $RPM_BUILD_ROOT
 %{python} setup.py install --root=$RPM_BUILD_ROOT --record=INSTALLED_FILES
 mkdir -p $RPM_BUILD_ROOT/var/log/milter
+mkdir -p $RPM_BUILD_ROOT/etc/mail
 mkdir $RPM_BUILD_ROOT/var/log/milter/save
-cp bms.py milter.cfg $RPM_BUILD_ROOT/var/log/milter
+cp bms.py $RPM_BUILD_ROOT/var/log/milter
+cp milter.cfg $RPM_BUILD_ROOT/etc/mail/pymilter.cfg

 # logfile rotation
 mkdir -p $RPM_BUILD_ROOT/etc/logrotate.d
@@ -81,7 +83,7 @@ exec >>milter.log 2>&1
 echo $! >/var/run/milter/milter.pid
 EOF
 mkdir -p $RPM_BUILD_ROOT/etc/rc.d/init.d
-cp milter.%{sysvinit} $RPM_BUILD_ROOT/etc/rc.d/init.d/milter
+cp %{sysvinit} $RPM_BUILD_ROOT/etc/rc.d/init.d/milter
 ed $RPM_BUILD_ROOT/etc/rc.d/init.d/milter <<'EOF'
 /^python=/
 c
@@ -103,6 +105,9 @@ mkssys -s milter -p /var/log/milter/start.sh -u 25 -S -n 15 -f 9 -G mail || :
 if [ $1 = 0 ]; then
  rmssys -s milter || :
 fi
+%else
+%post
+echo "pythonsock has moved to /var/run/milter, update /etc/mail/sendmail.cf"
 %endif

 %clean
@@ -124,9 +129,28 @@ rm -rf $RPM_BUILD_ROOT
 %dir /var/log/milter/save
 %config /var/log/milter/start.sh
 %config /var/log/milter/bms.py
-%config /var/log/milter/milter.cfg
+%config(noreplace) /etc/mail/pymilter.cfg

 %changelog
+* Fri Jul 23 2004 Stuart Gathman <stuart@bmsi.com> 0.7.0-1
+- SPF check hello name
+- Move pythonsock to /var/run/milter
+- Move milter.cfg to /etc/mail/pymilter.cfg
+- Check M$ style XML CID records by converting to SPF
+- Recognize, but never match ip6 until we properly support it.
+- Option to reject when no PTR and no SPF
+* Fri Apr 09 2004 Stuart Gathman <stuart@bmsi.com> 0.6.9-1
+- Validate spf.py against test suite, and add Received-SPF support to spf.py
+- Support best_guess for SPF
+- Reject numeric hello names
+- Preserve case of local part in sender
+- Make libmilter timeout a config option
+- Fix setup.py to work with python < 2.2.3
+* Tue Apr 06 2004 Stuart Gathman <stuart@bmsi.com> 0.6.8-3
+- Reject invalid SRS immediately for benefit of callback verifiers
+- Fix include bug in spf.py
+* Tue Apr 06 2004 Stuart Gathman <stuart@bmsi.com> 0.6.8-2
+- Bug in check_header
 * Mon Apr 05 2004 Stuart Gathman <stuart@bmsi.com> 0.6.8-1
 - Don't report spoofed unless rcpt looks like SRS
 - Check for bounce with multiple rcpts
@@ -1,4 +1,10 @@
 # $Log$
+# Revision 1.53  2004/04/24 22:53:20  stuart
+# Rename some local variables to avoid shadowing builtins
+#
+# Revision 1.52  2004/04/24 22:47:13  stuart
+# Convert header values to str
+#
 # Revision 1.51  2004/03/25 03:19:10  stuart
 # Correctly defang rfc822 attachments when boundary specified with
 # content-type message/rfc822.
@@ -192,19 +198,19 @@ class MimeParser(Parser):
                text = firstbodyline + '\n' + text
            container.set_payload(text)

-def unquote(str):
+def unquote(s):
    """Remove quotes from a string."""
-    if len(str) > 1:
-        if str.startswith('"'):
-	  if str.endswith('"'):
-            str = str[1:-1]
+    if len(s) > 1:
+        if s.startswith('"'):
+	  if s.endswith('"'):
+            s = s[1:-1]
 	  else: # remove garbage after trailing quote
-	    try: str = str[1:str[1:].index('"')+1]
-	    except: return str
-	  return str.replace('\\\\', '\\').replace('\\"', '"')
-        if str.startswith('<') and str.endswith('>'):
-            return str[1:-1]
-    return str
+	    try: s = s[1:s[1:].index('"')+1]
+	    except: return s
+	  return s.replace('\\\\', '\\').replace('\\"', '"')
+        if s.startswith('<') and s.endswith('>'):
+            return s[1:-1]
+    return s

 from types import TupleType

@@ -216,21 +222,21 @@ def _unquotevalue(value):

 email.Message._unquotevalue = _unquotevalue

-def _parseparam(str):
+def _parseparam(s):
    plist = []
-    while str[:1] == ';':
-	str = str[1:]
-	end = str.find(';')
-	while end > 0 and (str.count('"',0,end) & 1):
-	  end = str.find(';',end + 1)
-	if end < 0: end = len(str)
-	f = str[:end]
+    while s[:1] == ';':
+	s = s[1:]
+	end = s.find(';')
+	while end > 0 and (s.count('"',0,end) & 1):
+	  end = s.find(';',end + 1)
+	if end < 0: end = len(s)
+	f = s[:end]
 	if '=' in f:
 	    i = f.index('=')
 	    f = f[:i].strip().lower() + \
 		    '=' + f[i+1:].strip()
 	plist.append(f.strip())
-	str = str[end:]
+	s = s[end:]
    return plist

 # Enhance email.Message 
@@ -350,9 +356,9 @@ class MimeMessage(Message):
    return self.get('content-transfer-encoding',None)

  # Decode body to stream according to transfer encoding, return encoding name
-  def decode(self,filter):
+  def decode(self,filt):
    try:
-      filter.write(self.get_payload(decode=True))
+      filt.write(self.get_payload(decode=True))
    except:
      pass
    return self.getencoding()
@@ -363,7 +369,7 @@ class MimeMessage(Message):
  def __setitem__(self, name, value):
    rc = Message.__setitem__(self,name,value)
    self.modified = True
-    if self.headerchange: self.headerchange(self,name,value)
+    if self.headerchange: self.headerchange(self,name,str(value))
    return rc

  def __delitem__(self, name):
@@ -423,7 +429,7 @@ See your administrator.

 def check_name(msg,savname=None,ckname=check_ext):
  "Replace attachment with a warning if its name is suspicious."
-  for (key,name) in msg.getnames():
+  for key,name in msg.getnames():
    badname = ckname(name)
    if badname:
      hostname = socket.gethostname()
@@ -582,14 +588,14 @@ def check_html(msg,savname=None):
 	msgtype = 'text/html'
  if msgtype == 'text/html':
    out = StringIO.StringIO()
-    filter = HTMLScriptFilter(out)
+    htmlfilter = HTMLScriptFilter(out)
    try:
-      filter.write(msg.get_payload(decode=True))
-      filter.close()
+      htmlfilter.write(msg.get_payload(decode=True))
+      htmlfilter.close()
    #except sgmllib.SGMLParseError:
    except:
      #mimetools.copyliteral(msg.get_payload(),open('debug.out','w')
-      filter.close()
+      htmlfilter.close()
      hostname = socket.gethostname()
      msg.set_payload(
  "An HTML attachment could not be parsed.  The original is saved as '%s:%s'"
@@ -600,7 +606,7 @@ def check_html(msg,savname=None):
      name = "WARNING.TXT"
      msg["Content-Type"] = "text/plain; name="+name
      return Milter.CONTINUE
-    if filter.modified:
+    if htmlfilter.modified:
      msg.set_payload(out)	# remove embedded scripts
      del msg["content-transfer-encoding"]
      email.Encoders.encode_quopri(msg)
@@ -0,0 +1,38 @@
+# Analyze milter log to find abusers
+
+fp = open('/var/log/milter/milter.log','r')
+subdict = {}
+ipdict = {}
+spamcnt = {}
+for line in fp:
+  a = line.split(None,4)
+  if len(a) < 4: continue
+  dt,tm,id,op = a[:4]
+  if op == 'Subject:':
+    if len(a) > 4: subdict[id] = a[4].rstrip()
+  elif op == 'connect':
+    ipdict[id] = a[4].rstrip()
+  elif op in ('eom','dspam'):
+    if id in subdict: del subdict[id]
+    if id in ipdict: del ipdict[id]
+  elif op in ('REJECT:','DSPAM:','SPAM:','abort'):
+    if id in subdict:
+      if id in ipdict:
+        ip = ipdict[id]
+	del ipdict[id]
+	f,host,raw = ip.split(None,2)
+	if host in spamcnt:
+	  spamcnt[host] += 1
+	else:
+	  spamcnt[host] = 1
+      else: ip = ''
+      print dt,tm,op,a[4].rstrip(),subdict[id]
+      del subdict[id]
+    else:
+      print line.rstrip()
+print len(subdict),'leftover entries'
+
+spamlist = filter(lambda x: x[1] > 1,spamcnt.items())
+spamlist.sort(lambda x,y: x[1] - y[1])
+for ip,cnt in spamlist:
+  print cnt,ip
@@ -1,10 +1,18 @@
 import os
+import sys
 from distutils.core import setup, Extension

 # FIXME: on some versions of sendmail, smutil is renamed to sm
 libs = ["milter", "smutil"]

-setup(name = "milter", version = "0.6.8",
+# patch distutils if it can't cope with the "classifiers" or
+# "download_url" keywords
+if sys.version < '2.2.3':
+  from distutils.dist import DistributionMetadata
+  DistributionMetadata.classifiers = None
+  DistributionMetadata.download_url = None
+
+setup(name = "milter", version = "0.7.0",
 	description="Python interface to sendmail milter API",
 	long_description="""\
 This is a python extension module to enable python scripts to
@@ -40,7 +40,35 @@ For news, bugfixes, etc. visit the home page for this implementation at
 #                      ditch the annoying Python 2.4 FutureWarning
 #   18-dec-2003, v1.6, Failures on Intel hardware: endianness.  Use ! on
 #                      struct.pack(), struct.unpack().
+#
+# Development taken over by Stuart Gathman <stuart@bmsi.com> since
+# Terrence is not responding to email.
+#
 # $Log$
+# Revision 1.13  2004/07/23 19:23:12  stuart
+# Always fail to match on ip6, until we support it properly.
+#
+# Revision 1.12  2004/07/23 18:48:15  stuart
+# Fold CID parsing into spf
+#
+# Revision 1.11  2004/07/21 21:32:01  stuart
+# Handle CID records (Microsoft XML format).
+#
+# Revision 1.10  2004/04/19 22:12:11  stuart
+# Release 0.6.9
+#
+# Revision 1.9  2004/04/18 03:29:35  stuart
+# Pass most tests except -local and -rcpt-to
+#
+# Revision 1.8  2004/04/17 22:17:55  stuart
+# Header comment method.
+#
+# Revision 1.7  2004/04/17 18:22:48  stuart
+# Support default explanation.
+#
+# Revision 1.6  2004/04/06 20:18:02  stuart
+# Fix bug in include
+#
 # Revision 1.5  2004/04/05 22:29:46  stuart
 # SPF best_guess,
 #
@@ -78,6 +106,144 @@ import struct  # for pack() and unpack()
 import time    # for time()

 import DNS	# http://pydns.sourceforge.net
+import xml.sax
+
+# -------------------------------------------------------------------------
+# Convert a MS Caller-ID entry (XML) to a SPF entry
+#
+# (c) 2004 by Ernesto Baschny
+# (c) 2004 Python version by Stuart Gathman
+#
+# Date: 2004-02-25
+# Version: 1.0
+#
+# Usage:
+#  ./cid2spf.pl "<ep xmlns='http://ms.net/1'>...</ep>"
+#
+# Note that the 'include' directives will also have to be checked and
+# "translated". Future versions of this script might be able to get a
+# domain name as an argument and "crawl" the DNS for the necessary
+# information.
+#
+# A complete reverse translation (SPF -> CID) might be impossible, since
+# there is no way to handle:
+# - PTR and EXISTS mechanism 
+# - MX mechanism with a different domain as argument
+# - macros
+# 
+# References:
+# http://www.microsoft.com/mscorp/twc/privacy/spam_callerid.mspx
+# http://spf.pobox.com/
+#
+# Known bugs:
+# - Currently it won't handle the exclusions provided in the A and R
+#   tags (prefix '!'). They will show up "as-is" in the SPF record
+# - I really haven't read the MS-CID specs in-depth, so there are probably
+#   other bugs too :)
+#
+# Ernesto Baschny <ernst@baschny.de>
+#
+
+class CIDParser(xml.sax.ContentHandler):
+  "Convert a MS Caller-ID entry (XML) to a SPF entry."
+
+  def __init__(self,q=None):
+    self.spf = []
+    self.action = '-all'
+    self.has_servers = None
+    self.spf_entry = None
+    if q:
+      self.spf_query = q
+    else:
+      self.spf_query = query(i='127.0.0.1', s='localhost', h='unknown')
+
+  def startElement(self,tag,attr):
+      if tag == 'm':
+	if self.has_servers != None and not self.has_servers:
+	  raise ValueError(
+    "Declared <noMailServers\> and later <m>, this CID entry is not valid."
+	  )
+	self.has_servers = True
+      elif tag == 'noMailServers':
+	if self.has_servers:
+	  raise ValueError(
+    "Declared <m> and later <noMailServers\>, this CID entry is not valid."
+	  )
+	self.has_servers = False
+      elif tag == 'ep':
+	if attr.has_key('testing') and attr.getValue('testing') == 'true':
+	  # A CID with 'testing' found:
+	  # From the MS-specs:
+	  #  "Documents in which such attribute is present with a true
+	  #  value SHOULD be entirely ignored (one should act as if the
+	  #  document were absent)"
+	  # From the SPF-specs:
+	  #  "Neutral (?): The SPF client MUST proceed as if a domain did
+	  #  not publish SPF data."
+	  # So we set SPF action to "neutral":
+	  self.action = '?all'
+      elif tag == 'mx':
+	  # The empty MX-tag, same as SPF's MX-mechanism
+	  self.spf.append('mx')
+      self.tag = tag
+
+  def characters(self,text):
+	tag = self.tag
+	# Remove starting and trailing spaces from text:
+	text = text.strip()
+
+	if tag == 'a' or tag == 'r':
+	    # The A and R tags from MS-CID are both handled by the 
+	    # ipv4/6-mechanisms from SPF:
+	    if text.find(':') < 0:
+	      mechanism = 'ip4'
+	    else:
+	      mechanism = 'ip6'
+	    self.spf.append(mechanism + ':' + text)
+	elif tag == 'indirect':
+	    # MS-CID's indirect is "sort of" the include from SPF:
+	    # Not really true, because the <indirect> tag from MS-CID also 
+	    # provides a fallback in case the included domain doesn't provide
+	    # _ep-records: The inbound MX-servers of the included domains
+	    # are added to the list of allowed outgoing mailservers for the
+	    # domain that declared the _ep-record with the <indirect> tag.
+	    # In SPF you would use the 'mx:domain' to handle this, but this
+	    # wouldn't depend on referred domain having or not SPF-records.
+	    cid_xml = self.cid_txt(text)
+	    if cid_xml:
+	      p = CIDParser()
+	      xml.sax.parseString(cid_xml,p)
+	      if p.has_servers != False:
+		self.spf += p.spf
+	    else:
+	      self.spf.append('mx:' + text)
+
+  def cid_txt(self,domain):
+    q = self.spf_query
+    domain='_ep.' + domain
+    a = q.dns_txt(domain)
+    if not a: return None
+    if a[0].lower().startswith('<ep ') and a[-1].lower().endswith('</ep>'):
+      return ''.join(a)
+    return None
+
+  def endElement(self,tag):
+      if tag == 'ep':
+	# This is the end... assemble what we've got
+	spf_entry = ['v=spf1']
+	if self.has_servers != False:
+	  spf_entry += self.spf
+	spf_entry.append(self.action)
+	self.spf_entry = ' '.join(spf_entry)
+
+  def spf_txt(self,cid_xml):
+    if not cid_xml.startswith('<'):
+      cid_xml = self.cid_txt(cid_xml)
+      if not cid_xml: return None
+    # Parse the beast. Any XML-problem will be reported by xml.sax
+    self.spf_entry = None
+    xml.sax.parseString(cid_xml,self)
+    return self.spf_entry

 # 32-bit IPv4 address mask
 MASK = 0xFFFFFFFFL
@@ -99,12 +265,13 @@ JOINERS = {'l': '.', 's': '.'}
 RESULTS = {'+': 'pass', '-': 'fail', '?': 'neutral', '~': 'softfail',
           'pass': 'pass', 'fail': 'fail', 'unknown': 'unknown',
 	   'neutral': 'neutral', 'softfail': 'softfail',
-	   'none': 'none' }
+	   'none': 'none', 'deny': 'fail' }

 EXPLANATIONS = {'pass': 'sender SPF verified', 'fail': 'access denied',
-                'unknown': 'SPF unknown', 'softfail': 'domain in transition',
+                'unknown': 'SPF unknown',
+		'softfail': 'domain in transition',
 		'neutral': 'access neither permitted nor denied',
-		'none': 'no SPF records'
+		'none': ''
 		}

 # if set to a domain name, search _spf.domain namespace if no SPF record
@@ -123,7 +290,7 @@ except NameError:
 # standard default SPF record
 DEFAULT_SPF = 'v=spf1 a/24 mx/24 ptr'

-def check(i, s, h,default=None):
+def check(i, s, h,local=None):
 	"""Test an incoming MAIL FROM:<s>, from a client with ip address i.
 	h is the HELO/EHLO domain name.

@@ -137,21 +304,7 @@ def check(i, s, h,default=None):
 	#>>> check(i='61.51.192.42', s='liukebing@bcc.com', h='bmsi.com')

 	"""
-	if i.startswith('127.'):
-		return ('pass', 250, 'local connections always pass')
-
-	try:
-		q = query(i=i, s=s, h=h)
-		spf = q.dns_spf(q.d)
-		if not spf and default:
-		  spf = default
-		return q.check(spf)
-	except DNS.DNSError:
-		return ('error', 450, 'SPF DNS Error')
-
-def best_guess(i, s, h,spf=DEFAULT_SPF):
-	q = query(i=i, s=s, h=h)
-	return q.check(spf)
+	return query(i=i, s=s, h=h,local=local).check()

 class query(object):
 	"""A query object keeps the relevant information about a single SPF
@@ -172,7 +325,7 @@ class query(object):

 	Also keeps cache: DNS cache.
 	"""
-	def __init__(self, i, s, h):
+	def __init__(self, i, s, h,local=None):
 		self.i, self.s, self.h = i, s, h
 		self.l, self.o = split_email(s, h)
 		self.t = str(int(time.time()))
@@ -180,6 +333,13 @@ class query(object):
 		self.d = self.o
 		self.p = None
 		self.cache = {}
+		self.exps = dict(EXPLANATIONS)
+		self.local = local	# local policy
+
+	def set_default_explanation(self,exp):
+		exps = self.exps
+		for i in 'softfail','fail','unknown':
+		  exps[i] = exp

 	def getp(self):
 		if not self.p:
@@ -190,17 +350,32 @@ class query(object):
 				self.p = self.i
 		return self.p

-	def check(self, spf):
+	def best_guess(self,spf=DEFAULT_SPF):
+		"""Return a best guess based on a default SPF record"""
+		return self.check(spf)
+
+	def check(self, spf=None):
 		"""
 	Returns (result, mta-status-code, explanation) where
-		result in ['fail', 'unknown', 'pass']
+	result in ['fail', 'softfail', 'neutral', 'unknown', 'pass', 'error']
 		"""
+		if self.i.startswith('127.'):
+			return ('pass', 250, 'local connections always pass')
+
+		try:
+			if not spf:
+			    spf = self.dns_spf(self.d)
+			if self.local and spf:
+			    spf += ' ' + self.local
 			return self.check1(spf, self.d, 0)
+		except DNS.DNSError:
+			return ('error', 450, 'SPF DNS Error')

 	def check1(self, spf, domain, recursion):
 		# spf rfc: 3.7 Processing Limits
 		#
-		if recursion > 10:
+		if recursion > 20:
+			self.prob =  'Mechanisms used too many DNS lookups'
 			return ('unknown', 250, 'SPF recursion limit exceeded')
 		try:
 			tmp, self.d = self.d, domain
@@ -216,20 +391,21 @@ class query(object):
 		"""

 		if not spf:
-			return ('none', 250, 'no SPF records')
+			return ('none', 250, EXPLANATIONS['none'])

 		# split string by whitespace, drop the 'v=spf1'
 		#
 		spf = spf.split()[1:]

 		# copy of explanations to be modified by exp=
-		exps = dict(EXPLANATIONS)
+		exps = self.exps
 		redirect = None

 		# no mechanisms at all cause unknown result, unless
 		# overridden with 'default=' modifier
 		#
 		default = 'neutral'
+		self.mech = []		# unknown mechanisms

 		# Look for modifiers
 		#
@@ -268,12 +444,21 @@ class query(object):

 			if m == 'include':
 			    if arg != self.d:
-					tmp = self.check1(self.dns_spf(arg),
+				res,code,txt = self.check1(self.dns_spf(arg),
 						  arg, recursion + 1)
-					if tmp[0] == 'pass':
+				if res == 'pass':
 					break
-					if tmp[0] != 'fail':
-						return tmp
+				if res in ('fail','neutral','softfail'):
+					continue
+				if res == 'none':
+				  	self.prob = \
+					  'Could not find a valid SPF record'
+				  	res = 'unknown'
+				return res,code,txt
+			    else:
+			    	self.prob = 'Required option is missing'
+				self.mech.append(mech)
+				return ('unknown', 250, 'missing SPF option')

 			elif m == 'all':
 				break
@@ -292,9 +477,14 @@ class query(object):
 				             cidrlength):
 					break

-			elif m in ('ip4', 'ipv4') and arg != self.d:
+			elif m in ('ip4', 'ipv4', 'ip') and arg != self.d:
 				if cidrmatch(self.i, [arg], cidrlength):
 					break
+			elif m == 'ip6':
+			# Until we support IPV6, we should never
+			# get an IPv6 connection.  So this mech
+			# will never match.
+				pass

 			elif m in ('ptr', 'prt'):
 				if domainmatch(self.validated_ptrs(self.i),
@@ -304,7 +494,9 @@ class query(object):
 			else:
 				# unknown mechanisms cause immediate unknown
 				# abort results
-				return ('unknown', 250, mech)
+				self.mech.append(mech)
+				self.prob = 'Unknown mechanism found'
+				return ('unknown',250,'unknown SPF mechanism')

 		else:
 			# no matches
@@ -321,7 +513,10 @@ class query(object):

 	def get_explanation(self, spec):
 		"""Expand an explanation."""
+		if spec:
 		  return self.expand(''.join(self.dns_txt(self.expand(spec))))
+		else:
+		  return 'explanation : Required option is missing'

 	def expand(self, str):
 		"""Do SPF RFC macro expansion.
@@ -422,18 +617,27 @@ class query(object):
 		is found.
 		"""
 		a = [t for t in self.dns_txt(domain) if t.startswith('v=spf1')]
-		if not a and DELEGATE:
+		if not a:
+		  if DELEGATE:
 		    a = [t
 		      for t in self.dns_txt(domain+'._spf.'+DELEGATE)
 			if t.startswith('v=spf1')
 		    ]
+		  if not a:
+		    # No SPF record: convert and return CID if present
+		    p = CIDParser(q=self)
+		    return p.spf_txt(domain)
+
 		if len(a) == 1:
 			return a[0]
 		else:
 			return None

 	def dns_txt(self, domainname):
+		"Get a list of TXT records for a domain name."
+		if domainname:
 		  return [t for a in self.dns(domainname, 'TXT') for t in a]
+		return []

 	def dns_mx(self, domainname):
 		"""Get a list of IP addresses for all MX exchanges for a
@@ -490,6 +694,46 @@ class query(object):
 			result = self.dns(cname, qtype)
 		return result

+	def get_header(self,res,receiver):
+	  if res in ('pass','fail'):
+	    return '%s (%s: %s) client-ip=%s; envelope-from=%s; helo=%s;' % (
+	  	res,receiver,self.get_header_comment(res),self.i,
+	        self.l + '@' + self.o, self.h)
+	  if res == 'unknown':
+	    return '%s (%s: %s)' % (' '.join([res] + self.mech),
+	      receiver,self.get_header_comment(res))
+	  return '%s (%s: %s)' % (res,receiver,self.get_header_comment(res))
+
+	def get_header_comment(self,res):
+		"""Return comment for Received-SPF header.
+		"""
+		sender = self.o
+		if res == 'pass':
+		  if self.i.startswith('127.'):
+		    return "localhost is always allowed."
+		  else: return \
+		    "domain of %s designates %s as permitted sender" \
+			% (sender,self.i)
+		elif res == 'softfail': return \
+      "transitioning domain of %s does not designate %s as permitted sender" \
+			% (sender,self.i)
+		elif res == 'neutral': return \
+		    "%s is neither permitted nor denied by domain of %s" \
+		    	% (self.i,sender)
+		elif res == 'none': return \
+		    "%s is neither permitted nor denied by domain of %s" \
+		    	% (self.i,sender)
+		    #"%s does not designate permitted sender hosts" % sender
+		elif res == 'unknown': return \
+		    "error in processing during lookup of domain of %s: %s" \
+		    	% (sender, self.prob)
+		elif res == 'error': return \
+		    "error in processing during lookup of %s" % sender
+		elif res == 'fail': return \
+		    "domain of %s does not designate %s as permitted sender" \
+			% (sender,self.i)
+		raise ValueError("invalid SPF result for header comment: "+res)
+
 def split_email(s, h):
 	"""Given a sender email s and a HELO domain h, create a valid tuple
 	(l, d) local-part and domain-part.
@@ -0,0 +1,91 @@
+#!/usr/bin/python2.3
+# $Log$
+# Revision 2.3  2004/04/19 22:12:11  stuart
+# Release 0.6.9
+#
+# Revision 2.2  2004/04/18 03:29:35  stuart
+# Pass most tests except -local and -rcpt-to
+#
+# Revision 2.1  2004/04/08 18:41:15  stuart
+# Reject numeric hello names
+#
+# Driver for SPF test system
+
+import spf
+import sys
+
+from optparse import OptionParser
+
+class PerlOptionParser(OptionParser):
+    def _process_args (self, largs, rargs, values):
+        """_process_args(largs : [string],
+                         rargs : [string],
+                         values : Values)
+
+        Process command-line arguments and populate 'values', consuming
+        options and arguments from 'rargs'.  If 'allow_interspersed_args' is
+        false, stop at the first non-option argument.  If true, accumulate any
+        interspersed non-option arguments in 'largs'.
+        """
+        while rargs:
+            arg = rargs[0]
+            # We handle bare "--" explicitly, and bare "-" is handled by the
+            # standard arg handler since the short arg case ensures that the
+            # len of the opt string is greater than 1.
+            if arg == "--":
+                del rargs[0]
+                return
+            elif arg[0:2] == "--":
+                # process a single long option (possibly with value(s))
+                self._process_long_opt(rargs, values)
+            elif arg[:1] == "-" and len(arg) > 1:
+                # process a single perl style long option
+		rargs[0] = '-' + arg
+                self._process_long_opt(rargs, values)
+            elif self.allow_interspersed_args:
+                largs.append(arg)
+                del rargs[0]
+            else:
+		return
+
+def format(q):
+  res,code,txt = q.check()
+  print res
+  if res in ('pass','neutral','unknown'): print
+  else: print txt
+  print 'spfquery:',q.get_header_comment(res)
+  print 'Received-SPF:',q.get_header(res,'spfquery')
+
+def main(argv):
+  parser = PerlOptionParser()
+  parser.add_option("--file",dest="file")
+  parser.add_option("--ip",dest="ip")
+  parser.add_option("--sender",dest="sender")
+  parser.add_option("--helo",dest="hello_name")
+  parser.add_option("--local",dest="local_policy")
+  parser.add_option("--rcpt-to",dest="rcpt")
+  parser.add_option("--default-explanation",dest="explanation")
+  parser.add_option("--sanitize",type="int",dest="sanitize")
+  parser.add_option("--debug",type="int",dest="debug")
+  opts,args = parser.parse_args(argv)
+  if opts.ip:
+    q = spf.query(opts.ip,opts.sender,opts.hello_name,local=opts.local_policy)
+    if opts.explanation:
+      q.set_default_explanation(opts.explanation)
+    format(q)
+  if opts.file:
+    if opts.file == '0':
+      fp = sys.stdin
+    else:
+      fp = open(opts.file,'r')
+    for ln in fp:
+      ip,sender,helo,rcpt = ln.split(None,3)
+      q = spf.query(ip,sender,helo,local=opts.local_policy)
+      if opts.explanation:
+	q.set_default_explanation(opts.explanation)
+      format(q)
+    fp.close()
+    
+if __name__ == "__main__":
+  import sys
+  main(sys.argv[1:])
Author	SHA1	Message	Date
Stuart Gathman	802dc01c84	Release 0.7.0	2005-05-31 18:08:20 +00:00
Stuart Gathman	1205d50bc4	Release 0.6.9	2005-05-31 18:07:19 +00:00