Release 0.7.0

2005-05-31 18:08:20 +00:00
13 changed files with 555 additions and 79 deletions
@@ -8,6 +8,7 @@ include testsample.py
 include testmime.py
 include testbms.py
 include testdspam.py
+include rejects.py
 include bms.py
 include spf.py
 include spfquery.py
@@ -1,5 +1,11 @@
 Here is a history of user visible changes to Python milter.

+0.7.0	SPF check hello name
+	Move pythonsock to /var/run/milter
+	Move milter.cfg to /etc/mail/pymilter.cfg
+	Check M$ style XML CID records by converting to SPF
+	Recognize, but never match ip6 - until we properly support it.
+	Option to reject when no PTR and no SPF
 0.6.9	Reject invalid SRS immediately for benefit of callback verifiers
 	Fix include bug in spf.py
 	Fix check_header bug
@@ -1,10 +1,25 @@
+Message not saved for following traceback:
+Traceback (most recent call last):
+  File "/usr/lib/python2.3/site-packages/Milter.py", line 188, in <lambda>
+    milter.set_eom_callback(lambda ctx: ctx.getpriv().eom())
+  File "bms.py", line 935, in eom
+    msg.dump(out)
+  File "/usr/lib/python2.3/site-packages/mime.py", line 347, in dump
+    g.flatten(self,unixfrom=unixfrom)
+  File "/var/tmp/python2.3-2.3.3-root/usr/lib/python2.3/email/Generator.py", line 102, in flatten
+  File "/var/tmp/python2.3-2.3.3-root/usr/lib/python2.3/email/Generator.py", line 130, in _write
+  File "/var/tmp/python2.3-2.3.3-root/usr/lib/python2.3/email/Generator.py", line 156, in _dispatch
+  File "/var/tmp/python2.3-2.3.3-root/usr/lib/python2.3/email/Generator.py", line 199, in _handle_text
+TypeError: string payload expected: <type 'list'>
+------------
+spf.py has no recursion bound on CNAME lookup
+Support SMTP AUTH and disable SPF checks when connection is authorized.
 Web admin interface
-RHBL
+RHSBL
 Check valid domains allowed by internal senders to detect PCs infected
 with spam trojans.
 Do CBV (callback verification) for mail with no published SPF record.
 message log for automated stats and blacklisting
-adapt init script to work on RH9
 Skip dspam when SPF pass?
 Report 551 with rcpt on SPF fail?
 check spam keywords with character classes, e.g.
@@ -1,6 +1,37 @@
 #!/usr/bin/env python
 # A simple milter.
 # $Log$
+# Revision 1.114  2004/07/27 00:40:12  stuart
+# Make reject on no PTR optional.
+#
+# Revision 1.113  2004/07/23 23:11:14  stuart
+# Log known malformed messages differently than general processing exceptions.
+#
+# Revision 1.112  2004/07/21 19:18:33  stuart
+# Punt on UnicodeDecodeError when decoding headers.
+# Accept a pass with default SPF for missing reverse IP.
+#
+# Revision 1.111  2004/07/18 13:13:31  stuart
+# Reject invalid SRS only for SRS domain (which is the only one we
+# know the key for).
+# Reject senders that have neither reverse IP nor SPF.
+#
+# Revision 1.110  2004/06/12 03:13:18  stuart
+# Block bounces only for SRS domain.  Also treat mail from
+# postmaster or mailer-daemon as DSN for SRS/SES checking purposes.
+#
+# Revision 1.109  2004/05/01 02:56:55  stuart
+# Let multiple screeners share work.
+#
+# Revision 1.108  2004/04/29 20:36:23  stuart
+# Require HELO name
+#
+# Revision 1.107  2004/04/24 22:55:29  stuart
+# Move some files to make the RPM more standard.
+#
+# Revision 1.106  2004/04/21 18:29:08  stuart
+# Validate hello name with SPF.
+#
 # Revision 1.105  2004/04/20 15:16:00  stuart
 # Release 0.6.9
 #
@@ -242,14 +273,16 @@ dspam_users = {}
 dspam_userdir = None
 dspam_exempt = {}
 dspam_whitelist = {}
-dspam_screener = None
+dspam_screener = ()
 dspam_internal = True	# True if internal mail should be dspammed
 dspam_reject = ()
 dspam_sizelimit = 180000
 srs = None
 srs_reject_spoofed = False
+srs_fwdomain = None
 spf_reject_neutral = ()
 spf_best_guess = False
+spf_reject_noptr = False
 timeout = 600

 class MilterConfigParser(ConfigParser.ConfigParser):
@@ -300,7 +333,7 @@ class MilterConfigParser(ConfigParser.ConfigParser):
 def read_config(list):
  cp = MilterConfigParser({
    'tempdir': "/var/log/milter/save",
-    'socket': "/var/log/milter/pythonsock",
+    'socket': "/var/run/milter/pythonsock",
    'timeout': '600',
    'scan_html': 'no',
    'scan_rfc822': 'yes',
@@ -310,6 +343,7 @@ def read_config(list):
    'maxage': '8',
    'hashlength': '8',
    'reject_spoofed': 'no',
+    'reject_noptr': 'no',
    'best_guess': 'no'
  })
  cp.read(list)
@@ -355,13 +389,13 @@ def read_config(list):

  global dspam_dict, dspam_users, dspam_userdir, dspam_exempt
  global dspam_screener,dspam_whitelist,dspam_reject,dspam_sizelimit
-  global spf_reject_neutral,spf_best_guess,SRS
+  global spf_reject_neutral,spf_best_guess,SRS,spf_reject_noptr
  dspam_dict = cp.getdefault('dspam','dspam_dict')
  dspam_exempt = cp.getaddrset('dspam','dspam_exempt')
  dspam_whitelist = cp.getaddrset('dspam','dspam_whitelist')
  dspam_users = cp.getaddrdict('dspam','dspam_users')
  dspam_userdir = cp.getdefault('dspam','dspam_userdir')
-  dspam_screener = cp.getdefault('dspam','dspam_screener')
+  dspam_screener = cp.getlist('dspam','dspam_screener')
  dspam_reject = cp.getlist('dspam','dspam_reject')
  if cp.has_option('dspam','dspam_sizelimit'):
    dspam_sizelimit = cp.getint('dspam','dspam_sizelimit')
@@ -370,11 +404,12 @@ def read_config(list):
    spf.DELEGATE = cp.getdefault('spf','delegate')
    spf_reject_neutral = cp.getlist('spf','reject_neutral')
    spf_best_guess = cp.getboolean('spf','best_guess')
+    spf_reject_noptr = cp.getboolean('spf','reject_noptr')
  srs_config = cp.getdefault('srs','config')
  if srs_config: cp.read([srs_config])
  srs_secret = cp.getdefault('srs','secret')
  if SRS and srs_secret:
-    global srs,srs_reject_spoofed
+    global srs,srs_reject_spoofed,srs_fwdomain
    database = cp.getdefault('srs','database')
    srs_reject_spoofed = cp.getboolean('srs','reject_spoofed')
    maxage = cp.getint('srs','maxage')
@@ -387,7 +422,7 @@ def read_config(list):
    else:
      srs = SRS.Guarded.Guarded(secret=srs_secret,
        maxage=maxage,hashlength=hashlength,separator=separator)
-
+    srs_fwdomain = cp.getdefault('srs','fwdomain')

 def parse_addr(t):
  if t.startswith('<') and t.endswith('>'): t = t[1:-1]
@@ -408,6 +443,8 @@ def parse_header(val):
      try:
 	return u.encode(enc)
      except UnicodeError: continue
+  except UnicodeDecodeError:
+    return val
  except LookupError:
    return val

@@ -448,6 +485,7 @@ class bmsMilter(Milter.Milter):
    self.log('%s: %s' % (name,val))

  def connect(self,hostname,unused,hostaddr):
+    self.missing_ptr = hostname.startswith('[') and hostname.endswith(']')
    self.internal_connection = False
    self.trusted_relay = False
    self.receiver = self.getsymval('j')
@@ -475,6 +513,8 @@ class bmsMilter(Milter.Milter):
    if self.trusted_relay:
      connecttype += ' TRUSTED'
    self.log("connect from %s at %s %s" % (hostname,hostaddr,connecttype))
+    self.hello_name = None
+    self.connecthost = hostname
    return Milter.CONTINUE

  def hello(self,hostname):
@@ -531,6 +571,10 @@ class bmsMilter(Milter.Milter):
 	self.dspam = False
    else:
      self.rejectvirus = False
+    if not self.hello_name:
+      self.log("REJECT: missing HELO")
+      self.setreply('550','5.7.1',"It's polite to say HELO first.")
+      return Milter.REJECT
    if not (self.internal_connection or self.trusted_relay)	\
    	and self.connectip and spf:
      return self.check_spf()
@@ -543,12 +587,27 @@ class bmsMilter(Milter.Milter):
    q.set_default_explanation('SPF fail: see http://spf.pobox.com/why.html')
    res,code,txt = q.check()
    receiver = self.receiver
-    if res == 'none' and spf_best_guess:
-      #self.log('SPF: no record published, guessing')
-      q.set_default_explanation('SPF guess: see http://spf.pobox.com/why.html')
-      # best_guess should not result in fail
-      res,code,txt = q.best_guess()
-      receiver += ': guessing'
+    if res == 'none':
+      if self.mailfrom != '<>':
+	# check hello name via spf
+	hres,hcode,htxt = spf.check(self.connectip,'',self.hello_name)
+	if hres in ('deny','fail','neutral','softfail'):
+	  self.log('REJECT: hello SPF: %s %i %s' % (hres,hcode,htxt))
+	  self.setreply('550','5.7.1',htxt)
+	  return Milter.REJECT
+      if spf_best_guess:
+	#self.log('SPF: no record published, guessing')
+	q.set_default_explanation(
+		'SPF guess: see http://spf.pobox.com/why.html')
+	# best_guess should not result in fail
+	res,code,txt = q.best_guess()
+	receiver += ': guessing'
+      if self.missing_ptr and res in ('neutral', 'none') and spf_reject_noptr:
+        self.log('REJECT: no PTR or SPF')
+	self.setreply('550','5.7.1',
+  'You must have a reverse lookup or publish SPF: http://spf.pobox.com'
+	)
+	return Milter.REJECT
    if res in ('deny', 'fail'):
      self.log('REJECT: SPF %s %i %s' % (res,code,txt))
      self.setreply(str(code),'5.7.1',txt)
@@ -576,15 +635,19 @@ class bmsMilter(Milter.Milter):
    self.log("rcpt to",to,str)
    t = parse_addr(to.lower())
    if len(t) == 2:
-      if self.mailfrom == '<>':
+      user,domain = t
+      if self.mailfrom == '<>' or self.canon_from.startswith('postmaster@') \
+      	or self.canon_from.startswith('mailer-daemon@'):
        if self.recipients:
 	  self.log('REJECT: Multiple bounce recipients')
 	  self.setreply('550','5.7.1','Multiple bounce recipients')
 	  return Milter.REJECT
-        if srs and not (self.internal_connection or self.trusted_relay):
+        if srs and not (self.internal_connection or self.trusted_relay) \
+		and domain == srs_fwdomain:
 	  oldaddr = '@'.join(parse_addr(to))
 	  try:
 	    newaddr = srs.reverse(oldaddr)
+	    # Currently, a sendmail map reverses SRS.  We just log it here.
 	    self.log("srs rcpt:",newaddr)
 	  except:
 	    if srsre.match(oldaddr):
@@ -592,13 +655,13 @@ class bmsMilter(Milter.Milter):
 	      self.setreply('550','5.7.1','Invalid SRS signature')
 	      return Milter.REJECT
 	    self.data_allowed = not srs_reject_spoofed
+      # non DSN mail to SRS address will bounce due to invalid local part
      self.recipients.append('@'.join(t))
-      user,domain = t
      users = check_user.get(domain)
      if self.discard:
        self.del_recipient(to)
      if users and not user in users:
-        self.log('REJECT: RCPT TO:',to,str)
+        self.log('REJECT: RCPT TO:',to)
 	return Milter.REJECT
      if user in block_forward.get(domain,()):
        self.forward = False
@@ -686,7 +749,7 @@ class bmsMilter(Milter.Milter):
  def header(self,name,hval):
    if not self.data_allowed:
      self.log('REJECT: bounce with no SRS encoding')
-      self.setreply('550','5.7.1',"spoofed reply address")
+      self.setreply('550','5.7.1',"I did not send you this message.")
      return Milter.REJECT
    lname = name.lower()
    # decode near ascii text to unobfuscate
@@ -832,21 +895,22 @@ class bmsMilter(Milter.Milter):
 	    print x
    # screen if no recipients are dspam_users
    if not modified and dspam_screener and not self.internal_connection \
-    	and (self.dspam or self.reject_spam):
+    	and self.dspam:
      self.fp.seek(0)
      txt = self.fp.read()
      if len(txt) > dspam_sizelimit:
 	self.log("Large message:",len(txt))
 	return False
-      if not ds.check_spam(dspam_screener,txt,self.recipients,
+      screener = dspam_screener[self.id % len(dspam_screener)]
+      if not ds.check_spam(screener,txt,self.recipients,
      	classify=True,quarantine=not self.reject_spam):
 	self.fp = None
 	if self.reject_spam:
-	  self.log("DSPAM:",dspam_screener,
+	  self.log("DSPAM:",screener,
 	  	'REJECT: X-DSpam-Score: %f' % ds.probability)
 	  self.setreply('550','5.7.1','Your Message looks spammy')
 	  return True
-	self.log("DSPAM:",dspam_screener,"SCREENED")
+	self.log("DSPAM:",screener,"SCREENED")
    return modified

  def eom(self):
@@ -881,16 +945,18 @@ class bmsMilter(Milter.Milter):
      fname = tempfile.mktemp(".fail")	# save message that caused crash
      os.rename(self.tempname,fname)
      self.tempname = None
-      self.log("FAIL: %s" % fname)	# log filename
      if exc_type == email.Errors.BoundaryError:
+	self.log("MALFORMED: %s" % fname)	# log filename
 	self.setreply('554','5.7.7',
 		'Boundary error in your message, are you a spammer?')
        return Milter.REJECT
      if exc_type == email.Errors.HeaderParseError:
+	self.log("MALFORMED: %s" % fname)	# log filename
 	self.setreply('554','5.7.7',
 		'Header parse error in your message, are you a spammer?')
        return Milter.REJECT
      # let default exception handler print traceback and return 451 code
+      self.log("FAIL: %s" % fname)	# log filename
      raise
    if rc == Milter.REJECT: return rc;
    if rc == Milter.DISCARD: return rc;
@@ -967,13 +1033,13 @@ def main():
  if srs or len(discard_users) > 0 or smart_alias or dspam_userdir:
    flags = flags + Milter.DELRCPT
  Milter.set_flags(flags)
-  print "bms milter startup"
+  print "%s bms milter startup" % time.strftime('%Y%b%d %H:%M:%S')
  sys.stdout.flush()
  Milter.runmilter("pythonfilter",socketname,timeout)
-  print "bms milter shutdown"
+  print "%s bms milter shutdown" % time.strftime('%Y%b%d %H:%M:%S')

 if __name__ == "__main__":
-  read_config(["milter.cfg"])
+  read_config(["/etc/mail/pymilter.cfg","milter.cfg"])
  if dspam_dict:
    import dspam	# low level spam check
  if dspam_userdir:
@@ -0,0 +1,153 @@
+#!/usr/bin/python2.3
+
+# Convert a MS Caller-ID entry (XML) to a SPF entry
+#
+# (c) 2004 by Ernesto Baschny
+# (c) 2004 Python version by Stuart Gathman
+#
+# Date: 2004-02-25
+# Version: 1.0
+#
+# Usage:
+#  ./cid2spf.pl "<ep xmlns='http://ms.net/1'>...</ep>"
+#
+# Note that the 'include' directives will also have to be checked and
+# "translated". Future versions of this script might be able to get a
+# domain name as an argument and "crawl" the DNS for the necessary
+# information.
+#
+# A complete reverse translation (SPF -> CID) might be impossible, since
+# there is no way to handle:
+# - PTR and EXISTS mechanisms
+# - MX mechanism with a different domain as argument
+# - macros
+# 
+# References:
+# http://www.microsoft.com/mscorp/twc/privacy/spam_callerid.mspx
+# http://spf.pobox.com/
+#
+# Known bugs:
+# - Currently it won't handle the exclusions provided in the A and R
+#   tags (prefix '!'). They will show up "as-is" in the SPF record
+# - I really haven't read the MS-CID specs in-depth, so there are probably
+#   other bugs too :)
+#
+# Ernesto Baschny <ernst@baschny.de>
+#
+
+import xml.sax
+import spf
+
+# -------------------------------------------------------------------------
+class CIDParser(xml.sax.ContentHandler):
+  "Convert a MS Caller-ID entry (XML) to a SPF entry"
+
+  def __init__(self,q=None):
+    self.spf = []
+    self.action = '-all'
+    self.has_servers = None
+    self.spf_entry = None
+    if q:
+      self.spf_query = q
+    else:
+      self.spf_query = spf.query(i='127.0.0.1', s='localhost', h='unknown')
+
+  def startElement(self,tag,attr):
+      if tag == 'm':
+	if self.has_servers != None and not self.has_servers:
+	  raise ValueError(
+    "Declared <noMailServers\> and later <m>, this CID entry is not valid."
+	  )
+	self.has_servers = True
+      elif tag == 'noMailServers':
+	if self.has_servers:
+	  raise ValueError(
+    "Declared <m> and later <noMailServers\>, this CID entry is not valid."
+	  )
+	self.has_servers = False
+      elif tag == 'ep':
+	if attr.has_key('testing') and attr.getValue('testing') == 'true':
+	  # A CID with 'testing' found:
+	  # From the MS-specs:
+	  #  "Documents in which such attribute is present with a true
+	  #  value SHOULD be entirely ignored (one should act as if the
+	  #  document were absent)"
+	  # From the SPF-specs:
+	  #  "Neutral (?): The SPF client MUST proceed as if a domain did
+	  #  not publish SPF data."
+	  # So we set SPF action to "neutral":
+	  self.action = '?all'
+      elif tag == 'mx':
+	  # The empty MX-tag, same as SPF's MX-mechanism
+	  self.spf.append('mx')
+      self.tag = tag
+
+  def characters(self,text):
+	tag = self.tag
+	# Remove starting and trailing spaces from text:
+	text = text.strip()
+
+	if tag == 'a' or tag == 'r':
+	    # The A and R tags from MS-CID are both handled by the 
+	    # ipv4/6-mechanisms from SPF:
+	    if text.find(':') < 0:
+	      mechanism = 'ip4'
+	    else:
+	      mechanism = 'ip6'
+	    self.spf.append(mechanism + ':' + text)
+	elif tag == 'indirect':
+	    # MS-CID's indirect is "sort of" the include from SPF:
+	    # Not really true, because the <indirect> tag from MS-CID also 
+	    # provides a fallback in case the included domain doesn't provide
+	    # _ep-records: The inbound MX-servers of the included domains
+	    # are added to the list of allowed outgoing mailservers for the
+	    # domain that declared the _ep-record with the <indirect> tag.
+	    # In SPF you would use the 'mx:domain' to handle this, but this
+	    # wouldn't depend on referred domain having or not SPF-records.
+	    cid_xml = self.cid_txt(text)
+	    if cid_xml:
+	      p = CIDParser()
+	      xml.sax.parseString(cid_xml,p)
+	      if p.has_servers != False:
+		self.spf += p.spf
+	    else:
+	      self.spf.append('mx:' + text)
+
+  def cid_txt(self,domain):
+    q = self.spf_query
+    domain='_ep.' + domain
+    a = q.dns_txt(domain)
+    if not a: return None
+    if a[0].lower().startswith('<ep ') and a[-1].lower().endswith('</ep>'):
+      return ''.join(a)
+    return None
+
+  def endElement(self,tag):
+      if tag == 'ep':
+	# This is the end... assemble what we've got
+	spf_entry = ['v=spf1']
+	if self.has_servers != False:
+	  spf_entry += self.spf
+	spf_entry.append(self.action)
+	self.spf_entry = ' '.join(spf_entry)
+
+  def spf_txt(self,cid_xml):
+    if not cid_xml.startswith('<'):
+      cid_xml = self.cid_txt(cid_xml)
+      if not cid_xml: return None
+    # Parse the beast. Any XML-problem will be reported by xlm.sax
+    self.spf_entry = None
+    xml.sax.parseString(cid_xml,self)
+    return self.spf_entry
+
+if __name__ == '__main__':
+  import sys
+  if len(sys.argv) < 2:
+    print >>sys.stderr, \
+      """Usage: %s "<ep xmlns='http://ms.net/1'>...</ep>" """ % sys.argv[0]
+    sys.exit(1)
+
+  cid_xml = sys.argv[1]
+
+  p = CIDParser()
+  print p.spf_txt(cid_xml)
@@ -134,5 +134,26 @@ is a milter declaration for sendmail.cf with all timeouts specified:
 Xpythonfilter, S=local:/var/log/milter/pythonsock, F=T, T=C:5m;S:20s;R:60s;E:5m
 </pre>

+<a name="spf"></a>
+<li> Q. So how do I use the SPF support?  The sample.py milter doesn't seem
+        to use it.
+<p>  A. The bms.py milter supports spf.  The RedHat RPMs will set almost
+everything up for you.  For other systems:
+<ol type=i>
+<li> Arrange to run bms.py in the background (as a service perhaps) and
+     redirect output and errors to a logfile.  For instance, on AIX you'll want
+     to use SRC (System Resource Controller).  
+<li> Copy milter.cfg to the directory you run bms.py in, and edit it.  The
+     comments should explain the options. 
+<li> Start bms.py in the background as arranged.
+<li> Add Xpythonfilter to sendmail.cf or add an INPUT_MAIL_FILTER to
+     sendmail.mc.  Regen sendmail.cf if you use sendmail.mc and restart 
+     sendmail.
+<li> Arrange to rotate log files and remove old defang files in 
+     <code>tempdir</code>.  The RedHat RPM uses <code>logrotate</code> for
+     logfiles and a simple cron script using <code>find</code> to clean
+     <code>tempdir</code>.
+</ol>
+
 </ol>
 </html>
@@ -1,6 +1,6 @@
 # features intended to filter or block incoming mail
 [milter]
-;socket=/var/log/milter/pythonsock
+;socket=/var/run/milter/pythonsock
 tempdir = /var/log/milter/save
 ;timeout=600

@@ -19,7 +19,7 @@ porn_words = penis, breast, pussy, horse cock, porn, xenical, diet pill, d1ck,
 	vi*gra, vi-a-gra, viag, tits, p0rn, hunza, horny, sexy, c0ck,
 	p-e-n-i-s, hydrocodone, vicodin, xanax, vicod1n, x@nax, diazepam,
 	v1@gra, xan@x, cialis, ci@lis, frëe, xãnax, valíum, vãlium, via-gra,
-	x@n3x, vicod3n, penís, v|c0d1n, phentermine, en1arge, dip1oma, v1codin
+	x@n3x, vicod3n, penís, c0d1n, phentermine, en1arge, dip1oma, v1codin
 # spam words are case sensitive
 spam_words = $$$, !!!, XXX, FREE, HGH

@@ -52,6 +52,8 @@ reject_spoofed = 0
 ;reject_neutral = aol.com
 # use a default (v=spf1 a/24 mx/24 ptr) when no SPF records are published
 ;best_guess = 0
+# reject senders that have neither PTR nor SPF records
+;reject_noptr = 0

 # features intended to clean up outgoing mail
 [scrub]
@@ -24,7 +24,7 @@ ALT="Viewable With Any Browser" BORDER="0"></A>
  Stuart D. Gathman</a><br>
 This web page is written by Stuart D. Gathman<br>and<br>sponsored by
 <a href="http://www.bmsi.com">Business Management Systems, Inc.</a> <br>
-Last updated Apr 21, 2004</h4>
+Last updated Jun 08, 2004</h4>

 See the <a href="faq.html">FAQ</a> | <a href="#download">Download now</a> |
 <a href="/mailman/listinfo/pymilter">Subscribe to mailing list</a>
@@ -40,11 +40,8 @@ Version 8.12 seems to be more robust, and includes new privilege
 separation features to enhance security.
 I recommend upgrading.

-<h2> <a name=dspam>Bayesian Filtering</a> </h2>
+<h2> Recent Changes </h2>

-I have selected the <a href="http://www.nuclearelephant.com/projects/dspam/">
-dspam bayes filter project</a> and <a href="dspam.html">
-packaged it for python</a>.
 Release 0.6.6 adds support for <a href="http://spf.pobox.com/">SPF</a>,
 a protocol to prevent forging of the envelope from address.  
 SPF support requires <a href="http://pydns.sourceforge.net/">pydns</a>.
@@ -52,15 +49,15 @@ The included spf.py module is an updated version of the original 1.6
 version at <a href="http://www.wayforward.net/spf/">wayforward.net</a>.
 The updated version tracks the draft RFC and test suite.
 <p>
-Release 0.6.0 offers a simple application of dspam I call "header triage",
-which rejects messages with spammy headers.  Since sendmail has to
-read the entire message anyway once we start reading headers, it
-would probably be better to scan the whole message - except that 
-we replace dangerous attachments elsewhere in the milter  - which screws up the
-body statistics for messages with dangerous attachments.
+The FAQ addresses <a href="faq.html#spf">how to get started with SPF</a>.
 <p>
 Release 0.6.1 adds a full milter based dspam application.
 <p>
+I have selected the <a href="http://www.nuclearelephant.com/projects/dspam/">
+dspam bayes filter project</a> and <a href="dspam.html">
+packaged it for python</a>.
+Release 0.6.0 offers a simple application of dspam I call "header triage",
+which rejects messages with spammy headers.  
 To use header triage, you must have <a href="dspam.html">DSPAM</a> installed,
 and select a dictionary that is well moderated by someone who gets
 lots of spam.  That dictionary can be used to block spam that is 
@@ -1,5 +1,5 @@
 %define name milter
-%define version 0.6.9
+%define version 0.7.0
 %define release 1
 # Redhat 7.x and earlier (multiple ps lines per thread)
 %define sysvinit milter.rc7
@@ -43,8 +43,10 @@ env CFLAGS="$RPM_OPT_FLAGS" %{python} setup.py build
 rm -rf $RPM_BUILD_ROOT
 %{python} setup.py install --root=$RPM_BUILD_ROOT --record=INSTALLED_FILES
 mkdir -p $RPM_BUILD_ROOT/var/log/milter
+mkdir -p $RPM_BUILD_ROOT/etc/mail
 mkdir $RPM_BUILD_ROOT/var/log/milter/save
-cp bms.py milter.cfg $RPM_BUILD_ROOT/var/log/milter
+cp bms.py $RPM_BUILD_ROOT/var/log/milter
+cp milter.cfg $RPM_BUILD_ROOT/etc/mail/pymilter.cfg

 # logfile rotation
 mkdir -p $RPM_BUILD_ROOT/etc/logrotate.d
@@ -103,6 +105,9 @@ mkssys -s milter -p /var/log/milter/start.sh -u 25 -S -n 15 -f 9 -G mail || :
 if [ $1 = 0 ]; then
  rmssys -s milter || :
 fi
+%else
+%post
+echo "pythonsock has moved to /var/run/milter, update /etc/mail/sendmail.cf"
 %endif

 %clean
@@ -124,9 +129,16 @@ rm -rf $RPM_BUILD_ROOT
 %dir /var/log/milter/save
 %config /var/log/milter/start.sh
 %config /var/log/milter/bms.py
-%config /var/log/milter/milter.cfg
+%config(noreplace) /etc/mail/pymilter.cfg

 %changelog
+* Fri Jul 23 2004 Stuart Gathman <stuart@bmsi.com> 0.7.0-1
+- SPF check hello name
+- Move pythonsock to /var/run/milter
+- Move milter.cfg to /etc/mail/pymilter.cfg
+- Check M$ style XML CID records by converting to SPF
+- Recognize, but never match ip6 until we properly support it.
+- Option to reject when no PTR and no SPF
 * Fri Apr 09 2004 Stuart Gathman <stuart@bmsi.com> 0.6.9-1
 - Validate spf.py against test suite, and add Received-SPF support to spf.py
 - Support best_guess for SPF
@@ -1,4 +1,10 @@
 # $Log$
+# Revision 1.53  2004/04/24 22:53:20  stuart
+# Rename some local variables to avoid shadowing builtins
+#
+# Revision 1.52  2004/04/24 22:47:13  stuart
+# Convert header values to str
+#
 # Revision 1.51  2004/03/25 03:19:10  stuart
 # Correctly defang rfc822 attachments when boundary specified with
 # content-type message/rfc822.
@@ -192,19 +198,19 @@ class MimeParser(Parser):
                text = firstbodyline + '\n' + text
            container.set_payload(text)

-def unquote(str):
+def unquote(s):
    """Remove quotes from a string."""
-    if len(str) > 1:
-        if str.startswith('"'):
-	  if str.endswith('"'):
-            str = str[1:-1]
+    if len(s) > 1:
+        if s.startswith('"'):
+	  if s.endswith('"'):
+            s = s[1:-1]
 	  else: # remove garbage after trailing quote
-	    try: str = str[1:str[1:].index('"')+1]
-	    except: return str
-	  return str.replace('\\\\', '\\').replace('\\"', '"')
-        if str.startswith('<') and str.endswith('>'):
-            return str[1:-1]
-    return str
+	    try: s = s[1:s[1:].index('"')+1]
+	    except: return s
+	  return s.replace('\\\\', '\\').replace('\\"', '"')
+        if s.startswith('<') and s.endswith('>'):
+            return s[1:-1]
+    return s

 from types import TupleType

@@ -216,21 +222,21 @@ def _unquotevalue(value):

 email.Message._unquotevalue = _unquotevalue

-def _parseparam(str):
+def _parseparam(s):
    plist = []
-    while str[:1] == ';':
-	str = str[1:]
-	end = str.find(';')
-	while end > 0 and (str.count('"',0,end) & 1):
-	  end = str.find(';',end + 1)
-	if end < 0: end = len(str)
-	f = str[:end]
+    while s[:1] == ';':
+	s = s[1:]
+	end = s.find(';')
+	while end > 0 and (s.count('"',0,end) & 1):
+	  end = s.find(';',end + 1)
+	if end < 0: end = len(s)
+	f = s[:end]
 	if '=' in f:
 	    i = f.index('=')
 	    f = f[:i].strip().lower() + \
 		    '=' + f[i+1:].strip()
 	plist.append(f.strip())
-	str = str[end:]
+	s = s[end:]
    return plist

 # Enhance email.Message 
@@ -350,9 +356,9 @@ class MimeMessage(Message):
    return self.get('content-transfer-encoding',None)

  # Decode body to stream according to transfer encoding, return encoding name
-  def decode(self,filter):
+  def decode(self,filt):
    try:
-      filter.write(self.get_payload(decode=True))
+      filt.write(self.get_payload(decode=True))
    except:
      pass
    return self.getencoding()
@@ -363,7 +369,7 @@ class MimeMessage(Message):
  def __setitem__(self, name, value):
    rc = Message.__setitem__(self,name,value)
    self.modified = True
-    if self.headerchange: self.headerchange(self,name,value)
+    if self.headerchange: self.headerchange(self,name,str(value))
    return rc

  def __delitem__(self, name):
@@ -423,7 +429,7 @@ See your administrator.

 def check_name(msg,savname=None,ckname=check_ext):
  "Replace attachment with a warning if its name is suspicious."
-  for (key,name) in msg.getnames():
+  for key,name in msg.getnames():
    badname = ckname(name)
    if badname:
      hostname = socket.gethostname()
@@ -582,14 +588,14 @@ def check_html(msg,savname=None):
 	msgtype = 'text/html'
  if msgtype == 'text/html':
    out = StringIO.StringIO()
-    filter = HTMLScriptFilter(out)
+    htmlfilter = HTMLScriptFilter(out)
    try:
-      filter.write(msg.get_payload(decode=True))
-      filter.close()
+      htmlfilter.write(msg.get_payload(decode=True))
+      htmlfilter.close()
    #except sgmllib.SGMLParseError:
    except:
      #mimetools.copyliteral(msg.get_payload(),open('debug.out','w')
-      filter.close()
+      htmlfilter.close()
      hostname = socket.gethostname()
      msg.set_payload(
  "An HTML attachment could not be parsed.  The original is saved as '%s:%s'"
@@ -600,7 +606,7 @@ def check_html(msg,savname=None):
      name = "WARNING.TXT"
      msg["Content-Type"] = "text/plain; name="+name
      return Milter.CONTINUE
-    if filter.modified:
+    if htmlfilter.modified:
      msg.set_payload(out)	# remove embedded scripts
      del msg["content-transfer-encoding"]
      email.Encoders.encode_quopri(msg)
@@ -0,0 +1,38 @@
+# Analyze milter log to find abusers
+
+fp = open('/var/log/milter/milter.log','r')
+subdict = {}
+ipdict = {}
+spamcnt = {}
+for line in fp:
+  a = line.split(None,4)
+  if len(a) < 4: continue
+  dt,tm,id,op = a[:4]
+  if op == 'Subject:':
+    if len(a) > 4: subdict[id] = a[4].rstrip()
+  elif op == 'connect':
+    ipdict[id] = a[4].rstrip()
+  elif op in ('eom','dspam'):
+    if id in subdict: del subdict[id]
+    if id in ipdict: del ipdict[id]
+  elif op in ('REJECT:','DSPAM:','SPAM:','abort'):
+    if id in subdict:
+      if id in ipdict:
+        ip = ipdict[id]
+	del ipdict[id]
+	f,host,raw = ip.split(None,2)
+	if host in spamcnt:
+	  spamcnt[host] += 1
+	else:
+	  spamcnt[host] = 1
+      else: ip = ''
+      print dt,tm,op,a[4].rstrip(),subdict[id]
+      del subdict[id]
+    else:
+      print line.rstrip()
+print len(subdict),'leftover entries'
+
+spamlist = filter(lambda x: x[1] > 1,spamcnt.items())
+spamlist.sort(lambda x,y: x[1] - y[1])
+for ip,cnt in spamlist:
+  print cnt,ip
@@ -12,7 +12,7 @@ if sys.version < '2.2.3':
  DistributionMetadata.classifiers = None
  DistributionMetadata.download_url = None

-setup(name = "milter", version = "0.6.9",
+setup(name = "milter", version = "0.7.0",
 	description="Python interface to sendmail milter API",
 	long_description="""\
 This is a python extension module to enable python scripts to
@@ -45,6 +45,15 @@ For news, bugfixes, etc. visit the home page for this implementation at
 # Terrence is not responding to email.
 #
 # $Log$
+# Revision 1.13  2004/07/23 19:23:12  stuart
+# Always fail to match on ip6, until we support it properly.
+#
+# Revision 1.12  2004/07/23 18:48:15  stuart
+# Fold CID parsing into spf
+#
+# Revision 1.11  2004/07/21 21:32:01  stuart
+# Handle CID records (Microsoft XML format).
+#
 # Revision 1.10  2004/04/19 22:12:11  stuart
 # Release 0.6.9
 #
@@ -97,6 +106,144 @@ import struct  # for pack() and unpack()
 import time    # for time()

 import DNS	# http://pydns.sourceforge.net
+import xml.sax
+
+# -------------------------------------------------------------------------
+# Convert a MS Caller-ID entry (XML) to a SPF entry
+#
+# (c) 2004 by Ernesto Baschny
+# (c) 2004 Python version by Stuart Gathman
+#
+# Date: 2004-02-25
+# Version: 1.0
+#
+# Usage:
+#  ./cid2spf.pl "<ep xmlns='http://ms.net/1'>...</ep>"
+#
+# Note that the 'include' directives will also have to be checked and
+# "translated". Future versions of this script might be able to get a
+# domain name as an argument and "crawl" the DNS for the necessary
+# information.
+#
+# A complete reverse translation (SPF -> CID) might be impossible, since
+# there is no way to handle:
+# - PTR and EXISTS mechanism 
+# - MX mechanism with a different domain as argument
+# - macros
+# 
+# References:
+# http://www.microsoft.com/mscorp/twc/privacy/spam_callerid.mspx
+# http://spf.pobox.com/
+#
+# Known bugs:
+# - Currently it won't handle the exclusions provided in the A and R
+#   tags (prefix '!'). They will show up "as-is" in the SPF record
+# - I really haven't read the MS-CID specs in-depth, so there are probably
+#   other bugs too :)
+#
+# Ernesto Baschny <ernst@baschny.de>
+#
+
+class CIDParser(xml.sax.ContentHandler):
+  "Convert a MS Caller-ID entry (XML) to a SPF entry; see spf_txt()."
+
+  def __init__(self,q=None):
+    "q provides dns_txt() for _ep lookups; default is a placeholder query."
+    self.spf = []            # SPF mechanisms collected so far
+    self.action = '-all'     # closing 'all' mechanism of the SPF entry
+    self.has_servers = None  # True after <m>, False after <noMailServers/>
+    self.spf_entry = None    # finished SPF entry, set when </ep> is seen
+    self.tag = None          # element whose text is being buffered
+    self.text = []           # text chunks received for self.tag
+    self.spf_query = q or query(i='127.0.0.1', s='localhost', h='unknown')
+
+  def startElement(self,tag,attr):
+    "Note structural tags; ValueError if <m> and <noMailServers/> conflict."
+    if tag == 'm':
+      if self.has_servers is not None and not self.has_servers:
+        raise ValueError(
+    "Declared <noMailServers/> and later <m>, this CID entry is not valid."
+        )
+      self.has_servers = True
+    elif tag == 'noMailServers':
+      if self.has_servers:
+        raise ValueError(
+    "Declared <m> and later <noMailServers/>, this CID entry is not valid."
+        )
+      self.has_servers = False
+    elif tag == 'ep':
+      if attr.get('testing') == 'true':
+        # From the MS-specs: "Documents in which such attribute is present
+        # with a true value SHOULD be entirely ignored (one should act as
+        # if the document were absent)".  From the SPF-specs: "Neutral
+        # (?): The SPF client MUST proceed as if a domain did not publish
+        # SPF data."  So we set SPF action to "neutral":
+        self.action = '?all'
+    elif tag == 'mx':
+      # The empty MX-tag, same as SPF's MX-mechanism
+      self.spf.append('mx')
+    # Text may arrive in several characters() calls: buffer it per element
+    self.tag = tag
+    self.text = []
+
+  def characters(self,text):
+    "Buffer text of the current element; endElement() translates it."
+    if self.tag:
+      self.text.append(text)
+
+  def cid_txt(self,domain):
+    "Return the joined <ep> XML from TXT records of _ep.domain, or None."
+    a = self.spf_query.dns_txt('_ep.' + domain)
+    if not a: return None
+    if a[0].lower().startswith('<ep ') and a[-1].lower().endswith('</ep>'):
+      return ''.join(a)
+    return None
+
+  def endElement(self,tag):
+    "Translate the text buffered for tag; build spf_entry at </ep>."
+    text = ''.join(self.text).strip()
+    is_leaf = tag == self.tag
+    self.tag = None   # drop text between elements (e.g. indentation)
+    self.text = []
+    if is_leaf and text:
+      if tag in ('a','r'):
+        # MS-CID's A and R tags both map to SPF's ip4/ip6 mechanisms:
+        mechanism = 'ip4'
+        if ':' in text: mechanism = 'ip6'
+        self.spf.append(mechanism + ':' + text)
+      elif tag == 'indirect':
+        # MS-CID's indirect is "sort of" the include from SPF.  Not really
+        # true, because <indirect> also provides a fallback in case the
+        # included domain doesn't provide _ep-records: the inbound
+        # MX-servers of the included domain are added to the allowed
+        # outgoing mailservers of the domain that declared <indirect>.
+        # SPF's 'mx:domain' handles this, but does not depend on whether
+        # the referred domain has SPF-records.
+        cid_xml = self.cid_txt(text)
+        if cid_xml:
+          p = CIDParser(q=self.spf_query)
+          xml.sax.parseString(cid_xml,p)
+          if p.has_servers != False:
+            self.spf += p.spf
+        else:
+          self.spf.append('mx:' + text)
+    if tag == 'ep':
+      # This is the end... assemble what we've got
+      spf_entry = ['v=spf1']
+      if self.has_servers != False:
+        spf_entry += self.spf
+      spf_entry.append(self.action)
+      self.spf_entry = ' '.join(spf_entry)
+
+  def spf_txt(self,cid_xml):
+    "Return the SPF entry for CID XML (or for a domain's _ep record)."
+    if not cid_xml.startswith('<'):
+      cid_xml = self.cid_txt(cid_xml)
+      if not cid_xml: return None
+    # Parse the beast. Any XML-problem will be reported by xml.sax
+    self.spf_entry = None
+    xml.sax.parseString(cid_xml,self)
+    return self.spf_entry

 # 32-bit IPv4 address mask
 MASK = 0xFFFFFFFFL
@@ -330,9 +477,14 @@ class query(object):
 				             cidrlength):
 					break

-			elif m in ('ip4', 'ipv4') and arg != self.d:
+			elif m in ('ip4', 'ipv4', 'ip') and arg != self.d:
 				if cidrmatch(self.i, [arg], cidrlength):
 					break
+			elif m == 'ip6':
+			# Until we support IPV6, we should never
+			# get an IPv6 connection.  So this mech
+			# will never match.
+				pass

 			elif m in ('ptr', 'prt'):
 				if domainmatch(self.validated_ptrs(self.i),
@@ -465,17 +617,24 @@ class query(object):
 		is found.
 		"""
 		a = [t for t in self.dns_txt(domain) if t.startswith('v=spf1')]
-		if not a and DELEGATE:
-		  a = [t
-		    for t in self.dns_txt(domain+'._spf.'+DELEGATE)
-		      if t.startswith('v=spf1')
-		  ]
+		if not a:
+		  if DELEGATE:
+		    a = [t
+		      for t in self.dns_txt(domain+'._spf.'+DELEGATE)
+			if t.startswith('v=spf1')
+		    ]
+		  if not a:
+		    # No SPF record: convert and return CID if present
+		    p = CIDParser(q=self)
+		    return p.spf_txt(domain)
+
 		if len(a) == 1:
 			return a[0]
 		else:
 			return None

 	def dns_txt(self, domainname):
+		"Get a list of TXT records for a domain name."
 		if domainname:
 		  return [t for a in self.dns(domainname, 'TXT') for t in a]
 		return []