From 3844751ef05320724204ff1b02d8fc4ef7be5530 Mon Sep 17 00:00:00 2001 From: "Stuart D. Gathman" Date: Tue, 21 Apr 2020 18:20:16 -0400 Subject: [PATCH] Envelope and header values consistently decoded from utf-8. See RFC 8616. --- Doxyfile | 2 +- Milter/__init__.py | 11 +++++--- Milter/sgmllib.py | 1 - makefile | 4 +-- miltermodule.c | 1 + sample.py | 9 +++++-- setup.py | 2 +- milter-template.py => template.py | 45 ++++++++++++++++++++----------- testsample.py | 20 +++++++++++++- 9 files changed, 68 insertions(+), 27 deletions(-) rename milter-template.py => template.py (84%) diff --git a/Doxyfile b/Doxyfile index 236e1c1..6ced63d 100644 --- a/Doxyfile +++ b/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = pymilter # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 1.0.2 +PROJECT_NUMBER = 1.0.5 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/Milter/__init__.py b/Milter/__init__.py index c37f391..5d1e98b 100755 --- a/Milter/__init__.py +++ b/Milter/__init__.py @@ -349,13 +349,18 @@ class Base(object): # @since 0.9.2 @nocallback def data(self): return CONTINUE - ## Called with bytes for header callback. - # Converts to unicode with surrogate escape. Can be overriden - # to pass bytes to @link #header the header callback @endlink. + ## Called with bytes by default global header callback. + # @param fld name decoded as ascii + # @param val field value as bytes + # @since 1.0.5 + # Converts from utf-8 to unicode with surrogate escape. Can be overridden + # to pass bytes to @link #header the header callback @endlink instead. def header_bytes(self,fld,val): s = val.decode(encoding='utf-8',errors='surrogateescape') return self.header(fld,s) ## Called for each header field in the message body. 
+ # @param field name decoded as ascii + # @param value field value decoded as utf-8 on python3 @nocallback def header(self,field,value): return CONTINUE ## Called at the blank line that terminates the header fields. diff --git a/Milter/sgmllib.py b/Milter/sgmllib.py index 5433e04..28fe5d0 100644 --- a/Milter/sgmllib.py +++ b/Milter/sgmllib.py @@ -1,4 +1,3 @@ - """A parser for SGML, using the derived class as a static DTD.""" # XXX This only supports those SGML features used by HTML. diff --git a/makefile b/makefile index 4c0568f..b799281 100644 --- a/makefile +++ b/makefile @@ -1,10 +1,10 @@ web: doxygen test -L doc/html/milter_api || ln -sf /usr/share/doc/sendmail-milter-devel doc/html/milter_api - rsync -ravKk doc/html/ bmsi.com:/var/www/html/pymilter + rsync -ravKk doc/html/ pymilter.org:/var/www/html/milter/pymilter cd doc/html; zip -r ../../doc . -VERSION=1.0.4 +VERSION=1.0.5 PKG=pymilter-$(VERSION) SRCTAR=$(PKG).tar.gz diff --git a/miltermodule.c b/miltermodule.c index 2987b02..ffa1053 100644 --- a/miltermodule.c +++ b/miltermodule.c @@ -675,6 +675,7 @@ milter_wrap_header(SMFICTX *ctx, char *headerf, char *headerv) { c = _get_context(ctx); if (!c) return SMFIS_TEMPFAIL; #if PY_MAJOR_VERSION >= 3 + /* pass val as bytes so Milter.Base.header_bytes can do surrogate escape. 
*/ arglist = Py_BuildValue("(Osy)", c, headerf, headerv); #else arglist = Py_BuildValue("(Oss)", c, headerf, headerv); diff --git a/sample.py b/sample.py index 0889c19..9565591 100644 --- a/sample.py +++ b/sample.py @@ -24,7 +24,12 @@ class sampleMilter(Milter.Milter): def log(self,*msg): print("%s [%d]" % (strftime('%Y%b%d %H:%M:%S'),self.id),end=None) - for i in msg: print(i,end=None) + for i in msg: + try: + print(i,end=None) + except UnicodeEncodeError: + s = i.encode(encoding='utf-8',errors='surrogateescape') + print(s,end=None) print() def __init__(self): @@ -104,7 +109,7 @@ class sampleMilter(Milter.Milter): if lname in ('subject','x-mailer'): self.log('%s: %s' % (name,val)) if self.fp: - self.fp.write(("%s: %s\n" % (name,val)).encode()) # add header to buffer + self.fp.write(("%s: %s\n" % (name,val)).encode(errors='surrogateescape')) # add header to buffer return Milter.CONTINUE def eoh(self): diff --git a/setup.py b/setup.py index 457be60..90868f8 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ libdirs = ["/usr/lib/libmilter"] # needed for Debian modules = ["mime"] # NOTE: importing Milter to obtain version fails when milter.so not built -setup(name = "pymilter", version = '1.0.4', +setup(name = "pymilter", version = '1.0.5', description="Python interface to sendmail milter API", long_description="""\ This is a python extension module to enable python scripts to diff --git a/milter-template.py b/template.py similarity index 84% rename from milter-template.py rename to template.py index b2239d2..0f8679e 100644 --- a/milter-template.py +++ b/template.py @@ -1,6 +1,7 @@ ## To roll your own milter, create a class that extends Milter. -# See the pymilter project at http://bmsi.com/python/milter.html -# based on Sendmail's milter API +# This is a useless example to show basic features of Milter. +# See the pymilter project at https://pymilter.org based +# on Sendmail's milter API # This code is open-source on the same terms as Python. 
## Milter calls methods of your class at milter events. @@ -10,21 +11,22 @@ from __future__ import print_function import Milter try: - from StringIO import StringIO + from StringIO import StringIO as BytesIO except: - from io import StringIO + from io import BytesIO import time import email import sys from socket import AF_INET, AF_INET6 from Milter.utils import parse_addr if True: + # for logging process - usually not needed from multiprocessing import Process as Thread, Queue else: from threading import Thread from Queue import Queue -logq = Queue(maxsize=4) +logq = None class myMilter(Milter.Base): @@ -78,9 +80,10 @@ class myMilter(Milter.Base): # NOTE: self.fp is only an *internal* copy of message data. You # must use addheader, chgheader, replacebody to change the message # on the MTA. - self.fp = StringIO() + self.fp = BytesIO() self.canon_from = '@'.join(parse_addr(mailfrom)) - self.fp.write('From %s %s\n' % (self.canon_from,time.ctime())) + self.fp.write(b'From %s %s\n' % (self.canon_from.encode(), + time.ctime().encode())) return Milter.CONTINUE @@ -95,12 +98,12 @@ class myMilter(Milter.Base): @Milter.noreply def header(self, name, hval): - self.fp.write("%s: %s\n" % (name,hval)) # add header to buffer + self.fp.write(b'%s: %s\n' % (name.encode(),hval.encode())) # add header to buffer return Milter.CONTINUE @Milter.noreply def eoh(self): - self.fp.write("\n") # terminate headers + self.fp.write(b'\n') # terminate headers return Milter.CONTINUE @Milter.noreply @@ -110,7 +113,7 @@ class myMilter(Milter.Base): def eom(self): self.fp.seek(0) - msg = email.message_from_file(self.fp) + msg = email.message_from_binary_file(self.fp) # many milter functions can only be called from eom() # example of adding a Bcc: self.addrcpt('<%s>' % 'spy@example.com') @@ -128,13 +131,14 @@ class myMilter(Milter.Base): ## === Support Functions === def log(self,*msg): - logq.put((msg,self.id,time.time())) + t = (msg,self.id,time.time()) + if logq: + logq.put(t) + else: + # 
logmsg(*t) + pass -def background(): - while True: - t = logq.get() - if not t: break - msg,id,ts = t +def logmsg(msg,id,ts): print("%s [%d]" % (time.strftime('%Y%b%d %H:%M:%S',time.localtime(ts)),id), end=None) # 2005Oct13 02:34:11 [1] msg1 msg2 msg3 ... @@ -142,6 +146,12 @@ def background(): print() sys.stdout.flush() +def background(): + while True: + t = logq.get() + if not t: break + logmsg(*t) + ## === def main(): @@ -163,4 +173,7 @@ def main(): print("%s bms milter shutdown" % time.strftime('%Y%b%d %H:%M:%S')) if __name__ == "__main__": + # You probably do not need a logging process, but if you do, this + # is one way to do it. + logq = Queue(maxsize=4) main() diff --git a/testsample.py b/testsample.py index 8618cd8..175cea8 100644 --- a/testsample.py +++ b/testsample.py @@ -1,6 +1,7 @@ import unittest import Milter import sample +import template import mime import zipfile from Milter.test import TestBase @@ -21,6 +22,23 @@ class BMSMilterTestCase(unittest.TestCase): self.zf.close() self.zf = None + def testTemplate(self,fname='test2'): + ctx = TestCtx() + Milter.factory = template.myMilter + ctx._setsymval('{auth_authen}','batman') + ctx._setsymval('{auth_type}','batcomputer') + ctx._setsymval('j','mailhost') + count = 10 + while count > 0: + rc = ctx._connect(helo='milter-template.example.org') + self.assertEquals(rc,Milter.CONTINUE) + with open('test/'+fname,'rb') as fp: + rc = ctx._feedFile(fp) + milter = ctx.getpriv() + self.assertFalse(ctx._bodyreplaced,"Message body replaced") + ctx._close() + count -= 1 + def testHeader(self,fname='utf8'): ctx = TestCtx() Milter.factory = sample.sampleMilter @@ -28,7 +46,7 @@ class BMSMilterTestCase(unittest.TestCase): ctx._setsymval('{auth_type}','batcomputer') ctx._setsymval('j','mailhost') rc = ctx._connect() - self.assertTrue(rc == Milter.CONTINUE) + self.assertEquals(rc,Milter.CONTINUE) with open('test/'+fname,'rb') as fp: rc = ctx._feedFile(fp) milter = ctx.getpriv()