First cut at encoding error decorator

This commit is contained in:
Stuart D. Gathman
2020-06-17 13:55:26 -04:00
parent 132e8326b5
commit 4c7c76fca4
2 changed files with 22 additions and 6 deletions
+18 -4
View File
@@ -181,15 +181,26 @@ def noreply(func):
wrapper.milter_protocol = nr_mask wrapper.milter_protocol = nr_mask
return wrapper return wrapper
## Function decorator to set the encoding error strategy of a callback.
# Current RFCs define UTF-8 as the standard encoding for SMTP
# envelope and header fields.  By default, Milter.Base decodes
# envelope and header values with errors='surrogateescape'.
# This decorator stores a different strategy, e.g. 'ignore' or 'replace',
# on the decorated callback as its <code>error_strategy</code> attribute.
# The strategy name is checked when the decorator is created, so a
# misspelled name fails immediately instead of on the first
# undecodable value seen by the callback.
# @param strategy the name of a registered codec error handler
# @return a decorator that tags the callback and returns it unchanged
# @throws LookupError if strategy is not a registered error handler
def encodingerror(strategy):
    import codecs
    # bytes.decode() only looks up the error handler when it actually
    # hits undecodable input, so validate the name eagerly here.
    codecs.lookup_error(strategy)
    def setstrategy(func):
        func.error_strategy = strategy
        return func
    return setstrategy
## Function decorator to set macros used in a callback. ## Function decorator to set macros used in a callback.
# By default, the MTA sends all macros defined for a callback. # By default, the MTA sends all macros defined for a callback.
# If some or all of these are unused, the bandwidth can be saved # If some or all of these are unused, the bandwidth can be saved
# by listing the ones that are used. # by listing the ones that are used.
# @since 1.0.2 # @since 1.0.2
def symlist(*syms): def symlist(*syms):
def setsyms(func):
if len(syms) > 5: if len(syms) > 5:
raise ValueError('@symlist limited to 5 macros by MTA: '+func.__name__) raise ValueError('@symlist limited to 5 macros by MTA: '+func.__name__)
def setsyms(func):
if func.__name__ not in MACRO_CALLBACKS: if func.__name__ not in MACRO_CALLBACKS:
raise ValueError('@symlist applied to non-symlist method: '+func.__name__) raise ValueError('@symlist applied to non-symlist method: '+func.__name__)
func._symlist = syms func._symlist = syms
@@ -328,7 +339,8 @@ class Base(object):
# or trap utf-8 conversion exception, etc. # or trap utf-8 conversion exception, etc.
def envfrom_bytes(self,*b):
    # Decode the raw MAIL FROM values as UTF-8 and pass them on to the
    # envfrom callback.  The error strategy is read from the callback's
    # error_strategy attribute, defaulting to 'surrogateescape'.
    e = getattr(self.envfrom,'error_strategy','surrogateescape')
    try:
        # Build a list, not a generator expression: a generator would
        # defer decoding until the arguments are unpacked below, outside
        # this try block, so the fallback could never trigger.
        s = [v.decode(encoding='utf-8',errors=e) for v in b]
    except UnicodeDecodeError:
        # A raising strategy (e.g. 'strict') rejected a value:
        # hand the callback the original bytes unchanged.
        s = b
    return self.envfrom(*s)
## Called when the SMTP client says MAIL FROM. Called by the ## Called when the SMTP client says MAIL FROM. Called by the
@@ -349,7 +361,8 @@ class Base(object):
# or trap utf-8 conversion exception, etc. # or trap utf-8 conversion exception, etc.
def envrcpt_bytes(self,*b):
    # Decode the raw RCPT TO values as UTF-8 and pass them on to the
    # envrcpt callback.  The error strategy is read from the callback's
    # error_strategy attribute, defaulting to 'surrogateescape'.
    e = getattr(self.envrcpt,'error_strategy','surrogateescape')
    try:
        # Build a list, not a generator expression: a generator would
        # defer decoding until the arguments are unpacked below, outside
        # this try block, so the fallback could never trigger.
        s = [v.decode(encoding='utf-8',errors=e) for v in b]
    except UnicodeDecodeError:
        # A raising strategy (e.g. 'strict') rejected a value:
        # hand the callback the original bytes unchanged.
        s = b
    return self.envrcpt(*s)
## Called when the SMTP client says RCPT TO. Called by the ## Called when the SMTP client says RCPT TO. Called by the
@@ -377,7 +390,8 @@ class Base(object):
# to pass bytes to @link #header the header callback @endlink instead. # to pass bytes to @link #header the header callback @endlink instead.
def header_bytes(self,fld,val):
    # Decode a raw header value as UTF-8 and forward it, together with
    # the field name, to the header callback.  The error strategy comes
    # from the callback's error_strategy attribute and defaults to
    # 'surrogateescape'.
    strategy = getattr(self.header,'error_strategy','surrogateescape')
    try:
        text = val.decode(encoding='utf-8',errors=strategy)
    except UnicodeDecodeError:
        # The chosen strategy refused the value: pass the bytes through.
        text = val
    return self.header(fld,text)
## Called for each header field in the message body. ## Called for each header field in the message body.
+3 -1
View File
@@ -67,13 +67,15 @@ class sampleMilter(Milter.Milter):
self.log("rcpt to",to,str) self.log("rcpt to",to,str)
return Milter.CONTINUE return Milter.CONTINUE
@Milter.encodingerror('replace')
def header(self,name,val): def header(self,name,val):
lname = name.lower() lname = name.lower()
if lname == 'subject': if lname == 'subject':
# even if we wanted the Taiwanese spam, we can't read Chinese # even if we wanted the Taiwanese spam, we can't read Chinese
# (delete if you read chinese mail) # (delete if you read chinese mail)
print('val=',val.encode(errors='surrogateescape')) #print('val=',val.encode(errors='surrogateescape'))
print('val=',val)
if val.startswith('=?big5') or val.startswith('=?ISO-2022-JP'): if val.startswith('=?big5') or val.startswith('=?ISO-2022-JP'):
self.log('REJECT: %s: %s' % (name,val)) self.log('REJECT: %s: %s' % (name,val))
#self.setreply('550','','Go away spammer') #self.setreply('550','','Go away spammer')