From 589853aee5ef21a78152b7c9880ddc48beeec83e Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 21:16:31 +1000
Subject: [PATCH 01/10] Split out dkim.canonicalization, and test it. Note that
 test_wsp_strips_headers is correctly failing, as trailing whitespace is not
 stripped from header names.

---
 dkim/__init__.py                    | 42 ++-------------
 dkim/canonicalization.py            | 66 +++++++++++++++++++++++
 dkim/tests/__init__.py              |  2 +
 dkim/tests/test_canonicalization.py | 83 +++++++++++++++++++++++++++++
 4 files changed, 155 insertions(+), 38 deletions(-)
 create mode 100644 dkim/canonicalization.py
 create mode 100644 dkim/tests/test_canonicalization.py

diff --git a/dkim/__init__.py b/dkim/__init__.py
index 23a006c..0762ce7 100644
--- a/dkim/__init__.py
+++ b/dkim/__init__.py
@@ -25,6 +25,10 @@ import logging
 import re
 import time
 
+from dkim.canonicalization import (
+    Relaxed,
+    Simple,
+    )
 from dkim.crypto import (
     DigestTooLargeError,
     parse_pem_private_key,
@@ -40,8 +44,6 @@ from dkim.util import (
     )
 
 __all__ = [
-    "Simple",
-    "Relaxed",
     "InternalError",
     "KeyFormatError",
     "MessageFormatError",
@@ -50,42 +52,6 @@ __all__ = [
     "verify",
 ]
 
-
-class Simple:
-    """Class that represents the "simple" canonicalization algorithm."""
-
-    name = b"simple"
-
-    @staticmethod
-    def canonicalize_headers(headers):
-        # No changes to headers.
-        return headers
-
-    @staticmethod
-    def canonicalize_body(body):
-        # Ignore all empty lines at the end of the message body.
-        return re.sub(b"(\r\n)*$", b"\r\n", body)
-
-class Relaxed:
-    """Class that represents the "relaxed" canonicalization algorithm."""
-
-    name = b"relaxed"
-
-    @staticmethod
-    def canonicalize_headers(headers):
-        # Convert all header field names to lowercase.
-        # Unfold all header lines.
-        # Compress WSP to single space.
-        # Remove all WSP at the start or end of the field value (strip).
-        return [(x[0].lower(), re.sub(br"\s+", b" ", re.sub(b"\r\n", b"", x[1])).strip()+b"\r\n") for x in headers]
-
-    @staticmethod
-    def canonicalize_body(body):
-        # Remove all trailing WSP at end of lines.
-        # Compress non-line-ending WSP to single space.
-        # Ignore all empty lines at the end of the message body.
-        return re.sub(b"(\r\n)*$", b"\r\n", re.sub(br"[\x09\x20]+", b" ", re.sub(b"[\\x09\\x20]+\r\n", b"\r\n", body)))
-
 class DKIMException(Exception):
     """Base class for DKIM errors."""
     pass
diff --git a/dkim/canonicalization.py b/dkim/canonicalization.py
new file mode 100644
index 0000000..2cff4c2
--- /dev/null
+++ b/dkim/canonicalization.py
@@ -0,0 +1,66 @@
+# This software is provided 'as-is', without any express or implied
+# warranty.  In no event will the author be held liable for any damages
+# arising from the use of this software.
+#
+# Permission is granted to anyone to use this software for any purpose,
+# including commercial applications, and to alter it and redistribute it
+# freely, subject to the following restrictions:
+#
+# 1. The origin of this software must not be misrepresented; you must not
+#    claim that you wrote the original software. If you use this software
+#    in a product, an acknowledgment in the product documentation would be
+#    appreciated but is not required.
+# 2. Altered source versions must be plainly marked as such, and must not be
+#    misrepresented as being the original software.
+# 3. This notice may not be removed or altered from any source distribution.
+#
+# Copyright (c) 2008 Greg Hewgill http://hewgill.com
+#
+# This has been modified from the original software.
+# Copyright (c) 2011 William Grant <me@williamgrant.id.au>
+
+import re
+
+
+class Simple:
+    """Class that represents the "simple" canonicalization algorithm."""
+
+    name = b"simple"
+
+    @staticmethod
+    def canonicalize_headers(headers):
+        # No changes to headers.
+        return headers
+
+    @staticmethod
+    def canonicalize_body(body):
+        # Ignore all empty lines at the end of the message body.
+        return re.sub(b"(\r\n)*$", b"\r\n", body)
+
+
+class Relaxed:
+    """Class that represents the "relaxed" canonicalization algorithm."""
+
+    name = b"relaxed"
+
+    @staticmethod
+    def canonicalize_headers(headers):
+        # Convert all header field names to lowercase.
+        # Unfold all header lines.
+        # Compress WSP to single space.
+        # Remove all WSP at the start or end of the field value (strip).
+        return [
+            (x[0].lower(),
+             re.sub(br"\s+", b" ", re.sub(b"\r\n", b"", x[1])).strip()
+             + b"\r\n")
+            for x in headers]
+
+    @staticmethod
+    def canonicalize_body(body):
+        # Remove all trailing WSP at end of lines.
+        removed_trailing_wsp = re.sub(b"[\\x09\\x20]+\r\n", b"\r\n", body)
+        # Compress non-line-ending WSP to single space.
+        compressed_wsp = re.sub(br"[\x09\x20]+", b" ", removed_trailing_wsp)
+        # Ignore all empty lines at the end of the message body.
+        removed_trailing_lines = re.sub(b"(\r\n)*$", b"\r\n", compressed_wsp)
+        return removed_trailing_lines
diff --git a/dkim/tests/__init__.py b/dkim/tests/__init__.py
index 69857e3..a7c2733 100644
--- a/dkim/tests/__init__.py
+++ b/dkim/tests/__init__.py
@@ -21,11 +21,13 @@ import unittest
 
 def test_suite():
     from dkim.tests import (
+        test_canonicalization,
         test_crypto,
         test_dkim,
         test_util,
         )
     modules = [
+        test_canonicalization,
         test_crypto,
         test_dkim,
         test_util,
diff --git a/dkim/tests/test_canonicalization.py b/dkim/tests/test_canonicalization.py
new file mode 100644
index 0000000..16a0e3f
--- /dev/null
+++ b/dkim/tests/test_canonicalization.py
@@ -0,0 +1,83 @@
+# This software is provided 'as-is', without any express or implied
+# warranty.  In no event will the author be held liable for any damages
+# arising from the use of this software.
+#
+# Permission is granted to anyone to use this software for any purpose,
+# including commercial applications, and to alter it and redistribute it
+# freely, subject to the following restrictions:
+#
+# 1. The origin of this software must not be misrepresented; you must not
+#    claim that you wrote the original software. If you use this software
+#    in a product, an acknowledgment in the product documentation would be
+#    appreciated but is not required.
+# 2. Altered source versions must be plainly marked as such, and must not be
+#    misrepresented as being the original software.
+# 3. This notice may not be removed or altered from any source distribution.
+#
+# Copyright (c) 2011 William Grant <me@williamgrant.id.au>
+
+import unittest
+
+from dkim.canonicalization import Simple, Relaxed
+
+
+class TestSimpleAlgorithm(unittest.TestCase):
+
+    def test_headers_untouched(self):
+        test_headers = [(b'Foo  ', b'bar\r\n'), (b'Foo', b'baz\r\n')]
+        self.assertEqual(
+            test_headers,
+            Simple.canonicalize_headers(test_headers))
+
+    def test_strips_trailing_empty_lines_from_body(self):
+        self.assertEqual(
+            b'Foo  \tbar    \r\n',
+            Simple.canonicalize_body(
+                b'Foo  \tbar    \r\n\r\n'))
+
+
+class TestRelaxedAlgorithm(unittest.TestCase):
+
+    def test_lowercases_headers(self):
+        self.assertEqual(
+            [(b'foo', b'Bar\r\n'), (b'baz', b'Foo\r\n')],
+            Relaxed.canonicalize_headers(
+                [(b'Foo', b'Bar\r\n'), (b'BaZ', b'Foo\r\n')]))
+
+    def test_unfolds_headers(self):
+        self.assertEqual(
+            [(b'foo', b'Bar baz\r\n')],
+            Relaxed.canonicalize_headers(
+                [(b'Foo', b'Bar\r\n baz\r\n')]))
+
+    def test_wsp_compresses_headers(self):
+        self.assertEqual(
+            [(b'foo', b'Bar baz\r\n')],
+            Relaxed.canonicalize_headers(
+                [(b'Foo', b'Bar \t baz\r\n')]))
+
+    def test_wsp_strips_headers(self):
+        self.assertEqual(
+            [(b'foo', b'Bar baz\r\n')],
+            Relaxed.canonicalize_headers(
+                [(b'Foo  ', b'   Bar \t baz   \r\n')]))
+
+    def test_strips_trailing_wsp_from_body(self):
+        self.assertEqual(
+            b'Foo\r\nbar\r\n',
+            Relaxed.canonicalize_body(b'Foo  \t\r\nbar\r\n'))
+
+    def test_wsp_compresses_body(self):
+        self.assertEqual(
+            b'Foo bar\r\n',
+            Relaxed.canonicalize_body(b'Foo  \t  bar\r\n'))
+
+    def test_strips_trailing_empty_lines_from_body(self):
+        self.assertEqual(
+            b'Foo\r\nbar\r\n',
+            Relaxed.canonicalize_body(b'Foo\r\nbar\r\n\r\n\r\n'))
+
+
+def test_suite():
+    from unittest import TestLoader
+    return TestLoader().loadTestsFromName(__name__)

From a1fc55bcaab1f4e8cb2735eb695c886a678c9b38 Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 21:25:20 +1000
Subject: [PATCH 02/10] Rewrite tests to use an assertCanonicalForm helper.

---
 dkim/tests/test_canonicalization.py | 82 +++++++++++++++++------------
 1 file changed, 49 insertions(+), 33 deletions(-)

diff --git a/dkim/tests/test_canonicalization.py b/dkim/tests/test_canonicalization.py
index 16a0e3f..5269f72 100644
--- a/dkim/tests/test_canonicalization.py
+++ b/dkim/tests/test_canonicalization.py
@@ -21,61 +21,77 @@ import unittest
 from dkim.canonicalization import Simple, Relaxed
 
 
-class TestSimpleAlgorithm(unittest.TestCase):
+class BaseCanonicalizationTest(unittest.TestCase):
 
-    def test_headers_untouched(self):
+    def assertCanonicalForm(self, expected, input):
+        self.assertEqual(expected, self.func(expected))
+        self.assertEqual(expected, self.func(input))
+
+
+class TestSimpleAlgorithmHeaders(BaseCanonicalizationTest):
+
+    func = staticmethod(Simple.canonicalize_headers)
+
+    def test_untouched(self):
         test_headers = [(b'Foo  ', b'bar\r\n'), (b'Foo', b'baz\r\n')]
-        self.assertEqual(
+        self.assertCanonicalForm(
             test_headers,
-            Simple.canonicalize_headers(test_headers))
+            test_headers)
+
+
+class TestSimpleAlgorithmBody(BaseCanonicalizationTest):
+
+    func = staticmethod(Simple.canonicalize_body)
 
     def test_strips_trailing_empty_lines_from_body(self):
-        self.assertEqual(
+        self.assertCanonicalForm(
             b'Foo  \tbar    \r\n',
-            Simple.canonicalize_body(
-                b'Foo  \tbar    \r\n\r\n'))
+            b'Foo  \tbar    \r\n\r\n')
 
 
-class TestRelaxedAlgorithm(unittest.TestCase):
+class TestRelaxedAlgorithmHeaders(BaseCanonicalizationTest):
 
-    def test_lowercases_headers(self):
-        self.assertEqual(
+    func = staticmethod(Relaxed.canonicalize_headers)
+
+    def test_lowercases_names(self):
+        self.assertCanonicalForm(
             [(b'foo', b'Bar\r\n'), (b'baz', b'Foo\r\n')],
-            Relaxed.canonicalize_headers(
-                [(b'Foo', b'Bar\r\n'), (b'BaZ', b'Foo\r\n')]))
+            [(b'Foo', b'Bar\r\n'), (b'BaZ', b'Foo\r\n')])
 
-    def test_unfolds_headers(self):
-        self.assertEqual(
+    def test_unfolds_values(self):
+        self.assertCanonicalForm(
             [(b'foo', b'Bar baz\r\n')],
-            Relaxed.canonicalize_headers(
-                [(b'Foo', b'Bar\r\n baz\r\n')]))
+            [(b'Foo', b'Bar\r\n baz\r\n')])
 
-    def test_wsp_compresses_headers(self):
-        self.assertEqual(
+    def test_wsp_compresses_values(self):
+        self.assertCanonicalForm(
             [(b'foo', b'Bar baz\r\n')],
-            Relaxed.canonicalize_headers(
-                [(b'Foo', b'Bar \t baz\r\n')]))
+            [(b'Foo', b'Bar \t baz\r\n')])
 
-    def test_wsp_strips_headers(self):
-        self.assertEqual(
+    def test_wsp_strips(self):
+        self.assertCanonicalForm(
             [(b'foo', b'Bar baz\r\n')],
-            Relaxed.canonicalize_headers(
-                [(b'Foo  ', b'   Bar \t baz   \r\n')]))
+            [(b'Foo  ', b'   Bar \t baz   \r\n')])
 
-    def test_strips_trailing_wsp_from_body(self):
-        self.assertEqual(
+
+class TestRelaxedAlgorithmBody(BaseCanonicalizationTest):
+
+    func = staticmethod(Relaxed.canonicalize_body)
+
+    def test_strips_trailing_wsp(self):
+        self.assertCanonicalForm(
             b'Foo\r\nbar\r\n',
-            Relaxed.canonicalize_body(b'Foo  \t\r\nbar\r\n'))
+            b'Foo  \t\r\nbar\r\n')
 
-    def test_wsp_compresses_body(self):
-        self.assertEqual(
+    def test_wsp_compresses(self):
+        self.assertCanonicalForm(
             b'Foo bar\r\n',
-            Relaxed.canonicalize_body(b'Foo  \t  bar\r\n'))
+            b'Foo  \t  bar\r\n')
 
-    def test_strips_trailing_empty_lines_from_body(self):
-        self.assertEqual(
+    def test_strips_trailing_empty_lines(self):
+        self.assertCanonicalForm(
             b'Foo\r\nbar\r\n',
-            Relaxed.canonicalize_body(b'Foo\r\nbar\r\n\r\n\r\n'))
+            b'Foo\r\nbar\r\n\r\n\r\n')
 
 
 def test_suite():

From 28aaa6f2f28d3d0e4e12c401f08dd8ba6317cc11 Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 21:52:33 +1000
Subject: [PATCH 03/10] dkim.canonicalization now has an algorithms dict
 mapping names to algorithms. Use it.

---
 dkim/__init__.py         | 42 +++++++++++++++-------------------------
 dkim/canonicalization.py |  3 +++
 2 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/dkim/__init__.py b/dkim/__init__.py
index 0762ce7..3533444 100644
--- a/dkim/__init__.py
+++ b/dkim/__init__.py
@@ -25,10 +25,7 @@ import logging
 import re
 import time
 
-from dkim.canonicalization import (
-    Relaxed,
-    Simple,
-    )
+from dkim.canonicalization import algorithms
 from dkim.crypto import (
     DigestTooLargeError,
     parse_pem_private_key,
@@ -234,7 +231,7 @@ def fold(header):
 
 
 def sign(message, selector, domain, privkey, identity=None,
-         canonicalize=(Simple, Simple), include_headers=None, length=False,
+         canonicalize=(b'simple', b'simple'), include_headers=None, length=False,
          logger=None):
     """Sign an RFC822 message and return the DKIM-Signature header line.
 
@@ -261,7 +258,7 @@ def sign(message, selector, domain, privkey, identity=None,
     if identity is not None and not identity.endswith(domain):
         raise ParameterError("identity must end with domain")
 
-    headers = canonicalize[0].canonicalize_headers(headers)
+    headers = algorithms[canonicalize[0]].canonicalize_headers(headers)
 
     if include_headers is None:
         include_headers = [x[0].lower() for x in headers]
@@ -269,7 +266,7 @@ def sign(message, selector, domain, privkey, identity=None,
         include_headers = [x.lower() for x in include_headers]
     sign_headers = [x for x in headers if x[0].lower() in include_headers]
 
-    body = canonicalize[1].canonicalize_body(body)
+    body = algorithms[canonicalize[1]].canonicalize_body(body)
 
     h = hashlib.sha256()
     h.update(body)
@@ -278,7 +275,9 @@ def sign(message, selector, domain, privkey, identity=None,
     sigfields = [x for x in [
         (b'v', b"1"),
         (b'a', b"rsa-sha256"),
-        (b'c', b"/".join((canonicalize[0].name, canonicalize[1].name))),
+        (b'c', b"/".join(
+            (algorithms[canonicalize[0]].name,
+             algorithms[canonicalize[1]].name))),
         (b'd', domain),
         (b'i', identity or b"@"+domain),
         length and (b'l', len(body)),
@@ -291,7 +290,7 @@ def sign(message, selector, domain, privkey, identity=None,
     ] if x]
 
     sig_value = fold(b"; ".join(b"=".join(x) for x in sigfields))
-    dkim_header = canonicalize[0].canonicalize_headers([
+    dkim_header = algorithms[canonicalize[0]].canonicalize_headers([
         [b'DKIM-Signature', b' ' + sig_value]])[0]
     # the dkim sig is hashed with no trailing crlf, even if the
     # canonicalization algorithm would add one.
@@ -356,23 +355,14 @@ def verify(message, logger=None, dnsfunc=dnstxt):
     else:
         can_body = b"simple"
 
-    if can_headers == b"simple":
-        canonicalize_headers = Simple
-    elif can_headers == b"relaxed":
-        canonicalize_headers = Relaxed
-    else:
-        logger.error("unknown header canonicalization (%s)" % can_headers)
-        return False
-
-    headers = canonicalize_headers.canonicalize_headers(headers)
-
-    if can_body == b"simple":
-        body = Simple.canonicalize_body(body)
-    elif can_body == b"relaxed":
-        body = Relaxed.canonicalize_body(body)
-    else:
-        logger.error("unknown body canonicalization (%s)" % can_body)
+    try:
+        header_algorithm = algorithms[can_headers]
+        body_algorithm = algorithms[can_body]
+    except KeyError as e:
+        logger.error("unknown canonicalization algorithm: %r" % (e.args[0],))
+        return False
+    headers = header_algorithm.canonicalize_headers(headers)
+    body = body_algorithm.canonicalize_body(body)
 
     if sig[b'a'] == b"rsa-sha1":
         hasher = hashlib.sha1
@@ -418,7 +408,7 @@ def verify(message, logger=None, dnsfunc=dnstxt):
     include_headers = re.split(br"\s*:\s*", sig[b'h'])
     h = hasher()
     hash_headers(
-        h, canonicalize_headers, headers, include_headers, sigheaders, sig)
+        h, header_algorithm, headers, include_headers, sigheaders, sig)
     signature = base64.b64decode(re.sub(br"\s+", b"", sig[b'b']))
     try:
         return RSASSA_PKCS1_v1_5_verify(
diff --git a/dkim/canonicalization.py b/dkim/canonicalization.py
index 2cff4c2..ef192c0 100644
--- a/dkim/canonicalization.py
+++ b/dkim/canonicalization.py
@@ -64,3 +64,6 @@ class Relaxed:
         # Ignore all empty lines at the end of the message body.
         removed_trailing_lines = re.sub(b"(\r\n)*$", b"\r\n", compressed_wsp)
         return removed_trailing_lines
+
+
+algorithms = dict((c.name, c) for c in (Simple, Relaxed))

From 61cd0e10801e7273b17b7d4fdaa77de956afbd15 Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 22:03:14 +1000
Subject: [PATCH 04/10] Factor out all the canonicalization regexps.

---
 dkim/canonicalization.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/dkim/canonicalization.py b/dkim/canonicalization.py
index ef192c0..0f81f9e 100644
--- a/dkim/canonicalization.py
+++ b/dkim/canonicalization.py
@@ -22,6 +22,22 @@
 import re
 
 
+def strip_trailing_whitespace(content):
+    return re.sub(b"[\t ]+\r\n", b"\r\n", content)
+
+
+def compress_whitespace(content):
+    return re.sub(b"[\t ]+", b" ", content)
+
+
+def strip_trailing_lines(content):
+    return re.sub(br"(\r\n)*\Z", b"\r\n", content, count=1)  # 1 CRLF only
+
+
+def unfold_header_value(content):
+    return content.replace(b"\r\n", b"")  # drop every CRLF (unfold)
+
+
 class Simple:
     """Class that represents the "simple" canonicalization algorithm."""
 
@@ -35,7 +51,7 @@ class Simple:
     @staticmethod
     def canonicalize_body(body):
         # Ignore all empty lines at the end of the message body.
-        return re.sub(b"(\r\n)*$", b"\r\n", body)
+        return strip_trailing_lines(body)
 
 
 class Relaxed:
@@ -51,19 +67,16 @@ class Relaxed:
         # Remove all WSP at the start or end of the field value (strip).
         return [
             (x[0].lower(),
-             re.sub(br"\s+", b" ", re.sub(b"\r\n", b"", x[1])).strip()
-             + b"\r\n")
+             compress_whitespace(unfold_header_value(x[1])).strip() + b"\r\n")
             for x in headers]
 
     @staticmethod
     def canonicalize_body(body):
         # Remove all trailing WSP at end of lines.
-        removed_trailing_wsp = re.sub(b"[\\x09\\x20]+\r\n", b"\r\n", body)
         # Compress non-line-ending WSP to single space.
-        compressed_wsp = re.sub(br"[\x09\x20]+", b" ", removed_trailing_wsp)
         # Ignore all empty lines at the end of the message body.
-        removed_trailing_lines = re.sub(b"(\r\n)*$", b"\r\n", compressed_wsp)
-        return removed_trailing_lines
+        return strip_trailing_lines(
+            compress_whitespace(strip_trailing_whitespace(body)))
 
 
 algorithms = dict((c.name, c) for c in (Simple, Relaxed))

From 5da23e5856e15a6cdd996c72f952b4bb9cbaea99 Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 22:03:41 +1000
Subject: [PATCH 05/10] rstrip header field names in relaxed, as the spec says.

---
 dkim/canonicalization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dkim/canonicalization.py b/dkim/canonicalization.py
index 0f81f9e..d474921 100644
--- a/dkim/canonicalization.py
+++ b/dkim/canonicalization.py
@@ -66,7 +66,7 @@ class Relaxed:
         # Compress WSP to single space.
         # Remove all WSP at the start or end of the field value (strip).
         return [
-            (x[0].lower(),
+            (x[0].lower().rstrip(),
              compress_whitespace(unfold_header_value(x[1])).strip() + b"\r\n")
             for x in headers]
 

From b78a41d6bfba503c4e274f7b38b3f37c908c0faf Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 22:04:21 +1000
Subject: [PATCH 06/10] Add an __all__.

---
 dkim/canonicalization.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dkim/canonicalization.py b/dkim/canonicalization.py
index d474921..9b191f2 100644
--- a/dkim/canonicalization.py
+++ b/dkim/canonicalization.py
@@ -21,6 +21,10 @@
 
 import re
 
+__all__ = [
+    'algorithms',
+    ]
+
 
 def strip_trailing_whitespace(content):
     return re.sub(b"[\t ]+\r\n", b"\r\n", content)

From a82f3110ea6c47a9d9c1da5114bb538e09db8dba Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 22:07:42 +1000
Subject: [PATCH 07/10] Split out DNS utilities into their own module.

---
 dkim/__init__.py | 33 ++---------------------------
 dkim/dns.py      | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 31 deletions(-)
 create mode 100644 dkim/dns.py

diff --git a/dkim/__init__.py b/dkim/__init__.py
index 3533444..518a6c0 100644
--- a/dkim/__init__.py
+++ b/dkim/__init__.py
@@ -34,6 +34,7 @@ from dkim.crypto import (
     RSASSA_PKCS1_v1_5_verify,
     UnparsableKeyError,
     )
+from dkim.dns import get_txt
 from dkim.util import (
     get_default_logger,
     InvalidTagValueList,
@@ -180,36 +181,6 @@ def rfc822_parse(message):
 
 
 
-def dnstxt_dnspython(name):
-    """Return a TXT record associated with a DNS name."""
-    a = dns.resolver.query(name, dns.rdatatype.TXT)
-    for r in a.response.answer:
-        if r.rdtype == dns.rdatatype.TXT:
-            return b"".join(r.items[0].strings)
-    return None
-
-
-def dnstxt_pydns(name):
-    """Return a TXT record associated with a DNS name."""
-    # Older pydns releases don't like a trailing dot.
-    if name.endswith('.'):
-        name = name[:-1]
-    DNS.ParseResolvConf()
-    response = DNS.DnsRequest(name, qtype='txt').req()
-    if not response.answers:
-        return None
-    return response.answers[0]['data'][0]
-
-
-# Prefer dnspython if it's there, otherwise use pydns.
-try:
-    import dns.resolver
-    dnstxt = dnstxt_dnspython
-except ImportError:
-    import DNS
-    dnstxt = dnstxt_pydns
-
-
 def fold(header):
     """Fold a header line into multiple crlf-separated lines at column 72."""
     i = header.rfind(b"\r\n ")
@@ -315,7 +286,7 @@ def sign(message, selector, domain, privkey, identity=None,
     return b'DKIM-Signature: ' + sig_value + b"\r\n"
 
 
-def verify(message, logger=None, dnsfunc=dnstxt):
+def verify(message, logger=None, dnsfunc=get_txt):
     """Verify a DKIM signature on an RFC822 formatted message.
 
     @param message: an RFC822 formatted message (with either \\n or \\r\\n line endings)
diff --git a/dkim/dns.py b/dkim/dns.py
new file mode 100644
index 0000000..c158f2b
--- /dev/null
+++ b/dkim/dns.py
@@ -0,0 +1,55 @@
+# This software is provided 'as-is', without any express or implied
+# warranty.  In no event will the author be held liable for any damages
+# arising from the use of this software.
+#
+# Permission is granted to anyone to use this software for any purpose,
+# including commercial applications, and to alter it and redistribute it
+# freely, subject to the following restrictions:
+#
+# 1. The origin of this software must not be misrepresented; you must not
+#    claim that you wrote the original software. If you use this software
+#    in a product, an acknowledgment in the product documentation would be
+#    appreciated but is not required.
+# 2. Altered source versions must be plainly marked as such, and must not be
+#    misrepresented as being the original software.
+# 3. This notice may not be removed or altered from any source distribution.
+#
+# Copyright (c) 2008 Greg Hewgill http://hewgill.com
+#
+# This has been modified from the original software.
+# Copyright (c) 2011 William Grant <me@williamgrant.id.au>
+
+
+__all__ = [
+    'get_txt'
+    ]
+
+
+def get_txt_dnspython(name):
+    """Return a TXT record associated with a DNS name."""
+    a = dns.resolver.query(name, dns.rdatatype.TXT)
+    for r in a.response.answer:
+        if r.rdtype == dns.rdatatype.TXT:
+            return b"".join(r.items[0].strings)
+    return None
+
+
+def get_txt_pydns(name):
+    """Return a TXT record associated with a DNS name."""
+    # Older pydns releases don't like a trailing dot.
+    if name.endswith('.'):
+        name = name[:-1]
+    DNS.ParseResolvConf()
+    response = DNS.DnsRequest(name, qtype='txt').req()
+    if not response.answers:
+        return None
+    return response.answers[0]['data'][0]
+
+
+# Prefer dnspython if it's there, otherwise use pydns.
+try:
+    import dns.resolver
+    get_txt = get_txt_dnspython
+except ImportError:
+    import DNS
+    get_txt = get_txt_pydns

From 5f949337d119265a1ad89302e61a12003a450eb3 Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 22:24:21 +1000
Subject: [PATCH 08/10] Handle Unicode in get_txt.

---
 dkim/__init__.py        | 10 ++--------
 dkim/dns.py             | 17 +++++++++++++++--
 dkim/tests/test_dkim.py |  6 +++++-
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/dkim/__init__.py b/dkim/__init__.py
index 518a6c0..427bb48 100644
--- a/dkim/__init__.py
+++ b/dkim/__init__.py
@@ -356,14 +356,8 @@ def verify(message, logger=None, dnsfunc=get_txt):
             (base64.b64encode(bodyhash), sig[b'bh']))
         return False
 
-    # dnstxt wants Unicode
-    try:
-        selector = sig[b's'].decode('ascii')
-        domain = sig[b'd'].decode('ascii')
-    except UnicodeDecodeError:
-        return False
-    name = "%s._domainkey.%s." % (selector, domain)
-    s = dnsfunc(name).encode('utf-8')
+    name = sig[b's'] + b"._domainkey." + sig[b'd'] + b"."
+    s = dnsfunc(name)
     if not s:
         return False
     try:
diff --git a/dkim/dns.py b/dkim/dns.py
index c158f2b..e14c939 100644
--- a/dkim/dns.py
+++ b/dkim/dns.py
@@ -49,7 +49,20 @@ def get_txt_pydns(name):
 # Prefer dnspython if it's there, otherwise use pydns.
 try:
     import dns.resolver
-    get_txt = get_txt_dnspython
+    _get_txt = get_txt_dnspython
 except ImportError:
     import DNS
-    get_txt = get_txt_pydns
+    _get_txt = get_txt_pydns
+
+
+def get_txt(name):
+    """Return a TXT record associated with a DNS name.
+
+    @param name: The bytestring domain name to look up.
+    """
+    # pydns needs Unicode, but DKIM's d= is ASCII (already punycoded).
+    try:
+        unicode_name = name.decode('ascii')
+    except UnicodeDecodeError:
+        return None
+    return _get_txt(unicode_name).decode('utf-8')
diff --git a/dkim/tests/test_dkim.py b/dkim/tests/test_dkim.py
index afe7fbc..ec23e22 100644
--- a/dkim/tests/test_dkim.py
+++ b/dkim/tests/test_dkim.py
@@ -53,8 +53,12 @@ class TestSignAndVerify(unittest.TestCase):
         self.key = read_test_data("test.private")
 
     def dnsfunc(self, domain):
+        try:
+            domain = domain.decode('ascii')
+        except UnicodeDecodeError:
+            return None
         self.assertEqual('test._domainkey.example.com.', domain)
-        return read_test_data("test.txt").decode('utf-8')
+        return read_test_data("test.txt")
 
     def test_verifies(self):
         # A message verifies after being signed.

From 472750c0db656dd061493934cb84b4e4288f219e Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 22:34:23 +1000
Subject: [PATCH 09/10] Signature algorithm names are now defined in
 dkim.crypto.

---
 dkim/__init__.py | 18 +++++++++---------
 dkim/crypto.py   |  6 ++++++
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/dkim/__init__.py b/dkim/__init__.py
index 427bb48..dee2b7a 100644
--- a/dkim/__init__.py
+++ b/dkim/__init__.py
@@ -28,6 +28,7 @@ import time
 from dkim.canonicalization import algorithms
 from dkim.crypto import (
     DigestTooLargeError,
+    HASH_ALGORITHMS,
     parse_pem_private_key,
     parse_public_key,
     RSASSA_PKCS1_v1_5_sign,
@@ -202,8 +203,9 @@ def fold(header):
 
 
 def sign(message, selector, domain, privkey, identity=None,
-         canonicalize=(b'simple', b'simple'), include_headers=None, length=False,
-         logger=None):
+         canonicalize=(b'simple', b'simple'),
+         signature_algorithm=b'rsa-sha256',
+         include_headers=None, length=False, logger=None):
     """Sign an RFC822 message and return the DKIM-Signature header line.
 
     @param message: an RFC822 formatted message (with either \\n or \\r\\n line endings)
@@ -245,7 +247,7 @@ def sign(message, selector, domain, privkey, identity=None,
 
     sigfields = [x for x in [
         (b'v', b"1"),
-        (b'a', b"rsa-sha256"),
+        (b'a', signature_algorithm),
         (b'c', b"/".join(
             (algorithms[canonicalize[0]].name,
              algorithms[canonicalize[1]].name))),
@@ -335,12 +337,10 @@ def verify(message, logger=None, dnsfunc=get_txt):
     headers = header_algorithm.canonicalize_headers(headers)
     body = body_algorithm.canonicalize_body(body)
 
-    if sig[b'a'] == b"rsa-sha1":
-        hasher = hashlib.sha1
-    elif sig[b'a'] == b"rsa-sha256":
-        hasher = hashlib.sha256
-    else:
-        logger.error("unknown signature algorithm (%s)" % sig[b'a'])
+    try:
+        hasher = HASH_ALGORITHMS[sig[b'a']]
+    except KeyError as e:
+        logger.error("unknown signature algorithm: %r" % (e.args[0],))
         return False
 
     if b'l' in sig:
diff --git a/dkim/crypto.py b/dkim/crypto.py
index 2fb96e5..245079b 100644
--- a/dkim/crypto.py
+++ b/dkim/crypto.py
@@ -21,6 +21,7 @@
 
 __all__ = [
     'DigestTooLargeError',
+    'HASH_ALGORITHMS',
     'parse_pem_private_key',
     'parse_private_key',
     'parse_public_key',
@@ -30,6 +31,7 @@ __all__ = [
     ]
 
 import base64
+import hashlib
 import re
 
 from dkim.asn1 import (
@@ -76,6 +78,10 @@ ASN1_RSAPrivateKey = [
     ])
 ]
 
+HASH_ALGORITHMS = {
+    b'rsa-sha1': hashlib.sha1,
+    b'rsa-sha256': hashlib.sha256,
+    }
 
 # These values come from RFC 3447, section 9.2 Notes, page 43.
 HASH_ID_MAP = {

From 382d9157c53f8753a0e165c4555ce75e54f9e0b1 Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 23:13:45 +1000
Subject: [PATCH 10/10] Fix get_txt to assume unicode results.

---
 dkim/dns.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dkim/dns.py b/dkim/dns.py
index e14c939..130f14c 100644
--- a/dkim/dns.py
+++ b/dkim/dns.py
@@ -65,4 +65,9 @@ def get_txt(name):
         unicode_name = name.decode('ascii')
     except UnicodeDecodeError:
         return None
-    return _get_txt(unicode_name).decode('utf-8')
+    txt = _get_txt(unicode_name)
+    # Backends return None when nothing is found, and the dnspython path
+    # already yields bytes; only text results need encoding.
+    if txt is None or isinstance(txt, bytes):
+        return txt
+    return txt.encode('utf-8')