From 589853aee5ef21a78152b7c9880ddc48beeec83e Mon Sep 17 00:00:00 2001
From: William Grant <me@williamgrant.id.au>
Date: Fri, 3 Jun 2011 21:16:31 +1000
Subject: [PATCH] Split out dkim.canonicalization, and test it. Note that
 test_wsp_strips_headers is correctly failing, as trailing whitespace is not
 stripped from header names.

---
 dkim/__init__.py                    | 42 ++-------------
 dkim/canonicalization.py            | 66 +++++++++++++++++++++++
 dkim/tests/__init__.py              |  2 +
 dkim/tests/test_canonicalization.py | 83 +++++++++++++++++++++++++++++
 4 files changed, 155 insertions(+), 38 deletions(-)
 create mode 100644 dkim/canonicalization.py
 create mode 100644 dkim/tests/test_canonicalization.py

diff --git a/dkim/__init__.py b/dkim/__init__.py
index 23a006c..0762ce7 100644
--- a/dkim/__init__.py
+++ b/dkim/__init__.py
@@ -25,6 +25,10 @@ import logging
 import re
 import time
 
+from dkim.canonicalization import (
+    Relaxed,
+    Simple,
+    )
 from dkim.crypto import (
     DigestTooLargeError,
     parse_pem_private_key,
@@ -40,8 +44,6 @@ from dkim.util import (
     )
 
 __all__ = [
-    "Simple",
-    "Relaxed",
     "InternalError",
     "KeyFormatError",
     "MessageFormatError",
@@ -50,42 +52,6 @@ __all__ = [
     "verify",
 ]
 
-
-class Simple:
-    """Class that represents the "simple" canonicalization algorithm."""
-
-    name = b"simple"
-
-    @staticmethod
-    def canonicalize_headers(headers):
-        # No changes to headers.
-        return headers
-
-    @staticmethod
-    def canonicalize_body(body):
-        # Ignore all empty lines at the end of the message body.
-        return re.sub(b"(\r\n)*$", b"\r\n", body)
-
-class Relaxed:
-    """Class that represents the "relaxed" canonicalization algorithm."""
-
-    name = b"relaxed"
-
-    @staticmethod
-    def canonicalize_headers(headers):
-        # Convert all header field names to lowercase.
-        # Unfold all header lines.
-        # Compress WSP to single space.
-        # Remove all WSP at the start or end of the field value (strip).
-        return [(x[0].lower(), re.sub(br"\s+", b" ", re.sub(b"\r\n", b"", x[1])).strip()+b"\r\n") for x in headers]
-
-    @staticmethod
-    def canonicalize_body(body):
-        # Remove all trailing WSP at end of lines.
-        # Compress non-line-ending WSP to single space.
-        # Ignore all empty lines at the end of the message body.
-        return re.sub(b"(\r\n)*$", b"\r\n", re.sub(br"[\x09\x20]+", b" ", re.sub(b"[\\x09\\x20]+\r\n", b"\r\n", body)))
-
 class DKIMException(Exception):
     """Base class for DKIM errors."""
     pass
diff --git a/dkim/canonicalization.py b/dkim/canonicalization.py
new file mode 100644
index 0000000..2cff4c2
--- /dev/null
+++ b/dkim/canonicalization.py
@@ -0,0 +1,66 @@
+# This software is provided 'as-is', without any express or implied
+# warranty.  In no event will the author be held liable for any damages
+# arising from the use of this software.
+#
+# Permission is granted to anyone to use this software for any purpose,
+# including commercial applications, and to alter it and redistribute it
+# freely, subject to the following restrictions:
+#
+# 1. The origin of this software must not be misrepresented; you must not
+#    claim that you wrote the original software. If you use this software
+#    in a product, an acknowledgment in the product documentation would be
+#    appreciated but is not required.
+# 2. Altered source versions must be plainly marked as such, and must not be
+#    misrepresented as being the original software.
+# 3. This notice may not be removed or altered from any source distribution.
+#
+# Copyright (c) 2008 Greg Hewgill http://hewgill.com
+#
+# This has been modified from the original software.
+# Copyright (c) 2011 William Grant <me@williamgrant.id.au>
+
+import re
+
+
+class Simple:
+    """Class that represents the "simple" canonicalization algorithm."""
+
+    name = b"simple"
+
+    @staticmethod
+    def canonicalize_headers(headers):
+        # No changes to headers: the input object is returned as-is.
+        return headers
+
+    @staticmethod
+    def canonicalize_body(body):
+        # Trailing CRLFs -> one CRLF; count=1 blocks a second (empty) match.
+        return re.sub(br"(\r\n)*\Z", b"\r\n", body, count=1)
+
+
+class Relaxed:
+    """Class that represents the "relaxed" canonicalization algorithm."""
+
+    name = b"relaxed"
+
+    @staticmethod
+    def canonicalize_headers(headers):
+        # Lowercase each field name (WSP trailing the name is NOT stripped).
+        # Unfold the value by deleting every CRLF.
+        # Compress each whitespace run to a single space.
+        # Strip the value's ends, then re-append the terminating CRLF.
+        return [
+            (x[0].lower(),
+             re.sub(br"\s+", b" ", x[1].replace(b"\r\n", b"")).strip()
+             + b"\r\n")
+            for x in headers]
+
+    @staticmethod
+    def canonicalize_body(body):
+        # Remove all trailing WSP at end of lines.
+        body = re.sub(br"[\x09\x20]+\r\n", b"\r\n", body)
+        # Compress non-line-ending WSP to single space.
+        body = re.sub(br"[\x09\x20]+", b" ", body)
+        # Ignore all empty lines at the end of the message body; count=1
+        # keeps the empty match at the very end from adding a second CRLF.
+        return re.sub(br"(\r\n)*\Z", b"\r\n", body, count=1)
diff --git a/dkim/tests/__init__.py b/dkim/tests/__init__.py
index 69857e3..a7c2733 100644
--- a/dkim/tests/__init__.py
+++ b/dkim/tests/__init__.py
@@ -21,11 +21,13 @@ import unittest
 
 def test_suite():
     from dkim.tests import (
+        test_canonicalization,
         test_crypto,
         test_dkim,
         test_util,
         )
     modules = [
+        test_canonicalization,
         test_crypto,
         test_dkim,
         test_util,
diff --git a/dkim/tests/test_canonicalization.py b/dkim/tests/test_canonicalization.py
new file mode 100644
index 0000000..16a0e3f
--- /dev/null
+++ b/dkim/tests/test_canonicalization.py
@@ -0,0 +1,83 @@
+# This software is provided 'as-is', without any express or implied
+# warranty.  In no event will the author be held liable for any damages
+# arising from the use of this software.
+#
+# Permission is granted to anyone to use this software for any purpose,
+# including commercial applications, and to alter it and redistribute it
+# freely, subject to the following restrictions:
+#
+# 1. The origin of this software must not be misrepresented; you must not
+#    claim that you wrote the original software. If you use this software
+#    in a product, an acknowledgment in the product documentation would be
+#    appreciated but is not required.
+# 2. Altered source versions must be plainly marked as such, and must not be
+#    misrepresented as being the original software.
+# 3. This notice may not be removed or altered from any source distribution.
+#
+# Copyright (c) 2011 William Grant <me@williamgrant.id.au>
+
+import unittest
+
+from dkim.canonicalization import Simple, Relaxed
+
+
+class TestSimpleAlgorithm(unittest.TestCase):
+    """Checks Simple.canonicalize_headers and Simple.canonicalize_body."""
+    def test_headers_untouched(self):
+        test_headers = [(b'Foo  ', b'bar\r\n'), (b'Foo', b'baz\r\n')]
+        self.assertEqual(
+            test_headers,
+            Simple.canonicalize_headers(test_headers))
+
+    def test_strips_trailing_empty_lines_from_body(self):
+        self.assertEqual(
+            b'Foo  \tbar    \r\n',
+            Simple.canonicalize_body(
+                b'Foo  \tbar    \r\n\r\n'))
+
+
+class TestRelaxedAlgorithm(unittest.TestCase):
+    """Checks Relaxed.canonicalize_headers and Relaxed.canonicalize_body."""
+    def test_lowercases_headers(self):
+        self.assertEqual(
+            [(b'foo', b'Bar\r\n'), (b'baz', b'Foo\r\n')],
+            Relaxed.canonicalize_headers(
+                [(b'Foo', b'Bar\r\n'), (b'BaZ', b'Foo\r\n')]))
+
+    def test_unfolds_headers(self):
+        self.assertEqual(
+            [(b'foo', b'Bar baz\r\n')],
+            Relaxed.canonicalize_headers(
+                [(b'Foo', b'Bar\r\n baz\r\n')]))
+
+    def test_wsp_compresses_headers(self):
+        self.assertEqual(
+            [(b'foo', b'Bar baz\r\n')],
+            Relaxed.canonicalize_headers(
+                [(b'Foo', b'Bar \t baz\r\n')]))
+
+    def test_wsp_strips_headers(self):  # known to fail: name WSP kept
+        self.assertEqual(
+            [(b'foo', b'Bar baz\r\n')],
+            Relaxed.canonicalize_headers(
+                [(b'Foo  ', b'   Bar \t baz   \r\n')]))
+
+    def test_strips_trailing_wsp_from_body(self):
+        self.assertEqual(
+            b'Foo\r\nbar\r\n',
+            Relaxed.canonicalize_body(b'Foo  \t\r\nbar\r\n'))
+
+    def test_wsp_compresses_body(self):
+        self.assertEqual(
+            b'Foo bar\r\n',
+            Relaxed.canonicalize_body(b'Foo  \t  bar\r\n'))
+
+    def test_strips_trailing_empty_lines_from_body(self):
+        self.assertEqual(
+            b'Foo\r\nbar\r\n',
+            Relaxed.canonicalize_body(b'Foo\r\nbar\r\n\r\n\r\n'))
+
+
+def test_suite():
+    """Return a suite holding every test defined in this module."""
+    return unittest.TestLoader().loadTestsFromName(__name__)