Issue 19548: update codecs module documentation

- clarified the distinction between text encodings and other codecs
- clarified relationship with builtin open and the io module
- consolidated documentation of error handlers into one section
- clarified type constraints of some behaviours
- added tests for some of the new statements in the docs
2025-11-26 13:22:51 +00:00 · 2015-01-07 00:22:00 +10:00 · 2015-01-07 00:22:00 +10:00 · b9fdb7a452
commit b9fdb7a452
parent fcfed19913
9 changed files with 426 additions and 372 deletions
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1139,6 +1139,8 @@ class RecodingTest(unittest.TestCase):
        # Python used to crash on this at exit because of a refcount
        # bug in _codecsmodule.c

+        self.assertTrue(f.closed)
+
 # From RFC 3492
 punycode_testcases = [
    # A Arabic (Egyptian):
@@ -1591,6 +1593,16 @@ class IDNACodecTest(unittest.TestCase):
        self.assertEqual(encoder.encode("ample.org."), b"xn--xample-9ta.org.")
        self.assertEqual(encoder.encode("", True), b"")

+    def test_errors(self):
+        """Only supports "strict" error handler"""
+        "python.org".encode("idna", "strict")
+        b"python.org".decode("idna", "strict")
+        for errors in ("ignore", "replace", "backslashreplace",
+                "surrogateescape"):
+            self.assertRaises(Exception, "python.org".encode, "idna", errors)
+            self.assertRaises(Exception,
+                b"python.org".decode, "idna", errors)
+
 class CodecsModuleTest(unittest.TestCase):

    def test_decode(self):
@@ -1668,6 +1680,24 @@ class CodecsModuleTest(unittest.TestCase):
        for api in codecs.__all__:
            getattr(codecs, api)

+    def test_open(self):
+        self.addCleanup(support.unlink, support.TESTFN)
+        for mode in ('w', 'r', 'r+', 'w+', 'a', 'a+'):
+            with self.subTest(mode), \
+                    codecs.open(support.TESTFN, mode, 'ascii') as file:
+                self.assertIsInstance(file, codecs.StreamReaderWriter)
+
+    def test_undefined(self):
+        self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined')
+        self.assertRaises(UnicodeError, codecs.decode, b'abc', 'undefined')
+        self.assertRaises(UnicodeError, codecs.encode, '', 'undefined')
+        self.assertRaises(UnicodeError, codecs.decode, b'', 'undefined')
+        for errors in ('strict', 'ignore', 'replace', 'backslashreplace'):
+            self.assertRaises(UnicodeError,
+                codecs.encode, 'abc', 'undefined', errors)
+            self.assertRaises(UnicodeError,
+                codecs.decode, b'abc', 'undefined', errors)
+
 class StreamReaderTest(unittest.TestCase):

    def setUp(self):
@@ -1801,13 +1831,10 @@ if hasattr(codecs, "mbcs_encode"):
 #    "undefined"

 # The following encodings don't work in stateful mode
-broken_unicode_with_streams = [
+broken_unicode_with_stateful = [
    "punycode",
    "unicode_internal"
 ]
-broken_incremental_coders = broken_unicode_with_streams + [
-    "idna",
-]

 class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
    def test_basics(self):
@@ -1827,7 +1854,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
                (chars, size) = codecs.getdecoder(encoding)(b)
                self.assertEqual(chars, s, "encoding=%r" % encoding)

-            if encoding not in broken_unicode_with_streams:
+            if encoding not in broken_unicode_with_stateful:
                # check stream reader/writer
                q = Queue(b"")
                writer = codecs.getwriter(encoding)(q)
@@ -1845,7 +1872,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
                    decodedresult += reader.read()
                self.assertEqual(decodedresult, s, "encoding=%r" % encoding)

-            if encoding not in broken_incremental_coders:
+            if encoding not in broken_unicode_with_stateful:
                # check incremental decoder/encoder and iterencode()/iterdecode()
                try:
                    encoder = codecs.getincrementalencoder(encoding)()
@@ -1894,7 +1921,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
        from _testcapi import codec_incrementalencoder, codec_incrementaldecoder
        s = "abc123"  # all codecs should be able to encode these
        for encoding in all_unicode_encodings:
-            if encoding not in broken_incremental_coders:
+            if encoding not in broken_unicode_with_stateful:
                # check incremental decoder/encoder (fetched via the C API)
                try:
                    cencoder = codec_incrementalencoder(encoding)
@@ -1934,7 +1961,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
        for encoding in all_unicode_encodings:
            if encoding == "idna": # FIXME: See SF bug #1163178
                continue
-            if encoding in broken_unicode_with_streams:
+            if encoding in broken_unicode_with_stateful:
                continue
            reader = codecs.getreader(encoding)(io.BytesIO(s.encode(encoding)))
            for t in range(5):
@@ -1967,7 +1994,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
        # Check that getstate() and setstate() handle the state properly
        u = "abc123"
        for encoding in all_unicode_encodings:
-            if encoding not in broken_incremental_coders:
+            if encoding not in broken_unicode_with_stateful:
                self.check_state_handling_decode(encoding, u, u.encode(encoding))
                self.check_state_handling_encode(encoding, u, u.encode(encoding))

@@ -2171,6 +2198,7 @@ class WithStmtTest(unittest.TestCase):
        f = io.BytesIO(b"\xc3\xbc")
        with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
            self.assertEqual(ef.read(), b"\xfc")
+        self.assertTrue(f.closed)

    def test_streamreaderwriter(self):
        f = io.BytesIO(b"\xc3\xbc")