Issue #19619: Blacklist non-text codecs in method API

str.encode, bytes.decode and bytearray.decode now use an internal API to raise LookupError for known non-text encodings, rather than attempting the encoding or decoding operation and then raising a TypeError for an unexpected output type. The latter mechanism remains in place for third-party non-text encodings.
2025-11-24 20:30:18 +00:00 · 2013-11-22 22:39:36 +10:00 · 2013-11-22 22:39:36 +10:00 · c72e4e6dcc
commit c72e4e6dcc
parent 322f5ba0d8
13 changed files with 291 additions and 93 deletions
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE
 ### Codec base classes (defining the API)

 class CodecInfo(tuple):
+    """Codec details when looking up the codec registry"""
+
+    # Private API to allow Python 3.4 to blacklist the known non-Unicode
+    # codecs in the standard library. A more general mechanism to
+    # reliably distinguish text encodings from other codecs will hopefully
+    # be defined for Python 3.5
+    #
+    # See http://bugs.python.org/issue19619
+    _is_text_encoding = True # Assume codecs are text encodings by default

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
-        incrementalencoder=None, incrementaldecoder=None, name=None):
+        incrementalencoder=None, incrementaldecoder=None, name=None,
+        *, _is_text_encoding=None):
        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        self.name = name
        self.encode = encode
@ -84,6 +94,8 @@ class CodecInfo(tuple):
        self.incrementaldecoder = incrementaldecoder
        self.streamwriter = streamwriter
        self.streamreader = streamreader
+        if _is_text_encoding is not None:
+            self._is_text_encoding = _is_text_encoding
        return self

    def __repr__(self):