bpo-37751: Fix codecs.lookup() normalization (GH-15092)

Fix codecs.lookup() to normalize the encoding name the same way as encodings.normalize_encoding(), except that codecs.lookup() also converts the name to lower case.
2025-10-10 00:43:41 +00:00 · 2019-08-21 21:26:20 +08:00 · 2019-08-21 21:26:20 +08:00 · 20f59fe1f7
commit 20f59fe1f7
parent 87bc3b7a0b
2 changed files with 17 additions and 16 deletions
--- a/Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst
+++ b/Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst
@ -0,0 +1 @@
 Fix :func:`codecs.lookup` to normalize the encoding name the same way as :func:`encodings.normalize_encoding`, except that :func:`codecs.lookup` also converts the name to lower case.
--- a/Python/codecs.c
+++ b/Python/codecs.c
@ -49,15 +49,16 @@ int PyCodec_Register(PyObject *search_function)
    return -1;
 }
-/* Convert a string to a normalized Python string: all characters are
+extern int _Py_normalize_encoding(const char *, char *, size_t);
-   converted to lower case, spaces are replaced with underscores. */
+
 /* Convert a string to a normalized Python string (decoded from UTF-8): all characters are
   converted to lower case, spaces and hyphens are replaced with underscores. */
 static
 PyObject *normalizestring(const char *string)
 {
    size_t i;
    size_t len = strlen(string);
-    char *p;
+    char *encoding;
    PyObject *v;
    if (len > PY_SSIZE_T_MAX) {
@ -65,20 +66,19 @@ PyObject *normalizestring(const char *string)
        return NULL;
    }
-    p = PyMem_Malloc(len + 1);
+    encoding = PyMem_Malloc(len + 1);
-    if (p == NULL)
+    if (encoding == NULL)
        return PyErr_NoMemory();
-    for (i = 0; i < len; i++) {
+
-        char ch = string[i];
+    if (!_Py_normalize_encoding(string, encoding, len + 1))
-        if (ch == ' ')
+    {
-            ch = '-';
+        PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
-        else
+        PyMem_Free(encoding);
-            ch = Py_TOLOWER(Py_CHARMASK(ch));
+        return NULL;
        p[i] = ch;
    }
-    p[i] = '\0';
+
-    v = PyUnicode_FromString(p);
+    v = PyUnicode_FromString(encoding);
-    PyMem_Free(p);
+    PyMem_Free(encoding);
    return v;
 }
		`@ -0,0 +1 @@`
							Fix :func:`codecs.lookup` to normalize the encoding name the same way as :func:`encodings.normalize_encoding`, except that :func:`codecs.lookup` also converts the name to lower case.