bpo-36297: remove "unicode_internal" codec (GH-12342)

2025-10-09 08:31:26 +00:00 · 2019-03-18 15:44:11 +09:00 · 2019-03-18 15:44:11 +09:00 · 6a16b18224
commit 6a16b18224
parent 6fb544d8bc
12 changed files with 40 additions and 529 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
    return result;
 }

-/* --- Unicode Internal Codec ------------------------------------------- */
-
-PyObject *
-_PyUnicode_DecodeUnicodeInternal(const char *s,
-                                 Py_ssize_t size,
-                                 const char *errors)
-{
-    const char *starts = s;
-    Py_ssize_t startinpos;
-    Py_ssize_t endinpos;
-    _PyUnicodeWriter writer;
-    const char *end;
-    const char *reason;
-    PyObject *errorHandler = NULL;
-    PyObject *exc = NULL;
-
-    if (PyErr_WarnEx(PyExc_DeprecationWarning,
-                     "unicode_internal codec has been deprecated",
-                     1))
-        return NULL;
-
-    if (size < 0) {
-        PyErr_BadInternalCall();
-        return NULL;
-    }
-    if (size == 0)
-        _Py_RETURN_UNICODE_EMPTY();
-
-    _PyUnicodeWriter_Init(&writer);
-    if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
-        PyErr_NoMemory();
-        goto onError;
-    }
-    writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
-
-    end = s + size;
-    while (s < end) {
-        Py_UNICODE uch;
-        Py_UCS4 ch;
-        if (end - s < Py_UNICODE_SIZE) {
-            endinpos = end-starts;
-            reason = "truncated input";
-            goto error;
-        }
-        /* We copy the raw representation one byte at a time because the
-           pointer may be unaligned (see test_codeccallbacks). */
-        ((char *) &uch)[0] = s[0];
-        ((char *) &uch)[1] = s[1];
-#ifdef Py_UNICODE_WIDE
-        ((char *) &uch)[2] = s[2];
-        ((char *) &uch)[3] = s[3];
-#endif
-        ch = uch;
-#ifdef Py_UNICODE_WIDE
-        /* We have to sanity check the raw data, otherwise doom looms for
-           some malformed UCS-4 data. */
-        if (ch > 0x10ffff) {
-            endinpos = s - starts + Py_UNICODE_SIZE;
-            reason = "illegal code point (> 0x10FFFF)";
-            goto error;
-        }
-#endif
-        s += Py_UNICODE_SIZE;
-#ifndef Py_UNICODE_WIDE
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
-        {
-            Py_UNICODE uch2;
-            ((char *) &uch2)[0] = s[0];
-            ((char *) &uch2)[1] = s[1];
-            if (Py_UNICODE_IS_LOW_SURROGATE(uch2))
-            {
-                ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2);
-                s += Py_UNICODE_SIZE;
-            }
-        }
-#endif
-
-        if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
-            goto onError;
-        continue;
-
-  error:
-        startinpos = s - starts;
-        if (unicode_decode_call_errorhandler_writer(
-                errors, &errorHandler,
-                "unicode_internal", reason,
-                &starts, &end, &startinpos, &endinpos, &exc, &s,
-                &writer))
-            goto onError;
-    }
-
-    Py_XDECREF(errorHandler);
-    Py_XDECREF(exc);
-    return _PyUnicodeWriter_Finish(&writer);
-
-  onError:
-    _PyUnicodeWriter_Dealloc(&writer);
-    Py_XDECREF(errorHandler);
-    Py_XDECREF(exc);
-    return NULL;
-}
-
 /* --- Latin-1 Codec ------------------------------------------------------ */

 PyObject *