[3.9] bpo-45461: Fix IncrementalDecoder and StreamReader in the "unicode-escape" codec (GH-28939) (GH-28945)

They support now splitting escape sequences between input chunks. Add the third parameter "final" in codecs.unicode_escape_decode(). It is True by default to match the former behavior. (cherry picked from commit c96d1546b1) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
2025-12-23 09:19:18 +00:00 · 2021-10-14 20:03:29 +03:00 · 2021-10-14 20:03:29 +03:00 · 7c722e32bf
commit 7c722e32bf
parent 38fadbc5b9
10 changed files with 9836 additions and 4890 deletions
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@ -857,12 +857,20 @@ PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(

 /* --- Unicode-Escape Codecs ---------------------------------------------- */

-/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
-   chars. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
+/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
        const char *string,     /* Unicode-Escape encoded string */
        Py_ssize_t length,      /* size of string */
        const char *errors,     /* error handling */
+        Py_ssize_t *consumed    /* bytes consumed */
+);
+/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+   chars. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
+        const char *string,     /* Unicode-Escape encoded string */
+        Py_ssize_t length,      /* size of string */
+        const char *errors,     /* error handling */
+        Py_ssize_t *consumed,   /* bytes consumed */
        const char **first_invalid_escape  /* on return, points to first
                                              invalid escaped char in
                                              string. */