[3.9] bpo-45461: Fix IncrementalDecoder and StreamReader in the "unicode-escape" codec (GH-28939) (GH-28945)

They now support escape sequences that are split between input chunks. Add a third parameter, "final", to codecs.unicode_escape_decode(). It defaults to True, which matches the former behavior. (cherry picked from commit c96d1546b1) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
2025-08-02 08:02:56 +00:00 · 2021-10-14 20:03:29 +03:00 · 2021-10-14 20:03:29 +03:00 · 7c722e32bf
commit 7c722e32bf
parent 38fadbc5b9
10 changed files with 9836 additions and 4890 deletions
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@ -487,17 +487,20 @@ _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
 _codecs.unicode_escape_decode
    data: Py_buffer(accept={str, buffer})
    errors: str(accept={str, NoneType}) = None
+    final: bool(accept={int}) = True
    /
 [clinic start generated code]*/

 static PyObject *
 _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
-                                   const char *errors)
-/*[clinic end generated code: output=3ca3c917176b82ab input=8328081a3a569bd6]*/
+                                   const char *errors, int final)
+/*[clinic end generated code: output=b284f97b12c635ee input=6154f039a9f7c639]*/
 {
-    PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
-                                                      errors);
-    return codec_tuple(decoded, data->len);
+    Py_ssize_t consumed = data->len;  /* returned as-is when final is true, since NULL is passed below */
+    PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
+                                                               errors,
+                                                               final ? NULL : &consumed);  /* non-final: decoder receives &consumed, presumably to report a shorter count -- TODO confirm against its definition */
+    return codec_tuple(decoded, consumed);
 }

 /*[clinic input]