[3.11] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944) (GH-134341)

If the error handler is used, a new bytes object is created to set as
the object attribute of UnicodeDecodeError, and that bytes object then
replaces the original data. A pointer to the decoded data will became invalid
after destroying that temporary bytes object. So we need other way to return
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().

_PyBytes_DecodeEscape() does not have such issue, because it does not
use the error handlers registry, but it should be changed for compatibility
with _PyUnicode_DecodeUnicodeEscapeInternal().
(cherry picked from commit 9f69a58623)
(cherry picked from commit 6279eb8c07)
(cherry picked from commit a75953b347)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Serhiy Storchaka 2025-06-02 18:52:52 +03:00 committed by GitHub
parent 461ca2cbfb
commit 73b3040f59
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 196 additions and 56 deletions

View file

@ -130,12 +130,15 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
len = p - buf;
s = buf;
const char *first_invalid_escape;
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
int first_invalid_escape_char;
const char *first_invalid_escape_ptr;
v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL,
&first_invalid_escape_char,
&first_invalid_escape_ptr);
if (v != NULL && first_invalid_escape != NULL) {
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
/* We have not decref u before because first_invalid_escape points
if (v != NULL && first_invalid_escape_ptr != NULL) {
if (warn_invalid_escape_sequence(parser, first_invalid_escape_ptr, t) < 0) {
/* We have not decref u before because first_invalid_escape_ptr points
inside u. */
Py_XDECREF(u);
Py_DECREF(v);
@ -149,14 +152,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
static PyObject *
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
{
const char *first_invalid_escape;
PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
int first_invalid_escape_char;
const char *first_invalid_escape_ptr;
PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
&first_invalid_escape_char,
&first_invalid_escape_ptr);
if (result == NULL) {
return NULL;
}
if (first_invalid_escape != NULL) {
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
if (first_invalid_escape_ptr != NULL) {
if (warn_invalid_escape_sequence(p, first_invalid_escape_ptr, t) < 0) {
Py_DECREF(result);
return NULL;
}