mirror of
https://github.com/python/cpython.git
synced 2025-07-19 01:05:26 +00:00
[3.12] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944) (#134337)
If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will become invalid after destroying that temporary bytes object. So we need another way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). _PyBytes_DecodeEscape() does not have such an issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal(). (cherry picked from commit 9f69a58623) (cherry picked from commit 6279eb8c07)
This commit is contained in:
parent
310cd8943a
commit
4398b788ff
8 changed files with 194 additions and 57 deletions
|
@ -181,15 +181,18 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
|
|||
len = p - buf;
|
||||
s = buf;
|
||||
|
||||
const char *first_invalid_escape;
|
||||
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
|
||||
int first_invalid_escape_char;
|
||||
const char *first_invalid_escape_ptr;
|
||||
v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL,
|
||||
&first_invalid_escape_char,
|
||||
&first_invalid_escape_ptr);
|
||||
|
||||
// HACK: later we can simply pass the line no, since we don't preserve the tokens
|
||||
// when we are decoding the string but we preserve the line numbers.
|
||||
if (v != NULL && first_invalid_escape != NULL && t != NULL) {
|
||||
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
|
||||
/* We have not decref u before because first_invalid_escape points
|
||||
inside u. */
|
||||
if (v != NULL && first_invalid_escape_ptr != NULL && t != NULL) {
|
||||
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape_ptr, t) < 0) {
|
||||
/* We have not decref u before because first_invalid_escape_ptr
|
||||
points inside u. */
|
||||
Py_XDECREF(u);
|
||||
Py_DECREF(v);
|
||||
return NULL;
|
||||
|
@ -202,14 +205,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
|
|||
static PyObject *
|
||||
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
|
||||
{
|
||||
const char *first_invalid_escape;
|
||||
PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
|
||||
int first_invalid_escape_char;
|
||||
const char *first_invalid_escape_ptr;
|
||||
PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
|
||||
&first_invalid_escape_char,
|
||||
&first_invalid_escape_ptr);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (first_invalid_escape != NULL) {
|
||||
if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
|
||||
if (first_invalid_escape_ptr != NULL) {
|
||||
if (warn_invalid_escape_sequence(p, s, first_invalid_escape_ptr, t) < 0) {
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue