mirror of
https://github.com/python/cpython.git
synced 2025-07-17 08:15:19 +00:00
[3.14] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133942)
If the error handler is used, a new bytes object is created to set as
the object attribute of UnicodeDecodeError, and that bytes object then
replaces the original data. A pointer to the decoded data will become invalid
after destroying that temporary bytes object. So we need another way to return
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
_PyBytes_DecodeEscape() does not have such an issue, because it does not
use the error handler registry, but it should be changed for compatibility
with _PyUnicode_DecodeUnicodeEscapeInternal().
(cherry picked from commit 9f69a58623
)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
f0a7a6c2cc
commit
69b4387f78
9 changed files with 160 additions and 80 deletions
|
@ -6621,13 +6621,15 @@ _PyUnicode_GetNameCAPI(void)
|
|||
/* --- Unicode Escape Codec ----------------------------------------------- */
|
||||
|
||||
PyObject *
|
||||
_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
||||
_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
|
||||
Py_ssize_t size,
|
||||
const char *errors,
|
||||
Py_ssize_t *consumed,
|
||||
const char **first_invalid_escape)
|
||||
int *first_invalid_escape_char,
|
||||
const char **first_invalid_escape_ptr)
|
||||
{
|
||||
const char *starts = s;
|
||||
const char *initial_starts = starts;
|
||||
_PyUnicodeWriter writer;
|
||||
const char *end;
|
||||
PyObject *errorHandler = NULL;
|
||||
|
@ -6635,7 +6637,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
|||
_PyUnicode_Name_CAPI *ucnhash_capi;
|
||||
|
||||
// so we can remember if we've seen an invalid escape char or not
|
||||
*first_invalid_escape = NULL;
|
||||
*first_invalid_escape_char = -1;
|
||||
*first_invalid_escape_ptr = NULL;
|
||||
|
||||
if (size == 0) {
|
||||
if (consumed) {
|
||||
|
@ -6723,9 +6726,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
|||
}
|
||||
}
|
||||
if (ch > 0377) {
|
||||
if (*first_invalid_escape == NULL) {
|
||||
*first_invalid_escape = s-3; /* Back up 3 chars, since we've
|
||||
already incremented s. */
|
||||
if (*first_invalid_escape_char == -1) {
|
||||
*first_invalid_escape_char = ch;
|
||||
if (starts == initial_starts) {
|
||||
/* Back up 3 chars, since we've already incremented s. */
|
||||
*first_invalid_escape_ptr = s - 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
WRITE_CHAR(ch);
|
||||
|
@ -6820,9 +6826,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
|||
goto error;
|
||||
|
||||
default:
|
||||
if (*first_invalid_escape == NULL) {
|
||||
*first_invalid_escape = s-1; /* Back up one char, since we've
|
||||
already incremented s. */
|
||||
if (*first_invalid_escape_char == -1) {
|
||||
*first_invalid_escape_char = c;
|
||||
if (starts == initial_starts) {
|
||||
/* Back up one char, since we've already incremented s. */
|
||||
*first_invalid_escape_ptr = s - 1;
|
||||
}
|
||||
}
|
||||
WRITE_ASCII_CHAR('\\');
|
||||
WRITE_CHAR(c);
|
||||
|
@ -6867,19 +6876,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
|
|||
const char *errors,
|
||||
Py_ssize_t *consumed)
|
||||
{
|
||||
const char *first_invalid_escape;
|
||||
PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
|
||||
int first_invalid_escape_char;
|
||||
const char *first_invalid_escape_ptr;
|
||||
PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
|
||||
consumed,
|
||||
&first_invalid_escape);
|
||||
&first_invalid_escape_char,
|
||||
&first_invalid_escape_ptr);
|
||||
if (result == NULL)
|
||||
return NULL;
|
||||
if (first_invalid_escape != NULL) {
|
||||
unsigned char c = *first_invalid_escape;
|
||||
if ('4' <= c && c <= '7') {
|
||||
if (first_invalid_escape_char != -1) {
|
||||
if (first_invalid_escape_char > 0xff) {
|
||||
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||
"\"\\%.3s\" is an invalid octal escape sequence. "
|
||||
"\"\\%o\" is an invalid octal escape sequence. "
|
||||
"Such sequences will not work in the future. ",
|
||||
first_invalid_escape) < 0)
|
||||
first_invalid_escape_char) < 0)
|
||||
{
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
|
@ -6889,7 +6899,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
|
|||
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||
"\"\\%c\" is an invalid escape sequence. "
|
||||
"Such sequences will not work in the future. ",
|
||||
c) < 0)
|
||||
first_invalid_escape_char) < 0)
|
||||
{
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue