[3.13] gh-123378: fix a crash in UnicodeError.__str__ (GH-124935) (#125099)

gh-123378: fix a crash in `UnicodeError.__str__` (GH-124935)
(cherry picked from commit ba14dfafd9)

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
Miss Islington (bot) 2024-10-08 14:06:57 +02:00 committed by GitHub
parent 4eab6e8d29
commit 84991153da
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 93 additions and 45 deletions

View file

@ -8,6 +8,7 @@ import pickle
import weakref import weakref
import errno import errno
from codecs import BOM_UTF8 from codecs import BOM_UTF8
from itertools import product
from textwrap import dedent from textwrap import dedent
from test.support import (captured_stderr, check_impl_detail, from test.support import (captured_stderr, check_impl_detail,
@ -1336,6 +1337,29 @@ class ExceptionTests(unittest.TestCase):
for klass in klasses: for klass in klasses:
self.assertEqual(str(klass.__new__(klass)), "") self.assertEqual(str(klass.__new__(klass)), "")
def test_unicode_error_str_does_not_crash(self):
# Test that str(UnicodeError(...)) does not crash.
# See https://github.com/python/cpython/issues/123378.
for start, end, objlen in product(
range(-5, 5),
range(-5, 5),
range(7),
):
obj = 'a' * objlen
with self.subTest('encode', objlen=objlen, start=start, end=end):
exc = UnicodeEncodeError('utf-8', obj, start, end, '')
self.assertIsInstance(str(exc), str)
with self.subTest('translate', objlen=objlen, start=start, end=end):
exc = UnicodeTranslateError(obj, start, end, '')
self.assertIsInstance(str(exc), str)
encoded = obj.encode()
with self.subTest('decode', objlen=objlen, start=start, end=end):
exc = UnicodeDecodeError('utf-8', encoded, start, end, '')
self.assertIsInstance(str(exc), str)
@no_tracing @no_tracing
def test_badisinstance(self): def test_badisinstance(self):
# Bug #2542: if issubclass(e, MyException) raises an exception, # Bug #2542: if issubclass(e, MyException) raises an exception,

View file

@ -0,0 +1,3 @@
Fix a crash in the :meth:`~object.__str__` method of :exc:`UnicodeError`
objects when the :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
values are invalid or out-of-range. Patch by Bénédikt Tran.

View file

@ -2959,46 +2959,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject * static PyObject *
UnicodeEncodeError_str(PyObject *self) UnicodeEncodeError_str(PyObject *self)
{ {
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
PyObject *result = NULL; PyObject *result = NULL;
PyObject *reason_str = NULL; PyObject *reason_str = NULL;
PyObject *encoding_str = NULL; PyObject *encoding_str = NULL;
if (!uself->object) if (exc->object == NULL) {
/* Not properly initialized. */ /* Not properly initialized. */
return PyUnicode_FromString(""); return PyUnicode_FromString("");
}
/* Get reason and encoding as strings, which they might not be if /* Get reason and encoding as strings, which they might not be if
they've been modified after we were constructed. */ they've been modified after we were constructed. */
reason_str = PyObject_Str(uself->reason); reason_str = PyObject_Str(exc->reason);
if (reason_str == NULL) if (reason_str == NULL) {
goto done; goto done;
encoding_str = PyObject_Str(uself->encoding); }
if (encoding_str == NULL) encoding_str = PyObject_Str(exc->encoding);
if (encoding_str == NULL) {
goto done; goto done;
}
if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) { Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start); Py_ssize_t start = exc->start, end = exc->end;
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
const char *fmt; const char *fmt;
if (badchar <= 0xff) if (badchar <= 0xff) {
fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U"; fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U";
else if (badchar <= 0xffff) }
else if (badchar <= 0xffff) {
fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U"; fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
else }
else {
fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U"; fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
}
result = PyUnicode_FromFormat( result = PyUnicode_FromFormat(
fmt, fmt,
encoding_str, encoding_str,
(int)badchar, (int)badchar,
uself->start, start,
reason_str); reason_str);
} }
else { else {
result = PyUnicode_FromFormat( result = PyUnicode_FromFormat(
"'%U' codec can't encode characters in position %zd-%zd: %U", "'%U' codec can't encode characters in position %zd-%zd: %U",
encoding_str, encoding_str,
uself->start, start,
uself->end-1, end - 1,
reason_str); reason_str);
} }
done: done:
@ -3072,41 +3081,46 @@ error:
static PyObject * static PyObject *
UnicodeDecodeError_str(PyObject *self) UnicodeDecodeError_str(PyObject *self)
{ {
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
PyObject *result = NULL; PyObject *result = NULL;
PyObject *reason_str = NULL; PyObject *reason_str = NULL;
PyObject *encoding_str = NULL; PyObject *encoding_str = NULL;
if (!uself->object) if (exc->object == NULL) {
/* Not properly initialized. */ /* Not properly initialized. */
return PyUnicode_FromString(""); return PyUnicode_FromString("");
}
/* Get reason and encoding as strings, which they might not be if /* Get reason and encoding as strings, which they might not be if
they've been modified after we were constructed. */ they've been modified after we were constructed. */
reason_str = PyObject_Str(uself->reason); reason_str = PyObject_Str(exc->reason);
if (reason_str == NULL) if (reason_str == NULL) {
goto done; goto done;
encoding_str = PyObject_Str(uself->encoding); }
if (encoding_str == NULL) encoding_str = PyObject_Str(exc->encoding);
if (encoding_str == NULL) {
goto done; goto done;
}
if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) { Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff); Py_ssize_t start = exc->start, end = exc->end;
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff);
result = PyUnicode_FromFormat( result = PyUnicode_FromFormat(
"'%U' codec can't decode byte 0x%02x in position %zd: %U", "'%U' codec can't decode byte 0x%02x in position %zd: %U",
encoding_str, encoding_str,
byte, badbyte,
uself->start, start,
reason_str); reason_str);
} }
else { else {
result = PyUnicode_FromFormat( result = PyUnicode_FromFormat(
"'%U' codec can't decode bytes in position %zd-%zd: %U", "'%U' codec can't decode bytes in position %zd-%zd: %U",
encoding_str, encoding_str,
uself->start, start,
uself->end-1, end - 1,
reason_str reason_str);
);
} }
done: done:
Py_XDECREF(reason_str); Py_XDECREF(reason_str);
@ -3169,42 +3183,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
static PyObject * static PyObject *
UnicodeTranslateError_str(PyObject *self) UnicodeTranslateError_str(PyObject *self)
{ {
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
PyObject *result = NULL; PyObject *result = NULL;
PyObject *reason_str = NULL; PyObject *reason_str = NULL;
if (!uself->object) if (exc->object == NULL) {
/* Not properly initialized. */ /* Not properly initialized. */
return PyUnicode_FromString(""); return PyUnicode_FromString("");
}
/* Get reason as a string, which it might not be if it's been /* Get reason as a string, which it might not be if it's been
modified after we were constructed. */ modified after we were constructed. */
reason_str = PyObject_Str(uself->reason); reason_str = PyObject_Str(exc->reason);
if (reason_str == NULL) if (reason_str == NULL) {
goto done; goto done;
}
if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) { Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start); Py_ssize_t start = exc->start, end = exc->end;
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
const char *fmt; const char *fmt;
if (badchar <= 0xff) if (badchar <= 0xff) {
fmt = "can't translate character '\\x%02x' in position %zd: %U"; fmt = "can't translate character '\\x%02x' in position %zd: %U";
else if (badchar <= 0xffff) }
else if (badchar <= 0xffff) {
fmt = "can't translate character '\\u%04x' in position %zd: %U"; fmt = "can't translate character '\\u%04x' in position %zd: %U";
else }
else {
fmt = "can't translate character '\\U%08x' in position %zd: %U"; fmt = "can't translate character '\\U%08x' in position %zd: %U";
}
result = PyUnicode_FromFormat( result = PyUnicode_FromFormat(
fmt, fmt,
(int)badchar, (int)badchar,
uself->start, start,
reason_str reason_str);
); }
} else { else {
result = PyUnicode_FromFormat( result = PyUnicode_FromFormat(
"can't translate characters in position %zd-%zd: %U", "can't translate characters in position %zd-%zd: %U",
uself->start, start,
uself->end-1, end - 1,
reason_str reason_str);
);
} }
done: done:
Py_XDECREF(reason_str); Py_XDECREF(reason_str);