mirror of
https://github.com/python/cpython.git
synced 2025-07-23 03:05:38 +00:00
[3.13] gh-123378: fix a crash in UnicodeError.__str__ (GH-124935) (#125099)
gh-123378: fix a crash in `UnicodeError.__str__` (GH-124935)
(cherry picked from commit ba14dfafd9)
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
4eab6e8d29
commit
84991153da
3 changed files with 93 additions and 45 deletions
|
@ -8,6 +8,7 @@ import pickle
|
||||||
import weakref
|
import weakref
|
||||||
import errno
|
import errno
|
||||||
from codecs import BOM_UTF8
|
from codecs import BOM_UTF8
|
||||||
|
from itertools import product
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
|
||||||
from test.support import (captured_stderr, check_impl_detail,
|
from test.support import (captured_stderr, check_impl_detail,
|
||||||
|
@ -1336,6 +1337,29 @@ class ExceptionTests(unittest.TestCase):
|
||||||
for klass in klasses:
|
for klass in klasses:
|
||||||
self.assertEqual(str(klass.__new__(klass)), "")
|
self.assertEqual(str(klass.__new__(klass)), "")
|
||||||
|
|
||||||
|
def test_unicode_error_str_does_not_crash(self):
|
||||||
|
# Test that str(UnicodeError(...)) does not crash.
|
||||||
|
# See https://github.com/python/cpython/issues/123378.
|
||||||
|
|
||||||
|
for start, end, objlen in product(
|
||||||
|
range(-5, 5),
|
||||||
|
range(-5, 5),
|
||||||
|
range(7),
|
||||||
|
):
|
||||||
|
obj = 'a' * objlen
|
||||||
|
with self.subTest('encode', objlen=objlen, start=start, end=end):
|
||||||
|
exc = UnicodeEncodeError('utf-8', obj, start, end, '')
|
||||||
|
self.assertIsInstance(str(exc), str)
|
||||||
|
|
||||||
|
with self.subTest('translate', objlen=objlen, start=start, end=end):
|
||||||
|
exc = UnicodeTranslateError(obj, start, end, '')
|
||||||
|
self.assertIsInstance(str(exc), str)
|
||||||
|
|
||||||
|
encoded = obj.encode()
|
||||||
|
with self.subTest('decode', objlen=objlen, start=start, end=end):
|
||||||
|
exc = UnicodeDecodeError('utf-8', encoded, start, end, '')
|
||||||
|
self.assertIsInstance(str(exc), str)
|
||||||
|
|
||||||
@no_tracing
|
@no_tracing
|
||||||
def test_badisinstance(self):
|
def test_badisinstance(self):
|
||||||
# Bug #2542: if issubclass(e, MyException) raises an exception,
|
# Bug #2542: if issubclass(e, MyException) raises an exception,
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Fix a crash in the :meth:`~object.__str__` method of :exc:`UnicodeError`
|
||||||
|
objects when the :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
|
||||||
|
values are invalid or out-of-range. Patch by Bénédikt Tran.
|
|
@ -2959,46 +2959,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
UnicodeEncodeError_str(PyObject *self)
|
UnicodeEncodeError_str(PyObject *self)
|
||||||
{
|
{
|
||||||
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
|
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
|
||||||
PyObject *result = NULL;
|
PyObject *result = NULL;
|
||||||
PyObject *reason_str = NULL;
|
PyObject *reason_str = NULL;
|
||||||
PyObject *encoding_str = NULL;
|
PyObject *encoding_str = NULL;
|
||||||
|
|
||||||
if (!uself->object)
|
if (exc->object == NULL) {
|
||||||
/* Not properly initialized. */
|
/* Not properly initialized. */
|
||||||
return PyUnicode_FromString("");
|
return PyUnicode_FromString("");
|
||||||
|
}
|
||||||
|
|
||||||
/* Get reason and encoding as strings, which they might not be if
|
/* Get reason and encoding as strings, which they might not be if
|
||||||
they've been modified after we were constructed. */
|
they've been modified after we were constructed. */
|
||||||
reason_str = PyObject_Str(uself->reason);
|
reason_str = PyObject_Str(exc->reason);
|
||||||
if (reason_str == NULL)
|
if (reason_str == NULL) {
|
||||||
goto done;
|
goto done;
|
||||||
encoding_str = PyObject_Str(uself->encoding);
|
}
|
||||||
if (encoding_str == NULL)
|
encoding_str = PyObject_Str(exc->encoding);
|
||||||
|
if (encoding_str == NULL) {
|
||||||
goto done;
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
|
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
|
||||||
Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
|
Py_ssize_t start = exc->start, end = exc->end;
|
||||||
|
|
||||||
|
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
|
||||||
|
Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
|
||||||
const char *fmt;
|
const char *fmt;
|
||||||
if (badchar <= 0xff)
|
if (badchar <= 0xff) {
|
||||||
fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U";
|
fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U";
|
||||||
else if (badchar <= 0xffff)
|
}
|
||||||
|
else if (badchar <= 0xffff) {
|
||||||
fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
|
fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
|
||||||
else
|
}
|
||||||
|
else {
|
||||||
fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
|
fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
|
||||||
|
}
|
||||||
result = PyUnicode_FromFormat(
|
result = PyUnicode_FromFormat(
|
||||||
fmt,
|
fmt,
|
||||||
encoding_str,
|
encoding_str,
|
||||||
(int)badchar,
|
(int)badchar,
|
||||||
uself->start,
|
start,
|
||||||
reason_str);
|
reason_str);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
result = PyUnicode_FromFormat(
|
result = PyUnicode_FromFormat(
|
||||||
"'%U' codec can't encode characters in position %zd-%zd: %U",
|
"'%U' codec can't encode characters in position %zd-%zd: %U",
|
||||||
encoding_str,
|
encoding_str,
|
||||||
uself->start,
|
start,
|
||||||
uself->end-1,
|
end - 1,
|
||||||
reason_str);
|
reason_str);
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
|
@ -3072,41 +3081,46 @@ error:
|
||||||
static PyObject *
|
static PyObject *
|
||||||
UnicodeDecodeError_str(PyObject *self)
|
UnicodeDecodeError_str(PyObject *self)
|
||||||
{
|
{
|
||||||
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
|
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
|
||||||
PyObject *result = NULL;
|
PyObject *result = NULL;
|
||||||
PyObject *reason_str = NULL;
|
PyObject *reason_str = NULL;
|
||||||
PyObject *encoding_str = NULL;
|
PyObject *encoding_str = NULL;
|
||||||
|
|
||||||
if (!uself->object)
|
if (exc->object == NULL) {
|
||||||
/* Not properly initialized. */
|
/* Not properly initialized. */
|
||||||
return PyUnicode_FromString("");
|
return PyUnicode_FromString("");
|
||||||
|
}
|
||||||
|
|
||||||
/* Get reason and encoding as strings, which they might not be if
|
/* Get reason and encoding as strings, which they might not be if
|
||||||
they've been modified after we were constructed. */
|
they've been modified after we were constructed. */
|
||||||
reason_str = PyObject_Str(uself->reason);
|
reason_str = PyObject_Str(exc->reason);
|
||||||
if (reason_str == NULL)
|
if (reason_str == NULL) {
|
||||||
goto done;
|
goto done;
|
||||||
encoding_str = PyObject_Str(uself->encoding);
|
}
|
||||||
if (encoding_str == NULL)
|
encoding_str = PyObject_Str(exc->encoding);
|
||||||
|
if (encoding_str == NULL) {
|
||||||
goto done;
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) {
|
Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
|
||||||
int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
|
Py_ssize_t start = exc->start, end = exc->end;
|
||||||
|
|
||||||
|
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
|
||||||
|
int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff);
|
||||||
result = PyUnicode_FromFormat(
|
result = PyUnicode_FromFormat(
|
||||||
"'%U' codec can't decode byte 0x%02x in position %zd: %U",
|
"'%U' codec can't decode byte 0x%02x in position %zd: %U",
|
||||||
encoding_str,
|
encoding_str,
|
||||||
byte,
|
badbyte,
|
||||||
uself->start,
|
start,
|
||||||
reason_str);
|
reason_str);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
result = PyUnicode_FromFormat(
|
result = PyUnicode_FromFormat(
|
||||||
"'%U' codec can't decode bytes in position %zd-%zd: %U",
|
"'%U' codec can't decode bytes in position %zd-%zd: %U",
|
||||||
encoding_str,
|
encoding_str,
|
||||||
uself->start,
|
start,
|
||||||
uself->end-1,
|
end - 1,
|
||||||
reason_str
|
reason_str);
|
||||||
);
|
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
Py_XDECREF(reason_str);
|
Py_XDECREF(reason_str);
|
||||||
|
@ -3169,42 +3183,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
|
||||||
static PyObject *
|
static PyObject *
|
||||||
UnicodeTranslateError_str(PyObject *self)
|
UnicodeTranslateError_str(PyObject *self)
|
||||||
{
|
{
|
||||||
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
|
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
|
||||||
PyObject *result = NULL;
|
PyObject *result = NULL;
|
||||||
PyObject *reason_str = NULL;
|
PyObject *reason_str = NULL;
|
||||||
|
|
||||||
if (!uself->object)
|
if (exc->object == NULL) {
|
||||||
/* Not properly initialized. */
|
/* Not properly initialized. */
|
||||||
return PyUnicode_FromString("");
|
return PyUnicode_FromString("");
|
||||||
|
}
|
||||||
|
|
||||||
/* Get reason as a string, which it might not be if it's been
|
/* Get reason as a string, which it might not be if it's been
|
||||||
modified after we were constructed. */
|
modified after we were constructed. */
|
||||||
reason_str = PyObject_Str(uself->reason);
|
reason_str = PyObject_Str(exc->reason);
|
||||||
if (reason_str == NULL)
|
if (reason_str == NULL) {
|
||||||
goto done;
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
|
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
|
||||||
Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
|
Py_ssize_t start = exc->start, end = exc->end;
|
||||||
|
|
||||||
|
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
|
||||||
|
Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
|
||||||
const char *fmt;
|
const char *fmt;
|
||||||
if (badchar <= 0xff)
|
if (badchar <= 0xff) {
|
||||||
fmt = "can't translate character '\\x%02x' in position %zd: %U";
|
fmt = "can't translate character '\\x%02x' in position %zd: %U";
|
||||||
else if (badchar <= 0xffff)
|
}
|
||||||
|
else if (badchar <= 0xffff) {
|
||||||
fmt = "can't translate character '\\u%04x' in position %zd: %U";
|
fmt = "can't translate character '\\u%04x' in position %zd: %U";
|
||||||
else
|
}
|
||||||
|
else {
|
||||||
fmt = "can't translate character '\\U%08x' in position %zd: %U";
|
fmt = "can't translate character '\\U%08x' in position %zd: %U";
|
||||||
|
}
|
||||||
result = PyUnicode_FromFormat(
|
result = PyUnicode_FromFormat(
|
||||||
fmt,
|
fmt,
|
||||||
(int)badchar,
|
(int)badchar,
|
||||||
uself->start,
|
start,
|
||||||
reason_str
|
reason_str);
|
||||||
);
|
}
|
||||||
} else {
|
else {
|
||||||
result = PyUnicode_FromFormat(
|
result = PyUnicode_FromFormat(
|
||||||
"can't translate characters in position %zd-%zd: %U",
|
"can't translate characters in position %zd-%zd: %U",
|
||||||
uself->start,
|
start,
|
||||||
uself->end-1,
|
end - 1,
|
||||||
reason_str
|
reason_str);
|
||||||
);
|
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
Py_XDECREF(reason_str);
|
Py_XDECREF(reason_str);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue