From ddc27f9c385f57db1c227b655ec84dcf097a8976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 1 Mar 2025 12:01:20 +0100 Subject: [PATCH] gh-128974: Fix `UnicodeError.__str__` when custom attributes have side-effects (#128975) Fix some crashes when (custom) attributes of `UnicodeError` objects implement `object.__str__` with side-effects. --- Lib/test/test_exceptions.py | 37 +++++++++++ ...-01-18-10-50-04.gh-issue-128974.KltI-A.rst | 3 + Objects/exceptions.c | 61 ++++++++++++++----- 3 files changed, 85 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index bf0bc53b634..20c617f8108 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1360,6 +1360,43 @@ class ExceptionTests(unittest.TestCase): exc = UnicodeDecodeError('utf-8', encoded, start, end, '') self.assertIsInstance(str(exc), str) + def test_unicode_error_evil_str_set_none_object(self): + def side_effect(exc): + exc.object = None + self.do_test_unicode_error_mutate(side_effect) + + def test_unicode_error_evil_str_del_self_object(self): + def side_effect(exc): + del exc.object + self.do_test_unicode_error_mutate(side_effect) + + def do_test_unicode_error_mutate(self, side_effect): + # Test that str(UnicodeError(...)) does not crash when + # side-effects mutate the underlying 'object' attribute. + # See https://github.com/python/cpython/issues/128974. 
+ + class Evil(str): + def __str__(self): + side_effect(exc) + return self + + for reason, encoding in [ + ("reason", Evil("utf-8")), + (Evil("reason"), "utf-8"), + (Evil("reason"), Evil("utf-8")), + ]: + with self.subTest(encoding=encoding, reason=reason): + with self.subTest(UnicodeEncodeError): + exc = UnicodeEncodeError(encoding, "x", 0, 1, reason) + self.assertRaises(TypeError, str, exc) + with self.subTest(UnicodeDecodeError): + exc = UnicodeDecodeError(encoding, b"x", 0, 1, reason) + self.assertRaises(TypeError, str, exc) + + with self.subTest(UnicodeTranslateError): + exc = UnicodeTranslateError("x", 0, 1, Evil("reason")) + self.assertRaises(TypeError, str, exc) + @no_tracing def test_badisinstance(self): # Bug #2542: if issubclass(e, MyException) raises an exception, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst new file mode 100644 index 00000000000..fc4453ae3f2 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst @@ -0,0 +1,3 @@ +Fix a crash in :meth:`UnicodeError.__str__ <object.__str__>` when custom +attributes implement :meth:`~object.__str__` with side-effects. +Patch by Bénédikt Tran. diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 28c7fdbd47b..e30fea0f37a 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2784,6 +2784,8 @@ SyntaxError_str(PyObject *op) if (!filename && !have_lineno) return PyObject_Str(self->msg ? self->msg : Py_None); + // Even if 'filename' can be an instance of a subclass of 'str', + // we only render its "true" content and do not use str(filename). if (filename && have_lineno) result = PyUnicode_FromFormat("%S (%U, line %ld)", self->msg ? 
self->msg : Py_None, @@ -2901,6 +2903,35 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, "Unicode related error."); +/* + * Check the validity of 'attr' as a unicode or bytes object depending + * on 'as_bytes'. + * + * The 'name' is the attribute name and is only used for error reporting. + * + * On success, this returns 0. + * On failure, this sets a TypeError and returns -1. + */ +static int +check_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) +{ + assert(as_bytes == 0 || as_bytes == 1); + if (attr == NULL) { + PyErr_Format(PyExc_TypeError, + "UnicodeError '%s' attribute is not set", + name); + return -1; + } + if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { + PyErr_Format(PyExc_TypeError, + "UnicodeError '%s' attribute must be a %s", + name, as_bytes ? "bytes" : "string"); + return -1; + } + return 0; +} + + /* * Check the validity of 'attr' as a unicode or bytes object depending * on 'as_bytes' and return a new reference on it if it is the case. @@ -2913,19 +2944,8 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, static PyObject * as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) { - assert(as_bytes == 0 || as_bytes == 1); - if (attr == NULL) { - PyErr_Format(PyExc_TypeError, "%s attribute not set", name); - return NULL; - } - if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { - PyErr_Format(PyExc_TypeError, - "%s attribute must be %s", - name, - as_bytes ? "bytes" : "unicode"); - return NULL; - } - return Py_NewRef(attr); + int rc = check_unicode_error_attribute(attr, name, as_bytes); + return rc < 0 ? NULL : Py_NewRef(attr); } @@ -3591,7 +3611,10 @@ UnicodeEncodeError_str(PyObject *self) if (encoding_str == NULL) { goto done; } - + // calls to PyObject_Str(...) 
above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", false) < 0) { + goto done; + } Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3711,7 +3734,10 @@ UnicodeDecodeError_str(PyObject *self) if (encoding_str == NULL) { goto done; } - + // calls to PyObject_Str(...) above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", true) < 0) { + goto done; + } Py_ssize_t len = PyBytes_GET_SIZE(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3807,7 +3833,10 @@ UnicodeTranslateError_str(PyObject *self) if (reason_str == NULL) { goto done; } - + // call to PyObject_Str(...) above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", false) < 0) { + goto done; + } Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end;