mirror of
https://github.com/python/cpython.git
synced 2025-08-23 02:04:56 +00:00
gh-102406: replace exception chaining by PEP-678 notes in codecs (#102407)
This commit is contained in:
parent
e6ecd3e6b4
commit
76350e85eb
5 changed files with 64 additions and 206 deletions
|
@ -116,24 +116,6 @@ PyAPI_FUNC(int) _PyException_AddNote(
|
||||||
PyObject *exc,
|
PyObject *exc,
|
||||||
PyObject *note);
|
PyObject *note);
|
||||||
|
|
||||||
/* Helper that attempts to replace the current exception with one of the
|
|
||||||
* same type but with a prefix added to the exception text. The resulting
|
|
||||||
* exception description looks like:
|
|
||||||
*
|
|
||||||
* prefix (exc_type: original_exc_str)
|
|
||||||
*
|
|
||||||
* Only some exceptions can be safely replaced. If the function determines
|
|
||||||
* it isn't safe to perform the replacement, it will leave the original
|
|
||||||
* unmodified exception in place.
|
|
||||||
*
|
|
||||||
* Returns a borrowed reference to the new exception (if any), NULL if the
|
|
||||||
* existing exception was left in place.
|
|
||||||
*/
|
|
||||||
PyAPI_FUNC(PyObject *) _PyErr_TrySetFromCause(
|
|
||||||
const char *prefix_format, /* ASCII-encoded string */
|
|
||||||
...
|
|
||||||
);
|
|
||||||
|
|
||||||
/* In signalmodule.c */
|
/* In signalmodule.c */
|
||||||
|
|
||||||
int PySignal_SetWakeupFd(int fd);
|
int PySignal_SetWakeupFd(int fd);
|
||||||
|
|
|
@ -2819,24 +2819,19 @@ class TransformCodecTest(unittest.TestCase):
|
||||||
self.assertIsNone(failure.exception.__cause__)
|
self.assertIsNone(failure.exception.__cause__)
|
||||||
|
|
||||||
@unittest.skipUnless(zlib, "Requires zlib support")
|
@unittest.skipUnless(zlib, "Requires zlib support")
|
||||||
def test_custom_zlib_error_is_wrapped(self):
|
def test_custom_zlib_error_is_noted(self):
|
||||||
# Check zlib codec gives a good error for malformed input
|
# Check zlib codec gives a good error for malformed input
|
||||||
msg = "^decoding with 'zlib_codec' codec failed"
|
msg = "decoding with 'zlib_codec' codec failed"
|
||||||
with self.assertRaisesRegex(Exception, msg) as failure:
|
with self.assertRaises(Exception) as failure:
|
||||||
codecs.decode(b"hello", "zlib_codec")
|
codecs.decode(b"hello", "zlib_codec")
|
||||||
self.assertIsInstance(failure.exception.__cause__,
|
self.assertEqual(msg, failure.exception.__notes__[0])
|
||||||
type(failure.exception))
|
|
||||||
|
|
||||||
def test_custom_hex_error_is_wrapped(self):
|
def test_custom_hex_error_is_noted(self):
|
||||||
# Check hex codec gives a good error for malformed input
|
# Check hex codec gives a good error for malformed input
|
||||||
msg = "^decoding with 'hex_codec' codec failed"
|
msg = "decoding with 'hex_codec' codec failed"
|
||||||
with self.assertRaisesRegex(Exception, msg) as failure:
|
with self.assertRaises(Exception) as failure:
|
||||||
codecs.decode(b"hello", "hex_codec")
|
codecs.decode(b"hello", "hex_codec")
|
||||||
self.assertIsInstance(failure.exception.__cause__,
|
self.assertEqual(msg, failure.exception.__notes__[0])
|
||||||
type(failure.exception))
|
|
||||||
|
|
||||||
# Unfortunately, the bz2 module throws OSError, which the codec
|
|
||||||
# machinery currently can't wrap :(
|
|
||||||
|
|
||||||
# Ensure codec aliases from http://bugs.python.org/issue7475 work
|
# Ensure codec aliases from http://bugs.python.org/issue7475 work
|
||||||
def test_aliases(self):
|
def test_aliases(self):
|
||||||
|
@ -2860,11 +2855,8 @@ class TransformCodecTest(unittest.TestCase):
|
||||||
self.assertRaises(ValueError, codecs.decode, b"", "uu-codec")
|
self.assertRaises(ValueError, codecs.decode, b"", "uu-codec")
|
||||||
|
|
||||||
|
|
||||||
# The codec system tries to wrap exceptions in order to ensure the error
|
# The codec system tries to add notes to exceptions in order to ensure
|
||||||
# mentions the operation being performed and the codec involved. We
|
# the error mentions the operation being performed and the codec involved.
|
||||||
# currently *only* want this to happen for relatively stateless
|
|
||||||
# exceptions, where the only significant information they contain is their
|
|
||||||
# type and a single str argument.
|
|
||||||
|
|
||||||
# Use a local codec registry to avoid appearing to leak objects when
|
# Use a local codec registry to avoid appearing to leak objects when
|
||||||
# registering multiple search functions
|
# registering multiple search functions
|
||||||
|
@ -2874,10 +2866,10 @@ def _get_test_codec(codec_name):
|
||||||
return _TEST_CODECS.get(codec_name)
|
return _TEST_CODECS.get(codec_name)
|
||||||
|
|
||||||
|
|
||||||
class ExceptionChainingTest(unittest.TestCase):
|
class ExceptionNotesTest(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.codec_name = 'exception_chaining_test'
|
self.codec_name = 'exception_notes_test'
|
||||||
codecs.register(_get_test_codec)
|
codecs.register(_get_test_codec)
|
||||||
self.addCleanup(codecs.unregister, _get_test_codec)
|
self.addCleanup(codecs.unregister, _get_test_codec)
|
||||||
|
|
||||||
|
@ -2901,91 +2893,77 @@ class ExceptionChainingTest(unittest.TestCase):
|
||||||
_TEST_CODECS[self.codec_name] = codec_info
|
_TEST_CODECS[self.codec_name] = codec_info
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def assertWrapped(self, operation, exc_type, msg):
|
def assertNoted(self, operation, exc_type, msg):
|
||||||
full_msg = r"{} with {!r} codec failed \({}: {}\)".format(
|
full_msg = r"{} with {!r} codec failed".format(
|
||||||
operation, self.codec_name, exc_type.__name__, msg)
|
operation, self.codec_name)
|
||||||
with self.assertRaisesRegex(exc_type, full_msg) as caught:
|
with self.assertRaises(exc_type) as caught:
|
||||||
yield caught
|
yield caught
|
||||||
self.assertIsInstance(caught.exception.__cause__, exc_type)
|
self.assertIn(full_msg, caught.exception.__notes__[0])
|
||||||
self.assertIsNotNone(caught.exception.__cause__.__traceback__)
|
caught.exception.__notes__.clear()
|
||||||
|
|
||||||
def raise_obj(self, *args, **kwds):
|
def raise_obj(self, *args, **kwds):
|
||||||
# Helper to dynamically change the object raised by a test codec
|
# Helper to dynamically change the object raised by a test codec
|
||||||
raise self.obj_to_raise
|
raise self.obj_to_raise
|
||||||
|
|
||||||
def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
|
def check_note(self, obj_to_raise, msg, exc_type=RuntimeError):
|
||||||
self.obj_to_raise = obj_to_raise
|
self.obj_to_raise = obj_to_raise
|
||||||
self.set_codec(self.raise_obj, self.raise_obj)
|
self.set_codec(self.raise_obj, self.raise_obj)
|
||||||
with self.assertWrapped("encoding", exc_type, msg):
|
with self.assertNoted("encoding", exc_type, msg):
|
||||||
"str_input".encode(self.codec_name)
|
"str_input".encode(self.codec_name)
|
||||||
with self.assertWrapped("encoding", exc_type, msg):
|
with self.assertNoted("encoding", exc_type, msg):
|
||||||
codecs.encode("str_input", self.codec_name)
|
codecs.encode("str_input", self.codec_name)
|
||||||
with self.assertWrapped("decoding", exc_type, msg):
|
with self.assertNoted("decoding", exc_type, msg):
|
||||||
b"bytes input".decode(self.codec_name)
|
b"bytes input".decode(self.codec_name)
|
||||||
with self.assertWrapped("decoding", exc_type, msg):
|
with self.assertNoted("decoding", exc_type, msg):
|
||||||
codecs.decode(b"bytes input", self.codec_name)
|
codecs.decode(b"bytes input", self.codec_name)
|
||||||
|
|
||||||
def test_raise_by_type(self):
|
def test_raise_by_type(self):
|
||||||
self.check_wrapped(RuntimeError, "")
|
self.check_note(RuntimeError, "")
|
||||||
|
|
||||||
def test_raise_by_value(self):
|
def test_raise_by_value(self):
|
||||||
msg = "This should be wrapped"
|
msg = "This should be noted"
|
||||||
self.check_wrapped(RuntimeError(msg), msg)
|
self.check_note(RuntimeError(msg), msg)
|
||||||
|
|
||||||
def test_raise_grandchild_subclass_exact_size(self):
|
def test_raise_grandchild_subclass_exact_size(self):
|
||||||
msg = "This should be wrapped"
|
msg = "This should be noted"
|
||||||
class MyRuntimeError(RuntimeError):
|
class MyRuntimeError(RuntimeError):
|
||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
|
self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
|
||||||
|
|
||||||
def test_raise_subclass_with_weakref_support(self):
|
def test_raise_subclass_with_weakref_support(self):
|
||||||
msg = "This should be wrapped"
|
msg = "This should be noted"
|
||||||
class MyRuntimeError(RuntimeError):
|
class MyRuntimeError(RuntimeError):
|
||||||
pass
|
pass
|
||||||
self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
|
self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
|
||||||
|
|
||||||
def check_not_wrapped(self, obj_to_raise, msg):
|
def test_init_override(self):
|
||||||
def raise_obj(*args, **kwds):
|
|
||||||
raise obj_to_raise
|
|
||||||
self.set_codec(raise_obj, raise_obj)
|
|
||||||
with self.assertRaisesRegex(RuntimeError, msg):
|
|
||||||
"str input".encode(self.codec_name)
|
|
||||||
with self.assertRaisesRegex(RuntimeError, msg):
|
|
||||||
codecs.encode("str input", self.codec_name)
|
|
||||||
with self.assertRaisesRegex(RuntimeError, msg):
|
|
||||||
b"bytes input".decode(self.codec_name)
|
|
||||||
with self.assertRaisesRegex(RuntimeError, msg):
|
|
||||||
codecs.decode(b"bytes input", self.codec_name)
|
|
||||||
|
|
||||||
def test_init_override_is_not_wrapped(self):
|
|
||||||
class CustomInit(RuntimeError):
|
class CustomInit(RuntimeError):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
pass
|
||||||
self.check_not_wrapped(CustomInit, "")
|
self.check_note(CustomInit, "")
|
||||||
|
|
||||||
def test_new_override_is_not_wrapped(self):
|
def test_new_override(self):
|
||||||
class CustomNew(RuntimeError):
|
class CustomNew(RuntimeError):
|
||||||
def __new__(cls):
|
def __new__(cls):
|
||||||
return super().__new__(cls)
|
return super().__new__(cls)
|
||||||
self.check_not_wrapped(CustomNew, "")
|
self.check_note(CustomNew, "")
|
||||||
|
|
||||||
def test_instance_attribute_is_not_wrapped(self):
|
def test_instance_attribute(self):
|
||||||
msg = "This should NOT be wrapped"
|
msg = "This should be noted"
|
||||||
exc = RuntimeError(msg)
|
exc = RuntimeError(msg)
|
||||||
exc.attr = 1
|
exc.attr = 1
|
||||||
self.check_not_wrapped(exc, "^{}$".format(msg))
|
self.check_note(exc, "^{}$".format(msg))
|
||||||
|
|
||||||
def test_non_str_arg_is_not_wrapped(self):
|
def test_non_str_arg(self):
|
||||||
self.check_not_wrapped(RuntimeError(1), "1")
|
self.check_note(RuntimeError(1), "1")
|
||||||
|
|
||||||
def test_multiple_args_is_not_wrapped(self):
|
def test_multiple_args(self):
|
||||||
msg_re = r"^\('a', 'b', 'c'\)$"
|
msg_re = r"^\('a', 'b', 'c'\)$"
|
||||||
self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re)
|
self.check_note(RuntimeError('a', 'b', 'c'), msg_re)
|
||||||
|
|
||||||
# http://bugs.python.org/issue19609
|
# http://bugs.python.org/issue19609
|
||||||
def test_codec_lookup_failure_not_wrapped(self):
|
def test_codec_lookup_failure(self):
|
||||||
msg = "^unknown encoding: {}$".format(self.codec_name)
|
msg = "^unknown encoding: {}$".format(self.codec_name)
|
||||||
# The initial codec lookup should not be wrapped
|
|
||||||
with self.assertRaisesRegex(LookupError, msg):
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
"str input".encode(self.codec_name)
|
"str input".encode(self.codec_name)
|
||||||
with self.assertRaisesRegex(LookupError, msg):
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
:mod:`codecs` encoding/decoding errors now get the context information (which operation and which codecs) attached as :pep:`678` notes instead of through chaining a new instance of the exception.
|
|
@ -3764,113 +3764,3 @@ _PyException_AddNote(PyObject *exc, PyObject *note)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Helper to do the equivalent of "raise X from Y" in C, but always using
|
|
||||||
* the current exception rather than passing one in.
|
|
||||||
*
|
|
||||||
* We currently limit this to *only* exceptions that use the BaseException
|
|
||||||
* tp_init and tp_new methods, since we can be reasonably sure we can wrap
|
|
||||||
* those correctly without losing data and without losing backwards
|
|
||||||
* compatibility.
|
|
||||||
*
|
|
||||||
* We also aim to rule out *all* exceptions that might be storing additional
|
|
||||||
* state, whether by having a size difference relative to BaseException,
|
|
||||||
* additional arguments passed in during construction or by having a
|
|
||||||
* non-empty instance dict.
|
|
||||||
*
|
|
||||||
* We need to be very careful with what we wrap, since changing types to
|
|
||||||
* a broader exception type would be backwards incompatible for
|
|
||||||
* existing codecs, and with different init or new method implementations
|
|
||||||
* may either not support instantiation with PyErr_Format or lose
|
|
||||||
* information when instantiated that way.
|
|
||||||
*
|
|
||||||
* XXX (ncoghlan): This could be made more comprehensive by exploiting the
|
|
||||||
* fact that exceptions are expected to support pickling. If more builtin
|
|
||||||
* exceptions (e.g. AttributeError) start to be converted to rich
|
|
||||||
* exceptions with additional attributes, that's probably a better approach
|
|
||||||
* to pursue over adding special cases for particular stateful subclasses.
|
|
||||||
*
|
|
||||||
* Returns a borrowed reference to the new exception (if any), NULL if the
|
|
||||||
* existing exception was left in place.
|
|
||||||
*/
|
|
||||||
PyObject *
|
|
||||||
_PyErr_TrySetFromCause(const char *format, ...)
|
|
||||||
{
|
|
||||||
PyObject* msg_prefix;
|
|
||||||
PyObject *instance_args;
|
|
||||||
Py_ssize_t num_args, caught_type_size, base_exc_size;
|
|
||||||
va_list vargs;
|
|
||||||
int same_basic_size;
|
|
||||||
|
|
||||||
PyObject *exc = PyErr_GetRaisedException();
|
|
||||||
PyTypeObject *caught_type = Py_TYPE(exc);
|
|
||||||
/* Ensure type info indicates no extra state is stored at the C level
|
|
||||||
* and that the type can be reinstantiated using PyErr_Format
|
|
||||||
*/
|
|
||||||
caught_type_size = caught_type->tp_basicsize;
|
|
||||||
base_exc_size = _PyExc_BaseException.tp_basicsize;
|
|
||||||
same_basic_size = (
|
|
||||||
caught_type_size == base_exc_size ||
|
|
||||||
(_PyType_SUPPORTS_WEAKREFS(caught_type) &&
|
|
||||||
(caught_type_size == base_exc_size + (Py_ssize_t)sizeof(PyObject *))
|
|
||||||
)
|
|
||||||
);
|
|
||||||
if (caught_type->tp_init != (initproc)BaseException_init ||
|
|
||||||
caught_type->tp_new != BaseException_new ||
|
|
||||||
!same_basic_size ||
|
|
||||||
caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) {
|
|
||||||
/* We can't be sure we can wrap this safely, since it may contain
|
|
||||||
* more state than just the exception type. Accordingly, we just
|
|
||||||
* leave it alone.
|
|
||||||
*/
|
|
||||||
PyErr_SetRaisedException(exc);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check the args are empty or contain a single string */
|
|
||||||
instance_args = ((PyBaseExceptionObject *)exc)->args;
|
|
||||||
num_args = PyTuple_GET_SIZE(instance_args);
|
|
||||||
if (num_args > 1 ||
|
|
||||||
(num_args == 1 &&
|
|
||||||
!PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)))) {
|
|
||||||
/* More than 1 arg, or the one arg we do have isn't a string
|
|
||||||
*/
|
|
||||||
PyErr_SetRaisedException(exc);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Ensure the instance dict is also empty */
|
|
||||||
if (!_PyObject_IsInstanceDictEmpty(exc)) {
|
|
||||||
/* While we could potentially copy a non-empty instance dictionary
|
|
||||||
* to the replacement exception, for now we take the more
|
|
||||||
* conservative path of leaving exceptions with attributes set
|
|
||||||
* alone.
|
|
||||||
*/
|
|
||||||
PyErr_SetRaisedException(exc);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* For exceptions that we can wrap safely, we chain the original
|
|
||||||
* exception to a new one of the exact same type with an
|
|
||||||
* error message that mentions the additional details and the
|
|
||||||
* original exception.
|
|
||||||
*
|
|
||||||
* It would be nice to wrap OSError and various other exception
|
|
||||||
* types as well, but that's quite a bit trickier due to the extra
|
|
||||||
* state potentially stored on OSError instances.
|
|
||||||
*/
|
|
||||||
va_start(vargs, format);
|
|
||||||
msg_prefix = PyUnicode_FromFormatV(format, vargs);
|
|
||||||
va_end(vargs);
|
|
||||||
if (msg_prefix == NULL) {
|
|
||||||
Py_DECREF(exc);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyErr_Format((PyObject*)Py_TYPE(exc), "%U (%s: %S)",
|
|
||||||
msg_prefix, Py_TYPE(exc)->tp_name, exc);
|
|
||||||
Py_DECREF(msg_prefix);
|
|
||||||
PyObject *new_exc = PyErr_GetRaisedException();
|
|
||||||
PyException_SetCause(new_exc, exc);
|
|
||||||
PyErr_SetRaisedException(new_exc);
|
|
||||||
return new_exc;
|
|
||||||
}
|
|
||||||
|
|
|
@ -382,20 +382,27 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
|
||||||
return codec_getstreamcodec(encoding, stream, errors, 3);
|
return codec_getstreamcodec(encoding, stream, errors, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Helper that tries to ensure the reported exception chain indicates the
|
|
||||||
* codec that was invoked to trigger the failure without changing the type
|
|
||||||
* of the exception raised.
|
|
||||||
*/
|
|
||||||
static void
|
static void
|
||||||
wrap_codec_error(const char *operation,
|
add_note_to_codec_error(const char *operation,
|
||||||
const char *encoding)
|
const char *encoding)
|
||||||
{
|
{
|
||||||
/* TrySetFromCause will replace the active exception with a suitably
|
PyObject *exc = PyErr_GetRaisedException();
|
||||||
* updated clone if it can, otherwise it will leave the original
|
if (exc == NULL) {
|
||||||
* exception alone.
|
return;
|
||||||
*/
|
}
|
||||||
_PyErr_TrySetFromCause("%s with '%s' codec failed",
|
PyObject *note = PyUnicode_FromFormat("%s with '%s' codec failed",
|
||||||
operation, encoding);
|
operation, encoding);
|
||||||
|
if (note == NULL) {
|
||||||
|
_PyErr_ChainExceptions1(exc);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int res = _PyException_AddNote(exc, note);
|
||||||
|
Py_DECREF(note);
|
||||||
|
if (res < 0) {
|
||||||
|
_PyErr_ChainExceptions1(exc);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
PyErr_SetRaisedException(exc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Encode an object (e.g. a Unicode object) using the given encoding
|
/* Encode an object (e.g. a Unicode object) using the given encoding
|
||||||
|
@ -418,7 +425,7 @@ _PyCodec_EncodeInternal(PyObject *object,
|
||||||
|
|
||||||
result = PyObject_Call(encoder, args, NULL);
|
result = PyObject_Call(encoder, args, NULL);
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
wrap_codec_error("encoding", encoding);
|
add_note_to_codec_error("encoding", encoding);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -463,7 +470,7 @@ _PyCodec_DecodeInternal(PyObject *object,
|
||||||
|
|
||||||
result = PyObject_Call(decoder, args, NULL);
|
result = PyObject_Call(decoder, args, NULL);
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
wrap_codec_error("decoding", encoding);
|
add_note_to_codec_error("decoding", encoding);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
if (!PyTuple_Check(result) ||
|
if (!PyTuple_Check(result) ||
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue