gh-127691: add type checks when using PyUnicodeError objects (GH-127694)

This commit is contained in:
Bénédikt Tran 2024-12-13 17:16:22 +01:00 committed by GitHub
parent 6446408d42
commit 8bc18182a7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 162 additions and 73 deletions

View file

@ -1045,6 +1045,12 @@ New features
* Add :c:func:`PyUnstable_Object_EnableDeferredRefcount` for enabling * Add :c:func:`PyUnstable_Object_EnableDeferredRefcount` for enabling
deferred reference counting, as outlined in :pep:`703`. deferred reference counting, as outlined in :pep:`703`.
* The :ref:`Unicode Exception Objects <unicodeexceptions>` C API
now raises a :exc:`TypeError` if its exception argument is not
a :exc:`UnicodeError` object.
(Contributed by Bénédikt Tran in :gh:`127691`.)
Porting to Python 3.14 Porting to Python 3.14
---------------------- ----------------------

View file

@ -0,0 +1,3 @@
The :ref:`Unicode Exception Objects <unicodeexceptions>` C API
now raises a :exc:`TypeError` if its exception argument is not
a :exc:`UnicodeError` object. Patch by Bénédikt Tran.

View file

@ -2668,7 +2668,7 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError,
"Unicode related error."); "Unicode related error.");
static PyObject * static PyObject *
get_string(PyObject *attr, const char *name) get_bytes(PyObject *attr, const char *name)
{ {
if (!attr) { if (!attr) {
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
@ -2748,40 +2748,74 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
return end; return end;
} }
PyObject * #define _PyUnicodeError_CAST(PTR) ((PyUnicodeErrorObject *)(PTR))
PyUnicodeEncodeError_GetEncoding(PyObject *exc) #define PyUnicodeError_Check(PTR) \
PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError)
#define PyUnicodeError_CAST(PTR) \
(assert(PyUnicodeError_Check(PTR)), _PyUnicodeError_CAST(PTR))
static inline int
check_unicode_error_type(PyObject *self, const char *expect_type)
{ {
return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); if (!PyUnicodeError_Check(self)) {
PyErr_Format(PyExc_TypeError,
"expecting a %s object, got %T", expect_type, self);
return -1;
}
return 0;
}
static inline PyUnicodeErrorObject *
as_unicode_error(PyObject *self, const char *expect_type)
{
int rc = check_unicode_error_type(self, expect_type);
return rc < 0 ? NULL : _PyUnicodeError_CAST(self);
} }
PyObject * PyObject *
PyUnicodeDecodeError_GetEncoding(PyObject *exc) PyUnicodeEncodeError_GetEncoding(PyObject *self)
{ {
return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding");
} }
PyObject * PyObject *
PyUnicodeEncodeError_GetObject(PyObject *exc) PyUnicodeDecodeError_GetEncoding(PyObject *self)
{ {
return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding");
} }
PyObject * PyObject *
PyUnicodeDecodeError_GetObject(PyObject *exc) PyUnicodeEncodeError_GetObject(PyObject *self)
{ {
return get_string(((PyUnicodeErrorObject *)exc)->object, "object"); PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
return exc == NULL ? NULL : get_unicode(exc->object, "object");
} }
PyObject * PyObject *
PyUnicodeTranslateError_GetObject(PyObject *exc) PyUnicodeDecodeError_GetObject(PyObject *self)
{ {
return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
return exc == NULL ? NULL : get_bytes(exc->object, "object");
}
PyObject *
PyUnicodeTranslateError_GetObject(PyObject *self)
{
PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
return exc == NULL ? NULL : get_unicode(exc->object, "object");
} }
int int
PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start)
{ {
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
if (exc == NULL) {
return -1;
}
PyObject *obj = get_unicode(exc->object, "object"); PyObject *obj = get_unicode(exc->object, "object");
if (obj == NULL) { if (obj == NULL) {
return -1; return -1;
@ -2796,8 +2830,11 @@ PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start)
int int
PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start)
{ {
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
PyObject *obj = get_string(exc->object, "object"); if (exc == NULL) {
return -1;
}
PyObject *obj = get_bytes(exc->object, "object");
if (obj == NULL) { if (obj == NULL) {
return -1; return -1;
} }
@ -2809,45 +2846,63 @@ PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start)
int int
PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start)
{ {
return PyUnicodeEncodeError_GetStart(exc, start); PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
if (exc == NULL) {
return -1;
}
PyObject *obj = get_unicode(exc->object, "object");
if (obj == NULL) {
return -1;
}
Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
Py_DECREF(obj);
*start = unicode_error_adjust_start(exc->start, size);
return 0;
} }
static inline int static inline int
unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) unicode_error_set_start_impl(PyObject *self, Py_ssize_t start)
{ {
((PyUnicodeErrorObject *)self)->start = start; PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self);
exc->start = start;
return 0; return 0;
} }
int int
PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start)
{ {
return unicode_error_set_start_impl(exc, start); int rc = check_unicode_error_type(self, "UnicodeEncodeError");
return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
} }
int int
PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start)
{ {
return unicode_error_set_start_impl(exc, start); int rc = check_unicode_error_type(self, "UnicodeDecodeError");
return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
} }
int int
PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start)
{ {
return unicode_error_set_start_impl(exc, start); int rc = check_unicode_error_type(self, "UnicodeTranslateError");
return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
} }
int int
PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end)
{ {
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
if (exc == NULL) {
return -1;
}
PyObject *obj = get_unicode(exc->object, "object"); PyObject *obj = get_unicode(exc->object, "object");
if (obj == NULL) { if (obj == NULL) {
return -1; return -1;
@ -2862,8 +2917,11 @@ PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end)
int int
PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end)
{ {
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
PyObject *obj = get_string(exc->object, "object"); if (exc == NULL) {
return -1;
}
PyObject *obj = get_bytes(exc->object, "object");
if (obj == NULL) { if (obj == NULL) {
return -1; return -1;
} }
@ -2875,108 +2933,130 @@ PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end)
int int
PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end)
{ {
return PyUnicodeEncodeError_GetEnd(exc, end); PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
if (exc == NULL) {
return -1;
}
PyObject *obj = get_unicode(exc->object, "object");
if (obj == NULL) {
return -1;
}
Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
Py_DECREF(obj);
*end = unicode_error_adjust_end(exc->end, size);
return 0;
} }
static inline int static inline int
unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
{ {
((PyUnicodeErrorObject *)exc)->end = end; PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self);
exc->end = end;
return 0; return 0;
} }
int int
PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end)
{ {
return unicode_error_set_end_impl(exc, end); int rc = check_unicode_error_type(self, "UnicodeEncodeError");
return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
} }
int int
PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end)
{ {
return unicode_error_set_end_impl(exc, end); int rc = check_unicode_error_type(self, "UnicodeDecodeError");
return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
} }
int int
PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end)
{ {
return unicode_error_set_end_impl(exc, end); int rc = check_unicode_error_type(self, "UnicodeTranslateError");
} return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
PyObject *
PyUnicodeEncodeError_GetReason(PyObject *exc)
{
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
} }
PyObject * PyObject *
PyUnicodeDecodeError_GetReason(PyObject *exc) PyUnicodeEncodeError_GetReason(PyObject *self)
{ {
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
} }
PyObject * PyObject *
PyUnicodeTranslateError_GetReason(PyObject *exc) PyUnicodeDecodeError_GetReason(PyObject *self)
{ {
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
}
PyObject *
PyUnicodeTranslateError_GetReason(PyObject *self)
{
PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
} }
int int
PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason) PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason)
{ {
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
reason); return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
} }
int int
PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason) PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason)
{ {
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
reason); return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
} }
int int
PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason) PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason)
{ {
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
reason); return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
} }
static int static int
UnicodeError_clear(PyUnicodeErrorObject *self) UnicodeError_clear(PyObject *self)
{ {
Py_CLEAR(self->encoding); PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
Py_CLEAR(self->object); Py_CLEAR(exc->encoding);
Py_CLEAR(self->reason); Py_CLEAR(exc->object);
Py_CLEAR(exc->reason);
return BaseException_clear((PyBaseExceptionObject *)self); return BaseException_clear((PyBaseExceptionObject *)self);
} }
static void static void
UnicodeError_dealloc(PyUnicodeErrorObject *self) UnicodeError_dealloc(PyObject *self)
{ {
PyTypeObject *type = Py_TYPE(self);
_PyObject_GC_UNTRACK(self); _PyObject_GC_UNTRACK(self);
UnicodeError_clear(self); (void)UnicodeError_clear(self);
Py_TYPE(self)->tp_free((PyObject *)self); type->tp_free(self);
} }
static int static int
UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg) UnicodeError_traverse(PyObject *self, visitproc visit, void *arg)
{ {
Py_VISIT(self->encoding); PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
Py_VISIT(self->object); Py_VISIT(exc->encoding);
Py_VISIT(self->reason); Py_VISIT(exc->object);
Py_VISIT(exc->reason);
return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg);
} }
@ -3015,7 +3095,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
return -1; return -1;
} }
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->encoding, Py_NewRef(encoding));
Py_XSETREF(exc->object, Py_NewRef(object)); Py_XSETREF(exc->object, Py_NewRef(object));
exc->start = start; exc->start = start;
@ -3027,7 +3107,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject * static PyObject *
UnicodeEncodeError_str(PyObject *self) UnicodeEncodeError_str(PyObject *self)
{ {
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
PyObject *result = NULL; PyObject *result = NULL;
PyObject *reason_str = NULL; PyObject *reason_str = NULL;
PyObject *encoding_str = NULL; PyObject *encoding_str = NULL;
@ -3135,7 +3215,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
} }
} }
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->encoding, Py_NewRef(encoding));
Py_XSETREF(exc->object, object /* already a strong reference */); Py_XSETREF(exc->object, object /* already a strong reference */);
exc->start = start; exc->start = start;
@ -3147,7 +3227,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject * static PyObject *
UnicodeDecodeError_str(PyObject *self) UnicodeDecodeError_str(PyObject *self)
{ {
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
PyObject *result = NULL; PyObject *result = NULL;
PyObject *reason_str = NULL; PyObject *reason_str = NULL;
PyObject *encoding_str = NULL; PyObject *encoding_str = NULL;
@ -3236,7 +3316,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
return -1; return -1;
} }
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
Py_XSETREF(exc->object, Py_NewRef(object)); Py_XSETREF(exc->object, Py_NewRef(object));
exc->start = start; exc->start = start;
exc->end = end; exc->end = end;
@ -3248,7 +3328,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject * static PyObject *
UnicodeTranslateError_str(PyObject *self) UnicodeTranslateError_str(PyObject *self)
{ {
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
PyObject *result = NULL; PyObject *result = NULL;
PyObject *reason_str = NULL; PyObject *reason_str = NULL;