gh-111178: fix UBSan failures in Modules/pyexpat.c (GH-129789)

Fix UBSan failures for `xmlparseobject`, XML handler setters
Suppress unused return values

Rewrite `RC_HANDLER` with better semantics
This commit is contained in:
Bénédikt Tran 2025-02-24 11:56:32 +01:00 committed by GitHub
parent ef29104f7d
commit c070abadb2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -92,12 +92,14 @@ typedef struct {
PyObject **handlers; PyObject **handlers;
} xmlparseobject; } xmlparseobject;
#define xmlparseobject_CAST(op) ((xmlparseobject *)(op))
#include "clinic/pyexpat.c.h" #include "clinic/pyexpat.c.h"
#define CHARACTER_DATA_BUFFER_SIZE 8192 #define CHARACTER_DATA_BUFFER_SIZE 8192
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth); typedef const void *xmlhandler;
typedef void* xmlhandler; typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);
struct HandlerInfo { struct HandlerInfo {
const char *name; const char *name;
@ -108,6 +110,12 @@ struct HandlerInfo {
static struct HandlerInfo handler_info[64]; static struct HandlerInfo handler_info[64];
#define CALL_XML_HANDLER_SETTER(HANDLER_INFO, XML_PARSER, XML_HANDLER) \
do { \
xmlhandlersetter setter = (xmlhandlersetter)(HANDLER_INFO).setter; \
setter((XML_PARSER), (XML_HANDLER)); \
} while (0)
/* Set an integer attribute on the error object; return true on success, /* Set an integer attribute on the error object; return true on success,
* false on an exception. * false on an exception.
*/ */
@ -312,7 +320,7 @@ flush_character_buffer(xmlparseobject *self)
static void static void
my_CharacterDataHandler(void *userData, const XML_Char *data, int len) my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
{ {
xmlparseobject *self = (xmlparseobject *) userData; xmlparseobject *self = xmlparseobject_CAST(userData);
if (PyErr_Occurred()) if (PyErr_Occurred())
return; return;
@ -345,7 +353,7 @@ static void
my_StartElementHandler(void *userData, my_StartElementHandler(void *userData,
const XML_Char *name, const XML_Char *atts[]) const XML_Char *name, const XML_Char *atts[])
{ {
xmlparseobject *self = (xmlparseobject *)userData; xmlparseobject *self = xmlparseobject_CAST(userData);
if (have_handler(self, StartElement)) { if (have_handler(self, StartElement)) {
PyObject *container, *rv, *args; PyObject *container, *rv, *args;
@ -430,45 +438,57 @@ my_StartElementHandler(void *userData,
} }
} }
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \ #define RC_HANDLER(RETURN_TYPE, NAME, PARAMS, \
RETURN, GETUSERDATA) \ INIT, PARSE_FORMAT, CONVERSION, \
static RC \ RETURN_VARIABLE, GETUSERDATA) \
my_##NAME##Handler PARAMS {\ static RETURN_TYPE \
xmlparseobject *self = GETUSERDATA ; \ my_ ## NAME ## Handler PARAMS { \
PyObject *args = NULL; \ xmlparseobject *self = GETUSERDATA; \
PyObject *rv = NULL; \
INIT \ INIT \
\ if (!have_handler(self, NAME)) { \
if (have_handler(self, NAME)) { \ return RETURN_VARIABLE; \
if (PyErr_Occurred()) \ } \
return RETURN; \ if (PyErr_Occurred()) { \
if (flush_character_buffer(self) < 0) \ return RETURN_VARIABLE; \
return RETURN; \ } \
args = Py_BuildValue PARAM_FORMAT ;\ if (flush_character_buffer(self) < 0) { \
if (!args) { flag_error(self); return RETURN;} \ return RETURN_VARIABLE; \
} \
PyObject *args = Py_BuildValue PARSE_FORMAT; \
if (args == NULL) { \
flag_error(self); \
return RETURN_VARIABLE; \
} \
self->in_callback = 1; \ self->in_callback = 1; \
rv = call_with_frame(#NAME,__LINE__, \ PyObject *rv = call_with_frame( \
#NAME, __LINE__, \
self->handlers[NAME], args, self); \ self->handlers[NAME], args, self); \
self->in_callback = 0; \ self->in_callback = 0; \
Py_DECREF(args); \ Py_DECREF(args); \
if (rv == NULL) { \ if (rv == NULL) { \
flag_error(self); \ flag_error(self); \
return RETURN; \ return RETURN_VARIABLE; \
} \ } \
CONVERSION \ CONVERSION \
Py_DECREF(rv); \ Py_DECREF(rv); \
} \ return RETURN_VARIABLE; \
return RETURN; \
} }
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \ #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\ RC_HANDLER( \
(xmlparseobject *)userData) void, NAME, PARAMS, \
/* no init */, PARAM_FORMAT, \
/* no conversion */, /* no return var */, \
xmlparseobject_CAST(userData) \
)
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\ #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \ RC_HANDLER( \
int, NAME, PARAMS, \
int rc = 0;, PARAM_FORMAT, \
rc = PyLong_AsLong(rv);, rc, \ rc = PyLong_AsLong(rv);, rc, \
(xmlparseobject *)userData) xmlparseobject_CAST(userData) \
)
VOID_HANDLER(EndElement, VOID_HANDLER(EndElement,
(void *userData, const XML_Char *name), (void *userData, const XML_Char *name),
@ -549,7 +569,7 @@ my_ElementDeclHandler(void *userData,
const XML_Char *name, const XML_Char *name,
XML_Content *model) XML_Content *model)
{ {
xmlparseobject *self = (xmlparseobject *)userData; xmlparseobject *self = xmlparseobject_CAST(userData);
PyObject *args = NULL; PyObject *args = NULL;
if (have_handler(self, ElementDecl)) { if (have_handler(self, ElementDecl)) {
@ -979,8 +999,6 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
/*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/ /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
{ {
xmlparseobject *new_parser; xmlparseobject *new_parser;
int i;
pyexpat_state *state = PyType_GetModuleState(cls); pyexpat_state *state = PyType_GetModuleState(cls);
new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type); new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
@ -1015,6 +1033,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
XML_SetUserData(new_parser->itself, (void *)new_parser); XML_SetUserData(new_parser->itself, (void *)new_parser);
/* allocate and clear handlers first */ /* allocate and clear handlers first */
size_t i;
for (i = 0; handler_info[i].name != NULL; i++) for (i = 0; handler_info[i].name != NULL; i++)
/* do nothing */; /* do nothing */;
@ -1026,12 +1045,12 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
clear_handlers(new_parser, 1); clear_handlers(new_parser, 1);
/* then copy handlers from self */ /* then copy handlers from self */
for (i = 0; handler_info[i].name != NULL; i++) { for (size_t i = 0; handler_info[i].name != NULL; i++) {
PyObject *handler = self->handlers[i]; PyObject *handler = self->handlers[i];
if (handler != NULL) { if (handler != NULL) {
new_parser->handlers[i] = Py_NewRef(handler); new_parser->handlers[i] = Py_NewRef(handler);
handler_info[i].setter(new_parser->itself, struct HandlerInfo info = handler_info[i];
handler_info[i].handler); CALL_XML_HANDLER_SETTER(info, new_parser->itself, info.handler);
} }
} }
@ -1240,30 +1259,34 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
} }
static int static int
xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg) xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
{ {
for (int i = 0; handler_info[i].name != NULL; i++) { xmlparseobject *self = xmlparseobject_CAST(op);
Py_VISIT(op->handlers[i]); for (size_t i = 0; handler_info[i].name != NULL; i++) {
Py_VISIT(self->handlers[i]);
} }
Py_VISIT(Py_TYPE(op)); Py_VISIT(Py_TYPE(op));
return 0; return 0;
} }
static int static int
xmlparse_clear(xmlparseobject *op) xmlparse_clear(PyObject *op)
{ {
clear_handlers(op, 0); xmlparseobject *self = xmlparseobject_CAST(op);
Py_CLEAR(op->intern); clear_handlers(self, 0);
Py_CLEAR(self->intern);
return 0; return 0;
} }
static void static void
xmlparse_dealloc(xmlparseobject *self) xmlparse_dealloc(PyObject *op)
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
PyObject_GC_UnTrack(self); PyObject_GC_UnTrack(self);
(void)xmlparse_clear(self); (void)xmlparse_clear(op);
if (self->itself != NULL) if (self->itself != NULL) {
XML_ParserFree(self->itself); XML_ParserFree(self->itself);
}
self->itself = NULL; self->itself = NULL;
if (self->handlers != NULL) { if (self->handlers != NULL) {
@ -1281,19 +1304,24 @@ xmlparse_dealloc(xmlparseobject *self)
static PyObject * static PyObject *
xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi) xmlparse_handler_getter(PyObject *op, void *closure)
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
struct HandlerInfo *hi = (struct HandlerInfo *)closure;
assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info)); assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
int handlernum = (int)(hi - handler_info); int handlernum = (int)(hi - handler_info);
PyObject *result = self->handlers[handlernum]; PyObject *result = self->handlers[handlernum];
if (result == NULL) if (result == NULL) {
result = Py_None; result = Py_None;
}
return Py_NewRef(result); return Py_NewRef(result);
} }
static int static int
xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi) xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
struct HandlerInfo *hi = (struct HandlerInfo *)closure;
assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info)); assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
int handlernum = (int)(hi - handler_info); int handlernum = (int)(hi - handler_info);
if (v == NULL) { if (v == NULL) {
@ -1323,8 +1351,9 @@ xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *h
otherwise would, but that's really an odd case. A more otherwise would, but that's really an odd case. A more
elaborate system of handlers and state could remove the elaborate system of handlers and state could remove the
C handler more effectively. */ C handler more effectively. */
if (handlernum == CharacterData && self->in_callback) if (handlernum == CharacterData && self->in_callback) {
c_handler = noop_character_data_handler; c_handler = noop_character_data_handler;
}
v = NULL; v = NULL;
} }
else if (v != NULL) { else if (v != NULL) {
@ -1332,15 +1361,16 @@ xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *h
c_handler = handler_info[handlernum].handler; c_handler = handler_info[handlernum].handler;
} }
Py_XSETREF(self->handlers[handlernum], v); Py_XSETREF(self->handlers[handlernum], v);
handler_info[handlernum].setter(self->itself, c_handler); CALL_XML_HANDLER_SETTER(handler_info[handlernum], self->itself, c_handler);
return 0; return 0;
} }
#define INT_GETTER(name) \ #define INT_GETTER(name) \
static PyObject * \ static PyObject * \
xmlparse_##name##_getter(xmlparseobject *self, void *closure) \ xmlparse_##name##_getter(PyObject *op, void *Py_UNUSED(closure)) \
{ \ { \
return PyLong_FromLong((long) XML_Get##name(self->itself)); \ xmlparseobject *self = xmlparseobject_CAST(op); \
return PyLong_FromLong((long)XML_Get##name(self->itself)); \
} }
INT_GETTER(ErrorCode) INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber) INT_GETTER(ErrorLineNumber)
@ -1353,21 +1383,24 @@ INT_GETTER(CurrentByteIndex)
#undef INT_GETTER #undef INT_GETTER
static PyObject * static PyObject *
xmlparse_buffer_text_getter(xmlparseobject *self, void *closure) xmlparse_buffer_text_getter(PyObject *op, void *Py_UNUSED(closure))
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
return PyBool_FromLong(self->buffer != NULL); return PyBool_FromLong(self->buffer != NULL);
} }
static int static int
xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure) xmlparse_buffer_text_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) { if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1; return -1;
} }
int b = PyObject_IsTrue(v); int b = PyObject_IsTrue(v);
if (b < 0) if (b < 0) {
return -1; return -1;
}
if (b) { if (b) {
if (self->buffer == NULL) { if (self->buffer == NULL) {
self->buffer = PyMem_Malloc(self->buffer_size); self->buffer = PyMem_Malloc(self->buffer_size);
@ -1379,8 +1412,9 @@ xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
} }
} }
else if (self->buffer != NULL) { else if (self->buffer != NULL) {
if (flush_character_buffer(self) < 0) if (flush_character_buffer(self) < 0) {
return -1; return -1;
}
PyMem_Free(self->buffer); PyMem_Free(self->buffer);
self->buffer = NULL; self->buffer = NULL;
} }
@ -1388,14 +1422,16 @@ xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
} }
static PyObject * static PyObject *
xmlparse_buffer_size_getter(xmlparseobject *self, void *closure) xmlparse_buffer_size_getter(PyObject *op, void *Py_UNUSED(closure))
{ {
return PyLong_FromLong((long) self->buffer_size); xmlparseobject *self = xmlparseobject_CAST(op);
return PyLong_FromLong(self->buffer_size);
} }
static int static int
xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure) xmlparse_buffer_size_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) { if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1; return -1;
@ -1408,8 +1444,9 @@ xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
new_buffer_size = PyLong_AsLong(v); new_buffer_size = PyLong_AsLong(v);
if (new_buffer_size <= 0) { if (new_buffer_size <= 0) {
if (!PyErr_Occurred()) if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero"); PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
}
return -1; return -1;
} }
@ -1444,68 +1481,78 @@ xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
} }
static PyObject * static PyObject *
xmlparse_buffer_used_getter(xmlparseobject *self, void *closure) xmlparse_buffer_used_getter(PyObject *op, void *Py_UNUSED(closure))
{ {
return PyLong_FromLong((long) self->buffer_used); xmlparseobject *self = xmlparseobject_CAST(op);
return PyLong_FromLong(self->buffer_used);
} }
static PyObject * static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure) xmlparse_namespace_prefixes_getter(PyObject *op, void *Py_UNUSED(closure))
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
return PyBool_FromLong(self->ns_prefixes); return PyBool_FromLong(self->ns_prefixes);
} }
static int static int
xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure) xmlparse_namespace_prefixes_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) { if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1; return -1;
} }
int b = PyObject_IsTrue(v); int b = PyObject_IsTrue(v);
if (b < 0) if (b < 0) {
return -1; return -1;
}
self->ns_prefixes = b; self->ns_prefixes = b;
XML_SetReturnNSTriplet(self->itself, self->ns_prefixes); XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
return 0; return 0;
} }
static PyObject * static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure) xmlparse_ordered_attributes_getter(PyObject *op, void *Py_UNUSED(closure))
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
return PyBool_FromLong(self->ordered_attributes); return PyBool_FromLong(self->ordered_attributes);
} }
static int static int
xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure) xmlparse_ordered_attributes_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) { if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1; return -1;
} }
int b = PyObject_IsTrue(v); int b = PyObject_IsTrue(v);
if (b < 0) if (b < 0) {
return -1; return -1;
}
self->ordered_attributes = b; self->ordered_attributes = b;
return 0; return 0;
} }
static PyObject * static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure) xmlparse_specified_attributes_getter(PyObject *op, void *Py_UNUSED(closure))
{ {
return PyBool_FromLong((long) self->specified_attributes); xmlparseobject *self = xmlparseobject_CAST(op);
return PyBool_FromLong(self->specified_attributes);
} }
static int static int
xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure) xmlparse_specified_attributes_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{ {
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) { if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1; return -1;
} }
int b = PyObject_IsTrue(v); int b = PyObject_IsTrue(v);
if (b < 0) if (b < 0) {
return -1; return -1;
}
self->specified_attributes = b; self->specified_attributes = b;
return 0; return 0;
} }
@ -1516,10 +1563,10 @@ static PyMemberDef xmlparse_members[] = {
}; };
#define XMLPARSE_GETTER_DEF(name) \ #define XMLPARSE_GETTER_DEF(name) \
{#name, (getter)xmlparse_##name##_getter, NULL, NULL}, {#name, xmlparse_##name##_getter, NULL, NULL},
#define XMLPARSE_GETTER_SETTER_DEF(name) \ #define XMLPARSE_GETTER_SETTER_DEF(name) \
{#name, (getter)xmlparse_##name##_getter, \ {#name, xmlparse_##name##_getter, \
(setter)xmlparse_##name##_setter, NULL}, xmlparse_##name##_setter, NULL},
static PyGetSetDef xmlparse_getsetlist[] = { static PyGetSetDef xmlparse_getsetlist[] = {
XMLPARSE_GETTER_DEF(ErrorCode) XMLPARSE_GETTER_DEF(ErrorCode)
@ -1655,8 +1702,8 @@ static int init_handler_descrs(pyexpat_state *state)
for (i = 0; handler_info[i].name != NULL; i++) { for (i = 0; handler_info[i].name != NULL; i++) {
struct HandlerInfo *hi = &handler_info[i]; struct HandlerInfo *hi = &handler_info[i];
hi->getset.name = hi->name; hi->getset.name = hi->name;
hi->getset.get = (getter)xmlparse_handler_getter; hi->getset.get = xmlparse_handler_getter;
hi->getset.set = (setter)xmlparse_handler_setter; hi->getset.set = xmlparse_handler_setter;
hi->getset.closure = &handler_info[i]; hi->getset.closure = &handler_info[i];
PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset); PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
@ -2120,7 +2167,7 @@ pyexpat_clear(PyObject *module)
static void static void
pyexpat_free(void *module) pyexpat_free(void *module)
{ {
pyexpat_clear((PyObject *)module); (void)pyexpat_clear((PyObject *)module);
} }
static PyModuleDef_Slot pyexpat_slots[] = { static PyModuleDef_Slot pyexpat_slots[] = {
@ -2151,22 +2198,24 @@ PyInit_pyexpat(void)
static void static void
clear_handlers(xmlparseobject *self, int initial) clear_handlers(xmlparseobject *self, int initial)
{ {
int i = 0; for (size_t i = 0; handler_info[i].name != NULL; i++) {
if (initial) {
for (; handler_info[i].name != NULL; i++) {
if (initial)
self->handlers[i] = NULL; self->handlers[i] = NULL;
}
else { else {
Py_CLEAR(self->handlers[i]); Py_CLEAR(self->handlers[i]);
handler_info[i].setter(self->itself, NULL); CALL_XML_HANDLER_SETTER(handler_info[i], self->itself, NULL);
} }
} }
} }
static struct HandlerInfo handler_info[] = { static struct HandlerInfo handler_info[] = {
// The cast to `xmlhandlersetter` is needed as the signature of XML
// handler functions is not compatible with `xmlhandlersetter` since
// their second parameter is narrower than a `const void *`.
#define HANDLER_INFO(name) \ #define HANDLER_INFO(name) \
{#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name}, {#name, (xmlhandlersetter)XML_Set##name, my_##name},
HANDLER_INFO(StartElementHandler) HANDLER_INFO(StartElementHandler)
HANDLER_INFO(EndElementHandler) HANDLER_INFO(EndElementHandler)