gh-111178: fix UBSan failures in Modules/pyexpat.c (GH-129789)

Fix UBSan failures for `xmlparseobject`, XML handler setters
Suppress unused return values

Rewrite `RC_HANDLER` with better semantics
This commit is contained in:
Bénédikt Tran 2025-02-24 11:56:32 +01:00 committed by GitHub
parent ef29104f7d
commit c070abadb2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -92,12 +92,14 @@ typedef struct {
PyObject **handlers;
} xmlparseobject;
#define xmlparseobject_CAST(op) ((xmlparseobject *)(op))
#include "clinic/pyexpat.c.h"
#define CHARACTER_DATA_BUFFER_SIZE 8192
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
typedef void* xmlhandler;
typedef const void *xmlhandler;
typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);
struct HandlerInfo {
const char *name;
@ -108,6 +110,12 @@ struct HandlerInfo {
static struct HandlerInfo handler_info[64];
#define CALL_XML_HANDLER_SETTER(HANDLER_INFO, XML_PARSER, XML_HANDLER) \
do { \
xmlhandlersetter setter = (xmlhandlersetter)(HANDLER_INFO).setter; \
setter((XML_PARSER), (XML_HANDLER)); \
} while (0)
/* Set an integer attribute on the error object; return true on success,
* false on an exception.
*/
@ -312,7 +320,7 @@ flush_character_buffer(xmlparseobject *self)
static void
my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
{
xmlparseobject *self = (xmlparseobject *) userData;
xmlparseobject *self = xmlparseobject_CAST(userData);
if (PyErr_Occurred())
return;
@ -345,7 +353,7 @@ static void
my_StartElementHandler(void *userData,
const XML_Char *name, const XML_Char *atts[])
{
xmlparseobject *self = (xmlparseobject *)userData;
xmlparseobject *self = xmlparseobject_CAST(userData);
if (have_handler(self, StartElement)) {
PyObject *container, *rv, *args;
@ -430,45 +438,57 @@ my_StartElementHandler(void *userData,
}
}
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
xmlparseobject *self = GETUSERDATA ; \
PyObject *args = NULL; \
PyObject *rv = NULL; \
#define RC_HANDLER(RETURN_TYPE, NAME, PARAMS, \
INIT, PARSE_FORMAT, CONVERSION, \
RETURN_VARIABLE, GETUSERDATA) \
static RETURN_TYPE \
my_ ## NAME ## Handler PARAMS { \
xmlparseobject *self = GETUSERDATA; \
INIT \
\
if (have_handler(self, NAME)) { \
if (PyErr_Occurred()) \
return RETURN; \
if (flush_character_buffer(self) < 0) \
return RETURN; \
args = Py_BuildValue PARAM_FORMAT ;\
if (!args) { flag_error(self); return RETURN;} \
if (!have_handler(self, NAME)) { \
return RETURN_VARIABLE; \
} \
if (PyErr_Occurred()) { \
return RETURN_VARIABLE; \
} \
if (flush_character_buffer(self) < 0) { \
return RETURN_VARIABLE; \
} \
PyObject *args = Py_BuildValue PARSE_FORMAT; \
if (args == NULL) { \
flag_error(self); \
return RETURN_VARIABLE; \
} \
self->in_callback = 1; \
rv = call_with_frame(#NAME,__LINE__, \
PyObject *rv = call_with_frame( \
#NAME, __LINE__, \
self->handlers[NAME], args, self); \
self->in_callback = 0; \
Py_DECREF(args); \
if (rv == NULL) { \
flag_error(self); \
return RETURN; \
return RETURN_VARIABLE; \
} \
CONVERSION \
Py_DECREF(rv); \
} \
return RETURN; \
return RETURN_VARIABLE; \
}
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
(xmlparseobject *)userData)
RC_HANDLER( \
void, NAME, PARAMS, \
/* no init */, PARAM_FORMAT, \
/* no conversion */, /* no return var */, \
xmlparseobject_CAST(userData) \
)
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
RC_HANDLER( \
int, NAME, PARAMS, \
int rc = 0;, PARAM_FORMAT, \
rc = PyLong_AsLong(rv);, rc, \
(xmlparseobject *)userData)
xmlparseobject_CAST(userData) \
)
VOID_HANDLER(EndElement,
(void *userData, const XML_Char *name),
@ -549,7 +569,7 @@ my_ElementDeclHandler(void *userData,
const XML_Char *name,
XML_Content *model)
{
xmlparseobject *self = (xmlparseobject *)userData;
xmlparseobject *self = xmlparseobject_CAST(userData);
PyObject *args = NULL;
if (have_handler(self, ElementDecl)) {
@ -979,8 +999,6 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
/*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
{
xmlparseobject *new_parser;
int i;
pyexpat_state *state = PyType_GetModuleState(cls);
new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
@ -1015,6 +1033,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
XML_SetUserData(new_parser->itself, (void *)new_parser);
/* allocate and clear handlers first */
size_t i;
for (i = 0; handler_info[i].name != NULL; i++)
/* do nothing */;
@ -1026,12 +1045,12 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
clear_handlers(new_parser, 1);
/* then copy handlers from self */
for (i = 0; handler_info[i].name != NULL; i++) {
for (size_t i = 0; handler_info[i].name != NULL; i++) {
PyObject *handler = self->handlers[i];
if (handler != NULL) {
new_parser->handlers[i] = Py_NewRef(handler);
handler_info[i].setter(new_parser->itself,
handler_info[i].handler);
struct HandlerInfo info = handler_info[i];
CALL_XML_HANDLER_SETTER(info, new_parser->itself, info.handler);
}
}
@ -1240,30 +1259,34 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
}
static int
xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
{
for (int i = 0; handler_info[i].name != NULL; i++) {
Py_VISIT(op->handlers[i]);
xmlparseobject *self = xmlparseobject_CAST(op);
for (size_t i = 0; handler_info[i].name != NULL; i++) {
Py_VISIT(self->handlers[i]);
}
Py_VISIT(Py_TYPE(op));
return 0;
}
static int
xmlparse_clear(xmlparseobject *op)
xmlparse_clear(PyObject *op)
{
clear_handlers(op, 0);
Py_CLEAR(op->intern);
xmlparseobject *self = xmlparseobject_CAST(op);
clear_handlers(self, 0);
Py_CLEAR(self->intern);
return 0;
}
static void
xmlparse_dealloc(xmlparseobject *self)
xmlparse_dealloc(PyObject *op)
{
xmlparseobject *self = xmlparseobject_CAST(op);
PyObject_GC_UnTrack(self);
(void)xmlparse_clear(self);
if (self->itself != NULL)
(void)xmlparse_clear(op);
if (self->itself != NULL) {
XML_ParserFree(self->itself);
}
self->itself = NULL;
if (self->handlers != NULL) {
@ -1281,19 +1304,24 @@ xmlparse_dealloc(xmlparseobject *self)
static PyObject *
xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
xmlparse_handler_getter(PyObject *op, void *closure)
{
xmlparseobject *self = xmlparseobject_CAST(op);
struct HandlerInfo *hi = (struct HandlerInfo *)closure;
assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
int handlernum = (int)(hi - handler_info);
PyObject *result = self->handlers[handlernum];
if (result == NULL)
if (result == NULL) {
result = Py_None;
}
return Py_NewRef(result);
}
static int
xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
{
xmlparseobject *self = xmlparseobject_CAST(op);
struct HandlerInfo *hi = (struct HandlerInfo *)closure;
assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
int handlernum = (int)(hi - handler_info);
if (v == NULL) {
@ -1323,8 +1351,9 @@ xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *h
otherwise would, but that's really an odd case. A more
elaborate system of handlers and state could remove the
C handler more effectively. */
if (handlernum == CharacterData && self->in_callback)
if (handlernum == CharacterData && self->in_callback) {
c_handler = noop_character_data_handler;
}
v = NULL;
}
else if (v != NULL) {
@ -1332,15 +1361,16 @@ xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *h
c_handler = handler_info[handlernum].handler;
}
Py_XSETREF(self->handlers[handlernum], v);
handler_info[handlernum].setter(self->itself, c_handler);
CALL_XML_HANDLER_SETTER(handler_info[handlernum], self->itself, c_handler);
return 0;
}
#define INT_GETTER(name) \
static PyObject * \
xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
xmlparse_##name##_getter(PyObject *op, void *Py_UNUSED(closure)) \
{ \
return PyLong_FromLong((long) XML_Get##name(self->itself)); \
xmlparseobject *self = xmlparseobject_CAST(op); \
return PyLong_FromLong((long)XML_Get##name(self->itself)); \
}
INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)
@ -1353,21 +1383,24 @@ INT_GETTER(CurrentByteIndex)
#undef INT_GETTER
static PyObject *
xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
xmlparse_buffer_text_getter(PyObject *op, void *Py_UNUSED(closure))
{
xmlparseobject *self = xmlparseobject_CAST(op);
return PyBool_FromLong(self->buffer != NULL);
}
static int
xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
xmlparse_buffer_text_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1;
}
int b = PyObject_IsTrue(v);
if (b < 0)
if (b < 0) {
return -1;
}
if (b) {
if (self->buffer == NULL) {
self->buffer = PyMem_Malloc(self->buffer_size);
@ -1379,8 +1412,9 @@ xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
}
}
else if (self->buffer != NULL) {
if (flush_character_buffer(self) < 0)
if (flush_character_buffer(self) < 0) {
return -1;
}
PyMem_Free(self->buffer);
self->buffer = NULL;
}
@ -1388,14 +1422,16 @@ xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
}
static PyObject *
xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
xmlparse_buffer_size_getter(PyObject *op, void *Py_UNUSED(closure))
{
return PyLong_FromLong((long) self->buffer_size);
xmlparseobject *self = xmlparseobject_CAST(op);
return PyLong_FromLong(self->buffer_size);
}
static int
xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
xmlparse_buffer_size_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1;
@ -1408,8 +1444,9 @@ xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
new_buffer_size = PyLong_AsLong(v);
if (new_buffer_size <= 0) {
if (!PyErr_Occurred())
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
}
return -1;
}
@ -1444,68 +1481,78 @@ xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
}
static PyObject *
xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
xmlparse_buffer_used_getter(PyObject *op, void *Py_UNUSED(closure))
{
return PyLong_FromLong((long) self->buffer_used);
xmlparseobject *self = xmlparseobject_CAST(op);
return PyLong_FromLong(self->buffer_used);
}
static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
xmlparse_namespace_prefixes_getter(PyObject *op, void *Py_UNUSED(closure))
{
xmlparseobject *self = xmlparseobject_CAST(op);
return PyBool_FromLong(self->ns_prefixes);
}
static int
xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
xmlparse_namespace_prefixes_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1;
}
int b = PyObject_IsTrue(v);
if (b < 0)
if (b < 0) {
return -1;
}
self->ns_prefixes = b;
XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
return 0;
}
static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
xmlparse_ordered_attributes_getter(PyObject *op, void *Py_UNUSED(closure))
{
xmlparseobject *self = xmlparseobject_CAST(op);
return PyBool_FromLong(self->ordered_attributes);
}
static int
xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
xmlparse_ordered_attributes_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1;
}
int b = PyObject_IsTrue(v);
if (b < 0)
if (b < 0) {
return -1;
}
self->ordered_attributes = b;
return 0;
}
static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
xmlparse_specified_attributes_getter(PyObject *op, void *Py_UNUSED(closure))
{
return PyBool_FromLong((long) self->specified_attributes);
xmlparseobject *self = xmlparseobject_CAST(op);
return PyBool_FromLong(self->specified_attributes);
}
static int
xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
xmlparse_specified_attributes_setter(PyObject *op, PyObject *v, void *Py_UNUSED(closure))
{
xmlparseobject *self = xmlparseobject_CAST(op);
if (v == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
return -1;
}
int b = PyObject_IsTrue(v);
if (b < 0)
if (b < 0) {
return -1;
}
self->specified_attributes = b;
return 0;
}
@ -1516,10 +1563,10 @@ static PyMemberDef xmlparse_members[] = {
};
#define XMLPARSE_GETTER_DEF(name) \
{#name, (getter)xmlparse_##name##_getter, NULL, NULL},
{#name, xmlparse_##name##_getter, NULL, NULL},
#define XMLPARSE_GETTER_SETTER_DEF(name) \
{#name, (getter)xmlparse_##name##_getter, \
(setter)xmlparse_##name##_setter, NULL},
{#name, xmlparse_##name##_getter, \
xmlparse_##name##_setter, NULL},
static PyGetSetDef xmlparse_getsetlist[] = {
XMLPARSE_GETTER_DEF(ErrorCode)
@ -1655,8 +1702,8 @@ static int init_handler_descrs(pyexpat_state *state)
for (i = 0; handler_info[i].name != NULL; i++) {
struct HandlerInfo *hi = &handler_info[i];
hi->getset.name = hi->name;
hi->getset.get = (getter)xmlparse_handler_getter;
hi->getset.set = (setter)xmlparse_handler_setter;
hi->getset.get = xmlparse_handler_getter;
hi->getset.set = xmlparse_handler_setter;
hi->getset.closure = &handler_info[i];
PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
@ -2120,7 +2167,7 @@ pyexpat_clear(PyObject *module)
static void
pyexpat_free(void *module)
{
pyexpat_clear((PyObject *)module);
(void)pyexpat_clear((PyObject *)module);
}
static PyModuleDef_Slot pyexpat_slots[] = {
@ -2151,22 +2198,24 @@ PyInit_pyexpat(void)
static void
clear_handlers(xmlparseobject *self, int initial)
{
int i = 0;
for (; handler_info[i].name != NULL; i++) {
if (initial)
for (size_t i = 0; handler_info[i].name != NULL; i++) {
if (initial) {
self->handlers[i] = NULL;
}
else {
Py_CLEAR(self->handlers[i]);
handler_info[i].setter(self->itself, NULL);
CALL_XML_HANDLER_SETTER(handler_info[i], self->itself, NULL);
}
}
}
static struct HandlerInfo handler_info[] = {
// The cast to `xmlhandlersetter` is needed as the signature of XML
// handler functions is not compatible with `xmlhandlersetter` since
// their second parameter is narrower than a `const void *`.
#define HANDLER_INFO(name) \
{#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
{#name, (xmlhandlersetter)XML_Set##name, my_##name},
HANDLER_INFO(StartElementHandler)
HANDLER_INFO(EndElementHandler)