gh-111178: fix UBSan failures in Modules/_sre/sre.c (GH-128250)

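Fix UBSan failures for `PatternObject`, `MatchObject`, `TemplateObject`, and `ScannerObject`: the slot, getter, and method functions now take `PyObject *` and cast to the concrete type inside the function body (via the new `_*_CAST` macros), instead of being declared with the concrete type and having the function pointer cast when stored in the type's tables.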
This commit is contained in:
Bénédikt Tran 2025-01-07 12:47:18 +01:00 committed by GitHub
parent bcdf654c8a
commit a734c1e304
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -395,6 +395,11 @@ static struct PyModuleDef sremodule;
static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t); static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t); static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
#define _PatternObject_CAST(op) ((PatternObject *)(op))
#define _MatchObject_CAST(op) ((MatchObject *)(op))
#define _TemplateObject_CAST(op) ((TemplateObject *)(op))
#define _ScannerObject_CAST(op) ((ScannerObject *)(op))
/*[clinic input] /*[clinic input]
module _sre module _sre
class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type" class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
@ -699,8 +704,9 @@ pattern_error(Py_ssize_t status)
} }
static int static int
pattern_traverse(PatternObject *self, visitproc visit, void *arg) pattern_traverse(PyObject *op, visitproc visit, void *arg)
{ {
PatternObject *self = _PatternObject_CAST(op);
Py_VISIT(Py_TYPE(self)); Py_VISIT(Py_TYPE(self));
Py_VISIT(self->groupindex); Py_VISIT(self->groupindex);
Py_VISIT(self->indexgroup); Py_VISIT(self->indexgroup);
@ -712,8 +718,9 @@ pattern_traverse(PatternObject *self, visitproc visit, void *arg)
} }
static int static int
pattern_clear(PatternObject *self) pattern_clear(PyObject *op)
{ {
PatternObject *self = _PatternObject_CAST(op);
Py_CLEAR(self->groupindex); Py_CLEAR(self->groupindex);
Py_CLEAR(self->indexgroup); Py_CLEAR(self->indexgroup);
Py_CLEAR(self->pattern); Py_CLEAR(self->pattern);
@ -724,13 +731,13 @@ pattern_clear(PatternObject *self)
} }
static void static void
pattern_dealloc(PatternObject* self) pattern_dealloc(PyObject *self)
{ {
PyTypeObject *tp = Py_TYPE(self); PyTypeObject *tp = Py_TYPE(self);
PyObject_GC_UnTrack(self); PyObject_GC_UnTrack(self);
if (self->weakreflist != NULL) { PatternObject *obj = _PatternObject_CAST(self);
PyObject_ClearWeakRefs((PyObject *) self); if (obj->weakreflist != NULL) {
PyObject_ClearWeakRefs(self);
} }
(void)pattern_clear(self); (void)pattern_clear(self);
tp->tp_free(self); tp->tp_free(self);
@ -1497,7 +1504,7 @@ _sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
#endif /* Py_DEBUG */ #endif /* Py_DEBUG */
static PyObject * static PyObject *
pattern_repr(PatternObject *obj) pattern_repr(PyObject *self)
{ {
static const struct { static const struct {
const char *name; const char *name;
@ -1512,6 +1519,8 @@ pattern_repr(PatternObject *obj)
{"re.DEBUG", SRE_FLAG_DEBUG}, {"re.DEBUG", SRE_FLAG_DEBUG},
{"re.ASCII", SRE_FLAG_ASCII}, {"re.ASCII", SRE_FLAG_ASCII},
}; };
PatternObject *obj = _PatternObject_CAST(self);
PyObject *result = NULL; PyObject *result = NULL;
PyObject *flag_items; PyObject *flag_items;
size_t i; size_t i;
@ -1579,8 +1588,9 @@ PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
/* PatternObject's 'groupindex' method. */ /* PatternObject's 'groupindex' method. */
static PyObject * static PyObject *
pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored)) pattern_groupindex(PyObject *op, void *Py_UNUSED(ignored))
{ {
PatternObject *self = _PatternObject_CAST(op);
if (self->groupindex == NULL) if (self->groupindex == NULL)
return PyDict_New(); return PyDict_New();
return PyDictProxy_New(self->groupindex); return PyDictProxy_New(self->groupindex);
@ -2245,8 +2255,9 @@ _validate(PatternObject *self)
/* match methods */ /* match methods */
static int static int
match_traverse(MatchObject *self, visitproc visit, void *arg) match_traverse(PyObject *op, visitproc visit, void *arg)
{ {
MatchObject *self = _MatchObject_CAST(op);
Py_VISIT(Py_TYPE(self)); Py_VISIT(Py_TYPE(self));
Py_VISIT(self->string); Py_VISIT(self->string);
Py_VISIT(self->regs); Py_VISIT(self->regs);
@ -2255,8 +2266,9 @@ match_traverse(MatchObject *self, visitproc visit, void *arg)
} }
static int static int
match_clear(MatchObject *self) match_clear(PyObject *op)
{ {
MatchObject *self = _MatchObject_CAST(op);
Py_CLEAR(self->string); Py_CLEAR(self->string);
Py_CLEAR(self->regs); Py_CLEAR(self->regs);
Py_CLEAR(self->pattern); Py_CLEAR(self->pattern);
@ -2264,10 +2276,9 @@ match_clear(MatchObject *self)
} }
static void static void
match_dealloc(MatchObject* self) match_dealloc(PyObject *self)
{ {
PyTypeObject *tp = Py_TYPE(self); PyTypeObject *tp = Py_TYPE(self);
PyObject_GC_UnTrack(self); PyObject_GC_UnTrack(self);
(void)match_clear(self); (void)match_clear(self);
tp->tp_free(self); tp->tp_free(self);
@ -2376,8 +2387,9 @@ _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
} }
static PyObject* static PyObject*
match_group(MatchObject* self, PyObject* args) match_group(PyObject *op, PyObject* args)
{ {
MatchObject *self = _MatchObject_CAST(op);
PyObject* result; PyObject* result;
Py_ssize_t i, size; Py_ssize_t i, size;
@ -2411,8 +2423,9 @@ match_group(MatchObject* self, PyObject* args)
} }
static PyObject* static PyObject*
match_getitem(MatchObject* self, PyObject* name) match_getitem(PyObject *op, PyObject* name)
{ {
MatchObject *self = _MatchObject_CAST(op);
return match_getslice(self, name, Py_None); return match_getslice(self, name, Py_None);
} }
@ -2654,16 +2667,18 @@ PyDoc_STRVAR(match_group_doc,
For 0 returns the entire match."); For 0 returns the entire match.");
static PyObject * static PyObject *
match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored)) match_lastindex_get(PyObject *op, void *Py_UNUSED(ignored))
{ {
MatchObject *self = _MatchObject_CAST(op);
if (self->lastindex >= 0) if (self->lastindex >= 0)
return PyLong_FromSsize_t(self->lastindex); return PyLong_FromSsize_t(self->lastindex);
Py_RETURN_NONE; Py_RETURN_NONE;
} }
static PyObject * static PyObject *
match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored)) match_lastgroup_get(PyObject *op, void *Py_UNUSED(ignored))
{ {
MatchObject *self = _MatchObject_CAST(op);
if (self->pattern->indexgroup && if (self->pattern->indexgroup &&
self->lastindex >= 0 && self->lastindex >= 0 &&
self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup)) self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
@ -2676,8 +2691,9 @@ match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
} }
static PyObject * static PyObject *
match_regs_get(MatchObject *self, void *Py_UNUSED(ignored)) match_regs_get(PyObject *op, void *Py_UNUSED(ignored))
{ {
MatchObject *self = _MatchObject_CAST(op);
if (self->regs) { if (self->regs) {
return Py_NewRef(self->regs); return Py_NewRef(self->regs);
} else } else
@ -2780,27 +2796,29 @@ pattern_new_match(_sremodulestate* module_state,
/* scanner methods (experimental) */ /* scanner methods (experimental) */
static int static int
scanner_traverse(ScannerObject *self, visitproc visit, void *arg) scanner_traverse(PyObject *op, visitproc visit, void *arg)
{ {
ScannerObject *self = _ScannerObject_CAST(op);
Py_VISIT(Py_TYPE(self)); Py_VISIT(Py_TYPE(self));
Py_VISIT(self->pattern); Py_VISIT(self->pattern);
return 0; return 0;
} }
static int static int
scanner_clear(ScannerObject *self) scanner_clear(PyObject *op)
{ {
ScannerObject *self = _ScannerObject_CAST(op);
Py_CLEAR(self->pattern); Py_CLEAR(self->pattern);
return 0; return 0;
} }
static void static void
scanner_dealloc(ScannerObject* self) scanner_dealloc(PyObject *self)
{ {
PyTypeObject *tp = Py_TYPE(self); PyTypeObject *tp = Py_TYPE(self);
PyObject_GC_UnTrack(self); PyObject_GC_UnTrack(self);
state_fini(&self->state); ScannerObject *scanner = _ScannerObject_CAST(self);
state_fini(&scanner->state);
(void)scanner_clear(self); (void)scanner_clear(self);
tp->tp_free(self); tp->tp_free(self);
Py_DECREF(tp); Py_DECREF(tp);
@ -2957,8 +2975,9 @@ pattern_scanner(_sremodulestate *module_state,
/* template methods */ /* template methods */
static int static int
template_traverse(TemplateObject *self, visitproc visit, void *arg) template_traverse(PyObject *op, visitproc visit, void *arg)
{ {
TemplateObject *self = _TemplateObject_CAST(op);
Py_VISIT(Py_TYPE(self)); Py_VISIT(Py_TYPE(self));
Py_VISIT(self->literal); Py_VISIT(self->literal);
for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) { for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
@ -2968,8 +2987,9 @@ template_traverse(TemplateObject *self, visitproc visit, void *arg)
} }
static int static int
template_clear(TemplateObject *self) template_clear(PyObject *op)
{ {
TemplateObject *self = _TemplateObject_CAST(op);
Py_CLEAR(self->literal); Py_CLEAR(self->literal);
for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) { for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
Py_CLEAR(self->items[i].literal); Py_CLEAR(self->items[i].literal);
@ -2978,10 +2998,9 @@ template_clear(TemplateObject *self)
} }
static void static void
template_dealloc(TemplateObject *self) template_dealloc(PyObject *self)
{ {
PyTypeObject *tp = Py_TYPE(self); PyTypeObject *tp = Py_TYPE(self);
PyObject_GC_UnTrack(self); PyObject_GC_UnTrack(self);
(void)template_clear(self); (void)template_clear(self);
tp->tp_free(self); tp->tp_free(self);
@ -3056,8 +3075,10 @@ cleanup:
static Py_hash_t static Py_hash_t
pattern_hash(PatternObject *self) pattern_hash(PyObject *op)
{ {
PatternObject *self = _PatternObject_CAST(op);
Py_hash_t hash, hash2; Py_hash_t hash, hash2;
hash = PyObject_Hash(self->pattern); hash = PyObject_Hash(self->pattern);
@ -3148,7 +3169,7 @@ static PyMethodDef pattern_methods[] = {
}; };
static PyGetSetDef pattern_getset[] = { static PyGetSetDef pattern_getset[] = {
{"groupindex", (getter)pattern_groupindex, (setter)NULL, {"groupindex", pattern_groupindex, NULL,
"A dictionary mapping group names to group numbers."}, "A dictionary mapping group names to group numbers."},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
@ -3166,9 +3187,9 @@ static PyMemberDef pattern_members[] = {
}; };
static PyType_Slot pattern_slots[] = { static PyType_Slot pattern_slots[] = {
{Py_tp_dealloc, (destructor)pattern_dealloc}, {Py_tp_dealloc, pattern_dealloc},
{Py_tp_repr, (reprfunc)pattern_repr}, {Py_tp_repr, pattern_repr},
{Py_tp_hash, (hashfunc)pattern_hash}, {Py_tp_hash, pattern_hash},
{Py_tp_doc, (void *)pattern_doc}, {Py_tp_doc, (void *)pattern_doc},
{Py_tp_richcompare, pattern_richcompare}, {Py_tp_richcompare, pattern_richcompare},
{Py_tp_methods, pattern_methods}, {Py_tp_methods, pattern_methods},
@ -3189,7 +3210,7 @@ static PyType_Spec pattern_spec = {
}; };
static PyMethodDef match_methods[] = { static PyMethodDef match_methods[] = {
{"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, {"group", match_group, METH_VARARGS, match_group_doc},
_SRE_SRE_MATCH_START_METHODDEF _SRE_SRE_MATCH_START_METHODDEF
_SRE_SRE_MATCH_END_METHODDEF _SRE_SRE_MATCH_END_METHODDEF
_SRE_SRE_MATCH_SPAN_METHODDEF _SRE_SRE_MATCH_SPAN_METHODDEF
@ -3204,11 +3225,11 @@ static PyMethodDef match_methods[] = {
}; };
static PyGetSetDef match_getset[] = { static PyGetSetDef match_getset[] = {
{"lastindex", (getter)match_lastindex_get, (setter)NULL, {"lastindex", match_lastindex_get, NULL,
"The integer index of the last matched capturing group."}, "The integer index of the last matched capturing group."},
{"lastgroup", (getter)match_lastgroup_get, (setter)NULL, {"lastgroup", match_lastgroup_get, NULL,
"The name of the last matched capturing group."}, "The name of the last matched capturing group."},
{"regs", (getter)match_regs_get, (setter)NULL}, {"regs", match_regs_get, NULL, NULL},
{NULL} {NULL}
}; };