[3.13] gh-126033: fix UAF in xml.etree.ElementTree.Element.remove when concurrent mutations happen (GH-126124) (#131929)

gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124)
(cherry picked from commit bab1398a47)

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
Miss Islington (bot) 2025-03-31 14:50:03 +02:00 committed by GitHub
parent 588bb6ddf4
commit b41c8cc671
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 214 additions and 37 deletions

View file

@ -18,9 +18,11 @@ import sys
import textwrap
import types
import unittest
import unittest.mock as mock
import warnings
import weakref
from contextlib import nullcontext
from functools import partial
from itertools import product, islice
from test import support
@ -121,6 +123,21 @@ ATTLIST_XML = """\
</foo>
"""
def is_python_implementation():
assert ET is not None, "ET must be initialized"
assert pyET is not None, "pyET must be initialized"
return ET is pyET
def equal_wrapper(cls):
"""Mock cls.__eq__ to check whether it has been called or not.
The behaviour of cls.__eq__ (side-effects included) is left as is.
"""
eq = cls.__eq__
return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
@ -2642,6 +2659,7 @@ class BasicElementTest(ElementTestCase, unittest.TestCase):
class BadElementTest(ElementTestCase, unittest.TestCase):
def test_extend_mutable_list(self):
class X:
@property
@ -2680,18 +2698,168 @@ class BadElementTest(ElementTestCase, unittest.TestCase):
e = ET.Element('foo')
e.extend(L)
def test_remove_with_mutating(self):
class X(ET.Element):
def __eq__(self, o):
del e[:]
return False
e = ET.Element('foo')
e.extend([X('bar')])
self.assertRaises(ValueError, e.remove, ET.Element('baz'))
def test_remove_with_clear_assume_missing(self):
# gh-126033: Check that a concurrent clear() for an assumed-to-be
# missing element does not make the interpreter crash.
self.do_test_remove_with_clear(raises=True)
e = ET.Element('foo')
e.extend([ET.Element('bar')])
self.assertRaises(ValueError, e.remove, X('baz'))
def test_remove_with_clear_assume_existing(self):
# gh-126033: Check that a concurrent clear() for an assumed-to-be
# existing element does not make the interpreter crash.
self.do_test_remove_with_clear(raises=False)
def do_test_remove_with_clear(self, *, raises):
# Until the discrepency between "del root[:]" and "root.clear()" is
# resolved, we need to keep two tests. Previously, using "del root[:]"
# did not crash with the reproducer of gh-126033 while "root.clear()"
# did.
class E(ET.Element):
"""Local class to be able to mock E.__eq__ for introspection."""
class X(E):
def __eq__(self, o):
del root[:]
return not raises
class Y(E):
def __eq__(self, o):
root.clear()
return not raises
if raises:
get_checker_context = lambda: self.assertRaises(ValueError)
else:
get_checker_context = nullcontext
self.assertIs(E.__eq__, object.__eq__)
for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
self.enterContext(self.subTest(side_effect=side_effect))
# test removing R() from [U()]
for R, U, description in [
(E, Z, "remove missing E() from [Z()]"),
(Z, E, "remove missing Z() from [E()]"),
(Z, Z, "remove missing Z() from [Z()]"),
]:
with self.subTest(description):
root = E('top')
root.extend([U('one')])
with get_checker_context():
root.remove(R('missing'))
# test removing R() from [U(), V()]
cases = self.cases_for_remove_missing_with_mutations(E, Z)
for R, U, V, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), V('two')])
with get_checker_context():
root.remove(R('missing'))
# Test removing root[0] from [Z()].
#
# Since we call root.remove() with root[0], Z.__eq__()
# will not be called (we branch on the fast Py_EQ path).
with self.subTest("remove root[0] from [Z()]"):
root = E('top')
root.append(Z('rem'))
with equal_wrapper(E) as f, equal_wrapper(Z) as g:
root.remove(root[0])
f.assert_not_called()
g.assert_not_called()
# Test removing root[1] (of type R) from [U(), R()].
is_special = is_python_implementation() and raises and Z is Y
if is_python_implementation() and raises and Z is Y:
# In pure Python, using root.clear() sets the children
# list to [] without calling list.clear().
#
# For this reason, the call to root.remove() first
# checks root[0] and sets the children list to []
# since either root[0] or root[1] is an evil element.
#
# Since checking root[1] still uses the old reference
# to the children list, PyObject_RichCompareBool() branches
# to the fast Py_EQ path and Y.__eq__() is called exactly
# once (when checking root[0]).
continue
else:
cases = self.cases_for_remove_existing_with_mutations(E, Z)
for R, U, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), R('rem')])
with get_checker_context():
root.remove(root[1])
def test_remove_with_mutate_root_assume_missing(self):
# gh-126033: Check that a concurrent mutation for an assumed-to-be
# missing element does not make the interpreter crash.
self.do_test_remove_with_mutate_root(raises=True)
def test_remove_with_mutate_root_assume_existing(self):
# gh-126033: Check that a concurrent mutation for an assumed-to-be
# existing element does not make the interpreter crash.
self.do_test_remove_with_mutate_root(raises=False)
def do_test_remove_with_mutate_root(self, *, raises):
E = ET.Element
class Z(E):
def __eq__(self, o):
del root[0]
return not raises
if raises:
get_checker_context = lambda: self.assertRaises(ValueError)
else:
get_checker_context = nullcontext
# test removing R() from [U(), V()]
cases = self.cases_for_remove_missing_with_mutations(E, Z)
for R, U, V, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), V('two')])
with get_checker_context():
root.remove(R('missing'))
# test removing root[1] (of type R) from [U(), R()]
cases = self.cases_for_remove_existing_with_mutations(E, Z)
for R, U, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), R('rem')])
with get_checker_context():
root.remove(root[1])
def cases_for_remove_missing_with_mutations(self, E, Z):
# Cases for removing R() from [U(), V()].
# The case U = V = R = E is not interesting as there is no mutation.
for U, V in [(E, Z), (Z, E), (Z, Z)]:
description = (f"remove missing {E.__name__}() from "
f"[{U.__name__}(), {V.__name__}()]")
yield E, U, V, description
for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
description = (f"remove missing {Z.__name__}() from "
f"[{U.__name__}(), {V.__name__}()]")
yield Z, U, V, description
def cases_for_remove_existing_with_mutations(self, E, Z):
# Cases for removing root[1] (of type R) from [U(), R()].
# The case U = R = E is not interesting as there is no mutation.
for U, R, description in [
(E, Z, "remove root[1] from [E(), Z()]"),
(Z, E, "remove root[1] from [Z(), E()]"),
(Z, Z, "remove root[1] from [Z(), Z()]"),
]:
description = (f"remove root[1] (of type {R.__name__}) "
f"from [{U.__name__}(), {R.__name__}()]")
yield R, U, description
@support.infinite_recursion(25)
def test_recursive_repr(self):

View file

@ -0,0 +1,3 @@
:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove
<xml.etree.ElementTree.Element.remove>` when the element is
concurrently mutated. Patch by Bénédikt Tran.

View file

@ -847,6 +847,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
if (element_resize(element, self->extra->length) < 0)
goto error;
// TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
for (i = 0; i < self->extra->length; i++) {
PyObject* child = deepcopy(st, self->extra->children[i], memo);
if (!child || !Element_Check(st, child)) {
@ -1625,42 +1626,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
{
Py_ssize_t i;
int rc;
PyObject *found;
if (!self->extra) {
/* element has no children, so raise exception */
PyErr_SetString(
PyExc_ValueError,
"list.remove(x): x not in list"
);
return NULL;
}
for (i = 0; i < self->extra->length; i++) {
if (self->extra->children[i] == subelement)
// When iterating over the list of children, we need to check that the
// list is not cleared (self->extra != NULL) and that we are still within
// the correct bounds (i < self->extra->length).
//
// We deliberately avoid protecting against children lists that grow
// faster than the index since list objects do not protect against it.
int rc = 0;
for (i = 0; self->extra && i < self->extra->length; i++) {
if (self->extra->children[i] == subelement) {
rc = 1;
break;
rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
if (rc > 0)
break;
if (rc < 0)
}
PyObject *child = Py_NewRef(self->extra->children[i]);
rc = PyObject_RichCompareBool(child, subelement, Py_EQ);
Py_DECREF(child);
if (rc < 0) {
return NULL;
}
else if (rc > 0) {
break;
}
}
if (i >= self->extra->length) {
/* subelement is not in children, so raise exception */
PyErr_SetString(
PyExc_ValueError,
"list.remove(x): x not in list"
);
if (rc == 0) {
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}
found = self->extra->children[i];
// An extra check must be done if the mutation occurs at the very last
// step and removes or clears the 'extra' list (the condition on the
// length would not be satisfied any more).
if (self->extra == NULL || i >= self->extra->length) {
Py_RETURN_NONE;
}
PyObject *found = self->extra->children[i];
self->extra->length--;
for (; i < self->extra->length; i++)
for (; i < self->extra->length; i++) {
self->extra->children[i] = self->extra->children[i+1];
}
Py_DECREF(found);
Py_RETURN_NONE;