[3.13] gh-126033: fix UAF in xml.etree.ElementTree.Element.remove when concurrent mutations happen (GH-126124) (#131929)

gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124)
(cherry picked from commit bab1398a47)

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
Miss Islington (bot) 2025-03-31 14:50:03 +02:00 committed by GitHub
parent 588bb6ddf4
commit b41c8cc671
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 214 additions and 37 deletions

View file

@ -18,9 +18,11 @@ import sys
import textwrap import textwrap
import types import types
import unittest import unittest
import unittest.mock as mock
import warnings import warnings
import weakref import weakref
from contextlib import nullcontext
from functools import partial from functools import partial
from itertools import product, islice from itertools import product, islice
from test import support from test import support
@ -121,6 +123,21 @@ ATTLIST_XML = """\
</foo> </foo>
""" """
def is_python_implementation():
assert ET is not None, "ET must be initialized"
assert pyET is not None, "pyET must be initialized"
return ET is pyET
def equal_wrapper(cls):
"""Mock cls.__eq__ to check whether it has been called or not.
The behaviour of cls.__eq__ (side-effects included) is left as is.
"""
eq = cls.__eq__
return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)
def checkwarnings(*filters, quiet=False): def checkwarnings(*filters, quiet=False):
def decorator(test): def decorator(test):
def newtest(*args, **kwargs): def newtest(*args, **kwargs):
@ -2642,6 +2659,7 @@ class BasicElementTest(ElementTestCase, unittest.TestCase):
class BadElementTest(ElementTestCase, unittest.TestCase): class BadElementTest(ElementTestCase, unittest.TestCase):
def test_extend_mutable_list(self): def test_extend_mutable_list(self):
class X: class X:
@property @property
@ -2680,18 +2698,168 @@ class BadElementTest(ElementTestCase, unittest.TestCase):
e = ET.Element('foo') e = ET.Element('foo')
e.extend(L) e.extend(L)
def test_remove_with_mutating(self): def test_remove_with_clear_assume_missing(self):
class X(ET.Element): # gh-126033: Check that a concurrent clear() for an assumed-to-be
def __eq__(self, o): # missing element does not make the interpreter crash.
del e[:] self.do_test_remove_with_clear(raises=True)
return False
e = ET.Element('foo')
e.extend([X('bar')])
self.assertRaises(ValueError, e.remove, ET.Element('baz'))
e = ET.Element('foo') def test_remove_with_clear_assume_existing(self):
e.extend([ET.Element('bar')]) # gh-126033: Check that a concurrent clear() for an assumed-to-be
self.assertRaises(ValueError, e.remove, X('baz')) # existing element does not make the interpreter crash.
self.do_test_remove_with_clear(raises=False)
def do_test_remove_with_clear(self, *, raises):
# Until the discrepency between "del root[:]" and "root.clear()" is
# resolved, we need to keep two tests. Previously, using "del root[:]"
# did not crash with the reproducer of gh-126033 while "root.clear()"
# did.
class E(ET.Element):
"""Local class to be able to mock E.__eq__ for introspection."""
class X(E):
def __eq__(self, o):
del root[:]
return not raises
class Y(E):
def __eq__(self, o):
root.clear()
return not raises
if raises:
get_checker_context = lambda: self.assertRaises(ValueError)
else:
get_checker_context = nullcontext
self.assertIs(E.__eq__, object.__eq__)
for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
self.enterContext(self.subTest(side_effect=side_effect))
# test removing R() from [U()]
for R, U, description in [
(E, Z, "remove missing E() from [Z()]"),
(Z, E, "remove missing Z() from [E()]"),
(Z, Z, "remove missing Z() from [Z()]"),
]:
with self.subTest(description):
root = E('top')
root.extend([U('one')])
with get_checker_context():
root.remove(R('missing'))
# test removing R() from [U(), V()]
cases = self.cases_for_remove_missing_with_mutations(E, Z)
for R, U, V, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), V('two')])
with get_checker_context():
root.remove(R('missing'))
# Test removing root[0] from [Z()].
#
# Since we call root.remove() with root[0], Z.__eq__()
# will not be called (we branch on the fast Py_EQ path).
with self.subTest("remove root[0] from [Z()]"):
root = E('top')
root.append(Z('rem'))
with equal_wrapper(E) as f, equal_wrapper(Z) as g:
root.remove(root[0])
f.assert_not_called()
g.assert_not_called()
# Test removing root[1] (of type R) from [U(), R()].
is_special = is_python_implementation() and raises and Z is Y
if is_python_implementation() and raises and Z is Y:
# In pure Python, using root.clear() sets the children
# list to [] without calling list.clear().
#
# For this reason, the call to root.remove() first
# checks root[0] and sets the children list to []
# since either root[0] or root[1] is an evil element.
#
# Since checking root[1] still uses the old reference
# to the children list, PyObject_RichCompareBool() branches
# to the fast Py_EQ path and Y.__eq__() is called exactly
# once (when checking root[0]).
continue
else:
cases = self.cases_for_remove_existing_with_mutations(E, Z)
for R, U, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), R('rem')])
with get_checker_context():
root.remove(root[1])
def test_remove_with_mutate_root_assume_missing(self):
# gh-126033: Check that a concurrent mutation for an assumed-to-be
# missing element does not make the interpreter crash.
self.do_test_remove_with_mutate_root(raises=True)
def test_remove_with_mutate_root_assume_existing(self):
# gh-126033: Check that a concurrent mutation for an assumed-to-be
# existing element does not make the interpreter crash.
self.do_test_remove_with_mutate_root(raises=False)
def do_test_remove_with_mutate_root(self, *, raises):
E = ET.Element
class Z(E):
def __eq__(self, o):
del root[0]
return not raises
if raises:
get_checker_context = lambda: self.assertRaises(ValueError)
else:
get_checker_context = nullcontext
# test removing R() from [U(), V()]
cases = self.cases_for_remove_missing_with_mutations(E, Z)
for R, U, V, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), V('two')])
with get_checker_context():
root.remove(R('missing'))
# test removing root[1] (of type R) from [U(), R()]
cases = self.cases_for_remove_existing_with_mutations(E, Z)
for R, U, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), R('rem')])
with get_checker_context():
root.remove(root[1])
def cases_for_remove_missing_with_mutations(self, E, Z):
# Cases for removing R() from [U(), V()].
# The case U = V = R = E is not interesting as there is no mutation.
for U, V in [(E, Z), (Z, E), (Z, Z)]:
description = (f"remove missing {E.__name__}() from "
f"[{U.__name__}(), {V.__name__}()]")
yield E, U, V, description
for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
description = (f"remove missing {Z.__name__}() from "
f"[{U.__name__}(), {V.__name__}()]")
yield Z, U, V, description
def cases_for_remove_existing_with_mutations(self, E, Z):
# Cases for removing root[1] (of type R) from [U(), R()].
# The case U = R = E is not interesting as there is no mutation.
for U, R, description in [
(E, Z, "remove root[1] from [E(), Z()]"),
(Z, E, "remove root[1] from [Z(), E()]"),
(Z, Z, "remove root[1] from [Z(), Z()]"),
]:
description = (f"remove root[1] (of type {R.__name__}) "
f"from [{U.__name__}(), {R.__name__}()]")
yield R, U, description
@support.infinite_recursion(25) @support.infinite_recursion(25)
def test_recursive_repr(self): def test_recursive_repr(self):

View file

@ -0,0 +1,3 @@
:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove
<xml.etree.ElementTree.Element.remove>` when the element is
concurrently mutated. Patch by Bénédikt Tran.

View file

@ -847,6 +847,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
if (element_resize(element, self->extra->length) < 0) if (element_resize(element, self->extra->length) < 0)
goto error; goto error;
// TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
for (i = 0; i < self->extra->length; i++) { for (i = 0; i < self->extra->length; i++) {
PyObject* child = deepcopy(st, self->extra->children[i], memo); PyObject* child = deepcopy(st, self->extra->children[i], memo);
if (!child || !Element_Check(st, child)) { if (!child || !Element_Check(st, child)) {
@ -1625,42 +1626,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/ /*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
{ {
Py_ssize_t i; Py_ssize_t i;
int rc; // When iterating over the list of children, we need to check that the
PyObject *found; // list is not cleared (self->extra != NULL) and that we are still within
// the correct bounds (i < self->extra->length).
if (!self->extra) { //
/* element has no children, so raise exception */ // We deliberately avoid protecting against children lists that grow
PyErr_SetString( // faster than the index since list objects do not protect against it.
PyExc_ValueError, int rc = 0;
"list.remove(x): x not in list" for (i = 0; self->extra && i < self->extra->length; i++) {
); if (self->extra->children[i] == subelement) {
return NULL; rc = 1;
}
for (i = 0; i < self->extra->length; i++) {
if (self->extra->children[i] == subelement)
break; break;
rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ); }
if (rc > 0) PyObject *child = Py_NewRef(self->extra->children[i]);
rc = PyObject_RichCompareBool(child, subelement, Py_EQ);
Py_DECREF(child);
if (rc < 0) {
return NULL;
}
else if (rc > 0) {
break; break;
if (rc < 0) }
}
if (rc == 0) {
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL; return NULL;
} }
if (i >= self->extra->length) { // An extra check must be done if the mutation occurs at the very last
/* subelement is not in children, so raise exception */ // step and removes or clears the 'extra' list (the condition on the
PyErr_SetString( // length would not be satisfied any more).
PyExc_ValueError, if (self->extra == NULL || i >= self->extra->length) {
"list.remove(x): x not in list" Py_RETURN_NONE;
);
return NULL;
} }
found = self->extra->children[i]; PyObject *found = self->extra->children[i];
self->extra->length--; self->extra->length--;
for (; i < self->extra->length; i++) for (; i < self->extra->length; i++) {
self->extra->children[i] = self->extra->children[i+1]; self->extra->children[i] = self->extra->children[i+1];
}
Py_DECREF(found); Py_DECREF(found);
Py_RETURN_NONE; Py_RETURN_NONE;