[3.12] gh-115398: Expose Expat >=2.6.0 reparse deferral API (CVE-2023-52425) (GH-115623) (GH-116248)

Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding five new methods:

- `xml.etree.ElementTree.XMLParser.flush`
- `xml.etree.ElementTree.XMLPullParser.flush`
- `xml.parsers.expat.xmlparser.GetReparseDeferralEnabled`
- `xml.parsers.expat.xmlparser.SetReparseDeferralEnabled`
- `xml.sax.expatreader.ExpatParser.flush`

Based on the "flush" idea from https://github.com/python/cpython/pull/115138#issuecomment-1932444270 .

- Please treat as a security fix related to CVE-2023-52425.

(cherry picked from commit 6a95676bb5)
(cherry picked from commit 73807eb634)
(cherry picked from commit eda2963378)

---------

Includes code suggested-by: Snild Dolkow <snild@sony.com>
and by core dev Serhiy Storchaka.
Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
Sebastian Pipping 2024-03-06 23:01:45 +01:00 committed by GitHub
parent 2528e46470
commit 0a01ed6c2a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 439 additions and 21 deletions

View file

@ -121,10 +121,6 @@ ATTLIST_XML = """\
</foo>
"""
fails_with_expat_2_6_0 = (unittest.expectedFailure
if pyexpat.version_info >= (2, 6, 0) else
lambda test: test)
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
@ -1382,12 +1378,14 @@ class ElementTreeTest(unittest.TestCase):
class XMLPullParserTest(unittest.TestCase):
def _feed(self, parser, data, chunk_size=None):
def _feed(self, parser, data, chunk_size=None, flush=False):
if chunk_size is None:
parser.feed(data)
else:
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])
if flush:
parser.flush()
def assert_events(self, parser, expected, max_events=None):
self.assertEqual(
@ -1405,34 +1403,32 @@ class XMLPullParserTest(unittest.TestCase):
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)
def test_simple_xml(self, chunk_size=None):
def test_simple_xml(self, chunk_size=None, flush=False):
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
self._feed(parser, "<!-- comment -->\n", chunk_size)
self._feed(parser, "<!-- comment -->\n", chunk_size, flush)
self.assert_event_tags(parser, [])
self._feed(parser,
"<root>\n <element key='value'>text</element",
chunk_size)
chunk_size, flush)
self.assert_event_tags(parser, [])
self._feed(parser, ">\n", chunk_size)
self._feed(parser, ">\n", chunk_size, flush)
self.assert_event_tags(parser, [('end', 'element')])
self._feed(parser, "<element>text</element>tail\n", chunk_size)
self._feed(parser, "<empty-element/>\n", chunk_size)
self._feed(parser, "<element>text</element>tail\n", chunk_size, flush)
self._feed(parser, "<empty-element/>\n", chunk_size, flush)
self.assert_event_tags(parser, [
('end', 'element'),
('end', 'empty-element'),
])
self._feed(parser, "</root>\n", chunk_size)
self._feed(parser, "</root>\n", chunk_size, flush)
self.assert_event_tags(parser, [('end', 'root')])
self.assertIsNone(parser.close())
@fails_with_expat_2_6_0
def test_simple_xml_chunk_1(self):
self.test_simple_xml(chunk_size=1)
self.test_simple_xml(chunk_size=1, flush=True)
@fails_with_expat_2_6_0
def test_simple_xml_chunk_5(self):
self.test_simple_xml(chunk_size=5)
self.test_simple_xml(chunk_size=5, flush=True)
def test_simple_xml_chunk_22(self):
self.test_simple_xml(chunk_size=22)
@ -1631,6 +1627,57 @@ class XMLPullParserTest(unittest.TestCase):
with self.assertRaises(ValueError):
ET.XMLPullParser(events=('start', 'end', 'bogus'))
def test_flush_reparse_deferral_enabled(self):
    """Check that flush() releases events while reparse deferral stays on.

    The test is skipped for Expat versions older than 2.6.0.  It feeds
    ``<doc`` and ``>`` as separate chunks, expects no events yet, calls
    ``flush()`` and then expects the ``start`` event for ``doc``.  The
    deferral flag is expected to read True both before and after the flush.
    """
    if pyexpat.version_info < (2, 6, 0):
        self.skipTest(f'Expat {pyexpat.version_info} does not '
                      'support reparse deferral')

    pull_parser = ET.XMLPullParser(events=('start', 'end'))

    def check_deferral_enabled():
        # The nested ``_parser._parser`` object is only inspected when
        # ET is pyET (presumably the pure-Python implementation -- confirm).
        if ET is pyET:
            self.assertTrue(
                pull_parser._parser._parser.GetReparseDeferralEnabled())

    # Deliver the start tag in two pieces.
    pull_parser.feed("<doc")
    pull_parser.feed(">")

    self.assert_event_tags(pull_parser, [])  # i.e. no elements started
    check_deferral_enabled()

    pull_parser.flush()

    self.assert_event_tags(pull_parser, [('start', 'doc')])
    check_deferral_enabled()

    pull_parser.feed("</doc>")
    pull_parser.close()

    self.assert_event_tags(pull_parser, [('end', 'doc')])
def test_flush_reparse_deferral_disabled(self):
    """Check flush() behaviour after reparse deferral has been switched off.

    ``<doc`` and ``>`` are fed as separate chunks.  On Expat >= 2.6.0 the
    deferral is then turned off via ``SetReparseDeferralEnabled(False)``;
    when ET is not pyET the test is skipped instead because, per the skip
    message, those methods are not available in the C implementation.
    No events are expected before ``flush()``, and the ``start`` event for
    ``doc`` is expected right after it.
    """
    parser = ET.XMLPullParser(events=('start', 'end'))

    for chunk in ("<doc", ">"):
        parser.feed(chunk)

    if pyexpat.version_info >= (2, 6, 0):
        if ET is not pyET:
            self.skipTest('XMLParser.(Get|Set)ReparseDeferralEnabled '
                          'methods not available in C')
        parser._parser._parser.SetReparseDeferralEnabled(False)

    self.assert_event_tags(parser, [])  # i.e. no elements started
    if ET is pyET:
        self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled())

    parser.flush()

    self.assert_event_tags(parser, [('start', 'doc')])
    if ET is pyET:
        # The flush must not have re-enabled the deferral.
        self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled())

    parser.feed("</doc>")
    parser.close()

    self.assert_event_tags(parser, [('end', 'doc')])
#
# xinclude tests (samples from appendix C of the xinclude specification)