gh-115398: Expose Expat >=2.6.0 reparse deferral API (CVE-2023-52425) (GH-115623)

Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding five new methods:

- `xml.etree.ElementTree.XMLParser.flush`
- `xml.etree.ElementTree.XMLPullParser.flush`
- `xml.parsers.expat.xmlparser.GetReparseDeferralEnabled`
- `xml.parsers.expat.xmlparser.SetReparseDeferralEnabled`
- `xml.sax.expatreader.ExpatParser.flush`

Based on the "flush" idea from https://github.com/python/cpython/pull/115138#issuecomment-1932444270 .

### Notes

- Please treat as a security fix related to CVE-2023-52425.

Includes code suggested by Snild Dolkow <snild@sony.com>
and by core developer Serhiy Storchaka.
This commit is contained in:
Sebastian Pipping 2024-02-29 23:52:50 +01:00 committed by GitHub
parent d01886c5c9
commit 6a95676bb5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 435 additions and 21 deletions

View file

@ -19,6 +19,7 @@ from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from io import BytesIO, StringIO
import codecs
import os.path
import pyexpat
import shutil
import sys
from urllib.error import URLError
@ -1214,6 +1215,56 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")
def test_flush_reparse_deferral_enabled(self):
    # Check that flush() delivers a pending start tag to the content
    # handler while leaving reparse deferral switched on.
    if pyexpat.version_info < (2, 6, 0):
        self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral')

    output = BytesIO()
    reader = create_parser()
    reader.setContentHandler(XMLGenerator(output))

    # Feed the start tag in two pieces.
    reader.feed("<doc")
    reader.feed(">")

    self.assertEqual(output.getvalue(), start)  # i.e. no elements started

    # Deferral is reported as enabled both before and after the flush.
    self.assertTrue(reader._parser.GetReparseDeferralEnabled())
    reader.flush()
    self.assertTrue(reader._parser.GetReparseDeferralEnabled())

    # The flush made the start tag reach the generator.
    self.assertEqual(output.getvalue(), start + b"<doc>")

    reader.feed("</doc>")
    reader.close()

    self.assertEqual(output.getvalue(), start + b"<doc></doc>")
def test_flush_reparse_deferral_disabled(self):
    # Check that flush() works with reparse deferral switched off and
    # that it leaves deferral switched off afterwards.
    output = BytesIO()
    reader = create_parser()
    reader.setContentHandler(XMLGenerator(output))

    # Feed the start tag in two pieces.
    reader.feed("<doc")
    reader.feed(">")

    # Only Expat >= 2.6.0 is asked to turn deferral off; for older
    # versions the test proceeds straight to the checks below.
    if pyexpat.version_info >= (2, 6, 0):
        reader._parser.SetReparseDeferralEnabled(False)
        self.assertEqual(output.getvalue(), start)  # i.e. no elements started

    # Deferral is reported as disabled both before and after the flush.
    self.assertFalse(reader._parser.GetReparseDeferralEnabled())
    reader.flush()
    self.assertFalse(reader._parser.GetReparseDeferralEnabled())

    self.assertEqual(output.getvalue(), start + b"<doc>")

    reader.feed("</doc>")
    reader.close()

    self.assertEqual(output.getvalue(), start + b"<doc></doc>")
# ===== Locator support
def test_expat_locator_noinfo(self):