[3.12] gh-121188: Sanitize invalid XML characters in regrtest (GH-121195) (#121205)

gh-121188: Sanitize invalid XML characters in regrtest (GH-121195)

When creating the JUnit XML file, regrtest now escapes characters
which are invalid in XML, such as the chr(27) control character used
in ANSI escape sequences.
(cherry picked from commit af8c3d7a26)

Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Miss Islington (bot) 2024-07-01 10:47:38 +02:00 committed by GitHub
parent 99bc8589f0
commit b80edafff2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 91 additions and 5 deletions

View file

@ -21,6 +21,8 @@ import sysconfig
import tempfile
import textwrap
import unittest
from xml.etree import ElementTree
from test import support
from test.support import os_helper
from test.libregrtest import cmdline
@ -2221,6 +2223,44 @@ class ArgsTestCase(BaseTestCase):
self.check_executed_tests(output, testname, stats=1, parallel=True)
self.assertNotIn('SPAM SPAM SPAM', output)
def test_xml(self):
    """Check the JUnit XML written for a failing test that prints chr(27).

    A temporary test module prints an ESC character and then fails.  The
    file produced by ``--junit-xml`` must parse with ElementTree, report
    one test / zero errors / one failure, and hold the escaped spelling
    of the ESC character in any captured ``system-out`` text.
    """
    # Source of the temporary test module; the raw string keeps the
    # backslash escape literal so the ESC is produced by the child test.
    source = textwrap.dedent(r"""
        import unittest
        from test import support
        class VerboseTests(unittest.TestCase):
            def test_failed(self):
                print("abc \x1b def")
                self.fail()
    """)
    testname = self.create_test(code=source)

    # Run sequentially
    xml_path = os_helper.TESTFN
    self.addCleanup(os_helper.unlink, xml_path)

    stdout = self.run_tests(
        testname, "--junit-xml", xml_path,
        exitcode=EXITCODE_BAD_TEST,
    )
    self.check_executed_tests(
        stdout, testname,
        failed=testname,
        stats=TestStats(1, 1, 0),
    )

    # Test generated XML
    with open(xml_path, encoding="utf8") as xml_file:
        xml_text = xml_file.read()
    root = ElementTree.fromstring(xml_text)

    # Counters stored as attributes on the parsed root element.
    for attr, expected in (('tests', 1), ('errors', 0), ('failures', 1)):
        self.assertEqual(int(root.get(attr)), expected)

    # First child of the root's first child is the test case entry.
    case = root[0][0]
    self.assertEqual(case.get('status'), 'run')
    self.assertEqual(case.get('result'), 'completed')
    self.assertGreater(float(case.get('time')), 0)

    # NOTE(review): this loop asserts nothing when the test case has no
    # 'system-out' child -- confirm that one is always emitted.
    escaped_output = r"abc \x1b def"
    for captured in case.iter('system-out'):
        self.assertEqual(captured.text, escaped_output)
class TestUtils(unittest.TestCase):
def test_format_duration(self):
@ -2403,6 +2443,25 @@ class TestUtils(unittest.TestCase):
self.assertTrue(match_test(test_chdir))
self.assertFalse(match_test(test_copy))
def test_sanitize_xml(self):
    """Check which characters utils.sanitize_xml() escapes and which it keeps."""
    sanitize = utils.sanitize_xml

    # (input, expected output) pairs, asserted in the order listed.
    cases = (
        # escape invalid XML characters
        ('abc \x1b\x1f def', r'abc \x1b\x1f def'),
        ('nul:\x00, bell:\x07', r'nul:\x00, bell:\x07'),
        ('surrogate:\uDC80', r'surrogate:\udc80'),
        ('illegal \uFFFE and \uFFFF', r'illegal \ufffe and \uffff'),
        # no escape for valid XML characters
        ('a\n\tb', 'a\n\tb'),
        ('valid t\xe9xt \u20ac', 'valid t\xe9xt \u20ac'),
    )
    for raw_text, expected in cases:
        self.assertEqual(sanitize(raw_text), expected)
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
unittest.main()