gh-121188: Sanitize invalid XML characters in regrtest (#121195)

When creating the JUnit XML file, regrtest now escapes characters
which are invalid in XML, such as the chr(27) control character used
in ANSI escape sequences.
This commit is contained in:
Victor Stinner 2024-07-01 10:30:33 +02:00 committed by GitHub
parent f80376b129
commit af8c3d7a26
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 91 additions and 5 deletions

View file

@ -21,6 +21,8 @@ import sysconfig
import tempfile
import textwrap
import unittest
from xml.etree import ElementTree
from test import support
from test.support import import_helper
from test.support import os_helper
@ -2254,6 +2256,44 @@ class ArgsTestCase(BaseTestCase):
self.check_executed_tests(output, testname, stats=1, parallel=True)
self.assertNotIn('SPAM SPAM SPAM', output)
def test_xml(self):
    r"""Check the JUnit XML report written for a failing, noisy test.

    The generated test prints an ESC character (``\x1b``), which is not
    a valid XML character, and then fails.  The file given to
    ``--junit-xml`` must still be parseable by ElementTree, carry the
    expected counters and test case attributes, and contain the printed
    text with the ESC character replaced by its backslash escape.
    """
    # Raw string: the "\x1b" escape must be interpreted by the generated
    # test module, not by this file.  The body is indented so that it is
    # valid Python once textwrap.dedent() strips the common prefix.
    code = textwrap.dedent(r"""
        import unittest
        from test import support
        class VerboseTests(unittest.TestCase):
            def test_failed(self):
                print("abc \x1b def")
                self.fail()
    """)
    testname = self.create_test(code=code)

    # Run sequentially
    filename = os_helper.TESTFN
    self.addCleanup(os_helper.unlink, filename)

    # The single test fails, so regrtest is expected to exit with
    # EXITCODE_BAD_TEST.
    output = self.run_tests(testname, "--junit-xml", filename,
                            exitcode=EXITCODE_BAD_TEST)
    self.check_executed_tests(output, testname,
                              failed=testname,
                              stats=TestStats(1, 1, 0))

    # Test generated XML
    with open(filename, encoding="utf8") as fp:
        content = fp.read()

    # fromstring() raises ParseError if the file is not well-formed XML.
    testsuite = ElementTree.fromstring(content)
    self.assertEqual(int(testsuite.get('tests')), 1)
    self.assertEqual(int(testsuite.get('errors')), 0)
    self.assertEqual(int(testsuite.get('failures')), 1)

    testcase = testsuite[0][0]
    self.assertEqual(testcase.get('status'), 'run')
    self.assertEqual(testcase.get('result'), 'completed')
    self.assertGreater(float(testcase.get('time')), 0)

    # Require at least one <system-out> element: iterating an empty
    # result would otherwise let the test pass without ever checking
    # that the ESC character was escaped.
    outputs = list(testcase.iter('system-out'))
    self.assertNotEqual(outputs, [],
                        "no <system-out> element in the JUnit XML report")
    for out in outputs:
        self.assertEqual(out.text, r"abc \x1b def")
class TestUtils(unittest.TestCase):
def test_format_duration(self):
@ -2437,6 +2477,25 @@ class TestUtils(unittest.TestCase):
self.assertTrue(match_test(test_chdir))
self.assertFalse(match_test(test_copy))
def test_sanitize_xml(self):
    """Check utils.sanitize_xml() on invalid and on valid XML characters."""
    sanitize = utils.sanitize_xml

    # (input, expected output) pairs, checked in order; the first
    # mismatch fails the test.
    cases = (
        # escape invalid XML characters
        ('abc \x1b\x1f def', r'abc \x1b\x1f def'),
        ('nul:\x00, bell:\x07', r'nul:\x00, bell:\x07'),
        ('surrogate:\uDC80', r'surrogate:\udc80'),
        ('illegal \uFFFE and \uFFFF', r'illegal \ufffe and \uffff'),
        # no escape for valid XML characters
        ('a\n\tb', 'a\n\tb'),
        ('valid t\xe9xt \u20ac', 'valid t\xe9xt \u20ac'),
    )
    for text, expected in cases:
        self.assertEqual(sanitize(text), expected)
# Run the tests of this file when it is executed as a script.
if __name__ == "__main__":
    unittest.main()