Patch #1309009, Fix segfault in pyexpat when the XML document is

in latin_1, but Python incorrectly assumes it is in UTF-8 format. Will backport.
2025-11-26 13:22:51 +00:00 · 2005-09-30 04:46:49 +00:00 · 2005-09-30 04:46:49 +00:00 · 484d9a409a
commit 484d9a409a
parent aa93517de8
4 changed files with 19 additions and 1 deletions
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -889,6 +889,15 @@ def testEncodings():
            and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
            and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>',
            "testEncodings - encoding EURO SIGN")
+
+    # Verify that character decoding errors throw exceptions instead of crashing
+    try:
+        doc = parseString('<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
+    except UnicodeDecodeError:
+        pass
+    else:
+        print 'parsing with bad encoding should raise a UnicodeDecodeError'
+
    doc.unlink()

 class UserDataHandler: