Patch #1309009: Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format.

Will backport.
This commit is contained in:
Neal Norwitz 2005-09-30 04:46:49 +00:00
parent aa93517de8
commit 484d9a409a
4 changed files with 19 additions and 1 deletion

View file

@@ -889,6 +889,15 @@ def testEncodings():
and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>',
"testEncodings - encoding EURO SIGN")
# Verify that character decoding errors throw exceptions instead of crashing
try:
doc = parseString('<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
except UnicodeDecodeError:
pass
else:
print 'parsing with bad encoding should raise a UnicodeDecodeError'
doc.unlink()
class UserDataHandler: