mirror of
https://github.com/python/cpython.git
synced 2025-11-01 18:51:43 +00:00
Close #1767933: Badly formed XML using etree and utf-16. Patch by Serhiy Storchaka, with some minor fixes by me
This commit is contained in:
parent
1191709b13
commit
00f402bfcb
3 changed files with 258 additions and 123 deletions
|
|
@ -21,7 +21,7 @@ import unittest
|
|||
import weakref
|
||||
|
||||
from test import support
|
||||
from test.support import findfile, import_fresh_module, gc_collect
|
||||
from test.support import TESTFN, findfile, unlink, import_fresh_module, gc_collect
|
||||
|
||||
pyET = None
|
||||
ET = None
|
||||
|
|
@ -888,65 +888,6 @@ def check_encoding(encoding):
|
|||
"""
|
||||
ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
|
||||
|
||||
def encoding():
    r"""
    Test encoding issues.

    Markup characters in text and attribute values must come out as
    entity references, and characters the requested encoding cannot
    represent must come out as numeric character references.

    >>> elem = ET.Element("tag")
    >>> elem.text = "abc"
    >>> serialize(elem)
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="utf-8")
    b'<tag>abc</tag>'
    >>> serialize(elem, encoding="us-ascii")
    b'<tag>abc</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    b"<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"

    >>> elem.text = "<&\"\'>"
    >>> serialize(elem)
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    b'<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii") # cdata characters
    b'<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'

    >>> elem.attrib["key"] = "<&\"\'>"
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    b'<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    b'<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'

    >>> elem.text = '\xe5\xf6\xf6<>'
    >>> elem.attrib.clear()
    >>> serialize(elem)
    '<tag>\xe5\xf6\xf6&lt;&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii")
    b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    b"<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"

    >>> elem.attrib["key"] = '\xe5\xf6\xf6<>'
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="\xe5\xf6\xf6&lt;&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    b'<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
    """
|
||||
|
||||
def methods():
|
||||
r"""
|
||||
Test serialization methods.
|
||||
|
|
@ -2166,16 +2107,185 @@ class ElementSlicingTest(unittest.TestCase):
|
|||
self.assertEqual(self._subelem_tags(e), ['a1'])
|
||||
|
||||
|
||||
class StringIOTest(unittest.TestCase):
|
||||
class IOTest(unittest.TestCase):
    """Serialization and parsing tests against real files, in-memory
    streams and minimal user-defined reader/writer objects.

    Relies on the module-level ``ET`` (the ElementTree implementation
    under test), the ``serialize`` helper, and ``TESTFN``/``unlink``
    from ``test.support``.
    """

    def tearDown(self):
        # Remove the scratch file that the file-based tests write to.
        unlink(TESTFN)

    def test_encoding(self):
        # Test encoding issues.
        #
        # Expected values spell out the XML escaping explicitly: markup
        # characters become entity references, '"' inside an attribute
        # becomes &quot;, and characters that us-ascii cannot represent
        # become numeric character references.
        elem = ET.Element("tag")
        elem.text = "abc"
        self.assertEqual(serialize(elem), '<tag>abc</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag>abc</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag>abc</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag>abc</tag>" % enc).encode(enc))

        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag>&lt;&amp;"\'&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))

        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        self.assertEqual(serialize(elem),
                         '<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag key=\"&lt;&amp;&quot;'&gt;\" />"
                              % enc).encode(enc))

        elem = ET.Element("tag")
        elem.text = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem),
                         '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
                              % enc).encode(enc))

        elem = ET.Element("tag")
        elem.attrib["key"] = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem),
                         '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
        # The endian-specific UTF-16 codecs are covered here as well.
        for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag key=\"\xe5\xf6\xf6&lt;&gt;\" />"
                              % enc).encode(enc))

    def test_write_to_filename(self):
        # write() is given a path rather than an open file object.
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_text_file(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'w', encoding='utf-8') as f:
            tree.write(f, encoding='unicode')
            # write() must not close a file object it did not open.
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'wb') as f:
            tree.write(f)
            # write() must not close a file object it did not open.
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file_with_bom(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        # test BOM writing to buffered file
        with open(TESTFN, 'wb') as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            # Adjacent literals are joined before .encode() is applied,
            # so the whole document is encoded at once.
            self.assertEqual(f.read(),
                             '''<?xml version='1.0' encoding='utf-16'?>\n'''
                             '''<site />'''.encode("utf-16"))
        # test BOM writing to non-buffered file
        with open(TESTFN, 'wb', buffering=0) as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                             '''<?xml version='1.0' encoding='utf-16'?>\n'''
                             '''<site />'''.encode("utf-16"))

    def test_read_from_stringio(self):
        tree = ET.ElementTree()
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        tree.parse(stream)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_stringio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        tree.write(stream, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_bytesio(self):
        tree = ET.ElementTree()
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        tree.parse(raw)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_bytesio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        tree.write(raw)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    class dummy:
        # Bare attribute holder: the tests below attach only the methods
        # they want the reader/writer to expose.
        pass

    def test_read_from_user_text_reader(self):
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = stream.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_text_writer(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        writer = self.dummy()
        writer.write = stream.write
        tree.write(writer, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_user_binary_reader(self):
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = raw.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_binary_writer(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        tree.write(writer)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    def test_write_to_user_binary_writer_with_bom(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        # In addition to write(), this writer exposes seekable() and tell().
        writer.seekable = lambda: True
        writer.tell = raw.tell
        tree.write(writer, encoding="utf-16")
        self.assertEqual(raw.getvalue(),
                         '''<?xml version='1.0' encoding='utf-16'?>\n'''
                         '''<site />'''.encode("utf-16"))
|
||||
|
||||
|
||||
class ParseErrorTest(unittest.TestCase):
|
||||
def test_subclass(self):
|
||||
|
|
@ -2299,7 +2409,7 @@ def test_main(module=None):
|
|||
test_classes = [
|
||||
ElementSlicingTest,
|
||||
BasicElementTest,
|
||||
StringIOTest,
|
||||
IOTest,
|
||||
ParseErrorTest,
|
||||
XincludeTest,
|
||||
ElementTreeTest,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue