Issue #8047: Fix the xml.etree serializer to return bytes by default.

Pass ``encoding="unicode"`` to the serializer to generate a Unicode string instead of bytes.
This commit is contained in:
Florent Xicluna 2010-08-08 19:48:29 +00:00
parent 1a0a737b13
commit c17f17294f
4 changed files with 73 additions and 57 deletions

View file

@ -71,14 +71,14 @@ def check_method(method):
if not hasattr(method, '__call__'):
print(method, "not callable")
def serialize(elem, to_string=True, **options):
def serialize(elem, to_string=True, encoding='unicode', **options):
import io
if options.get("encoding"):
if encoding != 'unicode':
file = io.BytesIO()
else:
file = io.StringIO()
tree = ET.ElementTree(elem)
tree.write(file, **options)
tree.write(file, encoding=encoding, **options)
if to_string:
return file.getvalue()
else:
@ -537,7 +537,7 @@ def attrib():
>>> elem.set('testa', 'testval')
>>> elem.set('testb', 'test2')
>>> ET.tostring(elem)
'<test testa="testval" testb="test2">aa</test>'
b'<test testa="testval" testb="test2">aa</test>'
>>> sorted(elem.keys())
['testa', 'testb']
>>> sorted(elem.items())
@ -547,7 +547,7 @@ def attrib():
>>> elem.attrib['testb'] = 'test1'
>>> elem.attrib['testc'] = 'test2'
>>> ET.tostring(elem)
'<test testa="testval" testb="test1" testc="test2">aa</test>'
b'<test testa="testval" testb="test1" testc="test2">aa</test>'
"""
def makeelement():
@ -587,7 +587,7 @@ def parsefile():
>>> tree = ET.parse(SIMPLE_XMLFILE)
>>> normalize_crlf(tree)
>>> tree.write(sys.stdout)
>>> tree.write(sys.stdout, encoding='unicode')
<root>
<element key="value">text</element>
<element>text</element>tail
@ -595,7 +595,7 @@ def parsefile():
</root>
>>> tree = ET.parse(SIMPLE_NS_XMLFILE)
>>> normalize_crlf(tree)
>>> tree.write(sys.stdout)
>>> tree.write(sys.stdout, encoding='unicode')
<ns0:root xmlns:ns0="namespace">
<ns0:element key="value">text</ns0:element>
<ns0:element>text</ns0:element>tail
@ -636,17 +636,17 @@ def parsefile():
def parseliteral():
"""
>>> element = ET.XML("<html><body>text</body></html>")
>>> ET.ElementTree(element).write(sys.stdout)
>>> ET.ElementTree(element).write(sys.stdout, encoding='unicode')
<html><body>text</body></html>
>>> element = ET.fromstring("<html><body>text</body></html>")
>>> ET.ElementTree(element).write(sys.stdout)
>>> ET.ElementTree(element).write(sys.stdout, encoding='unicode')
<html><body>text</body></html>
>>> sequence = ["<html><body>", "text</bo", "dy></html>"]
>>> element = ET.fromstringlist(sequence)
>>> print(ET.tostring(element))
<html><body>text</body></html>
>>> print("".join(ET.tostringlist(element)))
<html><body>text</body></html>
b'<html><body>text</body></html>'
>>> print(b"".join(ET.tostringlist(element)))
b'<html><body>text</body></html>'
>>> ET.tostring(element, "ascii")
b"<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
>>> _, ids = ET.XMLID("<html><body>text</body></html>")
@ -875,10 +875,10 @@ def writestring():
"""
>>> elem = ET.XML("<html><body>text</body></html>")
>>> ET.tostring(elem)
'<html><body>text</body></html>'
b'<html><body>text</body></html>'
>>> elem = ET.fromstring("<html><body>text</body></html>")
>>> ET.tostring(elem)
'<html><body>text</body></html>'
b'<html><body>text</body></html>'
"""
def check_encoding(encoding):
@ -1233,14 +1233,14 @@ def processinginstruction():
Test ProcessingInstruction directly
>>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
'<?test instruction?>'
b'<?test instruction?>'
>>> ET.tostring(ET.PI('test', 'instruction'))
'<?test instruction?>'
b'<?test instruction?>'
Issue #2746
>>> ET.tostring(ET.PI('test', '<testing&>'))
'<?test <testing&>?>'
b'<?test <testing&>?>'
>>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin1')
b"<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
"""
@ -1643,11 +1643,11 @@ def bug_200708_newline():
>>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
>>> ET.tostring(e)
'<SomeTag text="def _f():&#10; return 3&#10;" />'
b'<SomeTag text="def _f():&#10; return 3&#10;" />'
>>> ET.XML(ET.tostring(e)).get("text")
'def _f():\n return 3\n'
>>> ET.tostring(ET.XML(ET.tostring(e)))
'<SomeTag text="def _f():&#10; return 3&#10;" />'
b'<SomeTag text="def _f():&#10; return 3&#10;" />'
"""
@ -1698,15 +1698,15 @@ def bug_200709_register_namespace():
"""
>>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
>>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
>>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'
b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'
And the Dublin Core namespace is in the default list:
>>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'
b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'
"""
@ -1792,7 +1792,7 @@ def check_issue3151():
'{${stuff}}localname'
>>> t = ET.ElementTree(e)
>>> ET.tostring(e)
'<ns0:localname xmlns:ns0="${stuff}" />'
b'<ns0:localname xmlns:ns0="${stuff}" />'
"""