Change pyexpat test suite to exercise the .returns_unicode attribute,

parsing the sample data once with 8-bit strings and once with Unicode.
This commit is contained in:
Andrew M. Kuchling 2000-06-27 00:37:25 +00:00
parent beba056cd7
commit 7fd7e36b08
2 changed files with 121 additions and 20 deletions

View file

@ -10,10 +10,10 @@ import pyexpat
class Outputter:
def StartElementHandler(self, name, attrs):
print 'Start element:\n\t', name, attrs
print 'Start element:\n\t', repr(name), attrs
def EndElementHandler(self, name):
print 'End element:\n\t', name
print 'End element:\n\t', repr(name)
def CharacterDataHandler(self, data):
data = string.strip(data)
@ -22,13 +22,13 @@ class Outputter:
print '\t', repr(data)
def ProcessingInstructionHandler(self, target, data):
print 'PI:\n\t', target, data
print 'PI:\n\t', repr(target), repr(data)
def StartNamespaceDeclHandler(self, prefix, uri):
print 'NS decl:\n\t', prefix, uri
print 'NS decl:\n\t', repr(prefix), repr(uri)
def EndNamespaceDeclHandler(self, prefix):
print 'End of NS decl:\n\t', prefix
print 'End of NS decl:\n\t', repr(prefix)
def StartCdataSectionHandler(self):
print 'Start of CDATA section'
@ -51,8 +51,9 @@ class Outputter:
print 'Not standalone'
return 1
def ExternalEntityRefHandler(self, context, base, sysId, pubId):
print 'External entity ref:', context, base, sysId, pubId
def ExternalEntityRefHandler(self, *args):
context, base, sysId, pubId = args
print 'External entity ref:', args
return 1
def DefaultHandler(self, userData):
@ -64,7 +65,14 @@ class Outputter:
out = Outputter()
parser = pyexpat.ParserCreate(namespace_separator='!')
for name in ['StartElementHandler', 'EndElementHandler',
# Test getting/setting returns_unicode
parser.returns_unicode = 0 ; assert parser.returns_unicode == 0
parser.returns_unicode = 1 ; assert parser.returns_unicode == 1
parser.returns_unicode = 2 ; assert parser.returns_unicode == 1
parser.returns_unicode = 0 ; assert parser.returns_unicode == 0
HANDLER_NAMES = ['StartElementHandler', 'EndElementHandler',
'CharacterDataHandler', 'ProcessingInstructionHandler',
'UnparsedEntityDeclHandler', 'NotationDeclHandler',
'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
@ -73,7 +81,8 @@ for name in ['StartElementHandler', 'EndElementHandler',
'DefaultHandler', 'DefaultHandlerExpand',
#'NotStandaloneHandler',
'ExternalEntityRefHandler'
]:
]
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
@ -88,7 +97,7 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
%unparsed_entity;
]>
<root>
<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements
</myns:subelement>
@ -97,6 +106,8 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
</root>
"""
# Produce UTF-8 output
parser.returns_unicode = 0
try:
parser.Parse(data, 1)
except pyexpat.error:
@ -105,3 +116,33 @@ except pyexpat.error:
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Try the parse again, this time producing Unicode output
parser = pyexpat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
try:
parser.Parse(data, 1)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Try parsing a file
parser = pyexpat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
import StringIO
file = StringIO.StringIO(data)
try:
parser.ParseFile(file)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex