Mirror of https://github.com/python/cpython.git (synced 2025-11-25 21:11:09 +00:00).
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
This commit is contained in:
Parent: 437e6a3b15
Commit: 6e39080649
12 changed files with 999 additions and 2 deletions
|
|
@ -285,7 +285,8 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
|
||||
def test_longstrings(self):
|
||||
# test long strings to check for memory overflow problems
|
||||
errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
|
||||
errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
|
||||
"backslashreplace"]
|
||||
# register the handlers under different names,
|
||||
# to prevent the codec from recognizing the name
|
||||
for err in errors:
|
||||
|
|
@ -293,7 +294,8 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
l = 1000
|
||||
errors += [ "test." + err for err in errors ]
|
||||
for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
|
||||
for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
|
||||
for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
|
||||
"utf-8", "utf-7", "utf-16", "utf-32"):
|
||||
for err in errors:
|
||||
try:
|
||||
uni.encode(enc, err)
|
||||
|
|
|
|||
|
|
@ -244,6 +244,137 @@ class ReadTest(unittest.TestCase):
|
|||
self.assertEqual(reader.readline(), s5)
|
||||
self.assertEqual(reader.readline(), u"")
|
||||
|
||||
class UTF32Test(ReadTest):
    """Tests for the "utf-32" codec, which carries a byte order mark.

    ``encoding`` names the codec under test.  ``spamle`` and ``spambe``
    are the byte strings accepted as the encoding of u"spam" written
    twice: one BOM followed by eight 4-byte code units, in little- and
    big-endian order respectively.
    """
    encoding = "utf-32"

    # BOM + u"spamspam", little-endian code units.
    spamle = ('\xff\xfe\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
    # BOM + u"spamspam", big-endian code units.
    spambe = ('\x00\x00\xfe\xff'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

    def test_only_one_bom(self):
        # Two write() calls on the same stream writer must produce a
        # single BOM at the start of the output, not one per call.
        reader = codecs.getreader(self.encoding)
        writer = codecs.getwriter(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it; either byte
        # order is an acceptable result
        self.assertTrue(d in (self.spamle, self.spambe))
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        # A stream that does not start with a valid BOM must be rejected
        # when read: first with exactly four bytes, then with eight.
        s = StringIO.StringIO(4*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO(8*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        # One expected result per input byte: 4 BOM bytes followed by
        # four 4-byte code units (20 entries).  A character only shows up
        # once its last byte has been seen.
        # NOTE(review): check_partial is inherited from ReadTest, which is
        # not shown here -- presumably it feeds the encoded text one byte
        # at a time; confirm against the base class.
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read
                u"", # third byte of BOM read
                u"", # fourth byte of BOM read => byteorder known
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        # A lone byte cannot form a complete code unit; with the last
        # argument (final) set to True, strict decoding must fail.
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                          "\xff", "strict", True)
|
||||
|
||||
class UTF32LETest(ReadTest):
    """Tests for the "utf-32-le" codec (fixed little-endian byte order)."""
    encoding = "utf-32-le"

    def test_partial(self):
        # No BOM is expected here, so after ``fed`` input bytes exactly
        # ``fed // 4`` characters are available: three empty results,
        # then each prefix repeated four times, ending with the full
        # text (16 entries in total).
        # NOTE(review): check_partial comes from ReadTest, which is not
        # shown here -- confirm it consumes the input byte by byte.
        text = u"\x00\xff\u0100\uffff"
        prefixes = [text[:fed // 4] for fed in range(1, 4 * len(text) + 1)]
        self.check_partial(text, prefixes)

    def test_simple(self):
        # A non-BMP code point is encoded as one code unit, low byte first.
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x03\x02\x01\x00")

    def test_errors(self):
        # A single byte is an incomplete code unit; strict decoding with
        # the last argument (final) set to True has to raise.
        decode = codecs.utf_32_le_decode
        self.assertRaises(UnicodeDecodeError, decode, "\xff", "strict", True)
|
||||
|
||||
class UTF32BETest(ReadTest):
    """Tests for the "utf-32-be" codec (fixed big-endian byte order)."""
    encoding = "utf-32-be"

    def test_partial(self):
        # No BOM is expected here, so after ``fed`` input bytes exactly
        # ``fed // 4`` characters are available: three empty results,
        # then each prefix repeated four times, ending with the full
        # text (16 entries in total).
        # NOTE(review): check_partial comes from ReadTest, which is not
        # shown here -- confirm it consumes the input byte by byte.
        text = u"\x00\xff\u0100\uffff"
        prefixes = [text[:fed // 4] for fed in range(1, 4 * len(text) + 1)]
        self.check_partial(text, prefixes)

    def test_simple(self):
        # A non-BMP code point is encoded as one code unit, high byte first.
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x00\x01\x02\x03")

    def test_errors(self):
        # A single byte is an incomplete code unit; strict decoding with
        # the last argument (final) set to True has to raise.
        decode = codecs.utf_32_be_decode
        self.assertRaises(UnicodeDecodeError, decode, "\xff", "strict", True)
|
||||
|
||||
class UTF16Test(ReadTest):
|
||||
encoding = "utf-16"
|
||||
|
||||
|
|
@ -1278,6 +1409,9 @@ class WithStmtTest(unittest.TestCase):
|
|||
|
||||
def test_main():
|
||||
test_support.run_unittest(
|
||||
UTF32Test,
|
||||
UTF32LETest,
|
||||
UTF32BETest,
|
||||
UTF16Test,
|
||||
UTF16LETest,
|
||||
UTF16BETest,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue