mirror of
https://github.com/python/cpython.git
synced 2025-07-28 21:55:21 +00:00
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
This commit is contained in:
parent
437e6a3b15
commit
6e39080649
12 changed files with 999 additions and 2 deletions
|
@ -244,6 +244,137 @@ class ReadTest(unittest.TestCase):
|
|||
self.assertEqual(reader.readline(), s5)
|
||||
self.assertEqual(reader.readline(), u"")
|
||||
|
||||
class UTF32Test(ReadTest):
    """Tests for the generic "utf-32" codec, which uses a byte order mark.

    The shared reader tests come from ReadTest; this class adds checks
    for BOM emission, BOM validation, incremental decoding and error
    reporting.
    """

    encoding = "utf-32"

    # u"spamspam" as UTF-32 with a single leading BOM, little-endian.
    spamle = ('\xff\xfe\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
    # The same text and layout, big-endian.
    spambe = ('\x00\x00\xfe\xff'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

    def test_only_one_bom(self):
        # Two writes through the same stream writer must yield exactly
        # one BOM (at the very start), not one per write() call.
        writer = codecs.getwriter(self.encoding)
        reader = codecs.getreader(self.encoding)

        # Encode some text in two steps.
        raw = StringIO.StringIO()
        out = writer(raw)
        out.write(u"spam")
        out.write(u"spam")
        encoded = raw.getvalue()

        # Either byte order is acceptable, but only with a single BOM.
        self.assertTrue(encoded in (self.spamle, self.spambe))

        # The encoded bytes must decode back to the original text.
        decoded = reader(StringIO.StringIO(encoded)).read()
        self.assertEqual(decoded, u"spamspam")

    def test_badbom(self):
        # Input made only of 0xff bytes does not match either BOM shown
        # in spamle/spambe; reading it must raise UnicodeError, both
        # for one 4-byte unit and for two.
        for length in (4, 8):
            stream = StringIO.StringIO(length * "\xff")
            reader = codecs.getreader(self.encoding)(stream)
            self.assertRaises(UnicodeError, reader.read)

    def test_partial(self):
        # Each list entry is the text expected after one more step of
        # the partial read performed by the inherited check_partial.
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"",  # first byte of BOM read
                u"",  # second byte of BOM read
                u"",  # third byte of BOM read
                u"",  # fourth byte of BOM read => byteorder known
                # u"\x00" needs all four of its bytes before it appears
                u"",
                u"",
                u"",
                u"\x00",
                # u"\xff"
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                # u"\u0100"
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                # u"\uffff"
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        # A lone byte decoded with "strict" errors and True as the
        # third argument must raise UnicodeDecodeError.
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                          "\xff", "strict", True)
|
||||
|
||||
class UTF32LETest(ReadTest):
    """Tests for the "utf-32-le" codec on top of the ReadTest checks."""

    encoding = "utf-32-le"

    def test_partial(self):
        # Expected text after each step of the partial read done by the
        # inherited check_partial.  No BOM entries are listed here: the
        # first character shows up at the fourth step and every further
        # character four steps later.
        text = u"\x00\xff\u0100\uffff"
        expected = (
            [u""] * 3
            + [u"\x00"] * 4
            + [u"\x00\xff"] * 4
            + [u"\x00\xff\u0100"] * 4
            + [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(text, expected)

    def test_simple(self):
        # U+10203 must come out least significant byte first.
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x03\x02\x01\x00")

    def test_errors(self):
        # A lone byte decoded with "strict" errors and True as the
        # third argument must raise UnicodeDecodeError.
        decode_args = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
                          *decode_args)
|
||||
|
||||
class UTF32BETest(ReadTest):
    """Tests for the "utf-32-be" codec on top of the ReadTest checks."""

    encoding = "utf-32-be"

    def test_partial(self):
        # Expected text after each step of the partial read done by the
        # inherited check_partial.  No BOM entries are listed here: the
        # first character shows up at the fourth step and every further
        # character four steps later.
        text = u"\x00\xff\u0100\uffff"
        expected = (
            [u""] * 3
            + [u"\x00"] * 4
            + [u"\x00\xff"] * 4
            + [u"\x00\xff\u0100"] * 4
            + [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(text, expected)

    def test_simple(self):
        # U+10203 must come out most significant byte first.
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x00\x01\x02\x03")

    def test_errors(self):
        # A lone byte decoded with "strict" errors and True as the
        # third argument must raise UnicodeDecodeError.
        decode_args = ("\xff", "strict", True)
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
                          *decode_args)
|
||||
|
||||
class UTF16Test(ReadTest):
|
||||
encoding = "utf-16"
|
||||
|
||||
|
@ -1278,6 +1409,9 @@ class WithStmtTest(unittest.TestCase):
|
|||
|
||||
def test_main():
|
||||
test_support.run_unittest(
|
||||
UTF32Test,
|
||||
UTF32LETest,
|
||||
UTF32BETest,
|
||||
UTF16Test,
|
||||
UTF16LETest,
|
||||
UTF16BETest,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue