Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.

This commit is contained in:
Walter Dörwald 2007-08-17 16:41:28 +00:00
parent 437e6a3b15
commit 6e39080649
12 changed files with 999 additions and 2 deletions

View file

@ -244,6 +244,137 @@ class ReadTest(unittest.TestCase):
self.assertEqual(reader.readline(), s5)
self.assertEqual(reader.readline(), u"")
class UTF32Test(ReadTest):
encoding = "utf-32"
spamle = ('\xff\xfe\x00\x00'
's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
spambe = ('\x00\x00\xfe\xff'
'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')
def test_only_one_bom(self):
_,_,reader,writer = codecs.lookup(self.encoding)
# encode some stream
s = StringIO.StringIO()
f = writer(s)
f.write(u"spam")
f.write(u"spam")
d = s.getvalue()
# check whether there is exactly one BOM in it
self.assert_(d == self.spamle or d == self.spambe)
# try to read it back
s = StringIO.StringIO(d)
f = reader(s)
self.assertEquals(f.read(), u"spamspam")
def test_badbom(self):
s = StringIO.StringIO(4*"\xff")
f = codecs.getreader(self.encoding)(s)
self.assertRaises(UnicodeError, f.read)
s = StringIO.StringIO(8*"\xff")
f = codecs.getreader(self.encoding)(s)
self.assertRaises(UnicodeError, f.read)
def test_partial(self):
self.check_partial(
u"\x00\xff\u0100\uffff",
[
u"", # first byte of BOM read
u"", # second byte of BOM read
u"", # third byte of BOM read
u"", # fourth byte of BOM read => byteorder known
u"",
u"",
u"",
u"\x00",
u"\x00",
u"\x00",
u"\x00",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100\uffff",
]
)
def test_errors(self):
self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
"\xff", "strict", True)
class UTF32LETest(ReadTest):
encoding = "utf-32-le"
def test_partial(self):
self.check_partial(
u"\x00\xff\u0100\uffff",
[
u"",
u"",
u"",
u"\x00",
u"\x00",
u"\x00",
u"\x00",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100\uffff",
]
)
def test_simple(self):
self.assertEqual(u"\U00010203".encode(self.encoding), "\x03\x02\x01\x00")
def test_errors(self):
self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
"\xff", "strict", True)
class UTF32BETest(ReadTest):
encoding = "utf-32-be"
def test_partial(self):
self.check_partial(
u"\x00\xff\u0100\uffff",
[
u"",
u"",
u"",
u"\x00",
u"\x00",
u"\x00",
u"\x00",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100",
u"\x00\xff\u0100\uffff",
]
)
def test_simple(self):
self.assertEqual(u"\U00010203".encode(self.encoding), "\x00\x01\x02\x03")
def test_errors(self):
self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
"\xff", "strict", True)
class UTF16Test(ReadTest):
encoding = "utf-16"
@ -1278,6 +1409,9 @@ class WithStmtTest(unittest.TestCase):
def test_main():
test_support.run_unittest(
UTF32Test,
UTF32LETest,
UTF32BETest,
UTF16Test,
UTF16LETest,
UTF16BETest,