mirror of
https://github.com/python/cpython.git
synced 2025-12-04 00:30:19 +00:00
Normalize the encoding names for Latin-1 and UTF-8 to 'latin-1' and 'utf-8'.
These spellings are optimized in the Python Unicode implementation to result in more direct processing, bypassing the codec registry. See also issue 11303.
This commit is contained in:
parent
a391b11320
commit
8f36af7a4c
32 changed files with 84 additions and 79 deletions
|
|
@@ -188,24 +188,26 @@ class BaseBytesTest(unittest.TestCase):
|
|||
|
||||
def test_encoding(self):
|
||||
sample = "Hello world\n\u1234\u5678\u9abc"
|
||||
for enc in ("utf8", "utf16"):
|
||||
for enc in ("utf-8", "utf-16"):
|
||||
b = self.type2test(sample, enc)
|
||||
self.assertEqual(b, self.type2test(sample.encode(enc)))
|
||||
self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
|
||||
b = self.type2test(sample, "latin1", "ignore")
|
||||
self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1")
|
||||
b = self.type2test(sample, "latin-1", "ignore")
|
||||
self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
|
||||
|
||||
def test_decode(self):
|
||||
sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
|
||||
for enc in ("utf8", "utf16"):
|
||||
for enc in ("utf-8", "utf-16"):
|
||||
b = self.type2test(sample, enc)
|
||||
self.assertEqual(b.decode(enc), sample)
|
||||
sample = "Hello world\n\x80\x81\xfe\xff"
|
||||
b = self.type2test(sample, "latin1")
|
||||
self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
|
||||
self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
|
||||
self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
|
||||
b = self.type2test(sample, "latin-1")
|
||||
self.assertRaises(UnicodeDecodeError, b.decode, "utf-8")
|
||||
self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n")
|
||||
self.assertEqual(b.decode(errors="ignore", encoding="utf-8"),
|
||||
"Hello world\n")
|
||||
# Default encoding is utf-8
|
||||
self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
|
||||
|
||||
def test_from_int(self):
|
||||
b = self.type2test(0)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue