mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
Issue #10546: UTF-16-LE and UTF-16-BE *do* support non-BMP characters
Fix the doc and add tests.
This commit is contained in:
parent
84cc06288d
commit
53a9dd776e
2 changed files with 14 additions and 2 deletions
|
@@ -1114,9 +1114,9 @@ particular, the following variants typically exist:
|
||||||
+-----------------+--------------------------------+--------------------------------+
|
+-----------------+--------------------------------+--------------------------------+
|
||||||
| utf_16 | U16, utf16 | all languages |
|
| utf_16 | U16, utf16 | all languages |
|
||||||
+-----------------+--------------------------------+--------------------------------+
|
+-----------------+--------------------------------+--------------------------------+
|
||||||
| utf_16_be | UTF-16BE | all languages (BMP only) |
|
| utf_16_be | UTF-16BE | all languages |
|
||||||
+-----------------+--------------------------------+--------------------------------+
|
+-----------------+--------------------------------+--------------------------------+
|
||||||
| utf_16_le | UTF-16LE | all languages (BMP only) |
|
| utf_16_le | UTF-16LE | all languages |
|
||||||
+-----------------+--------------------------------+--------------------------------+
|
+-----------------+--------------------------------+--------------------------------+
|
||||||
| utf_7 | U7, unicode-1-1-utf-7 | all languages |
|
| utf_7 | U7, unicode-1-1-utf-7 | all languages |
|
||||||
+-----------------+--------------------------------+--------------------------------+
|
+-----------------+--------------------------------+--------------------------------+
|
||||||
|
|
|
@@ -544,6 +544,12 @@ class UTF16LETest(ReadTest):
|
||||||
self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
|
self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
|
||||||
b"\xff", "strict", True)
|
b"\xff", "strict", True)
|
||||||
|
|
||||||
|
def test_nonbmp(self):
|
||||||
|
self.assertEqual("\U00010203".encode(self.encoding),
|
||||||
|
b'\x00\xd8\x03\xde')
|
||||||
|
self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
|
||||||
|
"\U00010203")
|
||||||
|
|
||||||
class UTF16BETest(ReadTest):
|
class UTF16BETest(ReadTest):
|
||||||
encoding = "utf-16-be"
|
encoding = "utf-16-be"
|
||||||
|
|
||||||
|
@@ -566,6 +572,12 @@ class UTF16BETest(ReadTest):
|
||||||
self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
|
self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
|
||||||
b"\xff", "strict", True)
|
b"\xff", "strict", True)
|
||||||
|
|
||||||
|
def test_nonbmp(self):
|
||||||
|
self.assertEqual("\U00010203".encode(self.encoding),
|
||||||
|
b'\xd8\x00\xde\x03')
|
||||||
|
self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
|
||||||
|
"\U00010203")
|
||||||
|
|
||||||
class UTF8Test(ReadTest):
|
class UTF8Test(ReadTest):
|
||||||
encoding = "utf-8"
|
encoding = "utf-8"
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue