mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
Issue #3672: Reject surrogates in utf-8 codec; add surrogates error
handler.
This commit is contained in:
parent
02953d244f
commit
db12d454e6
9 changed files with 202 additions and 21 deletions
|
@@ -541,6 +541,17 @@ class UTF8Test(ReadTest):
|
|||
self.check_state_handling_decode(self.encoding,
|
||||
u, u.encode(self.encoding))
|
||||
|
||||
def test_lone_surrogates(self):
|
||||
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
|
||||
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
|
||||
|
||||
def test_surrogates_handler(self):
|
||||
self.assertEquals("abc\ud800def".encode("utf-8", "surrogates"),
|
||||
b"abc\xed\xa0\x80def")
|
||||
self.assertEquals(b"abc\xed\xa0\x80def".decode("utf-8", "surrogates"),
|
||||
"abc\ud800def")
|
||||
self.assertTrue(codecs.lookup_error("surrogates"))
|
||||
|
||||
class UTF7Test(ReadTest):
|
||||
encoding = "utf-7"
|
||||
|
||||
|
@@ -1023,12 +1034,12 @@ class NameprepTest(unittest.TestCase):
|
|||
# Skipped
|
||||
continue
|
||||
# The Unicode strings are given in UTF-8
|
||||
- orig = str(orig, "utf-8")
|
||||
+ orig = str(orig, "utf-8", "surrogates")
|
||||
if prepped is None:
|
||||
# Input contains prohibited characters
|
||||
self.assertRaises(UnicodeError, nameprep, orig)
|
||||
else:
|
||||
- prepped = str(prepped, "utf-8")
|
||||
+ prepped = str(prepped, "utf-8", "surrogates")
|
||||
try:
|
||||
self.assertEquals(nameprep(orig), prepped)
|
||||
except Exception as e:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue