Issue #3672: Reject surrogates in utf-8 codec; add surrogates error handler.
This commit is contained in:
Martin v. Löwis 2009-05-02 18:52:14 +00:00
parent 02953d244f
commit db12d454e6
9 changed files with 202 additions and 21 deletions

View file

@ -541,6 +541,17 @@ class UTF8Test(ReadTest):
self.check_state_handling_decode(self.encoding,
u, u.encode(self.encoding))
def test_lone_surrogates(self):
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
def test_surrogates_handler(self):
    # With the "surrogates" error handler, a lone surrogate passes through
    # the UTF-8 codec unchanged in both directions instead of raising.
    # assertEqual is used rather than the deprecated assertEquals alias.
    self.assertEqual("abc\ud800def".encode("utf-8", "surrogates"),
                     b"abc\xed\xa0\x80def")
    self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogates"),
                     "abc\ud800def")
    # The handler must also be retrievable by name from the codecs
    # error-handler registry.
    self.assertTrue(codecs.lookup_error("surrogates"))
class UTF7Test(ReadTest):
encoding = "utf-7"
@ -1023,12 +1034,12 @@ class NameprepTest(unittest.TestCase):
# Skipped
continue
# The Unicode strings are given in UTF-8
orig = str(orig, "utf-8")
orig = str(orig, "utf-8", "surrogates")
if prepped is None:
# Input contains prohibited characters
self.assertRaises(UnicodeError, nameprep, orig)
else:
prepped = str(prepped, "utf-8")
prepped = str(prepped, "utf-8", "surrogates")
try:
self.assertEquals(nameprep(orig), prepped)
except Exception as e: