Issue #3672: Reject surrogates in utf-8 codec; add surrogates error handler.
This commit is contained in:
Martin v. Löwis 2009-05-02 18:52:14 +00:00
parent 02953d244f
commit db12d454e6
9 changed files with 202 additions and 21 deletions

View file

@@ -169,13 +169,13 @@ class BaseBytesTest(unittest.TestCase):
self.assertEqual(b[start:stop:step], self.type2test(L[start:stop:step]))
def test_encoding(self):
# Checks that self.type2test(text, encoding[, errors]) produces the same
# bytes as encoding the text explicitly.
#
# NOTE(review): this span is a diff hunk whose +/- markers and indentation
# were lost. The two consecutive `sample = ...` lines and the two
# consecutive final assertEqual lines look like before/after pairs (old
# line first, new line second) -- confirm against the original commit.
#
# Presumed pre-commit sample: ends with \udef0, a lone surrogate code point.
sample = "Hello world\n\u1234\u5678\u9abc\udef0"
# Presumed post-commit sample: same text without the trailing \udef0.
sample = "Hello world\n\u1234\u5678\u9abc"
# Constructing from (sample, enc) must equal constructing from the
# already-encoded bytes, for both encodings.
for enc in ("utf8", "utf16"):
b = self.type2test(sample, enc)
self.assertEqual(b, self.type2test(sample.encode(enc)))
# With no error handler given, the non-Latin-1 characters in sample must
# raise UnicodeEncodeError.
self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
# With "ignore", the result is compared against the sample minus its
# trailing non-Latin-1 characters.
b = self.type2test(sample, "latin1", "ignore")
# Presumed pre-commit assertion: strips 4 trailing characters, matching the
# sample that still ends with \udef0.
self.assertEqual(b, self.type2test(sample[:-4], "utf-8"))
# Presumed post-commit assertion: strips 3 trailing characters, matching
# the sample without \udef0.
self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
def test_decode(self):
sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"