Issue #8092: Fix PyUnicode_EncodeUTF8() to support error handlers that produce a unicode string (e.g. backslashreplace)
This commit is contained in:
Victor Stinner 2010-04-22 19:38:16 +00:00
parent 29619b2aff
commit 31be90b0c7
3 changed files with 97 additions and 51 deletions

View file

@@ -571,6 +571,16 @@ class UTF8Test(ReadTest):
def test_lone_surrogates(self):
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
self.assertEqual("[\uDC80]".encode("utf-8", "backslashreplace"),
b'[\\udc80]')
self.assertEqual("[\uDC80]".encode("utf-8", "xmlcharrefreplace"),
b'[�]')
self.assertEqual("[\uDC80]".encode("utf-8", "surrogateescape"),
b'[\x80]')
self.assertEqual("[\uDC80]".encode("utf-8", "ignore"),
b'[]')
self.assertEqual("[\uDC80]".encode("utf-8", "replace"),
b'[?]')
def test_surrogatepass_handler(self):
self.assertEquals("abc\ud800def".encode("utf-8", "surrogatepass"),