Merged revisions 84655 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r84655 | antoine.pitrou | 2010-09-09 22:30:23 +0200 (jeu., 09 sept. 2010) | 6 lines

  Issue #9804: ascii() now always represents Unicode surrogate pairs as
  a single `\UXXXXXXXX`, regardless of whether the character is printable
  or not.  Also, the "backslashreplace" error handler now joins surrogate
  pairs into a single character on UCS-2 builds.
........
This commit is contained in:
Antoine Pitrou 2010-09-09 20:33:43 +00:00
parent 8e0bb6a1e2
commit c9a8df24cc
4 changed files with 72 additions and 17 deletions

View file

@ -577,17 +577,31 @@ class CodecCallbackTest(unittest.TestCase):
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
("\\uffff", 1)
)
if sys.maxunicode>0xffff:
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")),
("\\U00010000", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")),
("\\U0010ffff", 1)
)
# 1 on UCS-4 builds, 2 on UCS-2
len_wide = len("\U00010000")
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError("ascii", "\U00010000",
0, len_wide, "ouch")),
("\\U00010000", len_wide)
)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError("ascii", "\U0010ffff",
0, len_wide, "ouch")),
("\\U0010ffff", len_wide)
)
# Lone surrogates (regardless of unicode width)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
("\\ud800", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
("\\udfff", 1)
)
def test_badhandlerresults(self):
results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )