mirror of
https://github.com/python/cpython.git
synced 2025-09-18 06:30:38 +00:00
Merged revisions 84655 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
r84655 | antoine.pitrou | 2010-09-09 22:30:23 +0200 (jeu., 09 sept. 2010) | 6 lines

Issue #9804: ascii() now always represents unicode surrogate pairs as
a single `\UXXXXXXXX`, regardless of whether the character is printable
or not. Also, the "backslashreplace" error handler now joins surrogate
pairs into a single character on UCS-2 builds.
........
This commit is contained in:
parent
8e0bb6a1e2
commit
c9a8df24cc
4 changed files with 72 additions and 17 deletions
|
@ -577,17 +577,31 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
|
||||
("\\uffff", 1)
|
||||
)
|
||||
if sys.maxunicode>0xffff:
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")),
|
||||
("\\U00010000", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")),
|
||||
("\\U0010ffff", 1)
|
||||
)
|
||||
# 1 on UCS-4 builds, 2 on UCS-2
|
||||
len_wide = len("\U00010000")
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError("ascii", "\U00010000",
|
||||
0, len_wide, "ouch")),
|
||||
("\\U00010000", len_wide)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError("ascii", "\U0010ffff",
|
||||
0, len_wide, "ouch")),
|
||||
("\\U0010ffff", len_wide)
|
||||
)
|
||||
# Lone surrogates (regardless of unicode width)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
|
||||
("\\ud800", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
|
||||
("\\udfff", 1)
|
||||
)
|
||||
|
||||
def test_badhandlerresults(self):
|
||||
results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue