mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Issue #9804: ascii() now always represents Unicode surrogate pairs as
a single `\UXXXXXXXX` escape, regardless of whether the character is printable or not. Also, the "backslashreplace" error handler now joins surrogate pairs into a single character on UCS-2 builds.
This commit is contained in:
parent
ea99c5c949
commit
e4a189274f
4 changed files with 72 additions and 17 deletions
|
@@ -179,6 +179,28 @@ class BuiltinTest(unittest.TestCase):
|
|||
a = {}
|
||||
a[0] = a
|
||||
self.assertEqual(ascii(a), '{0: {...}}')
|
||||
# Advanced checks for unicode strings
|
||||
def _check_uni(s):
|
||||
self.assertEqual(ascii(s), repr(s))
|
||||
_check_uni("'")
|
||||
_check_uni('"')
|
||||
_check_uni('"\'')
|
||||
_check_uni('\0')
|
||||
_check_uni('\r\n\t .')
|
||||
# Unprintable non-ASCII characters
|
||||
_check_uni('\x85')
|
||||
_check_uni('\u1fff')
|
||||
_check_uni('\U00012fff')
|
||||
# Lone surrogates
|
||||
_check_uni('\ud800')
|
||||
_check_uni('\udfff')
|
||||
# Issue #9804: surrogates should be joined even for printable
|
||||
# wide characters (UCS-2 builds).
|
||||
self.assertEqual(ascii('\U0001d121'), "'\\U0001d121'")
|
||||
# All together
|
||||
s = "'\0\"\n\r\t abcd\x85é\U00012fff\uD800\U0001D121xxx."
|
||||
self.assertEqual(ascii(s),
|
||||
r"""'\'\x00"\n\r\t abcd\x85\xe9\U00012fff\ud800\U0001d121xxx.'""")
|
||||
|
||||
def test_neg(self):
|
||||
x = -sys.maxsize-1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue