mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Make the unicode-escape and the UTF-16 codecs handle surrogates
correctly, which makes them roundtrip-safe. Some minor cleanups of the code. Added tests for roundtrip safety.
This commit is contained in:
parent
0d42e0c54a
commit
6c6bfb7c70
2 changed files with 54 additions and 24 deletions
|
@@ -445,11 +445,19 @@ verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
|
|||
verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
|
||||
verify(u'hello'.encode('latin-1') == 'hello')
|
||||
|
||||
# Roundtrip safety for BMP (just the first 1024 chars)
|
||||
u = u''.join(map(unichr, range(1024)))
|
||||
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
|
||||
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
|
||||
verify(unicode(u.encode(encoding),encoding) == u)
|
||||
|
||||
# Roundtrip safety for non-BMP (just a few chars)
|
||||
u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
|
||||
for encoding in ('utf-8',
|
||||
'utf-16', 'utf-16-le', 'utf-16-be',
|
||||
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
|
||||
verify(unicode(u.encode(encoding),encoding) == u)
|
||||
|
||||
u = u''.join(map(unichr, range(256)))
|
||||
for encoding in (
|
||||
'latin-1',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue