mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
Encode surrogates in UTF-8 even for a wide Py_UNICODE.
Implement sys.maxunicode. Explicitly wrap around upper/lower computations for wide Py_UNICODE. When decoding large characters with UTF-8, represent expected test results using the \U notation.
This commit is contained in:
parent
236d8b7974
commit
ce9b5a55e1
5 changed files with 47 additions and 16 deletions
|
@@ -386,9 +386,9 @@ verify(u'\ud84d\udc56'.encode('utf-8') == \
|
|||
''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))) )
|
||||
# UTF-8 specific decoding tests
|
||||
verify(unicode(''.join((chr(0xf0), chr(0xa3), chr(0x91), chr(0x96))),
|
||||
'utf-8') == u'\ud84d\udc56' )
|
||||
'utf-8') == u'\U00023456' )
|
||||
verify(unicode(''.join((chr(0xf0), chr(0x90), chr(0x80), chr(0x82))),
|
||||
'utf-8') == u'\ud800\udc02' )
|
||||
'utf-8') == u'\U00010002' )
|
||||
verify(unicode(''.join((chr(0xe2), chr(0x82), chr(0xac))),
|
||||
'utf-8') == u'\u20ac' )
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue