mirror of
https://github.com/python/cpython.git
synced 2025-08-22 09:45:06 +00:00
gh-92536: PEP 623: Remove wstr and legacy APIs from Unicode (GH-92537)
This commit is contained in:
parent
68fec31364
commit
f9c9354a7a
35 changed files with 199 additions and 2090 deletions
|
@ -1376,57 +1376,28 @@ class PyUnicodeObjectPtr(PyObjectPtr):
|
|||
return _type_Py_UNICODE.sizeof
|
||||
|
||||
def proxyval(self, visited):
|
||||
may_have_surrogates = False
|
||||
compact = self.field('_base')
|
||||
ascii = compact['_base']
|
||||
state = ascii['state']
|
||||
is_compact_ascii = (int(state['ascii']) and int(state['compact']))
|
||||
if not int(state['ready']):
|
||||
# string is not ready
|
||||
field_length = int(compact['wstr_length'])
|
||||
may_have_surrogates = True
|
||||
field_str = ascii['wstr']
|
||||
field_length = int(ascii['length'])
|
||||
if is_compact_ascii:
|
||||
field_str = ascii.address + 1
|
||||
elif int(state['compact']):
|
||||
field_str = compact.address + 1
|
||||
else:
|
||||
field_length = int(ascii['length'])
|
||||
if is_compact_ascii:
|
||||
field_str = ascii.address + 1
|
||||
elif int(state['compact']):
|
||||
field_str = compact.address + 1
|
||||
else:
|
||||
field_str = self.field('data')['any']
|
||||
repr_kind = int(state['kind'])
|
||||
if repr_kind == 1:
|
||||
field_str = field_str.cast(_type_unsigned_char_ptr())
|
||||
elif repr_kind == 2:
|
||||
field_str = field_str.cast(_type_unsigned_short_ptr())
|
||||
elif repr_kind == 4:
|
||||
field_str = field_str.cast(_type_unsigned_int_ptr())
|
||||
field_str = self.field('data')['any']
|
||||
repr_kind = int(state['kind'])
|
||||
if repr_kind == 1:
|
||||
field_str = field_str.cast(_type_unsigned_char_ptr())
|
||||
elif repr_kind == 2:
|
||||
field_str = field_str.cast(_type_unsigned_short_ptr())
|
||||
elif repr_kind == 4:
|
||||
field_str = field_str.cast(_type_unsigned_int_ptr())
|
||||
|
||||
# Gather a list of ints from the Py_UNICODE array; these are either
|
||||
# UCS-1, UCS-2 or UCS-4 code points:
|
||||
if not may_have_surrogates:
|
||||
Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
|
||||
else:
|
||||
# A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
|
||||
# inferior process: we must join surrogate pairs.
|
||||
Py_UNICODEs = []
|
||||
i = 0
|
||||
limit = safety_limit(field_length)
|
||||
while i < limit:
|
||||
ucs = int(field_str[i])
|
||||
i += 1
|
||||
if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
|
||||
Py_UNICODEs.append(ucs)
|
||||
continue
|
||||
# This could be a surrogate pair.
|
||||
ucs2 = int(field_str[i])
|
||||
if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
|
||||
continue
|
||||
code = (ucs & 0x03FF) << 10
|
||||
code |= ucs2 & 0x03FF
|
||||
code += 0x00010000
|
||||
Py_UNICODEs.append(code)
|
||||
i += 1
|
||||
Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
|
||||
|
||||
# Convert the int code points to unicode characters, and generate a
|
||||
# local unicode instance.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue