gh-92536: PEP 623: Remove wstr and legacy APIs from Unicode (GH-92537)

2025-11-24 20:30:18 +00:00 · 2022-05-12 14:48:38 +09:00 · 2022-05-12 14:48:38 +09:00 · f9c9354a7a
commit f9c9354a7a
parent 68fec31364
35 changed files with 199 additions and 2090 deletions
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@ -1376,57 +1376,28 @@ class PyUnicodeObjectPtr(PyObjectPtr):
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
-        may_have_surrogates = False
        compact = self.field('_base')
        ascii = compact['_base']
        state = ascii['state']
        is_compact_ascii = (int(state['ascii']) and int(state['compact']))
-        if not int(state['ready']):
-            # string is not ready
-            field_length = int(compact['wstr_length'])
-            may_have_surrogates = True
-            field_str = ascii['wstr']
+        field_length = int(ascii['length'])
+        if is_compact_ascii:
+            field_str = ascii.address + 1
+        elif int(state['compact']):
+            field_str = compact.address + 1
        else:
-            field_length = int(ascii['length'])
-            if is_compact_ascii:
-                field_str = ascii.address + 1
-            elif int(state['compact']):
-                field_str = compact.address + 1
-            else:
-                field_str = self.field('data')['any']
-            repr_kind = int(state['kind'])
-            if repr_kind == 1:
-                field_str = field_str.cast(_type_unsigned_char_ptr())
-            elif repr_kind == 2:
-                field_str = field_str.cast(_type_unsigned_short_ptr())
-            elif repr_kind == 4:
-                field_str = field_str.cast(_type_unsigned_int_ptr())
+            field_str = self.field('data')['any']
+        repr_kind = int(state['kind'])
+        if repr_kind == 1:
+            field_str = field_str.cast(_type_unsigned_char_ptr())
+        elif repr_kind == 2:
+            field_str = field_str.cast(_type_unsigned_short_ptr())
+        elif repr_kind == 4:
+            field_str = field_str.cast(_type_unsigned_int_ptr())

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
-        if not may_have_surrogates:
-            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
-        else:
-            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
-            # inferior process: we must join surrogate pairs.
-            Py_UNICODEs = []
-            i = 0
-            limit = safety_limit(field_length)
-            while i < limit:
-                ucs = int(field_str[i])
-                i += 1
-                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
-                    Py_UNICODEs.append(ucs)
-                    continue
-                # This could be a surrogate pair.
-                ucs2 = int(field_str[i])
-                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
-                    continue
-                code = (ucs & 0x03FF) << 10
-                code |= ucs2 & 0x03FF
-                code += 0x00010000
-                Py_UNICODEs.append(code)
-                i += 1
+        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.