gh-92536: PEP 623: Remove wstr and legacy APIs from Unicode (GH-92537)

2025-11-01 02:38:53 +00:00 · 2022-05-12 14:48:38 +09:00 · 2022-05-12 14:48:38 +09:00 · f9c9354a7a
commit f9c9354a7a
parent 68fec31364
35 changed files with 199 additions and 2090 deletions
--- a/Python/dynload_win.c
+++ b/Python/dynload_win.c
@@ -225,11 +225,7 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix,

    _Py_CheckPython3();

-#if USE_UNICODE_WCHAR_CACHE
-    const wchar_t *wpathname = _PyUnicode_AsUnicode(pathname);
-#else /* USE_UNICODE_WCHAR_CACHE */
    wchar_t *wpathname = PyUnicode_AsWideCharString(pathname, NULL);
-#endif /* USE_UNICODE_WCHAR_CACHE */
    if (wpathname == NULL)
        return NULL;

@@ -251,9 +247,7 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix,
                              LOAD_LIBRARY_SEARCH_DEFAULT_DIRS |
                              LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
        Py_END_ALLOW_THREADS
-#if !USE_UNICODE_WCHAR_CACHE
        PyMem_Free(wpathname);
-#endif /* USE_UNICODE_WCHAR_CACHE */

        /* restore old error mode settings */
        SetErrorMode(old_mode);
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -1244,18 +1244,12 @@ _Py_stat(PyObject *path, struct stat *statbuf)
 #ifdef MS_WINDOWS
    int err;

-#if USE_UNICODE_WCHAR_CACHE
-    const wchar_t *wpath = _PyUnicode_AsUnicode(path);
-#else /* USE_UNICODE_WCHAR_CACHE */
    wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
-#endif /* USE_UNICODE_WCHAR_CACHE */
    if (wpath == NULL)
        return -2;

    err = _Py_wstat(wpath, statbuf);
-#if !USE_UNICODE_WCHAR_CACHE
    PyMem_Free(wpath);
-#endif /* USE_UNICODE_WCHAR_CACHE */
    return err;
 #else
    int ret;
@@ -1663,11 +1657,8 @@ _Py_fopen_obj(PyObject *path, const char *mode)
                     Py_TYPE(path));
        return NULL;
    }
-#if USE_UNICODE_WCHAR_CACHE
-    const wchar_t *wpath = _PyUnicode_AsUnicode(path);
-#else /* USE_UNICODE_WCHAR_CACHE */
+
    wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
-#endif /* USE_UNICODE_WCHAR_CACHE */
    if (wpath == NULL)
        return NULL;

@@ -1675,9 +1666,7 @@ _Py_fopen_obj(PyObject *path, const char *mode)
                                wmode, Py_ARRAY_LENGTH(wmode));
    if (usize == 0) {
        PyErr_SetFromWindowsErr(0);
-#if !USE_UNICODE_WCHAR_CACHE
        PyMem_Free(wpath);
-#endif /* USE_UNICODE_WCHAR_CACHE */
        return NULL;
    }

@@ -1687,9 +1676,7 @@ _Py_fopen_obj(PyObject *path, const char *mode)
        Py_END_ALLOW_THREADS
    } while (f == NULL
             && errno == EINTR && !(async_err = PyErr_CheckSignals()));
-#if !USE_UNICODE_WCHAR_CACHE
    PyMem_Free(wpath);
-#endif /* USE_UNICODE_WCHAR_CACHE */
 #else
    PyObject *bytes;
    const char *path_bytes;
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -1012,58 +1012,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
        break;
    }

-    case 'u': /* raw unicode buffer (Py_UNICODE *) */
-    case 'Z': /* raw unicode buffer or None */
-    {
-        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
-                "getargs: The '%c' format is deprecated. Use 'U' instead.", c)) {
-            return NULL;
-        }
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
-        Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **);
-
-        if (*format == '#') {
-            /* "u#" or "Z#" */
-            REQUIRE_PY_SSIZE_T_CLEAN;
-            Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
-
-            if (c == 'Z' && arg == Py_None) {
-                *p = NULL;
-                *psize = 0;
-            }
-            else if (PyUnicode_Check(arg)) {
-                Py_ssize_t len;
-                *p = PyUnicode_AsUnicodeAndSize(arg, &len);
-                if (*p == NULL)
-                    RETURN_ERR_OCCURRED;
-                *psize = len;
-            }
-            else
-                return converterr(c == 'Z' ? "str or None" : "str",
-                                  arg, msgbuf, bufsize);
-            format++;
-        } else {
-            /* "u" or "Z" */
-            if (c == 'Z' && arg == Py_None)
-                *p = NULL;
-            else if (PyUnicode_Check(arg)) {
-                Py_ssize_t len;
-                *p = PyUnicode_AsUnicodeAndSize(arg, &len);
-                if (*p == NULL)
-                    RETURN_ERR_OCCURRED;
-                if (wcslen(*p) != (size_t)len) {
-                    PyErr_SetString(PyExc_ValueError, "embedded null character");
-                    RETURN_ERR_OCCURRED;
-                }
-            } else
-                return converterr(c == 'Z' ? "str or None" : "str",
-                                  arg, msgbuf, bufsize);
-        }
-        break;
-_Py_COMP_DIAG_POP
-    }
-
    case 'e': {/* encoded string */
        char **buffer;
        const char *encoding;
@@ -2685,8 +2633,6 @@ skipitem(const char **p_format, va_list *p_va, int flags)
    case 's': /* string */
    case 'z': /* string or None */
    case 'y': /* bytes */
-    case 'u': /* unicode string */
-    case 'Z': /* unicode string or None */
    case 'w': /* buffer, read-write */
        {
            if (p_va != NULL) {
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -1077,7 +1077,6 @@ _Py_DumpASCII(int fd, PyObject *text)
    int truncated;
    int kind;
    void *data = NULL;
-    wchar_t *wstr = NULL;
    Py_UCS4 ch;

    if (!PyUnicode_Check(text))
@@ -1085,13 +1084,7 @@ _Py_DumpASCII(int fd, PyObject *text)

    size = ascii->length;
    kind = ascii->state.kind;
-    if (kind == PyUnicode_WCHAR_KIND) {
-        wstr = ascii->wstr;
-        if (wstr == NULL)
-            return;
-        size = _PyCompactUnicodeObject_CAST(text)->wstr_length;
-    }
-    else if (ascii->state.compact) {
+    if (ascii->state.compact) {
        if (ascii->state.ascii)
            data = ascii + 1;
        else
@@ -1132,10 +1125,7 @@ _Py_DumpASCII(int fd, PyObject *text)
    }

    for (i=0; i < size; i++) {
-        if (kind != PyUnicode_WCHAR_KIND)
-            ch = PyUnicode_READ(kind, data, i);
-        else
-            ch = wstr[i];
+        ch = PyUnicode_READ(kind, data, i);
        if (' ' <= ch && ch <= 126) {
            /* printable ASCII character */
            char c = (char)ch;