gh-105156: Deprecate the old Py_UNICODE type in C API (#105157)

Deprecate the old Py_UNICODE and PY_UNICODE_TYPE types in the C API:
use wchar_t instead.

Replace Py_UNICODE with wchar_t in multiple C files.

Co-authored-by: Inada Naoki <songofacandy@gmail.com>
This commit is contained in:
Victor Stinner 2023-06-01 08:56:35 +02:00 committed by GitHub
parent f332594dd4
commit 8ed705c083
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 25 additions and 14 deletions

View file

@ -52,6 +52,8 @@ Python:
whether you selected a "narrow" or "wide" Unicode version of Python at whether you selected a "narrow" or "wide" Unicode version of Python at
build time. build time.
.. deprecated-removed:: 3.13 3.15
.. c:type:: PyASCIIObject .. c:type:: PyASCIIObject
PyCompactUnicodeObject PyCompactUnicodeObject

View file

@ -352,6 +352,11 @@ Porting to Python 3.13
Deprecated Deprecated
---------- ----------
* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use the
``wchar_t`` type directly instead. Since Python 3.3, ``Py_UNICODE`` and
``PY_UNICODE_TYPE`` are just aliases of ``wchar_t``.
(Contributed by Victor Stinner in :gh:`105156`.)
Removed Removed
------- -------

View file

@ -6,8 +6,8 @@
Python and represents a single Unicode element in the Unicode type. Python and represents a single Unicode element in the Unicode type.
With PEP 393, Py_UNICODE is deprecated and replaced with a With PEP 393, Py_UNICODE is deprecated and replaced with a
typedef to wchar_t. */ typedef to wchar_t. */
#define PY_UNICODE_TYPE wchar_t Py_DEPRECATED(3.13) typedef wchar_t PY_UNICODE_TYPE;
/* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE; Py_DEPRECATED(3.13) typedef wchar_t Py_UNICODE;
/* --- Internal Unicode Operations ---------------------------------------- */ /* --- Internal Unicode Operations ---------------------------------------- */

View file

@ -0,0 +1,4 @@
Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use the
``wchar_t`` type directly instead. Since Python 3.3, ``Py_UNICODE`` and
``PY_UNICODE_TYPE`` are just aliases of ``wchar_t``. Patch by Victor
Stinner.

View file

@ -231,7 +231,7 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
/*[clinic end generated code: output=23413f68e6484bbd input=588aac967e0ba74b]*/ /*[clinic end generated code: output=23413f68e6484bbd input=588aac967e0ba74b]*/
{ {
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
Py_UNICODE *widename = NULL; wchar_t *widename = NULL;
#else #else
const char *name = NULL; const char *name = NULL;
#endif #endif

View file

@ -594,7 +594,7 @@ getargs_y_hash(PyObject *self, PyObject *args)
static PyObject * static PyObject *
getargs_u(PyObject *self, PyObject *args) getargs_u(PyObject *self, PyObject *args)
{ {
Py_UNICODE *str; wchar_t *str;
if (!PyArg_ParseTuple(args, "u", &str)) { if (!PyArg_ParseTuple(args, "u", &str)) {
return NULL; return NULL;
} }
@ -604,7 +604,7 @@ getargs_u(PyObject *self, PyObject *args)
static PyObject * static PyObject *
getargs_u_hash(PyObject *self, PyObject *args) getargs_u_hash(PyObject *self, PyObject *args)
{ {
Py_UNICODE *str; wchar_t *str;
Py_ssize_t size; Py_ssize_t size;
if (!PyArg_ParseTuple(args, "u#", &str, &size)) { if (!PyArg_ParseTuple(args, "u#", &str, &size)) {
return NULL; return NULL;
@ -615,7 +615,7 @@ getargs_u_hash(PyObject *self, PyObject *args)
static PyObject * static PyObject *
getargs_Z(PyObject *self, PyObject *args) getargs_Z(PyObject *self, PyObject *args)
{ {
Py_UNICODE *str; wchar_t *str;
if (!PyArg_ParseTuple(args, "Z", &str)) { if (!PyArg_ParseTuple(args, "Z", &str)) {
return NULL; return NULL;
} }
@ -628,7 +628,7 @@ getargs_Z(PyObject *self, PyObject *args)
static PyObject * static PyObject *
getargs_Z_hash(PyObject *self, PyObject *args) getargs_Z_hash(PyObject *self, PyObject *args)
{ {
Py_UNICODE *str; wchar_t *str;
Py_ssize_t size; Py_ssize_t size;
if (!PyArg_ParseTuple(args, "Z#", &str, &size)) { if (!PyArg_ParseTuple(args, "Z#", &str, &size)) {
return NULL; return NULL;

View file

@ -1830,10 +1830,10 @@ typecode_to_mformat_code(char typecode)
return UNSIGNED_INT8; return UNSIGNED_INT8;
case 'u': case 'u':
if (sizeof(Py_UNICODE) == 2) { if (sizeof(wchar_t) == 2) {
return UTF16_LE + is_big_endian; return UTF16_LE + is_big_endian;
} }
if (sizeof(Py_UNICODE) == 4) { if (sizeof(wchar_t) == 4) {
return UTF32_LE + is_big_endian; return UTF32_LE + is_big_endian;
} }
return UNKNOWN_FORMAT; return UNKNOWN_FORMAT;

View file

@ -1800,14 +1800,14 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
switch (PyUnicode_KIND(unicode)) { switch (PyUnicode_KIND(unicode)) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
_PyUnicode_CONVERT_BYTES(Py_UNICODE, unsigned char, _PyUnicode_CONVERT_BYTES(wchar_t, unsigned char,
u, u + size, PyUnicode_1BYTE_DATA(unicode)); u, u + size, PyUnicode_1BYTE_DATA(unicode));
break; break;
case PyUnicode_2BYTE_KIND: case PyUnicode_2BYTE_KIND:
#if Py_UNICODE_SIZE == 2 #if Py_UNICODE_SIZE == 2
memcpy(PyUnicode_2BYTE_DATA(unicode), u, size * 2); memcpy(PyUnicode_2BYTE_DATA(unicode), u, size * 2);
#else #else
_PyUnicode_CONVERT_BYTES(Py_UNICODE, Py_UCS2, _PyUnicode_CONVERT_BYTES(wchar_t, Py_UCS2,
u, u + size, PyUnicode_2BYTE_DATA(unicode)); u, u + size, PyUnicode_2BYTE_DATA(unicode));
#endif #endif
break; break;
@ -3809,9 +3809,9 @@ PyUnicode_AsUTF8(PyObject *unicode)
PyUnicode_GetSize() has been deprecated since Python 3.3 PyUnicode_GetSize() has been deprecated since Python 3.3
because it returned length of Py_UNICODE. because it returned length of Py_UNICODE.
But this function is part of stable abi, because it don't But this function is part of stable ABI, because it doesn't
include Py_UNICODE in signature and it was not excluded from include Py_UNICODE in signature and it was not excluded from
stable abi in PEP 384. stable ABI in PEP 384.
*/ */
PyAPI_FUNC(Py_ssize_t) PyAPI_FUNC(Py_ssize_t)
PyUnicode_GetSize(PyObject *unicode) PyUnicode_GetSize(PyObject *unicode)

View file

@ -329,7 +329,7 @@ do_mkvalue(const char **p_format, va_list *p_va)
case 'u': case 'u':
{ {
PyObject *v; PyObject *v;
Py_UNICODE *u = va_arg(*p_va, Py_UNICODE *); const wchar_t *u = va_arg(*p_va, wchar_t*);
Py_ssize_t n; Py_ssize_t n;
if (**p_format == '#') { if (**p_format == '#') {
++*p_format; ++*p_format;