bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)

We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code.  It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime.  A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings.  That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()).  This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not changed (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-08 13:39:07 -07:00 committed by GitHub
parent c018d3037b
commit 81c72044a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
108 changed files with 2282 additions and 1573 deletions

View file

@ -69,9 +69,6 @@ PyDoc_STRVAR(iobase_doc,
of the IOBase object rather than the virtual `closed` attribute as returned
by whatever subclass. */
_Py_IDENTIFIER(__IOBase_closed);
_Py_IDENTIFIER(read);
/* Internal methods */
static PyObject *
@ -114,9 +111,7 @@ static PyObject *
_io__IOBase_tell_impl(PyObject *self)
/*[clinic end generated code: output=89a1c0807935abe2 input=04e615fec128801f]*/
{
_Py_IDENTIFIER(seek);
return _PyObject_CallMethodId(self, &PyId_seek, "ii", 0, 1);
return _PyObject_CallMethod(self, &_Py_ID(seek), "ii", 0, 1);
}
PyDoc_STRVAR(iobase_truncate_doc,
@ -138,7 +133,7 @@ iobase_is_closed(PyObject *self)
int ret;
/* This gets the derived attribute, which is *not* __IOBase_closed
in most cases! */
ret = _PyObject_LookupAttrId(self, &PyId___IOBase_closed, &res);
ret = _PyObject_LookupAttr(self, &_Py_ID(__IOBase_closed), &res);
Py_XDECREF(res);
return ret;
}
@ -239,7 +234,7 @@ _io__IOBase_close_impl(PyObject *self)
res = PyObject_CallMethodNoArgs(self, _PyIO_str_flush);
PyErr_Fetch(&exc, &val, &tb);
rc = _PyObject_SetAttrId(self, &PyId___IOBase_closed, Py_True);
rc = PyObject_SetAttr(self, &_Py_ID(__IOBase_closed), Py_True);
_PyErr_ChainExceptions(exc, val, tb);
if (rc < 0) {
Py_CLEAR(res);
@ -260,7 +255,6 @@ iobase_finalize(PyObject *self)
PyObject *res;
PyObject *error_type, *error_value, *error_traceback;
int closed;
_Py_IDENTIFIER(_finalizing);
/* Save the current exception, if any. */
PyErr_Fetch(&error_type, &error_value, &error_traceback);
@ -280,7 +274,7 @@ iobase_finalize(PyObject *self)
if (closed == 0) {
/* Signal close() that it was called as part of the object
finalization process. */
if (_PyObject_SetAttrId(self, &PyId__finalizing, Py_True))
if (PyObject_SetAttr(self, &_Py_ID(_finalizing), Py_True))
PyErr_Clear();
res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_close);
/* Silencing I/O errors is bad, but printing spurious tracebacks is
@ -597,7 +591,7 @@ _io__IOBase_readline_impl(PyObject *self, Py_ssize_t limit)
Py_DECREF(readahead);
}
b = _PyObject_CallMethodId(self, &PyId_read, "n", nreadahead);
b = _PyObject_CallMethod(self, &_Py_ID(read), "n", nreadahead);
if (b == NULL) {
/* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
when EINTR occurs so we needn't do it ourselves. */
@ -697,10 +691,8 @@ _io__IOBase_readlines_impl(PyObject *self, Py_ssize_t hint)
/* XXX special-casing this made sense in the Python version in order
to remove the bytecode interpretation overhead, but it could
probably be removed here. */
_Py_IDENTIFIER(extend);
PyObject *ret = _PyObject_CallMethodIdObjArgs(result, &PyId_extend,
self, NULL);
PyObject *ret = PyObject_CallMethodObjArgs(result, &_Py_ID(extend),
self, NULL);
if (ret == NULL) {
goto error;
}
@ -919,9 +911,7 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n)
PyObject *b, *res;
if (n < 0) {
_Py_IDENTIFIER(readall);
return _PyObject_CallMethodIdNoArgs(self, &PyId_readall);
return PyObject_CallMethodNoArgs(self, &_Py_ID(readall));
}
/* TODO: allocate a bytes object directly instead and manually construct
@ -967,8 +957,8 @@ _io__RawIOBase_readall_impl(PyObject *self)
return NULL;
while (1) {
PyObject *data = _PyObject_CallMethodId(self, &PyId_read,
"i", DEFAULT_BUFFER_SIZE);
PyObject *data = _PyObject_CallMethod(self, &_Py_ID(read),
"i", DEFAULT_BUFFER_SIZE);
if (!data) {
/* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
when EINTR occurs so we needn't do it ourselves. */