bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)

We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code.  It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime.  A new _Py_GET_GLOBAL_IDENTIFIER() macro (spelled _Py_ID() in the diff below) facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings.  That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()).  This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not done (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable, as at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-08 13:39:07 -07:00 committed by GitHub
parent c018d3037b
commit 81c72044a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
108 changed files with 2282 additions and 1573 deletions

View file

@ -11,21 +11,6 @@
#include "pycore_tuple.h" // _PyTuple_FromArray()
#include "pycore_ceval.h" // _PyEval_Vector()
_Py_IDENTIFIER(__builtins__);
_Py_IDENTIFIER(__dict__);
_Py_IDENTIFIER(__prepare__);
_Py_IDENTIFIER(__round__);
_Py_IDENTIFIER(__mro_entries__);
_Py_IDENTIFIER(encoding);
_Py_IDENTIFIER(errors);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(metaclass);
_Py_IDENTIFIER(sort);
_Py_IDENTIFIER(stdin);
_Py_IDENTIFIER(stdout);
_Py_IDENTIFIER(stderr);
#include "clinic/bltinmodule.c.h"
static PyObject*
@ -47,7 +32,7 @@ update_bases(PyObject *bases, PyObject *const *args, Py_ssize_t nargs)
}
continue;
}
if (_PyObject_LookupAttrId(base, &PyId___mro_entries__, &meth) < 0) {
if (_PyObject_LookupAttr(base, &_Py_ID(__mro_entries__), &meth) < 0) {
goto error;
}
if (!meth) {
@ -148,10 +133,10 @@ builtin___build_class__(PyObject *self, PyObject *const *args, Py_ssize_t nargs,
goto error;
}
meta = _PyDict_GetItemIdWithError(mkw, &PyId_metaclass);
meta = _PyDict_GetItemWithError(mkw, &_Py_ID(metaclass));
if (meta != NULL) {
Py_INCREF(meta);
if (_PyDict_DelItemId(mkw, &PyId_metaclass) < 0) {
if (PyDict_DelItem(mkw, &_Py_ID(metaclass)) < 0) {
goto error;
}
/* metaclass is explicitly given, check if it's indeed a class */
@ -191,7 +176,7 @@ builtin___build_class__(PyObject *self, PyObject *const *args, Py_ssize_t nargs,
}
/* else: meta is not a class, so we cannot do the metaclass
calculation, so we will use the explicitly given object as it is */
if (_PyObject_LookupAttrId(meta, &PyId___prepare__, &prep) < 0) {
if (_PyObject_LookupAttr(meta, &_Py_ID(__prepare__), &prep) < 0) {
ns = NULL;
}
else if (prep == NULL) {
@ -946,10 +931,9 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals,
return NULL;
}
int r = _PyDict_ContainsId(globals, &PyId___builtins__);
int r = PyDict_Contains(globals, &_Py_ID(__builtins__));
if (r == 0) {
r = _PyDict_SetItemId(globals, &PyId___builtins__,
PyEval_GetBuiltins());
r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins());
}
if (r < 0) {
return NULL;
@ -1034,10 +1018,9 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals,
Py_TYPE(locals)->tp_name);
return NULL;
}
int r = _PyDict_ContainsId(globals, &PyId___builtins__);
int r = PyDict_Contains(globals, &_Py_ID(__builtins__));
if (r == 0) {
r = _PyDict_SetItemId(globals, &PyId___builtins__,
PyEval_GetBuiltins());
r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins());
}
if (r < 0) {
return NULL;
@ -1960,7 +1943,8 @@ builtin_print_impl(PyObject *module, PyObject *args, PyObject *sep,
int i, err;
if (file == Py_None) {
file = _PySys_GetObjectId(&PyId_stdout);
PyThreadState *tstate = _PyThreadState_GET();
file = _PySys_GetAttr(tstate, &_Py_ID(stdout));
if (file == NULL) {
PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
return NULL;
@ -2020,7 +2004,7 @@ builtin_print_impl(PyObject *module, PyObject *args, PyObject *sep,
}
if (flush) {
PyObject *tmp = _PyObject_CallMethodIdNoArgs(file, &PyId_flush);
PyObject *tmp = PyObject_CallMethodNoArgs(file, &_Py_ID(flush));
if (tmp == NULL) {
return NULL;
}
@ -2050,9 +2034,13 @@ static PyObject *
builtin_input_impl(PyObject *module, PyObject *prompt)
/*[clinic end generated code: output=83db5a191e7a0d60 input=5e8bb70c2908fe3c]*/
{
PyObject *fin = _PySys_GetObjectId(&PyId_stdin);
PyObject *fout = _PySys_GetObjectId(&PyId_stdout);
PyObject *ferr = _PySys_GetObjectId(&PyId_stderr);
PyThreadState *tstate = _PyThreadState_GET();
PyObject *fin = _PySys_GetAttr(
tstate, &_Py_ID(stdin));
PyObject *fout = _PySys_GetAttr(
tstate, &_Py_ID(stdout));
PyObject *ferr = _PySys_GetAttr(
tstate, &_Py_ID(stderr));
PyObject *tmp;
long fd;
int tty;
@ -2079,7 +2067,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt)
}
/* First of all, flush stderr */
tmp = _PyObject_CallMethodIdNoArgs(ferr, &PyId_flush);
tmp = PyObject_CallMethodNoArgs(ferr, &_Py_ID(flush));
if (tmp == NULL)
PyErr_Clear();
else
@ -2088,7 +2076,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt)
/* We should only use (GNU) readline if Python's sys.stdin and
sys.stdout are the same as C's stdin and stdout, because we
need to pass it those. */
tmp = _PyObject_CallMethodIdNoArgs(fin, &PyId_fileno);
tmp = PyObject_CallMethodNoArgs(fin, &_Py_ID(fileno));
if (tmp == NULL) {
PyErr_Clear();
tty = 0;
@ -2101,7 +2089,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt)
tty = fd == fileno(stdin) && isatty(fd);
}
if (tty) {
tmp = _PyObject_CallMethodIdNoArgs(fout, &PyId_fileno);
tmp = PyObject_CallMethodNoArgs(fout, &_Py_ID(fileno));
if (tmp == NULL) {
PyErr_Clear();
tty = 0;
@ -2127,8 +2115,8 @@ builtin_input_impl(PyObject *module, PyObject *prompt)
size_t len;
/* stdin is a text stream, so it must have an encoding. */
stdin_encoding = _PyObject_GetAttrId(fin, &PyId_encoding);
stdin_errors = _PyObject_GetAttrId(fin, &PyId_errors);
stdin_encoding = PyObject_GetAttr(fin, &_Py_ID(encoding));
stdin_errors = PyObject_GetAttr(fin, &_Py_ID(errors));
if (!stdin_encoding || !stdin_errors ||
!PyUnicode_Check(stdin_encoding) ||
!PyUnicode_Check(stdin_errors)) {
@ -2139,7 +2127,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt)
stdin_errors_str = PyUnicode_AsUTF8(stdin_errors);
if (!stdin_encoding_str || !stdin_errors_str)
goto _readline_errors;
tmp = _PyObject_CallMethodIdNoArgs(fout, &PyId_flush);
tmp = PyObject_CallMethodNoArgs(fout, &_Py_ID(flush));
if (tmp == NULL)
PyErr_Clear();
else
@ -2148,8 +2136,8 @@ builtin_input_impl(PyObject *module, PyObject *prompt)
/* We have a prompt, encode it as stdout would */
const char *stdout_encoding_str, *stdout_errors_str;
PyObject *stringpo;
stdout_encoding = _PyObject_GetAttrId(fout, &PyId_encoding);
stdout_errors = _PyObject_GetAttrId(fout, &PyId_errors);
stdout_encoding = PyObject_GetAttr(fout, &_Py_ID(encoding));
stdout_errors = PyObject_GetAttr(fout, &_Py_ID(errors));
if (!stdout_encoding || !stdout_errors ||
!PyUnicode_Check(stdout_encoding) ||
!PyUnicode_Check(stdout_errors)) {
@ -2234,7 +2222,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt)
if (PyFile_WriteObject(prompt, fout, Py_PRINT_RAW) != 0)
return NULL;
}
tmp = _PyObject_CallMethodIdNoArgs(fout, &PyId_flush);
tmp = PyObject_CallMethodNoArgs(fout, &_Py_ID(flush));
if (tmp == NULL)
PyErr_Clear();
else
@ -2285,7 +2273,7 @@ builtin_round_impl(PyObject *module, PyObject *number, PyObject *ndigits)
return NULL;
}
round = _PyObject_LookupSpecial(number, &PyId___round__);
round = _PyObject_LookupSpecial(number, &_Py_ID(__round__));
if (round == NULL) {
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
@ -2346,7 +2334,7 @@ builtin_sorted(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject
if (newlist == NULL)
return NULL;
callable = _PyObject_GetAttrId(newlist, &PyId_sort);
callable = PyObject_GetAttr(newlist, &_Py_ID(sort));
if (callable == NULL) {
Py_DECREF(newlist);
return NULL;
@ -2378,7 +2366,7 @@ builtin_vars(PyObject *self, PyObject *args)
Py_XINCREF(d);
}
else {
if (_PyObject_LookupAttrId(v, &PyId___dict__, &d) == 0) {
if (_PyObject_LookupAttr(v, &_Py_ID(__dict__), &d) == 0) {
PyErr_SetString(PyExc_TypeError,
"vars() argument must have __dict__ attribute");
}