bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)

We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code.  It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime.  A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings.  That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()).  This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not changed (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-08 13:39:07 -07:00 committed by GitHub
parent c018d3037b
commit 81c72044a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
108 changed files with 2282 additions and 1573 deletions

View file

@ -28,12 +28,6 @@ extern char *strerror(int);
extern "C" {
#endif
_Py_IDENTIFIER(__main__);
_Py_IDENTIFIER(__module__);
_Py_IDENTIFIER(builtins);
_Py_IDENTIFIER(stderr);
_Py_IDENTIFIER(flush);
/* Forward declarations */
static PyObject *
_PyErr_FormatV(PyThreadState *tstate, PyObject *exception,
@ -1135,7 +1129,7 @@ PyErr_NewException(const char *name, PyObject *base, PyObject *dict)
goto failure;
}
int r = _PyDict_ContainsId(dict, &PyId___module__);
int r = PyDict_Contains(dict, &_Py_ID(__module__));
if (r < 0) {
goto failure;
}
@ -1144,7 +1138,7 @@ PyErr_NewException(const char *name, PyObject *base, PyObject *dict)
(Py_ssize_t)(dot-name));
if (modulename == NULL)
goto failure;
if (_PyDict_SetItemId(dict, &PyId___module__, modulename) != 0)
if (PyDict_SetItem(dict, &_Py_ID(__module__), modulename) != 0)
goto failure;
}
if (PyTuple_Check(base)) {
@ -1347,7 +1341,7 @@ write_unraisable_exc_file(PyThreadState *tstate, PyObject *exc_type,
assert(PyExceptionClass_Check(exc_type));
PyObject *modulename = _PyObject_GetAttrId(exc_type, &PyId___module__);
PyObject *modulename = PyObject_GetAttr(exc_type, &_Py_ID(__module__));
if (modulename == NULL || !PyUnicode_Check(modulename)) {
Py_XDECREF(modulename);
_PyErr_Clear(tstate);
@ -1356,8 +1350,8 @@ write_unraisable_exc_file(PyThreadState *tstate, PyObject *exc_type,
}
}
else {
if (!_PyUnicode_EqualToASCIIId(modulename, &PyId_builtins) &&
!_PyUnicode_EqualToASCIIId(modulename, &PyId___main__)) {
if (!_PyUnicode_Equal(modulename, &_Py_ID(builtins)) &&
!_PyUnicode_Equal(modulename, &_Py_ID(__main__))) {
if (PyFile_WriteObject(modulename, file, Py_PRINT_RAW) < 0) {
Py_DECREF(modulename);
return -1;
@ -1405,7 +1399,7 @@ write_unraisable_exc_file(PyThreadState *tstate, PyObject *exc_type,
}
/* Explicitly call file.flush() */
PyObject *res = _PyObject_CallMethodIdNoArgs(file, &PyId_flush);
PyObject *res = _PyObject_CallMethodNoArgs(file, &_Py_ID(flush));
if (!res) {
return -1;
}
@ -1420,7 +1414,7 @@ write_unraisable_exc(PyThreadState *tstate, PyObject *exc_type,
PyObject *exc_value, PyObject *exc_tb, PyObject *err_msg,
PyObject *obj)
{
PyObject *file = _PySys_GetObjectId(&PyId_stderr);
PyObject *file = _PySys_GetAttr(tstate, &_Py_ID(stderr));
if (file == NULL || file == Py_None) {
return 0;
}
@ -1524,8 +1518,7 @@ _PyErr_WriteUnraisableMsg(const char *err_msg_str, PyObject *obj)
goto error;
}
_Py_IDENTIFIER(unraisablehook);
PyObject *hook = _PySys_GetObjectId(&PyId_unraisablehook);
PyObject *hook = _PySys_GetAttr(tstate, &_Py_ID(unraisablehook));
if (hook == NULL) {
Py_DECREF(hook_args);
goto default_hook;
@ -1600,14 +1593,6 @@ PyErr_SyntaxLocationObjectEx(PyObject *filename, int lineno, int col_offset,
int end_lineno, int end_col_offset)
{
PyObject *exc, *v, *tb, *tmp;
_Py_IDENTIFIER(filename);
_Py_IDENTIFIER(lineno);
_Py_IDENTIFIER(end_lineno);
_Py_IDENTIFIER(msg);
_Py_IDENTIFIER(offset);
_Py_IDENTIFIER(end_offset);
_Py_IDENTIFIER(print_file_and_line);
_Py_IDENTIFIER(text);
PyThreadState *tstate = _PyThreadState_GET();
/* add attributes for the line number and filename for the error */
@ -1619,7 +1604,7 @@ PyErr_SyntaxLocationObjectEx(PyObject *filename, int lineno, int col_offset,
if (tmp == NULL)
_PyErr_Clear(tstate);
else {
if (_PyObject_SetAttrId(v, &PyId_lineno, tmp)) {
if (PyObject_SetAttr(v, &_Py_ID(lineno), tmp)) {
_PyErr_Clear(tstate);
}
Py_DECREF(tmp);
@ -1631,7 +1616,7 @@ PyErr_SyntaxLocationObjectEx(PyObject *filename, int lineno, int col_offset,
_PyErr_Clear(tstate);
}
}
if (_PyObject_SetAttrId(v, &PyId_offset, tmp ? tmp : Py_None)) {
if (PyObject_SetAttr(v, &_Py_ID(offset), tmp ? tmp : Py_None)) {
_PyErr_Clear(tstate);
}
Py_XDECREF(tmp);
@ -1643,7 +1628,7 @@ PyErr_SyntaxLocationObjectEx(PyObject *filename, int lineno, int col_offset,
_PyErr_Clear(tstate);
}
}
if (_PyObject_SetAttrId(v, &PyId_end_lineno, tmp ? tmp : Py_None)) {
if (PyObject_SetAttr(v, &_Py_ID(end_lineno), tmp ? tmp : Py_None)) {
_PyErr_Clear(tstate);
}
Py_XDECREF(tmp);
@ -1655,20 +1640,20 @@ PyErr_SyntaxLocationObjectEx(PyObject *filename, int lineno, int col_offset,
_PyErr_Clear(tstate);
}
}
if (_PyObject_SetAttrId(v, &PyId_end_offset, tmp ? tmp : Py_None)) {
if (PyObject_SetAttr(v, &_Py_ID(end_offset), tmp ? tmp : Py_None)) {
_PyErr_Clear(tstate);
}
Py_XDECREF(tmp);
tmp = NULL;
if (filename != NULL) {
if (_PyObject_SetAttrId(v, &PyId_filename, filename)) {
if (PyObject_SetAttr(v, &_Py_ID(filename), filename)) {
_PyErr_Clear(tstate);
}
tmp = PyErr_ProgramTextObject(filename, lineno);
if (tmp) {
if (_PyObject_SetAttrId(v, &PyId_text, tmp)) {
if (PyObject_SetAttr(v, &_Py_ID(text), tmp)) {
_PyErr_Clear(tstate);
}
Py_DECREF(tmp);
@ -1678,7 +1663,7 @@ PyErr_SyntaxLocationObjectEx(PyObject *filename, int lineno, int col_offset,
}
}
if (exc != PyExc_SyntaxError) {
if (_PyObject_LookupAttrId(v, &PyId_msg, &tmp) < 0) {
if (_PyObject_LookupAttr(v, &_Py_ID(msg), &tmp) < 0) {
_PyErr_Clear(tstate);
}
else if (tmp) {
@ -1687,7 +1672,7 @@ PyErr_SyntaxLocationObjectEx(PyObject *filename, int lineno, int col_offset,
else {
tmp = PyObject_Str(v);
if (tmp) {
if (_PyObject_SetAttrId(v, &PyId_msg, tmp)) {
if (PyObject_SetAttr(v, &_Py_ID(msg), tmp)) {
_PyErr_Clear(tstate);
}
Py_DECREF(tmp);
@ -1696,15 +1681,15 @@ PyErr_SyntaxLocationObjectEx(PyObject *filename, int lineno, int col_offset,
_PyErr_Clear(tstate);
}
}
if (_PyObject_LookupAttrId(v, &PyId_print_file_and_line, &tmp) < 0) {
if (_PyObject_LookupAttr(v, &_Py_ID(print_file_and_line), &tmp) < 0) {
_PyErr_Clear(tstate);
}
else if (tmp) {
Py_DECREF(tmp);
}
else {
if (_PyObject_SetAttrId(v, &PyId_print_file_and_line,
Py_None)) {
if (PyObject_SetAttr(v, &_Py_ID(print_file_and_line), Py_None)) {
_PyErr_Clear(tstate);
}
}