bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)

We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code.  It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime.  A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings.  That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()).  This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not done (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable, as at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-08 13:39:07 -07:00 committed by GitHub
parent c018d3037b
commit 81c72044a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
108 changed files with 2282 additions and 1573 deletions

View file

@ -6,6 +6,7 @@
#include "code.h" // PyCode_Addr2Line etc
#include "frameobject.h" // PyFrame_GetBack()
#include "pycore_ast.h" // asdl_seq_*
#include "pycore_call.h" // _PyObject_CallMethodFormat()
#include "pycore_compile.h" // _PyAST_Optimize
#include "pycore_fileutils.h" // _Py_BEGIN_SUPPRESS_IPH
#include "pycore_frame.h" // _PyFrame_GetCode()
@ -32,11 +33,6 @@
/* Function from Parser/tokenizer.c */
extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
_Py_IDENTIFIER(TextIOWrapper);
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(open);
_Py_IDENTIFIER(path);
/*[clinic input]
class TracebackType "PyTracebackObject *" "&PyTraceback_Type"
[clinic start generated code]*/
@ -317,6 +313,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
const char* filepath;
Py_ssize_t len;
PyObject* result;
PyObject *open = NULL;
filebytes = PyUnicode_EncodeFSDefault(filename);
if (filebytes == NULL) {
@ -333,11 +330,13 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
tail++;
taillen = strlen(tail);
syspath = _PySys_GetObjectId(&PyId_path);
PyThreadState *tstate = _PyThreadState_GET();
syspath = _PySys_GetAttr(tstate, &_Py_ID(path));
if (syspath == NULL || !PyList_Check(syspath))
goto error;
npath = PyList_Size(syspath);
open = PyObject_GetAttr(io, &_Py_ID(open));
for (i = 0; i < npath; i++) {
v = PyList_GetItem(syspath, i);
if (v == NULL) {
@ -364,7 +363,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
namebuf[len++] = SEP;
strcpy(namebuf+len, tail);
binary = _PyObject_CallMethodId(io, &PyId_open, "ss", namebuf, "rb");
binary = _PyObject_CallMethodFormat(tstate, open, "ss", namebuf, "rb");
if (binary != NULL) {
result = binary;
goto finally;
@ -376,6 +375,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
error:
result = NULL;
finally:
Py_XDECREF(open);
Py_DECREF(filebytes);
return result;
}
@ -448,10 +448,11 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
}
io = PyImport_ImportModule("io");
if (io == NULL)
if (io == NULL) {
return -1;
binary = _PyObject_CallMethodId(io, &PyId_open, "Os", filename, "rb");
}
binary = _PyObject_CallMethod(io, &_Py_ID(open), "Os", filename, "rb");
if (binary == NULL) {
PyErr_Clear();
@ -480,14 +481,15 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
PyMem_Free(found_encoding);
return 0;
}
fob = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "Os", binary, encoding);
fob = _PyObject_CallMethod(io, &_Py_ID(TextIOWrapper),
"Os", binary, encoding);
Py_DECREF(io);
PyMem_Free(found_encoding);
if (fob == NULL) {
PyErr_Clear();
res = _PyObject_CallMethodIdNoArgs(binary, &PyId_close);
res = PyObject_CallMethodNoArgs(binary, &_Py_ID(close));
Py_DECREF(binary);
if (res)
Py_DECREF(res);
@ -506,7 +508,7 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
break;
}
}
res = _PyObject_CallMethodIdNoArgs(fob, &PyId_close);
res = PyObject_CallMethodNoArgs(fob, &_Py_ID(close));
if (res) {
Py_DECREF(res);
}