bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)

We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code.  It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime.  A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings.  That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()).  This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not changed (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-08 13:39:07 -07:00 committed by GitHub
parent c018d3037b
commit 81c72044a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
108 changed files with 2282 additions and 1573 deletions

View file

@ -55,17 +55,21 @@ module sys
#include "clinic/sysmodule.c.h"
_Py_IDENTIFIER(_);
_Py_IDENTIFIER(__sizeof__);
_Py_IDENTIFIER(_xoptions);
_Py_IDENTIFIER(buffer);
_Py_IDENTIFIER(builtins);
_Py_IDENTIFIER(encoding);
_Py_IDENTIFIER(path);
_Py_IDENTIFIER(stdout);
_Py_IDENTIFIER(stderr);
_Py_IDENTIFIER(warnoptions);
_Py_IDENTIFIER(write);
/* Look up `name` in the sys dict of the interpreter that owns `tstate`.
 *
 * Returns NULL immediately when the interpreter has no sysdict; otherwise
 * returns whatever _PyDict_GetItemWithError() yields for `name`.
 *
 * The exception state that is current on entry is fetched before the lookup
 * and restored afterwards, so the caller's pending exception (if any) is
 * still in place when this function returns.
 *
 * NOTE(review): the result is presumably a borrowed reference, as with the
 * public PyDict_GetItemWithError() -- confirm before relying on it.
 */
PyObject *
_PySys_GetAttr(PyThreadState *tstate, PyObject *name)
{
    PyObject *sysdict = tstate->interp->sysdict;
    if (sysdict == NULL) {
        return NULL;
    }

    /* Stash the caller's exception state for the duration of the lookup. */
    PyObject *saved_type, *saved_value, *saved_tb;
    _PyErr_Fetch(tstate, &saved_type, &saved_value, &saved_tb);

    /* XXX Suppress a new exception if it was raised and restore
     * the old one. */
    PyObject *result = _PyDict_GetItemWithError(sysdict, name);

    _PyErr_Restore(tstate, saved_type, saved_value, saved_tb);
    return result;
}
static PyObject *
sys_get_object_id(PyThreadState *tstate, _Py_Identifier *key)
@ -147,6 +151,13 @@ _PySys_SetObjectId(_Py_Identifier *key, PyObject *v)
return sys_set_object_id(interp, key, v);
}
/* Store `v` under `key` for the current interpreter's sys module.
 *
 * Thin wrapper: fetches the current interpreter with
 * _PyInterpreterState_GET() and forwards to sys_set_object(), returning its
 * result unchanged (callers of sys_set_object() in this file treat a
 * non-zero result as failure).
 */
int
_PySys_SetAttr(PyObject *key, PyObject *v)
{
    return sys_set_object(_PyInterpreterState_GET(), key, v);
}
static int
sys_set_object_str(PyInterpreterState *interp, const char *name, PyObject *v)
{
@ -258,9 +269,8 @@ sys_audit_tstate(PyThreadState *ts, const char *event,
/* Disallow tracing in hooks unless explicitly enabled */
PyThreadState_EnterTracing(ts);
while ((hook = PyIter_Next(hooks)) != NULL) {
_Py_IDENTIFIER(__cantrace__);
PyObject *o;
int canTrace = _PyObject_LookupAttrId(hook, &PyId___cantrace__, &o);
int canTrace = _PyObject_LookupAttr(hook, &_Py_ID(__cantrace__), &o);
if (o) {
canTrace = PyObject_IsTrue(o);
Py_DECREF(o);
@ -631,7 +641,7 @@ sys_displayhook_unencodable(PyObject *outf, PyObject *o)
const char *stdout_encoding_str;
int ret;
stdout_encoding = _PyObject_GetAttrId(outf, &PyId_encoding);
stdout_encoding = PyObject_GetAttr(outf, &_Py_ID(encoding));
if (stdout_encoding == NULL)
goto error;
stdout_encoding_str = PyUnicode_AsUTF8(stdout_encoding);
@ -648,12 +658,12 @@ sys_displayhook_unencodable(PyObject *outf, PyObject *o)
if (encoded == NULL)
goto error;
if (_PyObject_LookupAttrId(outf, &PyId_buffer, &buffer) < 0) {
if (_PyObject_LookupAttr(outf, &_Py_ID(buffer), &buffer) < 0) {
Py_DECREF(encoded);
goto error;
}
if (buffer) {
result = _PyObject_CallMethodIdOneArg(buffer, &PyId_write, encoded);
result = PyObject_CallMethodOneArg(buffer, &_Py_ID(write), encoded);
Py_DECREF(buffer);
Py_DECREF(encoded);
if (result == NULL)
@ -699,7 +709,7 @@ sys_displayhook(PyObject *module, PyObject *o)
static PyObject *newline = NULL;
PyThreadState *tstate = _PyThreadState_GET();
builtins = _PyImport_GetModuleId(&PyId_builtins);
builtins = PyImport_GetModule(&_Py_ID(builtins));
if (builtins == NULL) {
if (!_PyErr_Occurred(tstate)) {
_PyErr_SetString(tstate, PyExc_RuntimeError,
@ -715,9 +725,9 @@ sys_displayhook(PyObject *module, PyObject *o)
if (o == Py_None) {
Py_RETURN_NONE;
}
if (_PyObject_SetAttrId(builtins, &PyId__, Py_None) != 0)
if (PyObject_SetAttr(builtins, &_Py_ID(_), Py_None) != 0)
return NULL;
outf = sys_get_object_id(tstate, &PyId_stdout);
outf = _PySys_GetAttr(tstate, &_Py_ID(stdout));
if (outf == NULL || outf == Py_None) {
_PyErr_SetString(tstate, PyExc_RuntimeError, "lost sys.stdout");
return NULL;
@ -744,7 +754,7 @@ sys_displayhook(PyObject *module, PyObject *o)
}
if (PyFile_WriteObject(newline, outf, Py_PRINT_RAW) != 0)
return NULL;
if (_PyObject_SetAttrId(builtins, &PyId__, o) != 0)
if (PyObject_SetAttr(builtins, &_Py_ID(_), o) != 0)
return NULL;
Py_RETURN_NONE;
}
@ -1676,7 +1686,7 @@ _PySys_GetSizeOf(PyObject *o)
return (size_t)-1;
}
method = _PyObject_LookupSpecial(o, &PyId___sizeof__);
method = _PyObject_LookupSpecial(o, &_Py_ID(__sizeof__));
if (method == NULL) {
if (!_PyErr_Occurred(tstate)) {
_PyErr_Format(tstate, PyExc_TypeError,
@ -2218,7 +2228,7 @@ _PySys_ReadPreinitXOptions(PyConfig *config)
static PyObject *
get_warnoptions(PyThreadState *tstate)
{
PyObject *warnoptions = sys_get_object_id(tstate, &PyId_warnoptions);
PyObject *warnoptions = _PySys_GetAttr(tstate, &_Py_ID(warnoptions));
if (warnoptions == NULL || !PyList_Check(warnoptions)) {
/* PEP432 TODO: we can reach this if warnoptions is NULL in the main
* interpreter config. When that happens, we need to properly set
@ -2234,7 +2244,7 @@ get_warnoptions(PyThreadState *tstate)
if (warnoptions == NULL) {
return NULL;
}
if (sys_set_object_id(tstate->interp, &PyId_warnoptions, warnoptions)) {
if (sys_set_object(tstate->interp, &_Py_ID(warnoptions), warnoptions)) {
Py_DECREF(warnoptions);
return NULL;
}
@ -2252,7 +2262,7 @@ PySys_ResetWarnOptions(void)
return;
}
PyObject *warnoptions = sys_get_object_id(tstate, &PyId_warnoptions);
PyObject *warnoptions = _PySys_GetAttr(tstate, &_Py_ID(warnoptions));
if (warnoptions == NULL || !PyList_Check(warnoptions))
return;
PyList_SetSlice(warnoptions, 0, PyList_GET_SIZE(warnoptions), NULL);
@ -2306,7 +2316,7 @@ int
PySys_HasWarnOptions(void)
{
PyThreadState *tstate = _PyThreadState_GET();
PyObject *warnoptions = sys_get_object_id(tstate, &PyId_warnoptions);
PyObject *warnoptions = _PySys_GetAttr(tstate, &_Py_ID(warnoptions));
return (warnoptions != NULL && PyList_Check(warnoptions)
&& PyList_GET_SIZE(warnoptions) > 0);
}
@ -2314,7 +2324,7 @@ PySys_HasWarnOptions(void)
static PyObject *
get_xoptions(PyThreadState *tstate)
{
PyObject *xoptions = sys_get_object_id(tstate, &PyId__xoptions);
PyObject *xoptions = _PySys_GetAttr(tstate, &_Py_ID(_xoptions));
if (xoptions == NULL || !PyDict_Check(xoptions)) {
/* PEP432 TODO: we can reach this if xoptions is NULL in the main
* interpreter config. When that happens, we need to properly set
@ -2330,7 +2340,7 @@ get_xoptions(PyThreadState *tstate)
if (xoptions == NULL) {
return NULL;
}
if (sys_set_object_id(tstate->interp, &PyId__xoptions, xoptions)) {
if (sys_set_object(tstate->interp, &_Py_ID(_xoptions), xoptions)) {
Py_DECREF(xoptions);
return NULL;
}
@ -3032,7 +3042,7 @@ _PySys_SetPreliminaryStderr(PyObject *sysdict)
if (pstderr == NULL) {
goto error;
}
if (_PyDict_SetItemId(sysdict, &PyId_stderr, pstderr) < 0) {
if (PyDict_SetItem(sysdict, &_Py_ID(stderr), pstderr) < 0) {
goto error;
}
if (PyDict_SetItemString(sysdict, "__stderr__", pstderr) < 0) {
@ -3157,7 +3167,7 @@ PySys_SetPath(const wchar_t *path)
if ((v = makepathobject(path, DELIM)) == NULL)
Py_FatalError("can't create sys.path");
PyInterpreterState *interp = _PyInterpreterState_GET();
if (sys_set_object_id(interp, &PyId_path, v) != 0) {
if (sys_set_object(interp, &_Py_ID(path), v) != 0) {
Py_FatalError("can't assign sys.path");
}
Py_DECREF(v);
@ -3214,7 +3224,7 @@ PySys_SetArgvEx(int argc, wchar_t **argv, int updatepath)
Py_FatalError("can't compute path0 from argv");
}
PyObject *sys_path = sys_get_object_id(tstate, &PyId_path);
PyObject *sys_path = _PySys_GetAttr(tstate, &_Py_ID(path));
if (sys_path != NULL) {
if (PyList_Insert(sys_path, 0, path0) < 0) {
Py_DECREF(path0);
@ -3241,7 +3251,7 @@ sys_pyfile_write_unicode(PyObject *unicode, PyObject *file)
if (file == NULL)
return -1;
assert(unicode != NULL);
PyObject *result = _PyObject_CallMethodIdOneArg(file, &PyId_write, unicode);
PyObject *result = _PyObject_CallMethodOneArg(file, &_Py_ID(write), unicode);
if (result == NULL) {
return -1;
}
@ -3296,7 +3306,7 @@ sys_pyfile_write(const char *text, PyObject *file)
*/
static void
sys_write(_Py_Identifier *key, FILE *fp, const char *format, va_list va)
sys_write(PyObject *key, FILE *fp, const char *format, va_list va)
{
PyObject *file;
PyObject *error_type, *error_value, *error_traceback;
@ -3305,7 +3315,7 @@ sys_write(_Py_Identifier *key, FILE *fp, const char *format, va_list va)
PyThreadState *tstate = _PyThreadState_GET();
_PyErr_Fetch(tstate, &error_type, &error_value, &error_traceback);
file = sys_get_object_id(tstate, key);
file = _PySys_GetAttr(tstate, key);
written = PyOS_vsnprintf(buffer, sizeof(buffer), format, va);
if (sys_pyfile_write(buffer, file) != 0) {
_PyErr_Clear(tstate);
@ -3325,7 +3335,7 @@ PySys_WriteStdout(const char *format, ...)
va_list va;
va_start(va, format);
sys_write(&PyId_stdout, stdout, format, va);
sys_write(&_Py_ID(stdout), stdout, format, va);
va_end(va);
}
@ -3335,12 +3345,12 @@ PySys_WriteStderr(const char *format, ...)
va_list va;
va_start(va, format);
sys_write(&PyId_stderr, stderr, format, va);
sys_write(&_Py_ID(stderr), stderr, format, va);
va_end(va);
}
static void
sys_format(_Py_Identifier *key, FILE *fp, const char *format, va_list va)
sys_format(PyObject *key, FILE *fp, const char *format, va_list va)
{
PyObject *file, *message;
PyObject *error_type, *error_value, *error_traceback;
@ -3348,7 +3358,7 @@ sys_format(_Py_Identifier *key, FILE *fp, const char *format, va_list va)
PyThreadState *tstate = _PyThreadState_GET();
_PyErr_Fetch(tstate, &error_type, &error_value, &error_traceback);
file = sys_get_object_id(tstate, key);
file = _PySys_GetAttr(tstate, key);
message = PyUnicode_FromFormatV(format, va);
if (message != NULL) {
if (sys_pyfile_write_unicode(message, file) != 0) {
@ -3368,7 +3378,7 @@ PySys_FormatStdout(const char *format, ...)
va_list va;
va_start(va, format);
sys_format(&PyId_stdout, stdout, format, va);
sys_format(&_Py_ID(stdout), stdout, format, va);
va_end(va);
}
@ -3378,6 +3388,6 @@ PySys_FormatStderr(const char *format, ...)
va_list va;
va_start(va, format);
sys_format(&PyId_stderr, stderr, format, va);
sys_format(&_Py_ID(stderr), stderr, format, va);
va_end(va);
}