mirror of
https://github.com/python/cpython.git
synced 2025-08-24 18:55:00 +00:00
bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)
We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code. It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime. A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings. That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()). This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not changed (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
This commit is contained in:
parent
c018d3037b
commit
81c72044a1
108 changed files with 2282 additions and 1573 deletions
|
@ -260,11 +260,7 @@ get_unicode_state(void)
|
|||
// Return a borrowed reference to the empty string singleton.
|
||||
static inline PyObject* unicode_get_empty(void)
|
||||
{
|
||||
struct _Py_unicode_state *state = get_unicode_state();
|
||||
// unicode_get_empty() must not be called before _PyUnicode_Init()
|
||||
// or after _PyUnicode_Fini()
|
||||
assert(state->empty_string != NULL);
|
||||
return state->empty_string;
|
||||
return &_Py_STR(empty);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1388,25 +1384,6 @@ _PyUnicode_Dump(PyObject *op)
|
|||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
unicode_create_empty_string_singleton(struct _Py_unicode_state *state)
|
||||
{
|
||||
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
|
||||
// optimized to always use state->empty_string without having to check if
|
||||
// it is NULL or not.
|
||||
PyObject *empty = PyUnicode_New(1, 0);
|
||||
if (empty == NULL) {
|
||||
return -1;
|
||||
}
|
||||
PyUnicode_1BYTE_DATA(empty)[0] = 0;
|
||||
_PyUnicode_LENGTH(empty) = 0;
|
||||
assert(_PyUnicode_CheckConsistency(empty, 1));
|
||||
|
||||
assert(state->empty_string == NULL);
|
||||
state->empty_string = empty;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
PyObject *
|
||||
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||||
|
@ -2009,10 +1986,11 @@ unicode_dealloc(PyObject *unicode)
|
|||
static int
|
||||
unicode_is_singleton(PyObject *unicode)
|
||||
{
|
||||
struct _Py_unicode_state *state = get_unicode_state();
|
||||
if (unicode == state->empty_string) {
|
||||
if (unicode == &_Py_STR(empty)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct _Py_unicode_state *state = get_unicode_state();
|
||||
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
||||
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) {
|
||||
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
|
||||
|
@ -15551,11 +15529,14 @@ _PyUnicode_InitState(PyInterpreterState *interp)
|
|||
PyStatus
|
||||
_PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
|
||||
{
|
||||
struct _Py_unicode_state *state = &interp->unicode;
|
||||
if (unicode_create_empty_string_singleton(state) < 0) {
|
||||
return _PyStatus_NO_MEMORY();
|
||||
if (!_Py_IsMainInterpreter(interp)) {
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
|
||||
#endif
|
||||
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
||||
|
@ -15798,15 +15779,14 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
|
|||
static PyObject *
|
||||
unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
_Py_IDENTIFIER(iter);
|
||||
if (it->it_seq != NULL) {
|
||||
return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
|
||||
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
|
||||
it->it_seq, it->it_index);
|
||||
} else {
|
||||
PyObject *u = (PyObject *)_PyUnicode_New(0);
|
||||
if (u == NULL)
|
||||
return NULL;
|
||||
return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u);
|
||||
return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), u);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16137,7 +16117,6 @@ _PyUnicode_Fini(PyInterpreterState *interp)
|
|||
for (Py_ssize_t i = 0; i < 256; i++) {
|
||||
Py_CLEAR(state->latin1[i]);
|
||||
}
|
||||
Py_CLEAR(state->empty_string);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue