bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)

We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code.  It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime.  A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings.  That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()).  This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are left for later (not done yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable, since at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-08 13:39:07 -07:00 committed by GitHub
parent c018d3037b
commit 81c72044a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
108 changed files with 2282 additions and 1573 deletions

View file

@ -82,7 +82,6 @@ extern "C" {
.ob_digit = { ((val) >= 0 ? (val) : -(val)) }, \
}
#define _PyBytes_SIMPLE_INIT(CH, LEN) \
{ \
_PyVarObject_IMMORTAL_INIT(&PyBytes_Type, LEN), \
@ -94,6 +93,26 @@ extern "C" {
_PyBytes_SIMPLE_INIT(CH, 1) \
}
/* Static initializer for a global string object built from a string literal.
 *
 * LITERAL must be a string literal (not a pointer): the length is taken with
 * sizeof(LITERAL) - 1, i.e. the literal's size minus its terminating NUL.
 *
 * The expansion fills two members:
 *   ._ascii  - the object header, initialized as an immortal object of type
 *              PyUnicode_Type, with the length, a hash of -1 (presumably the
 *              "not computed yet" sentinel -- confirm against the unicode
 *              implementation), and state flags kind=1, compact=1, ascii=1,
 *              ready=1.
 *   ._data   - the character data, initialized directly from LITERAL.
 *
 * NOTE(review): the enclosing struct type is declared elsewhere (not visible
 * here); it must provide exactly these `_ascii` and `_data` members.
 */
#define _PyASCIIObject_INIT(LITERAL) \
{ \
._ascii = { \
.ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \
.length = sizeof(LITERAL) - 1, \
.hash = -1, \
.state = { \
.kind = 1, \
.compact = 1, \
.ascii = 1, \
.ready = 1, \
}, \
}, \
._data = LITERAL, \
}
/* Designated initializer for the field `_<NAME>`, set to a static string
 * object whose contents are LITERAL.  Used for strings whose text is not a
 * valid C identifier (e.g. INIT_STR(dot, ".") initializes `._dot`). */
#define INIT_STR(NAME, LITERAL) \
._ ## NAME = _PyASCIIObject_INIT(LITERAL)
/* Designated initializer for the field `_<NAME>`, set to a static string
 * object whose contents are the stringized NAME itself (#NAME), so the field
 * name and the string text always match (e.g. INIT_ID(append) initializes
 * `._append` with "append"). */
#define INIT_ID(NAME) \
._ ## NAME = _PyASCIIObject_INIT(#NAME)
/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */
#define _Py_global_objects_INIT { \
@ -622,6 +641,298 @@ extern "C" {
_PyBytes_CHAR_INIT(254), \
_PyBytes_CHAR_INIT(255), \
}, \
\
.strings = { \
.literals = { \
INIT_STR(empty, ""), \
INIT_STR(dot, "."), \
INIT_STR(comma_sep, ", "), \
INIT_STR(percent, "%"), \
INIT_STR(dbl_percent, "%%"), \
\
INIT_STR(anon_dictcomp, "<dictcomp>"), \
INIT_STR(anon_genexpr, "<genexpr>"), \
INIT_STR(anon_lambda, "<lambda>"), \
INIT_STR(anon_listcomp, "<listcomp>"), \
INIT_STR(anon_module, "<module>"), \
INIT_STR(anon_setcomp, "<setcomp>"), \
INIT_STR(anon_string, "<string>"), \
INIT_STR(dot_locals, ".<locals>"), \
}, \
.identifiers = { \
INIT_ID(Py_Repr), \
INIT_ID(TextIOWrapper), \
INIT_ID(WarningMessage), \
INIT_ID(_), \
INIT_ID(__IOBase_closed), \
INIT_ID(__abc_tpflags__), \
INIT_ID(__abs__), \
INIT_ID(__abstractmethods__), \
INIT_ID(__add__), \
INIT_ID(__aenter__), \
INIT_ID(__aexit__), \
INIT_ID(__aiter__), \
INIT_ID(__all__), \
INIT_ID(__and__), \
INIT_ID(__anext__), \
INIT_ID(__annotations__), \
INIT_ID(__args__), \
INIT_ID(__await__), \
INIT_ID(__bases__), \
INIT_ID(__bool__), \
INIT_ID(__build_class__), \
INIT_ID(__builtins__), \
INIT_ID(__bytes__), \
INIT_ID(__call__), \
INIT_ID(__cantrace__), \
INIT_ID(__class__), \
INIT_ID(__class_getitem__), \
INIT_ID(__classcell__), \
INIT_ID(__complex__), \
INIT_ID(__contains__), \
INIT_ID(__copy__), \
INIT_ID(__del__), \
INIT_ID(__delattr__), \
INIT_ID(__delete__), \
INIT_ID(__delitem__), \
INIT_ID(__dict__), \
INIT_ID(__dir__), \
INIT_ID(__divmod__), \
INIT_ID(__doc__), \
INIT_ID(__enter__), \
INIT_ID(__eq__), \
INIT_ID(__exit__), \
INIT_ID(__file__), \
INIT_ID(__float__), \
INIT_ID(__floordiv__), \
INIT_ID(__format__), \
INIT_ID(__fspath__), \
INIT_ID(__ge__), \
INIT_ID(__get__), \
INIT_ID(__getattr__), \
INIT_ID(__getattribute__), \
INIT_ID(__getinitargs__), \
INIT_ID(__getitem__), \
INIT_ID(__getnewargs__), \
INIT_ID(__getnewargs_ex__), \
INIT_ID(__getstate__), \
INIT_ID(__gt__), \
INIT_ID(__hash__), \
INIT_ID(__iadd__), \
INIT_ID(__iand__), \
INIT_ID(__ifloordiv__), \
INIT_ID(__ilshift__), \
INIT_ID(__imatmul__), \
INIT_ID(__imod__), \
INIT_ID(__import__), \
INIT_ID(__imul__), \
INIT_ID(__index__), \
INIT_ID(__init__), \
INIT_ID(__init_subclass__), \
INIT_ID(__instancecheck__), \
INIT_ID(__int__), \
INIT_ID(__invert__), \
INIT_ID(__ior__), \
INIT_ID(__ipow__), \
INIT_ID(__irshift__), \
INIT_ID(__isabstractmethod__), \
INIT_ID(__isub__), \
INIT_ID(__iter__), \
INIT_ID(__itruediv__), \
INIT_ID(__ixor__), \
INIT_ID(__le__), \
INIT_ID(__len__), \
INIT_ID(__length_hint__), \
INIT_ID(__loader__), \
INIT_ID(__lshift__), \
INIT_ID(__lt__), \
INIT_ID(__ltrace__), \
INIT_ID(__main__), \
INIT_ID(__matmul__), \
INIT_ID(__missing__), \
INIT_ID(__mod__), \
INIT_ID(__module__), \
INIT_ID(__mro_entries__), \
INIT_ID(__mul__), \
INIT_ID(__name__), \
INIT_ID(__ne__), \
INIT_ID(__neg__), \
INIT_ID(__new__), \
INIT_ID(__newobj__), \
INIT_ID(__newobj_ex__), \
INIT_ID(__next__), \
INIT_ID(__note__), \
INIT_ID(__or__), \
INIT_ID(__origin__), \
INIT_ID(__package__), \
INIT_ID(__parameters__), \
INIT_ID(__path__), \
INIT_ID(__pos__), \
INIT_ID(__pow__), \
INIT_ID(__prepare__), \
INIT_ID(__qualname__), \
INIT_ID(__radd__), \
INIT_ID(__rand__), \
INIT_ID(__rdivmod__), \
INIT_ID(__reduce__), \
INIT_ID(__reduce_ex__), \
INIT_ID(__repr__), \
INIT_ID(__reversed__), \
INIT_ID(__rfloordiv__), \
INIT_ID(__rlshift__), \
INIT_ID(__rmatmul__), \
INIT_ID(__rmod__), \
INIT_ID(__rmul__), \
INIT_ID(__ror__), \
INIT_ID(__round__), \
INIT_ID(__rpow__), \
INIT_ID(__rrshift__), \
INIT_ID(__rshift__), \
INIT_ID(__rsub__), \
INIT_ID(__rtruediv__), \
INIT_ID(__rxor__), \
INIT_ID(__set__), \
INIT_ID(__set_name__), \
INIT_ID(__setattr__), \
INIT_ID(__setitem__), \
INIT_ID(__setstate__), \
INIT_ID(__sizeof__), \
INIT_ID(__slotnames__), \
INIT_ID(__slots__), \
INIT_ID(__spec__), \
INIT_ID(__str__), \
INIT_ID(__sub__), \
INIT_ID(__subclasscheck__), \
INIT_ID(__subclasshook__), \
INIT_ID(__truediv__), \
INIT_ID(__trunc__), \
INIT_ID(__warningregistry__), \
INIT_ID(__weakref__), \
INIT_ID(__xor__), \
INIT_ID(_abc_impl), \
INIT_ID(_blksize), \
INIT_ID(_dealloc_warn), \
INIT_ID(_finalizing), \
INIT_ID(_find_and_load), \
INIT_ID(_fix_up_module), \
INIT_ID(_get_sourcefile), \
INIT_ID(_handle_fromlist), \
INIT_ID(_initializing), \
INIT_ID(_is_text_encoding), \
INIT_ID(_lock_unlock_module), \
INIT_ID(_showwarnmsg), \
INIT_ID(_shutdown), \
INIT_ID(_slotnames), \
INIT_ID(_strptime_time), \
INIT_ID(_uninitialized_submodules), \
INIT_ID(_warn_unawaited_coroutine), \
INIT_ID(_xoptions), \
INIT_ID(add), \
INIT_ID(append), \
INIT_ID(big), \
INIT_ID(buffer), \
INIT_ID(builtins), \
INIT_ID(clear), \
INIT_ID(close), \
INIT_ID(code), \
INIT_ID(copy), \
INIT_ID(copyreg), \
INIT_ID(decode), \
INIT_ID(default), \
INIT_ID(defaultaction), \
INIT_ID(difference_update), \
INIT_ID(dispatch_table), \
INIT_ID(displayhook), \
INIT_ID(enable), \
INIT_ID(encoding), \
INIT_ID(end_lineno), \
INIT_ID(end_offset), \
INIT_ID(errors), \
INIT_ID(excepthook), \
INIT_ID(extend), \
INIT_ID(filename), \
INIT_ID(fileno), \
INIT_ID(fillvalue), \
INIT_ID(filters), \
INIT_ID(find_class), \
INIT_ID(flush), \
INIT_ID(get), \
INIT_ID(get_source), \
INIT_ID(getattr), \
INIT_ID(ignore), \
INIT_ID(importlib), \
INIT_ID(intersection), \
INIT_ID(isatty), \
INIT_ID(items), \
INIT_ID(iter), \
INIT_ID(keys), \
INIT_ID(last_traceback), \
INIT_ID(last_type), \
INIT_ID(last_value), \
INIT_ID(latin1), \
INIT_ID(lineno), \
INIT_ID(little), \
INIT_ID(match), \
INIT_ID(metaclass), \
INIT_ID(mode), \
INIT_ID(modules), \
INIT_ID(mro), \
INIT_ID(msg), \
INIT_ID(n_fields), \
INIT_ID(n_sequence_fields), \
INIT_ID(n_unnamed_fields), \
INIT_ID(name), \
INIT_ID(obj), \
INIT_ID(offset), \
INIT_ID(onceregistry), \
INIT_ID(open), \
INIT_ID(parent), \
INIT_ID(partial), \
INIT_ID(path), \
INIT_ID(peek), \
INIT_ID(persistent_id), \
INIT_ID(persistent_load), \
INIT_ID(print_file_and_line), \
INIT_ID(ps1), \
INIT_ID(ps2), \
INIT_ID(raw), \
INIT_ID(read), \
INIT_ID(read1), \
INIT_ID(readable), \
INIT_ID(readall), \
INIT_ID(readinto), \
INIT_ID(readinto1), \
INIT_ID(readline), \
INIT_ID(reducer_override), \
INIT_ID(reload), \
INIT_ID(replace), \
INIT_ID(reset), \
INIT_ID(return), \
INIT_ID(reversed), \
INIT_ID(seek), \
INIT_ID(seekable), \
INIT_ID(send), \
INIT_ID(setstate), \
INIT_ID(sort), \
INIT_ID(stderr), \
INIT_ID(stdin), \
INIT_ID(stdout), \
INIT_ID(strict), \
INIT_ID(symmetric_difference_update), \
INIT_ID(tell), \
INIT_ID(text), \
INIT_ID(threading), \
INIT_ID(throw), \
INIT_ID(unraisablehook), \
INIT_ID(values), \
INIT_ID(version), \
INIT_ID(warnings), \
INIT_ID(warnoptions), \
INIT_ID(writable), \
INIT_ID(write), \
INIT_ID(zipimporter), \
}, \
}, \
}, \
}
/* End auto-generated code */