mirror of
https://github.com/python/cpython.git
synced 2025-07-18 16:55:20 +00:00
bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)
We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code. It is still used in a number of non-builtin stdlib modules. The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime. A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings). https://bugs.python.org/issue46541#msg411799 explains the rationale for this change. The core of the change is in: * (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros * Include/internal/pycore_runtime_init.h - added the static initializers for the global strings * Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState * Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings. That check is added to the PR CI config. The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()). This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *. The following are not changed (yet): * stop using _Py_IDENTIFIER() in the stdlib modules * (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI using this (private) API * (maybe) intern the strings during runtime init https://bugs.python.org/issue46541
This commit is contained in:
parent
c018d3037b
commit
81c72044a1
108 changed files with 2282 additions and 1573 deletions
|
@ -1,9 +1,7 @@
|
|||
import argparse
|
||||
import ast
|
||||
import builtins
|
||||
import collections
|
||||
import contextlib
|
||||
import glob
|
||||
import os.path
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
|
@ -12,6 +10,298 @@ ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
|||
INTERNAL = os.path.join(ROOT, 'Include', 'internal')
|
||||
|
||||
|
||||
STRING_LITERALS = {
|
||||
'empty': '',
|
||||
'dot': '.',
|
||||
'comma_sep': ', ',
|
||||
'percent': '%',
|
||||
'dbl_percent': '%%',
|
||||
|
||||
'"anonymous" labels': None,
|
||||
'anon_dictcomp': '<dictcomp>',
|
||||
'anon_genexpr': '<genexpr>',
|
||||
'anon_lambda': '<lambda>',
|
||||
'anon_listcomp': '<listcomp>',
|
||||
'anon_module': '<module>',
|
||||
'anon_setcomp': '<setcomp>',
|
||||
'anon_string': '<string>',
|
||||
'dot_locals': '.<locals>',
|
||||
}
|
||||
IDENTIFIERS = [
|
||||
'Py_Repr',
|
||||
'TextIOWrapper',
|
||||
'WarningMessage',
|
||||
'_',
|
||||
'__IOBase_closed',
|
||||
'__abc_tpflags__',
|
||||
'__abs__',
|
||||
'__abstractmethods__',
|
||||
'__add__',
|
||||
'__aenter__',
|
||||
'__aexit__',
|
||||
'__aiter__',
|
||||
'__all__',
|
||||
'__and__',
|
||||
'__anext__',
|
||||
'__annotations__',
|
||||
'__args__',
|
||||
'__await__',
|
||||
'__bases__',
|
||||
'__bool__',
|
||||
'__build_class__',
|
||||
'__builtins__',
|
||||
'__bytes__',
|
||||
'__call__',
|
||||
'__cantrace__',
|
||||
'__class__',
|
||||
'__class_getitem__',
|
||||
'__classcell__',
|
||||
'__complex__',
|
||||
'__contains__',
|
||||
'__copy__',
|
||||
'__del__',
|
||||
'__delattr__',
|
||||
'__delete__',
|
||||
'__delitem__',
|
||||
'__dict__',
|
||||
'__dir__',
|
||||
'__divmod__',
|
||||
'__doc__',
|
||||
'__enter__',
|
||||
'__eq__',
|
||||
'__exit__',
|
||||
'__file__',
|
||||
'__float__',
|
||||
'__floordiv__',
|
||||
'__format__',
|
||||
'__fspath__',
|
||||
'__ge__',
|
||||
'__get__',
|
||||
'__getattr__',
|
||||
'__getattribute__',
|
||||
'__getinitargs__',
|
||||
'__getitem__',
|
||||
'__getnewargs__',
|
||||
'__getnewargs_ex__',
|
||||
'__getstate__',
|
||||
'__gt__',
|
||||
'__hash__',
|
||||
'__iadd__',
|
||||
'__iand__',
|
||||
'__ifloordiv__',
|
||||
'__ilshift__',
|
||||
'__imatmul__',
|
||||
'__imod__',
|
||||
'__import__',
|
||||
'__imul__',
|
||||
'__index__',
|
||||
'__init__',
|
||||
'__init_subclass__',
|
||||
'__instancecheck__',
|
||||
'__int__',
|
||||
'__invert__',
|
||||
'__ior__',
|
||||
'__ipow__',
|
||||
'__irshift__',
|
||||
'__isabstractmethod__',
|
||||
'__isub__',
|
||||
'__iter__',
|
||||
'__itruediv__',
|
||||
'__ixor__',
|
||||
'__le__',
|
||||
'__len__',
|
||||
'__length_hint__',
|
||||
'__loader__',
|
||||
'__lshift__',
|
||||
'__lt__',
|
||||
'__ltrace__',
|
||||
'__main__',
|
||||
'__matmul__',
|
||||
'__missing__',
|
||||
'__mod__',
|
||||
'__module__',
|
||||
'__mro_entries__',
|
||||
'__mul__',
|
||||
'__name__',
|
||||
'__ne__',
|
||||
'__neg__',
|
||||
'__new__',
|
||||
'__newobj__',
|
||||
'__newobj_ex__',
|
||||
'__next__',
|
||||
'__note__',
|
||||
'__or__',
|
||||
'__origin__',
|
||||
'__package__',
|
||||
'__parameters__',
|
||||
'__path__',
|
||||
'__pos__',
|
||||
'__pow__',
|
||||
'__prepare__',
|
||||
'__qualname__',
|
||||
'__radd__',
|
||||
'__rand__',
|
||||
'__rdivmod__',
|
||||
'__reduce__',
|
||||
'__reduce_ex__',
|
||||
'__repr__',
|
||||
'__reversed__',
|
||||
'__rfloordiv__',
|
||||
'__rlshift__',
|
||||
'__rmatmul__',
|
||||
'__rmod__',
|
||||
'__rmul__',
|
||||
'__ror__',
|
||||
'__round__',
|
||||
'__rpow__',
|
||||
'__rrshift__',
|
||||
'__rshift__',
|
||||
'__rsub__',
|
||||
'__rtruediv__',
|
||||
'__rxor__',
|
||||
'__set__',
|
||||
'__set_name__',
|
||||
'__setattr__',
|
||||
'__setitem__',
|
||||
'__setstate__',
|
||||
'__sizeof__',
|
||||
'__slotnames__',
|
||||
'__slots__',
|
||||
'__spec__',
|
||||
'__str__',
|
||||
'__sub__',
|
||||
'__subclasscheck__',
|
||||
'__subclasshook__',
|
||||
'__truediv__',
|
||||
'__trunc__',
|
||||
'__warningregistry__',
|
||||
'__weakref__',
|
||||
'__xor__',
|
||||
'_abc_impl',
|
||||
'_blksize',
|
||||
'_dealloc_warn',
|
||||
'_finalizing',
|
||||
'_find_and_load',
|
||||
'_fix_up_module',
|
||||
'_get_sourcefile',
|
||||
'_handle_fromlist',
|
||||
'_initializing',
|
||||
'_is_text_encoding',
|
||||
'_lock_unlock_module',
|
||||
'_showwarnmsg',
|
||||
'_shutdown',
|
||||
'_slotnames',
|
||||
'_strptime_time',
|
||||
'_uninitialized_submodules',
|
||||
'_warn_unawaited_coroutine',
|
||||
'_xoptions',
|
||||
'add',
|
||||
'append',
|
||||
'big',
|
||||
'buffer',
|
||||
'builtins',
|
||||
'clear',
|
||||
'close',
|
||||
'code',
|
||||
'copy',
|
||||
'copyreg',
|
||||
'decode',
|
||||
'default',
|
||||
'defaultaction',
|
||||
'difference_update',
|
||||
'dispatch_table',
|
||||
'displayhook',
|
||||
'enable',
|
||||
'encoding',
|
||||
'end_lineno',
|
||||
'end_offset',
|
||||
'errors',
|
||||
'excepthook',
|
||||
'extend',
|
||||
'filename',
|
||||
'fileno',
|
||||
'fillvalue',
|
||||
'filters',
|
||||
'find_class',
|
||||
'flush',
|
||||
'get',
|
||||
'get_source',
|
||||
'getattr',
|
||||
'ignore',
|
||||
'importlib',
|
||||
'intersection',
|
||||
'isatty',
|
||||
'items',
|
||||
'iter',
|
||||
'keys',
|
||||
'last_traceback',
|
||||
'last_type',
|
||||
'last_value',
|
||||
'latin1',
|
||||
'lineno',
|
||||
'little',
|
||||
'match',
|
||||
'metaclass',
|
||||
'mode',
|
||||
'modules',
|
||||
'mro',
|
||||
'msg',
|
||||
'n_fields',
|
||||
'n_sequence_fields',
|
||||
'n_unnamed_fields',
|
||||
'name',
|
||||
'obj',
|
||||
'offset',
|
||||
'onceregistry',
|
||||
'open',
|
||||
'parent',
|
||||
'partial',
|
||||
'path',
|
||||
'peek',
|
||||
'persistent_id',
|
||||
'persistent_load',
|
||||
'print_file_and_line',
|
||||
'ps1',
|
||||
'ps2',
|
||||
'raw',
|
||||
'read',
|
||||
'read1',
|
||||
'readable',
|
||||
'readall',
|
||||
'readinto',
|
||||
'readinto1',
|
||||
'readline',
|
||||
'reducer_override',
|
||||
'reload',
|
||||
'replace',
|
||||
'reset',
|
||||
'return',
|
||||
'reversed',
|
||||
'seek',
|
||||
'seekable',
|
||||
'send',
|
||||
'setstate',
|
||||
'sort',
|
||||
'stderr',
|
||||
'stdin',
|
||||
'stdout',
|
||||
'strict',
|
||||
'symmetric_difference_update',
|
||||
'tell',
|
||||
'text',
|
||||
'threading',
|
||||
'throw',
|
||||
'unraisablehook',
|
||||
'values',
|
||||
'version',
|
||||
'warnings',
|
||||
'warnoptions',
|
||||
'writable',
|
||||
'write',
|
||||
'zipimporter',
|
||||
]
|
||||
|
||||
|
||||
#######################################
|
||||
# helpers
|
||||
|
||||
|
@ -64,6 +354,38 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj
|
|||
END = '/* End auto-generated code */'
|
||||
|
||||
|
||||
def generate_global_strings():
|
||||
filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
|
||||
|
||||
# Read the non-generated part of the file.
|
||||
with open(filename) as infile:
|
||||
before = ''.join(iter_to_marker(infile, START))[:-1]
|
||||
for _ in iter_to_marker(infile, END):
|
||||
pass
|
||||
after = infile.read()[:-1]
|
||||
|
||||
# Generate the file.
|
||||
with open(filename, 'w', encoding='utf-8') as outfile:
|
||||
printer = Printer(outfile)
|
||||
printer.write(before)
|
||||
printer.write(START)
|
||||
with printer.block('struct _Py_global_strings', ';'):
|
||||
with printer.block('struct', ' literals;'):
|
||||
for name, literal in STRING_LITERALS.items():
|
||||
if literal is None:
|
||||
outfile.write('\n')
|
||||
printer.write(f'// {name}')
|
||||
else:
|
||||
printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
|
||||
outfile.write('\n')
|
||||
with printer.block('struct', ' identifiers;'):
|
||||
for name in sorted(IDENTIFIERS):
|
||||
assert name.isidentifier(), name
|
||||
printer.write(f'STRUCT_FOR_ID({name})')
|
||||
printer.write(END)
|
||||
printer.write(after)
|
||||
|
||||
|
||||
def generate_runtime_init():
|
||||
# First get some info from the declarations.
|
||||
nsmallposints = None
|
||||
|
@ -106,19 +428,174 @@ def generate_runtime_init():
|
|||
with printer.block('.bytes_characters =', ','):
|
||||
for i in range(256):
|
||||
printer.write(f'_PyBytes_CHAR_INIT({i}),')
|
||||
printer.write('')
|
||||
# Global strings.
|
||||
with printer.block('.strings =', ','):
|
||||
with printer.block('.literals =', ','):
|
||||
for name, literal in STRING_LITERALS.items():
|
||||
if literal is None:
|
||||
printer.write('')
|
||||
else:
|
||||
printer.write(f'INIT_STR({name}, "{literal}"),')
|
||||
with printer.block('.identifiers =', ','):
|
||||
for name in sorted(IDENTIFIERS):
|
||||
assert name.isidentifier(), name
|
||||
printer.write(f'INIT_ID({name}),')
|
||||
printer.write(END)
|
||||
printer.write(after)
|
||||
|
||||
|
||||
#######################################
|
||||
# checks
|
||||
|
||||
def err(msg):
|
||||
print(msg, file=sys.stderr)
|
||||
|
||||
|
||||
GETTER_RE = re.compile(r'''
|
||||
^
|
||||
.*?
|
||||
(?:
|
||||
(?:
|
||||
_Py_ID
|
||||
[(]
|
||||
( \w+ ) # <identifier>
|
||||
[)]
|
||||
)
|
||||
|
|
||||
(?:
|
||||
_Py_STR
|
||||
[(]
|
||||
( \w+ ) # <literal>
|
||||
[)]
|
||||
)
|
||||
)
|
||||
''', re.VERBOSE)
|
||||
TYPESLOTS_RE = re.compile(r'''
|
||||
^
|
||||
.*?
|
||||
(?:
|
||||
(?:
|
||||
SLOT0 [(] .*?, \s*
|
||||
( \w+ ) # <slot0>
|
||||
[)]
|
||||
)
|
||||
|
|
||||
(?:
|
||||
SLOT1 [(] .*?, \s*
|
||||
( \w+ ) # <slot1>
|
||||
, .* [)]
|
||||
)
|
||||
|
|
||||
(?:
|
||||
SLOT1BIN [(] .*?, .*?, \s*
|
||||
( \w+ ) # <slot1bin>
|
||||
, \s*
|
||||
( \w+ ) # <reverse>
|
||||
[)]
|
||||
)
|
||||
|
|
||||
(?:
|
||||
SLOT1BINFULL [(] .*?, .*?, .*?, \s*
|
||||
( \w+ ) # <slot1binfull>
|
||||
, \s*
|
||||
( \w+ ) # <fullreverse>
|
||||
[)]
|
||||
)
|
||||
|
|
||||
( SLOT \d .* [^)] $ ) # <wrapped>
|
||||
)
|
||||
''', re.VERBOSE)
|
||||
|
||||
def check_orphan_strings():
|
||||
literals = set(n for n, s in STRING_LITERALS.items() if s)
|
||||
identifiers = set(IDENTIFIERS)
|
||||
files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
|
||||
for i, filename in enumerate(files, start=1):
|
||||
print('.', end='')
|
||||
if i % 5 == 0:
|
||||
print(' ', end='')
|
||||
if i % 20 == 0:
|
||||
print()
|
||||
if i % 100 == 0:
|
||||
print()
|
||||
with open(filename) as infile:
|
||||
wrapped = None
|
||||
for line in infile:
|
||||
identifier = literal = reverse = None
|
||||
|
||||
line = line.splitlines()[0]
|
||||
if wrapped:
|
||||
line = f'{wrapped.rstrip()} {line}'
|
||||
wrapped = None
|
||||
|
||||
if os.path.basename(filename) == '_warnings.c':
|
||||
m = re.match(r'^.* = GET_WARNINGS_ATTR[(][^,]*, (\w+),', line)
|
||||
if m:
|
||||
identifier, = m.groups()
|
||||
elif os.path.basename(filename) == 'typeobject.c':
|
||||
m = TYPESLOTS_RE.match(line)
|
||||
if m:
|
||||
(slot0,
|
||||
slot1,
|
||||
slot1bin, reverse,
|
||||
slot1binfull, fullreverse,
|
||||
wrapped,
|
||||
) = m.groups()
|
||||
identifier = slot0 or slot1 or slot1bin or slot1binfull
|
||||
reverse = reverse or fullreverse
|
||||
|
||||
if not identifier and not literal:
|
||||
m = GETTER_RE.match(line)
|
||||
if not m:
|
||||
continue
|
||||
identifier, literal = m.groups()
|
||||
|
||||
if literal:
|
||||
if literals and literal in literals:
|
||||
literals.remove(literal)
|
||||
if identifier:
|
||||
if identifiers and identifier in identifiers:
|
||||
identifiers.remove(identifier)
|
||||
if reverse:
|
||||
if identifiers and reverse in identifiers:
|
||||
identifiers.remove(reverse)
|
||||
if not literals and not identifiers:
|
||||
break
|
||||
else:
|
||||
continue
|
||||
break
|
||||
if i % 20:
|
||||
print()
|
||||
if not literals and not identifiers:
|
||||
return
|
||||
print('ERROR:', file=sys.stderr)
|
||||
if literals:
|
||||
err(' unused global string literals:')
|
||||
for name in sorted(literals):
|
||||
err(f' {name}')
|
||||
if identifiers:
|
||||
if literals:
|
||||
print()
|
||||
err(' unused global identifiers:')
|
||||
for name in sorted(identifiers):
|
||||
err(f' {name}')
|
||||
|
||||
|
||||
#######################################
|
||||
# the script
|
||||
|
||||
def main() -> None:
|
||||
def main(*, check=False) -> None:
|
||||
generate_global_strings()
|
||||
generate_runtime_init()
|
||||
|
||||
if check:
|
||||
check_orphan_strings()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
argv = sys.argv[1:]
|
||||
if argv:
|
||||
sys.exit(f'ERROR: got unexpected args {argv}')
|
||||
main()
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--check', action='store_true')
|
||||
args = parser.parse_args()
|
||||
main(**vars(args))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue