mirror of
https://github.com/python/cpython.git
synced 2025-07-19 01:05:26 +00:00
bpo-46541: Discover the global strings. (gh-31346)
Instead of manually enumerating the global strings in generate_global_objects.py, we extrapolate the list from usage of _Py_ID() and _Py_STR() in the source files. This is partly inspired by gh-31261. https://bugs.python.org/issue46541
This commit is contained in:
parent
278fdd3e3a
commit
12360aa159
9 changed files with 103 additions and 274 deletions
|
@ -13,298 +13,112 @@ INTERNAL = os.path.join(ROOT, 'Include', 'internal')
|
|||
STRING_LITERALS = {
|
||||
'empty': '',
|
||||
'dot': '.',
|
||||
'comma_sep': ', ',
|
||||
'percent': '%',
|
||||
'dbl_percent': '%%',
|
||||
|
||||
'"anonymous" labels': None,
|
||||
'anon_dictcomp': '<dictcomp>',
|
||||
'anon_genexpr': '<genexpr>',
|
||||
'anon_lambda': '<lambda>',
|
||||
'anon_listcomp': '<listcomp>',
|
||||
'anon_module': '<module>',
|
||||
'anon_setcomp': '<setcomp>',
|
||||
'anon_string': '<string>',
|
||||
'dot_locals': '.<locals>',
|
||||
}
|
||||
IGNORED = {
|
||||
'ACTION', # Python/_warnings.c
|
||||
'ATTR', # Python/_warnings.c and Objects/funcobject.c
|
||||
'DUNDER', # Objects/typeobject.c
|
||||
'RDUNDER', # Objects/typeobject.c
|
||||
'SPECIAL', # Objects/weakrefobject.c
|
||||
}
|
||||
IDENTIFIERS = [
|
||||
'Py_Repr',
|
||||
'TextIOWrapper',
|
||||
# from ADD() Python/_warnings.c
|
||||
'default',
|
||||
'ignore',
|
||||
|
||||
# from GET_WARNINGS_ATTR() in Python/_warnings.c
|
||||
'WarningMessage',
|
||||
'_',
|
||||
'__IOBase_closed',
|
||||
'__abc_tpflags__',
|
||||
'__abs__',
|
||||
'__abstractmethods__',
|
||||
'__add__',
|
||||
'__aenter__',
|
||||
'__aexit__',
|
||||
'__aiter__',
|
||||
'__all__',
|
||||
'__and__',
|
||||
'__anext__',
|
||||
'__annotations__',
|
||||
'__args__',
|
||||
'__await__',
|
||||
'__bases__',
|
||||
'__bool__',
|
||||
'__build_class__',
|
||||
'__builtins__',
|
||||
'_showwarnmsg',
|
||||
'_warn_unawaited_coroutine',
|
||||
'defaultaction',
|
||||
'filters',
|
||||
'onceregistry',
|
||||
|
||||
# from WRAP_METHOD() in Objects/weakrefobject.c
|
||||
'__bytes__',
|
||||
'__call__',
|
||||
'__cantrace__',
|
||||
'__class__',
|
||||
'__class_getitem__',
|
||||
'__classcell__',
|
||||
'__complex__',
|
||||
'__contains__',
|
||||
'__copy__',
|
||||
'__del__',
|
||||
'__delattr__',
|
||||
'__delete__',
|
||||
'__delitem__',
|
||||
'__dict__',
|
||||
'__dir__',
|
||||
'__divmod__',
|
||||
'__reversed__',
|
||||
|
||||
# from COPY_ATTR() in Objects/funcobject.c
|
||||
'__module__',
|
||||
'__name__',
|
||||
'__qualname__',
|
||||
'__doc__',
|
||||
'__enter__',
|
||||
'__eq__',
|
||||
'__exit__',
|
||||
'__file__',
|
||||
'__annotations__',
|
||||
|
||||
# from SLOT* in Objects/typeobject.c
|
||||
'__abs__',
|
||||
'__add__',
|
||||
'__and__',
|
||||
'__divmod__',
|
||||
'__float__',
|
||||
'__floordiv__',
|
||||
'__format__',
|
||||
'__fspath__',
|
||||
'__ge__',
|
||||
'__get__',
|
||||
'__getattr__',
|
||||
'__getattribute__',
|
||||
'__getinitargs__',
|
||||
'__getitem__',
|
||||
'__getnewargs__',
|
||||
'__getnewargs_ex__',
|
||||
'__getstate__',
|
||||
'__gt__',
|
||||
'__hash__',
|
||||
'__iadd__',
|
||||
'__iand__',
|
||||
'__ifloordiv__',
|
||||
'__ilshift__',
|
||||
'__imatmul__',
|
||||
'__imod__',
|
||||
'__import__',
|
||||
'__imul__',
|
||||
'__index__',
|
||||
'__init__',
|
||||
'__init_subclass__',
|
||||
'__instancecheck__',
|
||||
'__int__',
|
||||
'__invert__',
|
||||
'__ior__',
|
||||
'__ipow__',
|
||||
'__irshift__',
|
||||
'__isabstractmethod__',
|
||||
'__isub__',
|
||||
'__iter__',
|
||||
'__itruediv__',
|
||||
'__ixor__',
|
||||
'__le__',
|
||||
'__len__',
|
||||
'__length_hint__',
|
||||
'__loader__',
|
||||
'__lshift__',
|
||||
'__lt__',
|
||||
'__ltrace__',
|
||||
'__main__',
|
||||
'__matmul__',
|
||||
'__missing__',
|
||||
'__mod__',
|
||||
'__module__',
|
||||
'__mro_entries__',
|
||||
'__mul__',
|
||||
'__name__',
|
||||
'__ne__',
|
||||
'__neg__',
|
||||
'__new__',
|
||||
'__newobj__',
|
||||
'__newobj_ex__',
|
||||
'__next__',
|
||||
'__note__',
|
||||
'__or__',
|
||||
'__origin__',
|
||||
'__package__',
|
||||
'__parameters__',
|
||||
'__path__',
|
||||
'__pos__',
|
||||
'__pow__',
|
||||
'__prepare__',
|
||||
'__qualname__',
|
||||
'__radd__',
|
||||
'__rand__',
|
||||
'__rdivmod__',
|
||||
'__reduce__',
|
||||
'__reduce_ex__',
|
||||
'__repr__',
|
||||
'__reversed__',
|
||||
'__rfloordiv__',
|
||||
'__rlshift__',
|
||||
'__rmatmul__',
|
||||
'__rmod__',
|
||||
'__rmul__',
|
||||
'__ror__',
|
||||
'__round__',
|
||||
'__rpow__',
|
||||
'__rrshift__',
|
||||
'__rshift__',
|
||||
'__rsub__',
|
||||
'__rtruediv__',
|
||||
'__rxor__',
|
||||
'__set__',
|
||||
'__set_name__',
|
||||
'__setattr__',
|
||||
'__setitem__',
|
||||
'__setstate__',
|
||||
'__sizeof__',
|
||||
'__slotnames__',
|
||||
'__slots__',
|
||||
'__spec__',
|
||||
'__str__',
|
||||
'__sub__',
|
||||
'__subclasscheck__',
|
||||
'__subclasshook__',
|
||||
'__truediv__',
|
||||
'__trunc__',
|
||||
'__warningregistry__',
|
||||
'__weakref__',
|
||||
'__xor__',
|
||||
'_abc_impl',
|
||||
'_blksize',
|
||||
'_dealloc_warn',
|
||||
'_finalizing',
|
||||
'_find_and_load',
|
||||
'_fix_up_module',
|
||||
'_get_sourcefile',
|
||||
'_handle_fromlist',
|
||||
'_initializing',
|
||||
'_is_text_encoding',
|
||||
'_lock_unlock_module',
|
||||
'_showwarnmsg',
|
||||
'_shutdown',
|
||||
'_slotnames',
|
||||
'_strptime_time',
|
||||
'_uninitialized_submodules',
|
||||
'_warn_unawaited_coroutine',
|
||||
'_xoptions',
|
||||
'add',
|
||||
'append',
|
||||
'big',
|
||||
'buffer',
|
||||
'builtins',
|
||||
'clear',
|
||||
'close',
|
||||
'code',
|
||||
'copy',
|
||||
'copyreg',
|
||||
'decode',
|
||||
'default',
|
||||
'defaultaction',
|
||||
'difference_update',
|
||||
'dispatch_table',
|
||||
'displayhook',
|
||||
'enable',
|
||||
'encoding',
|
||||
'end_lineno',
|
||||
'end_offset',
|
||||
'errors',
|
||||
'excepthook',
|
||||
'extend',
|
||||
'filename',
|
||||
'fileno',
|
||||
'fillvalue',
|
||||
'filters',
|
||||
'find_class',
|
||||
'flush',
|
||||
'get',
|
||||
'get_source',
|
||||
'getattr',
|
||||
'ignore',
|
||||
'importlib',
|
||||
'intersection',
|
||||
'isatty',
|
||||
'items',
|
||||
'iter',
|
||||
'keys',
|
||||
'last_traceback',
|
||||
'last_type',
|
||||
'last_value',
|
||||
'latin1',
|
||||
'lineno',
|
||||
'little',
|
||||
'match',
|
||||
'metaclass',
|
||||
'mode',
|
||||
'modules',
|
||||
'mro',
|
||||
'msg',
|
||||
'n_fields',
|
||||
'n_sequence_fields',
|
||||
'n_unnamed_fields',
|
||||
'name',
|
||||
'obj',
|
||||
'offset',
|
||||
'onceregistry',
|
||||
'open',
|
||||
'parent',
|
||||
'partial',
|
||||
'path',
|
||||
'peek',
|
||||
'persistent_id',
|
||||
'persistent_load',
|
||||
'print_file_and_line',
|
||||
'ps1',
|
||||
'ps2',
|
||||
'raw',
|
||||
'read',
|
||||
'read1',
|
||||
'readable',
|
||||
'readall',
|
||||
'readinto',
|
||||
'readinto1',
|
||||
'readline',
|
||||
'reducer_override',
|
||||
'reload',
|
||||
'replace',
|
||||
'reset',
|
||||
'return',
|
||||
'reversed',
|
||||
'seek',
|
||||
'seekable',
|
||||
'send',
|
||||
'setstate',
|
||||
'sort',
|
||||
'stderr',
|
||||
'stdin',
|
||||
'stdout',
|
||||
'strict',
|
||||
'symmetric_difference_update',
|
||||
'tell',
|
||||
'text',
|
||||
'threading',
|
||||
'throw',
|
||||
'unraisablehook',
|
||||
'values',
|
||||
'version',
|
||||
'warnings',
|
||||
'warnoptions',
|
||||
'writable',
|
||||
'write',
|
||||
'zipimporter',
|
||||
]
|
||||
|
||||
|
||||
#######################################
|
||||
# helpers
|
||||
|
||||
def iter_global_strings():
    """Scan the C sources under ROOT for uses of the global-string macros.

    Every ``.c`` and ``.h`` file below ``ROOT`` is read line by line,
    except files in directories whose path relative to ``ROOT`` starts
    with ``Include``.  One 5-tuple is yielded per match:

    * ``(name, None, filename, lno, line)`` for each ``_Py_ID(name)``
    * ``(varname, string, filename, lno, line)`` for each
      ``_Py_DECLARE_STR(varname, "string")``

    ``lno`` is the 1-based line number and ``line`` is the matching line
    exactly as read from the file.  For any given line, all ``_Py_ID``
    matches are yielded before any ``_Py_DECLARE_STR`` matches.
    """
    id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
    str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for dirname, subdirs, files in os.walk(ROOT):
        if os.path.relpath(dirname, ROOT).startswith('Include'):
            # Everything below this directory shares the same prefix and
            # would be skipped too, so prune the walk instead of visiting it.
            subdirs[:] = []
            continue
        for name in files:
            if not name.endswith(('.c', '.h')):
                continue
            filename = os.path.join(dirname, name)
            with open(filename, encoding='utf-8') as infile:
                for lno, line in enumerate(infile, 1):
                    for m in id_regex.finditer(line):
                        identifier, = m.groups()
                        yield identifier, None, filename, lno, line
                    for m in str_regex.finditer(line):
                        varname, string = m.groups()
                        yield varname, string, filename, lno, line
|
||||
|
||||
def iter_to_marker(lines, marker):
|
||||
for line in lines:
|
||||
if line.rstrip() == marker:
|
||||
|
@ -354,7 +168,7 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj
|
|||
END = '/* End auto-generated code */'
|
||||
|
||||
|
||||
def generate_global_strings():
|
||||
def generate_global_strings(identifiers, strings):
|
||||
filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
|
||||
|
||||
# Read the non-generated part of the file.
|
||||
|
@ -371,22 +185,18 @@ def generate_global_strings():
|
|||
printer.write(START)
|
||||
with printer.block('struct _Py_global_strings', ';'):
|
||||
with printer.block('struct', ' literals;'):
|
||||
for name, literal in STRING_LITERALS.items():
|
||||
if literal is None:
|
||||
outfile.write('\n')
|
||||
printer.write(f'// {name}')
|
||||
else:
|
||||
printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
|
||||
for name, literal in sorted(strings.items()):
|
||||
printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
|
||||
outfile.write('\n')
|
||||
with printer.block('struct', ' identifiers;'):
|
||||
for name in sorted(IDENTIFIERS):
|
||||
for name in sorted(identifiers):
|
||||
assert name.isidentifier(), name
|
||||
printer.write(f'STRUCT_FOR_ID({name})')
|
||||
printer.write(END)
|
||||
printer.write(after)
|
||||
|
||||
|
||||
def generate_runtime_init():
|
||||
def generate_runtime_init(identifiers, strings):
|
||||
# First get some info from the declarations.
|
||||
nsmallposints = None
|
||||
nsmallnegints = None
|
||||
|
@ -432,13 +242,10 @@ def generate_runtime_init():
|
|||
# Global strings.
|
||||
with printer.block('.strings =', ','):
|
||||
with printer.block('.literals =', ','):
|
||||
for name, literal in STRING_LITERALS.items():
|
||||
if literal is None:
|
||||
printer.write('')
|
||||
else:
|
||||
printer.write(f'INIT_STR({name}, "{literal}"),')
|
||||
for name, literal in sorted(strings.items()):
|
||||
printer.write(f'INIT_STR({name}, "{literal}"),')
|
||||
with printer.block('.identifiers =', ','):
|
||||
for name in sorted(IDENTIFIERS):
|
||||
for name in sorted(identifiers):
|
||||
assert name.isidentifier(), name
|
||||
printer.write(f'INIT_ID({name}),')
|
||||
printer.write(END)
|
||||
|
@ -507,9 +314,9 @@ TYPESLOTS_RE = re.compile(r'''
|
|||
)
|
||||
''', re.VERBOSE)
|
||||
|
||||
def check_orphan_strings():
|
||||
def check_orphan_strings(identifiers):
|
||||
literals = set(n for n, s in STRING_LITERALS.items() if s)
|
||||
identifiers = set(IDENTIFIERS)
|
||||
identifiers = set(identifiers)
|
||||
files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
|
||||
for i, filename in enumerate(files, start=1):
|
||||
print('.', end='')
|
||||
|
@ -586,11 +393,23 @@ def check_orphan_strings():
|
|||
# the script
|
||||
|
||||
def main(*, check=False) -> None:
    """Collect the global strings and regenerate the dependent files.

    The identifier set starts from ``IDENTIFIERS`` and the string mapping
    from ``STRING_LITERALS``; both are then extended with everything
    reported by ``iter_global_strings()``.  Identifiers listed in
    ``IGNORED`` are not added.  The results are passed to
    ``generate_global_strings()`` and ``generate_runtime_init()``.

    Args:
        check: when true, also run ``check_orphan_strings()`` on the
            collected identifiers after generating.

    Raises:
        ValueError: if the same string name is declared with two
            different values.
    """
    identifiers = set(IDENTIFIERS)
    strings = dict(STRING_LITERALS)
    for name, string, _filename, _lno, _line in iter_global_strings():
        if string is None:
            # A _Py_ID() use: there is only a name, no literal value.
            if name not in IGNORED:
                identifiers.add(name)
        elif name not in strings:
            strings[name] = string
        elif string != strings[name]:
            raise ValueError(
                f'string mismatch for {name!r} '
                f'({string!r} != {strings[name]!r})'
            )

    generate_global_strings(identifiers, strings)
    generate_runtime_init(identifiers, strings)

    if check:
        check_orphan_strings(identifiers)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue