bpo-46541: Discover the global strings. (gh-31346)

Instead of manually enumerating the global strings in generate_global_objects.py, we extrapolate the list from usage of _Py_ID() and _Py_DECLARE_STR() (the declaration that accompanies each _Py_STR() use) in the source files.

This is partly inspired by gh-31261.

https://bugs.python.org/issue46541
This commit is contained in:
Eric Snow 2022-02-14 17:36:51 -07:00 committed by GitHub
parent 278fdd3e3a
commit 12360aa159
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 103 additions and 274 deletions

View file

@ -28,13 +28,6 @@ extern "C" {
/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */ /* The following is auto-generated by Tools/scripts/generate_global_objects.py. */
struct _Py_global_strings { struct _Py_global_strings {
struct { struct {
STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(dot, ".")
STRUCT_FOR_STR(comma_sep, ", ")
STRUCT_FOR_STR(percent, "%")
STRUCT_FOR_STR(dbl_percent, "%%")
// "anonymous" labels
STRUCT_FOR_STR(anon_dictcomp, "<dictcomp>") STRUCT_FOR_STR(anon_dictcomp, "<dictcomp>")
STRUCT_FOR_STR(anon_genexpr, "<genexpr>") STRUCT_FOR_STR(anon_genexpr, "<genexpr>")
STRUCT_FOR_STR(anon_lambda, "<lambda>") STRUCT_FOR_STR(anon_lambda, "<lambda>")
@ -42,7 +35,12 @@ struct _Py_global_strings {
STRUCT_FOR_STR(anon_module, "<module>") STRUCT_FOR_STR(anon_module, "<module>")
STRUCT_FOR_STR(anon_setcomp, "<setcomp>") STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
STRUCT_FOR_STR(anon_string, "<string>") STRUCT_FOR_STR(anon_string, "<string>")
STRUCT_FOR_STR(comma_sep, ", ")
STRUCT_FOR_STR(dbl_percent, "%%")
STRUCT_FOR_STR(dot, ".")
STRUCT_FOR_STR(dot_locals, ".<locals>") STRUCT_FOR_STR(dot_locals, ".<locals>")
STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(percent, "%")
} literals; } literals;
struct { struct {
@ -330,6 +328,7 @@ struct _Py_global_strings {
#define _Py_STR(NAME) \ #define _Py_STR(NAME) \
(_Py_SINGLETON(strings.literals._ ## NAME._ascii.ob_base)) (_Py_SINGLETON(strings.literals._ ## NAME._ascii.ob_base))
#define _Py_DECLARE_STR(name, str)
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -644,12 +644,6 @@ extern "C" {
\ \
.strings = { \ .strings = { \
.literals = { \ .literals = { \
INIT_STR(empty, ""), \
INIT_STR(dot, "."), \
INIT_STR(comma_sep, ", "), \
INIT_STR(percent, "%"), \
INIT_STR(dbl_percent, "%%"), \
\
INIT_STR(anon_dictcomp, "<dictcomp>"), \ INIT_STR(anon_dictcomp, "<dictcomp>"), \
INIT_STR(anon_genexpr, "<genexpr>"), \ INIT_STR(anon_genexpr, "<genexpr>"), \
INIT_STR(anon_lambda, "<lambda>"), \ INIT_STR(anon_lambda, "<lambda>"), \
@ -657,7 +651,12 @@ extern "C" {
INIT_STR(anon_module, "<module>"), \ INIT_STR(anon_module, "<module>"), \
INIT_STR(anon_setcomp, "<setcomp>"), \ INIT_STR(anon_setcomp, "<setcomp>"), \
INIT_STR(anon_string, "<string>"), \ INIT_STR(anon_string, "<string>"), \
INIT_STR(comma_sep, ", "), \
INIT_STR(dbl_percent, "%%"), \
INIT_STR(dot, "."), \
INIT_STR(dot_locals, ".<locals>"), \ INIT_STR(dot_locals, ".<locals>"), \
INIT_STR(empty, ""), \
INIT_STR(percent, "%"), \
}, \ }, \
.identifiers = { \ .identifiers = { \
INIT_ID(Py_Repr), \ INIT_ID(Py_Repr), \

View file

@ -4546,6 +4546,7 @@ object_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Py_DECREF(sorted_methods); Py_DECREF(sorted_methods);
return NULL; return NULL;
} }
_Py_DECLARE_STR(comma_sep, ", ");
joined = PyUnicode_Join(&_Py_STR(comma_sep), sorted_methods); joined = PyUnicode_Join(&_Py_STR(comma_sep), sorted_methods);
method_count = PyObject_Length(sorted_methods); method_count = PyObject_Length(sorted_methods);
Py_DECREF(sorted_methods); Py_DECREF(sorted_methods);

View file

@ -458,12 +458,12 @@ proxy_checkref(PyWeakReference *proxy)
return res; \ return res; \
} }
#define WRAP_METHOD(method, special) \ #define WRAP_METHOD(method, SPECIAL) \
static PyObject * \ static PyObject * \
method(PyObject *proxy, PyObject *Py_UNUSED(ignored)) { \ method(PyObject *proxy, PyObject *Py_UNUSED(ignored)) { \
UNWRAP(proxy); \ UNWRAP(proxy); \
Py_INCREF(proxy); \ Py_INCREF(proxy); \
PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(special)); \ PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(SPECIAL)); \
Py_DECREF(proxy); \ Py_DECREF(proxy); \
return res; \ return res; \
} }

View file

@ -186,8 +186,8 @@ check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg)
return rc; return rc;
} }
#define GET_WARNINGS_ATTR(interp, attr, try_import) \ #define GET_WARNINGS_ATTR(interp, ATTR, try_import) \
get_warnings_attr(interp, &_Py_ID(attr), try_import) get_warnings_attr(interp, &_Py_ID(ATTR), try_import)
/* /*
Returns a new reference. Returns a new reference.

View file

@ -268,6 +268,8 @@ parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena)
PyObject *str = PyUnicode_Substring(fmt, start, pos); PyObject *str = PyUnicode_Substring(fmt, start, pos);
/* str = str.replace('%%', '%') */ /* str = str.replace('%%', '%') */
if (str && has_percents) { if (str && has_percents) {
_Py_DECLARE_STR(percent, "%");
_Py_DECLARE_STR(dbl_percent, "%%");
Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent), Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent),
&_Py_STR(percent), -1)); &_Py_STR(percent), -1));
} }

View file

@ -667,6 +667,7 @@ compiler_set_qualname(struct compiler *c)
|| parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION || parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
|| parent->u_scope_type == COMPILER_SCOPE_LAMBDA) || parent->u_scope_type == COMPILER_SCOPE_LAMBDA)
{ {
_Py_DECLARE_STR(dot_locals, ".<locals>");
base = PyUnicode_Concat(parent->u_qualname, base = PyUnicode_Concat(parent->u_qualname,
&_Py_STR(dot_locals)); &_Py_STR(dot_locals));
if (base == NULL) if (base == NULL)
@ -2022,6 +2023,7 @@ compiler_mod(struct compiler *c, mod_ty mod)
{ {
PyCodeObject *co; PyCodeObject *co;
int addNone = 1; int addNone = 1;
_Py_DECLARE_STR(anon_module, "<module>");
if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE, if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
mod, 1)) { mod, 1)) {
return NULL; return NULL;
@ -2876,6 +2878,7 @@ compiler_lambda(struct compiler *c, expr_ty e)
return 0; return 0;
} }
_Py_DECLARE_STR(anon_lambda, "<lambda>");
if (!compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA, if (!compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA,
(void *)e, e->lineno)) { (void *)e, e->lineno)) {
return 0; return 0;
@ -5347,6 +5350,7 @@ static int
compiler_genexp(struct compiler *c, expr_ty e) compiler_genexp(struct compiler *c, expr_ty e)
{ {
assert(e->kind == GeneratorExp_kind); assert(e->kind == GeneratorExp_kind);
_Py_DECLARE_STR(anon_genexpr, "<genexpr>");
return compiler_comprehension(c, e, COMP_GENEXP, &_Py_STR(anon_genexpr), return compiler_comprehension(c, e, COMP_GENEXP, &_Py_STR(anon_genexpr),
e->v.GeneratorExp.generators, e->v.GeneratorExp.generators,
e->v.GeneratorExp.elt, NULL); e->v.GeneratorExp.elt, NULL);
@ -5356,6 +5360,7 @@ static int
compiler_listcomp(struct compiler *c, expr_ty e) compiler_listcomp(struct compiler *c, expr_ty e)
{ {
assert(e->kind == ListComp_kind); assert(e->kind == ListComp_kind);
_Py_DECLARE_STR(anon_listcomp, "<listcomp>");
return compiler_comprehension(c, e, COMP_LISTCOMP, &_Py_STR(anon_listcomp), return compiler_comprehension(c, e, COMP_LISTCOMP, &_Py_STR(anon_listcomp),
e->v.ListComp.generators, e->v.ListComp.generators,
e->v.ListComp.elt, NULL); e->v.ListComp.elt, NULL);
@ -5365,6 +5370,7 @@ static int
compiler_setcomp(struct compiler *c, expr_ty e) compiler_setcomp(struct compiler *c, expr_ty e)
{ {
assert(e->kind == SetComp_kind); assert(e->kind == SetComp_kind);
_Py_DECLARE_STR(anon_setcomp, "<setcomp>");
return compiler_comprehension(c, e, COMP_SETCOMP, &_Py_STR(anon_setcomp), return compiler_comprehension(c, e, COMP_SETCOMP, &_Py_STR(anon_setcomp),
e->v.SetComp.generators, e->v.SetComp.generators,
e->v.SetComp.elt, NULL); e->v.SetComp.elt, NULL);
@ -5375,6 +5381,7 @@ static int
compiler_dictcomp(struct compiler *c, expr_ty e) compiler_dictcomp(struct compiler *c, expr_ty e)
{ {
assert(e->kind == DictComp_kind); assert(e->kind == DictComp_kind);
_Py_DECLARE_STR(anon_dictcomp, "<dictcomp>");
return compiler_comprehension(c, e, COMP_DICTCOMP, &_Py_STR(anon_dictcomp), return compiler_comprehension(c, e, COMP_DICTCOMP, &_Py_STR(anon_dictcomp),
e->v.DictComp.generators, e->v.DictComp.generators,
e->v.DictComp.key, e->v.DictComp.value); e->v.DictComp.key, e->v.DictComp.value);

View file

@ -515,6 +515,7 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename,
goto finally; goto finally;
if (v == Py_None) { if (v == Py_None) {
Py_DECREF(v); Py_DECREF(v);
_Py_DECLARE_STR(anon_string, "<string>");
*filename = &_Py_STR(anon_string); *filename = &_Py_STR(anon_string);
Py_INCREF(*filename); Py_INCREF(*filename);
} }
@ -1562,6 +1563,7 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
if (arena == NULL) if (arena == NULL)
return NULL; return NULL;
_Py_DECLARE_STR(anon_string, "<string>");
mod = _PyParser_ASTFromString( mod = _PyParser_ASTFromString(
str, &_Py_STR(anon_string), start, flags, arena); str, &_Py_STR(anon_string), start, flags, arena);

View file

@ -13,298 +13,112 @@ INTERNAL = os.path.join(ROOT, 'Include', 'internal')
STRING_LITERALS = { STRING_LITERALS = {
'empty': '', 'empty': '',
'dot': '.', 'dot': '.',
'comma_sep': ', ', }
'percent': '%', IGNORED = {
'dbl_percent': '%%', 'ACTION', # Python/_warnings.c
'ATTR', # Python/_warnings.c and Objects/funcobject.c
'"anonymous" labels': None, 'DUNDER', # Objects/typeobject.c
'anon_dictcomp': '<dictcomp>', 'RDUNDER', # Objects/typeobject.c
'anon_genexpr': '<genexpr>', 'SPECIAL', # Objects/weakrefobject.c
'anon_lambda': '<lambda>',
'anon_listcomp': '<listcomp>',
'anon_module': '<module>',
'anon_setcomp': '<setcomp>',
'anon_string': '<string>',
'dot_locals': '.<locals>',
} }
IDENTIFIERS = [ IDENTIFIERS = [
'Py_Repr', # from ADD() Python/_warnings.c
'TextIOWrapper', 'default',
'ignore',
# from GET_WARNINGS_ATTR() in Python/_warnings.c
'WarningMessage', 'WarningMessage',
'_', '_showwarnmsg',
'__IOBase_closed', '_warn_unawaited_coroutine',
'__abc_tpflags__', 'defaultaction',
'__abs__', 'filters',
'__abstractmethods__', 'onceregistry',
'__add__',
'__aenter__', # from WRAP_METHOD() in Objects/weakrefobject.c
'__aexit__',
'__aiter__',
'__all__',
'__and__',
'__anext__',
'__annotations__',
'__args__',
'__await__',
'__bases__',
'__bool__',
'__build_class__',
'__builtins__',
'__bytes__', '__bytes__',
'__call__', '__reversed__',
'__cantrace__',
'__class__', # from COPY_ATTR() in Objects/funcobject.c
'__class_getitem__', '__module__',
'__classcell__', '__name__',
'__complex__', '__qualname__',
'__contains__',
'__copy__',
'__del__',
'__delattr__',
'__delete__',
'__delitem__',
'__dict__',
'__dir__',
'__divmod__',
'__doc__', '__doc__',
'__enter__', '__annotations__',
'__eq__',
'__exit__', # from SLOT* in Objects/typeobject.c
'__file__', '__abs__',
'__add__',
'__and__',
'__divmod__',
'__float__', '__float__',
'__floordiv__', '__floordiv__',
'__format__',
'__fspath__',
'__ge__',
'__get__',
'__getattr__',
'__getattribute__',
'__getinitargs__',
'__getitem__', '__getitem__',
'__getnewargs__',
'__getnewargs_ex__',
'__getstate__',
'__gt__',
'__hash__',
'__iadd__', '__iadd__',
'__iand__', '__iand__',
'__ifloordiv__', '__ifloordiv__',
'__ilshift__', '__ilshift__',
'__imatmul__', '__imatmul__',
'__imod__', '__imod__',
'__import__',
'__imul__', '__imul__',
'__index__',
'__init__',
'__init_subclass__',
'__instancecheck__',
'__int__', '__int__',
'__invert__', '__invert__',
'__ior__', '__ior__',
'__ipow__',
'__irshift__', '__irshift__',
'__isabstractmethod__',
'__isub__', '__isub__',
'__iter__',
'__itruediv__', '__itruediv__',
'__ixor__', '__ixor__',
'__le__',
'__len__',
'__length_hint__',
'__loader__',
'__lshift__', '__lshift__',
'__lt__',
'__ltrace__',
'__main__',
'__matmul__', '__matmul__',
'__missing__',
'__mod__', '__mod__',
'__module__',
'__mro_entries__',
'__mul__', '__mul__',
'__name__',
'__ne__',
'__neg__', '__neg__',
'__new__',
'__newobj__',
'__newobj_ex__',
'__next__',
'__note__',
'__or__', '__or__',
'__origin__',
'__package__',
'__parameters__',
'__path__',
'__pos__', '__pos__',
'__pow__', '__pow__',
'__prepare__',
'__qualname__',
'__radd__', '__radd__',
'__rand__', '__rand__',
'__rdivmod__', '__rdivmod__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__reversed__',
'__rfloordiv__', '__rfloordiv__',
'__rlshift__', '__rlshift__',
'__rmatmul__', '__rmatmul__',
'__rmod__', '__rmod__',
'__rmul__', '__rmul__',
'__ror__', '__ror__',
'__round__',
'__rpow__', '__rpow__',
'__rrshift__', '__rrshift__',
'__rshift__', '__rshift__',
'__rsub__', '__rsub__',
'__rtruediv__', '__rtruediv__',
'__rxor__', '__rxor__',
'__set__',
'__set_name__',
'__setattr__',
'__setitem__',
'__setstate__',
'__sizeof__',
'__slotnames__',
'__slots__',
'__spec__',
'__str__', '__str__',
'__sub__', '__sub__',
'__subclasscheck__',
'__subclasshook__',
'__truediv__', '__truediv__',
'__trunc__',
'__warningregistry__',
'__weakref__',
'__xor__', '__xor__',
'_abc_impl',
'_blksize',
'_dealloc_warn',
'_finalizing',
'_find_and_load',
'_fix_up_module',
'_get_sourcefile',
'_handle_fromlist',
'_initializing',
'_is_text_encoding',
'_lock_unlock_module',
'_showwarnmsg',
'_shutdown',
'_slotnames',
'_strptime_time',
'_uninitialized_submodules',
'_warn_unawaited_coroutine',
'_xoptions',
'add',
'append',
'big',
'buffer',
'builtins',
'clear',
'close',
'code',
'copy',
'copyreg',
'decode',
'default',
'defaultaction',
'difference_update',
'dispatch_table',
'displayhook',
'enable',
'encoding',
'end_lineno',
'end_offset',
'errors',
'excepthook',
'extend',
'filename',
'fileno',
'fillvalue',
'filters',
'find_class',
'flush',
'get',
'get_source',
'getattr',
'ignore',
'importlib',
'intersection',
'isatty',
'items',
'iter',
'keys',
'last_traceback',
'last_type',
'last_value',
'latin1',
'lineno',
'little',
'match',
'metaclass',
'mode',
'modules',
'mro',
'msg',
'n_fields',
'n_sequence_fields',
'n_unnamed_fields',
'name',
'obj',
'offset',
'onceregistry',
'open',
'parent',
'partial',
'path',
'peek',
'persistent_id',
'persistent_load',
'print_file_and_line',
'ps1',
'ps2',
'raw',
'read',
'read1',
'readable',
'readall',
'readinto',
'readinto1',
'readline',
'reducer_override',
'reload',
'replace',
'reset',
'return',
'reversed',
'seek',
'seekable',
'send',
'setstate',
'sort',
'stderr',
'stdin',
'stdout',
'strict',
'symmetric_difference_update',
'tell',
'text',
'threading',
'throw',
'unraisablehook',
'values',
'version',
'warnings',
'warnoptions',
'writable',
'write',
'zipimporter',
] ]
####################################### #######################################
# helpers # helpers
def iter_global_strings():
    """Yield every global string referenced by the C sources under ROOT.

    Walks ROOT, looks at ``.c`` and ``.h`` files only, and scans each line
    for ``_Py_ID(NAME)`` and ``_Py_DECLARE_STR(NAME, "literal")``.

    The top-level ``Include`` directory is not scanned; only a directory
    named exactly ``Include`` directly under ROOT is excluded.

    Yields:
        ``(name, string, filename, lno, line)`` tuples.  ``string`` is
        ``None`` for a ``_Py_ID()`` match and the literal's text for a
        ``_Py_DECLARE_STR()`` match.  ``lno`` is the 1-based line number
        and ``line`` is the raw line the match was found on.
    """
    id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
    str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for dirname, dirs, files in os.walk(ROOT):
        if os.path.relpath(dirname, ROOT) == os.curdir:
            # Prune in place so os.walk() never descends into Include/.
            # Matching the exact name (rather than a path prefix) keeps
            # sibling directories such as "IncludeFoo" in the scan.
            dirs[:] = [d for d in dirs if d != 'Include']
        # Sort so the traversal order does not depend on the filesystem.
        dirs.sort()
        for name in sorted(files):
            if not name.endswith(('.c', '.h')):
                continue
            filename = os.path.join(dirname, name)
            with open(filename, encoding='utf-8') as infile:
                for lno, line in enumerate(infile, 1):
                    for m in id_regex.finditer(line):
                        identifier, = m.groups()
                        yield identifier, None, filename, lno, line
                    for m in str_regex.finditer(line):
                        varname, string = m.groups()
                        yield varname, string, filename, lno, line
def iter_to_marker(lines, marker): def iter_to_marker(lines, marker):
for line in lines: for line in lines:
if line.rstrip() == marker: if line.rstrip() == marker:
@ -354,7 +168,7 @@ START = '/* The following is auto-generated by Tools/scripts/generate_global_obj
END = '/* End auto-generated code */' END = '/* End auto-generated code */'
def generate_global_strings(): def generate_global_strings(identifiers, strings):
filename = os.path.join(INTERNAL, 'pycore_global_strings.h') filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
# Read the non-generated part of the file. # Read the non-generated part of the file.
@ -371,22 +185,18 @@ def generate_global_strings():
printer.write(START) printer.write(START)
with printer.block('struct _Py_global_strings', ';'): with printer.block('struct _Py_global_strings', ';'):
with printer.block('struct', ' literals;'): with printer.block('struct', ' literals;'):
for name, literal in STRING_LITERALS.items(): for name, literal in sorted(strings.items()):
if literal is None: printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
outfile.write('\n')
printer.write(f'// {name}')
else:
printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
outfile.write('\n') outfile.write('\n')
with printer.block('struct', ' identifiers;'): with printer.block('struct', ' identifiers;'):
for name in sorted(IDENTIFIERS): for name in sorted(identifiers):
assert name.isidentifier(), name assert name.isidentifier(), name
printer.write(f'STRUCT_FOR_ID({name})') printer.write(f'STRUCT_FOR_ID({name})')
printer.write(END) printer.write(END)
printer.write(after) printer.write(after)
def generate_runtime_init(): def generate_runtime_init(identifiers, strings):
# First get some info from the declarations. # First get some info from the declarations.
nsmallposints = None nsmallposints = None
nsmallnegints = None nsmallnegints = None
@ -432,13 +242,10 @@ def generate_runtime_init():
# Global strings. # Global strings.
with printer.block('.strings =', ','): with printer.block('.strings =', ','):
with printer.block('.literals =', ','): with printer.block('.literals =', ','):
for name, literal in STRING_LITERALS.items(): for name, literal in sorted(strings.items()):
if literal is None: printer.write(f'INIT_STR({name}, "{literal}"),')
printer.write('')
else:
printer.write(f'INIT_STR({name}, "{literal}"),')
with printer.block('.identifiers =', ','): with printer.block('.identifiers =', ','):
for name in sorted(IDENTIFIERS): for name in sorted(identifiers):
assert name.isidentifier(), name assert name.isidentifier(), name
printer.write(f'INIT_ID({name}),') printer.write(f'INIT_ID({name}),')
printer.write(END) printer.write(END)
@ -507,9 +314,9 @@ TYPESLOTS_RE = re.compile(r'''
) )
''', re.VERBOSE) ''', re.VERBOSE)
def check_orphan_strings(): def check_orphan_strings(identifiers):
literals = set(n for n, s in STRING_LITERALS.items() if s) literals = set(n for n, s in STRING_LITERALS.items() if s)
identifiers = set(IDENTIFIERS) identifiers = set(identifiers)
files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True) files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
for i, filename in enumerate(files, start=1): for i, filename in enumerate(files, start=1):
print('.', end='') print('.', end='')
@ -586,11 +393,23 @@ def check_orphan_strings():
# the script # the script
def main(*, check=False) -> None:
    """Regenerate the global-string declarations from what the sources use.

    Starts from the hand-maintained IDENTIFIERS and STRING_LITERALS, adds
    every name reported by iter_global_strings() (identifiers listed in
    IGNORED are skipped), then passes the combined collections to
    generate_global_strings() and generate_runtime_init().

    Args:
        check: when true, also run check_orphan_strings() on the final
            set of identifiers.

    Raises:
        ValueError: if the same string name is declared with two
            different literal values.
    """
    identifiers = set(IDENTIFIERS)
    strings = dict(STRING_LITERALS)
    for name, string, filename, lno, _ in iter_global_strings():
        if string is None:
            # A _Py_ID() use; IGNORED holds macro parameter names that
            # look like identifiers but are not real ones.
            if name not in IGNORED:
                identifiers.add(name)
        elif name not in strings:
            strings[name] = string
        elif string != strings[name]:
            # Report where the conflicting declaration was found.
            raise ValueError(
                f'string mismatch for {name!r} '
                f'({string!r} != {strings[name]!r}) '
                f'at {filename}:{lno}')

    generate_global_strings(identifiers, strings)
    generate_runtime_init(identifiers, strings)

    if check:
        check_orphan_strings(identifiers)
if __name__ == '__main__': if __name__ == '__main__':