bpo-43795: Generate python3dll.c and doc data from manifest (PEP 652) (GH-25315)

This commit is contained in:
Petr Viktorin 2021-04-29 11:33:14 +02:00 committed by GitHub
parent 645ed62fb4
commit f6ee4dad58
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 2849 additions and 144 deletions

View file

@ -58,6 +58,7 @@ reindent-rst.py Fix-up reStructuredText file whitespace
rgrep.py Reverse grep through a file (useful for big logfiles)
run_tests.py Run the test suite with more sensible default options
serve.py Small wsgiref-based web server, used in make serve in Doc
stable_abi.py Stable ABI checks and file generators.
suff.py Sort a list of files by suffix
texi2html.py Convert GNU texinfo files into HTML
untabify.py Replace tabs with spaces in argument files

View file

@ -1,13 +1,28 @@
#!/usr/bin/env python
"""Check the stable ABI manifest or generate files from it
import argparse
import glob
import os.path
import pathlib
import re
By default, the tool only checks existing files/libraries.
Pass --generate to recreate auto-generated files instead.
For actions that take a FILENAME, the filename can be left out to use a default
(relative to the manifest file, as they appear in the CPython codebase).
"""
from functools import partial
from pathlib import Path
import dataclasses
import subprocess
import sys
import sysconfig
import argparse
import textwrap
import difflib
import shutil
import sys
import os
import os.path
import io
import re
MISSING = object()
EXCLUDED_HEADERS = {
"bytes_methods.h",
@ -27,10 +42,303 @@ EXCLUDED_HEADERS = {
"token.h",
"ucnhash.h",
}
MACOS = (sys.platform == "darwin")
UNIXY = MACOS or (sys.platform == "linux") # XXX should this be "not Windows"?
def get_exported_symbols(library, dynamic=False):
# The stable ABI manifest (Misc/stable_abi.txt) exists only to fill the
# following dataclasses.
# Feel free to change its syntax (and the `parse_manifest` function)
# to better serve that purpose (while keeping it human-readable).
@dataclasses.dataclass
class Manifest:
    """Container for every `ABIItem` making up the stable ABI/limited API.

    Items live in the `contents` dict, keyed by the item's name.
    """

    # Class-level marker (not a dataclass field): lets the parser treat the
    # manifest itself as the outermost "parent" node.
    kind = 'manifest'
    contents: dict = dataclasses.field(default_factory=dict)

    def add(self, item):
        """Store `item` under its name; raise ValueError for a repeated name."""
        key = item.name
        # Stable ABI items are assumed never to share a name, even when
        # they are of different kinds (e.g. function vs. macro).
        if key in self.contents:
            raise ValueError(f'duplicate ABI item {item.name}')
        self.contents[key] = item

    @property
    def feature_defines(self):
        """Set of all feature defines that influence what is available.

        Examples of such defines are HAVE_FORK and MS_WINDOWS.
        Items without an `ifdef` do not contribute anything.
        """
        return {
            item.ifdef
            for item in self.contents.values()
            if item.ifdef is not None
        }

    def select(self, kinds, *, include_abi_only=True, ifdef=None):
        """Yield the manifest items matching the given filters, by name order

        kinds: set of requested kinds, e.g. {'function', 'macro'}
        include_abi_only: when True (default), every stable ABI item is
            eligible. When False, only limited API items (the ones people
            should use today) are yielded.
        ifdef: set of feature defines (e.g. {'HAVE_FORK', 'MS_WINDOWS'}).
            None (default) disables this filter entirely. Note that an
            empty set is different: it drops every conditional item.
        """
        for key in sorted(self.contents):
            item = self.contents[key]
            if item.kind in kinds:
                if include_abi_only or not item.abi_only:
                    unconditional = ifdef is None or item.ifdef is None
                    if unconditional or item.ifdef in ifdef:
                        yield item

    def dump(self):
        """Yield lines to recreate the manifest file (sans comments/newlines)"""
        # Delegates to each item so that nested nodes (struct members,
        # function arguments) can be added later without changing this.
        for item in self.contents.values():
            for line in item.dump(indent=0):
                yield line
@dataclasses.dataclass
class ABIItem:
    """Description of a single stable ABI entry (function, macro, struct...)"""

    # The item's kind; top-level entries use one of the values in KINDS.
    kind: str
    # The item's name, which is also its key in the manifest.
    name: str
    # Value of the "added" property line, if one was given.
    added: str = None
    # Child nodes; nothing in this block fills the list yet.
    contents: list = dataclasses.field(default_factory=list)
    # True when an "abi_only" property line was attached to the item.
    abi_only: bool = False
    # Value of the "ifdef" property line, if one was given.
    ifdef: str = None

    # Kinds that may appear as top-level manifest entries.
    KINDS = frozenset({
        'struct', 'function', 'macro', 'data', 'const', 'typedef',
    })

    def dump(self, indent=0):
        """Yield the manifest lines describing this item.

        The header line is indented by `indent` units; property lines
        (added / ifdef / abi_only) are indented one unit further and are
        only produced when the corresponding attribute is set.
        """
        outer = ' ' * indent
        inner = ' ' * (indent+1)
        lines = [f"{outer}{self.kind} {self.name}"]
        if self.added:
            lines.append(f"{inner}added {self.added}")
        if self.ifdef:
            lines.append(f"{inner}ifdef {self.ifdef}")
        if self.abi_only:
            lines.append(f"{inner}abi_only")
        yield from lines
def parse_manifest(file):
    """Parse the given file (iterable of lines) to a Manifest

    Each non-empty line (after stripping a trailing ``#`` comment) has the
    form ``<indent><kind> <content>``:

    * a kind from `ABIItem.KINDS` starts a new item named <content>; it may
      only appear directly in the manifest;
    * ``added`` / ``ifdef`` set the attribute of that name on the enclosing
      item to <content>;
    * ``abi_only`` marks the enclosing function or data item as ABI-only.

    Nesting is determined by the number of leading spaces.

    Raises SyntaxError (prefixed with the line number) for malformed lines,
    unknown kinds and misplaced entries. Duplicate item names propagate the
    ValueError raised by `Manifest.add`.
    """
    LINE_RE = re.compile('(?P<indent>[ ]*)(?P<kind>[^ ]+)[ ]*(?P<content>.*)')
    manifest = Manifest()

    # parents of currently processed line, each with its indentation level
    levels = [(manifest, -1)]

    def raise_error(msg):
        # `lineno` is the loop variable below, read when the error is raised
        raise SyntaxError(f'line {lineno}: {msg}')

    for lineno, line in enumerate(file, start=1):
        line, sep, comment = line.partition('#')
        line = line.rstrip()
        if not line:
            continue
        match = LINE_RE.fullmatch(line)
        if not match:
            raise_error(f'invalid syntax: {line}')
        level = len(match['indent'])
        kind = match['kind']
        content = match['content']

        # Close every node that is not less indented than this line.
        # The manifest sits at level -1, so it is never popped.
        while level <= levels[-1][1]:
            levels.pop()
        parent = levels[-1][0]
        if parent is None:
            # Property lines are recorded on the stack with a None node
            # (see the append below); anything indented beneath one has no
            # valid parent. Report it as a syntax error instead of failing
            # with an AttributeError on `parent.kind`.
            raise_error(f'{kind} cannot be nested under a property line')

        entry = None
        if kind in ABIItem.KINDS:
            if parent.kind not in {'manifest'}:
                raise_error(f'{kind} cannot go in {parent.kind}')
            entry = ABIItem(kind, content)
            parent.add(entry)
        elif kind in {'added', 'ifdef'}:
            if parent.kind not in ABIItem.KINDS:
                raise_error(f'{kind} cannot go in {parent.kind}')
            setattr(parent, kind, content)
        elif kind in {'abi_only'}:
            if parent.kind not in {'function', 'data'}:
                raise_error(f'{kind} cannot go in {parent.kind}')
            parent.abi_only = True
        else:
            raise_error(f"unknown kind {kind!r}")
        # Property lines push `None`: they keep the indentation bookkeeping
        # consistent but cannot themselves act as a parent.
        levels.append((entry, level))
    return manifest
# The tool is organized around individual "actions".
# Most of them are "generators": each one produces a single file from the
# manifest. (A check is done by generating into a temporary buffer and
# comparing the result.)
# Other actions, like "--unixy-check", don't work on a single file.

# Registry of all generator functions, in definition order.
generators = []


def generator(var_name, default_path):
    """Build a decorator that registers a file generator.

    A file generator is a function that writes one file. The decorated
    function is returned unchanged except for three added attributes:

    var_name: the given `var_name`
    arg_name: `var_name` with a leading '--' and underscores replaced by
        dashes
    default_path: the given `default_path`

    It is also appended to the module-level `generators` list.
    """
    def _register(func):
        metadata = {
            'var_name': var_name,
            'arg_name': '--' + var_name.replace('_', '-'),
            'default_path': default_path,
        }
        for attr, value in metadata.items():
            setattr(func, attr, value)
        generators.append(func)
        return func

    return _register
@generator("python3dll", 'PC/python3dll.c')
def gen_python3dll(manifest, args, outfile):
    """Generate/check the source for the Windows stable ABI library"""
    # NOTE: the docstring above doubles as the command-line help text for
    # this generator, so it is kept to a single line.
    #
    # Output layout: a fixed C preamble defining the EXPORT_FUNC and
    # EXPORT_DATA macros, one EXPORT_FUNC(...) line per function, an empty
    # line, then one EXPORT_DATA(...) line per data item. `args` is unused.
    emit = partial(print, file=outfile)

    preamble = textwrap.dedent(r"""
    /* Re-export stable Python ABI */
    /* Generated by Tools/scripts/stable_abi.py */
    #ifdef _M_IX86
    #define DECORATE "_"
    #else
    #define DECORATE
    #endif
    #define EXPORT_FUNC(name) \
    __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name))
    #define EXPORT_DATA(name) \
    __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA"))
    """)
    emit(preamble)

    def exported_names(kind):
        # Items of one kind available with MS_WINDOWS (ABI-only items
        # included), ordered case-insensitively by name.
        selected = manifest.select(
            {kind}, include_abi_only=True, ifdef={'MS_WINDOWS'})
        ordered = sorted(selected, key=lambda item: item.name.lower())
        return [item.name for item in ordered]

    for name in exported_names('function'):
        emit(f'EXPORT_FUNC({name})')

    emit()

    for name in exported_names('data'):
        emit(f'EXPORT_DATA({name})')
@generator("doc_list", 'Doc/data/stable_abi.dat')
def gen_doc_annotations(manifest, args, outfile):
    """Generate/check the stable ABI list for documentation annotations"""
    # NOTE: the docstring above doubles as the command-line help text for
    # this generator, so it is kept to a single line.
    #
    # Output layout: a "generated by" comment line, an empty line, then the
    # name of every limited API item (ABI-only items are left out), one per
    # line. `args` is unused.
    print("# Generated by Tools/scripts/stable_abi.py", file=outfile)
    print(file=outfile)
    limited_api_items = manifest.select(
        ABIItem.KINDS, include_abi_only=False)
    for entry in limited_api_items:
        print(entry.name, file=outfile)
def generate_or_check(manifest, args, path, func):
    """Generate/check a file with a single generator

    manifest: the parsed manifest, passed through to `func`
    args: parsed command-line arguments; `args.generate` selects between
        rewriting the file (true) and only comparing it (false)
    path: file to generate or check, as a `pathlib.Path` or a plain string
    func: generator called as ``func(manifest, args, outfile)``, where
        `outfile` is an in-memory text buffer

    Return True if successful; False if a comparison failed.

    Raises FileNotFoundError when only checking and `path` does not exist.
    """
    # Filenames given on the command line arrive as plain strings, while
    # default filenames are Path objects; normalize so both work.
    path = Path(path)

    outfile = io.StringIO()
    func(manifest, args, outfile)
    generated = outfile.getvalue()

    try:
        existing = path.read_text()
    except FileNotFoundError:
        # A missing file is only acceptable when we are about to create it.
        if not args.generate:
            raise
        existing = None

    if generated == existing:
        return True

    if args.generate:
        path.write_text(generated)
        return True

    print(f'File {path} differs from expected!')
    # Diff *from* the file on disk *to* the freshly generated text, so the
    # labels match their contents and '+' lines are what must be added.
    diff = difflib.unified_diff(
        existing.splitlines(), generated.splitlines(),
        str(path), '<expected>',
        lineterm='',
    )
    for line in diff:
        print(line)
    return False
def do_unixy_check(manifest, args):
    """Check headers & library using "Unixy" tools (GCC/clang, binutils)"""
    # NOTE: the docstring above doubles as the --unixy-check help text, so
    # it is kept to a single line.
    #
    # Runs every check even after a failure, so all problems are reported
    # in one pass. Returns True only if all checks passed. `args` is unused.
    # Raises Exception when sysconfig lacks the LIBRARY/LDLIBRARY variables.
    okay = True

    # Get all macros first: we'll need feature macros like HAVE_FORK and
    # MS_WINDOWS for everything else
    present_macros = gcc_get_limited_api_macros(['Include/Python.h'])
    feature_defines = manifest.feature_defines & present_macros

    # Check that we have all needed macros
    expected_macros = {item.name for item in manifest.select({'macro'})}
    missing_macros = expected_macros - present_macros
    okay &= _report_unexpected_items(
        missing_macros,
        'Some macros are not defined from "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    expected_symbols = {
        item.name for item in manifest.select(
            {'function', 'data'}, include_abi_only=True,
            ifdef=feature_defines,
        )
    }

    # Check the static library (*.a)
    LIBRARY = sysconfig.get_config_var("LIBRARY")
    if not LIBRARY:
        raise Exception("failed to get LIBRARY variable from sysconfig")
    if os.path.exists(LIBRARY):
        okay &= binutils_check_library(
            manifest, LIBRARY, expected_symbols, dynamic=False)

    # Check the dynamic library (*.so)
    LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
    if not LDLIBRARY:
        raise Exception("failed to get LDLIBRARY variable from sysconfig")
    # Ask for the dynamic symbol table only when LDLIBRARY really is a
    # different (shared) library; when it names the same file as LIBRARY
    # it is inspected like the static archive above.
    okay &= binutils_check_library(
        manifest, LDLIBRARY, expected_symbols,
        dynamic=(LDLIBRARY != LIBRARY))

    # Check definitions in the header files
    expected_defs = {
        item.name for item in manifest.select(
            {'function', 'data'}, include_abi_only=False,
            ifdef=feature_defines,
        )
    }
    found_defs = gcc_get_limited_api_definitions(['Include/Python.h'])
    missing_defs = expected_defs - found_defs
    okay &= _report_unexpected_items(
        missing_defs,
        'Some expected declarations were not declared in '
        + '"Include/Python.h" with Py_LIMITED_API:')

    # Some Limited API macros are defined in terms of private symbols.
    # These are not part of Limited API (even though they're defined with
    # Py_LIMITED_API). They must be part of the Stable ABI, though.
    private_symbols = {n for n in expected_symbols if n.startswith('_')}
    extra_defs = found_defs - expected_defs - private_symbols
    okay &= _report_unexpected_items(
        extra_defs,
        'Some extra declarations were found in "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    return okay
def _report_unexpected_items(items, msg):
"""If there are any `items`, report them using "msg" and return false"""
if items:
print(msg, file=sys.stderr)
for item in sorted(items):
print(' -', item, file=sys.stderr)
return False
return True
def binutils_get_exported_symbols(library, dynamic=False):
"""Retrieve exported symbols using the nm(1) tool from binutils"""
# Only look at dynamic symbols
args = ["nm", "--no-sort"]
if dynamic:
@ -61,86 +369,89 @@ def get_exported_symbols(library, dynamic=False):
yield symbol
def check_library(stable_abi_file, library, abi_funcs, dynamic=False):
available_symbols = set(get_exported_symbols(library, dynamic))
missing_symbols = abi_funcs - available_symbols
def binutils_check_library(manifest, library, expected_symbols, dynamic):
"""Check that library exports all expected_symbols"""
available_symbols = set(binutils_get_exported_symbols(library, dynamic))
missing_symbols = expected_symbols - available_symbols
if missing_symbols:
raise Exception(
f"""\
Some symbols from the limited API are missing: {', '.join(missing_symbols)}
print(textwrap.dedent(f"""\
Some symbols from the limited API are missing from {library}:
{', '.join(missing_symbols)}
This error means that there are some missing symbols among the ones exported
in the Python library ("libpythonx.x.a" or "libpythonx.x.so"). This normally
means that some symbol, function implementation or a prototype, belonging to
a symbol in the limited API has been deleted or is missing.
Check if this was a mistake and if not, update the file containing the limited
API symbols. This file is located at:
{stable_abi_file}
You can read more about the limited API and its contracts at:
https://docs.python.org/3/c-api/stable.html
And in PEP 384:
https://www.python.org/dev/peps/pep-0384/
"""
)
This error means that there are some missing symbols among the
ones exported in the library.
This normally means that some symbol, function implementation or
a prototype belonging to a symbol in the limited API has been
deleted or is missing.
"""), file=sys.stderr)
return False
return True
def generate_limited_api_symbols(args):
library = sysconfig.get_config_var("LIBRARY")
ldlibrary = sysconfig.get_config_var("LDLIBRARY")
if ldlibrary != library:
raise Exception("Limited ABI symbols can only be generated from a static build")
available_symbols = {
symbol for symbol in get_exported_symbols(library) if symbol.startswith("Py")
}
def gcc_get_limited_api_macros(headers):
"""Get all limited API macros from headers.
headers = [
file
for file in pathlib.Path("Include").glob("*.h")
if file.name not in EXCLUDED_HEADERS
]
stable_data, stable_exported_data, stable_functions = get_limited_api_definitions(
headers
Runs the preprocesor over all the header files in "Include" setting
"-DPy_LIMITED_API" to the correct value for the running version of the
interpreter and extracting all macro definitions (via adding -dM to the
compiler arguments).
Requires Python built with a GCC-compatible compiler. (clang might work)
"""
api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
preprocesor_output_with_macros = subprocess.check_output(
sysconfig.get_config_var("CC").split()
+ [
# Prevent the expansion of the exported macros so we can
# capture them later
"-DSIZEOF_WCHAR_T=4", # The actual value is not important
f"-DPy_LIMITED_API={api_hexversion}",
"-I.",
"-I./Include",
"-dM",
"-E",
]
+ [str(file) for file in headers],
text=True,
)
stable_symbols = {
symbol
for symbol in (stable_functions | stable_exported_data | stable_data)
if symbol.startswith("Py") and symbol in available_symbols
}
with open(args.output_file, "w") as output_file:
output_file.write(f"# File generated by 'make regen-limited-abi'\n")
output_file.write(
f"# This is NOT an authoritative list of stable ABI symbols\n"
return {
target
for target in re.findall(
r"#define (\w+)", preprocesor_output_with_macros
)
for symbol in sorted(stable_symbols):
output_file.write(f"{symbol}\n")
}
def get_limited_api_definitions(headers):
"""Run the preprocesor over all the header files in "Include" setting
"-DPy_LIMITED_API" to the correct value for the running version of the interpreter.
def gcc_get_limited_api_definitions(headers):
"""Get all limited API definitions from headers.
The limited API symbols will be extracted from the output of this command as it includes
the prototypes and definitions of all the exported symbols that are in the limited api.
Run the preprocesor over all the header files in "Include" setting
"-DPy_LIMITED_API" to the correct value for the running version of the
interpreter.
The limited API symbols will be extracted from the output of this command
as it includes the prototypes and definitions of all the exported symbols
that are in the limited api.
This function does *NOT* extract the macros defined on the limited API
Requires Python built with a GCC-compatible compiler. (clang might work)
"""
api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
preprocesor_output = subprocess.check_output(
sysconfig.get_config_var("CC").split()
+ [
# Prevent the expansion of the exported macros so we can capture them later
# Prevent the expansion of the exported macros so we can capture
# them later
"-DPyAPI_FUNC=__PyAPI_FUNC",
"-DPyAPI_DATA=__PyAPI_DATA",
"-DEXPORT_DATA=__EXPORT_DATA",
"-D_Py_NO_RETURN=",
"-DSIZEOF_WCHAR_T=4", # The actual value is not important
f"-DPy_LIMITED_API={sys.version_info.major << 24 | sys.version_info.minor << 16}",
f"-DPy_LIMITED_API={api_hexversion}",
"-I.",
"-I./Include",
"-E",
@ -156,64 +467,126 @@ def get_limited_api_definitions(headers):
re.findall(r"__EXPORT_DATA\((.*?)\)", preprocesor_output)
)
stable_data = set(
re.findall(r"__PyAPI_DATA\(.*?\)\s*\(?(.*?)\)?\s*;", preprocesor_output)
re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocesor_output)
)
return stable_data, stable_exported_data, stable_functions
def check_symbols(parser_args):
with open(parser_args.stable_abi_file, "r") as filename:
abi_funcs = {
symbol
for symbol in filename.read().splitlines()
if symbol and not symbol.startswith("#")
}
try:
# static library
LIBRARY = sysconfig.get_config_var("LIBRARY")
if not LIBRARY:
raise Exception("failed to get LIBRARY variable from sysconfig")
if os.path.exists(LIBRARY):
check_library(parser_args.stable_abi_file, LIBRARY, abi_funcs)
# dynamic library
LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
if not LDLIBRARY:
raise Exception("failed to get LDLIBRARY variable from sysconfig")
if LDLIBRARY != LIBRARY:
check_library(
parser_args.stable_abi_file, LDLIBRARY, abi_funcs, dynamic=True
)
except Exception as e:
print(e, file=sys.stderr)
sys.exit(1)
return stable_data | stable_exported_data | stable_functions
def main():
parser = argparse.ArgumentParser(description="Process some integers.")
subparsers = parser.add_subparsers()
check_parser = subparsers.add_parser(
"check", help="Check the exported symbols against a given ABI file"
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
check_parser.add_argument(
"stable_abi_file", type=str, help="File with the stable abi functions"
parser.add_argument(
"file", type=Path, metavar='FILE',
help="file with the stable abi manifest",
)
check_parser.set_defaults(func=check_symbols)
generate_parser = subparsers.add_parser(
"generate",
help="Generate symbols from the header files and the exported symbols",
parser.add_argument(
"--generate", action='store_true',
help="generate file(s), rather than just checking them",
)
generate_parser.add_argument(
"output_file", type=str, help="File to dump the symbols to"
parser.add_argument(
"--generate-all", action='store_true',
help="as --generate, but generate all file(s) using default filenames."
+ " (unlike --all, does not run any extra checks)",
)
parser.add_argument(
"-a", "--all", action='store_true',
help="run all available checks using default filenames",
)
parser.add_argument(
"-l", "--list", action='store_true',
help="list available generators and their default filenames; then exit",
)
parser.add_argument(
"--dump", action='store_true',
help="dump the manifest contents (used for debugging the parser)",
)
generate_parser.set_defaults(func=generate_limited_api_symbols)
args = parser.parse_args()
if "func" not in args:
parser.error("Either 'check' or 'generate' must be used")
sys.exit(1)
args.func(args)
actions_group = parser.add_argument_group('actions')
for gen in generators:
actions_group.add_argument(
gen.arg_name, dest=gen.var_name,
type=str, nargs="?", default=MISSING,
metavar='FILENAME',
help=gen.__doc__,
)
actions_group.add_argument(
'--unixy-check', action='store_true',
help=do_unixy_check.__doc__,
)
args = parser.parse_args()
base_path = args.file.parent.parent
if args.list:
for gen in generators:
print(f'{gen.arg_name}: {base_path / gen.default_path}')
sys.exit(0)
run_all_generators = args.generate_all
if args.generate_all:
args.generate = True
if args.all:
run_all_generators = True
args.unixy_check = True
with args.file.open() as file:
manifest = parse_manifest(file)
# Remember results of all actions (as booleans).
# At the end we'll check that at least one action was run,
# and also fail if any are false.
results = {}
if args.dump:
for line in manifest.dump():
print(line)
results['dump'] = True
for gen in generators:
filename = getattr(args, gen.var_name)
if filename is None or (run_all_generators and filename is MISSING):
filename = base_path / gen.default_path
elif filename is MISSING:
continue
results[gen.var_name] = generate_or_check(manifest, args, filename, gen)
if args.unixy_check:
results['unixy_check'] = do_unixy_check(manifest, args)
if not results:
if args.generate:
parser.error('No file specified. Use --help for usage.')
parser.error('No check specified. Use --help for usage.')
failed_results = [name for name, result in results.items() if not result]
if failed_results:
raise Exception(f"""
These checks related to the stable ABI did not succeed:
{', '.join(failed_results)}
If you see diffs in the output, files derived from the stable
ABI manifest the were not regenerated.
Run `make regen-limited-abi` to fix this.
Otherwise, see the error(s) above.
The stable ABI manifest is at: {args.file}
Note that there is a process to follow when modifying it.
You can read more about the limited API and its contracts at:
https://docs.python.org/3/c-api/stable.html
And in PEP 384:
https://www.python.org/dev/peps/pep-0384/
""")
if __name__ == "__main__":