mirror of
https://github.com/python/cpython.git
synced 2025-12-04 00:30:19 +00:00
bpo-30860: Consolidate stateful runtime globals. (#3397)
* group the (stateful) runtime globals into various topical structs
* consolidate the topical structs under a single top-level _PyRuntimeState struct
* add a check-c-globals.py script that helps identify runtime globals

Other globals are excluded (see globals.txt and check-c-globals.py).
This commit is contained in:
parent
bab21faded
commit
2ebc5ce42a
72 changed files with 2746 additions and 1312 deletions
446
Tools/c-globals/check-c-globals.py
Normal file
446
Tools/c-globals/check-c-globals.py
Normal file
|
|
@ -0,0 +1,446 @@
|
|||
|
||||
from collections import namedtuple
|
||||
import glob
|
||||
import os.path
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
|
||||
# Baseline verbosity; parse_args() adds one per -v and subtracts one per -q.
VERBOSITY = 2

# Directory layout, derived from the location of this script.
C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
# Default file of names that _is_global_var() accepts verbatim.
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Directories (below ROOT_DIR) whose *.h / *.c files are scanned for
# PyAPI_DATA declarations.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Captures the declared name(s) of a single-line PyAPI_DATA(...) declaration.
CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')


# Symbol names that find_vars() always drops from its results.
# NOTE(review): these look like linker/toolchain-provided symbols -- confirm.
IGNORED_VARS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_LIST__',
    '__JCR_END__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '_edata',
    '_end',
}
|
||||
|
||||
|
||||
def find_capi_vars(root):
    """Map every PyAPI_DATA variable name to the file that declares it.

    All ``*.h`` and ``*.c`` files below each SOURCE_DIRS directory are
    scanned.  When a name is seen a second time, the earlier sighting must
    have been in a ``.c`` file and the new one must not be; the later file
    wins.

    NOTE(review): *root* is accepted but not used; the search is always
    rooted at the module-level ROOT_DIR.
    """
    declared_in = {}
    for subdir in SOURCE_DIRS:
        pattern = os.path.join(ROOT_DIR, subdir, '**/*.[hc]')
        for path in glob.glob(pattern, recursive=True):
            with open(path) as source:
                for varname in _find_capi_vars(source):
                    previous = declared_in.get(varname)
                    if previous is not None:
                        # A repeat is only expected as "definition in a .c
                        # file, then declaration elsewhere".
                        assert not path.endswith('.c')
                        assert previous.endswith('.c')
                    declared_in[varname] = path
    return declared_in
|
||||
|
||||
|
||||
def _find_capi_vars(lines):
|
||||
for line in lines:
|
||||
if not line.startswith('PyAPI_DATA'):
|
||||
continue
|
||||
assert '{' not in line
|
||||
match = CAPI_REGEX.match(line)
|
||||
assert match
|
||||
names, = match.groups()
|
||||
for name in names.split(', '):
|
||||
yield name
|
||||
|
||||
|
||||
def _read_global_names(filename):
|
||||
# These variables are shared between all interpreters in the process.
|
||||
with open(filename) as file:
|
||||
return {line.partition('#')[0].strip()
|
||||
for line in file
|
||||
if line.strip() and not line.startswith('#')}
|
||||
|
||||
|
||||
def _is_global_var(name, globalnames):
    """Return True if *name* counts as a (process-wide) global.

    A name qualifies when any of the naming-pattern helpers recognizes it
    or when it is listed in *globalnames*.
    """
    recognizers = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(recognize(name) for recognize in recognizers):
        return True
    return name in globalnames
|
||||
|
||||
|
||||
def _is_autogen_var(name):
|
||||
return (
|
||||
name.startswith('PyId_') or
|
||||
'.' in name or
|
||||
# Objects/typeobject.c
|
||||
name.startswith('op_id.') or
|
||||
name.startswith('rop_id.') or
|
||||
# Python/graminit.c
|
||||
name.startswith('arcs_') or
|
||||
name.startswith('states_')
|
||||
)
|
||||
|
||||
|
||||
def _is_type_var(name):
|
||||
if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type?
|
||||
return True
|
||||
if name.endswith('_desc'): # for structseq types
|
||||
return True
|
||||
return (
|
||||
name.startswith('doc_') or
|
||||
name.endswith(('_doc', '__doc__', '_docstring')) or
|
||||
name.endswith('_methods') or
|
||||
name.endswith('_fields') or
|
||||
name.endswith(('_memberlist', '_members')) or
|
||||
name.endswith('_slots') or
|
||||
name.endswith(('_getset', '_getsets', '_getsetlist')) or
|
||||
name.endswith('_as_mapping') or
|
||||
name.endswith('_as_number') or
|
||||
name.endswith('_as_sequence') or
|
||||
name.endswith('_as_buffer') or
|
||||
name.endswith('_as_async')
|
||||
)
|
||||
|
||||
|
||||
def _is_module(name):
|
||||
if name.endswith(('_functions', 'Methods', '_Methods')):
|
||||
return True
|
||||
if name == 'module_def':
|
||||
return True
|
||||
if name == 'initialized':
|
||||
return True
|
||||
return name.endswith(('module', '_Module'))
|
||||
|
||||
|
||||
def _is_exception(name):
|
||||
# Other vars are enumerated in globals-core.txt.
|
||||
if not name.startswith(('PyExc_', '_PyExc_')):
|
||||
return False
|
||||
return name.endswith(('Error', 'Warning'))
|
||||
|
||||
|
||||
def _is_compiler(name):
|
||||
return (
|
||||
# Python/Pythyon-ast.c
|
||||
name.endswith('_type') or
|
||||
name.endswith('_singleton') or
|
||||
name.endswith('_attributes')
|
||||
)
|
||||
|
||||
|
||||
class Var(namedtuple('Var', 'name kind scope capi filename')):
    """A variable symbol parsed from one line of ``nm`` output.

    Fields:
      name     -- the symbol name
      kind     -- the one-letter nm symbol type
      scope    -- 'global' if the name is a recognized global, else None
      capi     -- True if the name was found among the PyAPI_DATA variables
      filename -- source file relative to the current directory, or
                  '~???~' when nm reported none
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one nm output line into a Var.

        Returns None when the symbol type is in *ignored* or the name is
        auto-generated.  Raises RuntimeError when the symbol type is in
        neither *ignored* nor *expected*.  *expected*, *ignored* and
        *capi_vars* may each be None, which is treated as empty.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # The parentheses matter: "kind in ignored or ()" would evaluate the
        # membership test first and fail on None.
        if kind in (ignored or ()):
            return None
        elif kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        # With --line-numbers, nm appends "<TAB>file:line" when it knows it.
        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        if _is_global_var(name, globalnames):
            scope = 'global'
        else:
            scope = None
        capi = name in (capi_vars or ())  # always a real bool
        if filename:
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True if the nm symbol type letter is upper case."""
        return self.kind.isupper()
|
||||
|
||||
|
||||
def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in the ``python`` binary in *root*.

    Symbols named in IGNORED_VARS are skipped.  Raises RuntimeError if the
    binary does not exist and NotImplementedError if no ``nm`` executable
    is found.  This is a generator, so nothing happens (and nothing is
    raised) until iteration starts.
    """
    binary = os.path.join(root, 'python')
    if not os.path.exists(binary):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    for var in _find_var_symbols(binary, nm, capi_vars, globalnames):
        if var.name not in IGNORED_VARS:
            yield var
|
||||
|
||||
|
||||
# One-letter nm symbol types, grouped by how this script treats them.
# Var.external reports upper-case letters as external.
NM_FUNCS = set('Tt')
NM_PUBLIC_VARS = set('BD')
NM_PRIVATE_VARS = set('bd')
NM_DATA = set('Rr')
NM_OTHER = set('ACGgiINpSsuUVvWw-?')

# Types reported as variables vs. types silently skipped by Var.parse_nm().
NM_VARS = NM_PUBLIC_VARS.union(NM_PRIVATE_VARS)
NM_IGNORED = set().union(NM_FUNCS, NM_DATA, NM_OTHER)
|
||||
|
||||
|
||||
def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield a Var per variable symbol.

    Lines for which Var.parse_nm() returns None are skipped.
    """
    raw = subprocess.check_output([nm, '--line-numbers', python])
    for entry in raw.decode('utf-8').splitlines():
        parsed = Var.parse_nm(entry, NM_VARS, NM_IGNORED, capi_vars,
                              globalnames)
        if parsed is not None:
            yield parsed
|
||||
|
||||
|
||||
#######################################
|
||||
|
||||
class Filter(namedtuple('Filter', 'name op value action')):
    """One filter term: keep ('+') or drop ('-') vars matching a column test.

    With an empty *op* the test is the truthiness of the column; with
    ``'='`` it is equality with *value*.
    """

    @classmethod
    def parse(cls, raw):
        """Build a Filter from ``[+|-]<name>[=<value>]``; '+' is the default."""
        if raw.startswith(('+', '-')):
            action, raw = raw[0], raw[1:]
        else:
            action = '+'
        # XXX Support < and >?
        return cls(*raw.partition('='), action)

    def check(self, var):
        """Return whether *var* passes this filter.

        A column that *var* lacks is treated as None.  Raises
        NotImplementedError for an unknown op or action.
        """
        actual = getattr(var, self.name, None)
        if self.op == '=':
            matched = (actual == self.value)
        elif self.op:
            raise NotImplementedError
        else:
            matched = bool(actual)

        if self.action not in ('+', '-'):
            raise NotImplementedError
        return matched if self.action == '+' else not matched
|
||||
|
||||
|
||||
def filter_var(var, filters):
    """Return True if *var* passes every filter (vacuously so for none)."""
    return all(rule.check(var) for rule in filters)
|
||||
|
||||
|
||||
def make_sort_key(spec):
    """Return a key function that sorts vars by the columns in *spec*.

    *spec* is a sequence of column names.  A leading underscore on a column
    (e.g. ``'_name'``) means leading underscores in that column's values
    are ignored when comparing.

    Column values that are not strings (None for ``scope``, booleans for
    ``capi`` / ``external``) are converted to text so that every column can
    be sorted on: None becomes '' and anything else goes through str().
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def as_text(value):
        # Keep every key component a str so mixed None/str/bool values
        # remain comparable with each other.
        if value is None:
            return ''
        if isinstance(value, str):
            return value
        return str(value)

    def sort_key(var):
        return tuple(as_text(getattr(var, col)).lstrip(prefix)
                     for col, prefix in columns)
    return sort_key
|
||||
|
||||
|
||||
def make_groups(allvars, spec):
    """Bucket *allvars* by the value of the column named *spec*.

    Returns a dict mapping ``'<column>: <value>'`` to the list of vars with
    that value, preserving the order in which the vars were seen.
    """
    buckets = {}
    for var in allvars:
        label = '{}: {}'.format(spec, getattr(var, spec))
        buckets.setdefault(label, []).append(var)
    return buckets
|
||||
|
||||
|
||||
def format_groups(groups, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering each group as its own table.

    Groups are emitted in sorted label order; each is preceded by a blank
    line and a ``# <label>`` heading, both with a count of 0.
    """
    for label in sorted(groups):
        members = groups[label]
        yield '', 0
        yield ' # {}'.format(label), 0
        yield from format_vars(members, columns, fmts, widths)
|
||||
|
||||
|
||||
def format_vars(allvars, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering *allvars* as a text table.

    The table is a centered upper-case header, a divider, one row per var
    and a closing divider.  *count* is 1 for var rows and 0 for everything
    else, so summing the counts gives the number of vars.

    *fmts* maps column name to a ``str.format`` field and *widths* maps
    column name to its width.  True values are shown as 'X' and other falsy
    values as blanks.
    """
    fmt = ' '.join(fmts[col] for col in columns)
    # Each divider segment is two characters wider than its column and the
    # segments are one space apart, so cells need a one-space outer margin
    # and a three-space gap between them to line up with the dividers.
    fmt = ' ' + fmt.replace(' ', '   ') + ' '  # for div margin
    header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-'*(widths[col]+2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0
|
||||
|
||||
|
||||
#######################################
|
||||
|
||||
# The columns that can be shown, filtered, sorted or grouped on.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# A column is as wide as its own name unless overridden here.
COLUMN_WIDTHS = {col: len(col) for col in COLUMN_NAMES}
COLUMN_WIDTHS['name'] = 50
COLUMN_WIDTHS['scope'] = 7
COLUMN_WIDTHS['filename'] = 40

# Format field per column; columns left at their name's width are centered.
COLUMN_FORMATS = {}
for col, width in COLUMN_WIDTHS.items():
    spec = '{:%s}' % width
    if width == len(col):
        spec = spec.replace(':', ':^')
    COLUMN_FORMATS[col] = spec
|
||||
|
||||
|
||||
def _parse_filters_arg(raw, error):
|
||||
filters = []
|
||||
for value in raw.split(','):
|
||||
value=value.strip()
|
||||
if not value:
|
||||
continue
|
||||
try:
|
||||
filter = Filter.parse(value)
|
||||
if filter.name not in COLUMN_NAMES:
|
||||
raise Exception('unsupported column {!r}'.format(filter.name))
|
||||
except Exception as e:
|
||||
error('bad filter {!r}: {}'.format(raw, e))
|
||||
filters.append(filter)
|
||||
return filters
|
||||
|
||||
|
||||
def _parse_columns_arg(raw, error):
    """Split a comma-separated column list, reporting unknown columns.

    *error* is called once per name that is not in COLUMN_NAMES; the full
    split list is returned regardless.
    """
    requested = raw.split(',')
    unknown = [name for name in requested if name not in COLUMN_NAMES]
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return requested
|
||||
|
||||
|
||||
def _parse_sort_arg(raw, error):
    """Split a comma-separated sort spec, reporting unknown columns.

    Leading underscores are ignored when checking each entry against
    COLUMN_NAMES.  The entries are returned unchanged, underscores and all.
    """
    entries = raw.split(',')
    unknown = [entry for entry in entries
               if entry.lstrip('_') not in COLUMN_NAMES]
    for entry in unknown:
        error('unsupported column {!r}'.format(entry))
    return entries
|
||||
|
||||
|
||||
def _parse_group_arg(raw, error):
|
||||
if not raw:
|
||||
return raw
|
||||
group = raw
|
||||
if group not in COLUMN_NAMES:
|
||||
error('unsupported column {!r}'.format(group))
|
||||
if group != 'filename':
|
||||
error('unsupported group {!r}'.format(group))
|
||||
return group
|
||||
|
||||
|
||||
def parse_args(argv=None):
    """Parse the command line into a namespace of main() keyword arguments.

    *argv* defaults to ``sys.argv[1:]``.  The returned namespace has the
    attributes filters, columns, sort, group, rc, filename and verbosity;
    the raw -v/-q counts are folded into ``verbosity`` and removed.
    Invalid values are reported through ``parser.error``.
    """
    if argv is None:
        argv = sys.argv[1:]

    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-q', '--quiet', action='count', default=0)

    # NOTE(review): the help text says <GLOB>, but Filter.check() only
    # compares for equality.
    parser.add_argument('--filters', default='-scope',
                        help='[[-]<COLUMN>[=<GLOB>]] ...')

    parser.add_argument('--columns', default=COLUMNS,
                        help='a comma-separated list of columns to show')
    parser.add_argument('--sort', default='filename,_name',
                        help='a comma-separated list of columns to sort')
    parser.add_argument('--group',
                        help='group by the given column name (- to not group)')

    # Exit status to use when any variable is reported; see the default
    # chosen below when the option is omitted.
    parser.add_argument('--rc-on-match', dest='rc', type=int)

    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    # Collapse -v/-q into one level and drop them from the namespace so
    # that it can be splatted into main().
    verbose = vars(args).pop('verbose', 0)
    quiet = vars(args).pop('quiet', 0)
    args.verbosity = max(0, VERBOSITY + verbose - quiet)

    # Sorting by filename first implies grouping by filename unless a
    # group was given explicitly.
    if args.sort.startswith('filename') and not args.group:
        args.group = 'filename'

    # Must run before args.filters is parsed: this inspects the raw string.
    if args.rc is None:
        if '-scope=core' in args.filters or 'core' not in args.filters:
            args.rc = 0
        else:
            args.rc = 1

    # Replace the raw strings with their validated, parsed forms.
    args.filters = _parse_filters_arg(args.filters, parser.error)
    args.columns = _parse_columns_arg(args.columns, parser.error)
    args.sort = _parse_sort_arg(args.sort, parser.error)
    args.group = _parse_group_arg(args.group, parser.error)

    return args
|
||||
|
||||
|
||||
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Report the variable symbols of the built binary that pass *filters*.

    Arguments:
      root      -- directory containing the built ``python`` binary
      filename  -- file listing names to treat as known globals
      filters   -- iterable of Filter objects (None means no filtering)
      columns   -- column names to show; the caller's list is not modified
      sort      -- sort spec for make_sort_key(), or None to keep nm's order
      group     -- column to group by, or None for a single table
      verbosity -- the table is only printed at level 2 or above
      rc        -- value to return when at least one variable is reported

    Returns *rc* (after printing an error to stderr) if any variable was
    reported and *rc* is non-zero, else 0.
    """
    if verbosity >= 2:
        log = print
    else:
        def log(msg):
            pass

    if filters is None:
        filters = ()
    # Work on a copy: the default is the shared COLUMN_NAMES list and the
    # grouping column is removed from it below.
    columns = list(columns)

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        # The group value is already shown in each group's heading.
        try:
            columns.remove(group)
        except ValueError:
            pass
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The namespace attributes line up with main()'s keyword parameters.
    args = parse_args()
    exit_code = main(**vars(args))
    sys.exit(exit_code)
|
||||
Loading…
Add table
Add a link
Reference in a new issue