mirror of
https://github.com/python/cpython.git
synced 2025-08-04 17:08:35 +00:00
bpo-36876: Add a tool that identifies unsupported global C variables. (#15877)
This commit is contained in:
parent
9936371af2
commit
ee536b2020
51 changed files with 9467 additions and 19 deletions
9
Tools/c-analyzer/c-globals.py
Normal file
9
Tools/c-analyzer/c-globals.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
# This is a script equivalent of running "python -m test.test_c_globals.cg".

from c_globals.__main__ import parse_args, main


# This is effectively copied from cg/__main__.py:
if __name__ == '__main__':
    # Parse sys.argv into a command name + its keyword arguments,
    # then dispatch to the selected command.
    cmd, cmdkwargs = parse_args()
    main(cmd, cmdkwargs)
|
19
Tools/c-analyzer/c_analyzer_common/__init__.py
Normal file
19
Tools/c-analyzer/c_analyzer_common/__init__.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
import os.path


# The directory containing this package.
PKG_ROOT = os.path.dirname(__file__)
# The parent directory (Tools/c-analyzer/), where data files live.
DATA_DIR = os.path.dirname(PKG_ROOT)
# The root of the CPython checkout (two levels up from DATA_DIR).
REPO_ROOT = os.path.dirname(
        os.path.dirname(DATA_DIR))

# The CPython source directories to analyze (absolute paths).
SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
        'Include',
        'Python',
        'Parser',
        'Objects',
        'Modules',
        ]]


# Clean up the namespace.
del os
|
328
Tools/c-analyzer/c_analyzer_common/_generate.py
Normal file
328
Tools/c-analyzer/c_analyzer_common/_generate.py
Normal file
|
@ -0,0 +1,328 @@
|
|||
# The code here consists of hacks for pre-populating the known.tsv file.
|
||||
|
||||
from c_parser.preprocessor import _iter_clean_lines
|
||||
from c_parser.naive import (
|
||||
iter_variables, parse_variable_declaration, find_variables,
|
||||
)
|
||||
from c_parser.info import Variable
|
||||
|
||||
from . import SOURCE_DIRS, REPO_ROOT
|
||||
from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER
|
||||
from .info import UNKNOWN, ID
|
||||
from .util import write_tsv
|
||||
from .files import iter_cpython_files
|
||||
|
||||
|
||||
# "Plain old types" -- declaration prefixes for simple (implied-static)
# globals.  Note the trailing space so e.g. 'int ' matches "int x" but
# not "int_var".
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
POTS += tuple('const ' + v for v in POTS)
# Struct type names whose top-level initializers imply a static global.
STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar')
|
||||
|
||||
|
||||
def _parse_global(line, funcname=None):
    """Parse one C source line into a (name, decl) pair, if possible.

    Returns (None, None) when the line does not look like a recognized
    variable declaration.  For a couple of file-specific macros it
    instead returns a list of (name, funcname, decl) triples.
    NOTE(review): the branch order matters -- e.g. 'static ' must be
    checked before the "global-only" prefixes.
    """
    line = line.strip()
    if line.startswith('static '):
        # Skip function declarations (parens but no array or init).
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
        name, decl = parse_variable_declaration(line)
    elif line.startswith('_Py_static_string('):
        # _Py_static_string(NAME, "...") -- the first macro arg is the name.
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(',')[0].strip()
    elif line.startswith('_Py_IDENTIFIER('):
        # _Py_IDENTIFIER(foo) expands to a static "PyId_foo" variable.
        decl = line.strip(';').strip()
        name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
    elif funcname:
        # Inside a function only the above forms count.
        return None, None

    # global-only
    elif line.startswith('PyAPI_DATA('):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('extern '):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('PyDoc_VAR('):
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(')')[0].strip()
    elif line.startswith(POTS):  # implied static
        # Again, skip function declarations.
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = {'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = NULL;'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith('struct '):
        # Only recognized struct types with an initializer.
        if not line.endswith(' = {'):
            return None, None
        if not line.partition(' ')[2].startswith(STRUCTS):
            return None, None
        # implied static
        name, decl = parse_variable_declaration(line)

    # file-specific
    elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
        # Objects/typeobject.c
        # These macros each expand to two _Py_static_string variables
        # inside the generated function.
        funcname = line.split('(')[1].split(',')[0]
        return [
                ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
                ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'),
                ]
    elif line.startswith('WRAP_METHOD('):
        # Objects/weakrefobject.c
        # Expands to a _Py_IDENTIFIER inside the generated method.
        funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
        return [
                ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
                ]

    else:
        return None, None
    return name, decl
|
||||
|
||||
|
||||
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Pop and return the matching variable from the per-file cache.

    On the first request for a file all of its variables are parsed
    and cached.  Returns None if no cached variable matches.  Popping
    means each variable can be matched at most once.
    """
    # Look for the file.
    try:
        cached = varcache[filename]
    except KeyError:
        # First time we see this file: parse and cache its variables.
        cached = varcache[filename] = {}
        for variable in _iter_variables(filename,
                                        parse_variable=_parse_global,
                                        ):
            variable._isglobal = True
            cached[variable.id] = variable
        # NOTE(review): this prints every parsed variable id; it looks
        # like leftover debugging output -- confirm before removing.
        for var in cached:
            print(' ', var)

    # Look for the variable.
    if funcname == UNKNOWN:
        # We don't know the function, so match on the name alone.
        for varid in cached:
            if varid.name == name:
                break
        else:
            return None
        return cached.pop(varid)
    else:
        return cached.pop((filename, funcname, name), None)
|
||||
|
||||
|
||||
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the parsed variable matching the given ID, or None.

    When the ID names a specific file only that file is searched;
    otherwise every file in "allfilenames" is tried.  As a fallback,
    a file-specific global with no funcname is also looked for in
    every header (.h) file.
    """
    if varid.filename and varid.filename != UNKNOWN:
        filenames = [varid.filename]
    else:
        filenames = allfilenames
    for filename in filenames:
        variable = _pop_cached(varcache, filename, varid.funcname, varid.name)
        if variable is not None:
            return variable
    else:
        # Not found in the expected file(s); maybe it lives in a header.
        if varid.filename and varid.filename != UNKNOWN and varid.funcname is None:
            for filename in allfilenames:
                if not filename.endswith('.h'):
                    continue
                variable = _pop_cached(varcache, filename, None, varid.name)
                if variable is not None:
                    return variable
        return None
|
||||
|
||||
|
||||
# Hard-coded declarations for variables whose declarations span
# multiple lines (so the naive line-based parser cannot find them).
# Keys are variable names; values are the declaration prefix.
MULTILINE = {
    # Python/Python-ast.c
    'Load_singleton': 'PyObject *',
    'Store_singleton': 'PyObject *',
    'Del_singleton': 'PyObject *',
    'AugLoad_singleton': 'PyObject *',
    'AugStore_singleton': 'PyObject *',
    'Param_singleton': 'PyObject *',
    'And_singleton': 'PyObject *',
    'Or_singleton': 'PyObject *',
    'Add_singleton': 'static PyObject *',
    'Sub_singleton': 'static PyObject *',
    'Mult_singleton': 'static PyObject *',
    'MatMult_singleton': 'static PyObject *',
    'Div_singleton': 'static PyObject *',
    'Mod_singleton': 'static PyObject *',
    'Pow_singleton': 'static PyObject *',
    'LShift_singleton': 'static PyObject *',
    'RShift_singleton': 'static PyObject *',
    'BitOr_singleton': 'static PyObject *',
    'BitXor_singleton': 'static PyObject *',
    'BitAnd_singleton': 'static PyObject *',
    'FloorDiv_singleton': 'static PyObject *',
    'Invert_singleton': 'static PyObject *',
    'Not_singleton': 'static PyObject *',
    'UAdd_singleton': 'static PyObject *',
    'USub_singleton': 'static PyObject *',
    'Eq_singleton': 'static PyObject *',
    'NotEq_singleton': 'static PyObject *',
    'Lt_singleton': 'static PyObject *',
    'LtE_singleton': 'static PyObject *',
    'Gt_singleton': 'static PyObject *',
    'GtE_singleton': 'static PyObject *',
    'Is_singleton': 'static PyObject *',
    'IsNot_singleton': 'static PyObject *',
    'In_singleton': 'static PyObject *',
    'NotIn_singleton': 'static PyObject *',
    # Python/symtable.c
    'top': 'static identifier ',
    'lambda': 'static identifier ',
    'genexpr': 'static identifier ',
    'listcomp': 'static identifier ',
    'setcomp': 'static identifier ',
    'dictcomp': 'static identifier ',
    '__class__': 'static identifier ',
    # Python/compile.c
    '__doc__': 'static PyObject *',
    '__annotations__': 'static PyObject *',
    # Objects/floatobject.c
    'double_format': 'static float_format_type ',
    'float_format': 'static float_format_type ',
    'detected_double_format': 'static float_format_type ',
    'detected_float_format': 'static float_format_type ',
    # Parser/listnode.c
    'level': 'static int ',
    'atbol': 'static int ',
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
    }
# Variables declared with an inline (anonymous) struct type.
INLINE = {
    # Modules/_tracemalloc.c
    'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ',
    # Modules/faulthandler.c
    'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ',
    'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ',
    # Modules/signalmodule.c
    'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]',
    'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ',
    # Python/dynload_shlib.c
    'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
    # Objects/obmalloc.c
    '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ',
    # Python/bootstrap_hash.c
    'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
    }
# Global function pointers (full declarations, including the name).
FUNC = {
    # Objects/object.c
    '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
    # Parser/myreadline.c
    'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
    # Python/pylifecycle.c
    '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)',
    # Parser/myreadline.c
    'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)',
    }
# Globals whose declaration the parser would otherwise misread.
IMPLIED = {
    # Objects/boolobject.c
    '_Py_FalseStruct': 'static struct _longobject ',
    '_Py_TrueStruct': 'static struct _longobject ',
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
    }
# All hard-coded globals, merged into one lookup table.
GLOBALS = {}
GLOBALS.update(MULTILINE)
GLOBALS.update(INLINE)
GLOBALS.update(FUNC)
GLOBALS.update(IMPLIED)

# Hard-coded function-local statics: name -> (filename, funcname, decl).
LOCALS = {
    'buildinfo': ('Modules/getbuildinfo.c',
                  'Py_GetBuildInfo',
                  'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
    'methods': ('Python/codecs.c',
                '_PyCodecRegistry_Init',
                'static struct { char *name; PyMethodDef def; } methods[]'),
    }
|
||||
|
||||
|
||||
def _known(symbol):
    """Return a Variable for the symbol, based on the hard-coded tables.

    Raises KeyError when the symbol is not covered by the tables
    (LOCALS / GLOBALS / the filename-based special cases).
    """
    if symbol.funcname:
        # Function-local statics are only known via the LOCALS table,
        # and only when both filename and funcname are unknown.
        if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
            raise KeyError(symbol.name)
        filename, funcname, decl = LOCALS[symbol.name]
        varid = ID(filename, funcname, symbol.name)
    elif not symbol.filename or symbol.filename == UNKNOWN:
        raise KeyError(symbol.name)
    else:
        varid = symbol.id
        try:
            decl = GLOBALS[symbol.name]
        except KeyError:

            # Fall back to naming conventions.
            if symbol.name.endswith('_methods'):
                decl = 'static PyMethodDef '
            elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')):
                decl = 'static PyTypeObject '
            else:
                raise
    # The tables store prefixes; append the name if it isn't included.
    if symbol.name not in decl:
        decl = decl + symbol.name
    return Variable(varid, decl)
|
||||
|
||||
|
||||
def known_row(varid, decl):
    """Return the known.tsv row (a 5-tuple) for the given variable.

    A missing funcname is recorded as '-'.
    """
    funcname = varid.funcname if varid.funcname else '-'
    return (varid.filename, funcname, varid.name, 'variable', decl)
|
||||
|
||||
|
||||
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield a known.tsv row for each of the given symbols.

    Symbols are resolved first via the hard-coded tables (_known) and
    then by parsing the source files; anything unresolved is emitted
    with an UNKNOWN declaration.
    """
    filenames = list(_get_filenames())
    cache = {}
    if cached:
        for symbol in symbols:
            try:
                found = _known(symbol)
            except KeyError:
                found = _find_match(symbol, cache, filenames)
                if found is None:
                    found = Variable(symbol.id, UNKNOWN)
            yield _as_known(found.id, found.vartype)
    else:
        raise NotImplementedError  # XXX incorporate KNOWN
        # NOTE(review): everything below is unreachable (dead code
        # kept for when the KNOWN data gets incorporated).
        for variable in _find_symbols(symbols, filenames,
                                      srccache=cache,
                                      parse_variable=_parse_global,
                                      ):
            #variable = variable._replace(
            #    filename=os.path.relpath(variable.filename, REPO_ROOT))
            if variable.funcname == UNKNOWN:
                print(variable)
            if variable.vartype == UNKNOWN:
                print(variable)
            yield _as_known(variable.id, variable.vartype)
|
||||
|
||||
|
||||
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Generate the known.tsv rows for "symbols" and write them out.

    If no filename is given then write next to the default data file.
    """
    target = filename or KNOWN_FILE + '.new'
    rows = _generate_rows(symbols)
    _write_tsv(target, KNOWN_HEADER, rows)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Regenerate known.tsv from the symbols in the python binary.
    from c_symbols import binary
    symbols = binary.iter_symbols(
            binary.PYTHON,
            find_local_symbol=None,
            )
    generate(symbols)
|
138
Tools/c-analyzer/c_analyzer_common/files.py
Normal file
138
Tools/c-analyzer/c_analyzer_common/files.py
Normal file
|
@ -0,0 +1,138 @@
|
|||
import glob
|
||||
import os
|
||||
import os.path
|
||||
|
||||
from . import SOURCE_DIRS, REPO_ROOT
|
||||
|
||||
|
||||
C_SOURCE_SUFFIXES = ('.c', '.h')
|
||||
|
||||
|
||||
def _walk_tree(root, *,
|
||||
_walk=os.walk,
|
||||
):
|
||||
# A wrapper around os.walk that resolves the filenames.
|
||||
for parent, _, names in _walk(root):
|
||||
for name in names:
|
||||
yield os.path.join(parent, name)
|
||||
|
||||
|
||||
def walk_tree(root, *,
|
||||
suffix=None,
|
||||
walk=_walk_tree,
|
||||
):
|
||||
"""Yield each file in the tree under the given directory name.
|
||||
|
||||
If "suffix" is provided then only files with that suffix will
|
||||
be included.
|
||||
"""
|
||||
if suffix and not isinstance(suffix, str):
|
||||
raise ValueError('suffix must be a string')
|
||||
|
||||
for filename in walk(root):
|
||||
if suffix and not filename.endswith(suffix):
|
||||
continue
|
||||
yield filename
|
||||
|
||||
|
||||
def glob_tree(root, *,
              suffix=None,
              _glob=glob.iglob,
              ):
    """Yield each file in the tree under the given directory name.

    If "suffix" is provided then only files with that suffix will
    be included.
    """
    suffix = suffix or ''
    if not isinstance(suffix, str):
        raise ValueError('suffix must be a string')

    # First the files directly under "root", then everything deeper.
    yield from _glob(f'{root}/*{suffix}')
    yield from _glob(f'{root}/**/*{suffix}')
|
||||
|
||||
|
||||
def iter_files(root, suffix=None, relparent=None, *,
               get_files=os.walk,
               _glob=glob_tree,
               _walk=walk_tree,
               ):
    """Yield each file in the tree under the given directory name.

    If "root" is a non-string iterable then do the same for each of
    those trees.

    If "suffix" is provided then only files with that suffix will
    be included.

    If "relparent" is provided then it is used to resolve each
    filename as a relative path.
    """
    if not isinstance(root, str):
        # Multiple roots: recurse once per tree.
        roots = root
        for root in roots:
            yield from iter_files(root, suffix, relparent,
                                  get_files=get_files,
                                  _glob=_glob, _walk=_walk)
        return

    # Use the right "walk" function.
    if get_files in (glob.glob, glob.iglob, glob_tree):
        get_files = _glob
    else:
        # Anything else is treated as an os.walk-style callable and
        # wrapped so it goes through the walk_tree filtering.
        _files = _walk_tree if get_files in (os.walk, walk_tree) else get_files
        get_files = (lambda *a, **k: _walk(*a, walk=_files, **k))

    # Handle a single suffix.
    if suffix and not isinstance(suffix, str):
        # Multiple suffixes: filter here rather than in get_files().
        filenames = get_files(root)
        suffix = tuple(suffix)
    else:
        filenames = get_files(root, suffix=suffix)
        suffix = None

    for filename in filenames:
        if suffix and not isinstance(suffix, str):  # multiple suffixes
            if not filename.endswith(suffix):
                continue
        if relparent:
            filename = os.path.relpath(filename, relparent)
        yield filename
|
||||
|
||||
|
||||
def iter_files_by_suffix(root, suffixes, relparent=None, *,
                         walk=walk_tree,
                         _iter_files=iter_files,
                         ):
    """Yield each file in the tree that has the given suffixes.

    Unlike iter_files(), the results are in the original suffix order.
    """
    # NOTE(review): the "walk" parameter is accepted but never passed
    # on to _iter_files() -- confirm whether it should be forwarded.
    if isinstance(suffixes, str):
        suffixes = [suffixes]
    # XXX Ignore repeated suffixes?
    for suffix in suffixes:
        yield from _iter_files(root, suffix, relparent)
|
||||
|
||||
|
||||
def iter_cpython_files(*,
                       walk=walk_tree,
                       _files=iter_files_by_suffix,
                       ):
    """Yield each file in the tree for each of the given directory names."""
    # Files under these subtrees are skipped.
    excludedtrees = [
        os.path.join('Include', 'cpython', ''),
        ]
    def is_excluded(filename):
        # True if the (repo-relative) filename is in an excluded tree.
        for root in excludedtrees:
            if filename.startswith(root):
                return True
        return False
    # Filenames come back relative to REPO_ROOT, .c files before .h.
    for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
                           walk=walk,
                           ):
        if is_excluded(filename):
            continue
        yield filename
|
69
Tools/c-analyzer/c_analyzer_common/info.py
Normal file
69
Tools/c-analyzer/c_analyzer_common/info.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
from collections import namedtuple
|
||||
import re
|
||||
|
||||
from .util import classonly, _NTBase
|
||||
|
||||
|
||||
# Placeholder for a filename/funcname/declaration that is not known.
UNKNOWN = '???'

# A valid C (and Python) identifier.
NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
|
||||
|
||||
|
||||
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
    """A unique ID for a single symbol or declaration."""

    __slots__ = ()
    # XXX Add optional conditions (tuple of strings) field.
    #conditions = Slot()

    @classonly
    def from_raw(cls, raw):
        """Return an ID built from a raw value, if possible.

        A string becomes a bare name; a 1-tuple supplies the name and
        a 2-tuple a (filename, name) pair.  Anything else is handled
        by the base class.
        """
        if not raw:
            return None
        if isinstance(raw, str):
            return cls(None, None, raw)
        try:
            name, = raw
            filename = None
        except ValueError:
            try:
                filename, name = raw
            except ValueError:
                return super().from_raw(raw)
        return cls(filename, None, name)

    def __new__(cls, filename, funcname, name):
        # Coerce each field to str, mapping falsy values to None.
        self = super().__new__(
                cls,
                filename=str(filename) if filename else None,
                funcname=str(funcname) if funcname else None,
                name=str(name) if name else None,
                )
        #cls.conditions.set(self, tuple(str(s) if s else None
        #                               for s in conditions or ()))
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        if not self.name:
            raise TypeError('missing name')
        else:
            if not NAME_RE.match(self.name):
                raise ValueError(
                        f'name must be an identifier, got {self.name!r}')

        # Symbols from a binary might not have filename/funcname info.

        if self.funcname:
            if not self.filename:
                raise TypeError('missing filename')
            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
                # Fixed: this previously said "name", which misreported
                # which field was invalid.
                raise ValueError(
                        f'funcname must be an identifier, got {self.funcname!r}')

        # XXX Require the filename (at least UNKNOWN)?
        # XXX Check the filename?

    @property
    def islocal(self):
        # A function-local symbol has a (non-None) funcname.
        return self.funcname is not None
|
67
Tools/c-analyzer/c_analyzer_common/known.py
Normal file
67
Tools/c-analyzer/c_analyzer_common/known.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
import csv
|
||||
import os.path
|
||||
|
||||
from c_parser.info import Variable
|
||||
|
||||
from . import DATA_DIR
|
||||
from .info import ID, UNKNOWN
|
||||
from .util import read_tsv
|
||||
|
||||
|
||||
# The default location of the known-declarations data file.
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')

# The TSV columns (and header line) for known.tsv.
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
|
||||
|
||||
|
||||
# XXX need tests:
|
||||
# * from_file()
|
||||
|
||||
def from_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Return the info for known declarations in the given file.

    The result maps category name ('variables') to a dict of
    {ID: Variable}.  Raises ValueError for unsupported kinds.
    """
    known = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
        }
    for row in _read_tsv(infile, HEADER):
        filename, funcname, name, kind, declaration = row
        if not funcname or funcname == '-':
            funcname = None
        id = ID(filename, funcname, name)
        if kind == 'variable':
            values = known['variables']
            value = Variable(id, declaration)
            # Function-less entries are globals by definition.
            value._isglobal = _is_global(declaration) or id.funcname is None
        else:
            raise ValueError(f'unsupported kind in row {row}')
        if value.name == 'id' and declaration == UNKNOWN:
            # None of these are variables, so skip validation.
            # (Fixed: this branch used to rebind the local
            # "declaration" after the Variable was already built,
            # which had no effect.)
            pass
        else:
            value.validate()
        values[id] = value
    return known
|
||||
|
||||
|
||||
def _is_global(vartype):
|
||||
# statics
|
||||
if vartype.startswith('static '):
|
||||
return True
|
||||
if vartype.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
|
||||
return True
|
||||
if vartype.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
|
||||
return True
|
||||
if vartype.startswith('PyDoc_VAR('):
|
||||
return True
|
||||
if vartype.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
|
||||
return True
|
||||
if vartype.startswith('WRAP_METHOD('):
|
||||
return True
|
||||
# public extern
|
||||
if vartype.startswith('PyAPI_DATA('):
|
||||
return True
|
||||
return False
|
214
Tools/c-analyzer/c_analyzer_common/util.py
Normal file
214
Tools/c-analyzer/c_analyzer_common/util.py
Normal file
|
@ -0,0 +1,214 @@
|
|||
import csv
|
||||
import subprocess
|
||||
|
||||
|
||||
_NOT_SET = object()
|
||||
|
||||
|
||||
def run_cmd(argv, **kwargs):
    """Run the given command and return its captured stdout (text).

    Raises subprocess.CalledProcessError on a nonzero exit status.
    """
    completed = subprocess.run(argv,
                               stdout=subprocess.PIPE,
                               text=True,
                               check=True,
                               **kwargs)
    return completed.stdout
|
||||
|
||||
|
||||
def read_tsv(infile, header, *,
             _open=open,
             _get_reader=csv.reader,
             ):
    """Yield each row of the given TSV (tab-separated) file.

    "infile" may be a filename or an open file.  The first line must
    match "header" exactly or ValueError is raised.
    """
    if isinstance(infile, str):
        # A filename: open it and re-enter with the file object.
        with _open(infile, newline='') as opened:
            yield from read_tsv(opened, header,
                                _open=_open,
                                _get_reader=_get_reader,
                                )
        return
    lines = iter(infile)

    # Validate the header (an empty file fails too).
    actualheader = next(lines, '').strip()
    if actualheader != header:
        raise ValueError(f'bad header {actualheader!r}')

    for row in _get_reader(lines, delimiter='\t'):
        yield tuple(field.strip() for field in row)
|
||||
|
||||
|
||||
def write_tsv(outfile, header, rows, *,
              _open=open,
              _get_writer=csv.writer,
              ):
    """Write each of the rows to the given TSV (tab-separated) file.

    "outfile" may be a filename or an open file.  None values are
    written as empty fields; everything else is str()-ified.
    """
    if isinstance(outfile, str):
        # A filename: open it and re-enter with the file object.
        with _open(outfile, 'w', newline='') as opened:
            return write_tsv(opened, header, rows,
                             _open=_open,
                             _get_writer=_get_writer,
                             )

    if isinstance(header, str):
        header = header.split('\t')
    writer = _get_writer(outfile, delimiter='\t')
    writer.writerow(header)
    for row in rows:
        cells = ['' if value is None else str(value) for value in row]
        writer.writerow(cells)
|
||||
|
||||
|
||||
class Slot:
    """A descriptor that provides a slot.

    This is useful for types that can't have slots via __slots__,
    e.g. tuple subclasses.

    NOTE(review): per-instance values are keyed on id(obj), so they
    are never released when the object dies and a recycled id could
    alias a dead object's value -- confirm this is acceptable here.
    """

    __slots__ = ('initial', 'default', 'readonly', 'instances', 'name')

    def __init__(self, initial=_NOT_SET, *,
                 default=_NOT_SET,
                 readonly=False,
                 ):
        # "initial" seeds the value on first access; "default" is used
        # when there is no initial value and after deletion.
        self.initial = initial
        self.default = default
        self.readonly = readonly

        # Maps id(obj) -> the value for that instance.
        self.instances = {}
        self.name = None

    def __set_name__(self, cls, name):
        # A Slot instance may only be attached to one class attribute.
        if self.name is not None:
            raise TypeError('already used')
        self.name = name

    def __get__(self, obj, cls):
        if obj is None:  # called on the class
            return self
        try:
            value = self.instances[id(obj)]
        except KeyError:
            # First access: seed from "initial", else fall back to
            # "default" (which may itself be unset).
            if self.initial is _NOT_SET:
                value = self.default
            else:
                value = self.initial
            self.instances[id(obj)] = value
        if value is _NOT_SET:
            raise AttributeError(self.name)
        # XXX Optionally make a copy?
        return value

    def __set__(self, obj, value):
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        # XXX Optionally coerce?
        self.instances[id(obj)] = value

    def __delete__(self, obj):
        # Deleting resets to the default rather than removing the entry.
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        self.instances[id(obj)] = self.default

    def set(self, obj, value):
        """Update the cached value for an object.

        This works even if the descriptor is read-only.  This is
        particularly useful when initializing the object (e.g. in
        its __new__ or __init__).
        """
        self.instances[id(obj)] = value
|
||||
|
||||
|
||||
class classonly:
    """A non-data descriptor that makes a value only visible on the class.

    This is like the "classmethod" builtin, but does not show up on
    instances of the class.  It may be used as a decorator.
    """

    def __init__(self, value):
        self.value = value
        # Reuse classmethod's binding machinery for class access.
        self.getter = classmethod(value).__get__
        self.name = None

    def __set_name__(self, cls, name):
        # Each instance may only be attached to one attribute.
        if self.name is not None:
            raise TypeError('already used')
        self.name = name

    def __get__(self, obj, cls):
        if obj is not None:
            # Accessed on an instance: hide the attribute entirely.
            raise AttributeError(self.name)
        # called on the class
        return self.getter(None, cls)
|
||||
|
||||
|
||||
class _NTBase:
    # A mixin for namedtuple classes, adding alternate constructors,
    # validation hooks, and None-tolerant sorting.

    __slots__ = ()

    @classonly
    def from_raw(cls, raw):
        """Return an instance built from an arbitrary raw value.

        Falsy values map to None; existing instances pass through;
        strings go through from_string(); mappings and iterables are
        unpacked into the constructor.
        """
        if not raw:
            return None
        elif isinstance(raw, cls):
            return raw
        elif isinstance(raw, str):
            return cls.from_string(raw)
        else:
            if hasattr(raw, 'items'):
                return cls(**raw)
            try:
                args = tuple(raw)
            except TypeError:
                pass
            else:
                return cls(*args)
        raise NotImplementedError

    @classonly
    def from_string(cls, value):
        """Return a new instance based on the given string."""
        raise NotImplementedError

    @classmethod
    def _make(cls, iterable):  # The default _make() is not subclass-friendly.
        return cls.__new__(cls, *iterable)

    # XXX Always validate?
    #def __init__(self, *args, **kwargs):
    #    self.validate()

    # XXX The default __repr__() is not subclass-friendly (where the name changes).
    #def __repr__(self):
    #    _, _, sig = super().__repr__().partition('(')
    #    return f'{self.__class__.__name__}({sig}'

    # To make sorting work with None:
    def __lt__(self, other):
        try:
            return super().__lt__(other)
        except TypeError:
            # Treat a tuple containing None as smaller than one without.
            if None in self:
                return True
            elif None in other:
                return False
            else:
                raise

    def validate(self):
        # Subclasses override this to check their fields.
        return

    # XXX Always validate?
    #def _replace(self, **kwargs):
    #    obj = super()._replace(**kwargs)
    #    obj.validate()
    #    return obj
|
72
Tools/c-analyzer/c_globals/README
Normal file
72
Tools/c-analyzer/c_globals/README
Normal file
|
@ -0,0 +1,72 @@
|
|||
#######################################
|
||||
# C Globals and CPython Runtime State.
|
||||
|
||||
CPython's C code makes extensive use of global variables (whether static
|
||||
globals or static locals). Each such variable falls into one of several
|
||||
categories:
|
||||
|
||||
* strictly const data
|
||||
* used exclusively in main or in the REPL
|
||||
* process-global state (e.g. managing process-level resources
|
||||
like signals and file descriptors)
|
||||
* Python "global" runtime state
|
||||
* per-interpreter runtime state
|
||||
|
||||
The last one can be a problem as soon as anyone creates a second
|
||||
interpreter (AKA "subinterpreter") in a process. It is definitely a
|
||||
problem under subinterpreters if they are no longer sharing the GIL,
|
||||
since the GIL protects us from a lot of race conditions. Keep in mind
|
||||
that ultimately *all* objects (PyObject) should be treated as
|
||||
per-interpreter state. This includes "static types", freelists,
|
||||
_PyIdentifier, and singletons. Take that in for a second. It has
|
||||
significant implications on where we use static variables!
|
||||
|
||||
Be aware that module-global state (stored in C statics) is a kind of
|
||||
per-interpreter state. There have been efforts across many years, and
|
||||
still going, to provide extension module authors mechanisms to store
|
||||
that state safely (see PEPs 3121, 489, etc.).
|
||||
|
||||
(Note that there has been discussion around support for running multiple
|
||||
Python runtimes in the same process. That would end up with the same
|
||||
problems, relative to static variables, that subinterpreters have.)
|
||||
|
||||
Historically we have been bad at keeping per-interpreter state out of
|
||||
static variables, mostly because until recently subinterpreters were
|
||||
not widely used nor even factored in to solutions. However, the
|
||||
feature is growing in popularity and use in the community.
|
||||
|
||||
Mandate: "Eliminate use of static variables for per-interpreter state."
|
||||
|
||||
The "c-globals.py" script in this directory, along with its accompanying
|
||||
data files, are part of the effort to resolve existing problems with
|
||||
our use of static variables and to prevent future problems.
|
||||
|
||||
#-------------------------
|
||||
## statics for actually-global state (and runtime state consolidation)
|
||||
|
||||
In general, holding any kind of state in static variables
|
||||
increases maintenance burden and increases the complexity of code (e.g.
|
||||
we use TSS to identify the active thread state). So it is a good idea
|
||||
to avoid using statics for state even if for the "global" runtime or
|
||||
for process-global state.
|
||||
|
||||
Relative to maintenance burden, one problem is where the runtime
|
||||
state is spread throughout the codebase in dozens of individual
|
||||
globals. Unlike the other globals, the runtime state represents a set
|
||||
of values that are constantly shifting in a complex way. When they are
|
||||
spread out it's harder to get a clear picture of what the runtime
|
||||
involves. Furthermore, when they are spread out it complicates efforts
|
||||
that change the runtime.
|
||||
|
||||
Consequently, the globals for Python's runtime state have been
|
||||
consolidated under a single top-level _PyRuntime global. No new globals
|
||||
should be added for runtime state. Instead, they should be added to
|
||||
_PyRuntimeState or one of its sub-structs. The tools in this directory
|
||||
are run as part of the test suite to ensure that no new globals have
|
||||
been added. The script can be run manually as well:
|
||||
|
||||
./python Tools/c-analyzer/c-globals.py check
|
||||
|
||||
If it reports any globals then they should be resolved. If the globals
|
||||
are runtime state then they should be folded into _PyRuntimeState.
|
||||
Otherwise they should be marked as ignored.
|
0
Tools/c-analyzer/c_globals/__init__.py
Normal file
0
Tools/c-analyzer/c_globals/__init__.py
Normal file
209
Tools/c-analyzer/c_globals/__main__.py
Normal file
209
Tools/c-analyzer/c_globals/__main__.py
Normal file
|
@ -0,0 +1,209 @@
|
|||
import argparse
|
||||
import os.path
|
||||
import re
|
||||
import sys
|
||||
|
||||
from c_analyzer_common import SOURCE_DIRS, REPO_ROOT
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
from c_analyzer_common.known import (
|
||||
from_file as known_from_file,
|
||||
DATA_FILE as KNOWN_FILE,
|
||||
)
|
||||
from . import find, show
|
||||
from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object
|
||||
|
||||
|
||||
def _match_unused_global(variable, knownvars, used):
|
||||
found = []
|
||||
for varid in knownvars:
|
||||
if varid in used:
|
||||
continue
|
||||
if varid.funcname is not None:
|
||||
continue
|
||||
if varid.name != variable.name:
|
||||
continue
|
||||
if variable.filename and variable.filename != UNKNOWN:
|
||||
if variable.filename == varid.filename:
|
||||
found.append(varid)
|
||||
else:
|
||||
found.append(varid)
|
||||
return found
|
||||
|
||||
|
||||
def _check_results(unknown, knownvars, used):
    """Audit the symbol-matching results and fail on inconsistencies.

    "unknown" is the set of variables whose type could not be resolved
    (it is mutated in place for a couple of hard-coded names), "knownvars"
    is the full set of known variable IDs, and "used" is the set of IDs
    that were matched against a symbol.  Problems are printed; an
    Exception is raised if known vars went unused or unknowns remain.
    """
    badknown = set()
    # Iterate over a sorted copy, so removing from "unknown" below is safe.
    for variable in sorted(unknown):
        msg = None
        if variable.funcname != UNKNOWN:
            msg = f'could not find global symbol {variable.id}'
        elif m := _match_unused_global(variable, knownvars, used):
            assert isinstance(m, list)
            badknown.update(m)
        elif variable.name in ('completed', 'id'):  # XXX Figure out where these variables are.
            unknown.remove(variable)
        else:
            msg = f'could not find local symbol {variable.id}'
        if msg:
            #raise Exception(msg)
            print(msg)
    if badknown:
        print('---')
        print(f'{len(badknown)} globals in known.tsv, but may actually be local:')
        for varid in sorted(badknown):
            print(f'{varid.filename:30} {varid.name}')
    # "id" is excluded for the same unresolved reason as above.
    unused = sorted(varid
                    for varid in set(knownvars) - used
                    if varid.name != 'id')  # XXX Figure out where these variables are.
    if unused:
        print('---')
        print(f'did not use {len(unused)} known vars:')
        for varid in unused:
            print(f'{varid.filename:30} {varid.funcname or "-":20} {varid.name}')
        raise Exception('not all known symbols used')
    if unknown:
        print('---')
        raise Exception('could not find all symbols')
|
||||
|
||||
|
||||
def _find_globals(dirnames, known, ignored):
    """Yield (variable, is-supported) for each global found in the binary.

    "known" and "ignored" are filenames; they are loaded here.  Variables
    whose type could not be resolved are collected but not yielded.
    """
    if dirnames == SOURCE_DIRS:
        # Keep reported paths short/stable relative to the repo root.
        dirnames = [os.path.relpath(d, REPO_ROOT) for d in dirnames]

    ignored = ignored_from_file(ignored)
    known = known_from_file(known)

    used = set()
    unknown = set()
    knownvars = (known or {}).get('variables')
    for variable in find.globals_from_binary(knownvars=knownvars,
                                             dirnames=dirnames):
    #for variable in find.globals(dirnames, known, kind='platform'):
        if variable.vartype == UNKNOWN:
            unknown.add(variable)
            continue
        yield variable, is_supported(variable, ignored, known)
        used.add(variable.id)

    # Auditing is disabled for now (see _check_results above).
    #_check_results(unknown, knownvars, used)
|
||||
|
||||
|
||||
def cmd_check(cmd, dirs=SOURCE_DIRS, *,
              ignored=IGNORED_FILE,
              known=KNOWN_FILE,
              _find=_find_globals,
              _show=show.basic,
              _print=print,
              ):
    """
    Fail if there are unsupported globals variables.

    In the failure case, the list of unsupported variables
    will be printed out.
    """
    unsupported = [variable
                   for variable, okay in _find(dirs, known, ignored)
                   if not okay]
    if unsupported:
        _print('ERROR: found unsupported global variables')
        _print()
        _show(sorted(unsupported))
        _print(f' ({len(unsupported)} total)')
        sys.exit(1)
    #_print('okay')
|
||||
|
||||
|
||||
def cmd_show(cmd, dirs=SOURCE_DIRS, *,
             ignored=IGNORED_FILE,
             known=KNOWN_FILE,
             skip_objects=False,
             _find=_find_globals,
             _show=show.basic,
             _print=print,
             ):
    """
    Print out the list of found global variables.

    The variables will be distinguished as "supported" or "unsupported".
    """
    supported = []
    unsupported = []
    for found, okay in _find(dirs, known, ignored):
        # XXX Support proper filters instead.
        if skip_objects and _is_object(found.vartype):
            continue
        (supported if okay else unsupported).append(found)

    for label, group in (('supported', supported),
                         ('unsupported', unsupported)):
        _print(f'{label}:')
        _print('-' * (len(label) + 1))
        _show(sorted(group))
        _print(f' ({len(group)} total)')
        if label == 'supported':
            _print()
|
||||
|
||||
|
||||
#############################
|
||||
# the script
|
||||
|
||||
# Map of sub-command name to its handler function.
COMMANDS = {
    'check': cmd_check,
    'show': cmd_show,
}

# A fixed name is used (rather than the dead `PROG = sys.argv[0]`
# assignment this replaced, which was immediately overwritten) so that
# usage messages stay short and stable regardless of invocation path.
PROG = 'c-globals.py'
|
||||
|
||||
|
||||
def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None):
    """Parse the command line and return (cmd, kwargs-for-cmd).

    NOTE: the "argv" default is evaluated once at import time
    (sys.argv[1:] at module load); pass argv explicitly for testing.
    "_fail" defaults to the parser's own error() (exits with usage).
    """
    # Options shared by every sub-command.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument('--ignored', metavar='FILE',
                        default=IGNORED_FILE,
                        help='path to file that lists ignored vars')
    common.add_argument('--known', metavar='FILE',
                        default=KNOWN_FILE,
                        help='path to file that lists known types')
    common.add_argument('dirs', metavar='DIR', nargs='*',
                        default=SOURCE_DIRS,
                        help='a directory to check')

    parser = argparse.ArgumentParser(
            prog=prog,
            )
    subs = parser.add_subparsers(dest='cmd')

    check = subs.add_parser('check', parents=[common])

    show = subs.add_parser('show', parents=[common])
    show.add_argument('--skip-objects', action='store_true')

    if _fail is None:
        def _fail(msg):
            parser.error(msg)

    # Now parse the args.
    args = parser.parse_args(argv)
    ns = vars(args)

    # Everything left in "ns" becomes the command's keyword arguments.
    cmd = ns.pop('cmd')
    if not cmd:
        _fail('missing command')

    return cmd, ns
|
||||
|
||||
|
||||
def main(cmd, cmdkwargs=None, *, _COMMANDS=COMMANDS):
    """Look up and run the requested command with the given kwargs."""
    if cmd not in _COMMANDS:
        raise ValueError(
                f'unsupported cmd {cmd!r}' if cmd else 'missing cmd')
    _COMMANDS[cmd](cmd, **cmdkwargs or {})
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Parse the command line and dispatch to the requested sub-command.
    cmd, cmdkwargs = parse_args()
    main(cmd, cmdkwargs)
|
95
Tools/c-analyzer/c_globals/find.py
Normal file
95
Tools/c-analyzer/c_globals/find.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
from c_analyzer_common import SOURCE_DIRS
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
from c_symbols import (
|
||||
info as s_info,
|
||||
binary as b_symbols,
|
||||
source as s_symbols,
|
||||
resolve,
|
||||
)
|
||||
from c_parser import info, declarations
|
||||
|
||||
|
||||
# XXX needs tests:
|
||||
# * iter_variables
|
||||
|
||||
def globals_from_binary(binfile=b_symbols.PYTHON, *,
                        knownvars=None,
                        dirnames=None,
                        _iter_symbols=b_symbols.iter_symbols,
                        _resolve=resolve.symbols_to_variables,
                        _get_symbol_resolver=resolve.get_resolver,
                        ):
    """Yield a Variable for each found Symbol.

    Details are filled in from the given "known" variables and types.
    The underscore-prefixed keyword args exist for dependency injection
    in tests.
    """
    symbols = _iter_symbols(binfile, find_local_symbol=None)
    #symbols = list(symbols)
    for variable in _resolve(symbols,
                             resolve=_get_symbol_resolver(knownvars, dirnames),
                             ):
        # Skip each non-global variable (unless we couldn't find it).
        # XXX Drop the "UNKNOWN" condition?
        if not variable.isglobal and variable.vartype != UNKNOWN:
            continue
        yield variable
|
||||
|
||||
|
||||
def globals_from_declarations(dirnames=SOURCE_DIRS, *,
                              known=None,
                              ):
    """Yield a Variable for each found declaration.

    Details are filled in from the given "known" variables and types.
    """
    # Not implemented yet; globals_from_binary() is the working
    # (symbol-based) alternative.
    raise NotImplementedError
|
||||
|
||||
|
||||
def iter_variables(kind='platform', *,
                   known=None,
                   dirnames=None,
                   _resolve_symbols=resolve.symbols_to_variables,
                   _get_symbol_resolver=resolve.get_resolver,
                   _symbols_from_binary=b_symbols.iter_symbols,
                   _symbols_from_source=s_symbols.iter_symbols,
                   _iter_raw=declarations.iter_all,
                   _iter_preprocessed=declarations.iter_preprocessed,
                   ):
    """Yield a Variable for each one found (e.g. in files).

    "kind" selects the discovery strategy: 'symbols' (from source),
    'platform' (from the built binary), 'declarations' (raw parse), or
    'preprocessed'.  The underscore-prefixed args are for injection.
    """
    kind = kind or 'platform'

    if kind == 'symbols':
        knownvars = (known or {}).get('variables')
        yield from _resolve_symbols(
                _symbols_from_source(dirnames, known),
                resolve=_get_symbol_resolver(knownvars, dirnames),
                )
    elif kind == 'platform':
        knownvars = (known or {}).get('variables')
        # NOTE(review): no binary file is passed here, unlike
        # globals_from_binary() -- confirm b_symbols.iter_symbols has a
        # usable default.
        yield from _resolve_symbols(
                _symbols_from_binary(find_local_symbol=None),
                resolve=_get_symbol_resolver(knownvars, dirnames),
                )
    elif kind == 'declarations':
        for decl in _iter_raw(dirnames):
            if not isinstance(decl, info.Variable):
                continue
            yield decl
    elif kind == 'preprocessed':
        for decl in _iter_preprocessed(dirnames):
            if not isinstance(decl, info.Variable):
                continue
            yield decl
    else:
        raise ValueError(f'unsupported kind {kind!r}')
|
||||
|
||||
|
||||
def globals(dirnames, known, *,
            kind=None,  # Use the default.
            _iter_variables=iter_variables,
            ):
    """Return a list of (StaticVar, <supported>) for each found global var."""
    # NOTE(review): despite the docstring, this is a generator that
    # yields bare variables (not (var, supported) pairs).
    # NOTE: the name shadows the builtin globals().
    for found in _iter_variables(kind, known=known, dirnames=dirnames):
        if not found.isglobal:
            continue
        yield found
|
16
Tools/c-analyzer/c_globals/show.py
Normal file
16
Tools/c-analyzer/c_globals/show.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
|
||||
def basic(globals, *,
          _print=print):
    """Print each variable as one left-aligned "location  vartype" line."""
    for variable in globals:
        # Location is <filename>[:<funcname>()]:<name>.
        if variable.funcname:
            location = f'{variable.filename}:{variable.funcname}():{variable.name}'
        else:
            location = f'{variable.filename}:{variable.name}'
        _print(f'{location:<64} {variable.vartype}')
|
368
Tools/c-analyzer/c_globals/supported.py
Normal file
368
Tools/c-analyzer/c_globals/supported.py
Normal file
|
@ -0,0 +1,368 @@
|
|||
import os.path
|
||||
import re
|
||||
|
||||
from c_analyzer_common import DATA_DIR
|
||||
from c_analyzer_common.info import ID
|
||||
from c_analyzer_common.util import read_tsv, write_tsv
|
||||
|
||||
|
||||
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
|
||||
|
||||
IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
|
||||
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
|
||||
|
||||
# XXX Move these to ignored.tsv.
|
||||
IGNORED = {
|
||||
# global
|
||||
'PyImport_FrozenModules': 'process-global',
|
||||
'M___hello__': 'process-global',
|
||||
'inittab_copy': 'process-global',
|
||||
'PyHash_Func': 'process-global',
|
||||
'_Py_HashSecret_Initialized': 'process-global',
|
||||
'_TARGET_LOCALES': 'process-global',
|
||||
|
||||
# startup (only changed before/during)
|
||||
'_PyRuntime': 'runtime startup',
|
||||
'runtime_initialized': 'runtime startup',
|
||||
'static_arg_parsers': 'runtime startup',
|
||||
'orig_argv': 'runtime startup',
|
||||
'opt_ptr': 'runtime startup',
|
||||
'_preinit_warnoptions': 'runtime startup',
|
||||
'_Py_StandardStreamEncoding': 'runtime startup',
|
||||
'Py_FileSystemDefaultEncoding': 'runtime startup',
|
||||
'_Py_StandardStreamErrors': 'runtime startup',
|
||||
'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
|
||||
'Py_BytesWarningFlag': 'runtime startup',
|
||||
'Py_DebugFlag': 'runtime startup',
|
||||
'Py_DontWriteBytecodeFlag': 'runtime startup',
|
||||
'Py_FrozenFlag': 'runtime startup',
|
||||
'Py_HashRandomizationFlag': 'runtime startup',
|
||||
'Py_IgnoreEnvironmentFlag': 'runtime startup',
|
||||
'Py_InspectFlag': 'runtime startup',
|
||||
'Py_InteractiveFlag': 'runtime startup',
|
||||
'Py_IsolatedFlag': 'runtime startup',
|
||||
'Py_NoSiteFlag': 'runtime startup',
|
||||
'Py_NoUserSiteDirectory': 'runtime startup',
|
||||
'Py_OptimizeFlag': 'runtime startup',
|
||||
'Py_QuietFlag': 'runtime startup',
|
||||
'Py_UTF8Mode': 'runtime startup',
|
||||
'Py_UnbufferedStdioFlag': 'runtime startup',
|
||||
'Py_VerboseFlag': 'runtime startup',
|
||||
'_Py_path_config': 'runtime startup',
|
||||
'_PyOS_optarg': 'runtime startup',
|
||||
'_PyOS_opterr': 'runtime startup',
|
||||
'_PyOS_optind': 'runtime startup',
|
||||
'_Py_HashSecret': 'runtime startup',
|
||||
|
||||
# REPL
|
||||
'_PyOS_ReadlineLock': 'repl',
|
||||
'_PyOS_ReadlineTState': 'repl',
|
||||
|
||||
# effectively const
|
||||
'tracemalloc_empty_traceback': 'const',
|
||||
'_empty_bitmap_node': 'const',
|
||||
'posix_constants_pathconf': 'const',
|
||||
'posix_constants_confstr': 'const',
|
||||
'posix_constants_sysconf': 'const',
|
||||
'_PySys_ImplCacheTag': 'const',
|
||||
'_PySys_ImplName': 'const',
|
||||
'PyImport_Inittab': 'const',
|
||||
'_PyImport_DynLoadFiletab': 'const',
|
||||
'_PyParser_Grammar': 'const',
|
||||
'Py_hexdigits': 'const',
|
||||
'_PyImport_Inittab': 'const',
|
||||
'_PyByteArray_empty_string': 'const',
|
||||
'_PyLong_DigitValue': 'const',
|
||||
'_Py_SwappedOp': 'const',
|
||||
'PyStructSequence_UnnamedField': 'const',
|
||||
|
||||
# signals are main-thread only
|
||||
'faulthandler_handlers': 'signals are main-thread only',
|
||||
'user_signals': 'signals are main-thread only',
|
||||
'wakeup': 'signals are main-thread only',
|
||||
|
||||
# hacks
|
||||
'_PySet_Dummy': 'only used as a placeholder',
|
||||
}
|
||||
|
||||
BENIGN = 'races here are benign and unlikely'
|
||||
|
||||
|
||||
def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    A variable is okay if it is explicitly ignored (by ID or name) or
    if its type is inherently safe (e.g. const, atomic).  "ignored" may
    be None; previously the second branch called ignored.get('types')
    unguarded and crashed with AttributeError in that case.
    """
    if _ignored(variable,
                ignored.get('variables') if ignored else None):
        return True
    elif _vartype_okay(variable.vartype,
                       ignored.get('types') if ignored else None):
        return True
    else:
        return False
|
||||
|
||||
|
||||
def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    "ignoredvars" maps variable IDs to reasons (from ignored.tsv);
    "_IGNORED" maps bare names to reasons and applies only to
    file-level (non-function) variables.  The per-file checks below
    are a hand-maintained audit of specific CPython source files.
    """
    # Explicitly-listed IDs win first.
    if ignoredvars and (reason := ignoredvars.get(variable.id)):
        return reason

    if variable.funcname is None:
        if reason := _IGNORED.get(variable.name):
            return reason

    # compiler
    if variable.filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    if variable.filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    if variable.filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith('_field'):
            return 'compiler'
        if variable.name.endswith('_attribute'):
            return 'compiler'

    # other
    if variable.filename == 'Python/dtoa.c':
        # guarded by lock?
        if variable.name in ('p5s', 'freelist'):
            return 'dtoa is thread-safe?'
        if variable.name in ('private_mem', 'pmem_next'):
            return 'dtoa is thread-safe?'
    if variable.filename == 'Python/thread.c':
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        if variable.name in ('initialized', 'thread_debug'):
            return 'thread-safe'
    if variable.filename == 'Python/getversion.c':
        if variable.name == 'version':
            # Races are benign here, as well as unlikely.
            return BENIGN
    if variable.filename == 'Python/fileutils.c':
        if variable.name == 'force_ascii':
            return BENIGN
        if variable.name == 'ioctl_works':
            return BENIGN
        if variable.name == '_Py_open_cloexec_works':
            return BENIGN
    if variable.filename == 'Python/codecs.c':
        if variable.name == 'ucnhash_CAPI':
            return BENIGN
    if variable.filename == 'Python/bootstrap_hash.c':
        if variable.name == 'getrandom_works':
            return BENIGN
    if variable.filename == 'Objects/unicodeobject.c':
        if variable.name == 'ucnhash_CAPI':
            return BENIGN
        if variable.name == 'bloom_linebreak':
            # *mostly* benign
            return BENIGN
    if variable.filename == 'Modules/getbuildinfo.c':
        if variable.name == 'buildinfo':
            # The static is used for pre-allocation.
            return BENIGN
    if variable.filename == 'Modules/posixmodule.c':
        if variable.name == 'ticks_per_second':
            return BENIGN
        if variable.name == 'dup3_works':
            return BENIGN
    if variable.filename == 'Modules/timemodule.c':
        if variable.name == 'ticks_per_second':
            return BENIGN
    if variable.filename == 'Objects/longobject.c':
        if variable.name == 'log_base_BASE':
            return BENIGN
        if variable.name == 'convwidth_base':
            return BENIGN
        if variable.name == 'convmultmax_base':
            return BENIGN

    return None
|
||||
|
||||
|
||||
def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason string if the C type itself is inherently okay.

    Return None for object types and anything not recognized as safe.
    NOTE(review): "ignoredtypes" is accepted but never used here.
    """
    # Object types are never okay on type grounds alone.
    if _is_object(vartype):
        return None

    if vartype.startswith('static const '):
        return 'const'
    if vartype.startswith('const '):
        return 'const'

    # components for TypeObject definitions
    for name in ('PyMethodDef', 'PyGetSetDef', 'PyMemberDef'):
        if name in vartype:
            return 'const'
    for name in ('PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
                 'PyBufferProcs', 'PyAsyncMethods'):
        if name in vartype:
            return 'const'
    for name in ('slotdef', 'newfunc'):
        if name in vartype:
            return 'const'

    # structseq
    for name in ('PyStructSequence_Desc', 'PyStructSequence_Field'):
        if name in vartype:
            return 'const'

    # other definitions
    if 'PyModuleDef' in vartype:
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype:
        return 'thread-safe'
    if 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # global
#    if 'PyMemAllocatorEx' in vartype:
#        return True

    # others
#    if 'PyThread_type_lock' in vartype:
#        return True

    # XXX ???
    # _Py_tss_t
    # _Py_hashtable_t
    # stack_t
    # _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    #raise NotImplementedError
    return None
|
||||
|
||||
|
||||
def _is_object(vartype):
|
||||
if re.match(r'.*\bPy\w*Object\b', vartype):
|
||||
return True
|
||||
if '_PyArg_Parser ' in vartype:
|
||||
return True
|
||||
if vartype.startswith(('_Py_IDENTIFIER(', 'static _Py_Identifier',
|
||||
'_Py_static_string(')):
|
||||
return True
|
||||
if 'traceback_t' in vartype:
|
||||
return True
|
||||
if 'PyAsyncGenASend' in vartype:
|
||||
return True
|
||||
if '_PyAsyncGenWrappedValue' in vartype:
|
||||
return True
|
||||
if 'PyContext' in vartype:
|
||||
return True
|
||||
if 'method_cache_entry' in vartype:
|
||||
return True
|
||||
if vartype.startswith('static identifier '):
|
||||
return True
|
||||
if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
|
||||
return True
|
||||
|
||||
# XXX Add more?
|
||||
|
||||
#for part in vartype.split():
|
||||
# # XXX const is automatic True?
|
||||
# if part == 'PyObject' or part.startswith('PyObject['):
|
||||
# return True
|
||||
return False
|
||||
|
||||
|
||||
def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored variables loaded from the given tsv file.

    The result maps kind (currently only "variables") to {ID: reason}.
    """
    ignored = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        # "-" (or empty) means a file-level variable.
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind != 'variable':
            raise ValueError(f'unsupported kind in row {row}')
        ignored['variables'][varid] = reason
    return ignored
|
||||
|
||||
|
||||
##################################
|
||||
# generate
|
||||
|
||||
def _get_row(varid, reason):
|
||||
return (
|
||||
varid.filename,
|
||||
varid.funcname or '-',
|
||||
varid.name,
|
||||
'variable',
|
||||
str(reason),
|
||||
)
|
||||
|
||||
|
||||
def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield an ignored.tsv row for each supported variable.

    The print() calls are intentional progress output; this module is
    run manually as a script to (re)generate the data file.
    """
    count = 0
    for variable in variables:
        reason = _is_ignored(variable,
                             ignored and ignored.get('variables'),
                             )
        if not reason:
            reason = _vartype_okay(variable.vartype,
                                   ignored and ignored.get('types'))
        if not reason:
            # Not supported, so it does not belong in ignored.tsv.
            continue

        print(' ', variable, repr(reason))
        yield _as_row(variable.id, reason)
        count += 1
    print(f'total: {count}')
|
||||
|
||||
|
||||
def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write an ignored.tsv-style file for the given variables."""
    # Default to a ".new" sibling so the real data file isn't clobbered.
    target = filename if filename else IGNORED_FILE + '.new'
    _write_tsv(target, IGNORED_HEADER, _generate_rows(variables))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Regenerate the ignored-vars data file from the known variables
    # found in the built python binary.
    from c_analyzer_common import SOURCE_DIRS
    from c_analyzer_common.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
    )
    from . import find
    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    _generate_ignored_file(variables)
|
0
Tools/c-analyzer/c_parser/__init__.py
Normal file
0
Tools/c-analyzer/c_parser/__init__.py
Normal file
295
Tools/c-analyzer/c_parser/declarations.py
Normal file
295
Tools/c-analyzer/c_parser/declarations.py
Normal file
|
@ -0,0 +1,295 @@
|
|||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
|
||||
from . import source
|
||||
|
||||
|
||||
# Regexes used to (roughly) recognize C declarations.  This is a naive,
# best-effort recognizer, not a full C parser.

# A C identifier.  (The previous pattern's "[a-zA-z]" class accidentally
# included the punctuation between "Z" and "a" and matched only a single
# character; it now matches a full identifier.)
IDENTIFIER = r'(?:[a-zA-Z]\w*|_+[a-zA-Z0-9]\w*)'

TYPE_QUAL = r'(?:const|volatile)'

# The type part of a variable declaration.  This must be an f-string so
# that {IDENTIFIER} is actually interpolated; as a plain raw string the
# literal text "{IDENTIFIER}" ended up in the compiled patterns.  A stray
# "|" that let the integer-type group match the empty string was dropped.
VAR_TYPE_SPEC = rf'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          )
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''

POINTER = rf'''(?:
        (?:\s+const)?\s*[*]
        )'''

# The start of a function definition (or other global declaration).
FUNC_START = rf'''(?:
        (?:
          (?:
            extern |
            static |
            static\s+inline
           )\s+
         )?
        #(?:const\s+)?
        {VAR_TYPE_SPEC}
        )'''
GLOBAL_DECL_START_RE = re.compile(rf'''
        ^
        (?:
            ({FUNC_START})
         )
        ''', re.VERBOSE)

# The start of a variable declaration inside a function body.
LOCAL_VAR_START = rf'''(?:
        (?:
          (?:
            register |
            static
           )\s+
         )?
        (?:
          (?:
            {TYPE_QUAL}
            (?:\s+{TYPE_QUAL})?
           )\s+
         )?
        {VAR_TYPE_SPEC}
        {POINTER}?
        )'''
LOCAL_STMT_START_RE = re.compile(rf'''
        ^
        (?:
            ({LOCAL_VAR_START})
         )
        ''', re.VERBOSE)
|
||||
|
||||
|
||||
def iter_global_declarations(lines):
    """Yield (decl, body) for each global declaration in the given lines.

    For function definitions the header is reduced to one line and
    the body is provided as-is.  For other compound declarations (e.g.
    struct) the entire declaration is reduced to one line and "body"
    is None.  Likewise for simple declarations (e.g. variables).

    Declarations inside function bodies are ignored, though their text
    is provided in the function body.

    NOTE(review): as written, only function definitions are actually
    yielded -- simple and non-function compound declarations are
    skipped by the two "continue"s below.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not GLOBAL_DECL_START_RE.match(line):
            continue
        # We only need functions here, since we only need locals for now.
        if line.endswith(';'):
            continue
        if line.endswith('{') and '(' not in line:
            continue

        # Capture the function.
        # (assume no func is a one-liner)
        decl = line
        while '{' not in line:  # assume no inline structs, etc.
            try:
                line = next(lines)
            except StopIteration:
                return
            decl += ' ' + line

        body, end = _extract_block(lines)
        if end is None:
            return
        assert end == '}'
        yield (f'{decl}\n{body}\n{end}', body)
|
||||
|
||||
|
||||
def iter_local_statements(lines):
    """Yield (lines, blocks) for each statement in the given lines.

    For simple statements, "blocks" is None and the statement is reduced
    to a single line.  For compound statements, "blocks" is a pair of
    (header, body) for each block in the statement.  The headers are
    reduced to a single line each, but the bodies are provided as-is.

    NOTE(review): compound and multiline simple statements are not
    supported yet (see the XXX below), so "blocks" is always None.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not LOCAL_STMT_START_RE.match(line):
            continue

        stmt = line
        blocks = None
        if not line.endswith(';'):
            # XXX Support compound & multiline simple statements.
            #blocks = []
            continue

        yield (stmt, blocks)
|
||||
|
||||
|
||||
def _extract_block(lines):
|
||||
end = None
|
||||
depth = 1
|
||||
body = []
|
||||
for line in lines:
|
||||
depth += line.count('{') - line.count('}')
|
||||
if depth == 0:
|
||||
end = line
|
||||
break
|
||||
body.append(line)
|
||||
return '\n'.join(body), end
|
||||
|
||||
|
||||
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition."""
    # Split the full definition into header / body / closing brace.
    header, _, end = stmt.partition(body)
    assert end.strip() == '}'
    assert header.strip().endswith('{')
    header = header.rpartition('{')[0]

    # Collapse the (possibly multi-line) header onto one line.
    signature = ' '.join(header.strip().splitlines())

    # The name is the last word before the parameter list.
    name = signature.split('(')[0].strip().rpartition(' ')[-1]
    assert name

    return name, signature
|
||||
|
||||
|
||||
def parse_var(stmt):
    """Return (name, vartype) for the given variable declaration."""
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    # The regex match is the leading type; the rest holds the name.
    vartype = m.group(0)
    name = stmt[len(vartype):].partition('=')[0].strip()

    if name.startswith('('):
        # A function pointer, e.g. "void (*handler)(int)".
        name, _, after = name[1:].partition(')')
        assert after
        # Ensure a space after each "*" so rpartition() can split it off.
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'

    vartype = vartype.strip()
    # Normalize runs of spaces down to single spaces.
    while '  ' in vartype:
        vartype = vartype.replace('  ', ' ')

    return name, vartype
|
||||
|
||||
|
||||
def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement."""
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
|
||||
|
||||
|
||||
def iter_variables(filename, *,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    "funcname" is None for file-level variables.  The underscore-prefixed
    keyword args exist for dependency injection in tests.
    """
    lines = _iter_source_lines(filename)
    for stmt, body in _iter_global(lines):
        # At the file top-level we only have to worry about vars & funcs.
        if not body:
            name, vartype = _parse_var(stmt)
            if name:
                yield (None, name, vartype)
        else:
            funcname, _ = _parse_func(stmt, body)
            localvars = _iter_locals(body,
                                     _iter_statements=_iter_local,
                                     _parse_var=_parse_var,
                                     _parse_compound=_parse_compound,
                                     )
            for name, vartype in localvars:
                yield (funcname, name, vartype)
|
||||
|
||||
|
||||
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each local variable in the given body.

    Bodies of nested compound statements are queued and processed
    breadth-first.
    """
    compound = [lines]
    while compound:
        body = compound.pop(0)
        bodylines = body.splitlines()
        for stmt, blocks in _iter_statements(bodylines):
            if not blocks:
                # A simple (one-line) declaration.
                name, vartype = _parse_var(stmt)
                if name:
                    yield (name, vartype)
            else:
                # Declarations may hide in compound-statement headers
                # (e.g. "for (int i = 0; ...)"); queue the bodies.
                headers, bodies = _parse_compound(stmt, blocks)
                for header in headers:
                    for line in header:
                        name, vartype = _parse_var(line)
                        if name:
                            yield (name, vartype)
                compound.extend(bodies)
|
||||
|
||||
|
||||
def iter_all(dirnames):
    """Yield a Declaration for each one found.

    If there are duplicates, due to preprocessor conditionals, then
    they are checked to make sure they are the same.
    """
    # Not implemented yet.
    raise NotImplementedError
|
||||
|
||||
|
||||
def iter_preprocessed(dirnames):
    """Yield a Declaration for each one found.

    All source files are run through the preprocessor first.
    """
    # Not implemented yet.
    raise NotImplementedError
|
78
Tools/c-analyzer/c_parser/info.py
Normal file
78
Tools/c-analyzer/c_parser/info.py
Normal file
|
@ -0,0 +1,78 @@
|
|||
from collections import namedtuple
|
||||
|
||||
from c_analyzer_common import info, util
|
||||
from c_analyzer_common.util import classonly, _NTBase
|
||||
|
||||
|
||||
def normalize_vartype(vartype):
    """Return the canonical form for a variable type (or func signature)."""
    # We deliberately let the empty string through (only None maps to None).
    if vartype is None:
        return None

    # XXX finish!
    # XXX Return (modifiers, type, pointer)?
    return str(vartype)
|
||||
|
||||
|
||||
class Variable(_NTBase,
               namedtuple('Variable', 'id vartype')):
    """Information about a single variable declaration."""

    __slots__ = ()
    # Cache slot backing the lazily-computed "isglobal" property below.
    _isglobal = util.Slot()

    @classonly
    def from_parts(cls, filename, funcname, name, vartype, isglobal=False):
        # Alternate constructor building the ID from its parts.
        id = info.ID(filename, funcname, name)
        self = cls(id, vartype)
        if isglobal:
            self._isglobal = True
        return self

    def __new__(cls, id, vartype):
        # Coerce raw values; an empty/None vartype becomes None.
        self = super().__new__(
                cls,
                id=info.ID.from_raw(id),
                vartype=normalize_vartype(vartype) if vartype else None,
                )
        return self

    def __hash__(self):
        # Identity is determined by the ID alone, not the vartype.
        return hash(self.id)

    def __getattr__(self, name):
        # Delegate attribute lookups (filename/funcname/name) to the ID.
        return getattr(self.id, name)

    def _validate_id(self):
        # Raise TypeError if the ID is missing or incomplete.
        if not self.id:
            raise TypeError('missing id')

        if not self.filename or self.filename == info.UNKNOWN:
            raise TypeError(f'id missing filename ({self.id})')

        if self.funcname and self.funcname == info.UNKNOWN:
            raise TypeError(f'id missing funcname ({self.id})')

        self.id.validate()

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        self._validate_id()

        if self.vartype is None or self.vartype == info.UNKNOWN:
            raise TypeError('missing vartype')

    @property
    def isglobal(self):
        # Computed once and cached in the _isglobal slot.
        try:
            return self._isglobal
        except AttributeError:
            # XXX Include extern variables.
            # XXX Ignore functions.
            self._isglobal = ('static' in self.vartype.split())
            return self._isglobal

    @property
    def isconst(self):
        # True if "const" appears as a whole word in the type.
        return 'const' in self.vartype.split()
|
180
Tools/c-analyzer/c_parser/naive.py
Normal file
180
Tools/c-analyzer/c_parser/naive.py
Normal file
|
@ -0,0 +1,180 @@
|
|||
import re
|
||||
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
|
||||
from .info import Variable
|
||||
from .preprocessor import _iter_clean_lines
|
||||
|
||||
|
||||
_NOT_SET = object()
|
||||
|
||||
|
||||
def get_srclines(filename, *,
                 cache=None,
                 _open=open,
                 _iter_lines=_iter_clean_lines,
                 ):
    """Return the file's lines as a list.

    Each line has trailing whitespace (including the newline) removed.
    If a cache mapping ({filename: lines}) is given then it is
    consulted first and updated with the result.
    """
    if cache is not None and filename in cache:
        return cache[filename]

    with _open(filename) as srcfile:
        # Preprocessor directives are dropped; everything else is kept.
        srclines = [
            line.rstrip()
            for _, line in _iter_lines(srcfile)
            if not line.startswith('#')
        ]

    if cache is not None:
        cache[filename] = srclines
    return srclines
|
||||
|
||||
|
||||
def parse_variable_declaration(srcline):
    """Return (name, decl) for the given declaration line.

    (None, None) is returned when the line does not look like a
    declaration.
    """
    # XXX possible false negatives...
    decl, assigned, _ = srcline.partition('=')
    if not assigned:
        # Without an initializer the line must be a terminated statement.
        if not srcline.endswith(';'):
            return None, None
        decl = decl.strip(';')
    decl = decl.strip()
    # The name is the last identifier, optionally followed by an
    # array suffix ("[...]").
    match = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
    if match is None:
        return None, None
    return match.group(1), decl
|
||||
|
||||
|
||||
def parse_variable(srcline, funcname=None):
    """Return (name, decl) for the variable declared on the line.

    (None, None) is returned for lines that do not declare a
    (currently supported) variable.
    """
    line = srcline.strip()

    # XXX Handle more than just static variables.
    if not line.startswith('static '):
        return None, None
    if '(' in line and '[' not in line:
        # Parens without an array suffix means a function.
        return None, None
    return parse_variable_declaration(line)
|
||||
|
||||
|
||||
def iter_variables(filename, *,
                   srccache=None,
                   parse_variable=None,
                   _get_srclines=get_srclines,
                   # Captures the module-level parse_variable() at def
                   # time, since the keyword parameter shadows it.
                   _default_parse_variable=parse_variable,
                   ):
    """Yield a Variable for each in the given source file."""
    if parse_variable is None:
        parse_variable = _default_parse_variable

    # A small line-oriented state machine: track the current function
    # (if any) so declarations can be attributed to it.
    indent = ''
    prev = ''
    funcname = None
    for line in _get_srclines(filename, cache=srccache):
        # remember current funcname
        if funcname:
            if line == indent + '}':
                # The body closed at the same indent as its header.
                funcname = None
                continue
        else:
            if '(' in prev and line == indent + '{':
                # The previous line held the function header; pull the
                # name out of it (unless it was just an attribute).
                if not prev.startswith('__attribute__'):
                    funcname = prev.split('(')[0].split()[-1]
                prev = ''
                continue
            indent = line[:-len(line.lstrip())]
            prev = line

        info = parse_variable(line, funcname)
        if isinstance(info, list):
            # The parser may report several declarations for one line,
            # each with its own funcname.
            for name, _funcname, decl in info:
                yield Variable.from_parts(filename, _funcname, name, decl)
            continue
        name, decl = info

        if name is None:
            continue
        yield Variable.from_parts(filename, funcname, name, decl)
|
||||
|
||||
|
||||
def _match_varid(variable, name, funcname, ignored=None):
    """Return True if the variable matches the requested name/funcname.

    Variables contained in "ignored" never match.  A funcname of
    UNKNOWN matches any local variable (i.e. one with some funcname).
    """
    if ignored and variable in ignored:
        return False
    if variable.name != name:
        return False
    if funcname == UNKNOWN:
        # "Some function" was requested, so a global does not match.
        return bool(variable.funcname)
    return variable.funcname == funcname
|
||||
|
||||
|
||||
def find_variable(filename, funcname, name, *,
                  ignored=None,
                  srccache=None,  # {filename: lines}
                  parse_variable=None,
                  _iter_variables=iter_variables,
                  ):
    """Return the matching variable.

    Return None if the variable is not found.
    """
    candidates = _iter_variables(filename,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
    return next(
        (v for v in candidates if _match_varid(v, name, funcname, ignored)),
        None,
    )
|
||||
|
||||
|
||||
def find_variables(varids, filenames=None, *,
                   srccache=_NOT_SET,
                   parse_variable=None,
                   _find_symbol=find_variable,
                   ):
    """Yield a Variable for each ID.

    If the variable is not found then its decl will be UNKNOWN.  That
    way there will be one resulting Variable per given ID.

    "filenames" is searched for any ID that does not name its own
    file.  A fresh shared source cache is used unless "srccache" is
    given explicitly (pass None to disable caching).
    """
    if srccache is _NOT_SET:
        srccache = {}

    # Variables already yielded are ignored in later searches so that
    # duplicate IDs resolve to distinct declarations.
    used = set()
    for varid in varids:
        if varid.filename and varid.filename != UNKNOWN:
            srcfiles = [varid.filename]
        else:
            if not filenames:
                yield Variable(varid, UNKNOWN)
                continue
            srcfiles = filenames
        for filename in srcfiles:
            # BUG FIX: this previously called the undefined name
            # "_find_varid"; the injected finder is "_find_symbol".
            found = _find_symbol(filename, varid.funcname, varid.name,
                                 ignored=used,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
            if found:
                yield found
                used.add(found)
                break
        else:
            yield Variable(varid, UNKNOWN)
|
512
Tools/c-analyzer/c_parser/preprocessor.py
Normal file
512
Tools/c-analyzer/c_parser/preprocessor.py
Normal file
|
@ -0,0 +1,512 @@
|
|||
from collections import namedtuple
|
||||
import shlex
|
||||
import os
|
||||
import re
|
||||
|
||||
from c_analyzer_common import util
|
||||
from . import info
|
||||
|
||||
|
||||
CONTINUATION = '\\' + os.linesep
|
||||
|
||||
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
|
||||
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
|
||||
|
||||
|
||||
def _coerce_str(value):
|
||||
if not value:
|
||||
return ''
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
#############################
# directives

# NOTE: these patterns are compiled with re.VERBOSE, so all the
# unescaped whitespace inside them is ignored.

DIRECTIVE_START = r'''
    (?:
      ^ \s*
      [#] \s*
      )'''
DIRECTIVE_TEXT = r'''
    (?:
      (?: \s+ ( .*\S ) )?
      \s* $
      )'''
# Matches any supported directive, capturing (kind, text).
# ("__DATE __" was a typo here; fixed to "__DATE__" for consistency
# with PreprocessorDirective.KINDS -- behavior is unchanged since
# re.VERBOSE ignored the stray space anyway.)
DIRECTIVE = rf'''
    (?:
      {DIRECTIVE_START}
      (
        include |
        error | warning |
        pragma |
        define | undef |
        if | ifdef | ifndef | elseif | else | endif |
        __FILE__ | __LINE__ | __DATE__ | __TIME__ | __TIMESTAMP__
        )
      {DIRECTIVE_TEXT}
      )'''
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)

# Matches a "define", capturing (name, args, body); "args" is only
# present for function-like macros.
DEFINE = rf'''
    (?:
      {DIRECTIVE_START} define \s+
      (?:
        ( \w*[a-zA-Z]\w* )
        (?: \s* [(] ([^)]*) [)] )?
        )
      {DIRECTIVE_TEXT}
      )'''
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
|
||||
|
||||
|
||||
def parse_directive(line):
    """Return the appropriate directive for the given line.

    Raises ValueError for unsupported or malformed directives.
    """
    line = line.strip()
    if line.startswith('#'):
        # Normalize "#  define" et al. to "#define ...".
        line = line[1:].lstrip()
        line = '#' + line
    directive = line
    #directive = '#' + line
    # Collapse runs of spaces to single spaces so the directive
    # regexes see a normalized form.  (The literals here had been
    # mangled to single spaces, which made this loop spin forever
    # on any line containing a space.)
    while '  ' in directive:
        directive = directive.replace('  ', ' ')
    return _parse_directive(directive)
|
||||
|
||||
|
||||
def _parse_directive(line):
    """Return the directive object for an already-normalized line.

    Raises ValueError when the directive is unsupported or its text
    is missing/unexpected.
    """
    # "define" has its own regex so the name and macro args can be
    # captured separately from the body.
    m = DEFINE_RE.match(line)
    if m:
        name, args, text = m.groups()
        if args:
            args = [a.strip() for a in args.split(',')]
            return Macro(name, args, text)
        else:
            return Constant(name, text)

    m = DIRECTIVE_RE.match(line)
    if not m:
        raise ValueError(f'unsupported directive {line!r}')
    kind, text = m.groups()
    if not text:
        # Only "else" and "endif" may (and must) lack text.
        if kind not in ('else', 'endif'):
            raise ValueError(f'missing text in directive {line!r}')
    elif kind in ('else', 'endif', 'define'):
        raise ValueError(f'unexpected text in directive {line!r}')
    # Dispatch to the matching directive class.
    if kind == 'include':
        directive = Include(text)
    elif kind in IfDirective.KINDS:
        directive = IfDirective(kind, text)
    else:
        directive = OtherDirective(kind, text)
    directive.validate()
    return directive
|
||||
|
||||
|
||||
class PreprocessorDirective(util._NTBase):
    """The base class for directives.

    Subclasses are namedtuples whose first field is "kind"; the
    remaining fields vary per directive type.
    """

    __slots__ = ()

    # All supported directive kinds (subclasses narrow this down).
    KINDS = frozenset([
            'include',
            'pragma',
            'error', 'warning',
            'define', 'undef',
            'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
            '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
            ])

    @property
    def text(self):
        # Join the non-"kind" fields, skipping empty ones.
        return ' '.join(v for v in self[1:] if v and v.strip()) or None

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.kind:
            raise TypeError('missing kind')
        elif self.kind not in self.KINDS:
            # Previously a bare ValueError; give it a message to match
            # the other validators in this module.
            raise ValueError(f'unsupported kind {self.kind!r}')

        # text can be anything, including None.
|
||||
|
||||
|
||||
class Constant(PreprocessorDirective,
               namedtuple('Constant', 'kind name value')):
    """A single "constant" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, value=None):
        return super().__new__(
                cls,
                'define',
                name=_coerce_str(name) or None,
                value=_coerce_str(value) or None,
                )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        # The value may be anything, including None, so only the name
        # needs checking here.
        if not self.name:
            raise TypeError('missing name')
        if not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')
|
||||
|
||||
|
||||
class Macro(PreprocessorDirective,
            namedtuple('Macro', 'kind name args body')):
    """A single "macro" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, args, body=None):
        # "args" must be a string or an iterable of strings (or "empty").
        if isinstance(args, str):
            args = [v.strip() for v in args.split(',')]
        if args:
            # Empty entries become None so validate() can flag them.
            args = tuple(_coerce_str(a) or None for a in args)
        self = super().__new__(
                cls,
                kind='define',
                name=_coerce_str(name) or None,
                args=args if args else (),
                body=_coerce_str(body) or None,
                )
        return self

    @property
    def text(self):
        # Reconstruct the original "NAME(args) body" form.
        if self.body:
            return f'{self.name}({", ".join(self.args)}) {self.body}'
        else:
            return f'{self.name}({", ".join(self.args)})'

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        elif not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        for arg in self.args:
            if not arg:
                raise ValueError(f'missing arg in {self.args}')
            elif not IDENTIFIER_RE.match(arg):
                raise ValueError(f'arg must be identifier, got {arg!r}')

        # body can be anything, including None
|
||||
|
||||
|
||||
class IfDirective(PreprocessorDirective,
                  namedtuple('IfDirective', 'kind condition')):
    """A single conditional directive (e.g. "if", "ifdef").

    This only includes directives that actually provide conditions.  The
    related directives "else" and "endif" are covered by OtherDirective
    instead.
    """

    __slots__ = ()

    KINDS = frozenset([
            'if',
            'ifdef',
            'ifndef',
            'elseif',
            ])

    @classmethod
    def _condition_from_raw(cls, raw, kind):
        # Normalize ifdef/ifndef conditions into plain "defined(...)"
        # expressions so all kinds can be handled uniformly.  Note the
        # text property below relies on these exact prefixes.
        #return Condition.from_raw(raw, _kind=kind)
        condition = _coerce_str(raw)
        if not condition:
            return None

        if kind == 'ifdef':
            condition = f'defined({condition})'
        elif kind == 'ifndef':
            condition = f'! defined({condition})'

        return condition

    def __new__(cls, kind, condition):
        kind = _coerce_str(kind)
        self = super().__new__(
                cls,
                kind=kind or None,
                condition=cls._condition_from_raw(condition, kind),
                )
        return self

    @property
    def text(self):
        # Undo the normalization applied in _condition_from_raw().
        if self.kind == 'ifdef':
            return self.condition[8:-1]  # strip "defined("
        elif self.kind == 'ifndef':
            return self.condition[10:-1]  # strip "! defined("
        else:
            return self.condition
        #return str(self.condition)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.condition:
            raise TypeError('missing condition')
        #else:
        #    for cond in self.condition:
        #        if not cond:
        #            raise ValueError(f'missing condition in {self.condition}')
        #        cond.validate()
        #    if self.kind in ('ifdef', 'ifndef'):
        #        if len(self.condition) != 1:
        #            raise ValueError('too many condition')
        #        if self.kind == 'ifdef':
        #            if not self.condition[0].startswith('defined '):
        #                raise ValueError('bad condition')
        #        else:
        #            if not self.condition[0].startswith('! defined '):
        #                raise ValueError('bad condition')
|
||||
|
||||
|
||||
class Include(PreprocessorDirective,
              namedtuple('Include', 'kind file')):
    """A single "include" directive.

    Supported "file" values follow either the bracket style (<stdio>)
    or double quotes ("spam.h").
    """

    __slots__ = ()

    def __new__(cls, file):
        return super().__new__(
                cls,
                kind='include',
                file=_coerce_str(file) or None,
                )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        # Any non-empty file value is acceptable.
        if not self.file:
            raise TypeError('missing file')
|
||||
|
||||
|
||||
class OtherDirective(PreprocessorDirective,
                     namedtuple('OtherDirective', 'kind text')):
    """A single directive not covered by another class.

    This includes the "else", "endif", and "undef" directives, which are
    otherwise inherently related to the directives covered by the
    Constant, Macro, and IfCondition classes.

    Note that all directives must have a text value, except for "else"
    and "endif" (which must have no text).
    """

    __slots__ = ()

    KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS

    def __new__(cls, kind, text):
        return super().__new__(
                cls,
                kind=_coerce_str(kind) or None,
                text=_coerce_str(text) or None,
                )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        # "else"/"endif" take no text; every other kind requires it.
        bare = self.kind in ('else', 'endif')
        if self.text and bare:
            raise ValueError('unexpected text in directive')
        if not self.text and not bare:
            raise TypeError('missing text')
|
||||
|
||||
|
||||
#############################
|
||||
# iterating lines
|
||||
|
||||
def _recompute_conditions(directive, ifstack):
|
||||
if directive.kind in ('if', 'ifdef', 'ifndef'):
|
||||
ifstack.append(
|
||||
([], directive.condition))
|
||||
elif directive.kind == 'elseif':
|
||||
if ifstack:
|
||||
negated, active = ifstack.pop()
|
||||
if active:
|
||||
negated.append(active)
|
||||
else:
|
||||
negated = []
|
||||
ifstack.append(
|
||||
(negated, directive.condition))
|
||||
elif directive.kind == 'else':
|
||||
if ifstack:
|
||||
negated, active = ifstack.pop()
|
||||
if active:
|
||||
negated.append(active)
|
||||
ifstack.append(
|
||||
(negated, None))
|
||||
elif directive.kind == 'endif':
|
||||
if ifstack:
|
||||
ifstack.pop()
|
||||
|
||||
conditions = []
|
||||
for negated, active in ifstack:
|
||||
for condition in negated:
|
||||
conditions.append(f'! ({condition})')
|
||||
if active:
|
||||
conditions.append(active)
|
||||
return tuple(conditions)
|
||||
|
||||
|
||||
def _iter_clean_lines(lines):
    """Yield (lno, line) with continuations merged and comments removed.

    "lno" is the 1-indexed number of the *last* physical line that
    contributed to the yielded logical line.
    """
    lines = iter(enumerate(lines, 1))
    for lno, line in lines:
        # Handle line continuations.
        while line.endswith(CONTINUATION):
            try:
                lno, _line = next(lines)
            except StopIteration:
                # Trailing backslash on the last line; keep it as-is.
                break
            line = line[:-len(CONTINUATION)] + ' ' + _line

        # Deal with comments.
        after = line
        line = ''
        while True:
            # Look for a comment.
            before, begin, remainder = after.partition('/*')
            if '//' in before:
                # A line comment outside any block comment ends the line.
                before, _, _ = before.partition('//')
                line += before + ' '  # per the C99 spec
                break
            line += before
            if not begin:
                break
            line += ' '  # per the C99 spec

            # Go until we find the end of the comment.
            _, end, after = remainder.partition('*/')
            while not end:
                try:
                    lno, remainder = next(lines)
                except StopIteration:
                    raise Exception('unterminated comment')
                _, end, after = remainder.partition('*/')

        yield lno, line
|
||||
|
||||
|
||||
def iter_lines(lines, *,
               _iter_clean_lines=_iter_clean_lines,
               _parse_directive=_parse_directive,
               _recompute_conditions=_recompute_conditions,
               ):
    """Yield (lno, line, directive, active conditions) for each given line.

    This is effectively a subset of the operations taking place in
    translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
    section 5.1.1.2.  Line continuations are removed and comments
    replaced with a single space.  (In both cases "lno" will be the last
    line involved.)  Otherwise each line is returned as-is.

    "lno" is the (1-indexed) line number for the line.

    "directive" will be a PreprocessorDirective or None, depending on
    whether or not there is a directive on the line.

    "active conditions" is the set of preprocessor conditions (e.g.
    "defined()") under which the current line of code will be included
    in compilation.  That set is derived from every conditional
    directive block (e.g. "if defined()", "ifdef", "else") containing
    that line.  That includes nested directives.  Note that the
    current line does not affect the active conditions for itself.
    It only impacts subsequent lines.  That applies to directives
    that close blocks (e.g. "endif") just as much as conditional
    directives.  Also note that "else" and "elseif" directives
    update the active conditions (for later lines), rather than
    adding to them.
    """
    ifstack = []
    conditions = ()
    for lno, line in _iter_clean_lines(lines):
        stripped = line.strip()
        if not stripped.startswith('#'):
            yield lno, line, None, conditions
            continue

        directive = '#' + stripped[1:].lstrip()
        # Collapse runs of spaces so the directive regexes see a
        # normalized form.  (The literals here had been mangled to
        # single spaces, which made this loop spin forever.)
        while '  ' in directive:
            directive = directive.replace('  ', ' ')
        directive = _parse_directive(directive)
        yield lno, line, directive, conditions

        # Both block-closers and condition-openers change the active
        # conditions for *subsequent* lines.
        if directive.kind in ('else', 'endif'):
            conditions = _recompute_conditions(directive, ifstack)
        elif isinstance(directive, IfDirective):
            conditions = _recompute_conditions(directive, ifstack)
|
||||
|
||||
|
||||
#############################
|
||||
# running (platform-specific?)
|
||||
|
||||
def _gcc(filename, *,
         _get_argv=(lambda: _get_gcc_argv()),
         _run=util.run_cmd,
         ):
    """Run the preprocessor (``-E``) over the file and return its output."""
    argv = _get_argv()
    argv.extend([
        '-E', filename,
        ])
    return _run(argv)
|
||||
|
||||
|
||||
def _get_gcc_argv(*,
                  _open=open,
                  _run=util.run_cmd,
                  ):
    """Return the compiler argv (CC + PY_CORE_CFLAGS) from the Makefile.

    A throwaway make include is written that prints the value of any
    requested variable, then make is run against the current
    directory's Makefile to extract CC and PY_CORE_CFLAGS.
    """
    # NOTE(review): the fixed, predictable /tmp path is racy and shared
    # between users -- consider tempfile for a unique path.
    with _open('/tmp/print.mk', 'w') as tmpfile:
        tmpfile.write('print-%:\n')
        #tmpfile.write('\t@echo $* = $($*)\n')
        tmpfile.write('\t@echo $($*)\n')
    argv = ['/usr/bin/make',
            '-f', 'Makefile',
            '-f', '/tmp/print.mk',
            'print-CC',
            'print-PY_CORE_CFLAGS',
            ]
    output = _run(argv)
    # One line of output per print-* target, in order.
    gcc, cflags = output.strip().splitlines()
    argv = shlex.split(gcc.strip())
    cflags = shlex.split(cflags.strip())
    return argv + cflags
|
||||
|
||||
|
||||
def run(filename, *,
        _gcc=_gcc,
        ):
    """Return the text of the given file after running the preprocessor."""
    # Currently only the gcc-based implementation exists.
    return _gcc(filename)
|
34
Tools/c-analyzer/c_parser/source.py
Normal file
34
Tools/c-analyzer/c_parser/source.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
from . import preprocessor
|
||||
|
||||
|
||||
def iter_clean_lines(lines):
    """Yield each meaningful line with comments and blank lines removed.

    Handles // line comments and (possibly multi-line) /* */ block
    comments.  Yielded lines are stripped of surrounding whitespace.
    """
    incomment = False
    for line in lines:
        # Deal with comments.
        if incomment:
            # Inside a multi-line comment: look for its end.
            _, sep, line = line.partition('*/')
            if not sep:
                continue
            incomment = False
            # Fall through to process whatever followed the "*/".
        line, _, _ = line.partition('//')
        before, sep, remainder = line.partition('/*')
        if sep:
            _, sep, after = remainder.partition('*/')
            if not sep:
                # The comment runs onto later lines.  (The original
                # dropped the text *before* the comment here.)
                incomment = True
                line = before.strip()
                if line:
                    yield line
                continue
            line = before + ' ' + after

        # Ignore blank lines and leading/trailing whitespace.
        line = line.strip()
        if not line:
            continue

        yield line
|
||||
|
||||
|
||||
def iter_lines(filename, *,
               preprocess=preprocessor.run,
               ):
    """Return an iterator over the preprocessed lines of the file."""
    return iter(preprocess(filename).splitlines())
|
0
Tools/c-analyzer/c_symbols/__init__.py
Normal file
0
Tools/c-analyzer/c_symbols/__init__.py
Normal file
157
Tools/c-analyzer/c_symbols/binary.py
Normal file
157
Tools/c-analyzer/c_symbols/binary.py
Normal file
|
@ -0,0 +1,157 @@
|
|||
import os
|
||||
import os.path
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
from c_analyzer_common import util, info
|
||||
from . import source
|
||||
from .info import Symbol
|
||||
|
||||
|
||||
#PYTHON = os.path.join(REPO_ROOT, 'python')
|
||||
PYTHON = sys.executable
|
||||
|
||||
|
||||
def iter_symbols(binary=PYTHON, dirnames=None, *,
                 # Alternately, use look_up_known_symbol()
                 # from c_globals.supported.
                 find_local_symbol=source.find_symbol,
                 _file_exists=os.path.exists,
                 _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)),
                 ):
    """Yield a Symbol for each symbol found in the binary."""
    if not _file_exists(binary):
        raise Exception('executable missing (need to build it first?)')

    if find_local_symbol:
        # Rebind the parameter to a closure that shares one per-file
        # cache across all lookups (the original func is kept via the
        # "_find" default).
        cache = {}
        def find_local_symbol(name, *, _find=find_local_symbol):
            return _find(name, dirnames, _perfilecache=cache)
    else:
        find_local_symbol = None

    if os.name == 'nt':
        # XXX Support this.
        raise NotImplementedError
    else:
        yield from _iter_symbols_nm(binary, find_local_symbol)
|
||||
|
||||
|
||||
#############################
|
||||
# binary format (e.g. ELF)
|
||||
|
||||
SPECIAL_SYMBOLS = {
|
||||
'__bss_start',
|
||||
'__data_start',
|
||||
'__dso_handle',
|
||||
'_DYNAMIC',
|
||||
'_edata',
|
||||
'_end',
|
||||
'__environ@@GLIBC_2.2.5',
|
||||
'_GLOBAL_OFFSET_TABLE_',
|
||||
'__JCR_END__',
|
||||
'__JCR_LIST__',
|
||||
'__TMC_END__',
|
||||
}
|
||||
|
||||
|
||||
def _is_special_symbol(name):
|
||||
if name in SPECIAL_SYMBOLS:
|
||||
return True
|
||||
if '@@GLIBC' in name:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
#############################
|
||||
# "nm"
|
||||
|
||||
# Map nm's (lowercased) symbol-type letters to Symbol.KIND values;
# anything else is treated as Symbol.KIND.OTHER by the caller.
NM_KINDS = {
        'b': Symbol.KIND.VARIABLE,  # uninitialized
        'd': Symbol.KIND.VARIABLE,  # initialized
        #'g': Symbol.KIND.VARIABLE,  # uninitialized
        #'s': Symbol.KIND.VARIABLE,  # initialized
        't': Symbol.KIND.FUNCTION,
        }
|
||||
|
||||
|
||||
def _iter_symbols_nm(binary, find_local_symbol=None,
                     *,
                     _which=shutil.which,
                     _run=util.run_cmd,
                     ):
    """Yield a Symbol for each variable reported by "nm" for the binary."""
    nm = _which('nm')
    if not nm:
        raise NotImplementedError
    argv = [nm,
            '--line-numbers',
            binary,
            ]
    try:
        output = _run(argv)
    except Exception:
        # NOTE(review): nm cannot be None here (checked above), so this
        # fallback branch looks unreachable -- verify intent.
        if nm is None:
            # XXX Use dumpbin.exe /SYMBOLS on Windows.
            raise NotImplementedError
        raise
    for line in output.splitlines():
        (name, kind, external, filename, funcname, vartype,
         ) = _parse_nm_line(line,
                            _find_local_symbol=find_local_symbol,
                            )
        # Only variables are of interest; skip linker bookkeeping too.
        if kind != Symbol.KIND.VARIABLE:
            continue
        elif _is_special_symbol(name):
            continue
        assert vartype is None
        yield Symbol(
                id=(filename, funcname, name),
                kind=kind,
                external=external,
                )
|
||||
|
||||
|
||||
def _parse_nm_line(line, *, _find_local_symbol=None):
    """Split one line of "nm --line-numbers" output into symbol parts.

    Returns (name, kind, external, filename, funcname, vartype).
    """
    _origline = line
    _, _, line = line.partition(' ')  # strip off the address
    line = line.strip()

    # The type letter: uppercase means externally visible.
    kind, _, line = line.partition(' ')
    line = line.strip()
    external = kind.isupper()
    kind = NM_KINDS.get(kind.lower(), Symbol.KIND.OTHER)

    # --line-numbers appends "\tfile:lineno" when available.
    name, _, filename = line.partition('\t')
    name = name.strip()
    if filename:
        filename = os.path.relpath(filename.partition(':')[0])
    else:
        filename = info.UNKNOWN

    vartype = None
    name, islocal = _parse_nm_name(name, kind)
    if islocal:
        # A function-local static; try to recover its function/file.
        funcname = info.UNKNOWN
        if _find_local_symbol is not None:
            filename, funcname, vartype = _find_local_symbol(name)
            filename = filename or info.UNKNOWN
            funcname = funcname or info.UNKNOWN
    else:
        funcname = None
        # XXX find filename and vartype?
    return name, kind, external, filename, funcname, vartype
|
||||
|
||||
|
||||
def _parse_nm_name(name, kind):
    """Return (name, islocal) for a raw nm symbol name.

    "islocal" is None for non-variables and special symbols, False for
    plain names, and True for names with a numeric ".NNN" suffix (which
    is stripped off).
    """
    if kind != Symbol.KIND.VARIABLE or _is_special_symbol(name):
        return name, None

    actual, sep, digits = name.partition('.')
    if not sep:
        return name, False
    if not digits.isdigit():
        raise Exception(f'got bogus name {name}')
    return actual, True
|
51
Tools/c-analyzer/c_symbols/info.py
Normal file
51
Tools/c-analyzer/c_symbols/info.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
from collections import namedtuple
|
||||
|
||||
from c_analyzer_common.info import ID
|
||||
from c_analyzer_common.util import classonly, _NTBase
|
||||
|
||||
|
||||
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
    """Info for a single compilation symbol.

    "id" is a c_analyzer_common.info.ID; "kind" is one of the KIND
    values; "external" is whether the symbol is externally visible.
    """

    __slots__ = ()

    # The supported symbol kinds.
    class KIND:
        VARIABLE = 'variable'
        FUNCTION = 'function'
        OTHER = 'other'

    @classonly
    def from_name(cls, name, filename=None, kind=KIND.VARIABLE, external=None):
        """Return a new symbol based on the given name."""
        id = ID(filename, None, name)
        return cls(id, kind, external)

    def __new__(cls, id, kind=KIND.VARIABLE, external=None):
        self = super().__new__(
                cls,
                id=ID.from_raw(id),
                kind=str(kind) if kind else None,
                # Preserve "unknown" (None) distinctly from False.
                external=bool(external) if external is not None else None,
                )
        return self

    def __hash__(self):
        # Identity is determined solely by the ID.
        return hash(self.id)

    def __getattr__(self, name):
        # Delegate unknown attributes (filename, funcname, name) to the ID.
        return getattr(self.id, name)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        if not self.id:
            raise TypeError('missing id')
        else:
            self.id.validate()

        if not self.kind:
            raise TypeError('missing kind')
        elif self.kind not in vars(self.KIND).values():
            raise ValueError(f'unsupported kind {self.kind}')

        if self.external is None:
            raise TypeError('missing external')
|
149
Tools/c-analyzer/c_symbols/resolve.py
Normal file
149
Tools/c-analyzer/c_symbols/resolve.py
Normal file
|
@ -0,0 +1,149 @@
|
|||
import os.path
|
||||
|
||||
from c_analyzer_common import files
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
from c_parser import declarations, info
|
||||
from .info import Symbol
|
||||
from .source import _find_symbol
|
||||
|
||||
|
||||
# XXX need tests:
|
||||
# * look_up_known_symbol()
|
||||
# * symbol_from_source()
|
||||
# * get_resolver()
|
||||
# * symbols_to_variables()
|
||||
|
||||
def look_up_known_symbol(symbol, knownvars, *,
                         match_files=(lambda f1, f2: f1 == f2),
                         ):
    """Return the known variable matching the given symbol.

    "knownvars" is a mapping of common.ID to parser.Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    Returns None when there is no match.
    """
    if not knownvars:
        return None

    if symbol.funcname == UNKNOWN:
        # The symbol is a function-local static whose function we do
        # not know; scan for any local variable with a matching name.
        if not symbol.filename or symbol.filename == UNKNOWN:
            for varid in knownvars:
                if not varid.funcname:
                    continue
                if varid.name == symbol.name:
                    return knownvars[varid]
            else:
                return None
        else:
            # Same scan, but restricted to the symbol's file.
            for varid in knownvars:
                if not varid.funcname:
                    continue
                if not match_files(varid.filename, symbol.filename):
                    continue
                if varid.name == symbol.name:
                    return knownvars[varid]
            else:
                return None
    elif not symbol.filename or symbol.filename == UNKNOWN:
        # A known funcname without a filename is not supported (yet).
        raise NotImplementedError
    else:
        # Fully-specified symbol: direct ID lookup.
        return knownvars.get(symbol.id)
|
||||
|
||||
|
||||
def find_in_source(symbol, dirnames, *,
                   # NOTE(review): the mutable default appears to be a
                   # deliberate module-lifetime cache shared across
                   # calls -- confirm before changing.
                   _perfilecache={},
                   _find_symbol=_find_symbol,
                   _iter_files=files.iter_files_by_suffix,
                   ):
    """Return the Variable matching the given Symbol.

    If there is no match then return None.
    """
    # Restrict the search to the symbol's own file when it is known.
    if symbol.filename and symbol.filename != UNKNOWN:
        filenames = [symbol.filename]
    else:
        filenames = _iter_files(dirnames, ('.c', '.h'))

    # Searching within a specific function is not supported (yet).
    if symbol.funcname and symbol.funcname != UNKNOWN:
        raise NotImplementedError

    (filename, funcname, vartype
     ) = _find_symbol(symbol.name, filenames, _perfilecache)
    if filename == UNKNOWN:
        return None
    return info.Variable(
            id=(filename, funcname, symbol.name),
            vartype=vartype,
            )
|
||||
|
||||
|
||||
def get_resolver(knownvars=None, dirnames=None, *,
                 _look_up_known=look_up_known_symbol,
                 _from_source=find_in_source,
                 ):
    """Return a "resolver" func for the given known vars and dirnames.

    The func takes a single Symbol and returns a corresponding Variable.
    If the symbol was located then the variable will be valid, populated
    with the corresponding information.  Otherwise None is returned.
    """
    if knownvars:
        knownvars = dict(knownvars)  # a copy
        def resolve_known(symbol):
            found = _look_up_known(symbol, knownvars)
            if found is None:
                return None
            elif symbol.funcname == UNKNOWN:
                # Consume ambiguous matches so duplicates resolve to
                # distinct known variables.
                knownvars.pop(found.id)
            elif not symbol.filename or symbol.filename == UNKNOWN:
                knownvars.pop(found.id)
            return found
        if dirnames:
            def resolve(symbol):
                found = resolve_known(symbol)
                if found is None:
                    return None
                    # NOTE: falling back to a source search is disabled.
                    #return _from_source(symbol, dirnames)
                else:
                    # Only accept matches under one of the given dirs.
                    for dirname in dirnames:
                        if not dirname.endswith(os.path.sep):
                            dirname += os.path.sep
                        if found.filename.startswith(dirname):
                            break
                    else:
                        return None
                    return found
        else:
            resolve = resolve_known
    elif dirnames:
        def resolve(symbol):
            return _from_source(symbol, dirnames)
    else:
        def resolve(symbol):
            return None
    return resolve
|
||||
|
||||
|
||||
def symbols_to_variables(symbols, *,
                         resolve=(lambda s: look_up_known_symbol(s, None)),
                         ):
    """Yield the variable that matches each given symbol.

    Use get_resolver() for a "resolve" func to use.
    """
    for symbol in symbols:
        if isinstance(symbol, info.Variable):
            # Already a variable; pass it through as-is.
            # XXX validate?
            yield symbol
        elif symbol.kind == Symbol.KIND.VARIABLE:
            var = resolve(symbol)
            if var is None:
                # Fall back to a mostly-empty placeholder variable.
                #raise NotImplementedError(symbol)
                var = info.Variable(
                        id=symbol.id,
                        vartype=UNKNOWN,
                        )
            yield var
        # Non-variable symbols (functions, etc.) are silently skipped.
|
58
Tools/c-analyzer/c_symbols/source.py
Normal file
58
Tools/c-analyzer/c_symbols/source.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
from c_analyzer_common import files
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
from c_parser import declarations
|
||||
|
||||
|
||||
# XXX need tests:
|
||||
# * find_symbol()
|
||||
|
||||
def find_symbol(name, dirnames, *,
                _perfilecache,
                _iter_files=files.iter_files_by_suffix,
                **kwargs
                ):
    """Return (filename, funcname, vartype) for the matching Symbol."""
    # Consider every C source/header file under the given directories.
    candidates = _iter_files(dirnames, ('.c', '.h'))
    return _find_symbol(name, candidates, _perfilecache, **kwargs)
|
||||
|
||||
|
||||
def _get_symbols(filename, *,
                 _iter_variables=declarations.iter_variables,
                 ):
    """Return {name: [(funcname, vartype), ...]} for the file's local variables.

    Only function-local variables are included; file-level (global)
    declarations, i.e. those with no funcname, are skipped.  Each list
    keeps the declarations in the order they were yielded.
    """
    symbols = {}
    for funcname, name, vartype in _iter_variables(filename):
        if not funcname:
            # No funcname means a global declaration; skip it.
            continue
        # Group every declaration of "name" across the file's functions.
        symbols.setdefault(name, []).append((funcname, vartype))
    return symbols
|
||||
|
||||
|
||||
def _find_symbol(name, filenames, _perfilecache, *,
                 _get_local_symbols=_get_symbols,
                 ):
    """Return (filename, funcname, vartype) for a local variable "name".

    Each call consumes one matching declaration from the per-file cache,
    so repeated calls produce successive declarations.  When no file has
    a (remaining) match, (UNKNOWN, UNKNOWN, UNKNOWN) is returned.
    """
    for filename in filenames:
        # Parse the file's local symbols on first use and cache them.
        try:
            per_file = _perfilecache[filename]
        except KeyError:
            per_file = _perfilecache[filename] = _get_local_symbols(filename)

        remaining = per_file.get(name)
        if remaining is None:
            continue

        # Consume the oldest remaining declaration for this name.
        funcname, vartype = remaining.pop(0)
        if not remaining:
            del per_file[name]
        return filename, funcname, vartype
    return UNKNOWN, UNKNOWN, UNKNOWN
|
||||
|
||||
|
||||
def iter_symbols():
    """Yield each symbol found in the source.  (Not implemented yet.)"""
    raise NotImplementedError
|
1
Tools/c-analyzer/ignored.tsv
Normal file
1
Tools/c-analyzer/ignored.tsv
Normal file
|
@ -0,0 +1 @@
|
|||
filename funcname name kind reason
|
|
1922
Tools/c-analyzer/known.tsv
Normal file
1922
Tools/c-analyzer/known.tsv
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue