bpo-36876: Small adjustments to the C-analyzer tool. (GH-23045)

This is a little bit of clean-up, small fixes, and additional helpers prior to building an updated & accurate list of globals to eliminate.
This commit is contained in:
Eric Snow 2020-10-30 15:46:52 -06:00 committed by GitHub
parent b9ee4af4c6
commit 4fe72090de
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 632 additions and 217 deletions

View file

@ -92,7 +92,7 @@ def write_decls_tsv(decls, outfile, extracolumns=None, *,
**kwargs
):
# XXX Move the row rendering here.
_write_decls_tsv(rows, outfile, extracolumns, relroot, kwargs)
_write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs)
def _iter_decls_tsv(infile, extracolumns=None, relroot=None):

View file

@ -7,85 +7,12 @@ from c_common.clsutil import classonly
import c_common.misc as _misc
import c_common.strutil as _strutil
import c_common.tables as _tables
from .parser._regexes import SIMPLE_TYPE
from .parser._regexes import SIMPLE_TYPE, _STORAGE
FIXED_TYPE = _misc.Labeled('FIXED_TYPE')
POTS_REGEX = re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE)
def is_pots(typespec):
    """Report whether a type spec matches POTS_REGEX from start to end.

    "typespec" may be a string, or anything get_parsed_vartype() accepts
    (in which case the parsed typespec is the one checked).  A falsy
    argument yields None; otherwise the result is True or False.
    """
    if not typespec:
        return None
    if type(typespec) is str:
        spec = typespec
    else:
        # Only the 4th field (the typespec) of the parsed vartype matters.
        _kind, _storage, _typequal, spec, _abstract = get_parsed_vartype(typespec)
    found = POTS_REGEX.match(spec)
    return found is not None
def is_funcptr(vartype):
    """Report whether the given vartype's abstract declarator is a func ptr.

    Returns None for a falsy vartype (and whenever _is_funcptr() does).
    """
    if vartype:
        # Only the last field (the abstract declarator) is relevant.
        _kind, _storage, _typequal, _typespec, abstract = get_parsed_vartype(vartype)
        return _is_funcptr(abstract)
    return None
def _is_funcptr(declstr):
if not declstr:
return None
# XXX Support "(<name>*)(".
return '(*)(' in declstr.replace(' ', '')
def is_exported_symbol(decl):
    """Placeholder: always raises NotImplementedError.

    The decl is still parsed first, so any error raised by
    get_parsed_vartype() takes precedence.
    """
    _kind, storage, _typequal, _typespec, _abstract = get_parsed_vartype(decl)
    raise NotImplementedError
def is_process_global(vardecl):
    """Report whether a variable decl is "static" or has no parent.

    "vardecl" may be an object with a "parent" attribute or a mapping
    with a "parent" key.  Raises NotImplementedError if the parsed kind
    is not KIND.VARIABLE.
    """
    kind, storage, _typequal, _typespec, _abstract = get_parsed_vartype(vardecl)
    if kind is not KIND.VARIABLE:
        raise NotImplementedError(vardecl)

    # A missing/empty storage is treated the same as "not static".
    if storage and 'static' in storage:
        return True

    parent = (vardecl.parent
              if hasattr(vardecl, 'parent')
              else vardecl.get('parent'))
    return not parent
def is_fixed_type(vardecl):
    """Report whether the decl's type is "fixed", per its abstract declarator.

    Returns None for a falsy vardecl.  Raises NotImplementedError when
    the typespec contains "typeof".  Otherwise the result is True when
    there is no abstract declarator, when it has no "*", or when it is a
    function pointer; for other pointers, each "*" must be followed by
    "const" (ignoring leading whitespace).
    """
    if not vardecl:
        return None
    _kind, _storage, _typequal, typespec, abstract = get_parsed_vartype(vardecl)
    if 'typeof' in typespec:
        raise NotImplementedError(vardecl)

    if not abstract or '*' not in abstract:
        # XXX What about []?
        return True
    if _is_funcptr(abstract):
        return True
    # Look at the text right after each "*" (the first chunk precedes any "*").
    return all(
        chunk.lstrip().startswith('const')
        for chunk in abstract.split('*')[1:]
    )
def is_immutable(vardecl):
    """Report whether the decl is of a fixed type and "const"-qualified.

    Returns None for a falsy vardecl and False when is_fixed_type()
    gives a falsy result.
    """
    if not vardecl:
        return None
    if is_fixed_type(vardecl):
        _kind, _storage, typequal, _typespec, _abstract = get_parsed_vartype(vardecl)
        # If there, it can only be "const" or "volatile".
        return typequal == 'const'
    return False
STORAGE = frozenset(_STORAGE)
#############################
@ -214,58 +141,8 @@ KIND._GROUPS = {
KIND._GROUPS.update((k.value, {k}) for k in KIND)
# The module-level kind-related helpers (below) deal with <item>.kind:
def is_type_decl(kind):
    """Return KIND.is_type_decl() for a kind or for an object's ".kind"."""
    # Accept ParsedItem, Declaration, etc. by unwrapping their "kind".
    actual = getattr(kind, 'kind', kind)
    return KIND.is_type_decl(actual)
def is_decl(kind):
    """Return KIND.is_decl() for a kind or for an object's ".kind"."""
    # Accept ParsedItem, Declaration, etc. by unwrapping their "kind".
    actual = getattr(kind, 'kind', kind)
    return KIND.is_decl(actual)
def filter_by_kind(items, kind):
    """Yield the items whose ".kind" is selected by "kind".

    "kind" may be:
      * 'type' -- the type-decl kinds (KIND._TYPE_DECLS)
      * 'decl' -- every kind for which KIND.is_decl() is true
      * a single KIND member
      * an iterable of kinds

    Previously the group names were computed and then unconditionally
    overwritten by the generic handling below, and 'decl' was a copy
    of the 'type' branch.
    """
    if kind == 'type':
        kinds = KIND._TYPE_DECLS
    elif kind == 'decl':
        kinds = {k for k in KIND if KIND.is_decl(k)}
    else:
        try:
            okay = kind in KIND
        except TypeError:
            # Not something the enum can test for membership;
            # treat it as an iterable of kinds.
            kinds = set(kind)
        else:
            kinds = {kind} if okay else set(kind)
    for item in items:
        if item.kind in kinds:
            yield item
def collate_by_kind(items):
    """Return a dict mapping every KIND member to the items of that kind.

    Kinds without any items map to an empty list.  Raises ValueError
    for an item whose kind is not a KIND member.
    """
    by_kind = {}
    for known in KIND:
        by_kind[known] = []
    for item in items:
        try:
            bucket = by_kind[item.kind]
        except KeyError:
            raise ValueError(f'unsupported kind in {item!r}')
        bucket.append(item)
    return by_kind
def get_kind_group(kind):
    """Return KIND.get_group() for a kind or for an object's ".kind"."""
    # Accept ParsedItem, Declaration, etc. by unwrapping their "kind".
    actual = getattr(kind, 'kind', kind)
    return KIND.get_group(actual)
def collate_by_kind_group(items):
    """Return a dict mapping each kind group to the items in that group.

    Every group reported by KIND.get_group() for some KIND member is
    present, even when no item falls into it.
    """
    by_group = {}
    for known in KIND:
        by_group[KIND.get_group(known)] = []
    for item in items:
        by_group[KIND.get_group(item.kind)].append(item)
    return by_group
def get_kind_group(item):
    """Return the kind group (per KIND.get_group()) of the item's kind."""
    kind = item.kind
    return KIND.get_group(kind)
#############################
@ -484,6 +361,27 @@ def get_parsed_vartype(decl):
return kind, storage, typequal, typespec, abstract
def get_default_storage(decl):
    """Return the storage class implied when the decl does not give one.

    Only variables and functions have one (None is returned for any
    other kind): "extern" when the decl has no parent, else "auto".
    """
    if decl.kind not in (KIND.VARIABLE, KIND.FUNCTION):
        return None
    if decl.parent is None:
        return 'extern'
    return 'auto'
def get_effective_storage(decl, *, default=None):
    """Return the decl's storage class, falling back to "default".

    When "default" is not given it comes from get_default_storage();
    if that is None too then the result is None.  The declared storage
    is read from decl.storage or, failing that, parsed out of decl.data.
    """
    # Note that "static" limits access to just that C module
    # and "extern" (the default for module-level) allows access
    # outside the C module.
    if default is None:
        default = get_default_storage(decl)
    if default is None:
        return None
    try:
        declared = decl.storage
    except AttributeError:
        # No "storage" attribute, so fall back to parsing the raw data.
        declared, _ = _get_vartype(decl.data)
    return declared if declared else default
#############################
# high-level
@ -997,7 +895,7 @@ class Variable(Declaration):
def __init__(self, file, name, data, parent=None, storage=None):
super().__init__(file, name, data, parent,
_extra={'storage': storage},
_extra={'storage': storage or None},
_shortkey=f'({parent.name}).{name}' if parent else name,
_key=(str(file),
# Tilde comes after all other ascii characters.
@ -1005,6 +903,11 @@ class Variable(Declaration):
name,
),
)
if storage:
if storage not in STORAGE:
# The parser must need an update.
raise NotImplementedError(storage)
# Otherwise we trust the compiler to have validated it.
@property
def vartype(self):
@ -1413,6 +1316,13 @@ def resolve_parsed(parsed):
return cls.from_parsed(parsed)
def set_flag(item, name, value):
    """Set an attribute on "item", even if its own setattr refuses.

    A regular setattr() is tried first; if that raises AttributeError
    then object.__setattr__() is used directly instead.
    """
    try:
        setattr(item, name, value)
    except AttributeError:
        # Go around whatever __setattr__ the item's type defines.
        force = object.__setattr__
        force(item, name, value)
#############################
# composite

View file

@ -0,0 +1,177 @@
import re
from . import info as _info
from .parser._regexes import SIMPLE_TYPE
_KIND = _info.KIND
def match_storage(decl, expected):
    """Report whether the decl's effective storage is one of "expected".

    "expected" may be:
      * None -- only the decl's default storage matches
      * a string -- that storage (an empty string means the default)
      * an empty collection -- anything in _info.STORAGE matches
      * any other iterable -- its values (falsy ones mean the default)
    """
    default = _info.get_default_storage(decl)
    #assert default
    if expected is None:
        allowed = {default}
    elif isinstance(expected, str):
        allowed = {expected or default}
    elif expected:
        allowed = {value or default for value in expected}
    else:
        allowed = _info.STORAGE
    return _info.get_effective_storage(decl, default=default) in allowed
##################################
# decl matchers
def is_type_decl(item):
    """Report whether the item's kind is a type decl (per KIND)."""
    kind = item.kind
    return _KIND.is_type_decl(kind)
def is_decl(item):
    """Report whether the item's kind is a decl (per KIND)."""
    kind = item.kind
    return _KIND.is_decl(kind)
def is_pots(typespec, *,
            _regex=re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE),
            ):
    """Report whether a type spec matches SIMPLE_TYPE from start to end.

    "typespec" may be a string, or anything _info.get_parsed_vartype()
    accepts (in which case the parsed typespec is the one checked).
    A falsy argument yields None; otherwise the result is True or False.
    "_regex" is the pattern compiled once at definition time.
    """
    if not typespec:
        return None
    if type(typespec) is str:
        spec = typespec
    else:
        # Only the 4th field (the typespec) of the parsed vartype matters.
        _kind, _storage, _typequal, spec, _abstract = (
            _info.get_parsed_vartype(typespec))
    found = _regex.match(spec)
    return found is not None
def is_funcptr(vartype):
    """Report whether the given vartype's abstract declarator is a func ptr.

    Returns None for a falsy vartype (and whenever _is_funcptr() does).
    """
    if vartype:
        # Only the last field (the abstract declarator) is relevant.
        _kind, _storage, _typequal, _typespec, abstract = (
            _info.get_parsed_vartype(vartype))
        return _is_funcptr(abstract)
    return None
def _is_funcptr(declstr):
if not declstr:
return None
# XXX Support "(<name>*)(".
return '(*)(' in declstr.replace(' ', '')
def is_forward_decl(decl):
    """Report whether the decl is only a forward declaration.

    Typedefs and variables never are.  Other type decls are when they
    have no data.  Functions defer to their signature's "isforward".
    Raises NotImplementedError for any other kind.
    """
    kind = decl.kind
    if kind is _KIND.TYPEDEF:
        return False
    if is_type_decl(decl):
        return not decl.data
    if kind is _KIND.FUNCTION:
        # XXX This doesn't work with ParsedItem.
        return decl.signature.isforward
    if kind is _KIND.VARIABLE:
        # No var decls are considered forward (or all are...).
        return False
    raise NotImplementedError(decl)
def can_have_symbol(decl):
    """Report whether the decl is a variable or a function."""
    symbol_kinds = (_KIND.VARIABLE, _KIND.FUNCTION)
    return decl.kind in symbol_kinds
def has_external_symbol(decl):
    """Report whether the decl has effective "extern" storage and is not
    merely a forward function decl or an explicitly "extern" variable.
    """
    if not can_have_symbol(decl):
        return False
    if _info.get_effective_storage(decl) != 'extern':
        return False
    if decl.kind is not _KIND.FUNCTION:
        # It must be a variable, which can only be implicitly extern here.
        return decl.storage != 'extern'
    return not decl.signature.isforward
def has_internal_symbol(decl):
    """Report whether the decl is a "static" variable or function.

    Anything that cannot have a symbol (see can_have_symbol()) is False.
    """
    if not can_have_symbol(decl):
        return False
    # Use the same storage helper as has_external_symbol().  The default
    # storage is only ever "extern" or "auto", so the effective storage
    # is "static" exactly when the decl was declared that way.
    return _info.get_effective_storage(decl) == 'static'
def is_external_reference(decl):
    """Report whether the decl is explicitly "extern" and, if it is a
    function, only a forward declaration.
    """
    if not can_have_symbol(decl):
        return False
    # We have to check the declared storage rather than the effective.
    if decl.storage != 'extern':
        return False
    if decl.kind is not _KIND.FUNCTION:
        # Otherwise it's a variable.
        return True
    return decl.signature.isforward
def is_local_var(decl):
    """Report whether the decl is a variable that has a (truthy) parent."""
    if decl.kind is not _KIND.VARIABLE:
        return False
    return bool(decl.parent)
def is_global_var(decl):
    """Report whether the decl is a variable that has no (truthy) parent."""
    if decl.kind is not _KIND.VARIABLE:
        return False
    return not decl.parent
##################################
# filtering with matchers
def filter_by_kind(items, kind):
    """Yield the items whose ".kind" is selected by "kind".

    "kind" may be:
      * 'type' -- the type-decl kinds (KIND._TYPE_DECLS)
      * 'decl' -- every kind for which KIND.is_decl() is true
      * a single KIND member
      * an iterable of kinds

    Previously the group names were computed and then unconditionally
    overwritten by the generic handling below, and 'decl' was a copy
    of the 'type' branch.
    """
    if kind == 'type':
        kinds = _KIND._TYPE_DECLS
    elif kind == 'decl':
        kinds = {k for k in _KIND if _KIND.is_decl(k)}
    else:
        try:
            okay = kind in _KIND
        except TypeError:
            # Not something the enum can test for membership;
            # treat it as an iterable of kinds.
            kinds = set(kind)
        else:
            kinds = {kind} if okay else set(kind)
    for item in items:
        if item.kind in kinds:
            yield item
##################################
# grouping with matchers
def group_by_category(decls, categories, *, ignore_non_match=True):
    """Return a dict mapping each matched category to its decls.

    "categories" maps category -> matcher (a callable taking a decl).
    Each decl lands in the first category whose matcher accepts it.
    Categories that match nothing are left out of the result.  A decl
    that matches no category is dropped, unless "ignore_non_match" is
    false, in which case an Exception is raised.
    """
    grouped = {}
    for decl in decls:
        # Matchers should be mutually exclusive.  (First match wins.)
        found = False
        for category, matches in categories.items():
            if matches(decl):
                grouped.setdefault(category, []).append(decl)
                found = True
                break
        if not found and not ignore_non_match:
            raise Exception(f'no match for {decl!r}')
    return grouped
def group_by_kind(items):
    """Return a dict mapping every KIND member to the items of that kind.

    Kinds without any items map to an empty list.  Raises ValueError
    for an item whose kind is not a KIND member.
    """
    by_kind = {}
    for known in _KIND:
        by_kind[known] = []
    for item in items:
        try:
            bucket = by_kind[item.kind]
        except KeyError:
            raise ValueError(f'unsupported kind in {item!r}')
        bucket.append(item)
    return by_kind
def group_by_kinds(items):
    """Return a dict mapping each kind group to the items in that group.

    Every group reported by KIND.get_group() for some KIND member is
    present, even when no item falls into it.
    """
    # Collate into kind groups (decl, type, etc.).
    by_group = {}
    for known in _KIND:
        by_group[_KIND.get_group(known)] = []
    for item in items:
        by_group[_KIND.get_group(item.kind)].append(item)
    return by_group

View file

@ -163,6 +163,8 @@ def _parse(srclines, anon_name):
def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False):
maxtext = maxtext if maxtext and maxtext > 0 else None
maxlines = maxlines if maxlines and maxlines > 0 else None
filestack = []
allinfo = {}
# "lines" should be (fileinfo, data), as produced by the preprocessor code.
@ -181,9 +183,7 @@ def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False):
_logger.debug(f'-> {line}')
srcinfo._add_line(line, fileinfo.lno)
if len(srcinfo.text) > maxtext:
break
if srcinfo.end - srcinfo.start > maxlines:
if srcinfo.too_much(maxtext, maxlines):
break
while srcinfo._used():
yield srcinfo

View file

@ -1,3 +1,5 @@
import re
from ..info import KIND, ParsedItem, FileInfo
@ -121,6 +123,19 @@ class SourceInfo:
def done(self):
self._set_ready()
def too_much(self, maxtext, maxlines):
    """Report whether the accumulated source exceeds either limit.

    A falsy "maxtext" or "maxlines" disables that particular check.
    The text check compares len(self.text); the line check compares
    self.end - self.start and is only evaluated when the text check
    did not already trip.
    """
    text_over = maxtext and len(self.text) > maxtext
    if not text_over:
        lines_over = maxlines and self.end - self.start > maxlines
        if not lines_over:
            return False

    # A (disabled) exemption for the current text:
    #if re.fullmatch(r'[^;]+\[\][ ]*=[ ]*[{]([ ]*\d+,)*([ ]*\d+,?)\s*',
    #                self._current.text):
    #    return False
    return True
def _set_ready(self):
if self._current is None:
self._ready = False

View file

@ -137,7 +137,8 @@ COMPOUND_TYPE_KIND = r'(?: \b (?: struct | union | enum ) \b )'
#######################################
# variable declarations
STORAGE_CLASS = r'(?: \b (?: auto | register | static | extern ) \b )'
_STORAGE = 'auto register static extern'.split()
STORAGE_CLASS = rf'(?: \b (?: {" | ".join(_STORAGE)} ) \b )'
TYPE_QUALIFIER = r'(?: \b (?: const | volatile ) \b )'
PTR_QUALIFIER = rf'(?: [*] (?: \s* {TYPE_QUALIFIER} )? )'