bpo-36876: Fix the C analyzer tool. (GH-22841)

The original tool wasn't working right and it was simpler to create a new one, partially re-using some of the old code. At this point the tool runs properly on the master branch. (Try: ./python Tools/c-analyzer/c-analyzer.py analyze.)  It takes ~40 seconds on my machine to analyze the full CPython code base.

Note that we'll need to iron out some OS-specific stuff (e.g. preprocessor). We're okay though since this tool isn't used yet in our workflow. We will also need to verify the analysis results in detail before activating the check in CI, though I'm pretty sure it's close.

https://bugs.python.org/issue36876
This commit is contained in:
Eric Snow 2020-10-22 18:42:51 -06:00 committed by GitHub
parent ec388cfb4e
commit 345cd37abe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
92 changed files with 8868 additions and 10539 deletions

View file

@ -0,0 +1,115 @@
import re
from ._regexes import (
_ind,
STRING_LITERAL,
VAR_DECL as _VAR_DECL,
)
def log_match(group, m):
    """Write a debug-level record describing a regex match.

    *group* is the label shown between angle brackets and *m* is a match
    object; the full matched text (``m.group(0)``) is included in the
    message.
    """
    # Imported at call time rather than at module import time
    # (presumably to avoid an import cycle -- TODO confirm).
    from . import _logger

    matched = m.group(0)
    _logger.debug(f'matched <{group}> ({matched})')
#############################
# regex utils
def set_capture_group(pattern, group, *, strict=True):
    """Turn the named non-capturing group of *pattern* into a capturing one.

    The group is located by its opening marker, ``(?: # <GROUP>``; only the
    first occurrence is rewritten, to ``( # <GROUP>``.

    If *strict* is true (the default), raise ValueError when the marker
    is not present.  Otherwise a pattern without the marker is returned
    unchanged.
    """
    marker = f'(?: # <{group}>'
    if strict and marker not in pattern:
        raise ValueError(f'{marker!r} not found in pattern')
    capturing = f'( # <{group}>'
    return pattern.replace(marker, capturing, 1)
def set_capture_groups(pattern, groups, *, strict=True):
    """Apply set_capture_group() to *pattern* once for each name in *groups*.

    The names are processed in iteration order and *strict* is forwarded
    unchanged to every call.  The resulting pattern text is returned; with
    no names at all, *pattern* comes back as-is.
    """
    updated = pattern
    for name in groups:
        updated = set_capture_group(updated, name, strict=strict)
    return updated
#############################
# syntax-related utils
# Matches everything from the current position up to and including the
# next parenthesis.  The text before the paren may contain any number of
# STRING_LITERAL matches, so quotes are only consumed as part of such a
# literal (STRING_LITERAL is presumably the pattern for a quoted literal,
# and _ind() presumably re-indents it for embedding -- both come from
# ._regexes; TODO confirm).
# The two groups written here capture "(" and ")" respectively; exactly
# one of them is set on a match.  match_paren() unpacks m.groups() into
# two names, so STRING_LITERAL is assumed to add no capture groups.
_PAREN_RE = re.compile(rf'''
(?:
(?:
[^'"()]*
{_ind(STRING_LITERAL, 3)}
)*
[^'"()]*
(?:
( [(] )
|
( [)] )
)
)
''', re.VERBOSE)
def match_paren(text, depth=0):
    """Return the offset just past the paren that closes the nesting.

    *text* is scanned from its start, one parenthesis at a time (using
    _PAREN_RE).  Every "(" raises the nesting depth by one and every ")"
    lowers it by one; *depth* is the level to start from.  As soon as a
    ")" brings the depth back to zero, the position right after that
    paren is returned.

    Raise ValueError if no further parenthesis can be matched before
    that happens.
    """
    offset = 0
    while True:
        found = _PAREN_RE.match(text, offset)
        if not found:
            raise ValueError(f'could not find matching parens for {text!r}')
        offset = found.end()
        opened, _closed = found.groups()
        if opened:
            depth += 1
            continue
        # Not an open paren, so the match ended on a close paren.
        depth -= 1
        if depth == 0:
            return offset
# VAR_DECL is the imported _VAR_DECL pattern text with the groups named
# below switched from non-capturing to capturing (see
# set_capture_groups()).  The call uses the default strict=True, so a
# name whose marker is missing from _VAR_DECL raises ValueError here,
# at import time.
# The order of the names does not influence group numbering: the groups
# are numbered by where they appear in the pattern.  parse_var_decl()
# unpacks m.groups() positionally into seven names, so it presumably
# relies on the pattern defining them in this same order -- TODO confirm
# against ._regexes.
VAR_DECL = set_capture_groups(_VAR_DECL, (
'STORAGE',
'TYPE_QUAL',
'TYPE_SPEC',
'DECLARATOR',
'IDENTIFIER',
'WRAPPED_IDENTIFIER',
'FUNC_IDENTIFIER',
))
def parse_var_decl(decl):
    """Break a variable declaration into its kind, name and type parts.

    *decl* is matched (from its start) against the VAR_DECL pattern in
    verbose mode.

    Return a 3-tuple ``(kind, name, vartype)``:

    * *kind* is 'simple', 'wrapped' or 'funcptr', depending on which of
      the identifier groups matched;
    * *name* is the text of that identifier group;
    * *vartype* is a dict with the keys 'storage', 'typequal', 'typespec'
      (the corresponding captured groups, None when a group did not take
      part in the match) and 'abstract' (the declarator with every
      occurrence of *name* removed).

    Raise ValueError if *decl* does not match VAR_DECL at all, and
    NotImplementedError if it matches without any identifier group.
    """
    m = re.match(VAR_DECL, decl, re.VERBOSE)
    if m is None:
        # re.match() returns None on failure; report the offending text
        # instead of failing with an AttributeError on m.groups().
        raise ValueError(f'unsupported variable declaration {decl!r}')
    (storage, typequal, typespec, declarator,
     name,
     wrappedname,
     funcptrname,
     ) = m.groups()
    if name:
        kind = 'simple'
    elif wrappedname:
        kind = 'wrapped'
        name = wrappedname
    elif funcptrname:
        kind = 'funcptr'
        name = funcptrname
    else:
        raise NotImplementedError
    # What is left of the declarator once the identifier is taken out.
    abstract = declarator.replace(name, '')
    vartype = {
        'storage': storage,
        'typequal': typequal,
        'typespec': typespec,
        'abstract': abstract,
    }
    return (kind, name, vartype)
#############################
# parser state utils
# XXX Drop this or use it!
def iter_results(results):
    """Yield the ``(result, text)`` pairs of *results* whose result is truthy.

    *results* may be an iterable of 2-item pairs or a callable that takes
    no arguments and returns such an iterable.  A falsy *results* (None,
    an empty list, ...) yields nothing.
    """
    if not results:
        return
    if callable(results):
        results = results()
    # Iterate the resolved value itself.  Calling it again here (as the
    # code used to) raised TypeError for any ordinary iterable.
    for result, text in results:
        if result:
            yield result, text