mirror of
https://github.com/python/cpython.git
synced 2025-07-08 03:45:36 +00:00

The original tool wasn't working right and it was simpler to create a new one, partially re-using some of the old code. At this point the tool runs properly on the master. (Try: ./python Tools/c-analyzer/c-analyzer.py analyze.) It takes ~40 seconds on my machine to analyze the full CPython code base. Note that we'll need to iron out some OS-specific stuff (e.g. preprocessor). We're okay though since this tool isn't used yet in our workflow. We will also need to verify the analysis results in detail before activating the check in CI, though I'm pretty sure it's close. https://bugs.python.org/issue36876
115 lines
2.4 KiB
Python
115 lines
2.4 KiB
Python
import re
|
|
|
|
from ._regexes import (
|
|
_ind,
|
|
STRING_LITERAL,
|
|
VAR_DECL as _VAR_DECL,
|
|
)
|
|
|
|
|
|
def log_match(group, m):
    """Write a debug log record describing a successful regex match.

    *group* is the label shown between angle brackets in the message and
    *m* is the match object; the full matched text (``m.group(0)``) is
    included in the record.
    """
    # The logger is looked up at call time, not at module import time.
    from . import _logger

    matched = m.group(0)
    _logger.debug(f'matched <{group}> ({matched})')
|
|
|
|
|
|
#############################
|
|
# regex utils
|
|
|
|
def set_capture_group(pattern, group, *, strict=True):
    """Return *pattern* with the group tagged ``<group>`` made capturing.

    The pattern text is expected to tag a non-capturing group with a
    trailing ``# <NAME>`` comment.  The first such tag for *group* has its
    ``(?:`` opener swapped for a plain ``(``; later occurrences are left
    alone.

    With *strict* true (the default) a ValueError is raised when the tag
    is not present.  Otherwise the pattern is returned unchanged.
    """
    # NOTE(review): this is an exact substring match, so the spacing
    # between the opener and the "#" must be the same as in the pattern.
    marker = f'(?: # <{group}>'
    capturing = f'( # <{group}>'
    if strict:
        if marker not in pattern:
            raise ValueError(f'{marker!r} not found in pattern')
    return pattern.replace(marker, capturing, 1)
|
|
|
|
|
|
def set_capture_groups(pattern, groups, *, strict=True):
    """Return *pattern* with each of the tagged *groups* made capturing.

    The groups are converted one at a time, in the order given, by
    set_capture_group(); *strict* is forwarded to every call.
    """
    converted = pattern
    for name in groups:
        converted = set_capture_group(converted, name, strict=strict)
    return converted
|
|
|
|
|
|
#############################
|
|
# syntax-related utils
|
|
|
|
_PAREN_RE = re.compile(rf'''
|
|
(?:
|
|
(?:
|
|
[^'"()]*
|
|
{_ind(STRING_LITERAL, 3)}
|
|
)*
|
|
[^'"()]*
|
|
(?:
|
|
( [(] )
|
|
|
|
|
( [)] )
|
|
)
|
|
)
|
|
''', re.VERBOSE)
|
|
|
|
|
|
def match_paren(text, depth=0):
    """Return the offset in *text* just past the balancing close paren.

    *text* is scanned from the start with _PAREN_RE.  Every "(" found
    raises the nesting depth by one and every ")" lowers it by one.  The
    end offset of the ")" that brings the depth down to zero is returned.
    *depth* is the nesting level assumed before the first character.

    Raises ValueError if the scan stops before the depth returns to zero.
    """
    offset = 0
    while True:
        found = _PAREN_RE.match(text, offset)
        if not found:
            break
        offset = found.end()
        opener, _closer = found.groups()
        if opener:
            depth += 1
            continue
        # Otherwise the match ended on a closing paren.
        depth -= 1
        if depth == 0:
            return offset
    raise ValueError(f'could not find matching parens for {text!r}')
|
|
|
|
|
|
# The imported _VAR_DECL pattern with the tagged groups below switched
# from non-capturing to capturing (strict mode, so a missing tag raises
# ValueError at import time).
# parse_var_decl() unpacks m.groups() into seven names and relies on the
# groups appearing in the pattern in this same order.
VAR_DECL = set_capture_groups(_VAR_DECL, (
    'STORAGE',
    'TYPE_QUAL',
    'TYPE_SPEC',
    'DECLARATOR',
    'IDENTIFIER',
    'WRAPPED_IDENTIFIER',
    'FUNC_IDENTIFIER',
))
|
|
|
|
|
|
def parse_var_decl(decl):
    """Split a variable declaration into its kind, name and type parts.

    *decl* is matched against VAR_DECL (in verbose mode).  The result is a
    ``(kind, name, vartype)`` tuple:

    * *kind* is ``'simple'``, ``'wrapped'`` or ``'funcptr'``, depending on
      which of the three identifier groups of the pattern matched;
    * *name* is the text of that identifier group;
    * *vartype* is a dict with the keys ``'storage'``, ``'typequal'``,
      ``'typespec'`` and ``'abstract'``, where ``'abstract'`` is the
      declarator with the declared name removed.

    Raises ValueError if *decl* does not match VAR_DECL at all, and
    NotImplementedError if it matches but none of the identifier groups
    is set.
    """
    m = re.match(VAR_DECL, decl, re.VERBOSE)
    if m is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f'unsupported variable declaration {decl!r}')
    (storage, typequal, typespec, declarator,
     name,
     wrappedname,
     funcptrname,
     ) = m.groups()
    if name:
        kind = 'simple'
    elif wrappedname:
        kind = 'wrapped'
        name = wrappedname
    elif funcptrname:
        kind = 'funcptr'
        name = funcptrname
    else:
        raise NotImplementedError
    # Remove only the declared identifier itself: the first whole-word
    # occurrence.  A plain str.replace() would also eat the name wherever
    # it appears inside other tokens (e.g. "n" inside "n[len]").
    abstract = re.sub(rf'\b{re.escape(name)}\b', '', declarator, count=1)
    vartype = {
        'storage': storage,
        'typequal': typequal,
        'typespec': typespec,
        'abstract': abstract,
    }
    return (kind, name, vartype)
|
|
|
|
|
|
#############################
|
|
# parser state utils
|
|
|
|
# XXX Drop this or use it!
|
|
def iter_results(results):
    """Yield the ``(result, text)`` pairs of *results* whose result is truthy.

    *results* may be an iterable of 2-item pairs, or a callable that takes
    no arguments and returns such an iterable.  Nothing is yielded when
    *results* (or what the callable returns) is None or empty.
    """
    if callable(results):
        # Resolve the callable exactly once; the loop below then iterates
        # over what it returned rather than calling it a second time.
        results = results()
    if not results:
        return

    for result, text in results:
        if result:
            yield result, text
|