mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
gh-90110: Fix the c-analyzer Tool (gh-96731)
This includes:
* update the whitelists
* fixes so we can stop ignoring some of the files
* ensure Include/cpython/*.h get analyzed
This commit is contained in:
parent
662782e95f
commit
1756ffd66a
10 changed files with 499 additions and 118 deletions
|
@ -22,8 +22,12 @@ def parse_files(filenames, *,
|
|||
if get_file_preprocessor is None:
|
||||
get_file_preprocessor = _get_preprocessor()
|
||||
for filename in filenames:
|
||||
yield from _parse_file(
|
||||
filename, match_kind, get_file_preprocessor, file_maxsizes)
|
||||
try:
|
||||
yield from _parse_file(
|
||||
filename, match_kind, get_file_preprocessor, file_maxsizes)
|
||||
except Exception:
|
||||
print(f'# requested file: <{filename}>')
|
||||
raise # re-raise
|
||||
|
||||
|
||||
def _parse_file(filename, match_kind, get_file_preprocessor, maxsizes):
|
||||
|
|
|
@ -35,9 +35,11 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
def preprocess(source, *,
|
||||
incldirs=None,
|
||||
includes=None,
|
||||
macros=None,
|
||||
samefiles=None,
|
||||
filename=None,
|
||||
cwd=None,
|
||||
tool=True,
|
||||
):
|
||||
"""...
|
||||
|
@ -45,17 +47,27 @@ def preprocess(source, *,
|
|||
CWD should be the project root and "source" should be relative.
|
||||
"""
|
||||
if tool:
|
||||
logger.debug(f'CWD: {os.getcwd()!r}')
|
||||
logger.debug(f'incldirs: {incldirs!r}')
|
||||
logger.debug(f'macros: {macros!r}')
|
||||
if not cwd:
|
||||
cwd = os.getcwd()
|
||||
logger.debug(f'CWD: {cwd!r}')
|
||||
logger.debug(f'incldirs: {incldirs!r}')
|
||||
logger.debug(f'includes: {includes!r}')
|
||||
logger.debug(f'macros: {macros!r}')
|
||||
logger.debug(f'samefiles: {samefiles!r}')
|
||||
_preprocess = _get_preprocessor(tool)
|
||||
with _good_file(source, filename) as source:
|
||||
return _preprocess(source, incldirs, macros, samefiles) or ()
|
||||
return _preprocess(
|
||||
source,
|
||||
incldirs,
|
||||
includes,
|
||||
macros,
|
||||
samefiles,
|
||||
cwd,
|
||||
) or ()
|
||||
else:
|
||||
source, filename = _resolve_source(source, filename)
|
||||
# We ignore "includes", "macros", etc.
|
||||
return _pure.preprocess(source, filename)
|
||||
return _pure.preprocess(source, filename, cwd)
|
||||
|
||||
# if _run() returns just the lines:
|
||||
# text = _run(source)
|
||||
|
@ -72,6 +84,7 @@ def preprocess(source, *,
|
|||
|
||||
def get_preprocessor(*,
|
||||
file_macros=None,
|
||||
file_includes=None,
|
||||
file_incldirs=None,
|
||||
file_same=None,
|
||||
ignore_exc=False,
|
||||
|
@ -80,10 +93,12 @@ def get_preprocessor(*,
|
|||
_preprocess = preprocess
|
||||
if file_macros:
|
||||
file_macros = tuple(_parse_macros(file_macros))
|
||||
if file_includes:
|
||||
file_includes = tuple(_parse_includes(file_includes))
|
||||
if file_incldirs:
|
||||
file_incldirs = tuple(_parse_incldirs(file_incldirs))
|
||||
if file_same:
|
||||
file_same = tuple(file_same)
|
||||
file_same = dict(file_same or ())
|
||||
if not callable(ignore_exc):
|
||||
ignore_exc = (lambda exc, _ig=ignore_exc: _ig)
|
||||
|
||||
|
@ -91,16 +106,26 @@ def get_preprocessor(*,
|
|||
filename = filename.strip()
|
||||
if file_macros:
|
||||
macros = list(_resolve_file_values(filename, file_macros))
|
||||
if file_includes:
|
||||
# There's a small chance we could need to filter out any
|
||||
# includes that import "filename". It isn't clear that it's
|
||||
# a problem any longer. If we do end up filtering then
|
||||
# it may make sense to use c_common.fsutil.match_path_tail().
|
||||
includes = [i for i, in _resolve_file_values(filename, file_includes)]
|
||||
if file_incldirs:
|
||||
incldirs = [v for v, in _resolve_file_values(filename, file_incldirs)]
|
||||
if file_same:
|
||||
samefiles = _resolve_samefiles(filename, file_same)
|
||||
|
||||
def preprocess(**kwargs):
|
||||
if file_macros and 'macros' not in kwargs:
|
||||
kwargs['macros'] = macros
|
||||
if file_includes and 'includes' not in kwargs:
|
||||
kwargs['includes'] = includes
|
||||
if file_incldirs and 'incldirs' not in kwargs:
|
||||
kwargs['incldirs'] = [v for v, in _resolve_file_values(filename, file_incldirs)]
|
||||
if file_same and 'file_same' not in kwargs:
|
||||
kwargs['samefiles'] = file_same
|
||||
kwargs['incldirs'] = incldirs
|
||||
if file_same and 'samefiles' not in kwargs:
|
||||
kwargs['samefiles'] = samefiles
|
||||
kwargs.setdefault('filename', filename)
|
||||
with handling_errors(ignore_exc, log_err=log_err):
|
||||
return _preprocess(filename, **kwargs)
|
||||
|
@ -120,6 +145,11 @@ def _parse_macros(macros):
|
|||
yield row
|
||||
|
||||
|
||||
def _parse_includes(includes):
    """Yield the row of every entry parsed from the *includes* table.

    The table is handed to _parse_table() with a tab separator and a
    two-column "glob<TAB>include" header.  Each entry it produces is a
    (row, srcfile) pair; only the row is passed along.
    """
    table = _parse_table(includes, '\t', 'glob\tinclude', default=None)
    for entry in table:
        parsed, _srcfile = entry
        yield parsed
|
||||
|
||||
|
||||
def _parse_incldirs(incldirs):
|
||||
for row, srcfile in _parse_table(incldirs, '\t', 'glob\tdirname', default=None):
|
||||
glob, dirname = row
|
||||
|
@ -130,6 +160,43 @@ def _parse_incldirs(incldirs):
|
|||
yield row
|
||||
|
||||
|
||||
def _resolve_samefiles(filename, file_same):
    """Return the list of "same file" paths that apply to *filename*.

    *filename* must be a normalized path without any "*" in it.
    *file_same* is a mapping whose items are matched against the
    filename by _resolve_file_values(); every match supplies a
    collection of patterns.  Each pattern is resolved with
    _resolve_samefile() and the falsy results (e.g. a pattern that is
    the file itself) are left out.
    """
    assert '*' not in filename, (filename,)
    assert os.path.normpath(filename) == filename, (filename,)
    suffix = os.path.splitext(filename)[1]
    resolved = []
    for (patterns,) in _resolve_file_values(filename, file_same.items()):
        candidates = (
            _resolve_samefile(filename, pattern, suffix)
            for pattern in patterns
        )
        resolved.extend(same for same in candidates if same)
    return resolved
|
||||
|
||||
|
||||
def _resolve_samefile(filename, pattern, suffix):
|
||||
if pattern == filename:
|
||||
return None
|
||||
if pattern.endswith(os.path.sep):
|
||||
pattern += f'*{suffix}'
|
||||
assert os.path.normpath(pattern) == pattern, (pattern,)
|
||||
if '*' in os.path.dirname(pattern):
|
||||
raise NotImplementedError((filename, pattern))
|
||||
if '*' not in os.path.basename(pattern):
|
||||
return pattern
|
||||
|
||||
common = os.path.commonpath([filename, pattern])
|
||||
relpattern = pattern[len(common) + len(os.path.sep):]
|
||||
relpatterndir = os.path.dirname(relpattern)
|
||||
relfile = filename[len(common) + len(os.path.sep):]
|
||||
if os.path.basename(pattern) == '*':
|
||||
return os.path.join(common, relpatterndir, relfile)
|
||||
elif os.path.basename(relpattern) == '*' + suffix:
|
||||
return os.path.join(common, relpatterndir, relfile)
|
||||
else:
|
||||
raise NotImplementedError((filename, pattern))
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def handling_errors(ignore_exc=None, *, log_err=None):
|
||||
try:
|
||||
|
|
|
@ -44,7 +44,7 @@ def run_cmd(argv, *,
|
|||
return proc.stdout
|
||||
|
||||
|
||||
def preprocess(tool, filename, **kwargs):
|
||||
def preprocess(tool, filename, cwd=None, **kwargs):
|
||||
argv = _build_argv(tool, filename, **kwargs)
|
||||
logger.debug(' '.join(shlex.quote(v) for v in argv))
|
||||
|
||||
|
@ -59,19 +59,24 @@ def preprocess(tool, filename, **kwargs):
|
|||
# distutil compiler object's preprocess() method, since that
|
||||
# one writes to stdout/stderr and it's simpler to do it directly
|
||||
# through subprocess.
|
||||
return run_cmd(argv)
|
||||
return run_cmd(argv, cwd=cwd)
|
||||
|
||||
|
||||
def _build_argv(
|
||||
tool,
|
||||
filename,
|
||||
incldirs=None,
|
||||
includes=None,
|
||||
macros=None,
|
||||
preargs=None,
|
||||
postargs=None,
|
||||
executable=None,
|
||||
compiler=None,
|
||||
):
|
||||
if includes:
|
||||
includes = tuple(f'-include{i}' for i in includes)
|
||||
postargs = (includes + postargs) if postargs else includes
|
||||
|
||||
compiler = distutils.ccompiler.new_compiler(
|
||||
compiler=compiler or tool,
|
||||
)
|
||||
|
|
|
@ -7,7 +7,12 @@ from . import common as _common
|
|||
TOOL = 'gcc'
|
||||
|
||||
# https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
|
||||
LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"(?: [1234])*$')
|
||||
# flags:
|
||||
# 1 start of a new file
|
||||
# 2 returning to a file (after including another)
|
||||
# 3 following text comes from a system header file
|
||||
# 4 following text should be treated as wrapped in an implicit extern "C" block
|
||||
LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"((?: [1234])*)$')
|
||||
PREPROC_DIRECTIVE_RE = re.compile(r'^\s*#\s*(\w+)\b.*')
|
||||
COMPILER_DIRECTIVE_RE = re.compile(r'''
|
||||
^
|
||||
|
@ -40,32 +45,112 @@ POST_ARGS = (
|
|||
)
|
||||
|
||||
|
||||
def preprocess(filename, incldirs=None, macros=None, samefiles=None):
|
||||
def preprocess(filename,
               incldirs=None,
               includes=None,
               macros=None,
               samefiles=None,
               cwd=None,
               ):
    """Run gcc's preprocessor on *filename* and return its filtered lines.

    *cwd* is made absolute (defaulting to the current directory) and
    *filename* is normalized relative to it.  The preprocessor output
    is handed to _iter_lines() together with *samefiles* and *cwd*.
    """
    if not (cwd and os.path.isabs(cwd)):
        cwd = os.path.abspath(cwd or '.')
    reqfile = _normpath(filename, cwd)
    options = dict(
        incldirs=incldirs,
        includes=includes,
        macros=macros,
        #preargs=PRE_ARGS,
        postargs=POST_ARGS,
        executable=['gcc'],
        compiler='unix',
        cwd=cwd,
    )
    text = _common.preprocess(TOOL, reqfile, **options)
    return _iter_lines(text, reqfile, samefiles, cwd)
|
||||
|
||||
|
||||
def _iter_lines(text, filename, samefiles, *, raw=False):
|
||||
def _iter_lines(text, reqfile, samefiles, cwd, raw=False):
|
||||
lines = iter(text.splitlines())
|
||||
|
||||
# Build the lines and filter out directives.
|
||||
partial = 0 # depth
|
||||
origfile = None
|
||||
# The first line is special.
|
||||
# The next two lines are consistent.
|
||||
for expected in [
|
||||
f'# 1 "{reqfile}"',
|
||||
'# 1 "<built-in>"',
|
||||
'# 1 "<command-line>"',
|
||||
]:
|
||||
line = next(lines)
|
||||
if line != expected:
|
||||
raise NotImplementedError((line, expected))
|
||||
|
||||
# Do all the CLI-provided includes.
|
||||
filter_reqfile = (lambda f: _filter_reqfile(f, reqfile, samefiles))
|
||||
make_info = (lambda lno: _common.FileInfo(reqfile, lno))
|
||||
last = None
|
||||
for line in lines:
|
||||
m = LINE_MARKER_RE.match(line)
|
||||
if m:
|
||||
lno, origfile = m.groups()
|
||||
lno = int(lno)
|
||||
elif _filter_orig_file(origfile, filename, samefiles):
|
||||
assert last != reqfile, (last,)
|
||||
lno, included, flags = _parse_marker_line(line, reqfile)
|
||||
if not included:
|
||||
raise NotImplementedError((line,))
|
||||
if included == reqfile:
|
||||
# This will be the last one.
|
||||
assert not flags, (line, flags)
|
||||
else:
|
||||
assert 1 in flags, (line, flags)
|
||||
yield from _iter_top_include_lines(
|
||||
lines,
|
||||
_normpath(included, cwd),
|
||||
cwd,
|
||||
filter_reqfile,
|
||||
make_info,
|
||||
raw,
|
||||
)
|
||||
last = included
|
||||
# The last one is always the requested file.
|
||||
assert included == reqfile, (line,)
|
||||
|
||||
|
||||
def _iter_top_include_lines(lines, topfile, cwd,
|
||||
filter_reqfile, make_info,
|
||||
raw):
|
||||
partial = 0 # depth
|
||||
files = [topfile]
|
||||
# We start at 1 in case there are source lines (including blank ones)
|
||||
# before the first marker line. Also, we already verified in
|
||||
# _parse_marker_line() that the preprocessor reported lno as 1.
|
||||
lno = 1
|
||||
for line in lines:
|
||||
if line == '# 1 "<command-line>" 2':
|
||||
# We're done with this top-level include.
|
||||
return
|
||||
|
||||
_lno, included, flags = _parse_marker_line(line)
|
||||
if included:
|
||||
lno = _lno
|
||||
included = _normpath(included, cwd)
|
||||
# We hit a marker line.
|
||||
if 1 in flags:
|
||||
# We're entering a file.
|
||||
# XXX Cycles are unexpected?
|
||||
#assert included not in files, (line, files)
|
||||
files.append(included)
|
||||
elif 2 in flags:
|
||||
# We're returning to a file.
|
||||
assert files and included in files, (line, files)
|
||||
assert included != files[-1], (line, files)
|
||||
while files[-1] != included:
|
||||
files.pop()
|
||||
# XXX How can a file return to line 1?
|
||||
#assert lno > 1, (line, lno)
|
||||
else:
|
||||
# It's the next line from the file.
|
||||
assert included == files[-1], (line, files)
|
||||
assert lno > 1, (line, lno)
|
||||
elif not files:
|
||||
raise NotImplementedError((line,))
|
||||
elif filter_reqfile(files[-1]):
|
||||
assert lno is not None, (line, files[-1])
|
||||
if (m := PREPROC_DIRECTIVE_RE.match(line)):
|
||||
name, = m.groups()
|
||||
if name != 'pragma':
|
||||
|
@ -74,7 +159,7 @@ def _iter_lines(text, filename, samefiles, *, raw=False):
|
|||
if not raw:
|
||||
line, partial = _strip_directives(line, partial=partial)
|
||||
yield _common.SourceLine(
|
||||
_common.FileInfo(filename, lno),
|
||||
make_info(lno),
|
||||
'source',
|
||||
line or '',
|
||||
None,
|
||||
|
@ -82,6 +167,34 @@ def _iter_lines(text, filename, samefiles, *, raw=False):
|
|||
lno += 1
|
||||
|
||||
|
||||
def _parse_marker_line(line, reqfile=None):
    """Parse a preprocessor line marker into (lno, origfile, flags).

    Returns (None, None, None) if *line* does not match LINE_MARKER_RE.
    Otherwise "lno" is the reported line number, "origfile" is the
    quoted filename, and "flags" is the set of numeric flags on the
    marker (an empty tuple if there are none).

    The marker is sanity-checked with assertions:
      * flag 1 (entering a file): line number must be 1, no flag 2
      * flag 2 (returning to a file): no extra checks
      * no flag 1 or 2 and the file is *reqfile*: line number must be
        1 and there must be no flags at all
      * anything else: line number must be greater than 1
    """
    matched = LINE_MARKER_RE.match(line)
    if matched is None:
        return None, None, None

    lineno_text, origfile, rawflags = matched.groups()
    lno = int(lineno_text)
    assert lno > 0, (line, lno)
    assert origfile not in ('<built-in>', '<command-line>'), (line,)
    flags = {int(flag) for flag in rawflags.split()} if rawflags else ()

    if 1 in flags:
        # We're entering a file.
        assert lno == 1, (line, lno)
        assert 2 not in flags, (line,)
    elif 2 not in flags:
        if reqfile and origfile == reqfile:
            # We're starting the requested file.
            assert lno == 1, (line, lno)
            assert not flags, (line, flags)
        else:
            # It's the next line from the file.
            assert lno > 1, (line, lno)
    # Otherwise we're returning to a file; no line-number check is
    # applied to that case here.
    return lno, origfile, flags
|
||||
|
||||
|
||||
def _strip_directives(line, partial=0):
|
||||
# We assume there are no string literals with parens in directive bodies.
|
||||
while partial > 0:
|
||||
|
@ -106,18 +219,16 @@ def _strip_directives(line, partial=0):
|
|||
return line, partial
|
||||
|
||||
|
||||
def _filter_orig_file(origfile, current, samefiles):
|
||||
if origfile == current:
|
||||
def _filter_reqfile(current, reqfile, samefiles):
|
||||
if current == reqfile:
|
||||
return True
|
||||
if origfile == '<stdin>':
|
||||
if current == '<stdin>':
|
||||
return True
|
||||
if current in samefiles:
|
||||
return True
|
||||
if os.path.isabs(origfile):
|
||||
return False
|
||||
|
||||
for filename in samefiles or ():
|
||||
if filename.endswith(os.path.sep):
|
||||
filename += os.path.basename(current)
|
||||
if origfile == filename:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _normpath(filename, cwd):
|
||||
assert cwd
|
||||
return os.path.normpath(os.path.join(cwd, filename))
|
||||
|
|
|
@ -4,7 +4,7 @@ from ..source import (
|
|||
from . import common as _common
|
||||
|
||||
|
||||
def preprocess(lines, filename=None):
|
||||
def preprocess(lines, filename=None, cwd=None):
|
||||
if isinstance(lines, str):
|
||||
with _open_source(lines, filename) as (lines, filename):
|
||||
yield from preprocess(lines, filename)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue