bpo-36876: Fix the C analyzer tool. (GH-22841)

The original tool wasn't working right, and it was simpler to create a new one, partially reusing some of the old code. At this point the tool runs properly on master. (Try: ./python Tools/c-analyzer/c-analyzer.py analyze.) It takes ~40 seconds on my machine to analyze the full CPython code base. Note that we'll need to iron out some OS-specific stuff (e.g. the preprocessor). We're okay though, since this tool isn't used yet in our workflow. We will also need to verify the analysis results in detail before activating the check in CI, though I'm pretty sure it's close. https://bugs.python.org/issue36876
2025-11-01 18:51:43 +00:00 · 2020-10-22 18:42:51 -06:00 · 2020-10-22 18:42:51 -06:00 · 345cd37abe
commit 345cd37abe
parent ec388cfb4e
92 changed files with 8868 additions and 10539 deletions
--- a/Tools/c-analyzer/c_analyzer/datafiles.py
+++ b/Tools/c-analyzer/c_analyzer/datafiles.py
@ -0,0 +1,109 @@
+import c_common.tables as _tables
+import c_parser.info as _info
+import c_parser.datafiles as _parser
+from . import analyze as _analyze
+
+
+#############################
+# "known" decls
+
+# Columns that read_known() and write_known() always put in front of any
+# caller-supplied "extracolumns".  None are enabled at the moment.
+EXTRA_COLUMNS = [
+    #'typedecl',
+]
+
+
+def analyze_known(known, *,
+                  analyze_resolved=None,
+                  handle_unresolved=True,
+                  ):
+    knowntypes = knowntypespecs = {}
+    collated = _info.collate_by_kind_group(known)
+    types = {decl: None for decl in collated['type']}
+    typespecs = _analyze.get_typespecs(types)
+    def analyze_decl(decl):
+        return _analyze.analyze_decl(
+            decl,
+            typespecs,
+            knowntypespecs,
+            types,
+            knowntypes,
+            analyze_resolved=analyze_resolved,
+        )
+    _analyze.analyze_type_decls(types, analyze_decl, handle_unresolved)
+    return types, typespecs
+
+
+def get_known(known, extracolumns=None, *,
+              analyze_resolved=None,
+              handle_unresolved=True,
+              relroot=None,
+              ):
+    if isinstance(known, str):
+        known = read_known(known, extracolumns, relroot)
+    return analyze_known(
+        known,
+        handle_unresolved=handle_unresolved,
+        analyze_resolved=analyze_resolved,
+    )
+
+
+def read_known(infile, extracolumns=None, relroot=None):
+    extracolumns = EXTRA_COLUMNS + (
+        list(extracolumns) if extracolumns else []
+    )
+    known = {}
+    for decl, extra in _parser.iter_decls_tsv(infile, extracolumns, relroot):
+        known[decl] = extra
+    return known
+
+
+def write_known(rows, outfile, extracolumns=None, *,
+                relroot=None,
+                backup=True,
+                ):
+    extracolumns = EXTRA_COLUMNS + (
+        list(extracolumns) if extracolumns else []
+    )
+    _parser.write_decls_tsv(
+        rows,
+        outfile,
+        extracolumns,
+        relroot=relroot,
+        backup=backup,
+    )
+
+
+#############################
+# ignored vars
+
+# Column names of the "ignored variables" table, in order.  When a row
+# is read by _iter_ignored(), the last value ("reason") is split off and
+# the values before it are passed to DeclID.from_row().
+IGNORED_COLUMNS = [
+    'filename',
+    'funcname',
+    'name',
+    'reason',
+]
+# The column names joined with tabs; this is what gets passed to
+# _tables.read_table() and _tables.write_table().
+IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
+
+
+def read_ignored(infile):
+    return dict(_iter_ignored(infile))
+
+
+def _iter_ignored(infile):
+    for row in _tables.read_table(infile, IGNORED_HEADER, sep='\t'):
+        *varidinfo, reason = row
+        varid = _info.DeclID.from_row(varidinfo)
+        yield varid, reason
+
+
+def write_ignored(variables, outfile):
+    raise NotImplementedError
+    reason = '???'
+    #if not isinstance(varid, DeclID):
+    #    varid = getattr(varid, 'parsed', varid).id
+    _tables.write_table(
+        outfile,
+        IGNORED_HEADER,
+        sep='\t',
+        rows=(r.render_rowdata() + (reason,) for r in decls),
+    )