mirror of
https://github.com/python/cpython.git
synced 2025-08-04 17:08:35 +00:00
bpo-36876: Add a tool that identifies unsupported global C variables. (#15877)
This commit is contained in:
parent
9936371af2
commit
ee536b2020
51 changed files with 9467 additions and 19 deletions
9
Tools/c-analyzer/c-globals.py
Normal file
9
Tools/c-analyzer/c-globals.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
# This is a script equivalent of running "python -m test.test_c_globals.cg".

from c_globals.__main__ import parse_args, main


# This is effectively copied from cg/__main__.py:
if __name__ == '__main__':
    # Parse sys.argv into a command name + its keyword arguments,
    # then dispatch to the selected command.
    cmd, cmdkwargs = parse_args()
    main(cmd, cmdkwargs)
|
19
Tools/c-analyzer/c_analyzer_common/__init__.py
Normal file
19
Tools/c-analyzer/c_analyzer_common/__init__.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
import os.path


# The directory containing this package.
PKG_ROOT = os.path.dirname(__file__)
# The parent directory (Tools/c-analyzer/), where data files live.
DATA_DIR = os.path.dirname(PKG_ROOT)
# The root of the CPython checkout (two levels up from DATA_DIR).
REPO_ROOT = os.path.dirname(
        os.path.dirname(DATA_DIR))

# The CPython source directories to analyze (absolute paths).
SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
        'Include',
        'Python',
        'Parser',
        'Objects',
        'Modules',
        ]]


# Clean up the namespace.
del os
|
328
Tools/c-analyzer/c_analyzer_common/_generate.py
Normal file
328
Tools/c-analyzer/c_analyzer_common/_generate.py
Normal file
|
@ -0,0 +1,328 @@
|
|||
# The code here consists of hacks for pre-populating the known.tsv file.
|
||||
|
||||
from c_parser.preprocessor import _iter_clean_lines
|
||||
from c_parser.naive import (
|
||||
iter_variables, parse_variable_declaration, find_variables,
|
||||
)
|
||||
from c_parser.info import Variable
|
||||
|
||||
from . import SOURCE_DIRS, REPO_ROOT
|
||||
from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER
|
||||
from .info import UNKNOWN, ID
|
||||
from .util import write_tsv
|
||||
from .files import iter_cpython_files
|
||||
|
||||
|
||||
# "Plain old types" -- declaration prefixes for simple (implied-static)
# globals.  Note the trailing space so e.g. 'int ' matches "int x" but
# not "int_var".
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
POTS += tuple('const ' + v for v in POTS)
# Struct type names whose top-level initializers imply a static global.
STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar')
|
||||
|
||||
|
||||
def _parse_global(line, funcname=None):
    """Parse one C source line into a (name, decl) pair, if possible.

    Returns (None, None) when the line does not look like a recognized
    variable declaration.  For a couple of file-specific macros it
    instead returns a list of (name, funcname, decl) triples.
    NOTE(review): the branch order matters -- e.g. 'static ' must be
    checked before the "global-only" prefixes.
    """
    line = line.strip()
    if line.startswith('static '):
        # Skip function declarations (parens but no array or init).
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
        name, decl = parse_variable_declaration(line)
    elif line.startswith('_Py_static_string('):
        # _Py_static_string(NAME, "...") -- the first macro arg is the name.
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(',')[0].strip()
    elif line.startswith('_Py_IDENTIFIER('):
        # _Py_IDENTIFIER(foo) expands to a static "PyId_foo" variable.
        decl = line.strip(';').strip()
        name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
    elif funcname:
        # Inside a function only the above forms count.
        return None, None

    # global-only
    elif line.startswith('PyAPI_DATA('):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('extern '):  # only in .h files
        name, decl = parse_variable_declaration(line)
    elif line.startswith('PyDoc_VAR('):
        decl = line.strip(';').strip()
        name = line.split('(')[1].split(')')[0].strip()
    elif line.startswith(POTS):  # implied static
        # Again, skip function declarations.
        if '(' in line and '[' not in line and ' = ' not in line:
            return None, None
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = {'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith(STRUCTS) and line.endswith(' = NULL;'):  # implied static
        name, decl = parse_variable_declaration(line)
    elif line.startswith('struct '):
        # Only recognized struct types with an initializer.
        if not line.endswith(' = {'):
            return None, None
        if not line.partition(' ')[2].startswith(STRUCTS):
            return None, None
        # implied static
        name, decl = parse_variable_declaration(line)

    # file-specific
    elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
        # Objects/typeobject.c
        # These macros each expand to two _Py_static_string variables
        # inside the generated function.
        funcname = line.split('(')[1].split(',')[0]
        return [
                ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
                ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'),
                ]
    elif line.startswith('WRAP_METHOD('):
        # Objects/weakrefobject.c
        # Expands to a _Py_IDENTIFIER inside the generated method.
        funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
        return [
                ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
                ]

    else:
        return None, None
    return name, decl
|
||||
|
||||
|
||||
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Pop and return the matching variable from the per-file cache.

    On the first request for a file all of its variables are parsed
    and cached.  Returns None if no cached variable matches.  Popping
    means each variable can be matched at most once.
    """
    # Look for the file.
    try:
        cached = varcache[filename]
    except KeyError:
        # First time we see this file: parse and cache its variables.
        cached = varcache[filename] = {}
        for variable in _iter_variables(filename,
                                        parse_variable=_parse_global,
                                        ):
            variable._isglobal = True
            cached[variable.id] = variable
        # NOTE(review): this prints every parsed variable id; it looks
        # like leftover debugging output -- confirm before removing.
        for var in cached:
            print(' ', var)

    # Look for the variable.
    if funcname == UNKNOWN:
        # We don't know the function, so match on the name alone.
        for varid in cached:
            if varid.name == name:
                break
        else:
            return None
        return cached.pop(varid)
    else:
        return cached.pop((filename, funcname, name), None)
|
||||
|
||||
|
||||
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the parsed variable matching the given ID, or None.

    When the ID names a specific file only that file is searched;
    otherwise every file in "allfilenames" is tried.  As a fallback,
    a file-specific global with no funcname is also looked for in
    every header (.h) file.
    """
    if varid.filename and varid.filename != UNKNOWN:
        filenames = [varid.filename]
    else:
        filenames = allfilenames
    for filename in filenames:
        variable = _pop_cached(varcache, filename, varid.funcname, varid.name)
        if variable is not None:
            return variable
    else:
        # Not found in the expected file(s); maybe it lives in a header.
        if varid.filename and varid.filename != UNKNOWN and varid.funcname is None:
            for filename in allfilenames:
                if not filename.endswith('.h'):
                    continue
                variable = _pop_cached(varcache, filename, None, varid.name)
                if variable is not None:
                    return variable
        return None
|
||||
|
||||
|
||||
# Hard-coded declarations for variables whose declarations span
# multiple lines (so the naive line-based parser cannot find them).
# Keys are variable names; values are the declaration prefix.
MULTILINE = {
    # Python/Python-ast.c
    'Load_singleton': 'PyObject *',
    'Store_singleton': 'PyObject *',
    'Del_singleton': 'PyObject *',
    'AugLoad_singleton': 'PyObject *',
    'AugStore_singleton': 'PyObject *',
    'Param_singleton': 'PyObject *',
    'And_singleton': 'PyObject *',
    'Or_singleton': 'PyObject *',
    'Add_singleton': 'static PyObject *',
    'Sub_singleton': 'static PyObject *',
    'Mult_singleton': 'static PyObject *',
    'MatMult_singleton': 'static PyObject *',
    'Div_singleton': 'static PyObject *',
    'Mod_singleton': 'static PyObject *',
    'Pow_singleton': 'static PyObject *',
    'LShift_singleton': 'static PyObject *',
    'RShift_singleton': 'static PyObject *',
    'BitOr_singleton': 'static PyObject *',
    'BitXor_singleton': 'static PyObject *',
    'BitAnd_singleton': 'static PyObject *',
    'FloorDiv_singleton': 'static PyObject *',
    'Invert_singleton': 'static PyObject *',
    'Not_singleton': 'static PyObject *',
    'UAdd_singleton': 'static PyObject *',
    'USub_singleton': 'static PyObject *',
    'Eq_singleton': 'static PyObject *',
    'NotEq_singleton': 'static PyObject *',
    'Lt_singleton': 'static PyObject *',
    'LtE_singleton': 'static PyObject *',
    'Gt_singleton': 'static PyObject *',
    'GtE_singleton': 'static PyObject *',
    'Is_singleton': 'static PyObject *',
    'IsNot_singleton': 'static PyObject *',
    'In_singleton': 'static PyObject *',
    'NotIn_singleton': 'static PyObject *',
    # Python/symtable.c
    'top': 'static identifier ',
    'lambda': 'static identifier ',
    'genexpr': 'static identifier ',
    'listcomp': 'static identifier ',
    'setcomp': 'static identifier ',
    'dictcomp': 'static identifier ',
    '__class__': 'static identifier ',
    # Python/compile.c
    '__doc__': 'static PyObject *',
    '__annotations__': 'static PyObject *',
    # Objects/floatobject.c
    'double_format': 'static float_format_type ',
    'float_format': 'static float_format_type ',
    'detected_double_format': 'static float_format_type ',
    'detected_float_format': 'static float_format_type ',
    # Parser/listnode.c
    'level': 'static int ',
    'atbol': 'static int ',
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
    }
# Variables declared with an inline (anonymous) struct type.
INLINE = {
    # Modules/_tracemalloc.c
    'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ',
    # Modules/faulthandler.c
    'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ',
    'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ',
    # Modules/signalmodule.c
    'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]',
    'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ',
    # Python/dynload_shlib.c
    'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
    # Objects/obmalloc.c
    '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ',
    # Python/bootstrap_hash.c
    'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
    }
# Global function pointers (full declarations, including the name).
FUNC = {
    # Objects/object.c
    '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
    # Parser/myreadline.c
    'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
    # Python/pylifecycle.c
    '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)',
    # Parser/myreadline.c
    'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)',
    }
# Globals whose declaration the parser would otherwise misread.
IMPLIED = {
    # Objects/boolobject.c
    '_Py_FalseStruct': 'static struct _longobject ',
    '_Py_TrueStruct': 'static struct _longobject ',
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
    }
# All hard-coded globals, merged into one lookup table.
GLOBALS = {}
GLOBALS.update(MULTILINE)
GLOBALS.update(INLINE)
GLOBALS.update(FUNC)
GLOBALS.update(IMPLIED)

# Hard-coded function-local statics: name -> (filename, funcname, decl).
LOCALS = {
    'buildinfo': ('Modules/getbuildinfo.c',
                  'Py_GetBuildInfo',
                  'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
    'methods': ('Python/codecs.c',
                '_PyCodecRegistry_Init',
                'static struct { char *name; PyMethodDef def; } methods[]'),
    }
|
||||
|
||||
|
||||
def _known(symbol):
    """Return a Variable for the symbol, based on the hard-coded tables.

    Raises KeyError when the symbol is not covered by the tables
    (LOCALS / GLOBALS / the filename-based special cases).
    """
    if symbol.funcname:
        # Function-local statics are only known via the LOCALS table,
        # and only when both filename and funcname are unknown.
        if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
            raise KeyError(symbol.name)
        filename, funcname, decl = LOCALS[symbol.name]
        varid = ID(filename, funcname, symbol.name)
    elif not symbol.filename or symbol.filename == UNKNOWN:
        raise KeyError(symbol.name)
    else:
        varid = symbol.id
        try:
            decl = GLOBALS[symbol.name]
        except KeyError:

            # Fall back to naming conventions.
            if symbol.name.endswith('_methods'):
                decl = 'static PyMethodDef '
            elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')):
                decl = 'static PyTypeObject '
            else:
                raise
    # The tables store prefixes; append the name if it isn't included.
    if symbol.name not in decl:
        decl = decl + symbol.name
    return Variable(varid, decl)
|
||||
|
||||
|
||||
def known_row(varid, decl):
    """Return the known.tsv row (a 5-tuple) for the given variable.

    A missing funcname is recorded as '-'.
    """
    funcname = varid.funcname if varid.funcname else '-'
    return (varid.filename, funcname, varid.name, 'variable', decl)
|
||||
|
||||
|
||||
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield a known.tsv row for each of the given symbols.

    Symbols are resolved first via the hard-coded tables (_known) and
    then by parsing the source files; anything unresolved is emitted
    with an UNKNOWN declaration.
    """
    filenames = list(_get_filenames())
    cache = {}
    if cached:
        for symbol in symbols:
            try:
                found = _known(symbol)
            except KeyError:
                found = _find_match(symbol, cache, filenames)
                if found is None:
                    found = Variable(symbol.id, UNKNOWN)
            yield _as_known(found.id, found.vartype)
    else:
        raise NotImplementedError  # XXX incorporate KNOWN
        # NOTE(review): everything below is unreachable (dead code
        # kept for when the KNOWN data gets incorporated).
        for variable in _find_symbols(symbols, filenames,
                                      srccache=cache,
                                      parse_variable=_parse_global,
                                      ):
            #variable = variable._replace(
            #    filename=os.path.relpath(variable.filename, REPO_ROOT))
            if variable.funcname == UNKNOWN:
                print(variable)
            if variable.vartype == UNKNOWN:
                print(variable)
            yield _as_known(variable.id, variable.vartype)
|
||||
|
||||
|
||||
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Generate the known.tsv rows for "symbols" and write them out.

    If no filename is given then write next to the default data file.
    """
    target = filename or KNOWN_FILE + '.new'
    rows = _generate_rows(symbols)
    _write_tsv(target, KNOWN_HEADER, rows)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Regenerate known.tsv from the symbols in the python binary.
    from c_symbols import binary
    symbols = binary.iter_symbols(
            binary.PYTHON,
            find_local_symbol=None,
            )
    generate(symbols)
|
138
Tools/c-analyzer/c_analyzer_common/files.py
Normal file
138
Tools/c-analyzer/c_analyzer_common/files.py
Normal file
|
@ -0,0 +1,138 @@
|
|||
import glob
|
||||
import os
|
||||
import os.path
|
||||
|
||||
from . import SOURCE_DIRS, REPO_ROOT
|
||||
|
||||
|
||||
C_SOURCE_SUFFIXES = ('.c', '.h')
|
||||
|
||||
|
||||
def _walk_tree(root, *,
|
||||
_walk=os.walk,
|
||||
):
|
||||
# A wrapper around os.walk that resolves the filenames.
|
||||
for parent, _, names in _walk(root):
|
||||
for name in names:
|
||||
yield os.path.join(parent, name)
|
||||
|
||||
|
||||
def walk_tree(root, *,
|
||||
suffix=None,
|
||||
walk=_walk_tree,
|
||||
):
|
||||
"""Yield each file in the tree under the given directory name.
|
||||
|
||||
If "suffix" is provided then only files with that suffix will
|
||||
be included.
|
||||
"""
|
||||
if suffix and not isinstance(suffix, str):
|
||||
raise ValueError('suffix must be a string')
|
||||
|
||||
for filename in walk(root):
|
||||
if suffix and not filename.endswith(suffix):
|
||||
continue
|
||||
yield filename
|
||||
|
||||
|
||||
def glob_tree(root, *,
              suffix=None,
              _glob=glob.iglob,
              ):
    """Yield each file in the tree under the given directory name.

    If "suffix" is provided then only files with that suffix will
    be included.
    """
    suffix = suffix or ''
    if not isinstance(suffix, str):
        raise ValueError('suffix must be a string')

    # First the files directly under "root", then everything deeper.
    yield from _glob(f'{root}/*{suffix}')
    yield from _glob(f'{root}/**/*{suffix}')
|
||||
|
||||
|
||||
def iter_files(root, suffix=None, relparent=None, *,
               get_files=os.walk,
               _glob=glob_tree,
               _walk=walk_tree,
               ):
    """Yield each file in the tree under the given directory name.

    If "root" is a non-string iterable then do the same for each of
    those trees.

    If "suffix" is provided then only files with that suffix will
    be included.

    If "relparent" is provided then it is used to resolve each
    filename as a relative path.
    """
    if not isinstance(root, str):
        # Multiple roots: recurse once per tree.
        roots = root
        for root in roots:
            yield from iter_files(root, suffix, relparent,
                                  get_files=get_files,
                                  _glob=_glob, _walk=_walk)
        return

    # Use the right "walk" function.
    if get_files in (glob.glob, glob.iglob, glob_tree):
        get_files = _glob
    else:
        # Anything else is treated as an os.walk-style callable and
        # wrapped so it goes through the walk_tree filtering.
        _files = _walk_tree if get_files in (os.walk, walk_tree) else get_files
        get_files = (lambda *a, **k: _walk(*a, walk=_files, **k))

    # Handle a single suffix.
    if suffix and not isinstance(suffix, str):
        # Multiple suffixes: filter here rather than in get_files().
        filenames = get_files(root)
        suffix = tuple(suffix)
    else:
        filenames = get_files(root, suffix=suffix)
        suffix = None

    for filename in filenames:
        if suffix and not isinstance(suffix, str):  # multiple suffixes
            if not filename.endswith(suffix):
                continue
        if relparent:
            filename = os.path.relpath(filename, relparent)
        yield filename
|
||||
|
||||
|
||||
def iter_files_by_suffix(root, suffixes, relparent=None, *,
                         walk=walk_tree,
                         _iter_files=iter_files,
                         ):
    """Yield each file in the tree that has the given suffixes.

    Unlike iter_files(), the results are in the original suffix order.
    """
    # NOTE(review): the "walk" parameter is accepted but never passed
    # on to _iter_files() -- confirm whether it should be forwarded.
    if isinstance(suffixes, str):
        suffixes = [suffixes]
    # XXX Ignore repeated suffixes?
    for suffix in suffixes:
        yield from _iter_files(root, suffix, relparent)
|
||||
|
||||
|
||||
def iter_cpython_files(*,
                       walk=walk_tree,
                       _files=iter_files_by_suffix,
                       ):
    """Yield each file in the tree for each of the given directory names."""
    # Files under these subtrees are skipped.
    excludedtrees = [
        os.path.join('Include', 'cpython', ''),
        ]
    def is_excluded(filename):
        # True if the (repo-relative) filename is in an excluded tree.
        for root in excludedtrees:
            if filename.startswith(root):
                return True
        return False
    # Filenames come back relative to REPO_ROOT, .c files before .h.
    for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
                           walk=walk,
                           ):
        if is_excluded(filename):
            continue
        yield filename
|
69
Tools/c-analyzer/c_analyzer_common/info.py
Normal file
69
Tools/c-analyzer/c_analyzer_common/info.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
from collections import namedtuple
|
||||
import re
|
||||
|
||||
from .util import classonly, _NTBase
|
||||
|
||||
|
||||
# Placeholder for a filename/funcname/declaration that is not known.
UNKNOWN = '???'

# A valid C (and Python) identifier.
NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
|
||||
|
||||
|
||||
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
    """A unique ID for a single symbol or declaration."""

    __slots__ = ()
    # XXX Add optional conditions (tuple of strings) field.
    #conditions = Slot()

    @classonly
    def from_raw(cls, raw):
        """Return an ID built from a raw value, if possible.

        A string becomes a bare name; a 1-tuple supplies the name and
        a 2-tuple a (filename, name) pair.  Anything else is handled
        by the base class.
        """
        if not raw:
            return None
        if isinstance(raw, str):
            return cls(None, None, raw)
        try:
            name, = raw
            filename = None
        except ValueError:
            try:
                filename, name = raw
            except ValueError:
                return super().from_raw(raw)
        return cls(filename, None, name)

    def __new__(cls, filename, funcname, name):
        # Coerce each field to str, mapping falsy values to None.
        self = super().__new__(
                cls,
                filename=str(filename) if filename else None,
                funcname=str(funcname) if funcname else None,
                name=str(name) if name else None,
                )
        #cls.conditions.set(self, tuple(str(s) if s else None
        #                               for s in conditions or ()))
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        if not self.name:
            raise TypeError('missing name')
        else:
            if not NAME_RE.match(self.name):
                raise ValueError(
                        f'name must be an identifier, got {self.name!r}')

        # Symbols from a binary might not have filename/funcname info.

        if self.funcname:
            if not self.filename:
                raise TypeError('missing filename')
            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
                # Fixed: this previously said "name", which misreported
                # which field was invalid.
                raise ValueError(
                        f'funcname must be an identifier, got {self.funcname!r}')

        # XXX Require the filename (at least UNKNOWN)?
        # XXX Check the filename?

    @property
    def islocal(self):
        # A function-local symbol has a (non-None) funcname.
        return self.funcname is not None
|
67
Tools/c-analyzer/c_analyzer_common/known.py
Normal file
67
Tools/c-analyzer/c_analyzer_common/known.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
import csv
|
||||
import os.path
|
||||
|
||||
from c_parser.info import Variable
|
||||
|
||||
from . import DATA_DIR
|
||||
from .info import ID, UNKNOWN
|
||||
from .util import read_tsv
|
||||
|
||||
|
||||
# The default location of the known-declarations data file.
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')

# The TSV columns (and header line) for known.tsv.
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
|
||||
|
||||
|
||||
# XXX need tests:
|
||||
# * from_file()
|
||||
|
||||
def from_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Return the info for known declarations in the given file.

    The result maps category name ('variables') to a dict of
    {ID: Variable}.  Raises ValueError for unsupported kinds.
    """
    known = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
        }
    for row in _read_tsv(infile, HEADER):
        filename, funcname, name, kind, declaration = row
        if not funcname or funcname == '-':
            funcname = None
        id = ID(filename, funcname, name)
        if kind == 'variable':
            values = known['variables']
            value = Variable(id, declaration)
            # Function-less entries are globals by definition.
            value._isglobal = _is_global(declaration) or id.funcname is None
        else:
            raise ValueError(f'unsupported kind in row {row}')
        if value.name == 'id' and declaration == UNKNOWN:
            # None of these are variables, so skip validation.
            # (Fixed: this branch used to rebind the local
            # "declaration" after the Variable was already built,
            # which had no effect.)
            pass
        else:
            value.validate()
        values[id] = value
    return known
|
||||
|
||||
|
||||
def _is_global(vartype):
|
||||
# statics
|
||||
if vartype.startswith('static '):
|
||||
return True
|
||||
if vartype.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
|
||||
return True
|
||||
if vartype.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
|
||||
return True
|
||||
if vartype.startswith('PyDoc_VAR('):
|
||||
return True
|
||||
if vartype.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
|
||||
return True
|
||||
if vartype.startswith('WRAP_METHOD('):
|
||||
return True
|
||||
# public extern
|
||||
if vartype.startswith('PyAPI_DATA('):
|
||||
return True
|
||||
return False
|
214
Tools/c-analyzer/c_analyzer_common/util.py
Normal file
214
Tools/c-analyzer/c_analyzer_common/util.py
Normal file
|
@ -0,0 +1,214 @@
|
|||
import csv
|
||||
import subprocess
|
||||
|
||||
|
||||
_NOT_SET = object()
|
||||
|
||||
|
||||
def run_cmd(argv, **kwargs):
    """Run the given command and return its captured stdout (text).

    Raises subprocess.CalledProcessError on a nonzero exit status.
    """
    completed = subprocess.run(argv,
                               stdout=subprocess.PIPE,
                               text=True,
                               check=True,
                               **kwargs)
    return completed.stdout
|
||||
|
||||
|
||||
def read_tsv(infile, header, *,
             _open=open,
             _get_reader=csv.reader,
             ):
    """Yield each row of the given TSV (tab-separated) file.

    "infile" may be a filename or an open file.  The first line must
    match "header" exactly or ValueError is raised.
    """
    if isinstance(infile, str):
        # A filename: open it and re-enter with the file object.
        with _open(infile, newline='') as opened:
            yield from read_tsv(opened, header,
                                _open=_open,
                                _get_reader=_get_reader,
                                )
        return
    lines = iter(infile)

    # Validate the header (an empty file fails too).
    actualheader = next(lines, '').strip()
    if actualheader != header:
        raise ValueError(f'bad header {actualheader!r}')

    for row in _get_reader(lines, delimiter='\t'):
        yield tuple(field.strip() for field in row)
|
||||
|
||||
|
||||
def write_tsv(outfile, header, rows, *,
              _open=open,
              _get_writer=csv.writer,
              ):
    """Write each of the rows to the given TSV (tab-separated) file.

    "outfile" may be a filename or an open file.  None values are
    written as empty fields; everything else is str()-ified.
    """
    if isinstance(outfile, str):
        # A filename: open it and re-enter with the file object.
        with _open(outfile, 'w', newline='') as opened:
            return write_tsv(opened, header, rows,
                             _open=_open,
                             _get_writer=_get_writer,
                             )

    if isinstance(header, str):
        header = header.split('\t')
    writer = _get_writer(outfile, delimiter='\t')
    writer.writerow(header)
    for row in rows:
        cells = ['' if value is None else str(value) for value in row]
        writer.writerow(cells)
|
||||
|
||||
|
||||
class Slot:
    """A descriptor that provides a slot.

    This is useful for types that can't have slots via __slots__,
    e.g. tuple subclasses.

    NOTE(review): per-instance values are keyed on id(obj), so they
    are never released when the object dies and a recycled id could
    alias a dead object's value -- confirm this is acceptable here.
    """

    __slots__ = ('initial', 'default', 'readonly', 'instances', 'name')

    def __init__(self, initial=_NOT_SET, *,
                 default=_NOT_SET,
                 readonly=False,
                 ):
        # "initial" seeds the value on first access; "default" is used
        # when there is no initial value and after deletion.
        self.initial = initial
        self.default = default
        self.readonly = readonly

        # Maps id(obj) -> the value for that instance.
        self.instances = {}
        self.name = None

    def __set_name__(self, cls, name):
        # A Slot instance may only be attached to one class attribute.
        if self.name is not None:
            raise TypeError('already used')
        self.name = name

    def __get__(self, obj, cls):
        if obj is None:  # called on the class
            return self
        try:
            value = self.instances[id(obj)]
        except KeyError:
            # First access: seed from "initial", else fall back to
            # "default" (which may itself be unset).
            if self.initial is _NOT_SET:
                value = self.default
            else:
                value = self.initial
            self.instances[id(obj)] = value
        if value is _NOT_SET:
            raise AttributeError(self.name)
        # XXX Optionally make a copy?
        return value

    def __set__(self, obj, value):
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        # XXX Optionally coerce?
        self.instances[id(obj)] = value

    def __delete__(self, obj):
        # Deleting resets to the default rather than removing the entry.
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        self.instances[id(obj)] = self.default

    def set(self, obj, value):
        """Update the cached value for an object.

        This works even if the descriptor is read-only.  This is
        particularly useful when initializing the object (e.g. in
        its __new__ or __init__).
        """
        self.instances[id(obj)] = value
|
||||
|
||||
|
||||
class classonly:
    """A non-data descriptor that makes a value only visible on the class.

    This is like the "classmethod" builtin, but does not show up on
    instances of the class.  It may be used as a decorator.
    """

    def __init__(self, value):
        self.value = value
        # Reuse classmethod's binding machinery for class access.
        self.getter = classmethod(value).__get__
        self.name = None

    def __set_name__(self, cls, name):
        # Each instance may only be attached to one attribute.
        if self.name is not None:
            raise TypeError('already used')
        self.name = name

    def __get__(self, obj, cls):
        if obj is not None:
            # Accessed on an instance: hide the attribute entirely.
            raise AttributeError(self.name)
        # called on the class
        return self.getter(None, cls)
|
||||
|
||||
|
||||
class _NTBase:
    # A mixin for namedtuple classes, adding alternate constructors,
    # validation hooks, and None-tolerant sorting.

    __slots__ = ()

    @classonly
    def from_raw(cls, raw):
        """Return an instance built from an arbitrary raw value.

        Falsy values map to None; existing instances pass through;
        strings go through from_string(); mappings and iterables are
        unpacked into the constructor.
        """
        if not raw:
            return None
        elif isinstance(raw, cls):
            return raw
        elif isinstance(raw, str):
            return cls.from_string(raw)
        else:
            if hasattr(raw, 'items'):
                return cls(**raw)
            try:
                args = tuple(raw)
            except TypeError:
                pass
            else:
                return cls(*args)
        raise NotImplementedError

    @classonly
    def from_string(cls, value):
        """Return a new instance based on the given string."""
        raise NotImplementedError

    @classmethod
    def _make(cls, iterable):  # The default _make() is not subclass-friendly.
        return cls.__new__(cls, *iterable)

    # XXX Always validate?
    #def __init__(self, *args, **kwargs):
    #    self.validate()

    # XXX The default __repr__() is not subclass-friendly (where the name changes).
    #def __repr__(self):
    #    _, _, sig = super().__repr__().partition('(')
    #    return f'{self.__class__.__name__}({sig}'

    # To make sorting work with None:
    def __lt__(self, other):
        try:
            return super().__lt__(other)
        except TypeError:
            # Treat a tuple containing None as smaller than one without.
            if None in self:
                return True
            elif None in other:
                return False
            else:
                raise

    def validate(self):
        # Subclasses override this to check their fields.
        return

    # XXX Always validate?
    #def _replace(self, **kwargs):
    #    obj = super()._replace(**kwargs)
    #    obj.validate()
    #    return obj
|
72
Tools/c-analyzer/c_globals/README
Normal file
72
Tools/c-analyzer/c_globals/README
Normal file
|
@ -0,0 +1,72 @@
|
|||
#######################################
|
||||
# C Globals and CPython Runtime State.
|
||||
|
||||
CPython's C code makes extensive use of global variables (whether static
|
||||
globals or static locals). Each such variable falls into one of several
|
||||
categories:
|
||||
|
||||
* strictly const data
|
||||
* used exclusively in main or in the REPL
|
||||
* process-global state (e.g. managing process-level resources
|
||||
like signals and file descriptors)
|
||||
* Python "global" runtime state
|
||||
* per-interpreter runtime state
|
||||
|
||||
The last one can be a problem as soon as anyone creates a second
|
||||
interpreter (AKA "subinterpreter") in a process. It is definitely a
|
||||
problem under subinterpreters if they are no longer sharing the GIL,
|
||||
since the GIL protects us from a lot of race conditions. Keep in mind
|
||||
that ultimately *all* objects (PyObject) should be treated as
|
||||
per-interpreter state. This includes "static types", freelists,
|
||||
_PyIdentifier, and singletons. Take that in for a second. It has
|
||||
significant implications on where we use static variables!
|
||||
|
||||
Be aware that module-global state (stored in C statics) is a kind of
|
||||
per-interpreter state. There have been efforts across many years, and
|
||||
still going, to provide extension module authors mechanisms to store
|
||||
that state safely (see PEPs 3121, 489, etc.).
|
||||
|
||||
(Note that there has been discussion around support for running multiple
|
||||
Python runtimes in the same process. That would end up with the same
|
||||
problems, relative to static variables, that subinterpreters have.)
|
||||
|
||||
Historically we have been bad at keeping per-interpreter state out of
|
||||
static variables, mostly because until recently subinterpreters were
|
||||
not widely used nor even factored in to solutions. However, the
|
||||
feature is growing in popularity and use in the community.
|
||||
|
||||
Mandate: "Eliminate use of static variables for per-interpreter state."
|
||||
|
||||
The "c-globals.py" script in this directory, along with its accompanying
|
||||
data files, are part of the effort to resolve existing problems with
|
||||
our use of static variables and to prevent future problems.
|
||||
|
||||
#-------------------------
|
||||
## statics for actually-global state (and runtime state consolidation)
|
||||
|
||||
In general, holding any kind of state in static variables
|
||||
increases maintenance burden and increases the complexity of code (e.g.
|
||||
we use TSS to identify the active thread state). So it is a good idea
|
||||
to avoid using statics for state even if for the "global" runtime or
|
||||
for process-global state.
|
||||
|
||||
Relative to maintenance burden, one problem is where the runtime
|
||||
state is spread throughout the codebase in dozens of individual
|
||||
globals. Unlike the other globals, the runtime state represents a set
|
||||
of values that are constantly shifting in a complex way. When they are
|
||||
spread out it's harder to get a clear picture of what the runtime
|
||||
involves. Furthermore, when they are spread out it complicates efforts
|
||||
that change the runtime.
|
||||
|
||||
Consequently, the globals for Python's runtime state have been
|
||||
consolidated under a single top-level _PyRuntime global. No new globals
|
||||
should be added for runtime state. Instead, they should be added to
|
||||
_PyRuntimeState or one of its sub-structs. The tools in this directory
|
||||
are run as part of the test suite to ensure that no new globals have
|
||||
been added. The script can be run manually as well:
|
||||
|
||||
./python Tools/c-analyzer/c-globals.py check
|
||||
|
||||
If it reports any globals then they should be resolved. If the globals
|
||||
are runtime state then they should be folded into _PyRuntimeState.
|
||||
Otherwise they should be marked as ignored.
|
0
Tools/c-analyzer/c_globals/__init__.py
Normal file
0
Tools/c-analyzer/c_globals/__init__.py
Normal file
209
Tools/c-analyzer/c_globals/__main__.py
Normal file
209
Tools/c-analyzer/c_globals/__main__.py
Normal file
|
@ -0,0 +1,209 @@
|
|||
import argparse
|
||||
import os.path
|
||||
import re
|
||||
import sys
|
||||
|
||||
from c_analyzer_common import SOURCE_DIRS, REPO_ROOT
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
from c_analyzer_common.known import (
|
||||
from_file as known_from_file,
|
||||
DATA_FILE as KNOWN_FILE,
|
||||
)
|
||||
from . import find, show
|
||||
from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object
|
||||
|
||||
|
||||
def _match_unused_global(variable, knownvars, used):
|
||||
found = []
|
||||
for varid in knownvars:
|
||||
if varid in used:
|
||||
continue
|
||||
if varid.funcname is not None:
|
||||
continue
|
||||
if varid.name != variable.name:
|
||||
continue
|
||||
if variable.filename and variable.filename != UNKNOWN:
|
||||
if variable.filename == varid.filename:
|
||||
found.append(varid)
|
||||
else:
|
||||
found.append(varid)
|
||||
return found
|
||||
|
||||
|
||||
def _check_results(unknown, knownvars, used):
    """Audit the symbol-matching results and fail on inconsistencies.

    "unknown" is the set of variables whose type could not be resolved
    (it is mutated in place for a couple of hard-coded names), "knownvars"
    is the full set of known variable IDs, and "used" is the set of IDs
    that were matched against a symbol.  Problems are printed; an
    Exception is raised if known vars went unused or unknowns remain.
    """
    badknown = set()
    # Iterate over a sorted copy, so removing from "unknown" below is safe.
    for variable in sorted(unknown):
        msg = None
        if variable.funcname != UNKNOWN:
            msg = f'could not find global symbol {variable.id}'
        elif m := _match_unused_global(variable, knownvars, used):
            assert isinstance(m, list)
            badknown.update(m)
        elif variable.name in ('completed', 'id'):  # XXX Figure out where these variables are.
            unknown.remove(variable)
        else:
            msg = f'could not find local symbol {variable.id}'
        if msg:
            #raise Exception(msg)
            print(msg)
    if badknown:
        print('---')
        print(f'{len(badknown)} globals in known.tsv, but may actually be local:')
        for varid in sorted(badknown):
            print(f'{varid.filename:30} {varid.name}')
    # "id" is excluded for the same unresolved reason as above.
    unused = sorted(varid
                    for varid in set(knownvars) - used
                    if varid.name != 'id')  # XXX Figure out where these variables are.
    if unused:
        print('---')
        print(f'did not use {len(unused)} known vars:')
        for varid in unused:
            print(f'{varid.filename:30} {varid.funcname or "-":20} {varid.name}')
        raise Exception('not all known symbols used')
    if unknown:
        print('---')
        raise Exception('could not find all symbols')
|
||||
|
||||
|
||||
def _find_globals(dirnames, known, ignored):
    """Yield (variable, is-supported) for each global found in the binary.

    "known" and "ignored" are filenames; they are loaded here.  Variables
    whose type could not be resolved are collected but not yielded.
    """
    if dirnames == SOURCE_DIRS:
        # Keep reported paths short/stable relative to the repo root.
        dirnames = [os.path.relpath(d, REPO_ROOT) for d in dirnames]

    ignored = ignored_from_file(ignored)
    known = known_from_file(known)

    used = set()
    unknown = set()
    knownvars = (known or {}).get('variables')
    for variable in find.globals_from_binary(knownvars=knownvars,
                                             dirnames=dirnames):
    #for variable in find.globals(dirnames, known, kind='platform'):
        if variable.vartype == UNKNOWN:
            unknown.add(variable)
            continue
        yield variable, is_supported(variable, ignored, known)
        used.add(variable.id)

    # Auditing is disabled for now (see _check_results above).
    #_check_results(unknown, knownvars, used)
|
||||
|
||||
|
||||
def cmd_check(cmd, dirs=SOURCE_DIRS, *,
              ignored=IGNORED_FILE,
              known=KNOWN_FILE,
              _find=_find_globals,
              _show=show.basic,
              _print=print,
              ):
    """
    Fail if there are unsupported globals variables.

    In the failure case, the list of unsupported variables
    will be printed out.
    """
    unsupported = [variable
                   for variable, okay in _find(dirs, known, ignored)
                   if not okay]
    if unsupported:
        _print('ERROR: found unsupported global variables')
        _print()
        _show(sorted(unsupported))
        _print(f' ({len(unsupported)} total)')
        sys.exit(1)
    #_print('okay')
|
||||
|
||||
|
||||
def cmd_show(cmd, dirs=SOURCE_DIRS, *,
             ignored=IGNORED_FILE,
             known=KNOWN_FILE,
             skip_objects=False,
             _find=_find_globals,
             _show=show.basic,
             _print=print,
             ):
    """
    Print out the list of found global variables.

    The variables will be distinguished as "supported" or "unsupported".
    """
    supported = []
    unsupported = []
    for found, okay in _find(dirs, known, ignored):
        # XXX Support proper filters instead.
        if skip_objects and _is_object(found.vartype):
            continue
        (supported if okay else unsupported).append(found)

    for label, group in (('supported', supported),
                         ('unsupported', unsupported)):
        _print(f'{label}:')
        _print('-' * (len(label) + 1))
        _show(sorted(group))
        _print(f' ({len(group)} total)')
        if label == 'supported':
            _print()
|
||||
|
||||
|
||||
#############################
|
||||
# the script
|
||||
|
||||
# Map of sub-command name to its handler function.
COMMANDS = {
    'check': cmd_check,
    'show': cmd_show,
}

# A fixed name is used (rather than the dead `PROG = sys.argv[0]`
# assignment this replaced, which was immediately overwritten) so that
# usage messages stay short and stable regardless of invocation path.
PROG = 'c-globals.py'
|
||||
|
||||
|
||||
def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None):
    """Parse the command line and return (cmd, kwargs-for-cmd).

    NOTE: the "argv" default is evaluated once at import time
    (sys.argv[1:] at module load); pass argv explicitly for testing.
    "_fail" defaults to the parser's own error() (exits with usage).
    """
    # Options shared by every sub-command.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument('--ignored', metavar='FILE',
                        default=IGNORED_FILE,
                        help='path to file that lists ignored vars')
    common.add_argument('--known', metavar='FILE',
                        default=KNOWN_FILE,
                        help='path to file that lists known types')
    common.add_argument('dirs', metavar='DIR', nargs='*',
                        default=SOURCE_DIRS,
                        help='a directory to check')

    parser = argparse.ArgumentParser(
            prog=prog,
            )
    subs = parser.add_subparsers(dest='cmd')

    check = subs.add_parser('check', parents=[common])

    show = subs.add_parser('show', parents=[common])
    show.add_argument('--skip-objects', action='store_true')

    if _fail is None:
        def _fail(msg):
            parser.error(msg)

    # Now parse the args.
    args = parser.parse_args(argv)
    ns = vars(args)

    # Everything left in "ns" becomes the command's keyword arguments.
    cmd = ns.pop('cmd')
    if not cmd:
        _fail('missing command')

    return cmd, ns
|
||||
|
||||
|
||||
def main(cmd, cmdkwargs=None, *, _COMMANDS=COMMANDS):
    """Look up and run the requested command with the given kwargs."""
    if cmd not in _COMMANDS:
        raise ValueError(
                f'unsupported cmd {cmd!r}' if cmd else 'missing cmd')
    _COMMANDS[cmd](cmd, **cmdkwargs or {})
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Parse the command line and dispatch to the requested sub-command.
    cmd, cmdkwargs = parse_args()
    main(cmd, cmdkwargs)
|
95
Tools/c-analyzer/c_globals/find.py
Normal file
95
Tools/c-analyzer/c_globals/find.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
from c_analyzer_common import SOURCE_DIRS
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
from c_symbols import (
|
||||
info as s_info,
|
||||
binary as b_symbols,
|
||||
source as s_symbols,
|
||||
resolve,
|
||||
)
|
||||
from c_parser import info, declarations
|
||||
|
||||
|
||||
# XXX needs tests:
|
||||
# * iter_variables
|
||||
|
||||
def globals_from_binary(binfile=b_symbols.PYTHON, *,
                        knownvars=None,
                        dirnames=None,
                        _iter_symbols=b_symbols.iter_symbols,
                        _resolve=resolve.symbols_to_variables,
                        _get_symbol_resolver=resolve.get_resolver,
                        ):
    """Yield a Variable for each found Symbol.

    Details are filled in from the given "known" variables and types.
    The underscore-prefixed keyword args exist for dependency injection
    in tests.
    """
    symbols = _iter_symbols(binfile, find_local_symbol=None)
    #symbols = list(symbols)
    for variable in _resolve(symbols,
                             resolve=_get_symbol_resolver(knownvars, dirnames),
                             ):
        # Skip each non-global variable (unless we couldn't find it).
        # XXX Drop the "UNKNOWN" condition?
        if not variable.isglobal and variable.vartype != UNKNOWN:
            continue
        yield variable
|
||||
|
||||
|
||||
def globals_from_declarations(dirnames=SOURCE_DIRS, *,
                              known=None,
                              ):
    """Yield a Variable for each found declaration.

    Details are filled in from the given "known" variables and types.
    """
    # Not implemented yet; globals_from_binary() is the working
    # (symbol-based) alternative.
    raise NotImplementedError
|
||||
|
||||
|
||||
def iter_variables(kind='platform', *,
                   known=None,
                   dirnames=None,
                   _resolve_symbols=resolve.symbols_to_variables,
                   _get_symbol_resolver=resolve.get_resolver,
                   _symbols_from_binary=b_symbols.iter_symbols,
                   _symbols_from_source=s_symbols.iter_symbols,
                   _iter_raw=declarations.iter_all,
                   _iter_preprocessed=declarations.iter_preprocessed,
                   ):
    """Yield a Variable for each one found (e.g. in files).

    "kind" selects the discovery strategy: 'symbols' (from source),
    'platform' (from the built binary), 'declarations' (raw parse), or
    'preprocessed'.  The underscore-prefixed args are for injection.
    """
    kind = kind or 'platform'

    if kind == 'symbols':
        knownvars = (known or {}).get('variables')
        yield from _resolve_symbols(
                _symbols_from_source(dirnames, known),
                resolve=_get_symbol_resolver(knownvars, dirnames),
                )
    elif kind == 'platform':
        knownvars = (known or {}).get('variables')
        # NOTE(review): no binary file is passed here, unlike
        # globals_from_binary() -- confirm b_symbols.iter_symbols has a
        # usable default.
        yield from _resolve_symbols(
                _symbols_from_binary(find_local_symbol=None),
                resolve=_get_symbol_resolver(knownvars, dirnames),
                )
    elif kind == 'declarations':
        for decl in _iter_raw(dirnames):
            if not isinstance(decl, info.Variable):
                continue
            yield decl
    elif kind == 'preprocessed':
        for decl in _iter_preprocessed(dirnames):
            if not isinstance(decl, info.Variable):
                continue
            yield decl
    else:
        raise ValueError(f'unsupported kind {kind!r}')
|
||||
|
||||
|
||||
def globals(dirnames, known, *,
            kind=None,  # Use the default.
            _iter_variables=iter_variables,
            ):
    """Return a list of (StaticVar, <supported>) for each found global var."""
    # NOTE(review): despite the docstring, this is a generator that
    # yields bare variables (not (var, supported) pairs).
    # NOTE: the name shadows the builtin globals().
    for found in _iter_variables(kind, known=known, dirnames=dirnames):
        if not found.isglobal:
            continue
        yield found
|
16
Tools/c-analyzer/c_globals/show.py
Normal file
16
Tools/c-analyzer/c_globals/show.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
|
||||
def basic(globals, *,
          _print=print):
    """Print each variable as one left-aligned "location  vartype" line."""
    for variable in globals:
        # Location is <filename>[:<funcname>()]:<name>.
        if variable.funcname:
            location = f'{variable.filename}:{variable.funcname}():{variable.name}'
        else:
            location = f'{variable.filename}:{variable.name}'
        _print(f'{location:<64} {variable.vartype}')
|
368
Tools/c-analyzer/c_globals/supported.py
Normal file
368
Tools/c-analyzer/c_globals/supported.py
Normal file
|
@ -0,0 +1,368 @@
|
|||
import os.path
|
||||
import re
|
||||
|
||||
from c_analyzer_common import DATA_DIR
|
||||
from c_analyzer_common.info import ID
|
||||
from c_analyzer_common.util import read_tsv, write_tsv
|
||||
|
||||
|
||||
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
|
||||
|
||||
IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
|
||||
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
|
||||
|
||||
# XXX Move these to ignored.tsv.
|
||||
IGNORED = {
|
||||
# global
|
||||
'PyImport_FrozenModules': 'process-global',
|
||||
'M___hello__': 'process-global',
|
||||
'inittab_copy': 'process-global',
|
||||
'PyHash_Func': 'process-global',
|
||||
'_Py_HashSecret_Initialized': 'process-global',
|
||||
'_TARGET_LOCALES': 'process-global',
|
||||
|
||||
# startup (only changed before/during)
|
||||
'_PyRuntime': 'runtime startup',
|
||||
'runtime_initialized': 'runtime startup',
|
||||
'static_arg_parsers': 'runtime startup',
|
||||
'orig_argv': 'runtime startup',
|
||||
'opt_ptr': 'runtime startup',
|
||||
'_preinit_warnoptions': 'runtime startup',
|
||||
'_Py_StandardStreamEncoding': 'runtime startup',
|
||||
'Py_FileSystemDefaultEncoding': 'runtime startup',
|
||||
'_Py_StandardStreamErrors': 'runtime startup',
|
||||
'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
|
||||
'Py_BytesWarningFlag': 'runtime startup',
|
||||
'Py_DebugFlag': 'runtime startup',
|
||||
'Py_DontWriteBytecodeFlag': 'runtime startup',
|
||||
'Py_FrozenFlag': 'runtime startup',
|
||||
'Py_HashRandomizationFlag': 'runtime startup',
|
||||
'Py_IgnoreEnvironmentFlag': 'runtime startup',
|
||||
'Py_InspectFlag': 'runtime startup',
|
||||
'Py_InteractiveFlag': 'runtime startup',
|
||||
'Py_IsolatedFlag': 'runtime startup',
|
||||
'Py_NoSiteFlag': 'runtime startup',
|
||||
'Py_NoUserSiteDirectory': 'runtime startup',
|
||||
'Py_OptimizeFlag': 'runtime startup',
|
||||
'Py_QuietFlag': 'runtime startup',
|
||||
'Py_UTF8Mode': 'runtime startup',
|
||||
'Py_UnbufferedStdioFlag': 'runtime startup',
|
||||
'Py_VerboseFlag': 'runtime startup',
|
||||
'_Py_path_config': 'runtime startup',
|
||||
'_PyOS_optarg': 'runtime startup',
|
||||
'_PyOS_opterr': 'runtime startup',
|
||||
'_PyOS_optind': 'runtime startup',
|
||||
'_Py_HashSecret': 'runtime startup',
|
||||
|
||||
# REPL
|
||||
'_PyOS_ReadlineLock': 'repl',
|
||||
'_PyOS_ReadlineTState': 'repl',
|
||||
|
||||
# effectively const
|
||||
'tracemalloc_empty_traceback': 'const',
|
||||
'_empty_bitmap_node': 'const',
|
||||
'posix_constants_pathconf': 'const',
|
||||
'posix_constants_confstr': 'const',
|
||||
'posix_constants_sysconf': 'const',
|
||||
'_PySys_ImplCacheTag': 'const',
|
||||
'_PySys_ImplName': 'const',
|
||||
'PyImport_Inittab': 'const',
|
||||
'_PyImport_DynLoadFiletab': 'const',
|
||||
'_PyParser_Grammar': 'const',
|
||||
'Py_hexdigits': 'const',
|
||||
'_PyImport_Inittab': 'const',
|
||||
'_PyByteArray_empty_string': 'const',
|
||||
'_PyLong_DigitValue': 'const',
|
||||
'_Py_SwappedOp': 'const',
|
||||
'PyStructSequence_UnnamedField': 'const',
|
||||
|
||||
# signals are main-thread only
|
||||
'faulthandler_handlers': 'signals are main-thread only',
|
||||
'user_signals': 'signals are main-thread only',
|
||||
'wakeup': 'signals are main-thread only',
|
||||
|
||||
# hacks
|
||||
'_PySet_Dummy': 'only used as a placeholder',
|
||||
}
|
||||
|
||||
BENIGN = 'races here are benign and unlikely'
|
||||
|
||||
|
||||
def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    A variable is okay if it is explicitly ignored (by ID or name) or
    if its type is inherently safe (e.g. const, atomic).  "ignored" may
    be None; previously the second branch called ignored.get('types')
    unguarded and crashed with AttributeError in that case.
    """
    if _ignored(variable,
                ignored.get('variables') if ignored else None):
        return True
    elif _vartype_okay(variable.vartype,
                       ignored.get('types') if ignored else None):
        return True
    else:
        return False
|
||||
|
||||
|
||||
def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    "ignoredvars" maps variable IDs to reasons (from ignored.tsv);
    "_IGNORED" maps bare names to reasons and applies only to
    file-level (non-function) variables.  The per-file checks below
    are a hand-maintained audit of specific CPython source files.
    """
    # Explicitly-listed IDs win first.
    if ignoredvars and (reason := ignoredvars.get(variable.id)):
        return reason

    if variable.funcname is None:
        if reason := _IGNORED.get(variable.name):
            return reason

    # compiler
    if variable.filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    if variable.filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    if variable.filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith('_field'):
            return 'compiler'
        if variable.name.endswith('_attribute'):
            return 'compiler'

    # other
    if variable.filename == 'Python/dtoa.c':
        # guarded by lock?
        if variable.name in ('p5s', 'freelist'):
            return 'dtoa is thread-safe?'
        if variable.name in ('private_mem', 'pmem_next'):
            return 'dtoa is thread-safe?'
    if variable.filename == 'Python/thread.c':
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        if variable.name in ('initialized', 'thread_debug'):
            return 'thread-safe'
    if variable.filename == 'Python/getversion.c':
        if variable.name == 'version':
            # Races are benign here, as well as unlikely.
            return BENIGN
    if variable.filename == 'Python/fileutils.c':
        if variable.name == 'force_ascii':
            return BENIGN
        if variable.name == 'ioctl_works':
            return BENIGN
        if variable.name == '_Py_open_cloexec_works':
            return BENIGN
    if variable.filename == 'Python/codecs.c':
        if variable.name == 'ucnhash_CAPI':
            return BENIGN
    if variable.filename == 'Python/bootstrap_hash.c':
        if variable.name == 'getrandom_works':
            return BENIGN
    if variable.filename == 'Objects/unicodeobject.c':
        if variable.name == 'ucnhash_CAPI':
            return BENIGN
        if variable.name == 'bloom_linebreak':
            # *mostly* benign
            return BENIGN
    if variable.filename == 'Modules/getbuildinfo.c':
        if variable.name == 'buildinfo':
            # The static is used for pre-allocation.
            return BENIGN
    if variable.filename == 'Modules/posixmodule.c':
        if variable.name == 'ticks_per_second':
            return BENIGN
        if variable.name == 'dup3_works':
            return BENIGN
    if variable.filename == 'Modules/timemodule.c':
        if variable.name == 'ticks_per_second':
            return BENIGN
    if variable.filename == 'Objects/longobject.c':
        if variable.name == 'log_base_BASE':
            return BENIGN
        if variable.name == 'convwidth_base':
            return BENIGN
        if variable.name == 'convmultmax_base':
            return BENIGN

    return None
|
||||
|
||||
|
||||
def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason string if the C type itself is inherently okay.

    Return None for object types and anything not recognized as safe.
    NOTE(review): "ignoredtypes" is accepted but never used here.
    """
    # Object types are never okay on type grounds alone.
    if _is_object(vartype):
        return None

    if vartype.startswith('static const '):
        return 'const'
    if vartype.startswith('const '):
        return 'const'

    # components for TypeObject definitions
    for name in ('PyMethodDef', 'PyGetSetDef', 'PyMemberDef'):
        if name in vartype:
            return 'const'
    for name in ('PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
                 'PyBufferProcs', 'PyAsyncMethods'):
        if name in vartype:
            return 'const'
    for name in ('slotdef', 'newfunc'):
        if name in vartype:
            return 'const'

    # structseq
    for name in ('PyStructSequence_Desc', 'PyStructSequence_Field'):
        if name in vartype:
            return 'const'

    # other definitions
    if 'PyModuleDef' in vartype:
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype:
        return 'thread-safe'
    if 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # global
#    if 'PyMemAllocatorEx' in vartype:
#        return True

    # others
#    if 'PyThread_type_lock' in vartype:
#        return True

    # XXX ???
    # _Py_tss_t
    # _Py_hashtable_t
    # stack_t
    # _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    #raise NotImplementedError
    return None
|
||||
|
||||
|
||||
def _is_object(vartype):
|
||||
if re.match(r'.*\bPy\w*Object\b', vartype):
|
||||
return True
|
||||
if '_PyArg_Parser ' in vartype:
|
||||
return True
|
||||
if vartype.startswith(('_Py_IDENTIFIER(', 'static _Py_Identifier',
|
||||
'_Py_static_string(')):
|
||||
return True
|
||||
if 'traceback_t' in vartype:
|
||||
return True
|
||||
if 'PyAsyncGenASend' in vartype:
|
||||
return True
|
||||
if '_PyAsyncGenWrappedValue' in vartype:
|
||||
return True
|
||||
if 'PyContext' in vartype:
|
||||
return True
|
||||
if 'method_cache_entry' in vartype:
|
||||
return True
|
||||
if vartype.startswith('static identifier '):
|
||||
return True
|
||||
if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
|
||||
return True
|
||||
|
||||
# XXX Add more?
|
||||
|
||||
#for part in vartype.split():
|
||||
# # XXX const is automatic True?
|
||||
# if part == 'PyObject' or part.startswith('PyObject['):
|
||||
# return True
|
||||
return False
|
||||
|
||||
|
||||
def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored variables loaded from the given tsv file.

    The result maps kind (currently only "variables") to {ID: reason}.
    """
    ignored = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        # "-" (or empty) means a file-level variable.
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind != 'variable':
            raise ValueError(f'unsupported kind in row {row}')
        ignored['variables'][varid] = reason
    return ignored
|
||||
|
||||
|
||||
##################################
|
||||
# generate
|
||||
|
||||
def _get_row(varid, reason):
|
||||
return (
|
||||
varid.filename,
|
||||
varid.funcname or '-',
|
||||
varid.name,
|
||||
'variable',
|
||||
str(reason),
|
||||
)
|
||||
|
||||
|
||||
def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield an ignored.tsv row for each supported variable.

    The print() calls are intentional progress output; this module is
    run manually as a script to (re)generate the data file.
    """
    count = 0
    for variable in variables:
        reason = _is_ignored(variable,
                             ignored and ignored.get('variables'),
                             )
        if not reason:
            reason = _vartype_okay(variable.vartype,
                                   ignored and ignored.get('types'))
        if not reason:
            # Not supported, so it does not belong in ignored.tsv.
            continue

        print(' ', variable, repr(reason))
        yield _as_row(variable.id, reason)
        count += 1
    print(f'total: {count}')
|
||||
|
||||
|
||||
def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write an ignored.tsv-style file for the given variables."""
    # Default to a ".new" sibling so the real data file isn't clobbered.
    target = filename if filename else IGNORED_FILE + '.new'
    _write_tsv(target, IGNORED_HEADER, _generate_rows(variables))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Regenerate the ignored-vars data file from the known variables
    # found in the built python binary.
    from c_analyzer_common import SOURCE_DIRS
    from c_analyzer_common.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
    )
    from . import find
    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    _generate_ignored_file(variables)
|
0
Tools/c-analyzer/c_parser/__init__.py
Normal file
0
Tools/c-analyzer/c_parser/__init__.py
Normal file
295
Tools/c-analyzer/c_parser/declarations.py
Normal file
295
Tools/c-analyzer/c_parser/declarations.py
Normal file
|
@ -0,0 +1,295 @@
|
|||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
|
||||
from . import source
|
||||
|
||||
|
||||
# Regexes used to (roughly) recognize C declarations.  This is a naive,
# best-effort recognizer, not a full C parser.

# A C identifier.  (The previous pattern's "[a-zA-z]" class accidentally
# included the punctuation between "Z" and "a" and matched only a single
# character; it now matches a full identifier.)
IDENTIFIER = r'(?:[a-zA-Z]\w*|_+[a-zA-Z0-9]\w*)'

TYPE_QUAL = r'(?:const|volatile)'

# The type part of a variable declaration.  This must be an f-string so
# that {IDENTIFIER} is actually interpolated; as a plain raw string the
# literal text "{IDENTIFIER}" ended up in the compiled patterns.  A stray
# "|" that let the integer-type group match the empty string was dropped.
VAR_TYPE_SPEC = rf'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          )
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''

POINTER = rf'''(?:
        (?:\s+const)?\s*[*]
        )'''

# The start of a function definition (or other global declaration).
FUNC_START = rf'''(?:
        (?:
          (?:
            extern |
            static |
            static\s+inline
           )\s+
         )?
        #(?:const\s+)?
        {VAR_TYPE_SPEC}
        )'''
GLOBAL_DECL_START_RE = re.compile(rf'''
        ^
        (?:
            ({FUNC_START})
         )
        ''', re.VERBOSE)

# The start of a variable declaration inside a function body.
LOCAL_VAR_START = rf'''(?:
        (?:
          (?:
            register |
            static
           )\s+
         )?
        (?:
          (?:
            {TYPE_QUAL}
            (?:\s+{TYPE_QUAL})?
           )\s+
         )?
        {VAR_TYPE_SPEC}
        {POINTER}?
        )'''
LOCAL_STMT_START_RE = re.compile(rf'''
        ^
        (?:
            ({LOCAL_VAR_START})
         )
        ''', re.VERBOSE)
|
||||
|
||||
|
||||
def iter_global_declarations(lines):
    """Yield (decl, body) for each global declaration in the given lines.

    For function definitions the header is reduced to one line and
    the body is provided as-is.  For other compound declarations (e.g.
    struct) the entire declaration is reduced to one line and "body"
    is None.  Likewise for simple declarations (e.g. variables).

    Declarations inside function bodies are ignored, though their text
    is provided in the function body.

    NOTE(review): as written, only function definitions are actually
    yielded -- simple and non-function compound declarations are
    skipped by the two "continue"s below.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not GLOBAL_DECL_START_RE.match(line):
            continue
        # We only need functions here, since we only need locals for now.
        if line.endswith(';'):
            continue
        if line.endswith('{') and '(' not in line:
            continue

        # Capture the function.
        # (assume no func is a one-liner)
        decl = line
        while '{' not in line:  # assume no inline structs, etc.
            try:
                line = next(lines)
            except StopIteration:
                return
            decl += ' ' + line

        body, end = _extract_block(lines)
        if end is None:
            return
        assert end == '}'
        yield (f'{decl}\n{body}\n{end}', body)
|
||||
|
||||
|
||||
def iter_local_statements(lines):
    """Yield (lines, blocks) for each statement in the given lines.

    For simple statements, "blocks" is None and the statement is reduced
    to a single line.  For compound statements, "blocks" is a pair of
    (header, body) for each block in the statement.  The headers are
    reduced to a single line each, but the bodies are provided as-is.

    NOTE(review): compound and multiline simple statements are not
    supported yet (see the XXX below), so "blocks" is always None.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not LOCAL_STMT_START_RE.match(line):
            continue

        stmt = line
        blocks = None
        if not line.endswith(';'):
            # XXX Support compound & multiline simple statements.
            #blocks = []
            continue

        yield (stmt, blocks)
|
||||
|
||||
|
||||
def _extract_block(lines):
|
||||
end = None
|
||||
depth = 1
|
||||
body = []
|
||||
for line in lines:
|
||||
depth += line.count('{') - line.count('}')
|
||||
if depth == 0:
|
||||
end = line
|
||||
break
|
||||
body.append(line)
|
||||
return '\n'.join(body), end
|
||||
|
||||
|
||||
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition."""
    # Split the full definition into header / body / closing brace.
    header, _, end = stmt.partition(body)
    assert end.strip() == '}'
    assert header.strip().endswith('{')
    header = header.rpartition('{')[0]

    # Collapse the (possibly multi-line) header onto one line.
    signature = ' '.join(header.strip().splitlines())

    # The name is the last word before the parameter list.
    name = signature.split('(')[0].strip().rpartition(' ')[-1]
    assert name

    return name, signature
|
||||
|
||||
|
||||
def parse_var(stmt):
    """Return (name, vartype) for the given variable declaration."""
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    # The regex match is the leading type; the rest holds the name.
    vartype = m.group(0)
    name = stmt[len(vartype):].partition('=')[0].strip()

    if name.startswith('('):
        # A function pointer, e.g. "void (*handler)(int)".
        name, _, after = name[1:].partition(')')
        assert after
        # Ensure a space after each "*" so rpartition() can split it off.
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'

    vartype = vartype.strip()
    # Normalize runs of spaces down to single spaces.
    while '  ' in vartype:
        vartype = vartype.replace('  ', ' ')

    return name, vartype
|
||||
|
||||
|
||||
def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement."""
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
|
||||
|
||||
|
||||
def iter_variables(filename, *,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    "funcname" is None for file-level variables.  The underscore-prefixed
    keyword args exist for dependency injection in tests.
    """
    lines = _iter_source_lines(filename)
    for stmt, body in _iter_global(lines):
        # At the file top-level we only have to worry about vars & funcs.
        if not body:
            name, vartype = _parse_var(stmt)
            if name:
                yield (None, name, vartype)
        else:
            funcname, _ = _parse_func(stmt, body)
            localvars = _iter_locals(body,
                                     _iter_statements=_iter_local,
                                     _parse_var=_parse_var,
                                     _parse_compound=_parse_compound,
                                     )
            for name, vartype in localvars:
                yield (funcname, name, vartype)
|
||||
|
||||
|
||||
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each local variable in the given body.

    Bodies of nested compound statements are queued and processed
    breadth-first.
    """
    compound = [lines]
    while compound:
        body = compound.pop(0)
        bodylines = body.splitlines()
        for stmt, blocks in _iter_statements(bodylines):
            if not blocks:
                # A simple (one-line) declaration.
                name, vartype = _parse_var(stmt)
                if name:
                    yield (name, vartype)
            else:
                # Declarations may hide in compound-statement headers
                # (e.g. "for (int i = 0; ...)"); queue the bodies.
                headers, bodies = _parse_compound(stmt, blocks)
                for header in headers:
                    for line in header:
                        name, vartype = _parse_var(line)
                        if name:
                            yield (name, vartype)
                compound.extend(bodies)
|
||||
|
||||
|
||||
def iter_all(dirnames):
    """Yield a Declaration for each one found.

    If there are duplicates, due to preprocessor conditionals, then
    they are checked to make sure they are the same.
    """
    # Not implemented yet.
    raise NotImplementedError
|
||||
|
||||
|
||||
def iter_preprocessed(dirnames):
    """Yield a Declaration for each one found.

    All source files are run through the preprocessor first.
    """
    # Not implemented yet.
    raise NotImplementedError
|
78
Tools/c-analyzer/c_parser/info.py
Normal file
78
Tools/c-analyzer/c_parser/info.py
Normal file
|
@ -0,0 +1,78 @@
|
|||
from collections import namedtuple
|
||||
|
||||
from c_analyzer_common import info, util
|
||||
from c_analyzer_common.util import classonly, _NTBase
|
||||
|
||||
|
||||
def normalize_vartype(vartype):
    """Return the canonical form for a variable type (or func signature)."""
    # We deliberately let the empty string through (only None maps to None).
    if vartype is None:
        return None

    # XXX finish!
    # XXX Return (modifiers, type, pointer)?
    return str(vartype)
|
||||
|
||||
|
||||
class Variable(_NTBase,
               namedtuple('Variable', 'id vartype')):
    """Information about a single variable declaration."""

    __slots__ = ()
    # Cache slot backing the lazily-computed "isglobal" property below.
    _isglobal = util.Slot()

    @classonly
    def from_parts(cls, filename, funcname, name, vartype, isglobal=False):
        # Alternate constructor building the ID from its parts.
        id = info.ID(filename, funcname, name)
        self = cls(id, vartype)
        if isglobal:
            self._isglobal = True
        return self

    def __new__(cls, id, vartype):
        # Coerce raw values; an empty/None vartype becomes None.
        self = super().__new__(
                cls,
                id=info.ID.from_raw(id),
                vartype=normalize_vartype(vartype) if vartype else None,
                )
        return self

    def __hash__(self):
        # Identity is determined by the ID alone, not the vartype.
        return hash(self.id)

    def __getattr__(self, name):
        # Delegate attribute lookups (filename/funcname/name) to the ID.
        return getattr(self.id, name)

    def _validate_id(self):
        # Raise TypeError if the ID is missing or incomplete.
        if not self.id:
            raise TypeError('missing id')

        if not self.filename or self.filename == info.UNKNOWN:
            raise TypeError(f'id missing filename ({self.id})')

        if self.funcname and self.funcname == info.UNKNOWN:
            raise TypeError(f'id missing funcname ({self.id})')

        self.id.validate()

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        self._validate_id()

        if self.vartype is None or self.vartype == info.UNKNOWN:
            raise TypeError('missing vartype')

    @property
    def isglobal(self):
        # Computed once and cached in the _isglobal slot.
        try:
            return self._isglobal
        except AttributeError:
            # XXX Include extern variables.
            # XXX Ignore functions.
            self._isglobal = ('static' in self.vartype.split())
            return self._isglobal

    @property
    def isconst(self):
        # True if "const" appears as a whole word in the type.
        return 'const' in self.vartype.split()
|
180
Tools/c-analyzer/c_parser/naive.py
Normal file
180
Tools/c-analyzer/c_parser/naive.py
Normal file
|
@ -0,0 +1,180 @@
|
|||
import re
|
||||
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
|
||||
from .info import Variable
|
||||
from .preprocessor import _iter_clean_lines
|
||||
|
||||
|
||||
_NOT_SET = object()
|
||||
|
||||
|
||||
def get_srclines(filename, *,
                 cache=None,
                 _open=open,
                 _iter_lines=_iter_clean_lines,
                 ):
    """Return the file's lines as a list.

    Each line has trailing whitespace (including the newline) removed.
    If a cache mapping ({filename: lines}) is given then it is
    consulted first and updated with the result.
    """
    if cache is not None and filename in cache:
        return cache[filename]

    with _open(filename) as srcfile:
        # Preprocessor directives are dropped; everything else is kept.
        srclines = [
            line.rstrip()
            for _, line in _iter_lines(srcfile)
            if not line.startswith('#')
        ]

    if cache is not None:
        cache[filename] = srclines
    return srclines
|
||||
|
||||
|
||||
def parse_variable_declaration(srcline):
    """Return (name, decl) for the given declaration line.

    (None, None) is returned when the line does not look like a
    declaration.
    """
    # XXX possible false negatives...
    decl, assigned, _ = srcline.partition('=')
    if not assigned:
        # Without an initializer the line must be a terminated statement.
        if not srcline.endswith(';'):
            return None, None
        decl = decl.strip(';')
    decl = decl.strip()
    # The name is the last identifier, optionally followed by an
    # array suffix ("[...]").
    match = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
    if match is None:
        return None, None
    return match.group(1), decl
|
||||
|
||||
|
||||
def parse_variable(srcline, funcname=None):
    """Return (name, decl) for the variable declared on the line.

    (None, None) is returned for lines that do not declare a
    (currently supported) variable.
    """
    line = srcline.strip()

    # XXX Handle more than just static variables.
    if not line.startswith('static '):
        return None, None
    if '(' in line and '[' not in line:
        # Parens without an array suffix means a function.
        return None, None
    return parse_variable_declaration(line)
|
||||
|
||||
|
||||
def iter_variables(filename, *,
                   srccache=None,
                   parse_variable=None,
                   _get_srclines=get_srclines,
                   # Captures the module-level parse_variable() at def
                   # time, since the keyword parameter shadows it.
                   _default_parse_variable=parse_variable,
                   ):
    """Yield a Variable for each in the given source file."""
    if parse_variable is None:
        parse_variable = _default_parse_variable

    # A small line-oriented state machine: track the current function
    # (if any) so declarations can be attributed to it.
    indent = ''
    prev = ''
    funcname = None
    for line in _get_srclines(filename, cache=srccache):
        # remember current funcname
        if funcname:
            if line == indent + '}':
                # The body closed at the same indent as its header.
                funcname = None
                continue
        else:
            if '(' in prev and line == indent + '{':
                # The previous line held the function header; pull the
                # name out of it (unless it was just an attribute).
                if not prev.startswith('__attribute__'):
                    funcname = prev.split('(')[0].split()[-1]
                prev = ''
                continue
            indent = line[:-len(line.lstrip())]
            prev = line

        info = parse_variable(line, funcname)
        if isinstance(info, list):
            # The parser may report several declarations for one line,
            # each with its own funcname.
            for name, _funcname, decl in info:
                yield Variable.from_parts(filename, _funcname, name, decl)
            continue
        name, decl = info

        if name is None:
            continue
        yield Variable.from_parts(filename, funcname, name, decl)
|
||||
|
||||
|
||||
def _match_varid(variable, name, funcname, ignored=None):
    """Return True if the variable matches the requested name/funcname.

    Variables contained in "ignored" never match.  A funcname of
    UNKNOWN matches any local variable (i.e. one with some funcname).
    """
    if ignored and variable in ignored:
        return False
    if variable.name != name:
        return False
    if funcname == UNKNOWN:
        # "Some function" was requested, so a global does not match.
        return bool(variable.funcname)
    return variable.funcname == funcname
|
||||
|
||||
|
||||
def find_variable(filename, funcname, name, *,
                  ignored=None,
                  srccache=None,  # {filename: lines}
                  parse_variable=None,
                  _iter_variables=iter_variables,
                  ):
    """Return the matching variable.

    Return None if the variable is not found.
    """
    candidates = _iter_variables(filename,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
    return next(
        (v for v in candidates if _match_varid(v, name, funcname, ignored)),
        None,
    )
|
||||
|
||||
|
||||
def find_variables(varids, filenames=None, *,
                   srccache=_NOT_SET,
                   parse_variable=None,
                   _find_symbol=find_variable,
                   ):
    """Yield a Variable for each ID.

    If the variable is not found then its decl will be UNKNOWN.  That
    way there will be one resulting Variable per given ID.

    "filenames" is searched for any ID that does not name its own
    file.  A fresh shared source cache is used unless "srccache" is
    given explicitly (pass None to disable caching).
    """
    if srccache is _NOT_SET:
        srccache = {}

    # Variables already yielded are ignored in later searches so that
    # duplicate IDs resolve to distinct declarations.
    used = set()
    for varid in varids:
        if varid.filename and varid.filename != UNKNOWN:
            srcfiles = [varid.filename]
        else:
            if not filenames:
                yield Variable(varid, UNKNOWN)
                continue
            srcfiles = filenames
        for filename in srcfiles:
            # BUG FIX: this previously called the undefined name
            # "_find_varid"; the injected finder is "_find_symbol".
            found = _find_symbol(filename, varid.funcname, varid.name,
                                 ignored=used,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
            if found:
                yield found
                used.add(found)
                break
        else:
            yield Variable(varid, UNKNOWN)
|
512
Tools/c-analyzer/c_parser/preprocessor.py
Normal file
512
Tools/c-analyzer/c_parser/preprocessor.py
Normal file
|
@ -0,0 +1,512 @@
|
|||
from collections import namedtuple
|
||||
import shlex
|
||||
import os
|
||||
import re
|
||||
|
||||
from c_analyzer_common import util
|
||||
from . import info
|
||||
|
||||
|
||||
CONTINUATION = '\\' + os.linesep
|
||||
|
||||
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
|
||||
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
|
||||
|
||||
|
||||
def _coerce_str(value):
|
||||
if not value:
|
||||
return ''
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
#############################
# directives

# NOTE: these patterns are compiled with re.VERBOSE, so all the
# unescaped whitespace inside them is ignored.

DIRECTIVE_START = r'''
    (?:
      ^ \s*
      [#] \s*
      )'''
DIRECTIVE_TEXT = r'''
    (?:
      (?: \s+ ( .*\S ) )?
      \s* $
      )'''
# Matches any supported directive, capturing (kind, text).
# ("__DATE __" was a typo here; fixed to "__DATE__" for consistency
# with PreprocessorDirective.KINDS -- behavior is unchanged since
# re.VERBOSE ignored the stray space anyway.)
DIRECTIVE = rf'''
    (?:
      {DIRECTIVE_START}
      (
        include |
        error | warning |
        pragma |
        define | undef |
        if | ifdef | ifndef | elseif | else | endif |
        __FILE__ | __LINE__ | __DATE__ | __TIME__ | __TIMESTAMP__
        )
      {DIRECTIVE_TEXT}
      )'''
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)

# Matches a "define", capturing (name, args, body); "args" is only
# present for function-like macros.
DEFINE = rf'''
    (?:
      {DIRECTIVE_START} define \s+
      (?:
        ( \w*[a-zA-Z]\w* )
        (?: \s* [(] ([^)]*) [)] )?
        )
      {DIRECTIVE_TEXT}
      )'''
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
|
||||
|
||||
|
||||
def parse_directive(line):
    """Return the appropriate directive for the given line.

    Raises ValueError for unsupported or malformed directives.
    """
    line = line.strip()
    if line.startswith('#'):
        # Normalize "#  define" et al. to "#define ...".
        line = line[1:].lstrip()
        line = '#' + line
    directive = line
    #directive = '#' + line
    # Collapse runs of spaces to single spaces so the directive
    # regexes see a normalized form.  (The literals here had been
    # mangled to single spaces, which made this loop spin forever
    # on any line containing a space.)
    while '  ' in directive:
        directive = directive.replace('  ', ' ')
    return _parse_directive(directive)
|
||||
|
||||
|
||||
def _parse_directive(line):
    """Return the directive object for an already-normalized line.

    Raises ValueError when the directive is unsupported or its text
    is missing/unexpected.
    """
    # "define" has its own regex so the name and macro args can be
    # captured separately from the body.
    m = DEFINE_RE.match(line)
    if m:
        name, args, text = m.groups()
        if args:
            args = [a.strip() for a in args.split(',')]
            return Macro(name, args, text)
        else:
            return Constant(name, text)

    m = DIRECTIVE_RE.match(line)
    if not m:
        raise ValueError(f'unsupported directive {line!r}')
    kind, text = m.groups()
    if not text:
        # Only "else" and "endif" may (and must) lack text.
        if kind not in ('else', 'endif'):
            raise ValueError(f'missing text in directive {line!r}')
    elif kind in ('else', 'endif', 'define'):
        raise ValueError(f'unexpected text in directive {line!r}')
    # Dispatch to the matching directive class.
    if kind == 'include':
        directive = Include(text)
    elif kind in IfDirective.KINDS:
        directive = IfDirective(kind, text)
    else:
        directive = OtherDirective(kind, text)
    directive.validate()
    return directive
|
||||
|
||||
|
||||
class PreprocessorDirective(util._NTBase):
    """The base class for directives.

    Subclasses are namedtuples whose first field is "kind"; the
    remaining fields vary per directive type.
    """

    __slots__ = ()

    # All supported directive kinds (subclasses narrow this down).
    KINDS = frozenset([
            'include',
            'pragma',
            'error', 'warning',
            'define', 'undef',
            'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
            '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
            ])

    @property
    def text(self):
        # Join the non-"kind" fields, skipping empty ones.
        return ' '.join(v for v in self[1:] if v and v.strip()) or None

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.kind:
            raise TypeError('missing kind')
        elif self.kind not in self.KINDS:
            # Previously a bare ValueError; give it a message to match
            # the other validators in this module.
            raise ValueError(f'unsupported kind {self.kind!r}')

        # text can be anything, including None.
|
||||
|
||||
|
||||
class Constant(PreprocessorDirective,
               namedtuple('Constant', 'kind name value')):
    """A single "constant" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, value=None):
        return super().__new__(
                cls,
                'define',
                name=_coerce_str(name) or None,
                value=_coerce_str(value) or None,
                )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        # The value may be anything, including None, so only the name
        # needs checking here.
        if not self.name:
            raise TypeError('missing name')
        if not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')
|
||||
|
||||
|
||||
class Macro(PreprocessorDirective,
            namedtuple('Macro', 'kind name args body')):
    """A single "macro" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, args, body=None):
        # "args" must be a string or an iterable of strings (or "empty").
        if isinstance(args, str):
            args = [v.strip() for v in args.split(',')]
        if args:
            # Empty entries become None so validate() can flag them.
            args = tuple(_coerce_str(a) or None for a in args)
        self = super().__new__(
                cls,
                kind='define',
                name=_coerce_str(name) or None,
                args=args if args else (),
                body=_coerce_str(body) or None,
                )
        return self

    @property
    def text(self):
        # Reconstruct the original "NAME(args) body" form.
        if self.body:
            return f'{self.name}({", ".join(self.args)}) {self.body}'
        else:
            return f'{self.name}({", ".join(self.args)})'

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        elif not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        for arg in self.args:
            if not arg:
                raise ValueError(f'missing arg in {self.args}')
            elif not IDENTIFIER_RE.match(arg):
                raise ValueError(f'arg must be identifier, got {arg!r}')

        # body can be anything, including None
|
||||
|
||||
|
||||
class IfDirective(PreprocessorDirective,
                  namedtuple('IfDirective', 'kind condition')):
    """A single conditional directive (e.g. "if", "ifdef").

    This only includes directives that actually provide conditions.  The
    related directives "else" and "endif" are covered by OtherDirective
    instead.
    """

    __slots__ = ()

    KINDS = frozenset([
            'if',
            'ifdef',
            'ifndef',
            'elseif',
            ])

    @classmethod
    def _condition_from_raw(cls, raw, kind):
        # Normalize ifdef/ifndef conditions into plain "defined(...)"
        # expressions so all kinds can be handled uniformly.  Note the
        # text property below relies on these exact prefixes.
        #return Condition.from_raw(raw, _kind=kind)
        condition = _coerce_str(raw)
        if not condition:
            return None

        if kind == 'ifdef':
            condition = f'defined({condition})'
        elif kind == 'ifndef':
            condition = f'! defined({condition})'

        return condition

    def __new__(cls, kind, condition):
        kind = _coerce_str(kind)
        self = super().__new__(
                cls,
                kind=kind or None,
                condition=cls._condition_from_raw(condition, kind),
                )
        return self

    @property
    def text(self):
        # Undo the normalization applied in _condition_from_raw().
        if self.kind == 'ifdef':
            return self.condition[8:-1]  # strip "defined("
        elif self.kind == 'ifndef':
            return self.condition[10:-1]  # strip "! defined("
        else:
            return self.condition
        #return str(self.condition)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.condition:
            raise TypeError('missing condition')
        #else:
        #    for cond in self.condition:
        #        if not cond:
        #            raise ValueError(f'missing condition in {self.condition}')
        #        cond.validate()
        #    if self.kind in ('ifdef', 'ifndef'):
        #        if len(self.condition) != 1:
        #            raise ValueError('too many condition')
        #        if self.kind == 'ifdef':
        #            if not self.condition[0].startswith('defined '):
        #                raise ValueError('bad condition')
        #        else:
        #            if not self.condition[0].startswith('! defined '):
        #                raise ValueError('bad condition')
|
||||
|
||||
|
||||
class Include(PreprocessorDirective,
              namedtuple('Include', 'kind file')):
    """A single "include" directive.

    Supported "file" values follow either the bracket style (<stdio>)
    or double quotes ("spam.h").
    """

    __slots__ = ()

    def __new__(cls, file):
        return super().__new__(
                cls,
                kind='include',
                file=_coerce_str(file) or None,
                )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        # Any non-empty file value is acceptable.
        if not self.file:
            raise TypeError('missing file')
|
||||
|
||||
|
||||
class OtherDirective(PreprocessorDirective,
                     namedtuple('OtherDirective', 'kind text')):
    """A single directive not covered by another class.

    This includes the "else", "endif", and "undef" directives, which are
    otherwise inherently related to the directives covered by the
    Constant, Macro, and IfCondition classes.

    Note that all directives must have a text value, except for "else"
    and "endif" (which must have no text).
    """

    __slots__ = ()

    KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS

    def __new__(cls, kind, text):
        return super().__new__(
                cls,
                kind=_coerce_str(kind) or None,
                text=_coerce_str(text) or None,
                )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        # "else"/"endif" take no text; every other kind requires it.
        bare = self.kind in ('else', 'endif')
        if self.text and bare:
            raise ValueError('unexpected text in directive')
        if not self.text and not bare:
            raise TypeError('missing text')
|
||||
|
||||
|
||||
#############################
|
||||
# iterating lines
|
||||
|
||||
def _recompute_conditions(directive, ifstack):
|
||||
if directive.kind in ('if', 'ifdef', 'ifndef'):
|
||||
ifstack.append(
|
||||
([], directive.condition))
|
||||
elif directive.kind == 'elseif':
|
||||
if ifstack:
|
||||
negated, active = ifstack.pop()
|
||||
if active:
|
||||
negated.append(active)
|
||||
else:
|
||||
negated = []
|
||||
ifstack.append(
|
||||
(negated, directive.condition))
|
||||
elif directive.kind == 'else':
|
||||
if ifstack:
|
||||
negated, active = ifstack.pop()
|
||||
if active:
|
||||
negated.append(active)
|
||||
ifstack.append(
|
||||
(negated, None))
|
||||
elif directive.kind == 'endif':
|
||||
if ifstack:
|
||||
ifstack.pop()
|
||||
|
||||
conditions = []
|
||||
for negated, active in ifstack:
|
||||
for condition in negated:
|
||||
conditions.append(f'! ({condition})')
|
||||
if active:
|
||||
conditions.append(active)
|
||||
return tuple(conditions)
|
||||
|
||||
|
||||
def _iter_clean_lines(lines):
    """Yield (lno, line) with continuations merged and comments removed.

    "lno" is the 1-indexed number of the *last* physical line that
    contributed to the yielded logical line.
    """
    lines = iter(enumerate(lines, 1))
    for lno, line in lines:
        # Handle line continuations.
        while line.endswith(CONTINUATION):
            try:
                lno, _line = next(lines)
            except StopIteration:
                # Trailing backslash on the last line; keep it as-is.
                break
            line = line[:-len(CONTINUATION)] + ' ' + _line

        # Deal with comments.
        after = line
        line = ''
        while True:
            # Look for a comment.
            before, begin, remainder = after.partition('/*')
            if '//' in before:
                # A line comment outside any block comment ends the line.
                before, _, _ = before.partition('//')
                line += before + ' '  # per the C99 spec
                break
            line += before
            if not begin:
                break
            line += ' '  # per the C99 spec

            # Go until we find the end of the comment.
            _, end, after = remainder.partition('*/')
            while not end:
                try:
                    lno, remainder = next(lines)
                except StopIteration:
                    raise Exception('unterminated comment')
                _, end, after = remainder.partition('*/')

        yield lno, line
|
||||
|
||||
|
||||
def iter_lines(lines, *,
               _iter_clean_lines=_iter_clean_lines,
               _parse_directive=_parse_directive,
               _recompute_conditions=_recompute_conditions,
               ):
    """Yield (lno, line, directive, active conditions) for each given line.

    This is effectively a subset of the operations taking place in
    translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
    section 5.1.1.2.  Line continuations are removed and comments
    replaced with a single space.  (In both cases "lno" will be the last
    line involved.)  Otherwise each line is returned as-is.

    "lno" is the (1-indexed) line number for the line.

    "directive" will be a PreprocessorDirective or None, depending on
    whether or not there is a directive on the line.

    "active conditions" is the set of preprocessor conditions (e.g.
    "defined()") under which the current line of code will be included
    in compilation.  That set is derived from every conditional
    directive block (e.g. "if defined()", "ifdef", "else") containing
    that line.  That includes nested directives.  Note that the
    current line does not affect the active conditions for itself.
    It only impacts subsequent lines.  That applies to directives
    that close blocks (e.g. "endif") just as much as conditional
    directives.  Also note that "else" and "elseif" directives
    update the active conditions (for later lines), rather than
    adding to them.
    """
    ifstack = []
    conditions = ()
    for lno, line in _iter_clean_lines(lines):
        stripped = line.strip()
        if not stripped.startswith('#'):
            yield lno, line, None, conditions
            continue

        directive = '#' + stripped[1:].lstrip()
        # Collapse runs of spaces so the directive regexes see a
        # normalized form.  (The literals here had been mangled to
        # single spaces, which made this loop spin forever.)
        while '  ' in directive:
            directive = directive.replace('  ', ' ')
        directive = _parse_directive(directive)
        yield lno, line, directive, conditions

        # Both block-closers and condition-openers change the active
        # conditions for *subsequent* lines.
        if directive.kind in ('else', 'endif'):
            conditions = _recompute_conditions(directive, ifstack)
        elif isinstance(directive, IfDirective):
            conditions = _recompute_conditions(directive, ifstack)
|
||||
|
||||
|
||||
#############################
|
||||
# running (platform-specific?)
|
||||
|
||||
def _gcc(filename, *,
         _get_argv=(lambda: _get_gcc_argv()),
         _run=util.run_cmd,
         ):
    """Run the preprocessor (``-E``) over the file and return its output."""
    argv = _get_argv()
    argv.extend([
        '-E', filename,
        ])
    return _run(argv)
|
||||
|
||||
|
||||
def _get_gcc_argv(*,
                  _open=open,
                  _run=util.run_cmd,
                  ):
    """Return the compiler argv (CC + PY_CORE_CFLAGS) from the Makefile.

    A throwaway make include is written that prints the value of any
    requested variable, then make is run against the current
    directory's Makefile to extract CC and PY_CORE_CFLAGS.
    """
    # NOTE(review): the fixed, predictable /tmp path is racy and shared
    # between users -- consider tempfile for a unique path.
    with _open('/tmp/print.mk', 'w') as tmpfile:
        tmpfile.write('print-%:\n')
        #tmpfile.write('\t@echo $* = $($*)\n')
        tmpfile.write('\t@echo $($*)\n')
    argv = ['/usr/bin/make',
            '-f', 'Makefile',
            '-f', '/tmp/print.mk',
            'print-CC',
            'print-PY_CORE_CFLAGS',
            ]
    output = _run(argv)
    # One line of output per print-* target, in order.
    gcc, cflags = output.strip().splitlines()
    argv = shlex.split(gcc.strip())
    cflags = shlex.split(cflags.strip())
    return argv + cflags
|
||||
|
||||
|
||||
def run(filename, *,
        _gcc=_gcc,
        ):
    """Return the text of the given file after running the preprocessor."""
    # Currently only the gcc-based implementation exists.
    return _gcc(filename)
|
34
Tools/c-analyzer/c_parser/source.py
Normal file
34
Tools/c-analyzer/c_parser/source.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
from . import preprocessor
|
||||
|
||||
|
||||
def iter_clean_lines(lines):
    """Yield each meaningful line with comments and blank lines removed.

    Handles // line comments and (possibly multi-line) /* */ block
    comments.  Yielded lines are stripped of surrounding whitespace.
    """
    incomment = False
    for line in lines:
        # Deal with comments.
        if incomment:
            # Inside a multi-line comment: look for its end.
            _, sep, line = line.partition('*/')
            if not sep:
                continue
            incomment = False
            # Fall through to process whatever followed the "*/".
        line, _, _ = line.partition('//')
        before, sep, remainder = line.partition('/*')
        if sep:
            _, sep, after = remainder.partition('*/')
            if not sep:
                # The comment runs onto later lines.  (The original
                # dropped the text *before* the comment here.)
                incomment = True
                line = before.strip()
                if line:
                    yield line
                continue
            line = before + ' ' + after

        # Ignore blank lines and leading/trailing whitespace.
        line = line.strip()
        if not line:
            continue

        yield line
|
||||
|
||||
|
||||
def iter_lines(filename, *,
               preprocess=preprocessor.run,
               ):
    """Return an iterator over the preprocessed lines of the file."""
    return iter(preprocess(filename).splitlines())
|
0
Tools/c-analyzer/c_symbols/__init__.py
Normal file
0
Tools/c-analyzer/c_symbols/__init__.py
Normal file
157
Tools/c-analyzer/c_symbols/binary.py
Normal file
157
Tools/c-analyzer/c_symbols/binary.py
Normal file
|
@ -0,0 +1,157 @@
|
|||
import os
|
||||
import os.path
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
from c_analyzer_common import util, info
|
||||
from . import source
|
||||
from .info import Symbol
|
||||
|
||||
|
||||
#PYTHON = os.path.join(REPO_ROOT, 'python')
|
||||
PYTHON = sys.executable
|
||||
|
||||
|
||||
def iter_symbols(binary=PYTHON, dirnames=None, *,
                 # Alternately, use look_up_known_symbol()
                 # from c_globals.supported.
                 find_local_symbol=source.find_symbol,
                 _file_exists=os.path.exists,
                 _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)),
                 ):
    """Yield a Symbol for each symbol found in the binary."""
    if not _file_exists(binary):
        raise Exception('executable missing (need to build it first?)')

    if find_local_symbol:
        # Rebind the parameter to a closure that shares one per-file
        # cache across all lookups (the original func is kept via the
        # "_find" default).
        cache = {}
        def find_local_symbol(name, *, _find=find_local_symbol):
            return _find(name, dirnames, _perfilecache=cache)
    else:
        find_local_symbol = None

    if os.name == 'nt':
        # XXX Support this.
        raise NotImplementedError
    else:
        yield from _iter_symbols_nm(binary, find_local_symbol)
|
||||
|
||||
|
||||
#############################
|
||||
# binary format (e.g. ELF)
|
||||
|
||||
SPECIAL_SYMBOLS = {
|
||||
'__bss_start',
|
||||
'__data_start',
|
||||
'__dso_handle',
|
||||
'_DYNAMIC',
|
||||
'_edata',
|
||||
'_end',
|
||||
'__environ@@GLIBC_2.2.5',
|
||||
'_GLOBAL_OFFSET_TABLE_',
|
||||
'__JCR_END__',
|
||||
'__JCR_LIST__',
|
||||
'__TMC_END__',
|
||||
}
|
||||
|
||||
|
||||
def _is_special_symbol(name):
|
||||
if name in SPECIAL_SYMBOLS:
|
||||
return True
|
||||
if '@@GLIBC' in name:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
#############################
|
||||
# "nm"
|
||||
|
||||
# Map nm's (lowercased) symbol-type letters to Symbol.KIND values;
# anything else is treated as Symbol.KIND.OTHER by the caller.
NM_KINDS = {
        'b': Symbol.KIND.VARIABLE,  # uninitialized
        'd': Symbol.KIND.VARIABLE,  # initialized
        #'g': Symbol.KIND.VARIABLE,  # uninitialized
        #'s': Symbol.KIND.VARIABLE,  # initialized
        't': Symbol.KIND.FUNCTION,
        }
|
||||
|
||||
|
||||
def _iter_symbols_nm(binary, find_local_symbol=None,
                     *,
                     _which=shutil.which,
                     _run=util.run_cmd,
                     ):
    """Yield a Symbol for each variable reported by "nm" for the binary."""
    nm = _which('nm')
    if not nm:
        raise NotImplementedError
    argv = [nm,
            '--line-numbers',
            binary,
            ]
    try:
        output = _run(argv)
    except Exception:
        # NOTE(review): nm cannot be None here (checked above), so this
        # fallback branch looks unreachable -- verify intent.
        if nm is None:
            # XXX Use dumpbin.exe /SYMBOLS on Windows.
            raise NotImplementedError
        raise
    for line in output.splitlines():
        (name, kind, external, filename, funcname, vartype,
         ) = _parse_nm_line(line,
                            _find_local_symbol=find_local_symbol,
                            )
        # Only variables are of interest; skip linker bookkeeping too.
        if kind != Symbol.KIND.VARIABLE:
            continue
        elif _is_special_symbol(name):
            continue
        assert vartype is None
        yield Symbol(
                id=(filename, funcname, name),
                kind=kind,
                external=external,
                )
|
||||
|
||||
|
||||
def _parse_nm_line(line, *, _find_local_symbol=None):
    """Split one line of "nm --line-numbers" output into symbol parts.

    Returns (name, kind, external, filename, funcname, vartype).
    """
    _origline = line
    _, _, line = line.partition(' ')  # strip off the address
    line = line.strip()

    # The type letter: uppercase means externally visible.
    kind, _, line = line.partition(' ')
    line = line.strip()
    external = kind.isupper()
    kind = NM_KINDS.get(kind.lower(), Symbol.KIND.OTHER)

    # --line-numbers appends "\tfile:lineno" when available.
    name, _, filename = line.partition('\t')
    name = name.strip()
    if filename:
        filename = os.path.relpath(filename.partition(':')[0])
    else:
        filename = info.UNKNOWN

    vartype = None
    name, islocal = _parse_nm_name(name, kind)
    if islocal:
        # A function-local static; try to recover its function/file.
        funcname = info.UNKNOWN
        if _find_local_symbol is not None:
            filename, funcname, vartype = _find_local_symbol(name)
            filename = filename or info.UNKNOWN
            funcname = funcname or info.UNKNOWN
    else:
        funcname = None
        # XXX find filename and vartype?
    return name, kind, external, filename, funcname, vartype
|
||||
|
||||
|
||||
def _parse_nm_name(name, kind):
    """Return (name, islocal) for a raw nm symbol name.

    "islocal" is None for non-variables and special symbols, False for
    plain names, and True for names with a numeric ".NNN" suffix (which
    is stripped off).
    """
    if kind != Symbol.KIND.VARIABLE or _is_special_symbol(name):
        return name, None

    actual, sep, digits = name.partition('.')
    if not sep:
        return name, False
    if not digits.isdigit():
        raise Exception(f'got bogus name {name}')
    return actual, True
|
51
Tools/c-analyzer/c_symbols/info.py
Normal file
51
Tools/c-analyzer/c_symbols/info.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
from collections import namedtuple
|
||||
|
||||
from c_analyzer_common.info import ID
|
||||
from c_analyzer_common.util import classonly, _NTBase
|
||||
|
||||
|
||||
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
    """Info for a single compilation symbol.

    "id" is a c_analyzer_common.info.ID; "kind" is one of the KIND
    values; "external" is whether the symbol is externally visible.
    """

    __slots__ = ()

    # The supported symbol kinds.
    class KIND:
        VARIABLE = 'variable'
        FUNCTION = 'function'
        OTHER = 'other'

    @classonly
    def from_name(cls, name, filename=None, kind=KIND.VARIABLE, external=None):
        """Return a new symbol based on the given name."""
        id = ID(filename, None, name)
        return cls(id, kind, external)

    def __new__(cls, id, kind=KIND.VARIABLE, external=None):
        self = super().__new__(
                cls,
                id=ID.from_raw(id),
                kind=str(kind) if kind else None,
                # Preserve "unknown" (None) distinctly from False.
                external=bool(external) if external is not None else None,
                )
        return self

    def __hash__(self):
        # Identity is determined solely by the ID.
        return hash(self.id)

    def __getattr__(self, name):
        # Delegate unknown attributes (filename, funcname, name) to the ID.
        return getattr(self.id, name)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        if not self.id:
            raise TypeError('missing id')
        else:
            self.id.validate()

        if not self.kind:
            raise TypeError('missing kind')
        elif self.kind not in vars(self.KIND).values():
            raise ValueError(f'unsupported kind {self.kind}')

        if self.external is None:
            raise TypeError('missing external')
|
149
Tools/c-analyzer/c_symbols/resolve.py
Normal file
149
Tools/c-analyzer/c_symbols/resolve.py
Normal file
|
@ -0,0 +1,149 @@
|
|||
import os.path
|
||||
|
||||
from c_analyzer_common import files
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
from c_parser import declarations, info
|
||||
from .info import Symbol
|
||||
from .source import _find_symbol
|
||||
|
||||
|
||||
# XXX need tests:
|
||||
# * look_up_known_symbol()
|
||||
# * symbol_from_source()
|
||||
# * get_resolver()
|
||||
# * symbols_to_variables()
|
||||
|
||||
def look_up_known_symbol(symbol, knownvars, *,
                         match_files=(lambda f1, f2: f1 == f2),
                         ):
    """Return the known variable matching the given symbol.

    "knownvars" is a mapping of common.ID to parser.Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    Returns None when there is no match.
    """
    if not knownvars:
        return None

    if symbol.funcname == UNKNOWN:
        # The symbol is a function-local static whose function we do
        # not know; scan for any local variable with a matching name.
        if not symbol.filename or symbol.filename == UNKNOWN:
            for varid in knownvars:
                if not varid.funcname:
                    continue
                if varid.name == symbol.name:
                    return knownvars[varid]
            else:
                return None
        else:
            # Same scan, but restricted to the symbol's file.
            for varid in knownvars:
                if not varid.funcname:
                    continue
                if not match_files(varid.filename, symbol.filename):
                    continue
                if varid.name == symbol.name:
                    return knownvars[varid]
            else:
                return None
    elif not symbol.filename or symbol.filename == UNKNOWN:
        # A known funcname without a filename is not supported (yet).
        raise NotImplementedError
    else:
        # Fully-specified symbol: direct ID lookup.
        return knownvars.get(symbol.id)
|
||||
|
||||
|
||||
def find_in_source(symbol, dirnames, *,
                   # NOTE(review): the mutable default appears to be a
                   # deliberate module-lifetime cache shared across
                   # calls -- confirm before changing.
                   _perfilecache={},
                   _find_symbol=_find_symbol,
                   _iter_files=files.iter_files_by_suffix,
                   ):
    """Return the Variable matching the given Symbol.

    If there is no match then return None.
    """
    # Restrict the search to the symbol's own file when it is known.
    if symbol.filename and symbol.filename != UNKNOWN:
        filenames = [symbol.filename]
    else:
        filenames = _iter_files(dirnames, ('.c', '.h'))

    # Searching within a specific function is not supported (yet).
    if symbol.funcname and symbol.funcname != UNKNOWN:
        raise NotImplementedError

    (filename, funcname, vartype
     ) = _find_symbol(symbol.name, filenames, _perfilecache)
    if filename == UNKNOWN:
        return None
    return info.Variable(
            id=(filename, funcname, symbol.name),
            vartype=vartype,
            )
|
||||
|
||||
|
||||
def get_resolver(knownvars=None, dirnames=None, *,
                 _look_up_known=look_up_known_symbol,
                 _from_source=find_in_source,
                 ):
    """Return a "resolver" func for the given known vars and dirnames.

    The func takes a single Symbol and returns a corresponding Variable.
    If the symbol was located then the variable will be valid, populated
    with the corresponding information.  Otherwise None is returned.
    """
    if knownvars:
        knownvars = dict(knownvars)  # a copy
        def resolve_known(symbol):
            found = _look_up_known(symbol, knownvars)
            if found is None:
                return None
            elif symbol.funcname == UNKNOWN:
                # Consume ambiguous matches so duplicates resolve to
                # distinct known variables.
                knownvars.pop(found.id)
            elif not symbol.filename or symbol.filename == UNKNOWN:
                knownvars.pop(found.id)
            return found
        if dirnames:
            def resolve(symbol):
                found = resolve_known(symbol)
                if found is None:
                    return None
                    # NOTE: falling back to a source search is disabled.
                    #return _from_source(symbol, dirnames)
                else:
                    # Only accept matches under one of the given dirs.
                    for dirname in dirnames:
                        if not dirname.endswith(os.path.sep):
                            dirname += os.path.sep
                        if found.filename.startswith(dirname):
                            break
                    else:
                        return None
                    return found
        else:
            resolve = resolve_known
    elif dirnames:
        def resolve(symbol):
            return _from_source(symbol, dirnames)
    else:
        def resolve(symbol):
            return None
    return resolve
|
||||
|
||||
|
||||
def symbols_to_variables(symbols, *,
                         resolve=(lambda s: look_up_known_symbol(s, None)),
                         ):
    """Yield the variable that matches each given symbol.

    Use get_resolver() for a "resolve" func to use.
    """
    for symbol in symbols:
        if isinstance(symbol, info.Variable):
            # Already a variable; pass it through as-is.
            # XXX validate?
            yield symbol
        elif symbol.kind == Symbol.KIND.VARIABLE:
            var = resolve(symbol)
            if var is None:
                # Fall back to a mostly-empty placeholder variable.
                #raise NotImplementedError(symbol)
                var = info.Variable(
                        id=symbol.id,
                        vartype=UNKNOWN,
                        )
            yield var
        # Non-variable symbols (functions, etc.) are silently skipped.
|
58
Tools/c-analyzer/c_symbols/source.py
Normal file
58
Tools/c-analyzer/c_symbols/source.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
from c_analyzer_common import files
|
||||
from c_analyzer_common.info import UNKNOWN
|
||||
from c_parser import declarations
|
||||
|
||||
|
||||
# XXX need tests:
|
||||
# * find_symbol()
|
||||
|
||||
def find_symbol(name, dirnames, *,
                _perfilecache,
                _iter_files=files.iter_files_by_suffix,
                **kwargs
                ):
    """Return (filename, funcname, vartype) for the matching Symbol."""
    # Consider every C source/header file under the given directories.
    candidates = _iter_files(dirnames, ('.c', '.h'))
    return _find_symbol(name, candidates, _perfilecache, **kwargs)
|
||||
|
||||
|
||||
def _get_symbols(filename, *,
                 _iter_variables=declarations.iter_variables,
                 ):
    """Return {name: [(funcname, vartype), ...]} for the file's local variables.

    Only function-local variables are included; file-level (global)
    declarations, i.e. those with no funcname, are skipped.  Each list
    keeps the declarations in the order they were yielded.
    """
    symbols = {}
    for funcname, name, vartype in _iter_variables(filename):
        if not funcname:
            # No funcname means a global declaration; skip it.
            continue
        # Group every declaration of "name" across the file's functions.
        symbols.setdefault(name, []).append((funcname, vartype))
    return symbols
|
||||
|
||||
|
||||
def _find_symbol(name, filenames, _perfilecache, *,
                 _get_local_symbols=_get_symbols,
                 ):
    """Return (filename, funcname, vartype) for a local variable "name".

    Each call consumes one matching declaration from the per-file cache,
    so repeated calls produce successive declarations.  When no file has
    a (remaining) match, (UNKNOWN, UNKNOWN, UNKNOWN) is returned.
    """
    for filename in filenames:
        # Parse the file's local symbols on first use and cache them.
        try:
            per_file = _perfilecache[filename]
        except KeyError:
            per_file = _perfilecache[filename] = _get_local_symbols(filename)

        remaining = per_file.get(name)
        if remaining is None:
            continue

        # Consume the oldest remaining declaration for this name.
        funcname, vartype = remaining.pop(0)
        if not remaining:
            del per_file[name]
        return filename, funcname, vartype
    return UNKNOWN, UNKNOWN, UNKNOWN
|
||||
|
||||
|
||||
def iter_symbols():
    """Yield each symbol found in the source.  (Not implemented yet.)"""
    raise NotImplementedError
|
1
Tools/c-analyzer/ignored.tsv
Normal file
1
Tools/c-analyzer/ignored.tsv
Normal file
|
@ -0,0 +1 @@
|
|||
filename funcname name kind reason
|
|
1922
Tools/c-analyzer/known.tsv
Normal file
1922
Tools/c-analyzer/known.tsv
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue