mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 15:58:57 +00:00 
			
		
		
		
	 345cd37abe
			
		
	
	
		345cd37abe
		
			
		
	
	
	
	
		
			
			The original tool wasn't working right, and it was simpler to create a new one, partially reusing some of the old code. At this point the tool runs properly on master. (Try: ./python Tools/c-analyzer/c-analyzer.py analyze.) It takes ~40 seconds on my machine to analyze the full CPython code base. Note that we'll need to iron out some OS-specific stuff (e.g. the preprocessor). We're okay though, since this tool isn't used yet in our workflow. We will also need to verify the analysis results in detail before activating the check in CI, though I'm pretty sure it's close. https://bugs.python.org/issue36876
		
			
				
	
	
		
			115 lines
		
	
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			115 lines
		
	
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import re
 | |
| 
 | |
| from ._regexes import (
 | |
|     _ind,
 | |
|     STRING_LITERAL,
 | |
|     VAR_DECL as _VAR_DECL,
 | |
| )
 | |
| 
 | |
| 
 | |
def log_match(group, m):
    """Emit a debug-level log record describing a regex match.

    *group* is the label shown between angle brackets; *m* is a match
    object whose full matched text (``m.group(0)``) is included in the
    message.
    """
    # Imported at call time, as in the original, rather than at module load.
    from . import _logger

    message = f'matched <{group}> ({m.group(0)})'
    _logger.debug(message)
 | |
| 
 | |
| 
 | |
| #############################
 | |
| # regex utils
 | |
| 
 | |
def set_capture_group(pattern, group, *, strict=True):
    """Turn the first labelled non-capturing group into a capturing one.

    The group is located by its opening text ``(?:  # <GROUP>`` (two
    spaces before the ``#``).  Only the first occurrence is rewritten to
    ``(  # <GROUP>``; later occurrences are left untouched.

    When *strict* is true and the marker does not occur in *pattern*,
    ValueError is raised.  When *strict* is false a pattern without the
    marker is returned unchanged.
    """
    marker = f'(?:  # <{group}>'
    if strict:
        if marker not in pattern:
            raise ValueError(f'{marker!r} not found in pattern')
    capturing = f'(  # <{group}>'
    return pattern.replace(marker, capturing, 1)
 | |
| 
 | |
| 
 | |
def set_capture_groups(pattern, groups, *, strict=True):
    """Apply set_capture_group() to *pattern* once per name in *groups*.

    The names are processed in iteration order and each call works on
    the result of the previous one.  *strict* is passed through to every
    call unchanged.  Returns the final pattern text.
    """
    updated = pattern
    for name in groups:
        updated = set_capture_group(updated, name, strict=strict)
    return updated
 | |
| 
 | |
| 
 | |
| #############################
 | |
| # syntax-related utils
 | |
| 
 | |
# Matches a run of text ending at the next parenthesis.
#
# The run consists of characters that are not quotes or parentheses,
# optionally interleaved with STRING_LITERAL matches (presumably C string
# literals -- see ._regexes), so that a parenthesis inside such a literal
# is not treated as the terminator.  The terminating parenthesis is
# captured: group 1 is set for "(" and group 2 for ")".
#
# NOTE(review): match_paren() unpacks m.groups() into exactly two names,
# so STRING_LITERAL must not contribute capturing groups of its own --
# confirm against ._regexes.
_PAREN_RE = re.compile(rf'''
    (?:
        (?:
            [^'"()]*
            {_ind(STRING_LITERAL, 3)}
         )*
        [^'"()]*
        (?:
            ( [(] )
            |
            ( [)] )
         )
     )
    ''', re.VERBOSE)
 | |
| 
 | |
| 
 | |
def match_paren(text, depth=0):
    """Return the index just past the parenthesis that closes the nesting.

    *text* is scanned from the start with _PAREN_RE, one parenthesis per
    step.  Each "(" raises *depth* by one and each ")" lowers it by one;
    as soon as a ")" brings *depth* to zero, the end offset of that match
    is returned.  *depth* is the nesting level already open before
    *text* begins.

    Raises ValueError if the pattern stops matching before a ")" brings
    *depth* to zero.
    """
    offset = 0
    while True:
        found = _PAREN_RE.match(text, offset)
        if not found:
            raise ValueError(f'could not find matching parens for {text!r}')
        offset = found.end()
        # Exactly one of the two groups is set: the opener or the closer.
        opened, _ = found.groups()
        if opened:
            depth += 1
            continue
        depth -= 1
        if depth == 0:
            return offset
 | |
| 
 | |
| 
 | |
# The variable-declaration pattern from ._regexes with the labelled
# groups below switched from non-capturing to capturing (strict mode, so
# a label missing from _VAR_DECL raises ValueError at import time).
# parse_var_decl() unpacks m.groups() into seven names, which presumes
# these are the only capturing groups in the resulting pattern.
VAR_DECL = set_capture_groups(_VAR_DECL, (
    'STORAGE',
    'TYPE_QUAL',
    'TYPE_SPEC',
    'DECLARATOR',
    'IDENTIFIER',
    'WRAPPED_IDENTIFIER',
    'FUNC_IDENTIFIER',
))
 | |
| 
 | |
| 
 | |
def parse_var_decl(decl):
    """Parse a variable declaration into ``(kind, name, vartype)``.

    *decl* is matched against VAR_DECL (verbose mode) from its start.

    Returns a 3-tuple:

    * kind -- 'simple', 'wrapped' or 'funcptr', depending on which of
      the three identifier groups matched (checked in that order)
    * name -- the text of that identifier group
    * vartype -- a dict with the keys 'storage', 'typequal', 'typespec'
      and 'abstract'; 'abstract' is the declarator text with every
      occurrence of *name* removed

    Raises ValueError if *decl* does not match VAR_DECL at all, and
    NotImplementedError if it matches but none of the identifier groups
    is set.
    """
    m = re.match(VAR_DECL, decl, re.VERBOSE)
    if m is None:
        # Without this check the failure surfaced as an AttributeError
        # on None, which hid the declaration that caused it.
        raise ValueError(f'unsupported variable declaration {decl!r}')
    (storage, typequal, typespec, declarator,
     name,
     wrappedname,
     funcptrname,
     ) = m.groups()
    if name:
        kind = 'simple'
    elif wrappedname:
        kind = 'wrapped'
        name = wrappedname
    elif funcptrname:
        kind = 'funcptr'
        name = funcptrname
    else:
        raise NotImplementedError
    # Strip the identifier to leave the abstract declarator.
    abstract = declarator.replace(name, '')
    vartype = {
        'storage': storage,
        'typequal': typequal,
        'typespec': typespec,
        'abstract': abstract,
    }
    return (kind, name, vartype)
 | |
| 
 | |
| 
 | |
| #############################
 | |
| # parser state utils
 | |
| 
 | |
| # XXX Drop this or use it!
 | |
def iter_results(results):
    """Yield the ``(result, text)`` pairs whose *result* is truthy.

    *results* may be an iterable of 2-item pairs, a zero-argument
    callable returning such an iterable, or a falsy value (in which case
    nothing is yielded).
    """
    if not results:
        return
    if callable(results):
        results = results()
        if not results:
            return

    # Iterate the resolved value directly.  Calling it a second time here
    # raised TypeError for every plain iterable.
    for result, text in results:
        if result:
            yield result, text
 |