mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 03:44:55 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			133 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			133 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import importlib.util
 | 
						|
import io
 | 
						|
import os
 | 
						|
import pathlib
 | 
						|
import sys
 | 
						|
import textwrap
 | 
						|
import tokenize
 | 
						|
import token
 | 
						|
 | 
						|
from typing import Any, cast, Dict, IO, Type, Final
 | 
						|
 | 
						|
from pegen.build import compile_c_extension
 | 
						|
from pegen.c_generator import CParserGenerator
 | 
						|
from pegen.grammar import Grammar
 | 
						|
from pegen.grammar_parser import GeneratedParser as GrammarParser
 | 
						|
from pegen.parser import Parser
 | 
						|
from pegen.python_generator import PythonParserGenerator
 | 
						|
from pegen.tokenizer import Tokenizer
 | 
						|
 | 
						|
# Token tables handed to the C parser generator.
# ALL_TOKENS maps each numeric token type to its name.
ALL_TOKENS = token.tok_name
# EXACT_TOKENS maps a literal such as "(" to its numeric token type.
EXACT_TOKENS = token.EXACT_TOKEN_TYPES  # type: ignore
# Names of every token type that no exact literal maps to.
NON_EXACT_TOKENS = {
    tok_name
    for tok_type, tok_name in ALL_TOKENS.items()
    if tok_type not in EXACT_TOKENS.values()
}
 | 
						|
 | 
						|
 | 
						|
def generate_parser(grammar: Grammar) -> Type[Parser]:
    """Build a Python parser class for *grammar* without touching disk.

    The parser source is generated into an in-memory buffer, executed in a
    fresh namespace, and the resulting ``GeneratedParser`` class is returned.
    """
    # Emit the parser's Python source into a string buffer.
    buffer = io.StringIO()
    PythonParserGenerator(grammar, buffer).generate("<string>")

    # Execute the generated source and pull the parser class out of it.
    namespace: Dict[str, Any] = {}
    exec(buffer.getvalue(), namespace)
    return namespace["GeneratedParser"]
 | 
						|
 | 
						|
 | 
						|
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    """Parse the contents of a file-like object with *parser_class*.

    The stream is tokenized line by line via ``file.readline`` and the
    parser's ``start()`` rule is invoked.  Its result is returned as-is;
    if it is None, the error built by ``make_syntax_error()`` is raised.
    """
    token_stream = tokenize.generate_tokens(file.readline)  # type: ignore # typeshed issue #3515
    parser = parser_class(Tokenizer(token_stream), verbose=verbose)
    tree = parser.start()
    if tree is not None:
        return tree
    raise parser.make_syntax_error()
 | 
						|
 | 
						|
 | 
						|
def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    """Parse *source* text with *parser_class* and return the parser's result.

    When *dedent* is true (the default), common leading whitespace is removed
    with textwrap.dedent() first, so indented triple-quoted literals can be
    passed directly.  *verbose* is forwarded to run_parser().
    """
    text = textwrap.dedent(source) if dedent else source
    stream = io.StringIO(text)
    return run_parser(stream, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515
 | 
						|
 | 
						|
 | 
						|
def make_parser(source: str) -> Type[Parser]:
    """Turn grammar source text into a parser class.

    Convenience wrapper: the text is parsed with GrammarParser via
    parse_string(), and the result is passed to generate_parser().
    """
    return generate_parser(parse_string(source, GrammarParser))
 | 
						|
 | 
						|
 | 
						|
def import_file(full_name: str, path: str) -> Any:
    """Import a Python module from a file path and return the module object.

    The file at *path* is executed as a module named *full_name*.  This
    function does not itself register the module in ``sys.modules``.

    Raises:
        ImportError: if no import spec (or no loader) can be determined for
            *path*, e.g. because its suffix is not one the import system
            recognizes.
    """
    spec = importlib.util.spec_from_file_location(full_name, path)
    # spec_from_file_location() returns None when it cannot pick a loader for
    # the path; fail with a clear ImportError instead of an AttributeError
    # from deep inside module_from_spec().
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot import {full_name!r} from {path!r}: no suitable loader found",
            name=full_name,
            path=path,
        )
    mod = importlib.util.module_from_spec(spec)

    # The loader is assumed to have an exec_module() method.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod
 | 
						|
 | 
						|
 | 
						|
def generate_c_parser_source(grammar: Grammar) -> str:
    """Return the generated C parser source for *grammar* as a string.

    The source is produced in memory with CParserGenerator, using the
    module-level token tables; nothing is written to disk.
    """
    buffer = io.StringIO()
    generator = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, buffer)
    generator.generate("<string>")
    return buffer.getvalue()
 | 
						|
 | 
						|
 | 
						|
def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False
) -> Any:
    """Generate and compile a parser C extension for *grammar* inside *path*.

    The C source is written to ``parse.c`` in *path* and then passed to
    compile_c_extension() with *path* as the build directory.  *debug* is
    forwarded to CParserGenerator.

    Nothing is returned (the function has no return statement): the built
    extension is neither imported nor handed back here.

    Raises:
        AssertionError: if the directory *path* is not empty.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped when Python runs with -O.
    if os.listdir(path):
        raise AssertionError(f"working directory {str(path)!r} must be empty")
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    compile_c_extension(str(source), build_dir=str(path))
 | 
						|
 | 
						|
 | 
						|
def print_memstats() -> bool:
    """Print the current process's memory usage, in MiB, to stdout.

    Reports ``rss`` and ``vms`` from psutil plus a ``maxrss`` peak value
    (``peak_wset`` on Windows, ``resource.getrusage()`` elsewhere).

    Returns False, printing nothing, if psutil cannot be imported;
    otherwise returns True after printing.
    """
    MiB: Final = 2 ** 20
    try:
        import psutil  # type: ignore
    except ImportError:
        return False

    print("Memory stats:")
    meminfo = psutil.Process().memory_info()
    stats = {"rss": meminfo.rss / MiB, "vms": meminfo.vms / MiB}
    if sys.platform == "win32":
        stats["maxrss"] = meminfo.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        usage = resource.getrusage(resource.RUSAGE_SELF)
        # ru_maxrss is used unscaled on macOS and multiplied by 1024 on
        # other platforms (Linux).
        scale = 1 if sys.platform == "darwin" else 1024
        stats["maxrss"] = usage.ru_maxrss * scale / MiB

    for label, amount in stats.items():
        print(f"  {label:12.12s}: {amount:10.0f} MiB")
    return True
 |