mirror of
https://github.com/python/cpython.git
synced 2025-09-23 00:43:12 +00:00
bpo-40688: Use the correct parser in the peg_generator scripts (GH-20235)
The scripts in `Tools/peg_generator/scripts` mostly assume that
`ast.parse` and `compile` use the old parser, since that was the
state of things while we were developing them. They need to be
updated to always use the correct parser. `_peg_parser` is being
extended to support both parsing and compiling with both parsers.
(cherry picked from commit 9645930b5b)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
This commit is contained in:
parent
318a18eb88
commit
3c6c86ab77
6 changed files with 151 additions and 187 deletions
|
@ -1,60 +1,9 @@
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
#include "pegen_interface.h"
|
#include "pegen_interface.h"
|
||||||
|
|
||||||
PyObject *
|
static int
|
||||||
_Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds)
|
_mode_str_to_int(char *mode_str)
|
||||||
{
|
{
|
||||||
static char *keywords[] = {"file", "mode", NULL};
|
|
||||||
char *filename;
|
|
||||||
char *mode_str = "exec";
|
|
||||||
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", keywords, &filename, &mode_str)) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
int mode;
|
|
||||||
if (strcmp(mode_str, "exec") == 0) {
|
|
||||||
mode = Py_file_input;
|
|
||||||
}
|
|
||||||
else if (strcmp(mode_str, "single") == 0) {
|
|
||||||
mode = Py_single_input;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'single'");
|
|
||||||
}
|
|
||||||
|
|
||||||
PyArena *arena = PyArena_New();
|
|
||||||
if (arena == NULL) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyCompilerFlags flags = _PyCompilerFlags_INIT;
|
|
||||||
PyObject *result = NULL;
|
|
||||||
|
|
||||||
mod_ty res = PyPegen_ASTFromFilename(filename, mode, &flags, arena);
|
|
||||||
if (res == NULL) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
result = PyAST_mod2obj(res);
|
|
||||||
|
|
||||||
error:
|
|
||||||
PyArena_Free(arena);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyObject *
|
|
||||||
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
|
|
||||||
{
|
|
||||||
static char *keywords[] = {"string", "mode", "oldparser", NULL};
|
|
||||||
char *the_string;
|
|
||||||
char *mode_str = "exec";
|
|
||||||
int oldparser = 0;
|
|
||||||
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sp", keywords,
|
|
||||||
&the_string, &mode_str, &oldparser)) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
int mode;
|
int mode;
|
||||||
if (strcmp(mode_str, "exec") == 0) {
|
if (strcmp(mode_str, "exec") == 0) {
|
||||||
mode = Py_file_input;
|
mode = Py_file_input;
|
||||||
|
@ -66,39 +15,119 @@ _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
|
||||||
mode = Py_single_input;
|
mode = Py_single_input;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
mode = -1;
|
||||||
|
}
|
||||||
|
return mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
static mod_ty
|
||||||
|
_run_parser(char *str, char *filename, int mode, PyCompilerFlags *flags, PyArena *arena, int oldparser)
|
||||||
|
{
|
||||||
|
mod_ty mod;
|
||||||
|
if (!oldparser) {
|
||||||
|
mod = PyPegen_ASTFromString(str, filename, mode, flags, arena);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
mod = PyParser_ASTFromString(str, filename, mode, flags, arena);
|
||||||
|
}
|
||||||
|
return mod;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *
|
||||||
|
_Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
|
||||||
|
{
|
||||||
|
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
|
||||||
|
char *the_string;
|
||||||
|
char *filename = "<string>";
|
||||||
|
char *mode_str = "exec";
|
||||||
|
int oldparser = 0;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
|
||||||
|
&the_string, &filename, &mode_str, &oldparser)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mode = _mode_str_to_int(mode_str);
|
||||||
|
if (mode == -1) {
|
||||||
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
|
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyCompilerFlags flags = _PyCompilerFlags_INIT;
|
||||||
|
flags.cf_flags = PyCF_IGNORE_COOKIE;
|
||||||
|
|
||||||
PyArena *arena = PyArena_New();
|
PyArena *arena = PyArena_New();
|
||||||
if (arena == NULL) {
|
if (arena == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *result = NULL;
|
mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
|
||||||
|
if (mod == NULL) {
|
||||||
|
PyArena_Free(arena);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *filename_ob = PyUnicode_DecodeFSDefault(filename);
|
||||||
|
if (filename_ob == NULL) {
|
||||||
|
PyArena_Free(arena);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
PyCodeObject *result = PyAST_CompileObject(mod, filename_ob, &flags, -1, arena);
|
||||||
|
Py_XDECREF(filename_ob);
|
||||||
|
PyArena_Free(arena);
|
||||||
|
return (PyObject *)result;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *
|
||||||
|
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
|
||||||
|
{
|
||||||
|
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
|
||||||
|
char *the_string;
|
||||||
|
char *filename = "<string>";
|
||||||
|
char *mode_str = "exec";
|
||||||
|
int oldparser = 0;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
|
||||||
|
&the_string, &filename, &mode_str, &oldparser)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mode = _mode_str_to_int(mode_str);
|
||||||
|
if (mode == -1) {
|
||||||
|
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
|
||||||
|
}
|
||||||
|
|
||||||
PyCompilerFlags flags = _PyCompilerFlags_INIT;
|
PyCompilerFlags flags = _PyCompilerFlags_INIT;
|
||||||
flags.cf_flags = PyCF_IGNORE_COOKIE;
|
flags.cf_flags = PyCF_IGNORE_COOKIE;
|
||||||
|
|
||||||
mod_ty res;
|
PyArena *arena = PyArena_New();
|
||||||
if (oldparser) {
|
if (arena == NULL) {
|
||||||
res = PyParser_ASTFromString(the_string, "<string>", mode, &flags, arena);
|
return NULL;
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
res = PyPegen_ASTFromString(the_string, "<string>", mode, &flags, arena);
|
|
||||||
}
|
|
||||||
if (res == NULL) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
result = PyAST_mod2obj(res);
|
|
||||||
|
|
||||||
error:
|
mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
|
||||||
|
if (mod == NULL) {
|
||||||
|
PyArena_Free(arena);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *result = PyAST_mod2obj(mod);
|
||||||
PyArena_Free(arena);
|
PyArena_Free(arena);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyMethodDef ParseMethods[] = {
|
static PyMethodDef ParseMethods[] = {
|
||||||
{"parse_file", (PyCFunction)(void (*)(void))_Py_parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."},
|
{
|
||||||
{"parse_string", (PyCFunction)(void (*)(void))_Py_parse_string, METH_VARARGS|METH_KEYWORDS,"Parse a string."},
|
"parse_string",
|
||||||
|
(PyCFunction)(void (*)(void))_Py_parse_string,
|
||||||
|
METH_VARARGS|METH_KEYWORDS,
|
||||||
|
"Parse a string, return an AST."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"compile_string",
|
||||||
|
(PyCFunction)(void (*)(void))_Py_compile_string,
|
||||||
|
METH_VARARGS|METH_KEYWORDS,
|
||||||
|
"Compile a string, return a code object."
|
||||||
|
},
|
||||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -69,25 +69,22 @@ stats: peg_extension/parse.c data/xxl.py
|
||||||
|
|
||||||
time: time_compile
|
time: time_compile
|
||||||
|
|
||||||
time_compile: venv peg_extension/parse.c data/xxl.py
|
time_compile: venv data/xxl.py
|
||||||
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
|
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
|
||||||
|
|
||||||
time_parse: venv peg_extension/parse.c data/xxl.py
|
time_parse: venv data/xxl.py
|
||||||
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
|
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
|
||||||
|
|
||||||
time_check: venv peg_extension/parse.c data/xxl.py
|
time_old: time_old_compile
|
||||||
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl check
|
|
||||||
|
|
||||||
time_stdlib: time_stdlib_compile
|
time_old_compile: venv data/xxl.py
|
||||||
|
|
||||||
time_stdlib_compile: venv peg_extension/parse.c data/xxl.py
|
|
||||||
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
|
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
|
||||||
|
|
||||||
time_stdlib_parse: venv peg_extension/parse.c data/xxl.py
|
time_old_parse: venv data/xxl.py
|
||||||
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
|
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
|
||||||
|
|
||||||
test_local:
|
time_peg_dir: venv
|
||||||
$(PYTHON) scripts/test_parse_directory.py \
|
$(VENVPYTHON) scripts/test_parse_directory.py \
|
||||||
--grammar-file $(GRAMMAR) \
|
--grammar-file $(GRAMMAR) \
|
||||||
--tokens-file $(TOKENS) \
|
--tokens-file $(TOKENS) \
|
||||||
-d $(TESTDIR) \
|
-d $(TESTDIR) \
|
||||||
|
@ -96,8 +93,8 @@ test_local:
|
||||||
--exclude "*/failset/**" \
|
--exclude "*/failset/**" \
|
||||||
--exclude "*/failset/**/*"
|
--exclude "*/failset/**/*"
|
||||||
|
|
||||||
test_global: $(CPYTHON)
|
time_stdlib: $(CPYTHON) venv
|
||||||
$(PYTHON) scripts/test_parse_directory.py \
|
$(VENVPYTHON) scripts/test_parse_directory.py \
|
||||||
--grammar-file $(GRAMMAR) \
|
--grammar-file $(GRAMMAR) \
|
||||||
--tokens-file $(TOKENS) \
|
--tokens-file $(TOKENS) \
|
||||||
-d $(CPYTHON) \
|
-d $(CPYTHON) \
|
||||||
|
@ -113,9 +110,6 @@ mypy: regen-metaparser
|
||||||
format-python:
|
format-python:
|
||||||
black pegen scripts
|
black pegen scripts
|
||||||
|
|
||||||
bench: venv
|
|
||||||
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=stdlib check
|
|
||||||
|
|
||||||
format: format-python
|
format: format-python
|
||||||
|
|
||||||
find_max_nesting:
|
find_max_nesting:
|
||||||
|
|
|
@ -6,6 +6,8 @@ import sys
|
||||||
import os
|
import os
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
|
import _peg_parser
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import memory_profiler
|
import memory_profiler
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
|
@ -14,8 +16,6 @@ except ModuleNotFoundError:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
sys.path.insert(0, os.getcwd())
|
sys.path.insert(0, os.getcwd())
|
||||||
from peg_extension import parse
|
|
||||||
from pegen.build import build_c_parser_and_generator
|
|
||||||
from scripts.test_parse_directory import parse_directory
|
from scripts.test_parse_directory import parse_directory
|
||||||
|
|
||||||
argparser = argparse.ArgumentParser(
|
argparser = argparse.ArgumentParser(
|
||||||
|
@ -41,9 +41,6 @@ command_compile = subcommands.add_parser(
|
||||||
"compile", help="Benchmark parsing and compiling to bytecode"
|
"compile", help="Benchmark parsing and compiling to bytecode"
|
||||||
)
|
)
|
||||||
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
|
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
|
||||||
command_check = subcommands.add_parser(
|
|
||||||
"check", help="Benchmark parsing and throwing the tree away"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def benchmark(func):
|
def benchmark(func):
|
||||||
|
@ -66,22 +63,20 @@ def benchmark(func):
|
||||||
@benchmark
|
@benchmark
|
||||||
def time_compile(source, parser):
|
def time_compile(source, parser):
|
||||||
if parser == "cpython":
|
if parser == "cpython":
|
||||||
return compile(source, os.path.join("data", "xxl.py"), "exec")
|
return _peg_parser.compile_string(
|
||||||
|
source,
|
||||||
|
oldparser=True,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
return parse.parse_string(source, mode=2)
|
return _peg_parser.compile_string(source)
|
||||||
|
|
||||||
|
|
||||||
@benchmark
|
@benchmark
|
||||||
def time_parse(source, parser):
|
def time_parse(source, parser):
|
||||||
if parser == "cpython":
|
if parser == "cpython":
|
||||||
return ast.parse(source, os.path.join("data", "xxl.py"), "exec")
|
return _peg_parser.parse_string(source, oldparser=True)
|
||||||
else:
|
else:
|
||||||
return parse.parse_string(source, mode=1)
|
return _peg_parser.parse_string(source)
|
||||||
|
|
||||||
|
|
||||||
@benchmark
|
|
||||||
def time_check(source):
|
|
||||||
return parse.parse_string(source, mode=0)
|
|
||||||
|
|
||||||
|
|
||||||
def run_benchmark_xxl(subcommand, parser, source):
|
def run_benchmark_xxl(subcommand, parser, source):
|
||||||
|
@ -89,32 +84,20 @@ def run_benchmark_xxl(subcommand, parser, source):
|
||||||
time_compile(source, parser)
|
time_compile(source, parser)
|
||||||
elif subcommand == "parse":
|
elif subcommand == "parse":
|
||||||
time_parse(source, parser)
|
time_parse(source, parser)
|
||||||
elif subcommand == "check":
|
|
||||||
time_check(source)
|
|
||||||
|
|
||||||
|
|
||||||
def run_benchmark_stdlib(subcommand, parser):
|
def run_benchmark_stdlib(subcommand, parser):
|
||||||
modes = {"compile": 2, "parse": 1, "check": 0}
|
|
||||||
extension = None
|
|
||||||
if parser == "pegen":
|
|
||||||
extension = build_c_parser_and_generator(
|
|
||||||
"../../Grammar/python.gram",
|
|
||||||
"../../Grammar/Tokens",
|
|
||||||
"peg_extension/parse.c",
|
|
||||||
compile_extension=True,
|
|
||||||
skip_actions=False,
|
|
||||||
)
|
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
parse_directory(
|
parse_directory(
|
||||||
"../../Lib",
|
"../../Lib",
|
||||||
"../../Grammar/python.gram",
|
"../../Grammar/python.gram",
|
||||||
|
"../../Grammar/Tokens",
|
||||||
verbose=False,
|
verbose=False,
|
||||||
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
|
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
|
||||||
skip_actions=False,
|
skip_actions=False,
|
||||||
tree_arg=0,
|
tree_arg=0,
|
||||||
short=True,
|
short=True,
|
||||||
extension=extension,
|
mode=2 if subcommand == "compile" else 1,
|
||||||
mode=modes[subcommand],
|
|
||||||
parser=parser,
|
parser=parser,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -127,8 +110,6 @@ def main():
|
||||||
|
|
||||||
if subcommand is None:
|
if subcommand is None:
|
||||||
argparser.error("A benchmark to run is required")
|
argparser.error("A benchmark to run is required")
|
||||||
if subcommand == "check" and parser == "cpython":
|
|
||||||
argparser.error("Cannot use check target with the CPython parser")
|
|
||||||
|
|
||||||
if target == "xxl":
|
if target == "xxl":
|
||||||
with open(os.path.join("data", "xxl.py"), "r") as f:
|
with open(os.path.join("data", "xxl.py"), "r") as f:
|
||||||
|
|
|
@ -30,6 +30,8 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
import _peg_parser
|
||||||
|
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
sys.path.insert(0, os.getcwd())
|
sys.path.insert(0, os.getcwd())
|
||||||
|
@ -72,7 +74,7 @@ def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:
|
||||||
|
|
||||||
|
|
||||||
def show_parse(source: str, verbose: bool = False) -> str:
|
def show_parse(source: str, verbose: bool = False) -> str:
|
||||||
tree = ast.parse(source)
|
tree = _peg_parser.parse_string(source, oldparser=True)
|
||||||
return format_tree(tree, verbose).rstrip("\n")
|
return format_tree(tree, verbose).rstrip("\n")
|
||||||
|
|
||||||
|
|
||||||
|
@ -90,17 +92,11 @@ def main() -> None:
|
||||||
sep = " "
|
sep = " "
|
||||||
program = sep.join(args.program)
|
program = sep.join(args.program)
|
||||||
if args.grammar_file:
|
if args.grammar_file:
|
||||||
sys.path.insert(0, os.curdir)
|
tree = _peg_parser.parse_string(program)
|
||||||
from pegen.build import build_parser_and_generator
|
|
||||||
|
|
||||||
build_parser_and_generator(args.grammar_file, "peg_parser/parse.c", compile_extension=True)
|
|
||||||
from pegen.parse import parse_string # type: ignore[import]
|
|
||||||
|
|
||||||
tree = parse_string(program, mode=1)
|
|
||||||
|
|
||||||
if args.diff:
|
if args.diff:
|
||||||
a = tree
|
a = tree
|
||||||
b = ast.parse(program)
|
b = _peg_parser.parse_string(program, oldparser=True)
|
||||||
diff = diff_trees(a, b, args.verbose)
|
diff = diff_trees(a, b, args.verbose)
|
||||||
if diff:
|
if diff:
|
||||||
for line in diff:
|
for line in diff:
|
||||||
|
@ -111,8 +107,8 @@ def main() -> None:
|
||||||
print(f"# Parsed using {args.grammar_file}")
|
print(f"# Parsed using {args.grammar_file}")
|
||||||
print(format_tree(tree, args.verbose))
|
print(format_tree(tree, args.verbose))
|
||||||
else:
|
else:
|
||||||
tree = ast.parse(program)
|
tree = _peg_parser.parse_string(program, oldparser=True)
|
||||||
print("# Parse using ast.parse()")
|
print("# Parse using the old parser")
|
||||||
print(format_tree(tree, args.verbose))
|
print(format_tree(tree, args.verbose))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,13 +6,14 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
import tokenize
|
||||||
|
import _peg_parser
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from pathlib import PurePath
|
from pathlib import PurePath
|
||||||
|
|
||||||
from typing import List, Optional, Any
|
from typing import List, Optional, Any
|
||||||
|
|
||||||
sys.path.insert(0, os.getcwd())
|
sys.path.insert(0, os.getcwd())
|
||||||
from pegen.build import build_c_parser_and_generator
|
|
||||||
from pegen.ast_dump import ast_dump
|
from pegen.ast_dump import ast_dump
|
||||||
from pegen.testutil import print_memstats
|
from pegen.testutil import print_memstats
|
||||||
from scripts import show_parse
|
from scripts import show_parse
|
||||||
|
@ -83,7 +84,7 @@ def compare_trees(
|
||||||
actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
|
actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
|
||||||
) -> int:
|
) -> int:
|
||||||
with open(file) as f:
|
with open(file) as f:
|
||||||
expected_tree = ast.parse(f.read())
|
expected_tree = _peg_parser.parse_string(f.read(), oldparser=True)
|
||||||
|
|
||||||
expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
|
expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
|
||||||
actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
|
actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
|
||||||
|
@ -121,7 +122,6 @@ def parse_directory(
|
||||||
skip_actions: bool,
|
skip_actions: bool,
|
||||||
tree_arg: int,
|
tree_arg: int,
|
||||||
short: bool,
|
short: bool,
|
||||||
extension: Any,
|
|
||||||
mode: int,
|
mode: int,
|
||||||
parser: str,
|
parser: str,
|
||||||
) -> int:
|
) -> int:
|
||||||
|
@ -137,47 +137,21 @@ def parse_directory(
|
||||||
if not os.path.exists(grammar_file):
|
if not os.path.exists(grammar_file):
|
||||||
print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
|
print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
try:
|
|
||||||
if not extension and parser == "pegen":
|
|
||||||
build_c_parser_and_generator(
|
|
||||||
grammar_file,
|
|
||||||
tokens_file,
|
|
||||||
"peg_extension/parse.c",
|
|
||||||
compile_extension=True,
|
|
||||||
skip_actions=skip_actions,
|
|
||||||
)
|
|
||||||
except Exception as err:
|
|
||||||
print(
|
|
||||||
f"{FAIL}The following error occurred when generating the parser. Please check your grammar file.\n{ENDC}",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
traceback.print_exception(err.__class__, err, None)
|
|
||||||
|
|
||||||
return 1
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
"A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
|
"A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
if parser == "pegen":
|
if tree_arg:
|
||||||
try:
|
assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees"
|
||||||
from peg_extension import parse # type: ignore
|
|
||||||
except Exception as e:
|
|
||||||
print(
|
|
||||||
"An existing parser was not found. Please run `make` or specify a grammar file with the `-g` flag.",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# For a given directory, traverse files and attempt to parse each one
|
# For a given directory, traverse files and attempt to parse each one
|
||||||
# - Output success/failure for each file
|
# - Output success/failure for each file
|
||||||
errors = 0
|
errors = 0
|
||||||
files = []
|
files = []
|
||||||
trees = {} # Trees to compare (after everything else is done)
|
trees = {} # Trees to compare (after everything else is done)
|
||||||
|
total_seconds = 0
|
||||||
|
|
||||||
t0 = time.time()
|
|
||||||
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
|
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
|
||||||
# Only attempt to parse Python files and files that are not excluded
|
# Only attempt to parse Python files and files that are not excluded
|
||||||
should_exclude_file = False
|
should_exclude_file = False
|
||||||
|
@ -187,25 +161,31 @@ def parse_directory(
|
||||||
break
|
break
|
||||||
|
|
||||||
if not should_exclude_file:
|
if not should_exclude_file:
|
||||||
|
with tokenize.open(file) as f:
|
||||||
|
source = f.read()
|
||||||
try:
|
try:
|
||||||
if tree_arg:
|
t0 = time.time()
|
||||||
mode = 1
|
if mode == 2:
|
||||||
if parser == "cpython":
|
result = _peg_parser.compile_string(
|
||||||
with open(file, "r") as f:
|
source,
|
||||||
source = f.read()
|
filename=file,
|
||||||
if mode == 2:
|
oldparser=parser == "cpython",
|
||||||
compile(source, file, "exec")
|
)
|
||||||
elif mode == 1:
|
|
||||||
ast.parse(source, file, "exec")
|
|
||||||
else:
|
else:
|
||||||
tree = parse.parse_file(file, mode=mode)
|
result = _peg_parser.parse_string(
|
||||||
|
source,
|
||||||
|
filename=file,
|
||||||
|
oldparser=parser == "cpython"
|
||||||
|
)
|
||||||
|
t1 = time.time()
|
||||||
|
total_seconds += (t1 - t0)
|
||||||
if tree_arg:
|
if tree_arg:
|
||||||
trees[file] = tree
|
trees[file] = result
|
||||||
if not short:
|
if not short:
|
||||||
report_status(succeeded=True, file=file, verbose=verbose)
|
report_status(succeeded=True, file=file, verbose=verbose)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
try:
|
try:
|
||||||
ast.parse(file)
|
_peg_parser.parse_string(source, mode="exec", oldparser=True)
|
||||||
except Exception:
|
except Exception:
|
||||||
if not short:
|
if not short:
|
||||||
print(f"File {file} cannot be parsed by either pegen or the ast module.")
|
print(f"File {file} cannot be parsed by either pegen or the ast module.")
|
||||||
|
@ -217,7 +197,6 @@ def parse_directory(
|
||||||
files.append(file)
|
files.append(file)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
|
|
||||||
total_seconds = t1 - t0
|
|
||||||
total_files = len(files)
|
total_files = len(files)
|
||||||
|
|
||||||
total_bytes = 0
|
total_bytes = 0
|
||||||
|
@ -238,13 +217,6 @@ def parse_directory(
|
||||||
f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
|
f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
|
||||||
)
|
)
|
||||||
|
|
||||||
if parser == "pegen":
|
|
||||||
# Dump memo stats to @data.
|
|
||||||
with open("@data", "w") as datafile:
|
|
||||||
for i, count in enumerate(parse.get_memo_stats()):
|
|
||||||
if count:
|
|
||||||
datafile.write(f"{i:4d} {count:9d}\n")
|
|
||||||
|
|
||||||
if short:
|
if short:
|
||||||
print_memstats()
|
print_memstats()
|
||||||
|
|
||||||
|
@ -275,6 +247,7 @@ def main() -> None:
|
||||||
skip_actions = args.skip_actions
|
skip_actions = args.skip_actions
|
||||||
tree = args.tree
|
tree = args.tree
|
||||||
short = args.short
|
short = args.short
|
||||||
|
mode = 1 if args.tree else 2
|
||||||
sys.exit(
|
sys.exit(
|
||||||
parse_directory(
|
parse_directory(
|
||||||
directory,
|
directory,
|
||||||
|
@ -285,8 +258,7 @@ def main() -> None:
|
||||||
skip_actions,
|
skip_actions,
|
||||||
tree,
|
tree,
|
||||||
short,
|
short,
|
||||||
None,
|
mode,
|
||||||
0,
|
|
||||||
"pegen",
|
"pegen",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -54,7 +54,7 @@ def find_dirname(package_name: str) -> str:
|
||||||
assert False # This is to fix mypy, should never be reached
|
assert False # This is to fix mypy, should never be reached
|
||||||
|
|
||||||
|
|
||||||
def run_tests(dirname: str, tree: int, extension: Any) -> int:
|
def run_tests(dirname: str, tree: int) -> int:
|
||||||
return test_parse_directory.parse_directory(
|
return test_parse_directory.parse_directory(
|
||||||
dirname,
|
dirname,
|
||||||
HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
|
HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
|
||||||
|
@ -72,7 +72,6 @@ def run_tests(dirname: str, tree: int, extension: Any) -> int:
|
||||||
skip_actions=False,
|
skip_actions=False,
|
||||||
tree_arg=tree,
|
tree_arg=tree,
|
||||||
short=True,
|
short=True,
|
||||||
extension=extension,
|
|
||||||
mode=1,
|
mode=1,
|
||||||
parser="pegen",
|
parser="pegen",
|
||||||
)
|
)
|
||||||
|
@ -82,13 +81,6 @@ def main() -> None:
|
||||||
args = argparser.parse_args()
|
args = argparser.parse_args()
|
||||||
tree = args.tree
|
tree = args.tree
|
||||||
|
|
||||||
extension = build.build_c_parser_and_generator(
|
|
||||||
HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
|
|
||||||
HERE / ".." / ".." / ".." / "Grammar" / "Tokens",
|
|
||||||
"peg_extension/parse.c",
|
|
||||||
compile_extension=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
for package in get_packages():
|
for package in get_packages():
|
||||||
print(f"Extracting files from {package}... ", end="")
|
print(f"Extracting files from {package}... ", end="")
|
||||||
try:
|
try:
|
||||||
|
@ -100,7 +92,7 @@ def main() -> None:
|
||||||
|
|
||||||
print(f"Trying to parse all python files ... ")
|
print(f"Trying to parse all python files ... ")
|
||||||
dirname = find_dirname(package)
|
dirname = find_dirname(package)
|
||||||
status = run_tests(dirname, tree, extension)
|
status = run_tests(dirname, tree)
|
||||||
if status == 0:
|
if status == 0:
|
||||||
shutil.rmtree(dirname)
|
shutil.rmtree(dirname)
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue