mirror of
https://github.com/python/cpython.git
synced 2025-08-03 08:34:29 +00:00
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503)
Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
This commit is contained in:
parent
a81849b031
commit
c5fc156852
91 changed files with 27057 additions and 146 deletions
15391
Parser/pegen/parse.c
Normal file
15391
Parser/pegen/parse.c
Normal file
File diff suppressed because it is too large
Load diff
1387
Parser/pegen/parse_string.c
Normal file
1387
Parser/pegen/parse_string.c
Normal file
File diff suppressed because it is too large
Load diff
46
Parser/pegen/parse_string.h
Normal file
46
Parser/pegen/parse_string.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
#ifndef STRINGS_H
|
||||
#define STRINGS_H
|
||||
|
||||
#include <Python.h>
|
||||
#include <Python-ast.h>
|
||||
#include "pegen.h"
|
||||
|
||||
#define EXPRLIST_N_CACHED 64
|
||||
|
||||
typedef struct {
|
||||
/* Incrementally build an array of expr_ty, so be used in an
|
||||
asdl_seq. Cache some small but reasonably sized number of
|
||||
expr_ty's, and then after that start dynamically allocating,
|
||||
doubling the number allocated each time. Note that the f-string
|
||||
f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
|
||||
Constant for the literal 'a'. So you add expr_ty's about twice as
|
||||
fast as you add expressions in an f-string. */
|
||||
|
||||
Py_ssize_t allocated; /* Number we've allocated. */
|
||||
Py_ssize_t size; /* Number we've used. */
|
||||
expr_ty *p; /* Pointer to the memory we're actually
|
||||
using. Will point to 'data' until we
|
||||
start dynamically allocating. */
|
||||
expr_ty data[EXPRLIST_N_CACHED];
|
||||
} ExprList;
|
||||
|
||||
/* The FstringParser is designed to add a mix of strings and
|
||||
f-strings, and concat them together as needed. Ultimately, it
|
||||
generates an expr_ty. */
|
||||
typedef struct {
|
||||
PyObject *last_str;
|
||||
ExprList expr_list;
|
||||
int fmode;
|
||||
} FstringParser;
|
||||
|
||||
void _PyPegen_FstringParser_Init(FstringParser *);
|
||||
int _PyPegen_parsestr(Parser *, const char *, int *, int *, PyObject **,
|
||||
const char **, Py_ssize_t *);
|
||||
int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **,
|
||||
const char *, int, int, Token *, Token *,
|
||||
Token *);
|
||||
int _PyPegen_FstringParser_ConcatAndDel(FstringParser *, PyObject *);
|
||||
expr_ty _PyPegen_FstringParser_Finish(Parser *, FstringParser *, Token *, Token *);
|
||||
void _PyPegen_FstringParser_Dealloc(FstringParser *);
|
||||
|
||||
#endif
|
134
Parser/pegen/peg_api.c
Normal file
134
Parser/pegen/peg_api.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
#include <pegen_interface.h>
|
||||
|
||||
#include "../tokenizer.h"
|
||||
#include "pegen.h"
|
||||
|
||||
mod_ty
|
||||
PyPegen_ASTFromString(const char *str, int mode, PyCompilerFlags *flags, PyArena *arena)
|
||||
{
|
||||
PyObject *filename_ob = PyUnicode_FromString("<string>");
|
||||
if (filename_ob == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
mod_ty result = PyPegen_ASTFromStringObject(str, filename_ob, mode, flags, arena);
|
||||
Py_XDECREF(filename_ob);
|
||||
return result;
|
||||
}
|
||||
|
||||
mod_ty
|
||||
PyPegen_ASTFromStringObject(const char *str, PyObject* filename, int mode, PyCompilerFlags *flags, PyArena *arena)
|
||||
{
|
||||
if (PySys_Audit("compile", "yO", str, filename) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int iflags = flags != NULL ? flags->cf_flags : PyCF_IGNORE_COOKIE;
|
||||
mod_ty result = _PyPegen_run_parser_from_string(str, mode, filename, iflags, arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
mod_ty
|
||||
PyPegen_ASTFromFile(const char *filename, int mode, PyArena *arena)
|
||||
{
|
||||
PyObject *filename_ob = PyUnicode_FromString(filename);
|
||||
if (filename_ob == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mod_ty result = _PyPegen_run_parser_from_file(filename, mode, filename_ob, arena);
|
||||
Py_XDECREF(filename_ob);
|
||||
return result;
|
||||
}
|
||||
|
||||
mod_ty
|
||||
PyPegen_ASTFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
|
||||
const char *enc, const char *ps1, const char* ps2,
|
||||
int *errcode, PyArena *arena)
|
||||
{
|
||||
if (PySys_Audit("compile", "OO", Py_None, filename_ob) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
return _PyPegen_run_parser_from_file_pointer(fp, mode, filename_ob, enc, ps1, ps2,
|
||||
errcode, arena);
|
||||
}
|
||||
|
||||
PyCodeObject *
|
||||
PyPegen_CodeObjectFromString(const char *str, int mode, PyCompilerFlags *flags)
|
||||
{
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyCodeObject *result = NULL;
|
||||
|
||||
PyObject *filename_ob = PyUnicode_FromString("<string>");
|
||||
if (filename_ob == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
mod_ty res = PyPegen_ASTFromString(str, mode, flags, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
result = PyAST_CompileObject(res, filename_ob, NULL, -1, arena);
|
||||
|
||||
error:
|
||||
Py_XDECREF(filename_ob);
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
PyCodeObject *
|
||||
PyPegen_CodeObjectFromFile(const char *filename, int mode)
|
||||
{
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyCodeObject *result = NULL;
|
||||
|
||||
PyObject *filename_ob = PyUnicode_FromString(filename);
|
||||
if (filename_ob == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
mod_ty res = PyPegen_ASTFromFile(filename, mode, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
result = PyAST_CompileObject(res, filename_ob, NULL, -1, arena);
|
||||
|
||||
error:
|
||||
Py_XDECREF(filename_ob);
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
||||
PyCodeObject *
|
||||
PyPegen_CodeObjectFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
|
||||
const char *ps1, const char *ps2, const char *enc,
|
||||
int *errcode)
|
||||
{
|
||||
PyArena *arena = PyArena_New();
|
||||
if (arena == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyCodeObject *result = NULL;
|
||||
|
||||
mod_ty res = PyPegen_ASTFromFileObject(fp, filename_ob, mode, enc, ps1, ps2,
|
||||
errcode, arena);
|
||||
if (res == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
result = PyAST_CompileObject(res, filename_ob, NULL, -1, arena);
|
||||
|
||||
error:
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
1865
Parser/pegen/pegen.c
Normal file
1865
Parser/pegen/pegen.c
Normal file
File diff suppressed because it is too large
Load diff
179
Parser/pegen/pegen.h
Normal file
179
Parser/pegen/pegen.h
Normal file
|
@ -0,0 +1,179 @@
|
|||
#ifndef PEGEN_H
|
||||
#define PEGEN_H
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include <token.h>
|
||||
#include <Python-ast.h>
|
||||
#include <pyarena.h>
|
||||
|
||||
/* One memoization record.  Records link to one another through `next`,
   forming a singly linked chain. */
typedef struct _memo {
    int type;            /* NOTE(review): presumably the rule type that was
                            memoized -- confirm in pegen.c. */
    void *node;          /* Opaque payload stored with the record. */
    int mark;            /* NOTE(review): presumably the parser mark
                            recorded with the result -- confirm in
                            pegen.c. */
    struct _memo *next;  /* Next record in the chain. */
} Memo;
|
||||
|
||||
typedef struct {
|
||||
int type;
|
||||
PyObject *bytes;
|
||||
int lineno, col_offset, end_lineno, end_col_offset;
|
||||
Memo *memo;
|
||||
} Token;
|
||||
|
||||
/* Pairs a keyword's spelling with a token type code. */
typedef struct {
    char *str;  /* Keyword text. */
    int type;   /* Token type code paired with `str`. */
} KeywordToken;
|
||||
|
||||
typedef struct {
|
||||
struct tok_state *tok;
|
||||
Token **tokens;
|
||||
int mark;
|
||||
int fill, size;
|
||||
PyArena *arena;
|
||||
KeywordToken **keywords;
|
||||
int n_keyword_lists;
|
||||
int start_rule;
|
||||
int *errcode;
|
||||
int parsing_started;
|
||||
PyObject* normalize;
|
||||
int starting_lineno;
|
||||
int starting_col_offset;
|
||||
int error_indicator;
|
||||
} Parser;
|
||||
|
||||
typedef struct {
|
||||
cmpop_ty cmpop;
|
||||
expr_ty expr;
|
||||
} CmpopExprPair;
|
||||
|
||||
typedef struct {
|
||||
expr_ty key;
|
||||
expr_ty value;
|
||||
} KeyValuePair;
|
||||
|
||||
typedef struct {
|
||||
arg_ty arg;
|
||||
expr_ty value;
|
||||
} NameDefaultPair;
|
||||
|
||||
typedef struct {
|
||||
asdl_seq *plain_names;
|
||||
asdl_seq *names_with_defaults; // asdl_seq* of NameDefaultsPair's
|
||||
} SlashWithDefault;
|
||||
|
||||
typedef struct {
|
||||
arg_ty vararg;
|
||||
asdl_seq *kwonlyargs; // asdl_seq* of NameDefaultsPair's
|
||||
arg_ty kwarg;
|
||||
} StarEtc;
|
||||
|
||||
typedef struct {
|
||||
operator_ty kind;
|
||||
} AugOperator;
|
||||
|
||||
/* An untyped element plus a flag that tells the two kinds of element
   apart. */
typedef struct {
    void *element;   /* Opaque pointer to the wrapped element. */
    int is_keyword;  /* Flag; going by the names, non-zero marks a keyword
                        element rather than a starred one. */
} KeywordOrStarred;
|
||||
|
||||
void _PyPegen_clear_memo_statistics(void);
|
||||
PyObject *_PyPegen_get_memo_statistics(void);
|
||||
|
||||
int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
|
||||
int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
|
||||
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
|
||||
|
||||
int _PyPegen_lookahead_with_string(int, void *(func)(Parser *, const char *), Parser *, const char *);
|
||||
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
|
||||
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
||||
|
||||
Token *_PyPegen_expect_token(Parser *p, int type);
|
||||
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
|
||||
int _PyPegen_fill_token(Parser *p);
|
||||
void *_PyPegen_async_token(Parser *p);
|
||||
void *_PyPegen_await_token(Parser *p);
|
||||
void *_PyPegen_endmarker_token(Parser *p);
|
||||
expr_ty _PyPegen_name_token(Parser *p);
|
||||
void *_PyPegen_newline_token(Parser *p);
|
||||
void *_PyPegen_indent_token(Parser *p);
|
||||
void *_PyPegen_dedent_token(Parser *p);
|
||||
expr_ty _PyPegen_number_token(Parser *p);
|
||||
void *_PyPegen_string_token(Parser *p);
|
||||
const char *_PyPegen_get_expr_name(expr_ty);
|
||||
void *_PyPegen_raise_error(Parser *p, PyObject *, const char *errmsg, ...);
|
||||
void *_PyPegen_dummy_name(Parser *p, ...);
|
||||
|
||||
/* Evaluate `expr` and discard the result. */
#define UNUSED(expr) do { (void)(expr); } while (0)

/* Expand to five comma-separated arguments: the start line and column
   taken from `head`, the end line and column taken from `tail`, then
   p->arena.  A variable named `p` must be in scope where this is used.
   `head` and `tail` are parenthesized so that any pointer expression
   (for example a dereferenced pointer-to-pointer) can be passed. */
#define EXTRA_EXPR(head, tail) \
    (head)->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena

/* Same five-argument shape as EXTRA_EXPR, but built from variables named
   start_lineno, start_col_offset, end_lineno and end_col_offset, which
   must be in scope together with `p`. */
#define EXTRA start_lineno, start_col_offset, end_lineno, end_col_offset, p->arena

/* Call _PyPegen_raise_error() on `p` with PyExc_SyntaxError, the message
   and any extra arguments.  `##__VA_ARGS__` is the GNU comma-deletion
   extension, so the macro also works with a message alone. */
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)

/* As RAISE_SYNTAX_ERROR, but with PyExc_IndentationError. */
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
|
||||
|
||||
Py_LOCAL_INLINE(void *)
|
||||
CHECK_CALL(Parser *p, void *result)
|
||||
{
|
||||
if (result == NULL) {
|
||||
assert(PyErr_Occurred());
|
||||
p->error_indicator = 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* This is needed for helper functions that are allowed to
|
||||
return NULL without an error. Example: _PyPegen_seq_extract_starred_exprs */
|
||||
Py_LOCAL_INLINE(void *)
|
||||
CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
|
||||
{
|
||||
if (result == NULL && PyErr_Occurred()) {
|
||||
p->error_indicator = 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Shorthands for CHECK_CALL() and CHECK_CALL_NULL_ALLOWED() that pass
   the variable named `p`, which must be in scope, as the parser. */
#define CHECK(result) CHECK_CALL(p, result)
#define CHECK_NULL_ALLOWED(result) CHECK_CALL_NULL_ALLOWED(p, result)
|
||||
|
||||
PyObject *_PyPegen_new_identifier(Parser *, char *);
|
||||
Parser *_PyPegen_Parser_New(struct tok_state *, int, int *, PyArena *);
|
||||
void _PyPegen_Parser_Free(Parser *);
|
||||
mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
|
||||
const char *, const char *, int *, PyArena *);
|
||||
void *_PyPegen_run_parser(Parser *);
|
||||
mod_ty _PyPegen_run_parser_from_file(const char *, int, PyObject *, PyArena *);
|
||||
mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, int, PyArena *);
|
||||
void *_PyPegen_interactive_exit(Parser *);
|
||||
asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
|
||||
asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
|
||||
asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *);
|
||||
expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty);
|
||||
int _PyPegen_seq_count_dots(asdl_seq *);
|
||||
alias_ty _PyPegen_alias_for_star(Parser *);
|
||||
asdl_seq *_PyPegen_map_names_to_ids(Parser *, asdl_seq *);
|
||||
CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty);
|
||||
asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *);
|
||||
asdl_seq *_PyPegen_get_exprs(Parser *, asdl_seq *);
|
||||
expr_ty _PyPegen_set_expr_context(Parser *, expr_ty, expr_context_ty);
|
||||
KeyValuePair *_PyPegen_key_value_pair(Parser *, expr_ty, expr_ty);
|
||||
asdl_seq *_PyPegen_get_keys(Parser *, asdl_seq *);
|
||||
asdl_seq *_PyPegen_get_values(Parser *, asdl_seq *);
|
||||
NameDefaultPair *_PyPegen_name_default_pair(Parser *, arg_ty, expr_ty);
|
||||
SlashWithDefault *_PyPegen_slash_with_default(Parser *, asdl_seq *, asdl_seq *);
|
||||
StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
|
||||
arguments_ty _PyPegen_make_arguments(Parser *, asdl_seq *, SlashWithDefault *,
|
||||
asdl_seq *, asdl_seq *, StarEtc *);
|
||||
arguments_ty _PyPegen_empty_arguments(Parser *);
|
||||
AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
|
||||
stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_seq *, stmt_ty);
|
||||
stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_seq *, stmt_ty);
|
||||
KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
|
||||
asdl_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
|
||||
asdl_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
|
||||
expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
|
||||
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
|
||||
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
|
||||
|
||||
void *_PyPegen_parse(Parser *);
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue