mirror of
https://github.com/python/cpython.git
synced 2025-08-03 08:34:29 +00:00
bpo-41076: Pre-feed the parser with the f-string expression location (GH-21054)
This commit changes the parsing of f-string expressions with the new parser. The parser gets pre-fed with the location of the expression itself (not the f-string, which was what we were doing before). This allows us to completely skip the shifting of the AST nodes after the parsing is completed.
This commit is contained in:
parent
89e82c4a62
commit
1f0f4abb11
5 changed files with 2437 additions and 2653 deletions
|
@ -423,6 +423,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
|||
}
|
||||
}
|
||||
|
||||
if (p->start_rule == Py_fstring_input) {
|
||||
col_offset -= p->starting_col_offset;
|
||||
}
|
||||
Py_ssize_t col_number = col_offset;
|
||||
|
||||
if (p->tok->encoding != NULL) {
|
||||
|
|
|
@ -271,235 +271,6 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
|
|||
|
||||
// FSTRING STUFF
|
||||
|
||||
// Forward declarations: the shift_expr() and shift_arg() helpers call these
// two functions before their definitions appear later in the file.
static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
|
||||
static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
|
||||
|
||||
|
||||
/* Shift the location of expression `n` by `line` lines and `col` columns by
   forwarding to fstring_shift_expr_locations().

   A NULL `n` is silently ignored.  When `n` starts on a later line than
   `parent`, the column shift is replaced by 0 and only the line shift is
   applied. */
static inline void
shift_expr(expr_ty parent, expr_ty n, int line, int col)
{
    if (n != NULL) {
        int col_shift = (parent->lineno < n->lineno) ? 0 : col;
        fstring_shift_expr_locations(n, line, col_shift);
    }
}
|
||||
|
||||
/* Shift the location of argument `n` by `line` lines and `col` columns by
   forwarding to fstring_shift_argument().

   When `n` starts on a later line than `parent`, the column shift is
   replaced by 0.  Unlike shift_expr(), `n` is dereferenced unconditionally,
   so it must not be NULL. */
static inline void
shift_arg(expr_ty parent, arg_ty n, int line, int col)
{
    int col_shift = (parent->lineno < n->lineno) ? 0 : col;
    fstring_shift_argument(parent, n, line, col_shift);
}
|
||||
|
||||
/* Shift every non-NULL expression stored in `seq` by `lineno` lines and
   `col_offset` columns, using `parent` as the enclosing node for the
   per-element line comparison done in shift_expr(). */
static void
fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset)
{
    Py_ssize_t count = asdl_seq_LEN(seq);
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        expr_ty item = asdl_seq_GET(seq, idx);
        if (item != NULL) {
            shift_expr(parent, item, lineno, col_offset);
        }
    }
}
|
||||
|
||||
/* Shift the sub-expressions of a subscript's `slice` by `lineno` lines and
   `col_offset` columns.

   - Slice node: the lower, upper and step expressions that are present.
   - Tuple node: every element of the tuple.
   - Any other kind: nothing is done.

   The `slice` node itself is not modified here. */
static void
fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset)
{
    if (slice->kind == Slice_kind) {
        expr_ty bounds[3] = {
            slice->v.Slice.lower,
            slice->v.Slice.upper,
            slice->v.Slice.step,
        };
        for (int which = 0; which < 3; which++) {
            if (bounds[which] != NULL) {
                shift_expr(parent, bounds[which], lineno, col_offset);
            }
        }
    }
    else if (slice->kind == Tuple_kind) {
        fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
    }
}
|
||||
|
||||
/* Shift all expressions of one comprehension clause -- its target, its
   iterable and each of its `if` conditions -- by `lineno` lines and
   `col_offset` columns.  `parent` is the enclosing expression node. */
static void
fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset)
{
    expr_ty loop_target = comp->target;
    expr_ty iterable = comp->iter;

    shift_expr(parent, loop_target, lineno, col_offset);
    shift_expr(parent, iterable, lineno, col_offset);
    fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
}
|
||||
|
||||
/* Shift the location of `arg` by `lineno` lines and `col_offset` columns.
   If the argument carries an annotation, the annotation expression is
   shifted as well (relative to `parent`). */
static void
fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset)
{
    expr_ty annotation = arg->annotation;
    if (annotation != NULL) {
        shift_expr(parent, annotation, lineno, col_offset);
    }

    arg->lineno += lineno;
    arg->end_lineno += lineno;
    arg->col_offset += col_offset;
    arg->end_col_offset += col_offset;
}
|
||||
|
||||
static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
|
||||
for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
|
||||
arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
|
||||
shift_arg(parent, arg, lineno, col_offset);
|
||||
}
|
||||
|
||||
for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
|
||||
arg_ty arg = asdl_seq_GET(args->args, i);
|
||||
shift_arg(parent, arg, lineno, col_offset);
|
||||
}
|
||||
|
||||
if (args->vararg != NULL) {
|
||||
shift_arg(parent, args->vararg, lineno, col_offset);
|
||||
}
|
||||
|
||||
for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
|
||||
arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
|
||||
shift_arg(parent, arg, lineno, col_offset);
|
||||
}
|
||||
|
||||
fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
|
||||
|
||||
if (args->kwarg != NULL) {
|
||||
shift_arg(parent, args->kwarg, lineno, col_offset);
|
||||
}
|
||||
|
||||
fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
|
||||
}
|
||||
|
||||
static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) {
|
||||
switch (node->kind) {
|
||||
case BoolOp_kind:
|
||||
fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset);
|
||||
break;
|
||||
case NamedExpr_kind:
|
||||
shift_expr(node, node->v.NamedExpr.target, lineno, col_offset);
|
||||
shift_expr(node, node->v.NamedExpr.value, lineno, col_offset);
|
||||
break;
|
||||
case BinOp_kind:
|
||||
shift_expr(node, node->v.BinOp.left, lineno, col_offset);
|
||||
shift_expr(node, node->v.BinOp.right, lineno, col_offset);
|
||||
break;
|
||||
case UnaryOp_kind:
|
||||
shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset);
|
||||
break;
|
||||
case Lambda_kind:
|
||||
fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset);
|
||||
shift_expr(node, node->v.Lambda.body, lineno, col_offset);
|
||||
break;
|
||||
case IfExp_kind:
|
||||
shift_expr(node, node->v.IfExp.test, lineno, col_offset);
|
||||
shift_expr(node, node->v.IfExp.body, lineno, col_offset);
|
||||
shift_expr(node, node->v.IfExp.orelse, lineno, col_offset);
|
||||
break;
|
||||
case Dict_kind:
|
||||
fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset);
|
||||
fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset);
|
||||
break;
|
||||
case Set_kind:
|
||||
fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset);
|
||||
break;
|
||||
case ListComp_kind:
|
||||
shift_expr(node, node->v.ListComp.elt, lineno, col_offset);
|
||||
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.ListComp.generators); i < l; i++) {
|
||||
comprehension_ty comp = asdl_seq_GET(node->v.ListComp.generators, i);
|
||||
fstring_shift_comprehension(node, comp, lineno, col_offset);
|
||||
}
|
||||
break;
|
||||
case SetComp_kind:
|
||||
shift_expr(node, node->v.SetComp.elt, lineno, col_offset);
|
||||
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.SetComp.generators); i < l; i++) {
|
||||
comprehension_ty comp = asdl_seq_GET(node->v.SetComp.generators, i);
|
||||
fstring_shift_comprehension(node, comp, lineno, col_offset);
|
||||
}
|
||||
break;
|
||||
case DictComp_kind:
|
||||
shift_expr(node, node->v.DictComp.key, lineno, col_offset);
|
||||
shift_expr(node, node->v.DictComp.value, lineno, col_offset);
|
||||
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.DictComp.generators); i < l; i++) {
|
||||
comprehension_ty comp = asdl_seq_GET(node->v.DictComp.generators, i);
|
||||
fstring_shift_comprehension(node, comp, lineno, col_offset);
|
||||
}
|
||||
break;
|
||||
case GeneratorExp_kind:
|
||||
shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset);
|
||||
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.GeneratorExp.generators); i < l; i++) {
|
||||
comprehension_ty comp = asdl_seq_GET(node->v.GeneratorExp.generators, i);
|
||||
fstring_shift_comprehension(node, comp, lineno, col_offset);
|
||||
}
|
||||
break;
|
||||
case Await_kind:
|
||||
shift_expr(node, node->v.Await.value, lineno, col_offset);
|
||||
break;
|
||||
case Yield_kind:
|
||||
shift_expr(node, node->v.Yield.value, lineno, col_offset);
|
||||
break;
|
||||
case YieldFrom_kind:
|
||||
shift_expr(node, node->v.YieldFrom.value, lineno, col_offset);
|
||||
break;
|
||||
case Compare_kind:
|
||||
shift_expr(node, node->v.Compare.left, lineno, col_offset);
|
||||
fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset);
|
||||
break;
|
||||
case Call_kind:
|
||||
shift_expr(node, node->v.Call.func, lineno, col_offset);
|
||||
fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset);
|
||||
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.Call.keywords); i < l; i++) {
|
||||
keyword_ty keyword = asdl_seq_GET(node->v.Call.keywords, i);
|
||||
shift_expr(node, keyword->value, lineno, col_offset);
|
||||
}
|
||||
break;
|
||||
case Attribute_kind:
|
||||
shift_expr(node, node->v.Attribute.value, lineno, col_offset);
|
||||
break;
|
||||
case Subscript_kind:
|
||||
shift_expr(node, node->v.Subscript.value, lineno, col_offset);
|
||||
fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset);
|
||||
shift_expr(node, node->v.Subscript.slice, lineno, col_offset);
|
||||
break;
|
||||
case Starred_kind:
|
||||
shift_expr(node, node->v.Starred.value, lineno, col_offset);
|
||||
break;
|
||||
case List_kind:
|
||||
fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset);
|
||||
break;
|
||||
case Tuple_kind:
|
||||
fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset);
|
||||
break;
|
||||
case JoinedStr_kind:
|
||||
fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset);
|
||||
break;
|
||||
case FormattedValue_kind:
|
||||
shift_expr(node, node->v.FormattedValue.value, lineno, col_offset);
|
||||
if (node->v.FormattedValue.format_spec) {
|
||||
shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Shift locations for the given node and all its children by adding `lineno`
|
||||
and `col_offset` to existing locations. Note that n is the already parsed
|
||||
expression. */
|
||||
static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
|
||||
{
|
||||
n->col_offset = n->col_offset + col_offset;
|
||||
|
||||
// The following is needed, in order for nodes spanning across multiple lines
|
||||
// to be shifted correctly. An example of such a node is a Call node, the closing
|
||||
// parenthesis of which is not on the same line as its name.
|
||||
if (n->lineno == n->end_lineno) {
|
||||
n->end_col_offset = n->end_col_offset + col_offset;
|
||||
}
|
||||
|
||||
fstring_shift_children_locations(n, lineno, col_offset);
|
||||
n->lineno = n->lineno + lineno;
|
||||
n->end_lineno = n->end_lineno + lineno;
|
||||
}
|
||||
|
||||
/* Fix locations for the given node and its children.
|
||||
|
||||
`parent` is the enclosing node.
|
||||
|
@ -507,7 +278,7 @@ static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
|
|||
`expr_str` is the child node's string representation, including braces.
|
||||
*/
|
||||
static void
|
||||
fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
|
||||
fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
|
||||
{
|
||||
char *substr = NULL;
|
||||
char *start;
|
||||
|
@ -552,7 +323,8 @@ fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
|
|||
}
|
||||
}
|
||||
}
|
||||
fstring_shift_expr_locations(n, lines, cols);
|
||||
*p_lines = lines;
|
||||
*p_cols = cols;
|
||||
}
|
||||
|
||||
|
||||
|
@ -598,11 +370,26 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
str[0] = '(';
|
||||
// The call to fstring_find_expr_location is responsible for finding the column offset
|
||||
// the generated AST nodes need to be shifted to the right, which is equal to the number
|
||||
// of the f-string characters before the expression starts. In order to correctly compute
|
||||
// this offset, strstr gets called in fstring_find_expr_location which only succeeds
|
||||
// if curly braces appear before and after the f-string expression (exactly like they do
|
||||
// in the f-string itself), hence the following lines.
|
||||
str[0] = '{';
|
||||
memcpy(str+1, expr_start, len);
|
||||
str[len+1] = ')';
|
||||
str[len+1] = '}';
|
||||
str[len+2] = 0;
|
||||
|
||||
int lines, cols;
|
||||
fstring_find_expr_location(t, str, &lines, &cols);
|
||||
|
||||
// The parentheses are needed in order to allow for leading whitespace within
|
||||
// the f-string expression. This consequently gets parsed as a group (see the
|
||||
// group rule in python.gram).
|
||||
str[0] = '(';
|
||||
str[len+1] = ')';
|
||||
|
||||
struct tok_state* tok = PyTokenizer_FromString(str, 1);
|
||||
if (tok == NULL) {
|
||||
PyMem_Free(str);
|
||||
|
@ -613,21 +400,14 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
|
|||
|
||||
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
|
||||
NULL, p->arena);
|
||||
p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
|
||||
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
|
||||
? p->starting_col_offset + t->col_offset : 0;
|
||||
p2->starting_lineno = t->lineno + lines - 1;
|
||||
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno ? t->col_offset + cols : cols;
|
||||
|
||||
expr = _PyPegen_run_parser(p2);
|
||||
|
||||
if (expr == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* Reuse str to find the correct column offset. */
|
||||
str[0] = '{';
|
||||
str[len+1] = '}';
|
||||
fstring_fix_expr_location(t, expr, str);
|
||||
|
||||
result = expr;
|
||||
|
||||
exit:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue