mirror of
https://github.com/python/cpython.git
synced 2025-08-30 13:38:43 +00:00
gh-132661: Implement PEP 750 (#132662)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Wingy <git@wingysam.xyz> Co-authored-by: Koudai Aono <koxudaxi@gmail.com> Co-authored-by: Dave Peck <davepeck@gmail.com> Co-authored-by: Terry Jan Reedy <tjreedy@udel.edu> Co-authored-by: Paul Everitt <pauleveritt@me.com> Co-authored-by: sobolevn <mail@sobolevn.me>
This commit is contained in:
parent
5ea9010e89
commit
60202609a2
81 changed files with 7716 additions and 3761 deletions
|
@ -965,9 +965,21 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
|
|||
if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
|
||||
return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
|
||||
conv_token, conv,
|
||||
"f-string: conversion type must come right after the exclamanation mark"
|
||||
"%c-string: conversion type must come right after the exclamanation mark",
|
||||
TOK_GET_STRING_PREFIX(p->tok)
|
||||
);
|
||||
}
|
||||
|
||||
Py_UCS4 first = PyUnicode_READ_CHAR(conv->v.Name.id, 0);
|
||||
if (PyUnicode_GET_LENGTH(conv->v.Name.id) > 1 ||
|
||||
!(first == 's' || first == 'r' || first == 'a')) {
|
||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conv,
|
||||
"%c-string: invalid conversion character %R: expected 's', 'r', or 'a'",
|
||||
TOK_GET_STRING_PREFIX(p->tok),
|
||||
conv->v.Name.id);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return result_token_with_metadata(p, conv, conv_token->metadata);
|
||||
}
|
||||
|
||||
|
@ -1070,6 +1082,9 @@ _PyPegen_get_expr_name(expr_ty e)
|
|||
case JoinedStr_kind:
|
||||
case FormattedValue_kind:
|
||||
return "f-string expression";
|
||||
case TemplateStr_kind:
|
||||
case Interpolation_kind:
|
||||
return "t-string expression";
|
||||
case Constant_kind: {
|
||||
PyObject *value = e->v.Constant.value;
|
||||
if (value == Py_None) {
|
||||
|
@ -1279,20 +1294,13 @@ _PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* tok
|
|||
p->arena);
|
||||
}
|
||||
|
||||
expr_ty
|
||||
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b) {
|
||||
|
||||
/* The parser might put multiple f-string values into an individual
|
||||
* JoinedStr node at the top level due to stuff like f-string debugging
|
||||
* expressions. This function flattens those and promotes them to the
|
||||
* upper level. Only simplifies AST, but the compiler already takes care
|
||||
* of the regular output, so this is not necessary if you are not going
|
||||
* to expose the output AST to Python level. */
|
||||
|
||||
Py_ssize_t n_items = asdl_seq_LEN(expr);
|
||||
static asdl_expr_seq *
|
||||
_get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b, enum string_kind_t string_kind)
|
||||
{
|
||||
Py_ssize_t n_items = asdl_seq_LEN(raw_expressions);
|
||||
Py_ssize_t total_items = n_items;
|
||||
for (Py_ssize_t i = 0; i < n_items; i++) {
|
||||
expr_ty item = asdl_seq_GET(expr, i);
|
||||
expr_ty item = asdl_seq_GET(raw_expressions, i);
|
||||
if (item->kind == JoinedStr_kind) {
|
||||
total_items += asdl_seq_LEN(item->v.JoinedStr.values) - 1;
|
||||
}
|
||||
|
@ -1311,17 +1319,19 @@ _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b) {
|
|||
|
||||
Py_ssize_t index = 0;
|
||||
for (Py_ssize_t i = 0; i < n_items; i++) {
|
||||
expr_ty item = asdl_seq_GET(expr, i);
|
||||
expr_ty item = asdl_seq_GET(raw_expressions, i);
|
||||
|
||||
// This should correspond to a JoinedStr node of two elements
|
||||
// created _PyPegen_formatted_value. This situation can only be the result of
|
||||
// a f-string debug expression where the first element is a constant with the text and the second
|
||||
// a (f|t)-string debug expression where the first element is a constant with the text and the second
|
||||
// a formatted value with the expression.
|
||||
if (item->kind == JoinedStr_kind) {
|
||||
asdl_expr_seq *values = item->v.JoinedStr.values;
|
||||
if (asdl_seq_LEN(values) != 2) {
|
||||
PyErr_Format(PyExc_SystemError,
|
||||
"unexpected JoinedStr node without debug data in f-string at line %d",
|
||||
string_kind == TSTRING
|
||||
? "unexpected TemplateStr node without debug data in t-string at line %d"
|
||||
: "unexpected JoinedStr node without debug data in f-string at line %d",
|
||||
item->lineno);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1331,7 +1341,7 @@ _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b) {
|
|||
asdl_seq_SET(seq, index++, first);
|
||||
|
||||
expr_ty second = asdl_seq_GET(values, 1);
|
||||
assert(second->kind == FormattedValue_kind);
|
||||
assert((string_kind == TSTRING && second->kind == Interpolation_kind) || second->kind == FormattedValue_kind);
|
||||
asdl_seq_SET(seq, index++, second);
|
||||
|
||||
continue;
|
||||
|
@ -1367,7 +1377,22 @@ _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b) {
|
|||
else {
|
||||
resized_exprs = seq;
|
||||
}
|
||||
return resized_exprs;
|
||||
}
|
||||
|
||||
expr_ty
|
||||
_PyPegen_template_str(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b) {
|
||||
|
||||
asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, TSTRING);
|
||||
return _PyAST_TemplateStr(resized_exprs, a->lineno, a->col_offset,
|
||||
b->end_lineno, b->end_col_offset,
|
||||
p->arena);
|
||||
}
|
||||
|
||||
expr_ty
|
||||
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
|
||||
|
||||
asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, FSTRING);
|
||||
return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset,
|
||||
b->end_lineno, b->end_col_offset,
|
||||
p->arena);
|
||||
|
@ -1434,29 +1459,101 @@ expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
|
|||
return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
|
||||
}
|
||||
|
||||
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
|
||||
ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
|
||||
int end_lineno, int end_col_offset, PyArena *arena) {
|
||||
int conversion_val = -1;
|
||||
static int
|
||||
_get_interpolation_conversion(Parser *p, Token *debug, ResultTokenWithMetadata *conversion,
|
||||
ResultTokenWithMetadata *format)
|
||||
{
|
||||
if (conversion != NULL) {
|
||||
expr_ty conversion_expr = (expr_ty) conversion->result;
|
||||
assert(conversion_expr->kind == Name_kind);
|
||||
Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);
|
||||
|
||||
if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 ||
|
||||
!(first == 's' || first == 'r' || first == 'a')) {
|
||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr,
|
||||
"f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
|
||||
conversion_expr->v.Name.id);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
conversion_val = Py_SAFE_DOWNCAST(first, Py_UCS4, int);
|
||||
return Py_SAFE_DOWNCAST(first, Py_UCS4, int);
|
||||
}
|
||||
else if (debug && !format) {
|
||||
/* If no conversion is specified, use !r for debug expressions */
|
||||
conversion_val = (int)'r';
|
||||
return (int)'r';
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Return a new string holding `exprstr` with every trailing whitespace
 * character and every trailing '=' removed.
 *
 * The result comes from PyUnicode_Substring(), so it is whatever that call
 * returns for the range [0, trimmed_length).
 */
static PyObject *
_strip_interpolation_expr(PyObject *exprstr)
{
    Py_ssize_t keep = PyUnicode_GET_LENGTH(exprstr);

    /* Walk backwards from the end, shrinking the kept prefix until a
       character that is neither whitespace nor '=' is reached. */
    while (keep > 0) {
        Py_UCS4 last = PyUnicode_READ_CHAR(exprstr, keep - 1);
        if (!_PyUnicode_IsWhitespace(last) && last != '=') {
            break;
        }
        keep--;
    }

    return PyUnicode_Substring(exprstr, 0, keep);
}
|
||||
|
||||
/* Build the AST for one t-string replacement field.
 *
 * The conversion value comes from _get_interpolation_conversion().  The
 * expression text stored on the Interpolation node is the metadata of the
 * token that follows the expression (the conversion, else the format spec,
 * else the closing brace), with trailing whitespace and '=' stripped by
 * _strip_interpolation_expr().
 *
 * Without `debug`, the Interpolation node itself is returned.  With `debug`
 * (the `{expr=}` form), the result is a two-element JoinedStr:
 * a Constant carrying the unstripped debug text, followed by the
 * Interpolation node.
 *
 * Returns NULL on failure.
 */
expr_ty _PyPegen_interpolation(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
                               ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
                               int end_lineno, int end_col_offset, PyArena *arena) {

    int conversion_val = _get_interpolation_conversion(p, debug, conversion, format);

    /* Find the non whitespace token after the "=" */
    int debug_end_line, debug_end_offset;
    PyObject *debug_metadata;
    constant exprstr;

    if (conversion) {
        debug_end_line = ((expr_ty) conversion->result)->lineno;
        debug_end_offset = ((expr_ty) conversion->result)->col_offset;
        debug_metadata = exprstr = conversion->metadata;
    }
    else if (format) {
        debug_end_line = ((expr_ty) format->result)->lineno;
        debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
        debug_metadata = exprstr = format->metadata;
    }
    else {
        debug_end_line = end_lineno;
        debug_end_offset = end_col_offset;
        debug_metadata = exprstr = closing_brace->metadata;
    }

    assert(exprstr != NULL);
    PyObject *final_exprstr = _strip_interpolation_expr(exprstr);
    /* The arena takes over the reference on success; on failure it is
       released here. */
    if (!final_exprstr || _PyArena_AddPyObject(arena, final_exprstr) < 0) {
        Py_XDECREF(final_exprstr);
        return NULL;
    }

    expr_ty interpolation = _PyAST_Interpolation(
        expression, final_exprstr, conversion_val, format ? (expr_ty) format->result : NULL,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );
    /* Do not wrap a failed node in the debug JoinedStr below. */
    if (!interpolation) {
        return NULL;
    }

    if (!debug) {
        return interpolation;
    }

    expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
                                         debug_end_offset - 1, p->arena);
    if (!debug_text) {
        return NULL;
    }

    asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
    /* Check the allocation before writing into the sequence. */
    if (values == NULL) {
        return NULL;
    }
    asdl_seq_SET(values, 0, debug_text);
    asdl_seq_SET(values, 1, interpolation);
    return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
}
|
||||
|
||||
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
|
||||
ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
|
||||
int end_lineno, int end_col_offset, PyArena *arena) {
|
||||
int conversion_val = _get_interpolation_conversion(p, debug, conversion, format);
|
||||
|
||||
expr_ty formatted_value = _PyAST_FormattedValue(
|
||||
expression, conversion_val, format ? (expr_ty) format->result : NULL,
|
||||
|
@ -1464,108 +1561,137 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re
|
|||
end_col_offset, arena
|
||||
);
|
||||
|
||||
if (debug) {
|
||||
/* Find the non whitespace token after the "=" */
|
||||
int debug_end_line, debug_end_offset;
|
||||
PyObject *debug_metadata;
|
||||
|
||||
if (conversion) {
|
||||
debug_end_line = ((expr_ty) conversion->result)->lineno;
|
||||
debug_end_offset = ((expr_ty) conversion->result)->col_offset;
|
||||
debug_metadata = conversion->metadata;
|
||||
}
|
||||
else if (format) {
|
||||
debug_end_line = ((expr_ty) format->result)->lineno;
|
||||
debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
|
||||
debug_metadata = format->metadata;
|
||||
}
|
||||
else {
|
||||
debug_end_line = end_lineno;
|
||||
debug_end_offset = end_col_offset;
|
||||
debug_metadata = closing_brace->metadata;
|
||||
}
|
||||
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
|
||||
debug_end_offset - 1, p->arena);
|
||||
if (!debug_text) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
|
||||
if (values == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
asdl_seq_SET(values, 0, debug_text);
|
||||
asdl_seq_SET(values, 1, formatted_value);
|
||||
return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
|
||||
}
|
||||
else {
|
||||
if (!debug) {
|
||||
return formatted_value;
|
||||
}
|
||||
|
||||
/* Find the non whitespace token after the "=" */
|
||||
int debug_end_line, debug_end_offset;
|
||||
PyObject *debug_metadata;
|
||||
|
||||
if (conversion) {
|
||||
debug_end_line = ((expr_ty) conversion->result)->lineno;
|
||||
debug_end_offset = ((expr_ty) conversion->result)->col_offset;
|
||||
debug_metadata = conversion->metadata;
|
||||
}
|
||||
else if (format) {
|
||||
debug_end_line = ((expr_ty) format->result)->lineno;
|
||||
debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
|
||||
debug_metadata = format->metadata;
|
||||
}
|
||||
else {
|
||||
debug_end_line = end_lineno;
|
||||
debug_end_offset = end_col_offset;
|
||||
debug_metadata = closing_brace->metadata;
|
||||
}
|
||||
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
|
||||
debug_end_offset - 1, p->arena);
|
||||
if (!debug_text) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
|
||||
asdl_seq_SET(values, 0, debug_text);
|
||||
asdl_seq_SET(values, 1, formatted_value);
|
||||
return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
|
||||
}
|
||||
|
||||
expr_ty
|
||||
_PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
||||
int lineno, int col_offset, int end_lineno,
|
||||
int end_col_offset, PyArena *arena)
|
||||
static expr_ty
|
||||
_build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno,
|
||||
int col_offset, int end_lineno, int end_col_offset,
|
||||
PyArena *arena)
|
||||
{
|
||||
Py_ssize_t len = asdl_seq_LEN(strings);
|
||||
assert(len > 0);
|
||||
|
||||
int f_string_found = 0;
|
||||
int unicode_string_found = 0;
|
||||
int bytes_found = 0;
|
||||
PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
|
||||
|
||||
Py_ssize_t i = 0;
|
||||
Py_ssize_t n_flattened_elements = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
/* Bytes literals never get a kind, but just for consistency
|
||||
since they are represented as Constant nodes, we'll mirror
|
||||
the same behavior as unicode strings for determining the
|
||||
kind. */
|
||||
PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
expr_ty elem = asdl_seq_GET(strings, i);
|
||||
switch(elem->kind) {
|
||||
case Constant_kind:
|
||||
if (PyBytes_CheckExact(elem->v.Constant.value)) {
|
||||
bytes_found = 1;
|
||||
} else {
|
||||
unicode_string_found = 1;
|
||||
}
|
||||
n_flattened_elements++;
|
||||
break;
|
||||
case JoinedStr_kind:
|
||||
n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
|
||||
f_string_found = 1;
|
||||
break;
|
||||
default:
|
||||
n_flattened_elements++;
|
||||
f_string_found = 1;
|
||||
break;
|
||||
}
|
||||
PyBytes_Concat(&res, elem->v.Constant.value);
|
||||
}
|
||||
if (!res || _PyArena_AddPyObject(arena, res) < 0) {
|
||||
Py_XDECREF(res);
|
||||
return NULL;
|
||||
}
|
||||
return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
|
||||
}
|
||||
|
||||
if ((unicode_string_found || f_string_found) && bytes_found) {
|
||||
RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
|
||||
static expr_ty
|
||||
_build_concatenated_unicode(Parser *p, asdl_expr_seq *strings, int lineno,
|
||||
int col_offset, int end_lineno, int end_col_offset,
|
||||
PyArena *arena)
|
||||
{
|
||||
Py_ssize_t len = asdl_seq_LEN(strings);
|
||||
assert(len > 1);
|
||||
|
||||
expr_ty first = asdl_seq_GET(strings, 0);
|
||||
|
||||
/* When a string is getting concatenated, the kind of the string
|
||||
is determined by the first string in the concatenation
|
||||
sequence.
|
||||
|
||||
u"abc" "def" -> u"abcdef"
|
||||
"abc" u"abc" -> "abcabc" */
|
||||
PyObject *kind = first->v.Constant.kind;
|
||||
|
||||
PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
|
||||
if (writer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (bytes_found) {
|
||||
PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
expr_ty current_elem = asdl_seq_GET(strings, i);
|
||||
assert(current_elem->kind == Constant_kind);
|
||||
|
||||
/* Bytes literals never get a kind, but just for consistency
|
||||
since they are represented as Constant nodes, we'll mirror
|
||||
the same behavior as unicode strings for determining the
|
||||
kind. */
|
||||
PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
|
||||
for (i = 0; i < len; i++) {
|
||||
expr_ty elem = asdl_seq_GET(strings, i);
|
||||
PyBytes_Concat(&res, elem->v.Constant.value);
|
||||
}
|
||||
if (!res || _PyArena_AddPyObject(arena, res) < 0) {
|
||||
Py_XDECREF(res);
|
||||
if (PyUnicodeWriter_WriteStr(writer,
|
||||
current_elem->v.Constant.value)) {
|
||||
PyUnicodeWriter_Discard(writer);
|
||||
return NULL;
|
||||
}
|
||||
return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
|
||||
}
|
||||
|
||||
if (!f_string_found && len == 1) {
|
||||
return asdl_seq_GET(strings, 0);
|
||||
PyObject *final = PyUnicodeWriter_Finish(writer);
|
||||
if (final == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (_PyArena_AddPyObject(p->arena, final) < 0) {
|
||||
Py_DECREF(final);
|
||||
return NULL;
|
||||
}
|
||||
return _PyAST_Constant(final, kind, lineno, col_offset,
|
||||
end_lineno, end_col_offset, arena);
|
||||
}
|
||||
|
||||
static asdl_expr_seq *
|
||||
_build_concatenated_str(Parser *p, asdl_expr_seq *strings,
|
||||
int lineno, int col_offset, int end_lineno,
|
||||
int end_col_offset, PyArena *arena)
|
||||
{
|
||||
Py_ssize_t len = asdl_seq_LEN(strings);
|
||||
assert(len > 0);
|
||||
|
||||
Py_ssize_t n_flattened_elements = 0;
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
expr_ty elem = asdl_seq_GET(strings, i);
|
||||
switch(elem->kind) {
|
||||
case JoinedStr_kind:
|
||||
n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
|
||||
break;
|
||||
case TemplateStr_kind:
|
||||
n_flattened_elements += asdl_seq_LEN(elem->v.TemplateStr.values);
|
||||
break;
|
||||
default:
|
||||
n_flattened_elements++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena);
|
||||
if (flattened == NULL) {
|
||||
|
@ -1574,12 +1700,11 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
|||
|
||||
/* build flattened list */
|
||||
Py_ssize_t current_pos = 0;
|
||||
Py_ssize_t j = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
expr_ty elem = asdl_seq_GET(strings, i);
|
||||
switch(elem->kind) {
|
||||
case JoinedStr_kind:
|
||||
for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
|
||||
for (Py_ssize_t j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
|
||||
expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
|
||||
if (subvalue == NULL) {
|
||||
return NULL;
|
||||
|
@ -1587,6 +1712,15 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
|||
asdl_seq_SET(flattened, current_pos++, subvalue);
|
||||
}
|
||||
break;
|
||||
case TemplateStr_kind:
|
||||
for (Py_ssize_t j = 0; j < asdl_seq_LEN(elem->v.TemplateStr.values); j++) {
|
||||
expr_ty subvalue = asdl_seq_GET(elem->v.TemplateStr.values, j);
|
||||
if (subvalue == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
asdl_seq_SET(flattened, current_pos++, subvalue);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
asdl_seq_SET(flattened, current_pos++, elem);
|
||||
break;
|
||||
|
@ -1596,13 +1730,13 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
|||
/* calculate folded element count */
|
||||
Py_ssize_t n_elements = 0;
|
||||
int prev_is_constant = 0;
|
||||
for (i = 0; i < n_flattened_elements; i++) {
|
||||
for (Py_ssize_t i = 0; i < n_flattened_elements; i++) {
|
||||
expr_ty elem = asdl_seq_GET(flattened, i);
|
||||
|
||||
/* The concatenation of a FormattedValue and an empty Constant should
|
||||
lead to the FormattedValue itself. Thus, we will not take any empty
|
||||
constants into account, just as in `_PyPegen_joined_str` */
|
||||
if (f_string_found && elem->kind == Constant_kind &&
|
||||
if (elem->kind == Constant_kind &&
|
||||
PyUnicode_CheckExact(elem->v.Constant.value) &&
|
||||
PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0)
|
||||
continue;
|
||||
|
@ -1620,7 +1754,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
|||
|
||||
/* build folded list */
|
||||
current_pos = 0;
|
||||
for (i = 0; i < n_flattened_elements; i++) {
|
||||
for (Py_ssize_t i = 0; i < n_flattened_elements; i++) {
|
||||
expr_ty elem = asdl_seq_GET(flattened, i);
|
||||
|
||||
/* if the current elem and the following are constants,
|
||||
|
@ -1643,6 +1777,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
|||
return NULL;
|
||||
}
|
||||
expr_ty last_elem = elem;
|
||||
Py_ssize_t j;
|
||||
for (j = i; j < n_flattened_elements; j++) {
|
||||
expr_ty current_elem = asdl_seq_GET(flattened, j);
|
||||
if (current_elem->kind == Constant_kind) {
|
||||
|
@ -1676,8 +1811,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
|||
}
|
||||
|
||||
/* Drop all empty constant strings */
|
||||
if (f_string_found &&
|
||||
PyUnicode_CheckExact(elem->v.Constant.value) &&
|
||||
if (PyUnicode_CheckExact(elem->v.Constant.value) &&
|
||||
PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) {
|
||||
continue;
|
||||
}
|
||||
|
@ -1686,15 +1820,95 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
|||
asdl_seq_SET(values, current_pos++, elem);
|
||||
}
|
||||
|
||||
if (!f_string_found) {
|
||||
assert(n_elements == 1);
|
||||
expr_ty elem = asdl_seq_GET(values, 0);
|
||||
assert(elem->kind == Constant_kind);
|
||||
return elem;
|
||||
assert(current_pos == n_elements);
|
||||
return values;
|
||||
}
|
||||
|
||||
static expr_ty
|
||||
_build_concatenated_joined_str(Parser *p, asdl_expr_seq *strings,
|
||||
int lineno, int col_offset, int end_lineno,
|
||||
int end_col_offset, PyArena *arena)
|
||||
{
|
||||
asdl_expr_seq *values = _build_concatenated_str(p, strings, lineno,
|
||||
col_offset, end_lineno, end_col_offset, arena);
|
||||
return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
|
||||
}
|
||||
|
||||
static expr_ty
|
||||
_build_concatenated_template_str(Parser *p, asdl_expr_seq *strings,
|
||||
int lineno, int col_offset, int end_lineno,
|
||||
int end_col_offset, PyArena *arena)
|
||||
{
|
||||
asdl_expr_seq *values = _build_concatenated_str(p, strings, lineno,
|
||||
col_offset, end_lineno, end_col_offset, arena);
|
||||
return _PyAST_TemplateStr(values, lineno, col_offset, end_lineno,
|
||||
end_col_offset, arena);
|
||||
}
|
||||
|
||||
expr_ty
|
||||
_PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
||||
int lineno, int col_offset, int end_lineno,
|
||||
int end_col_offset, PyArena *arena)
|
||||
{
|
||||
Py_ssize_t len = asdl_seq_LEN(strings);
|
||||
assert(len > 0);
|
||||
|
||||
int t_string_found = 0;
|
||||
int f_string_found = 0;
|
||||
int unicode_string_found = 0;
|
||||
int bytes_found = 0;
|
||||
|
||||
Py_ssize_t i = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
expr_ty elem = asdl_seq_GET(strings, i);
|
||||
switch(elem->kind) {
|
||||
case Constant_kind:
|
||||
if (PyBytes_CheckExact(elem->v.Constant.value)) {
|
||||
bytes_found = 1;
|
||||
} else {
|
||||
unicode_string_found = 1;
|
||||
}
|
||||
break;
|
||||
case JoinedStr_kind:
|
||||
f_string_found = 1;
|
||||
break;
|
||||
case TemplateStr_kind:
|
||||
t_string_found = 1;
|
||||
break;
|
||||
default:
|
||||
f_string_found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(current_pos == n_elements);
|
||||
return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
|
||||
// Cannot mix unicode and bytes
|
||||
if ((unicode_string_found || f_string_found || t_string_found) && bytes_found) {
|
||||
RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// If it's only bytes or only unicode string, do a simple concat
|
||||
if (!f_string_found && !t_string_found) {
|
||||
if (len == 1) {
|
||||
return asdl_seq_GET(strings, 0);
|
||||
}
|
||||
else if (bytes_found) {
|
||||
return _build_concatenated_bytes(p, strings, lineno, col_offset,
|
||||
end_lineno, end_col_offset, arena);
|
||||
}
|
||||
else {
|
||||
return _build_concatenated_unicode(p, strings, lineno, col_offset,
|
||||
end_lineno, end_col_offset, arena);
|
||||
}
|
||||
}
|
||||
|
||||
if (t_string_found) {
|
||||
return _build_concatenated_template_str(p, strings, lineno,
|
||||
col_offset, end_lineno, end_col_offset, arena);
|
||||
}
|
||||
|
||||
return _build_concatenated_joined_str(p, strings, lineno,
|
||||
col_offset, end_lineno, end_col_offset, arena);
|
||||
}
|
||||
|
||||
stmt_ty
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue