Mirror of https://github.com/python/cpython.git
gh-102856: Initial implementation of PEP 701 (#102855)

Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
Co-authored-by: Marta Gómez Macías <mgmacias@google.com>
Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>

commit 1ef61cf71a (parent a6b07b5a34)
27 changed files with 8859 additions and 6573 deletions
Doc/library/token-list.inc (generated; 10 lines changed)

@@ -201,6 +201,10 @@
    Token value for ``":="``.

+.. data:: EXCLAMATION
+
+   Token value for ``"!"``.
+
 .. data:: OP

 .. data:: AWAIT
@@ -213,6 +217,12 @@
 .. data:: SOFT_KEYWORD

+.. data:: FSTRING_START
+
+.. data:: FSTRING_MIDDLE
+
+.. data:: FSTRING_END
+
 .. data:: ERRORTOKEN

 .. data:: N_TOKENS
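For orientation, the new token names are visible from pure Python once this change landed (Python 3.12+). A minimal sketch, not part of the diff:

    import token

    # The three f-string tokens plus EXCLAMATION, added by PEP 701.
    for name in ("FSTRING_START", "FSTRING_MIDDLE", "FSTRING_END", "EXCLAMATION"):
        value = getattr(token, name)
        # tok_name maps the numeric value back to the symbolic name.
        print(name, value, token.tok_name[value])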
Grammar/Tokens

@@ -53,6 +53,7 @@ ATEQUAL '@='
 RARROW '->'
 ELLIPSIS '...'
 COLONEQUAL ':='
+EXCLAMATION '!'

 OP
 AWAIT
@@ -60,6 +61,9 @@ ASYNC
 TYPE_IGNORE
 TYPE_COMMENT
 SOFT_KEYWORD
+FSTRING_START
+FSTRING_MIDDLE
+FSTRING_END
 ERRORTOKEN

 # These aren't used by the C tokenizer but are needed for tokenize.py
Grammar/python.gram

@@ -194,7 +194,7 @@ yield_stmt[stmt_ty]: y=yield_expr { _PyAST_Expr(y, EXTRA) }
 assert_stmt[stmt_ty]: 'assert' a=expression b=[',' z=expression { z }] { _PyAST_Assert(a, b, EXTRA) }

 import_stmt[stmt_ty]:
     | invalid_import
     | import_name
     | import_from
@@ -415,8 +415,8 @@ try_stmt[stmt_ty]:
     | invalid_try_stmt
     | 'try' &&':' b=block f=finally_block { _PyAST_Try(b, NULL, NULL, f, EXTRA) }
     | 'try' &&':' b=block ex[asdl_excepthandler_seq*]=except_block+ el=[else_block] f=[finally_block] { _PyAST_Try(b, ex, el, f, EXTRA) }
     | 'try' &&':' b=block ex[asdl_excepthandler_seq*]=except_star_block+ el=[else_block] f=[finally_block] {
         CHECK_VERSION(stmt_ty, 11, "Exception groups are",
                       _PyAST_TryStar(b, ex, el, f, EXTRA)) }
@@ -807,7 +807,7 @@ atom[expr_ty]:
     | 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) }
     | 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) }
     | 'None' { _PyAST_Constant(Py_None, NULL, EXTRA) }
-    | &STRING strings
+    | &(STRING|FSTRING_START) strings
     | NUMBER
     | &'(' (tuple | group | genexp)
     | &'[' (list | listcomp)
@@ -877,7 +877,26 @@ lambda_param[arg_ty]: a=NAME { _PyAST_arg(a->v.Name.id, NULL, NULL, EXTRA) }
 # LITERALS
 # ========

-strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) }
+fstring_middle[expr_ty]:
+    | fstring_replacement_field
+    | t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
+fstring_replacement_field[expr_ty]:
+    | '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] '}' {
+        _PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
+    }
+    | invalid_replacement_field
+fstring_conversion[expr_ty]:
+    | conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
+fstring_full_format_spec[expr_ty]:
+    | ':' spec=fstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
+fstring_format_spec[expr_ty]:
+    | t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
+    | fstring_replacement_field
+fstring[expr_ty]:
+    | a=FSTRING_START b=fstring_middle* c=FSTRING_END { _PyPegen_joined_str(p, a, (asdl_expr_seq*)b, c) }
+
+string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_string(p, s) }
+strings[expr_ty] (memo): a[asdl_expr_seq*]=(fstring|string)+ { _PyPegen_concatenate_strings(p, a, EXTRA) }

 list[expr_ty]:
     | '[' a=[star_named_expressions] ']' { _PyAST_List(a, Load, EXTRA) }
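The net effect of these rules is that an f-string is parsed into an ordinary JoinedStr node built from tokens, instead of being re-scanned by a dedicated string parser. A quick way to see what the rules above construct (any recent Python with the ast module; conversion=114 is ord('r')):

    import ast

    tree = ast.parse('f"hello {name!r:>{width}}"', mode="eval")
    # JoinedStr(values=[Constant('hello '),
    #                   FormattedValue(Name('name'), conversion=114,
    #                                  format_spec=JoinedStr([...]))])
    print(ast.dump(tree.body, indent=2))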
@@ -1118,6 +1137,8 @@ invalid_expression:
        _PyPegen_check_legacy_stmt(p, a) ? NULL : p->tokens[p->mark-1]->level == 0 ? NULL :
        RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
    | a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }
+   | a='lambda' [lambda_params] b=':' &(FSTRING_MIDDLE | fstring_replacement_field) {
+        RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "f-string: lambda expressions are not allowed without parentheses") }

 invalid_named_expression(memo):
    | a=expression ':=' expression {
@@ -1241,7 +1262,7 @@ invalid_group:
 invalid_import:
    | a='import' dotted_name 'from' dotted_name {
        RAISE_SYNTAX_ERROR_STARTING_FROM(a, "Did you mean to use 'from ... import ...' instead?") }

 invalid_import_from_targets:
    | import_from_as_names ',' NEWLINE {
        RAISE_SYNTAX_ERROR("trailing comma not allowed without surrounding parentheses") }
@@ -1335,3 +1356,24 @@ invalid_kvpair:
    | expression a=':' &('}'|',') {RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
 invalid_starred_expression:
    | a='*' expression '=' b=expression { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "cannot assign to iterable argument unpacking") }
+
+invalid_replacement_field:
+   | '{' a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '='") }
+   | '{' a='!' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '!'") }
+   | '{' a=':' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before ':'") }
+   | '{' a='}' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '}'") }
+   | '{' !(yield_expr | star_expressions) { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting a valid expression after '{'")}
+   | '{' (yield_expr | star_expressions) !('=' | '!' | ':' | '}') {
+       PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '=', or '!', or ':', or '}'") }
+   | '{' (yield_expr | star_expressions) '=' !('!' | ':' | '}') {
+       PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '!', or ':', or '}'") }
+   | '{' (yield_expr | star_expressions) '='? invalid_conversion_character
+   | '{' (yield_expr | star_expressions) '='? ['!' NAME] !(':' | '}') {
+       PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting ':' or '}'") }
+   | '{' (yield_expr | star_expressions) '='? ['!' NAME] ':' fstring_format_spec* !'}' {
+       PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '}', or format specs") }
+   | '{' (yield_expr | star_expressions) '='? ['!' NAME] !'}' {
+       PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '}'") }
+
+invalid_conversion_character:
+   | '!' &(':' | '}') { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: missing conversion character") }
+   | '!' !NAME { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: invalid conversion character") }
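Because replacement fields now go through the PEG parser, the messages defined in these rules surface directly at compile time. A small demonstration on a 3.12+ interpreter (the exact wording is the one in the rules above):

    for src in ("f'{}'", "f'{!r}'", "f'{x!z}'"):
        try:
            compile(src, "<test>", "eval")
        except SyntaxError as exc:
            # e.g. "f-string: valid expression required before '}'"
            print(f"{src!r}: {exc.msg}")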
Include/token.h

@@ -67,14 +67,18 @@ extern "C" {
 #define RARROW 51
 #define ELLIPSIS 52
 #define COLONEQUAL 53
-#define OP 54
-#define AWAIT 55
-#define ASYNC 56
-#define TYPE_IGNORE 57
-#define TYPE_COMMENT 58
-#define SOFT_KEYWORD 59
-#define ERRORTOKEN 60
-#define N_TOKENS 64
+#define EXCLAMATION 54
+#define OP 55
+#define AWAIT 56
+#define ASYNC 57
+#define TYPE_IGNORE 58
+#define TYPE_COMMENT 59
+#define SOFT_KEYWORD 60
+#define FSTRING_START 61
+#define FSTRING_MIDDLE 62
+#define FSTRING_END 63
+#define ERRORTOKEN 64
+#define N_TOKENS 68
 #define NT_OFFSET 256

 /* Special definitions for cooperation with parser */
@@ -86,6 +90,8 @@ extern "C" {
        (x) == NEWLINE || \
        (x) == INDENT || \
        (x) == DEDENT)
+#define ISSTRINGLIT(x) ((x) == STRING || \
+                        (x) == FSTRING_MIDDLE)

 // Symbols exported for test_peg_generator
Lib/test/test_ast.py

@@ -774,11 +774,6 @@ class AST_Tests(unittest.TestCase):
        ast.parse('with (CtxManager() as example): ...', feature_version=(3, 8))
        ast.parse('with CtxManager() as example: ...', feature_version=(3, 8))

-    def test_debug_f_string_feature_version(self):
-        ast.parse('f"{x=}"', feature_version=(3, 8))
-        with self.assertRaises(SyntaxError):
-            ast.parse('f"{x=}"', feature_version=(3, 7))
-
    def test_assignment_expression_feature_version(self):
        ast.parse('(x := 0)', feature_version=(3, 8))
        with self.assertRaises(SyntaxError):
Lib/test/test_cmd_line_script.py

@@ -636,9 +636,9 @@ class CmdLineTest(unittest.TestCase):
        self.assertEqual(
            stderr.splitlines()[-3:],
            [
-               b'    foo"""',
+               b'    foo = f"""{}',
                b'          ^',
-               b'SyntaxError: f-string: empty expression not allowed',
+               b'SyntaxError: f-string: valid expression required before \'}\'',
            ],
        )
Lib/test/test_eof.py

@@ -4,6 +4,7 @@ import sys
 from test import support
 from test.support import os_helper
 from test.support import script_helper
+from test.support import warnings_helper
 import unittest

 class EOFTestCase(unittest.TestCase):
@@ -36,10 +37,11 @@ class EOFTestCase(unittest.TestCase):
        rc, out, err = script_helper.assert_python_failure(file_name)
        self.assertIn(b'unterminated triple-quoted string literal (detected at line 3)', err)

+   @warnings_helper.ignore_warnings(category=SyntaxWarning)
    def test_eof_with_line_continuation(self):
        expect = "unexpected EOF while parsing (<string>, line 1)"
        try:
-           compile('"\\xhh" \\', '<string>', 'exec', dont_inherit=True)
+           compile('"\\Xhh" \\', '<string>', 'exec')
        except SyntaxError as msg:
            self.assertEqual(str(msg), expect)
        else:
Lib/test/test_exceptions.py

@@ -155,6 +155,7 @@ class ExceptionTests(unittest.TestCase):
        ckmsg(s, "'continue' not properly in loop")
        ckmsg("continue\n", "'continue' not properly in loop")
+       ckmsg("f'{6 0}'", "invalid syntax. Perhaps you forgot a comma?")

    def testSyntaxErrorMissingParens(self):
        def ckmsg(src, msg, exception=SyntaxError):
@@ -227,7 +228,7 @@ class ExceptionTests(unittest.TestCase):
        check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
        check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +',
              2, 19, encoding='cp1251')
-       check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
+       check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 10)
        check('x = "a', 1, 5)
        check('lambda x: x = 2', 1, 1)
        check('f{a + b + c}', 1, 2)
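Since replacement-field errors now come from the regular parser, they reuse its generic suggestions, which is exactly what the new ckmsg assertion pins down. A quick check (3.12+ behavior):

    try:
        compile("f'{6 0}'", "<test>", "eval")
    except SyntaxError as exc:
        # The same hint users get for "x = 6 0" outside an f-string.
        print(exc.msg)   # invalid syntax. Perhaps you forgot a comma?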
Lib/test/test_fstring.py

@@ -329,13 +329,13 @@ non-important content
        self.assertEqual(t.body[1].lineno, 3)
        self.assertEqual(t.body[1].value.lineno, 3)
        self.assertEqual(t.body[1].value.values[0].lineno, 3)
-       self.assertEqual(t.body[1].value.values[1].lineno, 3)
-       self.assertEqual(t.body[1].value.values[2].lineno, 3)
+       self.assertEqual(t.body[1].value.values[1].lineno, 4)
+       self.assertEqual(t.body[1].value.values[2].lineno, 6)
        self.assertEqual(t.body[1].col_offset, 0)
        self.assertEqual(t.body[1].value.col_offset, 0)
-       self.assertEqual(t.body[1].value.values[0].col_offset, 0)
-       self.assertEqual(t.body[1].value.values[1].col_offset, 0)
-       self.assertEqual(t.body[1].value.values[2].col_offset, 0)
+       self.assertEqual(t.body[1].value.values[0].col_offset, 4)
+       self.assertEqual(t.body[1].value.values[1].col_offset, 2)
+       self.assertEqual(t.body[1].value.values[2].col_offset, 11)
        # NOTE: the following lineno information and col_offset is correct for
        # expressions within FormattedValues.
        binop = t.body[1].value.values[1].value
@@ -366,13 +366,13 @@ a = f'''
        self.assertEqual(t.body[0].lineno, 2)
        self.assertEqual(t.body[0].value.lineno, 2)
        self.assertEqual(t.body[0].value.values[0].lineno, 2)
-       self.assertEqual(t.body[0].value.values[1].lineno, 2)
-       self.assertEqual(t.body[0].value.values[2].lineno, 2)
+       self.assertEqual(t.body[0].value.values[1].lineno, 3)
+       self.assertEqual(t.body[0].value.values[2].lineno, 3)
        self.assertEqual(t.body[0].col_offset, 0)
        self.assertEqual(t.body[0].value.col_offset, 4)
-       self.assertEqual(t.body[0].value.values[0].col_offset, 4)
-       self.assertEqual(t.body[0].value.values[1].col_offset, 4)
-       self.assertEqual(t.body[0].value.values[2].col_offset, 4)
+       self.assertEqual(t.body[0].value.values[0].col_offset, 8)
+       self.assertEqual(t.body[0].value.values[1].col_offset, 10)
+       self.assertEqual(t.body[0].value.values[2].col_offset, 17)
        # Check {blech}
        self.assertEqual(t.body[0].value.values[1].value.lineno, 3)
        self.assertEqual(t.body[0].value.values[1].value.end_lineno, 3)
@@ -387,6 +387,20 @@ x = (
        t = ast.parse(expr)
        self.assertEqual(type(t), ast.Module)
        self.assertEqual(len(t.body), 1)
+       # check the joinedstr location
+       joinedstr = t.body[0].value
+       self.assertEqual(type(joinedstr), ast.JoinedStr)
+       self.assertEqual(joinedstr.lineno, 3)
+       self.assertEqual(joinedstr.end_lineno, 3)
+       self.assertEqual(joinedstr.col_offset, 4)
+       self.assertEqual(joinedstr.end_col_offset, 17)
+       # check the formatted value location
+       fv = t.body[0].value.values[1]
+       self.assertEqual(type(fv), ast.FormattedValue)
+       self.assertEqual(fv.lineno, 3)
+       self.assertEqual(fv.end_lineno, 3)
+       self.assertEqual(fv.col_offset, 7)
+       self.assertEqual(fv.end_col_offset, 16)
        # check the test(t) location
        call = t.body[0].value.values[1].value
        self.assertEqual(type(call), ast.Call)
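The point of these new assertions: JoinedStr and FormattedValue nodes now carry real source positions instead of all collapsing onto the start of the string literal. A minimal illustration (3.12+; columns are 0-based):

    import ast

    tree = ast.parse('x = f"a{b}c"')
    fv = tree.body[0].value.values[1]        # the {b} replacement field
    # On 3.12 this spans just the {b} field; before PEP 701 every value
    # reported the col_offset of the whole f-string literal.
    print(fv.col_offset, fv.end_col_offset)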
@@ -397,6 +411,50 @@ x = (
        expr = """
x = (
+    u'wat',
+    u"wat",
+    b'wat',
+    b"wat",
+    f'wat',
+    f"wat",
+)
+
+y = (
+    u'''wat''',
+    u\"\"\"wat\"\"\",
+    b'''wat''',
+    b\"\"\"wat\"\"\",
+    f'''wat''',
+    f\"\"\"wat\"\"\",
+)
+        """
+        t = ast.parse(expr)
+        self.assertEqual(type(t), ast.Module)
+        self.assertEqual(len(t.body), 2)
+        x, y = t.body
+
+        # Check the single quoted string offsets first.
+        offsets = [
+            (elt.col_offset, elt.end_col_offset)
+            for elt in x.value.elts
+        ]
+        self.assertTrue(all(
+            offset == (4, 10)
+            for offset in offsets
+        ))
+
+        # Check the triple quoted string offsets.
+        offsets = [
+            (elt.col_offset, elt.end_col_offset)
+            for elt in y.value.elts
+        ]
+        self.assertTrue(all(
+            offset == (4, 14)
+            for offset in offsets
+        ))
+
+        expr = """
+x = (
        'PERL_MM_OPT', (
            f'wat'
            f'some_string={f(x)} '
@@ -415,9 +473,9 @@ x = (
        # check the first wat
        self.assertEqual(type(wat1), ast.Constant)
        self.assertEqual(wat1.lineno, 4)
-       self.assertEqual(wat1.end_lineno, 6)
-       self.assertEqual(wat1.col_offset, 12)
-       self.assertEqual(wat1.end_col_offset, 18)
+       self.assertEqual(wat1.end_lineno, 5)
+       self.assertEqual(wat1.col_offset, 14)
+       self.assertEqual(wat1.end_col_offset, 26)
        # check the call
        call = middle.value
        self.assertEqual(type(call), ast.Call)
@@ -427,10 +485,14 @@ x = (
        self.assertEqual(call.end_col_offset, 31)
        # check the second wat
        self.assertEqual(type(wat2), ast.Constant)
-       self.assertEqual(wat2.lineno, 4)
+       self.assertEqual(wat2.lineno, 5)
        self.assertEqual(wat2.end_lineno, 6)
-       self.assertEqual(wat2.col_offset, 12)
-       self.assertEqual(wat2.end_col_offset, 18)
+       self.assertEqual(wat2.col_offset, 32)
+       # wat ends at the offset 17, but the whole f-string
+       # ends at the offset 18 (since the quote is part of the
+       # f-string but not the wat string)
+       self.assertEqual(wat2.end_col_offset, 17)
+       self.assertEqual(fstring.end_col_offset, 18)

    def test_docstring(self):
        def f():
@@ -467,7 +529,7 @@ x = (
        self.assertEqual(f' ', ' ')

    def test_unterminated_string(self):
-       self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
+       self.assertAllRaise(SyntaxError, 'unterminated string',
                            [r"""f'{"x'""",
                             r"""f'{"x}'""",
                             r"""f'{("x'""",
@@ -475,28 +537,33 @@ x = (
                            ])

    def test_mismatched_parens(self):
-       self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\}' "
+       self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' "
                            r"does not match opening parenthesis '\('",
                            ["f'{((}'",
                            ])
-       self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\)' "
+       self.assertAllRaise(SyntaxError, r"closing parenthesis '\)' "
                            r"does not match opening parenthesis '\['",
                            ["f'{a[4)}'",
                            ])
-       self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\]' "
+       self.assertAllRaise(SyntaxError, r"closing parenthesis '\]' "
                            r"does not match opening parenthesis '\('",
                            ["f'{a(4]}'",
                            ])
-       self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\}' "
+       self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' "
                            r"does not match opening parenthesis '\['",
                            ["f'{a[4}'",
                            ])
-       self.assertAllRaise(SyntaxError, r"f-string: closing parenthesis '\}' "
+       self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' "
                            r"does not match opening parenthesis '\('",
                            ["f'{a(4}'",
                            ])
        self.assertRaises(SyntaxError, eval, "f'{" + "("*500 + "}'")

+   def test_fstring_nested_too_deeply(self):
+       self.assertAllRaise(SyntaxError,
+                           "f-string: expressions nested too deeply",
+                           ['f"{1+2:{1+2:{1+1:{1}}}}"'])
+
    def test_double_braces(self):
        self.assertEqual(f'{{', '{')
        self.assertEqual(f'a{{', 'a{')
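These errors now come from the real tokenizer and parser rather than a dedicated f-string scanner, which is also what enables PEP 701's headline features. Two things that only work once this machinery is in place (3.12+):

    # Reusing the outer quote character inside a replacement field:
    songs = ['Take me back to Eden']
    print(f"{", ".join(songs)}")     # a SyntaxError before 3.12

    # Arbitrarily nested f-strings:
    print(f"{f"{f"{1 + 1}"}"}")      # -> "2"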
@@ -559,8 +626,14 @@ x = (
        self.assertEqual(f'' '' f'', '')
        self.assertEqual(f'' '' f'' '', '')

-       self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
-                           ["f'{3' f'}'",  # can't concat to get a valid f-string
+       # This is not really [f'{'] + [f'}'] since we treat the inside
+       # of braces as a purely new context, so it is actually f'{ and
+       # then eval(' f') (a valid expression) and then }' which would
+       # constitute a valid f-string.
+       self.assertEqual(f'{' f'}', ' f')
+
+       self.assertAllRaise(SyntaxError, "expecting '}'",
+                           ['''f'{3' f"}"''',  # can't concat to get a valid f-string
                            ])

    def test_comments(self):
@@ -618,25 +691,19 @@ x = (
        self.assertEqual(f'{-10:-{"#"}1{0}x}', ' -0xa')
        self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', ' -0xa')
        self.assertEqual(f'{10:#{3 != {4:5} and width}x}', ' 0xa')
+       self.assertEqual(f'result: {value:{width:{0}}.{precision:1}}', 'result: 12.35')

-       self.assertAllRaise(SyntaxError,
-                           """f-string: invalid conversion character 'r{"': """
-                           """expected 's', 'r', or 'a'""",
+       self.assertAllRaise(SyntaxError, "f-string: expecting ':' or '}'",
                            ["""f'{"s"!r{":10"}}'""",
                             # This looks like a nested format spec.
                            ])

-       self.assertAllRaise(SyntaxError, "f-string: invalid syntax",
+       self.assertAllRaise(SyntaxError,
+                           "f-string: expecting a valid expression after '{'",
                            [# Invalid syntax inside a nested spec.
                             "f'{4:{/5}}'",
                            ])

-       self.assertAllRaise(SyntaxError, "f-string: expressions nested too deeply",
-                           [# Can't nest format specifiers.
-                            "f'result: {value:{width:{0}}.{precision:1}}'",
-                           ])
-
        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
                            [# No expansion inside conversion or for
                             # the : or ! itself.
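Nested format specifiers, previously rejected as "nested too deeply", now simply evaluate; only nesting beyond the tokenizer's mode-stack depth still errors, as the new test above pins down. A sketch of the now-passing case, with the variable values assumed here from CPython's test setup (not shown in this hunk):

    import decimal

    width = 10
    precision = 4
    value = decimal.Decimal('12.34567')
    # The spec itself contains replacement fields; each is evaluated and
    # formatted first, producing the final spec "10.4" for `value`.
    print(f'result: {value:{width:{0}}.{precision:1}}')   # result:      12.35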
@@ -655,7 +722,8 @@ x = (
        self.assertEqual(f'{x} {x}', '1 2')

    def test_missing_expression(self):
-       self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
+       self.assertAllRaise(SyntaxError,
+                           "f-string: valid expression required before '}'",
                            ["f'{}'",
                             "f'{ }'"
                             "f' {} '",
@@ -667,8 +735,8 @@ x = (
                             "f'''{\t\f\r\n}'''",
                            ])

-       # Different error messages are raised when a specifier ('!', ':' or '=') is used after an empty expression
-       self.assertAllRaise(SyntaxError, "f-string: expression required before '!'",
+       self.assertAllRaise(SyntaxError,
+                           "f-string: valid expression required before '!'",
                            ["f'{!r}'",
                             "f'{ !r}'",
                             "f'{!}'",
@@ -689,7 +757,8 @@ x = (
                             "f'{ !xr:a}'",
                            ])

-       self.assertAllRaise(SyntaxError, "f-string: expression required before ':'",
+       self.assertAllRaise(SyntaxError,
+                           "f-string: valid expression required before ':'",
                            ["f'{:}'",
                             "f'{ :!}'",
                             "f'{:2}'",
@@ -697,7 +766,8 @@ x = (
                             "f'{:'",
                            ])

-       self.assertAllRaise(SyntaxError, "f-string: expression required before '='",
+       self.assertAllRaise(SyntaxError,
+                           "f-string: valid expression required before '='",
                            ["f'{=}'",
                             "f'{ =}'",
                             "f'{ =:}'",
@@ -715,24 +785,18 @@ x = (
    def test_parens_in_expressions(self):
        self.assertEqual(f'{3,}', '(3,)')

-       # Add these because when an expression is evaluated, parens
-       # are added around it. But we shouldn't go from an invalid
-       # expression to a valid one. The added parens are just
-       # supposed to allow whitespace (including newlines).
-       self.assertAllRaise(SyntaxError, 'f-string: invalid syntax',
+       self.assertAllRaise(SyntaxError,
+                           "f-string: expecting a valid expression after '{'",
                            ["f'{,}'",
-                            "f'{,}'",  # this is (,), which is an error
                            ])

        self.assertAllRaise(SyntaxError, r"f-string: unmatched '\)'",
                            ["f'{3)+(4}'",
                            ])

-       self.assertAllRaise(SyntaxError, 'unterminated string literal',
-                           ["f'{\n}'",
-                           ])
-
    def test_newlines_before_syntax_error(self):
-       self.assertAllRaise(SyntaxError, "invalid syntax",
+       self.assertAllRaise(SyntaxError,
+                           "f-string: expecting a valid expression after '{'",
                            ["f'{.}'", "\nf'{.}'", "\n\nf'{.}'"])

    def test_backslashes_in_string_part(self):
@@ -776,7 +840,7 @@ x = (
        self.assertEqual(f'2\x203', '2 3')
        self.assertEqual(f'\x203', ' 3')

-       with self.assertWarns(SyntaxWarning):  # invalid escape sequence
+       with self.assertWarns(DeprecationWarning):  # invalid escape sequence
            value = eval(r"f'\{6*7}'")
        self.assertEqual(value, '\\42')
        self.assertEqual(f'\\{6*7}', '\\42')
@@ -809,18 +873,40 @@ x = (
                             r"'\N{GREEK CAPITAL LETTER DELTA'",
                            ])

-   def test_no_backslashes_in_expression_part(self):
-       self.assertAllRaise(SyntaxError, 'f-string expression part cannot include a backslash',
-                           [r"f'{\'a\'}'",
-                            r"f'{\t3}'",
-                            r"f'{\}'",
-                            r"rf'{\'a\'}'",
-                            r"rf'{\t3}'",
-                            r"rf'{\}'",
-                            r"""rf'{"\N{LEFT CURLY BRACKET}"}'""",
-                            r"f'{\n}'",
+   def test_backslashes_in_expression_part(self):
+       self.assertEqual(f"{(
+                       1 +
+                       2
+       )}", "3")
+
+       self.assertEqual("\N{LEFT CURLY BRACKET}", '{')
+       self.assertEqual(f'{"\N{LEFT CURLY BRACKET}"}', '{')
+       self.assertEqual(rf'{"\N{LEFT CURLY BRACKET}"}', '{')
+
+       self.assertAllRaise(SyntaxError,
+                           "f-string: valid expression required before '}'",
+                           ["f'{\n}'",
                            ])

+   def test_invalid_backslashes_inside_fstring_context(self):
+       # All of these variations are invalid python syntax,
+       # so they are also invalid in f-strings as well.
+       cases = [
+           formatting.format(expr=expr)
+           for formatting in [
+               "{expr}",
+               "f'{{{expr}}}'",
+               "rf'{{{expr}}}'",
+           ]
+           for expr in [
+               r"\'a\'",
+               r"\t3",
+               r"\\"[0],
+           ]
+       ]
+       self.assertAllRaise(SyntaxError, 'unexpected character after line continuation',
+                           cases)
+
    def test_no_escapes_for_braces(self):
        """
        Only literal curly braces begin an expression.
@@ -843,11 +929,69 @@ x = (
        self.assertEqual(f'{(lambda y:x*y)("8"):10}', "88888 ")

        # lambda doesn't work without parens, because the colon
        # makes the parser think it's a format_spec
-       self.assertAllRaise(SyntaxError, 'f-string: invalid syntax',
+       # emit warning if we can match a format_spec
+       self.assertAllRaise(SyntaxError,
+                           "f-string: lambda expressions are not allowed "
+                           "without parentheses",
                            ["f'{lambda x:x}'",
+                            "f'{lambda :x}'",
+                            "f'{lambda *arg, :x}'",
+                            "f'{1, lambda:x}'",
                            ])

+       # but don't emit the paren warning in general cases
+       self.assertAllRaise(SyntaxError,
+                           "f-string: expecting a valid expression after '{'",
+                           ["f'{lambda x:}'",
+                            "f'{lambda :}'",
+                            "f'{+ lambda:None}'",
+                           ])
+
+   def test_valid_prefixes(self):
+       self.assertEqual(F'{1}', "1")
+       self.assertEqual(FR'{2}', "2")
+       self.assertEqual(fR'{3}', "3")
+
+   def test_roundtrip_raw_quotes(self):
+       self.assertEqual(fr"\'", "\\'")
+       self.assertEqual(fr'\"', '\\"')
+       self.assertEqual(fr'\"\'', '\\"\\\'')
+       self.assertEqual(fr'\'\"', '\\\'\\"')
+       self.assertEqual(fr'\"\'\"', '\\"\\\'\\"')
+       self.assertEqual(fr'\'\"\'', '\\\'\\"\\\'')
+       self.assertEqual(fr'\"\'\"\'', '\\"\\\'\\"\\\'')
+
+   def test_fstring_backslash_before_double_bracket(self):
+       self.assertEqual(f'\{{\}}', '\\{\\}')
+       self.assertEqual(f'\{{', '\\{')
+       self.assertEqual(f'\{{{1+1}', '\\{2')
+       self.assertEqual(f'\}}{1+1}', '\\}2')
+       self.assertEqual(f'{1+1}\}}', '2\\}')
+       self.assertEqual(fr'\{{\}}', '\\{\\}')
+       self.assertEqual(fr'\{{', '\\{')
+       self.assertEqual(fr'\{{{1+1}', '\\{2')
+       self.assertEqual(fr'\}}{1+1}', '\\}2')
+       self.assertEqual(fr'{1+1}\}}', '2\\}')
+
+   def test_fstring_backslash_prefix_raw(self):
+       self.assertEqual(f'\\', '\\')
+       self.assertEqual(f'\\\\', '\\\\')
+       self.assertEqual(fr'\\', r'\\')
+       self.assertEqual(fr'\\\\', r'\\\\')
+       self.assertEqual(rf'\\', r'\\')
+       self.assertEqual(rf'\\\\', r'\\\\')
+       self.assertEqual(Rf'\\', R'\\')
+       self.assertEqual(Rf'\\\\', R'\\\\')
+       self.assertEqual(fR'\\', R'\\')
+       self.assertEqual(fR'\\\\', R'\\\\')
+       self.assertEqual(FR'\\', R'\\')
+       self.assertEqual(FR'\\\\', R'\\\\')
+
+   def test_fstring_format_spec_greedy_matching(self):
+       self.assertEqual(f"{1:}}}", "1}")
+       self.assertEqual(f"{1:>3{5}}}}", " 1}")
+
    def test_yield(self):
        # Not terribly useful, but make sure the yield turns
        # a function into a generator
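The targeted lambda message exercised here is produced by the invalid_expression alternative added earlier (the one with the FSTRING_MIDDLE lookahead). For example, on 3.12+:

    try:
        compile("f'{lambda x:x}'", "<test>", "eval")
    except SyntaxError as exc:
        print(exc.msg)   # f-string: lambda expressions are not allowed without parentheses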
@@ -1037,6 +1181,11 @@ x = (
        self.assertEqual(f'{"a"!r}', "'a'")
        self.assertEqual(f'{"a"!a}', "'a'")

+       # Conversions can have trailing whitespace after them since it
+       # does not provide any significance
+       self.assertEqual(f"{3!s }", "3")
+       self.assertEqual(f'{3.14!s :10.10}', '3.14 ')
+
        # Not a conversion.
        self.assertEqual(f'{"a!r"}', "a!r")
@@ -1049,16 +1198,27 @@ x = (
                             "f'{3!g'",
                            ])

-       self.assertAllRaise(SyntaxError, 'f-string: missed conversion character',
+       self.assertAllRaise(SyntaxError, 'f-string: missing conversion character',
                            ["f'{3!}'",
                             "f'{3!:'",
                             "f'{3!:}'",
                            ])

-       for conv in 'g', 'A', '3', 'G', '!', ' s', 's ', ' s ', 'ä', 'ɐ', 'ª':
+       for conv_identifier in 'g', 'A', 'G', 'ä', 'ɐ':
            self.assertAllRaise(SyntaxError,
                                "f-string: invalid conversion character %r: "
-                               "expected 's', 'r', or 'a'" % conv,
+                               "expected 's', 'r', or 'a'" % conv_identifier,
+                               ["f'{3!" + conv_identifier + "}'"])
+
+       for conv_non_identifier in '3', '!':
+           self.assertAllRaise(SyntaxError,
+                               "f-string: invalid conversion character",
+                               ["f'{3!" + conv_non_identifier + "}'"])
+
+       for conv in ' s', ' s ':
+           self.assertAllRaise(SyntaxError,
+                               "f-string: conversion type must come right after the"
+                               " exclamanation mark",
                                ["f'{3!" + conv + "}'"])

        self.assertAllRaise(SyntaxError,
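The conversion now arrives as an ordinary NAME token, which is why whitespace after it is tolerated while whitespace between '!' and the name is rejected; the `_PyPegen_check_fstring_conversion` helper further down enforces the adjacency. Illustratively, on this commit's behavior (message text as in the C helper, including its spelling):

    print(f"{3!s }")         # "3": trailing whitespace after the conversion is ignored
    try:
        compile("f'{3! s}'", "<test>", "eval")
    except SyntaxError as exc:
        print(exc.msg)       # conversion type must come right after the exclamanation mark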
@@ -1097,8 +1257,7 @@ x = (
                            ])

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
-                           ["f'{3:{{>10}'",
-                            "f'{3'",
+                           ["f'{3'",
                             "f'{3!'",
                             "f'{3:'",
                             "f'{3!s'",
@@ -1111,11 +1270,14 @@ x = (
                             "f'{{{'",
                             "f'{{}}{'",
                             "f'{'",
-                            "f'x{<'",  # See bpo-46762.
-                            "f'x{>'",
                             "f'{i='",  # See gh-93418.
                            ])

+       self.assertAllRaise(SyntaxError,
+                           "f-string: expecting a valid expression after '{'",
+                           ["f'{3:{{>10}'",
+                           ])
+
        # But these are just normal strings.
        self.assertEqual(f'{"{"}', '{')
        self.assertEqual(f'{"}"}', '}')
@@ -1314,6 +1476,7 @@ x = (
        self.assertEqual(f'X{x =}Y', 'Xx ='+repr(x)+'Y')
        self.assertEqual(f'X{x= }Y', 'Xx= '+repr(x)+'Y')
        self.assertEqual(f'X{x = }Y', 'Xx = '+repr(x)+'Y')
+       self.assertEqual(f"sadsd {1 + 1 = :{1 + 1:1d}f}", "sadsd 1 + 1 = 2.000000")

        # These next lines contains tabs.  Backslash escapes don't
        # work in f-strings.
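Debug expressions ('=') now compose with conversions and full format specs, since '=' is just another token in the replacement-field grammar. For example (3.12+, though the simple cases also work on earlier versions):

    x = 42
    print(f"{x = }")            # "x = 42": the source text before '=' is kept verbatim
    print(f"{1 + 1 = :.3f}")    # "1 + 1 = 2.000": debug text plus a format spec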
@@ -1335,7 +1498,8 @@ x = (
        self.assertEqual(x, 10)

    def test_invalid_syntax_error_message(self):
-       with self.assertRaisesRegex(SyntaxError, "f-string: invalid syntax"):
+       with self.assertRaisesRegex(SyntaxError,
+                                   "f-string: expecting '=', or '!', or ':', or '}'"):
            compile("f'{a $ b}'", "?", "exec")

    def test_with_two_commas_in_format_specifier(self):
@@ -1359,12 +1523,11 @@ x = (
            f'{1:_,}'

    def test_syntax_error_for_starred_expressions(self):
-       error_msg = re.escape("cannot use starred expression here")
-       with self.assertRaisesRegex(SyntaxError, error_msg):
+       with self.assertRaisesRegex(SyntaxError, "can't use starred expression here"):
            compile("f'{*a}'", "?", "exec")

-       error_msg = re.escape("cannot use double starred expression here")
-       with self.assertRaisesRegex(SyntaxError, error_msg):
+       with self.assertRaisesRegex(SyntaxError,
+                                   "f-string: expecting a valid expression after '{'"):
            compile("f'{**a}'", "?", "exec")

 if __name__ == '__main__':
Lib/test/test_tokenize.py

@@ -1625,6 +1625,10 @@ class TestRoundtrip(TestCase):
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
+
+       # TODO: Remove this once we can unparse PEP 701 syntax
+       testfiles.remove(os.path.join(tempdir, "test_fstring.py"))
+
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

@@ -1937,25 +1941,39 @@ c"""', """\
    """)

        self.check_tokenize('f"abc"', """\
-   STRING 'f"abc"' (1, 0) (1, 6)
+   FSTRING_START 'f"' (1, 0) (1, 2)
+   FSTRING_MIDDLE 'abc' (1, 2) (1, 5)
+   FSTRING_END '"' (1, 5) (1, 6)
    """)

        self.check_tokenize('fR"a{b}c"', """\
-   STRING 'fR"a{b}c"' (1, 0) (1, 9)
+   FSTRING_START 'fR"' (1, 0) (1, 3)
+   FSTRING_MIDDLE 'a' (1, 3) (1, 4)
+   LBRACE '{' (1, 4) (1, 5)
+   NAME 'b' (1, 5) (1, 6)
+   RBRACE '}' (1, 6) (1, 7)
+   FSTRING_MIDDLE 'c' (1, 7) (1, 8)
+   FSTRING_END '"' (1, 8) (1, 9)
    """)

        self.check_tokenize('f"""abc"""', """\
-   STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)
+   FSTRING_START 'f\"""' (1, 0) (1, 4)
+   FSTRING_MIDDLE 'abc' (1, 4) (1, 7)
+   FSTRING_END '\"""' (1, 7) (1, 10)
    """)

        self.check_tokenize(r'f"abc\
def"', """\
-   STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)
+   FSTRING_START 'f"' (1, 0) (1, 2)
+   FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3)
+   FSTRING_END '"' (2, 3) (2, 4)
    """)

        self.check_tokenize(r'Rf"abc\
def"', """\
-   STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
+   FSTRING_START 'Rf"' (1, 0) (1, 3)
+   FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3)
+   FSTRING_END '"' (2, 3) (2, 4)
    """)

    def test_function(self):
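The same token stream is visible from pure Python through the tokenize module, which mirrors the C tokenizer's new behavior. A minimal sketch (3.12+; at the tokenize level the braces may surface as OP tokens):

    import io
    import tokenize

    for tok in tokenize.generate_tokens(io.StringIO('f"a{b}c"').readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
    # FSTRING_START 'f"' / FSTRING_MIDDLE 'a' / OP '{' / NAME 'b' / OP '}' / ...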
Lib/test/test_type_comments.py

@@ -272,7 +272,7 @@ class TypeCommentTests(unittest.TestCase):
        pass

    def test_fstring(self):
-       for tree in self.parse_all(fstring, minver=6):
+       for tree in self.parse_all(fstring):
            pass

    def test_underscorednumber(self):
Lib/token.py (generated; 27 lines changed)

@@ -57,18 +57,22 @@ ATEQUAL = 50
 RARROW = 51
 ELLIPSIS = 52
 COLONEQUAL = 53
-OP = 54
-AWAIT = 55
-ASYNC = 56
-TYPE_IGNORE = 57
-TYPE_COMMENT = 58
-SOFT_KEYWORD = 59
+EXCLAMATION = 54
+OP = 55
+AWAIT = 56
+ASYNC = 57
+TYPE_IGNORE = 58
+TYPE_COMMENT = 59
+SOFT_KEYWORD = 60
+FSTRING_START = 61
+FSTRING_MIDDLE = 62
+FSTRING_END = 63
 # These aren't used by the C tokenizer but are needed for tokenize.py
-ERRORTOKEN = 60
-COMMENT = 61
-NL = 62
-ENCODING = 63
-N_TOKENS = 64
+ERRORTOKEN = 64
+COMMENT = 65
+NL = 66
+ENCODING = 67
+N_TOKENS = 68
 # Special definitions for cooperation with parser
 NT_OFFSET = 256

@@ -78,6 +82,7 @@ tok_name = {value: name
 __all__.extend(tok_name.values())

 EXACT_TOKEN_TYPES = {
+    '!': EXCLAMATION,
    '!=': NOTEQUAL,
    '%': PERCENT,
    '%=': PERCENTEQUAL,
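Because the numeric token values shifted, anything that hardcoded them breaks; the symbolic names are the stable interface. A quick sanity check of the layout introduced here (values as of this commit; they can shift again in later versions):

    import token

    print(token.EXACT_TOKEN_TYPES['!'])             # EXCLAMATION's numeric value, 54 here
    print(token.FSTRING_START < token.ERRORTOKEN)   # True: the f-string tokens sit before ERRORTOKEN
    print(token.N_TOKENS)                           # 68 as of this commit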
Misc NEWS entry (new file)

@@ -0,0 +1 @@
+Implement the required C tokenizer changes for PEP 701. Patch by Pablo Galindo Salgado, Lysandros Nikolaou, Batuhan Taskaya, Marta Gómez Macías and sunmy2019.
Parser/action_helpers.c

@@ -1,6 +1,7 @@
 #include <Python.h>

 #include "pegen.h"
+#include "tokenizer.h"
 #include "string_parser.h"
 #include "pycore_runtime.h"  // _PyRuntime
@@ -853,96 +854,6 @@ _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
    return new_seq;
 }

-expr_ty
-_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
-{
-    Py_ssize_t len = asdl_seq_LEN(strings);
-    assert(len > 0);
-
-    Token *first = asdl_seq_GET_UNTYPED(strings, 0);
-    Token *last = asdl_seq_GET_UNTYPED(strings, len - 1);
-
-    int bytesmode = 0;
-    PyObject *bytes_str = NULL;
-
-    FstringParser state;
-    _PyPegen_FstringParser_Init(&state);
-
-    for (Py_ssize_t i = 0; i < len; i++) {
-        Token *t = asdl_seq_GET_UNTYPED(strings, i);
-
-        int this_bytesmode;
-        int this_rawmode;
-        PyObject *s;
-        const char *fstr;
-        Py_ssize_t fstrlen = -1;
-
-        if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
-            goto error;
-        }
-
-        /* Check that we are not mixing bytes with unicode. */
-        if (i != 0 && bytesmode != this_bytesmode) {
-            RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
-            Py_XDECREF(s);
-            goto error;
-        }
-        bytesmode = this_bytesmode;
-
-        if (fstr != NULL) {
-            assert(s == NULL && !bytesmode);
-
-            int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
-                                                              this_rawmode, 0, first, t, last);
-            if (result < 0) {
-                goto error;
-            }
-        }
-        else {
-            /* String or byte string. */
-            assert(s != NULL && fstr == NULL);
-            assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
-
-            if (bytesmode) {
-                if (i == 0) {
-                    bytes_str = s;
-                }
-                else {
-                    PyBytes_ConcatAndDel(&bytes_str, s);
-                    if (!bytes_str) {
-                        goto error;
-                    }
-                }
-            }
-            else {
-                /* This is a regular string. Concatenate it. */
-                if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
-                    goto error;
-                }
-            }
-        }
-    }
-
-    if (bytesmode) {
-        if (_PyArena_AddPyObject(p->arena, bytes_str) < 0) {
-            goto error;
-        }
-        return _PyAST_Constant(bytes_str, NULL, first->lineno,
-                               first->col_offset, last->end_lineno,
-                               last->end_col_offset, p->arena);
-    }
-
-    return _PyPegen_FstringParser_Finish(p, &state, first, last);
-
-error:
-    Py_XDECREF(bytes_str);
-    _PyPegen_FstringParser_Dealloc(&state);
-    if (PyErr_Occurred()) {
-        _Pypegen_raise_decode_error(p);
-    }
-    return NULL;
-}
-
 expr_ty
 _PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
 {
@@ -1054,6 +965,18 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
    return 0;
 }

+expr_ty
+_PyPegen_check_fstring_conversion(Parser *p, Token* symbol, expr_ty conv) {
+    if (symbol->lineno != conv->lineno || symbol->end_col_offset != conv->col_offset) {
+        return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
+            symbol, conv,
+            "f-string: conversion type must come right after the exclamanation mark"
+        );
+    }
+    return conv;
+}
+
 const char *
 _PyPegen_get_expr_name(expr_ty e)
 {
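The hunk that follows replaces the old string parser with token-based helpers (_PyPegen_joined_str, _PyPegen_constant_from_token, _PyPegen_formatted_value). One observable consequence of the flattening done by unpack_top_level_joined_strs: a debug expression expands into sibling values of the top-level JoinedStr rather than a nested JoinedStr. A sketch (3.12+):

    import ast

    tree = ast.parse('f"{x=}"', mode="eval")
    # The "x=" text and the implicit !r formatted value are flattened
    # into the single top-level JoinedStr.
    print([type(v).__name__ for v in tree.body.values])   # ['Constant', 'FormattedValue']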
@ -1271,3 +1194,439 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq
|
||||||
"Generator expression must be parenthesized"
|
"Generator expression must be parenthesized"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fstring stuff
|
||||||
|
|
||||||
|
static expr_ty
|
||||||
|
decode_fstring_buffer(Parser *p, int lineno, int col_offset, int end_lineno,
|
||||||
|
int end_col_offset)
|
||||||
|
{
|
||||||
|
tokenizer_mode *tok_mode = &(p->tok->tok_mode_stack[p->tok->tok_mode_stack_index]);
|
||||||
|
assert(tok_mode->last_expr_buffer != NULL);
|
||||||
|
assert(tok_mode->last_expr_size >= 0 && tok_mode->last_expr_end >= 0);
|
||||||
|
|
||||||
|
PyObject *res = PyUnicode_DecodeUTF8(
|
||||||
|
tok_mode->last_expr_buffer,
|
||||||
|
tok_mode->last_expr_size - tok_mode->last_expr_end,
|
||||||
|
NULL
|
||||||
|
);
|
||||||
|
if (!res || _PyArena_AddPyObject(p->arena, res) < 0) {
|
||||||
|
Py_XDECREF(res);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return _PyAST_Constant(res, NULL, lineno, col_offset, end_lineno, end_col_offset, p->arena);
|
||||||
|
}
|
||||||
|
|
||||||
|
static expr_ty
|
||||||
|
_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant) {
|
||||||
|
assert(PyUnicode_CheckExact(constant->v.Constant.value));
|
||||||
|
|
||||||
|
const char* bstr = PyUnicode_AsUTF8(constant->v.Constant.value);
|
||||||
|
if (bstr == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t len;
|
||||||
|
if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) {
|
||||||
|
len = 1;
|
||||||
|
} else {
|
||||||
|
len = strlen(bstr);
|
||||||
|
}
|
||||||
|
|
||||||
|
is_raw = is_raw || strchr(bstr, '\\') == NULL;
|
||||||
|
PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, NULL);
|
||||||
|
if (str == NULL) {
|
||||||
|
_Pypegen_raise_decode_error(p);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (_PyArena_AddPyObject(p->arena, str) < 0) {
|
||||||
|
Py_DECREF(str);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return _PyAST_Constant(str, NULL, constant->lineno, constant->col_offset,
|
||||||
|
constant->end_lineno, constant->end_col_offset,
|
||||||
|
p->arena);
|
||||||
|
}
|
||||||
|
|
||||||
|
static asdl_expr_seq *
|
||||||
|
unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions)
|
||||||
|
{
|
||||||
|
/* The parser might put multiple f-string values into an individual
|
||||||
|
* JoinedStr node at the top level due to stuff like f-string debugging
|
||||||
|
* expressions. This function flattens those and promotes them to the
|
||||||
|
* upper level. Only simplifies AST, but the compiler already takes care
|
||||||
|
* of the regular output, so this is not necessary if you are not going
|
||||||
|
* to expose the output AST to Python level. */
|
||||||
|
|
||||||
|
Py_ssize_t i, req_size, raw_size;
|
||||||
|
|
||||||
|
req_size = raw_size = asdl_seq_LEN(raw_expressions);
|
||||||
|
expr_ty expr;
|
||||||
|
for (i = 0; i < raw_size; i++) {
|
||||||
|
expr = asdl_seq_GET(raw_expressions, i);
|
||||||
|
if (expr->kind == JoinedStr_kind) {
|
||||||
|
req_size += asdl_seq_LEN(expr->v.JoinedStr.values) - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
asdl_expr_seq *expressions = _Py_asdl_expr_seq_new(req_size, p->arena);
|
||||||
|
|
||||||
|
Py_ssize_t raw_index, req_index = 0;
|
||||||
|
for (raw_index = 0; raw_index < raw_size; raw_index++) {
|
||||||
|
expr = asdl_seq_GET(raw_expressions, raw_index);
|
||||||
|
if (expr->kind == JoinedStr_kind) {
|
||||||
|
asdl_expr_seq *values = expr->v.JoinedStr.values;
|
||||||
|
for (Py_ssize_t n = 0; n < asdl_seq_LEN(values); n++) {
|
||||||
|
asdl_seq_SET(expressions, req_index, asdl_seq_GET(values, n));
|
||||||
|
req_index++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
asdl_seq_SET(expressions, req_index, expr);
|
||||||
|
req_index++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return expressions;
|
||||||
|
}
|
||||||
|
|
||||||
|
expr_ty
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token* b) {
    asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
    Py_ssize_t n_items = asdl_seq_LEN(expr);

    const char* quote_str = PyBytes_AsString(a->bytes);
    if (quote_str == NULL) {
        return NULL;
    }
    int is_raw = strpbrk(quote_str, "rR") != NULL;

    asdl_expr_seq *seq = _Py_asdl_expr_seq_new(n_items, p->arena);
    if (seq == NULL) {
        return NULL;
    }

    Py_ssize_t index = 0;
    for (Py_ssize_t i = 0; i < n_items; i++) {
        expr_ty item = asdl_seq_GET(expr, i);
        if (item->kind == Constant_kind) {
            item = _PyPegen_decode_fstring_part(p, is_raw, item);
            if (item == NULL) {
                return NULL;
            }

            /* The tokenizer emits string parts even when the underlying string
               might become an empty value (e.g. FSTRING_MIDDLE with the value \n),
               so we need to check for them and simplify them away here. */
            if (PyUnicode_CheckExact(item->v.Constant.value)
                && PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
                continue;
            }
        }
        asdl_seq_SET(seq, index++, item);
    }

    asdl_expr_seq *resized_exprs;
    if (index != n_items) {
        resized_exprs = _Py_asdl_expr_seq_new(index, p->arena);
        if (resized_exprs == NULL) {
            return NULL;
        }
        for (Py_ssize_t i = 0; i < index; i++) {
            asdl_seq_SET(resized_exprs, i, asdl_seq_GET(seq, i));
        }
    }
    else {
        resized_exprs = seq;
    }

    return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset,
                            b->end_lineno, b->end_col_offset,
                            p->arena);
}
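A small sketch of the empty-part check above (illustrative, not part of the diff): the decoded-but-empty constants never reach the final AST.

    import ast
    tree = ast.parse('f"{a}{b}"', mode='eval')
    # Only the two FormattedValue nodes survive; no empty Constant parts:
    print([type(v).__name__ for v in tree.body.values])
    # ['FormattedValue', 'FormattedValue']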

expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok) {
    char* bstr = PyBytes_AsString(tok->bytes);
    if (bstr == NULL) {
        return NULL;
    }
    PyObject* str = PyUnicode_FromString(bstr);
    if (str == NULL) {
        return NULL;
    }
    if (_PyArena_AddPyObject(p->arena, str) < 0) {
        Py_DECREF(str);
        return NULL;
    }
    return _PyAST_Constant(str, NULL, tok->lineno, tok->col_offset,
                           tok->end_lineno, tok->end_col_offset,
                           p->arena);
}
expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
    char* the_str = PyBytes_AsString(tok->bytes);
    if (the_str == NULL) {
        return NULL;
    }
    PyObject *s = _PyPegen_parse_string(p, tok);
    if (s == NULL) {
        _Pypegen_raise_decode_error(p);
        return NULL;
    }
    if (_PyArena_AddPyObject(p->arena, s) < 0) {
        Py_DECREF(s);
        return NULL;
    }
    PyObject *kind = NULL;
    if (the_str && the_str[0] == 'u') {
        kind = _PyPegen_new_identifier(p, "u");
        if (kind == NULL) {
            return NULL;
        }
    }
    return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
}
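For reference, the `kind` slot set above for the `u` prefix is observable from Python (a minimal sketch):

    import ast
    print(ast.parse('u"x"', mode='eval').body.kind)   # 'u'
    print(ast.parse('"x"', mode='eval').body.kind)    # None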
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, expr_ty conversion,
                                 expr_ty format, int lineno, int col_offset, int end_lineno, int end_col_offset,
                                 PyArena *arena) {
    int conversion_val = -1;
    if (conversion != NULL) {
        assert(conversion->kind == Name_kind);
        Py_UCS4 first = PyUnicode_READ_CHAR(conversion->v.Name.id, 0);

        if (PyUnicode_GET_LENGTH(conversion->v.Name.id) > 1 ||
            !(first == 's' || first == 'r' || first == 'a')) {
            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion,
                "f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
                conversion->v.Name.id);
            return NULL;
        }

        conversion_val = Py_SAFE_DOWNCAST(first, Py_UCS4, int);
    }
    else if (debug && !format) {
        /* If no conversion is specified, use !r for debug expressions */
        conversion_val = (int)'r';
    }

    expr_ty formatted_value = _PyAST_FormattedValue(
        expression, conversion_val, format,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );

    if (debug) {
        /* Find the non-whitespace token after the "=" */
        int debug_end_line, debug_end_offset;

        if (conversion) {
            debug_end_line = conversion->lineno;
            debug_end_offset = conversion->col_offset;
        }
        else if (format) {
            debug_end_line = format->lineno;
            debug_end_offset = format->col_offset + 1; // HACK: ??
        }
        else {
            debug_end_line = end_lineno;
            debug_end_offset = end_col_offset;
        }

        expr_ty debug_text = decode_fstring_buffer(p, lineno, col_offset + 1,
                                                   debug_end_line, debug_end_offset - 1);
        if (!debug_text) {
            return NULL;
        }

        asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
        asdl_seq_SET(values, 0, debug_text);
        asdl_seq_SET(values, 1, formatted_value);
        return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
    }
    else {
        return formatted_value;
    }
}
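The debug branch above is what makes `f"{x=}"` self-documenting; a rough equivalence (illustrative snippet, not part of the diff):

    x = 42
    assert f"{x=}" == "x=" + repr(x)            # no conversion/format given -> !r is implied
    assert f"{x=:d}" == "x=" + format(x, "d")   # an explicit format spec suppresses the default !r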
expr_ty
_PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
                             int lineno, int col_offset, int end_lineno,
                             int end_col_offset, PyArena *arena)
{
    Py_ssize_t len = asdl_seq_LEN(strings);
    assert(len > 0);

    int f_string_found = 0;
    int unicode_string_found = 0;
    int bytes_found = 0;

    Py_ssize_t i = 0;
    Py_ssize_t n_flattened_elements = 0;
    for (i = 0; i < len; i++) {
        expr_ty elem = asdl_seq_GET(strings, i);
        if (elem->kind == Constant_kind) {
            if (PyBytes_CheckExact(elem->v.Constant.value)) {
                bytes_found = 1;
            } else {
                unicode_string_found = 1;
            }
            n_flattened_elements++;
        } else {
            n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
            f_string_found = 1;
        }
    }

    if ((unicode_string_found || f_string_found) && bytes_found) {
        RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
        return NULL;
    }

    if (bytes_found) {
        PyObject* res = PyBytes_FromString("");

        /* Bytes literals never get a kind, but just for consistency,
           since they are represented as Constant nodes, we'll mirror
           the same behavior as unicode strings for determining the
           kind. */
        PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
        for (i = 0; i < len; i++) {
            expr_ty elem = asdl_seq_GET(strings, i);
            PyBytes_Concat(&res, elem->v.Constant.value);
        }
        if (!res || _PyArena_AddPyObject(arena, res) < 0) {
            Py_XDECREF(res);
            return NULL;
        }
        return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
    }

    if (!f_string_found && len == 1) {
        return asdl_seq_GET(strings, 0);
    }

    asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena);
    if (flattened == NULL) {
        return NULL;
    }

    /* build flattened list */
    Py_ssize_t current_pos = 0;
    Py_ssize_t j = 0;
    for (i = 0; i < len; i++) {
        expr_ty elem = asdl_seq_GET(strings, i);
        if (elem->kind == Constant_kind) {
            asdl_seq_SET(flattened, current_pos++, elem);
        } else {
            for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
                expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
                if (subvalue == NULL) {
                    return NULL;
                }
                asdl_seq_SET(flattened, current_pos++, subvalue);
            }
        }
    }

    /* calculate folded element count */
    Py_ssize_t n_elements = 0;
    int prev_is_constant = 0;
    for (i = 0; i < n_flattened_elements; i++) {
        expr_ty elem = asdl_seq_GET(flattened, i);

        /* The concatenation of a FormattedValue and an empty Constant should
           lead to the FormattedValue itself. Thus, we will not take any empty
           constants into account, just as in `_PyPegen_joined_str` */
        if (f_string_found && elem->kind == Constant_kind &&
            PyUnicode_CheckExact(elem->v.Constant.value) &&
            PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0)
            continue;

        if (!prev_is_constant || elem->kind != Constant_kind) {
            n_elements++;
        }
        prev_is_constant = elem->kind == Constant_kind;
    }

    asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena);
    if (values == NULL) {
        return NULL;
    }

    /* build folded list */
    _PyUnicodeWriter writer;
    current_pos = 0;
    for (i = 0; i < n_flattened_elements; i++) {
        expr_ty elem = asdl_seq_GET(flattened, i);

        /* if the current elem and the following are constants,
           fold them and all consecutive constants */
        if (elem->kind == Constant_kind) {
            if (i + 1 < n_flattened_elements &&
                asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) {
                expr_ty first_elem = elem;

                /* When a string is getting concatenated, the kind of the string
                   is determined by the first string in the concatenation
                   sequence.

                   u"abc" "def" -> u"abcdef"
                   "abc" u"abc" -> "abcabc" */
                PyObject *kind = elem->v.Constant.kind;

                _PyUnicodeWriter_Init(&writer);
                expr_ty last_elem = elem;
                for (j = i; j < n_flattened_elements; j++) {
                    expr_ty current_elem = asdl_seq_GET(flattened, j);
                    if (current_elem->kind == Constant_kind) {
                        if (_PyUnicodeWriter_WriteStr(
                                &writer, current_elem->v.Constant.value)) {
                            _PyUnicodeWriter_Dealloc(&writer);
                            return NULL;
                        }
                        last_elem = current_elem;
                    } else {
                        break;
                    }
                }
                i = j - 1;

                PyObject *concat_str = _PyUnicodeWriter_Finish(&writer);
                if (concat_str == NULL) {
                    _PyUnicodeWriter_Dealloc(&writer);
                    return NULL;
                }
                if (_PyArena_AddPyObject(p->arena, concat_str) < 0) {
                    Py_DECREF(concat_str);
                    return NULL;
                }
                elem = _PyAST_Constant(concat_str, kind, first_elem->lineno,
                                       first_elem->col_offset,
                                       last_elem->end_lineno,
                                       last_elem->end_col_offset, p->arena);
                if (elem == NULL) {
                    return NULL;
                }
            }

            /* Drop all empty constant strings */
            if (f_string_found &&
                PyUnicode_CheckExact(elem->v.Constant.value) &&
                PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) {
                continue;
            }
        }

        asdl_seq_SET(values, current_pos++, elem);
    }

    if (!f_string_found) {
        assert(n_elements == 1);
        expr_ty elem = asdl_seq_GET(values, 0);
        assert(elem->kind == Constant_kind);
        return elem;
    }

    assert(current_pos == n_elements);
    return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
}
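Two behaviors of this helper, seen from the Python side (illustrative comments only, not part of the diff):

    # The kind of an implicit concatenation comes from its first piece:
    #   u"abc" "def"  ->  Constant(value='abcdef', kind='u')
    #   "abc" u"def"  ->  Constant(value='abcdef', kind=None)
    # Mixing bytes with str or f-strings is rejected outright:
    #   b"abc" "def"  ->  SyntaxError: cannot mix bytes and nonbytes literals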
12714
Parser/parser.c
generated
File diff suppressed because it is too large
@@ -359,7 +359,7 @@ _PyPegen_expect_token(Parser *p, int type)
     }
     Token *t = p->tokens[p->mark];
     if (t->type != type) {
         return NULL;
     }
     p->mark += 1;
     return t;
@@ -138,6 +138,7 @@ void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expecte
 Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
 expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
 expr_ty _PyPegen_soft_keyword_token(Parser *p);
+expr_ty _PyPegen_fstring_middle_token(Parser* p);
 Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
 int _PyPegen_fill_token(Parser *p);
 expr_ty _PyPegen_name_token(Parser *p);
@@ -155,7 +156,7 @@ typedef enum {
 int _Pypegen_raise_decode_error(Parser *p);
 void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
 int _Pypegen_tokenizer_error(Parser *p);
-void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
+void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...);
 void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                           Py_ssize_t lineno, Py_ssize_t col_offset,
                                           Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
@@ -175,8 +176,9 @@ RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
     va_end(va);
     return NULL;
 }
-#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
-#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
+#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 0, msg, ##__VA_ARGS__)
+#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, 0, msg, ##__VA_ARGS__)
+#define RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 1, msg, ##__VA_ARGS__)
 #define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
     RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (b)->end_lineno, (b)->end_col_offset, msg, ##__VA_ARGS__)
 #define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
@@ -308,6 +310,7 @@ StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
 arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
                                      asdl_arg_seq *, asdl_seq *, StarEtc *);
 arguments_ty _PyPegen_empty_arguments(Parser *);
+expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, expr_ty, expr_ty, int, int, int, int, PyArena *);
 AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
 stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
 stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
@@ -317,12 +320,16 @@ asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
 expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
                                    int lineno, int col_offset, int end_lineno,
                                    int end_col_offset, PyArena *arena);
-expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
+expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok);
+expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok);
+expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *, int, int, int, int, PyArena *);
+expr_ty _PyPegen_FetchRawForm(Parser *p, int, int, int, int);
 expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty);
 expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
 asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
 int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
+expr_ty _PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
 mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
 void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
 expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
@@ -338,6 +345,9 @@ void *_PyPegen_run_parser(Parser *);
 mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
 asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);

+// TODO: move to the correct place in this file
+expr_ty _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token* b);
+
 // Generated function in parse.c - function definition in python.gram
 void *_PyPegen_parse(Parser *);
@@ -192,7 +192,10 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {

 exit:
-    if (PyErr_Occurred()) {
+    // If we're in an f-string, we want the syntax error in the expression part
+    // to propagate, so that tokenizer errors (like expecting '}') that happen afterwards
+    // do not swallow it.
+    if (PyErr_Occurred() && p->tok->tok_mode_stack_index <= 0) {
         Py_XDECREF(value);
         Py_XDECREF(type);
         Py_XDECREF(traceback);
@@ -205,7 +208,7 @@ exit:
 // PARSER ERRORS

 void *
-_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
+_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...)
 {
     if (p->fill == 0) {
         va_list va;
@@ -214,8 +217,13 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
         va_end(va);
         return NULL;
     }
-    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+    if (use_mark && p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
+        p->error_indicator = 1;
+        return NULL;
+    }
+    Token *t = p->known_err_token != NULL
+                   ? p->known_err_token
+                   : p->tokens[use_mark ? p->mark : p->fill - 1];
     Py_ssize_t col_offset;
     Py_ssize_t end_col_offset = -1;
     if (t->col_offset == -1) {
File diff suppressed because it is too large
@@ -5,42 +5,7 @@
 #include <pycore_ast.h>
 #include "pegen.h"

-#define EXPRLIST_N_CACHED 64
+PyObject *_PyPegen_parse_string(Parser *, Token *);
+PyObject *_PyPegen_decode_string(Parser *, int, const char *, size_t, Token *);

-typedef struct {
-    /* Incrementally build an array of expr_ty, so be used in an
-       asdl_seq. Cache some small but reasonably sized number of
-       expr_ty's, and then after that start dynamically allocating,
-       doubling the number allocated each time. Note that the f-string
-       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
-       Constant for the literal 'a'. So you add expr_ty's about twice as
-       fast as you add expressions in an f-string. */
-
-    Py_ssize_t allocated;  /* Number we've allocated. */
-    Py_ssize_t size;  /* Number we've used. */
-    expr_ty *p;  /* Pointer to the memory we're actually
-                    using. Will point to 'data' until we
-                    start dynamically allocating. */
-    expr_ty data[EXPRLIST_N_CACHED];
-} ExprList;
-
-/* The FstringParser is designed to add a mix of strings and
-   f-strings, and concat them together as needed. Ultimately, it
-   generates an expr_ty. */
-typedef struct {
-    PyObject *last_str;
-    ExprList expr_list;
-    int fmode;
-} FstringParser;
-
-void _PyPegen_FstringParser_Init(FstringParser *);
-int _PyPegen_parsestr(Parser *, int *, int *, PyObject **,
-                      const char **, Py_ssize_t *, Token *);
-int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **,
-                                         const char *, int, int, Token *, Token *,
-                                         Token *);
-int _PyPegen_FstringParser_ConcatAndDel(FstringParser *, PyObject *);
-expr_ty _PyPegen_FstringParser_Finish(Parser *, FstringParser *, Token *, Token *);
-void _PyPegen_FstringParser_Dealloc(FstringParser *);
-
 #endif
5
Parser/token.c
generated
@@ -60,12 +60,16 @@ const char * const _PyParser_TokenNames[] = {
     "RARROW",
     "ELLIPSIS",
     "COLONEQUAL",
+    "EXCLAMATION",
     "OP",
     "AWAIT",
     "ASYNC",
     "TYPE_IGNORE",
     "TYPE_COMMENT",
     "SOFT_KEYWORD",
+    "FSTRING_START",
+    "FSTRING_MIDDLE",
+    "FSTRING_END",
     "<ERRORTOKEN>",
     "<COMMENT>",
     "<NL>",
@@ -79,6 +83,7 @@ int
 _PyToken_OneChar(int c1)
 {
     switch (c1) {
+    case '!': return EXCLAMATION;
     case '%': return PERCENT;
     case '&': return AMPER;
     case '(': return LPAR;
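With the three new token types wired into `_PyParser_TokenNames`, an f-string no longer tokenizes as one opaque STRING. A sketch of the intended stream (assuming a build from this branch, where the `tokenize` module mirrors these new C-level token types):

    import io, tokenize
    src = 'f"hello {name}"\n'
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
    # Roughly: FSTRING_START 'f"', FSTRING_MIDDLE 'hello ', OP '{',
    # NAME 'name', OP '}', FSTRING_END '"', NEWLINE, ENDMARKER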
@@ -43,6 +43,28 @@
     tok->lineno++; \
     tok->col_offset = 0;

+#ifdef Py_DEBUG
+static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index < MAXLEVEL);
+    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
+}
+static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index < MAXLEVEL);
+    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
+}
+static inline int *TOK_GET_BRACKET_MARK(tokenizer_mode* mode) {
+    assert(mode->bracket_mark_index >= 0);
+    assert(mode->bracket_mark_index < MAX_EXPR_NESTING);
+    return &(mode->bracket_mark[mode->bracket_mark_index]);
+}
+#else
+#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
+#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
+#define TOK_GET_BRACKET_MARK(mode) (&(mode->bracket_mark[mode->bracket_mark_index]))
+#endif
+
 /* Forward */
 static struct tok_state *tok_new(void);
 static int tok_nextc(struct tok_state *tok);
@@ -98,6 +120,9 @@ tok_new(void)
     tok->interactive_underflow = IUNDERFLOW_NORMAL;
     tok->str = NULL;
     tok->report_warnings = 1;
+    tok->tok_mode_stack[0] = (tokenizer_mode){.kind = TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0};
+    tok->tok_mode_stack_index = 0;
+    tok->tok_report_warnings = 1;
 #ifdef Py_DEBUG
     tok->debug = _Py_GetConfig()->parser_debug;
 #endif
@@ -346,6 +371,92 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
 }

+
+/* Traverse and update all f-string buffers with the value */
+static void
+update_fstring_buffers(struct tok_state *tok, char value, int regular, int multiline)
+{
+    int index;
+    tokenizer_mode *mode;
+
+    for (index = tok->tok_mode_stack_index; index >= 0; --index) {
+        mode = &(tok->tok_mode_stack[index]);
+        if (regular && mode->f_string_start != NULL) {
+            mode->f_string_start += value;
+        }
+        if (multiline && mode->f_string_multi_line_start != NULL) {
+            mode->f_string_multi_line_start += value;
+        }
+    }
+}
+
+static int
+update_fstring_expr(struct tok_state *tok, char cur)
+{
+    assert(tok->cur != NULL);
+
+    Py_ssize_t size = strlen(tok->cur);
+    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
+
+    switch (cur) {
+        case '{':
+            if (tok_mode->last_expr_buffer != NULL) {
+                PyMem_Free(tok_mode->last_expr_buffer);
+            }
+            tok_mode->last_expr_buffer = PyMem_Malloc(size);
+            if (tok_mode->last_expr_buffer == NULL) {
+                tok->done = E_NOMEM;
+                return 0;
+            }
+            tok_mode->last_expr_size = size;
+            tok_mode->last_expr_end = -1;
+            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
+            break;
+        case 0:
+            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
+                return 1;
+            }
+            char *new_buffer = PyMem_Realloc(
+                tok_mode->last_expr_buffer,
+                tok_mode->last_expr_size + size
+            );
+            if (new_buffer == NULL) {
+                PyMem_Free(tok_mode->last_expr_buffer);
+                tok->done = E_NOMEM;
+                return 0;
+            }
+            tok_mode->last_expr_buffer = new_buffer;
+            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
+            tok_mode->last_expr_size += size;
+            break;
+        case '}':
+        case '!':
+        case ':':
+            if (tok_mode->last_expr_end == -1) {
+                tok_mode->last_expr_end = strlen(tok->start);
+            }
+            break;
+    }
+
+    return 1;
+}
+
+static void
+free_fstring_expressions(struct tok_state *tok)
+{
+    int index;
+    tokenizer_mode *mode;
+
+    for (index = tok->tok_mode_stack_index; index >= 0; --index) {
+        mode = &(tok->tok_mode_stack[index]);
+        if (mode->last_expr_buffer != NULL) {
+            PyMem_Free(mode->last_expr_buffer);
+            mode->last_expr_buffer = NULL;
+            mode->last_expr_size = 0;
+            mode->last_expr_end = -1;
+        }
+    }
+}
+
 /* Read a line of text from TOK into S, using the stream in TOK.
    Return NULL on failure, else S.
@@ -372,6 +483,7 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
     Py_ssize_t start = tok->start == NULL ? -1 : tok->start - tok->buf;
     Py_ssize_t line_start = tok->start == NULL ? -1 : tok->line_start - tok->buf;
     Py_ssize_t multi_line_start = tok->multi_line_start - tok->buf;
+    update_fstring_buffers(tok, -*tok->buf, /*regular=*/1, /*multiline=*/1);
     newbuf = (char *)PyMem_Realloc(newbuf, newsize);
     if (newbuf == NULL) {
         tok->done = E_NOMEM;
@@ -384,6 +496,7 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
         tok->start = start < 0 ? NULL : tok->buf + start;
         tok->line_start = line_start < 0 ? NULL : tok->buf + line_start;
         tok->multi_line_start = multi_line_start < 0 ? NULL : tok->buf + multi_line_start;
+        update_fstring_buffers(tok, *tok->buf, /*regular=*/1, /*multiline=*/1);
     }
     return 1;
 }
@@ -838,6 +951,7 @@ _PyTokenizer_Free(struct tok_state *tok)
     if (tok->interactive_src_start != NULL) {
         PyMem_Free(tok->interactive_src_start);
     }
+    free_fstring_expressions(tok);
     PyMem_Free(tok);
 }

@@ -854,6 +968,9 @@ tok_readline_raw(struct tok_state *tok)
         if (line == NULL) {
             return 1;
         }
+        if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) {
+            return 0;
+        }
         if (tok->fp_interactive &&
             tok_concatenate_interactive_new_line(tok, line) == -1) {
             return 0;
@@ -941,6 +1058,7 @@ tok_underflow_interactive(struct tok_state *tok) {
     }
     else if (tok->start != NULL) {
         Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf;
+        update_fstring_buffers(tok, -*tok->buf, /*regular=*/0, /*multiline=*/1);
         size_t size = strlen(newtok);
         ADVANCE_LINENO();
         if (!tok_reserve_buf(tok, size + 1)) {
@@ -953,6 +1071,7 @@ tok_underflow_interactive(struct tok_state *tok) {
         PyMem_Free(newtok);
         tok->inp += size;
         tok->multi_line_start = tok->buf + cur_multi_line_start;
+        update_fstring_buffers(tok, *tok->buf, /*regular=*/0, /*multiline=*/1);
     }
     else {
         ADVANCE_LINENO();
@@ -969,6 +1088,10 @@ tok_underflow_interactive(struct tok_state *tok) {
         }
         return 0;
     }
+
+    if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) {
+        return 0;
+    }
     return 1;
 }
@@ -1073,7 +1196,7 @@ tok_nextc(struct tok_state *tok)
         return Py_CHARMASK(*tok->cur++); /* Fast path */
     }
     if (tok->done != E_OK) {
         return EOF;
     }
     if (tok->fp == NULL) {
         rc = tok_underflow_string(tok);
@@ -1115,7 +1238,7 @@ tok_backup(struct tok_state *tok, int c)
         if (--tok->cur < tok->buf) {
             Py_FatalError("tokenizer beginning of buffer");
         }
-        if ((int)(unsigned char)*tok->cur != c) {
+        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
             Py_FatalError("tok_backup: wrong character");
         }
         tok->col_offset--;
@@ -1172,6 +1295,7 @@ error:
 static int
 syntaxerror(struct tok_state *tok, const char *format, ...)
 {
+    // These errors are cleaned on startup. TODO: Fix it.
     va_list vargs;
     va_start(vargs, format);
     int ret = _syntaxerror_range(tok, format, -1, -1, vargs);
@@ -1234,6 +1358,41 @@ error:
     return -1;
 }

+static int
+warn_invalid_escape_sequence(struct tok_state *tok, int first_invalid_escape_char)
+{
+    if (!tok->tok_report_warnings) {
+        return 0;
+    }
+
+    PyObject *msg = PyUnicode_FromFormat(
+        "invalid escape sequence '\\%c'",
+        (char) first_invalid_escape_char
+    );
+
+    if (msg == NULL) {
+        return -1;
+    }
+
+    if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, tok->filename,
+                                 tok->lineno, NULL, NULL) < 0) {
+        Py_DECREF(msg);
+
+        if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
+            /* Replace the DeprecationWarning exception with a SyntaxError
+               to get a more accurate error report */
+            PyErr_Clear();
+            return syntaxerror(tok, "invalid escape sequence '\\%c'", (char) first_invalid_escape_char);
+        }
+
+        return -1;
+    }
+
+    Py_DECREF(msg);
+    return 0;
+}
+
 static int
 lookahead(struct tok_state *tok, const char *test)
 {
@@ -1389,7 +1548,6 @@ tok_decimal_tail(struct tok_state *tok)
     return c;
 }

-/* Get next token, after space stripping etc. */

 static inline int
 tok_continuation_line(struct tok_state *tok) {
@@ -1427,7 +1585,12 @@ token_setup(struct tok_state *tok, struct token *token, int type, const char *st
 {
     assert((start == NULL && end == NULL) || (start != NULL && end != NULL));
     token->level = tok->level;
-    token->lineno = type == STRING ? tok->first_lineno : tok->lineno;
+    if (ISSTRINGLIT(type)) {
+        token->lineno = tok->first_lineno;
+    }
+    else {
+        token->lineno = tok->lineno;
+    }
     token->end_lineno = tok->lineno;
     token->col_offset = token->end_col_offset = -1;
     token->start = start;
@@ -1441,7 +1604,7 @@ token_setup(struct tok_state *tok, struct token *token, int type, const char *st
 }

 static int
-tok_get(struct tok_state *tok, struct token *token)
+tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
 {
     int c;
     int blankline, nonascii;
@@ -1602,6 +1765,11 @@ tok_get(struct tok_state *tok, struct token *token)

     /* Skip comment, unless it's a type comment */
     if (c == '#') {
+
+        if (tok->tok_mode_stack_index > 0) {
+            return MAKE_TOKEN(syntaxerror(tok, "f-string expression part cannot include '#'"));
+        }
+
         const char *prefix, *p, *type_start;
         int current_starting_col_offset;
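A one-line illustration of the new '#' check above (hypothetical input, assuming a build with this change):

    f"{x # comment}"   # -> SyntaxError: f-string expression part cannot include '#'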
@@ -1703,6 +1871,9 @@ tok_get(struct tok_state *tok, struct token *token)
         }
         c = tok_nextc(tok);
         if (c == '"' || c == '\'') {
+            if (saw_f) {
+                goto f_string_quote;
+            }
             goto letter_quote;
         }
     }
@@ -1748,7 +1919,9 @@ tok_get(struct tok_state *tok, struct token *token)
         int ahead_tok_kind;

         memcpy(&ahead_tok, tok, sizeof(ahead_tok));
-        ahead_tok_kind = tok_get(&ahead_tok, &ahead_token);
+        ahead_tok_kind = tok_get_normal_mode(&ahead_tok,
+                                             current_tok,
+                                             &ahead_token);

         if (ahead_tok_kind == NAME
             && ahead_tok.cur - ahead_tok.start == 3
@@ -2003,6 +2176,67 @@ tok_get(struct tok_state *tok, struct token *token)
         return MAKE_TOKEN(NUMBER);
     }

+  f_string_quote:
+    if (((tolower(*tok->start) == 'f' || tolower(*tok->start) == 'r') && (c == '\'' || c == '"'))) {
+        int quote = c;
+        int quote_size = 1;             /* 1 or 3 */
+
+        /* Nodes of type STRING, especially multi line strings
+           must be handled differently in order to get both
+           the starting line number and the column offset right.
+           (cf. issue 16806) */
+        tok->first_lineno = tok->lineno;
+        tok->multi_line_start = tok->line_start;
+
+        /* Find the quote size and start of string */
+        int after_quote = tok_nextc(tok);
+        if (after_quote == quote) {
+            int after_after_quote = tok_nextc(tok);
+            if (after_after_quote == quote) {
+                quote_size = 3;
+            }
+            else {
+                // TODO: Check this
+                tok_backup(tok, after_after_quote);
+                tok_backup(tok, after_quote);
+            }
+        }
+        if (after_quote != quote) {
+            tok_backup(tok, after_quote);
+        }
+
+        p_start = tok->start;
+        p_end = tok->cur;
+        tokenizer_mode *current_tok = TOK_NEXT_MODE(tok);
+        current_tok->kind = TOK_FSTRING_MODE;
+        current_tok->f_string_quote = quote;
+        current_tok->f_string_quote_size = quote_size;
+        current_tok->f_string_start = tok->start;
+        current_tok->f_string_multi_line_start = tok->line_start;
+        current_tok->last_expr_buffer = NULL;
+        current_tok->last_expr_size = 0;
+        current_tok->last_expr_end = -1;
+
+        switch (*tok->start) {
+            case 'F':
+            case 'f':
+                current_tok->f_string_raw = tolower(*(tok->start + 1)) == 'r';
+                break;
+            case 'R':
+            case 'r':
+                current_tok->f_string_raw = 1;
+                break;
+            default:
+                Py_UNREACHABLE();
+        }
+
+        current_tok->bracket_stack = 0;
+        current_tok->bracket_mark[0] = 0;
+        current_tok->bracket_mark_index = -1;
+        return MAKE_TOKEN(FSTRING_START);
+    }
+
  letter_quote:
     /* String */
     if (c == '\'' || c == '"') {
@@ -2047,6 +2281,20 @@ tok_get(struct tok_state *tok, struct token *token)
             tok->line_start = tok->multi_line_start;
             int start = tok->lineno;
             tok->lineno = tok->first_lineno;
+
+            if (tok->tok_mode_stack_index > 0) {
+                /* When we are in an f-string, before raising the
+                 * unterminated string literal error, check whether the
+                 * initial quote matches the f-string's quotes; if it
+                 * does, then this must be a missing '}' token, so raise
+                 * the proper error */
+                tokenizer_mode *current_tok = TOK_GET_MODE(tok);
+                if (current_tok->f_string_quote == quote &&
+                    current_tok->f_string_quote_size == quote_size) {
+                    return MAKE_TOKEN(syntaxerror(tok, "f-string: expecting '}'", start));
+                }
+            }
+
             if (quote_size == 3) {
                 syntaxerror(tok, "unterminated triple-quoted string literal"
                             " (detected at line %d)", start);
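Seen from the user side, under this commit's logic an input like the one below can land on the new path (illustrative; the released error text may differ):

    f"{"   # the inner quote matches the f-string's own quote, so the
           # tokenizer reports: SyntaxError: f-string: expecting '}'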
@@ -2089,6 +2337,27 @@ tok_get(struct tok_state *tok, struct token *token)
         goto again; /* Read next line */
     }

+    /* Punctuation character */
+    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
+    if (is_punctuation && tok->tok_mode_stack_index > 0 && current_tok->bracket_mark_index >= 0) {
+        int mark = *TOK_GET_BRACKET_MARK(current_tok);
+        /* This code block gets executed before the bracket_stack is incremented
+         * by the `{` case, so for ensuring that we are on the 0th level, we need
+         * to adjust it manually */
+        int cursor = current_tok->bracket_stack - (c != '{');
+
+        if (cursor == 0 && !update_fstring_expr(tok, c)) {
+            return MAKE_TOKEN(ENDMARKER);
+        }
+
+        if (c == ':' && cursor == mark) {
+            current_tok->kind = TOK_FSTRING_MODE;
+            p_start = tok->start;
+            p_end = tok->cur;
+            return MAKE_TOKEN(_PyToken_OneChar(c));
+        }
+    }
+
     /* Check for two-character token */
     {
         int c2 = tok_nextc(tok);
@@ -2121,11 +2390,18 @@ tok_get(struct tok_state *tok, struct token *token)
         tok->parenlinenostack[tok->level] = tok->lineno;
         tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
         tok->level++;
+        if (tok->tok_mode_stack_index > 0) {
+            current_tok->bracket_stack++;
+        }
         break;
     case ')':
     case ']':
     case '}':
         if (!tok->level) {
+            if (tok->tok_mode_stack_index > 0 && !current_tok->bracket_stack && c == '}') {
+                return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed"));
+            }
             return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c));
         }
         tok->level--;
@@ -2134,6 +2410,18 @@ tok_get(struct tok_state *tok, struct token *token)
              (opening == '[' && c == ']') ||
              (opening == '{' && c == '}')))
         {
+            /* If the opening bracket belongs to an f-string's expression
+               part (e.g. f"{)}") and the closing bracket is an arbitrary
+               nested expression, then instead of matching a different
+               syntactical construct with it, we'll throw an unmatched
+               parentheses error. */
+            if (tok->tok_mode_stack_index > 0 && opening == '{') {
+                assert(current_tok->bracket_stack >= 0);
+                int previous_bracket = current_tok->bracket_stack - 1;
+                if (previous_bracket == *TOK_GET_BRACKET_MARK(current_tok)) {
+                    return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c));
+                }
+            }
             if (tok->parenlinenostack[tok->level] != tok->lineno) {
                 return MAKE_TOKEN(syntaxerror(tok,
                     "closing parenthesis '%c' does not match "
@@ -2147,6 +2435,14 @@ tok_get(struct tok_state *tok, struct token *token)
                     c, opening));
             }
         }
+
+        if (tok->tok_mode_stack_index > 0) {
+            current_tok->bracket_stack--;
+            if (c == '}' && current_tok->bracket_stack == *TOK_GET_BRACKET_MARK(current_tok)) {
+                current_tok->bracket_mark_index--;
+                current_tok->kind = TOK_FSTRING_MODE;
+            }
+        }
         break;
     }
@@ -2162,6 +2458,187 @@ tok_get(struct tok_state *tok, struct token *token)
     return MAKE_TOKEN(_PyToken_OneChar(c));
 }

+static int
+tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
+{
+    const char *p_start = NULL;
+    const char *p_end = NULL;
+    int end_quote_size = 0;
+    int unicode_escape = 0;
+
+    tok->start = tok->cur;
+    tok->first_lineno = tok->lineno;
+    tok->starting_col_offset = tok->col_offset;
+
+    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
+    // before it.
+    int start_char = tok_nextc(tok);
+    int peek1 = tok_nextc(tok);
+    tok_backup(tok, peek1);
+    tok_backup(tok, start_char);
+
+    if ((start_char == '{' && peek1 != '{') || (start_char == '}' && peek1 != '}')) {
+        if (start_char == '{') {
+            current_tok->bracket_mark_index++;
+            if (current_tok->bracket_mark_index >= MAX_EXPR_NESTING) {
+                return MAKE_TOKEN(syntaxerror(tok, "f-string: expressions nested too deeply"));
+            }
+            *TOK_GET_BRACKET_MARK(current_tok) = current_tok->bracket_stack;
+        }
+        TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+        return tok_get_normal_mode(tok, current_tok, token);
+    }
+
+    // Check if we are at the end of the string
+    for (int i = 0; i < current_tok->f_string_quote_size; i++) {
+        int quote = tok_nextc(tok);
+        if (quote != current_tok->f_string_quote) {
+            tok_backup(tok, quote);
+            goto f_string_middle;
+        }
+    }
+
+    if (current_tok->last_expr_buffer != NULL) {
+        PyMem_Free(current_tok->last_expr_buffer);
+        current_tok->last_expr_buffer = NULL;
+        current_tok->last_expr_size = 0;
+        current_tok->last_expr_end = -1;
+    }
+
+    p_start = tok->start;
+    p_end = tok->cur;
+    tok->tok_mode_stack_index--;
+    return MAKE_TOKEN(FSTRING_END);
+
+f_string_middle:
+    while (end_quote_size != current_tok->f_string_quote_size) {
+        int c = tok_nextc(tok);
+        if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+            assert(tok->multi_line_start != NULL);
+            // shift the tok_state's location into
+            // the start of string, and report the error
+            // from the initial quote character
+            tok->cur = (char *)current_tok->f_string_start;
+            tok->cur++;
+            tok->line_start = current_tok->f_string_multi_line_start;
+            int start = tok->lineno;
+            tok->lineno = tok->first_lineno;
+
+            if (current_tok->f_string_quote_size == 3) {
+                return MAKE_TOKEN(syntaxerror(tok,
+                                    "unterminated triple-quoted f-string literal"
+                                    " (detected at line %d)", start));
+            }
+            else {
+                return MAKE_TOKEN(syntaxerror(tok,
+                                    "unterminated f-string literal (detected at"
+                                    " line %d)", start));
+            }
+        }
+
+        if (c == current_tok->f_string_quote) {
+            end_quote_size += 1;
+            continue;
+        } else {
+            end_quote_size = 0;
+        }
+
+        int in_format_spec = current_tok->last_expr_end != -1 && current_tok->bracket_mark_index >= 0;
+        if (c == '{') {
+            int peek = tok_nextc(tok);
+            if (peek != '{' || in_format_spec) {
+                tok_backup(tok, peek);
+                tok_backup(tok, c);
+                current_tok->bracket_mark_index++;
+                if (current_tok->bracket_mark_index >= MAX_EXPR_NESTING) {
+                    return MAKE_TOKEN(syntaxerror(tok, "f-string: expressions nested too deeply"));
+                }
+                *TOK_GET_BRACKET_MARK(current_tok) = current_tok->bracket_stack;
+                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+                p_start = tok->start;
+                p_end = tok->cur;
+            } else {
+                p_start = tok->start;
+                p_end = tok->cur - 1;
+            }
+            return MAKE_TOKEN(FSTRING_MIDDLE);
+        } else if (c == '}') {
+            if (unicode_escape) {
+                p_start = tok->start;
+                p_end = tok->cur;
+                return MAKE_TOKEN(FSTRING_MIDDLE);
+            }
+            int peek = tok_nextc(tok);
+
+            // The tokenizer can only be in the format spec if we have already completed the expression
+            // scanning (indicated by the end of the expression being set) and we are not at the top level
+            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
+            // brackets, we can bypass it here.
+            if (peek == '}' && !in_format_spec) {
+                p_start = tok->start;
+                p_end = tok->cur - 1;
+            } else {
+                tok_backup(tok, peek);
+                tok_backup(tok, c);
+                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+                p_start = tok->start;
+                p_end = tok->cur;
+            }
+            return MAKE_TOKEN(FSTRING_MIDDLE);
+        } else if (c == '\\') {
+            int peek = tok_nextc(tok);
+            // Special case when the backslash is right before a curly
+            // brace. We have to restore and return the control back
+            // to the loop for the next iteration.
+            if (peek == '{' || peek == '}') {
+                if (!current_tok->f_string_raw) {
+                    if (warn_invalid_escape_sequence(tok, peek)) {
+                        return MAKE_TOKEN(ERRORTOKEN);
+                    }
+                }
+                tok_backup(tok, peek);
+                continue;
+            }
+
+            if (!current_tok->f_string_raw) {
+                if (peek == 'N') {
+                    /* Handle named unicode escapes (\N{BULLET}) */
+                    peek = tok_nextc(tok);
+                    if (peek == '{') {
+                        unicode_escape = 1;
+                    } else {
+                        tok_backup(tok, peek);
+                    }
+                }
+            } /* else {
+                skip the escaped character
+            }*/
+        }
+    }
+
+    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
+    // add the quotes to the FSTRING_END in the next tokenizer iteration.
+    for (int i = 0; i < current_tok->f_string_quote_size; i++) {
+        tok_backup(tok, current_tok->f_string_quote);
+    }
+    p_start = tok->start;
+    p_end = tok->cur;
+    return MAKE_TOKEN(FSTRING_MIDDLE);
+}
+
+
+static int
+tok_get(struct tok_state *tok, struct token *token)
+{
+    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
+    if (current_tok->kind == TOK_REGULAR_MODE) {
+        return tok_get_normal_mode(tok, current_tok, token);
+    } else {
+        return tok_get_fstring_mode(tok, current_tok, token);
+    }
+}
+
 int
 _PyTokenizer_Get(struct tok_state *tok, struct token *token)
 {
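An illustration of the MAX_EXPR_NESTING guard used above (hypothetical inputs; assumes this build's limit of 3 nested brace levels per literal):

    f"{1:{2:{3}}}"      # nested replacement fields inside a format spec: OK
    f"{1:{2:{3:{4}}}}"  # -> SyntaxError: f-string: expressions nested too deeply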
@@ -33,6 +33,31 @@ struct token {
     const char *start, *end;
 };

+enum tokenizer_mode_kind_t {
+    TOK_REGULAR_MODE,
+    TOK_FSTRING_MODE,
+};
+
+#define MAX_EXPR_NESTING 3
+
+typedef struct _tokenizer_mode {
+    enum tokenizer_mode_kind_t kind;
+
+    int bracket_stack;
+    int bracket_mark[MAX_EXPR_NESTING];
+    int bracket_mark_index;
+
+    char f_string_quote;
+    int f_string_quote_size;
+    int f_string_raw;
+    const char* f_string_start;
+    const char* f_string_multi_line_start;
+
+    Py_ssize_t last_expr_size;
+    Py_ssize_t last_expr_end;
+    char* last_expr_buffer;
+} tokenizer_mode;
+
 /* Tokenizer state */
 struct tok_state {
     /* Input state; buf <= cur <= inp <= end */
@@ -93,6 +118,10 @@ struct tok_state {
     /* How to proceed when asked for a new token in interactive mode */
     enum interactive_underflow_t interactive_underflow;
     int report_warnings;
+    // TODO: Factor this into its own thing
+    tokenizer_mode tok_mode_stack[MAXLEVEL];
+    int tok_mode_stack_index;
+    int tok_report_warnings;
 #ifdef Py_DEBUG
     int debug;
 #endif
8
Programs/test_frozenmain.h
generated
@@ -27,12 +27,12 @@ unsigned char M_test_frozenmain[] = {
     218,3,107,101,121,169,0,243,0,0,0,0,250,18,116,101,
     115,116,95,102,114,111,122,101,110,109,97,105,110,46,112,121,
     250,8,60,109,111,100,117,108,101,62,114,18,0,0,0,1,
-    0,0,0,115,100,0,0,0,240,3,1,1,1,243,8,0,
+    0,0,0,115,102,0,0,0,240,3,1,1,1,243,8,0,
     1,11,219,0,24,225,0,5,208,6,26,212,0,27,217,0,
     5,128,106,144,35,151,40,145,40,212,0,27,216,9,38,208,
     9,26,215,9,38,209,9,38,211,9,40,168,24,209,9,50,
     128,6,240,2,6,12,2,242,0,7,1,42,128,67,241,14,
-    0,5,10,208,10,40,144,67,209,10,40,152,54,160,35,153,
-    59,209,10,40,213,4,41,241,15,7,1,42,114,16,0,0,
-    0,
+    0,5,10,136,71,144,67,144,53,152,2,152,54,160,35,153,
+    59,152,45,208,10,40,213,4,41,241,15,7,1,42,114,16,
+    0,0,0,
 };
@@ -86,8 +86,8 @@ tokenizeriter_next(tokenizeriterobject *it)
         Py_DECREF(str);
         return NULL;
     }
-    const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
-    int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno;
+    const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
+    int lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
     int end_lineno = it->tok->lineno;
     int col_offset = -1;
     int end_col_offset = -1;
@@ -80,6 +80,8 @@ extern "C" {
                  (x) == NEWLINE || \
                  (x) == INDENT || \
                  (x) == DEDENT)
+#define ISSTRINGLIT(x) ((x) == STRING || \
+                        (x) == FSTRING_MIDDLE)

 // Symbols exported for test_peg_generator
@@ -68,6 +68,7 @@ class NodeTypes(Enum):
     KEYWORD = 4
     SOFT_KEYWORD = 5
     CUT_OPERATOR = 6
+    F_STRING_CHUNK = 7


 BASE_NODETYPES = {