mirror of
https://github.com/python/cpython.git
synced 2025-09-27 10:50:04 +00:00
Issue 27948: Allow backslashes in the literal string portion of f-strings, but not in the expressions. Also, require expressions to begin and end with literal curly braces.
This commit is contained in:
parent
052828db15
commit
451d0e38fc
9 changed files with 321 additions and 341 deletions
|
@ -1060,7 +1060,7 @@ class HTTPConnection:
|
||||||
|
|
||||||
if encode_chunked and self._http_vsn == 11:
|
if encode_chunked and self._http_vsn == 11:
|
||||||
# chunked encoding
|
# chunked encoding
|
||||||
chunk = f'{len(chunk):X}''\r\n'.encode('ascii') + chunk \
|
chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
|
||||||
+ b'\r\n'
|
+ b'\r\n'
|
||||||
self.send(chunk)
|
self.send(chunk)
|
||||||
|
|
||||||
|
|
|
@ -280,6 +280,6 @@ class saved_test_environment:
|
||||||
print(f"Warning -- {name} was modified by {self.testname}",
|
print(f"Warning -- {name} was modified by {self.testname}",
|
||||||
file=sys.stderr, flush=True)
|
file=sys.stderr, flush=True)
|
||||||
if self.verbose > 1:
|
if self.verbose > 1:
|
||||||
print(f" Before: {original}""\n"f" After: {current} ",
|
print(f" Before: {original}\n After: {current} ",
|
||||||
file=sys.stderr, flush=True)
|
file=sys.stderr, flush=True)
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -735,11 +735,11 @@ class FaultHandlerTests(unittest.TestCase):
|
||||||
('EXCEPTION_INT_DIVIDE_BY_ZERO', 'int divide by zero'),
|
('EXCEPTION_INT_DIVIDE_BY_ZERO', 'int divide by zero'),
|
||||||
('EXCEPTION_STACK_OVERFLOW', 'stack overflow'),
|
('EXCEPTION_STACK_OVERFLOW', 'stack overflow'),
|
||||||
):
|
):
|
||||||
self.check_windows_exception("""
|
self.check_windows_exception(f"""
|
||||||
import faulthandler
|
import faulthandler
|
||||||
faulthandler.enable()
|
faulthandler.enable()
|
||||||
faulthandler._raise_exception(faulthandler._{exc})
|
faulthandler._raise_exception(faulthandler._{exc})
|
||||||
""".format(exc=exc),
|
""",
|
||||||
3,
|
3,
|
||||||
name)
|
name)
|
||||||
|
|
||||||
|
|
|
@ -119,6 +119,14 @@ f'{a * x()}'"""
|
||||||
self.assertEqual(f'a}}', 'a}')
|
self.assertEqual(f'a}}', 'a}')
|
||||||
self.assertEqual(f'}}b', '}b')
|
self.assertEqual(f'}}b', '}b')
|
||||||
self.assertEqual(f'a}}b', 'a}b')
|
self.assertEqual(f'a}}b', 'a}b')
|
||||||
|
self.assertEqual(f'{{}}', '{}')
|
||||||
|
self.assertEqual(f'a{{}}', 'a{}')
|
||||||
|
self.assertEqual(f'{{b}}', '{b}')
|
||||||
|
self.assertEqual(f'{{}}c', '{}c')
|
||||||
|
self.assertEqual(f'a{{b}}', 'a{b}')
|
||||||
|
self.assertEqual(f'a{{}}c', 'a{}c')
|
||||||
|
self.assertEqual(f'{{b}}c', '{b}c')
|
||||||
|
self.assertEqual(f'a{{b}}c', 'a{b}c')
|
||||||
|
|
||||||
self.assertEqual(f'{{{10}', '{10')
|
self.assertEqual(f'{{{10}', '{10')
|
||||||
self.assertEqual(f'}}{10}', '}10')
|
self.assertEqual(f'}}{10}', '}10')
|
||||||
|
@ -302,56 +310,79 @@ f'{a * x()}'"""
|
||||||
["f'{\n}'",
|
["f'{\n}'",
|
||||||
])
|
])
|
||||||
|
|
||||||
def test_no_backslashes(self):
|
def test_backslashes_in_string_part(self):
|
||||||
# See issue 27921
|
self.assertEqual(f'\t', '\t')
|
||||||
|
self.assertEqual(r'\t', '\\t')
|
||||||
|
self.assertEqual(rf'\t', '\\t')
|
||||||
|
self.assertEqual(f'{2}\t', '2\t')
|
||||||
|
self.assertEqual(f'{2}\t{3}', '2\t3')
|
||||||
|
self.assertEqual(f'\t{3}', '\t3')
|
||||||
|
|
||||||
# These should work, but currently don't
|
self.assertEqual(f'\u0394', '\u0394')
|
||||||
self.assertAllRaise(SyntaxError, 'backslashes not allowed',
|
self.assertEqual(r'\u0394', '\\u0394')
|
||||||
[r"f'\t'",
|
self.assertEqual(rf'\u0394', '\\u0394')
|
||||||
r"f'{2}\t'",
|
self.assertEqual(f'{2}\u0394', '2\u0394')
|
||||||
r"f'{2}\t{3}'",
|
self.assertEqual(f'{2}\u0394{3}', '2\u03943')
|
||||||
r"f'\t{3}'",
|
self.assertEqual(f'\u0394{3}', '\u03943')
|
||||||
|
|
||||||
r"f'\N{GREEK CAPITAL LETTER DELTA}'",
|
self.assertEqual(f'\U00000394', '\u0394')
|
||||||
r"f'{2}\N{GREEK CAPITAL LETTER DELTA}'",
|
self.assertEqual(r'\U00000394', '\\U00000394')
|
||||||
r"f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}'",
|
self.assertEqual(rf'\U00000394', '\\U00000394')
|
||||||
r"f'\N{GREEK CAPITAL LETTER DELTA}{3}'",
|
self.assertEqual(f'{2}\U00000394', '2\u0394')
|
||||||
|
self.assertEqual(f'{2}\U00000394{3}', '2\u03943')
|
||||||
|
self.assertEqual(f'\U00000394{3}', '\u03943')
|
||||||
|
|
||||||
r"f'\u0394'",
|
self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\u0394')
|
||||||
r"f'{2}\u0394'",
|
self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
|
||||||
r"f'{2}\u0394{3}'",
|
self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}', '2\u03943')
|
||||||
r"f'\u0394{3}'",
|
self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}{3}', '\u03943')
|
||||||
|
self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
|
||||||
|
self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}3', '2\u03943')
|
||||||
|
self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}3', '\u03943')
|
||||||
|
|
||||||
r"f'\U00000394'",
|
self.assertEqual(f'\x20', ' ')
|
||||||
r"f'{2}\U00000394'",
|
self.assertEqual(r'\x20', '\\x20')
|
||||||
r"f'{2}\U00000394{3}'",
|
self.assertEqual(rf'\x20', '\\x20')
|
||||||
r"f'\U00000394{3}'",
|
self.assertEqual(f'{2}\x20', '2 ')
|
||||||
|
self.assertEqual(f'{2}\x20{3}', '2 3')
|
||||||
|
self.assertEqual(f'\x20{3}', ' 3')
|
||||||
|
|
||||||
r"f'\x20'",
|
self.assertEqual(f'2\x20', '2 ')
|
||||||
r"f'{2}\x20'",
|
self.assertEqual(f'2\x203', '2 3')
|
||||||
r"f'{2}\x20{3}'",
|
self.assertEqual(f'\x203', ' 3')
|
||||||
r"f'\x20{3}'",
|
|
||||||
|
|
||||||
r"f'2\x20'",
|
def test_misformed_unicode_character_name(self):
|
||||||
r"f'2\x203'",
|
# These tests are needed because unicode names are parsed
|
||||||
r"f'2\x203'",
|
# differently inside f-strings.
|
||||||
|
self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape",
|
||||||
|
[r"f'\N'",
|
||||||
|
r"f'\N{'",
|
||||||
|
r"f'\N{GREEK CAPITAL LETTER DELTA'",
|
||||||
|
|
||||||
|
# Here are the non-f-string versions,
|
||||||
|
# which should give the same errors.
|
||||||
|
r"'\N'",
|
||||||
|
r"'\N{'",
|
||||||
|
r"'\N{GREEK CAPITAL LETTER DELTA'",
|
||||||
])
|
])
|
||||||
|
|
||||||
# And these don't work now, and shouldn't work in the future.
|
def test_no_backslashes_in_expression_part(self):
|
||||||
self.assertAllRaise(SyntaxError, 'backslashes not allowed',
|
self.assertAllRaise(SyntaxError, 'f-string expression part cannot include a backslash',
|
||||||
[r"f'{\'a\'}'",
|
[r"f'{\'a\'}'",
|
||||||
r"f'{\t3}'",
|
r"f'{\t3}'",
|
||||||
|
r"f'{\}'",
|
||||||
|
r"rf'{\'a\'}'",
|
||||||
|
r"rf'{\t3}'",
|
||||||
|
r"rf'{\}'",
|
||||||
|
r"""rf'{"\N{LEFT CURLY BRACKET}"}'""",
|
||||||
])
|
])
|
||||||
|
|
||||||
# add this when backslashes are allowed again. see issue 27921
|
def test_no_escapes_for_braces(self):
|
||||||
# these test will be needed because unicode names will be parsed
|
# \x7b is '{'. Make sure it doesn't start an expression.
|
||||||
# differently once backslashes are allowed inside expressions
|
self.assertEqual(f'\x7b2}}', '{2}')
|
||||||
## def test_misformed_unicode_character_name(self):
|
self.assertEqual(f'\x7b2', '{2')
|
||||||
## self.assertAllRaise(SyntaxError, 'xx',
|
self.assertEqual(f'\u007b2', '{2')
|
||||||
## [r"f'\N'",
|
self.assertEqual(f'\N{LEFT CURLY BRACKET}2\N{RIGHT CURLY BRACKET}', '{2}')
|
||||||
## [r"f'\N{'",
|
|
||||||
## [r"f'\N{GREEK CAPITAL LETTER DELTA'",
|
|
||||||
## ])
|
|
||||||
|
|
||||||
def test_newlines_in_expressions(self):
|
def test_newlines_in_expressions(self):
|
||||||
self.assertEqual(f'{0}', '0')
|
self.assertEqual(f'{0}', '0')
|
||||||
|
@ -509,6 +540,14 @@ f'{a * x()}'"""
|
||||||
"ruf''",
|
"ruf''",
|
||||||
"FUR''",
|
"FUR''",
|
||||||
"Fur''",
|
"Fur''",
|
||||||
|
"fb''",
|
||||||
|
"fB''",
|
||||||
|
"Fb''",
|
||||||
|
"FB''",
|
||||||
|
"bf''",
|
||||||
|
"bF''",
|
||||||
|
"Bf''",
|
||||||
|
"BF''",
|
||||||
])
|
])
|
||||||
|
|
||||||
def test_leading_trailing_spaces(self):
|
def test_leading_trailing_spaces(self):
|
||||||
|
@ -551,8 +590,8 @@ f'{a * x()}'"""
|
||||||
self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
|
self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
|
||||||
["f'{3!g}'",
|
["f'{3!g}'",
|
||||||
"f'{3!A}'",
|
"f'{3!A}'",
|
||||||
"f'{3!A}'",
|
"f'{3!3}'",
|
||||||
"f'{3!A}'",
|
"f'{3!G}'",
|
||||||
"f'{3!!}'",
|
"f'{3!!}'",
|
||||||
"f'{3!:}'",
|
"f'{3!:}'",
|
||||||
"f'{3! s}'", # no space before conversion char
|
"f'{3! s}'", # no space before conversion char
|
||||||
|
@ -601,6 +640,7 @@ f'{a * x()}'"""
|
||||||
"f'{3!s:3'",
|
"f'{3!s:3'",
|
||||||
"f'x{'",
|
"f'x{'",
|
||||||
"f'x{x'",
|
"f'x{x'",
|
||||||
|
"f'{x'",
|
||||||
"f'{3:s'",
|
"f'{3:s'",
|
||||||
"f'{{{'",
|
"f'{{{'",
|
||||||
"f'{{}}{'",
|
"f'{{}}{'",
|
||||||
|
|
|
@ -285,12 +285,12 @@ class DirectoryTestCase(ASTTestCase):
|
||||||
if test.support.verbose:
|
if test.support.verbose:
|
||||||
print('Testing %s' % filename)
|
print('Testing %s' % filename)
|
||||||
|
|
||||||
# it's very much a hack that I'm skipping these files, but
|
# Some f-strings are not correctly round-tripped by
|
||||||
# I can't figure out why they fail. I'll fix it when I
|
# Tools/parser/unparse.py. See issue 28002 for details.
|
||||||
# address issue #27948.
|
# We need to skip files that contain such f-strings.
|
||||||
if os.path.basename(filename) in ('test_fstring.py', 'test_traceback.py'):
|
if os.path.basename(filename) in ('test_fstring.py', ):
|
||||||
if test.support.verbose:
|
if test.support.verbose:
|
||||||
print(f'Skipping {filename}: see issue 27921')
|
print(f'Skipping {filename}: see issue 28002')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
with self.subTest(filename=filename):
|
with self.subTest(filename=filename):
|
||||||
|
|
|
@ -326,13 +326,13 @@ class TracebackFormatTests(unittest.TestCase):
|
||||||
lineno_f = f.__code__.co_firstlineno
|
lineno_f = f.__code__.co_firstlineno
|
||||||
result_f = (
|
result_f = (
|
||||||
'Traceback (most recent call last):\n'
|
'Traceback (most recent call last):\n'
|
||||||
f' File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display''\n'
|
f' File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display\n'
|
||||||
' f()\n'
|
' f()\n'
|
||||||
f' File "{__file__}", line {lineno_f+1}, in f''\n'
|
f' File "{__file__}", line {lineno_f+1}, in f\n'
|
||||||
' f()\n'
|
' f()\n'
|
||||||
f' File "{__file__}", line {lineno_f+1}, in f''\n'
|
f' File "{__file__}", line {lineno_f+1}, in f\n'
|
||||||
' f()\n'
|
' f()\n'
|
||||||
f' File "{__file__}", line {lineno_f+1}, in f''\n'
|
f' File "{__file__}", line {lineno_f+1}, in f\n'
|
||||||
' f()\n'
|
' f()\n'
|
||||||
# XXX: The following line changes depending on whether the tests
|
# XXX: The following line changes depending on whether the tests
|
||||||
# are run through the interactive interpreter or with -m
|
# are run through the interactive interpreter or with -m
|
||||||
|
@ -371,20 +371,20 @@ class TracebackFormatTests(unittest.TestCase):
|
||||||
|
|
||||||
lineno_g = g.__code__.co_firstlineno
|
lineno_g = g.__code__.co_firstlineno
|
||||||
result_g = (
|
result_g = (
|
||||||
f' File "{__file__}", line {lineno_g+2}, in g''\n'
|
f' File "{__file__}", line {lineno_g+2}, in g\n'
|
||||||
' return g(count-1)\n'
|
' return g(count-1)\n'
|
||||||
f' File "{__file__}", line {lineno_g+2}, in g''\n'
|
f' File "{__file__}", line {lineno_g+2}, in g\n'
|
||||||
' return g(count-1)\n'
|
' return g(count-1)\n'
|
||||||
f' File "{__file__}", line {lineno_g+2}, in g''\n'
|
f' File "{__file__}", line {lineno_g+2}, in g\n'
|
||||||
' return g(count-1)\n'
|
' return g(count-1)\n'
|
||||||
' [Previous line repeated 6 more times]\n'
|
' [Previous line repeated 6 more times]\n'
|
||||||
f' File "{__file__}", line {lineno_g+3}, in g''\n'
|
f' File "{__file__}", line {lineno_g+3}, in g\n'
|
||||||
' raise ValueError\n'
|
' raise ValueError\n'
|
||||||
'ValueError\n'
|
'ValueError\n'
|
||||||
)
|
)
|
||||||
tb_line = (
|
tb_line = (
|
||||||
'Traceback (most recent call last):\n'
|
'Traceback (most recent call last):\n'
|
||||||
f' File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display''\n'
|
f' File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display\n'
|
||||||
' g()\n'
|
' g()\n'
|
||||||
)
|
)
|
||||||
expected = (tb_line + result_g).splitlines()
|
expected = (tb_line + result_g).splitlines()
|
||||||
|
@ -408,16 +408,16 @@ class TracebackFormatTests(unittest.TestCase):
|
||||||
lineno_h = h.__code__.co_firstlineno
|
lineno_h = h.__code__.co_firstlineno
|
||||||
result_h = (
|
result_h = (
|
||||||
'Traceback (most recent call last):\n'
|
'Traceback (most recent call last):\n'
|
||||||
f' File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display''\n'
|
f' File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display\n'
|
||||||
' h()\n'
|
' h()\n'
|
||||||
f' File "{__file__}", line {lineno_h+2}, in h''\n'
|
f' File "{__file__}", line {lineno_h+2}, in h\n'
|
||||||
' return h(count-1)\n'
|
' return h(count-1)\n'
|
||||||
f' File "{__file__}", line {lineno_h+2}, in h''\n'
|
f' File "{__file__}", line {lineno_h+2}, in h\n'
|
||||||
' return h(count-1)\n'
|
' return h(count-1)\n'
|
||||||
f' File "{__file__}", line {lineno_h+2}, in h''\n'
|
f' File "{__file__}", line {lineno_h+2}, in h\n'
|
||||||
' return h(count-1)\n'
|
' return h(count-1)\n'
|
||||||
' [Previous line repeated 6 more times]\n'
|
' [Previous line repeated 6 more times]\n'
|
||||||
f' File "{__file__}", line {lineno_h+3}, in h''\n'
|
f' File "{__file__}", line {lineno_h+3}, in h\n'
|
||||||
' g()\n'
|
' g()\n'
|
||||||
)
|
)
|
||||||
expected = (result_h + result_g).splitlines()
|
expected = (result_h + result_g).splitlines()
|
||||||
|
|
|
@ -402,7 +402,7 @@ class StackSummary(list):
|
||||||
count += 1
|
count += 1
|
||||||
else:
|
else:
|
||||||
if count > 3:
|
if count > 3:
|
||||||
result.append(f' [Previous line repeated {count-3} more times]'+'\n')
|
result.append(f' [Previous line repeated {count-3} more times]\n')
|
||||||
last_file = frame.filename
|
last_file = frame.filename
|
||||||
last_line = frame.lineno
|
last_line = frame.lineno
|
||||||
last_name = frame.name
|
last_name = frame.name
|
||||||
|
@ -419,7 +419,7 @@ class StackSummary(list):
|
||||||
row.append(' {name} = {value}\n'.format(name=name, value=value))
|
row.append(' {name} = {value}\n'.format(name=name, value=value))
|
||||||
result.append(''.join(row))
|
result.append(''.join(row))
|
||||||
if count > 3:
|
if count > 3:
|
||||||
result.append(f' [Previous line repeated {count-3} more times]'+'\n')
|
result.append(f' [Previous line repeated {count-3} more times]\n')
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
12
Misc/NEWS
12
Misc/NEWS
|
@ -10,6 +10,13 @@ What's New in Python 3.6.0 beta 1
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #27948: In f-strings, disallow backslashes only inside the braces
|
||||||
|
(where the expressions are). This is a breaking change from the 3.6
|
||||||
|
alpha releases, where backslashes are allowed anywhere in an
|
||||||
|
f-string. Also, require that expressions inside f-strings be
|
||||||
|
enclosed within literal braces, and not escapes like
|
||||||
|
f'\x7b"hi"\x7d'.
|
||||||
|
|
||||||
- Issue #28046: Remove platform-specific directories from sys.path.
|
- Issue #28046: Remove platform-specific directories from sys.path.
|
||||||
|
|
||||||
- Issue #25758: Prevents zipimport from unnecessarily encoding a filename
|
- Issue #25758: Prevents zipimport from unnecessarily encoding a filename
|
||||||
|
@ -56,11 +63,6 @@ Core and Builtins
|
||||||
- Issue #27355: Removed support for Windows CE. It was never finished,
|
- Issue #27355: Removed support for Windows CE. It was never finished,
|
||||||
and Windows CE is no longer a relevant platform for Python.
|
and Windows CE is no longer a relevant platform for Python.
|
||||||
|
|
||||||
- Issue #27921: Disallow backslashes in f-strings. This is a temporary
|
|
||||||
restriction: in beta 2, backslashes will only be disallowed inside
|
|
||||||
the braces (where the expressions are). This is a breaking change
|
|
||||||
from the 3.6 alpha releases.
|
|
||||||
|
|
||||||
- Implement PEP 523.
|
- Implement PEP 523.
|
||||||
|
|
||||||
- Issue #27870: A left shift of zero by a large integer no longer attempts
|
- Issue #27870: A left shift of zero by a large integer no longer attempts
|
||||||
|
|
456
Python/ast.c
456
Python/ast.c
|
@ -4155,141 +4155,74 @@ decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len)
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compile this expression in to an expr_ty. We know that we can
|
/* Compile this expression into an expr_ty. Add parens around the
|
||||||
temporarily modify the character before the start of this string
|
expression, in order to allow leading spaces in the expression. */
|
||||||
(it's '{'), and we know we can temporarily modify the character
|
|
||||||
after this string (it is a '}'). Leverage this to create a
|
|
||||||
sub-string with enough room for us to add parens around the
|
|
||||||
expression. This is to allow strings with embedded newlines, for
|
|
||||||
example. */
|
|
||||||
static expr_ty
|
static expr_ty
|
||||||
fstring_compile_expr(PyObject *str, Py_ssize_t expr_start,
|
fstring_compile_expr(const char *expr_start, const char *expr_end,
|
||||||
Py_ssize_t expr_end, struct compiling *c, const node *n)
|
struct compiling *c, const node *n)
|
||||||
|
|
||||||
{
|
{
|
||||||
|
int all_whitespace = 1;
|
||||||
|
int kind;
|
||||||
|
void *data;
|
||||||
PyCompilerFlags cf;
|
PyCompilerFlags cf;
|
||||||
mod_ty mod;
|
mod_ty mod;
|
||||||
char *utf_expr;
|
char *str;
|
||||||
|
PyObject *o;
|
||||||
|
Py_ssize_t len;
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
Py_UCS4 end_ch = -1;
|
|
||||||
int all_whitespace;
|
|
||||||
PyObject *sub = NULL;
|
|
||||||
|
|
||||||
/* We only decref sub if we allocated it with a PyUnicode_Substring.
|
|
||||||
decref_sub records that. */
|
|
||||||
int decref_sub = 0;
|
|
||||||
|
|
||||||
assert(str);
|
|
||||||
|
|
||||||
assert(expr_start >= 0 && expr_start < PyUnicode_GET_LENGTH(str));
|
|
||||||
assert(expr_end >= 0 && expr_end < PyUnicode_GET_LENGTH(str));
|
|
||||||
assert(expr_end >= expr_start);
|
assert(expr_end >= expr_start);
|
||||||
|
assert(*(expr_start-1) == '{');
|
||||||
|
assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
|
||||||
|
|
||||||
/* There has to be at least one character on each side of the
|
/* We know there are no escapes here, because backslashes are not allowed,
|
||||||
expression inside this str. This will have been caught before
|
and we know it's utf-8 encoded (per PEP 263). But, in order to check
|
||||||
we're called. */
|
that each char is not whitespace, we need to decode it to unicode.
|
||||||
assert(expr_start >= 1);
|
Which is unfortunate, but such is life. */
|
||||||
assert(expr_end <= PyUnicode_GET_LENGTH(str)-1);
|
|
||||||
|
|
||||||
/* If the substring is all whitespace, it's an error. We need to
|
/* If the substring is all whitespace, it's an error. We need to catch
|
||||||
catch this here, and not when we call PyParser_ASTFromString,
|
this here, and not when we call PyParser_ASTFromString, because turning
|
||||||
because turning the expression '' in to '()' would go from
|
the expression '' into '()' would go from being invalid to valid. */
|
||||||
being invalid to valid. */
|
/* Note that this code says an empty string is all whitespace. That's
|
||||||
/* Note that this code says an empty string is all
|
important. There's a test for it: f'{}'. */
|
||||||
whitespace. That's important. There's a test for it: f'{}'. */
|
o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL);
|
||||||
all_whitespace = 1;
|
if (o == NULL)
|
||||||
for (i = expr_start; i < expr_end; i++) {
|
return NULL;
|
||||||
if (!Py_UNICODE_ISSPACE(PyUnicode_READ_CHAR(str, i))) {
|
len = PyUnicode_GET_LENGTH(o);
|
||||||
|
kind = PyUnicode_KIND(o);
|
||||||
|
data = PyUnicode_DATA(o);
|
||||||
|
for (i = 0; i < len; i++) {
|
||||||
|
if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
|
||||||
all_whitespace = 0;
|
all_whitespace = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Py_DECREF(o);
|
||||||
if (all_whitespace) {
|
if (all_whitespace) {
|
||||||
ast_error(c, n, "f-string: empty expression not allowed");
|
ast_error(c, n, "f-string: empty expression not allowed");
|
||||||
goto error;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the substring will be the entire source string, we can't use
|
/* Reuse len to be the length of the utf-8 input string. */
|
||||||
PyUnicode_Substring, since it will return another reference to
|
len = expr_end - expr_start;
|
||||||
our original string. Because we're modifying the string in
|
/* Allocate 3 extra bytes: open paren, close paren, null byte. */
|
||||||
place, that's a no-no. So, detect that case and just use our
|
str = PyMem_RawMalloc(len + 3);
|
||||||
string directly. */
|
if (str == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
if (expr_start-1 == 0 && expr_end+1 == PyUnicode_GET_LENGTH(str)) {
|
str[0] = '(';
|
||||||
/* If str is well formed, then the first and last chars must
|
memcpy(str+1, expr_start, len);
|
||||||
be '{' and '}', respectively. But, if there's a syntax
|
str[len+1] = ')';
|
||||||
error, for example f'{3!', then the last char won't be a
|
str[len+2] = 0;
|
||||||
closing brace. So, remember the last character we read in
|
|
||||||
order for us to restore it. */
|
|
||||||
end_ch = PyUnicode_ReadChar(str, expr_end-expr_start+1);
|
|
||||||
assert(end_ch != (Py_UCS4)-1);
|
|
||||||
|
|
||||||
/* In all cases, however, start_ch must be '{'. */
|
|
||||||
assert(PyUnicode_ReadChar(str, 0) == '{');
|
|
||||||
|
|
||||||
sub = str;
|
|
||||||
} else {
|
|
||||||
/* Create a substring object. It must be a new object, with
|
|
||||||
refcount==1, so that we can modify it. */
|
|
||||||
sub = PyUnicode_Substring(str, expr_start-1, expr_end+1);
|
|
||||||
if (!sub)
|
|
||||||
goto error;
|
|
||||||
assert(sub != str); /* Make sure it's a new string. */
|
|
||||||
decref_sub = 1; /* Remember to deallocate it on error. */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Put () around the expression. */
|
|
||||||
if (PyUnicode_WriteChar(sub, 0, '(') < 0 ||
|
|
||||||
PyUnicode_WriteChar(sub, expr_end-expr_start+1, ')') < 0)
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
/* No need to free the memory returned here: it's managed by the
|
|
||||||
string. */
|
|
||||||
utf_expr = PyUnicode_AsUTF8(sub);
|
|
||||||
if (!utf_expr)
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
cf.cf_flags = PyCF_ONLY_AST;
|
cf.cf_flags = PyCF_ONLY_AST;
|
||||||
mod = PyParser_ASTFromString(utf_expr, "<fstring>",
|
mod = PyParser_ASTFromString(str, "<fstring>",
|
||||||
Py_eval_input, &cf, c->c_arena);
|
Py_eval_input, &cf, c->c_arena);
|
||||||
|
PyMem_RawFree(str);
|
||||||
if (!mod)
|
if (!mod)
|
||||||
goto error;
|
|
||||||
|
|
||||||
if (sub != str)
|
|
||||||
/* Clear instead of decref in case we ever modify this code to change
|
|
||||||
the error handling: this is safest because the XDECREF won't try
|
|
||||||
and decref it when it's NULL. */
|
|
||||||
/* No need to restore the chars in sub, since we know it's getting
|
|
||||||
ready to get deleted (refcount must be 1, since we got a new string
|
|
||||||
in PyUnicode_Substring). */
|
|
||||||
Py_CLEAR(sub);
|
|
||||||
else {
|
|
||||||
assert(!decref_sub);
|
|
||||||
assert(end_ch != (Py_UCS4)-1);
|
|
||||||
/* Restore str, which we earlier modified directly. */
|
|
||||||
if (PyUnicode_WriteChar(str, 0, '{') < 0 ||
|
|
||||||
PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch) < 0)
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
return mod->v.Expression.body;
|
|
||||||
|
|
||||||
error:
|
|
||||||
/* Only decref sub if it was the result of a call to SubString. */
|
|
||||||
if (decref_sub)
|
|
||||||
Py_XDECREF(sub);
|
|
||||||
|
|
||||||
if (end_ch != (Py_UCS4)-1) {
|
|
||||||
/* We only get here if we modified str. Make sure that's the
|
|
||||||
case: str will be equal to sub. */
|
|
||||||
if (str == sub) {
|
|
||||||
/* Don't check the error, because we've already set the
|
|
||||||
error state (that's why we're in 'error', after
|
|
||||||
all). */
|
|
||||||
PyUnicode_WriteChar(str, 0, '{');
|
|
||||||
PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
return mod->v.Expression.body;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return -1 on error.
|
/* Return -1 on error.
|
||||||
|
@ -4301,35 +4234,38 @@ error:
|
||||||
doubled braces.
|
doubled braces.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal,
|
fstring_find_literal(const char **str, const char *end, int raw,
|
||||||
int recurse_lvl, struct compiling *c, const node *n)
|
PyObject **literal, int recurse_lvl,
|
||||||
|
struct compiling *c, const node *n)
|
||||||
{
|
{
|
||||||
/* Get any literal string. It ends when we hit an un-doubled brace, or the
|
/* Get any literal string. It ends when we hit an un-doubled left
|
||||||
end of the string. */
|
brace (which isn't part of a unicode name escape such as
|
||||||
|
"\N{EULER CONSTANT}"), or the end of the string. */
|
||||||
|
|
||||||
Py_ssize_t literal_start, literal_end;
|
const char *literal_start = *str;
|
||||||
|
const char *literal_end;
|
||||||
|
int in_named_escape = 0;
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
||||||
enum PyUnicode_Kind kind = PyUnicode_KIND(str);
|
|
||||||
void *data = PyUnicode_DATA(str);
|
|
||||||
|
|
||||||
assert(*literal == NULL);
|
assert(*literal == NULL);
|
||||||
|
for (; *str < end; (*str)++) {
|
||||||
literal_start = *ofs;
|
char ch = **str;
|
||||||
for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) {
|
if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
|
||||||
Py_UCS4 ch = PyUnicode_READ(kind, data, *ofs);
|
*(*str-2) == '\\' && *(*str-1) == 'N') {
|
||||||
if (ch == '{' || ch == '}') {
|
in_named_escape = 1;
|
||||||
|
} else if (in_named_escape && ch == '}') {
|
||||||
|
in_named_escape = 0;
|
||||||
|
} else if (ch == '{' || ch == '}') {
|
||||||
/* Check for doubled braces, but only at the top level. If
|
/* Check for doubled braces, but only at the top level. If
|
||||||
we checked at every level, then f'{0:{3}}' would fail
|
we checked at every level, then f'{0:{3}}' would fail
|
||||||
with the two closing braces. */
|
with the two closing braces. */
|
||||||
if (recurse_lvl == 0) {
|
if (recurse_lvl == 0) {
|
||||||
if (*ofs + 1 < PyUnicode_GET_LENGTH(str) &&
|
if (*str+1 < end && *(*str+1) == ch) {
|
||||||
PyUnicode_READ(kind, data, *ofs + 1) == ch) {
|
|
||||||
/* We're going to tell the caller that the literal ends
|
/* We're going to tell the caller that the literal ends
|
||||||
here, but that they should continue scanning. But also
|
here, but that they should continue scanning. But also
|
||||||
skip over the second brace when we resume scanning. */
|
skip over the second brace when we resume scanning. */
|
||||||
literal_end = *ofs + 1;
|
literal_end = *str+1;
|
||||||
*ofs += 2;
|
*str += 2;
|
||||||
result = 1;
|
result = 1;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
@ -4341,34 +4277,36 @@ fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal,
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We're either at a '{', which means we're starting another
|
/* We're either at a '{', which means we're starting another
|
||||||
expression; or a '}', which means we're at the end of this
|
expression; or a '}', which means we're at the end of this
|
||||||
f-string (for a nested format_spec). */
|
f-string (for a nested format_spec). */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
literal_end = *ofs;
|
literal_end = *str;
|
||||||
|
assert(*str <= end);
|
||||||
assert(*ofs == PyUnicode_GET_LENGTH(str) ||
|
assert(*str == end || **str == '{' || **str == '}');
|
||||||
PyUnicode_READ(kind, data, *ofs) == '{' ||
|
|
||||||
PyUnicode_READ(kind, data, *ofs) == '}');
|
|
||||||
done:
|
done:
|
||||||
if (literal_start != literal_end) {
|
if (literal_start != literal_end) {
|
||||||
*literal = PyUnicode_Substring(str, literal_start, literal_end);
|
if (raw)
|
||||||
|
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
|
||||||
|
literal_end-literal_start,
|
||||||
|
NULL, NULL);
|
||||||
|
else
|
||||||
|
*literal = decode_unicode_with_escapes(c, literal_start,
|
||||||
|
literal_end-literal_start);
|
||||||
if (!*literal)
|
if (!*literal)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Forward declaration because parsing is recursive. */
|
/* Forward declaration because parsing is recursive. */
|
||||||
static expr_ty
|
static expr_ty
|
||||||
fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
|
||||||
struct compiling *c, const node *n);
|
struct compiling *c, const node *n);
|
||||||
|
|
||||||
/* Parse the f-string str, starting at ofs. We know *ofs starts an
|
/* Parse the f-string at *str, ending at end. We know *str starts an
|
||||||
expression (so it must be a '{'). Returns the FormattedValue node,
|
expression (so it must be a '{'). Returns the FormattedValue node,
|
||||||
which includes the expression, conversion character, and
|
which includes the expression, conversion character, and
|
||||||
format_spec expression.
|
format_spec expression.
|
||||||
|
@ -4379,23 +4317,20 @@ fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
find the end of all valid ones. Any errors inside the expression
|
find the end of all valid ones. Any errors inside the expression
|
||||||
will be caught when we parse it later. */
|
will be caught when we parse it later. */
|
||||||
static int
|
static int
|
||||||
fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
|
||||||
expr_ty *expression, struct compiling *c, const node *n)
|
expr_ty *expression, struct compiling *c, const node *n)
|
||||||
{
|
{
|
||||||
/* Return -1 on error, else 0. */
|
/* Return -1 on error, else 0. */
|
||||||
|
|
||||||
Py_ssize_t expr_start;
|
const char *expr_start;
|
||||||
Py_ssize_t expr_end;
|
const char *expr_end;
|
||||||
expr_ty simple_expression;
|
expr_ty simple_expression;
|
||||||
expr_ty format_spec = NULL; /* Optional format specifier. */
|
expr_ty format_spec = NULL; /* Optional format specifier. */
|
||||||
Py_UCS4 conversion = -1; /* The conversion char. -1 if not specified. */
|
char conversion = -1; /* The conversion char. -1 if not specified. */
|
||||||
|
|
||||||
enum PyUnicode_Kind kind = PyUnicode_KIND(str);
|
|
||||||
void *data = PyUnicode_DATA(str);
|
|
||||||
|
|
||||||
/* 0 if we're not in a string, else the quote char we're trying to
|
/* 0 if we're not in a string, else the quote char we're trying to
|
||||||
match (single or double quote). */
|
match (single or double quote). */
|
||||||
Py_UCS4 quote_char = 0;
|
char quote_char = 0;
|
||||||
|
|
||||||
/* If we're inside a string, 1=normal, 3=triple-quoted. */
|
/* If we're inside a string, 1=normal, 3=triple-quoted. */
|
||||||
int string_type = 0;
|
int string_type = 0;
|
||||||
|
@ -4412,22 +4347,30 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
|
|
||||||
/* The first char must be a left brace, or we wouldn't have gotten
|
/* The first char must be a left brace, or we wouldn't have gotten
|
||||||
here. Skip over it. */
|
here. Skip over it. */
|
||||||
assert(PyUnicode_READ(kind, data, *ofs) == '{');
|
assert(**str == '{');
|
||||||
*ofs += 1;
|
*str += 1;
|
||||||
|
|
||||||
expr_start = *ofs;
|
expr_start = *str;
|
||||||
for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) {
|
for (; *str < end; (*str)++) {
|
||||||
Py_UCS4 ch;
|
char ch;
|
||||||
|
|
||||||
/* Loop invariants. */
|
/* Loop invariants. */
|
||||||
assert(nested_depth >= 0);
|
assert(nested_depth >= 0);
|
||||||
assert(*ofs >= expr_start);
|
assert(*str >= expr_start && *str < end);
|
||||||
if (quote_char)
|
if (quote_char)
|
||||||
assert(string_type == 1 || string_type == 3);
|
assert(string_type == 1 || string_type == 3);
|
||||||
else
|
else
|
||||||
assert(string_type == 0);
|
assert(string_type == 0);
|
||||||
|
|
||||||
ch = PyUnicode_READ(kind, data, *ofs);
|
ch = **str;
|
||||||
|
/* Nowhere inside an expression is a backslash allowed. */
|
||||||
|
if (ch == '\\') {
|
||||||
|
/* Error: can't include a backslash character, inside
|
||||||
|
parens or strings or not. */
|
||||||
|
ast_error(c, n, "f-string expression part "
|
||||||
|
"cannot include a backslash");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
if (quote_char) {
|
if (quote_char) {
|
||||||
/* We're inside a string. See if we're at the end. */
|
/* We're inside a string. See if we're at the end. */
|
||||||
/* This code needs to implement the same non-error logic
|
/* This code needs to implement the same non-error logic
|
||||||
|
@ -4443,11 +4386,9 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
/* Does this match the string_type (single or triple
|
/* Does this match the string_type (single or triple
|
||||||
quoted)? */
|
quoted)? */
|
||||||
if (string_type == 3) {
|
if (string_type == 3) {
|
||||||
if (*ofs+2 < PyUnicode_GET_LENGTH(str) &&
|
if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
|
||||||
PyUnicode_READ(kind, data, *ofs+1) == ch &&
|
|
||||||
PyUnicode_READ(kind, data, *ofs+2) == ch) {
|
|
||||||
/* We're at the end of a triple quoted string. */
|
/* We're at the end of a triple quoted string. */
|
||||||
*ofs += 2;
|
*str += 2;
|
||||||
string_type = 0;
|
string_type = 0;
|
||||||
quote_char = 0;
|
quote_char = 0;
|
||||||
continue;
|
continue;
|
||||||
|
@ -4459,21 +4400,11 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* We're inside a string, and not finished with the
|
|
||||||
string. If this is a backslash, skip the next char (it
|
|
||||||
might be an end quote that needs skipping). Otherwise,
|
|
||||||
just consume this character normally. */
|
|
||||||
if (ch == '\\' && *ofs+1 < PyUnicode_GET_LENGTH(str)) {
|
|
||||||
/* Just skip the next char, whatever it is. */
|
|
||||||
*ofs += 1;
|
|
||||||
}
|
|
||||||
} else if (ch == '\'' || ch == '"') {
|
} else if (ch == '\'' || ch == '"') {
|
||||||
/* Is this a triple quoted string? */
|
/* Is this a triple quoted string? */
|
||||||
if (*ofs+2 < PyUnicode_GET_LENGTH(str) &&
|
if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
|
||||||
PyUnicode_READ(kind, data, *ofs+1) == ch &&
|
|
||||||
PyUnicode_READ(kind, data, *ofs+2) == ch) {
|
|
||||||
string_type = 3;
|
string_type = 3;
|
||||||
*ofs += 2;
|
*str += 2;
|
||||||
} else {
|
} else {
|
||||||
/* Start of a normal string. */
|
/* Start of a normal string. */
|
||||||
string_type = 1;
|
string_type = 1;
|
||||||
|
@ -4495,18 +4426,17 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
/* First, test for the special case of "!=". Since '=' is
|
/* First, test for the special case of "!=". Since '=' is
|
||||||
not an allowed conversion character, nothing is lost in
|
not an allowed conversion character, nothing is lost in
|
||||||
this test. */
|
this test. */
|
||||||
if (ch == '!' && *ofs+1 < PyUnicode_GET_LENGTH(str) &&
|
if (ch == '!' && *str+1 < end && *(*str+1) == '=') {
|
||||||
PyUnicode_READ(kind, data, *ofs+1) == '=')
|
|
||||||
/* This isn't a conversion character, just continue. */
|
/* This isn't a conversion character, just continue. */
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
/* Normal way out of this loop. */
|
/* Normal way out of this loop. */
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
/* Just consume this char and loop around. */
|
/* Just consume this char and loop around. */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
expr_end = *ofs;
|
expr_end = *str;
|
||||||
/* If we leave this loop in a string or with mismatched parens, we
|
/* If we leave this loop in a string or with mismatched parens, we
|
||||||
don't care. We'll get a syntax error when compiling the
|
don't care. We'll get a syntax error when compiling the
|
||||||
expression. But, we can produce a better error message, so
|
expression. But, we can produce a better error message, so
|
||||||
|
@ -4520,24 +4450,24 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*ofs >= PyUnicode_GET_LENGTH(str))
|
if (*str >= end)
|
||||||
goto unexpected_end_of_string;
|
goto unexpected_end_of_string;
|
||||||
|
|
||||||
/* Compile the expression as soon as possible, so we show errors
|
/* Compile the expression as soon as possible, so we show errors
|
||||||
related to the expression before errors related to the
|
related to the expression before errors related to the
|
||||||
conversion or format_spec. */
|
conversion or format_spec. */
|
||||||
simple_expression = fstring_compile_expr(str, expr_start, expr_end, c, n);
|
simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
|
||||||
if (!simple_expression)
|
if (!simple_expression)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
/* Check for a conversion char, if present. */
|
/* Check for a conversion char, if present. */
|
||||||
if (PyUnicode_READ(kind, data, *ofs) == '!') {
|
if (**str == '!') {
|
||||||
*ofs += 1;
|
*str += 1;
|
||||||
if (*ofs >= PyUnicode_GET_LENGTH(str))
|
if (*str >= end)
|
||||||
goto unexpected_end_of_string;
|
goto unexpected_end_of_string;
|
||||||
|
|
||||||
conversion = PyUnicode_READ(kind, data, *ofs);
|
conversion = **str;
|
||||||
*ofs += 1;
|
*str += 1;
|
||||||
|
|
||||||
/* Validate the conversion. */
|
/* Validate the conversion. */
|
||||||
if (!(conversion == 's' || conversion == 'r'
|
if (!(conversion == 's' || conversion == 'r'
|
||||||
|
@ -4549,30 +4479,29 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for the format spec, if present. */
|
/* Check for the format spec, if present. */
|
||||||
if (*ofs >= PyUnicode_GET_LENGTH(str))
|
if (*str >= end)
|
||||||
goto unexpected_end_of_string;
|
goto unexpected_end_of_string;
|
||||||
if (PyUnicode_READ(kind, data, *ofs) == ':') {
|
if (**str == ':') {
|
||||||
*ofs += 1;
|
*str += 1;
|
||||||
if (*ofs >= PyUnicode_GET_LENGTH(str))
|
if (*str >= end)
|
||||||
goto unexpected_end_of_string;
|
goto unexpected_end_of_string;
|
||||||
|
|
||||||
/* Parse the format spec. */
|
/* Parse the format spec. */
|
||||||
format_spec = fstring_parse(str, ofs, recurse_lvl+1, c, n);
|
format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
|
||||||
if (!format_spec)
|
if (!format_spec)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*ofs >= PyUnicode_GET_LENGTH(str) ||
|
if (*str >= end || **str != '}')
|
||||||
PyUnicode_READ(kind, data, *ofs) != '}')
|
|
||||||
goto unexpected_end_of_string;
|
goto unexpected_end_of_string;
|
||||||
|
|
||||||
/* We're at a right brace. Consume it. */
|
/* We're at a right brace. Consume it. */
|
||||||
assert(*ofs < PyUnicode_GET_LENGTH(str));
|
assert(*str < end);
|
||||||
assert(PyUnicode_READ(kind, data, *ofs) == '}');
|
assert(**str == '}');
|
||||||
*ofs += 1;
|
*str += 1;
|
||||||
|
|
||||||
/* And now create the FormattedValue node that represents this entire
|
/* And now create the FormattedValue node that represents this
|
||||||
expression with the conversion and format spec. */
|
entire expression with the conversion and format spec. */
|
||||||
*expression = FormattedValue(simple_expression, (int)conversion,
|
*expression = FormattedValue(simple_expression, (int)conversion,
|
||||||
format_spec, LINENO(n), n->n_col_offset,
|
format_spec, LINENO(n), n->n_col_offset,
|
||||||
c->c_arena);
|
c->c_arena);
|
||||||
|
@ -4610,8 +4539,9 @@ unexpected_end_of_string:
|
||||||
we're finished.
|
we're finished.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
fstring_find_literal_and_expr(const char **str, const char *end, int raw,
|
||||||
PyObject **literal, expr_ty *expression,
|
int recurse_lvl, PyObject **literal,
|
||||||
|
expr_ty *expression,
|
||||||
struct compiling *c, const node *n)
|
struct compiling *c, const node *n)
|
||||||
{
|
{
|
||||||
int result;
|
int result;
|
||||||
|
@ -4619,7 +4549,7 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
assert(*literal == NULL && *expression == NULL);
|
assert(*literal == NULL && *expression == NULL);
|
||||||
|
|
||||||
/* Get any literal string. */
|
/* Get any literal string. */
|
||||||
result = fstring_find_literal(str, ofs, literal, recurse_lvl, c, n);
|
result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
|
||||||
if (result < 0)
|
if (result < 0)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
|
@ -4629,10 +4559,7 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
/* We have a literal, but don't look at the expression. */
|
/* We have a literal, but don't look at the expression. */
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
assert(*ofs <= PyUnicode_GET_LENGTH(str));
|
if (*str >= end || **str == '}')
|
||||||
|
|
||||||
if (*ofs >= PyUnicode_GET_LENGTH(str) ||
|
|
||||||
PyUnicode_READ_CHAR(str, *ofs) == '}')
|
|
||||||
/* We're at the end of the string or the end of a nested
|
/* We're at the end of the string or the end of a nested
|
||||||
f-string: no expression. The top-level error case where we
|
f-string: no expression. The top-level error case where we
|
||||||
expect to be at the end of the string but we're at a '}' is
|
expect to be at the end of the string but we're at a '}' is
|
||||||
|
@ -4640,10 +4567,9 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* We must now be the start of an expression, on a '{'. */
|
/* We must now be the start of an expression, on a '{'. */
|
||||||
assert(*ofs < PyUnicode_GET_LENGTH(str) &&
|
assert(**str == '{');
|
||||||
PyUnicode_READ_CHAR(str, *ofs) == '{');
|
|
||||||
|
|
||||||
if (fstring_find_expr(str, ofs, recurse_lvl, expression, c, n) < 0)
|
if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -4852,13 +4778,11 @@ FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Parse an f-string. The f-string is in str, starting at ofs, with no 'f'
|
/* Parse an f-string. The f-string is in *str to end, with no
|
||||||
or quotes. str is not decref'd, since we don't know if it's used elsewhere.
|
'f' or quotes. */
|
||||||
And if we're only looking at a part of a string, then decref'ing is
|
|
||||||
definitely not the right thing to do! */
|
|
||||||
static int
|
static int
|
||||||
FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
|
FstringParser_ConcatFstring(FstringParser *state, const char **str,
|
||||||
Py_ssize_t *ofs, int recurse_lvl,
|
const char *end, int raw, int recurse_lvl,
|
||||||
struct compiling *c, const node *n)
|
struct compiling *c, const node *n)
|
||||||
{
|
{
|
||||||
FstringParser_check_invariants(state);
|
FstringParser_check_invariants(state);
|
||||||
|
@ -4872,7 +4796,7 @@ FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
|
||||||
expression, literal will be NULL. If we're at the end of
|
expression, literal will be NULL. If we're at the end of
|
||||||
the f-string, expression will be NULL (unless result == 1,
|
the f-string, expression will be NULL (unless result == 1,
|
||||||
see below). */
|
see below). */
|
||||||
int result = fstring_find_literal_and_expr(str, ofs, recurse_lvl,
|
int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
|
||||||
&literal, &expression,
|
&literal, &expression,
|
||||||
c, n);
|
c, n);
|
||||||
if (result < 0)
|
if (result < 0)
|
||||||
|
@ -4925,16 +4849,14 @@ FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(*ofs <= PyUnicode_GET_LENGTH(str));
|
|
||||||
|
|
||||||
/* If recurse_lvl is zero, then we must be at the end of the
|
/* If recurse_lvl is zero, then we must be at the end of the
|
||||||
string. Otherwise, we must be at a right brace. */
|
string. Otherwise, we must be at a right brace. */
|
||||||
|
|
||||||
if (recurse_lvl == 0 && *ofs < PyUnicode_GET_LENGTH(str)) {
|
if (recurse_lvl == 0 && *str < end-1) {
|
||||||
ast_error(c, n, "f-string: unexpected end of string");
|
ast_error(c, n, "f-string: unexpected end of string");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (recurse_lvl != 0 && PyUnicode_READ_CHAR(str, *ofs) != '}') {
|
if (recurse_lvl != 0 && **str != '}') {
|
||||||
ast_error(c, n, "f-string: expecting '}'");
|
ast_error(c, n, "f-string: expecting '}'");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -4991,17 +4913,17 @@ error:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Given an f-string (with no 'f' or quotes) that's in str starting at
|
/* Given an f-string (with no 'f' or quotes) that's in *str and ends
|
||||||
ofs, parse it into an expr_ty. Return NULL on error. Does not
|
at end, parse it into an expr_ty. Return NULL on error. Adjust
|
||||||
decref str. */
|
str to point past the parsed portion. */
|
||||||
static expr_ty
|
static expr_ty
|
||||||
fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
|
||||||
struct compiling *c, const node *n)
|
struct compiling *c, const node *n)
|
||||||
{
|
{
|
||||||
FstringParser state;
|
FstringParser state;
|
||||||
|
|
||||||
FstringParser_Init(&state);
|
FstringParser_Init(&state);
|
||||||
if (FstringParser_ConcatFstring(&state, str, ofs, recurse_lvl,
|
if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
|
||||||
c, n) < 0) {
|
c, n) < 0) {
|
||||||
FstringParser_Dealloc(&state);
|
FstringParser_Dealloc(&state);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -5012,19 +4934,25 @@ fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
|
||||||
|
|
||||||
/* n is a Python string literal, including the bracketing quote
|
/* n is a Python string literal, including the bracketing quote
|
||||||
characters, and r, b, u, &/or f prefixes (if any), and embedded
|
characters, and r, b, u, &/or f prefixes (if any), and embedded
|
||||||
escape sequences (if any). parsestr parses it, and returns the
|
escape sequences (if any). parsestr parses it, and sets *result to
|
||||||
decoded Python string object. If the string is an f-string, set
|
decoded Python string object. If the string is an f-string, set
|
||||||
*fmode and return the unparsed string object.
|
*fstr and *fstrlen to the unparsed string object. Return 0 if no
|
||||||
|
errors occurred.
|
||||||
*/
|
*/
|
||||||
static PyObject *
|
static int
|
||||||
parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
|
parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
|
||||||
|
PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
|
||||||
{
|
{
|
||||||
size_t len;
|
size_t len;
|
||||||
const char *s = STR(n);
|
const char *s = STR(n);
|
||||||
int quote = Py_CHARMASK(*s);
|
int quote = Py_CHARMASK(*s);
|
||||||
int rawmode = 0;
|
int fmode = 0;
|
||||||
|
*bytesmode = 0;
|
||||||
|
*rawmode = 0;
|
||||||
|
*result = NULL;
|
||||||
|
*fstr = NULL;
|
||||||
if (Py_ISALPHA(quote)) {
|
if (Py_ISALPHA(quote)) {
|
||||||
while (!*bytesmode || !rawmode) {
|
while (!*bytesmode || !*rawmode) {
|
||||||
if (quote == 'b' || quote == 'B') {
|
if (quote == 'b' || quote == 'B') {
|
||||||
quote = *++s;
|
quote = *++s;
|
||||||
*bytesmode = 1;
|
*bytesmode = 1;
|
||||||
|
@ -5034,24 +4962,24 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
|
||||||
}
|
}
|
||||||
else if (quote == 'r' || quote == 'R') {
|
else if (quote == 'r' || quote == 'R') {
|
||||||
quote = *++s;
|
quote = *++s;
|
||||||
rawmode = 1;
|
*rawmode = 1;
|
||||||
}
|
}
|
||||||
else if (quote == 'f' || quote == 'F') {
|
else if (quote == 'f' || quote == 'F') {
|
||||||
quote = *++s;
|
quote = *++s;
|
||||||
*fmode = 1;
|
fmode = 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (*fmode && *bytesmode) {
|
if (fmode && *bytesmode) {
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
if (quote != '\'' && quote != '\"') {
|
if (quote != '\'' && quote != '\"') {
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
/* Skip the leading quote char. */
|
/* Skip the leading quote char. */
|
||||||
s++;
|
s++;
|
||||||
|
@ -5059,12 +4987,12 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
|
||||||
if (len > INT_MAX) {
|
if (len > INT_MAX) {
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
"string to parse is too long");
|
"string to parse is too long");
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
if (s[--len] != quote) {
|
if (s[--len] != quote) {
|
||||||
/* Last quote char must match the first. */
|
/* Last quote char must match the first. */
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
if (len >= 4 && s[0] == quote && s[1] == quote) {
|
if (len >= 4 && s[0] == quote && s[1] == quote) {
|
||||||
/* A triple quoted string. We've already skipped one quote at
|
/* A triple quoted string. We've already skipped one quote at
|
||||||
|
@ -5075,21 +5003,21 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
|
||||||
/* And check that the last two match. */
|
/* And check that the last two match. */
|
||||||
if (s[--len] != quote || s[--len] != quote) {
|
if (s[--len] != quote || s[--len] != quote) {
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Temporary hack: if this is an f-string, no backslashes are allowed. */
|
if (fmode) {
|
||||||
/* See issue 27921. */
|
/* Just return the bytes. The caller will parse the resulting
|
||||||
if (*fmode && strchr(s, '\\') != NULL) {
|
string. */
|
||||||
/* Syntax error. At a later date fix this so it only checks for
|
*fstr = s;
|
||||||
backslashes within the braces. */
|
*fstrlen = len;
|
||||||
ast_error(c, n, "backslashes not allowed in f-strings");
|
return 0;
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Not an f-string. */
|
||||||
/* Avoid invoking escape decoding routines if possible. */
|
/* Avoid invoking escape decoding routines if possible. */
|
||||||
rawmode = rawmode || strchr(s, '\\') == NULL;
|
*rawmode = *rawmode || strchr(s, '\\') == NULL;
|
||||||
if (*bytesmode) {
|
if (*bytesmode) {
|
||||||
/* Disallow non-ASCII characters. */
|
/* Disallow non-ASCII characters. */
|
||||||
const char *ch;
|
const char *ch;
|
||||||
|
@ -5097,19 +5025,20 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
|
||||||
if (Py_CHARMASK(*ch) >= 0x80) {
|
if (Py_CHARMASK(*ch) >= 0x80) {
|
||||||
ast_error(c, n, "bytes can only contain ASCII "
|
ast_error(c, n, "bytes can only contain ASCII "
|
||||||
"literal characters.");
|
"literal characters.");
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (rawmode)
|
if (*rawmode)
|
||||||
return PyBytes_FromStringAndSize(s, len);
|
*result = PyBytes_FromStringAndSize(s, len);
|
||||||
else
|
else
|
||||||
return PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL);
|
*result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL);
|
||||||
} else {
|
} else {
|
||||||
if (rawmode)
|
if (*rawmode)
|
||||||
return PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
|
*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
|
||||||
else
|
else
|
||||||
return decode_unicode_with_escapes(c, s, len);
|
*result = decode_unicode_with_escapes(c, s, len);
|
||||||
}
|
}
|
||||||
|
return *result == NULL ? -1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Accepts a STRING+ atom, and produces an expr_ty node. Run through
|
/* Accepts a STRING+ atom, and produces an expr_ty node. Run through
|
||||||
|
@ -5131,13 +5060,15 @@ parsestrplus(struct compiling *c, const node *n)
|
||||||
FstringParser_Init(&state);
|
FstringParser_Init(&state);
|
||||||
|
|
||||||
for (i = 0; i < NCH(n); i++) {
|
for (i = 0; i < NCH(n); i++) {
|
||||||
int this_bytesmode = 0;
|
int this_bytesmode;
|
||||||
int this_fmode = 0;
|
int this_rawmode;
|
||||||
PyObject *s;
|
PyObject *s;
|
||||||
|
const char *fstr;
|
||||||
|
Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
|
||||||
|
|
||||||
REQ(CHILD(n, i), STRING);
|
REQ(CHILD(n, i), STRING);
|
||||||
s = parsestr(c, CHILD(n, i), &this_bytesmode, &this_fmode);
|
if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
|
||||||
if (!s)
|
&fstr, &fstrlen) != 0)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
/* Check that we're not mixing bytes with unicode. */
|
/* Check that we're not mixing bytes with unicode. */
|
||||||
|
@ -5148,8 +5079,20 @@ parsestrplus(struct compiling *c, const node *n)
|
||||||
}
|
}
|
||||||
bytesmode = this_bytesmode;
|
bytesmode = this_bytesmode;
|
||||||
|
|
||||||
assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
|
if (fstr != NULL) {
|
||||||
|
int result;
|
||||||
|
assert(s == NULL && !bytesmode);
|
||||||
|
/* This is an f-string. Parse and concatenate it. */
|
||||||
|
result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
|
||||||
|
this_rawmode, 0, c, n);
|
||||||
|
if (result < 0)
|
||||||
|
goto error;
|
||||||
|
} else {
|
||||||
|
assert(bytesmode ? PyBytes_CheckExact(s) :
|
||||||
|
PyUnicode_CheckExact(s));
|
||||||
|
|
||||||
|
/* A string or byte string. */
|
||||||
|
assert(s != NULL && fstr == NULL);
|
||||||
if (bytesmode) {
|
if (bytesmode) {
|
||||||
/* For bytes, concat as we go. */
|
/* For bytes, concat as we go. */
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
|
@ -5160,19 +5103,14 @@ parsestrplus(struct compiling *c, const node *n)
|
||||||
if (!bytes_str)
|
if (!bytes_str)
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
} else if (this_fmode) {
|
|
||||||
/* This is an f-string. Concatenate and decref it. */
|
|
||||||
Py_ssize_t ofs = 0;
|
|
||||||
int result = FstringParser_ConcatFstring(&state, s, &ofs, 0, c, n);
|
|
||||||
Py_DECREF(s);
|
|
||||||
if (result < 0)
|
|
||||||
goto error;
|
|
||||||
} else {
|
} else {
|
||||||
|
assert(s != NULL && fstr == NULL);
|
||||||
/* This is a regular string. Concatenate it. */
|
/* This is a regular string. Concatenate it. */
|
||||||
if (FstringParser_ConcatAndDel(&state, s) < 0)
|
if (FstringParser_ConcatAndDel(&state, s) < 0)
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (bytesmode) {
|
if (bytesmode) {
|
||||||
/* Just return the bytes object and we're done. */
|
/* Just return the bytes object and we're done. */
|
||||||
if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
|
if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue