mirror of
https://github.com/python/cpython.git
synced 2025-08-01 07:33:08 +00:00
[3.10] bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130). (GH-30898)
(cherry picked from commit a0efc0c196)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
This commit is contained in:
parent
4a57fa296b
commit
3fc8b74ace
5 changed files with 67 additions and 16 deletions
|
@ -1045,8 +1045,7 @@ Module(
|
|||
ast.literal_eval(node)
|
||||
|
||||
def test_literal_eval_syntax_errors(self):
|
||||
msg = "unexpected character after line continuation character"
|
||||
with self.assertRaisesRegex(SyntaxError, msg):
|
||||
with self.assertRaisesRegex(SyntaxError, "unexpected indent"):
|
||||
ast.literal_eval(r'''
|
||||
\
|
||||
(\
|
||||
|
|
|
@ -1463,6 +1463,36 @@ pass
|
|||
except SyntaxError:
|
||||
self.fail("Empty line after a line continuation character is valid.")
|
||||
|
||||
# See issue-46091
|
||||
s1 = r"""\
|
||||
def fib(n):
|
||||
\
|
||||
'''Print a Fibonacci series up to n.'''
|
||||
\
|
||||
a, b = 0, 1
|
||||
"""
|
||||
s2 = r"""\
|
||||
def fib(n):
|
||||
'''Print a Fibonacci series up to n.'''
|
||||
a, b = 0, 1
|
||||
"""
|
||||
try:
|
||||
self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
|
||||
except SyntaxError:
|
||||
self.fail("Indented statement over multiple lines is valid")
|
||||
|
||||
def test_continuation_bad_indentation(self):
|
||||
# Check that code that breaks indentation across multiple lines raises a syntax error
|
||||
|
||||
code = r"""\
|
||||
if x:
|
||||
y = 1
|
||||
\
|
||||
foo = 1
|
||||
"""
|
||||
|
||||
self.assertRaises(IndentationError, exec, code)
|
||||
|
||||
@support.cpython_only
|
||||
def test_nested_named_except_blocks(self):
|
||||
code = ""
|
||||
|
|
|
@ -6,6 +6,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
|
|||
NEWLINE)
|
||||
from io import BytesIO, StringIO
|
||||
import unittest
|
||||
from textwrap import dedent
|
||||
from unittest import TestCase, mock
|
||||
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
|
||||
INVALID_UNDERSCORE_LITERALS)
|
||||
|
@ -45,7 +46,6 @@ class TokenizeTest(TestCase):
|
|||
# The ENDMARKER and final NEWLINE are omitted.
|
||||
f = BytesIO(s.encode('utf-8'))
|
||||
result = stringify_tokens_from_source(tokenize(f.readline), s)
|
||||
|
||||
self.assertEqual(result,
|
||||
[" ENCODING 'utf-8' (0, 0) (0, 0)"] +
|
||||
expected.rstrip().splitlines())
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Correctly calculate indentation levels for lines with whitespace characters
|
||||
that are ended by line continuation characters. Patch by Pablo Galindo.
|
|
@ -1346,6 +1346,24 @@ tok_decimal_tail(struct tok_state *tok)
|
|||
|
||||
/* Get next token, after space stripping etc. */
|
||||
|
||||
static inline int
|
||||
tok_continuation_line(struct tok_state *tok) {
|
||||
int c = tok_nextc(tok);
|
||||
if (c != '\n') {
|
||||
tok->done = E_LINECONT;
|
||||
return -1;
|
||||
}
|
||||
c = tok_nextc(tok);
|
||||
if (c == EOF) {
|
||||
tok->done = E_EOF;
|
||||
tok->cur = tok->inp;
|
||||
return -1;
|
||||
} else {
|
||||
tok_backup(tok, c);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
static int
|
||||
tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||
{
|
||||
|
@ -1362,6 +1380,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
int col = 0;
|
||||
int altcol = 0;
|
||||
tok->atbol = 0;
|
||||
int cont_line_col = 0;
|
||||
for (;;) {
|
||||
c = tok_nextc(tok);
|
||||
if (c == ' ') {
|
||||
|
@ -1374,14 +1393,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
else if (c == '\014') {/* Control-L (formfeed) */
|
||||
col = altcol = 0; /* For Emacs users */
|
||||
}
|
||||
else if (c == '\\') {
|
||||
// Indentation cannot be split over multiple physical lines
|
||||
// using backslashes. This means that if we found a backslash
|
||||
// preceded by whitespace, **the first one we find** determines
|
||||
// the level of indentation of whatever comes next.
|
||||
cont_line_col = cont_line_col ? cont_line_col : col;
|
||||
if ((c = tok_continuation_line(tok)) == -1) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tok_backup(tok, c);
|
||||
if (c == '#' || c == '\n' || c == '\\') {
|
||||
if (c == '#' || c == '\n') {
|
||||
/* Lines with only whitespace and/or comments
|
||||
and/or a line continuation character
|
||||
shouldn't affect the indentation and are
|
||||
not passed to the parser as NEWLINE tokens,
|
||||
except *totally* empty lines in interactive
|
||||
|
@ -1402,6 +1430,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
may need to skip to the end of a comment */
|
||||
}
|
||||
if (!blankline && tok->level == 0) {
|
||||
col = cont_line_col ? cont_line_col : col;
|
||||
altcol = cont_line_col ? cont_line_col : altcol;
|
||||
if (col == tok->indstack[tok->indent]) {
|
||||
/* No change */
|
||||
if (altcol != tok->altindstack[tok->indent]) {
|
||||
|
@ -1963,19 +1993,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
|
||||
/* Line continuation */
|
||||
if (c == '\\') {
|
||||
c = tok_nextc(tok);
|
||||
if (c != '\n') {
|
||||
tok->done = E_LINECONT;
|
||||
if ((c = tok_continuation_line(tok)) == -1) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
c = tok_nextc(tok);
|
||||
if (c == EOF) {
|
||||
tok->done = E_EOF;
|
||||
tok->cur = tok->inp;
|
||||
return ERRORTOKEN;
|
||||
} else {
|
||||
tok_backup(tok, c);
|
||||
}
|
||||
tok->cont_line = 1;
|
||||
goto again; /* Read next line */
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue