[3.10] bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130). (GH-30898)

(cherry picked from commit a0efc0c196) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
2025-08-01 07:33:08 +00:00 · 2022-01-25 22:33:57 +00:00 · 2022-01-25 22:33:57 +00:00 · 3fc8b74ace
commit 3fc8b74ace
parent 4a57fa296b
5 changed files with 67 additions and 16 deletions
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@ -1045,8 +1045,7 @@ Module(
            ast.literal_eval(node)
    def test_literal_eval_syntax_errors(self):
-        msg = "unexpected character after line continuation character"
+        with self.assertRaisesRegex(SyntaxError, "unexpected indent"):
-        with self.assertRaisesRegex(SyntaxError, msg):
            ast.literal_eval(r'''
                \
                (\
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@ -1463,6 +1463,36 @@ pass
        except SyntaxError:
            self.fail("Empty line after a line continuation character is valid.")
        # See issue-46091
        s1 = r"""\
 def fib(n):
    \
 '''Print a Fibonacci series up to n.'''
    \
 a, b = 0, 1
 """
        s2 = r"""\
 def fib(n):
    '''Print a Fibonacci series up to n.'''
    a, b = 0, 1
 """
        try:
            self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
        except SyntaxError:
            self.fail("Indented statement over multiple lines is valid")
    def test_continuation_bad_indentation(self): 
        # A backslash continuation must not be usable to break indentation
        # across physical lines. In the sample below, the line holding only
        # a backslash and the ``foo = 1`` statement that follows it sit at an
        # indentation level that differs from both ``if x:`` and ``y = 1``,
        # so executing the code is expected to raise IndentationError.
        code = r"""\
 if x:
    y = 1
  \
  foo = 1
        """
        self.assertRaises(IndentationError, exec, code)
    @support.cpython_only
    def test_nested_named_except_blocks(self):
        code = ""
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@ -6,6 +6,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     NEWLINE)
 from io import BytesIO, StringIO
 import unittest
 from textwrap import dedent
 from unittest import TestCase, mock
 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
@ -45,7 +46,6 @@ class TokenizeTest(TestCase):
        # The ENDMARKER and final NEWLINE are omitted.
        f = BytesIO(s.encode('utf-8'))
        result = stringify_tokens_from_source(tokenize(f.readline), s)
        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())
--- a/Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst
+++ b/Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst
@ -0,0 +1,2 @@
 Correctly calculate indentation levels for lines with whitespace characters
 that are ended by line continuation characters. Patch by Pablo Galindo.
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@ -1346,6 +1346,24 @@ tok_decimal_tail(struct tok_state *tok)
 /* Get next token, after space stripping etc. */
 /* Consume the rest of a backslash line continuation.
  *
  * Callers invoke this right after reading a backslash. The next
  * character must be a newline; otherwise tok->done is set to
  * E_LINECONT and -1 is returned. The character following the newline
  * is then inspected: on EOF, tok->done is set to E_EOF, tok->cur is
  * moved to tok->inp and -1 is returned; any other character is pushed
  * back with tok_backup() and returned to the caller.
  */
 static inline int
 tok_continuation_line(struct tok_state *tok) {
    int c = tok_nextc(tok);
    if (c != '\n') {
        /* Something other than a newline followed the backslash. */
        tok->done = E_LINECONT;
        return -1;
    }
    /* Look at the first character of the next physical line. */
    c = tok_nextc(tok);
    if (c == EOF) {
        /* The input ended right after the continuation. */
        tok->done = E_EOF;
        tok->cur = tok->inp;
        return -1;
    } else {
        /* Un-read the character so the caller's scanning sees it again. */
        tok_backup(tok, c);
    }
    return c;
 }
 static int
 tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
 {
@ -1362,6 +1380,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
        int col = 0;
        int altcol = 0;
        tok->atbol = 0;
        int cont_line_col = 0;
        for (;;) {
            c = tok_nextc(tok);
            if (c == ' ') {
@ -1374,14 +1393,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
            else if (c == '\014')  {/* Control-L (formfeed) */
                col = altcol = 0; /* For Emacs users */
            }
            else if (c == '\\') {
                // Indentation cannot be split over multiple physical lines
                // using backslashes. This means that if we found a backslash
                // preceded by whitespace, **the first one we find** determines
                // the level of indentation of whatever comes next.
                cont_line_col = cont_line_col ? cont_line_col : col;
                if ((c = tok_continuation_line(tok)) == -1) {
                    return ERRORTOKEN;
                }
            }
            else {
                break;
            }
        }
        tok_backup(tok, c);
-        if (c == '#' || c == '\n' || c == '\\') {
+        if (c == '#' || c == '\n') {
            /* Lines with only whitespace and/or comments
               and/or a line continuation character
               shouldn't affect the indentation and are
               not passed to the parser as NEWLINE tokens,
               except *totally* empty lines in interactive
@ -1402,6 +1430,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
               may need to skip to the end of a comment */
        }
        if (!blankline && tok->level == 0) {
            col = cont_line_col ? cont_line_col : col;
            altcol = cont_line_col ? cont_line_col : altcol;
            if (col == tok->indstack[tok->indent]) {
                /* No change */
                if (altcol != tok->altindstack[tok->indent]) {
@ -1963,19 +1993,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
    /* Line continuation */
    if (c == '\\') {
-        c = tok_nextc(tok);
+        if ((c = tok_continuation_line(tok)) == -1) {
-        if (c != '\n') {
-            tok->done = E_LINECONT;
            return ERRORTOKEN;
        }
-        c = tok_nextc(tok);
-        if (c == EOF) {
-            tok->done = E_EOF;
-            tok->cur = tok->inp;
-            return ERRORTOKEN;
-        } else {
-            tok_backup(tok, c);
-        }
        tok->cont_line = 1;
        goto again; /* Read next line */
    }
		`@ -0,0 +1,2 @@`
							`Correctly calculate indentation levels for lines with whitespace characters`
							`that are ended by line continuation characters. Patch by Pablo Galindo.`