[3.12] gh-105390: Correctly raise TokenError instead of SyntaxError for tokenize errors (GH-105399) (#105439)

2025-08-10 03:49:18 +00:00 · 2023-06-07 04:38:36 -07:00 · 2023-06-07 04:38:36 -07:00 · c84d4d165d
commit c84d4d165d
parent c607551baf
6 changed files with 35 additions and 24 deletions
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3,7 +3,8 @@ from test.support import os_helper
 from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                     open as tokenize_open, Untokenizer, generate_tokens,
-                     NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo)
+                     NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo,
+                     TokenError)
 from io import BytesIO, StringIO
 import unittest
 from textwrap import dedent
@@ -286,7 +287,7 @@ def k(x):
        for lit in INVALID_UNDERSCORE_LITERALS:
            try:
                number_token(lit)
-            except SyntaxError:
+            except TokenError:
                continue
            self.assertNotEqual(number_token(lit), lit)

@@ -1379,7 +1380,7 @@ class TestDetectEncoding(TestCase):
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
-        # Issue 14629: need to raise SyntaxError if the first
+        # Issue 14629: need to raise TokenError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
@@ -2754,7 +2755,7 @@ async def f():
            "]",
        ]:
            with self.subTest(case=case):
-                self.assertRaises(SyntaxError, get_tokens, case)
+                self.assertRaises(TokenError, get_tokens, case)

    def test_max_indent(self):
        MAXINDENT = 100
@@ -2773,7 +2774,7 @@ async def f():

        invalid = generate_source(MAXINDENT)
        the_input = StringIO(invalid)
-        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
+        self.assertRaises(IndentationError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
        self.assertRaises(
            IndentationError, compile, invalid, "<string>", "exec"
        )