gh-132449: Improve syntax error messages for keywords with typos (#132450)

Signed-off-by: Pablo Galindo <pablogsal@gmail.com> Co-authored-by: Łukasz Langa <lukasz@langa.pl>
2025-08-04 08:59:19 +00:00 · 2025-04-22 10:01:55 +01:00 · 2025-04-22 10:01:55 +01:00 · bf3a0a1c0f
commit bf3a0a1c0f
parent 3cfab449ab
12 changed files with 818 additions and 450 deletions
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@ -6,6 +6,10 @@ import linecache
 import sys
 import textwrap
 import warnings
+import codeop
+import keyword
+import tokenize
+import io
 from contextlib import suppress
 import _colorize
 from _colorize import ANSIColors
@ -1090,6 +1094,7 @@ class TracebackException:
            self.end_offset = exc_value.end_offset
            self.msg = exc_value.msg
            self._is_syntax_error = True
+            self._exc_metadata = getattr(exc_value, "_metadata", None)
        elif exc_type and issubclass(exc_type, ImportError) and \
                getattr(exc_value, "name_from", None) is not None:
            wrong_name = getattr(exc_value, "name_from", None)
@ -1273,6 +1278,98 @@ class TracebackException:
            for ex in self.exceptions:
                yield from ex.format_exception_only(show_group=show_group, _depth=_depth+1, colorize=colorize)

+    def _find_keyword_typos(self):
+        """Try to explain a plain "invalid syntax" error as a misspelled keyword.
+
+        The code around the error is re-tokenized; each NAME token that is
+        not already a keyword is replaced, one candidate at a time, by the
+        closest entries of keyword.kwlist.  The first replacement that lets
+        the snippet get past the parser wins.
+
+        On success the instance is updated in place: text, offset,
+        end_offset, lineno, end_lineno and msg (which becomes
+        "invalid syntax. Did you mean '<keyword>'?").  When nothing matches,
+        or one of the early exits is taken, the instance is left untouched.
+        Always returns None.
+
+        Must only be called when self._is_syntax_error is set.  Several
+        steps below can raise on unusual input (e.g. an empty line list);
+        the call site in _format_syntax_error wraps the call in
+        suppress(Exception).
+        """
+        assert self._is_syntax_error
+        # _suggestions is optional: without it only difflib is used below.
+        try:
+            import _suggestions
+        except ImportError:
+            _suggestions = None
+
+        # Only try to find keyword typos if there is no custom message
+        if self.msg != "invalid syntax" and "Perhaps you forgot a comma" not in self.msg:
+            return
+
+        if not self._exc_metadata:
+            return
+
+        # NOTE(review): _exc_metadata is copied from exc_value._metadata;
+        # presumably (first line of the failing statement, column offset,
+        # full source text or None) -- confirm against the code that sets it.
+        line, offset, source = self._exc_metadata
+        end_line = int(self.lineno) if self.lineno is not None else 0
+        lines = None
+        from_filename = False
+
+        if source is None:
+            # No source attached to the exception: try the file on disk,
+            # and fall back to the single line stored in self.text.
+            if self.filename:
+                try:
+                    # NOTE(review): open() without an encoding uses the
+                    # locale default, not the encoding declared by the
+                    # source file; a decode error lands in the except below.
+                    with open(self.filename) as f:
+                        lines = f.read().splitlines()
+                except Exception:
+                    # Unreadable file: reset the window so that the slice
+                    # below selects the first line of self.text unchanged.
+                    line, end_line, offset = 0,1,0
+                else:
+                    from_filename = True
+            lines = lines if lines is not None else self.text.splitlines()
+        else:
+            lines = source.splitlines()
+
+        # Cut out lines line..end_line (the start is clamped to index 0),
+        # drop the first `offset` characters of the first line, then strip
+        # the common indentation so the snippet can be tokenized on its own.
+        error_code = lines[line -1 if line > 0 else 0:end_line]
+        error_code[0] = error_code[0][offset:]
+        error_code = textwrap.dedent('\n'.join(error_code))
+
+        # Do not continue if the source is too large
+        if len(error_code) > 1024:
+            return
+
+        error_lines = error_code.splitlines()
+        tokens = tokenize.generate_tokens(io.StringIO(error_code).readline)
+        tokens_left_to_process = 10
+        import difflib
+        for token in tokens:
+            # Token positions are (row, col) within error_code: rows are
+            # 1-based, columns 0-based.
+            start, end = token.start, token.end
+            if token.type != tokenize.NAME:
+                continue
+            # Only consider NAME tokens on the same line as the error
+            # (this filter is applied only when the lines came from the file)
+            if from_filename and token.start[0]+line != end_line+1:
+                continue
+            wrong_name = token.string
+            if wrong_name in keyword.kwlist:
+                continue
+
+            # Limit the number of valid tokens to consider to not spend
+            # too much time in this function
+            tokens_left_to_process -= 1
+            if tokens_left_to_process < 0:
+                break
+            # Limit the number of possible matches to try
+            matches = difflib.get_close_matches(wrong_name, keyword.kwlist, n=3)
+            # _suggestions is consulted only when difflib found nothing.
+            if not matches and _suggestions is not None:
+                suggestion = _suggestions._generate_suggestions(keyword.kwlist, wrong_name)
+                matches = [suggestion] if suggestion is not None else matches
+            for suggestion in matches:
+                if not suggestion or suggestion == wrong_name:
+                    continue
+                # Try to replace the token with the keyword
+                the_lines = error_lines.copy()
+                the_line = the_lines[start[0] - 1][:]
+                chars = list(the_line)
+                # Slice-assigning a str splices in its individual characters.
+                chars[token.start[1]:token.end[1]] = suggestion
+                the_lines[start[0] - 1] = ''.join(chars)
+                code = '\n'.join(the_lines)
+
+                # Check if it works
+                # PyCF_ONLY_AST: parse only.  compile_command() returns None
+                # rather than raising for input that is merely incomplete, so
+                # such input also counts as a successful replacement here.
+                try:
+                    codeop.compile_command(code, symbol="exec", flags=codeop.PyCF_ONLY_AST)
+                except SyntaxError:
+                    continue
+
+                # Keep token.line but handle offsets correctly
+                # NOTE(review): every position stored below is relative to
+                # the extracted, dedented snippet (lineno is the row inside
+                # error_code), not to the original file -- confirm intended.
+                self.text = token.line
+                self.offset = token.start[1] + 1
+                self.end_offset = token.end[1] + 1
+                self.lineno = start[0]
+                self.end_lineno = end[0]
+                self.msg = f"invalid syntax. Did you mean '{suggestion}'?"
+                return
+
+
    def _format_syntax_error(self, stype, **kwargs):
        """Format SyntaxError exceptions (internal helper)."""
        # Show exactly where the problem was found.
@ -1299,6 +1396,9 @@ class TracebackException:
            # text  = "   foo\n"
            # rtext = "   foo"
            # ltext =    "foo"
+            with suppress(Exception):
+                self._find_keyword_typos()
+            text = self.text
            rtext = text.rstrip('\n')
            ltext = rtext.lstrip(' \n\f')
            spaces = len(rtext) - len(ltext)