gh-132449: Improve syntax error messages for keywords with typos (#132450)

Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
Co-authored-by: Łukasz Langa <lukasz@langa.pl>
This commit is contained in:
Pablo Galindo Salgado 2025-04-22 10:01:55 +01:00 committed by GitHub
parent 3cfab449ab
commit bf3a0a1c0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 818 additions and 450 deletions

View file

@ -6,6 +6,10 @@ import linecache
import sys
import textwrap
import warnings
import codeop
import keyword
import tokenize
import io
from contextlib import suppress
import _colorize
from _colorize import ANSIColors
@ -1090,6 +1094,7 @@ class TracebackException:
self.end_offset = exc_value.end_offset
self.msg = exc_value.msg
self._is_syntax_error = True
self._exc_metadata = getattr(exc_value, "_metadata", None)
elif exc_type and issubclass(exc_type, ImportError) and \
getattr(exc_value, "name_from", None) is not None:
wrong_name = getattr(exc_value, "name_from", None)
@ -1273,6 +1278,98 @@ class TracebackException:
for ex in self.exceptions:
yield from ex.format_exception_only(show_group=show_group, _depth=_depth+1, colorize=colorize)
def _find_keyword_typos(self):
    """Turn a generic syntax error into a "Did you mean '<keyword>'?" hint.

    Looks at the NAME tokens of the failing source, and for each one that
    is close to a Python keyword, substitutes the keyword and checks
    whether the snippet then stops raising SyntaxError. On the first
    substitution that works, ``self.text``, ``self.offset``,
    ``self.end_offset``, ``self.lineno``, ``self.end_lineno`` and
    ``self.msg`` are overwritten to point at the misspelled token.

    The method is best-effort: it returns ``None`` in every case and
    leaves the instance untouched when no suggestion is found, when the
    error already carries a specific message, or when the source cannot
    be obtained or tokenized.

    Reads ``self._exc_metadata``, which is unpacked as
    ``(line, offset, source)``: ``line`` is used as the 1-based first
    line of the snippet, ``offset`` as the number of leading characters
    dropped from that first line, and ``source`` as the full source text
    (or ``None``, in which case ``self.filename`` / ``self.text`` are
    used instead).
    """
    assert self._is_syntax_error
    try:
        import _suggestions
    except ImportError:
        _suggestions = None

    # Only try to find keyword typos if there is no custom message.
    # A non-string message (e.g. None) cannot be a generic one either.
    msg = self.msg
    if not isinstance(msg, str):
        return
    if msg != "invalid syntax" and "Perhaps you forgot a comma" not in msg:
        return

    if not self._exc_metadata:
        return

    line, offset, source = self._exc_metadata
    end_line = int(self.lineno) if self.lineno is not None else 0
    lines = None
    from_filename = False

    if source is None:
        if self.filename:
            try:
                # tokenize.open() decodes using the BOM / coding cookie of
                # the file (UTF-8 by default) instead of the locale
                # encoding, so columns match what the compiler saw.
                with tokenize.open(self.filename) as f:
                    lines = f.read().splitlines()
            except Exception:
                # Fall back to the single line stored on the exception.
                line, end_line, offset = 0, 1, 0
            else:
                from_filename = True
        if lines is None:
            lines = (self.text or "").splitlines()
    else:
        lines = source.splitlines()

    error_code = lines[line - 1 if line > 0 else 0:end_line]
    if not error_code:
        # Nothing to inspect (empty source or line range out of bounds).
        return
    error_code[0] = error_code[0][offset:]
    error_code = textwrap.dedent('\n'.join(error_code))

    # Do not continue if the source is too large
    if len(error_code) > 1024:
        return

    error_lines = error_code.splitlines()
    tokens = tokenize.generate_tokens(io.StringIO(error_code).readline)
    tokens_left_to_process = 10
    import difflib
    try:
        for token in tokens:
            start, end = token.start, token.end
            if token.type != tokenize.NAME:
                continue
            # Only consider NAME tokens on the same line as the error
            if from_filename and start[0] + line != end_line + 1:
                continue
            wrong_name = token.string
            if wrong_name in keyword.kwlist:
                continue

            # Limit the number of valid tokens to consider to not spend
            # too much time in this function
            tokens_left_to_process -= 1
            if tokens_left_to_process < 0:
                break
            # Limit the number of possible matches to try
            matches = difflib.get_close_matches(wrong_name, keyword.kwlist, n=3)
            if not matches and _suggestions is not None:
                suggestion = _suggestions._generate_suggestions(keyword.kwlist, wrong_name)
                matches = [suggestion] if suggestion is not None else matches
            for suggestion in matches:
                if not suggestion or suggestion == wrong_name:
                    continue
                # Try to replace the token with the keyword
                the_lines = error_lines.copy()
                the_line = the_lines[start[0] - 1]
                the_lines[start[0] - 1] = (
                    the_line[:start[1]] + suggestion + the_line[end[1]:]
                )
                code = '\n'.join(the_lines)

                # Check if it works. compile_command() returns None for
                # incomplete input (e.g. a lone "for x in y:" header),
                # which counts as success here; any of the documented
                # errors means this candidate does not fix the code.
                try:
                    codeop.compile_command(code, symbol="exec", flags=codeop.PyCF_ONLY_AST)
                except (SyntaxError, ValueError, OverflowError):
                    continue

                # Keep token.line but handle offsets correctly
                self.text = token.line
                self.offset = start[1] + 1
                self.end_offset = end[1] + 1
                self.lineno = start[0]
                self.end_lineno = end[0]
                self.msg = f"invalid syntax. Did you mean '{suggestion}'?"
                return
    except (tokenize.TokenError, SyntaxError):
        # The snippet is known to be broken, so the tokenizer may give up
        # part-way through; in that case there is simply no suggestion.
        return
def _format_syntax_error(self, stype, **kwargs):
"""Format SyntaxError exceptions (internal helper)."""
# Show exactly where the problem was found.
@ -1299,6 +1396,9 @@ class TracebackException:
# text = " foo\n"
# rtext = " foo"
# ltext = "foo"
with suppress(Exception):
self._find_keyword_typos()
text = self.text
rtext = text.rstrip('\n')
ltext = rtext.lstrip(' \n\f')
spaces = len(rtext) - len(ltext)