[3.12] gh-105390: Correctly raise TokenError instead of SyntaxError for tokenize errors (GH-105399) (#105439)

Miss Islington (bot) 2023-06-07 04:38:36 -07:00 committed by GitHub
parent c607551baf
commit c84d4d165d
6 changed files with 35 additions and 24 deletions

Lib/tokenize.py

@@ -517,14 +517,30 @@ def main():
         perror("unexpected error: %s" % err)
         raise
 
+def _transform_msg(msg):
+    """Transform error messages from the C tokenizer into the Python tokenize
+
+    The C tokenizer is more picky than the Python one, so we need to massage
+    the error messages a bit for backwards compatibility.
+    """
+    if "unterminated triple-quoted string literal" in msg:
+        return "EOF in multi-line string"
+    return msg
+
 def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False):
     """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
     if encoding is None:
         it = _tokenize.TokenizerIter(source, extra_tokens=extra_tokens)
     else:
         it = _tokenize.TokenizerIter(source, encoding=encoding, extra_tokens=extra_tokens)
-    for info in it:
-        yield TokenInfo._make(info)
+    try:
+        for info in it:
+            yield TokenInfo._make(info)
+    except SyntaxError as e:
+        if type(e) != SyntaxError:
+            raise e from None
+        msg = _transform_msg(e.msg)
+        raise TokenError(msg, (e.lineno, e.offset)) from None
 
 if __name__ == "__main__":
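
With this change, callers of the tokenize module on Python 3.12 see tokenize.TokenError again (as they did with the pure-Python tokenizer) instead of the SyntaxError that leaked from the C tokenizer. Below is a minimal sketch of how a caller observes the new behavior, assuming a 3.12 interpreter that includes this fix; the sample source string is illustrative only.

    import io
    import tokenize

    # An unterminated triple-quoted string: the C tokenizer reports
    # "unterminated triple-quoted string literal", which _transform_msg
    # rewrites to the historical "EOF in multi-line string" message.
    bad_source = 'x = """unterminated'

    try:
        for tok in tokenize.generate_tokens(io.StringIO(bad_source).readline):
            print(tok)
    except tokenize.TokenError as exc:
        # After this fix the failure surfaces as TokenError, not SyntaxError,
        # with args of the form (message, (lineno, offset)).
        print("TokenError:", exc.args[0], "at", exc.args[1])

Code that had started catching SyntaxError as a workaround on early 3.12 releases can go back to catching tokenize.TokenError once this backport lands.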