mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
bpo-40335: Correctly handle multi-line strings in tokenize error scenarios (GH-19619)
Co-authored-by: Guido van Rossum <gvanrossum@gmail.com>
This commit is contained in:
parent
6a9e80a931
commit
11a7f158ef
3 changed files with 37 additions and 23 deletions
|
@@ -251,25 +251,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
|
|||
const char *line_start;
|
||||
|
||||
type = PyTokenizer_Get(tok, &a, &b);
|
||||
if (type == ERRORTOKEN) {
|
||||
err_ret->error = tok->done;
|
||||
break;
|
||||
}
|
||||
if (type == ENDMARKER && started) {
|
||||
type = NEWLINE; /* Add an extra newline */
|
||||
started = 0;
|
||||
/* Add the right number of dedent tokens,
|
||||
except if a certain flag is given --
|
||||
codeop.py uses this. */
|
||||
if (tok->indent &&
|
||||
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
|
||||
{
|
||||
tok->pendin = -tok->indent;
|
||||
tok->indent = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
started = 1;
|
||||
|
||||
len = (a != NULL && b != NULL) ? b - a : 0;
|
||||
str = (char *) PyObject_MALLOC(len + 1);
|
||||
if (str == NULL) {
|
||||
|
@@ -328,6 +310,27 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (type == ERRORTOKEN) {
|
||||
err_ret->error = tok->done;
|
||||
break;
|
||||
}
|
||||
if (type == ENDMARKER && started) {
|
||||
type = NEWLINE; /* Add an extra newline */
|
||||
started = 0;
|
||||
/* Add the right number of dedent tokens,
|
||||
except if a certain flag is given --
|
||||
codeop.py uses this. */
|
||||
if (tok->indent &&
|
||||
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
|
||||
{
|
||||
tok->pendin = -tok->indent;
|
||||
tok->indent = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
started = 1;
|
||||
}
|
||||
|
||||
if ((err_ret->error =
|
||||
PyParser_AddToken(ps, (int)type, str,
|
||||
lineno, col_offset, tok->lineno, end_col_offset,
|
||||
|
|
|
@@ -1392,13 +1392,14 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
|||
if (nonascii && !verify_identifier(tok)) {
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
|
||||
*p_start = tok->start;
|
||||
*p_end = tok->cur;
|
||||
|
||||
if (c == '"' || c == '\'') {
|
||||
tok->done = E_BADPREFIX;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
*p_start = tok->start;
|
||||
*p_end = tok->cur;
|
||||
|
||||
/* async/await parsing block. */
|
||||
if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
|
||||
/* May be an 'async' or 'await' token. For Python 3.7 or
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue