gh-104976: Ensure trailing DEDENT tokens are emitted as they were by the previous tokenizer (#104980)
Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
This commit is contained in:
  parent 402ee5a68b
  commit 46b52e6e2b
4 changed files with 34 additions and 15 deletions
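To make the behavioral change concrete before the diff itself: the commit makes the C tokenizer report trailing DEDENT tokens on the line after the last source line, at column 0, as the pure-Python tokenizer did in Python 3.11. A minimal sketch (not part of the commit; the snippet and the expected positions mirror the updated test expectations below):

```python
import io
import tokenize

# Two-line input: the block opened by "if" must be closed by a trailing DEDENT.
source = "if False:\n    pass\n"
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))

# Expected tail of the stream (positions as in Python 3.11 and in the updated
# tests): the trailing DEDENT is reported at (3, 0), i.e. on the line after
# the last source line, followed by ENDMARKER.
for tok in tokens[-3:]:
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)
# NEWLINE '\n' (2, 8) (2, 9)
# DEDENT '' (3, 0) (3, 0)
# ENDMARKER '' (3, 0) (3, 0)
```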
Lib/test/test_tokenize.py

@@ -82,7 +82,7 @@ class TokenizeTest(TestCase):
     NAME 'False' (4, 11) (4, 16)
     COMMENT '# NEWLINE' (4, 17) (4, 26)
     NEWLINE '\\n' (4, 26) (4, 27)
-    DEDENT '' (4, 27) (4, 27)
+    DEDENT '' (5, 0) (5, 0)
     """)
 
         indent_error_file = b"""\
 def k(x):
@@ -755,8 +755,8 @@ def"', """\
     NEWLINE '\\n' (2, 5) (2, 6)
     INDENT ' \\t' (3, 0) (3, 9)
     NAME 'pass' (3, 9) (3, 13)
-    DEDENT '' (3, 14) (3, 14)
-    DEDENT '' (3, 14) (3, 14)
+    DEDENT '' (4, 0) (4, 0)
+    DEDENT '' (4, 0) (4, 0)
     """)
 
     def test_non_ascii_identifiers(self):
@@ -968,7 +968,7 @@ async def foo():
     NUMBER '1' (2, 17) (2, 18)
     OP ':' (2, 18) (2, 19)
     NAME 'pass' (2, 20) (2, 24)
-    DEDENT '' (2, 25) (2, 25)
+    DEDENT '' (3, 0) (3, 0)
     """)
 
         self.check_tokenize('''async def foo(async): await''', """\
@@ -1016,7 +1016,7 @@ def f():
     NAME 'await' (6, 2) (6, 7)
     OP '=' (6, 8) (6, 9)
     NUMBER '2' (6, 10) (6, 11)
-    DEDENT '' (6, 12) (6, 12)
+    DEDENT '' (7, 0) (7, 0)
     """)
 
         self.check_tokenize('''\
@@ -1054,7 +1054,7 @@ async def f():
     NAME 'await' (6, 2) (6, 7)
     OP '=' (6, 8) (6, 9)
     NUMBER '2' (6, 10) (6, 11)
-    DEDENT '' (6, 12) (6, 12)
+    DEDENT '' (7, 0) (7, 0)
     """)
 
     def test_newline_after_parenthesized_block_with_comment(self):
@@ -2680,7 +2680,8 @@ async def f():
 
         valid = generate_source(MAXINDENT - 1)
         tokens = list(_generate_tokens_from_c_tokenizer(valid))
-        self.assertEqual(tokens[-1].type, DEDENT)
+        self.assertEqual(tokens[-2].type, DEDENT)
+        self.assertEqual(tokens[-1].type, ENDMARKER)
         compile(valid, "<string>", "exec")
 
         invalid = generate_source(MAXINDENT)
Lib/tokenize.py

@@ -447,13 +447,8 @@ def tokenize(readline):
 
 def _tokenize(rl_gen, encoding):
     source = b"".join(rl_gen).decode(encoding)
-    token = None
     for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
         yield token
-    if token is not None:
-        last_line, _ = token.start
-        yield TokenInfo(ENDMARKER, '', (last_line + 1, 0), (last_line + 1, 0), '')
-
 
 def generate_tokens(readline):
     """Tokenize a source reading Python code as unicode strings.
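With the C tokenizer now emitting the trailing DEDENT and ENDMARKER tokens itself, `_tokenize` no longer needs to synthesize an ENDMARKER in Python; it becomes a plain pass-through generator. A small sketch (illustrative, not from the diff) of the invariant callers can still rely on, matching the updated assertions in test_tokenize:

```python
import io
import tokenize
from token import DEDENT, ENDMARKER

source = "def f():\n    pass\n"
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))

# The stream still ends with the dedents for any open blocks followed by a
# single ENDMARKER, now produced by the C tokenizer rather than the wrapper.
assert tokens[-2].type == DEDENT
assert tokens[-1].type == ENDMARKER
```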
Misc/NEWS.d (new file)

@@ -0,0 +1,3 @@
+Ensure that trailing ``DEDENT`` :class:`tokenize.TokenInfo` objects emitted
+by the :mod:`tokenize` module are reported as in Python 3.11. Patch by Pablo
+Galindo
Python/Python-tokenize.c

@@ -30,6 +30,7 @@ class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_t
 typedef struct
 {
     PyObject_HEAD struct tok_state *tok;
+    int done;
 } tokenizeriterobject;
 
 /*[clinic input]
@@ -63,6 +64,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
     if (extra_tokens) {
         self->tok->tok_extra_tokens = 1;
     }
+    self->done = 0;
     return (PyObject *)self;
 }
 
@@ -179,8 +181,9 @@ tokenizeriter_next(tokenizeriterobject *it)
         }
         goto exit;
     }
-    if (type == ERRORTOKEN || type == ENDMARKER) {
+    if (it->done || type == ERRORTOKEN) {
         PyErr_SetString(PyExc_StopIteration, "EOF");
+        it->done = 1;
         goto exit;
     }
     PyObject *str = NULL;
@@ -194,9 +197,19 @@ tokenizeriter_next(tokenizeriterobject *it)
         goto exit;
     }
 
+    int is_trailing_token = 0;
+    if (type == ENDMARKER || (type == DEDENT && it->tok->done == E_EOF)) {
+        is_trailing_token = 1;
+    }
+
     const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
-    Py_ssize_t size = it->tok->inp - line_start;
-    PyObject *line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+    PyObject* line = NULL;
+    if (it->tok->tok_extra_tokens && is_trailing_token) {
+        line = PyUnicode_FromString("");
+    } else {
+        Py_ssize_t size = it->tok->inp - line_start;
+        line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+    }
     if (line == NULL) {
         Py_DECREF(str);
         goto exit;
@@ -214,6 +227,10 @@ tokenizeriter_next(tokenizeriterobject *it)
     }
 
     if (it->tok->tok_extra_tokens) {
+        if (is_trailing_token) {
+            lineno = end_lineno = lineno + 1;
+            col_offset = end_col_offset = 0;
+        }
         // Necessary adjustments to match the original Python tokenize
         // implementation
         if (type > DEDENT && type < OP) {
@@ -231,6 +248,9 @@ tokenizeriter_next(tokenizeriterobject *it)
     result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
+    if (type == ENDMARKER) {
+        it->done = 1;
+    }
     return result;
 }
 
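The C changes above give trailing tokens an empty `line`, bump their position to the line after the source (column 0), and only stop iteration after the ENDMARKER has been returned (via `it->done`). A quick way to observe the resulting token attributes from Python, as a sketch under the assumption that the trailing tokens surface with an empty `line` string, as the `PyUnicode_FromString("")` branch suggests:

```python
import io
import tokenize

source = "while True:\n    break\n"
for tok in list(tokenize.generate_tokens(io.StringIO(source).readline))[-2:]:
    # Expected: DEDENT and ENDMARKER, both with an empty `line` attribute and
    # positioned on line 3, column 0 (one past the last source line).
    print(tokenize.tok_name[tok.type], repr(tok.line), tok.start)
```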