[3.12] gh-104976: Ensure trailing dedent tokens are emitted as the previous tokenizer (GH-104980) (#105000)
parent 05189f3054
commit 2c02c68867

4 changed files with 34 additions and 15 deletions
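To make the effect of this backport concrete, here is a minimal sketch using only the public tokenize API (the two-line input and the positions in the comments are illustrative): with this change the trailing DEDENT is reported on the line after the last source line, at column 0, as the pure-Python tokenizer did in 3.11, rather than at the end of the last physical line.

import io
import tokenize

src = "if True:\n    pass\n"
for tok in list(tokenize.generate_tokens(io.StringIO(src).readline))[-3:]:
    # Print the tail of the token stream: NEWLINE, then the trailing DEDENT
    # and ENDMARKER, which are now reported at (3, 0) for this two-line input.
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)

# Expected (3.11-compatible) output:
# NEWLINE '\n' (2, 8) (2, 9)
# DEDENT '' (3, 0) (3, 0)
# ENDMARKER '' (3, 0) (3, 0)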
Lib/test/test_tokenize.py

@@ -82,7 +82,7 @@ class TokenizeTest(TestCase):
     NAME 'False' (4, 11) (4, 16)
     COMMENT '# NEWLINE' (4, 17) (4, 26)
     NEWLINE '\\n' (4, 26) (4, 27)
-    DEDENT '' (4, 27) (4, 27)
+    DEDENT '' (5, 0) (5, 0)
     """)
         indent_error_file = b"""\
 def k(x):
@@ -755,8 +755,8 @@ def"', """\
     NEWLINE '\\n' (2, 5) (2, 6)
     INDENT ' \\t' (3, 0) (3, 9)
     NAME 'pass' (3, 9) (3, 13)
-    DEDENT '' (3, 14) (3, 14)
-    DEDENT '' (3, 14) (3, 14)
+    DEDENT '' (4, 0) (4, 0)
+    DEDENT '' (4, 0) (4, 0)
     """)
 
     def test_non_ascii_identifiers(self):
@@ -968,7 +968,7 @@ async def foo():
     NUMBER '1' (2, 17) (2, 18)
     OP ':' (2, 18) (2, 19)
     NAME 'pass' (2, 20) (2, 24)
-    DEDENT '' (2, 25) (2, 25)
+    DEDENT '' (3, 0) (3, 0)
     """)
 
         self.check_tokenize('''async def foo(async): await''', """\
@@ -1016,7 +1016,7 @@ def f():
     NAME 'await' (6, 2) (6, 7)
     OP '=' (6, 8) (6, 9)
     NUMBER '2' (6, 10) (6, 11)
-    DEDENT '' (6, 12) (6, 12)
+    DEDENT '' (7, 0) (7, 0)
     """)
 
         self.check_tokenize('''\
@@ -1054,7 +1054,7 @@ async def f():
     NAME 'await' (6, 2) (6, 7)
     OP '=' (6, 8) (6, 9)
     NUMBER '2' (6, 10) (6, 11)
-    DEDENT '' (6, 12) (6, 12)
+    DEDENT '' (7, 0) (7, 0)
     """)
 
     def test_newline_after_parenthesized_block_with_comment(self):
@@ -2680,7 +2680,8 @@ async def f():
 
         valid = generate_source(MAXINDENT - 1)
         tokens = list(_generate_tokens_from_c_tokenizer(valid))
-        self.assertEqual(tokens[-1].type, DEDENT)
+        self.assertEqual(tokens[-2].type, DEDENT)
+        self.assertEqual(tokens[-1].type, ENDMARKER)
         compile(valid, "<string>", "exec")
 
         invalid = generate_source(MAXINDENT)
Lib/tokenize.py

@@ -447,13 +447,8 @@ def tokenize(readline):
 
 def _tokenize(rl_gen, encoding):
     source = b"".join(rl_gen).decode(encoding)
-    token = None
     for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
         yield token
-    if token is not None:
-        last_line, _ = token.start
-        yield TokenInfo(ENDMARKER, '', (last_line + 1, 0), (last_line + 1, 0), '')
-
 
 def generate_tokens(readline):
     """Tokenize a source reading Python code as unicode strings.
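The removal above works because, with extra_tokens=True, the C tokenizer now emits the trailing DEDENT tokens and the ENDMARKER itself, so the Python wrapper no longer needs to append a synthetic ENDMARKER. A small sketch through the bytes-based entry point (illustrative input, public API only):

import io
import tokenize

src = b"def f():\n    return 1\n"
tokens = list(tokenize.tokenize(io.BytesIO(src).readline))

# The stream still ends with DEDENT followed by ENDMARKER, but both now come
# straight from the C tokenizer instead of being recreated in _tokenize().
assert tokens[-2].type == tokenize.DEDENT
assert tokens[-1].type == tokenize.ENDMARKER
assert tokens[-1].start == (3, 0)  # one line past the two-line source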
NEWS entry (new file)

@@ -0,0 +1,3 @@
+Ensure that trailing ``DEDENT`` :class:`tokenize.TokenInfo` objects emitted
+by the :mod:`tokenize` module are reported as in Python 3.11. Patch by Pablo
+Galindo
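In terms of the TokenInfo fields, the restored 3.11 behaviour means a trailing DEDENT carries an empty string, an empty line, and start == end at (last_line + 1, 0). A minimal check with an illustrative two-line input:

import io
import tokenize

toks = list(tokenize.generate_tokens(io.StringIO("if a:\n    b = 1\n").readline))
dedent = toks[-2]

# The synthetic trailing DEDENT matches what the 3.11 pure-Python tokenizer
# produced: no text, no source line, positioned just past the input.
assert dedent.type == tokenize.DEDENT
assert dedent.string == "" and dedent.line == ""
assert dedent.start == dedent.end == (3, 0)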
Python/Python-tokenize.c

@@ -30,6 +30,7 @@ class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_t
 typedef struct
 {
     PyObject_HEAD struct tok_state *tok;
+    int done;
 } tokenizeriterobject;
 
 /*[clinic input]
@@ -63,6 +64,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
     if (extra_tokens) {
         self->tok->tok_extra_tokens = 1;
     }
+    self->done = 0;
     return (PyObject *)self;
 }
 
@@ -179,8 +181,9 @@ tokenizeriter_next(tokenizeriterobject *it)
         }
         goto exit;
     }
-    if (type == ERRORTOKEN || type == ENDMARKER) {
+    if (it->done || type == ERRORTOKEN) {
         PyErr_SetString(PyExc_StopIteration, "EOF");
+        it->done = 1;
         goto exit;
     }
     PyObject *str = NULL;
@@ -194,9 +197,19 @@ tokenizeriter_next(tokenizeriterobject *it)
         goto exit;
     }
 
+    int is_trailing_token = 0;
+    if (type == ENDMARKER || (type == DEDENT && it->tok->done == E_EOF)) {
+        is_trailing_token = 1;
+    }
+
     const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
-    Py_ssize_t size = it->tok->inp - line_start;
-    PyObject *line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+    PyObject* line = NULL;
+    if (it->tok->tok_extra_tokens && is_trailing_token) {
+        line = PyUnicode_FromString("");
+    } else {
+        Py_ssize_t size = it->tok->inp - line_start;
+        line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+    }
     if (line == NULL) {
         Py_DECREF(str);
         goto exit;
@@ -214,6 +227,10 @@ tokenizeriter_next(tokenizeriterobject *it)
     }
 
     if (it->tok->tok_extra_tokens) {
+        if (is_trailing_token) {
+            lineno = end_lineno = lineno + 1;
+            col_offset = end_col_offset = 0;
+        }
         // Necessary adjustments to match the original Python tokenize
         // implementation
         if (type > DEDENT && type < OP) {
@@ -231,6 +248,9 @@ tokenizeriter_next(tokenizeriterobject *it)
     result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
+    if (type == ENDMARKER) {
+        it->done = 1;
+    }
     return result;
 }
 
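At the C level, the new done flag changes how iteration terminates: the iterator now yields the ENDMARKER token and marks itself done afterwards, instead of raising StopIteration as soon as ENDMARKER is reached, while trailing DEDENTs at EOF are flagged via is_trailing_token so they get the empty line and the bumped position. A short sketch using the private helper that appears in the diff above (internal API, subject to change; the input is illustrative):

from tokenize import _generate_tokens_from_c_tokenizer, tok_name

tokens = list(_generate_tokens_from_c_tokenizer("if x:\n    pass\n", extra_tokens=True))

# ENDMARKER is now produced by the C iterator itself; the call after it ends
# the iteration cleanly thanks to the new `done` flag.
assert tok_name[tokens[-1].type] == "ENDMARKER"
assert tok_name[tokens[-2].type] == "DEDENT"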