Mirror of https://github.com/python/cpython.git
[3.12] gh-105549: Tokenize separately NUMBER and NAME tokens and allow 0-prefixed literals (GH-105555) (#105602)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
parent 411366ccdb
commit ae6e002f5a
3 changed files with 45 additions and 3 deletions
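A quick sketch of what this change means for users of the pure-Python tokenize module (illustrative only, not part of the commit): source that cannot compile but can still be lexed, such as 2sin(x), now yields separate NUMBER and NAME tokens instead of failing.

import io
import tokenize

# "2sin(x)" is not valid Python, but it is lexable: the tokenizer now
# emits NUMBER '2' followed by NAME 'sin' rather than erroring out.
for tok in tokenize.generate_tokens(io.StringIO("2sin(x)").readline):
    print(tok)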
Lib/test/test_tokenize.py

@@ -284,7 +284,12 @@ def k(x):
                 # this won't work with compound complex inputs
                 continue
             self.assertEqual(number_token(lit), lit)
+        # Valid cases with extra underscores in the tokenize module
+        # See gh-105549 for context
+        extra_valid_cases = {"0_7", "09_99"}
         for lit in INVALID_UNDERSCORE_LITERALS:
+            if lit in extra_valid_cases:
+                continue
             try:
                 number_token(lit)
             except TokenError:
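In practice the extra_valid_cases carve-out means literals like 0_7 are now lexable even though they remain invalid Python. A minimal sketch (illustrative, not part of the commit):

import io
import tokenize

# tokenize no longer raises on "0_7"; it comes back as one NUMBER token.
toks = list(tokenize.generate_tokens(io.StringIO("0_7").readline))
print(toks[0])

# compile() still rejects the same literal.
try:
    compile("0_7", "<demo>", "eval")
except SyntaxError as exc:
    print(exc)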
@@ -1873,6 +1878,34 @@ class TestRoundtrip(TestCase):
         self.check_roundtrip(code)
 
 
+class InvalidPythonTests(TestCase):
+    def test_number_followed_by_name(self):
+        # See issue #gh-105549
+        source = "2sin(x)"
+        expected_tokens = [
+            TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
+            TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
+            TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
+            TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
+            TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
+            TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+
+        tokens = list(generate_tokens(StringIO(source).readline))
+        self.assertEqual(tokens, expected_tokens)
+
+    def test_number_starting_with_zero(self):
+        source = "01234"
+        expected_tokens = [
+            TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
+            TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+
+        tokens = list(generate_tokens(StringIO(source).readline))
+        self.assertEqual(tokens, expected_tokens)
+
 class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
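The test_number_starting_with_zero case can also be checked interactively; a minimal sketch (illustrative, not part of the commit) showing that a 0-prefixed literal now lexes as a single NUMBER token and survives an untokenize round-trip:

import io
import tokenize

# "01234" was rejected by the new C-based tokenizer in early 3.12 betas;
# with this change it lexes as a single NUMBER token.
toks = list(tokenize.generate_tokens(io.StringIO("01234").readline))
print(toks[0])   # TokenInfo(type=NUMBER, string='01234', ...)
print(tokenize.untokenize(toks))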
Misc/NEWS.d entry (new file)

@@ -0,0 +1,2 @@
+Tokenize separately `NUMBER` and `NAME` tokens that are not ambiguous. Patch
+by Pablo Galindo
Parser/tokenizer.c

@@ -1600,8 +1600,12 @@ lookahead(struct tok_state *tok, const char *test)
 }
 
 static int
-verify_end_of_number(struct tok_state *tok, int c, const char *kind)
-{
+verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
+    if (tok->tok_extra_tokens) {
+        // When we are parsing extra tokens, we don't want to emit warnings
+        // about invalid literals, because we want to be a bit more liberal.
+        return 1;
+    }
     /* Emit a deprecation warning only if the numeric literal is immediately
      * followed by one of keywords which can occur after a numeric literal
      * in valid code: "and", "else", "for", "if", "in", "is" and "or".
@@ -1659,6 +1663,9 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind)
 static int
 verify_identifier(struct tok_state *tok)
 {
+    if (tok->tok_extra_tokens) {
+        return 1;
+    }
     PyObject *s;
     if (tok->decoding_erred)
         return 0;
@@ -2318,7 +2325,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         else if (c == 'j' || c == 'J') {
             goto imaginary;
         }
-        else if (nonzero) {
+        else if (nonzero && !tok->tok_extra_tokens) {
             /* Old-style octal: now disallowed. */
             tok_backup(tok, c);
             return MAKE_TOKEN(syntaxerror_known_range(
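The !tok->tok_extra_tokens guard is what makes this permissive only for the tokenize module: regular compilation still takes the old-style-octal error path. A minimal sketch of the resulting split behavior (illustrative, not part of the commit):

import io
import tokenize

# Extra-tokens mode (used by the tokenize module) skips the old-style
# octal check, so "0777" lexes as a single NUMBER token.
print(next(tokenize.generate_tokens(io.StringIO("0777").readline)))

# The compiler still rejects it via the syntaxerror_known_range() path.
try:
    compile("0777", "<demo>", "eval")
except SyntaxError as exc:
    print(exc)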