[3.12] gh-105549: Tokenize separately NUMBER and NAME tokens and allow 0-prefixed literals (GH-105555) (#105602)

Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
2025-08-10 03:49:18 +00:00 · 2023-06-09 14:40:07 -07:00 · 2023-06-09 14:40:07 -07:00 · ae6e002f5a
commit ae6e002f5a
parent 411366ccdb
3 changed files with 45 additions and 3 deletions
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1600,8 +1600,12 @@ lookahead(struct tok_state *tok, const char *test)
 }

 static int
-verify_end_of_number(struct tok_state *tok, int c, const char *kind)
-{
+verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
+    if (tok->tok_extra_tokens) {
+        // When we are parsing extra tokens, we don't want to emit warnings
+        // about invalid literals, because we want to be a bit more liberal.
+        return 1;
+    }
    /* Emit a deprecation warning only if the numeric literal is immediately
     * followed by one of keywords which can occur after a numeric literal
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
@@ -1659,6 +1663,9 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind)
 static int
 verify_identifier(struct tok_state *tok)
 {
+    if (tok->tok_extra_tokens) {
+        return 1;
+    }
    PyObject *s;
    if (tok->decoding_erred)
        return 0;
@@ -2318,7 +2325,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                else if (c == 'j' || c == 'J') {
                    goto imaginary;
                }
-                else if (nonzero) {
+                else if (nonzero && !tok->tok_extra_tokens) {
                    /* Old-style octal: now disallowed. */
                    tok_backup(tok, c);
                    return MAKE_TOKEN(syntaxerror_known_range(