mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
Issue #24619: New approach for tokenizing async/await.
This commit fixes how one-line async-defs and defs are tracked by the tokenizer. It allows the tokenizer to correctly parse invalid code such as:

    >>> async def f():
    ...     def g(): pass
    ...     async = 10

and valid code such as:

    >>> async def f():
    ...     async def g(): pass
    ...     await z

As a consequence, it is now possible to have one-line 'async def foo(): await ..' functions:

    >>> async def foo(): return await bar()
This commit is contained in:
parent
80acc3ebbc
commit
8fb307cd65
13 changed files with 343 additions and 69 deletions
|
@ -31,6 +31,12 @@
|
|||
|| c == '_'\
|
||||
|| (c >= 128))
|
||||
|
||||
/* The following DEFTYPE* flags are used in 'tok_state->deftypestack',
|
||||
and should be removed in 3.7, when async/await are regular
|
||||
keywords. */
|
||||
#define DEFTYPE_ASYNC 1
|
||||
#define DEFTYPE_HAS_NL 2
|
||||
|
||||
extern char *PyOS_Readline(FILE *, FILE *, const char *);
|
||||
/* Return malloc'ed string including trailing \n;
|
||||
empty malloc'ed string for EOF;
|
||||
|
@ -130,6 +136,8 @@ tok_new(void)
|
|||
tok->def = 0;
|
||||
tok->defstack[0] = 0;
|
||||
tok->deftypestack[0] = 0;
|
||||
tok->def_async_behind = 0;
|
||||
tok->def_in_async = 0;
|
||||
|
||||
tok->atbol = 1;
|
||||
tok->pendin = 0;
|
||||
|
@ -1436,7 +1444,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
tok->pendin++;
|
||||
|
||||
while (tok->def && tok->defstack[tok->def] >= tok->indent) {
|
||||
if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
|
||||
tok->def_in_async--;
|
||||
assert(tok->def_in_async >= 0);
|
||||
}
|
||||
tok->def--;
|
||||
assert(tok->def >= 0);
|
||||
}
|
||||
|
||||
return DEDENT;
|
||||
|
@ -1447,6 +1460,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
}
|
||||
}
|
||||
|
||||
if (!blankline && tok->level == 0
|
||||
&& tok->def && tok->deftypestack[tok->def] & DEFTYPE_HAS_NL
|
||||
&& tok->defstack[tok->def] >= tok->indent)
|
||||
{
|
||||
/* The top function on the stack did have a NEWLINE
|
||||
token, but didn't have an INDENT. That means that
|
||||
it's a one-line function and it should now be removed
|
||||
from the stack. */
|
||||
if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
|
||||
tok->def_in_async--;
|
||||
assert(tok->def_in_async >= 0);
|
||||
}
|
||||
tok->def--;
|
||||
assert(tok->def >= 0);
|
||||
}
|
||||
|
||||
again:
|
||||
tok->start = NULL;
|
||||
/* Skip spaces */
|
||||
|
@ -1501,59 +1530,58 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
|
||||
tok_len = tok->cur - tok->start;
|
||||
if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) {
|
||||
if (tok->def && tok->deftypestack[tok->def] == 3) {
|
||||
tok->deftypestack[tok->def] = 2;
|
||||
/* The current token is 'def'. */
|
||||
if (tok->def + 1 >= MAXINDENT) {
|
||||
tok->done = E_TOODEEP;
|
||||
tok->cur = tok->inp;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
else if (tok->defstack[tok->def] < tok->indent) {
|
||||
/* We advance defs stack only when we see "def" *and*
|
||||
the indentation level was increased relative to the
|
||||
previous "def". */
|
||||
|
||||
if (tok->def + 1 >= MAXINDENT) {
|
||||
tok->done = E_TOODEEP;
|
||||
tok->cur = tok->inp;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
/* Advance defs stack. */
|
||||
tok->def++;
|
||||
tok->defstack[tok->def] = tok->indent;
|
||||
|
||||
tok->def++;
|
||||
tok->defstack[tok->def] = tok->indent;
|
||||
tok->deftypestack[tok->def] = 1;
|
||||
if (tok->def_async_behind) {
|
||||
/* The previous token was 'async'. */
|
||||
tok->def_async_behind = 0;
|
||||
tok->deftypestack[tok->def] = DEFTYPE_ASYNC;
|
||||
tok->def_in_async++;
|
||||
}
|
||||
else {
|
||||
/* This is a regular function (not async def). */
|
||||
tok->deftypestack[tok->def] = 0;
|
||||
}
|
||||
}
|
||||
else if (tok_len == 5) {
|
||||
if (memcmp(tok->start, "async", 5) == 0) {
|
||||
/* The current token is 'async'. */
|
||||
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
|
||||
|
||||
/* Try to look ahead one token. */
|
||||
ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
|
||||
&ahead_top_end);
|
||||
|
||||
if (ahead_tok_kind == NAME &&
|
||||
ahead_tok.cur - ahead_tok.start == 3 &&
|
||||
memcmp(ahead_tok.start, "def", 3) == 0) {
|
||||
|
||||
if (tok->def + 1 >= MAXINDENT) {
|
||||
tok->done = E_TOODEEP;
|
||||
tok->cur = tok->inp;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
|
||||
tok->def++;
|
||||
tok->defstack[tok->def] = tok->indent;
|
||||
tok->deftypestack[tok->def] = 3;
|
||||
|
||||
if (ahead_tok_kind == NAME
|
||||
&& ahead_tok.cur - ahead_tok.start == 3
|
||||
&& memcmp(ahead_tok.start, "def", 3) == 0)
|
||||
{
|
||||
/* The next token is going to be 'def', so instead of
|
||||
returning 'async' NAME token, we return ASYNC. */
|
||||
tok->def_async_behind = 1;
|
||||
return ASYNC;
|
||||
}
|
||||
else if (tok->def && tok->deftypestack[tok->def] == 2
|
||||
&& tok->defstack[tok->def] < tok->indent) {
|
||||
|
||||
else if (tok->def_in_async)
|
||||
{
|
||||
/* We're inside an 'async def' function, so we treat
|
||||
'async' token as ASYNC, instead of NAME. */
|
||||
return ASYNC;
|
||||
}
|
||||
|
||||
}
|
||||
else if (memcmp(tok->start, "await", 5) == 0
|
||||
&& tok->def && tok->deftypestack[tok->def] == 2
|
||||
&& tok->defstack[tok->def] < tok->indent) {
|
||||
|
||||
else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async)
|
||||
{
|
||||
/* We're inside an 'async def' function, so we treat
|
||||
'await' token as AWAIT, instead of NAME. */
|
||||
return AWAIT;
|
||||
}
|
||||
}
|
||||
|
@ -1569,6 +1597,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
*p_start = tok->start;
|
||||
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
|
||||
tok->cont_line = 0;
|
||||
if (tok->def) {
|
||||
/* Mark the top function on the stack that it had
|
||||
at least one NEWLINE. That will help us to
|
||||
distinguish one-line functions from functions
|
||||
with multiple statements. */
|
||||
tok->deftypestack[tok->def] |= DEFTYPE_HAS_NL;
|
||||
}
|
||||
return NEWLINE;
|
||||
}
|
||||
|
||||
|
|
|
@ -66,12 +66,21 @@ struct tok_state {
|
|||
const char* str;
|
||||
const char* input; /* Tokenizer's newline translated copy of the string. */
|
||||
|
||||
int defstack[MAXINDENT]; /* stack if funcs & indents where they
|
||||
were defined */
|
||||
int deftypestack[MAXINDENT]; /* stack of func types
|
||||
(0 not func; 1: "def name";
|
||||
2: "async def name") */
|
||||
int def; /* Length of stack of func types */
|
||||
/* `def*` fields are for parsing async/await in a backwards compatible
|
||||
way. They should be removed in 3.7, when async/await become
|
||||
regular keywords. See PEP 492 for more details. */
|
||||
int defstack[MAXINDENT]; /* Stack of funcs & indents where they
|
||||
were defined. */
|
||||
int deftypestack[MAXINDENT]; /* Stack of func flags, see DEFTYPE_*
|
||||
constants. */
|
||||
int def; /* Length of stack of func types/flags. */
|
||||
int def_async_behind; /* 1 if there was an 'async' token before
|
||||
a 'def' token. */
|
||||
int def_in_async; /* Counter of how deep 'async def's
|
||||
are nested. If greater than 0,
|
||||
we are somewhere in an 'async def'
|
||||
body, so 'async' and 'await' should
|
||||
be parsed as keywords.*/
|
||||
};
|
||||
|
||||
extern struct tok_state *PyTokenizer_FromString(const char *, int);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue