mirror of
https://github.com/python/cpython.git
synced 2025-09-15 05:06:12 +00:00
Issue #24619: New approach for tokenizing async/await.
This commit fixes how one-line async-defs and defs are tracked by the tokenizer. It allows the tokenizer to correctly parse invalid code such as: >>> async def f(): ... def g(): pass ... async = 10 and valid code such as: >>> async def f(): ... async def g(): pass ... await z As a consequence, it is now possible to have one-line 'async def foo(): await ..' functions: >>> async def foo(): return await bar()
This commit is contained in:
parent
80acc3ebbc
commit
8fb307cd65
13 changed files with 343 additions and 69 deletions
|
@ -685,9 +685,7 @@ Execution of Python coroutines can be suspended and resumed at many points
|
||||||
(see :term:`coroutine`). In the body of a coroutine, any ``await`` and
|
(see :term:`coroutine`). In the body of a coroutine, any ``await`` and
|
||||||
``async`` identifiers become reserved keywords; :keyword:`await` expressions,
|
``async`` identifiers become reserved keywords; :keyword:`await` expressions,
|
||||||
:keyword:`async for` and :keyword:`async with` can only be used in
|
:keyword:`async for` and :keyword:`async with` can only be used in
|
||||||
coroutine bodies. However, to simplify the parser, these keywords cannot
|
coroutine bodies.
|
||||||
be used on the same line as a function or coroutine (:keyword:`def`
|
|
||||||
statement) header.
|
|
||||||
|
|
||||||
Functions defined with ``async def`` syntax are always coroutine functions,
|
Functions defined with ``async def`` syntax are always coroutine functions,
|
||||||
even if they do not contain ``await`` or ``async`` keywords.
|
even if they do not contain ``await`` or ``async`` keywords.
|
||||||
|
|
|
@ -369,6 +369,7 @@ def generate_tokens(readline):
|
||||||
# 'stashed' and 'ctx' are used for async/await parsing
|
# 'stashed' and 'ctx' are used for async/await parsing
|
||||||
stashed = None
|
stashed = None
|
||||||
ctx = [('sync', 0)]
|
ctx = [('sync', 0)]
|
||||||
|
in_async = 0
|
||||||
|
|
||||||
while 1: # loop over lines in stream
|
while 1: # loop over lines in stream
|
||||||
try:
|
try:
|
||||||
|
@ -436,6 +437,14 @@ def generate_tokens(readline):
|
||||||
"unindent does not match any outer indentation level",
|
"unindent does not match any outer indentation level",
|
||||||
("<tokenize>", lnum, pos, line))
|
("<tokenize>", lnum, pos, line))
|
||||||
indents = indents[:-1]
|
indents = indents[:-1]
|
||||||
|
|
||||||
|
cur_indent = indents[-1]
|
||||||
|
while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
|
||||||
|
if ctx[-1][0] == 'async':
|
||||||
|
in_async -= 1
|
||||||
|
assert in_async >= 0
|
||||||
|
ctx.pop()
|
||||||
|
|
||||||
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
|
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
|
||||||
|
|
||||||
else: # continued statement
|
else: # continued statement
|
||||||
|
@ -499,7 +508,7 @@ def generate_tokens(readline):
|
||||||
yield (STRING, token, spos, epos, line)
|
yield (STRING, token, spos, epos, line)
|
||||||
elif initial in namechars: # ordinary name
|
elif initial in namechars: # ordinary name
|
||||||
if token in ('async', 'await'):
|
if token in ('async', 'await'):
|
||||||
if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]:
|
if in_async:
|
||||||
yield (ASYNC if token == 'async' else AWAIT,
|
yield (ASYNC if token == 'async' else AWAIT,
|
||||||
token, spos, epos, line)
|
token, spos, epos, line)
|
||||||
continue
|
continue
|
||||||
|
@ -515,6 +524,7 @@ def generate_tokens(readline):
|
||||||
and stashed[1] == 'async'):
|
and stashed[1] == 'async'):
|
||||||
|
|
||||||
ctx.append(('async', indents[-1]))
|
ctx.append(('async', indents[-1]))
|
||||||
|
in_async += 1
|
||||||
|
|
||||||
yield (ASYNC, stashed[1],
|
yield (ASYNC, stashed[1],
|
||||||
stashed[2], stashed[3],
|
stashed[2], stashed[3],
|
||||||
|
|
|
@ -1,3 +1,2 @@
|
||||||
async def foo():
|
async def foo(a=await something()):
|
||||||
def foo(a=await something()):
|
pass
|
||||||
pass
|
|
||||||
|
|
|
@ -1,3 +1,2 @@
|
||||||
async def foo():
|
async def foo(a:await something()):
|
||||||
def foo(a:await something()):
|
pass
|
||||||
pass
|
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
async def foo():
|
async def foo():
|
||||||
async def foo(): await something()
|
await
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
async def foo():
|
|
||||||
await
|
|
|
@ -67,11 +67,11 @@ def silence_coro_gc():
|
||||||
class AsyncBadSyntaxTest(unittest.TestCase):
|
class AsyncBadSyntaxTest(unittest.TestCase):
|
||||||
|
|
||||||
def test_badsyntax_1(self):
|
def test_badsyntax_1(self):
|
||||||
with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
|
with self.assertRaisesRegex(SyntaxError, "'await' outside"):
|
||||||
import test.badsyntax_async1
|
import test.badsyntax_async1
|
||||||
|
|
||||||
def test_badsyntax_2(self):
|
def test_badsyntax_2(self):
|
||||||
with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
|
with self.assertRaisesRegex(SyntaxError, "'await' outside"):
|
||||||
import test.badsyntax_async2
|
import test.badsyntax_async2
|
||||||
|
|
||||||
def test_badsyntax_3(self):
|
def test_badsyntax_3(self):
|
||||||
|
@ -103,10 +103,6 @@ class AsyncBadSyntaxTest(unittest.TestCase):
|
||||||
import test.badsyntax_async8
|
import test.badsyntax_async8
|
||||||
|
|
||||||
def test_badsyntax_9(self):
|
def test_badsyntax_9(self):
|
||||||
with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
|
|
||||||
import test.badsyntax_async9
|
|
||||||
|
|
||||||
def test_badsyntax_10(self):
|
|
||||||
ns = {}
|
ns = {}
|
||||||
for comp in {'(await a for a in b)',
|
for comp in {'(await a for a in b)',
|
||||||
'[await a for a in b]',
|
'[await a for a in b]',
|
||||||
|
@ -116,6 +112,221 @@ class AsyncBadSyntaxTest(unittest.TestCase):
|
||||||
with self.assertRaisesRegex(SyntaxError, 'await.*in comprehen'):
|
with self.assertRaisesRegex(SyntaxError, 'await.*in comprehen'):
|
||||||
exec('async def f():\n\t{}'.format(comp), ns, ns)
|
exec('async def f():\n\t{}'.format(comp), ns, ns)
|
||||||
|
|
||||||
|
def test_badsyntax_10(self):
|
||||||
|
# Tests for issue 24619
|
||||||
|
|
||||||
|
samples = [
|
||||||
|
"""async def foo():
|
||||||
|
def bar(): pass
|
||||||
|
await = 1
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
|
||||||
|
def bar(): pass
|
||||||
|
await = 1
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
def bar(): pass
|
||||||
|
if 1:
|
||||||
|
await = 1
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
async def bar(): pass
|
||||||
|
if 1:
|
||||||
|
await a
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
async def bar(): pass
|
||||||
|
await a
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
def baz(): pass
|
||||||
|
async def bar(): pass
|
||||||
|
await a
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
def baz(): pass
|
||||||
|
# 456
|
||||||
|
async def bar(): pass
|
||||||
|
# 123
|
||||||
|
await a
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
def baz(): pass
|
||||||
|
# 456
|
||||||
|
async def bar(): pass
|
||||||
|
# 123
|
||||||
|
await = 2
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
|
||||||
|
def baz(): pass
|
||||||
|
|
||||||
|
async def bar(): pass
|
||||||
|
|
||||||
|
await a
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
|
||||||
|
def baz(): pass
|
||||||
|
|
||||||
|
async def bar(): pass
|
||||||
|
|
||||||
|
await = 2
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
def async(): pass
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
def await(): pass
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
def bar():
|
||||||
|
await
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
return lambda async: await
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
return lambda a: await
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo(a: await b):
|
||||||
|
pass
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def baz():
|
||||||
|
async def foo(a: await b):
|
||||||
|
pass
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo(async):
|
||||||
|
pass
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
def bar():
|
||||||
|
def baz():
|
||||||
|
async = 1
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
def bar():
|
||||||
|
def baz():
|
||||||
|
pass
|
||||||
|
async = 1
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
async def bar():
|
||||||
|
|
||||||
|
async def baz():
|
||||||
|
pass
|
||||||
|
|
||||||
|
def baz():
|
||||||
|
42
|
||||||
|
|
||||||
|
async = 1
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo():
|
||||||
|
def bar():
|
||||||
|
def baz():
|
||||||
|
pass\nawait foo()
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
def bar():
|
||||||
|
async def baz():
|
||||||
|
pass\nawait foo()
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""async def foo(await):
|
||||||
|
pass
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
|
||||||
|
async def bar(): pass
|
||||||
|
|
||||||
|
await a
|
||||||
|
""",
|
||||||
|
|
||||||
|
"""def foo():
|
||||||
|
async def bar():
|
||||||
|
pass\nawait a
|
||||||
|
"""]
|
||||||
|
|
||||||
|
ns = {}
|
||||||
|
for code in samples:
|
||||||
|
with self.subTest(code=code), self.assertRaises(SyntaxError):
|
||||||
|
exec(code, ns, ns)
|
||||||
|
|
||||||
|
def test_goodsyntax_1(self):
|
||||||
|
# Tests for issue 24619
|
||||||
|
|
||||||
|
def foo(await):
|
||||||
|
async def foo(): pass
|
||||||
|
async def foo():
|
||||||
|
pass
|
||||||
|
return await + 1
|
||||||
|
self.assertEqual(foo(10), 11)
|
||||||
|
|
||||||
|
def foo(await):
|
||||||
|
async def foo(): pass
|
||||||
|
async def foo(): pass
|
||||||
|
return await + 2
|
||||||
|
self.assertEqual(foo(20), 22)
|
||||||
|
|
||||||
|
def foo(await):
|
||||||
|
|
||||||
|
async def foo(): pass
|
||||||
|
|
||||||
|
async def foo(): pass
|
||||||
|
|
||||||
|
return await + 2
|
||||||
|
self.assertEqual(foo(20), 22)
|
||||||
|
|
||||||
|
def foo(await):
|
||||||
|
"""spam"""
|
||||||
|
async def foo(): \
|
||||||
|
pass
|
||||||
|
# 123
|
||||||
|
async def foo(): pass
|
||||||
|
# 456
|
||||||
|
return await + 2
|
||||||
|
self.assertEqual(foo(20), 22)
|
||||||
|
|
||||||
|
def foo(await):
|
||||||
|
def foo(): pass
|
||||||
|
def foo(): pass
|
||||||
|
async def bar(): return await_
|
||||||
|
await_ = await
|
||||||
|
try:
|
||||||
|
bar().send(None)
|
||||||
|
except StopIteration as ex:
|
||||||
|
return ex.args[0]
|
||||||
|
self.assertEqual(foo(42), 42)
|
||||||
|
|
||||||
|
async def f():
|
||||||
|
async def g(): pass
|
||||||
|
await z
|
||||||
|
self.assertTrue(inspect.iscoroutinefunction(f))
|
||||||
|
|
||||||
|
|
||||||
class TokenizerRegrTest(unittest.TestCase):
|
class TokenizerRegrTest(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -461,8 +672,7 @@ class CoroutineTest(unittest.TestCase):
|
||||||
class Awaitable:
|
class Awaitable:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def foo():
|
async def foo(): return await Awaitable()
|
||||||
return (await Awaitable())
|
|
||||||
|
|
||||||
with self.assertRaisesRegex(
|
with self.assertRaisesRegex(
|
||||||
TypeError, "object Awaitable can't be used in 'await' expression"):
|
TypeError, "object Awaitable can't be used in 'await' expression"):
|
||||||
|
|
|
@ -1051,10 +1051,7 @@ class GrammarTests(unittest.TestCase):
|
||||||
|
|
||||||
async def test():
|
async def test():
|
||||||
def sum():
|
def sum():
|
||||||
async = 1
|
pass
|
||||||
await = 41
|
|
||||||
return async + await
|
|
||||||
|
|
||||||
if 1:
|
if 1:
|
||||||
await someobj()
|
await someobj()
|
||||||
|
|
||||||
|
|
|
@ -786,12 +786,12 @@ Async/await extension:
|
||||||
NAME 'def' (2, 2) (2, 5)
|
NAME 'def' (2, 2) (2, 5)
|
||||||
NAME 'foo' (2, 6) (2, 9)
|
NAME 'foo' (2, 6) (2, 9)
|
||||||
OP '(' (2, 9) (2, 10)
|
OP '(' (2, 9) (2, 10)
|
||||||
NAME 'await' (2, 10) (2, 15)
|
AWAIT 'await' (2, 10) (2, 15)
|
||||||
OP ')' (2, 15) (2, 16)
|
OP ')' (2, 15) (2, 16)
|
||||||
OP ':' (2, 16) (2, 17)
|
OP ':' (2, 16) (2, 17)
|
||||||
NEWLINE '\\n' (2, 17) (2, 18)
|
NEWLINE '\\n' (2, 17) (2, 18)
|
||||||
INDENT ' ' (3, 0) (3, 4)
|
INDENT ' ' (3, 0) (3, 4)
|
||||||
NAME 'await' (3, 4) (3, 9)
|
AWAIT 'await' (3, 4) (3, 9)
|
||||||
OP '=' (3, 10) (3, 11)
|
OP '=' (3, 10) (3, 11)
|
||||||
NUMBER '1' (3, 12) (3, 13)
|
NUMBER '1' (3, 12) (3, 13)
|
||||||
NEWLINE '\\n' (3, 13) (3, 14)
|
NEWLINE '\\n' (3, 13) (3, 14)
|
||||||
|
@ -829,6 +829,17 @@ Async/await extension:
|
||||||
OP ':' (2, 18) (2, 19)
|
OP ':' (2, 18) (2, 19)
|
||||||
NAME 'pass' (2, 20) (2, 24)
|
NAME 'pass' (2, 20) (2, 24)
|
||||||
DEDENT '' (3, 0) (3, 0)
|
DEDENT '' (3, 0) (3, 0)
|
||||||
|
|
||||||
|
>>> dump_tokens('''async def foo(async): await''')
|
||||||
|
ENCODING 'utf-8' (0, 0) (0, 0)
|
||||||
|
ASYNC 'async' (1, 0) (1, 5)
|
||||||
|
NAME 'def' (1, 6) (1, 9)
|
||||||
|
NAME 'foo' (1, 10) (1, 13)
|
||||||
|
OP '(' (1, 13) (1, 14)
|
||||||
|
ASYNC 'async' (1, 14) (1, 19)
|
||||||
|
OP ')' (1, 19) (1, 20)
|
||||||
|
OP ':' (1, 20) (1, 21)
|
||||||
|
AWAIT 'await' (1, 22) (1, 27)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from test import support
|
from test import support
|
||||||
|
|
|
@ -501,6 +501,7 @@ def _tokenize(readline, encoding):
|
||||||
# 'stashed' and 'ctx' are used for async/await parsing
|
# 'stashed' and 'ctx' are used for async/await parsing
|
||||||
stashed = None
|
stashed = None
|
||||||
ctx = [('sync', 0)]
|
ctx = [('sync', 0)]
|
||||||
|
in_async = 0
|
||||||
|
|
||||||
if encoding is not None:
|
if encoding is not None:
|
||||||
if encoding == "utf-8-sig":
|
if encoding == "utf-8-sig":
|
||||||
|
@ -580,6 +581,9 @@ def _tokenize(readline, encoding):
|
||||||
|
|
||||||
cur_indent = indents[-1]
|
cur_indent = indents[-1]
|
||||||
while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
|
while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
|
||||||
|
if ctx[-1][0] == 'async':
|
||||||
|
in_async -= 1
|
||||||
|
assert in_async >= 0
|
||||||
ctx.pop()
|
ctx.pop()
|
||||||
|
|
||||||
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
|
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
|
||||||
|
@ -640,7 +644,7 @@ def _tokenize(readline, encoding):
|
||||||
yield TokenInfo(STRING, token, spos, epos, line)
|
yield TokenInfo(STRING, token, spos, epos, line)
|
||||||
elif initial.isidentifier(): # ordinary name
|
elif initial.isidentifier(): # ordinary name
|
||||||
if token in ('async', 'await'):
|
if token in ('async', 'await'):
|
||||||
if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]:
|
if in_async:
|
||||||
yield TokenInfo(
|
yield TokenInfo(
|
||||||
ASYNC if token == 'async' else AWAIT,
|
ASYNC if token == 'async' else AWAIT,
|
||||||
token, spos, epos, line)
|
token, spos, epos, line)
|
||||||
|
@ -657,6 +661,7 @@ def _tokenize(readline, encoding):
|
||||||
and stashed.string == 'async'):
|
and stashed.string == 'async'):
|
||||||
|
|
||||||
ctx.append(('async', indents[-1]))
|
ctx.append(('async', indents[-1]))
|
||||||
|
in_async += 1
|
||||||
|
|
||||||
yield TokenInfo(ASYNC, stashed.string,
|
yield TokenInfo(ASYNC, stashed.string,
|
||||||
stashed.start, stashed.end,
|
stashed.start, stashed.end,
|
||||||
|
|
|
@ -19,6 +19,9 @@ Core and Builtins
|
||||||
|
|
||||||
- Issue #24407: Fix crash when dict is mutated while being updated.
|
- Issue #24407: Fix crash when dict is mutated while being updated.
|
||||||
|
|
||||||
|
- Issue #24619: New approach for tokenizing async/await. As a consequence,
|
||||||
|
it is now possible to have one-line 'async def foo(): await ..' functions.
|
||||||
|
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,12 @@
|
||||||
|| c == '_'\
|
|| c == '_'\
|
||||||
|| (c >= 128))
|
|| (c >= 128))
|
||||||
|
|
||||||
|
/* The following DEFTYPE* flags are used in 'tok_state->deftypestack',
|
||||||
|
and should be removed in 3.7, when async/await are regular
|
||||||
|
keywords. */
|
||||||
|
#define DEFTYPE_ASYNC 1
|
||||||
|
#define DEFTYPE_HAS_NL 2
|
||||||
|
|
||||||
extern char *PyOS_Readline(FILE *, FILE *, const char *);
|
extern char *PyOS_Readline(FILE *, FILE *, const char *);
|
||||||
/* Return malloc'ed string including trailing \n;
|
/* Return malloc'ed string including trailing \n;
|
||||||
empty malloc'ed string for EOF;
|
empty malloc'ed string for EOF;
|
||||||
|
@ -130,6 +136,8 @@ tok_new(void)
|
||||||
tok->def = 0;
|
tok->def = 0;
|
||||||
tok->defstack[0] = 0;
|
tok->defstack[0] = 0;
|
||||||
tok->deftypestack[0] = 0;
|
tok->deftypestack[0] = 0;
|
||||||
|
tok->def_async_behind = 0;
|
||||||
|
tok->def_in_async = 0;
|
||||||
|
|
||||||
tok->atbol = 1;
|
tok->atbol = 1;
|
||||||
tok->pendin = 0;
|
tok->pendin = 0;
|
||||||
|
@ -1436,7 +1444,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
tok->pendin++;
|
tok->pendin++;
|
||||||
|
|
||||||
while (tok->def && tok->defstack[tok->def] >= tok->indent) {
|
while (tok->def && tok->defstack[tok->def] >= tok->indent) {
|
||||||
|
if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
|
||||||
|
tok->def_in_async--;
|
||||||
|
assert(tok->def_in_async >= 0);
|
||||||
|
}
|
||||||
tok->def--;
|
tok->def--;
|
||||||
|
assert(tok->def >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return DEDENT;
|
return DEDENT;
|
||||||
|
@ -1447,6 +1460,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!blankline && tok->level == 0
|
||||||
|
&& tok->def && tok->deftypestack[tok->def] & DEFTYPE_HAS_NL
|
||||||
|
&& tok->defstack[tok->def] >= tok->indent)
|
||||||
|
{
|
||||||
|
/* The top function on the stack did have a NEWLINE
|
||||||
|
token, but didn't have an INDENT. That means that
|
||||||
|
it's a one-line function and it should now be removed
|
||||||
|
from the stack. */
|
||||||
|
if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
|
||||||
|
tok->def_in_async--;
|
||||||
|
assert(tok->def_in_async >= 0);
|
||||||
|
}
|
||||||
|
tok->def--;
|
||||||
|
assert(tok->def >= 0);
|
||||||
|
}
|
||||||
|
|
||||||
again:
|
again:
|
||||||
tok->start = NULL;
|
tok->start = NULL;
|
||||||
/* Skip spaces */
|
/* Skip spaces */
|
||||||
|
@ -1501,59 +1530,58 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
|
|
||||||
tok_len = tok->cur - tok->start;
|
tok_len = tok->cur - tok->start;
|
||||||
if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) {
|
if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) {
|
||||||
if (tok->def && tok->deftypestack[tok->def] == 3) {
|
/* The current token is 'def'. */
|
||||||
tok->deftypestack[tok->def] = 2;
|
if (tok->def + 1 >= MAXINDENT) {
|
||||||
|
tok->done = E_TOODEEP;
|
||||||
|
tok->cur = tok->inp;
|
||||||
|
return ERRORTOKEN;
|
||||||
}
|
}
|
||||||
else if (tok->defstack[tok->def] < tok->indent) {
|
|
||||||
/* We advance defs stack only when we see "def" *and*
|
|
||||||
the indentation level was increased relative to the
|
|
||||||
previous "def". */
|
|
||||||
|
|
||||||
if (tok->def + 1 >= MAXINDENT) {
|
/* Advance defs stack. */
|
||||||
tok->done = E_TOODEEP;
|
tok->def++;
|
||||||
tok->cur = tok->inp;
|
tok->defstack[tok->def] = tok->indent;
|
||||||
return ERRORTOKEN;
|
|
||||||
}
|
|
||||||
|
|
||||||
tok->def++;
|
if (tok->def_async_behind) {
|
||||||
tok->defstack[tok->def] = tok->indent;
|
/* The previous token was 'async'. */
|
||||||
tok->deftypestack[tok->def] = 1;
|
tok->def_async_behind = 0;
|
||||||
|
tok->deftypestack[tok->def] = DEFTYPE_ASYNC;
|
||||||
|
tok->def_in_async++;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* This is a regular function (not async def). */
|
||||||
|
tok->deftypestack[tok->def] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (tok_len == 5) {
|
else if (tok_len == 5) {
|
||||||
if (memcmp(tok->start, "async", 5) == 0) {
|
if (memcmp(tok->start, "async", 5) == 0) {
|
||||||
|
/* The current token is 'async'. */
|
||||||
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
|
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
|
||||||
|
|
||||||
|
/* Try to look ahead one token. */
|
||||||
ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
|
ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
|
||||||
&ahead_top_end);
|
&ahead_top_end);
|
||||||
|
|
||||||
if (ahead_tok_kind == NAME &&
|
if (ahead_tok_kind == NAME
|
||||||
ahead_tok.cur - ahead_tok.start == 3 &&
|
&& ahead_tok.cur - ahead_tok.start == 3
|
||||||
memcmp(ahead_tok.start, "def", 3) == 0) {
|
&& memcmp(ahead_tok.start, "def", 3) == 0)
|
||||||
|
{
|
||||||
if (tok->def + 1 >= MAXINDENT) {
|
/* The next token is going to be 'def', so instead of
|
||||||
tok->done = E_TOODEEP;
|
returning 'async' NAME token, we return ASYNC. */
|
||||||
tok->cur = tok->inp;
|
tok->def_async_behind = 1;
|
||||||
return ERRORTOKEN;
|
|
||||||
}
|
|
||||||
|
|
||||||
tok->def++;
|
|
||||||
tok->defstack[tok->def] = tok->indent;
|
|
||||||
tok->deftypestack[tok->def] = 3;
|
|
||||||
|
|
||||||
return ASYNC;
|
return ASYNC;
|
||||||
}
|
}
|
||||||
else if (tok->def && tok->deftypestack[tok->def] == 2
|
else if (tok->def_in_async)
|
||||||
&& tok->defstack[tok->def] < tok->indent) {
|
{
|
||||||
|
/* We're inside an 'async def' function, so we treat
|
||||||
|
'async' token as ASYNC, instead of NAME. */
|
||||||
return ASYNC;
|
return ASYNC;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else if (memcmp(tok->start, "await", 5) == 0
|
else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async)
|
||||||
&& tok->def && tok->deftypestack[tok->def] == 2
|
{
|
||||||
&& tok->defstack[tok->def] < tok->indent) {
|
/* We're inside an 'async def' function, so we treat
|
||||||
|
'await' token as AWAIT, instead of NAME. */
|
||||||
return AWAIT;
|
return AWAIT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1569,6 +1597,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
*p_start = tok->start;
|
*p_start = tok->start;
|
||||||
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
|
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
|
||||||
tok->cont_line = 0;
|
tok->cont_line = 0;
|
||||||
|
if (tok->def) {
|
||||||
|
/* Mark the top function on the stack that it had
|
||||||
|
at least one NEWLINE. That will help us to
|
||||||
|
distinguish one-line functions from functions
|
||||||
|
with multiple statements. */
|
||||||
|
tok->deftypestack[tok->def] |= DEFTYPE_HAS_NL;
|
||||||
|
}
|
||||||
return NEWLINE;
|
return NEWLINE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -66,12 +66,21 @@ struct tok_state {
|
||||||
const char* str;
|
const char* str;
|
||||||
const char* input; /* Tokenizer's newline translated copy of the string. */
|
const char* input; /* Tokenizer's newline translated copy of the string. */
|
||||||
|
|
||||||
int defstack[MAXINDENT]; /* stack if funcs & indents where they
|
/* `def*` fields are for parsing async/await in a backwards compatible
|
||||||
were defined */
|
way. They should be removed in 3.7, when async/await will become
|
||||||
int deftypestack[MAXINDENT]; /* stack of func types
|
regular keywords. See PEP 492 for more details. */
|
||||||
(0 not func; 1: "def name";
|
int defstack[MAXINDENT]; /* Stack of funcs & indents where they
|
||||||
2: "async def name") */
|
were defined. */
|
||||||
int def; /* Length of stack of func types */
|
int deftypestack[MAXINDENT]; /* Stack of func flags, see DEFTYPE_*
|
||||||
|
constants. */
|
||||||
|
int def; /* Length of stack of func types/flags. */
|
||||||
|
int def_async_behind; /* 1 if there was an 'async' token before
|
||||||
|
a 'def' token. */
|
||||||
|
int def_in_async; /* Counter of how deep 'async def's
|
||||||
|
are nested. If greater than 0,
|
||||||
|
we are somewhere in an 'async def'
|
||||||
|
body, so 'async' and 'await' should
|
||||||
|
be parsed as keywords.*/
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct tok_state *PyTokenizer_FromString(const char *, int);
|
extern struct tok_state *PyTokenizer_FromString(const char *, int);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue