Issue #24619: Simplify async/await tokenization.

This commit simplifies async/await tokenization in tokenizer.c,
tokenize.py & lib2to3/tokenize.py.  The previous solution was to keep
a stack of async-def & def blocks, whereas the new approach is just
to remember the position of the outermost async-def block.

This change won't bring any parsing performance improvements, but
it makes the code much easier to read and validate.
This commit is contained in:
Yury Selivanov 2015-07-23 15:01:58 +03:00
parent f315c1c016
commit 96ec934e75
7 changed files with 183 additions and 132 deletions

View file

@ -366,10 +366,11 @@ def generate_tokens(readline):
contline = None
indents = [0]
# 'stashed' and 'ctx' are used for async/await parsing
# 'stashed' and 'async_*' are used for async/await parsing
stashed = None
ctx = [('sync', 0)]
in_async = 0
async_def = False
async_def_indent = 0
async_def_nl = False
while 1: # loop over lines in stream
try:
@ -438,15 +439,18 @@ def generate_tokens(readline):
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
cur_indent = indents[-1]
while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
if ctx[-1][0] == 'async':
in_async -= 1
assert in_async >= 0
ctx.pop()
if async_def and async_def_indent >= indents[-1]:
async_def = False
async_def_nl = False
async_def_indent = 0
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
if async_def and async_def_nl and async_def_indent >= indents[-1]:
async_def = False
async_def_nl = False
async_def_indent = 0
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
@ -466,10 +470,13 @@ def generate_tokens(readline):
newline = NEWLINE
if parenlev > 0:
newline = NL
elif async_def:
async_def_nl = True
if stashed:
yield stashed
stashed = None
yield (newline, token, spos, epos, line)
elif initial == '#':
assert not token.endswith("\n")
if stashed:
@ -508,7 +515,7 @@ def generate_tokens(readline):
yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
if token in ('async', 'await'):
if in_async:
if async_def:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
@ -523,15 +530,13 @@ def generate_tokens(readline):
and stashed[0] == NAME
and stashed[1] == 'async'):
ctx.append(('async', indents[-1]))
in_async += 1
async_def = True
async_def_indent = indents[-1]
yield (ASYNC, stashed[1],
stashed[2], stashed[3],
stashed[4])
stashed = None
else:
ctx.append(('sync', indents[-1]))
if stashed:
yield stashed