Add untokenize() function to allow full round-trip tokenization.

Should significantly enhance the utility of the module by supporting
the creation of tools that modify the token stream and writeback the
modified result.
This commit is contained in:
Raymond Hettinger 2005-06-10 11:05:19 +00:00
parent bf7255fffb
commit 68c0453418
5 changed files with 182 additions and 7 deletions

View file

@ -31,7 +31,7 @@ from token import *
import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["COMMENT", "tokenize",
"generate_tokens", "NL"]
"generate_tokens", "NL", "untokenize"]
del x
del token
@ -159,12 +159,55 @@ def tokenize_loop(readline, tokeneater):
for token_info in generate_tokens(readline):
tokeneater(*token_info)
def untokenize(iterable):
"""Transform tokens back into Python source code.
Each element returned by the iterable must be a token sequence
with at least two elements, a token number and token value.
Round-trip invariant:
# Output text will tokenize the back to the input
t1 = [tok[:2] for tok in generate_tokens(f.readline)]
newcode = untokenize(t1)
readline = iter(newcode.splitlines(1)).next
t2 = [tok[:2] for tokin generate_tokens(readline)]
assert t1 == t2
"""
startline = False
indents = []
toks = []
toks_append = toks.append
for tok in iterable:
toknum, tokval = tok[:2]
if toknum == NAME:
tokval += ' '
if toknum == INDENT:
indents.append(tokval)
continue
elif toknum == DEDENT:
indents.pop()
continue
elif toknum in (NEWLINE, COMMENT, NL):
startline = True
elif startline and indents:
toks_append(indents[-1])
startline = False
toks_append(tokval)
return ''.join(toks)
def generate_tokens(readline):
"""
The generate_tokens() generator requires one argment, readline, which
must be a callable object which provides the same interface as the
readline() method of built-in file objects. Each call to the function
should return one line of input as a string.
should return one line of input as a string. Alternately, readline
can be a callable function terminating with StopIteration:
readline = open(myfile).next # Example of alternate readline
The generator produces 5-tuples with these members: the token type; the
token string; a 2-tuple (srow, scol) of ints specifying the row and
@ -180,7 +223,10 @@ def generate_tokens(readline):
indents = [0]
while 1: # loop over lines in stream
line = readline()
try:
line = readline()
except StopIteration:
line = ''
lnum = lnum + 1
pos, max = 0, len(line)