Mirror of https://github.com/python/cpython.git, synced 2025-08-02 08:02:56 +00:00

Issue #12943: python -m tokenize support has been added to tokenize.

parent 1d972ad12a
commit 14c0f03b58

3 changed files with 115 additions and 23 deletions
Doc/library/tokenize.rst

@@ -15,6 +15,9 @@ implemented in Python.  The scanner in this module returns comments as tokens
 as well, making it useful for implementing "pretty-printers," including
 colorizers for on-screen displays.
 
+Tokenizing Input
+----------------
+
 The primary entry point is a :term:`generator`:
 
 .. function:: tokenize(readline)
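A note for readers of this commit: a minimal sketch of driving the
tokenize(readline) generator documented above, using io.BytesIO to supply
the required bytes-mode readline (the sample source string is illustrative,
not part of the patch):

    # Minimal sketch: tokenize() consumes the readline method of a
    # binary file-like object and yields named TokenInfo tuples.
    import io
    import tokenize

    source = b"x = 3.14\n"  # illustrative input, not from the patch
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        print(tok.type, tok.string)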
@@ -116,6 +119,26 @@ function it uses to do this is available:
 .. versionadded:: 3.2
 
 
+.. _tokenize-cli:
+
+Command-Line Usage
+------------------
+
+.. versionadded:: 3.3
+
+The :mod:`tokenize` module can be executed as a script from the command line.
+It is as simple as:
+
+.. code-block:: sh
+
+   python -m tokenize [filename.py]
+
+If :file:`filename.py` is specified its contents are tokenized to stdout.
+Otherwise, tokenization is performed on stdin.
+
+Examples
+------------------
+
 Example of a script rewriter that transforms float literals into Decimal
 objects::
 
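The stdin fallback described above can also be reached from Python code; a
sketch using the public generate_tokens() helper (the patch's main(), in the
Lib/tokenize.py hunk below, calls the private _tokenize() directly):

    # Sketch of tokenizing stdin, as the new CLI does when no filename is
    # given.  generate_tokens() accepts a text-mode readline such as
    # sys.stdin.readline; tokenize.tokenize() would need bytes instead.
    import sys
    import tokenize

    for tok in tokenize.generate_tokens(sys.stdin.readline):
        print(tok)

Run it as, e.g., echo "x = 1" | python sketch.py.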
@@ -158,3 +181,37 @@ objects::
             result.append((toknum, tokval))
     return untokenize(result).decode('utf-8')
 
+Example of tokenizing from the command line.  The script::
+
+    def say_hello():
+        print("Hello, World!")
+
+    say_hello()
+
+will be tokenized to the following output where the first column is the range
+of the line/column coordinates where the token is found, the second column is
+the name of the token, and the final column is the value of the token (if any)
+
+.. code-block:: sh
+
+    $ python -m tokenize hello.py
+    0,0-0,0:            ENCODING       'utf-8'
+    1,0-1,3:            NAME           'def'
+    1,4-1,13:           NAME           'say_hello'
+    1,13-1,14:          OP             '('
+    1,14-1,15:          OP             ')'
+    1,15-1,16:          OP             ':'
+    1,16-1,17:          NEWLINE        '\n'
+    2,0-2,4:            INDENT         '    '
+    2,4-2,9:            NAME           'print'
+    2,9-2,10:           OP             '('
+    2,10-2,25:          STRING         '"Hello, World!"'
+    2,25-2,26:          OP             ')'
+    2,26-2,27:          NEWLINE        '\n'
+    3,0-3,1:            NL             '\n'
+    4,0-4,0:            DEDENT         ''
+    4,0-4,9:            NAME           'say_hello'
+    4,9-4,10:           OP             '('
+    4,10-4,11:          OP             ')'
+    4,11-4,12:          NEWLINE        '\n'
+    5,0-5,0:            ENDMARKER      ''
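The same listing can be reproduced programmatically; a sketch that feeds the
hello.py source to tokenize() and reuses the "%-20s%-15s%-15r" layout the new
main() (next hunk) uses:

    # Sketch: reproducing the CLI output above without a subprocess.
    import io
    from tokenize import tokenize, tok_name

    source = b'def say_hello():\n    print("Hello, World!")\n\nsay_hello()\n'
    for token in tokenize(io.BytesIO(source).readline):
        # start/end are (row, col) pairs; concatenating them gives the
        # 4-tuple consumed by the "%d,%d-%d,%d" range format.
        token_range = "%d,%d-%d,%d:" % (token.start + token.end)
        print("%-20s%-15s%-15r" %
              (token_range, tok_name[token.type], token.string))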
Lib/tokenize.py

@@ -530,27 +530,60 @@ def _tokenize(readline, encoding):
 def generate_tokens(readline):
     return _tokenize(readline, None)
 
+
+def main():
+    import argparse
+
+    # Helper error handling routines
+    def perror(message):
+        print(message, file=sys.stderr)
+
+    def error(message, filename=None, location=None):
+        if location:
+            args = (filename,) + location + (message,)
+            perror("%s:%d:%d: error: %s" % args)
+        elif filename:
+            perror("%s: error: %s" % (filename, message))
+        else:
+            perror("error: %s" % message)
+        sys.exit(1)
+
+    # Parse the arguments and options
+    parser = argparse.ArgumentParser(prog='python -m tokenize')
+    parser.add_argument(dest='filename', nargs='?',
+                        metavar='filename.py',
+                        help='the file to tokenize; defaults to stdin')
+    args = parser.parse_args()
+
+    try:
+        # Tokenize the input
+        if args.filename:
+            filename = args.filename
+            with builtins.open(filename, 'rb') as f:
+                tokens = list(tokenize(f.readline))
+        else:
+            filename = "<stdin>"
+            tokens = _tokenize(sys.stdin.readline, None)
+
+        # Output the tokenization
+        for token in tokens:
+            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
+            print("%-20s%-15s%-15r" %
+                  (token_range, tok_name[token.type], token.string))
+    except IndentationError as err:
+        line, column = err.args[1][1:3]
+        error(err.args[0], filename, (line, column))
+    except TokenError as err:
+        line, column = err.args[1]
+        error(err.args[0], filename, (line, column))
+    except SyntaxError as err:
+        error(err, filename)
+    except IOError as err:
+        error(err)
+    except KeyboardInterrupt:
+        print("interrupted\n")
+    except Exception as err:
+        perror("unexpected error: %s" % err)
+        raise
+
 if __name__ == "__main__":
-    # Quick sanity check
-    s = b'''def parseline(self, line):
-        """Parse the line into a command name and a string containing
-        the arguments.  Returns a tuple containing (command, args, line).
-        'command' and 'args' may be None if the line couldn't be parsed.
-        """
-        line = line.strip()
-        if not line:
-            return None, None, line
-        elif line[0] == '?':
-            line = 'help ' + line[1:]
-        elif line[0] == '!':
-            if hasattr(self, 'do_shell'):
-                line = 'shell ' + line[1:]
-            else:
-                return None, None, line
-        i, n = 0, len(line)
-        while i < n and line[i] in self.identchars: i = i+1
-        cmd, arg = line[:i], line[i:].strip()
-        return cmd, arg, line
-    '''
-    for tok in tokenize(iter(s.splitlines()).__next__):
-        print(tok)
+    main()
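One non-obvious line above is err.args[1][1:3] in the IndentationError
handler; a sketch of the args layout it relies on (the compile() call is just
a convenient way to provoke the exception, not how tokenize raises it):

    # IndentationError (a SyntaxError subclass) carries
    # (message, (filename, lineno, offset, text)) in err.args, so
    # err.args[1][1:3] extracts the (lineno, offset) pair main() reports.
    try:
        compile("def f():\nreturn 1\n", "<demo>", "exec")
    except IndentationError as err:
        line, column = err.args[1][1:3]
        print("%s:%d:%d: error: %s" % ("<demo>", line, column, err.args[0]))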
Misc/NEWS

@@ -2520,6 +2520,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #12943: python -m tokenize support has been added to tokenize.
+
 - Issue #10465: fix broken delegating of attributes by gzip._PaddedFile.
 
 - Issue #10356: Decimal.__hash__(-1) should return -2.