Issue #2134: Add support for tokenize.TokenInfo.exact_type.

2025-11-25 04:34:37 +00:00 · 2012-01-19 00:44:45 -06:00 · 2012-01-19 00:44:45 -06:00 · 00c7f85298
commit 00c7f85298
parent 3f67ec1afd
4 changed files with 187 additions and 3 deletions
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@ -45,6 +45,51 @@ tok_name[NL] = 'NL'
 ENCODING = N_TOKENS + 2
 tok_name[ENCODING] = 'ENCODING'
 N_TOKENS += 3
+EXACT_TOKEN_TYPES = {
+    '(':   LPAR,
+    ')':   RPAR,
+    '[':   LSQB,
+    ']':   RSQB,
+    ':':   COLON,
+    ',':   COMMA,
+    ';':   SEMI,
+    '+':   PLUS,
+    '-':   MINUS,
+    '*':   STAR,
+    '/':   SLASH,
+    '|':   VBAR,
+    '&':   AMPER,
+    '<':   LESS,
+    '>':   GREATER,
+    '=':   EQUAL,
+    '.':   DOT,
+    '%':   PERCENT,
+    '{':   LBRACE,
+    '}':   RBRACE,
+    '==':  EQEQUAL,
+    '!=':  NOTEQUAL,
+    '<=':  LESSEQUAL,
+    '>=':  GREATEREQUAL,
+    '~':   TILDE,
+    '^':   CIRCUMFLEX,
+    '<<':  LEFTSHIFT,
+    '>>':  RIGHTSHIFT,
+    '**':  DOUBLESTAR,
+    '+=':  PLUSEQUAL,
+    '-=':  MINEQUAL,
+    '*=':  STAREQUAL,
+    '/=':  SLASHEQUAL,
+    '%=':  PERCENTEQUAL,
+    '&=':  AMPEREQUAL,
+    '|=':  VBAREQUAL,
+    '^=': CIRCUMFLEXEQUAL,
+    '<<=': LEFTSHIFTEQUAL,
+    '>>=': RIGHTSHIFTEQUAL,
+    '**=': DOUBLESTAREQUAL,
+    '//':  DOUBLESLASH,
+    '//=': DOUBLESLASHEQUAL,
+    '@':   AT
+}

 class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
    def __repr__(self):
@ -52,6 +97,13 @@ class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line'
        return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
                self._replace(type=annotated_type))

+    @property
+    def exact_type(self):
+        if self.type == OP and self.string in EXACT_TOKEN_TYPES:
+            return EXACT_TOKEN_TYPES[self.string]
+        else:
+            return self.type
+
 def group(*choices): return '(' + '|'.join(choices) + ')'
 def any(*choices): return group(*choices) + '*'
 def maybe(*choices): return group(*choices) + '?'
@ -549,6 +601,8 @@ def main():
    parser.add_argument(dest='filename', nargs='?',
                        metavar='filename.py',
                        help='the file to tokenize; defaults to stdin')
+    parser.add_argument('-e', '--exact', dest='exact', action='store_true',
+                        help='display token names using the exact type')
    args = parser.parse_args()

    try:
@ -563,9 +617,12 @@ def main():

        # Output the tokenization
        for token in tokens:
+            token_type = token.type
+            if args.exact:
+                token_type = token.exact_type
            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
            print("%-20s%-15s%-15r" %
-                  (token_range, tok_name[token.type], token.string))
+                  (token_range, tok_name[token_type], token.string))
    except IndentationError as err:
        line, column = err.args[1][1:3]
        error(err.args[0], filename, (line, column))