mirror of https://github.com/python/cpython.git (synced 2025-11-04 11:49:12 +00:00)

Co-authored-by: Guido van Rossum <guido@python.org>
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>

86 lines · 2.6 KiB · Python

import token
import tokenize
from typing import List, Iterator

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES  # type: ignore


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
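# For example, a NAME token 'x' starting at line 1, column 0 renders as
# "1.0: NAME:'x'", left-justified and truncated to 25 characters.
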
class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        if verbose:
            self.report(False, False)
    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = True
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip insignificant tokens: non-logical newlines, comments,
            # and whitespace-only error tokens.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
            cached = False
        tok = self._tokens[self._index]
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok
    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
        return self._tokens[self._index]
    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token read, fetching one first if the cache is empty."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def mark(self) -> Mark:
        """Return the current position, suitable for passing to reset()."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Rewind (or fast-forward) to a position previously returned by mark()."""
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)
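    # Typical backtracking pattern (illustrative; the names below are
    # hypothetical, not part of this module): a parser records a position
    # with mark() before trying an alternative and rewinds with reset()
    # when the alternative fails:
    #
    #     pos = tokenizer.mark()
    #     if not parse_alternative():
    #         tokenizer.reset(pos)
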
    def report(self, cached: bool, back: bool) -> None:
        """Print a one-line trace of the current position (verbose mode)."""
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
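

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): build a Tokenizer
# from a string and exercise the mark()/reset() backtracking protocol. The
# source string and variable names here are illustrative assumptions.
if __name__ == "__main__":
    import io

    source = "pass\n"
    gen = tokenize.generate_tokens(io.StringIO(source).readline)
    tok_stream = Tokenizer(gen)

    start = tok_stream.mark()      # remember the current position
    first = tok_stream.getnext()   # consume one token (NAME 'pass')
    tok_stream.reset(start)        # rewind; cached tokens are replayed
    assert tok_stream.peek() == first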