import token
import tokenize
from typing import Dict, Iterator, List

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
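

# Illustrative example (not in the original module): for a NAME token "x"
# starting at line 1, column 0, shorttok() yields "1.0: NAME:'x'", padded
# and truncated to 25 characters by the "%-25.25s" format:
#
#     tok = tokenize.TokenInfo(token.NAME, "x", (1, 0), (1, 1), "x = 1\n")
#     shorttok(tok)  # -> "1.0: NAME:'x'" followed by 12 spaces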


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is closely tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []  # Cache of tokens consumed so far
        self._index = 0  # Current position in the token cache
        self._verbose = verbose
        self._lines: Dict[int, str] = {}  # Source lines, cached when no path is given
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip tokens that are insignificant to the grammar:
            # non-logical newlines, comments, and whitespace-only
            # error tokens.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            # Collapse runs of consecutive NEWLINE tokens into one.
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            # Without a path to read from later, cache source lines now.
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token seen, fetching one first if necessary."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        # Scan backwards past ENDMARKER, NEWLINE, INDENT and DEDENT tokens;
        # assumes at least one significant token has already been consumed.
        for tok in reversed(self._tokens[: self._index]):
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            lines = self._lines
        else:
            # No cached lines: read only the requested lines from the file.
            n = len(line_numbers)
            lines = {}
            count = 0
            seen = 0
            with open(self._path) as f:
                for line in f:
                    count += 1
                    if count in line_numbers:
                        seen += 1
                        lines[count] = line
                        if seen == n:
                            break

        return [lines[n] for n in line_numbers]

    def mark(self) -> Mark:
        return self._index

    def reset(self, index: Mark) -> None:
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        # Draw a progress ruler: the final marker is '-' for a backtrack,
        # '>' for a cached token, and '*' for a freshly read token.
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
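

# A minimal usage sketch (illustrative, not part of the original module):
# drive the Tokenizer over a small snippet and backtrack with mark()/reset(),
# the way a PEG parser does when an alternative fails.
if __name__ == "__main__":
    import io

    source = "x = 1\n"
    gen = tokenize.generate_tokens(io.StringIO(source).readline)
    t = Tokenizer(gen, verbose=True)  # verbose=True prints report() rulers
    start = t.mark()     # remember the current position
    first = t.getnext()  # consume the first token (NAME 'x')
    t.reset(start)       # backtrack to the saved position
    assert t.peek() == first  # peek() now serves the same token from the cache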