import token
import tokenize
from typing import Dict, Iterator, List

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        self._lines: Dict[int, str] = {}
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip tokens the parser never needs to see.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            # Collapse runs of consecutive NEWLINE tokens into one.
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            # With no path to re-read from, remember each source line so
            # get_lines() can serve it from memory.
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token seen, fetching one first if none has been read."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        """Return the most recent consumed token that is not whitespace-related."""
        for tok in reversed(self._tokens[: self._index]):
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            lines = self._lines
        else:
            n = len(line_numbers)
            lines = {}
            count = 0
            seen = 0
            with open(self._path) as f:
                for line in f:
                    count += 1
                    if count in line_numbers:
                        seen += 1
                        lines[count] = line
                        if seen == n:
                            break

        return [lines[num] for num in line_numbers]

    def mark(self) -> Mark:
        """Return an opaque position that reset() can later return to."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Rewind (or fast-forward) to a position previously returned by mark()."""
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        """Print a one-line trace of the current position (verbose mode only)."""
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
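

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal demo of the
# mark()/reset() backtracking API, feeding the Tokenizer from the stdlib's
# tokenize.generate_tokens. The sample source string and variable names are
# made up for illustration. Run the file directly to see the trace lines
# printed by report() in verbose mode.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import io

    source = "x = 1\ny = 2\n"
    tokengen = tokenize.generate_tokens(io.StringIO(source).readline)
    tokenizer = Tokenizer(tokengen, verbose=True)

    start = tokenizer.mark()     # remember the current position
    first = tokenizer.getnext()  # NAME 'x' (NL/COMMENT tokens are skipped)
    tokenizer.getnext()          # OP '='
    tokenizer.reset(start)       # backtrack to the mark
    assert tokenizer.peek() == first  # replayed from the cache, not re-lexed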