import token
import tokenize
from typing import Dict, Iterator, List

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        self._lines: Dict[int, str] = {}
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip non-logical newlines and comments entirely.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            # Skip error tokens that are just whitespace.
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            # Collapse runs of consecutive NEWLINE tokens into one.
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            # Without a path we cannot re-read the source later, so cache
            # each token's source line as we go.
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        # Walk backwards from the current position, skipping ENDMARKER and
        # the whitespace-like tokens (NEWLINE, INDENT, DEDENT).
        for tok in reversed(self._tokens[: self._index]):
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            lines = self._lines
        else:
            n = len(line_numbers)
            lines = {}
            count = 0
            seen = 0
            with open(self._path) as f:
                for line in f:
                    count += 1
                    if count in line_numbers:
                        seen += 1
                        lines[count] = line
                        if seen == n:
                            break

        return [lines[n] for n in line_numbers]

    def mark(self) -> Mark:
        return self._index

    def reset(self, index: Mark) -> None:
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
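

# The block below is a minimal usage sketch, not part of the module's API: it
# feeds the Tokenizer from an in-memory string via tokenize.generate_tokens
# and exercises peek/getnext plus mark/reset backtracking.  The sample source
# text and variable names are made up for illustration.
if __name__ == "__main__":
    import io

    src = "x = 1\ny = 2\n"
    tok_stream = tokenize.generate_tokens(io.StringIO(src).readline)
    tokenizer = Tokenizer(tok_stream, verbose=False)

    first = tokenizer.peek()  # look at the next token without consuming it
    assert first is tokenizer.getnext()  # getnext() consumes that same token

    here = tokenizer.mark()  # remember the current position...
    tokenizer.getnext()
    tokenizer.getnext()
    tokenizer.reset(here)  # ...and backtrack to it later
    assert tokenizer.mark() == here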