import token
import tokenize
from typing import Dict, Iterator, List

# Position in the Tokenizer's token cache, as returned by Tokenizer.mark()
# and accepted by Tokenizer.reset().  Kept as a plain alias of int.
Mark = int  # NewType('Mark', int)

# Module-level alias of the standard library's token.EXACT_TOKEN_TYPES table.
exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    """Return a fixed-width (25 character) one-line summary of *tok*.

    The summary has the form ``row.col: TYPENAME:'string'`` and is
    left-justified and padded with spaces, or truncated, to exactly
    25 characters.
    """
    row, col = tok.start
    summary = f"{row}.{col}: {token.tok_name[tok.type]}:{tok.string!r}"
    # "<25.25": left-align in a field of width 25 and cut off anything beyond it.
    return f"{summary:<25.25}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    Tokens are pulled lazily from the supplied iterator, filtered, and kept
    in an internal list, so the read position can be saved with mark() and
    moved back with reset() without tokenizing again.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        """Wrap *tokengen*.

        Args:
            tokengen: iterator producing tokenize.TokenInfo objects.
            path: path of the file being tokenized.  When empty, the source
                line of every cached token is remembered for get_lines();
                otherwise get_lines() reads the lines from this file.
            verbose: when true, print a trace line (see report()) on
                construction and on every getnext()/reset().
        """
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        self._lines: Dict[int, str] = {}
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and advance the index past it."""
        # Computed before peek(), because peek() may extend the cache.
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index.

        If the token at the current index is not cached yet, tokens are read
        from the underlying iterator until one is kept.  The following are
        dropped: NL and COMMENT tokens, ERRORTOKENs consisting only of
        whitespace, and a NEWLINE that directly follows a cached NEWLINE.

        StopIteration from the underlying iterator propagates to the caller.
        """
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            if not self._path:
                # No file to re-read later, so remember the source line now.
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the most recently cached token.

        If nothing has been cached yet, one token is read first via
        getnext(), which also advances the index.
        """
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        """Return the last consumed token that is not layout-only.

        Walks backwards over the tokens before the current index and returns
        the first one that is neither ENDMARKER nor in the numeric range
        NEWLINE..DEDENT.  If every consumed token is of those kinds, the
        oldest consumed token is returned.

        Raises:
            ValueError: if no token has been consumed yet.
        """
        consumed = self._tokens[: self._index]
        if not consumed:
            # Previously this surfaced as an accidental UnboundLocalError.
            raise ValueError("no tokens have been consumed yet")
        for tok in reversed(consumed):
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                return tok
        # Nothing qualified: fall back to the oldest consumed token.
        return consumed[0]

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers.

        Lines come from the in-memory record built while tokenizing when it
        is non-empty; otherwise they are read from the file at the path given
        to the constructor.  The result follows the order of *line_numbers*
        (duplicates included).

        Raises:
            KeyError: if a requested line number is not available.
            OSError: if the file has to be read and cannot be opened.
        """
        if self._lines:
            lines = self._lines
        else:
            # A set gives O(1) membership tests and lets the early exit work
            # even when the same line number is requested more than once.
            wanted = set(line_numbers)
            lines = {}
            with open(self._path) as f:
                for lineno, text in enumerate(f, start=1):
                    if lineno in wanted:
                        lines[lineno] = text
                        if len(lines) == len(wanted):
                            break

        return [lines[n] for n in line_numbers]

    def mark(self) -> Mark:
        """Return the current index, suitable for a later reset()."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Move the current index to *index* (a value obtained from mark()).

        Does nothing when *index* already is the current index.  The index
        must lie within the cached tokens (checked with an assertion).
        """
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        """Print one trace line describing the current position.

        The line starts with one dash per index step followed by a marker:
        "-" when moving backwards, ">" when the token came from the cache,
        "*" otherwise.  It ends with "(Bof)" at index 0, or with a short
        description of the token just before the index.
        """
        if back:
            marker = "-"
        elif cached:
            marker = ">"
        else:
            marker = "*"
        fill = "-" * self._index + marker
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
