import token
import tokenize
from typing import Dict, Iterator, List

# Index into the Tokenizer's token cache; used as a backtracking "mark".
Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    """Format a token as a fixed-width (25 char) 'row.col: TYPE:string' summary."""
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    Pulls tokens lazily from a token generator, caches them in a list so the
    parser can backtrack via mark()/reset(), and filters out tokens that are
    not meaningful to the grammar (NL, COMMENT, whitespace-only ERRORTOKEN,
    and runs of consecutive NEWLINEs).

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        # Source lines keyed by 1-based line number.  Only populated when no
        # path is given; with a path, get_lines() re-reads the file instead.
        self._lines: Dict[int, str] = {}
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        # True when the token was already in the cache (i.e. we are replaying
        # after a reset()); only used for verbose reporting.
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Drop tokens the grammar never looks at.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            # Collapse consecutive NEWLINE tokens into a single one.
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token fetched so far (fetching one if none yet)."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        """Return the most recently consumed token that is not pure layout.

        Skips ENDMARKER and the NEWLINE..DEDENT range, scanning backwards
        from the current index.  NOTE(review): if no token has been consumed
        yet, ``tok`` is unbound and this raises NameError — preserved from
        the original; callers appear to use this only after a parse failure,
        when at least one token exists.
        """
        for tok in reversed(self._tokens[: self._index]):
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            # No path was given; lines were cached as tokens were fetched.
            lines = self._lines
        else:
            wanted = len(line_numbers)
            lines = {}
            count = 0
            seen = 0
            with open(self._path) as f:
                for source_line in f:
                    count += 1
                    if count in line_numbers:
                        seen += 1
                        lines[count] = source_line
                        if seen == wanted:
                            # All requested lines found; stop reading early.
                            break

        return [lines[lineno] for lineno in line_numbers]

    def mark(self) -> Mark:
        """Return the current position for a later reset()."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Rewind (or fast-forward within the cache) to a saved mark."""
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        """Print a one-line trace of the current position (verbose mode)."""
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")