"""Tokenization help for Python programs.

tokenize(readline) is a generator that breaks a stream of bytes into
Python tokens.  It decodes the bytes according to PEP-0263 for
determining source file encoding.

It accepts a readline-like method which is called repeatedly to get the
next line of input (or b"" for EOF).  It generates 5-tuples with these
members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.  Additionally, all token lists start with an ENCODING token
which tells you which encoding was used to decode the bytes stream.
"""
227db96d56Sopenharmony_ci
237db96d56Sopenharmony_ci__author__ = 'Ka-Ping Yee <ping@lfw.org>'
247db96d56Sopenharmony_ci__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
257db96d56Sopenharmony_ci               'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
267db96d56Sopenharmony_ci               'Michael Foord')
277db96d56Sopenharmony_cifrom builtins import open as _builtin_open
287db96d56Sopenharmony_cifrom codecs import lookup, BOM_UTF8
297db96d56Sopenharmony_ciimport collections
307db96d56Sopenharmony_ciimport functools
317db96d56Sopenharmony_cifrom io import TextIOWrapper
327db96d56Sopenharmony_ciimport itertools as _itertools
337db96d56Sopenharmony_ciimport re
347db96d56Sopenharmony_ciimport sys
357db96d56Sopenharmony_cifrom token import *
367db96d56Sopenharmony_cifrom token import EXACT_TOKEN_TYPES
377db96d56Sopenharmony_ci
# PEP 263 encoding cookie, e.g. "# -*- coding: utf-8 -*-"; group 1 captures
# the declared encoding name.
cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
# A (bytes) line that is blank apart from whitespace and/or a comment.
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)

# Re-export everything from the token module, plus this module's own API.
import token
__all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
                           "untokenize", "TokenInfo"]
del token
457db96d56Sopenharmony_ci
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
    """One token: (type, string, start, end, line), with a readable repr
    and an exact_type property that refines generic OP tokens."""

    def __repr__(self):
        described_type = '%d (%s)' % (self.type, tok_name[self.type])
        return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)'
                % self._replace(type=described_type))

    @property
    def exact_type(self):
        # OP tokens are mapped to their specific operator type when the
        # operator text is a known exact token; everything else is unchanged.
        if self.type != OP:
            return self.type
        return EXACT_TOKEN_TYPES.get(self.string, self.type)
587db96d56Sopenharmony_ci
def group(*choices):
    """Join *choices* into one parenthesized regex alternation."""
    return '({})'.format('|'.join(choices))

def any(*choices):
    """A regex matching zero or more repetitions of the alternation."""
    return group(*choices) + '*'

def maybe(*choices):
    """A regex matching the alternation at most once."""
    return group(*choices) + '?'
627db96d56Sopenharmony_ci
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
# Whitespace, possibly spanning backslash line continuations, then an
# optional comment.
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+'

# Integer literals; "_" is permitted as a digit-group separator (PEP 515).
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
Binnumber = r'0[bB](?:_?[01])+'
Octnumber = r'0[oO](?:_?[0-7])+'
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
# Float literals: a fractional part and/or an exponent part.
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                   r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
# Imaginary literals: any integer or float followed by 'j'/'J'.
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
827db96d56Sopenharmony_ci
837db96d56Sopenharmony_ci# Return the empty string, plus all of the valid string prefixes.
847db96d56Sopenharmony_cidef _all_string_prefixes():
857db96d56Sopenharmony_ci    # The valid string prefixes. Only contain the lower case versions,
867db96d56Sopenharmony_ci    #  and don't contain any permutations (include 'fr', but not
877db96d56Sopenharmony_ci    #  'rf'). The various permutations will be generated.
887db96d56Sopenharmony_ci    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
897db96d56Sopenharmony_ci    # if we add binary f-strings, add: ['fb', 'fbr']
907db96d56Sopenharmony_ci    result = {''}
917db96d56Sopenharmony_ci    for prefix in _valid_string_prefixes:
927db96d56Sopenharmony_ci        for t in _itertools.permutations(prefix):
937db96d56Sopenharmony_ci            # create a list with upper and lower versions of each
947db96d56Sopenharmony_ci            #  character
957db96d56Sopenharmony_ci            for u in _itertools.product(*[(c, c.upper()) for c in t]):
967db96d56Sopenharmony_ci                result.add(''.join(u))
977db96d56Sopenharmony_ci    return result
987db96d56Sopenharmony_ci
997db96d56Sopenharmony_ci@functools.lru_cache
1007db96d56Sopenharmony_cidef _compile(expr):
1017db96d56Sopenharmony_ci    return re.compile(expr, re.UNICODE)
1027db96d56Sopenharmony_ci
# Note that since _all_string_prefixes includes the empty string,
#  StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
# Opening of a triple-quoted string (prefix plus the three quotes).
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Single-line ' or " string.
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Sorting in reverse order puts the long operators before their prefixes.
# Otherwise if = came before ==, == would get recognized as two instances
# of =.
Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
# A newline or any operator/delimiter.
Funny = group(r'\r?\n', Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string: either a complete string or one
# ending in a backslash continuation.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
# The tokenizer's workhorse pattern: optional whitespace then any single
# token (or the start of a continued string); group 1 spans the token.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
1367db96d56Sopenharmony_ci
# For a given string prefix plus quotes, endpats maps it to a regex
#  to match the remainder of that string. _prefix can be empty, for
#  a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in _all_string_prefixes():
    endpats[_prefix + "'"] = Single
    endpats[_prefix + '"'] = Double
    endpats[_prefix + "'''"] = Single3
    endpats[_prefix + '"""'] = Double3
del _prefix

# A set of all of the single and triple quoted string prefixes,
#  including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
    for u in (t + '"', t + "'"):
        single_quoted.add(u)
    for u in (t + '"""', t + "'''"):
        triple_quoted.add(u)
# Keep the module namespace clean of loop variables.
del t, u

# Column width used when expanding tabs while measuring indentation.
tabsize = 8
1607db96d56Sopenharmony_ci
class TokenError(Exception):
    """Raised when input ends inside a multi-line string or statement."""
    pass

class StopTokenizing(Exception):
    # NOTE(review): not raised anywhere in this chunk; presumably kept for
    # backwards compatibility — confirm against the rest of the module.
    pass
1647db96d56Sopenharmony_ci
1657db96d56Sopenharmony_ci
class Untokenizer:
    """Rebuilds source text from a token stream (used by untokenize())."""

    def __init__(self):
        # Accumulated output fragments; joined into one string at the end.
        self.tokens = []
        # (row, col) position reached so far in the reconstructed output.
        self.prev_row = 1
        self.prev_col = 0
        # Value of the ENCODING token, if one was seen in the stream.
        self.encoding = None

    def add_whitespace(self, start):
        """Emit filler so the next token begins at position *start*.

        Row gaps are bridged with backslash-continuation newlines and
        column gaps with spaces.  Raises ValueError if *start* precedes
        the current position.
        """
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError("start ({},{}) precedes previous end ({},{})"
                             .format(row, col, self.prev_row, self.prev_col))
        row_offset = row - self.prev_row
        if row_offset:
            self.tokens.append("\\\n" * row_offset)
            # A row jump resets the column origin.
            self.prev_col = 0
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        """Reconstruct and return source text for the tokens in *iterable*.

        Full 5-tuples reproduce original positions exactly; on the first
        2-tuple the remainder is delegated to compat() (approximate
        spacing only).
        """
        it = iter(iterable)
        indents = []
        startline = False
        for t in it:
            if len(t) == 2:
                # Degraded (type, string) input: fall back to compat mode.
                self.compat(t, it)
                break
            tok_type, token, start, end, line = t
            if tok_type == ENCODING:
                self.encoding = token
                continue
            if tok_type == ENDMARKER:
                break
            if tok_type == INDENT:
                # Remember the indent text; it is re-emitted lazily at the
                # start of the next logical line.
                indents.append(token)
                continue
            elif tok_type == DEDENT:
                indents.pop()
                self.prev_row, self.prev_col = end
                continue
            elif tok_type in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                indent = indents[-1]
                # Only re-emit the indent when the token starts at or past
                # its width, so add_whitespace never sees a backwards move.
                if start[1] >= len(indent):
                    self.tokens.append(indent)
                    self.prev_col = len(indent)
                startline = False
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        """Best-effort reconstruction from (type, string) 2-tuples.

        Positions are unknown, so spacing is approximated: a space is
        appended after names/numbers and inserted between two adjacent
        string literals.
        """
        indents = []
        toks_append = self.tokens.append
        startline = token[0] in (NEWLINE, NL)
        prevstring = False

        for tok in _itertools.chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)
2597db96d56Sopenharmony_ci
2607db96d56Sopenharmony_ci
def untokenize(iterable):
    """Transform tokens back into Python source code.
    It returns a bytes object, encoded using the ENCODING
    token, which is the first token sequence output by tokenize.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value.  If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output bytes will tokenize back to the input
        t1 = [tok[:2] for tok in tokenize(f.readline)]
        newcode = untokenize(t1)
        readline = BytesIO(newcode).readline
        t2 = [tok[:2] for tok in tokenize(readline)]
        assert t1 == t2
    """
    converter = Untokenizer()
    source = converter.untokenize(iterable)
    # With no ENCODING token in the stream, the result stays a str.
    if converter.encoding is None:
        return source
    return source.encode(converter.encoding)
2867db96d56Sopenharmony_ci
2877db96d56Sopenharmony_ci
2887db96d56Sopenharmony_cidef _get_normal_name(orig_enc):
2897db96d56Sopenharmony_ci    """Imitates get_normal_name in tokenizer.c."""
2907db96d56Sopenharmony_ci    # Only care about the first 12 characters.
2917db96d56Sopenharmony_ci    enc = orig_enc[:12].lower().replace("_", "-")
2927db96d56Sopenharmony_ci    if enc == "utf-8" or enc.startswith("utf-8-"):
2937db96d56Sopenharmony_ci        return "utf-8"
2947db96d56Sopenharmony_ci    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
2957db96d56Sopenharmony_ci       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
2967db96d56Sopenharmony_ci        return "iso-8859-1"
2977db96d56Sopenharmony_ci    return orig_enc
2987db96d56Sopenharmony_ci
def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file.  It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263.  If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised.  If the encoding cookie is an
    invalid charset, raise a SyntaxError.  Note that if a utf-8 bom is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    # Used only to improve SyntaxError messages when readline is a bound
    # method of an object exposing a name (e.g. a file object).
    try:
        filename = readline.__self__.name
    except AttributeError:
        filename = None
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        # Treat an exhausted readline iterator the same as EOF.
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        # Return the declared encoding on *line*, or None if there is no
        # valid PEP 263 cookie.
        try:
            # Decode as UTF-8. Either the line is an encoding declaration,
            # in which case it should be pure ASCII, or it must be UTF-8
            # per default encoding.
            line_string = line.decode('utf-8')
        except UnicodeDecodeError:
            msg = "invalid or missing encoding declaration"
            if filename is not None:
                msg = '{} for {!r}'.format(msg, filename)
            raise SyntaxError(msg)

        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            # Validate the name; the codec object itself is not needed.
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            if filename is None:
                msg = "unknown encoding: " + encoding
            else:
                msg = "unknown encoding for {!r}: {}".format(filename,
                        encoding)
            raise SyntaxError(msg)

        if bom_found:
            # A BOM pins the encoding to utf-8; any other cookie is an error.
            if encoding != 'utf-8':
                # This behaviour mimics the Python interpreter
                if filename is None:
                    msg = 'encoding problem: utf-8'
                else:
                    msg = 'encoding problem for {!r}: utf-8'.format(filename)
                raise SyntaxError(msg)
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        # Strip the BOM; remember it so a conflicting cookie is rejected
        # and a matching one gets the '-sig' suffix.
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    # PEP 263 allows the cookie only on the first two lines; the second
    # line counts only when the first is blank or comment-only.
    if not blank_re.match(first):
        return default, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]
3907db96d56Sopenharmony_ci
3917db96d56Sopenharmony_ci
def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    stream = _builtin_open(filename, 'rb')
    try:
        encoding, _ = detect_encoding(stream.readline)
        # Rewind so the wrapper sees the whole file, cookie line included.
        stream.seek(0)
        wrapper = TextIOWrapper(stream, encoding, line_buffering=True)
        wrapper.mode = 'r'
        return wrapper
    except:
        # Never leak the raw byte stream on failure.
        stream.close()
        raise
4067db96d56Sopenharmony_ci
4077db96d56Sopenharmony_ci
def tokenize(readline):
    """
    The tokenize() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects.  Each call to the function
    should return one line of input as bytes.  Alternatively, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile, 'rb').__next__  # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found.  The line passed is the
    physical line.

    The first token sequence will always be an ENCODING token
    which tells you which encoding was used to decode the bytes stream.
    """
    encoding, consumed = detect_encoding(readline)
    # Replay the lines detect_encoding consumed, then pull from readline
    # until EOF, then yield b"" forever so _tokenize can read past the end.
    line_source = _itertools.chain(
        consumed, iter(readline, b""), _itertools.repeat(b""))
    return _tokenize(line_source.__next__, encoding)
4317db96d56Sopenharmony_ci
4327db96d56Sopenharmony_ci
4337db96d56Sopenharmony_cidef _tokenize(readline, encoding):
4347db96d56Sopenharmony_ci    lnum = parenlev = continued = 0
4357db96d56Sopenharmony_ci    numchars = '0123456789'
4367db96d56Sopenharmony_ci    contstr, needcont = '', 0
4377db96d56Sopenharmony_ci    contline = None
4387db96d56Sopenharmony_ci    indents = [0]
4397db96d56Sopenharmony_ci
4407db96d56Sopenharmony_ci    if encoding is not None:
4417db96d56Sopenharmony_ci        if encoding == "utf-8-sig":
4427db96d56Sopenharmony_ci            # BOM will already have been stripped.
4437db96d56Sopenharmony_ci            encoding = "utf-8"
4447db96d56Sopenharmony_ci        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
4457db96d56Sopenharmony_ci    last_line = b''
4467db96d56Sopenharmony_ci    line = b''
4477db96d56Sopenharmony_ci    while True:                                # loop over lines in stream
4487db96d56Sopenharmony_ci        try:
4497db96d56Sopenharmony_ci            # We capture the value of the line variable here because
4507db96d56Sopenharmony_ci            # readline uses the empty string '' to signal end of input,
4517db96d56Sopenharmony_ci            # hence `line` itself will always be overwritten at the end
4527db96d56Sopenharmony_ci            # of this loop.
4537db96d56Sopenharmony_ci            last_line = line
4547db96d56Sopenharmony_ci            line = readline()
4557db96d56Sopenharmony_ci        except StopIteration:
4567db96d56Sopenharmony_ci            line = b''
4577db96d56Sopenharmony_ci
4587db96d56Sopenharmony_ci        if encoding is not None:
4597db96d56Sopenharmony_ci            line = line.decode(encoding)
4607db96d56Sopenharmony_ci        lnum += 1
4617db96d56Sopenharmony_ci        pos, max = 0, len(line)
4627db96d56Sopenharmony_ci
4637db96d56Sopenharmony_ci        if contstr:                            # continued string
4647db96d56Sopenharmony_ci            if not line:
4657db96d56Sopenharmony_ci                raise TokenError("EOF in multi-line string", strstart)
4667db96d56Sopenharmony_ci            endmatch = endprog.match(line)
4677db96d56Sopenharmony_ci            if endmatch:
4687db96d56Sopenharmony_ci                pos = end = endmatch.end(0)
4697db96d56Sopenharmony_ci                yield TokenInfo(STRING, contstr + line[:end],
4707db96d56Sopenharmony_ci                       strstart, (lnum, end), contline + line)
4717db96d56Sopenharmony_ci                contstr, needcont = '', 0
4727db96d56Sopenharmony_ci                contline = None
4737db96d56Sopenharmony_ci            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
4747db96d56Sopenharmony_ci                yield TokenInfo(ERRORTOKEN, contstr + line,
4757db96d56Sopenharmony_ci                           strstart, (lnum, len(line)), contline)
4767db96d56Sopenharmony_ci                contstr = ''
4777db96d56Sopenharmony_ci                contline = None
4787db96d56Sopenharmony_ci                continue
4797db96d56Sopenharmony_ci            else:
4807db96d56Sopenharmony_ci                contstr = contstr + line
4817db96d56Sopenharmony_ci                contline = contline + line
4827db96d56Sopenharmony_ci                continue
4837db96d56Sopenharmony_ci
4847db96d56Sopenharmony_ci        elif parenlev == 0 and not continued:  # new statement
4857db96d56Sopenharmony_ci            if not line: break
4867db96d56Sopenharmony_ci            column = 0
4877db96d56Sopenharmony_ci            while pos < max:                   # measure leading whitespace
4887db96d56Sopenharmony_ci                if line[pos] == ' ':
4897db96d56Sopenharmony_ci                    column += 1
4907db96d56Sopenharmony_ci                elif line[pos] == '\t':
4917db96d56Sopenharmony_ci                    column = (column//tabsize + 1)*tabsize
4927db96d56Sopenharmony_ci                elif line[pos] == '\f':
4937db96d56Sopenharmony_ci                    column = 0
4947db96d56Sopenharmony_ci                else:
4957db96d56Sopenharmony_ci                    break
4967db96d56Sopenharmony_ci                pos += 1
4977db96d56Sopenharmony_ci            if pos == max:
4987db96d56Sopenharmony_ci                break
4997db96d56Sopenharmony_ci
5007db96d56Sopenharmony_ci            if line[pos] in '#\r\n':           # skip comments or blank lines
5017db96d56Sopenharmony_ci                if line[pos] == '#':
5027db96d56Sopenharmony_ci                    comment_token = line[pos:].rstrip('\r\n')
5037db96d56Sopenharmony_ci                    yield TokenInfo(COMMENT, comment_token,
5047db96d56Sopenharmony_ci                           (lnum, pos), (lnum, pos + len(comment_token)), line)
5057db96d56Sopenharmony_ci                    pos += len(comment_token)
5067db96d56Sopenharmony_ci
5077db96d56Sopenharmony_ci                yield TokenInfo(NL, line[pos:],
5087db96d56Sopenharmony_ci                           (lnum, pos), (lnum, len(line)), line)
5097db96d56Sopenharmony_ci                continue
5107db96d56Sopenharmony_ci
5117db96d56Sopenharmony_ci            if column > indents[-1]:           # count indents or dedents
5127db96d56Sopenharmony_ci                indents.append(column)
5137db96d56Sopenharmony_ci                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
5147db96d56Sopenharmony_ci            while column < indents[-1]:
5157db96d56Sopenharmony_ci                if column not in indents:
5167db96d56Sopenharmony_ci                    raise IndentationError(
5177db96d56Sopenharmony_ci                        "unindent does not match any outer indentation level",
5187db96d56Sopenharmony_ci                        ("<tokenize>", lnum, pos, line))
5197db96d56Sopenharmony_ci                indents = indents[:-1]
5207db96d56Sopenharmony_ci
5217db96d56Sopenharmony_ci                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
5227db96d56Sopenharmony_ci
5237db96d56Sopenharmony_ci        else:                                  # continued statement
5247db96d56Sopenharmony_ci            if not line:
5257db96d56Sopenharmony_ci                raise TokenError("EOF in multi-line statement", (lnum, 0))
5267db96d56Sopenharmony_ci            continued = 0
5277db96d56Sopenharmony_ci
5287db96d56Sopenharmony_ci        while pos < max:
5297db96d56Sopenharmony_ci            pseudomatch = _compile(PseudoToken).match(line, pos)
5307db96d56Sopenharmony_ci            if pseudomatch:                                # scan for tokens
5317db96d56Sopenharmony_ci                start, end = pseudomatch.span(1)
5327db96d56Sopenharmony_ci                spos, epos, pos = (lnum, start), (lnum, end), end
5337db96d56Sopenharmony_ci                if start == end:
5347db96d56Sopenharmony_ci                    continue
5357db96d56Sopenharmony_ci                token, initial = line[start:end], line[start]
5367db96d56Sopenharmony_ci
5377db96d56Sopenharmony_ci                if (initial in numchars or                 # ordinary number
5387db96d56Sopenharmony_ci                    (initial == '.' and token != '.' and token != '...')):
5397db96d56Sopenharmony_ci                    yield TokenInfo(NUMBER, token, spos, epos, line)
5407db96d56Sopenharmony_ci                elif initial in '\r\n':
5417db96d56Sopenharmony_ci                    if parenlev > 0:
5427db96d56Sopenharmony_ci                        yield TokenInfo(NL, token, spos, epos, line)
5437db96d56Sopenharmony_ci                    else:
5447db96d56Sopenharmony_ci                        yield TokenInfo(NEWLINE, token, spos, epos, line)
5457db96d56Sopenharmony_ci
5467db96d56Sopenharmony_ci                elif initial == '#':
5477db96d56Sopenharmony_ci                    assert not token.endswith("\n")
5487db96d56Sopenharmony_ci                    yield TokenInfo(COMMENT, token, spos, epos, line)
5497db96d56Sopenharmony_ci
5507db96d56Sopenharmony_ci                elif token in triple_quoted:
5517db96d56Sopenharmony_ci                    endprog = _compile(endpats[token])
5527db96d56Sopenharmony_ci                    endmatch = endprog.match(line, pos)
5537db96d56Sopenharmony_ci                    if endmatch:                           # all on one line
5547db96d56Sopenharmony_ci                        pos = endmatch.end(0)
5557db96d56Sopenharmony_ci                        token = line[start:pos]
5567db96d56Sopenharmony_ci                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
5577db96d56Sopenharmony_ci                    else:
5587db96d56Sopenharmony_ci                        strstart = (lnum, start)           # multiple lines
5597db96d56Sopenharmony_ci                        contstr = line[start:]
5607db96d56Sopenharmony_ci                        contline = line
5617db96d56Sopenharmony_ci                        break
5627db96d56Sopenharmony_ci
5637db96d56Sopenharmony_ci                # Check up to the first 3 chars of the token to see if
5647db96d56Sopenharmony_ci                #  they're in the single_quoted set. If so, they start
5657db96d56Sopenharmony_ci                #  a string.
5667db96d56Sopenharmony_ci                # We're using the first 3, because we're looking for
5677db96d56Sopenharmony_ci                #  "rb'" (for example) at the start of the token. If
5687db96d56Sopenharmony_ci                #  we switch to longer prefixes, this needs to be
5697db96d56Sopenharmony_ci                #  adjusted.
5707db96d56Sopenharmony_ci                # Note that initial == token[:1].
5717db96d56Sopenharmony_ci                # Also note that single quote checking must come after
5727db96d56Sopenharmony_ci                #  triple quote checking (above).
5737db96d56Sopenharmony_ci                elif (initial in single_quoted or
5747db96d56Sopenharmony_ci                      token[:2] in single_quoted or
5757db96d56Sopenharmony_ci                      token[:3] in single_quoted):
5767db96d56Sopenharmony_ci                    if token[-1] == '\n':                  # continued string
5777db96d56Sopenharmony_ci                        strstart = (lnum, start)
5787db96d56Sopenharmony_ci                        # Again, using the first 3 chars of the
5797db96d56Sopenharmony_ci                        #  token. This is looking for the matching end
5807db96d56Sopenharmony_ci                        #  regex for the correct type of quote
5817db96d56Sopenharmony_ci                        #  character. So it's really looking for
5827db96d56Sopenharmony_ci                        #  endpats["'"] or endpats['"'], by trying to
5837db96d56Sopenharmony_ci                        #  skip string prefix characters, if any.
5847db96d56Sopenharmony_ci                        endprog = _compile(endpats.get(initial) or
5857db96d56Sopenharmony_ci                                           endpats.get(token[1]) or
5867db96d56Sopenharmony_ci                                           endpats.get(token[2]))
5877db96d56Sopenharmony_ci                        contstr, needcont = line[start:], 1
5887db96d56Sopenharmony_ci                        contline = line
5897db96d56Sopenharmony_ci                        break
5907db96d56Sopenharmony_ci                    else:                                  # ordinary string
5917db96d56Sopenharmony_ci                        yield TokenInfo(STRING, token, spos, epos, line)
5927db96d56Sopenharmony_ci
5937db96d56Sopenharmony_ci                elif initial.isidentifier():               # ordinary name
5947db96d56Sopenharmony_ci                    yield TokenInfo(NAME, token, spos, epos, line)
5957db96d56Sopenharmony_ci                elif initial == '\\':                      # continued stmt
5967db96d56Sopenharmony_ci                    continued = 1
5977db96d56Sopenharmony_ci                else:
5987db96d56Sopenharmony_ci                    if initial in '([{':
5997db96d56Sopenharmony_ci                        parenlev += 1
6007db96d56Sopenharmony_ci                    elif initial in ')]}':
6017db96d56Sopenharmony_ci                        parenlev -= 1
6027db96d56Sopenharmony_ci                    yield TokenInfo(OP, token, spos, epos, line)
6037db96d56Sopenharmony_ci            else:
6047db96d56Sopenharmony_ci                yield TokenInfo(ERRORTOKEN, line[pos],
6057db96d56Sopenharmony_ci                           (lnum, pos), (lnum, pos+1), line)
6067db96d56Sopenharmony_ci                pos += 1
6077db96d56Sopenharmony_ci
6087db96d56Sopenharmony_ci    # Add an implicit NEWLINE if the input doesn't end in one
6097db96d56Sopenharmony_ci    if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
6107db96d56Sopenharmony_ci        yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
6117db96d56Sopenharmony_ci    for indent in indents[1:]:                 # pop remaining indent levels
6127db96d56Sopenharmony_ci        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
6137db96d56Sopenharmony_ci    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
6147db96d56Sopenharmony_ci
6157db96d56Sopenharmony_ci
6167db96d56Sopenharmony_cidef generate_tokens(readline):
6177db96d56Sopenharmony_ci    """Tokenize a source reading Python code as unicode strings.
6187db96d56Sopenharmony_ci
6197db96d56Sopenharmony_ci    This has the same API as tokenize(), except that it expects the *readline*
6207db96d56Sopenharmony_ci    callable to return str objects instead of bytes.
6217db96d56Sopenharmony_ci    """
6227db96d56Sopenharmony_ci    return _tokenize(readline, None)
6237db96d56Sopenharmony_ci
6247db96d56Sopenharmony_cidef main():
6257db96d56Sopenharmony_ci    import argparse
6267db96d56Sopenharmony_ci
6277db96d56Sopenharmony_ci    # Helper error handling routines
6287db96d56Sopenharmony_ci    def perror(message):
6297db96d56Sopenharmony_ci        sys.stderr.write(message)
6307db96d56Sopenharmony_ci        sys.stderr.write('\n')
6317db96d56Sopenharmony_ci
6327db96d56Sopenharmony_ci    def error(message, filename=None, location=None):
6337db96d56Sopenharmony_ci        if location:
6347db96d56Sopenharmony_ci            args = (filename,) + location + (message,)
6357db96d56Sopenharmony_ci            perror("%s:%d:%d: error: %s" % args)
6367db96d56Sopenharmony_ci        elif filename:
6377db96d56Sopenharmony_ci            perror("%s: error: %s" % (filename, message))
6387db96d56Sopenharmony_ci        else:
6397db96d56Sopenharmony_ci            perror("error: %s" % message)
6407db96d56Sopenharmony_ci        sys.exit(1)
6417db96d56Sopenharmony_ci
6427db96d56Sopenharmony_ci    # Parse the arguments and options
6437db96d56Sopenharmony_ci    parser = argparse.ArgumentParser(prog='python -m tokenize')
6447db96d56Sopenharmony_ci    parser.add_argument(dest='filename', nargs='?',
6457db96d56Sopenharmony_ci                        metavar='filename.py',
6467db96d56Sopenharmony_ci                        help='the file to tokenize; defaults to stdin')
6477db96d56Sopenharmony_ci    parser.add_argument('-e', '--exact', dest='exact', action='store_true',
6487db96d56Sopenharmony_ci                        help='display token names using the exact type')
6497db96d56Sopenharmony_ci    args = parser.parse_args()
6507db96d56Sopenharmony_ci
6517db96d56Sopenharmony_ci    try:
6527db96d56Sopenharmony_ci        # Tokenize the input
6537db96d56Sopenharmony_ci        if args.filename:
6547db96d56Sopenharmony_ci            filename = args.filename
6557db96d56Sopenharmony_ci            with _builtin_open(filename, 'rb') as f:
6567db96d56Sopenharmony_ci                tokens = list(tokenize(f.readline))
6577db96d56Sopenharmony_ci        else:
6587db96d56Sopenharmony_ci            filename = "<stdin>"
6597db96d56Sopenharmony_ci            tokens = _tokenize(sys.stdin.readline, None)
6607db96d56Sopenharmony_ci
6617db96d56Sopenharmony_ci        # Output the tokenization
6627db96d56Sopenharmony_ci        for token in tokens:
6637db96d56Sopenharmony_ci            token_type = token.type
6647db96d56Sopenharmony_ci            if args.exact:
6657db96d56Sopenharmony_ci                token_type = token.exact_type
6667db96d56Sopenharmony_ci            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
6677db96d56Sopenharmony_ci            print("%-20s%-15s%-15r" %
6687db96d56Sopenharmony_ci                  (token_range, tok_name[token_type], token.string))
6697db96d56Sopenharmony_ci    except IndentationError as err:
6707db96d56Sopenharmony_ci        line, column = err.args[1][1:3]
6717db96d56Sopenharmony_ci        error(err.args[0], filename, (line, column))
6727db96d56Sopenharmony_ci    except TokenError as err:
6737db96d56Sopenharmony_ci        line, column = err.args[1]
6747db96d56Sopenharmony_ci        error(err.args[0], filename, (line, column))
6757db96d56Sopenharmony_ci    except SyntaxError as err:
6767db96d56Sopenharmony_ci        error(err, filename)
6777db96d56Sopenharmony_ci    except OSError as err:
6787db96d56Sopenharmony_ci        error(err)
6797db96d56Sopenharmony_ci    except KeyboardInterrupt:
6807db96d56Sopenharmony_ci        print("interrupted\n")
6817db96d56Sopenharmony_ci    except Exception as err:
6827db96d56Sopenharmony_ci        perror("unexpected error: %s" % err)
6837db96d56Sopenharmony_ci        raise
6847db96d56Sopenharmony_ci
6857db96d56Sopenharmony_cidef _generate_tokens_from_c_tokenizer(source):
6867db96d56Sopenharmony_ci    """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
6877db96d56Sopenharmony_ci    import _tokenize as c_tokenizer
6887db96d56Sopenharmony_ci    for info in c_tokenizer.TokenizerIter(source):
6897db96d56Sopenharmony_ci        tok, type, lineno, end_lineno, col_off, end_col_off, line = info
6907db96d56Sopenharmony_ci        yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line)
6917db96d56Sopenharmony_ci
6927db96d56Sopenharmony_ci
6937db96d56Sopenharmony_ciif __name__ == "__main__":
6947db96d56Sopenharmony_ci    main()
695