from test import support
from test.support import os_helper
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                     open as tokenize_open, Untokenizer, generate_tokens,
                     NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT)
from io import BytesIO, StringIO
import unittest
from textwrap import dedent
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
from test.support import os_helper
from test.support.script_helper import run_test_script, make_script
import os
import token
177db96d56Sopenharmony_ci
# Converts a source string into a list of textual representations
# of the tokens, such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    """Return one formatted table row per token produced by *token_generator*.

    Each row has the form ``"    NAME       'if'          (1, 0) (1, 2)"``:
    the token type name, the repr of the token string, and the start and
    end positions.

    token_generator -- iterable of 5-tuples (type, string, start, end, line).
    source_string   -- the source text the tokens were produced from; it is
                       only used to count lines and to detect a missing
                       trailing newline.

    Iteration stops at the first ENDMARKER, which is not included.  When
    *source_string* does not end with a newline character, a NEWLINE token
    ending on the last source line is left out as well.
    """
    result = []
    num_lines = len(source_string.splitlines())
    # endswith() is safe on an empty string, whereas source_string[-1]
    # would raise IndexError for empty input.
    missing_trailing_nl = not source_string.endswith(('\r', '\n'))

    for tok_type, token, start, end, line in token_generator:
        if tok_type == ENDMARKER:
            break
        # Ignore the new line on the last line if the input lacks one
        if missing_trailing_nl and tok_type == NEWLINE and end[0] == num_lines:
            continue
        result.append(f"    {tok_name[tok_type]:10} {token!r:13} {start} {end}")

    return result
377db96d56Sopenharmony_ci
class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER, ENCODING and
    # final NEWLINE are omitted for brevity.
447db96d56Sopenharmony_ci
457db96d56Sopenharmony_ci    def check_tokenize(self, s, expected):
467db96d56Sopenharmony_ci        # Format the tokens in s in a table format.
477db96d56Sopenharmony_ci        # The ENDMARKER and final NEWLINE are omitted.
487db96d56Sopenharmony_ci        f = BytesIO(s.encode('utf-8'))
497db96d56Sopenharmony_ci        result = stringify_tokens_from_source(tokenize(f.readline), s)
507db96d56Sopenharmony_ci        self.assertEqual(result,
517db96d56Sopenharmony_ci                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
527db96d56Sopenharmony_ci                         expected.rstrip().splitlines())
537db96d56Sopenharmony_ci
547db96d56Sopenharmony_ci    def test_implicit_newline(self):
557db96d56Sopenharmony_ci        # Make sure that the tokenizer puts in an implicit NEWLINE
567db96d56Sopenharmony_ci        # when the input lacks a trailing new line.
577db96d56Sopenharmony_ci        f = BytesIO("x".encode('utf-8'))
587db96d56Sopenharmony_ci        tokens = list(tokenize(f.readline))
597db96d56Sopenharmony_ci        self.assertEqual(tokens[-2].type, NEWLINE)
607db96d56Sopenharmony_ci        self.assertEqual(tokens[-1].type, ENDMARKER)
617db96d56Sopenharmony_ci
627db96d56Sopenharmony_ci    def test_basic(self):
637db96d56Sopenharmony_ci        self.check_tokenize("1 + 1", """\
647db96d56Sopenharmony_ci    NUMBER     '1'           (1, 0) (1, 1)
657db96d56Sopenharmony_ci    OP         '+'           (1, 2) (1, 3)
667db96d56Sopenharmony_ci    NUMBER     '1'           (1, 4) (1, 5)
677db96d56Sopenharmony_ci    """)
687db96d56Sopenharmony_ci        self.check_tokenize("if False:\n"
697db96d56Sopenharmony_ci                            "    # NL\n"
707db96d56Sopenharmony_ci                            "    \n"
717db96d56Sopenharmony_ci                            "    True = False # NEWLINE\n", """\
727db96d56Sopenharmony_ci    NAME       'if'          (1, 0) (1, 2)
737db96d56Sopenharmony_ci    NAME       'False'       (1, 3) (1, 8)
747db96d56Sopenharmony_ci    OP         ':'           (1, 8) (1, 9)
757db96d56Sopenharmony_ci    NEWLINE    '\\n'          (1, 9) (1, 10)
767db96d56Sopenharmony_ci    COMMENT    '# NL'        (2, 4) (2, 8)
777db96d56Sopenharmony_ci    NL         '\\n'          (2, 8) (2, 9)
787db96d56Sopenharmony_ci    NL         '\\n'          (3, 4) (3, 5)
797db96d56Sopenharmony_ci    INDENT     '    '        (4, 0) (4, 4)
807db96d56Sopenharmony_ci    NAME       'True'        (4, 4) (4, 8)
817db96d56Sopenharmony_ci    OP         '='           (4, 9) (4, 10)
827db96d56Sopenharmony_ci    NAME       'False'       (4, 11) (4, 16)
837db96d56Sopenharmony_ci    COMMENT    '# NEWLINE'   (4, 17) (4, 26)
847db96d56Sopenharmony_ci    NEWLINE    '\\n'          (4, 26) (4, 27)
857db96d56Sopenharmony_ci    DEDENT     ''            (5, 0) (5, 0)
867db96d56Sopenharmony_ci    """)
877db96d56Sopenharmony_ci        indent_error_file = b"""\
887db96d56Sopenharmony_cidef k(x):
897db96d56Sopenharmony_ci    x += 2
907db96d56Sopenharmony_ci  x += 5
917db96d56Sopenharmony_ci"""
927db96d56Sopenharmony_ci        readline = BytesIO(indent_error_file).readline
937db96d56Sopenharmony_ci        with self.assertRaisesRegex(IndentationError,
947db96d56Sopenharmony_ci                                    "unindent does not match any "
957db96d56Sopenharmony_ci                                    "outer indentation level"):
967db96d56Sopenharmony_ci            for tok in tokenize(readline):
977db96d56Sopenharmony_ci                pass
987db96d56Sopenharmony_ci
997db96d56Sopenharmony_ci    def test_int(self):
1007db96d56Sopenharmony_ci        # Ordinary integers and binary operators
1017db96d56Sopenharmony_ci        self.check_tokenize("0xff <= 255", """\
1027db96d56Sopenharmony_ci    NUMBER     '0xff'        (1, 0) (1, 4)
1037db96d56Sopenharmony_ci    OP         '<='          (1, 5) (1, 7)
1047db96d56Sopenharmony_ci    NUMBER     '255'         (1, 8) (1, 11)
1057db96d56Sopenharmony_ci    """)
1067db96d56Sopenharmony_ci        self.check_tokenize("0b10 <= 255", """\
1077db96d56Sopenharmony_ci    NUMBER     '0b10'        (1, 0) (1, 4)
1087db96d56Sopenharmony_ci    OP         '<='          (1, 5) (1, 7)
1097db96d56Sopenharmony_ci    NUMBER     '255'         (1, 8) (1, 11)
1107db96d56Sopenharmony_ci    """)
1117db96d56Sopenharmony_ci        self.check_tokenize("0o123 <= 0O123", """\
1127db96d56Sopenharmony_ci    NUMBER     '0o123'       (1, 0) (1, 5)
1137db96d56Sopenharmony_ci    OP         '<='          (1, 6) (1, 8)
1147db96d56Sopenharmony_ci    NUMBER     '0O123'       (1, 9) (1, 14)
1157db96d56Sopenharmony_ci    """)
1167db96d56Sopenharmony_ci        self.check_tokenize("1234567 > ~0x15", """\
1177db96d56Sopenharmony_ci    NUMBER     '1234567'     (1, 0) (1, 7)
1187db96d56Sopenharmony_ci    OP         '>'           (1, 8) (1, 9)
1197db96d56Sopenharmony_ci    OP         '~'           (1, 10) (1, 11)
1207db96d56Sopenharmony_ci    NUMBER     '0x15'        (1, 11) (1, 15)
1217db96d56Sopenharmony_ci    """)
1227db96d56Sopenharmony_ci        self.check_tokenize("2134568 != 1231515", """\
1237db96d56Sopenharmony_ci    NUMBER     '2134568'     (1, 0) (1, 7)
1247db96d56Sopenharmony_ci    OP         '!='          (1, 8) (1, 10)
1257db96d56Sopenharmony_ci    NUMBER     '1231515'     (1, 11) (1, 18)
1267db96d56Sopenharmony_ci    """)
1277db96d56Sopenharmony_ci        self.check_tokenize("(-124561-1) & 200000000", """\
1287db96d56Sopenharmony_ci    OP         '('           (1, 0) (1, 1)
1297db96d56Sopenharmony_ci    OP         '-'           (1, 1) (1, 2)
1307db96d56Sopenharmony_ci    NUMBER     '124561'      (1, 2) (1, 8)
1317db96d56Sopenharmony_ci    OP         '-'           (1, 8) (1, 9)
1327db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
1337db96d56Sopenharmony_ci    OP         ')'           (1, 10) (1, 11)
1347db96d56Sopenharmony_ci    OP         '&'           (1, 12) (1, 13)
1357db96d56Sopenharmony_ci    NUMBER     '200000000'   (1, 14) (1, 23)
1367db96d56Sopenharmony_ci    """)
1377db96d56Sopenharmony_ci        self.check_tokenize("0xdeadbeef != -1", """\
1387db96d56Sopenharmony_ci    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
1397db96d56Sopenharmony_ci    OP         '!='          (1, 11) (1, 13)
1407db96d56Sopenharmony_ci    OP         '-'           (1, 14) (1, 15)
1417db96d56Sopenharmony_ci    NUMBER     '1'           (1, 15) (1, 16)
1427db96d56Sopenharmony_ci    """)
1437db96d56Sopenharmony_ci        self.check_tokenize("0xdeadc0de & 12345", """\
1447db96d56Sopenharmony_ci    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
1457db96d56Sopenharmony_ci    OP         '&'           (1, 11) (1, 12)
1467db96d56Sopenharmony_ci    NUMBER     '12345'       (1, 13) (1, 18)
1477db96d56Sopenharmony_ci    """)
1487db96d56Sopenharmony_ci        self.check_tokenize("0xFF & 0x15 | 1234", """\
1497db96d56Sopenharmony_ci    NUMBER     '0xFF'        (1, 0) (1, 4)
1507db96d56Sopenharmony_ci    OP         '&'           (1, 5) (1, 6)
1517db96d56Sopenharmony_ci    NUMBER     '0x15'        (1, 7) (1, 11)
1527db96d56Sopenharmony_ci    OP         '|'           (1, 12) (1, 13)
1537db96d56Sopenharmony_ci    NUMBER     '1234'        (1, 14) (1, 18)
1547db96d56Sopenharmony_ci    """)
1557db96d56Sopenharmony_ci
1567db96d56Sopenharmony_ci    def test_long(self):
1577db96d56Sopenharmony_ci        # Long integers
1587db96d56Sopenharmony_ci        self.check_tokenize("x = 0", """\
1597db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
1607db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
1617db96d56Sopenharmony_ci    NUMBER     '0'           (1, 4) (1, 5)
1627db96d56Sopenharmony_ci    """)
1637db96d56Sopenharmony_ci        self.check_tokenize("x = 0xfffffffffff", """\
1647db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
1657db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
1667db96d56Sopenharmony_ci    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
1677db96d56Sopenharmony_ci    """)
1687db96d56Sopenharmony_ci        self.check_tokenize("x = 123141242151251616110", """\
1697db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
1707db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
1717db96d56Sopenharmony_ci    NUMBER     '123141242151251616110' (1, 4) (1, 25)
1727db96d56Sopenharmony_ci    """)
1737db96d56Sopenharmony_ci        self.check_tokenize("x = -15921590215012591", """\
1747db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
1757db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
1767db96d56Sopenharmony_ci    OP         '-'           (1, 4) (1, 5)
1777db96d56Sopenharmony_ci    NUMBER     '15921590215012591' (1, 5) (1, 22)
1787db96d56Sopenharmony_ci    """)
1797db96d56Sopenharmony_ci
1807db96d56Sopenharmony_ci    def test_float(self):
1817db96d56Sopenharmony_ci        # Floating point numbers
1827db96d56Sopenharmony_ci        self.check_tokenize("x = 3.14159", """\
1837db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
1847db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
1857db96d56Sopenharmony_ci    NUMBER     '3.14159'     (1, 4) (1, 11)
1867db96d56Sopenharmony_ci    """)
1877db96d56Sopenharmony_ci        self.check_tokenize("x = 314159.", """\
1887db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
1897db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
1907db96d56Sopenharmony_ci    NUMBER     '314159.'     (1, 4) (1, 11)
1917db96d56Sopenharmony_ci    """)
1927db96d56Sopenharmony_ci        self.check_tokenize("x = .314159", """\
1937db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
1947db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
1957db96d56Sopenharmony_ci    NUMBER     '.314159'     (1, 4) (1, 11)
1967db96d56Sopenharmony_ci    """)
1977db96d56Sopenharmony_ci        self.check_tokenize("x = 3e14159", """\
1987db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
1997db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2007db96d56Sopenharmony_ci    NUMBER     '3e14159'     (1, 4) (1, 11)
2017db96d56Sopenharmony_ci    """)
2027db96d56Sopenharmony_ci        self.check_tokenize("x = 3E123", """\
2037db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
2047db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2057db96d56Sopenharmony_ci    NUMBER     '3E123'       (1, 4) (1, 9)
2067db96d56Sopenharmony_ci    """)
2077db96d56Sopenharmony_ci        self.check_tokenize("x+y = 3e-1230", """\
2087db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
2097db96d56Sopenharmony_ci    OP         '+'           (1, 1) (1, 2)
2107db96d56Sopenharmony_ci    NAME       'y'           (1, 2) (1, 3)
2117db96d56Sopenharmony_ci    OP         '='           (1, 4) (1, 5)
2127db96d56Sopenharmony_ci    NUMBER     '3e-1230'     (1, 6) (1, 13)
2137db96d56Sopenharmony_ci    """)
2147db96d56Sopenharmony_ci        self.check_tokenize("x = 3.14e159", """\
2157db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
2167db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2177db96d56Sopenharmony_ci    NUMBER     '3.14e159'    (1, 4) (1, 12)
2187db96d56Sopenharmony_ci    """)
2197db96d56Sopenharmony_ci
2207db96d56Sopenharmony_ci    def test_underscore_literals(self):
2217db96d56Sopenharmony_ci        def number_token(s):
2227db96d56Sopenharmony_ci            f = BytesIO(s.encode('utf-8'))
2237db96d56Sopenharmony_ci            for toktype, token, start, end, line in tokenize(f.readline):
2247db96d56Sopenharmony_ci                if toktype == NUMBER:
2257db96d56Sopenharmony_ci                    return token
2267db96d56Sopenharmony_ci            return 'invalid token'
2277db96d56Sopenharmony_ci        for lit in VALID_UNDERSCORE_LITERALS:
2287db96d56Sopenharmony_ci            if '(' in lit:
2297db96d56Sopenharmony_ci                # this won't work with compound complex inputs
2307db96d56Sopenharmony_ci                continue
2317db96d56Sopenharmony_ci            self.assertEqual(number_token(lit), lit)
2327db96d56Sopenharmony_ci        for lit in INVALID_UNDERSCORE_LITERALS:
2337db96d56Sopenharmony_ci            self.assertNotEqual(number_token(lit), lit)
2347db96d56Sopenharmony_ci
2357db96d56Sopenharmony_ci    def test_string(self):
2367db96d56Sopenharmony_ci        # String literals
2377db96d56Sopenharmony_ci        self.check_tokenize("x = ''; y = \"\"", """\
2387db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
2397db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2407db96d56Sopenharmony_ci    STRING     "''"          (1, 4) (1, 6)
2417db96d56Sopenharmony_ci    OP         ';'           (1, 6) (1, 7)
2427db96d56Sopenharmony_ci    NAME       'y'           (1, 8) (1, 9)
2437db96d56Sopenharmony_ci    OP         '='           (1, 10) (1, 11)
2447db96d56Sopenharmony_ci    STRING     '""'          (1, 12) (1, 14)
2457db96d56Sopenharmony_ci    """)
2467db96d56Sopenharmony_ci        self.check_tokenize("x = '\"'; y = \"'\"", """\
2477db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
2487db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2497db96d56Sopenharmony_ci    STRING     '\\'"\\''       (1, 4) (1, 7)
2507db96d56Sopenharmony_ci    OP         ';'           (1, 7) (1, 8)
2517db96d56Sopenharmony_ci    NAME       'y'           (1, 9) (1, 10)
2527db96d56Sopenharmony_ci    OP         '='           (1, 11) (1, 12)
2537db96d56Sopenharmony_ci    STRING     '"\\'"'        (1, 13) (1, 16)
2547db96d56Sopenharmony_ci    """)
2557db96d56Sopenharmony_ci        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
2567db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
2577db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2587db96d56Sopenharmony_ci    STRING     '"doesn\\'t "' (1, 4) (1, 14)
2597db96d56Sopenharmony_ci    NAME       'shrink'      (1, 14) (1, 20)
2607db96d56Sopenharmony_ci    STRING     '", does it"' (1, 20) (1, 31)
2617db96d56Sopenharmony_ci    """)
2627db96d56Sopenharmony_ci        self.check_tokenize("x = 'abc' + 'ABC'", """\
2637db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
2647db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2657db96d56Sopenharmony_ci    STRING     "'abc'"       (1, 4) (1, 9)
2667db96d56Sopenharmony_ci    OP         '+'           (1, 10) (1, 11)
2677db96d56Sopenharmony_ci    STRING     "'ABC'"       (1, 12) (1, 17)
2687db96d56Sopenharmony_ci    """)
2697db96d56Sopenharmony_ci        self.check_tokenize('y = "ABC" + "ABC"', """\
2707db96d56Sopenharmony_ci    NAME       'y'           (1, 0) (1, 1)
2717db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2727db96d56Sopenharmony_ci    STRING     '"ABC"'       (1, 4) (1, 9)
2737db96d56Sopenharmony_ci    OP         '+'           (1, 10) (1, 11)
2747db96d56Sopenharmony_ci    STRING     '"ABC"'       (1, 12) (1, 17)
2757db96d56Sopenharmony_ci    """)
2767db96d56Sopenharmony_ci        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
2777db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
2787db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2797db96d56Sopenharmony_ci    STRING     "r'abc'"      (1, 4) (1, 10)
2807db96d56Sopenharmony_ci    OP         '+'           (1, 11) (1, 12)
2817db96d56Sopenharmony_ci    STRING     "r'ABC'"      (1, 13) (1, 19)
2827db96d56Sopenharmony_ci    OP         '+'           (1, 20) (1, 21)
2837db96d56Sopenharmony_ci    STRING     "R'ABC'"      (1, 22) (1, 28)
2847db96d56Sopenharmony_ci    OP         '+'           (1, 29) (1, 30)
2857db96d56Sopenharmony_ci    STRING     "R'ABC'"      (1, 31) (1, 37)
2867db96d56Sopenharmony_ci    """)
2877db96d56Sopenharmony_ci        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
2887db96d56Sopenharmony_ci    NAME       'y'           (1, 0) (1, 1)
2897db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
2907db96d56Sopenharmony_ci    STRING     'r"abc"'      (1, 4) (1, 10)
2917db96d56Sopenharmony_ci    OP         '+'           (1, 11) (1, 12)
2927db96d56Sopenharmony_ci    STRING     'r"ABC"'      (1, 13) (1, 19)
2937db96d56Sopenharmony_ci    OP         '+'           (1, 20) (1, 21)
2947db96d56Sopenharmony_ci    STRING     'R"ABC"'      (1, 22) (1, 28)
2957db96d56Sopenharmony_ci    OP         '+'           (1, 29) (1, 30)
2967db96d56Sopenharmony_ci    STRING     'R"ABC"'      (1, 31) (1, 37)
2977db96d56Sopenharmony_ci    """)
2987db96d56Sopenharmony_ci
2997db96d56Sopenharmony_ci        self.check_tokenize("u'abc' + U'abc'", """\
3007db96d56Sopenharmony_ci    STRING     "u'abc'"      (1, 0) (1, 6)
3017db96d56Sopenharmony_ci    OP         '+'           (1, 7) (1, 8)
3027db96d56Sopenharmony_ci    STRING     "U'abc'"      (1, 9) (1, 15)
3037db96d56Sopenharmony_ci    """)
3047db96d56Sopenharmony_ci        self.check_tokenize('u"abc" + U"abc"', """\
3057db96d56Sopenharmony_ci    STRING     'u"abc"'      (1, 0) (1, 6)
3067db96d56Sopenharmony_ci    OP         '+'           (1, 7) (1, 8)
3077db96d56Sopenharmony_ci    STRING     'U"abc"'      (1, 9) (1, 15)
3087db96d56Sopenharmony_ci    """)
3097db96d56Sopenharmony_ci
3107db96d56Sopenharmony_ci        self.check_tokenize("b'abc' + B'abc'", """\
3117db96d56Sopenharmony_ci    STRING     "b'abc'"      (1, 0) (1, 6)
3127db96d56Sopenharmony_ci    OP         '+'           (1, 7) (1, 8)
3137db96d56Sopenharmony_ci    STRING     "B'abc'"      (1, 9) (1, 15)
3147db96d56Sopenharmony_ci    """)
3157db96d56Sopenharmony_ci        self.check_tokenize('b"abc" + B"abc"', """\
3167db96d56Sopenharmony_ci    STRING     'b"abc"'      (1, 0) (1, 6)
3177db96d56Sopenharmony_ci    OP         '+'           (1, 7) (1, 8)
3187db96d56Sopenharmony_ci    STRING     'B"abc"'      (1, 9) (1, 15)
3197db96d56Sopenharmony_ci    """)
3207db96d56Sopenharmony_ci        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
3217db96d56Sopenharmony_ci    STRING     "br'abc'"     (1, 0) (1, 7)
3227db96d56Sopenharmony_ci    OP         '+'           (1, 8) (1, 9)
3237db96d56Sopenharmony_ci    STRING     "bR'abc'"     (1, 10) (1, 17)
3247db96d56Sopenharmony_ci    OP         '+'           (1, 18) (1, 19)
3257db96d56Sopenharmony_ci    STRING     "Br'abc'"     (1, 20) (1, 27)
3267db96d56Sopenharmony_ci    OP         '+'           (1, 28) (1, 29)
3277db96d56Sopenharmony_ci    STRING     "BR'abc'"     (1, 30) (1, 37)
3287db96d56Sopenharmony_ci    """)
3297db96d56Sopenharmony_ci        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
3307db96d56Sopenharmony_ci    STRING     'br"abc"'     (1, 0) (1, 7)
3317db96d56Sopenharmony_ci    OP         '+'           (1, 8) (1, 9)
3327db96d56Sopenharmony_ci    STRING     'bR"abc"'     (1, 10) (1, 17)
3337db96d56Sopenharmony_ci    OP         '+'           (1, 18) (1, 19)
3347db96d56Sopenharmony_ci    STRING     'Br"abc"'     (1, 20) (1, 27)
3357db96d56Sopenharmony_ci    OP         '+'           (1, 28) (1, 29)
3367db96d56Sopenharmony_ci    STRING     'BR"abc"'     (1, 30) (1, 37)
3377db96d56Sopenharmony_ci    """)
3387db96d56Sopenharmony_ci        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
3397db96d56Sopenharmony_ci    STRING     "rb'abc'"     (1, 0) (1, 7)
3407db96d56Sopenharmony_ci    OP         '+'           (1, 8) (1, 9)
3417db96d56Sopenharmony_ci    STRING     "rB'abc'"     (1, 10) (1, 17)
3427db96d56Sopenharmony_ci    OP         '+'           (1, 18) (1, 19)
3437db96d56Sopenharmony_ci    STRING     "Rb'abc'"     (1, 20) (1, 27)
3447db96d56Sopenharmony_ci    OP         '+'           (1, 28) (1, 29)
3457db96d56Sopenharmony_ci    STRING     "RB'abc'"     (1, 30) (1, 37)
3467db96d56Sopenharmony_ci    """)
3477db96d56Sopenharmony_ci        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
3487db96d56Sopenharmony_ci    STRING     'rb"abc"'     (1, 0) (1, 7)
3497db96d56Sopenharmony_ci    OP         '+'           (1, 8) (1, 9)
3507db96d56Sopenharmony_ci    STRING     'rB"abc"'     (1, 10) (1, 17)
3517db96d56Sopenharmony_ci    OP         '+'           (1, 18) (1, 19)
3527db96d56Sopenharmony_ci    STRING     'Rb"abc"'     (1, 20) (1, 27)
3537db96d56Sopenharmony_ci    OP         '+'           (1, 28) (1, 29)
3547db96d56Sopenharmony_ci    STRING     'RB"abc"'     (1, 30) (1, 37)
3557db96d56Sopenharmony_ci    """)
3567db96d56Sopenharmony_ci        # Check 0, 1, and 2 character string prefixes.
3577db96d56Sopenharmony_ci        self.check_tokenize(r'"a\
3587db96d56Sopenharmony_cide\
3597db96d56Sopenharmony_cifg"', """\
3607db96d56Sopenharmony_ci    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
3617db96d56Sopenharmony_ci    """)
3627db96d56Sopenharmony_ci        self.check_tokenize(r'u"a\
3637db96d56Sopenharmony_cide"', """\
3647db96d56Sopenharmony_ci    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
3657db96d56Sopenharmony_ci    """)
3667db96d56Sopenharmony_ci        self.check_tokenize(r'rb"a\
3677db96d56Sopenharmony_cid"', """\
3687db96d56Sopenharmony_ci    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
3697db96d56Sopenharmony_ci    """)
3707db96d56Sopenharmony_ci        self.check_tokenize(r'"""a\
3717db96d56Sopenharmony_cib"""', """\
3727db96d56Sopenharmony_ci    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
3737db96d56Sopenharmony_ci    """)
3747db96d56Sopenharmony_ci        self.check_tokenize(r'u"""a\
3757db96d56Sopenharmony_cib"""', """\
3767db96d56Sopenharmony_ci    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
3777db96d56Sopenharmony_ci    """)
3787db96d56Sopenharmony_ci        self.check_tokenize(r'rb"""a\
3797db96d56Sopenharmony_cib\
3807db96d56Sopenharmony_cic"""', """\
3817db96d56Sopenharmony_ci    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
3827db96d56Sopenharmony_ci    """)
3837db96d56Sopenharmony_ci        self.check_tokenize('f"abc"', """\
3847db96d56Sopenharmony_ci    STRING     'f"abc"'      (1, 0) (1, 6)
3857db96d56Sopenharmony_ci    """)
3867db96d56Sopenharmony_ci        self.check_tokenize('fR"a{b}c"', """\
3877db96d56Sopenharmony_ci    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
3887db96d56Sopenharmony_ci    """)
3897db96d56Sopenharmony_ci        self.check_tokenize('f"""abc"""', """\
3907db96d56Sopenharmony_ci    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
3917db96d56Sopenharmony_ci    """)
3927db96d56Sopenharmony_ci        self.check_tokenize(r'f"abc\
3937db96d56Sopenharmony_cidef"', """\
3947db96d56Sopenharmony_ci    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
3957db96d56Sopenharmony_ci    """)
3967db96d56Sopenharmony_ci        self.check_tokenize(r'Rf"abc\
3977db96d56Sopenharmony_cidef"', """\
3987db96d56Sopenharmony_ci    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
3997db96d56Sopenharmony_ci    """)
4007db96d56Sopenharmony_ci
4017db96d56Sopenharmony_ci    def test_function(self):
4027db96d56Sopenharmony_ci        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
4037db96d56Sopenharmony_ci    NAME       'def'         (1, 0) (1, 3)
4047db96d56Sopenharmony_ci    NAME       'd22'         (1, 4) (1, 7)
4057db96d56Sopenharmony_ci    OP         '('           (1, 7) (1, 8)
4067db96d56Sopenharmony_ci    NAME       'a'           (1, 8) (1, 9)
4077db96d56Sopenharmony_ci    OP         ','           (1, 9) (1, 10)
4087db96d56Sopenharmony_ci    NAME       'b'           (1, 11) (1, 12)
4097db96d56Sopenharmony_ci    OP         ','           (1, 12) (1, 13)
4107db96d56Sopenharmony_ci    NAME       'c'           (1, 14) (1, 15)
4117db96d56Sopenharmony_ci    OP         '='           (1, 15) (1, 16)
4127db96d56Sopenharmony_ci    NUMBER     '2'           (1, 16) (1, 17)
4137db96d56Sopenharmony_ci    OP         ','           (1, 17) (1, 18)
4147db96d56Sopenharmony_ci    NAME       'd'           (1, 19) (1, 20)
4157db96d56Sopenharmony_ci    OP         '='           (1, 20) (1, 21)
4167db96d56Sopenharmony_ci    NUMBER     '2'           (1, 21) (1, 22)
4177db96d56Sopenharmony_ci    OP         ','           (1, 22) (1, 23)
4187db96d56Sopenharmony_ci    OP         '*'           (1, 24) (1, 25)
4197db96d56Sopenharmony_ci    NAME       'k'           (1, 25) (1, 26)
4207db96d56Sopenharmony_ci    OP         ')'           (1, 26) (1, 27)
4217db96d56Sopenharmony_ci    OP         ':'           (1, 27) (1, 28)
4227db96d56Sopenharmony_ci    NAME       'pass'        (1, 29) (1, 33)
4237db96d56Sopenharmony_ci    """)
4247db96d56Sopenharmony_ci        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
4257db96d56Sopenharmony_ci    NAME       'def'         (1, 0) (1, 3)
4267db96d56Sopenharmony_ci    NAME       'd01v_'       (1, 4) (1, 9)
4277db96d56Sopenharmony_ci    OP         '('           (1, 9) (1, 10)
4287db96d56Sopenharmony_ci    NAME       'a'           (1, 10) (1, 11)
4297db96d56Sopenharmony_ci    OP         '='           (1, 11) (1, 12)
4307db96d56Sopenharmony_ci    NUMBER     '1'           (1, 12) (1, 13)
4317db96d56Sopenharmony_ci    OP         ','           (1, 13) (1, 14)
4327db96d56Sopenharmony_ci    OP         '*'           (1, 15) (1, 16)
4337db96d56Sopenharmony_ci    NAME       'k'           (1, 16) (1, 17)
4347db96d56Sopenharmony_ci    OP         ','           (1, 17) (1, 18)
4357db96d56Sopenharmony_ci    OP         '**'          (1, 19) (1, 21)
4367db96d56Sopenharmony_ci    NAME       'w'           (1, 21) (1, 22)
4377db96d56Sopenharmony_ci    OP         ')'           (1, 22) (1, 23)
4387db96d56Sopenharmony_ci    OP         ':'           (1, 23) (1, 24)
4397db96d56Sopenharmony_ci    NAME       'pass'        (1, 25) (1, 29)
4407db96d56Sopenharmony_ci    """)
4417db96d56Sopenharmony_ci        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
4427db96d56Sopenharmony_ci    NAME       'def'         (1, 0) (1, 3)
4437db96d56Sopenharmony_ci    NAME       'd23'         (1, 4) (1, 7)
4447db96d56Sopenharmony_ci    OP         '('           (1, 7) (1, 8)
4457db96d56Sopenharmony_ci    NAME       'a'           (1, 8) (1, 9)
4467db96d56Sopenharmony_ci    OP         ':'           (1, 9) (1, 10)
4477db96d56Sopenharmony_ci    NAME       'str'         (1, 11) (1, 14)
4487db96d56Sopenharmony_ci    OP         ','           (1, 14) (1, 15)
4497db96d56Sopenharmony_ci    NAME       'b'           (1, 16) (1, 17)
4507db96d56Sopenharmony_ci    OP         ':'           (1, 17) (1, 18)
4517db96d56Sopenharmony_ci    NAME       'int'         (1, 19) (1, 22)
4527db96d56Sopenharmony_ci    OP         '='           (1, 22) (1, 23)
4537db96d56Sopenharmony_ci    NUMBER     '3'           (1, 23) (1, 24)
4547db96d56Sopenharmony_ci    OP         ')'           (1, 24) (1, 25)
4557db96d56Sopenharmony_ci    OP         '->'          (1, 26) (1, 28)
4567db96d56Sopenharmony_ci    NAME       'int'         (1, 29) (1, 32)
4577db96d56Sopenharmony_ci    OP         ':'           (1, 32) (1, 33)
4587db96d56Sopenharmony_ci    NAME       'pass'        (1, 34) (1, 38)
4597db96d56Sopenharmony_ci    """)
4607db96d56Sopenharmony_ci
4617db96d56Sopenharmony_ci    def test_comparison(self):
4627db96d56Sopenharmony_ci        # Comparison
4637db96d56Sopenharmony_ci        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
4647db96d56Sopenharmony_ci                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
4657db96d56Sopenharmony_ci    NAME       'if'          (1, 0) (1, 2)
4667db96d56Sopenharmony_ci    NUMBER     '1'           (1, 3) (1, 4)
4677db96d56Sopenharmony_ci    OP         '<'           (1, 5) (1, 6)
4687db96d56Sopenharmony_ci    NUMBER     '1'           (1, 7) (1, 8)
4697db96d56Sopenharmony_ci    OP         '>'           (1, 9) (1, 10)
4707db96d56Sopenharmony_ci    NUMBER     '1'           (1, 11) (1, 12)
4717db96d56Sopenharmony_ci    OP         '=='          (1, 13) (1, 15)
4727db96d56Sopenharmony_ci    NUMBER     '1'           (1, 16) (1, 17)
4737db96d56Sopenharmony_ci    OP         '>='          (1, 18) (1, 20)
4747db96d56Sopenharmony_ci    NUMBER     '5'           (1, 21) (1, 22)
4757db96d56Sopenharmony_ci    OP         '<='          (1, 23) (1, 25)
4767db96d56Sopenharmony_ci    NUMBER     '0x15'        (1, 26) (1, 30)
4777db96d56Sopenharmony_ci    OP         '<='          (1, 31) (1, 33)
4787db96d56Sopenharmony_ci    NUMBER     '0x12'        (1, 34) (1, 38)
4797db96d56Sopenharmony_ci    OP         '!='          (1, 39) (1, 41)
4807db96d56Sopenharmony_ci    NUMBER     '1'           (1, 42) (1, 43)
4817db96d56Sopenharmony_ci    NAME       'and'         (1, 44) (1, 47)
4827db96d56Sopenharmony_ci    NUMBER     '5'           (1, 48) (1, 49)
4837db96d56Sopenharmony_ci    NAME       'in'          (1, 50) (1, 52)
4847db96d56Sopenharmony_ci    NUMBER     '1'           (1, 53) (1, 54)
4857db96d56Sopenharmony_ci    NAME       'not'         (1, 55) (1, 58)
4867db96d56Sopenharmony_ci    NAME       'in'          (1, 59) (1, 61)
4877db96d56Sopenharmony_ci    NUMBER     '1'           (1, 62) (1, 63)
4887db96d56Sopenharmony_ci    NAME       'is'          (1, 64) (1, 66)
4897db96d56Sopenharmony_ci    NUMBER     '1'           (1, 67) (1, 68)
4907db96d56Sopenharmony_ci    NAME       'or'          (1, 69) (1, 71)
4917db96d56Sopenharmony_ci    NUMBER     '5'           (1, 72) (1, 73)
4927db96d56Sopenharmony_ci    NAME       'is'          (1, 74) (1, 76)
4937db96d56Sopenharmony_ci    NAME       'not'         (1, 77) (1, 80)
4947db96d56Sopenharmony_ci    NUMBER     '1'           (1, 81) (1, 82)
4957db96d56Sopenharmony_ci    OP         ':'           (1, 82) (1, 83)
4967db96d56Sopenharmony_ci    NAME       'pass'        (1, 84) (1, 88)
4977db96d56Sopenharmony_ci    """)
4987db96d56Sopenharmony_ci
4997db96d56Sopenharmony_ci    def test_shift(self):
5007db96d56Sopenharmony_ci        # Shift
5017db96d56Sopenharmony_ci        self.check_tokenize("x = 1 << 1 >> 5", """\
5027db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
5037db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
5047db96d56Sopenharmony_ci    NUMBER     '1'           (1, 4) (1, 5)
5057db96d56Sopenharmony_ci    OP         '<<'          (1, 6) (1, 8)
5067db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
5077db96d56Sopenharmony_ci    OP         '>>'          (1, 11) (1, 13)
5087db96d56Sopenharmony_ci    NUMBER     '5'           (1, 14) (1, 15)
5097db96d56Sopenharmony_ci    """)
5107db96d56Sopenharmony_ci
5117db96d56Sopenharmony_ci    def test_additive(self):
5127db96d56Sopenharmony_ci        # Additive
5137db96d56Sopenharmony_ci        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
5147db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
5157db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
5167db96d56Sopenharmony_ci    NUMBER     '1'           (1, 4) (1, 5)
5177db96d56Sopenharmony_ci    OP         '-'           (1, 6) (1, 7)
5187db96d56Sopenharmony_ci    NAME       'y'           (1, 8) (1, 9)
5197db96d56Sopenharmony_ci    OP         '+'           (1, 10) (1, 11)
5207db96d56Sopenharmony_ci    NUMBER     '15'          (1, 12) (1, 14)
5217db96d56Sopenharmony_ci    OP         '-'           (1, 15) (1, 16)
5227db96d56Sopenharmony_ci    NUMBER     '1'           (1, 17) (1, 18)
5237db96d56Sopenharmony_ci    OP         '+'           (1, 19) (1, 20)
5247db96d56Sopenharmony_ci    NUMBER     '0x124'       (1, 21) (1, 26)
5257db96d56Sopenharmony_ci    OP         '+'           (1, 27) (1, 28)
5267db96d56Sopenharmony_ci    NAME       'z'           (1, 29) (1, 30)
5277db96d56Sopenharmony_ci    OP         '+'           (1, 31) (1, 32)
5287db96d56Sopenharmony_ci    NAME       'a'           (1, 33) (1, 34)
5297db96d56Sopenharmony_ci    OP         '['           (1, 34) (1, 35)
5307db96d56Sopenharmony_ci    NUMBER     '5'           (1, 35) (1, 36)
5317db96d56Sopenharmony_ci    OP         ']'           (1, 36) (1, 37)
5327db96d56Sopenharmony_ci    """)
5337db96d56Sopenharmony_ci
5347db96d56Sopenharmony_ci    def test_multiplicative(self):
5357db96d56Sopenharmony_ci        # Multiplicative
5367db96d56Sopenharmony_ci        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
5377db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
5387db96d56Sopenharmony_ci    OP         '='           (1, 2) (1, 3)
5397db96d56Sopenharmony_ci    NUMBER     '1'           (1, 4) (1, 5)
5407db96d56Sopenharmony_ci    OP         '//'          (1, 5) (1, 7)
5417db96d56Sopenharmony_ci    NUMBER     '1'           (1, 7) (1, 8)
5427db96d56Sopenharmony_ci    OP         '*'           (1, 8) (1, 9)
5437db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
5447db96d56Sopenharmony_ci    OP         '/'           (1, 10) (1, 11)
5457db96d56Sopenharmony_ci    NUMBER     '5'           (1, 11) (1, 12)
5467db96d56Sopenharmony_ci    OP         '*'           (1, 12) (1, 13)
5477db96d56Sopenharmony_ci    NUMBER     '12'          (1, 13) (1, 15)
5487db96d56Sopenharmony_ci    OP         '%'           (1, 15) (1, 16)
5497db96d56Sopenharmony_ci    NUMBER     '0x12'        (1, 16) (1, 20)
5507db96d56Sopenharmony_ci    OP         '@'           (1, 20) (1, 21)
5517db96d56Sopenharmony_ci    NUMBER     '42'          (1, 21) (1, 23)
5527db96d56Sopenharmony_ci    """)
5537db96d56Sopenharmony_ci
5547db96d56Sopenharmony_ci    def test_unary(self):
5557db96d56Sopenharmony_ci        # Unary
5567db96d56Sopenharmony_ci        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
5577db96d56Sopenharmony_ci    OP         '~'           (1, 0) (1, 1)
5587db96d56Sopenharmony_ci    NUMBER     '1'           (1, 1) (1, 2)
5597db96d56Sopenharmony_ci    OP         '^'           (1, 3) (1, 4)
5607db96d56Sopenharmony_ci    NUMBER     '1'           (1, 5) (1, 6)
5617db96d56Sopenharmony_ci    OP         '&'           (1, 7) (1, 8)
5627db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
5637db96d56Sopenharmony_ci    OP         '|'           (1, 11) (1, 12)
5647db96d56Sopenharmony_ci    NUMBER     '1'           (1, 12) (1, 13)
5657db96d56Sopenharmony_ci    OP         '^'           (1, 14) (1, 15)
5667db96d56Sopenharmony_ci    OP         '-'           (1, 16) (1, 17)
5677db96d56Sopenharmony_ci    NUMBER     '1'           (1, 17) (1, 18)
5687db96d56Sopenharmony_ci    """)
5697db96d56Sopenharmony_ci        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
5707db96d56Sopenharmony_ci    OP         '-'           (1, 0) (1, 1)
5717db96d56Sopenharmony_ci    NUMBER     '1'           (1, 1) (1, 2)
5727db96d56Sopenharmony_ci    OP         '*'           (1, 2) (1, 3)
5737db96d56Sopenharmony_ci    NUMBER     '1'           (1, 3) (1, 4)
5747db96d56Sopenharmony_ci    OP         '/'           (1, 4) (1, 5)
5757db96d56Sopenharmony_ci    NUMBER     '1'           (1, 5) (1, 6)
5767db96d56Sopenharmony_ci    OP         '+'           (1, 6) (1, 7)
5777db96d56Sopenharmony_ci    NUMBER     '1'           (1, 7) (1, 8)
5787db96d56Sopenharmony_ci    OP         '*'           (1, 8) (1, 9)
5797db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
5807db96d56Sopenharmony_ci    OP         '//'          (1, 10) (1, 12)
5817db96d56Sopenharmony_ci    NUMBER     '1'           (1, 12) (1, 13)
5827db96d56Sopenharmony_ci    OP         '-'           (1, 14) (1, 15)
5837db96d56Sopenharmony_ci    OP         '-'           (1, 16) (1, 17)
5847db96d56Sopenharmony_ci    OP         '-'           (1, 17) (1, 18)
5857db96d56Sopenharmony_ci    OP         '-'           (1, 18) (1, 19)
5867db96d56Sopenharmony_ci    NUMBER     '1'           (1, 19) (1, 20)
5877db96d56Sopenharmony_ci    OP         '**'          (1, 20) (1, 22)
5887db96d56Sopenharmony_ci    NUMBER     '1'           (1, 22) (1, 23)
5897db96d56Sopenharmony_ci    """)
5907db96d56Sopenharmony_ci
5917db96d56Sopenharmony_ci    def test_selector(self):
5927db96d56Sopenharmony_ci        # Selector
5937db96d56Sopenharmony_ci        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
5947db96d56Sopenharmony_ci    NAME       'import'      (1, 0) (1, 6)
5957db96d56Sopenharmony_ci    NAME       'sys'         (1, 7) (1, 10)
5967db96d56Sopenharmony_ci    OP         ','           (1, 10) (1, 11)
5977db96d56Sopenharmony_ci    NAME       'time'        (1, 12) (1, 16)
5987db96d56Sopenharmony_ci    NEWLINE    '\\n'          (1, 16) (1, 17)
5997db96d56Sopenharmony_ci    NAME       'x'           (2, 0) (2, 1)
6007db96d56Sopenharmony_ci    OP         '='           (2, 2) (2, 3)
6017db96d56Sopenharmony_ci    NAME       'sys'         (2, 4) (2, 7)
6027db96d56Sopenharmony_ci    OP         '.'           (2, 7) (2, 8)
6037db96d56Sopenharmony_ci    NAME       'modules'     (2, 8) (2, 15)
6047db96d56Sopenharmony_ci    OP         '['           (2, 15) (2, 16)
6057db96d56Sopenharmony_ci    STRING     "'time'"      (2, 16) (2, 22)
6067db96d56Sopenharmony_ci    OP         ']'           (2, 22) (2, 23)
6077db96d56Sopenharmony_ci    OP         '.'           (2, 23) (2, 24)
6087db96d56Sopenharmony_ci    NAME       'time'        (2, 24) (2, 28)
6097db96d56Sopenharmony_ci    OP         '('           (2, 28) (2, 29)
6107db96d56Sopenharmony_ci    OP         ')'           (2, 29) (2, 30)
6117db96d56Sopenharmony_ci    """)
6127db96d56Sopenharmony_ci
6137db96d56Sopenharmony_ci    def test_method(self):
6147db96d56Sopenharmony_ci        # Methods
6157db96d56Sopenharmony_ci        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
6167db96d56Sopenharmony_ci    OP         '@'           (1, 0) (1, 1)
6177db96d56Sopenharmony_ci    NAME       'staticmethod' (1, 1) (1, 13)
6187db96d56Sopenharmony_ci    NEWLINE    '\\n'          (1, 13) (1, 14)
6197db96d56Sopenharmony_ci    NAME       'def'         (2, 0) (2, 3)
6207db96d56Sopenharmony_ci    NAME       'foo'         (2, 4) (2, 7)
6217db96d56Sopenharmony_ci    OP         '('           (2, 7) (2, 8)
6227db96d56Sopenharmony_ci    NAME       'x'           (2, 8) (2, 9)
6237db96d56Sopenharmony_ci    OP         ','           (2, 9) (2, 10)
6247db96d56Sopenharmony_ci    NAME       'y'           (2, 10) (2, 11)
6257db96d56Sopenharmony_ci    OP         ')'           (2, 11) (2, 12)
6267db96d56Sopenharmony_ci    OP         ':'           (2, 12) (2, 13)
6277db96d56Sopenharmony_ci    NAME       'pass'        (2, 14) (2, 18)
6287db96d56Sopenharmony_ci    """)
6297db96d56Sopenharmony_ci
6307db96d56Sopenharmony_ci    def test_tabs(self):
6317db96d56Sopenharmony_ci        # Evil tabs
6327db96d56Sopenharmony_ci        self.check_tokenize("def f():\n"
6337db96d56Sopenharmony_ci                            "\tif x\n"
6347db96d56Sopenharmony_ci                            "        \tpass", """\
6357db96d56Sopenharmony_ci    NAME       'def'         (1, 0) (1, 3)
6367db96d56Sopenharmony_ci    NAME       'f'           (1, 4) (1, 5)
6377db96d56Sopenharmony_ci    OP         '('           (1, 5) (1, 6)
6387db96d56Sopenharmony_ci    OP         ')'           (1, 6) (1, 7)
6397db96d56Sopenharmony_ci    OP         ':'           (1, 7) (1, 8)
6407db96d56Sopenharmony_ci    NEWLINE    '\\n'          (1, 8) (1, 9)
6417db96d56Sopenharmony_ci    INDENT     '\\t'          (2, 0) (2, 1)
6427db96d56Sopenharmony_ci    NAME       'if'          (2, 1) (2, 3)
6437db96d56Sopenharmony_ci    NAME       'x'           (2, 4) (2, 5)
6447db96d56Sopenharmony_ci    NEWLINE    '\\n'          (2, 5) (2, 6)
6457db96d56Sopenharmony_ci    INDENT     '        \\t'  (3, 0) (3, 9)
6467db96d56Sopenharmony_ci    NAME       'pass'        (3, 9) (3, 13)
6477db96d56Sopenharmony_ci    DEDENT     ''            (4, 0) (4, 0)
6487db96d56Sopenharmony_ci    DEDENT     ''            (4, 0) (4, 0)
6497db96d56Sopenharmony_ci    """)
6507db96d56Sopenharmony_ci
6517db96d56Sopenharmony_ci    def test_non_ascii_identifiers(self):
6527db96d56Sopenharmony_ci        # Non-ascii identifiers
6537db96d56Sopenharmony_ci        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
6547db96d56Sopenharmony_ci    NAME       'Örter'       (1, 0) (1, 5)
6557db96d56Sopenharmony_ci    OP         '='           (1, 6) (1, 7)
6567db96d56Sopenharmony_ci    STRING     "'places'"    (1, 8) (1, 16)
6577db96d56Sopenharmony_ci    NEWLINE    '\\n'          (1, 16) (1, 17)
6587db96d56Sopenharmony_ci    NAME       'grün'        (2, 0) (2, 4)
6597db96d56Sopenharmony_ci    OP         '='           (2, 5) (2, 6)
6607db96d56Sopenharmony_ci    STRING     "'green'"     (2, 7) (2, 14)
6617db96d56Sopenharmony_ci    """)
6627db96d56Sopenharmony_ci
6637db96d56Sopenharmony_ci    def test_unicode(self):
6647db96d56Sopenharmony_ci        # Legacy unicode literals:
6657db96d56Sopenharmony_ci        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
6667db96d56Sopenharmony_ci    NAME       'Örter'       (1, 0) (1, 5)
6677db96d56Sopenharmony_ci    OP         '='           (1, 6) (1, 7)
6687db96d56Sopenharmony_ci    STRING     "u'places'"   (1, 8) (1, 17)
6697db96d56Sopenharmony_ci    NEWLINE    '\\n'          (1, 17) (1, 18)
6707db96d56Sopenharmony_ci    NAME       'grün'        (2, 0) (2, 4)
6717db96d56Sopenharmony_ci    OP         '='           (2, 5) (2, 6)
6727db96d56Sopenharmony_ci    STRING     "U'green'"    (2, 7) (2, 15)
6737db96d56Sopenharmony_ci    """)
6747db96d56Sopenharmony_ci
    def test_async(self):
        # Async/await extension:
        # Every expected table below lists 'async' and 'await' as ordinary
        # NAME tokens, whatever syntactic position they appear in.

        # 'async' as a simple assignment target.
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        # 'async' assigned inside parentheses.
        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        # 'async' in call position.
        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        # 'async' as a class name, with a base class.
        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        # 'async' as a class name, without bases.
        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        # 'await' as a simple assignment target.
        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        # 'async' as an attribute name.
        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        # 'async for' at top level.
        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        # 'async with' at top level.
        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        # 'async' as the object of an attribute access.
        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        # A bare 'async' and nothing else.
        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        # 'async' and 'await' on separate lines with a comment-only line in
        # between; that line is expected as COMMENT followed by NL.
        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        # Same layout with an Ellipsis statement on the middle line.
        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        # 'async' and 'await' on directly adjacent lines.
        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        # 'async' as an attribute inside a larger expression.
        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        # A one-line 'async def'.
        self.check_tokenize("async def foo(): pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        # Multi-line 'async def' whose body uses 'await' as a parameter name,
        # an assignment target and a bare expression, followed by a top-level
        # 'async += 1'.  The source ends with a newline, so the final NEWLINE
        # token appears in the expected table.
        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    NAME       'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    NAME       'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        # 'async for' nested in an 'async def'; the source has no trailing
        # newline and the table ends with the closing DEDENT.
        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        # 'async' as a parameter name of an 'async def', with a bare 'await'
        # as the body.
        self.check_tokenize('''async def foo(async): await''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    NAME       'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    NAME       'await'       (1, 22) (1, 27)
    """)

        # A plain 'def' containing a nested 'def', a nested 'async def' and an
        # assignment to 'await'; blank lines are expected as NL tokens.
        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        # The same body as above, but with the outer function declared
        # 'async def'.
        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)
9487db96d56Sopenharmony_ci
class GenerateTokensTest(TokenizeTest):
    """TokenizeTest variant whose check_tokenize() goes through generate_tokens()."""

    def check_tokenize(self, s, expected):
        """Tokenize the str *s* and compare the rendered table with *expected*.

        The tokens are rendered by stringify_tokens_from_source(); the
        ENDMARKER and final NEWLINE are omitted from that rendering.
        """
        readline = StringIO(s).readline
        rendered = stringify_tokens_from_source(generate_tokens(readline), s)
        wanted = expected.rstrip().splitlines()
        self.assertEqual(rendered, wanted)
9567db96d56Sopenharmony_ci
9577db96d56Sopenharmony_ci
def decistmt(s):
    """Return *s* with every NUMBER token containing '.' wrapped in Decimal().

    The source is tokenized from its UTF-8 encoding, each such NUMBER token
    is replaced by the four tokens ``Decimal ( '<text>' )``, and the
    (type, string) pairs are turned back into text with untokenize().
    """
    stream = BytesIO(s.encode('utf-8'))
    rewritten = []
    for tok in tokenize(stream.readline):
        kind, text = tok[0], tok[1]
        # Anything that is not a NUMBER with a '.' in it passes through as is.
        if kind != NUMBER or '.' not in text:
            rewritten.append((kind, text))
            continue
        rewritten += [
            (NAME, 'Decimal'),
            (OP, '('),
            (STRING, repr(text)),
            (OP, ')'),
        ]
    return untokenize(rewritten).decode('utf-8')
9727db96d56Sopenharmony_ci
class TestMisc(TestCase):
    """Miscellaneous checks; currently the decistmt() example only."""

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        # The pattern is a raw string with the decimal point escaped so that
        # '.' only matches a literal dot rather than any character.
        self.assertRegex(repr(eval(s)), r'-3\.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))
9947db96d56Sopenharmony_ci
9957db96d56Sopenharmony_ci
class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        """Round-trip the data file *filename* located next to this module.

        The file is opened in binary mode and handed to
        TestRoundtrip.check_roundtrip().  The ``with`` block guarantees the
        handle is released even when the round trip raises (several tests
        below expect a SyntaxError from it).
        """
        path = os.path.join(os.path.dirname(__file__), filename)
        with open(path, 'rb') as source_file:
            TestRoundtrip.check_roundtrip(self, source_file)

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        """A utf-8 coding cookie without a BOM round-trips."""
        self._testFile(
            'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt')

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'.  The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        with self.assertRaises(SyntaxError):
            self._testFile(
                'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt')

    def test_no_coding_cookie_and_utf8_bom(self):
        """A BOM with no coding cookie round-trips."""
        self._testFile(
            'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt')

    def test_utf8_coding_cookie_and_utf8_bom(self):
        """A utf-8 coding cookie together with a BOM round-trips."""
        self._testFile(
            'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt')

    def test_bad_coding_cookie(self):
        """Both bad-coding fixture files must be rejected with SyntaxError."""
        for filename in ('bad_coding.py', 'bad_coding2.py'):
            with self.subTest(filename=filename):
                with self.assertRaises(SyntaxError):
                    self._testFile(filename)
10317db96d56Sopenharmony_ci
10327db96d56Sopenharmony_ci
class Test_Tokenize(TestCase):
    """Checks on how ``_tokenize`` treats its ``encoding`` argument."""

    def test__tokenize_decodes_with_specified_encoding(self):
        """Bytes input is decoded using the encoding passed to _tokenize()."""
        text = '"ЉЊЈЁЂ"'
        pending = [text.encode('utf-8')]

        def readline():
            # Serve the single encoded line once, then b'' on every later call.
            if pending:
                return pending.pop()
            return b''

        produced = list(_tokenize(readline, encoding='utf-8'))
        # Leave out the initial encoding token and the end tokens.
        relevant = produced[1:-2]
        self.assertEqual(relevant,
                         [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')],
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        """With encoding=None the str line is tokenized as it is."""
        pending = ['"ЉЊЈЁЂ"']

        def readline():
            # Serve the str line once, then b'' on every later call.
            if pending:
                return pending.pop()
            return b''

        produced = list(_tokenize(readline, encoding=None))
        # Leave out the end tokens.
        relevant = produced[:-2]
        self.assertEqual(relevant,
                         [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')],
                         "string not tokenized when encoding is None")
10697db96d56Sopenharmony_ci
10707db96d56Sopenharmony_ci
10717db96d56Sopenharmony_ciclass TestDetectEncoding(TestCase):
10727db96d56Sopenharmony_ci
10737db96d56Sopenharmony_ci    def get_readline(self, lines):
10747db96d56Sopenharmony_ci        index = 0
10757db96d56Sopenharmony_ci        def readline():
10767db96d56Sopenharmony_ci            nonlocal index
10777db96d56Sopenharmony_ci            if index == len(lines):
10787db96d56Sopenharmony_ci                raise StopIteration
10797db96d56Sopenharmony_ci            line = lines[index]
10807db96d56Sopenharmony_ci            index += 1
10817db96d56Sopenharmony_ci            return line
10827db96d56Sopenharmony_ci        return readline
10837db96d56Sopenharmony_ci
10847db96d56Sopenharmony_ci    def test_no_bom_no_encoding_cookie(self):
10857db96d56Sopenharmony_ci        lines = (
10867db96d56Sopenharmony_ci            b'# something\n',
10877db96d56Sopenharmony_ci            b'print(something)\n',
10887db96d56Sopenharmony_ci            b'do_something(else)\n'
10897db96d56Sopenharmony_ci        )
10907db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
10917db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8')
10927db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, list(lines[:2]))
10937db96d56Sopenharmony_ci
10947db96d56Sopenharmony_ci    def test_bom_no_cookie(self):
10957db96d56Sopenharmony_ci        lines = (
10967db96d56Sopenharmony_ci            b'\xef\xbb\xbf# something\n',
10977db96d56Sopenharmony_ci            b'print(something)\n',
10987db96d56Sopenharmony_ci            b'do_something(else)\n'
10997db96d56Sopenharmony_ci        )
11007db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
11017db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8-sig')
11027db96d56Sopenharmony_ci        self.assertEqual(consumed_lines,
11037db96d56Sopenharmony_ci                         [b'# something\n', b'print(something)\n'])
11047db96d56Sopenharmony_ci
11057db96d56Sopenharmony_ci    def test_cookie_first_line_no_bom(self):
11067db96d56Sopenharmony_ci        lines = (
11077db96d56Sopenharmony_ci            b'# -*- coding: latin-1 -*-\n',
11087db96d56Sopenharmony_ci            b'print(something)\n',
11097db96d56Sopenharmony_ci            b'do_something(else)\n'
11107db96d56Sopenharmony_ci        )
11117db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
11127db96d56Sopenharmony_ci        self.assertEqual(encoding, 'iso-8859-1')
11137db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
11147db96d56Sopenharmony_ci
11157db96d56Sopenharmony_ci    def test_matched_bom_and_cookie_first_line(self):
11167db96d56Sopenharmony_ci        lines = (
11177db96d56Sopenharmony_ci            b'\xef\xbb\xbf# coding=utf-8\n',
11187db96d56Sopenharmony_ci            b'print(something)\n',
11197db96d56Sopenharmony_ci            b'do_something(else)\n'
11207db96d56Sopenharmony_ci        )
11217db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
11227db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8-sig')
11237db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
11247db96d56Sopenharmony_ci
11257db96d56Sopenharmony_ci    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
11267db96d56Sopenharmony_ci        lines = (
11277db96d56Sopenharmony_ci            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
11287db96d56Sopenharmony_ci            b'print(something)\n',
11297db96d56Sopenharmony_ci            b'do_something(else)\n'
11307db96d56Sopenharmony_ci        )
11317db96d56Sopenharmony_ci        readline = self.get_readline(lines)
11327db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, detect_encoding, readline)
11337db96d56Sopenharmony_ci
11347db96d56Sopenharmony_ci    def test_cookie_second_line_no_bom(self):
11357db96d56Sopenharmony_ci        lines = (
11367db96d56Sopenharmony_ci            b'#! something\n',
11377db96d56Sopenharmony_ci            b'# vim: set fileencoding=ascii :\n',
11387db96d56Sopenharmony_ci            b'print(something)\n',
11397db96d56Sopenharmony_ci            b'do_something(else)\n'
11407db96d56Sopenharmony_ci        )
11417db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
11427db96d56Sopenharmony_ci        self.assertEqual(encoding, 'ascii')
11437db96d56Sopenharmony_ci        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
11447db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, expected)
11457db96d56Sopenharmony_ci
11467db96d56Sopenharmony_ci    def test_matched_bom_and_cookie_second_line(self):
11477db96d56Sopenharmony_ci        lines = (
11487db96d56Sopenharmony_ci            b'\xef\xbb\xbf#! something\n',
11497db96d56Sopenharmony_ci            b'f# coding=utf-8\n',
11507db96d56Sopenharmony_ci            b'print(something)\n',
11517db96d56Sopenharmony_ci            b'do_something(else)\n'
11527db96d56Sopenharmony_ci        )
11537db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
11547db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8-sig')
11557db96d56Sopenharmony_ci        self.assertEqual(consumed_lines,
11567db96d56Sopenharmony_ci                         [b'#! something\n', b'f# coding=utf-8\n'])
11577db96d56Sopenharmony_ci
11587db96d56Sopenharmony_ci    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
11597db96d56Sopenharmony_ci        lines = (
11607db96d56Sopenharmony_ci            b'\xef\xbb\xbf#! something\n',
11617db96d56Sopenharmony_ci            b'# vim: set fileencoding=ascii :\n',
11627db96d56Sopenharmony_ci            b'print(something)\n',
11637db96d56Sopenharmony_ci            b'do_something(else)\n'
11647db96d56Sopenharmony_ci        )
11657db96d56Sopenharmony_ci        readline = self.get_readline(lines)
11667db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, detect_encoding, readline)
11677db96d56Sopenharmony_ci
11687db96d56Sopenharmony_ci    def test_cookie_second_line_noncommented_first_line(self):
11697db96d56Sopenharmony_ci        lines = (
11707db96d56Sopenharmony_ci            b"print('\xc2\xa3')\n",
11717db96d56Sopenharmony_ci            b'# vim: set fileencoding=iso8859-15 :\n',
11727db96d56Sopenharmony_ci            b"print('\xe2\x82\xac')\n"
11737db96d56Sopenharmony_ci        )
11747db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
11757db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8')
11767db96d56Sopenharmony_ci        expected = [b"print('\xc2\xa3')\n"]
11777db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, expected)
11787db96d56Sopenharmony_ci
11797db96d56Sopenharmony_ci    def test_cookie_second_line_commented_first_line(self):
11807db96d56Sopenharmony_ci        lines = (
11817db96d56Sopenharmony_ci            b"#print('\xc2\xa3')\n",
11827db96d56Sopenharmony_ci            b'# vim: set fileencoding=iso8859-15 :\n',
11837db96d56Sopenharmony_ci            b"print('\xe2\x82\xac')\n"
11847db96d56Sopenharmony_ci        )
11857db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
11867db96d56Sopenharmony_ci        self.assertEqual(encoding, 'iso8859-15')
11877db96d56Sopenharmony_ci        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
11887db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, expected)
11897db96d56Sopenharmony_ci
11907db96d56Sopenharmony_ci    def test_cookie_second_line_empty_first_line(self):
11917db96d56Sopenharmony_ci        lines = (
11927db96d56Sopenharmony_ci            b'\n',
11937db96d56Sopenharmony_ci            b'# vim: set fileencoding=iso8859-15 :\n',
11947db96d56Sopenharmony_ci            b"print('\xe2\x82\xac')\n"
11957db96d56Sopenharmony_ci        )
11967db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
11977db96d56Sopenharmony_ci        self.assertEqual(encoding, 'iso8859-15')
11987db96d56Sopenharmony_ci        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
11997db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, expected)
12007db96d56Sopenharmony_ci
12017db96d56Sopenharmony_ci    def test_latin1_normalization(self):
12027db96d56Sopenharmony_ci        # See get_normal_name() in tokenizer.c.
12037db96d56Sopenharmony_ci        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
12047db96d56Sopenharmony_ci                     "iso-8859-1-unix", "iso-latin-1-mac")
12057db96d56Sopenharmony_ci        for encoding in encodings:
12067db96d56Sopenharmony_ci            for rep in ("-", "_"):
12077db96d56Sopenharmony_ci                enc = encoding.replace("-", rep)
12087db96d56Sopenharmony_ci                lines = (b"#!/usr/bin/python\n",
12097db96d56Sopenharmony_ci                         b"# coding: " + enc.encode("ascii") + b"\n",
12107db96d56Sopenharmony_ci                         b"print(things)\n",
12117db96d56Sopenharmony_ci                         b"do_something += 4\n")
12127db96d56Sopenharmony_ci                rl = self.get_readline(lines)
12137db96d56Sopenharmony_ci                found, consumed_lines = detect_encoding(rl)
12147db96d56Sopenharmony_ci                self.assertEqual(found, "iso-8859-1")
12157db96d56Sopenharmony_ci
12167db96d56Sopenharmony_ci    def test_syntaxerror_latin1(self):
12177db96d56Sopenharmony_ci        # Issue 14629: need to raise SyntaxError if the first
12187db96d56Sopenharmony_ci        # line(s) have non-UTF-8 characters
12197db96d56Sopenharmony_ci        lines = (
12207db96d56Sopenharmony_ci            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
12217db96d56Sopenharmony_ci            )
12227db96d56Sopenharmony_ci        readline = self.get_readline(lines)
12237db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, detect_encoding, readline)
12247db96d56Sopenharmony_ci
12257db96d56Sopenharmony_ci
12267db96d56Sopenharmony_ci    def test_utf8_normalization(self):
12277db96d56Sopenharmony_ci        # See get_normal_name() in tokenizer.c.
12287db96d56Sopenharmony_ci        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
12297db96d56Sopenharmony_ci        for encoding in encodings:
12307db96d56Sopenharmony_ci            for rep in ("-", "_"):
12317db96d56Sopenharmony_ci                enc = encoding.replace("-", rep)
12327db96d56Sopenharmony_ci                lines = (b"#!/usr/bin/python\n",
12337db96d56Sopenharmony_ci                         b"# coding: " + enc.encode("ascii") + b"\n",
12347db96d56Sopenharmony_ci                         b"1 + 3\n")
12357db96d56Sopenharmony_ci                rl = self.get_readline(lines)
12367db96d56Sopenharmony_ci                found, consumed_lines = detect_encoding(rl)
12377db96d56Sopenharmony_ci                self.assertEqual(found, "utf-8")
12387db96d56Sopenharmony_ci
12397db96d56Sopenharmony_ci    def test_short_files(self):
12407db96d56Sopenharmony_ci        readline = self.get_readline((b'print(something)\n',))
12417db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(readline)
12427db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8')
12437db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, [b'print(something)\n'])
12447db96d56Sopenharmony_ci
12457db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(self.get_readline(()))
12467db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8')
12477db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, [])
12487db96d56Sopenharmony_ci
12497db96d56Sopenharmony_ci        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
12507db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(readline)
12517db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8-sig')
12527db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, [b'print(something)\n'])
12537db96d56Sopenharmony_ci
12547db96d56Sopenharmony_ci        readline = self.get_readline((b'\xef\xbb\xbf',))
12557db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(readline)
12567db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8-sig')
12577db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, [])
12587db96d56Sopenharmony_ci
12597db96d56Sopenharmony_ci        readline = self.get_readline((b'# coding: bad\n',))
12607db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, detect_encoding, readline)
12617db96d56Sopenharmony_ci
12627db96d56Sopenharmony_ci    def test_false_encoding(self):
12637db96d56Sopenharmony_ci        # Issue 18873: "Encoding" detected in non-comment lines
12647db96d56Sopenharmony_ci        readline = self.get_readline((b'print("#coding=fake")',))
12657db96d56Sopenharmony_ci        encoding, consumed_lines = detect_encoding(readline)
12667db96d56Sopenharmony_ci        self.assertEqual(encoding, 'utf-8')
12677db96d56Sopenharmony_ci        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
12687db96d56Sopenharmony_ci
12697db96d56Sopenharmony_ci    def test_open(self):
12707db96d56Sopenharmony_ci        filename = os_helper.TESTFN + '.py'
12717db96d56Sopenharmony_ci        self.addCleanup(os_helper.unlink, filename)
12727db96d56Sopenharmony_ci
12737db96d56Sopenharmony_ci        # test coding cookie
12747db96d56Sopenharmony_ci        for encoding in ('iso-8859-15', 'utf-8'):
12757db96d56Sopenharmony_ci            with open(filename, 'w', encoding=encoding) as fp:
12767db96d56Sopenharmony_ci                print("# coding: %s" % encoding, file=fp)
12777db96d56Sopenharmony_ci                print("print('euro:\u20ac')", file=fp)
12787db96d56Sopenharmony_ci            with tokenize_open(filename) as fp:
12797db96d56Sopenharmony_ci                self.assertEqual(fp.encoding, encoding)
12807db96d56Sopenharmony_ci                self.assertEqual(fp.mode, 'r')
12817db96d56Sopenharmony_ci
12827db96d56Sopenharmony_ci        # test BOM (no coding cookie)
12837db96d56Sopenharmony_ci        with open(filename, 'w', encoding='utf-8-sig') as fp:
12847db96d56Sopenharmony_ci            print("print('euro:\u20ac')", file=fp)
12857db96d56Sopenharmony_ci        with tokenize_open(filename) as fp:
12867db96d56Sopenharmony_ci            self.assertEqual(fp.encoding, 'utf-8-sig')
12877db96d56Sopenharmony_ci            self.assertEqual(fp.mode, 'r')
12887db96d56Sopenharmony_ci
12897db96d56Sopenharmony_ci    def test_filename_in_exception(self):
12907db96d56Sopenharmony_ci        # When possible, include the file name in the exception.
12917db96d56Sopenharmony_ci        path = 'some_file_path'
12927db96d56Sopenharmony_ci        lines = (
12937db96d56Sopenharmony_ci            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
12947db96d56Sopenharmony_ci            )
12957db96d56Sopenharmony_ci        class Bunk:
12967db96d56Sopenharmony_ci            def __init__(self, lines, path):
12977db96d56Sopenharmony_ci                self.name = path
12987db96d56Sopenharmony_ci                self._lines = lines
12997db96d56Sopenharmony_ci                self._index = 0
13007db96d56Sopenharmony_ci
13017db96d56Sopenharmony_ci            def readline(self):
13027db96d56Sopenharmony_ci                if self._index == len(lines):
13037db96d56Sopenharmony_ci                    raise StopIteration
13047db96d56Sopenharmony_ci                line = lines[self._index]
13057db96d56Sopenharmony_ci                self._index += 1
13067db96d56Sopenharmony_ci                return line
13077db96d56Sopenharmony_ci
13087db96d56Sopenharmony_ci        with self.assertRaises(SyntaxError):
13097db96d56Sopenharmony_ci            ins = Bunk(lines, path)
13107db96d56Sopenharmony_ci            # Make sure lacking a name isn't an issue.
13117db96d56Sopenharmony_ci            del ins.name
13127db96d56Sopenharmony_ci            detect_encoding(ins.readline)
13137db96d56Sopenharmony_ci        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
13147db96d56Sopenharmony_ci            ins = Bunk(lines, path)
13157db96d56Sopenharmony_ci            detect_encoding(ins.readline)
13167db96d56Sopenharmony_ci
13177db96d56Sopenharmony_ci    def test_open_error(self):
13187db96d56Sopenharmony_ci        # Issue #23840: open() must close the binary file on error
13197db96d56Sopenharmony_ci        m = BytesIO(b'#coding:xxx')
13207db96d56Sopenharmony_ci        with mock.patch('tokenize._builtin_open', return_value=m):
13217db96d56Sopenharmony_ci            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
13227db96d56Sopenharmony_ci        self.assertTrue(m.closed)
13237db96d56Sopenharmony_ci
13247db96d56Sopenharmony_ci
class TestTokenize(TestCase):
    """Tests for the public ``tokenize()`` entry point and exact token types."""

    def test_tokenize(self):
        """tokenize() wires detect_encoding() and _tokenize() together.

        Both helpers are replaced by fakes: the detected encoding must be
        passed to _tokenize(), and the readline it receives must yield the
        lines consumed during detection followed by the remaining input.
        """
        import tokenize as tokenize_module
        encoding = object()
        seen = {}

        def fake_detect_encoding(readline):
            return encoding, [b'first', b'second']

        def fake__tokenize(readline, encoding):
            seen['encoding'] = encoding
            collected = []
            line = readline()
            while line:
                collected.append(line)
                line = readline()
            return collected

        calls = 0

        def fake_readline():
            # Produces b'1' .. b'4' and an empty bytes object on the 5th call.
            nonlocal calls
            calls += 1
            return b'' if calls == 5 else str(calls).encode()

        # patch.object restores the real helpers on exit, even on failure.
        with mock.patch.object(tokenize_module, 'detect_encoding',
                               fake_detect_encoding), \
             mock.patch.object(tokenize_module, '_tokenize',
                               fake__tokenize):
            results = tokenize(fake_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])

        self.assertEqual(seen.get('encoding'), encoding)

    def test_oneline_defs(self):
        """500 consecutive one-line defs followed by a name tokenize fine."""
        statements = ['def i{i}(): return {i}'.format(i=i)
                      for i in range(500)]
        statements.append('OK')
        source = '\n'.join(statements)

        # Test that 500 consequent, one-line defs is OK
        toks = list(tokenize(BytesIO(source.encode('utf-8')).readline))
        # [-1] is always ENDMARKER, [-2] is always NEWLINE
        self.assertEqual(toks[-3].string, 'OK')

    def assertExactTypeEqual(self, opstr, *optypes):
        """Assert that *opstr* tokenizes to exactly the types in *optypes*.

        The full stream is expected to be ENCODING, then *optypes* in
        order, then NEWLINE and ENDMARKER.  Types are compared by their
        ``tok_name`` names so a failure message is readable.
        """
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        expected = [ENCODING, *optypes, token.NEWLINE, token.ENDMARKER]
        self.assertEqual(len(tokens), len(expected))
        self.assertEqual([tok_name[tok.exact_type] for tok in tokens],
                         [tok_name[kind] for kind in expected])

    def test_exact_type(self):
        """Each operator, and a few expressions, report the right exact_type."""
        cases = (
            ('()', token.LPAR, token.RPAR),
            ('[]', token.LSQB, token.RSQB),
            (':', token.COLON),
            (',', token.COMMA),
            (';', token.SEMI),
            ('+', token.PLUS),
            ('-', token.MINUS),
            ('*', token.STAR),
            ('/', token.SLASH),
            ('|', token.VBAR),
            ('&', token.AMPER),
            ('<', token.LESS),
            ('>', token.GREATER),
            ('=', token.EQUAL),
            ('.', token.DOT),
            ('%', token.PERCENT),
            ('{}', token.LBRACE, token.RBRACE),
            ('==', token.EQEQUAL),
            ('!=', token.NOTEQUAL),
            ('<=', token.LESSEQUAL),
            ('>=', token.GREATEREQUAL),
            ('~', token.TILDE),
            ('^', token.CIRCUMFLEX),
            ('<<', token.LEFTSHIFT),
            ('>>', token.RIGHTSHIFT),
            ('**', token.DOUBLESTAR),
            ('+=', token.PLUSEQUAL),
            ('-=', token.MINEQUAL),
            ('*=', token.STAREQUAL),
            ('/=', token.SLASHEQUAL),
            ('%=', token.PERCENTEQUAL),
            ('&=', token.AMPEREQUAL),
            ('|=', token.VBAREQUAL),
            ('^=', token.CIRCUMFLEXEQUAL),
            ('<<=', token.LEFTSHIFTEQUAL),
            ('>>=', token.RIGHTSHIFTEQUAL),
            ('**=', token.DOUBLESTAREQUAL),
            ('//', token.DOUBLESLASH),
            ('//=', token.DOUBLESLASHEQUAL),
            (':=', token.COLONEQUAL),
            ('...', token.ELLIPSIS),
            ('->', token.RARROW),
            ('@', token.AT),
            ('@=', token.ATEQUAL),
            # Whole expressions mixing names, numbers and operators.
            ('a**2+b**2==c**2',
             NAME, token.DOUBLESTAR, NUMBER,
             token.PLUS,
             NAME, token.DOUBLESTAR, NUMBER,
             token.EQEQUAL,
             NAME, token.DOUBLESTAR, NUMBER),
            ('{1, 2, 3}',
             token.LBRACE,
             token.NUMBER, token.COMMA,
             token.NUMBER, token.COMMA,
             token.NUMBER,
             token.RBRACE),
            ('^(x & 0x1)',
             token.CIRCUMFLEX,
             token.LPAR,
             token.NAME, token.AMPER, token.NUMBER,
             token.RPAR),
        )
        for opstr, *optypes in cases:
            # subTest names the failing source text and lets the rest run.
            with self.subTest(opstr=opstr):
                self.assertExactTypeEqual(opstr, *optypes)

    def test_pathological_trailing_whitespace(self):
        """Trailing spaces after an operator add no tokens."""
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@          ', token.AT)

    def test_comment_at_the_end_of_the_source_without_newline(self):
        """A final comment without a newline still yields a COMMENT token."""
        # See http://bugs.python.org/issue44667
        source = 'b = 1\n\n#test'
        expected_tokens = [token.NAME, token.EQUAL, token.NUMBER,
                           token.NEWLINE, token.NL, token.COMMENT]

        tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
        self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
        # Index from 1 to skip ENCODING; the loop length follows the
        # expected list instead of a hard-coded count.
        for index, expected in enumerate(expected_tokens, start=1):
            self.assertEqual(tok_name[tokens[index].exact_type],
                             tok_name[expected])
        self.assertEqual(tok_name[tokens[-1].exact_type],
                         tok_name[token.ENDMARKER])
14727db96d56Sopenharmony_ci
class UntokenizeTest(TestCase):
    # Direct checks of Untokenizer.add_whitespace(), Untokenizer.compat()
    # and the untokenize() entry point.

    def test_bad_input_order(self):
        # add_whitespace() must reject a start position that lies before
        # the recorded end of the previous token.
        untok = Untokenizer()
        untok.prev_row, untok.prev_col = 2, 2

        # Start on an earlier row than the previous end.
        with self.assertRaises(ValueError) as caught:
            untok.add_whitespace((1,3))
        message = caught.exception.args[0]
        self.assertEqual(message, 'start (1,3) precedes previous end (2,2)')

        # Start on the same row but at an earlier column.
        with self.assertRaises(ValueError):
            untok.add_whitespace((2,1))

    def test_backslash_continuation(self):
        # <whitespace>\<newline> leaves no token behind, so a jump in the
        # row number is rendered by add_whitespace() as backslash-newline
        # pairs (one per skipped row), followed by the column padding.
        untok = Untokenizer()
        untok.tokens = []
        untok.prev_row = 1
        untok.prev_col = 1

        untok.add_whitespace((2, 0))
        self.assertEqual(untok.tokens, ['\\\n'])

        untok.prev_row = 2
        untok.add_whitespace((4, 4))
        self.assertEqual(untok.tokens, ['\\\n', '\\\n\\\n', '    '])

        # Reuse the round-trip helper with this test case as `self`.
        TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n  \\\n  c\n')

    def test_iter_compat(self):
        # compat() and untokenize() must work when handed iterators of
        # 2-tuples rather than lists.
        hello = (NAME, 'Hello')
        with_encoding = [(ENCODING, 'utf-8'), hello]

        untok = Untokenizer()
        untok.compat(hello, iter([]))
        self.assertEqual(untok.tokens, ["Hello "])

        untok = Untokenizer()
        self.assertEqual(untok.untokenize(iter([hello])), 'Hello ')

        # A leading ENCODING token is recorded on the instance ...
        untok = Untokenizer()
        self.assertEqual(untok.untokenize(iter(with_encoding)), 'Hello ')
        self.assertEqual(untok.encoding, 'utf-8')

        # ... and makes the module-level untokenize() return bytes.
        self.assertEqual(untokenize(iter(with_encoding)), b'Hello ')
15127db96d56Sopenharmony_ci
15137db96d56Sopenharmony_ci
class TestRoundtrip(TestCase):
    # Round-trip checks: source -> tokenize -> untokenize -> tokenize must
    # give back the same (type, string) pairs.

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        A string is encoded as UTF-8; a file is read in full and closed.

        The source code in f is tokenized to 5-tuples, from which the
        2-tuples (type, string) are derived.  Both sequences are converted
        back to source code via tokenize.untokenize(), and each result is
        tokenized again to 2-tuples.  The test fails if either regenerated
        2-tuple sequence differs from the original 2-tuples.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces.  A proper test should test this.
        """
        # NOTE: UntokenizeTest invokes this as
        # TestRoundtrip.check_roundtrip(self, ...) with its own instance, so
        # only plain TestCase methods may be used through `self` here.

        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon.

        self.check_roundtrip("if x == 1 : \n"
                             "  print(x)\n")
        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x) # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n)"
                             "else:   print('Loaded')\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob
        import random

        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))

        # test_unicode_identifiers.py is excluded: the original note here
        # (which names the file test_pep3131.py) says tokenize is broken on
        # it because of the obscure unicode identifiers it contains.
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            # Format the file name before joining: applying % to the joined
            # path would misinterpret any '%' present in the directory name.
            testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))

        if not support.is_resource_enabled("cpu"):
            # random.sample() raises ValueError when asked for more items
            # than are available, so cap the sample size.
            testfiles = random.sample(testfiles, min(10, len(testfiles)))

        for testfile in testfiles:
            if support.verbose >= 2:
                print('tokenize', testfile)
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)


    def roundtrip(self, code):
        # Tokenize `code` (str or bytes; str is encoded as UTF-8) and return
        # the untokenized result decoded back to str.
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        # Both body lines were identical in the input, so they must still
        # be identical (same indentation) after the roundtrip.
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)
16567db96d56Sopenharmony_ci
16577db96d56Sopenharmony_ci
16587db96d56Sopenharmony_ciclass CTokenizeTest(TestCase):
16597db96d56Sopenharmony_ci    def check_tokenize(self, s, expected):
16607db96d56Sopenharmony_ci        # Format the tokens in s in a table format.
16617db96d56Sopenharmony_ci        # The ENDMARKER and final NEWLINE are omitted.
16627db96d56Sopenharmony_ci        with self.subTest(source=s):
16637db96d56Sopenharmony_ci            result = stringify_tokens_from_source(
16647db96d56Sopenharmony_ci                _generate_tokens_from_c_tokenizer(s), s
16657db96d56Sopenharmony_ci            )
16667db96d56Sopenharmony_ci            self.assertEqual(result, expected.rstrip().splitlines())
16677db96d56Sopenharmony_ci
16687db96d56Sopenharmony_ci    def test_int(self):
16697db96d56Sopenharmony_ci
16707db96d56Sopenharmony_ci        self.check_tokenize('0xff <= 255', """\
16717db96d56Sopenharmony_ci    NUMBER     '0xff'        (1, 0) (1, 4)
16727db96d56Sopenharmony_ci    LESSEQUAL  '<='          (1, 5) (1, 7)
16737db96d56Sopenharmony_ci    NUMBER     '255'         (1, 8) (1, 11)
16747db96d56Sopenharmony_ci    """)
16757db96d56Sopenharmony_ci
16767db96d56Sopenharmony_ci        self.check_tokenize('0b10 <= 255', """\
16777db96d56Sopenharmony_ci    NUMBER     '0b10'        (1, 0) (1, 4)
16787db96d56Sopenharmony_ci    LESSEQUAL  '<='          (1, 5) (1, 7)
16797db96d56Sopenharmony_ci    NUMBER     '255'         (1, 8) (1, 11)
16807db96d56Sopenharmony_ci    """)
16817db96d56Sopenharmony_ci
16827db96d56Sopenharmony_ci        self.check_tokenize('0o123 <= 0O123', """\
16837db96d56Sopenharmony_ci    NUMBER     '0o123'       (1, 0) (1, 5)
16847db96d56Sopenharmony_ci    LESSEQUAL  '<='          (1, 6) (1, 8)
16857db96d56Sopenharmony_ci    NUMBER     '0O123'       (1, 9) (1, 14)
16867db96d56Sopenharmony_ci    """)
16877db96d56Sopenharmony_ci
16887db96d56Sopenharmony_ci        self.check_tokenize('1234567 > ~0x15', """\
16897db96d56Sopenharmony_ci    NUMBER     '1234567'     (1, 0) (1, 7)
16907db96d56Sopenharmony_ci    GREATER    '>'           (1, 8) (1, 9)
16917db96d56Sopenharmony_ci    TILDE      '~'           (1, 10) (1, 11)
16927db96d56Sopenharmony_ci    NUMBER     '0x15'        (1, 11) (1, 15)
16937db96d56Sopenharmony_ci    """)
16947db96d56Sopenharmony_ci
16957db96d56Sopenharmony_ci        self.check_tokenize('2134568 != 1231515', """\
16967db96d56Sopenharmony_ci    NUMBER     '2134568'     (1, 0) (1, 7)
16977db96d56Sopenharmony_ci    NOTEQUAL   '!='          (1, 8) (1, 10)
16987db96d56Sopenharmony_ci    NUMBER     '1231515'     (1, 11) (1, 18)
16997db96d56Sopenharmony_ci    """)
17007db96d56Sopenharmony_ci
17017db96d56Sopenharmony_ci        self.check_tokenize('(-124561-1) & 200000000', """\
17027db96d56Sopenharmony_ci    LPAR       '('           (1, 0) (1, 1)
17037db96d56Sopenharmony_ci    MINUS      '-'           (1, 1) (1, 2)
17047db96d56Sopenharmony_ci    NUMBER     '124561'      (1, 2) (1, 8)
17057db96d56Sopenharmony_ci    MINUS      '-'           (1, 8) (1, 9)
17067db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
17077db96d56Sopenharmony_ci    RPAR       ')'           (1, 10) (1, 11)
17087db96d56Sopenharmony_ci    AMPER      '&'           (1, 12) (1, 13)
17097db96d56Sopenharmony_ci    NUMBER     '200000000'   (1, 14) (1, 23)
17107db96d56Sopenharmony_ci    """)
17117db96d56Sopenharmony_ci
17127db96d56Sopenharmony_ci        self.check_tokenize('0xdeadbeef != -1', """\
17137db96d56Sopenharmony_ci    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
17147db96d56Sopenharmony_ci    NOTEQUAL   '!='          (1, 11) (1, 13)
17157db96d56Sopenharmony_ci    MINUS      '-'           (1, 14) (1, 15)
17167db96d56Sopenharmony_ci    NUMBER     '1'           (1, 15) (1, 16)
17177db96d56Sopenharmony_ci    """)
17187db96d56Sopenharmony_ci
17197db96d56Sopenharmony_ci        self.check_tokenize('0xdeadc0de & 12345', """\
17207db96d56Sopenharmony_ci    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
17217db96d56Sopenharmony_ci    AMPER      '&'           (1, 11) (1, 12)
17227db96d56Sopenharmony_ci    NUMBER     '12345'       (1, 13) (1, 18)
17237db96d56Sopenharmony_ci    """)
17247db96d56Sopenharmony_ci
17257db96d56Sopenharmony_ci        self.check_tokenize('0xFF & 0x15 | 1234', """\
17267db96d56Sopenharmony_ci    NUMBER     '0xFF'        (1, 0) (1, 4)
17277db96d56Sopenharmony_ci    AMPER      '&'           (1, 5) (1, 6)
17287db96d56Sopenharmony_ci    NUMBER     '0x15'        (1, 7) (1, 11)
17297db96d56Sopenharmony_ci    VBAR       '|'           (1, 12) (1, 13)
17307db96d56Sopenharmony_ci    NUMBER     '1234'        (1, 14) (1, 18)
17317db96d56Sopenharmony_ci    """)
17327db96d56Sopenharmony_ci
17337db96d56Sopenharmony_ci    def test_float(self):
17347db96d56Sopenharmony_ci
17357db96d56Sopenharmony_ci        self.check_tokenize('x = 3.14159', """\
17367db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
17377db96d56Sopenharmony_ci    EQUAL      '='           (1, 2) (1, 3)
17387db96d56Sopenharmony_ci    NUMBER     '3.14159'     (1, 4) (1, 11)
17397db96d56Sopenharmony_ci    """)
17407db96d56Sopenharmony_ci
17417db96d56Sopenharmony_ci        self.check_tokenize('x = 314159.', """\
17427db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
17437db96d56Sopenharmony_ci    EQUAL      '='           (1, 2) (1, 3)
17447db96d56Sopenharmony_ci    NUMBER     '314159.'     (1, 4) (1, 11)
17457db96d56Sopenharmony_ci    """)
17467db96d56Sopenharmony_ci
17477db96d56Sopenharmony_ci        self.check_tokenize('x = .314159', """\
17487db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
17497db96d56Sopenharmony_ci    EQUAL      '='           (1, 2) (1, 3)
17507db96d56Sopenharmony_ci    NUMBER     '.314159'     (1, 4) (1, 11)
17517db96d56Sopenharmony_ci    """)
17527db96d56Sopenharmony_ci
17537db96d56Sopenharmony_ci        self.check_tokenize('x = 3e14159', """\
17547db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
17557db96d56Sopenharmony_ci    EQUAL      '='           (1, 2) (1, 3)
17567db96d56Sopenharmony_ci    NUMBER     '3e14159'     (1, 4) (1, 11)
17577db96d56Sopenharmony_ci    """)
17587db96d56Sopenharmony_ci
17597db96d56Sopenharmony_ci        self.check_tokenize('x = 3E123', """\
17607db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
17617db96d56Sopenharmony_ci    EQUAL      '='           (1, 2) (1, 3)
17627db96d56Sopenharmony_ci    NUMBER     '3E123'       (1, 4) (1, 9)
17637db96d56Sopenharmony_ci    """)
17647db96d56Sopenharmony_ci
17657db96d56Sopenharmony_ci        self.check_tokenize('x+y = 3e-1230', """\
17667db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
17677db96d56Sopenharmony_ci    PLUS       '+'           (1, 1) (1, 2)
17687db96d56Sopenharmony_ci    NAME       'y'           (1, 2) (1, 3)
17697db96d56Sopenharmony_ci    EQUAL      '='           (1, 4) (1, 5)
17707db96d56Sopenharmony_ci    NUMBER     '3e-1230'     (1, 6) (1, 13)
17717db96d56Sopenharmony_ci    """)
17727db96d56Sopenharmony_ci
17737db96d56Sopenharmony_ci        self.check_tokenize('x = 3.14e159', """\
17747db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
17757db96d56Sopenharmony_ci    EQUAL      '='           (1, 2) (1, 3)
17767db96d56Sopenharmony_ci    NUMBER     '3.14e159'    (1, 4) (1, 12)
17777db96d56Sopenharmony_ci    """)
17787db96d56Sopenharmony_ci
    def test_string(self):
        # String literals as seen by the C tokenizer (via check_tokenize):
        # empty strings, nested quotes, the r/u/b/br/rb/f prefixes in their
        # case variants, backslash-newline continuations and triple-quoted
        # strings.  The third column of each table is the repr() of the
        # token text, which is why quotes and backslashes are escaped.

        # Empty strings in both quote styles.
        self.check_tokenize('x = \'\'; y = ""', """\
    NAME       'x'           (1, 0) (1, 1)
    EQUAL      '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    SEMI       ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    EQUAL      '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)

        # A string holding only the other kind of quote character.
        self.check_tokenize('x = \'"\'; y = "\'"', """\
    NAME       'x'           (1, 0) (1, 1)
    EQUAL      '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    SEMI       ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    EQUAL      '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)

        # The inner double quotes end the first string, so the source splits
        # into STRING, NAME, STRING.
        self.check_tokenize('x = "doesn\'t "shrink", does it"', """\
    NAME       'x'           (1, 0) (1, 1)
    EQUAL      '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)

        # Unprefixed strings joined with '+', in both quote styles.
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    EQUAL      '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    PLUS       '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)

        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    EQUAL      '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    PLUS       '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)

        # Raw-string prefix, lower and upper case; the prefix is part of
        # the STRING token.
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    EQUAL      '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    PLUS       '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    PLUS       '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    PLUS       '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)

        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    EQUAL      '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    PLUS       '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    PLUS       '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    PLUS       '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        # u/U prefix.
        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    PLUS       '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)

        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    PLUS       '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        # b/B prefix.
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    PLUS       '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)

        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    PLUS       '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)

        # Two-letter br prefix in all four case combinations.
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    PLUS       '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    PLUS       '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    PLUS       '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)

        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    PLUS       '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    PLUS       '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    PLUS       '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

        # Same letters in the opposite order (rb).
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    PLUS       '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    PLUS       '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    PLUS       '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)

        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    PLUS       '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    PLUS       '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    PLUS       '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)

        # Single-quoted strings continued over several lines with
        # backslash-newline: one STRING token whose end position is on a
        # later row than its start.
        self.check_tokenize('"a\\\nde\\\nfg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)

        self.check_tokenize('u"a\\\nde"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)

        self.check_tokenize('rb"a\\\nd"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)

        # Triple-quoted strings containing backslash-newline.  The sources
        # are raw string literals that physically span lines of this file,
        # so their continuation lines must stay at column 0.
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)

        # f-strings are expected as a single STRING token here, including
        # the replacement field in 'fR"a{b}c"'.
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)

        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)

        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)

        # f-strings continued with backslash-newline (column-0 continuation
        # lines again belong to the raw source literal).
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
19607db96d56Sopenharmony_ci
19617db96d56Sopenharmony_ci    def test_function(self):
19627db96d56Sopenharmony_ci
19637db96d56Sopenharmony_ci        self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\
19647db96d56Sopenharmony_ci    NAME       'def'         (1, 0) (1, 3)
19657db96d56Sopenharmony_ci    NAME       'd22'         (1, 4) (1, 7)
19667db96d56Sopenharmony_ci    LPAR       '('           (1, 7) (1, 8)
19677db96d56Sopenharmony_ci    NAME       'a'           (1, 8) (1, 9)
19687db96d56Sopenharmony_ci    COMMA      ','           (1, 9) (1, 10)
19697db96d56Sopenharmony_ci    NAME       'b'           (1, 11) (1, 12)
19707db96d56Sopenharmony_ci    COMMA      ','           (1, 12) (1, 13)
19717db96d56Sopenharmony_ci    NAME       'c'           (1, 14) (1, 15)
19727db96d56Sopenharmony_ci    EQUAL      '='           (1, 15) (1, 16)
19737db96d56Sopenharmony_ci    NUMBER     '2'           (1, 16) (1, 17)
19747db96d56Sopenharmony_ci    COMMA      ','           (1, 17) (1, 18)
19757db96d56Sopenharmony_ci    NAME       'd'           (1, 19) (1, 20)
19767db96d56Sopenharmony_ci    EQUAL      '='           (1, 20) (1, 21)
19777db96d56Sopenharmony_ci    NUMBER     '2'           (1, 21) (1, 22)
19787db96d56Sopenharmony_ci    COMMA      ','           (1, 22) (1, 23)
19797db96d56Sopenharmony_ci    STAR       '*'           (1, 24) (1, 25)
19807db96d56Sopenharmony_ci    NAME       'k'           (1, 25) (1, 26)
19817db96d56Sopenharmony_ci    RPAR       ')'           (1, 26) (1, 27)
19827db96d56Sopenharmony_ci    COLON      ':'           (1, 27) (1, 28)
19837db96d56Sopenharmony_ci    NAME       'pass'        (1, 29) (1, 33)
19847db96d56Sopenharmony_ci    """)
19857db96d56Sopenharmony_ci
19867db96d56Sopenharmony_ci        self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\
19877db96d56Sopenharmony_ci    NAME       'def'         (1, 0) (1, 3)
19887db96d56Sopenharmony_ci    NAME       'd01v_'       (1, 4) (1, 9)
19897db96d56Sopenharmony_ci    LPAR       '('           (1, 9) (1, 10)
19907db96d56Sopenharmony_ci    NAME       'a'           (1, 10) (1, 11)
19917db96d56Sopenharmony_ci    EQUAL      '='           (1, 11) (1, 12)
19927db96d56Sopenharmony_ci    NUMBER     '1'           (1, 12) (1, 13)
19937db96d56Sopenharmony_ci    COMMA      ','           (1, 13) (1, 14)
19947db96d56Sopenharmony_ci    STAR       '*'           (1, 15) (1, 16)
19957db96d56Sopenharmony_ci    NAME       'k'           (1, 16) (1, 17)
19967db96d56Sopenharmony_ci    COMMA      ','           (1, 17) (1, 18)
19977db96d56Sopenharmony_ci    DOUBLESTAR '**'          (1, 19) (1, 21)
19987db96d56Sopenharmony_ci    NAME       'w'           (1, 21) (1, 22)
19997db96d56Sopenharmony_ci    RPAR       ')'           (1, 22) (1, 23)
20007db96d56Sopenharmony_ci    COLON      ':'           (1, 23) (1, 24)
20017db96d56Sopenharmony_ci    NAME       'pass'        (1, 25) (1, 29)
20027db96d56Sopenharmony_ci    """)
20037db96d56Sopenharmony_ci
20047db96d56Sopenharmony_ci        self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\
20057db96d56Sopenharmony_ci    NAME       'def'         (1, 0) (1, 3)
20067db96d56Sopenharmony_ci    NAME       'd23'         (1, 4) (1, 7)
20077db96d56Sopenharmony_ci    LPAR       '('           (1, 7) (1, 8)
20087db96d56Sopenharmony_ci    NAME       'a'           (1, 8) (1, 9)
20097db96d56Sopenharmony_ci    COLON      ':'           (1, 9) (1, 10)
20107db96d56Sopenharmony_ci    NAME       'str'         (1, 11) (1, 14)
20117db96d56Sopenharmony_ci    COMMA      ','           (1, 14) (1, 15)
20127db96d56Sopenharmony_ci    NAME       'b'           (1, 16) (1, 17)
20137db96d56Sopenharmony_ci    COLON      ':'           (1, 17) (1, 18)
20147db96d56Sopenharmony_ci    NAME       'int'         (1, 19) (1, 22)
20157db96d56Sopenharmony_ci    EQUAL      '='           (1, 22) (1, 23)
20167db96d56Sopenharmony_ci    NUMBER     '3'           (1, 23) (1, 24)
20177db96d56Sopenharmony_ci    RPAR       ')'           (1, 24) (1, 25)
20187db96d56Sopenharmony_ci    RARROW     '->'          (1, 26) (1, 28)
20197db96d56Sopenharmony_ci    NAME       'int'         (1, 29) (1, 32)
20207db96d56Sopenharmony_ci    COLON      ':'           (1, 32) (1, 33)
20217db96d56Sopenharmony_ci    NAME       'pass'        (1, 34) (1, 38)
20227db96d56Sopenharmony_ci    """)
20237db96d56Sopenharmony_ci
20247db96d56Sopenharmony_ci    def test_comparison(self):
20257db96d56Sopenharmony_ci
20267db96d56Sopenharmony_ci        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
20277db96d56Sopenharmony_ci                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
20287db96d56Sopenharmony_ci    NAME       'if'          (1, 0) (1, 2)
20297db96d56Sopenharmony_ci    NUMBER     '1'           (1, 3) (1, 4)
20307db96d56Sopenharmony_ci    LESS       '<'           (1, 5) (1, 6)
20317db96d56Sopenharmony_ci    NUMBER     '1'           (1, 7) (1, 8)
20327db96d56Sopenharmony_ci    GREATER    '>'           (1, 9) (1, 10)
20337db96d56Sopenharmony_ci    NUMBER     '1'           (1, 11) (1, 12)
20347db96d56Sopenharmony_ci    EQEQUAL    '=='          (1, 13) (1, 15)
20357db96d56Sopenharmony_ci    NUMBER     '1'           (1, 16) (1, 17)
20367db96d56Sopenharmony_ci    GREATEREQUAL '>='          (1, 18) (1, 20)
20377db96d56Sopenharmony_ci    NUMBER     '5'           (1, 21) (1, 22)
20387db96d56Sopenharmony_ci    LESSEQUAL  '<='          (1, 23) (1, 25)
20397db96d56Sopenharmony_ci    NUMBER     '0x15'        (1, 26) (1, 30)
20407db96d56Sopenharmony_ci    LESSEQUAL  '<='          (1, 31) (1, 33)
20417db96d56Sopenharmony_ci    NUMBER     '0x12'        (1, 34) (1, 38)
20427db96d56Sopenharmony_ci    NOTEQUAL   '!='          (1, 39) (1, 41)
20437db96d56Sopenharmony_ci    NUMBER     '1'           (1, 42) (1, 43)
20447db96d56Sopenharmony_ci    NAME       'and'         (1, 44) (1, 47)
20457db96d56Sopenharmony_ci    NUMBER     '5'           (1, 48) (1, 49)
20467db96d56Sopenharmony_ci    NAME       'in'          (1, 50) (1, 52)
20477db96d56Sopenharmony_ci    NUMBER     '1'           (1, 53) (1, 54)
20487db96d56Sopenharmony_ci    NAME       'not'         (1, 55) (1, 58)
20497db96d56Sopenharmony_ci    NAME       'in'          (1, 59) (1, 61)
20507db96d56Sopenharmony_ci    NUMBER     '1'           (1, 62) (1, 63)
20517db96d56Sopenharmony_ci    NAME       'is'          (1, 64) (1, 66)
20527db96d56Sopenharmony_ci    NUMBER     '1'           (1, 67) (1, 68)
20537db96d56Sopenharmony_ci    NAME       'or'          (1, 69) (1, 71)
20547db96d56Sopenharmony_ci    NUMBER     '5'           (1, 72) (1, 73)
20557db96d56Sopenharmony_ci    NAME       'is'          (1, 74) (1, 76)
20567db96d56Sopenharmony_ci    NAME       'not'         (1, 77) (1, 80)
20577db96d56Sopenharmony_ci    NUMBER     '1'           (1, 81) (1, 82)
20587db96d56Sopenharmony_ci    COLON      ':'           (1, 82) (1, 83)
20597db96d56Sopenharmony_ci    NAME       'pass'        (1, 84) (1, 88)
20607db96d56Sopenharmony_ci    """)
20617db96d56Sopenharmony_ci
20627db96d56Sopenharmony_ci    def test_additive(self):
20637db96d56Sopenharmony_ci
20647db96d56Sopenharmony_ci        self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\
20657db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
20667db96d56Sopenharmony_ci    EQUAL      '='           (1, 2) (1, 3)
20677db96d56Sopenharmony_ci    NUMBER     '1'           (1, 4) (1, 5)
20687db96d56Sopenharmony_ci    MINUS      '-'           (1, 6) (1, 7)
20697db96d56Sopenharmony_ci    NAME       'y'           (1, 8) (1, 9)
20707db96d56Sopenharmony_ci    PLUS       '+'           (1, 10) (1, 11)
20717db96d56Sopenharmony_ci    NUMBER     '15'          (1, 12) (1, 14)
20727db96d56Sopenharmony_ci    MINUS      '-'           (1, 15) (1, 16)
20737db96d56Sopenharmony_ci    NUMBER     '1'           (1, 17) (1, 18)
20747db96d56Sopenharmony_ci    PLUS       '+'           (1, 19) (1, 20)
20757db96d56Sopenharmony_ci    NUMBER     '0x124'       (1, 21) (1, 26)
20767db96d56Sopenharmony_ci    PLUS       '+'           (1, 27) (1, 28)
20777db96d56Sopenharmony_ci    NAME       'z'           (1, 29) (1, 30)
20787db96d56Sopenharmony_ci    PLUS       '+'           (1, 31) (1, 32)
20797db96d56Sopenharmony_ci    NAME       'a'           (1, 33) (1, 34)
20807db96d56Sopenharmony_ci    LSQB       '['           (1, 34) (1, 35)
20817db96d56Sopenharmony_ci    NUMBER     '5'           (1, 35) (1, 36)
20827db96d56Sopenharmony_ci    RSQB       ']'           (1, 36) (1, 37)
20837db96d56Sopenharmony_ci    """)
20847db96d56Sopenharmony_ci
20857db96d56Sopenharmony_ci    def test_multiplicative(self):
20867db96d56Sopenharmony_ci
20877db96d56Sopenharmony_ci        self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\
20887db96d56Sopenharmony_ci    NAME       'x'           (1, 0) (1, 1)
20897db96d56Sopenharmony_ci    EQUAL      '='           (1, 2) (1, 3)
20907db96d56Sopenharmony_ci    NUMBER     '1'           (1, 4) (1, 5)
20917db96d56Sopenharmony_ci    DOUBLESLASH '//'          (1, 5) (1, 7)
20927db96d56Sopenharmony_ci    NUMBER     '1'           (1, 7) (1, 8)
20937db96d56Sopenharmony_ci    STAR       '*'           (1, 8) (1, 9)
20947db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
20957db96d56Sopenharmony_ci    SLASH      '/'           (1, 10) (1, 11)
20967db96d56Sopenharmony_ci    NUMBER     '5'           (1, 11) (1, 12)
20977db96d56Sopenharmony_ci    STAR       '*'           (1, 12) (1, 13)
20987db96d56Sopenharmony_ci    NUMBER     '12'          (1, 13) (1, 15)
20997db96d56Sopenharmony_ci    PERCENT    '%'           (1, 15) (1, 16)
21007db96d56Sopenharmony_ci    NUMBER     '0x12'        (1, 16) (1, 20)
21017db96d56Sopenharmony_ci    AT         '@'           (1, 20) (1, 21)
21027db96d56Sopenharmony_ci    NUMBER     '42'          (1, 21) (1, 23)
21037db96d56Sopenharmony_ci    """)
21047db96d56Sopenharmony_ci
21057db96d56Sopenharmony_ci    def test_unary(self):
21067db96d56Sopenharmony_ci
21077db96d56Sopenharmony_ci        self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\
21087db96d56Sopenharmony_ci    TILDE      '~'           (1, 0) (1, 1)
21097db96d56Sopenharmony_ci    NUMBER     '1'           (1, 1) (1, 2)
21107db96d56Sopenharmony_ci    CIRCUMFLEX '^'           (1, 3) (1, 4)
21117db96d56Sopenharmony_ci    NUMBER     '1'           (1, 5) (1, 6)
21127db96d56Sopenharmony_ci    AMPER      '&'           (1, 7) (1, 8)
21137db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
21147db96d56Sopenharmony_ci    VBAR       '|'           (1, 11) (1, 12)
21157db96d56Sopenharmony_ci    NUMBER     '1'           (1, 12) (1, 13)
21167db96d56Sopenharmony_ci    CIRCUMFLEX '^'           (1, 14) (1, 15)
21177db96d56Sopenharmony_ci    MINUS      '-'           (1, 16) (1, 17)
21187db96d56Sopenharmony_ci    NUMBER     '1'           (1, 17) (1, 18)
21197db96d56Sopenharmony_ci    """)
21207db96d56Sopenharmony_ci
21217db96d56Sopenharmony_ci        self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\
21227db96d56Sopenharmony_ci    MINUS      '-'           (1, 0) (1, 1)
21237db96d56Sopenharmony_ci    NUMBER     '1'           (1, 1) (1, 2)
21247db96d56Sopenharmony_ci    STAR       '*'           (1, 2) (1, 3)
21257db96d56Sopenharmony_ci    NUMBER     '1'           (1, 3) (1, 4)
21267db96d56Sopenharmony_ci    SLASH      '/'           (1, 4) (1, 5)
21277db96d56Sopenharmony_ci    NUMBER     '1'           (1, 5) (1, 6)
21287db96d56Sopenharmony_ci    PLUS       '+'           (1, 6) (1, 7)
21297db96d56Sopenharmony_ci    NUMBER     '1'           (1, 7) (1, 8)
21307db96d56Sopenharmony_ci    STAR       '*'           (1, 8) (1, 9)
21317db96d56Sopenharmony_ci    NUMBER     '1'           (1, 9) (1, 10)
21327db96d56Sopenharmony_ci    DOUBLESLASH '//'          (1, 10) (1, 12)
21337db96d56Sopenharmony_ci    NUMBER     '1'           (1, 12) (1, 13)
21347db96d56Sopenharmony_ci    MINUS      '-'           (1, 14) (1, 15)
21357db96d56Sopenharmony_ci    MINUS      '-'           (1, 16) (1, 17)
21367db96d56Sopenharmony_ci    MINUS      '-'           (1, 17) (1, 18)
21377db96d56Sopenharmony_ci    MINUS      '-'           (1, 18) (1, 19)
21387db96d56Sopenharmony_ci    NUMBER     '1'           (1, 19) (1, 20)
21397db96d56Sopenharmony_ci    DOUBLESTAR '**'          (1, 20) (1, 22)
21407db96d56Sopenharmony_ci    NUMBER     '1'           (1, 22) (1, 23)
21417db96d56Sopenharmony_ci    """)
21427db96d56Sopenharmony_ci
21437db96d56Sopenharmony_ci    def test_selector(self):
21447db96d56Sopenharmony_ci
21457db96d56Sopenharmony_ci        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
21467db96d56Sopenharmony_ci    NAME       'import'      (1, 0) (1, 6)
21477db96d56Sopenharmony_ci    NAME       'sys'         (1, 7) (1, 10)
21487db96d56Sopenharmony_ci    COMMA      ','           (1, 10) (1, 11)
21497db96d56Sopenharmony_ci    NAME       'time'        (1, 12) (1, 16)
21507db96d56Sopenharmony_ci    NEWLINE    ''            (1, 16) (1, 16)
21517db96d56Sopenharmony_ci    NAME       'x'           (2, 0) (2, 1)
21527db96d56Sopenharmony_ci    EQUAL      '='           (2, 2) (2, 3)
21537db96d56Sopenharmony_ci    NAME       'sys'         (2, 4) (2, 7)
21547db96d56Sopenharmony_ci    DOT        '.'           (2, 7) (2, 8)
21557db96d56Sopenharmony_ci    NAME       'modules'     (2, 8) (2, 15)
21567db96d56Sopenharmony_ci    LSQB       '['           (2, 15) (2, 16)
21577db96d56Sopenharmony_ci    STRING     "'time'"      (2, 16) (2, 22)
21587db96d56Sopenharmony_ci    RSQB       ']'           (2, 22) (2, 23)
21597db96d56Sopenharmony_ci    DOT        '.'           (2, 23) (2, 24)
21607db96d56Sopenharmony_ci    NAME       'time'        (2, 24) (2, 28)
21617db96d56Sopenharmony_ci    LPAR       '('           (2, 28) (2, 29)
21627db96d56Sopenharmony_ci    RPAR       ')'           (2, 29) (2, 30)
21637db96d56Sopenharmony_ci    """)
21647db96d56Sopenharmony_ci
21657db96d56Sopenharmony_ci    def test_method(self):
21667db96d56Sopenharmony_ci
21677db96d56Sopenharmony_ci        self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
21687db96d56Sopenharmony_ci    AT         '@'           (1, 0) (1, 1)
21697db96d56Sopenharmony_ci    NAME       'staticmethod' (1, 1) (1, 13)
21707db96d56Sopenharmony_ci    NEWLINE    ''            (1, 13) (1, 13)
21717db96d56Sopenharmony_ci    NAME       'def'         (2, 0) (2, 3)
21727db96d56Sopenharmony_ci    NAME       'foo'         (2, 4) (2, 7)
21737db96d56Sopenharmony_ci    LPAR       '('           (2, 7) (2, 8)
21747db96d56Sopenharmony_ci    NAME       'x'           (2, 8) (2, 9)
21757db96d56Sopenharmony_ci    COMMA      ','           (2, 9) (2, 10)
21767db96d56Sopenharmony_ci    NAME       'y'           (2, 10) (2, 11)
21777db96d56Sopenharmony_ci    RPAR       ')'           (2, 11) (2, 12)
21787db96d56Sopenharmony_ci    COLON      ':'           (2, 12) (2, 13)
21797db96d56Sopenharmony_ci    NAME       'pass'        (2, 14) (2, 18)
21807db96d56Sopenharmony_ci    """)
21817db96d56Sopenharmony_ci
21827db96d56Sopenharmony_ci    def test_tabs(self):
21837db96d56Sopenharmony_ci
21847db96d56Sopenharmony_ci        self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
21857db96d56Sopenharmony_ci    AT         '@'           (1, 0) (1, 1)
21867db96d56Sopenharmony_ci    NAME       'staticmethod' (1, 1) (1, 13)
21877db96d56Sopenharmony_ci    NEWLINE    ''            (1, 13) (1, 13)
21887db96d56Sopenharmony_ci    NAME       'def'         (2, 0) (2, 3)
21897db96d56Sopenharmony_ci    NAME       'foo'         (2, 4) (2, 7)
21907db96d56Sopenharmony_ci    LPAR       '('           (2, 7) (2, 8)
21917db96d56Sopenharmony_ci    NAME       'x'           (2, 8) (2, 9)
21927db96d56Sopenharmony_ci    COMMA      ','           (2, 9) (2, 10)
21937db96d56Sopenharmony_ci    NAME       'y'           (2, 10) (2, 11)
21947db96d56Sopenharmony_ci    RPAR       ')'           (2, 11) (2, 12)
21957db96d56Sopenharmony_ci    COLON      ':'           (2, 12) (2, 13)
21967db96d56Sopenharmony_ci    NAME       'pass'        (2, 14) (2, 18)
21977db96d56Sopenharmony_ci    """)
21987db96d56Sopenharmony_ci
    def test_async(self):
        # In every expected listing below, 'async' and 'await' carry the
        # ASYNC / AWAIT token types wherever they occur (assignment target,
        # attribute, class name, parameter, ...), never NAME.

        # 'async' as an assignment target.
        self.check_tokenize('async = 1', """\
    ASYNC      'async'       (1, 0) (1, 5)
    EQUAL      '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        # 'async' inside parentheses.
        self.check_tokenize('a = (async = 1)', """\
    NAME       'a'           (1, 0) (1, 1)
    EQUAL      '='           (1, 2) (1, 3)
    LPAR       '('           (1, 4) (1, 5)
    ASYNC      'async'       (1, 5) (1, 10)
    EQUAL      '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    RPAR       ')'           (1, 14) (1, 15)
    """)

        # 'async' followed by call parentheses.
        self.check_tokenize('async()', """\
    ASYNC      'async'       (1, 0) (1, 5)
    LPAR       '('           (1, 5) (1, 6)
    RPAR       ')'           (1, 6) (1, 7)
    """)

        # 'async' in class-name position, with a base class.
        self.check_tokenize('class async(Bar):pass', """\
    NAME       'class'       (1, 0) (1, 5)
    ASYNC      'async'       (1, 6) (1, 11)
    LPAR       '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    RPAR       ')'           (1, 15) (1, 16)
    COLON      ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        # 'async' in class-name position, without bases.
        self.check_tokenize('class async:pass', """\
    NAME       'class'       (1, 0) (1, 5)
    ASYNC      'async'       (1, 6) (1, 11)
    COLON      ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        # 'await' as an assignment target.
        self.check_tokenize('await = 1', """\
    AWAIT      'await'       (1, 0) (1, 5)
    EQUAL      '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        # 'async' as an attribute name.
        self.check_tokenize('foo.async', """\
    NAME       'foo'         (1, 0) (1, 3)
    DOT        '.'           (1, 3) (1, 4)
    ASYNC      'async'       (1, 4) (1, 9)
    """)

        # 'async for' statement header.
        self.check_tokenize('async for a in b: pass', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    COLON      ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        # 'async with' statement header.
        self.check_tokenize('async with a as b: pass', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    COLON      ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        # Attribute access on 'async'.
        self.check_tokenize('async.foo', """\
    ASYNC      'async'       (1, 0) (1, 5)
    DOT        '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        # 'async' as the entire input.
        self.check_tokenize('async', """\
    ASYNC      'async'       (1, 0) (1, 5)
    """)

        # A comment-only line between the two keywords contributes no entry
        # to the expected listing; 'await' is reported on line 3.
        self.check_tokenize('async\n#comment\nawait', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NEWLINE    ''            (1, 5) (1, 5)
    AWAIT      'await'       (3, 0) (3, 5)
    """)

        # An ellipsis statement between the two keywords.
        self.check_tokenize('async\n...\nawait', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NEWLINE    ''            (1, 5) (1, 5)
    ELLIPSIS   '...'         (2, 0) (2, 3)
    NEWLINE    ''            (2, 3) (2, 3)
    AWAIT      'await'       (3, 0) (3, 5)
    """)

        # The two keywords on consecutive lines.
        self.check_tokenize('async\nawait', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NEWLINE    ''            (1, 5) (1, 5)
    AWAIT      'await'       (2, 0) (2, 5)
    """)

        # 'async' attribute used inside a binary expression.
        self.check_tokenize('foo.async + 1', """\
    NAME       'foo'         (1, 0) (1, 3)
    DOT        '.'           (1, 3) (1, 4)
    ASYNC      'async'       (1, 4) (1, 9)
    PLUS       '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        # One-line 'async def'.
        self.check_tokenize('async def foo(): pass', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    LPAR       '('           (1, 13) (1, 14)
    RPAR       ')'           (1, 14) (1, 15)
    COLON      ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        # Nested blocks inside an 'async def', then 'async' at module level.
        # INDENT / DEDENT entries are expected with empty text and column -1.
        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    LPAR       '('           (1, 13) (1, 14)
    RPAR       ')'           (1, 14) (1, 15)
    COLON      ':'           (1, 15) (1, 16)
    NEWLINE    ''            (1, 16) (1, 16)
    INDENT     ''            (2, -1) (2, -1)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    LPAR       '('           (2, 9) (2, 10)
    AWAIT      'await'       (2, 10) (2, 15)
    RPAR       ')'           (2, 15) (2, 16)
    COLON      ':'           (2, 16) (2, 17)
    NEWLINE    ''            (2, 17) (2, 17)
    INDENT     ''            (3, -1) (3, -1)
    AWAIT      'await'       (3, 4) (3, 9)
    EQUAL      '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    ''            (3, 13) (3, 13)
    DEDENT     ''            (4, -1) (4, -1)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    COLON      ':'           (4, 6) (4, 7)
    NEWLINE    ''            (4, 7) (4, 7)
    INDENT     ''            (5, -1) (5, -1)
    AWAIT      'await'       (5, 4) (5, 9)
    NEWLINE    ''            (5, 9) (5, 9)
    DEDENT     ''            (6, -1) (6, -1)
    DEDENT     ''            (6, -1) (6, -1)
    ASYNC      'async'       (6, 0) (6, 5)
    PLUSEQUAL  '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    ''            (6, 10) (6, 10)
    """)

        # 'async for' nested in an 'async def'; the input has no trailing
        # newline and the listing ends with the closing DEDENT.
        self.check_tokenize('async def foo():\n  async for i in 1: pass', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    LPAR       '('           (1, 13) (1, 14)
    RPAR       ')'           (1, 14) (1, 15)
    COLON      ':'           (1, 15) (1, 16)
    NEWLINE    ''            (1, 16) (1, 16)
    INDENT     ''            (2, -1) (2, -1)
    ASYNC      'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    COLON      ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (2, -1) (2, -1)
    """)

        # 'async' as a parameter name and a bare 'await' as the body.
        self.check_tokenize('async def foo(async): await', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    LPAR       '('           (1, 13) (1, 14)
    ASYNC      'async'       (1, 14) (1, 19)
    RPAR       ')'           (1, 19) (1, 20)
    COLON      ':'           (1, 20) (1, 21)
    AWAIT      'await'       (1, 22) (1, 27)
    """)

        # Plain 'def' containing a nested 'async def' and an 'await'
        # assignment; the blank source lines (2 and 5) have no entries.
        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    LPAR       '('           (1, 5) (1, 6)
    RPAR       ')'           (1, 6) (1, 7)
    COLON      ':'           (1, 7) (1, 8)
    NEWLINE    ''            (1, 8) (1, 8)
    INDENT     ''            (3, -1) (3, -1)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    LPAR       '('           (3, 9) (3, 10)
    RPAR       ')'           (3, 10) (3, 11)
    COLON      ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    ''            (3, 17) (3, 17)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    LPAR       '('           (4, 15) (4, 16)
    RPAR       ')'           (4, 16) (4, 17)
    COLON      ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    ''            (4, 23) (4, 23)
    AWAIT      'await'       (6, 2) (6, 7)
    EQUAL      '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (6, -1) (6, -1)
    """)

        # Same body as the previous case, but the outer function is itself
        # an 'async def'.
        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    LPAR       '('           (1, 11) (1, 12)
    RPAR       ')'           (1, 12) (1, 13)
    COLON      ':'           (1, 13) (1, 14)
    NEWLINE    ''            (1, 14) (1, 14)
    INDENT     ''            (3, -1) (3, -1)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    LPAR       '('           (3, 9) (3, 10)
    RPAR       ')'           (3, 10) (3, 11)
    COLON      ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    ''            (3, 17) (3, 17)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    LPAR       '('           (4, 15) (4, 16)
    RPAR       ')'           (4, 16) (4, 17)
    COLON      ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    ''            (4, 23) (4, 23)
    AWAIT      'await'       (6, 2) (6, 7)
    EQUAL      '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (6, -1) (6, -1)
    """)
24647db96d56Sopenharmony_ci
24657db96d56Sopenharmony_ci    def test_unicode(self):
24667db96d56Sopenharmony_ci
24677db96d56Sopenharmony_ci        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
24687db96d56Sopenharmony_ci    NAME       'Örter'       (1, 0) (1, 6)
24697db96d56Sopenharmony_ci    EQUAL      '='           (1, 7) (1, 8)
24707db96d56Sopenharmony_ci    STRING     "u'places'"   (1, 9) (1, 18)
24717db96d56Sopenharmony_ci    NEWLINE    ''            (1, 18) (1, 18)
24727db96d56Sopenharmony_ci    NAME       'grün'        (2, 0) (2, 5)
24737db96d56Sopenharmony_ci    EQUAL      '='           (2, 6) (2, 7)
24747db96d56Sopenharmony_ci    STRING     "U'green'"    (2, 8) (2, 16)
24757db96d56Sopenharmony_ci    """)
24767db96d56Sopenharmony_ci
24777db96d56Sopenharmony_ci    def test_invalid_syntax(self):
24787db96d56Sopenharmony_ci        def get_tokens(string):
24797db96d56Sopenharmony_ci            return list(_generate_tokens_from_c_tokenizer(string))
24807db96d56Sopenharmony_ci
24817db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "(1+2]")
24827db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "(1+2}")
24837db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "{1+2]")
24847db96d56Sopenharmony_ci
24857db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "1_")
24867db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "1.2_")
24877db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "1e2_")
24887db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "1e+")
24897db96d56Sopenharmony_ci
24907db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "\xa0")
24917db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "€")
24927db96d56Sopenharmony_ci
24937db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0b12")
24947db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0b1_2")
24957db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0b2")
24967db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0b1_")
24977db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0b")
24987db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0o18")
24997db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0o1_8")
25007db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0o8")
25017db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0o1_")
25027db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0o")
25037db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0x1_")
25047db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "0x")
25057db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "1_")
25067db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "012")
25077db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "1.2_")
25087db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "1e2_")
25097db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "1e+")
25107db96d56Sopenharmony_ci
25117db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
25127db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")
25137db96d56Sopenharmony_ci
25147db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000)
25157db96d56Sopenharmony_ci        self.assertRaises(SyntaxError, get_tokens, "]")
25167db96d56Sopenharmony_ci
25177db96d56Sopenharmony_ci    def test_max_indent(self):
25187db96d56Sopenharmony_ci        MAXINDENT = 100
25197db96d56Sopenharmony_ci
25207db96d56Sopenharmony_ci        def generate_source(indents):
25217db96d56Sopenharmony_ci            source = ''.join(('  ' * x) + 'if True:\n' for x in range(indents))
25227db96d56Sopenharmony_ci            source += '  ' * indents + 'pass\n'
25237db96d56Sopenharmony_ci            return source
25247db96d56Sopenharmony_ci
25257db96d56Sopenharmony_ci        valid = generate_source(MAXINDENT - 1)
25267db96d56Sopenharmony_ci        tokens = list(_generate_tokens_from_c_tokenizer(valid))
25277db96d56Sopenharmony_ci        self.assertEqual(tokens[-1].type, DEDENT)
25287db96d56Sopenharmony_ci        compile(valid, "<string>", "exec")
25297db96d56Sopenharmony_ci
25307db96d56Sopenharmony_ci        invalid = generate_source(MAXINDENT)
25317db96d56Sopenharmony_ci        tokens = list(_generate_tokens_from_c_tokenizer(invalid))
25327db96d56Sopenharmony_ci        self.assertEqual(tokens[-1].type, NEWLINE)
25337db96d56Sopenharmony_ci        self.assertRaises(
25347db96d56Sopenharmony_ci            IndentationError, compile, invalid, "<string>", "exec"
25357db96d56Sopenharmony_ci        )
25367db96d56Sopenharmony_ci
25377db96d56Sopenharmony_ci    def test_continuation_lines_indentation(self):
25387db96d56Sopenharmony_ci        def get_tokens(string):
25397db96d56Sopenharmony_ci            return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
25407db96d56Sopenharmony_ci
25417db96d56Sopenharmony_ci        code = dedent("""
25427db96d56Sopenharmony_ci            def fib(n):
25437db96d56Sopenharmony_ci                \\
25447db96d56Sopenharmony_ci            '''Print a Fibonacci series up to n.'''
25457db96d56Sopenharmony_ci                \\
25467db96d56Sopenharmony_ci            a, b = 0, 1
25477db96d56Sopenharmony_ci        """)
25487db96d56Sopenharmony_ci
25497db96d56Sopenharmony_ci        self.check_tokenize(code, """\
25507db96d56Sopenharmony_ci    NAME       'def'         (2, 0) (2, 3)
25517db96d56Sopenharmony_ci    NAME       'fib'         (2, 4) (2, 7)
25527db96d56Sopenharmony_ci    LPAR       '('           (2, 7) (2, 8)
25537db96d56Sopenharmony_ci    NAME       'n'           (2, 8) (2, 9)
25547db96d56Sopenharmony_ci    RPAR       ')'           (2, 9) (2, 10)
25557db96d56Sopenharmony_ci    COLON      ':'           (2, 10) (2, 11)
25567db96d56Sopenharmony_ci    NEWLINE    ''            (2, 11) (2, 11)
25577db96d56Sopenharmony_ci    INDENT     ''            (4, -1) (4, -1)
25587db96d56Sopenharmony_ci    STRING     "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39)
25597db96d56Sopenharmony_ci    NEWLINE    ''            (4, 39) (4, 39)
25607db96d56Sopenharmony_ci    NAME       'a'           (6, 0) (6, 1)
25617db96d56Sopenharmony_ci    COMMA      ','           (6, 1) (6, 2)
25627db96d56Sopenharmony_ci    NAME       'b'           (6, 3) (6, 4)
25637db96d56Sopenharmony_ci    EQUAL      '='           (6, 5) (6, 6)
25647db96d56Sopenharmony_ci    NUMBER     '0'           (6, 7) (6, 8)
25657db96d56Sopenharmony_ci    COMMA      ','           (6, 8) (6, 9)
25667db96d56Sopenharmony_ci    NUMBER     '1'           (6, 10) (6, 11)
25677db96d56Sopenharmony_ci    NEWLINE    ''            (6, 11) (6, 11)
25687db96d56Sopenharmony_ci    DEDENT     ''            (6, -1) (6, -1)
25697db96d56Sopenharmony_ci        """)
25707db96d56Sopenharmony_ci
25717db96d56Sopenharmony_ci        code_no_cont = dedent("""
25727db96d56Sopenharmony_ci            def fib(n):
25737db96d56Sopenharmony_ci                '''Print a Fibonacci series up to n.'''
25747db96d56Sopenharmony_ci                a, b = 0, 1
25757db96d56Sopenharmony_ci        """)
25767db96d56Sopenharmony_ci
25777db96d56Sopenharmony_ci        self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
25787db96d56Sopenharmony_ci
25797db96d56Sopenharmony_ci        code = dedent("""
25807db96d56Sopenharmony_ci            pass
25817db96d56Sopenharmony_ci                \\
25827db96d56Sopenharmony_ci
25837db96d56Sopenharmony_ci            pass
25847db96d56Sopenharmony_ci        """)
25857db96d56Sopenharmony_ci
25867db96d56Sopenharmony_ci        self.check_tokenize(code, """\
25877db96d56Sopenharmony_ci    NAME       'pass'        (2, 0) (2, 4)
25887db96d56Sopenharmony_ci    NEWLINE    ''            (2, 4) (2, 4)
25897db96d56Sopenharmony_ci    NAME       'pass'        (5, 0) (5, 4)
25907db96d56Sopenharmony_ci    NEWLINE    ''            (5, 4) (5, 4)
25917db96d56Sopenharmony_ci        """)
25927db96d56Sopenharmony_ci
25937db96d56Sopenharmony_ci        code_no_cont = dedent("""
25947db96d56Sopenharmony_ci            pass
25957db96d56Sopenharmony_ci            pass
25967db96d56Sopenharmony_ci        """)
25977db96d56Sopenharmony_ci
25987db96d56Sopenharmony_ci        self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
25997db96d56Sopenharmony_ci
26007db96d56Sopenharmony_ci        code = dedent("""
26017db96d56Sopenharmony_ci            if x:
26027db96d56Sopenharmony_ci                y = 1
26037db96d56Sopenharmony_ci                \\
26047db96d56Sopenharmony_ci                        \\
26057db96d56Sopenharmony_ci                    \\
26067db96d56Sopenharmony_ci                \\
26077db96d56Sopenharmony_ci                foo = 1
26087db96d56Sopenharmony_ci        """)
26097db96d56Sopenharmony_ci
26107db96d56Sopenharmony_ci        self.check_tokenize(code, """\
26117db96d56Sopenharmony_ci    NAME       'if'          (2, 0) (2, 2)
26127db96d56Sopenharmony_ci    NAME       'x'           (2, 3) (2, 4)
26137db96d56Sopenharmony_ci    COLON      ':'           (2, 4) (2, 5)
26147db96d56Sopenharmony_ci    NEWLINE    ''            (2, 5) (2, 5)
26157db96d56Sopenharmony_ci    INDENT     ''            (3, -1) (3, -1)
26167db96d56Sopenharmony_ci    NAME       'y'           (3, 4) (3, 5)
26177db96d56Sopenharmony_ci    EQUAL      '='           (3, 6) (3, 7)
26187db96d56Sopenharmony_ci    NUMBER     '1'           (3, 8) (3, 9)
26197db96d56Sopenharmony_ci    NEWLINE    ''            (3, 9) (3, 9)
26207db96d56Sopenharmony_ci    NAME       'foo'         (8, 4) (8, 7)
26217db96d56Sopenharmony_ci    EQUAL      '='           (8, 8) (8, 9)
26227db96d56Sopenharmony_ci    NUMBER     '1'           (8, 10) (8, 11)
26237db96d56Sopenharmony_ci    NEWLINE    ''            (8, 11) (8, 11)
26247db96d56Sopenharmony_ci    DEDENT     ''            (8, -1) (8, -1)
26257db96d56Sopenharmony_ci        """)
26267db96d56Sopenharmony_ci
26277db96d56Sopenharmony_ci        code_no_cont = dedent("""
26287db96d56Sopenharmony_ci            if x:
26297db96d56Sopenharmony_ci                y = 1
26307db96d56Sopenharmony_ci                foo = 1
26317db96d56Sopenharmony_ci        """)
26327db96d56Sopenharmony_ci
26337db96d56Sopenharmony_ci        self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
26347db96d56Sopenharmony_ci
26357db96d56Sopenharmony_ci
class CTokenizerBufferTests(unittest.TestCase):
    """Tests that run whole scripts to exercise tokenizer buffer handling."""

    def test_newline_at_the_end_of_buffer(self):
        """Run a script whose long final line has no trailing newline."""
        # See issue 99581: when a newline has to be appended at the end of
        # the buffer there must be room for it, especially when the current
        # line is as long as the buffer space available.
        long_run = "a" * 10000
        longer_run = "a" * 10002
        test_script = f"""\
        #coding: latin-1
        #{long_run}
        #{longer_run}"""
        with os_helper.temp_dir() as script_dir:
            script_path = make_script(script_dir, 'foo', test_script)
            run_test_script(script_path)
26487db96d56Sopenharmony_ci
26497db96d56Sopenharmony_ci
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
2652