17db96d56Sopenharmony_cifrom test import support 27db96d56Sopenharmony_cifrom test.support import os_helper 37db96d56Sopenharmony_cifrom tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, 47db96d56Sopenharmony_ci STRING, ENDMARKER, ENCODING, tok_name, detect_encoding, 57db96d56Sopenharmony_ci open as tokenize_open, Untokenizer, generate_tokens, 67db96d56Sopenharmony_ci NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT) 77db96d56Sopenharmony_cifrom io import BytesIO, StringIO 87db96d56Sopenharmony_ciimport unittest 97db96d56Sopenharmony_cifrom textwrap import dedent 107db96d56Sopenharmony_cifrom unittest import TestCase, mock 117db96d56Sopenharmony_cifrom test.test_grammar import (VALID_UNDERSCORE_LITERALS, 127db96d56Sopenharmony_ci INVALID_UNDERSCORE_LITERALS) 137db96d56Sopenharmony_cifrom test.support import os_helper 147db96d56Sopenharmony_cifrom test.support.script_helper import run_test_script, make_script 157db96d56Sopenharmony_ciimport os 167db96d56Sopenharmony_ciimport token 177db96d56Sopenharmony_ci 187db96d56Sopenharmony_ci# Converts a source string into a list of textual representation 197db96d56Sopenharmony_ci# of the tokens such as: 207db96d56Sopenharmony_ci# ` NAME 'if' (1, 0) (1, 2)` 217db96d56Sopenharmony_ci# to make writing tests easier. 227db96d56Sopenharmony_cidef stringify_tokens_from_source(token_generator, source_string): 237db96d56Sopenharmony_ci result = [] 247db96d56Sopenharmony_ci num_lines = len(source_string.splitlines()) 257db96d56Sopenharmony_ci missing_trailing_nl = source_string[-1] not in '\r\n' 267db96d56Sopenharmony_ci 277db96d56Sopenharmony_ci for type, token, start, end, line in token_generator: 287db96d56Sopenharmony_ci if type == ENDMARKER: 297db96d56Sopenharmony_ci break 307db96d56Sopenharmony_ci # Ignore the new line on the last line if the input lacks one 317db96d56Sopenharmony_ci if missing_trailing_nl and type == NEWLINE and end[0] == num_lines: 327db96d56Sopenharmony_ci continue 337db96d56Sopenharmony_ci type = tok_name[type] 347db96d56Sopenharmony_ci result.append(f" {type:10} {token!r:13} {start} {end}") 357db96d56Sopenharmony_ci 367db96d56Sopenharmony_ci return result 377db96d56Sopenharmony_ci 387db96d56Sopenharmony_ciclass TokenizeTest(TestCase): 397db96d56Sopenharmony_ci # Tests for the tokenize module. 407db96d56Sopenharmony_ci 417db96d56Sopenharmony_ci # The tests can be really simple. Given a small fragment of source 427db96d56Sopenharmony_ci # code, print out a table with tokens. The ENDMARKER, ENCODING and 437db96d56Sopenharmony_ci # final NEWLINE are omitted for brevity. 447db96d56Sopenharmony_ci 457db96d56Sopenharmony_ci def check_tokenize(self, s, expected): 467db96d56Sopenharmony_ci # Format the tokens in s in a table format. 477db96d56Sopenharmony_ci # The ENDMARKER and final NEWLINE are omitted. 487db96d56Sopenharmony_ci f = BytesIO(s.encode('utf-8')) 497db96d56Sopenharmony_ci result = stringify_tokens_from_source(tokenize(f.readline), s) 507db96d56Sopenharmony_ci self.assertEqual(result, 517db96d56Sopenharmony_ci [" ENCODING 'utf-8' (0, 0) (0, 0)"] + 527db96d56Sopenharmony_ci expected.rstrip().splitlines()) 537db96d56Sopenharmony_ci 547db96d56Sopenharmony_ci def test_implicit_newline(self): 557db96d56Sopenharmony_ci # Make sure that the tokenizer puts in an implicit NEWLINE 567db96d56Sopenharmony_ci # when the input lacks a trailing new line. 577db96d56Sopenharmony_ci f = BytesIO("x".encode('utf-8')) 587db96d56Sopenharmony_ci tokens = list(tokenize(f.readline)) 597db96d56Sopenharmony_ci self.assertEqual(tokens[-2].type, NEWLINE) 607db96d56Sopenharmony_ci self.assertEqual(tokens[-1].type, ENDMARKER) 617db96d56Sopenharmony_ci 627db96d56Sopenharmony_ci def test_basic(self): 637db96d56Sopenharmony_ci self.check_tokenize("1 + 1", """\ 647db96d56Sopenharmony_ci NUMBER '1' (1, 0) (1, 1) 657db96d56Sopenharmony_ci OP '+' (1, 2) (1, 3) 667db96d56Sopenharmony_ci NUMBER '1' (1, 4) (1, 5) 677db96d56Sopenharmony_ci """) 687db96d56Sopenharmony_ci self.check_tokenize("if False:\n" 697db96d56Sopenharmony_ci " # NL\n" 707db96d56Sopenharmony_ci " \n" 717db96d56Sopenharmony_ci " True = False # NEWLINE\n", """\ 727db96d56Sopenharmony_ci NAME 'if' (1, 0) (1, 2) 737db96d56Sopenharmony_ci NAME 'False' (1, 3) (1, 8) 747db96d56Sopenharmony_ci OP ':' (1, 8) (1, 9) 757db96d56Sopenharmony_ci NEWLINE '\\n' (1, 9) (1, 10) 767db96d56Sopenharmony_ci COMMENT '# NL' (2, 4) (2, 8) 777db96d56Sopenharmony_ci NL '\\n' (2, 8) (2, 9) 787db96d56Sopenharmony_ci NL '\\n' (3, 4) (3, 5) 797db96d56Sopenharmony_ci INDENT ' ' (4, 0) (4, 4) 807db96d56Sopenharmony_ci NAME 'True' (4, 4) (4, 8) 817db96d56Sopenharmony_ci OP '=' (4, 9) (4, 10) 827db96d56Sopenharmony_ci NAME 'False' (4, 11) (4, 16) 837db96d56Sopenharmony_ci COMMENT '# NEWLINE' (4, 17) (4, 26) 847db96d56Sopenharmony_ci NEWLINE '\\n' (4, 26) (4, 27) 857db96d56Sopenharmony_ci DEDENT '' (5, 0) (5, 0) 867db96d56Sopenharmony_ci """) 877db96d56Sopenharmony_ci indent_error_file = b"""\ 887db96d56Sopenharmony_cidef k(x): 897db96d56Sopenharmony_ci x += 2 907db96d56Sopenharmony_ci x += 5 917db96d56Sopenharmony_ci""" 927db96d56Sopenharmony_ci readline = BytesIO(indent_error_file).readline 937db96d56Sopenharmony_ci with self.assertRaisesRegex(IndentationError, 947db96d56Sopenharmony_ci "unindent does not match any " 957db96d56Sopenharmony_ci "outer indentation level"): 967db96d56Sopenharmony_ci for tok in tokenize(readline): 977db96d56Sopenharmony_ci pass 987db96d56Sopenharmony_ci 997db96d56Sopenharmony_ci def test_int(self): 1007db96d56Sopenharmony_ci # Ordinary integers and binary operators 1017db96d56Sopenharmony_ci self.check_tokenize("0xff <= 255", """\ 1027db96d56Sopenharmony_ci NUMBER '0xff' (1, 0) (1, 4) 1037db96d56Sopenharmony_ci OP '<=' (1, 5) (1, 7) 1047db96d56Sopenharmony_ci NUMBER '255' (1, 8) (1, 11) 1057db96d56Sopenharmony_ci """) 1067db96d56Sopenharmony_ci self.check_tokenize("0b10 <= 255", """\ 1077db96d56Sopenharmony_ci NUMBER '0b10' (1, 0) (1, 4) 1087db96d56Sopenharmony_ci OP '<=' (1, 5) (1, 7) 1097db96d56Sopenharmony_ci NUMBER '255' (1, 8) (1, 11) 1107db96d56Sopenharmony_ci """) 1117db96d56Sopenharmony_ci self.check_tokenize("0o123 <= 0O123", """\ 1127db96d56Sopenharmony_ci NUMBER '0o123' (1, 0) (1, 5) 1137db96d56Sopenharmony_ci OP '<=' (1, 6) (1, 8) 1147db96d56Sopenharmony_ci NUMBER '0O123' (1, 9) (1, 14) 1157db96d56Sopenharmony_ci """) 1167db96d56Sopenharmony_ci self.check_tokenize("1234567 > ~0x15", """\ 1177db96d56Sopenharmony_ci NUMBER '1234567' (1, 0) (1, 7) 1187db96d56Sopenharmony_ci OP '>' (1, 8) (1, 9) 1197db96d56Sopenharmony_ci OP '~' (1, 10) (1, 11) 1207db96d56Sopenharmony_ci NUMBER '0x15' (1, 11) (1, 15) 1217db96d56Sopenharmony_ci """) 1227db96d56Sopenharmony_ci self.check_tokenize("2134568 != 1231515", """\ 1237db96d56Sopenharmony_ci NUMBER '2134568' (1, 0) (1, 7) 1247db96d56Sopenharmony_ci OP '!=' (1, 8) (1, 10) 1257db96d56Sopenharmony_ci NUMBER '1231515' (1, 11) (1, 18) 1267db96d56Sopenharmony_ci """) 1277db96d56Sopenharmony_ci self.check_tokenize("(-124561-1) & 200000000", """\ 1287db96d56Sopenharmony_ci OP '(' (1, 0) (1, 1) 1297db96d56Sopenharmony_ci OP '-' (1, 1) (1, 2) 1307db96d56Sopenharmony_ci NUMBER '124561' (1, 2) (1, 8) 1317db96d56Sopenharmony_ci OP '-' (1, 8) (1, 9) 1327db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 1337db96d56Sopenharmony_ci OP ')' (1, 10) (1, 11) 1347db96d56Sopenharmony_ci OP '&' (1, 12) (1, 13) 1357db96d56Sopenharmony_ci NUMBER '200000000' (1, 14) (1, 23) 1367db96d56Sopenharmony_ci """) 1377db96d56Sopenharmony_ci self.check_tokenize("0xdeadbeef != -1", """\ 1387db96d56Sopenharmony_ci NUMBER '0xdeadbeef' (1, 0) (1, 10) 1397db96d56Sopenharmony_ci OP '!=' (1, 11) (1, 13) 1407db96d56Sopenharmony_ci OP '-' (1, 14) (1, 15) 1417db96d56Sopenharmony_ci NUMBER '1' (1, 15) (1, 16) 1427db96d56Sopenharmony_ci """) 1437db96d56Sopenharmony_ci self.check_tokenize("0xdeadc0de & 12345", """\ 1447db96d56Sopenharmony_ci NUMBER '0xdeadc0de' (1, 0) (1, 10) 1457db96d56Sopenharmony_ci OP '&' (1, 11) (1, 12) 1467db96d56Sopenharmony_ci NUMBER '12345' (1, 13) (1, 18) 1477db96d56Sopenharmony_ci """) 1487db96d56Sopenharmony_ci self.check_tokenize("0xFF & 0x15 | 1234", """\ 1497db96d56Sopenharmony_ci NUMBER '0xFF' (1, 0) (1, 4) 1507db96d56Sopenharmony_ci OP '&' (1, 5) (1, 6) 1517db96d56Sopenharmony_ci NUMBER '0x15' (1, 7) (1, 11) 1527db96d56Sopenharmony_ci OP '|' (1, 12) (1, 13) 1537db96d56Sopenharmony_ci NUMBER '1234' (1, 14) (1, 18) 1547db96d56Sopenharmony_ci """) 1557db96d56Sopenharmony_ci 1567db96d56Sopenharmony_ci def test_long(self): 1577db96d56Sopenharmony_ci # Long integers 1587db96d56Sopenharmony_ci self.check_tokenize("x = 0", """\ 1597db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 1607db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 1617db96d56Sopenharmony_ci NUMBER '0' (1, 4) (1, 5) 1627db96d56Sopenharmony_ci """) 1637db96d56Sopenharmony_ci self.check_tokenize("x = 0xfffffffffff", """\ 1647db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 1657db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 1667db96d56Sopenharmony_ci NUMBER '0xfffffffffff' (1, 4) (1, 17) 1677db96d56Sopenharmony_ci """) 1687db96d56Sopenharmony_ci self.check_tokenize("x = 123141242151251616110", """\ 1697db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 1707db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 1717db96d56Sopenharmony_ci NUMBER '123141242151251616110' (1, 4) (1, 25) 1727db96d56Sopenharmony_ci """) 1737db96d56Sopenharmony_ci self.check_tokenize("x = -15921590215012591", """\ 1747db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 1757db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 1767db96d56Sopenharmony_ci OP '-' (1, 4) (1, 5) 1777db96d56Sopenharmony_ci NUMBER '15921590215012591' (1, 5) (1, 22) 1787db96d56Sopenharmony_ci """) 1797db96d56Sopenharmony_ci 1807db96d56Sopenharmony_ci def test_float(self): 1817db96d56Sopenharmony_ci # Floating point numbers 1827db96d56Sopenharmony_ci self.check_tokenize("x = 3.14159", """\ 1837db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 1847db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 1857db96d56Sopenharmony_ci NUMBER '3.14159' (1, 4) (1, 11) 1867db96d56Sopenharmony_ci """) 1877db96d56Sopenharmony_ci self.check_tokenize("x = 314159.", """\ 1887db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 1897db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 1907db96d56Sopenharmony_ci NUMBER '314159.' (1, 4) (1, 11) 1917db96d56Sopenharmony_ci """) 1927db96d56Sopenharmony_ci self.check_tokenize("x = .314159", """\ 1937db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 1947db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 1957db96d56Sopenharmony_ci NUMBER '.314159' (1, 4) (1, 11) 1967db96d56Sopenharmony_ci """) 1977db96d56Sopenharmony_ci self.check_tokenize("x = 3e14159", """\ 1987db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 1997db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2007db96d56Sopenharmony_ci NUMBER '3e14159' (1, 4) (1, 11) 2017db96d56Sopenharmony_ci """) 2027db96d56Sopenharmony_ci self.check_tokenize("x = 3E123", """\ 2037db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 2047db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2057db96d56Sopenharmony_ci NUMBER '3E123' (1, 4) (1, 9) 2067db96d56Sopenharmony_ci """) 2077db96d56Sopenharmony_ci self.check_tokenize("x+y = 3e-1230", """\ 2087db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 2097db96d56Sopenharmony_ci OP '+' (1, 1) (1, 2) 2107db96d56Sopenharmony_ci NAME 'y' (1, 2) (1, 3) 2117db96d56Sopenharmony_ci OP '=' (1, 4) (1, 5) 2127db96d56Sopenharmony_ci NUMBER '3e-1230' (1, 6) (1, 13) 2137db96d56Sopenharmony_ci """) 2147db96d56Sopenharmony_ci self.check_tokenize("x = 3.14e159", """\ 2157db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 2167db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2177db96d56Sopenharmony_ci NUMBER '3.14e159' (1, 4) (1, 12) 2187db96d56Sopenharmony_ci """) 2197db96d56Sopenharmony_ci 2207db96d56Sopenharmony_ci def test_underscore_literals(self): 2217db96d56Sopenharmony_ci def number_token(s): 2227db96d56Sopenharmony_ci f = BytesIO(s.encode('utf-8')) 2237db96d56Sopenharmony_ci for toktype, token, start, end, line in tokenize(f.readline): 2247db96d56Sopenharmony_ci if toktype == NUMBER: 2257db96d56Sopenharmony_ci return token 2267db96d56Sopenharmony_ci return 'invalid token' 2277db96d56Sopenharmony_ci for lit in VALID_UNDERSCORE_LITERALS: 2287db96d56Sopenharmony_ci if '(' in lit: 2297db96d56Sopenharmony_ci # this won't work with compound complex inputs 2307db96d56Sopenharmony_ci continue 2317db96d56Sopenharmony_ci self.assertEqual(number_token(lit), lit) 2327db96d56Sopenharmony_ci for lit in INVALID_UNDERSCORE_LITERALS: 2337db96d56Sopenharmony_ci self.assertNotEqual(number_token(lit), lit) 2347db96d56Sopenharmony_ci 2357db96d56Sopenharmony_ci def test_string(self): 2367db96d56Sopenharmony_ci # String literals 2377db96d56Sopenharmony_ci self.check_tokenize("x = ''; y = \"\"", """\ 2387db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 2397db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2407db96d56Sopenharmony_ci STRING "''" (1, 4) (1, 6) 2417db96d56Sopenharmony_ci OP ';' (1, 6) (1, 7) 2427db96d56Sopenharmony_ci NAME 'y' (1, 8) (1, 9) 2437db96d56Sopenharmony_ci OP '=' (1, 10) (1, 11) 2447db96d56Sopenharmony_ci STRING '""' (1, 12) (1, 14) 2457db96d56Sopenharmony_ci """) 2467db96d56Sopenharmony_ci self.check_tokenize("x = '\"'; y = \"'\"", """\ 2477db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 2487db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2497db96d56Sopenharmony_ci STRING '\\'"\\'' (1, 4) (1, 7) 2507db96d56Sopenharmony_ci OP ';' (1, 7) (1, 8) 2517db96d56Sopenharmony_ci NAME 'y' (1, 9) (1, 10) 2527db96d56Sopenharmony_ci OP '=' (1, 11) (1, 12) 2537db96d56Sopenharmony_ci STRING '"\\'"' (1, 13) (1, 16) 2547db96d56Sopenharmony_ci """) 2557db96d56Sopenharmony_ci self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\ 2567db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 2577db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2587db96d56Sopenharmony_ci STRING '"doesn\\'t "' (1, 4) (1, 14) 2597db96d56Sopenharmony_ci NAME 'shrink' (1, 14) (1, 20) 2607db96d56Sopenharmony_ci STRING '", does it"' (1, 20) (1, 31) 2617db96d56Sopenharmony_ci """) 2627db96d56Sopenharmony_ci self.check_tokenize("x = 'abc' + 'ABC'", """\ 2637db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 2647db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2657db96d56Sopenharmony_ci STRING "'abc'" (1, 4) (1, 9) 2667db96d56Sopenharmony_ci OP '+' (1, 10) (1, 11) 2677db96d56Sopenharmony_ci STRING "'ABC'" (1, 12) (1, 17) 2687db96d56Sopenharmony_ci """) 2697db96d56Sopenharmony_ci self.check_tokenize('y = "ABC" + "ABC"', """\ 2707db96d56Sopenharmony_ci NAME 'y' (1, 0) (1, 1) 2717db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2727db96d56Sopenharmony_ci STRING '"ABC"' (1, 4) (1, 9) 2737db96d56Sopenharmony_ci OP '+' (1, 10) (1, 11) 2747db96d56Sopenharmony_ci STRING '"ABC"' (1, 12) (1, 17) 2757db96d56Sopenharmony_ci """) 2767db96d56Sopenharmony_ci self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\ 2777db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 2787db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2797db96d56Sopenharmony_ci STRING "r'abc'" (1, 4) (1, 10) 2807db96d56Sopenharmony_ci OP '+' (1, 11) (1, 12) 2817db96d56Sopenharmony_ci STRING "r'ABC'" (1, 13) (1, 19) 2827db96d56Sopenharmony_ci OP '+' (1, 20) (1, 21) 2837db96d56Sopenharmony_ci STRING "R'ABC'" (1, 22) (1, 28) 2847db96d56Sopenharmony_ci OP '+' (1, 29) (1, 30) 2857db96d56Sopenharmony_ci STRING "R'ABC'" (1, 31) (1, 37) 2867db96d56Sopenharmony_ci """) 2877db96d56Sopenharmony_ci self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\ 2887db96d56Sopenharmony_ci NAME 'y' (1, 0) (1, 1) 2897db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 2907db96d56Sopenharmony_ci STRING 'r"abc"' (1, 4) (1, 10) 2917db96d56Sopenharmony_ci OP '+' (1, 11) (1, 12) 2927db96d56Sopenharmony_ci STRING 'r"ABC"' (1, 13) (1, 19) 2937db96d56Sopenharmony_ci OP '+' (1, 20) (1, 21) 2947db96d56Sopenharmony_ci STRING 'R"ABC"' (1, 22) (1, 28) 2957db96d56Sopenharmony_ci OP '+' (1, 29) (1, 30) 2967db96d56Sopenharmony_ci STRING 'R"ABC"' (1, 31) (1, 37) 2977db96d56Sopenharmony_ci """) 2987db96d56Sopenharmony_ci 2997db96d56Sopenharmony_ci self.check_tokenize("u'abc' + U'abc'", """\ 3007db96d56Sopenharmony_ci STRING "u'abc'" (1, 0) (1, 6) 3017db96d56Sopenharmony_ci OP '+' (1, 7) (1, 8) 3027db96d56Sopenharmony_ci STRING "U'abc'" (1, 9) (1, 15) 3037db96d56Sopenharmony_ci """) 3047db96d56Sopenharmony_ci self.check_tokenize('u"abc" + U"abc"', """\ 3057db96d56Sopenharmony_ci STRING 'u"abc"' (1, 0) (1, 6) 3067db96d56Sopenharmony_ci OP '+' (1, 7) (1, 8) 3077db96d56Sopenharmony_ci STRING 'U"abc"' (1, 9) (1, 15) 3087db96d56Sopenharmony_ci """) 3097db96d56Sopenharmony_ci 3107db96d56Sopenharmony_ci self.check_tokenize("b'abc' + B'abc'", """\ 3117db96d56Sopenharmony_ci STRING "b'abc'" (1, 0) (1, 6) 3127db96d56Sopenharmony_ci OP '+' (1, 7) (1, 8) 3137db96d56Sopenharmony_ci STRING "B'abc'" (1, 9) (1, 15) 3147db96d56Sopenharmony_ci """) 3157db96d56Sopenharmony_ci self.check_tokenize('b"abc" + B"abc"', """\ 3167db96d56Sopenharmony_ci STRING 'b"abc"' (1, 0) (1, 6) 3177db96d56Sopenharmony_ci OP '+' (1, 7) (1, 8) 3187db96d56Sopenharmony_ci STRING 'B"abc"' (1, 9) (1, 15) 3197db96d56Sopenharmony_ci """) 3207db96d56Sopenharmony_ci self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\ 3217db96d56Sopenharmony_ci STRING "br'abc'" (1, 0) (1, 7) 3227db96d56Sopenharmony_ci OP '+' (1, 8) (1, 9) 3237db96d56Sopenharmony_ci STRING "bR'abc'" (1, 10) (1, 17) 3247db96d56Sopenharmony_ci OP '+' (1, 18) (1, 19) 3257db96d56Sopenharmony_ci STRING "Br'abc'" (1, 20) (1, 27) 3267db96d56Sopenharmony_ci OP '+' (1, 28) (1, 29) 3277db96d56Sopenharmony_ci STRING "BR'abc'" (1, 30) (1, 37) 3287db96d56Sopenharmony_ci """) 3297db96d56Sopenharmony_ci self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\ 3307db96d56Sopenharmony_ci STRING 'br"abc"' (1, 0) (1, 7) 3317db96d56Sopenharmony_ci OP '+' (1, 8) (1, 9) 3327db96d56Sopenharmony_ci STRING 'bR"abc"' (1, 10) (1, 17) 3337db96d56Sopenharmony_ci OP '+' (1, 18) (1, 19) 3347db96d56Sopenharmony_ci STRING 'Br"abc"' (1, 20) (1, 27) 3357db96d56Sopenharmony_ci OP '+' (1, 28) (1, 29) 3367db96d56Sopenharmony_ci STRING 'BR"abc"' (1, 30) (1, 37) 3377db96d56Sopenharmony_ci """) 3387db96d56Sopenharmony_ci self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\ 3397db96d56Sopenharmony_ci STRING "rb'abc'" (1, 0) (1, 7) 3407db96d56Sopenharmony_ci OP '+' (1, 8) (1, 9) 3417db96d56Sopenharmony_ci STRING "rB'abc'" (1, 10) (1, 17) 3427db96d56Sopenharmony_ci OP '+' (1, 18) (1, 19) 3437db96d56Sopenharmony_ci STRING "Rb'abc'" (1, 20) (1, 27) 3447db96d56Sopenharmony_ci OP '+' (1, 28) (1, 29) 3457db96d56Sopenharmony_ci STRING "RB'abc'" (1, 30) (1, 37) 3467db96d56Sopenharmony_ci """) 3477db96d56Sopenharmony_ci self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\ 3487db96d56Sopenharmony_ci STRING 'rb"abc"' (1, 0) (1, 7) 3497db96d56Sopenharmony_ci OP '+' (1, 8) (1, 9) 3507db96d56Sopenharmony_ci STRING 'rB"abc"' (1, 10) (1, 17) 3517db96d56Sopenharmony_ci OP '+' (1, 18) (1, 19) 3527db96d56Sopenharmony_ci STRING 'Rb"abc"' (1, 20) (1, 27) 3537db96d56Sopenharmony_ci OP '+' (1, 28) (1, 29) 3547db96d56Sopenharmony_ci STRING 'RB"abc"' (1, 30) (1, 37) 3557db96d56Sopenharmony_ci """) 3567db96d56Sopenharmony_ci # Check 0, 1, and 2 character string prefixes. 3577db96d56Sopenharmony_ci self.check_tokenize(r'"a\ 3587db96d56Sopenharmony_cide\ 3597db96d56Sopenharmony_cifg"', """\ 3607db96d56Sopenharmony_ci STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3) 3617db96d56Sopenharmony_ci """) 3627db96d56Sopenharmony_ci self.check_tokenize(r'u"a\ 3637db96d56Sopenharmony_cide"', """\ 3647db96d56Sopenharmony_ci STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3) 3657db96d56Sopenharmony_ci """) 3667db96d56Sopenharmony_ci self.check_tokenize(r'rb"a\ 3677db96d56Sopenharmony_cid"', """\ 3687db96d56Sopenharmony_ci STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2) 3697db96d56Sopenharmony_ci """) 3707db96d56Sopenharmony_ci self.check_tokenize(r'"""a\ 3717db96d56Sopenharmony_cib"""', """\ 3727db96d56Sopenharmony_ci STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) 3737db96d56Sopenharmony_ci """) 3747db96d56Sopenharmony_ci self.check_tokenize(r'u"""a\ 3757db96d56Sopenharmony_cib"""', """\ 3767db96d56Sopenharmony_ci STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) 3777db96d56Sopenharmony_ci """) 3787db96d56Sopenharmony_ci self.check_tokenize(r'rb"""a\ 3797db96d56Sopenharmony_cib\ 3807db96d56Sopenharmony_cic"""', """\ 3817db96d56Sopenharmony_ci STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4) 3827db96d56Sopenharmony_ci """) 3837db96d56Sopenharmony_ci self.check_tokenize('f"abc"', """\ 3847db96d56Sopenharmony_ci STRING 'f"abc"' (1, 0) (1, 6) 3857db96d56Sopenharmony_ci """) 3867db96d56Sopenharmony_ci self.check_tokenize('fR"a{b}c"', """\ 3877db96d56Sopenharmony_ci STRING 'fR"a{b}c"' (1, 0) (1, 9) 3887db96d56Sopenharmony_ci """) 3897db96d56Sopenharmony_ci self.check_tokenize('f"""abc"""', """\ 3907db96d56Sopenharmony_ci STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10) 3917db96d56Sopenharmony_ci """) 3927db96d56Sopenharmony_ci self.check_tokenize(r'f"abc\ 3937db96d56Sopenharmony_cidef"', """\ 3947db96d56Sopenharmony_ci STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4) 3957db96d56Sopenharmony_ci """) 3967db96d56Sopenharmony_ci self.check_tokenize(r'Rf"abc\ 3977db96d56Sopenharmony_cidef"', """\ 3987db96d56Sopenharmony_ci STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4) 3997db96d56Sopenharmony_ci """) 4007db96d56Sopenharmony_ci 4017db96d56Sopenharmony_ci def test_function(self): 4027db96d56Sopenharmony_ci self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\ 4037db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 4047db96d56Sopenharmony_ci NAME 'd22' (1, 4) (1, 7) 4057db96d56Sopenharmony_ci OP '(' (1, 7) (1, 8) 4067db96d56Sopenharmony_ci NAME 'a' (1, 8) (1, 9) 4077db96d56Sopenharmony_ci OP ',' (1, 9) (1, 10) 4087db96d56Sopenharmony_ci NAME 'b' (1, 11) (1, 12) 4097db96d56Sopenharmony_ci OP ',' (1, 12) (1, 13) 4107db96d56Sopenharmony_ci NAME 'c' (1, 14) (1, 15) 4117db96d56Sopenharmony_ci OP '=' (1, 15) (1, 16) 4127db96d56Sopenharmony_ci NUMBER '2' (1, 16) (1, 17) 4137db96d56Sopenharmony_ci OP ',' (1, 17) (1, 18) 4147db96d56Sopenharmony_ci NAME 'd' (1, 19) (1, 20) 4157db96d56Sopenharmony_ci OP '=' (1, 20) (1, 21) 4167db96d56Sopenharmony_ci NUMBER '2' (1, 21) (1, 22) 4177db96d56Sopenharmony_ci OP ',' (1, 22) (1, 23) 4187db96d56Sopenharmony_ci OP '*' (1, 24) (1, 25) 4197db96d56Sopenharmony_ci NAME 'k' (1, 25) (1, 26) 4207db96d56Sopenharmony_ci OP ')' (1, 26) (1, 27) 4217db96d56Sopenharmony_ci OP ':' (1, 27) (1, 28) 4227db96d56Sopenharmony_ci NAME 'pass' (1, 29) (1, 33) 4237db96d56Sopenharmony_ci """) 4247db96d56Sopenharmony_ci self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\ 4257db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 4267db96d56Sopenharmony_ci NAME 'd01v_' (1, 4) (1, 9) 4277db96d56Sopenharmony_ci OP '(' (1, 9) (1, 10) 4287db96d56Sopenharmony_ci NAME 'a' (1, 10) (1, 11) 4297db96d56Sopenharmony_ci OP '=' (1, 11) (1, 12) 4307db96d56Sopenharmony_ci NUMBER '1' (1, 12) (1, 13) 4317db96d56Sopenharmony_ci OP ',' (1, 13) (1, 14) 4327db96d56Sopenharmony_ci OP '*' (1, 15) (1, 16) 4337db96d56Sopenharmony_ci NAME 'k' (1, 16) (1, 17) 4347db96d56Sopenharmony_ci OP ',' (1, 17) (1, 18) 4357db96d56Sopenharmony_ci OP '**' (1, 19) (1, 21) 4367db96d56Sopenharmony_ci NAME 'w' (1, 21) (1, 22) 4377db96d56Sopenharmony_ci OP ')' (1, 22) (1, 23) 4387db96d56Sopenharmony_ci OP ':' (1, 23) (1, 24) 4397db96d56Sopenharmony_ci NAME 'pass' (1, 25) (1, 29) 4407db96d56Sopenharmony_ci """) 4417db96d56Sopenharmony_ci self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\ 4427db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 4437db96d56Sopenharmony_ci NAME 'd23' (1, 4) (1, 7) 4447db96d56Sopenharmony_ci OP '(' (1, 7) (1, 8) 4457db96d56Sopenharmony_ci NAME 'a' (1, 8) (1, 9) 4467db96d56Sopenharmony_ci OP ':' (1, 9) (1, 10) 4477db96d56Sopenharmony_ci NAME 'str' (1, 11) (1, 14) 4487db96d56Sopenharmony_ci OP ',' (1, 14) (1, 15) 4497db96d56Sopenharmony_ci NAME 'b' (1, 16) (1, 17) 4507db96d56Sopenharmony_ci OP ':' (1, 17) (1, 18) 4517db96d56Sopenharmony_ci NAME 'int' (1, 19) (1, 22) 4527db96d56Sopenharmony_ci OP '=' (1, 22) (1, 23) 4537db96d56Sopenharmony_ci NUMBER '3' (1, 23) (1, 24) 4547db96d56Sopenharmony_ci OP ')' (1, 24) (1, 25) 4557db96d56Sopenharmony_ci OP '->' (1, 26) (1, 28) 4567db96d56Sopenharmony_ci NAME 'int' (1, 29) (1, 32) 4577db96d56Sopenharmony_ci OP ':' (1, 32) (1, 33) 4587db96d56Sopenharmony_ci NAME 'pass' (1, 34) (1, 38) 4597db96d56Sopenharmony_ci """) 4607db96d56Sopenharmony_ci 4617db96d56Sopenharmony_ci def test_comparison(self): 4627db96d56Sopenharmony_ci # Comparison 4637db96d56Sopenharmony_ci self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " 4647db96d56Sopenharmony_ci "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\ 4657db96d56Sopenharmony_ci NAME 'if' (1, 0) (1, 2) 4667db96d56Sopenharmony_ci NUMBER '1' (1, 3) (1, 4) 4677db96d56Sopenharmony_ci OP '<' (1, 5) (1, 6) 4687db96d56Sopenharmony_ci NUMBER '1' (1, 7) (1, 8) 4697db96d56Sopenharmony_ci OP '>' (1, 9) (1, 10) 4707db96d56Sopenharmony_ci NUMBER '1' (1, 11) (1, 12) 4717db96d56Sopenharmony_ci OP '==' (1, 13) (1, 15) 4727db96d56Sopenharmony_ci NUMBER '1' (1, 16) (1, 17) 4737db96d56Sopenharmony_ci OP '>=' (1, 18) (1, 20) 4747db96d56Sopenharmony_ci NUMBER '5' (1, 21) (1, 22) 4757db96d56Sopenharmony_ci OP '<=' (1, 23) (1, 25) 4767db96d56Sopenharmony_ci NUMBER '0x15' (1, 26) (1, 30) 4777db96d56Sopenharmony_ci OP '<=' (1, 31) (1, 33) 4787db96d56Sopenharmony_ci NUMBER '0x12' (1, 34) (1, 38) 4797db96d56Sopenharmony_ci OP '!=' (1, 39) (1, 41) 4807db96d56Sopenharmony_ci NUMBER '1' (1, 42) (1, 43) 4817db96d56Sopenharmony_ci NAME 'and' (1, 44) (1, 47) 4827db96d56Sopenharmony_ci NUMBER '5' (1, 48) (1, 49) 4837db96d56Sopenharmony_ci NAME 'in' (1, 50) (1, 52) 4847db96d56Sopenharmony_ci NUMBER '1' (1, 53) (1, 54) 4857db96d56Sopenharmony_ci NAME 'not' (1, 55) (1, 58) 4867db96d56Sopenharmony_ci NAME 'in' (1, 59) (1, 61) 4877db96d56Sopenharmony_ci NUMBER '1' (1, 62) (1, 63) 4887db96d56Sopenharmony_ci NAME 'is' (1, 64) (1, 66) 4897db96d56Sopenharmony_ci NUMBER '1' (1, 67) (1, 68) 4907db96d56Sopenharmony_ci NAME 'or' (1, 69) (1, 71) 4917db96d56Sopenharmony_ci NUMBER '5' (1, 72) (1, 73) 4927db96d56Sopenharmony_ci NAME 'is' (1, 74) (1, 76) 4937db96d56Sopenharmony_ci NAME 'not' (1, 77) (1, 80) 4947db96d56Sopenharmony_ci NUMBER '1' (1, 81) (1, 82) 4957db96d56Sopenharmony_ci OP ':' (1, 82) (1, 83) 4967db96d56Sopenharmony_ci NAME 'pass' (1, 84) (1, 88) 4977db96d56Sopenharmony_ci """) 4987db96d56Sopenharmony_ci 4997db96d56Sopenharmony_ci def test_shift(self): 5007db96d56Sopenharmony_ci # Shift 5017db96d56Sopenharmony_ci self.check_tokenize("x = 1 << 1 >> 5", """\ 5027db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 5037db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 5047db96d56Sopenharmony_ci NUMBER '1' (1, 4) (1, 5) 5057db96d56Sopenharmony_ci OP '<<' (1, 6) (1, 8) 5067db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 5077db96d56Sopenharmony_ci OP '>>' (1, 11) (1, 13) 5087db96d56Sopenharmony_ci NUMBER '5' (1, 14) (1, 15) 5097db96d56Sopenharmony_ci """) 5107db96d56Sopenharmony_ci 5117db96d56Sopenharmony_ci def test_additive(self): 5127db96d56Sopenharmony_ci # Additive 5137db96d56Sopenharmony_ci self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\ 5147db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 5157db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 5167db96d56Sopenharmony_ci NUMBER '1' (1, 4) (1, 5) 5177db96d56Sopenharmony_ci OP '-' (1, 6) (1, 7) 5187db96d56Sopenharmony_ci NAME 'y' (1, 8) (1, 9) 5197db96d56Sopenharmony_ci OP '+' (1, 10) (1, 11) 5207db96d56Sopenharmony_ci NUMBER '15' (1, 12) (1, 14) 5217db96d56Sopenharmony_ci OP '-' (1, 15) (1, 16) 5227db96d56Sopenharmony_ci NUMBER '1' (1, 17) (1, 18) 5237db96d56Sopenharmony_ci OP '+' (1, 19) (1, 20) 5247db96d56Sopenharmony_ci NUMBER '0x124' (1, 21) (1, 26) 5257db96d56Sopenharmony_ci OP '+' (1, 27) (1, 28) 5267db96d56Sopenharmony_ci NAME 'z' (1, 29) (1, 30) 5277db96d56Sopenharmony_ci OP '+' (1, 31) (1, 32) 5287db96d56Sopenharmony_ci NAME 'a' (1, 33) (1, 34) 5297db96d56Sopenharmony_ci OP '[' (1, 34) (1, 35) 5307db96d56Sopenharmony_ci NUMBER '5' (1, 35) (1, 36) 5317db96d56Sopenharmony_ci OP ']' (1, 36) (1, 37) 5327db96d56Sopenharmony_ci """) 5337db96d56Sopenharmony_ci 5347db96d56Sopenharmony_ci def test_multiplicative(self): 5357db96d56Sopenharmony_ci # Multiplicative 5367db96d56Sopenharmony_ci self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\ 5377db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 5387db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 5397db96d56Sopenharmony_ci NUMBER '1' (1, 4) (1, 5) 5407db96d56Sopenharmony_ci OP '//' (1, 5) (1, 7) 5417db96d56Sopenharmony_ci NUMBER '1' (1, 7) (1, 8) 5427db96d56Sopenharmony_ci OP '*' (1, 8) (1, 9) 5437db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 5447db96d56Sopenharmony_ci OP '/' (1, 10) (1, 11) 5457db96d56Sopenharmony_ci NUMBER '5' (1, 11) (1, 12) 5467db96d56Sopenharmony_ci OP '*' (1, 12) (1, 13) 5477db96d56Sopenharmony_ci NUMBER '12' (1, 13) (1, 15) 5487db96d56Sopenharmony_ci OP '%' (1, 15) (1, 16) 5497db96d56Sopenharmony_ci NUMBER '0x12' (1, 16) (1, 20) 5507db96d56Sopenharmony_ci OP '@' (1, 20) (1, 21) 5517db96d56Sopenharmony_ci NUMBER '42' (1, 21) (1, 23) 5527db96d56Sopenharmony_ci """) 5537db96d56Sopenharmony_ci 5547db96d56Sopenharmony_ci def test_unary(self): 5557db96d56Sopenharmony_ci # Unary 5567db96d56Sopenharmony_ci self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\ 5577db96d56Sopenharmony_ci OP '~' (1, 0) (1, 1) 5587db96d56Sopenharmony_ci NUMBER '1' (1, 1) (1, 2) 5597db96d56Sopenharmony_ci OP '^' (1, 3) (1, 4) 5607db96d56Sopenharmony_ci NUMBER '1' (1, 5) (1, 6) 5617db96d56Sopenharmony_ci OP '&' (1, 7) (1, 8) 5627db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 5637db96d56Sopenharmony_ci OP '|' (1, 11) (1, 12) 5647db96d56Sopenharmony_ci NUMBER '1' (1, 12) (1, 13) 5657db96d56Sopenharmony_ci OP '^' (1, 14) (1, 15) 5667db96d56Sopenharmony_ci OP '-' (1, 16) (1, 17) 5677db96d56Sopenharmony_ci NUMBER '1' (1, 17) (1, 18) 5687db96d56Sopenharmony_ci """) 5697db96d56Sopenharmony_ci self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\ 5707db96d56Sopenharmony_ci OP '-' (1, 0) (1, 1) 5717db96d56Sopenharmony_ci NUMBER '1' (1, 1) (1, 2) 5727db96d56Sopenharmony_ci OP '*' (1, 2) (1, 3) 5737db96d56Sopenharmony_ci NUMBER '1' (1, 3) (1, 4) 5747db96d56Sopenharmony_ci OP '/' (1, 4) (1, 5) 5757db96d56Sopenharmony_ci NUMBER '1' (1, 5) (1, 6) 5767db96d56Sopenharmony_ci OP '+' (1, 6) (1, 7) 5777db96d56Sopenharmony_ci NUMBER '1' (1, 7) (1, 8) 5787db96d56Sopenharmony_ci OP '*' (1, 8) (1, 9) 5797db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 5807db96d56Sopenharmony_ci OP '//' (1, 10) (1, 12) 5817db96d56Sopenharmony_ci NUMBER '1' (1, 12) (1, 13) 5827db96d56Sopenharmony_ci OP '-' (1, 14) (1, 15) 5837db96d56Sopenharmony_ci OP '-' (1, 16) (1, 17) 5847db96d56Sopenharmony_ci OP '-' (1, 17) (1, 18) 5857db96d56Sopenharmony_ci OP '-' (1, 18) (1, 19) 5867db96d56Sopenharmony_ci NUMBER '1' (1, 19) (1, 20) 5877db96d56Sopenharmony_ci OP '**' (1, 20) (1, 22) 5887db96d56Sopenharmony_ci NUMBER '1' (1, 22) (1, 23) 5897db96d56Sopenharmony_ci """) 5907db96d56Sopenharmony_ci 5917db96d56Sopenharmony_ci def test_selector(self): 5927db96d56Sopenharmony_ci # Selector 5937db96d56Sopenharmony_ci self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\ 5947db96d56Sopenharmony_ci NAME 'import' (1, 0) (1, 6) 5957db96d56Sopenharmony_ci NAME 'sys' (1, 7) (1, 10) 5967db96d56Sopenharmony_ci OP ',' (1, 10) (1, 11) 5977db96d56Sopenharmony_ci NAME 'time' (1, 12) (1, 16) 5987db96d56Sopenharmony_ci NEWLINE '\\n' (1, 16) (1, 17) 5997db96d56Sopenharmony_ci NAME 'x' (2, 0) (2, 1) 6007db96d56Sopenharmony_ci OP '=' (2, 2) (2, 3) 6017db96d56Sopenharmony_ci NAME 'sys' (2, 4) (2, 7) 6027db96d56Sopenharmony_ci OP '.' (2, 7) (2, 8) 6037db96d56Sopenharmony_ci NAME 'modules' (2, 8) (2, 15) 6047db96d56Sopenharmony_ci OP '[' (2, 15) (2, 16) 6057db96d56Sopenharmony_ci STRING "'time'" (2, 16) (2, 22) 6067db96d56Sopenharmony_ci OP ']' (2, 22) (2, 23) 6077db96d56Sopenharmony_ci OP '.' (2, 23) (2, 24) 6087db96d56Sopenharmony_ci NAME 'time' (2, 24) (2, 28) 6097db96d56Sopenharmony_ci OP '(' (2, 28) (2, 29) 6107db96d56Sopenharmony_ci OP ')' (2, 29) (2, 30) 6117db96d56Sopenharmony_ci """) 6127db96d56Sopenharmony_ci 6137db96d56Sopenharmony_ci def test_method(self): 6147db96d56Sopenharmony_ci # Methods 6157db96d56Sopenharmony_ci self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\ 6167db96d56Sopenharmony_ci OP '@' (1, 0) (1, 1) 6177db96d56Sopenharmony_ci NAME 'staticmethod' (1, 1) (1, 13) 6187db96d56Sopenharmony_ci NEWLINE '\\n' (1, 13) (1, 14) 6197db96d56Sopenharmony_ci NAME 'def' (2, 0) (2, 3) 6207db96d56Sopenharmony_ci NAME 'foo' (2, 4) (2, 7) 6217db96d56Sopenharmony_ci OP '(' (2, 7) (2, 8) 6227db96d56Sopenharmony_ci NAME 'x' (2, 8) (2, 9) 6237db96d56Sopenharmony_ci OP ',' (2, 9) (2, 10) 6247db96d56Sopenharmony_ci NAME 'y' (2, 10) (2, 11) 6257db96d56Sopenharmony_ci OP ')' (2, 11) (2, 12) 6267db96d56Sopenharmony_ci OP ':' (2, 12) (2, 13) 6277db96d56Sopenharmony_ci NAME 'pass' (2, 14) (2, 18) 6287db96d56Sopenharmony_ci """) 6297db96d56Sopenharmony_ci 6307db96d56Sopenharmony_ci def test_tabs(self): 6317db96d56Sopenharmony_ci # Evil tabs 6327db96d56Sopenharmony_ci self.check_tokenize("def f():\n" 6337db96d56Sopenharmony_ci "\tif x\n" 6347db96d56Sopenharmony_ci " \tpass", """\ 6357db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 6367db96d56Sopenharmony_ci NAME 'f' (1, 4) (1, 5) 6377db96d56Sopenharmony_ci OP '(' (1, 5) (1, 6) 6387db96d56Sopenharmony_ci OP ')' (1, 6) (1, 7) 6397db96d56Sopenharmony_ci OP ':' (1, 7) (1, 8) 6407db96d56Sopenharmony_ci NEWLINE '\\n' (1, 8) (1, 9) 6417db96d56Sopenharmony_ci INDENT '\\t' (2, 0) (2, 1) 6427db96d56Sopenharmony_ci NAME 'if' (2, 1) (2, 3) 6437db96d56Sopenharmony_ci NAME 'x' (2, 4) (2, 5) 6447db96d56Sopenharmony_ci NEWLINE '\\n' (2, 5) (2, 6) 6457db96d56Sopenharmony_ci INDENT ' \\t' (3, 0) (3, 9) 6467db96d56Sopenharmony_ci NAME 'pass' (3, 9) (3, 13) 6477db96d56Sopenharmony_ci DEDENT '' (4, 0) (4, 0) 6487db96d56Sopenharmony_ci DEDENT '' (4, 0) (4, 0) 6497db96d56Sopenharmony_ci """) 6507db96d56Sopenharmony_ci 6517db96d56Sopenharmony_ci def test_non_ascii_identifiers(self): 6527db96d56Sopenharmony_ci # Non-ascii identifiers 6537db96d56Sopenharmony_ci self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\ 6547db96d56Sopenharmony_ci NAME 'Örter' (1, 0) (1, 5) 6557db96d56Sopenharmony_ci OP '=' (1, 6) (1, 7) 6567db96d56Sopenharmony_ci STRING "'places'" (1, 8) (1, 16) 6577db96d56Sopenharmony_ci NEWLINE '\\n' (1, 16) (1, 17) 6587db96d56Sopenharmony_ci NAME 'grün' (2, 0) (2, 4) 6597db96d56Sopenharmony_ci OP '=' (2, 5) (2, 6) 6607db96d56Sopenharmony_ci STRING "'green'" (2, 7) (2, 14) 6617db96d56Sopenharmony_ci """) 6627db96d56Sopenharmony_ci 6637db96d56Sopenharmony_ci def test_unicode(self): 6647db96d56Sopenharmony_ci # Legacy unicode literals: 6657db96d56Sopenharmony_ci self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\ 6667db96d56Sopenharmony_ci NAME 'Örter' (1, 0) (1, 5) 6677db96d56Sopenharmony_ci OP '=' (1, 6) (1, 7) 6687db96d56Sopenharmony_ci STRING "u'places'" (1, 8) (1, 17) 6697db96d56Sopenharmony_ci NEWLINE '\\n' (1, 17) (1, 18) 6707db96d56Sopenharmony_ci NAME 'grün' (2, 0) (2, 4) 6717db96d56Sopenharmony_ci OP '=' (2, 5) (2, 6) 6727db96d56Sopenharmony_ci STRING "U'green'" (2, 7) (2, 15) 6737db96d56Sopenharmony_ci """) 6747db96d56Sopenharmony_ci 6757db96d56Sopenharmony_ci def test_async(self): 6767db96d56Sopenharmony_ci # Async/await extension: 6777db96d56Sopenharmony_ci self.check_tokenize("async = 1", """\ 6787db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 6797db96d56Sopenharmony_ci OP '=' (1, 6) (1, 7) 6807db96d56Sopenharmony_ci NUMBER '1' (1, 8) (1, 9) 6817db96d56Sopenharmony_ci """) 6827db96d56Sopenharmony_ci 6837db96d56Sopenharmony_ci self.check_tokenize("a = (async = 1)", """\ 6847db96d56Sopenharmony_ci NAME 'a' (1, 0) (1, 1) 6857db96d56Sopenharmony_ci OP '=' (1, 2) (1, 3) 6867db96d56Sopenharmony_ci OP '(' (1, 4) (1, 5) 6877db96d56Sopenharmony_ci NAME 'async' (1, 5) (1, 10) 6887db96d56Sopenharmony_ci OP '=' (1, 11) (1, 12) 6897db96d56Sopenharmony_ci NUMBER '1' (1, 13) (1, 14) 6907db96d56Sopenharmony_ci OP ')' (1, 14) (1, 15) 6917db96d56Sopenharmony_ci """) 6927db96d56Sopenharmony_ci 6937db96d56Sopenharmony_ci self.check_tokenize("async()", """\ 6947db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 6957db96d56Sopenharmony_ci OP '(' (1, 5) (1, 6) 6967db96d56Sopenharmony_ci OP ')' (1, 6) (1, 7) 6977db96d56Sopenharmony_ci """) 6987db96d56Sopenharmony_ci 6997db96d56Sopenharmony_ci self.check_tokenize("class async(Bar):pass", """\ 7007db96d56Sopenharmony_ci NAME 'class' (1, 0) (1, 5) 7017db96d56Sopenharmony_ci NAME 'async' (1, 6) (1, 11) 7027db96d56Sopenharmony_ci OP '(' (1, 11) (1, 12) 7037db96d56Sopenharmony_ci NAME 'Bar' (1, 12) (1, 15) 7047db96d56Sopenharmony_ci OP ')' (1, 15) (1, 16) 7057db96d56Sopenharmony_ci OP ':' (1, 16) (1, 17) 7067db96d56Sopenharmony_ci NAME 'pass' (1, 17) (1, 21) 7077db96d56Sopenharmony_ci """) 7087db96d56Sopenharmony_ci 7097db96d56Sopenharmony_ci self.check_tokenize("class async:pass", """\ 7107db96d56Sopenharmony_ci NAME 'class' (1, 0) (1, 5) 7117db96d56Sopenharmony_ci NAME 'async' (1, 6) (1, 11) 7127db96d56Sopenharmony_ci OP ':' (1, 11) (1, 12) 7137db96d56Sopenharmony_ci NAME 'pass' (1, 12) (1, 16) 7147db96d56Sopenharmony_ci """) 7157db96d56Sopenharmony_ci 7167db96d56Sopenharmony_ci self.check_tokenize("await = 1", """\ 7177db96d56Sopenharmony_ci NAME 'await' (1, 0) (1, 5) 7187db96d56Sopenharmony_ci OP '=' (1, 6) (1, 7) 7197db96d56Sopenharmony_ci NUMBER '1' (1, 8) (1, 9) 7207db96d56Sopenharmony_ci """) 7217db96d56Sopenharmony_ci 7227db96d56Sopenharmony_ci self.check_tokenize("foo.async", """\ 7237db96d56Sopenharmony_ci NAME 'foo' (1, 0) (1, 3) 7247db96d56Sopenharmony_ci OP '.' (1, 3) (1, 4) 7257db96d56Sopenharmony_ci NAME 'async' (1, 4) (1, 9) 7267db96d56Sopenharmony_ci """) 7277db96d56Sopenharmony_ci 7287db96d56Sopenharmony_ci self.check_tokenize("async for a in b: pass", """\ 7297db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 7307db96d56Sopenharmony_ci NAME 'for' (1, 6) (1, 9) 7317db96d56Sopenharmony_ci NAME 'a' (1, 10) (1, 11) 7327db96d56Sopenharmony_ci NAME 'in' (1, 12) (1, 14) 7337db96d56Sopenharmony_ci NAME 'b' (1, 15) (1, 16) 7347db96d56Sopenharmony_ci OP ':' (1, 16) (1, 17) 7357db96d56Sopenharmony_ci NAME 'pass' (1, 18) (1, 22) 7367db96d56Sopenharmony_ci """) 7377db96d56Sopenharmony_ci 7387db96d56Sopenharmony_ci self.check_tokenize("async with a as b: pass", """\ 7397db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 7407db96d56Sopenharmony_ci NAME 'with' (1, 6) (1, 10) 7417db96d56Sopenharmony_ci NAME 'a' (1, 11) (1, 12) 7427db96d56Sopenharmony_ci NAME 'as' (1, 13) (1, 15) 7437db96d56Sopenharmony_ci NAME 'b' (1, 16) (1, 17) 7447db96d56Sopenharmony_ci OP ':' (1, 17) (1, 18) 7457db96d56Sopenharmony_ci NAME 'pass' (1, 19) (1, 23) 7467db96d56Sopenharmony_ci """) 7477db96d56Sopenharmony_ci 7487db96d56Sopenharmony_ci self.check_tokenize("async.foo", """\ 7497db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 7507db96d56Sopenharmony_ci OP '.' (1, 5) (1, 6) 7517db96d56Sopenharmony_ci NAME 'foo' (1, 6) (1, 9) 7527db96d56Sopenharmony_ci """) 7537db96d56Sopenharmony_ci 7547db96d56Sopenharmony_ci self.check_tokenize("async", """\ 7557db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 7567db96d56Sopenharmony_ci """) 7577db96d56Sopenharmony_ci 7587db96d56Sopenharmony_ci self.check_tokenize("async\n#comment\nawait", """\ 7597db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 7607db96d56Sopenharmony_ci NEWLINE '\\n' (1, 5) (1, 6) 7617db96d56Sopenharmony_ci COMMENT '#comment' (2, 0) (2, 8) 7627db96d56Sopenharmony_ci NL '\\n' (2, 8) (2, 9) 7637db96d56Sopenharmony_ci NAME 'await' (3, 0) (3, 5) 7647db96d56Sopenharmony_ci """) 7657db96d56Sopenharmony_ci 7667db96d56Sopenharmony_ci self.check_tokenize("async\n...\nawait", """\ 7677db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 7687db96d56Sopenharmony_ci NEWLINE '\\n' (1, 5) (1, 6) 7697db96d56Sopenharmony_ci OP '...' (2, 0) (2, 3) 7707db96d56Sopenharmony_ci NEWLINE '\\n' (2, 3) (2, 4) 7717db96d56Sopenharmony_ci NAME 'await' (3, 0) (3, 5) 7727db96d56Sopenharmony_ci """) 7737db96d56Sopenharmony_ci 7747db96d56Sopenharmony_ci self.check_tokenize("async\nawait", """\ 7757db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 7767db96d56Sopenharmony_ci NEWLINE '\\n' (1, 5) (1, 6) 7777db96d56Sopenharmony_ci NAME 'await' (2, 0) (2, 5) 7787db96d56Sopenharmony_ci """) 7797db96d56Sopenharmony_ci 7807db96d56Sopenharmony_ci self.check_tokenize("foo.async + 1", """\ 7817db96d56Sopenharmony_ci NAME 'foo' (1, 0) (1, 3) 7827db96d56Sopenharmony_ci OP '.' (1, 3) (1, 4) 7837db96d56Sopenharmony_ci NAME 'async' (1, 4) (1, 9) 7847db96d56Sopenharmony_ci OP '+' (1, 10) (1, 11) 7857db96d56Sopenharmony_ci NUMBER '1' (1, 12) (1, 13) 7867db96d56Sopenharmony_ci """) 7877db96d56Sopenharmony_ci 7887db96d56Sopenharmony_ci self.check_tokenize("async def foo(): pass", """\ 7897db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 7907db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 7917db96d56Sopenharmony_ci NAME 'foo' (1, 10) (1, 13) 7927db96d56Sopenharmony_ci OP '(' (1, 13) (1, 14) 7937db96d56Sopenharmony_ci OP ')' (1, 14) (1, 15) 7947db96d56Sopenharmony_ci OP ':' (1, 15) (1, 16) 7957db96d56Sopenharmony_ci NAME 'pass' (1, 17) (1, 21) 7967db96d56Sopenharmony_ci """) 7977db96d56Sopenharmony_ci 7987db96d56Sopenharmony_ci self.check_tokenize('''\ 7997db96d56Sopenharmony_ciasync def foo(): 8007db96d56Sopenharmony_ci def foo(await): 8017db96d56Sopenharmony_ci await = 1 8027db96d56Sopenharmony_ci if 1: 8037db96d56Sopenharmony_ci await 8047db96d56Sopenharmony_ciasync += 1 8057db96d56Sopenharmony_ci''', """\ 8067db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 8077db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 8087db96d56Sopenharmony_ci NAME 'foo' (1, 10) (1, 13) 8097db96d56Sopenharmony_ci OP '(' (1, 13) (1, 14) 8107db96d56Sopenharmony_ci OP ')' (1, 14) (1, 15) 8117db96d56Sopenharmony_ci OP ':' (1, 15) (1, 16) 8127db96d56Sopenharmony_ci NEWLINE '\\n' (1, 16) (1, 17) 8137db96d56Sopenharmony_ci INDENT ' ' (2, 0) (2, 2) 8147db96d56Sopenharmony_ci NAME 'def' (2, 2) (2, 5) 8157db96d56Sopenharmony_ci NAME 'foo' (2, 6) (2, 9) 8167db96d56Sopenharmony_ci OP '(' (2, 9) (2, 10) 8177db96d56Sopenharmony_ci NAME 'await' (2, 10) (2, 15) 8187db96d56Sopenharmony_ci OP ')' (2, 15) (2, 16) 8197db96d56Sopenharmony_ci OP ':' (2, 16) (2, 17) 8207db96d56Sopenharmony_ci NEWLINE '\\n' (2, 17) (2, 18) 8217db96d56Sopenharmony_ci INDENT ' ' (3, 0) (3, 4) 8227db96d56Sopenharmony_ci NAME 'await' (3, 4) (3, 9) 8237db96d56Sopenharmony_ci OP '=' (3, 10) (3, 11) 8247db96d56Sopenharmony_ci NUMBER '1' (3, 12) (3, 13) 8257db96d56Sopenharmony_ci NEWLINE '\\n' (3, 13) (3, 14) 8267db96d56Sopenharmony_ci DEDENT '' (4, 2) (4, 2) 8277db96d56Sopenharmony_ci NAME 'if' (4, 2) (4, 4) 8287db96d56Sopenharmony_ci NUMBER '1' (4, 5) (4, 6) 8297db96d56Sopenharmony_ci OP ':' (4, 6) (4, 7) 8307db96d56Sopenharmony_ci NEWLINE '\\n' (4, 7) (4, 8) 8317db96d56Sopenharmony_ci INDENT ' ' (5, 0) (5, 4) 8327db96d56Sopenharmony_ci NAME 'await' (5, 4) (5, 9) 8337db96d56Sopenharmony_ci NEWLINE '\\n' (5, 9) (5, 10) 8347db96d56Sopenharmony_ci DEDENT '' (6, 0) (6, 0) 8357db96d56Sopenharmony_ci DEDENT '' (6, 0) (6, 0) 8367db96d56Sopenharmony_ci NAME 'async' (6, 0) (6, 5) 8377db96d56Sopenharmony_ci OP '+=' (6, 6) (6, 8) 8387db96d56Sopenharmony_ci NUMBER '1' (6, 9) (6, 10) 8397db96d56Sopenharmony_ci NEWLINE '\\n' (6, 10) (6, 11) 8407db96d56Sopenharmony_ci """) 8417db96d56Sopenharmony_ci 8427db96d56Sopenharmony_ci self.check_tokenize('''\ 8437db96d56Sopenharmony_ciasync def foo(): 8447db96d56Sopenharmony_ci async for i in 1: pass''', """\ 8457db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 8467db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 8477db96d56Sopenharmony_ci NAME 'foo' (1, 10) (1, 13) 8487db96d56Sopenharmony_ci OP '(' (1, 13) (1, 14) 8497db96d56Sopenharmony_ci OP ')' (1, 14) (1, 15) 8507db96d56Sopenharmony_ci OP ':' (1, 15) (1, 16) 8517db96d56Sopenharmony_ci NEWLINE '\\n' (1, 16) (1, 17) 8527db96d56Sopenharmony_ci INDENT ' ' (2, 0) (2, 2) 8537db96d56Sopenharmony_ci NAME 'async' (2, 2) (2, 7) 8547db96d56Sopenharmony_ci NAME 'for' (2, 8) (2, 11) 8557db96d56Sopenharmony_ci NAME 'i' (2, 12) (2, 13) 8567db96d56Sopenharmony_ci NAME 'in' (2, 14) (2, 16) 8577db96d56Sopenharmony_ci NUMBER '1' (2, 17) (2, 18) 8587db96d56Sopenharmony_ci OP ':' (2, 18) (2, 19) 8597db96d56Sopenharmony_ci NAME 'pass' (2, 20) (2, 24) 8607db96d56Sopenharmony_ci DEDENT '' (3, 0) (3, 0) 8617db96d56Sopenharmony_ci """) 8627db96d56Sopenharmony_ci 8637db96d56Sopenharmony_ci self.check_tokenize('''async def foo(async): await''', """\ 8647db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 8657db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 8667db96d56Sopenharmony_ci NAME 'foo' (1, 10) (1, 13) 8677db96d56Sopenharmony_ci OP '(' (1, 13) (1, 14) 8687db96d56Sopenharmony_ci NAME 'async' (1, 14) (1, 19) 8697db96d56Sopenharmony_ci OP ')' (1, 19) (1, 20) 8707db96d56Sopenharmony_ci OP ':' (1, 20) (1, 21) 8717db96d56Sopenharmony_ci NAME 'await' (1, 22) (1, 27) 8727db96d56Sopenharmony_ci """) 8737db96d56Sopenharmony_ci 8747db96d56Sopenharmony_ci self.check_tokenize('''\ 8757db96d56Sopenharmony_cidef f(): 8767db96d56Sopenharmony_ci 8777db96d56Sopenharmony_ci def baz(): pass 8787db96d56Sopenharmony_ci async def bar(): pass 8797db96d56Sopenharmony_ci 8807db96d56Sopenharmony_ci await = 2''', """\ 8817db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 8827db96d56Sopenharmony_ci NAME 'f' (1, 4) (1, 5) 8837db96d56Sopenharmony_ci OP '(' (1, 5) (1, 6) 8847db96d56Sopenharmony_ci OP ')' (1, 6) (1, 7) 8857db96d56Sopenharmony_ci OP ':' (1, 7) (1, 8) 8867db96d56Sopenharmony_ci NEWLINE '\\n' (1, 8) (1, 9) 8877db96d56Sopenharmony_ci NL '\\n' (2, 0) (2, 1) 8887db96d56Sopenharmony_ci INDENT ' ' (3, 0) (3, 2) 8897db96d56Sopenharmony_ci NAME 'def' (3, 2) (3, 5) 8907db96d56Sopenharmony_ci NAME 'baz' (3, 6) (3, 9) 8917db96d56Sopenharmony_ci OP '(' (3, 9) (3, 10) 8927db96d56Sopenharmony_ci OP ')' (3, 10) (3, 11) 8937db96d56Sopenharmony_ci OP ':' (3, 11) (3, 12) 8947db96d56Sopenharmony_ci NAME 'pass' (3, 13) (3, 17) 8957db96d56Sopenharmony_ci NEWLINE '\\n' (3, 17) (3, 18) 8967db96d56Sopenharmony_ci NAME 'async' (4, 2) (4, 7) 8977db96d56Sopenharmony_ci NAME 'def' (4, 8) (4, 11) 8987db96d56Sopenharmony_ci NAME 'bar' (4, 12) (4, 15) 8997db96d56Sopenharmony_ci OP '(' (4, 15) (4, 16) 9007db96d56Sopenharmony_ci OP ')' (4, 16) (4, 17) 9017db96d56Sopenharmony_ci OP ':' (4, 17) (4, 18) 9027db96d56Sopenharmony_ci NAME 'pass' (4, 19) (4, 23) 9037db96d56Sopenharmony_ci NEWLINE '\\n' (4, 23) (4, 24) 9047db96d56Sopenharmony_ci NL '\\n' (5, 0) (5, 1) 9057db96d56Sopenharmony_ci NAME 'await' (6, 2) (6, 7) 9067db96d56Sopenharmony_ci OP '=' (6, 8) (6, 9) 9077db96d56Sopenharmony_ci NUMBER '2' (6, 10) (6, 11) 9087db96d56Sopenharmony_ci DEDENT '' (7, 0) (7, 0) 9097db96d56Sopenharmony_ci """) 9107db96d56Sopenharmony_ci 9117db96d56Sopenharmony_ci self.check_tokenize('''\ 9127db96d56Sopenharmony_ciasync def f(): 9137db96d56Sopenharmony_ci 9147db96d56Sopenharmony_ci def baz(): pass 9157db96d56Sopenharmony_ci async def bar(): pass 9167db96d56Sopenharmony_ci 9177db96d56Sopenharmony_ci await = 2''', """\ 9187db96d56Sopenharmony_ci NAME 'async' (1, 0) (1, 5) 9197db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 9207db96d56Sopenharmony_ci NAME 'f' (1, 10) (1, 11) 9217db96d56Sopenharmony_ci OP '(' (1, 11) (1, 12) 9227db96d56Sopenharmony_ci OP ')' (1, 12) (1, 13) 9237db96d56Sopenharmony_ci OP ':' (1, 13) (1, 14) 9247db96d56Sopenharmony_ci NEWLINE '\\n' (1, 14) (1, 15) 9257db96d56Sopenharmony_ci NL '\\n' (2, 0) (2, 1) 9267db96d56Sopenharmony_ci INDENT ' ' (3, 0) (3, 2) 9277db96d56Sopenharmony_ci NAME 'def' (3, 2) (3, 5) 9287db96d56Sopenharmony_ci NAME 'baz' (3, 6) (3, 9) 9297db96d56Sopenharmony_ci OP '(' (3, 9) (3, 10) 9307db96d56Sopenharmony_ci OP ')' (3, 10) (3, 11) 9317db96d56Sopenharmony_ci OP ':' (3, 11) (3, 12) 9327db96d56Sopenharmony_ci NAME 'pass' (3, 13) (3, 17) 9337db96d56Sopenharmony_ci NEWLINE '\\n' (3, 17) (3, 18) 9347db96d56Sopenharmony_ci NAME 'async' (4, 2) (4, 7) 9357db96d56Sopenharmony_ci NAME 'def' (4, 8) (4, 11) 9367db96d56Sopenharmony_ci NAME 'bar' (4, 12) (4, 15) 9377db96d56Sopenharmony_ci OP '(' (4, 15) (4, 16) 9387db96d56Sopenharmony_ci OP ')' (4, 16) (4, 17) 9397db96d56Sopenharmony_ci OP ':' (4, 17) (4, 18) 9407db96d56Sopenharmony_ci NAME 'pass' (4, 19) (4, 23) 9417db96d56Sopenharmony_ci NEWLINE '\\n' (4, 23) (4, 24) 9427db96d56Sopenharmony_ci NL '\\n' (5, 0) (5, 1) 9437db96d56Sopenharmony_ci NAME 'await' (6, 2) (6, 7) 9447db96d56Sopenharmony_ci OP '=' (6, 8) (6, 9) 9457db96d56Sopenharmony_ci NUMBER '2' (6, 10) (6, 11) 9467db96d56Sopenharmony_ci DEDENT '' (7, 0) (7, 0) 9477db96d56Sopenharmony_ci """) 9487db96d56Sopenharmony_ci 9497db96d56Sopenharmony_ciclass GenerateTokensTest(TokenizeTest): 9507db96d56Sopenharmony_ci def check_tokenize(self, s, expected): 9517db96d56Sopenharmony_ci # Format the tokens in s in a table format. 9527db96d56Sopenharmony_ci # The ENDMARKER and final NEWLINE are omitted. 9537db96d56Sopenharmony_ci f = StringIO(s) 9547db96d56Sopenharmony_ci result = stringify_tokens_from_source(generate_tokens(f.readline), s) 9557db96d56Sopenharmony_ci self.assertEqual(result, expected.rstrip().splitlines()) 9567db96d56Sopenharmony_ci 9577db96d56Sopenharmony_ci 9587db96d56Sopenharmony_cidef decistmt(s): 9597db96d56Sopenharmony_ci result = [] 9607db96d56Sopenharmony_ci g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string 9617db96d56Sopenharmony_ci for toknum, tokval, _, _, _ in g: 9627db96d56Sopenharmony_ci if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens 9637db96d56Sopenharmony_ci result.extend([ 9647db96d56Sopenharmony_ci (NAME, 'Decimal'), 9657db96d56Sopenharmony_ci (OP, '('), 9667db96d56Sopenharmony_ci (STRING, repr(tokval)), 9677db96d56Sopenharmony_ci (OP, ')') 9687db96d56Sopenharmony_ci ]) 9697db96d56Sopenharmony_ci else: 9707db96d56Sopenharmony_ci result.append((toknum, tokval)) 9717db96d56Sopenharmony_ci return untokenize(result).decode('utf-8') 9727db96d56Sopenharmony_ci 9737db96d56Sopenharmony_ciclass TestMisc(TestCase): 9747db96d56Sopenharmony_ci 9757db96d56Sopenharmony_ci def test_decistmt(self): 9767db96d56Sopenharmony_ci # Substitute Decimals for floats in a string of statements. 9777db96d56Sopenharmony_ci # This is an example from the docs. 9787db96d56Sopenharmony_ci 9797db96d56Sopenharmony_ci from decimal import Decimal 9807db96d56Sopenharmony_ci s = '+21.3e-5*-.1234/81.7' 9817db96d56Sopenharmony_ci self.assertEqual(decistmt(s), 9827db96d56Sopenharmony_ci "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')") 9837db96d56Sopenharmony_ci 9847db96d56Sopenharmony_ci # The format of the exponent is inherited from the platform C library. 9857db96d56Sopenharmony_ci # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since 9867db96d56Sopenharmony_ci # we're only showing 11 digits, and the 12th isn't close to 5, the 9877db96d56Sopenharmony_ci # rest of the output should be platform-independent. 9887db96d56Sopenharmony_ci self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7') 9897db96d56Sopenharmony_ci 9907db96d56Sopenharmony_ci # Output from calculations with Decimal should be identical across all 9917db96d56Sopenharmony_ci # platforms. 9927db96d56Sopenharmony_ci self.assertEqual(eval(decistmt(s)), 9937db96d56Sopenharmony_ci Decimal('-3.217160342717258261933904529E-7')) 9947db96d56Sopenharmony_ci 9957db96d56Sopenharmony_ci 9967db96d56Sopenharmony_ciclass TestTokenizerAdheresToPep0263(TestCase): 9977db96d56Sopenharmony_ci """ 9987db96d56Sopenharmony_ci Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263. 9997db96d56Sopenharmony_ci """ 10007db96d56Sopenharmony_ci 10017db96d56Sopenharmony_ci def _testFile(self, filename): 10027db96d56Sopenharmony_ci path = os.path.join(os.path.dirname(__file__), filename) 10037db96d56Sopenharmony_ci TestRoundtrip.check_roundtrip(self, open(path, 'rb')) 10047db96d56Sopenharmony_ci 10057db96d56Sopenharmony_ci def test_utf8_coding_cookie_and_no_utf8_bom(self): 10067db96d56Sopenharmony_ci f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt' 10077db96d56Sopenharmony_ci self._testFile(f) 10087db96d56Sopenharmony_ci 10097db96d56Sopenharmony_ci def test_latin1_coding_cookie_and_utf8_bom(self): 10107db96d56Sopenharmony_ci """ 10117db96d56Sopenharmony_ci As per PEP 0263, if a file starts with a utf-8 BOM signature, the only 10127db96d56Sopenharmony_ci allowed encoding for the comment is 'utf-8'. The text file used in 10137db96d56Sopenharmony_ci this test starts with a BOM signature, but specifies latin1 as the 10147db96d56Sopenharmony_ci coding, so verify that a SyntaxError is raised, which matches the 10157db96d56Sopenharmony_ci behaviour of the interpreter when it encounters a similar condition. 10167db96d56Sopenharmony_ci """ 10177db96d56Sopenharmony_ci f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt' 10187db96d56Sopenharmony_ci self.assertRaises(SyntaxError, self._testFile, f) 10197db96d56Sopenharmony_ci 10207db96d56Sopenharmony_ci def test_no_coding_cookie_and_utf8_bom(self): 10217db96d56Sopenharmony_ci f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt' 10227db96d56Sopenharmony_ci self._testFile(f) 10237db96d56Sopenharmony_ci 10247db96d56Sopenharmony_ci def test_utf8_coding_cookie_and_utf8_bom(self): 10257db96d56Sopenharmony_ci f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt' 10267db96d56Sopenharmony_ci self._testFile(f) 10277db96d56Sopenharmony_ci 10287db96d56Sopenharmony_ci def test_bad_coding_cookie(self): 10297db96d56Sopenharmony_ci self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py') 10307db96d56Sopenharmony_ci self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py') 10317db96d56Sopenharmony_ci 10327db96d56Sopenharmony_ci 10337db96d56Sopenharmony_ciclass Test_Tokenize(TestCase): 10347db96d56Sopenharmony_ci 10357db96d56Sopenharmony_ci def test__tokenize_decodes_with_specified_encoding(self): 10367db96d56Sopenharmony_ci literal = '"ЉЊЈЁЂ"' 10377db96d56Sopenharmony_ci line = literal.encode('utf-8') 10387db96d56Sopenharmony_ci first = False 10397db96d56Sopenharmony_ci def readline(): 10407db96d56Sopenharmony_ci nonlocal first 10417db96d56Sopenharmony_ci if not first: 10427db96d56Sopenharmony_ci first = True 10437db96d56Sopenharmony_ci return line 10447db96d56Sopenharmony_ci else: 10457db96d56Sopenharmony_ci return b'' 10467db96d56Sopenharmony_ci 10477db96d56Sopenharmony_ci # skip the initial encoding token and the end tokens 10487db96d56Sopenharmony_ci tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2] 10497db96d56Sopenharmony_ci expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] 10507db96d56Sopenharmony_ci self.assertEqual(tokens, expected_tokens, 10517db96d56Sopenharmony_ci "bytes not decoded with encoding") 10527db96d56Sopenharmony_ci 10537db96d56Sopenharmony_ci def test__tokenize_does_not_decode_with_encoding_none(self): 10547db96d56Sopenharmony_ci literal = '"ЉЊЈЁЂ"' 10557db96d56Sopenharmony_ci first = False 10567db96d56Sopenharmony_ci def readline(): 10577db96d56Sopenharmony_ci nonlocal first 10587db96d56Sopenharmony_ci if not first: 10597db96d56Sopenharmony_ci first = True 10607db96d56Sopenharmony_ci return literal 10617db96d56Sopenharmony_ci else: 10627db96d56Sopenharmony_ci return b'' 10637db96d56Sopenharmony_ci 10647db96d56Sopenharmony_ci # skip the end tokens 10657db96d56Sopenharmony_ci tokens = list(_tokenize(readline, encoding=None))[:-2] 10667db96d56Sopenharmony_ci expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] 10677db96d56Sopenharmony_ci self.assertEqual(tokens, expected_tokens, 10687db96d56Sopenharmony_ci "string not tokenized when encoding is None") 10697db96d56Sopenharmony_ci 10707db96d56Sopenharmony_ci 10717db96d56Sopenharmony_ciclass TestDetectEncoding(TestCase): 10727db96d56Sopenharmony_ci 10737db96d56Sopenharmony_ci def get_readline(self, lines): 10747db96d56Sopenharmony_ci index = 0 10757db96d56Sopenharmony_ci def readline(): 10767db96d56Sopenharmony_ci nonlocal index 10777db96d56Sopenharmony_ci if index == len(lines): 10787db96d56Sopenharmony_ci raise StopIteration 10797db96d56Sopenharmony_ci line = lines[index] 10807db96d56Sopenharmony_ci index += 1 10817db96d56Sopenharmony_ci return line 10827db96d56Sopenharmony_ci return readline 10837db96d56Sopenharmony_ci 10847db96d56Sopenharmony_ci def test_no_bom_no_encoding_cookie(self): 10857db96d56Sopenharmony_ci lines = ( 10867db96d56Sopenharmony_ci b'# something\n', 10877db96d56Sopenharmony_ci b'print(something)\n', 10887db96d56Sopenharmony_ci b'do_something(else)\n' 10897db96d56Sopenharmony_ci ) 10907db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 10917db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8') 10927db96d56Sopenharmony_ci self.assertEqual(consumed_lines, list(lines[:2])) 10937db96d56Sopenharmony_ci 10947db96d56Sopenharmony_ci def test_bom_no_cookie(self): 10957db96d56Sopenharmony_ci lines = ( 10967db96d56Sopenharmony_ci b'\xef\xbb\xbf# something\n', 10977db96d56Sopenharmony_ci b'print(something)\n', 10987db96d56Sopenharmony_ci b'do_something(else)\n' 10997db96d56Sopenharmony_ci ) 11007db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 11017db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8-sig') 11027db96d56Sopenharmony_ci self.assertEqual(consumed_lines, 11037db96d56Sopenharmony_ci [b'# something\n', b'print(something)\n']) 11047db96d56Sopenharmony_ci 11057db96d56Sopenharmony_ci def test_cookie_first_line_no_bom(self): 11067db96d56Sopenharmony_ci lines = ( 11077db96d56Sopenharmony_ci b'# -*- coding: latin-1 -*-\n', 11087db96d56Sopenharmony_ci b'print(something)\n', 11097db96d56Sopenharmony_ci b'do_something(else)\n' 11107db96d56Sopenharmony_ci ) 11117db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 11127db96d56Sopenharmony_ci self.assertEqual(encoding, 'iso-8859-1') 11137db96d56Sopenharmony_ci self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n']) 11147db96d56Sopenharmony_ci 11157db96d56Sopenharmony_ci def test_matched_bom_and_cookie_first_line(self): 11167db96d56Sopenharmony_ci lines = ( 11177db96d56Sopenharmony_ci b'\xef\xbb\xbf# coding=utf-8\n', 11187db96d56Sopenharmony_ci b'print(something)\n', 11197db96d56Sopenharmony_ci b'do_something(else)\n' 11207db96d56Sopenharmony_ci ) 11217db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 11227db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8-sig') 11237db96d56Sopenharmony_ci self.assertEqual(consumed_lines, [b'# coding=utf-8\n']) 11247db96d56Sopenharmony_ci 11257db96d56Sopenharmony_ci def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self): 11267db96d56Sopenharmony_ci lines = ( 11277db96d56Sopenharmony_ci b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n', 11287db96d56Sopenharmony_ci b'print(something)\n', 11297db96d56Sopenharmony_ci b'do_something(else)\n' 11307db96d56Sopenharmony_ci ) 11317db96d56Sopenharmony_ci readline = self.get_readline(lines) 11327db96d56Sopenharmony_ci self.assertRaises(SyntaxError, detect_encoding, readline) 11337db96d56Sopenharmony_ci 11347db96d56Sopenharmony_ci def test_cookie_second_line_no_bom(self): 11357db96d56Sopenharmony_ci lines = ( 11367db96d56Sopenharmony_ci b'#! something\n', 11377db96d56Sopenharmony_ci b'# vim: set fileencoding=ascii :\n', 11387db96d56Sopenharmony_ci b'print(something)\n', 11397db96d56Sopenharmony_ci b'do_something(else)\n' 11407db96d56Sopenharmony_ci ) 11417db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 11427db96d56Sopenharmony_ci self.assertEqual(encoding, 'ascii') 11437db96d56Sopenharmony_ci expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n'] 11447db96d56Sopenharmony_ci self.assertEqual(consumed_lines, expected) 11457db96d56Sopenharmony_ci 11467db96d56Sopenharmony_ci def test_matched_bom_and_cookie_second_line(self): 11477db96d56Sopenharmony_ci lines = ( 11487db96d56Sopenharmony_ci b'\xef\xbb\xbf#! something\n', 11497db96d56Sopenharmony_ci b'f# coding=utf-8\n', 11507db96d56Sopenharmony_ci b'print(something)\n', 11517db96d56Sopenharmony_ci b'do_something(else)\n' 11527db96d56Sopenharmony_ci ) 11537db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 11547db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8-sig') 11557db96d56Sopenharmony_ci self.assertEqual(consumed_lines, 11567db96d56Sopenharmony_ci [b'#! something\n', b'f# coding=utf-8\n']) 11577db96d56Sopenharmony_ci 11587db96d56Sopenharmony_ci def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self): 11597db96d56Sopenharmony_ci lines = ( 11607db96d56Sopenharmony_ci b'\xef\xbb\xbf#! something\n', 11617db96d56Sopenharmony_ci b'# vim: set fileencoding=ascii :\n', 11627db96d56Sopenharmony_ci b'print(something)\n', 11637db96d56Sopenharmony_ci b'do_something(else)\n' 11647db96d56Sopenharmony_ci ) 11657db96d56Sopenharmony_ci readline = self.get_readline(lines) 11667db96d56Sopenharmony_ci self.assertRaises(SyntaxError, detect_encoding, readline) 11677db96d56Sopenharmony_ci 11687db96d56Sopenharmony_ci def test_cookie_second_line_noncommented_first_line(self): 11697db96d56Sopenharmony_ci lines = ( 11707db96d56Sopenharmony_ci b"print('\xc2\xa3')\n", 11717db96d56Sopenharmony_ci b'# vim: set fileencoding=iso8859-15 :\n', 11727db96d56Sopenharmony_ci b"print('\xe2\x82\xac')\n" 11737db96d56Sopenharmony_ci ) 11747db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 11757db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8') 11767db96d56Sopenharmony_ci expected = [b"print('\xc2\xa3')\n"] 11777db96d56Sopenharmony_ci self.assertEqual(consumed_lines, expected) 11787db96d56Sopenharmony_ci 11797db96d56Sopenharmony_ci def test_cookie_second_line_commented_first_line(self): 11807db96d56Sopenharmony_ci lines = ( 11817db96d56Sopenharmony_ci b"#print('\xc2\xa3')\n", 11827db96d56Sopenharmony_ci b'# vim: set fileencoding=iso8859-15 :\n', 11837db96d56Sopenharmony_ci b"print('\xe2\x82\xac')\n" 11847db96d56Sopenharmony_ci ) 11857db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 11867db96d56Sopenharmony_ci self.assertEqual(encoding, 'iso8859-15') 11877db96d56Sopenharmony_ci expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n'] 11887db96d56Sopenharmony_ci self.assertEqual(consumed_lines, expected) 11897db96d56Sopenharmony_ci 11907db96d56Sopenharmony_ci def test_cookie_second_line_empty_first_line(self): 11917db96d56Sopenharmony_ci lines = ( 11927db96d56Sopenharmony_ci b'\n', 11937db96d56Sopenharmony_ci b'# vim: set fileencoding=iso8859-15 :\n', 11947db96d56Sopenharmony_ci b"print('\xe2\x82\xac')\n" 11957db96d56Sopenharmony_ci ) 11967db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 11977db96d56Sopenharmony_ci self.assertEqual(encoding, 'iso8859-15') 11987db96d56Sopenharmony_ci expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n'] 11997db96d56Sopenharmony_ci self.assertEqual(consumed_lines, expected) 12007db96d56Sopenharmony_ci 12017db96d56Sopenharmony_ci def test_latin1_normalization(self): 12027db96d56Sopenharmony_ci # See get_normal_name() in tokenizer.c. 12037db96d56Sopenharmony_ci encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix", 12047db96d56Sopenharmony_ci "iso-8859-1-unix", "iso-latin-1-mac") 12057db96d56Sopenharmony_ci for encoding in encodings: 12067db96d56Sopenharmony_ci for rep in ("-", "_"): 12077db96d56Sopenharmony_ci enc = encoding.replace("-", rep) 12087db96d56Sopenharmony_ci lines = (b"#!/usr/bin/python\n", 12097db96d56Sopenharmony_ci b"# coding: " + enc.encode("ascii") + b"\n", 12107db96d56Sopenharmony_ci b"print(things)\n", 12117db96d56Sopenharmony_ci b"do_something += 4\n") 12127db96d56Sopenharmony_ci rl = self.get_readline(lines) 12137db96d56Sopenharmony_ci found, consumed_lines = detect_encoding(rl) 12147db96d56Sopenharmony_ci self.assertEqual(found, "iso-8859-1") 12157db96d56Sopenharmony_ci 12167db96d56Sopenharmony_ci def test_syntaxerror_latin1(self): 12177db96d56Sopenharmony_ci # Issue 14629: need to raise SyntaxError if the first 12187db96d56Sopenharmony_ci # line(s) have non-UTF-8 characters 12197db96d56Sopenharmony_ci lines = ( 12207db96d56Sopenharmony_ci b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S 12217db96d56Sopenharmony_ci ) 12227db96d56Sopenharmony_ci readline = self.get_readline(lines) 12237db96d56Sopenharmony_ci self.assertRaises(SyntaxError, detect_encoding, readline) 12247db96d56Sopenharmony_ci 12257db96d56Sopenharmony_ci 12267db96d56Sopenharmony_ci def test_utf8_normalization(self): 12277db96d56Sopenharmony_ci # See get_normal_name() in tokenizer.c. 12287db96d56Sopenharmony_ci encodings = ("utf-8", "utf-8-mac", "utf-8-unix") 12297db96d56Sopenharmony_ci for encoding in encodings: 12307db96d56Sopenharmony_ci for rep in ("-", "_"): 12317db96d56Sopenharmony_ci enc = encoding.replace("-", rep) 12327db96d56Sopenharmony_ci lines = (b"#!/usr/bin/python\n", 12337db96d56Sopenharmony_ci b"# coding: " + enc.encode("ascii") + b"\n", 12347db96d56Sopenharmony_ci b"1 + 3\n") 12357db96d56Sopenharmony_ci rl = self.get_readline(lines) 12367db96d56Sopenharmony_ci found, consumed_lines = detect_encoding(rl) 12377db96d56Sopenharmony_ci self.assertEqual(found, "utf-8") 12387db96d56Sopenharmony_ci 12397db96d56Sopenharmony_ci def test_short_files(self): 12407db96d56Sopenharmony_ci readline = self.get_readline((b'print(something)\n',)) 12417db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(readline) 12427db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8') 12437db96d56Sopenharmony_ci self.assertEqual(consumed_lines, [b'print(something)\n']) 12447db96d56Sopenharmony_ci 12457db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(self.get_readline(())) 12467db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8') 12477db96d56Sopenharmony_ci self.assertEqual(consumed_lines, []) 12487db96d56Sopenharmony_ci 12497db96d56Sopenharmony_ci readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',)) 12507db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(readline) 12517db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8-sig') 12527db96d56Sopenharmony_ci self.assertEqual(consumed_lines, [b'print(something)\n']) 12537db96d56Sopenharmony_ci 12547db96d56Sopenharmony_ci readline = self.get_readline((b'\xef\xbb\xbf',)) 12557db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(readline) 12567db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8-sig') 12577db96d56Sopenharmony_ci self.assertEqual(consumed_lines, []) 12587db96d56Sopenharmony_ci 12597db96d56Sopenharmony_ci readline = self.get_readline((b'# coding: bad\n',)) 12607db96d56Sopenharmony_ci self.assertRaises(SyntaxError, detect_encoding, readline) 12617db96d56Sopenharmony_ci 12627db96d56Sopenharmony_ci def test_false_encoding(self): 12637db96d56Sopenharmony_ci # Issue 18873: "Encoding" detected in non-comment lines 12647db96d56Sopenharmony_ci readline = self.get_readline((b'print("#coding=fake")',)) 12657db96d56Sopenharmony_ci encoding, consumed_lines = detect_encoding(readline) 12667db96d56Sopenharmony_ci self.assertEqual(encoding, 'utf-8') 12677db96d56Sopenharmony_ci self.assertEqual(consumed_lines, [b'print("#coding=fake")']) 12687db96d56Sopenharmony_ci 12697db96d56Sopenharmony_ci def test_open(self): 12707db96d56Sopenharmony_ci filename = os_helper.TESTFN + '.py' 12717db96d56Sopenharmony_ci self.addCleanup(os_helper.unlink, filename) 12727db96d56Sopenharmony_ci 12737db96d56Sopenharmony_ci # test coding cookie 12747db96d56Sopenharmony_ci for encoding in ('iso-8859-15', 'utf-8'): 12757db96d56Sopenharmony_ci with open(filename, 'w', encoding=encoding) as fp: 12767db96d56Sopenharmony_ci print("# coding: %s" % encoding, file=fp) 12777db96d56Sopenharmony_ci print("print('euro:\u20ac')", file=fp) 12787db96d56Sopenharmony_ci with tokenize_open(filename) as fp: 12797db96d56Sopenharmony_ci self.assertEqual(fp.encoding, encoding) 12807db96d56Sopenharmony_ci self.assertEqual(fp.mode, 'r') 12817db96d56Sopenharmony_ci 12827db96d56Sopenharmony_ci # test BOM (no coding cookie) 12837db96d56Sopenharmony_ci with open(filename, 'w', encoding='utf-8-sig') as fp: 12847db96d56Sopenharmony_ci print("print('euro:\u20ac')", file=fp) 12857db96d56Sopenharmony_ci with tokenize_open(filename) as fp: 12867db96d56Sopenharmony_ci self.assertEqual(fp.encoding, 'utf-8-sig') 12877db96d56Sopenharmony_ci self.assertEqual(fp.mode, 'r') 12887db96d56Sopenharmony_ci 12897db96d56Sopenharmony_ci def test_filename_in_exception(self): 12907db96d56Sopenharmony_ci # When possible, include the file name in the exception. 12917db96d56Sopenharmony_ci path = 'some_file_path' 12927db96d56Sopenharmony_ci lines = ( 12937db96d56Sopenharmony_ci b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S 12947db96d56Sopenharmony_ci ) 12957db96d56Sopenharmony_ci class Bunk: 12967db96d56Sopenharmony_ci def __init__(self, lines, path): 12977db96d56Sopenharmony_ci self.name = path 12987db96d56Sopenharmony_ci self._lines = lines 12997db96d56Sopenharmony_ci self._index = 0 13007db96d56Sopenharmony_ci 13017db96d56Sopenharmony_ci def readline(self): 13027db96d56Sopenharmony_ci if self._index == len(lines): 13037db96d56Sopenharmony_ci raise StopIteration 13047db96d56Sopenharmony_ci line = lines[self._index] 13057db96d56Sopenharmony_ci self._index += 1 13067db96d56Sopenharmony_ci return line 13077db96d56Sopenharmony_ci 13087db96d56Sopenharmony_ci with self.assertRaises(SyntaxError): 13097db96d56Sopenharmony_ci ins = Bunk(lines, path) 13107db96d56Sopenharmony_ci # Make sure lacking a name isn't an issue. 13117db96d56Sopenharmony_ci del ins.name 13127db96d56Sopenharmony_ci detect_encoding(ins.readline) 13137db96d56Sopenharmony_ci with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)): 13147db96d56Sopenharmony_ci ins = Bunk(lines, path) 13157db96d56Sopenharmony_ci detect_encoding(ins.readline) 13167db96d56Sopenharmony_ci 13177db96d56Sopenharmony_ci def test_open_error(self): 13187db96d56Sopenharmony_ci # Issue #23840: open() must close the binary file on error 13197db96d56Sopenharmony_ci m = BytesIO(b'#coding:xxx') 13207db96d56Sopenharmony_ci with mock.patch('tokenize._builtin_open', return_value=m): 13217db96d56Sopenharmony_ci self.assertRaises(SyntaxError, tokenize_open, 'foobar') 13227db96d56Sopenharmony_ci self.assertTrue(m.closed) 13237db96d56Sopenharmony_ci 13247db96d56Sopenharmony_ci 13257db96d56Sopenharmony_ciclass TestTokenize(TestCase): 13267db96d56Sopenharmony_ci 13277db96d56Sopenharmony_ci def test_tokenize(self): 13287db96d56Sopenharmony_ci import tokenize as tokenize_module 13297db96d56Sopenharmony_ci encoding = object() 13307db96d56Sopenharmony_ci encoding_used = None 13317db96d56Sopenharmony_ci def mock_detect_encoding(readline): 13327db96d56Sopenharmony_ci return encoding, [b'first', b'second'] 13337db96d56Sopenharmony_ci 13347db96d56Sopenharmony_ci def mock__tokenize(readline, encoding): 13357db96d56Sopenharmony_ci nonlocal encoding_used 13367db96d56Sopenharmony_ci encoding_used = encoding 13377db96d56Sopenharmony_ci out = [] 13387db96d56Sopenharmony_ci while True: 13397db96d56Sopenharmony_ci next_line = readline() 13407db96d56Sopenharmony_ci if next_line: 13417db96d56Sopenharmony_ci out.append(next_line) 13427db96d56Sopenharmony_ci continue 13437db96d56Sopenharmony_ci return out 13447db96d56Sopenharmony_ci 13457db96d56Sopenharmony_ci counter = 0 13467db96d56Sopenharmony_ci def mock_readline(): 13477db96d56Sopenharmony_ci nonlocal counter 13487db96d56Sopenharmony_ci counter += 1 13497db96d56Sopenharmony_ci if counter == 5: 13507db96d56Sopenharmony_ci return b'' 13517db96d56Sopenharmony_ci return str(counter).encode() 13527db96d56Sopenharmony_ci 13537db96d56Sopenharmony_ci orig_detect_encoding = tokenize_module.detect_encoding 13547db96d56Sopenharmony_ci orig__tokenize = tokenize_module._tokenize 13557db96d56Sopenharmony_ci tokenize_module.detect_encoding = mock_detect_encoding 13567db96d56Sopenharmony_ci tokenize_module._tokenize = mock__tokenize 13577db96d56Sopenharmony_ci try: 13587db96d56Sopenharmony_ci results = tokenize(mock_readline) 13597db96d56Sopenharmony_ci self.assertEqual(list(results), 13607db96d56Sopenharmony_ci [b'first', b'second', b'1', b'2', b'3', b'4']) 13617db96d56Sopenharmony_ci finally: 13627db96d56Sopenharmony_ci tokenize_module.detect_encoding = orig_detect_encoding 13637db96d56Sopenharmony_ci tokenize_module._tokenize = orig__tokenize 13647db96d56Sopenharmony_ci 13657db96d56Sopenharmony_ci self.assertEqual(encoding_used, encoding) 13667db96d56Sopenharmony_ci 13677db96d56Sopenharmony_ci def test_oneline_defs(self): 13687db96d56Sopenharmony_ci buf = [] 13697db96d56Sopenharmony_ci for i in range(500): 13707db96d56Sopenharmony_ci buf.append('def i{i}(): return {i}'.format(i=i)) 13717db96d56Sopenharmony_ci buf.append('OK') 13727db96d56Sopenharmony_ci buf = '\n'.join(buf) 13737db96d56Sopenharmony_ci 13747db96d56Sopenharmony_ci # Test that 500 consequent, one-line defs is OK 13757db96d56Sopenharmony_ci toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline)) 13767db96d56Sopenharmony_ci self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER 13777db96d56Sopenharmony_ci # [-2] is always NEWLINE 13787db96d56Sopenharmony_ci 13797db96d56Sopenharmony_ci def assertExactTypeEqual(self, opstr, *optypes): 13807db96d56Sopenharmony_ci tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline)) 13817db96d56Sopenharmony_ci num_optypes = len(optypes) 13827db96d56Sopenharmony_ci self.assertEqual(len(tokens), 3 + num_optypes) 13837db96d56Sopenharmony_ci self.assertEqual(tok_name[tokens[0].exact_type], 13847db96d56Sopenharmony_ci tok_name[ENCODING]) 13857db96d56Sopenharmony_ci for i in range(num_optypes): 13867db96d56Sopenharmony_ci self.assertEqual(tok_name[tokens[i + 1].exact_type], 13877db96d56Sopenharmony_ci tok_name[optypes[i]]) 13887db96d56Sopenharmony_ci self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type], 13897db96d56Sopenharmony_ci tok_name[token.NEWLINE]) 13907db96d56Sopenharmony_ci self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type], 13917db96d56Sopenharmony_ci tok_name[token.ENDMARKER]) 13927db96d56Sopenharmony_ci 13937db96d56Sopenharmony_ci def test_exact_type(self): 13947db96d56Sopenharmony_ci self.assertExactTypeEqual('()', token.LPAR, token.RPAR) 13957db96d56Sopenharmony_ci self.assertExactTypeEqual('[]', token.LSQB, token.RSQB) 13967db96d56Sopenharmony_ci self.assertExactTypeEqual(':', token.COLON) 13977db96d56Sopenharmony_ci self.assertExactTypeEqual(',', token.COMMA) 13987db96d56Sopenharmony_ci self.assertExactTypeEqual(';', token.SEMI) 13997db96d56Sopenharmony_ci self.assertExactTypeEqual('+', token.PLUS) 14007db96d56Sopenharmony_ci self.assertExactTypeEqual('-', token.MINUS) 14017db96d56Sopenharmony_ci self.assertExactTypeEqual('*', token.STAR) 14027db96d56Sopenharmony_ci self.assertExactTypeEqual('/', token.SLASH) 14037db96d56Sopenharmony_ci self.assertExactTypeEqual('|', token.VBAR) 14047db96d56Sopenharmony_ci self.assertExactTypeEqual('&', token.AMPER) 14057db96d56Sopenharmony_ci self.assertExactTypeEqual('<', token.LESS) 14067db96d56Sopenharmony_ci self.assertExactTypeEqual('>', token.GREATER) 14077db96d56Sopenharmony_ci self.assertExactTypeEqual('=', token.EQUAL) 14087db96d56Sopenharmony_ci self.assertExactTypeEqual('.', token.DOT) 14097db96d56Sopenharmony_ci self.assertExactTypeEqual('%', token.PERCENT) 14107db96d56Sopenharmony_ci self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE) 14117db96d56Sopenharmony_ci self.assertExactTypeEqual('==', token.EQEQUAL) 14127db96d56Sopenharmony_ci self.assertExactTypeEqual('!=', token.NOTEQUAL) 14137db96d56Sopenharmony_ci self.assertExactTypeEqual('<=', token.LESSEQUAL) 14147db96d56Sopenharmony_ci self.assertExactTypeEqual('>=', token.GREATEREQUAL) 14157db96d56Sopenharmony_ci self.assertExactTypeEqual('~', token.TILDE) 14167db96d56Sopenharmony_ci self.assertExactTypeEqual('^', token.CIRCUMFLEX) 14177db96d56Sopenharmony_ci self.assertExactTypeEqual('<<', token.LEFTSHIFT) 14187db96d56Sopenharmony_ci self.assertExactTypeEqual('>>', token.RIGHTSHIFT) 14197db96d56Sopenharmony_ci self.assertExactTypeEqual('**', token.DOUBLESTAR) 14207db96d56Sopenharmony_ci self.assertExactTypeEqual('+=', token.PLUSEQUAL) 14217db96d56Sopenharmony_ci self.assertExactTypeEqual('-=', token.MINEQUAL) 14227db96d56Sopenharmony_ci self.assertExactTypeEqual('*=', token.STAREQUAL) 14237db96d56Sopenharmony_ci self.assertExactTypeEqual('/=', token.SLASHEQUAL) 14247db96d56Sopenharmony_ci self.assertExactTypeEqual('%=', token.PERCENTEQUAL) 14257db96d56Sopenharmony_ci self.assertExactTypeEqual('&=', token.AMPEREQUAL) 14267db96d56Sopenharmony_ci self.assertExactTypeEqual('|=', token.VBAREQUAL) 14277db96d56Sopenharmony_ci self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL) 14287db96d56Sopenharmony_ci self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL) 14297db96d56Sopenharmony_ci self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL) 14307db96d56Sopenharmony_ci self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL) 14317db96d56Sopenharmony_ci self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL) 14327db96d56Sopenharmony_ci self.assertExactTypeEqual('//', token.DOUBLESLASH) 14337db96d56Sopenharmony_ci self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL) 14347db96d56Sopenharmony_ci self.assertExactTypeEqual(':=', token.COLONEQUAL) 14357db96d56Sopenharmony_ci self.assertExactTypeEqual('...', token.ELLIPSIS) 14367db96d56Sopenharmony_ci self.assertExactTypeEqual('->', token.RARROW) 14377db96d56Sopenharmony_ci self.assertExactTypeEqual('@', token.AT) 14387db96d56Sopenharmony_ci self.assertExactTypeEqual('@=', token.ATEQUAL) 14397db96d56Sopenharmony_ci 14407db96d56Sopenharmony_ci self.assertExactTypeEqual('a**2+b**2==c**2', 14417db96d56Sopenharmony_ci NAME, token.DOUBLESTAR, NUMBER, 14427db96d56Sopenharmony_ci token.PLUS, 14437db96d56Sopenharmony_ci NAME, token.DOUBLESTAR, NUMBER, 14447db96d56Sopenharmony_ci token.EQEQUAL, 14457db96d56Sopenharmony_ci NAME, token.DOUBLESTAR, NUMBER) 14467db96d56Sopenharmony_ci self.assertExactTypeEqual('{1, 2, 3}', 14477db96d56Sopenharmony_ci token.LBRACE, 14487db96d56Sopenharmony_ci token.NUMBER, token.COMMA, 14497db96d56Sopenharmony_ci token.NUMBER, token.COMMA, 14507db96d56Sopenharmony_ci token.NUMBER, 14517db96d56Sopenharmony_ci token.RBRACE) 14527db96d56Sopenharmony_ci self.assertExactTypeEqual('^(x & 0x1)', 14537db96d56Sopenharmony_ci token.CIRCUMFLEX, 14547db96d56Sopenharmony_ci token.LPAR, 14557db96d56Sopenharmony_ci token.NAME, token.AMPER, token.NUMBER, 14567db96d56Sopenharmony_ci token.RPAR) 14577db96d56Sopenharmony_ci 14587db96d56Sopenharmony_ci def test_pathological_trailing_whitespace(self): 14597db96d56Sopenharmony_ci # See http://bugs.python.org/issue16152 14607db96d56Sopenharmony_ci self.assertExactTypeEqual('@ ', token.AT) 14617db96d56Sopenharmony_ci 14627db96d56Sopenharmony_ci def test_comment_at_the_end_of_the_source_without_newline(self): 14637db96d56Sopenharmony_ci # See http://bugs.python.org/issue44667 14647db96d56Sopenharmony_ci source = 'b = 1\n\n#test' 14657db96d56Sopenharmony_ci expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT] 14667db96d56Sopenharmony_ci 14677db96d56Sopenharmony_ci tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline)) 14687db96d56Sopenharmony_ci self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING]) 14697db96d56Sopenharmony_ci for i in range(6): 14707db96d56Sopenharmony_ci self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]]) 14717db96d56Sopenharmony_ci self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER]) 14727db96d56Sopenharmony_ci 14737db96d56Sopenharmony_ciclass UntokenizeTest(TestCase): 14747db96d56Sopenharmony_ci 14757db96d56Sopenharmony_ci def test_bad_input_order(self): 14767db96d56Sopenharmony_ci # raise if previous row 14777db96d56Sopenharmony_ci u = Untokenizer() 14787db96d56Sopenharmony_ci u.prev_row = 2 14797db96d56Sopenharmony_ci u.prev_col = 2 14807db96d56Sopenharmony_ci with self.assertRaises(ValueError) as cm: 14817db96d56Sopenharmony_ci u.add_whitespace((1,3)) 14827db96d56Sopenharmony_ci self.assertEqual(cm.exception.args[0], 14837db96d56Sopenharmony_ci 'start (1,3) precedes previous end (2,2)') 14847db96d56Sopenharmony_ci # raise if previous column in row 14857db96d56Sopenharmony_ci self.assertRaises(ValueError, u.add_whitespace, (2,1)) 14867db96d56Sopenharmony_ci 14877db96d56Sopenharmony_ci def test_backslash_continuation(self): 14887db96d56Sopenharmony_ci # The problem is that <whitespace>\<newline> leaves no token 14897db96d56Sopenharmony_ci u = Untokenizer() 14907db96d56Sopenharmony_ci u.prev_row = 1 14917db96d56Sopenharmony_ci u.prev_col = 1 14927db96d56Sopenharmony_ci u.tokens = [] 14937db96d56Sopenharmony_ci u.add_whitespace((2, 0)) 14947db96d56Sopenharmony_ci self.assertEqual(u.tokens, ['\\\n']) 14957db96d56Sopenharmony_ci u.prev_row = 2 14967db96d56Sopenharmony_ci u.add_whitespace((4, 4)) 14977db96d56Sopenharmony_ci self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' ']) 14987db96d56Sopenharmony_ci TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n') 14997db96d56Sopenharmony_ci 15007db96d56Sopenharmony_ci def test_iter_compat(self): 15017db96d56Sopenharmony_ci u = Untokenizer() 15027db96d56Sopenharmony_ci token = (NAME, 'Hello') 15037db96d56Sopenharmony_ci tokens = [(ENCODING, 'utf-8'), token] 15047db96d56Sopenharmony_ci u.compat(token, iter([])) 15057db96d56Sopenharmony_ci self.assertEqual(u.tokens, ["Hello "]) 15067db96d56Sopenharmony_ci u = Untokenizer() 15077db96d56Sopenharmony_ci self.assertEqual(u.untokenize(iter([token])), 'Hello ') 15087db96d56Sopenharmony_ci u = Untokenizer() 15097db96d56Sopenharmony_ci self.assertEqual(u.untokenize(iter(tokens)), 'Hello ') 15107db96d56Sopenharmony_ci self.assertEqual(u.encoding, 'utf-8') 15117db96d56Sopenharmony_ci self.assertEqual(untokenize(iter(tokens)), b'Hello ') 15127db96d56Sopenharmony_ci 15137db96d56Sopenharmony_ci 15147db96d56Sopenharmony_ciclass TestRoundtrip(TestCase): 15157db96d56Sopenharmony_ci 15167db96d56Sopenharmony_ci def check_roundtrip(self, f): 15177db96d56Sopenharmony_ci """ 15187db96d56Sopenharmony_ci Test roundtrip for `untokenize`. `f` is an open file or a string. 15197db96d56Sopenharmony_ci The source code in f is tokenized to both 5- and 2-tuples. 15207db96d56Sopenharmony_ci Both sequences are converted back to source code via 15217db96d56Sopenharmony_ci tokenize.untokenize(), and the latter tokenized again to 2-tuples. 15227db96d56Sopenharmony_ci The test fails if the 3 pair tokenizations do not match. 15237db96d56Sopenharmony_ci 15247db96d56Sopenharmony_ci When untokenize bugs are fixed, untokenize with 5-tuples should 15257db96d56Sopenharmony_ci reproduce code that does not contain a backslash continuation 15267db96d56Sopenharmony_ci following spaces. A proper test should test this. 15277db96d56Sopenharmony_ci """ 15287db96d56Sopenharmony_ci # Get source code and original tokenizations 15297db96d56Sopenharmony_ci if isinstance(f, str): 15307db96d56Sopenharmony_ci code = f.encode('utf-8') 15317db96d56Sopenharmony_ci else: 15327db96d56Sopenharmony_ci code = f.read() 15337db96d56Sopenharmony_ci f.close() 15347db96d56Sopenharmony_ci readline = iter(code.splitlines(keepends=True)).__next__ 15357db96d56Sopenharmony_ci tokens5 = list(tokenize(readline)) 15367db96d56Sopenharmony_ci tokens2 = [tok[:2] for tok in tokens5] 15377db96d56Sopenharmony_ci # Reproduce tokens2 from pairs 15387db96d56Sopenharmony_ci bytes_from2 = untokenize(tokens2) 15397db96d56Sopenharmony_ci readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__ 15407db96d56Sopenharmony_ci tokens2_from2 = [tok[:2] for tok in tokenize(readline2)] 15417db96d56Sopenharmony_ci self.assertEqual(tokens2_from2, tokens2) 15427db96d56Sopenharmony_ci # Reproduce tokens2 from 5-tuples 15437db96d56Sopenharmony_ci bytes_from5 = untokenize(tokens5) 15447db96d56Sopenharmony_ci readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__ 15457db96d56Sopenharmony_ci tokens2_from5 = [tok[:2] for tok in tokenize(readline5)] 15467db96d56Sopenharmony_ci self.assertEqual(tokens2_from5, tokens2) 15477db96d56Sopenharmony_ci 15487db96d56Sopenharmony_ci def test_roundtrip(self): 15497db96d56Sopenharmony_ci # There are some standard formatting practices that are easy to get right. 15507db96d56Sopenharmony_ci 15517db96d56Sopenharmony_ci self.check_roundtrip("if x == 1:\n" 15527db96d56Sopenharmony_ci " print(x)\n") 15537db96d56Sopenharmony_ci self.check_roundtrip("# This is a comment\n" 15547db96d56Sopenharmony_ci "# This also\n") 15557db96d56Sopenharmony_ci 15567db96d56Sopenharmony_ci # Some people use different formatting conventions, which makes 15577db96d56Sopenharmony_ci # untokenize a little trickier. Note that this test involves trailing 15587db96d56Sopenharmony_ci # whitespace after the colon. Note that we use hex escapes to make the 15597db96d56Sopenharmony_ci # two trailing blanks apparent in the expected output. 15607db96d56Sopenharmony_ci 15617db96d56Sopenharmony_ci self.check_roundtrip("if x == 1 : \n" 15627db96d56Sopenharmony_ci " print(x)\n") 15637db96d56Sopenharmony_ci fn = support.findfile("tokenize_tests.txt") 15647db96d56Sopenharmony_ci with open(fn, 'rb') as f: 15657db96d56Sopenharmony_ci self.check_roundtrip(f) 15667db96d56Sopenharmony_ci self.check_roundtrip("if x == 1:\n" 15677db96d56Sopenharmony_ci " # A comment by itself.\n" 15687db96d56Sopenharmony_ci " print(x) # Comment here, too.\n" 15697db96d56Sopenharmony_ci " # Another comment.\n" 15707db96d56Sopenharmony_ci "after_if = True\n") 15717db96d56Sopenharmony_ci self.check_roundtrip("if (x # The comments need to go in the right place\n" 15727db96d56Sopenharmony_ci " == 1):\n" 15737db96d56Sopenharmony_ci " print('x==1')\n") 15747db96d56Sopenharmony_ci self.check_roundtrip("class Test: # A comment here\n" 15757db96d56Sopenharmony_ci " # A comment with weird indent\n" 15767db96d56Sopenharmony_ci " after_com = 5\n" 15777db96d56Sopenharmony_ci " def x(m): return m*5 # a one liner\n" 15787db96d56Sopenharmony_ci " def y(m): # A whitespace after the colon\n" 15797db96d56Sopenharmony_ci " return y*4 # 3-space indent\n") 15807db96d56Sopenharmony_ci 15817db96d56Sopenharmony_ci # Some error-handling code 15827db96d56Sopenharmony_ci self.check_roundtrip("try: import somemodule\n" 15837db96d56Sopenharmony_ci "except ImportError: # comment\n" 15847db96d56Sopenharmony_ci " print('Can not import' # comment2\n)" 15857db96d56Sopenharmony_ci "else: print('Loaded')\n") 15867db96d56Sopenharmony_ci 15877db96d56Sopenharmony_ci def test_continuation(self): 15887db96d56Sopenharmony_ci # Balancing continuation 15897db96d56Sopenharmony_ci self.check_roundtrip("a = (3,4, \n" 15907db96d56Sopenharmony_ci "5,6)\n" 15917db96d56Sopenharmony_ci "y = [3, 4,\n" 15927db96d56Sopenharmony_ci "5]\n" 15937db96d56Sopenharmony_ci "z = {'a': 5,\n" 15947db96d56Sopenharmony_ci "'b':15, 'c':True}\n" 15957db96d56Sopenharmony_ci "x = len(y) + 5 - a[\n" 15967db96d56Sopenharmony_ci "3] - a[2]\n" 15977db96d56Sopenharmony_ci "+ len(z) - z[\n" 15987db96d56Sopenharmony_ci "'b']\n") 15997db96d56Sopenharmony_ci 16007db96d56Sopenharmony_ci def test_backslash_continuation(self): 16017db96d56Sopenharmony_ci # Backslash means line continuation, except for comments 16027db96d56Sopenharmony_ci self.check_roundtrip("x=1+\\\n" 16037db96d56Sopenharmony_ci "1\n" 16047db96d56Sopenharmony_ci "# This is a comment\\\n" 16057db96d56Sopenharmony_ci "# This also\n") 16067db96d56Sopenharmony_ci self.check_roundtrip("# Comment \\\n" 16077db96d56Sopenharmony_ci "x = 0") 16087db96d56Sopenharmony_ci 16097db96d56Sopenharmony_ci def test_string_concatenation(self): 16107db96d56Sopenharmony_ci # Two string literals on the same line 16117db96d56Sopenharmony_ci self.check_roundtrip("'' ''") 16127db96d56Sopenharmony_ci 16137db96d56Sopenharmony_ci def test_random_files(self): 16147db96d56Sopenharmony_ci # Test roundtrip on random python modules. 16157db96d56Sopenharmony_ci # pass the '-ucpu' option to process the full directory. 16167db96d56Sopenharmony_ci 16177db96d56Sopenharmony_ci import glob, random 16187db96d56Sopenharmony_ci fn = support.findfile("tokenize_tests.txt") 16197db96d56Sopenharmony_ci tempdir = os.path.dirname(fn) or os.curdir 16207db96d56Sopenharmony_ci testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py")) 16217db96d56Sopenharmony_ci 16227db96d56Sopenharmony_ci # Tokenize is broken on test_pep3131.py because regular expressions are 16237db96d56Sopenharmony_ci # broken on the obscure unicode identifiers in it. *sigh* 16247db96d56Sopenharmony_ci # With roundtrip extended to test the 5-tuple mode of untokenize, 16257db96d56Sopenharmony_ci # 7 more testfiles fail. Remove them also until the failure is diagnosed. 16267db96d56Sopenharmony_ci 16277db96d56Sopenharmony_ci testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py")) 16287db96d56Sopenharmony_ci for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'): 16297db96d56Sopenharmony_ci testfiles.remove(os.path.join(tempdir, "test_%s.py") % f) 16307db96d56Sopenharmony_ci 16317db96d56Sopenharmony_ci if not support.is_resource_enabled("cpu"): 16327db96d56Sopenharmony_ci testfiles = random.sample(testfiles, 10) 16337db96d56Sopenharmony_ci 16347db96d56Sopenharmony_ci for testfile in testfiles: 16357db96d56Sopenharmony_ci if support.verbose >= 2: 16367db96d56Sopenharmony_ci print('tokenize', testfile) 16377db96d56Sopenharmony_ci with open(testfile, 'rb') as f: 16387db96d56Sopenharmony_ci with self.subTest(file=testfile): 16397db96d56Sopenharmony_ci self.check_roundtrip(f) 16407db96d56Sopenharmony_ci 16417db96d56Sopenharmony_ci 16427db96d56Sopenharmony_ci def roundtrip(self, code): 16437db96d56Sopenharmony_ci if isinstance(code, str): 16447db96d56Sopenharmony_ci code = code.encode('utf-8') 16457db96d56Sopenharmony_ci return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8') 16467db96d56Sopenharmony_ci 16477db96d56Sopenharmony_ci def test_indentation_semantics_retained(self): 16487db96d56Sopenharmony_ci """ 16497db96d56Sopenharmony_ci Ensure that although whitespace might be mutated in a roundtrip, 16507db96d56Sopenharmony_ci the semantic meaning of the indentation remains consistent. 16517db96d56Sopenharmony_ci """ 16527db96d56Sopenharmony_ci code = "if False:\n\tx=3\n\tx=3\n" 16537db96d56Sopenharmony_ci codelines = self.roundtrip(code).split('\n') 16547db96d56Sopenharmony_ci self.assertEqual(codelines[1], codelines[2]) 16557db96d56Sopenharmony_ci self.check_roundtrip(code) 16567db96d56Sopenharmony_ci 16577db96d56Sopenharmony_ci 16587db96d56Sopenharmony_ciclass CTokenizeTest(TestCase): 16597db96d56Sopenharmony_ci def check_tokenize(self, s, expected): 16607db96d56Sopenharmony_ci # Format the tokens in s in a table format. 16617db96d56Sopenharmony_ci # The ENDMARKER and final NEWLINE are omitted. 16627db96d56Sopenharmony_ci with self.subTest(source=s): 16637db96d56Sopenharmony_ci result = stringify_tokens_from_source( 16647db96d56Sopenharmony_ci _generate_tokens_from_c_tokenizer(s), s 16657db96d56Sopenharmony_ci ) 16667db96d56Sopenharmony_ci self.assertEqual(result, expected.rstrip().splitlines()) 16677db96d56Sopenharmony_ci 16687db96d56Sopenharmony_ci def test_int(self): 16697db96d56Sopenharmony_ci 16707db96d56Sopenharmony_ci self.check_tokenize('0xff <= 255', """\ 16717db96d56Sopenharmony_ci NUMBER '0xff' (1, 0) (1, 4) 16727db96d56Sopenharmony_ci LESSEQUAL '<=' (1, 5) (1, 7) 16737db96d56Sopenharmony_ci NUMBER '255' (1, 8) (1, 11) 16747db96d56Sopenharmony_ci """) 16757db96d56Sopenharmony_ci 16767db96d56Sopenharmony_ci self.check_tokenize('0b10 <= 255', """\ 16777db96d56Sopenharmony_ci NUMBER '0b10' (1, 0) (1, 4) 16787db96d56Sopenharmony_ci LESSEQUAL '<=' (1, 5) (1, 7) 16797db96d56Sopenharmony_ci NUMBER '255' (1, 8) (1, 11) 16807db96d56Sopenharmony_ci """) 16817db96d56Sopenharmony_ci 16827db96d56Sopenharmony_ci self.check_tokenize('0o123 <= 0O123', """\ 16837db96d56Sopenharmony_ci NUMBER '0o123' (1, 0) (1, 5) 16847db96d56Sopenharmony_ci LESSEQUAL '<=' (1, 6) (1, 8) 16857db96d56Sopenharmony_ci NUMBER '0O123' (1, 9) (1, 14) 16867db96d56Sopenharmony_ci """) 16877db96d56Sopenharmony_ci 16887db96d56Sopenharmony_ci self.check_tokenize('1234567 > ~0x15', """\ 16897db96d56Sopenharmony_ci NUMBER '1234567' (1, 0) (1, 7) 16907db96d56Sopenharmony_ci GREATER '>' (1, 8) (1, 9) 16917db96d56Sopenharmony_ci TILDE '~' (1, 10) (1, 11) 16927db96d56Sopenharmony_ci NUMBER '0x15' (1, 11) (1, 15) 16937db96d56Sopenharmony_ci """) 16947db96d56Sopenharmony_ci 16957db96d56Sopenharmony_ci self.check_tokenize('2134568 != 1231515', """\ 16967db96d56Sopenharmony_ci NUMBER '2134568' (1, 0) (1, 7) 16977db96d56Sopenharmony_ci NOTEQUAL '!=' (1, 8) (1, 10) 16987db96d56Sopenharmony_ci NUMBER '1231515' (1, 11) (1, 18) 16997db96d56Sopenharmony_ci """) 17007db96d56Sopenharmony_ci 17017db96d56Sopenharmony_ci self.check_tokenize('(-124561-1) & 200000000', """\ 17027db96d56Sopenharmony_ci LPAR '(' (1, 0) (1, 1) 17037db96d56Sopenharmony_ci MINUS '-' (1, 1) (1, 2) 17047db96d56Sopenharmony_ci NUMBER '124561' (1, 2) (1, 8) 17057db96d56Sopenharmony_ci MINUS '-' (1, 8) (1, 9) 17067db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 17077db96d56Sopenharmony_ci RPAR ')' (1, 10) (1, 11) 17087db96d56Sopenharmony_ci AMPER '&' (1, 12) (1, 13) 17097db96d56Sopenharmony_ci NUMBER '200000000' (1, 14) (1, 23) 17107db96d56Sopenharmony_ci """) 17117db96d56Sopenharmony_ci 17127db96d56Sopenharmony_ci self.check_tokenize('0xdeadbeef != -1', """\ 17137db96d56Sopenharmony_ci NUMBER '0xdeadbeef' (1, 0) (1, 10) 17147db96d56Sopenharmony_ci NOTEQUAL '!=' (1, 11) (1, 13) 17157db96d56Sopenharmony_ci MINUS '-' (1, 14) (1, 15) 17167db96d56Sopenharmony_ci NUMBER '1' (1, 15) (1, 16) 17177db96d56Sopenharmony_ci """) 17187db96d56Sopenharmony_ci 17197db96d56Sopenharmony_ci self.check_tokenize('0xdeadc0de & 12345', """\ 17207db96d56Sopenharmony_ci NUMBER '0xdeadc0de' (1, 0) (1, 10) 17217db96d56Sopenharmony_ci AMPER '&' (1, 11) (1, 12) 17227db96d56Sopenharmony_ci NUMBER '12345' (1, 13) (1, 18) 17237db96d56Sopenharmony_ci """) 17247db96d56Sopenharmony_ci 17257db96d56Sopenharmony_ci self.check_tokenize('0xFF & 0x15 | 1234', """\ 17267db96d56Sopenharmony_ci NUMBER '0xFF' (1, 0) (1, 4) 17277db96d56Sopenharmony_ci AMPER '&' (1, 5) (1, 6) 17287db96d56Sopenharmony_ci NUMBER '0x15' (1, 7) (1, 11) 17297db96d56Sopenharmony_ci VBAR '|' (1, 12) (1, 13) 17307db96d56Sopenharmony_ci NUMBER '1234' (1, 14) (1, 18) 17317db96d56Sopenharmony_ci """) 17327db96d56Sopenharmony_ci 17337db96d56Sopenharmony_ci def test_float(self): 17347db96d56Sopenharmony_ci 17357db96d56Sopenharmony_ci self.check_tokenize('x = 3.14159', """\ 17367db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17377db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 17387db96d56Sopenharmony_ci NUMBER '3.14159' (1, 4) (1, 11) 17397db96d56Sopenharmony_ci """) 17407db96d56Sopenharmony_ci 17417db96d56Sopenharmony_ci self.check_tokenize('x = 314159.', """\ 17427db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17437db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 17447db96d56Sopenharmony_ci NUMBER '314159.' (1, 4) (1, 11) 17457db96d56Sopenharmony_ci """) 17467db96d56Sopenharmony_ci 17477db96d56Sopenharmony_ci self.check_tokenize('x = .314159', """\ 17487db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17497db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 17507db96d56Sopenharmony_ci NUMBER '.314159' (1, 4) (1, 11) 17517db96d56Sopenharmony_ci """) 17527db96d56Sopenharmony_ci 17537db96d56Sopenharmony_ci self.check_tokenize('x = 3e14159', """\ 17547db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17557db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 17567db96d56Sopenharmony_ci NUMBER '3e14159' (1, 4) (1, 11) 17577db96d56Sopenharmony_ci """) 17587db96d56Sopenharmony_ci 17597db96d56Sopenharmony_ci self.check_tokenize('x = 3E123', """\ 17607db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17617db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 17627db96d56Sopenharmony_ci NUMBER '3E123' (1, 4) (1, 9) 17637db96d56Sopenharmony_ci """) 17647db96d56Sopenharmony_ci 17657db96d56Sopenharmony_ci self.check_tokenize('x+y = 3e-1230', """\ 17667db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17677db96d56Sopenharmony_ci PLUS '+' (1, 1) (1, 2) 17687db96d56Sopenharmony_ci NAME 'y' (1, 2) (1, 3) 17697db96d56Sopenharmony_ci EQUAL '=' (1, 4) (1, 5) 17707db96d56Sopenharmony_ci NUMBER '3e-1230' (1, 6) (1, 13) 17717db96d56Sopenharmony_ci """) 17727db96d56Sopenharmony_ci 17737db96d56Sopenharmony_ci self.check_tokenize('x = 3.14e159', """\ 17747db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17757db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 17767db96d56Sopenharmony_ci NUMBER '3.14e159' (1, 4) (1, 12) 17777db96d56Sopenharmony_ci """) 17787db96d56Sopenharmony_ci 17797db96d56Sopenharmony_ci def test_string(self): 17807db96d56Sopenharmony_ci 17817db96d56Sopenharmony_ci self.check_tokenize('x = \'\'; y = ""', """\ 17827db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17837db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 17847db96d56Sopenharmony_ci STRING "''" (1, 4) (1, 6) 17857db96d56Sopenharmony_ci SEMI ';' (1, 6) (1, 7) 17867db96d56Sopenharmony_ci NAME 'y' (1, 8) (1, 9) 17877db96d56Sopenharmony_ci EQUAL '=' (1, 10) (1, 11) 17887db96d56Sopenharmony_ci STRING '""' (1, 12) (1, 14) 17897db96d56Sopenharmony_ci """) 17907db96d56Sopenharmony_ci 17917db96d56Sopenharmony_ci self.check_tokenize('x = \'"\'; y = "\'"', """\ 17927db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 17937db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 17947db96d56Sopenharmony_ci STRING '\\'"\\'' (1, 4) (1, 7) 17957db96d56Sopenharmony_ci SEMI ';' (1, 7) (1, 8) 17967db96d56Sopenharmony_ci NAME 'y' (1, 9) (1, 10) 17977db96d56Sopenharmony_ci EQUAL '=' (1, 11) (1, 12) 17987db96d56Sopenharmony_ci STRING '"\\'"' (1, 13) (1, 16) 17997db96d56Sopenharmony_ci """) 18007db96d56Sopenharmony_ci 18017db96d56Sopenharmony_ci self.check_tokenize('x = "doesn\'t "shrink", does it"', """\ 18027db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 18037db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 18047db96d56Sopenharmony_ci STRING '"doesn\\'t "' (1, 4) (1, 14) 18057db96d56Sopenharmony_ci NAME 'shrink' (1, 14) (1, 20) 18067db96d56Sopenharmony_ci STRING '", does it"' (1, 20) (1, 31) 18077db96d56Sopenharmony_ci """) 18087db96d56Sopenharmony_ci 18097db96d56Sopenharmony_ci self.check_tokenize("x = 'abc' + 'ABC'", """\ 18107db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 18117db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 18127db96d56Sopenharmony_ci STRING "'abc'" (1, 4) (1, 9) 18137db96d56Sopenharmony_ci PLUS '+' (1, 10) (1, 11) 18147db96d56Sopenharmony_ci STRING "'ABC'" (1, 12) (1, 17) 18157db96d56Sopenharmony_ci """) 18167db96d56Sopenharmony_ci 18177db96d56Sopenharmony_ci self.check_tokenize('y = "ABC" + "ABC"', """\ 18187db96d56Sopenharmony_ci NAME 'y' (1, 0) (1, 1) 18197db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 18207db96d56Sopenharmony_ci STRING '"ABC"' (1, 4) (1, 9) 18217db96d56Sopenharmony_ci PLUS '+' (1, 10) (1, 11) 18227db96d56Sopenharmony_ci STRING '"ABC"' (1, 12) (1, 17) 18237db96d56Sopenharmony_ci """) 18247db96d56Sopenharmony_ci 18257db96d56Sopenharmony_ci self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\ 18267db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 18277db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 18287db96d56Sopenharmony_ci STRING "r'abc'" (1, 4) (1, 10) 18297db96d56Sopenharmony_ci PLUS '+' (1, 11) (1, 12) 18307db96d56Sopenharmony_ci STRING "r'ABC'" (1, 13) (1, 19) 18317db96d56Sopenharmony_ci PLUS '+' (1, 20) (1, 21) 18327db96d56Sopenharmony_ci STRING "R'ABC'" (1, 22) (1, 28) 18337db96d56Sopenharmony_ci PLUS '+' (1, 29) (1, 30) 18347db96d56Sopenharmony_ci STRING "R'ABC'" (1, 31) (1, 37) 18357db96d56Sopenharmony_ci """) 18367db96d56Sopenharmony_ci 18377db96d56Sopenharmony_ci self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\ 18387db96d56Sopenharmony_ci NAME 'y' (1, 0) (1, 1) 18397db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 18407db96d56Sopenharmony_ci STRING 'r"abc"' (1, 4) (1, 10) 18417db96d56Sopenharmony_ci PLUS '+' (1, 11) (1, 12) 18427db96d56Sopenharmony_ci STRING 'r"ABC"' (1, 13) (1, 19) 18437db96d56Sopenharmony_ci PLUS '+' (1, 20) (1, 21) 18447db96d56Sopenharmony_ci STRING 'R"ABC"' (1, 22) (1, 28) 18457db96d56Sopenharmony_ci PLUS '+' (1, 29) (1, 30) 18467db96d56Sopenharmony_ci STRING 'R"ABC"' (1, 31) (1, 37) 18477db96d56Sopenharmony_ci """) 18487db96d56Sopenharmony_ci 18497db96d56Sopenharmony_ci self.check_tokenize("u'abc' + U'abc'", """\ 18507db96d56Sopenharmony_ci STRING "u'abc'" (1, 0) (1, 6) 18517db96d56Sopenharmony_ci PLUS '+' (1, 7) (1, 8) 18527db96d56Sopenharmony_ci STRING "U'abc'" (1, 9) (1, 15) 18537db96d56Sopenharmony_ci """) 18547db96d56Sopenharmony_ci 18557db96d56Sopenharmony_ci self.check_tokenize('u"abc" + U"abc"', """\ 18567db96d56Sopenharmony_ci STRING 'u"abc"' (1, 0) (1, 6) 18577db96d56Sopenharmony_ci PLUS '+' (1, 7) (1, 8) 18587db96d56Sopenharmony_ci STRING 'U"abc"' (1, 9) (1, 15) 18597db96d56Sopenharmony_ci """) 18607db96d56Sopenharmony_ci 18617db96d56Sopenharmony_ci self.check_tokenize("b'abc' + B'abc'", """\ 18627db96d56Sopenharmony_ci STRING "b'abc'" (1, 0) (1, 6) 18637db96d56Sopenharmony_ci PLUS '+' (1, 7) (1, 8) 18647db96d56Sopenharmony_ci STRING "B'abc'" (1, 9) (1, 15) 18657db96d56Sopenharmony_ci """) 18667db96d56Sopenharmony_ci 18677db96d56Sopenharmony_ci self.check_tokenize('b"abc" + B"abc"', """\ 18687db96d56Sopenharmony_ci STRING 'b"abc"' (1, 0) (1, 6) 18697db96d56Sopenharmony_ci PLUS '+' (1, 7) (1, 8) 18707db96d56Sopenharmony_ci STRING 'B"abc"' (1, 9) (1, 15) 18717db96d56Sopenharmony_ci """) 18727db96d56Sopenharmony_ci 18737db96d56Sopenharmony_ci self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\ 18747db96d56Sopenharmony_ci STRING "br'abc'" (1, 0) (1, 7) 18757db96d56Sopenharmony_ci PLUS '+' (1, 8) (1, 9) 18767db96d56Sopenharmony_ci STRING "bR'abc'" (1, 10) (1, 17) 18777db96d56Sopenharmony_ci PLUS '+' (1, 18) (1, 19) 18787db96d56Sopenharmony_ci STRING "Br'abc'" (1, 20) (1, 27) 18797db96d56Sopenharmony_ci PLUS '+' (1, 28) (1, 29) 18807db96d56Sopenharmony_ci STRING "BR'abc'" (1, 30) (1, 37) 18817db96d56Sopenharmony_ci """) 18827db96d56Sopenharmony_ci 18837db96d56Sopenharmony_ci self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\ 18847db96d56Sopenharmony_ci STRING 'br"abc"' (1, 0) (1, 7) 18857db96d56Sopenharmony_ci PLUS '+' (1, 8) (1, 9) 18867db96d56Sopenharmony_ci STRING 'bR"abc"' (1, 10) (1, 17) 18877db96d56Sopenharmony_ci PLUS '+' (1, 18) (1, 19) 18887db96d56Sopenharmony_ci STRING 'Br"abc"' (1, 20) (1, 27) 18897db96d56Sopenharmony_ci PLUS '+' (1, 28) (1, 29) 18907db96d56Sopenharmony_ci STRING 'BR"abc"' (1, 30) (1, 37) 18917db96d56Sopenharmony_ci """) 18927db96d56Sopenharmony_ci 18937db96d56Sopenharmony_ci self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\ 18947db96d56Sopenharmony_ci STRING "rb'abc'" (1, 0) (1, 7) 18957db96d56Sopenharmony_ci PLUS '+' (1, 8) (1, 9) 18967db96d56Sopenharmony_ci STRING "rB'abc'" (1, 10) (1, 17) 18977db96d56Sopenharmony_ci PLUS '+' (1, 18) (1, 19) 18987db96d56Sopenharmony_ci STRING "Rb'abc'" (1, 20) (1, 27) 18997db96d56Sopenharmony_ci PLUS '+' (1, 28) (1, 29) 19007db96d56Sopenharmony_ci STRING "RB'abc'" (1, 30) (1, 37) 19017db96d56Sopenharmony_ci """) 19027db96d56Sopenharmony_ci 19037db96d56Sopenharmony_ci self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\ 19047db96d56Sopenharmony_ci STRING 'rb"abc"' (1, 0) (1, 7) 19057db96d56Sopenharmony_ci PLUS '+' (1, 8) (1, 9) 19067db96d56Sopenharmony_ci STRING 'rB"abc"' (1, 10) (1, 17) 19077db96d56Sopenharmony_ci PLUS '+' (1, 18) (1, 19) 19087db96d56Sopenharmony_ci STRING 'Rb"abc"' (1, 20) (1, 27) 19097db96d56Sopenharmony_ci PLUS '+' (1, 28) (1, 29) 19107db96d56Sopenharmony_ci STRING 'RB"abc"' (1, 30) (1, 37) 19117db96d56Sopenharmony_ci """) 19127db96d56Sopenharmony_ci 19137db96d56Sopenharmony_ci self.check_tokenize('"a\\\nde\\\nfg"', """\ 19147db96d56Sopenharmony_ci STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3) 19157db96d56Sopenharmony_ci """) 19167db96d56Sopenharmony_ci 19177db96d56Sopenharmony_ci self.check_tokenize('u"a\\\nde"', """\ 19187db96d56Sopenharmony_ci STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3) 19197db96d56Sopenharmony_ci """) 19207db96d56Sopenharmony_ci 19217db96d56Sopenharmony_ci self.check_tokenize('rb"a\\\nd"', """\ 19227db96d56Sopenharmony_ci STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2) 19237db96d56Sopenharmony_ci """) 19247db96d56Sopenharmony_ci 19257db96d56Sopenharmony_ci self.check_tokenize(r'"""a\ 19267db96d56Sopenharmony_cib"""', """\ 19277db96d56Sopenharmony_ci STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) 19287db96d56Sopenharmony_ci """) 19297db96d56Sopenharmony_ci self.check_tokenize(r'u"""a\ 19307db96d56Sopenharmony_cib"""', """\ 19317db96d56Sopenharmony_ci STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) 19327db96d56Sopenharmony_ci """) 19337db96d56Sopenharmony_ci self.check_tokenize(r'rb"""a\ 19347db96d56Sopenharmony_cib\ 19357db96d56Sopenharmony_cic"""', """\ 19367db96d56Sopenharmony_ci STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4) 19377db96d56Sopenharmony_ci """) 19387db96d56Sopenharmony_ci 19397db96d56Sopenharmony_ci self.check_tokenize('f"abc"', """\ 19407db96d56Sopenharmony_ci STRING 'f"abc"' (1, 0) (1, 6) 19417db96d56Sopenharmony_ci """) 19427db96d56Sopenharmony_ci 19437db96d56Sopenharmony_ci self.check_tokenize('fR"a{b}c"', """\ 19447db96d56Sopenharmony_ci STRING 'fR"a{b}c"' (1, 0) (1, 9) 19457db96d56Sopenharmony_ci """) 19467db96d56Sopenharmony_ci 19477db96d56Sopenharmony_ci self.check_tokenize('f"""abc"""', """\ 19487db96d56Sopenharmony_ci STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10) 19497db96d56Sopenharmony_ci """) 19507db96d56Sopenharmony_ci 19517db96d56Sopenharmony_ci self.check_tokenize(r'f"abc\ 19527db96d56Sopenharmony_cidef"', """\ 19537db96d56Sopenharmony_ci STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4) 19547db96d56Sopenharmony_ci """) 19557db96d56Sopenharmony_ci 19567db96d56Sopenharmony_ci self.check_tokenize(r'Rf"abc\ 19577db96d56Sopenharmony_cidef"', """\ 19587db96d56Sopenharmony_ci STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4) 19597db96d56Sopenharmony_ci """) 19607db96d56Sopenharmony_ci 19617db96d56Sopenharmony_ci def test_function(self): 19627db96d56Sopenharmony_ci 19637db96d56Sopenharmony_ci self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\ 19647db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 19657db96d56Sopenharmony_ci NAME 'd22' (1, 4) (1, 7) 19667db96d56Sopenharmony_ci LPAR '(' (1, 7) (1, 8) 19677db96d56Sopenharmony_ci NAME 'a' (1, 8) (1, 9) 19687db96d56Sopenharmony_ci COMMA ',' (1, 9) (1, 10) 19697db96d56Sopenharmony_ci NAME 'b' (1, 11) (1, 12) 19707db96d56Sopenharmony_ci COMMA ',' (1, 12) (1, 13) 19717db96d56Sopenharmony_ci NAME 'c' (1, 14) (1, 15) 19727db96d56Sopenharmony_ci EQUAL '=' (1, 15) (1, 16) 19737db96d56Sopenharmony_ci NUMBER '2' (1, 16) (1, 17) 19747db96d56Sopenharmony_ci COMMA ',' (1, 17) (1, 18) 19757db96d56Sopenharmony_ci NAME 'd' (1, 19) (1, 20) 19767db96d56Sopenharmony_ci EQUAL '=' (1, 20) (1, 21) 19777db96d56Sopenharmony_ci NUMBER '2' (1, 21) (1, 22) 19787db96d56Sopenharmony_ci COMMA ',' (1, 22) (1, 23) 19797db96d56Sopenharmony_ci STAR '*' (1, 24) (1, 25) 19807db96d56Sopenharmony_ci NAME 'k' (1, 25) (1, 26) 19817db96d56Sopenharmony_ci RPAR ')' (1, 26) (1, 27) 19827db96d56Sopenharmony_ci COLON ':' (1, 27) (1, 28) 19837db96d56Sopenharmony_ci NAME 'pass' (1, 29) (1, 33) 19847db96d56Sopenharmony_ci """) 19857db96d56Sopenharmony_ci 19867db96d56Sopenharmony_ci self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\ 19877db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 19887db96d56Sopenharmony_ci NAME 'd01v_' (1, 4) (1, 9) 19897db96d56Sopenharmony_ci LPAR '(' (1, 9) (1, 10) 19907db96d56Sopenharmony_ci NAME 'a' (1, 10) (1, 11) 19917db96d56Sopenharmony_ci EQUAL '=' (1, 11) (1, 12) 19927db96d56Sopenharmony_ci NUMBER '1' (1, 12) (1, 13) 19937db96d56Sopenharmony_ci COMMA ',' (1, 13) (1, 14) 19947db96d56Sopenharmony_ci STAR '*' (1, 15) (1, 16) 19957db96d56Sopenharmony_ci NAME 'k' (1, 16) (1, 17) 19967db96d56Sopenharmony_ci COMMA ',' (1, 17) (1, 18) 19977db96d56Sopenharmony_ci DOUBLESTAR '**' (1, 19) (1, 21) 19987db96d56Sopenharmony_ci NAME 'w' (1, 21) (1, 22) 19997db96d56Sopenharmony_ci RPAR ')' (1, 22) (1, 23) 20007db96d56Sopenharmony_ci COLON ':' (1, 23) (1, 24) 20017db96d56Sopenharmony_ci NAME 'pass' (1, 25) (1, 29) 20027db96d56Sopenharmony_ci """) 20037db96d56Sopenharmony_ci 20047db96d56Sopenharmony_ci self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\ 20057db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 20067db96d56Sopenharmony_ci NAME 'd23' (1, 4) (1, 7) 20077db96d56Sopenharmony_ci LPAR '(' (1, 7) (1, 8) 20087db96d56Sopenharmony_ci NAME 'a' (1, 8) (1, 9) 20097db96d56Sopenharmony_ci COLON ':' (1, 9) (1, 10) 20107db96d56Sopenharmony_ci NAME 'str' (1, 11) (1, 14) 20117db96d56Sopenharmony_ci COMMA ',' (1, 14) (1, 15) 20127db96d56Sopenharmony_ci NAME 'b' (1, 16) (1, 17) 20137db96d56Sopenharmony_ci COLON ':' (1, 17) (1, 18) 20147db96d56Sopenharmony_ci NAME 'int' (1, 19) (1, 22) 20157db96d56Sopenharmony_ci EQUAL '=' (1, 22) (1, 23) 20167db96d56Sopenharmony_ci NUMBER '3' (1, 23) (1, 24) 20177db96d56Sopenharmony_ci RPAR ')' (1, 24) (1, 25) 20187db96d56Sopenharmony_ci RARROW '->' (1, 26) (1, 28) 20197db96d56Sopenharmony_ci NAME 'int' (1, 29) (1, 32) 20207db96d56Sopenharmony_ci COLON ':' (1, 32) (1, 33) 20217db96d56Sopenharmony_ci NAME 'pass' (1, 34) (1, 38) 20227db96d56Sopenharmony_ci """) 20237db96d56Sopenharmony_ci 20247db96d56Sopenharmony_ci def test_comparison(self): 20257db96d56Sopenharmony_ci 20267db96d56Sopenharmony_ci self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " 20277db96d56Sopenharmony_ci "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\ 20287db96d56Sopenharmony_ci NAME 'if' (1, 0) (1, 2) 20297db96d56Sopenharmony_ci NUMBER '1' (1, 3) (1, 4) 20307db96d56Sopenharmony_ci LESS '<' (1, 5) (1, 6) 20317db96d56Sopenharmony_ci NUMBER '1' (1, 7) (1, 8) 20327db96d56Sopenharmony_ci GREATER '>' (1, 9) (1, 10) 20337db96d56Sopenharmony_ci NUMBER '1' (1, 11) (1, 12) 20347db96d56Sopenharmony_ci EQEQUAL '==' (1, 13) (1, 15) 20357db96d56Sopenharmony_ci NUMBER '1' (1, 16) (1, 17) 20367db96d56Sopenharmony_ci GREATEREQUAL '>=' (1, 18) (1, 20) 20377db96d56Sopenharmony_ci NUMBER '5' (1, 21) (1, 22) 20387db96d56Sopenharmony_ci LESSEQUAL '<=' (1, 23) (1, 25) 20397db96d56Sopenharmony_ci NUMBER '0x15' (1, 26) (1, 30) 20407db96d56Sopenharmony_ci LESSEQUAL '<=' (1, 31) (1, 33) 20417db96d56Sopenharmony_ci NUMBER '0x12' (1, 34) (1, 38) 20427db96d56Sopenharmony_ci NOTEQUAL '!=' (1, 39) (1, 41) 20437db96d56Sopenharmony_ci NUMBER '1' (1, 42) (1, 43) 20447db96d56Sopenharmony_ci NAME 'and' (1, 44) (1, 47) 20457db96d56Sopenharmony_ci NUMBER '5' (1, 48) (1, 49) 20467db96d56Sopenharmony_ci NAME 'in' (1, 50) (1, 52) 20477db96d56Sopenharmony_ci NUMBER '1' (1, 53) (1, 54) 20487db96d56Sopenharmony_ci NAME 'not' (1, 55) (1, 58) 20497db96d56Sopenharmony_ci NAME 'in' (1, 59) (1, 61) 20507db96d56Sopenharmony_ci NUMBER '1' (1, 62) (1, 63) 20517db96d56Sopenharmony_ci NAME 'is' (1, 64) (1, 66) 20527db96d56Sopenharmony_ci NUMBER '1' (1, 67) (1, 68) 20537db96d56Sopenharmony_ci NAME 'or' (1, 69) (1, 71) 20547db96d56Sopenharmony_ci NUMBER '5' (1, 72) (1, 73) 20557db96d56Sopenharmony_ci NAME 'is' (1, 74) (1, 76) 20567db96d56Sopenharmony_ci NAME 'not' (1, 77) (1, 80) 20577db96d56Sopenharmony_ci NUMBER '1' (1, 81) (1, 82) 20587db96d56Sopenharmony_ci COLON ':' (1, 82) (1, 83) 20597db96d56Sopenharmony_ci NAME 'pass' (1, 84) (1, 88) 20607db96d56Sopenharmony_ci """) 20617db96d56Sopenharmony_ci 20627db96d56Sopenharmony_ci def test_additive(self): 20637db96d56Sopenharmony_ci 20647db96d56Sopenharmony_ci self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\ 20657db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 20667db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 20677db96d56Sopenharmony_ci NUMBER '1' (1, 4) (1, 5) 20687db96d56Sopenharmony_ci MINUS '-' (1, 6) (1, 7) 20697db96d56Sopenharmony_ci NAME 'y' (1, 8) (1, 9) 20707db96d56Sopenharmony_ci PLUS '+' (1, 10) (1, 11) 20717db96d56Sopenharmony_ci NUMBER '15' (1, 12) (1, 14) 20727db96d56Sopenharmony_ci MINUS '-' (1, 15) (1, 16) 20737db96d56Sopenharmony_ci NUMBER '1' (1, 17) (1, 18) 20747db96d56Sopenharmony_ci PLUS '+' (1, 19) (1, 20) 20757db96d56Sopenharmony_ci NUMBER '0x124' (1, 21) (1, 26) 20767db96d56Sopenharmony_ci PLUS '+' (1, 27) (1, 28) 20777db96d56Sopenharmony_ci NAME 'z' (1, 29) (1, 30) 20787db96d56Sopenharmony_ci PLUS '+' (1, 31) (1, 32) 20797db96d56Sopenharmony_ci NAME 'a' (1, 33) (1, 34) 20807db96d56Sopenharmony_ci LSQB '[' (1, 34) (1, 35) 20817db96d56Sopenharmony_ci NUMBER '5' (1, 35) (1, 36) 20827db96d56Sopenharmony_ci RSQB ']' (1, 36) (1, 37) 20837db96d56Sopenharmony_ci """) 20847db96d56Sopenharmony_ci 20857db96d56Sopenharmony_ci def test_multiplicative(self): 20867db96d56Sopenharmony_ci 20877db96d56Sopenharmony_ci self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\ 20887db96d56Sopenharmony_ci NAME 'x' (1, 0) (1, 1) 20897db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 20907db96d56Sopenharmony_ci NUMBER '1' (1, 4) (1, 5) 20917db96d56Sopenharmony_ci DOUBLESLASH '//' (1, 5) (1, 7) 20927db96d56Sopenharmony_ci NUMBER '1' (1, 7) (1, 8) 20937db96d56Sopenharmony_ci STAR '*' (1, 8) (1, 9) 20947db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 20957db96d56Sopenharmony_ci SLASH '/' (1, 10) (1, 11) 20967db96d56Sopenharmony_ci NUMBER '5' (1, 11) (1, 12) 20977db96d56Sopenharmony_ci STAR '*' (1, 12) (1, 13) 20987db96d56Sopenharmony_ci NUMBER '12' (1, 13) (1, 15) 20997db96d56Sopenharmony_ci PERCENT '%' (1, 15) (1, 16) 21007db96d56Sopenharmony_ci NUMBER '0x12' (1, 16) (1, 20) 21017db96d56Sopenharmony_ci AT '@' (1, 20) (1, 21) 21027db96d56Sopenharmony_ci NUMBER '42' (1, 21) (1, 23) 21037db96d56Sopenharmony_ci """) 21047db96d56Sopenharmony_ci 21057db96d56Sopenharmony_ci def test_unary(self): 21067db96d56Sopenharmony_ci 21077db96d56Sopenharmony_ci self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\ 21087db96d56Sopenharmony_ci TILDE '~' (1, 0) (1, 1) 21097db96d56Sopenharmony_ci NUMBER '1' (1, 1) (1, 2) 21107db96d56Sopenharmony_ci CIRCUMFLEX '^' (1, 3) (1, 4) 21117db96d56Sopenharmony_ci NUMBER '1' (1, 5) (1, 6) 21127db96d56Sopenharmony_ci AMPER '&' (1, 7) (1, 8) 21137db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 21147db96d56Sopenharmony_ci VBAR '|' (1, 11) (1, 12) 21157db96d56Sopenharmony_ci NUMBER '1' (1, 12) (1, 13) 21167db96d56Sopenharmony_ci CIRCUMFLEX '^' (1, 14) (1, 15) 21177db96d56Sopenharmony_ci MINUS '-' (1, 16) (1, 17) 21187db96d56Sopenharmony_ci NUMBER '1' (1, 17) (1, 18) 21197db96d56Sopenharmony_ci """) 21207db96d56Sopenharmony_ci 21217db96d56Sopenharmony_ci self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\ 21227db96d56Sopenharmony_ci MINUS '-' (1, 0) (1, 1) 21237db96d56Sopenharmony_ci NUMBER '1' (1, 1) (1, 2) 21247db96d56Sopenharmony_ci STAR '*' (1, 2) (1, 3) 21257db96d56Sopenharmony_ci NUMBER '1' (1, 3) (1, 4) 21267db96d56Sopenharmony_ci SLASH '/' (1, 4) (1, 5) 21277db96d56Sopenharmony_ci NUMBER '1' (1, 5) (1, 6) 21287db96d56Sopenharmony_ci PLUS '+' (1, 6) (1, 7) 21297db96d56Sopenharmony_ci NUMBER '1' (1, 7) (1, 8) 21307db96d56Sopenharmony_ci STAR '*' (1, 8) (1, 9) 21317db96d56Sopenharmony_ci NUMBER '1' (1, 9) (1, 10) 21327db96d56Sopenharmony_ci DOUBLESLASH '//' (1, 10) (1, 12) 21337db96d56Sopenharmony_ci NUMBER '1' (1, 12) (1, 13) 21347db96d56Sopenharmony_ci MINUS '-' (1, 14) (1, 15) 21357db96d56Sopenharmony_ci MINUS '-' (1, 16) (1, 17) 21367db96d56Sopenharmony_ci MINUS '-' (1, 17) (1, 18) 21377db96d56Sopenharmony_ci MINUS '-' (1, 18) (1, 19) 21387db96d56Sopenharmony_ci NUMBER '1' (1, 19) (1, 20) 21397db96d56Sopenharmony_ci DOUBLESTAR '**' (1, 20) (1, 22) 21407db96d56Sopenharmony_ci NUMBER '1' (1, 22) (1, 23) 21417db96d56Sopenharmony_ci """) 21427db96d56Sopenharmony_ci 21437db96d56Sopenharmony_ci def test_selector(self): 21447db96d56Sopenharmony_ci 21457db96d56Sopenharmony_ci self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\ 21467db96d56Sopenharmony_ci NAME 'import' (1, 0) (1, 6) 21477db96d56Sopenharmony_ci NAME 'sys' (1, 7) (1, 10) 21487db96d56Sopenharmony_ci COMMA ',' (1, 10) (1, 11) 21497db96d56Sopenharmony_ci NAME 'time' (1, 12) (1, 16) 21507db96d56Sopenharmony_ci NEWLINE '' (1, 16) (1, 16) 21517db96d56Sopenharmony_ci NAME 'x' (2, 0) (2, 1) 21527db96d56Sopenharmony_ci EQUAL '=' (2, 2) (2, 3) 21537db96d56Sopenharmony_ci NAME 'sys' (2, 4) (2, 7) 21547db96d56Sopenharmony_ci DOT '.' (2, 7) (2, 8) 21557db96d56Sopenharmony_ci NAME 'modules' (2, 8) (2, 15) 21567db96d56Sopenharmony_ci LSQB '[' (2, 15) (2, 16) 21577db96d56Sopenharmony_ci STRING "'time'" (2, 16) (2, 22) 21587db96d56Sopenharmony_ci RSQB ']' (2, 22) (2, 23) 21597db96d56Sopenharmony_ci DOT '.' (2, 23) (2, 24) 21607db96d56Sopenharmony_ci NAME 'time' (2, 24) (2, 28) 21617db96d56Sopenharmony_ci LPAR '(' (2, 28) (2, 29) 21627db96d56Sopenharmony_ci RPAR ')' (2, 29) (2, 30) 21637db96d56Sopenharmony_ci """) 21647db96d56Sopenharmony_ci 21657db96d56Sopenharmony_ci def test_method(self): 21667db96d56Sopenharmony_ci 21677db96d56Sopenharmony_ci self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ 21687db96d56Sopenharmony_ci AT '@' (1, 0) (1, 1) 21697db96d56Sopenharmony_ci NAME 'staticmethod' (1, 1) (1, 13) 21707db96d56Sopenharmony_ci NEWLINE '' (1, 13) (1, 13) 21717db96d56Sopenharmony_ci NAME 'def' (2, 0) (2, 3) 21727db96d56Sopenharmony_ci NAME 'foo' (2, 4) (2, 7) 21737db96d56Sopenharmony_ci LPAR '(' (2, 7) (2, 8) 21747db96d56Sopenharmony_ci NAME 'x' (2, 8) (2, 9) 21757db96d56Sopenharmony_ci COMMA ',' (2, 9) (2, 10) 21767db96d56Sopenharmony_ci NAME 'y' (2, 10) (2, 11) 21777db96d56Sopenharmony_ci RPAR ')' (2, 11) (2, 12) 21787db96d56Sopenharmony_ci COLON ':' (2, 12) (2, 13) 21797db96d56Sopenharmony_ci NAME 'pass' (2, 14) (2, 18) 21807db96d56Sopenharmony_ci """) 21817db96d56Sopenharmony_ci 21827db96d56Sopenharmony_ci def test_tabs(self): 21837db96d56Sopenharmony_ci 21847db96d56Sopenharmony_ci self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ 21857db96d56Sopenharmony_ci AT '@' (1, 0) (1, 1) 21867db96d56Sopenharmony_ci NAME 'staticmethod' (1, 1) (1, 13) 21877db96d56Sopenharmony_ci NEWLINE '' (1, 13) (1, 13) 21887db96d56Sopenharmony_ci NAME 'def' (2, 0) (2, 3) 21897db96d56Sopenharmony_ci NAME 'foo' (2, 4) (2, 7) 21907db96d56Sopenharmony_ci LPAR '(' (2, 7) (2, 8) 21917db96d56Sopenharmony_ci NAME 'x' (2, 8) (2, 9) 21927db96d56Sopenharmony_ci COMMA ',' (2, 9) (2, 10) 21937db96d56Sopenharmony_ci NAME 'y' (2, 10) (2, 11) 21947db96d56Sopenharmony_ci RPAR ')' (2, 11) (2, 12) 21957db96d56Sopenharmony_ci COLON ':' (2, 12) (2, 13) 21967db96d56Sopenharmony_ci NAME 'pass' (2, 14) (2, 18) 21977db96d56Sopenharmony_ci """) 21987db96d56Sopenharmony_ci 21997db96d56Sopenharmony_ci def test_async(self): 22007db96d56Sopenharmony_ci 22017db96d56Sopenharmony_ci self.check_tokenize('async = 1', """\ 22027db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22037db96d56Sopenharmony_ci EQUAL '=' (1, 6) (1, 7) 22047db96d56Sopenharmony_ci NUMBER '1' (1, 8) (1, 9) 22057db96d56Sopenharmony_ci """) 22067db96d56Sopenharmony_ci 22077db96d56Sopenharmony_ci self.check_tokenize('a = (async = 1)', """\ 22087db96d56Sopenharmony_ci NAME 'a' (1, 0) (1, 1) 22097db96d56Sopenharmony_ci EQUAL '=' (1, 2) (1, 3) 22107db96d56Sopenharmony_ci LPAR '(' (1, 4) (1, 5) 22117db96d56Sopenharmony_ci ASYNC 'async' (1, 5) (1, 10) 22127db96d56Sopenharmony_ci EQUAL '=' (1, 11) (1, 12) 22137db96d56Sopenharmony_ci NUMBER '1' (1, 13) (1, 14) 22147db96d56Sopenharmony_ci RPAR ')' (1, 14) (1, 15) 22157db96d56Sopenharmony_ci """) 22167db96d56Sopenharmony_ci 22177db96d56Sopenharmony_ci self.check_tokenize('async()', """\ 22187db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22197db96d56Sopenharmony_ci LPAR '(' (1, 5) (1, 6) 22207db96d56Sopenharmony_ci RPAR ')' (1, 6) (1, 7) 22217db96d56Sopenharmony_ci """) 22227db96d56Sopenharmony_ci 22237db96d56Sopenharmony_ci self.check_tokenize('class async(Bar):pass', """\ 22247db96d56Sopenharmony_ci NAME 'class' (1, 0) (1, 5) 22257db96d56Sopenharmony_ci ASYNC 'async' (1, 6) (1, 11) 22267db96d56Sopenharmony_ci LPAR '(' (1, 11) (1, 12) 22277db96d56Sopenharmony_ci NAME 'Bar' (1, 12) (1, 15) 22287db96d56Sopenharmony_ci RPAR ')' (1, 15) (1, 16) 22297db96d56Sopenharmony_ci COLON ':' (1, 16) (1, 17) 22307db96d56Sopenharmony_ci NAME 'pass' (1, 17) (1, 21) 22317db96d56Sopenharmony_ci """) 22327db96d56Sopenharmony_ci 22337db96d56Sopenharmony_ci self.check_tokenize('class async:pass', """\ 22347db96d56Sopenharmony_ci NAME 'class' (1, 0) (1, 5) 22357db96d56Sopenharmony_ci ASYNC 'async' (1, 6) (1, 11) 22367db96d56Sopenharmony_ci COLON ':' (1, 11) (1, 12) 22377db96d56Sopenharmony_ci NAME 'pass' (1, 12) (1, 16) 22387db96d56Sopenharmony_ci """) 22397db96d56Sopenharmony_ci 22407db96d56Sopenharmony_ci self.check_tokenize('await = 1', """\ 22417db96d56Sopenharmony_ci AWAIT 'await' (1, 0) (1, 5) 22427db96d56Sopenharmony_ci EQUAL '=' (1, 6) (1, 7) 22437db96d56Sopenharmony_ci NUMBER '1' (1, 8) (1, 9) 22447db96d56Sopenharmony_ci """) 22457db96d56Sopenharmony_ci 22467db96d56Sopenharmony_ci self.check_tokenize('foo.async', """\ 22477db96d56Sopenharmony_ci NAME 'foo' (1, 0) (1, 3) 22487db96d56Sopenharmony_ci DOT '.' (1, 3) (1, 4) 22497db96d56Sopenharmony_ci ASYNC 'async' (1, 4) (1, 9) 22507db96d56Sopenharmony_ci """) 22517db96d56Sopenharmony_ci 22527db96d56Sopenharmony_ci self.check_tokenize('async for a in b: pass', """\ 22537db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22547db96d56Sopenharmony_ci NAME 'for' (1, 6) (1, 9) 22557db96d56Sopenharmony_ci NAME 'a' (1, 10) (1, 11) 22567db96d56Sopenharmony_ci NAME 'in' (1, 12) (1, 14) 22577db96d56Sopenharmony_ci NAME 'b' (1, 15) (1, 16) 22587db96d56Sopenharmony_ci COLON ':' (1, 16) (1, 17) 22597db96d56Sopenharmony_ci NAME 'pass' (1, 18) (1, 22) 22607db96d56Sopenharmony_ci """) 22617db96d56Sopenharmony_ci 22627db96d56Sopenharmony_ci self.check_tokenize('async with a as b: pass', """\ 22637db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22647db96d56Sopenharmony_ci NAME 'with' (1, 6) (1, 10) 22657db96d56Sopenharmony_ci NAME 'a' (1, 11) (1, 12) 22667db96d56Sopenharmony_ci NAME 'as' (1, 13) (1, 15) 22677db96d56Sopenharmony_ci NAME 'b' (1, 16) (1, 17) 22687db96d56Sopenharmony_ci COLON ':' (1, 17) (1, 18) 22697db96d56Sopenharmony_ci NAME 'pass' (1, 19) (1, 23) 22707db96d56Sopenharmony_ci """) 22717db96d56Sopenharmony_ci 22727db96d56Sopenharmony_ci self.check_tokenize('async.foo', """\ 22737db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22747db96d56Sopenharmony_ci DOT '.' (1, 5) (1, 6) 22757db96d56Sopenharmony_ci NAME 'foo' (1, 6) (1, 9) 22767db96d56Sopenharmony_ci """) 22777db96d56Sopenharmony_ci 22787db96d56Sopenharmony_ci self.check_tokenize('async', """\ 22797db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22807db96d56Sopenharmony_ci """) 22817db96d56Sopenharmony_ci 22827db96d56Sopenharmony_ci self.check_tokenize('async\n#comment\nawait', """\ 22837db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22847db96d56Sopenharmony_ci NEWLINE '' (1, 5) (1, 5) 22857db96d56Sopenharmony_ci AWAIT 'await' (3, 0) (3, 5) 22867db96d56Sopenharmony_ci """) 22877db96d56Sopenharmony_ci 22887db96d56Sopenharmony_ci self.check_tokenize('async\n...\nawait', """\ 22897db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22907db96d56Sopenharmony_ci NEWLINE '' (1, 5) (1, 5) 22917db96d56Sopenharmony_ci ELLIPSIS '...' (2, 0) (2, 3) 22927db96d56Sopenharmony_ci NEWLINE '' (2, 3) (2, 3) 22937db96d56Sopenharmony_ci AWAIT 'await' (3, 0) (3, 5) 22947db96d56Sopenharmony_ci """) 22957db96d56Sopenharmony_ci 22967db96d56Sopenharmony_ci self.check_tokenize('async\nawait', """\ 22977db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 22987db96d56Sopenharmony_ci NEWLINE '' (1, 5) (1, 5) 22997db96d56Sopenharmony_ci AWAIT 'await' (2, 0) (2, 5) 23007db96d56Sopenharmony_ci """) 23017db96d56Sopenharmony_ci 23027db96d56Sopenharmony_ci self.check_tokenize('foo.async + 1', """\ 23037db96d56Sopenharmony_ci NAME 'foo' (1, 0) (1, 3) 23047db96d56Sopenharmony_ci DOT '.' (1, 3) (1, 4) 23057db96d56Sopenharmony_ci ASYNC 'async' (1, 4) (1, 9) 23067db96d56Sopenharmony_ci PLUS '+' (1, 10) (1, 11) 23077db96d56Sopenharmony_ci NUMBER '1' (1, 12) (1, 13) 23087db96d56Sopenharmony_ci """) 23097db96d56Sopenharmony_ci 23107db96d56Sopenharmony_ci self.check_tokenize('async def foo(): pass', """\ 23117db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 23127db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 23137db96d56Sopenharmony_ci NAME 'foo' (1, 10) (1, 13) 23147db96d56Sopenharmony_ci LPAR '(' (1, 13) (1, 14) 23157db96d56Sopenharmony_ci RPAR ')' (1, 14) (1, 15) 23167db96d56Sopenharmony_ci COLON ':' (1, 15) (1, 16) 23177db96d56Sopenharmony_ci NAME 'pass' (1, 17) (1, 21) 23187db96d56Sopenharmony_ci """) 23197db96d56Sopenharmony_ci 23207db96d56Sopenharmony_ci self.check_tokenize('''\ 23217db96d56Sopenharmony_ciasync def foo(): 23227db96d56Sopenharmony_ci def foo(await): 23237db96d56Sopenharmony_ci await = 1 23247db96d56Sopenharmony_ci if 1: 23257db96d56Sopenharmony_ci await 23267db96d56Sopenharmony_ciasync += 1 23277db96d56Sopenharmony_ci''', """\ 23287db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 23297db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 23307db96d56Sopenharmony_ci NAME 'foo' (1, 10) (1, 13) 23317db96d56Sopenharmony_ci LPAR '(' (1, 13) (1, 14) 23327db96d56Sopenharmony_ci RPAR ')' (1, 14) (1, 15) 23337db96d56Sopenharmony_ci COLON ':' (1, 15) (1, 16) 23347db96d56Sopenharmony_ci NEWLINE '' (1, 16) (1, 16) 23357db96d56Sopenharmony_ci INDENT '' (2, -1) (2, -1) 23367db96d56Sopenharmony_ci NAME 'def' (2, 2) (2, 5) 23377db96d56Sopenharmony_ci NAME 'foo' (2, 6) (2, 9) 23387db96d56Sopenharmony_ci LPAR '(' (2, 9) (2, 10) 23397db96d56Sopenharmony_ci AWAIT 'await' (2, 10) (2, 15) 23407db96d56Sopenharmony_ci RPAR ')' (2, 15) (2, 16) 23417db96d56Sopenharmony_ci COLON ':' (2, 16) (2, 17) 23427db96d56Sopenharmony_ci NEWLINE '' (2, 17) (2, 17) 23437db96d56Sopenharmony_ci INDENT '' (3, -1) (3, -1) 23447db96d56Sopenharmony_ci AWAIT 'await' (3, 4) (3, 9) 23457db96d56Sopenharmony_ci EQUAL '=' (3, 10) (3, 11) 23467db96d56Sopenharmony_ci NUMBER '1' (3, 12) (3, 13) 23477db96d56Sopenharmony_ci NEWLINE '' (3, 13) (3, 13) 23487db96d56Sopenharmony_ci DEDENT '' (4, -1) (4, -1) 23497db96d56Sopenharmony_ci NAME 'if' (4, 2) (4, 4) 23507db96d56Sopenharmony_ci NUMBER '1' (4, 5) (4, 6) 23517db96d56Sopenharmony_ci COLON ':' (4, 6) (4, 7) 23527db96d56Sopenharmony_ci NEWLINE '' (4, 7) (4, 7) 23537db96d56Sopenharmony_ci INDENT '' (5, -1) (5, -1) 23547db96d56Sopenharmony_ci AWAIT 'await' (5, 4) (5, 9) 23557db96d56Sopenharmony_ci NEWLINE '' (5, 9) (5, 9) 23567db96d56Sopenharmony_ci DEDENT '' (6, -1) (6, -1) 23577db96d56Sopenharmony_ci DEDENT '' (6, -1) (6, -1) 23587db96d56Sopenharmony_ci ASYNC 'async' (6, 0) (6, 5) 23597db96d56Sopenharmony_ci PLUSEQUAL '+=' (6, 6) (6, 8) 23607db96d56Sopenharmony_ci NUMBER '1' (6, 9) (6, 10) 23617db96d56Sopenharmony_ci NEWLINE '' (6, 10) (6, 10) 23627db96d56Sopenharmony_ci """) 23637db96d56Sopenharmony_ci 23647db96d56Sopenharmony_ci self.check_tokenize('async def foo():\n async for i in 1: pass', """\ 23657db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 23667db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 23677db96d56Sopenharmony_ci NAME 'foo' (1, 10) (1, 13) 23687db96d56Sopenharmony_ci LPAR '(' (1, 13) (1, 14) 23697db96d56Sopenharmony_ci RPAR ')' (1, 14) (1, 15) 23707db96d56Sopenharmony_ci COLON ':' (1, 15) (1, 16) 23717db96d56Sopenharmony_ci NEWLINE '' (1, 16) (1, 16) 23727db96d56Sopenharmony_ci INDENT '' (2, -1) (2, -1) 23737db96d56Sopenharmony_ci ASYNC 'async' (2, 2) (2, 7) 23747db96d56Sopenharmony_ci NAME 'for' (2, 8) (2, 11) 23757db96d56Sopenharmony_ci NAME 'i' (2, 12) (2, 13) 23767db96d56Sopenharmony_ci NAME 'in' (2, 14) (2, 16) 23777db96d56Sopenharmony_ci NUMBER '1' (2, 17) (2, 18) 23787db96d56Sopenharmony_ci COLON ':' (2, 18) (2, 19) 23797db96d56Sopenharmony_ci NAME 'pass' (2, 20) (2, 24) 23807db96d56Sopenharmony_ci DEDENT '' (2, -1) (2, -1) 23817db96d56Sopenharmony_ci """) 23827db96d56Sopenharmony_ci 23837db96d56Sopenharmony_ci self.check_tokenize('async def foo(async): await', """\ 23847db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 23857db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 23867db96d56Sopenharmony_ci NAME 'foo' (1, 10) (1, 13) 23877db96d56Sopenharmony_ci LPAR '(' (1, 13) (1, 14) 23887db96d56Sopenharmony_ci ASYNC 'async' (1, 14) (1, 19) 23897db96d56Sopenharmony_ci RPAR ')' (1, 19) (1, 20) 23907db96d56Sopenharmony_ci COLON ':' (1, 20) (1, 21) 23917db96d56Sopenharmony_ci AWAIT 'await' (1, 22) (1, 27) 23927db96d56Sopenharmony_ci """) 23937db96d56Sopenharmony_ci 23947db96d56Sopenharmony_ci self.check_tokenize('''\ 23957db96d56Sopenharmony_cidef f(): 23967db96d56Sopenharmony_ci 23977db96d56Sopenharmony_ci def baz(): pass 23987db96d56Sopenharmony_ci async def bar(): pass 23997db96d56Sopenharmony_ci 24007db96d56Sopenharmony_ci await = 2''', """\ 24017db96d56Sopenharmony_ci NAME 'def' (1, 0) (1, 3) 24027db96d56Sopenharmony_ci NAME 'f' (1, 4) (1, 5) 24037db96d56Sopenharmony_ci LPAR '(' (1, 5) (1, 6) 24047db96d56Sopenharmony_ci RPAR ')' (1, 6) (1, 7) 24057db96d56Sopenharmony_ci COLON ':' (1, 7) (1, 8) 24067db96d56Sopenharmony_ci NEWLINE '' (1, 8) (1, 8) 24077db96d56Sopenharmony_ci INDENT '' (3, -1) (3, -1) 24087db96d56Sopenharmony_ci NAME 'def' (3, 2) (3, 5) 24097db96d56Sopenharmony_ci NAME 'baz' (3, 6) (3, 9) 24107db96d56Sopenharmony_ci LPAR '(' (3, 9) (3, 10) 24117db96d56Sopenharmony_ci RPAR ')' (3, 10) (3, 11) 24127db96d56Sopenharmony_ci COLON ':' (3, 11) (3, 12) 24137db96d56Sopenharmony_ci NAME 'pass' (3, 13) (3, 17) 24147db96d56Sopenharmony_ci NEWLINE '' (3, 17) (3, 17) 24157db96d56Sopenharmony_ci ASYNC 'async' (4, 2) (4, 7) 24167db96d56Sopenharmony_ci NAME 'def' (4, 8) (4, 11) 24177db96d56Sopenharmony_ci NAME 'bar' (4, 12) (4, 15) 24187db96d56Sopenharmony_ci LPAR '(' (4, 15) (4, 16) 24197db96d56Sopenharmony_ci RPAR ')' (4, 16) (4, 17) 24207db96d56Sopenharmony_ci COLON ':' (4, 17) (4, 18) 24217db96d56Sopenharmony_ci NAME 'pass' (4, 19) (4, 23) 24227db96d56Sopenharmony_ci NEWLINE '' (4, 23) (4, 23) 24237db96d56Sopenharmony_ci AWAIT 'await' (6, 2) (6, 7) 24247db96d56Sopenharmony_ci EQUAL '=' (6, 8) (6, 9) 24257db96d56Sopenharmony_ci NUMBER '2' (6, 10) (6, 11) 24267db96d56Sopenharmony_ci DEDENT '' (6, -1) (6, -1) 24277db96d56Sopenharmony_ci """) 24287db96d56Sopenharmony_ci 24297db96d56Sopenharmony_ci self.check_tokenize('''\ 24307db96d56Sopenharmony_ciasync def f(): 24317db96d56Sopenharmony_ci 24327db96d56Sopenharmony_ci def baz(): pass 24337db96d56Sopenharmony_ci async def bar(): pass 24347db96d56Sopenharmony_ci 24357db96d56Sopenharmony_ci await = 2''', """\ 24367db96d56Sopenharmony_ci ASYNC 'async' (1, 0) (1, 5) 24377db96d56Sopenharmony_ci NAME 'def' (1, 6) (1, 9) 24387db96d56Sopenharmony_ci NAME 'f' (1, 10) (1, 11) 24397db96d56Sopenharmony_ci LPAR '(' (1, 11) (1, 12) 24407db96d56Sopenharmony_ci RPAR ')' (1, 12) (1, 13) 24417db96d56Sopenharmony_ci COLON ':' (1, 13) (1, 14) 24427db96d56Sopenharmony_ci NEWLINE '' (1, 14) (1, 14) 24437db96d56Sopenharmony_ci INDENT '' (3, -1) (3, -1) 24447db96d56Sopenharmony_ci NAME 'def' (3, 2) (3, 5) 24457db96d56Sopenharmony_ci NAME 'baz' (3, 6) (3, 9) 24467db96d56Sopenharmony_ci LPAR '(' (3, 9) (3, 10) 24477db96d56Sopenharmony_ci RPAR ')' (3, 10) (3, 11) 24487db96d56Sopenharmony_ci COLON ':' (3, 11) (3, 12) 24497db96d56Sopenharmony_ci NAME 'pass' (3, 13) (3, 17) 24507db96d56Sopenharmony_ci NEWLINE '' (3, 17) (3, 17) 24517db96d56Sopenharmony_ci ASYNC 'async' (4, 2) (4, 7) 24527db96d56Sopenharmony_ci NAME 'def' (4, 8) (4, 11) 24537db96d56Sopenharmony_ci NAME 'bar' (4, 12) (4, 15) 24547db96d56Sopenharmony_ci LPAR '(' (4, 15) (4, 16) 24557db96d56Sopenharmony_ci RPAR ')' (4, 16) (4, 17) 24567db96d56Sopenharmony_ci COLON ':' (4, 17) (4, 18) 24577db96d56Sopenharmony_ci NAME 'pass' (4, 19) (4, 23) 24587db96d56Sopenharmony_ci NEWLINE '' (4, 23) (4, 23) 24597db96d56Sopenharmony_ci AWAIT 'await' (6, 2) (6, 7) 24607db96d56Sopenharmony_ci EQUAL '=' (6, 8) (6, 9) 24617db96d56Sopenharmony_ci NUMBER '2' (6, 10) (6, 11) 24627db96d56Sopenharmony_ci DEDENT '' (6, -1) (6, -1) 24637db96d56Sopenharmony_ci """) 24647db96d56Sopenharmony_ci 24657db96d56Sopenharmony_ci def test_unicode(self): 24667db96d56Sopenharmony_ci 24677db96d56Sopenharmony_ci self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\ 24687db96d56Sopenharmony_ci NAME 'Örter' (1, 0) (1, 6) 24697db96d56Sopenharmony_ci EQUAL '=' (1, 7) (1, 8) 24707db96d56Sopenharmony_ci STRING "u'places'" (1, 9) (1, 18) 24717db96d56Sopenharmony_ci NEWLINE '' (1, 18) (1, 18) 24727db96d56Sopenharmony_ci NAME 'grün' (2, 0) (2, 5) 24737db96d56Sopenharmony_ci EQUAL '=' (2, 6) (2, 7) 24747db96d56Sopenharmony_ci STRING "U'green'" (2, 8) (2, 16) 24757db96d56Sopenharmony_ci """) 24767db96d56Sopenharmony_ci 24777db96d56Sopenharmony_ci def test_invalid_syntax(self): 24787db96d56Sopenharmony_ci def get_tokens(string): 24797db96d56Sopenharmony_ci return list(_generate_tokens_from_c_tokenizer(string)) 24807db96d56Sopenharmony_ci 24817db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "(1+2]") 24827db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "(1+2}") 24837db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "{1+2]") 24847db96d56Sopenharmony_ci 24857db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "1_") 24867db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "1.2_") 24877db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "1e2_") 24887db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "1e+") 24897db96d56Sopenharmony_ci 24907db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "\xa0") 24917db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "€") 24927db96d56Sopenharmony_ci 24937db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0b12") 24947db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0b1_2") 24957db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0b2") 24967db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0b1_") 24977db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0b") 24987db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0o18") 24997db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0o1_8") 25007db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0o8") 25017db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0o1_") 25027db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0o") 25037db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0x1_") 25047db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "0x") 25057db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "1_") 25067db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "012") 25077db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "1.2_") 25087db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "1e2_") 25097db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "1e+") 25107db96d56Sopenharmony_ci 25117db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "'sdfsdf") 25127db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''") 25137db96d56Sopenharmony_ci 25147db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000) 25157db96d56Sopenharmony_ci self.assertRaises(SyntaxError, get_tokens, "]") 25167db96d56Sopenharmony_ci 25177db96d56Sopenharmony_ci def test_max_indent(self): 25187db96d56Sopenharmony_ci MAXINDENT = 100 25197db96d56Sopenharmony_ci 25207db96d56Sopenharmony_ci def generate_source(indents): 25217db96d56Sopenharmony_ci source = ''.join((' ' * x) + 'if True:\n' for x in range(indents)) 25227db96d56Sopenharmony_ci source += ' ' * indents + 'pass\n' 25237db96d56Sopenharmony_ci return source 25247db96d56Sopenharmony_ci 25257db96d56Sopenharmony_ci valid = generate_source(MAXINDENT - 1) 25267db96d56Sopenharmony_ci tokens = list(_generate_tokens_from_c_tokenizer(valid)) 25277db96d56Sopenharmony_ci self.assertEqual(tokens[-1].type, DEDENT) 25287db96d56Sopenharmony_ci compile(valid, "<string>", "exec") 25297db96d56Sopenharmony_ci 25307db96d56Sopenharmony_ci invalid = generate_source(MAXINDENT) 25317db96d56Sopenharmony_ci tokens = list(_generate_tokens_from_c_tokenizer(invalid)) 25327db96d56Sopenharmony_ci self.assertEqual(tokens[-1].type, NEWLINE) 25337db96d56Sopenharmony_ci self.assertRaises( 25347db96d56Sopenharmony_ci IndentationError, compile, invalid, "<string>", "exec" 25357db96d56Sopenharmony_ci ) 25367db96d56Sopenharmony_ci 25377db96d56Sopenharmony_ci def test_continuation_lines_indentation(self): 25387db96d56Sopenharmony_ci def get_tokens(string): 25397db96d56Sopenharmony_ci return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)] 25407db96d56Sopenharmony_ci 25417db96d56Sopenharmony_ci code = dedent(""" 25427db96d56Sopenharmony_ci def fib(n): 25437db96d56Sopenharmony_ci \\ 25447db96d56Sopenharmony_ci '''Print a Fibonacci series up to n.''' 25457db96d56Sopenharmony_ci \\ 25467db96d56Sopenharmony_ci a, b = 0, 1 25477db96d56Sopenharmony_ci """) 25487db96d56Sopenharmony_ci 25497db96d56Sopenharmony_ci self.check_tokenize(code, """\ 25507db96d56Sopenharmony_ci NAME 'def' (2, 0) (2, 3) 25517db96d56Sopenharmony_ci NAME 'fib' (2, 4) (2, 7) 25527db96d56Sopenharmony_ci LPAR '(' (2, 7) (2, 8) 25537db96d56Sopenharmony_ci NAME 'n' (2, 8) (2, 9) 25547db96d56Sopenharmony_ci RPAR ')' (2, 9) (2, 10) 25557db96d56Sopenharmony_ci COLON ':' (2, 10) (2, 11) 25567db96d56Sopenharmony_ci NEWLINE '' (2, 11) (2, 11) 25577db96d56Sopenharmony_ci INDENT '' (4, -1) (4, -1) 25587db96d56Sopenharmony_ci STRING "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39) 25597db96d56Sopenharmony_ci NEWLINE '' (4, 39) (4, 39) 25607db96d56Sopenharmony_ci NAME 'a' (6, 0) (6, 1) 25617db96d56Sopenharmony_ci COMMA ',' (6, 1) (6, 2) 25627db96d56Sopenharmony_ci NAME 'b' (6, 3) (6, 4) 25637db96d56Sopenharmony_ci EQUAL '=' (6, 5) (6, 6) 25647db96d56Sopenharmony_ci NUMBER '0' (6, 7) (6, 8) 25657db96d56Sopenharmony_ci COMMA ',' (6, 8) (6, 9) 25667db96d56Sopenharmony_ci NUMBER '1' (6, 10) (6, 11) 25677db96d56Sopenharmony_ci NEWLINE '' (6, 11) (6, 11) 25687db96d56Sopenharmony_ci DEDENT '' (6, -1) (6, -1) 25697db96d56Sopenharmony_ci """) 25707db96d56Sopenharmony_ci 25717db96d56Sopenharmony_ci code_no_cont = dedent(""" 25727db96d56Sopenharmony_ci def fib(n): 25737db96d56Sopenharmony_ci '''Print a Fibonacci series up to n.''' 25747db96d56Sopenharmony_ci a, b = 0, 1 25757db96d56Sopenharmony_ci """) 25767db96d56Sopenharmony_ci 25777db96d56Sopenharmony_ci self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) 25787db96d56Sopenharmony_ci 25797db96d56Sopenharmony_ci code = dedent(""" 25807db96d56Sopenharmony_ci pass 25817db96d56Sopenharmony_ci \\ 25827db96d56Sopenharmony_ci 25837db96d56Sopenharmony_ci pass 25847db96d56Sopenharmony_ci """) 25857db96d56Sopenharmony_ci 25867db96d56Sopenharmony_ci self.check_tokenize(code, """\ 25877db96d56Sopenharmony_ci NAME 'pass' (2, 0) (2, 4) 25887db96d56Sopenharmony_ci NEWLINE '' (2, 4) (2, 4) 25897db96d56Sopenharmony_ci NAME 'pass' (5, 0) (5, 4) 25907db96d56Sopenharmony_ci NEWLINE '' (5, 4) (5, 4) 25917db96d56Sopenharmony_ci """) 25927db96d56Sopenharmony_ci 25937db96d56Sopenharmony_ci code_no_cont = dedent(""" 25947db96d56Sopenharmony_ci pass 25957db96d56Sopenharmony_ci pass 25967db96d56Sopenharmony_ci """) 25977db96d56Sopenharmony_ci 25987db96d56Sopenharmony_ci self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) 25997db96d56Sopenharmony_ci 26007db96d56Sopenharmony_ci code = dedent(""" 26017db96d56Sopenharmony_ci if x: 26027db96d56Sopenharmony_ci y = 1 26037db96d56Sopenharmony_ci \\ 26047db96d56Sopenharmony_ci \\ 26057db96d56Sopenharmony_ci \\ 26067db96d56Sopenharmony_ci \\ 26077db96d56Sopenharmony_ci foo = 1 26087db96d56Sopenharmony_ci """) 26097db96d56Sopenharmony_ci 26107db96d56Sopenharmony_ci self.check_tokenize(code, """\ 26117db96d56Sopenharmony_ci NAME 'if' (2, 0) (2, 2) 26127db96d56Sopenharmony_ci NAME 'x' (2, 3) (2, 4) 26137db96d56Sopenharmony_ci COLON ':' (2, 4) (2, 5) 26147db96d56Sopenharmony_ci NEWLINE '' (2, 5) (2, 5) 26157db96d56Sopenharmony_ci INDENT '' (3, -1) (3, -1) 26167db96d56Sopenharmony_ci NAME 'y' (3, 4) (3, 5) 26177db96d56Sopenharmony_ci EQUAL '=' (3, 6) (3, 7) 26187db96d56Sopenharmony_ci NUMBER '1' (3, 8) (3, 9) 26197db96d56Sopenharmony_ci NEWLINE '' (3, 9) (3, 9) 26207db96d56Sopenharmony_ci NAME 'foo' (8, 4) (8, 7) 26217db96d56Sopenharmony_ci EQUAL '=' (8, 8) (8, 9) 26227db96d56Sopenharmony_ci NUMBER '1' (8, 10) (8, 11) 26237db96d56Sopenharmony_ci NEWLINE '' (8, 11) (8, 11) 26247db96d56Sopenharmony_ci DEDENT '' (8, -1) (8, -1) 26257db96d56Sopenharmony_ci """) 26267db96d56Sopenharmony_ci 26277db96d56Sopenharmony_ci code_no_cont = dedent(""" 26287db96d56Sopenharmony_ci if x: 26297db96d56Sopenharmony_ci y = 1 26307db96d56Sopenharmony_ci foo = 1 26317db96d56Sopenharmony_ci """) 26327db96d56Sopenharmony_ci 26337db96d56Sopenharmony_ci self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) 26347db96d56Sopenharmony_ci 26357db96d56Sopenharmony_ci 26367db96d56Sopenharmony_ciclass CTokenizerBufferTests(unittest.TestCase): 26377db96d56Sopenharmony_ci def test_newline_at_the_end_of_buffer(self): 26387db96d56Sopenharmony_ci # See issue 99581: Make sure that if we need to add a new line at the 26397db96d56Sopenharmony_ci # end of the buffer, we have enough space in the buffer, specially when 26407db96d56Sopenharmony_ci # the current line is as long as the buffer space available. 26417db96d56Sopenharmony_ci test_script = f"""\ 26427db96d56Sopenharmony_ci #coding: latin-1 26437db96d56Sopenharmony_ci #{"a"*10000} 26447db96d56Sopenharmony_ci #{"a"*10002}""" 26457db96d56Sopenharmony_ci with os_helper.temp_dir() as temp_dir: 26467db96d56Sopenharmony_ci file_name = make_script(temp_dir, 'foo', test_script) 26477db96d56Sopenharmony_ci run_test_script(file_name) 26487db96d56Sopenharmony_ci 26497db96d56Sopenharmony_ci 26507db96d56Sopenharmony_ciif __name__ == "__main__": 26517db96d56Sopenharmony_ci unittest.main() 2652