#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/token.h
#   Parser/token.c
#   Lib/token.py

import ast


NT_OFFSET = 256


def load_tokens(path):
    """Parse a tokens definition file.

    Each non-blank line (after ``#`` comments are stripped) holds a token
    name, optionally followed by a quoted string literal for the exact text
    of that token.  A token's numeric value is its position in the file.

    Returns a ``(tok_names, ERRORTOKEN, string_to_tok)`` tuple:

    * ``tok_names`` -- list of token names, indexed by token value;
    * ``ERRORTOKEN`` -- value of the token named ``ERRORTOKEN``, or ``None``
      if the file does not define it;
    * ``string_to_tok`` -- dict mapping exact token text to token value.

    Raises ``ValueError`` or ``SyntaxError`` if the second field of a line
    is not a valid Python literal.
    """
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path, encoding='utf-8') as fp:
        for line in fp:
            # Drop everything from the first '#' on, then surrounding blanks.
            line = line.partition('#')[0].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            if len(fields) > 1:
                # The field is a quoted literal such as '(' -- decode it
                # without executing arbitrary code from the input file.
                string = ast.literal_eval(fields[1])
                if string:
                    string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
    """Write *content* to *file* unless the file already holds exactly that.

    Returns ``False`` when the file was left untouched and ``True`` when it
    was (re)written.  A file that is missing or cannot be read or decoded is
    simply rewritten.
    """
    try:
        with open(file, 'r', encoding='utf-8') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        # Unreadable or undecodable existing file: fall through and rewrite.
        pass
    with open(file, 'w', encoding='utf-8') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""


def make_h(infile, outfile='Include/token.h'):
    """Generate the C header *outfile* from the tokens file *infile*.

    Only tokens up to and including ``ERRORTOKEN`` get a ``#define``;
    ``N_TOKENS`` is the total number of tokens in *infile*.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = [
        "#define %-15s %d\n" % (name, value)
        for value, name in enumerate(tok_names[:ERRORTOKEN + 1])
    ]

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""


def generate_chars_to_token(mapping, n=1):
    """Return C source for a ``switch`` on ``c<n>`` built from *mapping*.

    *mapping* maps a single character either to a token name (emitted as
    ``return NAME;``) or to a nested mapping of the same shape, which
    becomes a nested ``switch`` on the next character, ``c<n+1>``.  Cases
    are emitted in sorted order and the code is indented by *n* levels.
    """
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)


def make_c(infile, outfile='Parser/token.c'):
    """Generate the C source *outfile* from the tokens file *infile*.

    The output holds the token name table and the one-, two- and
    three-character lookup functions.  ``<>`` is added as an alias of
    ``!=``.  *infile* must define ``!=`` and at least one exact token of
    each length from 1 to 3, otherwise ``KeyError`` is raised.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']

    # chars_to_token[length] is a trie keyed character by character whose
    # leaves are token names.
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        # Tokens from ERRORTOKEN on are listed in angle brackets.
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""


def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Generate the reST include file *outfile* from the tokens file *infile*.

    One ``.. data::`` entry is written for each token up to and including
    ``ERRORTOKEN``; tokens with an exact string also get a line describing
    that string.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''


def make_py(infile, outfile='Lib/token.py'):
    """Generate the Python module *outfile* from the tokens file *infile*.

    Every token becomes a ``NAME = value`` constant, and every exact token
    string becomes an entry of ``EXACT_TOKEN_TYPES``.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = ['%s = %d' % (name, value)
                 for value, name in enumerate(tok_names)]
    # The explanatory comment goes immediately before the ERRORTOKEN line.
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = ['    %r: %s,' % (s, tok_names[value])
                   for s, value in sorted(string_to_tok.items())]

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    """Run the generator named ``make_<op>`` on *infile*.

    *op* selects one of the ``make_*`` functions of this module (``h``,
    ``c``, ``rst`` or ``py``); any extra *args* are passed on to it after
    *infile*.  Raises ``KeyError`` if no such generator exists.
    """
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        # Without this check main() fails with an opaque TypeError.
        sys.exit("usage: generate_token.py {h|c|rst|py} [infile [outfile]]")
    main(*sys.argv[1:])