17db96d56Sopenharmony_ci#! /usr/bin/env python3
27db96d56Sopenharmony_ci# This script generates token related files from Grammar/Tokens:
37db96d56Sopenharmony_ci#
47db96d56Sopenharmony_ci#   Doc/library/token-list.inc
57db96d56Sopenharmony_ci#   Include/token.h
67db96d56Sopenharmony_ci#   Parser/token.c
77db96d56Sopenharmony_ci#   Lib/token.py
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_ci
# Boundary between terminal and non-terminal numbers.  It is written verbatim
# as NT_OFFSET into the generated token.h and token.py, whose ISTERMINAL(x)
# helper is defined as ``x < NT_OFFSET``.
NT_OFFSET = 256
117db96d56Sopenharmony_ci
def load_tokens(path):
    """Parse a token description file such as ``Grammar/Tokens``.

    Every non-blank line, once ``#`` comments are removed, is either
    ``NAME`` or ``NAME 'string'``.  Tokens are numbered consecutively from 0
    in the order they appear.

    Returns a 3-tuple ``(tok_names, ERRORTOKEN, string_to_tok)``:

    * ``tok_names`` -- list of token names, indexed by token number;
    * ``ERRORTOKEN`` -- number of the token named ``ERRORTOKEN``, or ``None``
      when the file defines no such token;
    * ``string_to_tok`` -- dict mapping each literal token string to its
      token number.

    Raises ``ValueError`` when the second field of a line is not a valid
    Python string literal, and ``OSError`` when *path* cannot be opened.
    """
    import ast  # local import: only needed here

    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path, encoding='utf-8') as fp:
        for line in fp:
            # Keep only the text before the first '#', minus surrounding blanks.
            line = line.partition('#')[0].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            if len(fields) > 1:
                # The field is a quoted literal such as '(' .  literal_eval
                # decodes it without executing arbitrary expressions, which
                # a plain eval() of file content would do.
                try:
                    string = ast.literal_eval(fields[1])
                except (ValueError, SyntaxError) as err:
                    raise ValueError(
                        "%s: invalid string literal %s for token %s"
                        % (path, fields[1], name)) from err
                if not isinstance(string, str):
                    raise ValueError(
                        "%s: token %s must map to a string, got %s"
                        % (path, name, fields[1]))
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
367db96d56Sopenharmony_ci
377db96d56Sopenharmony_ci
def update_file(file, content):
    """Write *content* to *file* unless it already contains exactly that text.

    Returns ``False`` when the existing file matches *content* and is left
    untouched, ``True`` when the file was (re)written.  The file is read and
    written as UTF-8 so the result does not depend on the locale encoding.
    """
    try:
        with open(file, 'r', encoding='utf-8') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        # Missing, unreadable or undecodable (UnicodeDecodeError is a
        # ValueError) file: fall through and write it afresh.
        pass
    with open(file, 'w', encoding='utf-8') as fobj:
        fobj.write(content)
    return True
487db96d56Sopenharmony_ci
497db96d56Sopenharmony_ci
# Template for the generated C header.  make_h() fills the three
# %-placeholders, in order, with: the block of "#define NAME value" lines,
# the total token count (N_TOKENS) and NT_OFFSET.  The doubled backslashes
# below come out as single line-continuation backslashes in the C output.
token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""
897db96d56Sopenharmony_ci
def make_h(infile, outfile='Include/token.h'):
    """Regenerate the C header *outfile* from the token list in *infile*.

    Only tokens numbered up to and including ERRORTOKEN get a ``#define``;
    N_TOKENS still counts every token in the file.  A message is printed
    when the output file was actually rewritten.
    """
    tok_names, errortoken, _ = load_tokens(infile)

    exported = tok_names[:errortoken + 1]
    define_block = ''.join(
        "#define %-15s %d\n" % (name, number)
        for number, name in enumerate(exported)
    )

    header_text = token_h_template % (define_block, len(tok_names), NT_OFFSET)
    if update_file(outfile, header_text):
        print("%s regenerated from %s" % (outfile, infile))
1047db96d56Sopenharmony_ci
# Template for the generated C source.  make_c() fills the four %s
# placeholders, in order, with: the entries of the _PyParser_TokenNames
# array, then the switch statements forming the bodies of PyToken_OneChar,
# PyToken_TwoChars and PyToken_ThreeChars.
token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""
1407db96d56Sopenharmony_ci
def generate_chars_to_token(mapping, n=1):
    """Return C source for a ``switch`` over character ``c<n>``.

    *mapping* maps single characters either to a token name (emitted as
    ``case 'x': return NAME;``) or to a nested dict of the same shape, which
    becomes a nested ``switch`` on the next character followed by ``break;``.
    Cases are emitted in sorted character order and the whole statement is
    indented by *n* levels of four spaces.

    Backslash and single-quote characters are escaped so that the emitted
    C character literals stay valid.
    """
    # The only characters that cannot appear bare inside a C char literal
    # among printable operator characters.
    c_escapes = {'\\': '\\\\', "'": "\\'"}

    indent = '    ' * n
    pieces = ['%sswitch (c%d) {\n' % (indent, n)]
    for c in sorted(mapping):
        label = c_escapes.get(c, c)
        value = mapping[c]
        if isinstance(value, dict):
            pieces.append("%scase '%s':\n" % (indent, label))
            pieces.append(generate_chars_to_token(value, n + 1))
            pieces.append('%s    break;\n' % (indent,))
        else:
            pieces.append("%scase '%s': return %s;\n" % (indent, label, value))
    pieces.append('%s}\n' % (indent,))
    return ''.join(pieces)
1607db96d56Sopenharmony_ci
def make_c(infile, outfile='Parser/token.c'):
    """Regenerate the C source *outfile* from the token list in *infile*.

    Builds, per token-string length (1 to 3), a nested dict keyed by
    successive characters whose leaves are token names, and renders each as
    a C switch.  '<>' is registered as an extra spelling of the '!=' token.
    Names from ERRORTOKEN onwards are wrapped in angle brackets in the
    name table.  A message is printed when the output file was rewritten.
    """
    tok_names, errortoken, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']

    # tries_by_length[k] is the nested character dict for k-character tokens.
    tries_by_length = {}
    for text, number in string_to_tok.items():
        assert 1 <= len(text) <= 3
        token_name = tok_names[number]
        node = tries_by_length.setdefault(len(text), {})
        leading, final = text[:-1], text[-1]
        for ch in leading:
            node = node.setdefault(ch, {})
        node[final] = token_name

    name_rows = [
        '    "%s",\n' % (name if number < errortoken else '<%s>' % name)
        for number, name in enumerate(tok_names)
    ]
    name_rows.append('    "<N_TOKENS>",\n')

    name_table = ''.join(name_rows)
    one_char = generate_chars_to_token(tries_by_length[1])
    two_chars = generate_chars_to_token(tries_by_length[2])
    three_chars = generate_chars_to_token(tries_by_length[3])
    source_text = token_c_template % (
        name_table, one_char, two_chars, three_chars)

    if update_file(outfile, source_text):
        print("%s regenerated from %s" % (outfile, infile))
1877db96d56Sopenharmony_ci
1887db96d56Sopenharmony_ci
# Template for the generated reStructuredText include file.  make_rst()
# fills the single %s with the newline-joined ".. data::" entries for the
# individual tokens; the N_TOKENS and NT_OFFSET entries are fixed text.
token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""
1967db96d56Sopenharmony_ci
def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Regenerate the documentation include *outfile* from *infile*.

    Emits one ``.. data::`` entry per token up to and including ERRORTOKEN;
    tokens that have a literal string also get a "Token value for" line.
    A message is printed when the output file was actually rewritten.
    """
    tok_names, errortoken, string_to_tok = load_tokens(infile)

    # Invert the string -> number mapping; a later string wins on collision.
    spelling_of = {}
    for text, number in string_to_tok.items():
        spelling_of[number] = text

    lines = []
    for number, name in enumerate(tok_names[:errortoken + 1]):
        lines.append('.. data:: %s' % (name,))
        if number in spelling_of:
            lines.extend([
                '',
                '   Token value for ``"%s"``.' % spelling_of[number],
            ])
        lines.append('')

    document = token_inc_template % '\n'.join(lines)
    if update_file(outfile, document):
        print("%s regenerated from %s" % (outfile, infile))
2117db96d56Sopenharmony_ci
2127db96d56Sopenharmony_ci
# Template for the generated Python module.  make_py() fills the four
# %-placeholders, in order, with: the "NAME = value" constant lines, the
# total token count (N_TOKENS), NT_OFFSET, and the entries of the
# EXACT_TOKEN_TYPES dict.
token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''
2427db96d56Sopenharmony_ci
def make_py(infile, outfile='Lib/token.py'):
    """Regenerate the Python module *outfile* from the token list in *infile*.

    Writes one ``NAME = value`` constant per token, with an explanatory
    comment inserted just before ERRORTOKEN, plus the EXACT_TOKEN_TYPES
    entries sorted by token string.  A message is printed when the output
    file was actually rewritten.
    """
    tok_names, errortoken, string_to_tok = load_tokens(infile)

    constant_lines = [
        '%s = %d' % (name, number) for number, name in enumerate(tok_names)
    ]
    constant_lines.insert(
        errortoken,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    exact_type_lines = [
        '    %r: %s,' % (text, tok_names[number])
        for text, number in sorted(string_to_tok.items())
    ]

    module_text = token_py_template % (
        '\n'.join(constant_lines),
        len(tok_names),
        NT_OFFSET,
        '\n'.join(exact_type_lines),
    )
    if update_file(outfile, module_text):
        print("%s regenerated from %s" % (outfile, infile))
2637db96d56Sopenharmony_ci
2647db96d56Sopenharmony_ci
def main(op, infile='Grammar/Tokens', *args):
    """Run the module-level ``make_<op>`` function on *infile*.

    Any extra positional arguments are passed through after *infile*.
    Raises ``KeyError`` when no ``make_<op>`` name exists in this module.
    """
    generator = globals()['make_' + op]
    generator(infile, *args)
2687db96d56Sopenharmony_ci
2697db96d56Sopenharmony_ci
if __name__ == '__main__':
    # Command line: <op> [infile [extra args...]], forwarded positionally to
    # main(); <op> selects the make_<op> function to run.
    import sys
    main(*sys.argv[1:])
273