17db96d56Sopenharmony_ciimport itertools
27db96d56Sopenharmony_ciimport pathlib
37db96d56Sopenharmony_ciimport sys
47db96d56Sopenharmony_ciimport sysconfig
57db96d56Sopenharmony_ciimport tempfile
67db96d56Sopenharmony_ciimport tokenize
77db96d56Sopenharmony_cifrom typing import IO, Dict, List, Optional, Set, Tuple
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_cifrom pegen.c_generator import CParserGenerator
107db96d56Sopenharmony_cifrom pegen.grammar import Grammar
117db96d56Sopenharmony_cifrom pegen.grammar_parser import GeneratedParser as GrammarParser
127db96d56Sopenharmony_cifrom pegen.parser import Parser
137db96d56Sopenharmony_cifrom pegen.parser_generator import ParserGenerator
147db96d56Sopenharmony_cifrom pegen.python_generator import PythonParserGenerator
157db96d56Sopenharmony_cifrom pegen.tokenizer import Tokenizer
167db96d56Sopenharmony_ci
# Absolute directory that contains this module; compile_c_extension() locates
# the C sources and include directories relative to it.
MOD_DIR = pathlib.Path(__file__).resolve().parent

# Shape of the value returned by generate_token_definitions():
# (token index -> token name, exact token string -> token index,
#  names of tokens that have no exact string).
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
207db96d56Sopenharmony_ci
217db96d56Sopenharmony_ci
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    """Return the flags stored in two sysconfig variables as one list.

    Args:
        compiler_flags (string): Name of the first sysconfig variable to read.
        compiler_py_flags_nodist (string): Name of the second sysconfig
          variable to read.

    Returns:
        The whitespace-separated words of both values, first variable first.
        An empty list is returned when either variable is not defined.
    """
    looked_up = [
        sysconfig.get_config_var(variable_name)
        for variable_name in (compiler_flags, compiler_py_flags_nodist)
    ]
    # Both variables are required; one missing value discards the other too.
    if any(value is None for value in looked_up):
        return []
    return " ".join(f"{value}" for value in looked_up).split()
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ci
def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    in a darwin system with python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful in case you want to use a temporary directory).

    If *library_dir* is provided, that path will be used as the directory for a
    static library of the common parser sources (this is useful in case you are
    creating multiple extensions).

    Args:
        generated_source_path (string): Path of the generated C source file.
        build_dir (string, optional): Temporary build directory. Defaults to
          None, in which case the build_ext command's own default is used.
        verbose (bool, optional): Whether to lower the distutils log
          threshold to DEBUG. Defaults to False.
        keep_asserts (bool, optional): Whether to pass ``-UNDEBUG`` to the
          compiler. Defaults to True.
        disable_optimization (bool, optional): Whether to add the flags that
          turn optimization off (``/Od`` and ``/LTCG:OFF`` on win32, ``-O0``
          elsewhere, plus ``-fno-lto`` at link time when the ``GNULD`` config
          variable is ``"yes"``). Defaults to False.
        library_dir (string, optional): Directory for the static library of
          the common sources. Defaults to None, in which case the common
          sources are compiled together with the generated source.

    Returns:
        A ``pathlib.Path`` pointing at the built extension module.
    """
    # distutils is imported when the function is called, not at module import.
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.tests.support import fixup_build_ext  # type: ignore

    from distutils.ccompiler import new_compiler
    from distutils.dep_util import newer_group
    from distutils.sysconfig import customize_compiler

    if verbose:
        distutils.log.set_threshold(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    # The extension is named after the generated file, minus its suffix.
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")

    # Sources shared by every generated parser extension. All but the last are
    # resolved three directory levels above this module's directory.
    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    # The build_ext command object is used only as a source of finalized
    # settings (paths, libraries, export symbols); compiling and linking are
    # driven by hand through the compiler object below.
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    # Place the built module next to the generated source file.
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # build static lib
    if library_dir:
        library_filename = compiler.library_filename(extension_name,
                                                     output_dir=library_dir)
        # Rebuild the static library only when newer_group() reports it as
        # out of date with respect to the common sources.
        if newer_group(common_sources, library_filename, 'newer'):
            if sys.platform == 'win32':
                # Pass an explicit /Fd (program database) path inside
                # library_dir ahead of the regular compile flags.
                pdb = compiler.static_lib_format % (extension_name, '.pdb')
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(common_sources,
                                       output_dir=library_dir,
                                       debug=cmd.debug,
                                       extra_postargs=compile_opts)
            compiler.create_static_lib(objects, extension_name,
                                       output_dir=library_dir,
                                       debug=cmd.debug)
        # Make the static library part of the link step. win32 links it as a
        # regular library; the other platforms wrap it in linker options that
        # pull in the whole archive.
        if sys.platform == 'win32':
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == 'darwin':
            compiler.set_link_objects([
                '-Wl,-force_load', library_filename,
            ])
        else:
            compiler.set_link_objects([
                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
            ])
    else:
        # No static library: compile the common sources together with the
        # generated source by putting them at the front of the source list.
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, 'newer'):
        objects = compiler.compile(extension.sources,
                                    output_dir=cmd.build_temp,
                                    debug=cmd.debug,
                                    extra_postargs=extra_compile_args)
    else:
        # Up to date: skip compilation and only compute the object file names
        # that the link step below expects in build_temp.
        objects = compiler.object_filenames(extension.sources,
                                            output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object"
    compiler.link_shared_object(
        objects, ext_path,
        libraries=cmd.get_libraries(extension),
        extra_postargs=extra_link_args,
        export_symbols=cmd.get_export_symbols(extension),
        debug=cmd.debug,
        build_temp=cmd.build_temp)

    return pathlib.Path(ext_path)
1667db96d56Sopenharmony_ci
1677db96d56Sopenharmony_ci
def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    """Parse a grammar file and return the grammar with the objects that read it.

    Args:
        grammar_file (string): Path for the grammar file
        verbose_tokenizer (bool, optional): Passed to the tokenizer as its
          ``verbose`` flag. Defaults to False.
        verbose_parser (bool, optional): Passed to the grammar parser as its
          ``verbose`` flag. Defaults to False.

    Returns:
        The tuple ``(grammar, parser, tokenizer)``.

    Raises:
        The exception produced by ``parser.make_syntax_error(grammar_file)``
        when parsing does not yield a grammar.
    """
    with open(grammar_file) as source:
        token_stream = tokenize.generate_tokens(source.readline)
        tokenizer = Tokenizer(token_stream, verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        # The syntax error is constructed while the grammar file is still open.
        if not grammar:
            raise parser.make_syntax_error(grammar_file)

        return grammar, parser, tokenizer
1807db96d56Sopenharmony_ci
1817db96d56Sopenharmony_ci
def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    """Read token definitions from the lines of a Tokens file.

    Blank lines and lines starting with ``#`` are skipped. Every remaining
    line is numbered consecutively from 0 and must contain either a token
    name alone or a token name followed by its exact string.

    Args:
        tokens: Iterable of text lines, typically an open Tokens file.

    Returns:
        A tuple of three collections: a dict mapping each index to its token
        name, a dict mapping each exact string (surrounding single quotes
        removed) to its index, and the set of names given without an exact
        string.

    Raises:
        ValueError: If a line has more than two whitespace-separated fields.
    """
    names_by_index: Dict[int, str] = {}
    index_by_exact_string: Dict[str, int] = {}
    names_without_string: Set[str] = set()

    stripped_lines = (raw.strip() for raw in tokens)
    # Only lines that survive this filter consume an index.
    definitions = (text for text in stripped_lines if text and not text.startswith("#"))

    for position, text in enumerate(definitions):
        fields = text.split()
        if len(fields) > 2:
            raise ValueError(f"Unexpected line found in Tokens file: {text}")

        name = fields[0]
        names_by_index[position] = name
        if len(fields) == 1:
            names_without_string.add(name)
        else:
            index_by_exact_string[fields[1].strip("'")] = position

    return names_by_index, index_by_exact_string, names_without_string
2097db96d56Sopenharmony_ci
2107db96d56Sopenharmony_ci
def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Write the C parser for a grammar and optionally compile it.

    Args:
        grammar (Grammar): The parsed grammar to generate code for.
        grammar_file (string): Path for the grammar file, handed to the
          generator's ``generate`` method.
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert
          statements when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Forwarded to the C generator.
          Defaults to False.

    Returns:
        The generator that produced *output_file*.
    """
    with open(tokens_file, "r") as tokens_stream:
        token_definitions = generate_token_definitions(tokens_stream)
    all_tokens, exact_tokens, non_exact_tokens = token_definitions

    with open(output_file, "w") as sink:
        generator: ParserGenerator = CParserGenerator(
            grammar,
            all_tokens,
            exact_tokens,
            non_exact_tokens,
            sink,
            skip_actions=skip_actions,
        )
        generator.generate(grammar_file)

    if not compile_extension:
        return generator

    # Intermediate build artifacts go into a throwaway directory.
    with tempfile.TemporaryDirectory() as scratch_dir:
        compile_c_extension(
            output_file,
            build_dir=scratch_dir,
            verbose=verbose_c_extension,
            keep_asserts=keep_asserts_in_extension,
        )
    return generator
2387db96d56Sopenharmony_ci
2397db96d56Sopenharmony_ci
def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Write the Python parser for a grammar to *output_file*.

    Args:
        grammar (Grammar): The parsed grammar to generate code for.
        grammar_file (string): Path for the grammar file, handed to the
          generator's ``generate`` method.
        output_file (string): Path for the output file
        skip_actions (bool, optional): Accepted but currently not passed on
          to the generator (see the TODO below). Defaults to False.

    Returns:
        The generator that produced *output_file*.
    """
    with open(output_file, "w") as sink:
        generator: ParserGenerator = PythonParserGenerator(grammar, sink)  # TODO: skip_actions
        generator.generate(grammar_file)
    return generator
2507db96d56Sopenharmony_ci
2517db96d56Sopenharmony_ci
def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Parse a grammar file and generate its C parser in one call.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.

    Returns:
        The tuple ``(grammar, parser, tokenizer, generator)``.
    """
    grammar, parser, tokenizer = build_parser(
        grammar_file,
        verbose_tokenizer=verbose_tokenizer,
        verbose_parser=verbose_parser,
    )
    generator = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension=compile_extension,
        verbose_c_extension=verbose_c_extension,
        keep_asserts_in_extension=keep_asserts_in_extension,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, generator
2947db96d56Sopenharmony_ci
2957db96d56Sopenharmony_ci
def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Parse a grammar file and generate its Python parser in one call.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.

    Returns:
        The tuple ``(grammar, parser, tokenizer, generator)``.
    """
    grammar, parser, tokenizer = build_parser(
        grammar_file,
        verbose_tokenizer=verbose_tokenizer,
        verbose_parser=verbose_parser,
    )
    generator = build_python_generator(
        grammar, grammar_file, output_file, skip_actions=skip_actions
    )
    return grammar, parser, tokenizer, generator
322