import itertools
import pathlib
import sys
import sysconfig
import tempfile
import tokenize
from typing import IO, Dict, List, Optional, Set, Tuple

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

# Directory that contains this module; C source paths below are built from it.
MOD_DIR = pathlib.Path(__file__).resolve().parent

# (index -> token name, exact operator string -> index, names of non-exact tokens)
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    """Return the combined flags stored in two sysconfig variables.

    Args:
        compiler_flags (string): Name of the first sysconfig variable
            (for example "CFLAGS").
        compiler_py_flags_nodist (string): Name of the second sysconfig
            variable (for example "PY_CFLAGS_NODIST").

    Returns:
        The whitespace-separated flags of both variables as a list, or an
        empty list if either variable is not defined.
    """
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    in a darwin system with python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful in case you want to use a temporary directory).

    If *library_dir* is provided, that path will be used as the directory for a
    static library of the common parser sources (this is useful in case you are
    creating multiple extensions).

    Args:
        generated_source_path (string): Path of the generated C source file.
        build_dir (string, optional): Temporary build directory for distutils.
        verbose (bool, optional): Whether to set the distutils log threshold
            to DEBUG. Defaults to False.
        keep_asserts (bool, optional): Whether to pass -UNDEBUG to the compiler.
            Defaults to True.
        disable_optimization (bool, optional): Whether to pass flags that turn
            off compiler optimization (and LTO where applicable).
            Defaults to False.
        library_dir (string, optional): Directory for the static library built
            from the common parser sources.

    Returns:
        The path of the built extension module, as a pathlib.Path.
    """
    # distutils is imported lazily so that importing this module does not
    # require it unless an extension is actually compiled.
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.tests.support import fixup_build_ext  # type: ignore

    from distutils.ccompiler import new_compiler
    from distutils.dep_util import newer_group
    from distutils.sysconfig import customize_compiler

    if verbose:
        distutils.log.set_threshold(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")

    # Sources shared by every generated parser extension.
    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # build static lib
    if library_dir:
        library_filename = compiler.library_filename(extension_name,
                                                     output_dir=library_dir)
        # Rebuild the static library only when a common source is newer than it.
        if newer_group(common_sources, library_filename, 'newer'):
            if sys.platform == 'win32':
                pdb = compiler.static_lib_format % (extension_name, '.pdb')
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(common_sources,
                                       output_dir=library_dir,
                                       debug=cmd.debug,
                                       extra_postargs=compile_opts)
            compiler.create_static_lib(objects, extension_name,
                                       output_dir=library_dir,
                                       debug=cmd.debug)
        # Link the static library into the extension, per platform.
        if sys.platform == 'win32':
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == 'darwin':
            compiler.set_link_objects([
                '-Wl,-force_load', library_filename,
            ])
        else:
            compiler.set_link_objects([
                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
            ])
    else:
        # No static library: compile the common sources into the extension itself.
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, 'newer'):
        objects = compiler.compile(extension.sources,
                                   output_dir=cmd.build_temp,
                                   debug=cmd.debug,
                                   extra_postargs=extra_compile_args)
    else:
        objects = compiler.object_filenames(extension.sources,
                                            output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object"
    compiler.link_shared_object(
        objects, ext_path,
        libraries=cmd.get_libraries(extension),
        extra_postargs=extra_link_args,
        export_symbols=cmd.get_export_symbols(extension),
        debug=cmd.debug,
        build_temp=cmd.build_temp)

    return pathlib.Path(ext_path)


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    """Parse a grammar file and return the grammar, its parser and tokenizer.

    Args:
        grammar_file (string): Path for the grammar file
        verbose_tokenizer (bool, optional): Passed as *verbose* to the
            Tokenizer. Defaults to False.
        verbose_parser (bool, optional): Passed as *verbose* to the grammar
            parser. Defaults to False.

    Raises:
        The exception built by the parser's make_syntax_error() when parsing
        the grammar yields a falsy result.
    """
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer


def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    """Read token definitions from an open Tokens file.

    Blank lines and lines starting with "#" are skipped. Every other line is
    numbered consecutively from 0 and must consist of either a token name, or
    a token name followed by its operator string.

    Args:
        tokens (IO[str]): Iterable of the lines of the Tokens file.

    Returns:
        A tuple (all_tokens, exact_tokens, non_exact_tokens) where all_tokens
        maps each index to its token name, exact_tokens maps each operator
        string (surrounding single quotes stripped) to its index, and
        non_exact_tokens is the set of token names given without an operator.

    Raises:
        ValueError: If a line has more than two whitespace-separated pieces.
    """
    all_tokens: Dict[int, str] = {}
    exact_tokens: Dict[str, int] = {}
    non_exact_tokens: Set[str] = set()
    # Only definition lines consume an index; comments and blanks do not.
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens


def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Write the C parser for *grammar* and optionally compile it.

    Args:
        grammar (Grammar): The parsed grammar
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
            Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
            output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
            when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.

    Returns:
        The generator that was used to write the output file.
    """
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        # The build directory is temporary; the extension itself is written
        # next to output_file by compile_c_extension().
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen


def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Write the Python parser for *grammar* to *output_file*.

    Args:
        grammar (Grammar): The parsed grammar
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        skip_actions (bool, optional): Accepted for symmetry with
            build_c_generator() but currently not forwarded to the generator
            (see the TODO below).

    Returns:
        The generator that was used to write the output file.
    """
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen


def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, python parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(
        grammar,
        grammar_file,
        output_file,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen