#!/usr/bin/env python3.8

"""pegen -- PEG Generator.

Search the web for PEG Parsers for reference.
"""

import argparse
import sys
import time
import token
import traceback
from typing import Tuple

from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer
from pegen.validator import validate_grammar


def _report_build_error(err: Exception, verbose: int) -> None:
    """Report a build failure and exit.

    With any verbosity enabled, re-raise *err* so the full traceback is
    shown; otherwise print a one-line summary and exit with status 1.
    """
    if verbose:
        raise err  # Show the full traceback.
    traceback.print_exception(err.__class__, err, None)
    sys.stderr.write("For full traceback, use -v\n")
    sys.exit(1)


def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build a C parser/generator from the grammar and tokens files in *args*."""
    # Imported lazily so the Python-only path does not pay for C build deps.
    from pegen.build import build_c_parser_and_generator

    verbose = args.verbose
    # -vvv enables tokenizer tracing; -vv or -vvvv enables parser tracing.
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            verbose_tokenizer,
            verbose_parser,
            args.verbose,
            # Optimized builds strip asserts from the generated extension.
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        _report_build_error(err, args.verbose)


def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build a Python parser/generator from the grammar file in *args*."""
    # Imported lazily to keep startup cheap and mirror generate_c_code.
    from pegen.build import build_python_parser_and_generator

    verbose = args.verbose
    # -vvv enables tokenizer tracing; -vv or -vvvv enables parser tracing.
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            verbose_tokenizer,
            verbose_parser,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        _report_build_error(err, args.verbose)


argparser = argparse.ArgumentParser(
    prog="pegen",
    description="Experimental PEG-like parser generator",
)
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)

python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)


def main() -> None:
    """Parse command-line arguments, build the parser, and print diagnostics."""
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    # No subcommand given: argparse leaves `func` unset on the namespace.
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    # monotonic() is the right clock for measuring elapsed time.
    t0 = time.monotonic()
    grammar, parser, tokenizer, gen = args.func(args)
    t1 = time.monotonic()

    validate_grammar(grammar)

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for line in repr(grammar).splitlines():
                print(" ", line)

        print("Clean Grammar:")
        for line in str(grammar).splitlines():
            print(" ", line)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                # Multi-rule SCC: the rules are mutually (indirectly) left-recursive.
                print(
                    "  # Indirectly left-recursive; leaders:",
                    {name for name in scc if grammar.rules[name].leader},
                )
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        # Don't count the synthetic end-of-file token as a source line.
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Cache sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")


if __name__ == "__main__":
    if sys.version_info < (3, 8):
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
    main()