1#!/usr/bin/env python3.8
2
3"""pegen -- PEG Generator.
4
5Search the web for PEG Parsers for reference.
6"""
7
8import argparse
9import sys
10import time
11import token
12import traceback
13from typing import Tuple
14
15from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer
16from pegen.validator import validate_grammar
17
18
def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the C parser build for the parsed command line *args*.

    The grammar and tokens file names, the output path and the build flags
    stored on *args* are forwarded to
    ``pegen.build.build_c_parser_and_generator``.

    Returns:
        The ``(grammar, parser, tokenizer, generator)`` tuple produced by
        the build function.

    Raises:
        Exception: whatever the build raised, re-raised unchanged when
            ``args.verbose`` is non-zero so the full traceback is shown.
        SystemExit: with status 1 when the build fails and ``args.verbose``
            is zero; the exception is printed without its own traceback,
            followed by a hint to use ``-v``.
    """
    from pegen.build import build_c_parser_and_generator

    level = args.verbose
    # Verbosity 2 traces the parser only, 3 the tokenizer only, 4+ both.
    trace_tokenizer = level >= 3
    trace_parser = level >= 4 or level == 2
    try:
        built = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            trace_tokenizer,
            trace_parser,
            level,
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
        # Unpack inside the ``try`` so a malformed result is reported like
        # any other build failure.
        grammar, parser, tokenizer, gen = built
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if not level:
            # Quiet mode: short report and a non-zero exit status.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
46
47
def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the Python parser build for the parsed command line *args*.

    The grammar file name, the output path and the ``skip_actions`` flag
    stored on *args* are forwarded to
    ``pegen.build.build_python_parser_and_generator``.

    Returns:
        The ``(grammar, parser, tokenizer, generator)`` tuple produced by
        the build function.

    Raises:
        Exception: whatever the build raised, re-raised unchanged when
            ``args.verbose`` is non-zero so the full traceback is shown.
        SystemExit: with status 1 when the build fails and ``args.verbose``
            is zero; the exception is printed without its own traceback,
            followed by a hint to use ``-v``.
    """
    from pegen.build import build_python_parser_and_generator

    level = args.verbose
    # Verbosity 2 traces the parser only, 3 the tokenizer only, 4+ both.
    trace_tokenizer = level >= 3
    trace_parser = level >= 4 or level == 2
    try:
        built = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
            skip_actions=args.skip_actions,
        )
        # Unpack inside the ``try`` so a malformed result is reported like
        # any other build failure.
        grammar, parser, tokenizer, gen = built
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if not level:
            # Quiet mode: short report and a non-zero exit status.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
71
72
# Command-line interface: global flags followed by one sub-command per
# target language.  Each sub-command stores its generator function as
# ``func`` so that main() can dispatch on it.
argparser = argparse.ArgumentParser(
    prog="pegen",
    description="Experimental PEG-like parser generator",
)
argparser.add_argument(
    "-q",
    "--quiet",
    action="store_true",
    help="Don't print the parsed grammar",
)
argparser.add_argument(
    "-v",
    "--verbose",
    default=0,
    action="count",
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(
    help="target language for the generated code",
)

# --- "c" sub-command ---------------------------------------------------
c_parser = subparsers.add_parser(
    "c",
    help="Generate C code for inclusion into CPython",
)
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.c",
    help="Where to write the generated parser",
)
c_parser.add_argument(
    "--compile-extension",
    help="Compile generated C code into an extension module",
    action="store_true",
)
c_parser.add_argument(
    "--optimized",
    help="Compile the extension in optimized mode",
    action="store_true",
)
c_parser.add_argument(
    "--skip-actions",
    help="Suppress code emission for rule actions",
    action="store_true",
)

# --- "python" sub-command ----------------------------------------------
python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    default="parse.py",
    metavar="OUT",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions",
    help="Suppress code emission for rule actions",
    action="store_true",
)
122
123
def main() -> None:
    """Command-line entry point: generate a parser and report on the grammar.

    Parses the command line with the module-level ``argparser``, calls the
    generator function selected by the sub-command (stored as ``args.func``),
    validates the resulting grammar and, unless ``--quiet`` was given, prints
    the clean grammar (preceded by the raw grammar when ``-v`` is also set).

    With ``-v`` it additionally prints the first graph, its strongly
    connected components, timing information, cache sizes and memory stats.

    Exits through ``argparser.error`` when no sub-command was specified.
    """
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    # ``func`` is only set through a sub-command's set_defaults(); its
    # absence means no target language was chosen.
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted (or turn negative) by wall-clock adjustments the way
    # time.time() differences can.
    t0 = time.perf_counter()
    grammar, parser, tokenizer, gen = args.func(args)
    t1 = time.perf_counter()

    validate_grammar(grammar)

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for line in repr(grammar).splitlines():
                print(" ", line)

        print("Clean Grammar:")
        for line in str(grammar).splitlines():
            print(" ", line)

    # Everything below is diagnostic output shown only with -v.
    if not args.verbose:
        return

    print("First Graph:")
    for src, dsts in gen.first_graph.items():
        print(f"  {src} -> {', '.join(dsts)}")
    print("First SCCS:")
    for scc in gen.first_sccs:
        # The annotation (or a bare newline) is appended to the same line.
        print(" ", scc, end="")
        if len(scc) > 1:
            print(
                "  # Indirectly left-recursive; leaders:",
                {name for name in scc if grammar.rules[name].leader},
            )
        else:
            name = next(iter(scc))
            # A single-rule component is left-recursive only when the rule
            # has an edge to itself in the first graph.
            if name in gen.first_graph[name]:
                print("  # Left-recursive")
            else:
                print()

    dt = t1 - t0
    diag = tokenizer.diagnose()
    nlines = diag.end[0]
    # NOTE(review): presumably ENDMARKER is reported one line past the last
    # source line, hence the adjustment -- confirm against the tokenizer.
    if diag.type == token.ENDMARKER:
        nlines -= 1
    print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
    if dt > 0:
        print(f"; {nlines / dt:.0f} lines/sec")
    else:
        # Interval too short to measure: skip the rate to avoid dividing by 0.
        print()
    print("Caches sizes:")
    print(f"  token array : {len(tokenizer._tokens):10}")
    print(f"        cache : {len(parser._cache):10}")
    if not print_memstats():
        print("(Can't find psutil; install it for memory stats.)")
182
183
if __name__ == "__main__":
    # Refuse to run on interpreters older than 3.8 with a readable message
    # on stderr and a non-zero exit status.
    if sys.version_info[:2] < (3, 8):
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        raise SystemExit(1)
    main()
189