1#!/usr/bin/env python3.8
2
3import argparse
4import pprint
5import sys
6from typing import Dict, Set
7
8from pegen.build import build_parser
9from pegen.grammar import (
10    Alt,
11    Cut,
12    Gather,
13    GrammarVisitor,
14    Group,
15    Lookahead,
16    NamedItem,
17    NameLeaf,
18    NegativeLookahead,
19    Opt,
20    Repeat0,
21    Repeat1,
22    Rhs,
23    Rule,
24    StringLeaf,
25)
26from pegen.parser_generator import compute_nullables
27
28argparser = argparse.ArgumentParser(
29    prog="calculate_first_sets",
30    description="Calculate the first sets of a grammar",
31)
32argparser.add_argument("grammar_file", help="The grammar file")
33
34
35class FirstSetCalculator(GrammarVisitor):
36    def __init__(self, rules: Dict[str, Rule]) -> None:
37        self.rules = rules
38        self.nullables = compute_nullables(rules)
39        self.first_sets: Dict[str, Set[str]] = dict()
40        self.in_process: Set[str] = set()
41
42    def calculate(self) -> Dict[str, Set[str]]:
43        for name, rule in self.rules.items():
44            self.visit(rule)
45        return self.first_sets
46
47    def visit_Alt(self, item: Alt) -> Set[str]:
48        result: Set[str] = set()
49        to_remove: Set[str] = set()
50        for other in item.items:
51            new_terminals = self.visit(other)
52            if isinstance(other.item, NegativeLookahead):
53                to_remove |= new_terminals
54            result |= new_terminals
55            if to_remove:
56                result -= to_remove
57
58            # If the set of new terminals can start with the empty string,
59            # it means that the item is completely nullable and we should
60            # also considering at least the next item in case the current
61            # one fails to parse.
62
63            if "" in new_terminals:
64                continue
65
66            if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
67                break
68
69        # Do not allow the empty string to propagate.
70        result.discard("")
71
72        return result
73
74    def visit_Cut(self, item: Cut) -> Set[str]:
75        return set()
76
77    def visit_Group(self, item: Group) -> Set[str]:
78        return self.visit(item.rhs)
79
80    def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
81        return self.visit(item.node)
82
83    def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
84        return self.visit(item.node)
85
86    def visit_NamedItem(self, item: NamedItem) -> Set[str]:
87        return self.visit(item.item)
88
89    def visit_Opt(self, item: Opt) -> Set[str]:
90        return self.visit(item.node)
91
92    def visit_Gather(self, item: Gather) -> Set[str]:
93        return self.visit(item.node)
94
95    def visit_Repeat0(self, item: Repeat0) -> Set[str]:
96        return self.visit(item.node)
97
98    def visit_Repeat1(self, item: Repeat1) -> Set[str]:
99        return self.visit(item.node)
100
101    def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
102        if item.value not in self.rules:
103            return {item.value}
104
105        if item.value not in self.first_sets:
106            self.first_sets[item.value] = self.visit(self.rules[item.value])
107            return self.first_sets[item.value]
108        elif item.value in self.in_process:
109            return set()
110
111        return self.first_sets[item.value]
112
113    def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
114        return {item.value}
115
116    def visit_Rhs(self, item: Rhs) -> Set[str]:
117        result: Set[str] = set()
118        for alt in item.alts:
119            result |= self.visit(alt)
120        return result
121
122    def visit_Rule(self, item: Rule) -> Set[str]:
123        if item.name in self.in_process:
124            return set()
125        elif item.name not in self.first_sets:
126            self.in_process.add(item.name)
127            terminals = self.visit(item.rhs)
128            if item in self.nullables:
129                terminals.add("")
130            self.first_sets[item.name] = terminals
131            self.in_process.remove(item.name)
132        return self.first_sets[item.name]
133
134
135def main() -> None:
136    args = argparser.parse_args()
137
138    try:
139        grammar, parser, tokenizer = build_parser(args.grammar_file)
140    except Exception as err:
141        print("ERROR: Failed to parse grammar file", file=sys.stderr)
142        sys.exit(1)
143
144    firs_sets = FirstSetCalculator(grammar.rules).calculate()
145    pprint.pprint(firs_sets)
146
147
148if __name__ == "__main__":
149    main()
150