1#!/usr/bin/env python3.8 2 3import argparse 4import pprint 5import sys 6from typing import Dict, Set 7 8from pegen.build import build_parser 9from pegen.grammar import ( 10 Alt, 11 Cut, 12 Gather, 13 GrammarVisitor, 14 Group, 15 Lookahead, 16 NamedItem, 17 NameLeaf, 18 NegativeLookahead, 19 Opt, 20 Repeat0, 21 Repeat1, 22 Rhs, 23 Rule, 24 StringLeaf, 25) 26from pegen.parser_generator import compute_nullables 27 28argparser = argparse.ArgumentParser( 29 prog="calculate_first_sets", 30 description="Calculate the first sets of a grammar", 31) 32argparser.add_argument("grammar_file", help="The grammar file") 33 34 35class FirstSetCalculator(GrammarVisitor): 36 def __init__(self, rules: Dict[str, Rule]) -> None: 37 self.rules = rules 38 self.nullables = compute_nullables(rules) 39 self.first_sets: Dict[str, Set[str]] = dict() 40 self.in_process: Set[str] = set() 41 42 def calculate(self) -> Dict[str, Set[str]]: 43 for name, rule in self.rules.items(): 44 self.visit(rule) 45 return self.first_sets 46 47 def visit_Alt(self, item: Alt) -> Set[str]: 48 result: Set[str] = set() 49 to_remove: Set[str] = set() 50 for other in item.items: 51 new_terminals = self.visit(other) 52 if isinstance(other.item, NegativeLookahead): 53 to_remove |= new_terminals 54 result |= new_terminals 55 if to_remove: 56 result -= to_remove 57 58 # If the set of new terminals can start with the empty string, 59 # it means that the item is completely nullable and we should 60 # also considering at least the next item in case the current 61 # one fails to parse. 62 63 if "" in new_terminals: 64 continue 65 66 if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)): 67 break 68 69 # Do not allow the empty string to propagate. 70 result.discard("") 71 72 return result 73 74 def visit_Cut(self, item: Cut) -> Set[str]: 75 return set() 76 77 def visit_Group(self, item: Group) -> Set[str]: 78 return self.visit(item.rhs) 79 80 def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]: 81 return self.visit(item.node) 82 83 def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]: 84 return self.visit(item.node) 85 86 def visit_NamedItem(self, item: NamedItem) -> Set[str]: 87 return self.visit(item.item) 88 89 def visit_Opt(self, item: Opt) -> Set[str]: 90 return self.visit(item.node) 91 92 def visit_Gather(self, item: Gather) -> Set[str]: 93 return self.visit(item.node) 94 95 def visit_Repeat0(self, item: Repeat0) -> Set[str]: 96 return self.visit(item.node) 97 98 def visit_Repeat1(self, item: Repeat1) -> Set[str]: 99 return self.visit(item.node) 100 101 def visit_NameLeaf(self, item: NameLeaf) -> Set[str]: 102 if item.value not in self.rules: 103 return {item.value} 104 105 if item.value not in self.first_sets: 106 self.first_sets[item.value] = self.visit(self.rules[item.value]) 107 return self.first_sets[item.value] 108 elif item.value in self.in_process: 109 return set() 110 111 return self.first_sets[item.value] 112 113 def visit_StringLeaf(self, item: StringLeaf) -> Set[str]: 114 return {item.value} 115 116 def visit_Rhs(self, item: Rhs) -> Set[str]: 117 result: Set[str] = set() 118 for alt in item.alts: 119 result |= self.visit(alt) 120 return result 121 122 def visit_Rule(self, item: Rule) -> Set[str]: 123 if item.name in self.in_process: 124 return set() 125 elif item.name not in self.first_sets: 126 self.in_process.add(item.name) 127 terminals = self.visit(item.rhs) 128 if item in self.nullables: 129 terminals.add("") 130 self.first_sets[item.name] = terminals 131 self.in_process.remove(item.name) 132 return self.first_sets[item.name] 133 134 135def main() -> None: 136 args = argparser.parse_args() 137 138 try: 139 grammar, parser, tokenizer = build_parser(args.grammar_file) 140 except Exception as err: 141 print("ERROR: Failed to parse grammar file", file=sys.stderr) 142 sys.exit(1) 143 144 firs_sets = FirstSetCalculator(grammar.rules).calculate() 145 pprint.pprint(firs_sets) 146 147 148if __name__ == "__main__": 149 main() 150