17db96d56Sopenharmony_ci#!/usr/bin/env python3.8 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_ciimport argparse 47db96d56Sopenharmony_ciimport pprint 57db96d56Sopenharmony_ciimport sys 67db96d56Sopenharmony_cifrom typing import Dict, Set 77db96d56Sopenharmony_ci 87db96d56Sopenharmony_cifrom pegen.build import build_parser 97db96d56Sopenharmony_cifrom pegen.grammar import ( 107db96d56Sopenharmony_ci Alt, 117db96d56Sopenharmony_ci Cut, 127db96d56Sopenharmony_ci Gather, 137db96d56Sopenharmony_ci GrammarVisitor, 147db96d56Sopenharmony_ci Group, 157db96d56Sopenharmony_ci Lookahead, 167db96d56Sopenharmony_ci NamedItem, 177db96d56Sopenharmony_ci NameLeaf, 187db96d56Sopenharmony_ci NegativeLookahead, 197db96d56Sopenharmony_ci Opt, 207db96d56Sopenharmony_ci Repeat0, 217db96d56Sopenharmony_ci Repeat1, 227db96d56Sopenharmony_ci Rhs, 237db96d56Sopenharmony_ci Rule, 247db96d56Sopenharmony_ci StringLeaf, 257db96d56Sopenharmony_ci) 267db96d56Sopenharmony_cifrom pegen.parser_generator import compute_nullables 277db96d56Sopenharmony_ci 287db96d56Sopenharmony_ciargparser = argparse.ArgumentParser( 297db96d56Sopenharmony_ci prog="calculate_first_sets", 307db96d56Sopenharmony_ci description="Calculate the first sets of a grammar", 317db96d56Sopenharmony_ci) 327db96d56Sopenharmony_ciargparser.add_argument("grammar_file", help="The grammar file") 337db96d56Sopenharmony_ci 347db96d56Sopenharmony_ci 357db96d56Sopenharmony_ciclass FirstSetCalculator(GrammarVisitor): 367db96d56Sopenharmony_ci def __init__(self, rules: Dict[str, Rule]) -> None: 377db96d56Sopenharmony_ci self.rules = rules 387db96d56Sopenharmony_ci self.nullables = compute_nullables(rules) 397db96d56Sopenharmony_ci self.first_sets: Dict[str, Set[str]] = dict() 407db96d56Sopenharmony_ci self.in_process: Set[str] = set() 417db96d56Sopenharmony_ci 427db96d56Sopenharmony_ci def calculate(self) -> Dict[str, Set[str]]: 437db96d56Sopenharmony_ci for name, rule in self.rules.items(): 447db96d56Sopenharmony_ci self.visit(rule) 457db96d56Sopenharmony_ci return self.first_sets 467db96d56Sopenharmony_ci 477db96d56Sopenharmony_ci def visit_Alt(self, item: Alt) -> Set[str]: 487db96d56Sopenharmony_ci result: Set[str] = set() 497db96d56Sopenharmony_ci to_remove: Set[str] = set() 507db96d56Sopenharmony_ci for other in item.items: 517db96d56Sopenharmony_ci new_terminals = self.visit(other) 527db96d56Sopenharmony_ci if isinstance(other.item, NegativeLookahead): 537db96d56Sopenharmony_ci to_remove |= new_terminals 547db96d56Sopenharmony_ci result |= new_terminals 557db96d56Sopenharmony_ci if to_remove: 567db96d56Sopenharmony_ci result -= to_remove 577db96d56Sopenharmony_ci 587db96d56Sopenharmony_ci # If the set of new terminals can start with the empty string, 597db96d56Sopenharmony_ci # it means that the item is completely nullable and we should 607db96d56Sopenharmony_ci # also considering at least the next item in case the current 617db96d56Sopenharmony_ci # one fails to parse. 627db96d56Sopenharmony_ci 637db96d56Sopenharmony_ci if "" in new_terminals: 647db96d56Sopenharmony_ci continue 657db96d56Sopenharmony_ci 667db96d56Sopenharmony_ci if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)): 677db96d56Sopenharmony_ci break 687db96d56Sopenharmony_ci 697db96d56Sopenharmony_ci # Do not allow the empty string to propagate. 707db96d56Sopenharmony_ci result.discard("") 717db96d56Sopenharmony_ci 727db96d56Sopenharmony_ci return result 737db96d56Sopenharmony_ci 747db96d56Sopenharmony_ci def visit_Cut(self, item: Cut) -> Set[str]: 757db96d56Sopenharmony_ci return set() 767db96d56Sopenharmony_ci 777db96d56Sopenharmony_ci def visit_Group(self, item: Group) -> Set[str]: 787db96d56Sopenharmony_ci return self.visit(item.rhs) 797db96d56Sopenharmony_ci 807db96d56Sopenharmony_ci def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]: 817db96d56Sopenharmony_ci return self.visit(item.node) 827db96d56Sopenharmony_ci 837db96d56Sopenharmony_ci def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]: 847db96d56Sopenharmony_ci return self.visit(item.node) 857db96d56Sopenharmony_ci 867db96d56Sopenharmony_ci def visit_NamedItem(self, item: NamedItem) -> Set[str]: 877db96d56Sopenharmony_ci return self.visit(item.item) 887db96d56Sopenharmony_ci 897db96d56Sopenharmony_ci def visit_Opt(self, item: Opt) -> Set[str]: 907db96d56Sopenharmony_ci return self.visit(item.node) 917db96d56Sopenharmony_ci 927db96d56Sopenharmony_ci def visit_Gather(self, item: Gather) -> Set[str]: 937db96d56Sopenharmony_ci return self.visit(item.node) 947db96d56Sopenharmony_ci 957db96d56Sopenharmony_ci def visit_Repeat0(self, item: Repeat0) -> Set[str]: 967db96d56Sopenharmony_ci return self.visit(item.node) 977db96d56Sopenharmony_ci 987db96d56Sopenharmony_ci def visit_Repeat1(self, item: Repeat1) -> Set[str]: 997db96d56Sopenharmony_ci return self.visit(item.node) 1007db96d56Sopenharmony_ci 1017db96d56Sopenharmony_ci def visit_NameLeaf(self, item: NameLeaf) -> Set[str]: 1027db96d56Sopenharmony_ci if item.value not in self.rules: 1037db96d56Sopenharmony_ci return {item.value} 1047db96d56Sopenharmony_ci 1057db96d56Sopenharmony_ci if item.value not in self.first_sets: 1067db96d56Sopenharmony_ci self.first_sets[item.value] = self.visit(self.rules[item.value]) 1077db96d56Sopenharmony_ci return self.first_sets[item.value] 1087db96d56Sopenharmony_ci elif item.value in self.in_process: 1097db96d56Sopenharmony_ci return set() 1107db96d56Sopenharmony_ci 1117db96d56Sopenharmony_ci return self.first_sets[item.value] 1127db96d56Sopenharmony_ci 1137db96d56Sopenharmony_ci def visit_StringLeaf(self, item: StringLeaf) -> Set[str]: 1147db96d56Sopenharmony_ci return {item.value} 1157db96d56Sopenharmony_ci 1167db96d56Sopenharmony_ci def visit_Rhs(self, item: Rhs) -> Set[str]: 1177db96d56Sopenharmony_ci result: Set[str] = set() 1187db96d56Sopenharmony_ci for alt in item.alts: 1197db96d56Sopenharmony_ci result |= self.visit(alt) 1207db96d56Sopenharmony_ci return result 1217db96d56Sopenharmony_ci 1227db96d56Sopenharmony_ci def visit_Rule(self, item: Rule) -> Set[str]: 1237db96d56Sopenharmony_ci if item.name in self.in_process: 1247db96d56Sopenharmony_ci return set() 1257db96d56Sopenharmony_ci elif item.name not in self.first_sets: 1267db96d56Sopenharmony_ci self.in_process.add(item.name) 1277db96d56Sopenharmony_ci terminals = self.visit(item.rhs) 1287db96d56Sopenharmony_ci if item in self.nullables: 1297db96d56Sopenharmony_ci terminals.add("") 1307db96d56Sopenharmony_ci self.first_sets[item.name] = terminals 1317db96d56Sopenharmony_ci self.in_process.remove(item.name) 1327db96d56Sopenharmony_ci return self.first_sets[item.name] 1337db96d56Sopenharmony_ci 1347db96d56Sopenharmony_ci 1357db96d56Sopenharmony_cidef main() -> None: 1367db96d56Sopenharmony_ci args = argparser.parse_args() 1377db96d56Sopenharmony_ci 1387db96d56Sopenharmony_ci try: 1397db96d56Sopenharmony_ci grammar, parser, tokenizer = build_parser(args.grammar_file) 1407db96d56Sopenharmony_ci except Exception as err: 1417db96d56Sopenharmony_ci print("ERROR: Failed to parse grammar file", file=sys.stderr) 1427db96d56Sopenharmony_ci sys.exit(1) 1437db96d56Sopenharmony_ci 1447db96d56Sopenharmony_ci firs_sets = FirstSetCalculator(grammar.rules).calculate() 1457db96d56Sopenharmony_ci pprint.pprint(firs_sets) 1467db96d56Sopenharmony_ci 1477db96d56Sopenharmony_ci 1487db96d56Sopenharmony_ciif __name__ == "__main__": 1497db96d56Sopenharmony_ci main() 150