17db96d56Sopenharmony_ci#!/usr/bin/env python3.8
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_ciimport argparse
47db96d56Sopenharmony_ciimport pprint
57db96d56Sopenharmony_ciimport sys
67db96d56Sopenharmony_cifrom typing import Dict, Set
77db96d56Sopenharmony_ci
87db96d56Sopenharmony_cifrom pegen.build import build_parser
97db96d56Sopenharmony_cifrom pegen.grammar import (
107db96d56Sopenharmony_ci    Alt,
117db96d56Sopenharmony_ci    Cut,
127db96d56Sopenharmony_ci    Gather,
137db96d56Sopenharmony_ci    GrammarVisitor,
147db96d56Sopenharmony_ci    Group,
157db96d56Sopenharmony_ci    Lookahead,
167db96d56Sopenharmony_ci    NamedItem,
177db96d56Sopenharmony_ci    NameLeaf,
187db96d56Sopenharmony_ci    NegativeLookahead,
197db96d56Sopenharmony_ci    Opt,
207db96d56Sopenharmony_ci    Repeat0,
217db96d56Sopenharmony_ci    Repeat1,
227db96d56Sopenharmony_ci    Rhs,
237db96d56Sopenharmony_ci    Rule,
247db96d56Sopenharmony_ci    StringLeaf,
257db96d56Sopenharmony_ci)
267db96d56Sopenharmony_cifrom pegen.parser_generator import compute_nullables
277db96d56Sopenharmony_ci
287db96d56Sopenharmony_ciargparser = argparse.ArgumentParser(
297db96d56Sopenharmony_ci    prog="calculate_first_sets",
307db96d56Sopenharmony_ci    description="Calculate the first sets of a grammar",
317db96d56Sopenharmony_ci)
327db96d56Sopenharmony_ciargparser.add_argument("grammar_file", help="The grammar file")
337db96d56Sopenharmony_ci
347db96d56Sopenharmony_ci
357db96d56Sopenharmony_ciclass FirstSetCalculator(GrammarVisitor):
367db96d56Sopenharmony_ci    def __init__(self, rules: Dict[str, Rule]) -> None:
377db96d56Sopenharmony_ci        self.rules = rules
387db96d56Sopenharmony_ci        self.nullables = compute_nullables(rules)
397db96d56Sopenharmony_ci        self.first_sets: Dict[str, Set[str]] = dict()
407db96d56Sopenharmony_ci        self.in_process: Set[str] = set()
417db96d56Sopenharmony_ci
427db96d56Sopenharmony_ci    def calculate(self) -> Dict[str, Set[str]]:
437db96d56Sopenharmony_ci        for name, rule in self.rules.items():
447db96d56Sopenharmony_ci            self.visit(rule)
457db96d56Sopenharmony_ci        return self.first_sets
467db96d56Sopenharmony_ci
477db96d56Sopenharmony_ci    def visit_Alt(self, item: Alt) -> Set[str]:
487db96d56Sopenharmony_ci        result: Set[str] = set()
497db96d56Sopenharmony_ci        to_remove: Set[str] = set()
507db96d56Sopenharmony_ci        for other in item.items:
517db96d56Sopenharmony_ci            new_terminals = self.visit(other)
527db96d56Sopenharmony_ci            if isinstance(other.item, NegativeLookahead):
537db96d56Sopenharmony_ci                to_remove |= new_terminals
547db96d56Sopenharmony_ci            result |= new_terminals
557db96d56Sopenharmony_ci            if to_remove:
567db96d56Sopenharmony_ci                result -= to_remove
577db96d56Sopenharmony_ci
587db96d56Sopenharmony_ci            # If the set of new terminals can start with the empty string,
597db96d56Sopenharmony_ci            # it means that the item is completely nullable and we should
607db96d56Sopenharmony_ci            # also considering at least the next item in case the current
617db96d56Sopenharmony_ci            # one fails to parse.
627db96d56Sopenharmony_ci
637db96d56Sopenharmony_ci            if "" in new_terminals:
647db96d56Sopenharmony_ci                continue
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ci            if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
677db96d56Sopenharmony_ci                break
687db96d56Sopenharmony_ci
697db96d56Sopenharmony_ci        # Do not allow the empty string to propagate.
707db96d56Sopenharmony_ci        result.discard("")
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci        return result
737db96d56Sopenharmony_ci
747db96d56Sopenharmony_ci    def visit_Cut(self, item: Cut) -> Set[str]:
757db96d56Sopenharmony_ci        return set()
767db96d56Sopenharmony_ci
777db96d56Sopenharmony_ci    def visit_Group(self, item: Group) -> Set[str]:
787db96d56Sopenharmony_ci        return self.visit(item.rhs)
797db96d56Sopenharmony_ci
807db96d56Sopenharmony_ci    def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
817db96d56Sopenharmony_ci        return self.visit(item.node)
827db96d56Sopenharmony_ci
837db96d56Sopenharmony_ci    def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
847db96d56Sopenharmony_ci        return self.visit(item.node)
857db96d56Sopenharmony_ci
867db96d56Sopenharmony_ci    def visit_NamedItem(self, item: NamedItem) -> Set[str]:
877db96d56Sopenharmony_ci        return self.visit(item.item)
887db96d56Sopenharmony_ci
897db96d56Sopenharmony_ci    def visit_Opt(self, item: Opt) -> Set[str]:
907db96d56Sopenharmony_ci        return self.visit(item.node)
917db96d56Sopenharmony_ci
927db96d56Sopenharmony_ci    def visit_Gather(self, item: Gather) -> Set[str]:
937db96d56Sopenharmony_ci        return self.visit(item.node)
947db96d56Sopenharmony_ci
957db96d56Sopenharmony_ci    def visit_Repeat0(self, item: Repeat0) -> Set[str]:
967db96d56Sopenharmony_ci        return self.visit(item.node)
977db96d56Sopenharmony_ci
987db96d56Sopenharmony_ci    def visit_Repeat1(self, item: Repeat1) -> Set[str]:
997db96d56Sopenharmony_ci        return self.visit(item.node)
1007db96d56Sopenharmony_ci
1017db96d56Sopenharmony_ci    def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
1027db96d56Sopenharmony_ci        if item.value not in self.rules:
1037db96d56Sopenharmony_ci            return {item.value}
1047db96d56Sopenharmony_ci
1057db96d56Sopenharmony_ci        if item.value not in self.first_sets:
1067db96d56Sopenharmony_ci            self.first_sets[item.value] = self.visit(self.rules[item.value])
1077db96d56Sopenharmony_ci            return self.first_sets[item.value]
1087db96d56Sopenharmony_ci        elif item.value in self.in_process:
1097db96d56Sopenharmony_ci            return set()
1107db96d56Sopenharmony_ci
1117db96d56Sopenharmony_ci        return self.first_sets[item.value]
1127db96d56Sopenharmony_ci
1137db96d56Sopenharmony_ci    def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
1147db96d56Sopenharmony_ci        return {item.value}
1157db96d56Sopenharmony_ci
1167db96d56Sopenharmony_ci    def visit_Rhs(self, item: Rhs) -> Set[str]:
1177db96d56Sopenharmony_ci        result: Set[str] = set()
1187db96d56Sopenharmony_ci        for alt in item.alts:
1197db96d56Sopenharmony_ci            result |= self.visit(alt)
1207db96d56Sopenharmony_ci        return result
1217db96d56Sopenharmony_ci
1227db96d56Sopenharmony_ci    def visit_Rule(self, item: Rule) -> Set[str]:
1237db96d56Sopenharmony_ci        if item.name in self.in_process:
1247db96d56Sopenharmony_ci            return set()
1257db96d56Sopenharmony_ci        elif item.name not in self.first_sets:
1267db96d56Sopenharmony_ci            self.in_process.add(item.name)
1277db96d56Sopenharmony_ci            terminals = self.visit(item.rhs)
1287db96d56Sopenharmony_ci            if item in self.nullables:
1297db96d56Sopenharmony_ci                terminals.add("")
1307db96d56Sopenharmony_ci            self.first_sets[item.name] = terminals
1317db96d56Sopenharmony_ci            self.in_process.remove(item.name)
1327db96d56Sopenharmony_ci        return self.first_sets[item.name]
1337db96d56Sopenharmony_ci
1347db96d56Sopenharmony_ci
1357db96d56Sopenharmony_cidef main() -> None:
1367db96d56Sopenharmony_ci    args = argparser.parse_args()
1377db96d56Sopenharmony_ci
1387db96d56Sopenharmony_ci    try:
1397db96d56Sopenharmony_ci        grammar, parser, tokenizer = build_parser(args.grammar_file)
1407db96d56Sopenharmony_ci    except Exception as err:
1417db96d56Sopenharmony_ci        print("ERROR: Failed to parse grammar file", file=sys.stderr)
1427db96d56Sopenharmony_ci        sys.exit(1)
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_ci    firs_sets = FirstSetCalculator(grammar.rules).calculate()
1457db96d56Sopenharmony_ci    pprint.pprint(firs_sets)
1467db96d56Sopenharmony_ci
1477db96d56Sopenharmony_ci
1487db96d56Sopenharmony_ciif __name__ == "__main__":
1497db96d56Sopenharmony_ci    main()
150