17db96d56Sopenharmony_ci# Copyright 2006 Google, Inc. All Rights Reserved.
27db96d56Sopenharmony_ci# Licensed to PSF under a Contributor Agreement.
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ci"""Pattern compiler.
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ciThe grammar is taken from PatternGrammar.txt.
77db96d56Sopenharmony_ci
87db96d56Sopenharmony_ciThe compiler compiles a pattern to a pytree.*Pattern instance.
97db96d56Sopenharmony_ci"""
107db96d56Sopenharmony_ci
117db96d56Sopenharmony_ci__author__ = "Guido van Rossum <guido@python.org>"
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ci# Python imports
147db96d56Sopenharmony_ciimport io
157db96d56Sopenharmony_ci
167db96d56Sopenharmony_ci# Fairly local imports
177db96d56Sopenharmony_cifrom .pgen2 import driver, literals, token, tokenize, parse, grammar
187db96d56Sopenharmony_ci
197db96d56Sopenharmony_ci# Really local imports
207db96d56Sopenharmony_cifrom . import pytree
217db96d56Sopenharmony_cifrom . import pygram
227db96d56Sopenharmony_ci
237db96d56Sopenharmony_ci
247db96d56Sopenharmony_ciclass PatternSyntaxError(Exception):
257db96d56Sopenharmony_ci    pass
267db96d56Sopenharmony_ci
277db96d56Sopenharmony_ci
287db96d56Sopenharmony_cidef tokenize_wrapper(input):
297db96d56Sopenharmony_ci    """Tokenizes a string suppressing significant whitespace."""
307db96d56Sopenharmony_ci    skip = {token.NEWLINE, token.INDENT, token.DEDENT}
317db96d56Sopenharmony_ci    tokens = tokenize.generate_tokens(io.StringIO(input).readline)
327db96d56Sopenharmony_ci    for quintuple in tokens:
337db96d56Sopenharmony_ci        type, value, start, end, line_text = quintuple
347db96d56Sopenharmony_ci        if type not in skip:
357db96d56Sopenharmony_ci            yield quintuple
367db96d56Sopenharmony_ci
377db96d56Sopenharmony_ci
387db96d56Sopenharmony_ciclass PatternCompiler(object):
397db96d56Sopenharmony_ci
407db96d56Sopenharmony_ci    def __init__(self, grammar_file=None):
417db96d56Sopenharmony_ci        """Initializer.
427db96d56Sopenharmony_ci
437db96d56Sopenharmony_ci        Takes an optional alternative filename for the pattern grammar.
447db96d56Sopenharmony_ci        """
457db96d56Sopenharmony_ci        if grammar_file is None:
467db96d56Sopenharmony_ci            self.grammar = pygram.pattern_grammar
477db96d56Sopenharmony_ci            self.syms = pygram.pattern_symbols
487db96d56Sopenharmony_ci        else:
497db96d56Sopenharmony_ci            self.grammar = driver.load_grammar(grammar_file)
507db96d56Sopenharmony_ci            self.syms = pygram.Symbols(self.grammar)
517db96d56Sopenharmony_ci        self.pygrammar = pygram.python_grammar
527db96d56Sopenharmony_ci        self.pysyms = pygram.python_symbols
537db96d56Sopenharmony_ci        self.driver = driver.Driver(self.grammar, convert=pattern_convert)
547db96d56Sopenharmony_ci
557db96d56Sopenharmony_ci    def compile_pattern(self, input, debug=False, with_tree=False):
567db96d56Sopenharmony_ci        """Compiles a pattern string to a nested pytree.*Pattern object."""
577db96d56Sopenharmony_ci        tokens = tokenize_wrapper(input)
587db96d56Sopenharmony_ci        try:
597db96d56Sopenharmony_ci            root = self.driver.parse_tokens(tokens, debug=debug)
607db96d56Sopenharmony_ci        except parse.ParseError as e:
617db96d56Sopenharmony_ci            raise PatternSyntaxError(str(e)) from None
627db96d56Sopenharmony_ci        if with_tree:
637db96d56Sopenharmony_ci            return self.compile_node(root), root
647db96d56Sopenharmony_ci        else:
657db96d56Sopenharmony_ci            return self.compile_node(root)
667db96d56Sopenharmony_ci
677db96d56Sopenharmony_ci    def compile_node(self, node):
687db96d56Sopenharmony_ci        """Compiles a node, recursively.
697db96d56Sopenharmony_ci
707db96d56Sopenharmony_ci        This is one big switch on the node type.
717db96d56Sopenharmony_ci        """
727db96d56Sopenharmony_ci        # XXX Optimize certain Wildcard-containing-Wildcard patterns
737db96d56Sopenharmony_ci        # that can be merged
747db96d56Sopenharmony_ci        if node.type == self.syms.Matcher:
757db96d56Sopenharmony_ci            node = node.children[0] # Avoid unneeded recursion
767db96d56Sopenharmony_ci
777db96d56Sopenharmony_ci        if node.type == self.syms.Alternatives:
787db96d56Sopenharmony_ci            # Skip the odd children since they are just '|' tokens
797db96d56Sopenharmony_ci            alts = [self.compile_node(ch) for ch in node.children[::2]]
807db96d56Sopenharmony_ci            if len(alts) == 1:
817db96d56Sopenharmony_ci                return alts[0]
827db96d56Sopenharmony_ci            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
837db96d56Sopenharmony_ci            return p.optimize()
847db96d56Sopenharmony_ci
857db96d56Sopenharmony_ci        if node.type == self.syms.Alternative:
867db96d56Sopenharmony_ci            units = [self.compile_node(ch) for ch in node.children]
877db96d56Sopenharmony_ci            if len(units) == 1:
887db96d56Sopenharmony_ci                return units[0]
897db96d56Sopenharmony_ci            p = pytree.WildcardPattern([units], min=1, max=1)
907db96d56Sopenharmony_ci            return p.optimize()
917db96d56Sopenharmony_ci
927db96d56Sopenharmony_ci        if node.type == self.syms.NegatedUnit:
937db96d56Sopenharmony_ci            pattern = self.compile_basic(node.children[1:])
947db96d56Sopenharmony_ci            p = pytree.NegatedPattern(pattern)
957db96d56Sopenharmony_ci            return p.optimize()
967db96d56Sopenharmony_ci
977db96d56Sopenharmony_ci        assert node.type == self.syms.Unit
987db96d56Sopenharmony_ci
997db96d56Sopenharmony_ci        name = None
1007db96d56Sopenharmony_ci        nodes = node.children
1017db96d56Sopenharmony_ci        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
1027db96d56Sopenharmony_ci            name = nodes[0].value
1037db96d56Sopenharmony_ci            nodes = nodes[2:]
1047db96d56Sopenharmony_ci        repeat = None
1057db96d56Sopenharmony_ci        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
1067db96d56Sopenharmony_ci            repeat = nodes[-1]
1077db96d56Sopenharmony_ci            nodes = nodes[:-1]
1087db96d56Sopenharmony_ci
1097db96d56Sopenharmony_ci        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
1107db96d56Sopenharmony_ci        pattern = self.compile_basic(nodes, repeat)
1117db96d56Sopenharmony_ci
1127db96d56Sopenharmony_ci        if repeat is not None:
1137db96d56Sopenharmony_ci            assert repeat.type == self.syms.Repeater
1147db96d56Sopenharmony_ci            children = repeat.children
1157db96d56Sopenharmony_ci            child = children[0]
1167db96d56Sopenharmony_ci            if child.type == token.STAR:
1177db96d56Sopenharmony_ci                min = 0
1187db96d56Sopenharmony_ci                max = pytree.HUGE
1197db96d56Sopenharmony_ci            elif child.type == token.PLUS:
1207db96d56Sopenharmony_ci                min = 1
1217db96d56Sopenharmony_ci                max = pytree.HUGE
1227db96d56Sopenharmony_ci            elif child.type == token.LBRACE:
1237db96d56Sopenharmony_ci                assert children[-1].type == token.RBRACE
1247db96d56Sopenharmony_ci                assert  len(children) in (3, 5)
1257db96d56Sopenharmony_ci                min = max = self.get_int(children[1])
1267db96d56Sopenharmony_ci                if len(children) == 5:
1277db96d56Sopenharmony_ci                    max = self.get_int(children[3])
1287db96d56Sopenharmony_ci            else:
1297db96d56Sopenharmony_ci                assert False
1307db96d56Sopenharmony_ci            if min != 1 or max != 1:
1317db96d56Sopenharmony_ci                pattern = pattern.optimize()
1327db96d56Sopenharmony_ci                pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
1337db96d56Sopenharmony_ci
1347db96d56Sopenharmony_ci        if name is not None:
1357db96d56Sopenharmony_ci            pattern.name = name
1367db96d56Sopenharmony_ci        return pattern.optimize()
1377db96d56Sopenharmony_ci
1387db96d56Sopenharmony_ci    def compile_basic(self, nodes, repeat=None):
1397db96d56Sopenharmony_ci        # Compile STRING | NAME [Details] | (...) | [...]
1407db96d56Sopenharmony_ci        assert len(nodes) >= 1
1417db96d56Sopenharmony_ci        node = nodes[0]
1427db96d56Sopenharmony_ci        if node.type == token.STRING:
1437db96d56Sopenharmony_ci            value = str(literals.evalString(node.value))
1447db96d56Sopenharmony_ci            return pytree.LeafPattern(_type_of_literal(value), value)
1457db96d56Sopenharmony_ci        elif node.type == token.NAME:
1467db96d56Sopenharmony_ci            value = node.value
1477db96d56Sopenharmony_ci            if value.isupper():
1487db96d56Sopenharmony_ci                if value not in TOKEN_MAP:
1497db96d56Sopenharmony_ci                    raise PatternSyntaxError("Invalid token: %r" % value)
1507db96d56Sopenharmony_ci                if nodes[1:]:
1517db96d56Sopenharmony_ci                    raise PatternSyntaxError("Can't have details for token")
1527db96d56Sopenharmony_ci                return pytree.LeafPattern(TOKEN_MAP[value])
1537db96d56Sopenharmony_ci            else:
1547db96d56Sopenharmony_ci                if value == "any":
1557db96d56Sopenharmony_ci                    type = None
1567db96d56Sopenharmony_ci                elif not value.startswith("_"):
1577db96d56Sopenharmony_ci                    type = getattr(self.pysyms, value, None)
1587db96d56Sopenharmony_ci                    if type is None:
1597db96d56Sopenharmony_ci                        raise PatternSyntaxError("Invalid symbol: %r" % value)
1607db96d56Sopenharmony_ci                if nodes[1:]: # Details present
1617db96d56Sopenharmony_ci                    content = [self.compile_node(nodes[1].children[1])]
1627db96d56Sopenharmony_ci                else:
1637db96d56Sopenharmony_ci                    content = None
1647db96d56Sopenharmony_ci                return pytree.NodePattern(type, content)
1657db96d56Sopenharmony_ci        elif node.value == "(":
1667db96d56Sopenharmony_ci            return self.compile_node(nodes[1])
1677db96d56Sopenharmony_ci        elif node.value == "[":
1687db96d56Sopenharmony_ci            assert repeat is None
1697db96d56Sopenharmony_ci            subpattern = self.compile_node(nodes[1])
1707db96d56Sopenharmony_ci            return pytree.WildcardPattern([[subpattern]], min=0, max=1)
1717db96d56Sopenharmony_ci        assert False, node
1727db96d56Sopenharmony_ci
1737db96d56Sopenharmony_ci    def get_int(self, node):
1747db96d56Sopenharmony_ci        assert node.type == token.NUMBER
1757db96d56Sopenharmony_ci        return int(node.value)
1767db96d56Sopenharmony_ci
1777db96d56Sopenharmony_ci
1787db96d56Sopenharmony_ci# Map named tokens to the type value for a LeafPattern
1797db96d56Sopenharmony_ciTOKEN_MAP = {"NAME": token.NAME,
1807db96d56Sopenharmony_ci             "STRING": token.STRING,
1817db96d56Sopenharmony_ci             "NUMBER": token.NUMBER,
1827db96d56Sopenharmony_ci             "TOKEN": None}
1837db96d56Sopenharmony_ci
1847db96d56Sopenharmony_ci
1857db96d56Sopenharmony_cidef _type_of_literal(value):
1867db96d56Sopenharmony_ci    if value[0].isalpha():
1877db96d56Sopenharmony_ci        return token.NAME
1887db96d56Sopenharmony_ci    elif value in grammar.opmap:
1897db96d56Sopenharmony_ci        return grammar.opmap[value]
1907db96d56Sopenharmony_ci    else:
1917db96d56Sopenharmony_ci        return None
1927db96d56Sopenharmony_ci
1937db96d56Sopenharmony_ci
1947db96d56Sopenharmony_cidef pattern_convert(grammar, raw_node_info):
1957db96d56Sopenharmony_ci    """Converts raw node information to a Node or Leaf instance."""
1967db96d56Sopenharmony_ci    type, value, context, children = raw_node_info
1977db96d56Sopenharmony_ci    if children or type in grammar.number2symbol:
1987db96d56Sopenharmony_ci        return pytree.Node(type, children, context=context)
1997db96d56Sopenharmony_ci    else:
2007db96d56Sopenharmony_ci        return pytree.Leaf(type, value, context=context)
2017db96d56Sopenharmony_ci
2027db96d56Sopenharmony_ci
2037db96d56Sopenharmony_cidef compile_pattern(pattern):
2047db96d56Sopenharmony_ci    return PatternCompiler().compile_pattern(pattern)
205