17db96d56Sopenharmony_ci"Utility functions used by the btm_matcher module"
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_cifrom . import pytree
47db96d56Sopenharmony_cifrom .pgen2 import grammar, token
57db96d56Sopenharmony_cifrom .pygram import pattern_symbols, python_symbols
67db96d56Sopenharmony_ci
77db96d56Sopenharmony_cisyms = pattern_symbols
87db96d56Sopenharmony_cipysyms = python_symbols
97db96d56Sopenharmony_citokens = grammar.opmap
107db96d56Sopenharmony_citoken_labels = token
117db96d56Sopenharmony_ci
127db96d56Sopenharmony_ciTYPE_ANY = -1
137db96d56Sopenharmony_ciTYPE_ALTERNATIVES = -2
147db96d56Sopenharmony_ciTYPE_GROUP = -3
157db96d56Sopenharmony_ci
167db96d56Sopenharmony_ciclass MinNode(object):
177db96d56Sopenharmony_ci    """This class serves as an intermediate representation of the
187db96d56Sopenharmony_ci    pattern tree during the conversion to sets of leaf-to-root
197db96d56Sopenharmony_ci    subpatterns"""
207db96d56Sopenharmony_ci
217db96d56Sopenharmony_ci    def __init__(self, type=None, name=None):
227db96d56Sopenharmony_ci        self.type = type
237db96d56Sopenharmony_ci        self.name = name
247db96d56Sopenharmony_ci        self.children = []
257db96d56Sopenharmony_ci        self.leaf = False
267db96d56Sopenharmony_ci        self.parent = None
277db96d56Sopenharmony_ci        self.alternatives = []
287db96d56Sopenharmony_ci        self.group = []
297db96d56Sopenharmony_ci
307db96d56Sopenharmony_ci    def __repr__(self):
317db96d56Sopenharmony_ci        return str(self.type) + ' ' + str(self.name)
327db96d56Sopenharmony_ci
337db96d56Sopenharmony_ci    def leaf_to_root(self):
347db96d56Sopenharmony_ci        """Internal method. Returns a characteristic path of the
357db96d56Sopenharmony_ci        pattern tree. This method must be run for all leaves until the
367db96d56Sopenharmony_ci        linear subpatterns are merged into a single"""
377db96d56Sopenharmony_ci        node = self
387db96d56Sopenharmony_ci        subp = []
397db96d56Sopenharmony_ci        while node:
407db96d56Sopenharmony_ci            if node.type == TYPE_ALTERNATIVES:
417db96d56Sopenharmony_ci                node.alternatives.append(subp)
427db96d56Sopenharmony_ci                if len(node.alternatives) == len(node.children):
437db96d56Sopenharmony_ci                    #last alternative
447db96d56Sopenharmony_ci                    subp = [tuple(node.alternatives)]
457db96d56Sopenharmony_ci                    node.alternatives = []
467db96d56Sopenharmony_ci                    node = node.parent
477db96d56Sopenharmony_ci                    continue
487db96d56Sopenharmony_ci                else:
497db96d56Sopenharmony_ci                    node = node.parent
507db96d56Sopenharmony_ci                    subp = None
517db96d56Sopenharmony_ci                    break
527db96d56Sopenharmony_ci
537db96d56Sopenharmony_ci            if node.type == TYPE_GROUP:
547db96d56Sopenharmony_ci                node.group.append(subp)
557db96d56Sopenharmony_ci                #probably should check the number of leaves
567db96d56Sopenharmony_ci                if len(node.group) == len(node.children):
577db96d56Sopenharmony_ci                    subp = get_characteristic_subpattern(node.group)
587db96d56Sopenharmony_ci                    node.group = []
597db96d56Sopenharmony_ci                    node = node.parent
607db96d56Sopenharmony_ci                    continue
617db96d56Sopenharmony_ci                else:
627db96d56Sopenharmony_ci                    node = node.parent
637db96d56Sopenharmony_ci                    subp = None
647db96d56Sopenharmony_ci                    break
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ci            if node.type == token_labels.NAME and node.name:
677db96d56Sopenharmony_ci                #in case of type=name, use the name instead
687db96d56Sopenharmony_ci                subp.append(node.name)
697db96d56Sopenharmony_ci            else:
707db96d56Sopenharmony_ci                subp.append(node.type)
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci            node = node.parent
737db96d56Sopenharmony_ci        return subp
747db96d56Sopenharmony_ci
757db96d56Sopenharmony_ci    def get_linear_subpattern(self):
767db96d56Sopenharmony_ci        """Drives the leaf_to_root method. The reason that
777db96d56Sopenharmony_ci        leaf_to_root must be run multiple times is because we need to
787db96d56Sopenharmony_ci        reject 'group' matches; for example the alternative form
797db96d56Sopenharmony_ci        (a | b c) creates a group [b c] that needs to be matched. Since
807db96d56Sopenharmony_ci        matching multiple linear patterns overcomes the automaton's
817db96d56Sopenharmony_ci        capabilities, leaf_to_root merges each group into a single
827db96d56Sopenharmony_ci        choice based on 'characteristic'ity,
837db96d56Sopenharmony_ci
847db96d56Sopenharmony_ci        i.e. (a|b c) -> (a|b) if b more characteristic than c
857db96d56Sopenharmony_ci
867db96d56Sopenharmony_ci        Returns: The most 'characteristic'(as defined by
877db96d56Sopenharmony_ci          get_characteristic_subpattern) path for the compiled pattern
887db96d56Sopenharmony_ci          tree.
897db96d56Sopenharmony_ci        """
907db96d56Sopenharmony_ci
917db96d56Sopenharmony_ci        for l in self.leaves():
927db96d56Sopenharmony_ci            subp = l.leaf_to_root()
937db96d56Sopenharmony_ci            if subp:
947db96d56Sopenharmony_ci                return subp
957db96d56Sopenharmony_ci
967db96d56Sopenharmony_ci    def leaves(self):
977db96d56Sopenharmony_ci        "Generator that returns the leaves of the tree"
987db96d56Sopenharmony_ci        for child in self.children:
997db96d56Sopenharmony_ci            yield from child.leaves()
1007db96d56Sopenharmony_ci        if not self.children:
1017db96d56Sopenharmony_ci            yield self
1027db96d56Sopenharmony_ci
1037db96d56Sopenharmony_cidef reduce_tree(node, parent=None):
1047db96d56Sopenharmony_ci    """
1057db96d56Sopenharmony_ci    Internal function. Reduces a compiled pattern tree to an
1067db96d56Sopenharmony_ci    intermediate representation suitable for feeding the
1077db96d56Sopenharmony_ci    automaton. This also trims off any optional pattern elements(like
1087db96d56Sopenharmony_ci    [a], a*).
1097db96d56Sopenharmony_ci    """
1107db96d56Sopenharmony_ci
1117db96d56Sopenharmony_ci    new_node = None
1127db96d56Sopenharmony_ci    #switch on the node type
1137db96d56Sopenharmony_ci    if node.type == syms.Matcher:
1147db96d56Sopenharmony_ci        #skip
1157db96d56Sopenharmony_ci        node = node.children[0]
1167db96d56Sopenharmony_ci
1177db96d56Sopenharmony_ci    if node.type == syms.Alternatives  :
1187db96d56Sopenharmony_ci        #2 cases
1197db96d56Sopenharmony_ci        if len(node.children) <= 2:
1207db96d56Sopenharmony_ci            #just a single 'Alternative', skip this node
1217db96d56Sopenharmony_ci            new_node = reduce_tree(node.children[0], parent)
1227db96d56Sopenharmony_ci        else:
1237db96d56Sopenharmony_ci            #real alternatives
1247db96d56Sopenharmony_ci            new_node = MinNode(type=TYPE_ALTERNATIVES)
1257db96d56Sopenharmony_ci            #skip odd children('|' tokens)
1267db96d56Sopenharmony_ci            for child in node.children:
1277db96d56Sopenharmony_ci                if node.children.index(child)%2:
1287db96d56Sopenharmony_ci                    continue
1297db96d56Sopenharmony_ci                reduced = reduce_tree(child, new_node)
1307db96d56Sopenharmony_ci                if reduced is not None:
1317db96d56Sopenharmony_ci                    new_node.children.append(reduced)
1327db96d56Sopenharmony_ci    elif node.type == syms.Alternative:
1337db96d56Sopenharmony_ci        if len(node.children) > 1:
1347db96d56Sopenharmony_ci
1357db96d56Sopenharmony_ci            new_node = MinNode(type=TYPE_GROUP)
1367db96d56Sopenharmony_ci            for child in node.children:
1377db96d56Sopenharmony_ci                reduced = reduce_tree(child, new_node)
1387db96d56Sopenharmony_ci                if reduced:
1397db96d56Sopenharmony_ci                    new_node.children.append(reduced)
1407db96d56Sopenharmony_ci            if not new_node.children:
1417db96d56Sopenharmony_ci                # delete the group if all of the children were reduced to None
1427db96d56Sopenharmony_ci                new_node = None
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_ci        else:
1457db96d56Sopenharmony_ci            new_node = reduce_tree(node.children[0], parent)
1467db96d56Sopenharmony_ci
1477db96d56Sopenharmony_ci    elif node.type == syms.Unit:
1487db96d56Sopenharmony_ci        if (isinstance(node.children[0], pytree.Leaf) and
1497db96d56Sopenharmony_ci            node.children[0].value == '('):
1507db96d56Sopenharmony_ci            #skip parentheses
1517db96d56Sopenharmony_ci            return reduce_tree(node.children[1], parent)
1527db96d56Sopenharmony_ci        if ((isinstance(node.children[0], pytree.Leaf) and
1537db96d56Sopenharmony_ci               node.children[0].value == '[')
1547db96d56Sopenharmony_ci               or
1557db96d56Sopenharmony_ci               (len(node.children)>1 and
1567db96d56Sopenharmony_ci               hasattr(node.children[1], "value") and
1577db96d56Sopenharmony_ci               node.children[1].value == '[')):
1587db96d56Sopenharmony_ci            #skip whole unit if its optional
1597db96d56Sopenharmony_ci            return None
1607db96d56Sopenharmony_ci
1617db96d56Sopenharmony_ci        leaf = True
1627db96d56Sopenharmony_ci        details_node = None
1637db96d56Sopenharmony_ci        alternatives_node = None
1647db96d56Sopenharmony_ci        has_repeater = False
1657db96d56Sopenharmony_ci        repeater_node = None
1667db96d56Sopenharmony_ci        has_variable_name = False
1677db96d56Sopenharmony_ci
1687db96d56Sopenharmony_ci        for child in node.children:
1697db96d56Sopenharmony_ci            if child.type == syms.Details:
1707db96d56Sopenharmony_ci                leaf = False
1717db96d56Sopenharmony_ci                details_node = child
1727db96d56Sopenharmony_ci            elif child.type == syms.Repeater:
1737db96d56Sopenharmony_ci                has_repeater = True
1747db96d56Sopenharmony_ci                repeater_node = child
1757db96d56Sopenharmony_ci            elif child.type == syms.Alternatives:
1767db96d56Sopenharmony_ci                alternatives_node = child
1777db96d56Sopenharmony_ci            if hasattr(child, 'value') and child.value == '=': # variable name
1787db96d56Sopenharmony_ci                has_variable_name = True
1797db96d56Sopenharmony_ci
1807db96d56Sopenharmony_ci        #skip variable name
1817db96d56Sopenharmony_ci        if has_variable_name:
1827db96d56Sopenharmony_ci            #skip variable name, '='
1837db96d56Sopenharmony_ci            name_leaf = node.children[2]
1847db96d56Sopenharmony_ci            if hasattr(name_leaf, 'value') and name_leaf.value == '(':
1857db96d56Sopenharmony_ci                # skip parenthesis
1867db96d56Sopenharmony_ci                name_leaf = node.children[3]
1877db96d56Sopenharmony_ci        else:
1887db96d56Sopenharmony_ci            name_leaf = node.children[0]
1897db96d56Sopenharmony_ci
1907db96d56Sopenharmony_ci        #set node type
1917db96d56Sopenharmony_ci        if name_leaf.type == token_labels.NAME:
1927db96d56Sopenharmony_ci            #(python) non-name or wildcard
1937db96d56Sopenharmony_ci            if name_leaf.value == 'any':
1947db96d56Sopenharmony_ci                new_node = MinNode(type=TYPE_ANY)
1957db96d56Sopenharmony_ci            else:
1967db96d56Sopenharmony_ci                if hasattr(token_labels, name_leaf.value):
1977db96d56Sopenharmony_ci                    new_node = MinNode(type=getattr(token_labels, name_leaf.value))
1987db96d56Sopenharmony_ci                else:
1997db96d56Sopenharmony_ci                    new_node = MinNode(type=getattr(pysyms, name_leaf.value))
2007db96d56Sopenharmony_ci
2017db96d56Sopenharmony_ci        elif name_leaf.type == token_labels.STRING:
2027db96d56Sopenharmony_ci            #(python) name or character; remove the apostrophes from
2037db96d56Sopenharmony_ci            #the string value
2047db96d56Sopenharmony_ci            name = name_leaf.value.strip("'")
2057db96d56Sopenharmony_ci            if name in tokens:
2067db96d56Sopenharmony_ci                new_node = MinNode(type=tokens[name])
2077db96d56Sopenharmony_ci            else:
2087db96d56Sopenharmony_ci                new_node = MinNode(type=token_labels.NAME, name=name)
2097db96d56Sopenharmony_ci        elif name_leaf.type == syms.Alternatives:
2107db96d56Sopenharmony_ci            new_node = reduce_tree(alternatives_node, parent)
2117db96d56Sopenharmony_ci
2127db96d56Sopenharmony_ci        #handle repeaters
2137db96d56Sopenharmony_ci        if has_repeater:
2147db96d56Sopenharmony_ci            if repeater_node.children[0].value == '*':
2157db96d56Sopenharmony_ci                #reduce to None
2167db96d56Sopenharmony_ci                new_node = None
2177db96d56Sopenharmony_ci            elif repeater_node.children[0].value == '+':
2187db96d56Sopenharmony_ci                #reduce to a single occurrence i.e. do nothing
2197db96d56Sopenharmony_ci                pass
2207db96d56Sopenharmony_ci            else:
2217db96d56Sopenharmony_ci                #TODO: handle {min, max} repeaters
2227db96d56Sopenharmony_ci                raise NotImplementedError
2237db96d56Sopenharmony_ci
2247db96d56Sopenharmony_ci        #add children
2257db96d56Sopenharmony_ci        if details_node and new_node is not None:
2267db96d56Sopenharmony_ci            for child in details_node.children[1:-1]:
2277db96d56Sopenharmony_ci                #skip '<', '>' markers
2287db96d56Sopenharmony_ci                reduced = reduce_tree(child, new_node)
2297db96d56Sopenharmony_ci                if reduced is not None:
2307db96d56Sopenharmony_ci                    new_node.children.append(reduced)
2317db96d56Sopenharmony_ci    if new_node:
2327db96d56Sopenharmony_ci        new_node.parent = parent
2337db96d56Sopenharmony_ci    return new_node
2347db96d56Sopenharmony_ci
2357db96d56Sopenharmony_ci
2367db96d56Sopenharmony_cidef get_characteristic_subpattern(subpatterns):
2377db96d56Sopenharmony_ci    """Picks the most characteristic from a list of linear patterns
2387db96d56Sopenharmony_ci    Current order used is:
2397db96d56Sopenharmony_ci    names > common_names > common_chars
2407db96d56Sopenharmony_ci    """
2417db96d56Sopenharmony_ci    if not isinstance(subpatterns, list):
2427db96d56Sopenharmony_ci        return subpatterns
2437db96d56Sopenharmony_ci    if len(subpatterns)==1:
2447db96d56Sopenharmony_ci        return subpatterns[0]
2457db96d56Sopenharmony_ci
2467db96d56Sopenharmony_ci    # first pick out the ones containing variable names
2477db96d56Sopenharmony_ci    subpatterns_with_names = []
2487db96d56Sopenharmony_ci    subpatterns_with_common_names = []
2497db96d56Sopenharmony_ci    common_names = ['in', 'for', 'if' , 'not', 'None']
2507db96d56Sopenharmony_ci    subpatterns_with_common_chars = []
2517db96d56Sopenharmony_ci    common_chars = "[]().,:"
2527db96d56Sopenharmony_ci    for subpattern in subpatterns:
2537db96d56Sopenharmony_ci        if any(rec_test(subpattern, lambda x: type(x) is str)):
2547db96d56Sopenharmony_ci            if any(rec_test(subpattern,
2557db96d56Sopenharmony_ci                            lambda x: isinstance(x, str) and x in common_chars)):
2567db96d56Sopenharmony_ci                subpatterns_with_common_chars.append(subpattern)
2577db96d56Sopenharmony_ci            elif any(rec_test(subpattern,
2587db96d56Sopenharmony_ci                              lambda x: isinstance(x, str) and x in common_names)):
2597db96d56Sopenharmony_ci                subpatterns_with_common_names.append(subpattern)
2607db96d56Sopenharmony_ci
2617db96d56Sopenharmony_ci            else:
2627db96d56Sopenharmony_ci                subpatterns_with_names.append(subpattern)
2637db96d56Sopenharmony_ci
2647db96d56Sopenharmony_ci    if subpatterns_with_names:
2657db96d56Sopenharmony_ci        subpatterns = subpatterns_with_names
2667db96d56Sopenharmony_ci    elif subpatterns_with_common_names:
2677db96d56Sopenharmony_ci        subpatterns = subpatterns_with_common_names
2687db96d56Sopenharmony_ci    elif subpatterns_with_common_chars:
2697db96d56Sopenharmony_ci        subpatterns = subpatterns_with_common_chars
2707db96d56Sopenharmony_ci    # of the remaining subpatterns pick out the longest one
2717db96d56Sopenharmony_ci    return max(subpatterns, key=len)
2727db96d56Sopenharmony_ci
2737db96d56Sopenharmony_cidef rec_test(sequence, test_func):
2747db96d56Sopenharmony_ci    """Tests test_func on all items of sequence and items of included
2757db96d56Sopenharmony_ci    sub-iterables"""
2767db96d56Sopenharmony_ci    for x in sequence:
2777db96d56Sopenharmony_ci        if isinstance(x, (list, tuple)):
2787db96d56Sopenharmony_ci            yield from rec_test(x, test_func)
2797db96d56Sopenharmony_ci        else:
2807db96d56Sopenharmony_ci            yield test_func(x)
281