17db96d56Sopenharmony_ci"Utility functions used by the btm_matcher module" 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_cifrom . import pytree 47db96d56Sopenharmony_cifrom .pgen2 import grammar, token 57db96d56Sopenharmony_cifrom .pygram import pattern_symbols, python_symbols 67db96d56Sopenharmony_ci 77db96d56Sopenharmony_cisyms = pattern_symbols 87db96d56Sopenharmony_cipysyms = python_symbols 97db96d56Sopenharmony_citokens = grammar.opmap 107db96d56Sopenharmony_citoken_labels = token 117db96d56Sopenharmony_ci 127db96d56Sopenharmony_ciTYPE_ANY = -1 137db96d56Sopenharmony_ciTYPE_ALTERNATIVES = -2 147db96d56Sopenharmony_ciTYPE_GROUP = -3 157db96d56Sopenharmony_ci 167db96d56Sopenharmony_ciclass MinNode(object): 177db96d56Sopenharmony_ci """This class serves as an intermediate representation of the 187db96d56Sopenharmony_ci pattern tree during the conversion to sets of leaf-to-root 197db96d56Sopenharmony_ci subpatterns""" 207db96d56Sopenharmony_ci 217db96d56Sopenharmony_ci def __init__(self, type=None, name=None): 227db96d56Sopenharmony_ci self.type = type 237db96d56Sopenharmony_ci self.name = name 247db96d56Sopenharmony_ci self.children = [] 257db96d56Sopenharmony_ci self.leaf = False 267db96d56Sopenharmony_ci self.parent = None 277db96d56Sopenharmony_ci self.alternatives = [] 287db96d56Sopenharmony_ci self.group = [] 297db96d56Sopenharmony_ci 307db96d56Sopenharmony_ci def __repr__(self): 317db96d56Sopenharmony_ci return str(self.type) + ' ' + str(self.name) 327db96d56Sopenharmony_ci 337db96d56Sopenharmony_ci def leaf_to_root(self): 347db96d56Sopenharmony_ci """Internal method. Returns a characteristic path of the 357db96d56Sopenharmony_ci pattern tree. This method must be run for all leaves until the 367db96d56Sopenharmony_ci linear subpatterns are merged into a single""" 377db96d56Sopenharmony_ci node = self 387db96d56Sopenharmony_ci subp = [] 397db96d56Sopenharmony_ci while node: 407db96d56Sopenharmony_ci if node.type == TYPE_ALTERNATIVES: 417db96d56Sopenharmony_ci node.alternatives.append(subp) 427db96d56Sopenharmony_ci if len(node.alternatives) == len(node.children): 437db96d56Sopenharmony_ci #last alternative 447db96d56Sopenharmony_ci subp = [tuple(node.alternatives)] 457db96d56Sopenharmony_ci node.alternatives = [] 467db96d56Sopenharmony_ci node = node.parent 477db96d56Sopenharmony_ci continue 487db96d56Sopenharmony_ci else: 497db96d56Sopenharmony_ci node = node.parent 507db96d56Sopenharmony_ci subp = None 517db96d56Sopenharmony_ci break 527db96d56Sopenharmony_ci 537db96d56Sopenharmony_ci if node.type == TYPE_GROUP: 547db96d56Sopenharmony_ci node.group.append(subp) 557db96d56Sopenharmony_ci #probably should check the number of leaves 567db96d56Sopenharmony_ci if len(node.group) == len(node.children): 577db96d56Sopenharmony_ci subp = get_characteristic_subpattern(node.group) 587db96d56Sopenharmony_ci node.group = [] 597db96d56Sopenharmony_ci node = node.parent 607db96d56Sopenharmony_ci continue 617db96d56Sopenharmony_ci else: 627db96d56Sopenharmony_ci node = node.parent 637db96d56Sopenharmony_ci subp = None 647db96d56Sopenharmony_ci break 657db96d56Sopenharmony_ci 667db96d56Sopenharmony_ci if node.type == token_labels.NAME and node.name: 677db96d56Sopenharmony_ci #in case of type=name, use the name instead 687db96d56Sopenharmony_ci subp.append(node.name) 697db96d56Sopenharmony_ci else: 707db96d56Sopenharmony_ci subp.append(node.type) 717db96d56Sopenharmony_ci 727db96d56Sopenharmony_ci node = node.parent 737db96d56Sopenharmony_ci return subp 747db96d56Sopenharmony_ci 757db96d56Sopenharmony_ci def get_linear_subpattern(self): 767db96d56Sopenharmony_ci """Drives the leaf_to_root method. The reason that 777db96d56Sopenharmony_ci leaf_to_root must be run multiple times is because we need to 787db96d56Sopenharmony_ci reject 'group' matches; for example the alternative form 797db96d56Sopenharmony_ci (a | b c) creates a group [b c] that needs to be matched. Since 807db96d56Sopenharmony_ci matching multiple linear patterns overcomes the automaton's 817db96d56Sopenharmony_ci capabilities, leaf_to_root merges each group into a single 827db96d56Sopenharmony_ci choice based on 'characteristic'ity, 837db96d56Sopenharmony_ci 847db96d56Sopenharmony_ci i.e. (a|b c) -> (a|b) if b more characteristic than c 857db96d56Sopenharmony_ci 867db96d56Sopenharmony_ci Returns: The most 'characteristic'(as defined by 877db96d56Sopenharmony_ci get_characteristic_subpattern) path for the compiled pattern 887db96d56Sopenharmony_ci tree. 897db96d56Sopenharmony_ci """ 907db96d56Sopenharmony_ci 917db96d56Sopenharmony_ci for l in self.leaves(): 927db96d56Sopenharmony_ci subp = l.leaf_to_root() 937db96d56Sopenharmony_ci if subp: 947db96d56Sopenharmony_ci return subp 957db96d56Sopenharmony_ci 967db96d56Sopenharmony_ci def leaves(self): 977db96d56Sopenharmony_ci "Generator that returns the leaves of the tree" 987db96d56Sopenharmony_ci for child in self.children: 997db96d56Sopenharmony_ci yield from child.leaves() 1007db96d56Sopenharmony_ci if not self.children: 1017db96d56Sopenharmony_ci yield self 1027db96d56Sopenharmony_ci 1037db96d56Sopenharmony_cidef reduce_tree(node, parent=None): 1047db96d56Sopenharmony_ci """ 1057db96d56Sopenharmony_ci Internal function. Reduces a compiled pattern tree to an 1067db96d56Sopenharmony_ci intermediate representation suitable for feeding the 1077db96d56Sopenharmony_ci automaton. This also trims off any optional pattern elements(like 1087db96d56Sopenharmony_ci [a], a*). 1097db96d56Sopenharmony_ci """ 1107db96d56Sopenharmony_ci 1117db96d56Sopenharmony_ci new_node = None 1127db96d56Sopenharmony_ci #switch on the node type 1137db96d56Sopenharmony_ci if node.type == syms.Matcher: 1147db96d56Sopenharmony_ci #skip 1157db96d56Sopenharmony_ci node = node.children[0] 1167db96d56Sopenharmony_ci 1177db96d56Sopenharmony_ci if node.type == syms.Alternatives : 1187db96d56Sopenharmony_ci #2 cases 1197db96d56Sopenharmony_ci if len(node.children) <= 2: 1207db96d56Sopenharmony_ci #just a single 'Alternative', skip this node 1217db96d56Sopenharmony_ci new_node = reduce_tree(node.children[0], parent) 1227db96d56Sopenharmony_ci else: 1237db96d56Sopenharmony_ci #real alternatives 1247db96d56Sopenharmony_ci new_node = MinNode(type=TYPE_ALTERNATIVES) 1257db96d56Sopenharmony_ci #skip odd children('|' tokens) 1267db96d56Sopenharmony_ci for child in node.children: 1277db96d56Sopenharmony_ci if node.children.index(child)%2: 1287db96d56Sopenharmony_ci continue 1297db96d56Sopenharmony_ci reduced = reduce_tree(child, new_node) 1307db96d56Sopenharmony_ci if reduced is not None: 1317db96d56Sopenharmony_ci new_node.children.append(reduced) 1327db96d56Sopenharmony_ci elif node.type == syms.Alternative: 1337db96d56Sopenharmony_ci if len(node.children) > 1: 1347db96d56Sopenharmony_ci 1357db96d56Sopenharmony_ci new_node = MinNode(type=TYPE_GROUP) 1367db96d56Sopenharmony_ci for child in node.children: 1377db96d56Sopenharmony_ci reduced = reduce_tree(child, new_node) 1387db96d56Sopenharmony_ci if reduced: 1397db96d56Sopenharmony_ci new_node.children.append(reduced) 1407db96d56Sopenharmony_ci if not new_node.children: 1417db96d56Sopenharmony_ci # delete the group if all of the children were reduced to None 1427db96d56Sopenharmony_ci new_node = None 1437db96d56Sopenharmony_ci 1447db96d56Sopenharmony_ci else: 1457db96d56Sopenharmony_ci new_node = reduce_tree(node.children[0], parent) 1467db96d56Sopenharmony_ci 1477db96d56Sopenharmony_ci elif node.type == syms.Unit: 1487db96d56Sopenharmony_ci if (isinstance(node.children[0], pytree.Leaf) and 1497db96d56Sopenharmony_ci node.children[0].value == '('): 1507db96d56Sopenharmony_ci #skip parentheses 1517db96d56Sopenharmony_ci return reduce_tree(node.children[1], parent) 1527db96d56Sopenharmony_ci if ((isinstance(node.children[0], pytree.Leaf) and 1537db96d56Sopenharmony_ci node.children[0].value == '[') 1547db96d56Sopenharmony_ci or 1557db96d56Sopenharmony_ci (len(node.children)>1 and 1567db96d56Sopenharmony_ci hasattr(node.children[1], "value") and 1577db96d56Sopenharmony_ci node.children[1].value == '[')): 1587db96d56Sopenharmony_ci #skip whole unit if its optional 1597db96d56Sopenharmony_ci return None 1607db96d56Sopenharmony_ci 1617db96d56Sopenharmony_ci leaf = True 1627db96d56Sopenharmony_ci details_node = None 1637db96d56Sopenharmony_ci alternatives_node = None 1647db96d56Sopenharmony_ci has_repeater = False 1657db96d56Sopenharmony_ci repeater_node = None 1667db96d56Sopenharmony_ci has_variable_name = False 1677db96d56Sopenharmony_ci 1687db96d56Sopenharmony_ci for child in node.children: 1697db96d56Sopenharmony_ci if child.type == syms.Details: 1707db96d56Sopenharmony_ci leaf = False 1717db96d56Sopenharmony_ci details_node = child 1727db96d56Sopenharmony_ci elif child.type == syms.Repeater: 1737db96d56Sopenharmony_ci has_repeater = True 1747db96d56Sopenharmony_ci repeater_node = child 1757db96d56Sopenharmony_ci elif child.type == syms.Alternatives: 1767db96d56Sopenharmony_ci alternatives_node = child 1777db96d56Sopenharmony_ci if hasattr(child, 'value') and child.value == '=': # variable name 1787db96d56Sopenharmony_ci has_variable_name = True 1797db96d56Sopenharmony_ci 1807db96d56Sopenharmony_ci #skip variable name 1817db96d56Sopenharmony_ci if has_variable_name: 1827db96d56Sopenharmony_ci #skip variable name, '=' 1837db96d56Sopenharmony_ci name_leaf = node.children[2] 1847db96d56Sopenharmony_ci if hasattr(name_leaf, 'value') and name_leaf.value == '(': 1857db96d56Sopenharmony_ci # skip parenthesis 1867db96d56Sopenharmony_ci name_leaf = node.children[3] 1877db96d56Sopenharmony_ci else: 1887db96d56Sopenharmony_ci name_leaf = node.children[0] 1897db96d56Sopenharmony_ci 1907db96d56Sopenharmony_ci #set node type 1917db96d56Sopenharmony_ci if name_leaf.type == token_labels.NAME: 1927db96d56Sopenharmony_ci #(python) non-name or wildcard 1937db96d56Sopenharmony_ci if name_leaf.value == 'any': 1947db96d56Sopenharmony_ci new_node = MinNode(type=TYPE_ANY) 1957db96d56Sopenharmony_ci else: 1967db96d56Sopenharmony_ci if hasattr(token_labels, name_leaf.value): 1977db96d56Sopenharmony_ci new_node = MinNode(type=getattr(token_labels, name_leaf.value)) 1987db96d56Sopenharmony_ci else: 1997db96d56Sopenharmony_ci new_node = MinNode(type=getattr(pysyms, name_leaf.value)) 2007db96d56Sopenharmony_ci 2017db96d56Sopenharmony_ci elif name_leaf.type == token_labels.STRING: 2027db96d56Sopenharmony_ci #(python) name or character; remove the apostrophes from 2037db96d56Sopenharmony_ci #the string value 2047db96d56Sopenharmony_ci name = name_leaf.value.strip("'") 2057db96d56Sopenharmony_ci if name in tokens: 2067db96d56Sopenharmony_ci new_node = MinNode(type=tokens[name]) 2077db96d56Sopenharmony_ci else: 2087db96d56Sopenharmony_ci new_node = MinNode(type=token_labels.NAME, name=name) 2097db96d56Sopenharmony_ci elif name_leaf.type == syms.Alternatives: 2107db96d56Sopenharmony_ci new_node = reduce_tree(alternatives_node, parent) 2117db96d56Sopenharmony_ci 2127db96d56Sopenharmony_ci #handle repeaters 2137db96d56Sopenharmony_ci if has_repeater: 2147db96d56Sopenharmony_ci if repeater_node.children[0].value == '*': 2157db96d56Sopenharmony_ci #reduce to None 2167db96d56Sopenharmony_ci new_node = None 2177db96d56Sopenharmony_ci elif repeater_node.children[0].value == '+': 2187db96d56Sopenharmony_ci #reduce to a single occurrence i.e. do nothing 2197db96d56Sopenharmony_ci pass 2207db96d56Sopenharmony_ci else: 2217db96d56Sopenharmony_ci #TODO: handle {min, max} repeaters 2227db96d56Sopenharmony_ci raise NotImplementedError 2237db96d56Sopenharmony_ci 2247db96d56Sopenharmony_ci #add children 2257db96d56Sopenharmony_ci if details_node and new_node is not None: 2267db96d56Sopenharmony_ci for child in details_node.children[1:-1]: 2277db96d56Sopenharmony_ci #skip '<', '>' markers 2287db96d56Sopenharmony_ci reduced = reduce_tree(child, new_node) 2297db96d56Sopenharmony_ci if reduced is not None: 2307db96d56Sopenharmony_ci new_node.children.append(reduced) 2317db96d56Sopenharmony_ci if new_node: 2327db96d56Sopenharmony_ci new_node.parent = parent 2337db96d56Sopenharmony_ci return new_node 2347db96d56Sopenharmony_ci 2357db96d56Sopenharmony_ci 2367db96d56Sopenharmony_cidef get_characteristic_subpattern(subpatterns): 2377db96d56Sopenharmony_ci """Picks the most characteristic from a list of linear patterns 2387db96d56Sopenharmony_ci Current order used is: 2397db96d56Sopenharmony_ci names > common_names > common_chars 2407db96d56Sopenharmony_ci """ 2417db96d56Sopenharmony_ci if not isinstance(subpatterns, list): 2427db96d56Sopenharmony_ci return subpatterns 2437db96d56Sopenharmony_ci if len(subpatterns)==1: 2447db96d56Sopenharmony_ci return subpatterns[0] 2457db96d56Sopenharmony_ci 2467db96d56Sopenharmony_ci # first pick out the ones containing variable names 2477db96d56Sopenharmony_ci subpatterns_with_names = [] 2487db96d56Sopenharmony_ci subpatterns_with_common_names = [] 2497db96d56Sopenharmony_ci common_names = ['in', 'for', 'if' , 'not', 'None'] 2507db96d56Sopenharmony_ci subpatterns_with_common_chars = [] 2517db96d56Sopenharmony_ci common_chars = "[]().,:" 2527db96d56Sopenharmony_ci for subpattern in subpatterns: 2537db96d56Sopenharmony_ci if any(rec_test(subpattern, lambda x: type(x) is str)): 2547db96d56Sopenharmony_ci if any(rec_test(subpattern, 2557db96d56Sopenharmony_ci lambda x: isinstance(x, str) and x in common_chars)): 2567db96d56Sopenharmony_ci subpatterns_with_common_chars.append(subpattern) 2577db96d56Sopenharmony_ci elif any(rec_test(subpattern, 2587db96d56Sopenharmony_ci lambda x: isinstance(x, str) and x in common_names)): 2597db96d56Sopenharmony_ci subpatterns_with_common_names.append(subpattern) 2607db96d56Sopenharmony_ci 2617db96d56Sopenharmony_ci else: 2627db96d56Sopenharmony_ci subpatterns_with_names.append(subpattern) 2637db96d56Sopenharmony_ci 2647db96d56Sopenharmony_ci if subpatterns_with_names: 2657db96d56Sopenharmony_ci subpatterns = subpatterns_with_names 2667db96d56Sopenharmony_ci elif subpatterns_with_common_names: 2677db96d56Sopenharmony_ci subpatterns = subpatterns_with_common_names 2687db96d56Sopenharmony_ci elif subpatterns_with_common_chars: 2697db96d56Sopenharmony_ci subpatterns = subpatterns_with_common_chars 2707db96d56Sopenharmony_ci # of the remaining subpatterns pick out the longest one 2717db96d56Sopenharmony_ci return max(subpatterns, key=len) 2727db96d56Sopenharmony_ci 2737db96d56Sopenharmony_cidef rec_test(sequence, test_func): 2747db96d56Sopenharmony_ci """Tests test_func on all items of sequence and items of included 2757db96d56Sopenharmony_ci sub-iterables""" 2767db96d56Sopenharmony_ci for x in sequence: 2777db96d56Sopenharmony_ci if isinstance(x, (list, tuple)): 2787db96d56Sopenharmony_ci yield from rec_test(x, test_func) 2797db96d56Sopenharmony_ci else: 2807db96d56Sopenharmony_ci yield test_func(x) 281