17db96d56Sopenharmony_ci# Copyright 2006 Google, Inc. All Rights Reserved. 27db96d56Sopenharmony_ci# Licensed to PSF under a Contributor Agreement. 37db96d56Sopenharmony_ci 47db96d56Sopenharmony_ci"""Pattern compiler. 57db96d56Sopenharmony_ci 67db96d56Sopenharmony_ciThe grammar is taken from PatternGrammar.txt. 77db96d56Sopenharmony_ci 87db96d56Sopenharmony_ciThe compiler compiles a pattern to a pytree.*Pattern instance. 97db96d56Sopenharmony_ci""" 107db96d56Sopenharmony_ci 117db96d56Sopenharmony_ci__author__ = "Guido van Rossum <guido@python.org>" 127db96d56Sopenharmony_ci 137db96d56Sopenharmony_ci# Python imports 147db96d56Sopenharmony_ciimport io 157db96d56Sopenharmony_ci 167db96d56Sopenharmony_ci# Fairly local imports 177db96d56Sopenharmony_cifrom .pgen2 import driver, literals, token, tokenize, parse, grammar 187db96d56Sopenharmony_ci 197db96d56Sopenharmony_ci# Really local imports 207db96d56Sopenharmony_cifrom . import pytree 217db96d56Sopenharmony_cifrom . import pygram 227db96d56Sopenharmony_ci 237db96d56Sopenharmony_ci 247db96d56Sopenharmony_ciclass PatternSyntaxError(Exception): 257db96d56Sopenharmony_ci pass 267db96d56Sopenharmony_ci 277db96d56Sopenharmony_ci 287db96d56Sopenharmony_cidef tokenize_wrapper(input): 297db96d56Sopenharmony_ci """Tokenizes a string suppressing significant whitespace.""" 307db96d56Sopenharmony_ci skip = {token.NEWLINE, token.INDENT, token.DEDENT} 317db96d56Sopenharmony_ci tokens = tokenize.generate_tokens(io.StringIO(input).readline) 327db96d56Sopenharmony_ci for quintuple in tokens: 337db96d56Sopenharmony_ci type, value, start, end, line_text = quintuple 347db96d56Sopenharmony_ci if type not in skip: 357db96d56Sopenharmony_ci yield quintuple 367db96d56Sopenharmony_ci 377db96d56Sopenharmony_ci 387db96d56Sopenharmony_ciclass PatternCompiler(object): 397db96d56Sopenharmony_ci 407db96d56Sopenharmony_ci def __init__(self, grammar_file=None): 417db96d56Sopenharmony_ci """Initializer. 427db96d56Sopenharmony_ci 437db96d56Sopenharmony_ci Takes an optional alternative filename for the pattern grammar. 447db96d56Sopenharmony_ci """ 457db96d56Sopenharmony_ci if grammar_file is None: 467db96d56Sopenharmony_ci self.grammar = pygram.pattern_grammar 477db96d56Sopenharmony_ci self.syms = pygram.pattern_symbols 487db96d56Sopenharmony_ci else: 497db96d56Sopenharmony_ci self.grammar = driver.load_grammar(grammar_file) 507db96d56Sopenharmony_ci self.syms = pygram.Symbols(self.grammar) 517db96d56Sopenharmony_ci self.pygrammar = pygram.python_grammar 527db96d56Sopenharmony_ci self.pysyms = pygram.python_symbols 537db96d56Sopenharmony_ci self.driver = driver.Driver(self.grammar, convert=pattern_convert) 547db96d56Sopenharmony_ci 557db96d56Sopenharmony_ci def compile_pattern(self, input, debug=False, with_tree=False): 567db96d56Sopenharmony_ci """Compiles a pattern string to a nested pytree.*Pattern object.""" 577db96d56Sopenharmony_ci tokens = tokenize_wrapper(input) 587db96d56Sopenharmony_ci try: 597db96d56Sopenharmony_ci root = self.driver.parse_tokens(tokens, debug=debug) 607db96d56Sopenharmony_ci except parse.ParseError as e: 617db96d56Sopenharmony_ci raise PatternSyntaxError(str(e)) from None 627db96d56Sopenharmony_ci if with_tree: 637db96d56Sopenharmony_ci return self.compile_node(root), root 647db96d56Sopenharmony_ci else: 657db96d56Sopenharmony_ci return self.compile_node(root) 667db96d56Sopenharmony_ci 677db96d56Sopenharmony_ci def compile_node(self, node): 687db96d56Sopenharmony_ci """Compiles a node, recursively. 697db96d56Sopenharmony_ci 707db96d56Sopenharmony_ci This is one big switch on the node type. 717db96d56Sopenharmony_ci """ 727db96d56Sopenharmony_ci # XXX Optimize certain Wildcard-containing-Wildcard patterns 737db96d56Sopenharmony_ci # that can be merged 747db96d56Sopenharmony_ci if node.type == self.syms.Matcher: 757db96d56Sopenharmony_ci node = node.children[0] # Avoid unneeded recursion 767db96d56Sopenharmony_ci 777db96d56Sopenharmony_ci if node.type == self.syms.Alternatives: 787db96d56Sopenharmony_ci # Skip the odd children since they are just '|' tokens 797db96d56Sopenharmony_ci alts = [self.compile_node(ch) for ch in node.children[::2]] 807db96d56Sopenharmony_ci if len(alts) == 1: 817db96d56Sopenharmony_ci return alts[0] 827db96d56Sopenharmony_ci p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) 837db96d56Sopenharmony_ci return p.optimize() 847db96d56Sopenharmony_ci 857db96d56Sopenharmony_ci if node.type == self.syms.Alternative: 867db96d56Sopenharmony_ci units = [self.compile_node(ch) for ch in node.children] 877db96d56Sopenharmony_ci if len(units) == 1: 887db96d56Sopenharmony_ci return units[0] 897db96d56Sopenharmony_ci p = pytree.WildcardPattern([units], min=1, max=1) 907db96d56Sopenharmony_ci return p.optimize() 917db96d56Sopenharmony_ci 927db96d56Sopenharmony_ci if node.type == self.syms.NegatedUnit: 937db96d56Sopenharmony_ci pattern = self.compile_basic(node.children[1:]) 947db96d56Sopenharmony_ci p = pytree.NegatedPattern(pattern) 957db96d56Sopenharmony_ci return p.optimize() 967db96d56Sopenharmony_ci 977db96d56Sopenharmony_ci assert node.type == self.syms.Unit 987db96d56Sopenharmony_ci 997db96d56Sopenharmony_ci name = None 1007db96d56Sopenharmony_ci nodes = node.children 1017db96d56Sopenharmony_ci if len(nodes) >= 3 and nodes[1].type == token.EQUAL: 1027db96d56Sopenharmony_ci name = nodes[0].value 1037db96d56Sopenharmony_ci nodes = nodes[2:] 1047db96d56Sopenharmony_ci repeat = None 1057db96d56Sopenharmony_ci if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: 1067db96d56Sopenharmony_ci repeat = nodes[-1] 1077db96d56Sopenharmony_ci nodes = nodes[:-1] 1087db96d56Sopenharmony_ci 1097db96d56Sopenharmony_ci # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] 1107db96d56Sopenharmony_ci pattern = self.compile_basic(nodes, repeat) 1117db96d56Sopenharmony_ci 1127db96d56Sopenharmony_ci if repeat is not None: 1137db96d56Sopenharmony_ci assert repeat.type == self.syms.Repeater 1147db96d56Sopenharmony_ci children = repeat.children 1157db96d56Sopenharmony_ci child = children[0] 1167db96d56Sopenharmony_ci if child.type == token.STAR: 1177db96d56Sopenharmony_ci min = 0 1187db96d56Sopenharmony_ci max = pytree.HUGE 1197db96d56Sopenharmony_ci elif child.type == token.PLUS: 1207db96d56Sopenharmony_ci min = 1 1217db96d56Sopenharmony_ci max = pytree.HUGE 1227db96d56Sopenharmony_ci elif child.type == token.LBRACE: 1237db96d56Sopenharmony_ci assert children[-1].type == token.RBRACE 1247db96d56Sopenharmony_ci assert len(children) in (3, 5) 1257db96d56Sopenharmony_ci min = max = self.get_int(children[1]) 1267db96d56Sopenharmony_ci if len(children) == 5: 1277db96d56Sopenharmony_ci max = self.get_int(children[3]) 1287db96d56Sopenharmony_ci else: 1297db96d56Sopenharmony_ci assert False 1307db96d56Sopenharmony_ci if min != 1 or max != 1: 1317db96d56Sopenharmony_ci pattern = pattern.optimize() 1327db96d56Sopenharmony_ci pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) 1337db96d56Sopenharmony_ci 1347db96d56Sopenharmony_ci if name is not None: 1357db96d56Sopenharmony_ci pattern.name = name 1367db96d56Sopenharmony_ci return pattern.optimize() 1377db96d56Sopenharmony_ci 1387db96d56Sopenharmony_ci def compile_basic(self, nodes, repeat=None): 1397db96d56Sopenharmony_ci # Compile STRING | NAME [Details] | (...) | [...] 1407db96d56Sopenharmony_ci assert len(nodes) >= 1 1417db96d56Sopenharmony_ci node = nodes[0] 1427db96d56Sopenharmony_ci if node.type == token.STRING: 1437db96d56Sopenharmony_ci value = str(literals.evalString(node.value)) 1447db96d56Sopenharmony_ci return pytree.LeafPattern(_type_of_literal(value), value) 1457db96d56Sopenharmony_ci elif node.type == token.NAME: 1467db96d56Sopenharmony_ci value = node.value 1477db96d56Sopenharmony_ci if value.isupper(): 1487db96d56Sopenharmony_ci if value not in TOKEN_MAP: 1497db96d56Sopenharmony_ci raise PatternSyntaxError("Invalid token: %r" % value) 1507db96d56Sopenharmony_ci if nodes[1:]: 1517db96d56Sopenharmony_ci raise PatternSyntaxError("Can't have details for token") 1527db96d56Sopenharmony_ci return pytree.LeafPattern(TOKEN_MAP[value]) 1537db96d56Sopenharmony_ci else: 1547db96d56Sopenharmony_ci if value == "any": 1557db96d56Sopenharmony_ci type = None 1567db96d56Sopenharmony_ci elif not value.startswith("_"): 1577db96d56Sopenharmony_ci type = getattr(self.pysyms, value, None) 1587db96d56Sopenharmony_ci if type is None: 1597db96d56Sopenharmony_ci raise PatternSyntaxError("Invalid symbol: %r" % value) 1607db96d56Sopenharmony_ci if nodes[1:]: # Details present 1617db96d56Sopenharmony_ci content = [self.compile_node(nodes[1].children[1])] 1627db96d56Sopenharmony_ci else: 1637db96d56Sopenharmony_ci content = None 1647db96d56Sopenharmony_ci return pytree.NodePattern(type, content) 1657db96d56Sopenharmony_ci elif node.value == "(": 1667db96d56Sopenharmony_ci return self.compile_node(nodes[1]) 1677db96d56Sopenharmony_ci elif node.value == "[": 1687db96d56Sopenharmony_ci assert repeat is None 1697db96d56Sopenharmony_ci subpattern = self.compile_node(nodes[1]) 1707db96d56Sopenharmony_ci return pytree.WildcardPattern([[subpattern]], min=0, max=1) 1717db96d56Sopenharmony_ci assert False, node 1727db96d56Sopenharmony_ci 1737db96d56Sopenharmony_ci def get_int(self, node): 1747db96d56Sopenharmony_ci assert node.type == token.NUMBER 1757db96d56Sopenharmony_ci return int(node.value) 1767db96d56Sopenharmony_ci 1777db96d56Sopenharmony_ci 1787db96d56Sopenharmony_ci# Map named tokens to the type value for a LeafPattern 1797db96d56Sopenharmony_ciTOKEN_MAP = {"NAME": token.NAME, 1807db96d56Sopenharmony_ci "STRING": token.STRING, 1817db96d56Sopenharmony_ci "NUMBER": token.NUMBER, 1827db96d56Sopenharmony_ci "TOKEN": None} 1837db96d56Sopenharmony_ci 1847db96d56Sopenharmony_ci 1857db96d56Sopenharmony_cidef _type_of_literal(value): 1867db96d56Sopenharmony_ci if value[0].isalpha(): 1877db96d56Sopenharmony_ci return token.NAME 1887db96d56Sopenharmony_ci elif value in grammar.opmap: 1897db96d56Sopenharmony_ci return grammar.opmap[value] 1907db96d56Sopenharmony_ci else: 1917db96d56Sopenharmony_ci return None 1927db96d56Sopenharmony_ci 1937db96d56Sopenharmony_ci 1947db96d56Sopenharmony_cidef pattern_convert(grammar, raw_node_info): 1957db96d56Sopenharmony_ci """Converts raw node information to a Node or Leaf instance.""" 1967db96d56Sopenharmony_ci type, value, context, children = raw_node_info 1977db96d56Sopenharmony_ci if children or type in grammar.number2symbol: 1987db96d56Sopenharmony_ci return pytree.Node(type, children, context=context) 1997db96d56Sopenharmony_ci else: 2007db96d56Sopenharmony_ci return pytree.Leaf(type, value, context=context) 2017db96d56Sopenharmony_ci 2027db96d56Sopenharmony_ci 2037db96d56Sopenharmony_cidef compile_pattern(pattern): 2047db96d56Sopenharmony_ci return PatternCompiler().compile_pattern(pattern) 205