# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import io
import os
import logging
import pkgutil
import sys

# Pgen imports
from . import grammar, parse, token, tokenize, pgen


class Driver(object):
    """Feed tokens to a ``parse.Parser`` and return the resulting tree.

    The driver owns the grammar, the optional *convert* callback and the
    logger; the ``parse_*`` methods differ only in where the tokens come
    from (token iterable, stream, file name or string).
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Store the parsing configuration.

        grammar -- grammar object handed to ``parse.Parser``.
        convert -- optional second argument handed to ``parse.Parser``.
        logger  -- logger used for debug output; defaults to the root
                   logger (``logging.getLogger()``).
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        tokens -- iterable of 5-tuples ``(type, value, start, end,
                  line_text)`` where *start* and *end* are
                  ``(lineno, column)`` pairs.
        debug  -- when true, log every token passed to the parser (and
                  the final "Stop.") at DEBUG level on ``self.logger``.

        Returns ``p.rootnode`` of the parser once ``addtoken`` reports a
        true value.  Raises ``parse.ParseError("incomplete input", ...)``
        if the tokens run out before that happens.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        # (lineno, column) is the position just past the last token seen;
        # any gap up to the next token's start is collected into `prefix`.
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    # Skipped whole lines: represent them as newlines.
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    # Skipped text on the token's own line.
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and NL tokens are not passed to the parser;
                # their text is folded into the prefix of the next token.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                # Replace the generic OP type with the operator-specific
                # type looked up in grammar.opmap.
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Tokens are produced by ``tokenize.generate_tokens`` from
        ``stream.readline``.
        """
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Thin wrapper that delegates to ``parse_stream_raw``.
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree.

        The file is opened in text mode with the given *encoding* and is
        closed again before this method returns.
        """
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)


def _generate_pickle_name(gt):
    """Return the pickle file name that belongs to grammar file *gt*.

    A ``.txt`` extension is dropped (any other extension is kept), then
    every field of ``sys.version_info`` joined by dots and ``.pickle`` are
    appended, e.g. ``Grammar.txt`` -> ``Grammar3.8.10.final.0.pickle``.
    """
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"


def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle).

    gt     -- path of the grammar text file.
    gp     -- path of the pickle; derived from *gt* by
              ``_generate_pickle_name`` when None.
    save   -- when the tables are regenerated, also try to write them to
              *gp*; an OSError while writing is logged and ignored.
    force  -- regenerate from *gt* even if the pickle is up to date.
    logger -- logger for progress messages; defaults to the root logger.

    The tables are regenerated when *force* is true or ``_newer(gp, gt)``
    is false; otherwise they are loaded from the pickle *gp*.
    """
    if logger is None:
        logger = logging.getLogger()
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                # Best effort: an unwritable pickle must not prevent the
                # freshly generated grammar from being returned.
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a, b):
    """Inquire whether file a was written since file b.

    Returns False when *a* does not exist, True when *a* exists but *b*
    does not, and otherwise compares modification times (equal times
    count as newer).
    """
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


def load_packaged_grammar(package, grammar_source):
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    Raises FileNotFoundError if pkgutil.get_data() returns no data for the
    pickled grammar.

    """
    if os.path.isfile(grammar_source):
        return load_grammar(grammar_source)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
    data = pkgutil.get_data(package, pickled_name)
    if data is None:
        # pkgutil.get_data() is documented to return None (instead of
        # raising) when the package cannot be located or its loader does
        # not support get_data().  Report that clearly rather than letting
        # the unpickling below fail on None.
        raise FileNotFoundError(
            "cannot load packaged grammar %r from package %r"
            % (pickled_name, package))
    g = grammar.Grammar()
    g.loads(data)
    return g


def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    When called without arguments, ``sys.argv[1:]`` is used instead.
    Always returns True.
    """
    if not args:
        args = sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True

if __name__ == "__main__":
    sys.exit(int(not main()))