17db96d56Sopenharmony_ci# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
27db96d56Sopenharmony_ci# Licensed to PSF under a Contributor Agreement.
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ci"""Parser engine for the grammar tables generated by pgen.
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ciThe grammar table must be loaded first.
77db96d56Sopenharmony_ci
87db96d56Sopenharmony_ciSee Parser/parser.c in the Python distribution for additional info on
97db96d56Sopenharmony_cihow this parsing engine works.
107db96d56Sopenharmony_ci
117db96d56Sopenharmony_ci"""
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ci# Local imports
147db96d56Sopenharmony_cifrom . import token
157db96d56Sopenharmony_ci
167db96d56Sopenharmony_ciclass ParseError(Exception):
177db96d56Sopenharmony_ci    """Exception to signal the parser is stuck."""
187db96d56Sopenharmony_ci
197db96d56Sopenharmony_ci    def __init__(self, msg, type, value, context):
207db96d56Sopenharmony_ci        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
217db96d56Sopenharmony_ci                           (msg, type, value, context))
227db96d56Sopenharmony_ci        self.msg = msg
237db96d56Sopenharmony_ci        self.type = type
247db96d56Sopenharmony_ci        self.value = value
257db96d56Sopenharmony_ci        self.context = context
267db96d56Sopenharmony_ci
277db96d56Sopenharmony_ci    def __reduce__(self):
287db96d56Sopenharmony_ci        return type(self), (self.msg, self.type, self.value, self.context)
297db96d56Sopenharmony_ci
307db96d56Sopenharmony_ciclass Parser(object):
317db96d56Sopenharmony_ci    """Parser engine.
327db96d56Sopenharmony_ci
337db96d56Sopenharmony_ci    The proper usage sequence is:
347db96d56Sopenharmony_ci
357db96d56Sopenharmony_ci    p = Parser(grammar, [converter])  # create instance
367db96d56Sopenharmony_ci    p.setup([start])                  # prepare for parsing
377db96d56Sopenharmony_ci    <for each input token>:
387db96d56Sopenharmony_ci        if p.addtoken(...):           # parse a token; may raise ParseError
397db96d56Sopenharmony_ci            break
407db96d56Sopenharmony_ci    root = p.rootnode                 # root of abstract syntax tree
417db96d56Sopenharmony_ci
427db96d56Sopenharmony_ci    A Parser instance may be reused by calling setup() repeatedly.
437db96d56Sopenharmony_ci
447db96d56Sopenharmony_ci    A Parser instance contains state pertaining to the current token
457db96d56Sopenharmony_ci    sequence, and should not be used concurrently by different threads
467db96d56Sopenharmony_ci    to parse separate token sequences.
477db96d56Sopenharmony_ci
487db96d56Sopenharmony_ci    See driver.py for how to get input tokens by tokenizing a file or
497db96d56Sopenharmony_ci    string.
507db96d56Sopenharmony_ci
517db96d56Sopenharmony_ci    Parsing is complete when addtoken() returns True; the root of the
527db96d56Sopenharmony_ci    abstract syntax tree can then be retrieved from the rootnode
537db96d56Sopenharmony_ci    instance variable.  When a syntax error occurs, addtoken() raises
547db96d56Sopenharmony_ci    the ParseError exception.  There is no error recovery; the parser
557db96d56Sopenharmony_ci    cannot be used after a syntax error was reported (but it can be
567db96d56Sopenharmony_ci    reinitialized by calling setup()).
577db96d56Sopenharmony_ci
587db96d56Sopenharmony_ci    """
597db96d56Sopenharmony_ci
607db96d56Sopenharmony_ci    def __init__(self, grammar, convert=None):
617db96d56Sopenharmony_ci        """Constructor.
627db96d56Sopenharmony_ci
637db96d56Sopenharmony_ci        The grammar argument is a grammar.Grammar instance; see the
647db96d56Sopenharmony_ci        grammar module for more information.
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ci        The parser is not ready yet for parsing; you must call the
677db96d56Sopenharmony_ci        setup() method to get it started.
687db96d56Sopenharmony_ci
697db96d56Sopenharmony_ci        The optional convert argument is a function mapping concrete
707db96d56Sopenharmony_ci        syntax tree nodes to abstract syntax tree nodes.  If not
717db96d56Sopenharmony_ci        given, no conversion is done and the syntax tree produced is
727db96d56Sopenharmony_ci        the concrete syntax tree.  If given, it must be a function of
737db96d56Sopenharmony_ci        two arguments, the first being the grammar (a grammar.Grammar
747db96d56Sopenharmony_ci        instance), and the second being the concrete syntax tree node
757db96d56Sopenharmony_ci        to be converted.  The syntax tree is converted from the bottom
767db96d56Sopenharmony_ci        up.
777db96d56Sopenharmony_ci
787db96d56Sopenharmony_ci        A concrete syntax tree node is a (type, value, context, nodes)
797db96d56Sopenharmony_ci        tuple, where type is the node type (a token or symbol number),
807db96d56Sopenharmony_ci        value is None for symbols and a string for tokens, context is
817db96d56Sopenharmony_ci        None or an opaque value used for error reporting (typically a
827db96d56Sopenharmony_ci        (lineno, offset) pair), and nodes is a list of children for
837db96d56Sopenharmony_ci        symbols, and None for tokens.
847db96d56Sopenharmony_ci
857db96d56Sopenharmony_ci        An abstract syntax tree node may be anything; this is entirely
867db96d56Sopenharmony_ci        up to the converter function.
877db96d56Sopenharmony_ci
887db96d56Sopenharmony_ci        """
897db96d56Sopenharmony_ci        self.grammar = grammar
907db96d56Sopenharmony_ci        self.convert = convert or (lambda grammar, node: node)
917db96d56Sopenharmony_ci
927db96d56Sopenharmony_ci    def setup(self, start=None):
937db96d56Sopenharmony_ci        """Prepare for parsing.
947db96d56Sopenharmony_ci
957db96d56Sopenharmony_ci        This *must* be called before starting to parse.
967db96d56Sopenharmony_ci
977db96d56Sopenharmony_ci        The optional argument is an alternative start symbol; it
987db96d56Sopenharmony_ci        defaults to the grammar's start symbol.
997db96d56Sopenharmony_ci
1007db96d56Sopenharmony_ci        You can use a Parser instance to parse any number of programs;
1017db96d56Sopenharmony_ci        each time you call setup() the parser is reset to an initial
1027db96d56Sopenharmony_ci        state determined by the (implicit or explicit) start symbol.
1037db96d56Sopenharmony_ci
1047db96d56Sopenharmony_ci        """
1057db96d56Sopenharmony_ci        if start is None:
1067db96d56Sopenharmony_ci            start = self.grammar.start
1077db96d56Sopenharmony_ci        # Each stack entry is a tuple: (dfa, state, node).
1087db96d56Sopenharmony_ci        # A node is a tuple: (type, value, context, children),
1097db96d56Sopenharmony_ci        # where children is a list of nodes or None, and context may be None.
1107db96d56Sopenharmony_ci        newnode = (start, None, None, [])
1117db96d56Sopenharmony_ci        stackentry = (self.grammar.dfas[start], 0, newnode)
1127db96d56Sopenharmony_ci        self.stack = [stackentry]
1137db96d56Sopenharmony_ci        self.rootnode = None
1147db96d56Sopenharmony_ci        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
1157db96d56Sopenharmony_ci
1167db96d56Sopenharmony_ci    def addtoken(self, type, value, context):
1177db96d56Sopenharmony_ci        """Add a token; return True iff this is the end of the program."""
1187db96d56Sopenharmony_ci        # Map from token to label
1197db96d56Sopenharmony_ci        ilabel = self.classify(type, value, context)
1207db96d56Sopenharmony_ci        # Loop until the token is shifted; may raise exceptions
1217db96d56Sopenharmony_ci        while True:
1227db96d56Sopenharmony_ci            dfa, state, node = self.stack[-1]
1237db96d56Sopenharmony_ci            states, first = dfa
1247db96d56Sopenharmony_ci            arcs = states[state]
1257db96d56Sopenharmony_ci            # Look for a state with this label
1267db96d56Sopenharmony_ci            for i, newstate in arcs:
1277db96d56Sopenharmony_ci                t, v = self.grammar.labels[i]
1287db96d56Sopenharmony_ci                if ilabel == i:
1297db96d56Sopenharmony_ci                    # Look it up in the list of labels
1307db96d56Sopenharmony_ci                    assert t < 256
1317db96d56Sopenharmony_ci                    # Shift a token; we're done with it
1327db96d56Sopenharmony_ci                    self.shift(type, value, newstate, context)
1337db96d56Sopenharmony_ci                    # Pop while we are in an accept-only state
1347db96d56Sopenharmony_ci                    state = newstate
1357db96d56Sopenharmony_ci                    while states[state] == [(0, state)]:
1367db96d56Sopenharmony_ci                        self.pop()
1377db96d56Sopenharmony_ci                        if not self.stack:
1387db96d56Sopenharmony_ci                            # Done parsing!
1397db96d56Sopenharmony_ci                            return True
1407db96d56Sopenharmony_ci                        dfa, state, node = self.stack[-1]
1417db96d56Sopenharmony_ci                        states, first = dfa
1427db96d56Sopenharmony_ci                    # Done with this token
1437db96d56Sopenharmony_ci                    return False
1447db96d56Sopenharmony_ci                elif t >= 256:
1457db96d56Sopenharmony_ci                    # See if it's a symbol and if we're in its first set
1467db96d56Sopenharmony_ci                    itsdfa = self.grammar.dfas[t]
1477db96d56Sopenharmony_ci                    itsstates, itsfirst = itsdfa
1487db96d56Sopenharmony_ci                    if ilabel in itsfirst:
1497db96d56Sopenharmony_ci                        # Push a symbol
1507db96d56Sopenharmony_ci                        self.push(t, self.grammar.dfas[t], newstate, context)
1517db96d56Sopenharmony_ci                        break # To continue the outer while loop
1527db96d56Sopenharmony_ci            else:
1537db96d56Sopenharmony_ci                if (0, state) in arcs:
1547db96d56Sopenharmony_ci                    # An accepting state, pop it and try something else
1557db96d56Sopenharmony_ci                    self.pop()
1567db96d56Sopenharmony_ci                    if not self.stack:
1577db96d56Sopenharmony_ci                        # Done parsing, but another token is input
1587db96d56Sopenharmony_ci                        raise ParseError("too much input",
1597db96d56Sopenharmony_ci                                         type, value, context)
1607db96d56Sopenharmony_ci                else:
1617db96d56Sopenharmony_ci                    # No success finding a transition
1627db96d56Sopenharmony_ci                    raise ParseError("bad input", type, value, context)
1637db96d56Sopenharmony_ci
1647db96d56Sopenharmony_ci    def classify(self, type, value, context):
1657db96d56Sopenharmony_ci        """Turn a token into a label.  (Internal)"""
1667db96d56Sopenharmony_ci        if type == token.NAME:
1677db96d56Sopenharmony_ci            # Keep a listing of all used names
1687db96d56Sopenharmony_ci            self.used_names.add(value)
1697db96d56Sopenharmony_ci            # Check for reserved words
1707db96d56Sopenharmony_ci            ilabel = self.grammar.keywords.get(value)
1717db96d56Sopenharmony_ci            if ilabel is not None:
1727db96d56Sopenharmony_ci                return ilabel
1737db96d56Sopenharmony_ci        ilabel = self.grammar.tokens.get(type)
1747db96d56Sopenharmony_ci        if ilabel is None:
1757db96d56Sopenharmony_ci            raise ParseError("bad token", type, value, context)
1767db96d56Sopenharmony_ci        return ilabel
1777db96d56Sopenharmony_ci
1787db96d56Sopenharmony_ci    def shift(self, type, value, newstate, context):
1797db96d56Sopenharmony_ci        """Shift a token.  (Internal)"""
1807db96d56Sopenharmony_ci        dfa, state, node = self.stack[-1]
1817db96d56Sopenharmony_ci        newnode = (type, value, context, None)
1827db96d56Sopenharmony_ci        newnode = self.convert(self.grammar, newnode)
1837db96d56Sopenharmony_ci        if newnode is not None:
1847db96d56Sopenharmony_ci            node[-1].append(newnode)
1857db96d56Sopenharmony_ci        self.stack[-1] = (dfa, newstate, node)
1867db96d56Sopenharmony_ci
1877db96d56Sopenharmony_ci    def push(self, type, newdfa, newstate, context):
1887db96d56Sopenharmony_ci        """Push a nonterminal.  (Internal)"""
1897db96d56Sopenharmony_ci        dfa, state, node = self.stack[-1]
1907db96d56Sopenharmony_ci        newnode = (type, None, context, [])
1917db96d56Sopenharmony_ci        self.stack[-1] = (dfa, newstate, node)
1927db96d56Sopenharmony_ci        self.stack.append((newdfa, 0, newnode))
1937db96d56Sopenharmony_ci
1947db96d56Sopenharmony_ci    def pop(self):
1957db96d56Sopenharmony_ci        """Pop a nonterminal.  (Internal)"""
1967db96d56Sopenharmony_ci        popdfa, popstate, popnode = self.stack.pop()
1977db96d56Sopenharmony_ci        newnode = self.convert(self.grammar, popnode)
1987db96d56Sopenharmony_ci        if newnode is not None:
1997db96d56Sopenharmony_ci            if self.stack:
2007db96d56Sopenharmony_ci                dfa, state, node = self.stack[-1]
2017db96d56Sopenharmony_ci                node[-1].append(newnode)
2027db96d56Sopenharmony_ci            else:
2037db96d56Sopenharmony_ci                self.rootnode = newnode
2047db96d56Sopenharmony_ci                self.rootnode.used_names = self.used_names
205