17db96d56Sopenharmony_ci# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
27db96d56Sopenharmony_ci# Licensed to PSF under a Contributor Agreement.
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ci"""This module defines the data structures used to represent a grammar.
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ciThese are a bit arcane because they are derived from the data
77db96d56Sopenharmony_cistructures used by Python's 'pgen' parser generator.
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_ciThere's also a table here mapping operators to their names in the
107db96d56Sopenharmony_citoken module; the Python tokenize module reports all operators as the
117db96d56Sopenharmony_cifallback token code OP, but the parser needs the actual token code.
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ci"""
147db96d56Sopenharmony_ci
157db96d56Sopenharmony_ci# Python imports
167db96d56Sopenharmony_ciimport pickle
177db96d56Sopenharmony_ci
187db96d56Sopenharmony_ci# Local imports
197db96d56Sopenharmony_cifrom . import token
207db96d56Sopenharmony_ci
217db96d56Sopenharmony_ci
227db96d56Sopenharmony_ciclass Grammar(object):
237db96d56Sopenharmony_ci    """Pgen parsing tables conversion class.
247db96d56Sopenharmony_ci
257db96d56Sopenharmony_ci    Once initialized, this class supplies the grammar tables for the
267db96d56Sopenharmony_ci    parsing engine implemented by parse.py.  The parsing engine
277db96d56Sopenharmony_ci    accesses the instance variables directly.  The class here does not
287db96d56Sopenharmony_ci    provide initialization of the tables; several subclasses exist to
297db96d56Sopenharmony_ci    do this (see the conv and pgen modules).
307db96d56Sopenharmony_ci
317db96d56Sopenharmony_ci    The load() method reads the tables from a pickle file, which is
327db96d56Sopenharmony_ci    much faster than the other ways offered by subclasses.  The pickle
337db96d56Sopenharmony_ci    file is written by calling dump() (after loading the grammar
347db96d56Sopenharmony_ci    tables using a subclass).  The report() method prints a readable
357db96d56Sopenharmony_ci    representation of the tables to stdout, for debugging.
367db96d56Sopenharmony_ci
377db96d56Sopenharmony_ci    The instance variables are as follows:
387db96d56Sopenharmony_ci
397db96d56Sopenharmony_ci    symbol2number -- a dict mapping symbol names to numbers.  Symbol
407db96d56Sopenharmony_ci                     numbers are always 256 or higher, to distinguish
417db96d56Sopenharmony_ci                     them from token numbers, which are between 0 and
427db96d56Sopenharmony_ci                     255 (inclusive).
437db96d56Sopenharmony_ci
447db96d56Sopenharmony_ci    number2symbol -- a dict mapping numbers to symbol names;
457db96d56Sopenharmony_ci                     these two are each other's inverse.
467db96d56Sopenharmony_ci
477db96d56Sopenharmony_ci    states        -- a list of DFAs, where each DFA is a list of
487db96d56Sopenharmony_ci                     states, each state is a list of arcs, and each
497db96d56Sopenharmony_ci                     arc is a (i, j) pair where i is a label and j is
507db96d56Sopenharmony_ci                     a state number.  The DFA number is the index into
517db96d56Sopenharmony_ci                     this list.  (This name is slightly confusing.)
527db96d56Sopenharmony_ci                     Final states are represented by a special arc of
537db96d56Sopenharmony_ci                     the form (0, j) where j is its own state number.
547db96d56Sopenharmony_ci
557db96d56Sopenharmony_ci    dfas          -- a dict mapping symbol numbers to (DFA, first)
567db96d56Sopenharmony_ci                     pairs, where DFA is an item from the states list
577db96d56Sopenharmony_ci                     above, and first is a set of tokens that can
587db96d56Sopenharmony_ci                     begin this grammar rule (represented by a dict
597db96d56Sopenharmony_ci                     whose values are always 1).
607db96d56Sopenharmony_ci
617db96d56Sopenharmony_ci    labels        -- a list of (x, y) pairs where x is either a token
627db96d56Sopenharmony_ci                     number or a symbol number, and y is either None
637db96d56Sopenharmony_ci                     or a string; the strings are keywords.  The label
647db96d56Sopenharmony_ci                     number is the index in this list; label numbers
657db96d56Sopenharmony_ci                     are used to mark state transitions (arcs) in the
667db96d56Sopenharmony_ci                     DFAs.
677db96d56Sopenharmony_ci
687db96d56Sopenharmony_ci    start         -- the number of the grammar's start symbol.
697db96d56Sopenharmony_ci
707db96d56Sopenharmony_ci    keywords      -- a dict mapping keyword strings to arc labels.
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci    tokens        -- a dict mapping token numbers to arc labels.
737db96d56Sopenharmony_ci
747db96d56Sopenharmony_ci    """
757db96d56Sopenharmony_ci
767db96d56Sopenharmony_ci    def __init__(self):
777db96d56Sopenharmony_ci        self.symbol2number = {}
787db96d56Sopenharmony_ci        self.number2symbol = {}
797db96d56Sopenharmony_ci        self.states = []
807db96d56Sopenharmony_ci        self.dfas = {}
817db96d56Sopenharmony_ci        self.labels = [(0, "EMPTY")]
827db96d56Sopenharmony_ci        self.keywords = {}
837db96d56Sopenharmony_ci        self.tokens = {}
847db96d56Sopenharmony_ci        self.symbol2label = {}
857db96d56Sopenharmony_ci        self.start = 256
867db96d56Sopenharmony_ci
877db96d56Sopenharmony_ci    def dump(self, filename):
887db96d56Sopenharmony_ci        """Dump the grammar tables to a pickle file."""
897db96d56Sopenharmony_ci        with open(filename, "wb") as f:
907db96d56Sopenharmony_ci            pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
917db96d56Sopenharmony_ci
927db96d56Sopenharmony_ci    def load(self, filename):
937db96d56Sopenharmony_ci        """Load the grammar tables from a pickle file."""
947db96d56Sopenharmony_ci        with open(filename, "rb") as f:
957db96d56Sopenharmony_ci            d = pickle.load(f)
967db96d56Sopenharmony_ci        self.__dict__.update(d)
977db96d56Sopenharmony_ci
987db96d56Sopenharmony_ci    def loads(self, pkl):
997db96d56Sopenharmony_ci        """Load the grammar tables from a pickle bytes object."""
1007db96d56Sopenharmony_ci        self.__dict__.update(pickle.loads(pkl))
1017db96d56Sopenharmony_ci
1027db96d56Sopenharmony_ci    def copy(self):
1037db96d56Sopenharmony_ci        """
1047db96d56Sopenharmony_ci        Copy the grammar.
1057db96d56Sopenharmony_ci        """
1067db96d56Sopenharmony_ci        new = self.__class__()
1077db96d56Sopenharmony_ci        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
1087db96d56Sopenharmony_ci                          "tokens", "symbol2label"):
1097db96d56Sopenharmony_ci            setattr(new, dict_attr, getattr(self, dict_attr).copy())
1107db96d56Sopenharmony_ci        new.labels = self.labels[:]
1117db96d56Sopenharmony_ci        new.states = self.states[:]
1127db96d56Sopenharmony_ci        new.start = self.start
1137db96d56Sopenharmony_ci        return new
1147db96d56Sopenharmony_ci
1157db96d56Sopenharmony_ci    def report(self):
1167db96d56Sopenharmony_ci        """Dump the grammar tables to standard output, for debugging."""
1177db96d56Sopenharmony_ci        from pprint import pprint
1187db96d56Sopenharmony_ci        print("s2n")
1197db96d56Sopenharmony_ci        pprint(self.symbol2number)
1207db96d56Sopenharmony_ci        print("n2s")
1217db96d56Sopenharmony_ci        pprint(self.number2symbol)
1227db96d56Sopenharmony_ci        print("states")
1237db96d56Sopenharmony_ci        pprint(self.states)
1247db96d56Sopenharmony_ci        print("dfas")
1257db96d56Sopenharmony_ci        pprint(self.dfas)
1267db96d56Sopenharmony_ci        print("labels")
1277db96d56Sopenharmony_ci        pprint(self.labels)
1287db96d56Sopenharmony_ci        print("start", self.start)
1297db96d56Sopenharmony_ci
1307db96d56Sopenharmony_ci
1317db96d56Sopenharmony_ci# Map from operator to number (since tokenize doesn't do this)
1327db96d56Sopenharmony_ci
1337db96d56Sopenharmony_ciopmap_raw = """
1347db96d56Sopenharmony_ci( LPAR
1357db96d56Sopenharmony_ci) RPAR
1367db96d56Sopenharmony_ci[ LSQB
1377db96d56Sopenharmony_ci] RSQB
1387db96d56Sopenharmony_ci: COLON
1397db96d56Sopenharmony_ci, COMMA
1407db96d56Sopenharmony_ci; SEMI
1417db96d56Sopenharmony_ci+ PLUS
1427db96d56Sopenharmony_ci- MINUS
1437db96d56Sopenharmony_ci* STAR
1447db96d56Sopenharmony_ci/ SLASH
1457db96d56Sopenharmony_ci| VBAR
1467db96d56Sopenharmony_ci& AMPER
1477db96d56Sopenharmony_ci< LESS
1487db96d56Sopenharmony_ci> GREATER
1497db96d56Sopenharmony_ci= EQUAL
1507db96d56Sopenharmony_ci. DOT
1517db96d56Sopenharmony_ci% PERCENT
1527db96d56Sopenharmony_ci` BACKQUOTE
1537db96d56Sopenharmony_ci{ LBRACE
1547db96d56Sopenharmony_ci} RBRACE
1557db96d56Sopenharmony_ci@ AT
1567db96d56Sopenharmony_ci@= ATEQUAL
1577db96d56Sopenharmony_ci== EQEQUAL
1587db96d56Sopenharmony_ci!= NOTEQUAL
1597db96d56Sopenharmony_ci<> NOTEQUAL
1607db96d56Sopenharmony_ci<= LESSEQUAL
1617db96d56Sopenharmony_ci>= GREATEREQUAL
1627db96d56Sopenharmony_ci~ TILDE
1637db96d56Sopenharmony_ci^ CIRCUMFLEX
1647db96d56Sopenharmony_ci<< LEFTSHIFT
1657db96d56Sopenharmony_ci>> RIGHTSHIFT
1667db96d56Sopenharmony_ci** DOUBLESTAR
1677db96d56Sopenharmony_ci+= PLUSEQUAL
1687db96d56Sopenharmony_ci-= MINEQUAL
1697db96d56Sopenharmony_ci*= STAREQUAL
1707db96d56Sopenharmony_ci/= SLASHEQUAL
1717db96d56Sopenharmony_ci%= PERCENTEQUAL
1727db96d56Sopenharmony_ci&= AMPEREQUAL
1737db96d56Sopenharmony_ci|= VBAREQUAL
1747db96d56Sopenharmony_ci^= CIRCUMFLEXEQUAL
1757db96d56Sopenharmony_ci<<= LEFTSHIFTEQUAL
1767db96d56Sopenharmony_ci>>= RIGHTSHIFTEQUAL
1777db96d56Sopenharmony_ci**= DOUBLESTAREQUAL
1787db96d56Sopenharmony_ci// DOUBLESLASH
1797db96d56Sopenharmony_ci//= DOUBLESLASHEQUAL
1807db96d56Sopenharmony_ci-> RARROW
1817db96d56Sopenharmony_ci:= COLONEQUAL
1827db96d56Sopenharmony_ci"""
1837db96d56Sopenharmony_ci
1847db96d56Sopenharmony_ciopmap = {}
1857db96d56Sopenharmony_cifor line in opmap_raw.splitlines():
1867db96d56Sopenharmony_ci    if line:
1877db96d56Sopenharmony_ci        op, name = line.split()
1887db96d56Sopenharmony_ci        opmap[op] = getattr(token, name)
1897db96d56Sopenharmony_cidel line, op, name
190