# Implement marshal.loads() in pure Python
27db96d56Sopenharmony_ci
import ast

from typing import Any, Tuple
67db96d56Sopenharmony_ci
77db96d56Sopenharmony_ci
class Type:
    """Type-byte values of the marshal format (adapted from marshal.c).

    Each constant is the ordinal of the single ASCII character that tags
    a serialized object.
    """

    # Markers and singleton-like values
    NULL                 = ord('0')
    NONE                 = ord('N')
    FALSE                = ord('F')
    TRUE                 = ord('T')
    STOPITER             = ord('S')
    ELLIPSIS             = ord('.')
    UNKNOWN              = ord('?')

    # Numbers
    INT                  = ord('i')
    INT64                = ord('I')
    LONG                 = ord('l')
    FLOAT                = ord('f')
    BINARY_FLOAT         = ord('g')
    COMPLEX              = ord('x')
    BINARY_COMPLEX       = ord('y')

    # Bytes and text
    STRING               = ord('s')
    INTERNED             = ord('t')
    UNICODE              = ord('u')
    ASCII                = ord('a')
    ASCII_INTERNED       = ord('A')
    SHORT_ASCII          = ord('z')
    SHORT_ASCII_INTERNED = ord('Z')

    # Containers
    TUPLE                = ord('(')
    SMALL_TUPLE          = ord(')')
    LIST                 = ord('[')
    DICT                 = ord('{')
    SET                  = ord('<')
    FROZENSET            = ord('>')

    # Code objects and back-references
    CODE                 = ord('c')
    REF                  = ord('r')
397db96d56Sopenharmony_ci
407db96d56Sopenharmony_ci
# Bit OR-ed into a type byte (0x80): the decoded object is also added
# to the reader's reference index.
FLAG_REF = 1 << 7

# Unique marker object, distinguishable from every decoded value.
NULL = object()

# Cell kinds
CO_FAST_LOCAL = 1 << 5  # 0x20
CO_FAST_CELL = 1 << 6   # 0x40
CO_FAST_FREE = 1 << 7   # 0x80
497db96d56Sopenharmony_ci
507db96d56Sopenharmony_ci
class Code:
    """Attribute bag standing in for a code object.

    Every keyword given to the constructor is stored directly in the
    instance ``__dict__``; further attributes may be assigned afterwards.
    The ``co_varnames``, ``co_cellvars``, ``co_freevars`` and
    ``co_nlocals`` properties are derived from ``co_localsplusnames`` and
    ``co_localspluskinds``, which must be set before they are read.
    """

    def __init__(self, **kwds: Any):
        self.__dict__.update(kwds)

    def __repr__(self) -> str:
        return f"Code(**{self.__dict__})"

    # Variable-length sequences, paired element by element.
    # NOTE(review): the kinds are only iterated and bit-tested here, so a
    # bytes object (whose items are ints) works as well as a tuple of
    # ints -- confirm what the producer actually stores.
    co_localsplusnames: Tuple[str, ...]
    co_localspluskinds: Tuple[int, ...]

    def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
        """Return the names whose kind has any bit of *select_kind* set.

        Names keep the order they have in ``co_localsplusnames``.
        """
        return tuple(
            name
            for name, kind in zip(self.co_localsplusnames,
                                  self.co_localspluskinds)
            if kind & select_kind
        )

    @property
    def co_varnames(self) -> Tuple[str, ...]:
        """Names flagged with CO_FAST_LOCAL."""
        return self.get_localsplus_names(CO_FAST_LOCAL)

    @property
    def co_cellvars(self) -> Tuple[str, ...]:
        """Names flagged with CO_FAST_CELL."""
        return self.get_localsplus_names(CO_FAST_CELL)

    @property
    def co_freevars(self) -> Tuple[str, ...]:
        """Names flagged with CO_FAST_FREE."""
        return self.get_localsplus_names(CO_FAST_FREE)

    @property
    def co_nlocals(self) -> int:
        """Number of names flagged with CO_FAST_LOCAL."""
        return len(self.co_varnames)
847db96d56Sopenharmony_ci
857db96d56Sopenharmony_ci
class Reader:
    """Decode marshal data into Python objects.

    A fairly literal translation of the marshal reader.  Code objects are
    returned as ``Code`` instances and a NULL type byte decodes to the
    module-level ``NULL`` marker.
    """

    def __init__(self, data: bytes):
        """Start reading *data* at its first byte."""
        self.data: bytes = data
        self.end: int = len(self.data)
        self.pos: int = 0
        # Objects registered for back-references, in order of appearance.
        self.refs: list[Any] = []
        # Nesting depth of the object currently being decoded.
        self.level: int = 0

    def r_string(self, n: int) -> bytes:
        """Consume and return the next *n* bytes.

        Asserts that *n* is non-negative and does not run past the end
        of the data.
        """
        assert 0 <= n <= self.end - self.pos
        buf = self.data[self.pos : self.pos + n]
        self.pos += n
        return buf

    def r_byte(self) -> int:
        """Read one unsigned byte."""
        return self.r_string(1)[0]

    def r_short(self) -> int:
        """Read a signed 16-bit little-endian integer."""
        return int.from_bytes(self.r_string(2), "little", signed=True)

    def r_long(self) -> int:
        """Read a signed 32-bit little-endian integer."""
        return int.from_bytes(self.r_string(4), "little", signed=True)

    def r_long64(self) -> int:
        """Read a signed 64-bit little-endian integer."""
        # All eight bytes contribute; the upper four must come from
        # buf[4]..buf[7], not from a repeated buf[1].
        return int.from_bytes(self.r_string(8), "little", signed=True)

    def r_PyLong(self) -> int:
        """Read an arbitrary-precision integer.

        The encoding is a signed 32-bit count followed by ``abs(count)``
        16-bit digits, least significant first, each contributing 15
        bits.  A negative count marks a negative value.
        """
        n = self.r_long()
        x = 0
        for i in range(abs(n)):
            x |= self.r_short() << i*15
        return -x if n < 0 else x

    def r_float_bin(self) -> float:
        """Read an 8-byte IEEE 754 double stored little-endian."""
        buf = self.r_string(8)
        import struct  # Lazy import to avoid breaking UNIX build
        # "<d" pins little-endian so the result does not depend on the
        # byte order of the host, matching the integer readers above.
        return struct.unpack("<d", buf)[0]

    def r_float_str(self) -> float:
        """Read a float stored as a length-prefixed ASCII string."""
        n = self.r_byte()
        buf = self.r_string(n)
        # float() always yields a float (even for text such as "1") and
        # accepts "inf" and "nan".
        return float(buf.decode("ascii"))

    def r_ref_reserve(self, flag: int) -> int:
        """Reserve a reference slot before an object's children are read.

        Returns the index of the reserved slot, or 0 when *flag* is unset
        (in which case nothing is reserved).
        """
        if flag:
            idx = len(self.refs)
            self.refs.append(None)
            return idx
        else:
            return 0

    def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
        """Store *obj* in the slot reserved at *idx* if *flag* is set."""
        if flag:
            self.refs[idx] = obj
        return obj

    def r_ref(self, obj: Any, flag: int) -> Any:
        """Append *obj* to the reference table and return it."""
        assert flag & FLAG_REF
        self.refs.append(obj)
        return obj

    def r_object(self) -> Any:
        """Decode the next object, restoring the nesting level afterwards."""
        old_level = self.level
        try:
            return self._r_object()
        finally:
            self.level = old_level

    def _r_object(self) -> Any:
        """Decode one object according to its leading type byte.

        Raises AssertionError for a type byte that is not handled.
        """
        tag = self.r_byte()
        flag = tag & FLAG_REF
        kind = tag & ~FLAG_REF
        self.level += 1

        def R_REF(obj: Any) -> Any:
            # Register obj for back-references when the type byte asked
            # for it.
            if flag:
                obj = self.r_ref(obj, flag)
            return obj

        if kind == Type.NULL:
            return NULL
        elif kind == Type.NONE:
            return None
        elif kind == Type.STOPITER:
            return StopIteration
        elif kind == Type.ELLIPSIS:
            return Ellipsis
        elif kind == Type.FALSE:
            return False
        elif kind == Type.TRUE:
            return True
        elif kind == Type.INT:
            return R_REF(self.r_long())
        elif kind == Type.INT64:
            return R_REF(self.r_long64())
        elif kind == Type.LONG:
            return R_REF(self.r_PyLong())
        elif kind == Type.FLOAT:
            return R_REF(self.r_float_str())
        elif kind == Type.BINARY_FLOAT:
            return R_REF(self.r_float_bin())
        elif kind == Type.COMPLEX:
            return R_REF(complex(self.r_float_str(),
                                 self.r_float_str()))
        elif kind == Type.BINARY_COMPLEX:
            return R_REF(complex(self.r_float_bin(),
                                 self.r_float_bin()))
        elif kind == Type.STRING:
            n = self.r_long()
            return R_REF(self.r_string(n))
        elif kind == Type.ASCII_INTERNED or kind == Type.ASCII:
            n = self.r_long()
            return R_REF(self.r_string(n).decode("ascii"))
        elif kind == Type.SHORT_ASCII_INTERNED or kind == Type.SHORT_ASCII:
            n = self.r_byte()
            return R_REF(self.r_string(n).decode("ascii"))
        elif kind == Type.INTERNED or kind == Type.UNICODE:
            n = self.r_long()
            return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
        elif kind == Type.SMALL_TUPLE:
            n = self.r_byte()
            # A tuple only exists once its items are read, so its slot
            # is reserved first to keep the reference order.
            idx = self.r_ref_reserve(flag)
            retval: Any = tuple(self.r_object() for _ in range(n))
            self.r_ref_insert(retval, idx, flag)
            return retval
        elif kind == Type.TUPLE:
            n = self.r_long()
            idx = self.r_ref_reserve(flag)
            retval = tuple(self.r_object() for _ in range(n))
            self.r_ref_insert(retval, idx, flag)
            return retval
        elif kind == Type.LIST:
            n = self.r_long()
            retval = R_REF([])
            for _ in range(n):
                retval.append(self.r_object())
            return retval
        elif kind == Type.DICT:
            retval = R_REF({})
            while True:
                key = self.r_object()
                # NULL is an identity sentinel that ends the key/value
                # pairs.
                if key is NULL:
                    break
                val = self.r_object()
                retval[key] = val
            return retval
        elif kind == Type.SET:
            n = self.r_long()
            retval = R_REF(set())
            for _ in range(n):
                v = self.r_object()
                retval.add(v)
            return retval
        elif kind == Type.FROZENSET:
            n = self.r_long()
            s: set[Any] = set()
            idx = self.r_ref_reserve(flag)
            for _ in range(n):
                v = self.r_object()
                s.add(v)
            retval = frozenset(s)
            self.r_ref_insert(retval, idx, flag)
            return retval
        elif kind == Type.CODE:
            # Registered before its fields are read; fields are stored
            # in exactly this order.
            retval = R_REF(Code())
            retval.co_argcount = self.r_long()
            retval.co_posonlyargcount = self.r_long()
            retval.co_kwonlyargcount = self.r_long()
            retval.co_stacksize = self.r_long()
            retval.co_flags = self.r_long()
            retval.co_code = self.r_object()
            retval.co_consts = self.r_object()
            retval.co_names = self.r_object()
            retval.co_localsplusnames = self.r_object()
            retval.co_localspluskinds = self.r_object()
            retval.co_filename = self.r_object()
            retval.co_name = self.r_object()
            retval.co_qualname = self.r_object()
            retval.co_firstlineno = self.r_long()
            retval.co_linetable = self.r_object()
            retval.co_exceptiontable = self.r_object()
            return retval
        elif kind == Type.REF:
            n = self.r_long()
            retval = self.refs[n]
            # None means the slot was reserved but not yet filled.
            assert retval is not None
            return retval
        else:
            raise AssertionError(f"Unknown type {kind} {chr(kind)!r}")
3027db96d56Sopenharmony_ci
3037db96d56Sopenharmony_ci
def loads(data: bytes) -> Any:
    """Decode the first object found in the marshal data *data*.

    *data* must be a ``bytes`` instance (checked with an assertion).
    """
    assert isinstance(data, bytes)
    return Reader(data).r_object()
3087db96d56Sopenharmony_ci
3097db96d56Sopenharmony_ci
def main():
    """Self-test: decode data from the real marshal module with loads()."""
    import marshal
    import pprint

    # A nested container must decode to an equal value.
    expected = {'foo': {(42, "bar", 3.14)}}
    decoded = loads(marshal.dumps(expected))
    assert decoded == expected, decoded

    # A code object must decode to a Code instance; show its fields.
    decoded = loads(marshal.dumps(main.__code__))
    assert isinstance(decoded, Code), decoded
    pprint.pprint(vars(decoded))
3237db96d56Sopenharmony_ci
# Run the self-test when executed as a script.
if __name__ == "__main__":
    main()
326