# Implementation of marshal.loads() in pure Python

import ast  # Retained for backward compatibility; no longer used below.

from typing import Any, Tuple


class Type:
    """Type codes used in the marshal byte stream (one ASCII character each)."""

    # Adapted from marshal.c
    NULL                = ord('0')
    NONE                = ord('N')
    FALSE               = ord('F')
    TRUE                = ord('T')
    STOPITER            = ord('S')
    ELLIPSIS            = ord('.')
    INT                 = ord('i')
    INT64               = ord('I')
    FLOAT               = ord('f')
    BINARY_FLOAT        = ord('g')
    COMPLEX             = ord('x')
    BINARY_COMPLEX      = ord('y')
    LONG                = ord('l')
    STRING              = ord('s')
    INTERNED            = ord('t')
    REF                 = ord('r')
    TUPLE               = ord('(')
    LIST                = ord('[')
    DICT                = ord('{')
    CODE                = ord('c')
    UNICODE             = ord('u')
    UNKNOWN             = ord('?')
    SET                 = ord('<')
    FROZENSET           = ord('>')
    ASCII               = ord('a')
    ASCII_INTERNED      = ord('A')
    SMALL_TUPLE         = ord(')')
    SHORT_ASCII         = ord('z')
    SHORT_ASCII_INTERNED = ord('Z')


FLAG_REF = 0x80  # with a type, add obj to index

NULL = object()  # marker returned for Type.NULL (e.g. the dict terminator)

# Cell kinds
CO_FAST_LOCAL = 0x20
CO_FAST_CELL = 0x40
CO_FAST_FREE = 0x80


class Code:
    """Plain attribute bag standing in for a code object read from marshal data.

    The reader assigns the ``co_*`` fields one by one; the properties below
    derive the classic ``co_varnames`` / ``co_cellvars`` / ``co_freevars`` /
    ``co_nlocals`` views from ``co_localsplusnames`` and
    ``co_localspluskinds``.
    """

    co_localsplusnames: Tuple[str, ...]
    # One kind value per name; any iterable of ints works here (iterating a
    # bytes object yields ints as well).
    co_localspluskinds: Tuple[int, ...]

    def __init__(self, **kwds: Any) -> None:
        """Store every keyword argument as an instance attribute."""
        self.__dict__.update(kwds)

    def __repr__(self) -> str:
        return f"Code(**{self.__dict__})"

    def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
        """Return the names whose kind has any bit of *select_kind* set."""
        return tuple(
            name
            for name, kind in zip(self.co_localsplusnames,
                                  self.co_localspluskinds)
            if kind & select_kind
        )

    @property
    def co_varnames(self) -> Tuple[str, ...]:
        """Names flagged CO_FAST_LOCAL."""
        return self.get_localsplus_names(CO_FAST_LOCAL)

    @property
    def co_cellvars(self) -> Tuple[str, ...]:
        """Names flagged CO_FAST_CELL."""
        return self.get_localsplus_names(CO_FAST_CELL)

    @property
    def co_freevars(self) -> Tuple[str, ...]:
        """Names flagged CO_FAST_FREE."""
        return self.get_localsplus_names(CO_FAST_FREE)

    @property
    def co_nlocals(self) -> int:
        """Number of names flagged CO_FAST_LOCAL."""
        return len(self.co_varnames)


class Reader:
    """Sequential decoder over a marshal byte string.

    A fairly literal translation of the marshal reader.  ``pos`` is the
    read cursor into ``data``, ``refs`` is the table of objects that were
    written with FLAG_REF (indexable later via Type.REF), and ``level`` is
    the current nesting depth of ``r_object()`` calls.
    """

    def __init__(self, data: bytes):
        self.data: bytes = data
        self.end: int = len(self.data)
        self.pos: int = 0
        self.refs: list[Any] = []
        self.level: int = 0

    def r_string(self, n: int) -> bytes:
        """Consume and return exactly *n* bytes.

        Raises AssertionError if *n* is negative or exceeds the remaining data.
        """
        # Explicit raise rather than `assert` so the bounds check survives
        # `python -O`; a short slice would otherwise be decoded silently.
        if not 0 <= n <= self.end - self.pos:
            raise AssertionError(
                f"marshal data too short: need {n} bytes at offset {self.pos}"
            )
        buf = self.data[self.pos : self.pos + n]
        self.pos += n
        return buf

    def r_byte(self) -> int:
        """Consume one byte and return it as an int in range(256)."""
        return self.r_string(1)[0]

    def r_short(self) -> int:
        """Consume a signed 16-bit little-endian integer."""
        return int.from_bytes(self.r_string(2), "little", signed=True)

    def r_long(self) -> int:
        """Consume a signed 32-bit little-endian integer."""
        return int.from_bytes(self.r_string(4), "little", signed=True)

    def r_long64(self) -> int:
        """Consume a signed 64-bit little-endian integer."""
        # All eight bytes contribute; the hand-rolled version this replaces
        # mistakenly reused byte 1 for the upper four positions.
        return int.from_bytes(self.r_string(8), "little", signed=True)

    def r_PyLong(self) -> int:
        """Consume an arbitrary-size integer (Type.LONG payload).

        The payload is a signed 32-bit digit count (its sign is the sign of
        the result) followed by that many 16-bit shorts, each combined at a
        15-bit offset, least significant first.
        """
        n = self.r_long()
        magnitude = 0
        for i in range(abs(n)):
            magnitude |= self.r_short() << (i * 15)
        return -magnitude if n < 0 else magnitude

    def r_float_bin(self) -> float:
        """Consume an 8-byte IEEE 754 double stored little-endian."""
        buf = self.r_string(8)
        import struct  # Lazy import to avoid breaking UNIX build
        # "<d": marshal data is little-endian regardless of the host.
        return struct.unpack("<d", buf)[0]

    def r_float_str(self) -> float:
        """Consume a length-prefixed ASCII float (one length byte, then text)."""
        n = self.r_byte()
        buf = self.r_string(n)
        # float() (unlike ast.literal_eval) always yields a float, and it
        # accepts "inf" / "nan" spellings.
        return float(buf.decode("ascii"))

    def r_ref_reserve(self, flag: int) -> int:
        """Reserve a slot in the ref table for an object still being built.

        Returns the reserved index, or 0 when *flag* is unset (in which case
        nothing is appended).
        """
        if not flag:
            return 0
        idx = len(self.refs)
        self.refs.append(None)  # placeholder until r_ref_insert()
        return idx

    def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
        """Fill the slot reserved by r_ref_reserve() when *flag* is set."""
        if flag:
            self.refs[idx] = obj
        return obj

    def r_ref(self, obj: Any, flag: int) -> Any:
        """Append *obj* to the ref table; *flag* must have FLAG_REF set."""
        assert flag & FLAG_REF
        self.refs.append(obj)
        return obj

    def r_object(self) -> Any:
        """Read one object, restoring the nesting level afterwards."""
        old_level = self.level
        try:
            return self._r_object()
        finally:
            self.level = old_level

    def _r_object(self) -> Any:
        """Decode the next object according to its type byte.

        Returns the decoded value, or the module-level ``NULL`` marker for
        Type.NULL.  Raises AssertionError for an unrecognised type code.
        """
        code = self.r_byte()
        flag = code & FLAG_REF
        typecode = code & ~FLAG_REF
        self.level += 1

        def R_REF(obj: Any) -> Any:
            # Register obj in the ref table iff the type byte carried FLAG_REF.
            if flag:
                obj = self.r_ref(obj, flag)
            return obj

        if typecode == Type.NULL:
            return NULL
        elif typecode == Type.NONE:
            return None
        elif typecode == Type.STOPITER:
            return StopIteration
        elif typecode == Type.ELLIPSIS:
            return Ellipsis
        elif typecode == Type.FALSE:
            return False
        elif typecode == Type.TRUE:
            return True
        elif typecode == Type.INT:
            return R_REF(self.r_long())
        elif typecode == Type.INT64:
            return R_REF(self.r_long64())
        elif typecode == Type.LONG:
            return R_REF(self.r_PyLong())
        elif typecode == Type.FLOAT:
            return R_REF(self.r_float_str())
        elif typecode == Type.BINARY_FLOAT:
            return R_REF(self.r_float_bin())
        elif typecode == Type.COMPLEX:
            # Real part is stored first, then the imaginary part.
            return R_REF(complex(self.r_float_str(),
                                 self.r_float_str()))
        elif typecode == Type.BINARY_COMPLEX:
            return R_REF(complex(self.r_float_bin(),
                                 self.r_float_bin()))
        elif typecode == Type.STRING:
            n = self.r_long()
            return R_REF(self.r_string(n))
        elif typecode == Type.ASCII_INTERNED or typecode == Type.ASCII:
            n = self.r_long()
            return R_REF(self.r_string(n).decode("ascii"))
        elif (typecode == Type.SHORT_ASCII_INTERNED
              or typecode == Type.SHORT_ASCII):
            n = self.r_byte()
            return R_REF(self.r_string(n).decode("ascii"))
        elif typecode == Type.INTERNED or typecode == Type.UNICODE:
            n = self.r_long()
            return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
        elif typecode == Type.SMALL_TUPLE:
            n = self.r_byte()
            # Immutable container: reserve the ref slot before reading the
            # items, fill it once the tuple exists.
            idx = self.r_ref_reserve(flag)
            retval: Any = tuple(self.r_object() for _ in range(n))
            self.r_ref_insert(retval, idx, flag)
            return retval
        elif typecode == Type.TUPLE:
            n = self.r_long()
            idx = self.r_ref_reserve(flag)
            retval = tuple(self.r_object() for _ in range(n))
            self.r_ref_insert(retval, idx, flag)
            return retval
        elif typecode == Type.LIST:
            n = self.r_long()
            # Mutable container: register first, then fill in place.
            retval = R_REF([])
            for _ in range(n):
                retval.append(self.r_object())
            return retval
        elif typecode == Type.DICT:
            retval = R_REF({})
            while True:
                key = self.r_object()
                if key is NULL:  # a NULL key terminates the dict
                    break
                val = self.r_object()
                retval[key] = val
            return retval
        elif typecode == Type.SET:
            n = self.r_long()
            retval = R_REF(set())
            for _ in range(n):
                v = self.r_object()
                retval.add(v)
            return retval
        elif typecode == Type.FROZENSET:
            n = self.r_long()
            s: set[Any] = set()
            idx = self.r_ref_reserve(flag)
            for _ in range(n):
                v = self.r_object()
                s.add(v)
            retval = frozenset(s)
            self.r_ref_insert(retval, idx, flag)
            return retval
        elif typecode == Type.CODE:
            # Registered before its fields are read; fields are consumed in
            # the order they appear in the stream.
            retval = R_REF(Code())
            retval.co_argcount = self.r_long()
            retval.co_posonlyargcount = self.r_long()
            retval.co_kwonlyargcount = self.r_long()
            retval.co_stacksize = self.r_long()
            retval.co_flags = self.r_long()
            retval.co_code = self.r_object()
            retval.co_consts = self.r_object()
            retval.co_names = self.r_object()
            retval.co_localsplusnames = self.r_object()
            retval.co_localspluskinds = self.r_object()
            retval.co_filename = self.r_object()
            retval.co_name = self.r_object()
            retval.co_qualname = self.r_object()
            retval.co_firstlineno = self.r_long()
            retval.co_linetable = self.r_object()
            retval.co_exceptiontable = self.r_object()
            return retval
        elif typecode == Type.REF:
            n = self.r_long()
            retval = self.refs[n]
            # None here means the slot was reserved but never filled.
            assert retval is not None
            return retval
        else:
            raise AssertionError(
                f"Unknown type {typecode} {chr(typecode)!r}"
            )


def loads(data: bytes) -> Any:
    """Decode one marshalled object from *data* and return it.

    Code objects are returned as ``Code`` instances rather than real code
    objects.
    """
    assert isinstance(data, bytes)
    r = Reader(data)
    return r.r_object()


def main() -> None:
    """Self-test: round-trip a small container and this function's own code."""
    # Test
    import marshal
    import pprint
    sample = {'foo': {(42, "bar", 3.14)}}
    data = marshal.dumps(sample)
    retval = loads(data)
    assert retval == sample, retval
    sample = main.__code__
    data = marshal.dumps(sample)
    retval = loads(data)
    assert isinstance(retval, Code), retval
    pprint.pprint(retval.__dict__)


if __name__ == "__main__":
    main()