17db96d56Sopenharmony_ci'''"Executable documentation" for the pickle module.
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_ciExtensive comments about the pickle protocols and pickle-machine opcodes
47db96d56Sopenharmony_cican be found here.  Some functions meant for external use:
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_cigenops(pickle)
77db96d56Sopenharmony_ci   Generate all the opcodes in a pickle, as (opcode, arg, position) triples.
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_cidis(pickle, out=None, memo=None, indentlevel=4)
107db96d56Sopenharmony_ci   Print a symbolic disassembly of a pickle.
117db96d56Sopenharmony_ci'''
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ciimport codecs
147db96d56Sopenharmony_ciimport io
157db96d56Sopenharmony_ciimport pickle
167db96d56Sopenharmony_ciimport re
177db96d56Sopenharmony_ciimport sys
187db96d56Sopenharmony_ci
# Names exported by ``from <this module> import *``.
__all__ = ['dis', 'genops', 'optimize']

# Local alias for pickle.bytes_types, so this module can refer to it without
# the ``pickle.`` qualifier.
bytes_types = pickle.bytes_types
227db96d56Sopenharmony_ci
237db96d56Sopenharmony_ci# Other ideas:
247db96d56Sopenharmony_ci#
257db96d56Sopenharmony_ci# - A pickle verifier:  read a pickle and check it exhaustively for
267db96d56Sopenharmony_ci#   well-formedness.  dis() does a lot of this already.
277db96d56Sopenharmony_ci#
287db96d56Sopenharmony_ci# - A protocol identifier:  examine a pickle and return its protocol number
297db96d56Sopenharmony_ci#   (== the highest .proto attr value among all the opcodes in the pickle).
307db96d56Sopenharmony_ci#   dis() already prints this info at the end.
317db96d56Sopenharmony_ci#
327db96d56Sopenharmony_ci# - A pickle optimizer:  for example, tuple-building code is sometimes more
337db96d56Sopenharmony_ci#   elaborate than necessary, catering for the possibility that the tuple
347db96d56Sopenharmony_ci#   is recursive.  Or lots of times a PUT is generated that's never accessed
357db96d56Sopenharmony_ci#   by a later GET.
367db96d56Sopenharmony_ci
377db96d56Sopenharmony_ci
387db96d56Sopenharmony_ci# "A pickle" is a program for a virtual pickle machine (PM, but more accurately
397db96d56Sopenharmony_ci# called an unpickling machine).  It's a sequence of opcodes, interpreted by the
407db96d56Sopenharmony_ci# PM, building an arbitrarily complex Python object.
417db96d56Sopenharmony_ci#
427db96d56Sopenharmony_ci# For the most part, the PM is very simple:  there are no looping, testing, or
437db96d56Sopenharmony_ci# conditional instructions, no arithmetic and no function calls.  Opcodes are
447db96d56Sopenharmony_ci# executed once each, from first to last, until a STOP opcode is reached.
457db96d56Sopenharmony_ci#
467db96d56Sopenharmony_ci# The PM has two data areas, "the stack" and "the memo".
477db96d56Sopenharmony_ci#
487db96d56Sopenharmony_ci# Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
497db96d56Sopenharmony_ci# integer object on the stack, whose value is gotten from a decimal string
507db96d56Sopenharmony_ci# literal immediately following the INT opcode in the pickle bytestream.  Other
517db96d56Sopenharmony_ci# opcodes take Python objects off the stack.  The result of unpickling is
527db96d56Sopenharmony_ci# whatever object is left on the stack when the final STOP opcode is executed.
537db96d56Sopenharmony_ci#
547db96d56Sopenharmony_ci# The memo is simply an array of objects, or it can be implemented as a dict
557db96d56Sopenharmony_ci# mapping little integers to objects.  The memo serves as the PM's "long term
567db96d56Sopenharmony_ci# memory", and the little integers indexing the memo are akin to variable
577db96d56Sopenharmony_ci# names.  Some opcodes pop a stack object into the memo at a given index,
587db96d56Sopenharmony_ci# and others push a memo object at a given index onto the stack again.
597db96d56Sopenharmony_ci#
607db96d56Sopenharmony_ci# At heart, that's all the PM has.  Subtleties arise for these reasons:
617db96d56Sopenharmony_ci#
627db96d56Sopenharmony_ci# + Object identity.  Objects can be arbitrarily complex, and subobjects
637db96d56Sopenharmony_ci#   may be shared (for example, the list [a, a] refers to the same object a
647db96d56Sopenharmony_ci#   twice).  It can be vital that unpickling recreate an isomorphic object
657db96d56Sopenharmony_ci#   graph, faithfully reproducing sharing.
667db96d56Sopenharmony_ci#
677db96d56Sopenharmony_ci# + Recursive objects.  For example, after "L = []; L.append(L)", L is a
687db96d56Sopenharmony_ci#   list, and L[0] is the same list.  This is related to the object identity
697db96d56Sopenharmony_ci#   point, and some sequences of pickle opcodes are subtle in order to
707db96d56Sopenharmony_ci#   get the right result in all cases.
717db96d56Sopenharmony_ci#
727db96d56Sopenharmony_ci# + Things pickle doesn't know everything about.  Examples of things pickle
737db96d56Sopenharmony_ci#   does know everything about are Python's builtin scalar and container
747db96d56Sopenharmony_ci#   types, like ints and tuples.  They generally have opcodes dedicated to
757db96d56Sopenharmony_ci#   them.  For things like module references and instances of user-defined
767db96d56Sopenharmony_ci#   classes, pickle's knowledge is limited.  Historically, many enhancements
777db96d56Sopenharmony_ci#   have been made to the pickle protocol in order to do a better (faster,
787db96d56Sopenharmony_ci#   and/or more compact) job on those.
797db96d56Sopenharmony_ci#
807db96d56Sopenharmony_ci# + Backward compatibility and micro-optimization.  As explained below,
817db96d56Sopenharmony_ci#   pickle opcodes never go away, not even when better ways to do a thing
827db96d56Sopenharmony_ci#   get invented.  The repertoire of the PM just keeps growing over time.
837db96d56Sopenharmony_ci#   For example, protocol 0 had two opcodes for building Python integers (INT
847db96d56Sopenharmony_ci#   and LONG), protocol 1 added three more for more-efficient pickling of short
857db96d56Sopenharmony_ci#   integers, and protocol 2 added two more for more-efficient pickling of
867db96d56Sopenharmony_ci#   long integers (before protocol 2, the only ways to pickle a Python long
877db96d56Sopenharmony_ci#   took time quadratic in the number of digits, for both pickling and
887db96d56Sopenharmony_ci#   unpickling).  "Opcode bloat" isn't so much a subtlety as a source of
897db96d56Sopenharmony_ci#   wearying complication.
907db96d56Sopenharmony_ci#
917db96d56Sopenharmony_ci#
927db96d56Sopenharmony_ci# Pickle protocols:
937db96d56Sopenharmony_ci#
947db96d56Sopenharmony_ci# For compatibility, the meaning of a pickle opcode never changes.  Instead new
957db96d56Sopenharmony_ci# pickle opcodes get added, and each version's unpickler can handle all the
967db96d56Sopenharmony_ci# pickle opcodes in all protocol versions to date.  So old pickles continue to
977db96d56Sopenharmony_ci# be readable forever.  The pickler can generally be told to restrict itself to
987db96d56Sopenharmony_ci# the subset of opcodes available under previous protocol versions too, so that
997db96d56Sopenharmony_ci# users can create pickles under the current version readable by older
1007db96d56Sopenharmony_ci# versions.  However, a pickle does not contain its version number embedded
1017db96d56Sopenharmony_ci# within it.  If an older unpickler tries to read a pickle using a later
1027db96d56Sopenharmony_ci# protocol, the result is most likely an exception due to seeing an unknown (in
1037db96d56Sopenharmony_ci# the older unpickler) opcode.
1047db96d56Sopenharmony_ci#
1057db96d56Sopenharmony_ci# The original pickle used what's now called "protocol 0", and what was called
1067db96d56Sopenharmony_ci# "text mode" before Python 2.3.  The entire pickle bytestream is made up of
1077db96d56Sopenharmony_ci# printable 7-bit ASCII characters, plus the newline character, in protocol 0.
1087db96d56Sopenharmony_ci# That's why it was called text mode.  Protocol 0 is small and elegant, but
1097db96d56Sopenharmony_ci# sometimes painfully inefficient.
1107db96d56Sopenharmony_ci#
1117db96d56Sopenharmony_ci# The second major set of additions is now called "protocol 1", and was called
1127db96d56Sopenharmony_ci# "binary mode" before Python 2.3.  This added many opcodes with arguments
1137db96d56Sopenharmony_ci# consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
1147db96d56Sopenharmony_ci# bytes.  Binary mode pickles can be substantially smaller than equivalent
1157db96d56Sopenharmony_ci# text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
1167db96d56Sopenharmony_ci# int as 4 bytes following the opcode, which is cheaper to unpickle than the
1177db96d56Sopenharmony_ci# (perhaps) 11-character decimal string attached to INT.  Protocol 1 also added
1187db96d56Sopenharmony_ci# a number of opcodes that operate on many stack elements at once (like APPENDS
1197db96d56Sopenharmony_ci# and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
1207db96d56Sopenharmony_ci#
1217db96d56Sopenharmony_ci# The third major set of additions came in Python 2.3, and is called "protocol
1227db96d56Sopenharmony_ci# 2".  This added:
1237db96d56Sopenharmony_ci#
1247db96d56Sopenharmony_ci# - A better way to pickle instances of new-style classes (NEWOBJ).
1257db96d56Sopenharmony_ci#
1267db96d56Sopenharmony_ci# - A way for a pickle to identify its protocol (PROTO).
1277db96d56Sopenharmony_ci#
1287db96d56Sopenharmony_ci# - Time- and space- efficient pickling of long ints (LONG{1,4}).
1297db96d56Sopenharmony_ci#
# - Shortcuts for small tuples (TUPLE{1,2,3}).
1317db96d56Sopenharmony_ci#
1327db96d56Sopenharmony_ci# - Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
1337db96d56Sopenharmony_ci#
1347db96d56Sopenharmony_ci# - The "extension registry", a vector of popular objects that can be pushed
1357db96d56Sopenharmony_ci#   efficiently by index (EXT{1,2,4}).  This is akin to the memo and GET, but
1367db96d56Sopenharmony_ci#   the registry contents are predefined (there's nothing akin to the memo's
1377db96d56Sopenharmony_ci#   PUT).
1387db96d56Sopenharmony_ci#
1397db96d56Sopenharmony_ci# Another independent change with Python 2.3 is the abandonment of any
1407db96d56Sopenharmony_ci# pretense that it might be safe to load pickles received from untrusted
1417db96d56Sopenharmony_ci# parties -- no sufficient security analysis has been done to guarantee
1427db96d56Sopenharmony_ci# this and there isn't a use case that warrants the expense of such an
1437db96d56Sopenharmony_ci# analysis.
1447db96d56Sopenharmony_ci#
1457db96d56Sopenharmony_ci# To this end, all tests for __safe_for_unpickling__ or for
1467db96d56Sopenharmony_ci# copyreg.safe_constructors are removed from the unpickling code.
1477db96d56Sopenharmony_ci# References to these variables in the descriptions below are to be seen
1487db96d56Sopenharmony_ci# as describing unpickling in Python 2.2 and before.
1497db96d56Sopenharmony_ci
1507db96d56Sopenharmony_ci
1517db96d56Sopenharmony_ci# Meta-rule:  Descriptions are stored in instances of descriptor objects,
1527db96d56Sopenharmony_ci# with plain constructors.  No meta-language is defined from which
1537db96d56Sopenharmony_ci# descriptors could be constructed.  If you want, e.g., XML, write a little
1547db96d56Sopenharmony_ci# program to generate XML from the objects.
1557db96d56Sopenharmony_ci
1567db96d56Sopenharmony_ci##############################################################################
1577db96d56Sopenharmony_ci# Some pickle opcodes have an argument, following the opcode in the
1587db96d56Sopenharmony_ci# bytestream.  An argument is of a specific type, described by an instance
1597db96d56Sopenharmony_ci# of ArgumentDescriptor.  These are not to be confused with arguments taken
1607db96d56Sopenharmony_ci# off the stack -- ArgumentDescriptor applies only to arguments embedded in
1617db96d56Sopenharmony_ci# the opcode stream, immediately following an opcode.
1627db96d56Sopenharmony_ci
# Sentinel "lengths" for ArgumentDescriptor.n.  A non-negative n is a fixed
# byte count; each negative value below marks a variable-length argument.

# Represents the number of bytes consumed by an argument delimited by the
# next newline character.
UP_TO_NEWLINE = -1

# Represents the number of bytes consumed by a two-argument opcode where
# the first argument gives the number of bytes in the second argument.
TAKEN_FROM_ARGUMENT1  = -2   # num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4  = -3   # num bytes is 4-byte signed little-endian int
TAKEN_FROM_ARGUMENT4U = -4   # num bytes is 4-byte unsigned little-endian int
TAKEN_FROM_ARGUMENT8U = -5   # num bytes is 8-byte unsigned little-endian int
1737db96d56Sopenharmony_ci
class ArgumentDescriptor:
    """Record describing one kind of argument embedded in the opcode stream.

    Instances only hold data: a name, a byte length (or a negative
    variable-length marker), a reader callable and a documentation string.
    The constructor sanity-checks the name, length and doc with asserts.
    """

    __slots__ = (
        # Name of the descriptor record, which is also the name of the
        # module global it is bound to; a string.
        'name',

        # Length of the argument in bytes; an int.  Variable-length cases use
        # the negative markers UP_TO_NEWLINE and
        # TAKEN_FROM_ARGUMENT{1,4,4U,8U}.
        'n',

        # Callable taking a file-like object.  It reads this kind of argument
        # at the current position, advances the position past the argument,
        # and returns the argument's value.
        'reader',

        # Human-readable documentation for this descriptor; a string.
        'doc',
    )

    def __init__(self, name, n, reader, doc):
        assert isinstance(name, str)
        self.name = name

        # Either a fixed, non-negative width or one of the known
        # variable-length markers.  The marker tuple is only consulted when
        # n is negative.
        assert isinstance(n, int) and (
            n >= 0
            or n in (
                UP_TO_NEWLINE,
                TAKEN_FROM_ARGUMENT1,
                TAKEN_FROM_ARGUMENT4,
                TAKEN_FROM_ARGUMENT4U,
                TAKEN_FROM_ARGUMENT8U,
            )
        )
        self.n = n

        # The reader is stored as given; it is not validated here.
        self.reader = reader

        assert isinstance(doc, str)
        self.doc = doc
2097db96d56Sopenharmony_ci
2107db96d56Sopenharmony_cifrom struct import unpack as _unpack
2117db96d56Sopenharmony_ci
def read_uint1(f):
    r"""Read one byte from f and return it as an unsigned integer.

    Raises ValueError if f has no data left.

    >>> import io
    >>> read_uint1(io.BytesIO(b'\xff'))
    255
    """

    octet = f.read(1)
    if not octet:
        raise ValueError("not enough data in stream to read uint1")
    # The first element of the data read is returned directly.
    return octet[0]
2237db96d56Sopenharmony_ci
# Fixed-width descriptor: one byte, parsed by read_uint1.
uint1 = ArgumentDescriptor(
    name='uint1',
    n=1,
    reader=read_uint1,
    doc="One-byte unsigned integer.",
)
2297db96d56Sopenharmony_ci
2307db96d56Sopenharmony_ci
def read_uint2(f):
    r"""Read a two-byte little-endian unsigned integer from f.

    Raises ValueError if fewer than two bytes could be read.

    >>> import io
    >>> read_uint2(io.BytesIO(b'\xff\x00'))
    255
    >>> read_uint2(io.BytesIO(b'\xff\xff'))
    65535
    """

    raw = f.read(2)
    if len(raw) != 2:
        raise ValueError("not enough data in stream to read uint2")
    (value,) = _unpack("<H", raw)
    return value
2447db96d56Sopenharmony_ci
# Fixed-width descriptor: two bytes, parsed by read_uint2.
uint2 = ArgumentDescriptor(
    name='uint2',
    n=2,
    reader=read_uint2,
    doc="Two-byte unsigned integer, little-endian.",
)
2507db96d56Sopenharmony_ci
2517db96d56Sopenharmony_ci
def read_int4(f):
    r"""Read a four-byte little-endian signed integer from f.

    Raises ValueError if fewer than four bytes could be read.

    >>> import io
    >>> read_int4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_int4(io.BytesIO(b'\x00\x00\x00\x80')) == -(2**31)
    True
    """

    raw = f.read(4)
    if len(raw) != 4:
        raise ValueError("not enough data in stream to read int4")
    (value,) = _unpack("<i", raw)
    return value
2657db96d56Sopenharmony_ci
# Fixed-width descriptor: four bytes, parsed by read_int4.
int4 = ArgumentDescriptor(
    name='int4',
    n=4,
    reader=read_int4,
    doc="Four-byte signed integer, little-endian, 2's complement.",
)
2717db96d56Sopenharmony_ci
2727db96d56Sopenharmony_ci
def read_uint4(f):
    r"""Read a four-byte little-endian unsigned integer from f.

    Raises ValueError if fewer than four bytes could be read.

    >>> import io
    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
    True
    """

    raw = f.read(4)
    if len(raw) != 4:
        raise ValueError("not enough data in stream to read uint4")
    (value,) = _unpack("<I", raw)
    return value
2867db96d56Sopenharmony_ci
# Fixed-width descriptor: four bytes, parsed by read_uint4.
uint4 = ArgumentDescriptor(
    name='uint4',
    n=4,
    reader=read_uint4,
    doc="Four-byte unsigned integer, little-endian.",
)
2927db96d56Sopenharmony_ci
2937db96d56Sopenharmony_ci
def read_uint8(f):
    r"""Read an eight-byte little-endian unsigned integer from f.

    Raises ValueError if fewer than eight bytes could be read.

    >>> import io
    >>> read_uint8(io.BytesIO(b'\xff\x00\x00\x00\x00\x00\x00\x00'))
    255
    >>> read_uint8(io.BytesIO(b'\xff' * 8)) == 2**64-1
    True
    """

    raw = f.read(8)
    if len(raw) != 8:
        raise ValueError("not enough data in stream to read uint8")
    (value,) = _unpack("<Q", raw)
    return value
3077db96d56Sopenharmony_ci
# Fixed-width descriptor: eight bytes, parsed by read_uint8.
uint8 = ArgumentDescriptor(
    name='uint8',
    n=8,
    reader=read_uint8,
    doc="Eight-byte unsigned integer, little-endian.",
)
3137db96d56Sopenharmony_ci
3147db96d56Sopenharmony_ci
def read_stringnl(f, decode=True, stripquotes=True):
    r"""Read one newline-terminated string argument from f.

    f is a binary file-like object; one line is consumed via f.readline()
    and its trailing newline is dropped.

    If stripquotes is true, the remaining data must be wrapped in a matching
    pair of single or double quotes, which are removed.  If decode is true,
    backslash escapes are undone and the result is decoded as ASCII into a
    str; otherwise the raw bytes are returned.

    Raises ValueError if the line has no terminating newline, if the quotes
    are missing or unbalanced (including a lone quote character), or if the
    decoded data is not valid (UnicodeDecodeError is a ValueError subclass).

    >>> import io
    >>> read_stringnl(io.BytesIO(b"'abcd'\nefg\n"))
    'abcd'

    >>> read_stringnl(io.BytesIO(b"\n"))
    Traceback (most recent call last):
    ...
    ValueError: no string quotes around b''

    >>> read_stringnl(io.BytesIO(b"\n"), stripquotes=False)
    ''

    >>> read_stringnl(io.BytesIO(b"''\n"))
    ''

    >>> read_stringnl(io.BytesIO(b'"abcd"'))
    Traceback (most recent call last):
    ...
    ValueError: no newline found when trying to read stringnl

    Embedded escapes are undone in the result.
    >>> read_stringnl(io.BytesIO(br"'a\n\\b\x00c\td'" + b"\n'e'"))
    'a\n\\b\x00c\td'
    """

    data = f.readline()
    if not data.endswith(b'\n'):
        raise ValueError("no newline found when trying to read stringnl")
    data = data[:-1]    # lose the newline

    if stripquotes:
        for q in (b'"', b"'"):
            if data.startswith(q):
                # A lone quote character both starts and ends with q, so the
                # length check is needed to insist on a separate closing
                # quote instead of accepting it as an empty string.
                if len(data) < 2 or not data.endswith(q):
                    raise ValueError("string quote %r not found at both "
                                     "ends of %r" % (q, data))
                data = data[1:-1]
                break
        else:
            # Neither quote character opens the data.
            raise ValueError("no string quotes around %r" % data)

    if decode:
        # escape_decode returns a (bytes, consumed) pair; only the bytes
        # matter here.
        data = codecs.escape_decode(data)[0].decode("ascii")
    return data
3617db96d56Sopenharmony_ci
# Variable-length descriptor (UP_TO_NEWLINE): a quoted, escaped string parsed
# by read_stringnl.
stringnl = ArgumentDescriptor(
               name='stringnl',
               n=UP_TO_NEWLINE,
               reader=read_stringnl,
               doc="""A newline-terminated string.

                   This is a repr-style string, with embedded escapes, and
                   bracketing quotes.
                   """)
3717db96d56Sopenharmony_ci
def read_stringnl_noescape(f):
    """Read a newline-terminated string from f without expecting quotes.

    This delegates to read_stringnl with quote stripping turned off.  The
    decode step stays enabled, so the result is still passed through
    read_stringnl's escape/ASCII decoding and returned as a str.
    """
    return read_stringnl(f, decode=True, stripquotes=False)
3747db96d56Sopenharmony_ci
# Variable-length descriptor (UP_TO_NEWLINE): an unquoted string parsed by
# read_stringnl_noescape.
stringnl_noescape = ArgumentDescriptor(
                        name='stringnl_noescape',
                        n=UP_TO_NEWLINE,
                        reader=read_stringnl_noescape,
                        doc="""A newline-terminated string.

                        This is a str-style string, without embedded escapes,
                        or bracketing quotes.  It should consist solely of
                        printable ASCII characters.
                        """)
3857db96d56Sopenharmony_ci
def read_stringnl_noescape_pair(f):
    r"""Read two consecutive newline-terminated strings from f.

    Both lines are read with read_stringnl_noescape, in stream order, and
    returned as a single string joined by one blank.

    >>> import io
    >>> read_stringnl_noescape_pair(io.BytesIO(b"Queue\nEmpty\njunk"))
    'Queue Empty'
    """

    first = read_stringnl_noescape(f)
    second = read_stringnl_noescape(f)
    return "%s %s" % (first, second)
3947db96d56Sopenharmony_ci
# Variable-length descriptor (UP_TO_NEWLINE): two unquoted lines parsed by
# read_stringnl_noescape_pair.
stringnl_noescape_pair = ArgumentDescriptor(
                             name='stringnl_noescape_pair',
                             n=UP_TO_NEWLINE,
                             reader=read_stringnl_noescape_pair,
                             doc="""A pair of newline-terminated strings.

                             These are str-style strings, without embedded
                             escapes, or bracketing quotes.  They should
                             consist solely of printable ASCII characters.
                             The pair is returned as a single string, with
                             a single blank separating the two strings.
                             """)
4077db96d56Sopenharmony_ci
4087db96d56Sopenharmony_ci
def read_string1(f):
    r"""Read a string prefixed by a one-byte length from f.

    The length is read with read_uint1, then that many bytes are read and
    decoded as latin-1.  Raises ValueError if the stream ends early.

    >>> import io
    >>> read_string1(io.BytesIO(b"\x00"))
    ''
    >>> read_string1(io.BytesIO(b"\x03abcdef"))
    'abc'
    """

    size = read_uint1(f)
    assert size >= 0
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a string1, but only %d remain" %
                         (size, len(payload)))
    return payload.decode("latin-1")
4257db96d56Sopenharmony_ci
# Variable-length descriptor (TAKEN_FROM_ARGUMENT1): a counted string parsed
# by read_string1.
string1 = ArgumentDescriptor(
              name="string1",
              n=TAKEN_FROM_ARGUMENT1,
              reader=read_string1,
              doc="""A counted string.

              The first argument is a 1-byte unsigned int giving the number
              of bytes in the string, and the second argument is that many
              bytes.
              """)
4367db96d56Sopenharmony_ci
4377db96d56Sopenharmony_ci
def read_string4(f):
    r"""Read a string prefixed by a four-byte signed length from f.

    The length is read with read_int4; a negative length is rejected.  That
    many bytes are then read and decoded as latin-1.  Raises ValueError for
    a negative length or if the stream ends early.

    >>> import io
    >>> read_string4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    ''
    >>> read_string4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    'abc'
    >>> read_string4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a string4, but only 6 remain
    """

    size = read_int4(f)
    if size < 0:
        raise ValueError("string4 byte count < 0: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a string4, but only %d remain" %
                         (size, len(payload)))
    return payload.decode("latin-1")
4597db96d56Sopenharmony_ci
# Variable-length descriptor (TAKEN_FROM_ARGUMENT4): a counted string parsed
# by read_string4.
string4 = ArgumentDescriptor(
              name="string4",
              n=TAKEN_FROM_ARGUMENT4,
              reader=read_string4,
              doc="""A counted string.

              The first argument is a 4-byte little-endian signed int giving
              the number of bytes in the string, and the second argument is
              that many bytes.
              """)
4707db96d56Sopenharmony_ci
4717db96d56Sopenharmony_ci
def read_bytes1(f):
    r"""Read a bytes string prefixed by a one-byte length from f.

    The length is read with read_uint1, then that many bytes are read and
    returned undecoded.  Raises ValueError if the stream ends early.

    >>> import io
    >>> read_bytes1(io.BytesIO(b"\x00"))
    b''
    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
    b'abc'
    """

    size = read_uint1(f)
    assert size >= 0
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
                         (size, len(payload)))
    return payload
4887db96d56Sopenharmony_ci
# Variable-length descriptor (TAKEN_FROM_ARGUMENT1): counted bytes parsed by
# read_bytes1.
bytes1 = ArgumentDescriptor(
              name="bytes1",
              n=TAKEN_FROM_ARGUMENT1,
              reader=read_bytes1,
              doc="""A counted bytes string.

              The first argument is a 1-byte unsigned int giving the number
              of bytes, and the second argument is that many bytes.
              """)
4987db96d56Sopenharmony_ci
4997db96d56Sopenharmony_ci
def read_bytes4(f):
    r"""Read a bytes string prefixed by a four-byte unsigned length from f.

    The length is read with read_uint4 and rejected if it exceeds
    sys.maxsize.  That many bytes are then read and returned undecoded.
    Raises ValueError for an oversized length or if the stream ends early.

    >>> import io
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    b'abc'
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
    """

    size = read_uint4(f)
    assert size >= 0
    if size > sys.maxsize:
        raise ValueError("bytes4 byte count > sys.maxsize: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
                         (size, len(payload)))
    return payload
5227db96d56Sopenharmony_ci
# Variable-length descriptor (TAKEN_FROM_ARGUMENT4U): counted bytes parsed by
# read_bytes4.
bytes4 = ArgumentDescriptor(
              name="bytes4",
              n=TAKEN_FROM_ARGUMENT4U,
              reader=read_bytes4,
              doc="""A counted bytes string.

              The first argument is a 4-byte little-endian unsigned int giving
              the number of bytes, and the second argument is that many bytes.
              """)
5327db96d56Sopenharmony_ci
5337db96d56Sopenharmony_ci
def read_bytes8(f):
    r"""Read a counted bytes string whose length comes from read_uint8().

    The count is read first, then exactly that many bytes are read from
    *f* and returned unchanged.  ValueError is raised when the count is
    larger than sys.maxsize or when fewer bytes than announced remain.

    >>> import io, struct, sys
    >>> read_bytes8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes8(io.BytesIO(b"\x03\x00\x00\x00\x00\x00\x00\x00abcdef"))
    b'abc'
    >>> bigsize8 = struct.pack("<Q", sys.maxsize//3)
    >>> read_bytes8(io.BytesIO(bigsize8 + b"abcdef"))  #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ValueError: expected ... bytes in a bytes8, but only 6 remain
    """

    size = read_uint8(f)
    assert size >= 0
    if size > sys.maxsize:
        raise ValueError("bytes8 byte count > sys.maxsize: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        # Short read: the stream ended before the announced length.
        raise ValueError("expected %d bytes in a bytes8, but only %d remain" %
                         (size, len(payload)))
    return payload
5577db96d56Sopenharmony_ci
# Descriptor record tying the "bytes8" argument format to read_bytes8.
bytes8 = ArgumentDescriptor(
    name="bytes8",
    n=TAKEN_FROM_ARGUMENT8U,
    reader=read_bytes8,
    doc="""A counted bytes string.

              The first argument is an 8-byte little-endian unsigned int giving
              the number of bytes, and the second argument is that many bytes.
              """,
)
5677db96d56Sopenharmony_ci
5687db96d56Sopenharmony_ci
def read_bytearray8(f):
    r"""Read a counted bytearray whose length comes from read_uint8().

    The count is read first, then exactly that many bytes are read from
    *f* and returned wrapped in a new bytearray.  ValueError is raised
    when the count is larger than sys.maxsize or when fewer bytes than
    announced remain.

    >>> import io, struct, sys
    >>> read_bytearray8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x00\x00abc"))
    bytearray(b'')
    >>> read_bytearray8(io.BytesIO(b"\x03\x00\x00\x00\x00\x00\x00\x00abcdef"))
    bytearray(b'abc')
    >>> bigsize8 = struct.pack("<Q", sys.maxsize//3)
    >>> read_bytearray8(io.BytesIO(bigsize8 + b"abcdef"))  #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ValueError: expected ... bytes in a bytearray8, but only 6 remain
    """

    size = read_uint8(f)
    assert size >= 0
    if size > sys.maxsize:
        raise ValueError("bytearray8 byte count > sys.maxsize: %d" % size)
    payload = f.read(size)
    if len(payload) != size:
        # Short read: the stream ended before the announced length.
        raise ValueError("expected %d bytes in a bytearray8, but only %d remain" %
                         (size, len(payload)))
    return bytearray(payload)
5927db96d56Sopenharmony_ci
# Descriptor record tying the "bytearray8" argument format to
# read_bytearray8.
bytearray8 = ArgumentDescriptor(
    name="bytearray8",
    n=TAKEN_FROM_ARGUMENT8U,
    reader=read_bytearray8,
    doc="""A counted bytearray.

              The first argument is an 8-byte little-endian unsigned int giving
              the number of bytes, and the second argument is that many bytes.
              """,
)
6027db96d56Sopenharmony_ci
def read_unicodestringnl(f):
    r"""Read one newline-terminated line from *f* and decode it to str.

    The trailing newline is dropped and the remaining bytes are decoded
    with the 'raw-unicode-escape' codec.  ValueError is raised when the
    line obtained from f.readline() does not end with a newline.

    >>> import io
    >>> read_unicodestringnl(io.BytesIO(b"abc\\uabcd\njunk")) == 'abc\uabcd'
    True
    """

    line = f.readline()
    if line.endswith(b'\n'):
        # Drop the terminator before decoding.
        return str(line[:-1], 'raw-unicode-escape')
    raise ValueError("no newline found when trying to read "
                     "unicodestringnl")
6167db96d56Sopenharmony_ci
# Descriptor record tying the 'unicodestringnl' argument format to
# read_unicodestringnl.
unicodestringnl = ArgumentDescriptor(
    name='unicodestringnl',
    n=UP_TO_NEWLINE,
    reader=read_unicodestringnl,
    doc="""A newline-terminated Unicode string.

                      This is raw-unicode-escape encoded, so consists of
                      printable ASCII characters, and may contain embedded
                      escape sequences.
                      """,
)
6277db96d56Sopenharmony_ci
6287db96d56Sopenharmony_ci
def read_unicodestring1(f):
    r"""Read a counted UTF-8 string whose length comes from read_uint1().

    The count gives the number of encoded bytes.  Those bytes are decoded
    as UTF-8 with the 'surrogatepass' error handler.  ValueError is raised
    when fewer bytes than announced remain in *f*.

    >>> import io
    >>> s = 'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    b'abcd\xea\xaf\x8d'
    >>> n = bytes([len(enc)])  # little-endian 1-byte length
    >>> t = read_unicodestring1(io.BytesIO(n + enc + b'junk'))
    >>> s == t
    True

    >>> read_unicodestring1(io.BytesIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring1, but only 6 remain
    """

    length = read_uint1(f)
    assert length >= 0
    encoded = f.read(length)
    if len(encoded) != length:
        # Short read: the stream ended before the announced length.
        raise ValueError("expected %d bytes in a unicodestring1, but only %d "
                         "remain" % (length, len(encoded)))
    return str(encoded, 'utf-8', 'surrogatepass')
6547db96d56Sopenharmony_ci
# Descriptor record tying the "unicodestring1" argument format to
# read_unicodestring1.  That reader obtains the length via read_uint1(),
# so the doc text describes the count as unsigned.
unicodestring1 = ArgumentDescriptor(
                    name="unicodestring1",
                    n=TAKEN_FROM_ARGUMENT1,
                    reader=read_unicodestring1,
                    doc="""A counted Unicode string.

                    The first argument is a 1-byte little-endian unsigned int
                    giving the number of bytes in the string, and the second
                    argument-- the UTF-8 encoding of the Unicode string --
                    contains that many bytes.
                    """)
6667db96d56Sopenharmony_ci
6677db96d56Sopenharmony_ci
def read_unicodestring4(f):
    r"""Read a counted UTF-8 string whose length comes from read_uint4().

    The count gives the number of encoded bytes.  Those bytes are decoded
    as UTF-8 with the 'surrogatepass' error handler.  ValueError is raised
    when the count exceeds sys.maxsize or when fewer bytes than announced
    remain in *f*.

    >>> import io
    >>> s = 'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    b'abcd\xea\xaf\x8d'
    >>> n = bytes([len(enc), 0, 0, 0])  # little-endian 4-byte length
    >>> t = read_unicodestring4(io.BytesIO(n + enc + b'junk'))
    >>> s == t
    True

    >>> read_unicodestring4(io.BytesIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
    """

    length = read_uint4(f)
    assert length >= 0
    if length > sys.maxsize:
        raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % length)
    encoded = f.read(length)
    if len(encoded) != length:
        # Short read: the stream ended before the announced length.
        raise ValueError("expected %d bytes in a unicodestring4, but only %d "
                         "remain" % (length, len(encoded)))
    return str(encoded, 'utf-8', 'surrogatepass')
6957db96d56Sopenharmony_ci
# Descriptor record tying the "unicodestring4" argument format to
# read_unicodestring4.  That reader obtains the length via read_uint4(),
# so the doc text describes the count as unsigned.
unicodestring4 = ArgumentDescriptor(
                    name="unicodestring4",
                    n=TAKEN_FROM_ARGUMENT4U,
                    reader=read_unicodestring4,
                    doc="""A counted Unicode string.

                    The first argument is a 4-byte little-endian unsigned int
                    giving the number of bytes in the string, and the second
                    argument-- the UTF-8 encoding of the Unicode string --
                    contains that many bytes.
                    """)
7077db96d56Sopenharmony_ci
7087db96d56Sopenharmony_ci
def read_unicodestring8(f):
    r"""Read a counted UTF-8 string whose length comes from read_uint8().

    The count gives the number of encoded bytes.  Those bytes are decoded
    as UTF-8 with the 'surrogatepass' error handler.  ValueError is raised
    when the count exceeds sys.maxsize or when fewer bytes than announced
    remain in *f*.

    >>> import io
    >>> s = 'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    b'abcd\xea\xaf\x8d'
    >>> n = bytes([len(enc)]) + b'\0' * 7  # little-endian 8-byte length
    >>> t = read_unicodestring8(io.BytesIO(n + enc + b'junk'))
    >>> s == t
    True

    >>> read_unicodestring8(io.BytesIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring8, but only 6 remain
    """

    length = read_uint8(f)
    assert length >= 0
    if length > sys.maxsize:
        raise ValueError("unicodestring8 byte count > sys.maxsize: %d" % length)
    encoded = f.read(length)
    if len(encoded) != length:
        # Short read: the stream ended before the announced length.
        raise ValueError("expected %d bytes in a unicodestring8, but only %d "
                         "remain" % (length, len(encoded)))
    return str(encoded, 'utf-8', 'surrogatepass')
7367db96d56Sopenharmony_ci
# Descriptor record tying the "unicodestring8" argument format to
# read_unicodestring8.  That reader obtains the length via read_uint8(),
# so the doc text describes the count as unsigned.
unicodestring8 = ArgumentDescriptor(
                    name="unicodestring8",
                    n=TAKEN_FROM_ARGUMENT8U,
                    reader=read_unicodestring8,
                    doc="""A counted Unicode string.

                    The first argument is an 8-byte little-endian unsigned int
                    giving the number of bytes in the string, and the second
                    argument-- the UTF-8 encoding of the Unicode string --
                    contains that many bytes.
                    """)
7487db96d56Sopenharmony_ci
7497db96d56Sopenharmony_ci
def read_decimalnl_short(f):
    r"""Read a newline-terminated decimal literal and return its value.

    The raw line is fetched with read_stringnl() (undecoded, quotes left
    alone).  The exact spellings b"00" and b"01" yield False and True;
    anything else is handed to int(), which raises ValueError on input
    such as a trailing 'L'.

    >>> import io
    >>> read_decimalnl_short(io.BytesIO(b"1234\n56"))
    1234

    >>> read_decimalnl_short(io.BytesIO(b"1234L\n56"))
    Traceback (most recent call last):
    ...
    ValueError: invalid literal for int() with base 10: b'1234L'
    """

    literal = read_stringnl(f, decode=False, stripquotes=False)

    # Two-character forms are reserved for the booleans.
    if literal == b"00":
        return False
    if literal == b"01":
        return True
    return int(literal)
7717db96d56Sopenharmony_ci
def read_decimalnl_long(f):
    r"""Read a newline-terminated decimal literal, tolerating a final 'L'.

    The raw line is fetched with read_stringnl() (undecoded, quotes left
    alone); a single trailing b'L' is removed if present and the rest is
    converted with int().

    >>> import io

    >>> read_decimalnl_long(io.BytesIO(b"1234L\n56"))
    1234

    >>> read_decimalnl_long(io.BytesIO(b"123456789012345678901234L\n6"))
    123456789012345678901234
    """

    literal = read_stringnl(f, decode=False, stripquotes=False)
    has_suffix = literal[-1:] == b'L'
    return int(literal[:-1] if has_suffix else literal)
7877db96d56Sopenharmony_ci
7887db96d56Sopenharmony_ci
# Descriptor record tying the 'decimalnl_short' argument format to
# read_decimalnl_short.
decimalnl_short = ArgumentDescriptor(
    name='decimalnl_short',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_short,
    doc="""A newline-terminated decimal integer literal.

                          This never has a trailing 'L', and the integer fit
                          in a short Python int on the box where the pickle
                          was written -- but there's no guarantee it will fit
                          in a short Python int on the box where the pickle
                          is read.
                          """,
)
8017db96d56Sopenharmony_ci
# Descriptor record tying the 'decimalnl_long' argument format to
# read_decimalnl_long.
decimalnl_long = ArgumentDescriptor(
    name='decimalnl_long',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_long,
    doc="""A newline-terminated decimal integer literal.

                         This has a trailing 'L', and can represent integers
                         of any size.
                         """,
)
8117db96d56Sopenharmony_ci
8127db96d56Sopenharmony_ci
def read_floatnl(f):
    r"""Read a newline-terminated literal from *f* and convert it with float().

    The raw line is fetched with read_stringnl() (undecoded, quotes left
    alone) and passed straight to float().

    >>> import io
    >>> read_floatnl(io.BytesIO(b"-1.25\n6"))
    -1.25
    """
    return float(read_stringnl(f, decode=False, stripquotes=False))
8217db96d56Sopenharmony_ci
# Descriptor record tying the 'floatnl' argument format to read_floatnl.
floatnl = ArgumentDescriptor(
    name='floatnl',
    n=UP_TO_NEWLINE,
    reader=read_floatnl,
    doc="""A newline-terminated decimal floating literal.

              In general this requires 17 significant digits for roundtrip
              identity, and pickling then unpickling infinities, NaNs, and
              minus zero doesn't work across boxes, or on some boxes even
              on itself (e.g., Windows can't read the strings it produces
              for infinities or NaNs).
              """,
)
8347db96d56Sopenharmony_ci
def read_float8(f):
    r"""Read 8 bytes from *f* and unpack them with the ">d" struct format.

    ValueError is raised when fewer than 8 bytes can be read.

    >>> import io, struct
    >>> raw = struct.pack(">d", -1.25)
    >>> raw
    b'\xbf\xf4\x00\x00\x00\x00\x00\x00'
    >>> read_float8(io.BytesIO(raw + b"\n"))
    -1.25
    """

    raw = f.read(8)
    if len(raw) != 8:
        raise ValueError("not enough data in stream to read float8")
    (value,) = _unpack(">d", raw)
    return value
8497db96d56Sopenharmony_ci
8507db96d56Sopenharmony_ci
# Descriptor record tying the fixed-width 'float8' argument format to
# read_float8.
float8 = ArgumentDescriptor(
    name='float8',
    n=8,
    reader=read_float8,
    doc="""An 8-byte binary representation of a float, big-endian.

             The format is unique to Python, and shared with the struct
             module (format string '>d') "in theory" (the struct and pickle
             implementations don't share the code -- they should).  It's
             strongly related to the IEEE-754 double format, and, in normal
             cases, is in fact identical to the big-endian 754 double format.
             On other boxes the dynamic range is limited to that of a 754
             double, and "add a half and chop" rounding is used to reduce
             the precision to 53 bits.  However, even on a 754 box,
             infinities, NaNs, and minus zero may not be handled correctly
             (may not survive roundtrip pickling intact).
             """,
)
8687db96d56Sopenharmony_ci
8697db96d56Sopenharmony_ci# Protocol 2 formats
8707db96d56Sopenharmony_ci
8717db96d56Sopenharmony_cifrom pickle import decode_long
8727db96d56Sopenharmony_ci
def read_long1(f):
    r"""Read a size via read_uint1(), then decode that many bytes as an int.

    The payload is converted with pickle.decode_long().  ValueError is
    raised when fewer bytes than announced remain in *f*.

    >>> import io
    >>> read_long1(io.BytesIO(b"\x00"))
    0
    >>> read_long1(io.BytesIO(b"\x02\xff\x00"))
    255
    >>> read_long1(io.BytesIO(b"\x02\xff\x7f"))
    32767
    >>> read_long1(io.BytesIO(b"\x02\x00\xff"))
    -256
    >>> read_long1(io.BytesIO(b"\x02\x00\x80"))
    -32768
    """

    size = read_uint1(f)
    raw = f.read(size)
    if len(raw) == size:
        return decode_long(raw)
    raise ValueError("not enough data in stream to read long1")
8937db96d56Sopenharmony_ci
# Descriptor record tying the "long1" argument format to read_long1.
long1 = ArgumentDescriptor(
    name="long1", n=TAKEN_FROM_ARGUMENT1, reader=read_long1,
    doc="""A binary long, little-endian, using 1-byte size.

    This first reads one byte as an unsigned size, then reads that
    many bytes and interprets them as a little-endian 2's-complement long.
    If the size is 0, that's taken as a shortcut for the long 0L.
    """,
)
9047db96d56Sopenharmony_ci
def read_long4(f):
    r"""Read a size via read_int4(), then decode that many bytes as an int.

    The payload is converted with pickle.decode_long().  ValueError is
    raised when the size read is negative or when fewer bytes than
    announced remain in *f*.

    >>> import io
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x00"))
    255
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x7f"))
    32767
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\xff"))
    -256
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\x80"))
    -32768
    >>> read_long4(io.BytesIO(b"\x00\x00\x00\x00"))
    0
    """

    n = read_int4(f)
    if n < 0:
        # read_int4() yields a signed value; a negative size is malformed.
        raise ValueError("long4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("not enough data in stream to read long4")
    return decode_long(data)
9277db96d56Sopenharmony_ci
# Descriptor record tying the "long4" argument format to read_long4.
long4 = ArgumentDescriptor(
    name="long4", n=TAKEN_FROM_ARGUMENT4, reader=read_long4,
    doc="""A binary representation of a long, little-endian.

    This first reads four bytes as a signed size (but requires the
    size to be >= 0), then reads that many bytes and interprets them
    as a little-endian 2's-complement long.  If the size is 0, that's taken
    as a shortcut for the int 0, although LONG1 should really be used
    then instead (and in any case where # of bytes < 256).
    """,
)
9407db96d56Sopenharmony_ci
9417db96d56Sopenharmony_ci
9427db96d56Sopenharmony_ci##############################################################################
9437db96d56Sopenharmony_ci# Object descriptors.  The stack used by the pickle machine holds objects,
9447db96d56Sopenharmony_ci# and in the stack_before and stack_after attributes of OpcodeInfo
9457db96d56Sopenharmony_ci# descriptors we need names to describe the various types of objects that can
9467db96d56Sopenharmony_ci# appear on the stack.
9477db96d56Sopenharmony_ci
class StackObject:
    """Descriptive record for one kind of object found on the pickle stack.

    An instance only stores metadata (name, obtype, doc); the constructor
    checks the argument types with assertions.  repr() of an instance is
    its name.
    """

    __slots__ = (
        'name',    # name of descriptor record, for info only
        'obtype',  # a type object, or a tuple of type objects (meaning the
                   # described object can be of any type in the tuple)
        'doc',     # human-readable docs for this kind of stack object
    )

    def __init__(self, name, obtype, doc):
        # Check all three arguments up front, then store them.
        assert isinstance(name, str)
        assert isinstance(obtype, (type, tuple))
        if isinstance(obtype, tuple):
            assert all(isinstance(member, type) for member in obtype)
        assert isinstance(doc, str)

        self.name = name
        self.obtype = obtype
        self.doc = doc

    def __repr__(self):
        return self.name
9767db96d56Sopenharmony_ci
9777db96d56Sopenharmony_ci
# StackObject records for the built-in scalar, string and container types.

pyint = StackObject(name='int', obtype=int, doc="A Python integer object.")
pylong = pyint  # second name bound to the very same record

pyinteger_or_bool = StackObject(
    name='int_or_bool', obtype=(int, bool),
    doc="A Python integer or boolean object.")

pybool = StackObject(name='bool', obtype=bool, doc="A Python boolean object.")

pyfloat = StackObject(name='float', obtype=float,
                      doc="A Python float object.")

pybytes_or_str = StackObject(
    name='bytes_or_str', obtype=(bytes, str),
    doc="A Python bytes or (Unicode) string object.")
pystring = pybytes_or_str  # second name bound to the very same record

pybytes = StackObject(name='bytes', obtype=bytes,
                      doc="A Python bytes object.")

pybytearray = StackObject(name='bytearray', obtype=bytearray,
                          doc="A Python bytearray object.")

pyunicode = StackObject(name='str', obtype=str,
                        doc="A Python (Unicode) string object.")

pynone = StackObject(name="None", obtype=type(None),
                     doc="The Python None object.")

pytuple = StackObject(name="tuple", obtype=tuple,
                      doc="A Python tuple object.")

pylist = StackObject(name="list", obtype=list, doc="A Python list object.")

pydict = StackObject(name="dict", obtype=dict, doc="A Python dict object.")

pyset = StackObject(name="set", obtype=set, doc="A Python set object.")
10427db96d56Sopenharmony_ci
# obtype is frozenset (not set) so that the record's type agrees with its
# name and doc text.
pyfrozenset = StackObject(
    name="frozenset",
    obtype=frozenset,
    doc="A Python frozenset object.")
10477db96d56Sopenharmony_ci
# StackObject records for buffers, arbitrary objects, and the two
# bookkeeping entries (mark and stack slice) used with variable-length
# opcodes.

pybuffer = StackObject(name='buffer', obtype=object,
                       doc="A Python buffer-like object.")

anyobject = StackObject(name='any', obtype=object,
                        doc="Any kind of object whatsoever.")

markobject = StackObject(
    name="mark", obtype=StackObject,
    doc="""'The mark' is a unique object.

Opcodes that operate on a variable number of objects
generally don't embed the count of objects in the opcode,
or pull it off the stack.  Instead the MARK opcode is used
to push a special marker object on the stack, and then
some other opcodes grab all the objects from the top of
the stack down to (but not including) the topmost marker
object.
""")

stackslice = StackObject(
    name="stackslice", obtype=StackObject,
    doc="""An object representing a contiguous slice of the stack.

This is used in conjunction with markobject, to represent all
of the stack following the topmost markobject.  For example,
the POP_MARK opcode changes the stack from

    [..., markobject, stackslice]
to
    [...]

No matter how many object are on the stack after the topmost
markobject, POP_MARK gets rid of all of them (including the
topmost markobject too).
""")
10897db96d56Sopenharmony_ci
10907db96d56Sopenharmony_ci##############################################################################
10917db96d56Sopenharmony_ci# Descriptors for pickle opcodes.
10927db96d56Sopenharmony_ci
class OpcodeInfo(object):
    """Descriptor for a single pickle opcode.

    An instance records the opcode's symbolic name, the one-character code
    that represents it in a pickle bytestream, how its embedded argument
    (if any) is decoded, the stack layout before and after it runs, the
    protocol that introduced it, and its human-readable documentation.

    The constructor sanity-checks every field with assertions.
    """

    __slots__ = (
        'name',          # str: symbolic name of the opcode
        'code',          # str of length 1: the opcode as it appears in the
                         # bytestream
        'arg',           # ArgumentDescriptor describing the argument embedded
                         # in the bytestream (arg.reader(s) reads and decodes
                         # it from bytestream s; arg.doc documents the raw
                         # argument bytes), or None when the opcode has no
                         # embedded argument
        'stack_before',  # list of StackObject: stack layout before the
                         # opcode runs
        'stack_after',   # list of StackObject: stack layout after the
                         # opcode runs
        'proto',         # int: protocol number that introduced the opcode
        'doc',           # str: human-readable documentation
    )

    def __init__(self, name, code, arg,
                 stack_before, stack_after, proto, doc):
        assert isinstance(name, str)
        self.name = name

        # The opcode is spelled with exactly one character.
        assert isinstance(code, str) and len(code) == 1
        self.code = code

        # Argument-less opcodes pass None; anything else must be a descriptor.
        if arg is not None:
            assert isinstance(arg, ArgumentDescriptor)
        self.arg = arg

        # Both stack descriptions must be lists made up solely of
        # StackObject instances; check and store them in parameter order.
        for slot, stack in (('stack_before', stack_before),
                            ('stack_after', stack_after)):
            assert isinstance(stack, list)
            for entry in stack:
                assert isinstance(entry, StackObject)
            setattr(self, slot, stack)

        # The protocol must be one the pickle module knows about.
        assert isinstance(proto, int)
        assert 0 <= proto <= pickle.HIGHEST_PROTOCOL
        self.proto = proto

        assert isinstance(doc, str)
        self.doc = doc
11517db96d56Sopenharmony_ci
# Short alias for OpcodeInfo, used to spell each entry of the opcodes table.
I = OpcodeInfo
11537db96d56Sopenharmony_ciopcodes = [
11547db96d56Sopenharmony_ci
11557db96d56Sopenharmony_ci    # Ways to spell integers.
11567db96d56Sopenharmony_ci
11577db96d56Sopenharmony_ci    I(name='INT',
11587db96d56Sopenharmony_ci      code='I',
11597db96d56Sopenharmony_ci      arg=decimalnl_short,
11607db96d56Sopenharmony_ci      stack_before=[],
11617db96d56Sopenharmony_ci      stack_after=[pyinteger_or_bool],
11627db96d56Sopenharmony_ci      proto=0,
11637db96d56Sopenharmony_ci      doc="""Push an integer or bool.
11647db96d56Sopenharmony_ci
11657db96d56Sopenharmony_ci      The argument is a newline-terminated decimal literal string.
11667db96d56Sopenharmony_ci
11677db96d56Sopenharmony_ci      The intent may have been that this always fit in a short Python int,
11687db96d56Sopenharmony_ci      but INT can be generated in pickles written on a 64-bit box that
11697db96d56Sopenharmony_ci      require a Python long on a 32-bit box.  The difference between this
11707db96d56Sopenharmony_ci      and LONG then is that INT skips a trailing 'L', and produces a short
11717db96d56Sopenharmony_ci      int whenever possible.
11727db96d56Sopenharmony_ci
11737db96d56Sopenharmony_ci      Another difference is due to that, when bool was introduced as a
11747db96d56Sopenharmony_ci      distinct type in 2.3, builtin names True and False were also added to
11757db96d56Sopenharmony_ci      2.2.2, mapping to ints 1 and 0.  For compatibility in both directions,
11767db96d56Sopenharmony_ci      True gets pickled as INT + "I01\\n", and False as INT + "I00\\n".
11777db96d56Sopenharmony_ci      Leading zeroes are never produced for a genuine integer.  The 2.3
11787db96d56Sopenharmony_ci      (and later) unpicklers special-case these and return bool instead;
11797db96d56Sopenharmony_ci      earlier unpicklers ignore the leading "0" and return the int.
11807db96d56Sopenharmony_ci      """),
11817db96d56Sopenharmony_ci
11827db96d56Sopenharmony_ci    I(name='BININT',
11837db96d56Sopenharmony_ci      code='J',
11847db96d56Sopenharmony_ci      arg=int4,
11857db96d56Sopenharmony_ci      stack_before=[],
11867db96d56Sopenharmony_ci      stack_after=[pyint],
11877db96d56Sopenharmony_ci      proto=1,
11887db96d56Sopenharmony_ci      doc="""Push a four-byte signed integer.
11897db96d56Sopenharmony_ci
11907db96d56Sopenharmony_ci      This handles the full range of Python (short) integers on a 32-bit
11917db96d56Sopenharmony_ci      box, directly as binary bytes (1 for the opcode and 4 for the integer).
11927db96d56Sopenharmony_ci      If the integer is non-negative and fits in 1 or 2 bytes, pickling via
11937db96d56Sopenharmony_ci      BININT1 or BININT2 saves space.
11947db96d56Sopenharmony_ci      """),
11957db96d56Sopenharmony_ci
11967db96d56Sopenharmony_ci    I(name='BININT1',
11977db96d56Sopenharmony_ci      code='K',
11987db96d56Sopenharmony_ci      arg=uint1,
11997db96d56Sopenharmony_ci      stack_before=[],
12007db96d56Sopenharmony_ci      stack_after=[pyint],
12017db96d56Sopenharmony_ci      proto=1,
12027db96d56Sopenharmony_ci      doc="""Push a one-byte unsigned integer.
12037db96d56Sopenharmony_ci
12047db96d56Sopenharmony_ci      This is a space optimization for pickling very small non-negative ints,
12057db96d56Sopenharmony_ci      in range(256).
12067db96d56Sopenharmony_ci      """),
12077db96d56Sopenharmony_ci
12087db96d56Sopenharmony_ci    I(name='BININT2',
12097db96d56Sopenharmony_ci      code='M',
12107db96d56Sopenharmony_ci      arg=uint2,
12117db96d56Sopenharmony_ci      stack_before=[],
12127db96d56Sopenharmony_ci      stack_after=[pyint],
12137db96d56Sopenharmony_ci      proto=1,
12147db96d56Sopenharmony_ci      doc="""Push a two-byte unsigned integer.
12157db96d56Sopenharmony_ci
12167db96d56Sopenharmony_ci      This is a space optimization for pickling small positive ints, in
12177db96d56Sopenharmony_ci      range(256, 2**16).  Integers in range(256) can also be pickled via
12187db96d56Sopenharmony_ci      BININT2, but BININT1 instead saves a byte.
12197db96d56Sopenharmony_ci      """),
12207db96d56Sopenharmony_ci
12217db96d56Sopenharmony_ci    I(name='LONG',
12227db96d56Sopenharmony_ci      code='L',
12237db96d56Sopenharmony_ci      arg=decimalnl_long,
12247db96d56Sopenharmony_ci      stack_before=[],
12257db96d56Sopenharmony_ci      stack_after=[pyint],
12267db96d56Sopenharmony_ci      proto=0,
12277db96d56Sopenharmony_ci      doc="""Push a long integer.
12287db96d56Sopenharmony_ci
12297db96d56Sopenharmony_ci      The same as INT, except that the literal ends with 'L', and always
      unpickles to a Python long.  There doesn't seem to be a real purpose
      to the trailing 'L'.
12327db96d56Sopenharmony_ci
12337db96d56Sopenharmony_ci      Note that LONG takes time quadratic in the number of digits when
12347db96d56Sopenharmony_ci      unpickling (this is simply due to the nature of decimal->binary
12357db96d56Sopenharmony_ci      conversion).  Proto 2 added linear-time (in C; still quadratic-time
12367db96d56Sopenharmony_ci      in Python) LONG1 and LONG4 opcodes.
12377db96d56Sopenharmony_ci      """),
12387db96d56Sopenharmony_ci
12397db96d56Sopenharmony_ci    I(name="LONG1",
12407db96d56Sopenharmony_ci      code='\x8a',
12417db96d56Sopenharmony_ci      arg=long1,
12427db96d56Sopenharmony_ci      stack_before=[],
12437db96d56Sopenharmony_ci      stack_after=[pyint],
12447db96d56Sopenharmony_ci      proto=2,
12457db96d56Sopenharmony_ci      doc="""Long integer using one-byte length.
12467db96d56Sopenharmony_ci
12477db96d56Sopenharmony_ci      A more efficient encoding of a Python long; the long1 encoding
12487db96d56Sopenharmony_ci      says it all."""),
12497db96d56Sopenharmony_ci
12507db96d56Sopenharmony_ci    I(name="LONG4",
12517db96d56Sopenharmony_ci      code='\x8b',
12527db96d56Sopenharmony_ci      arg=long4,
12537db96d56Sopenharmony_ci      stack_before=[],
12547db96d56Sopenharmony_ci      stack_after=[pyint],
12557db96d56Sopenharmony_ci      proto=2,
      doc="""Long integer using four-byte length.
12577db96d56Sopenharmony_ci
12587db96d56Sopenharmony_ci      A more efficient encoding of a Python long; the long4 encoding
12597db96d56Sopenharmony_ci      says it all."""),
12607db96d56Sopenharmony_ci
12617db96d56Sopenharmony_ci    # Ways to spell strings (8-bit, not Unicode).
12627db96d56Sopenharmony_ci
12637db96d56Sopenharmony_ci    I(name='STRING',
12647db96d56Sopenharmony_ci      code='S',
12657db96d56Sopenharmony_ci      arg=stringnl,
12667db96d56Sopenharmony_ci      stack_before=[],
12677db96d56Sopenharmony_ci      stack_after=[pybytes_or_str],
12687db96d56Sopenharmony_ci      proto=0,
12697db96d56Sopenharmony_ci      doc="""Push a Python string object.
12707db96d56Sopenharmony_ci
12717db96d56Sopenharmony_ci      The argument is a repr-style string, with bracketing quote characters,
12727db96d56Sopenharmony_ci      and perhaps embedded escapes.  The argument extends until the next
      newline character.  These are usually decoded into a str instance
      using the encoding given to the Unpickler constructor, or the default,
      'ASCII'.  If the encoding given was 'bytes' however, they will be
      decoded as a bytes object instead.
12777db96d56Sopenharmony_ci      """),
12787db96d56Sopenharmony_ci
12797db96d56Sopenharmony_ci    I(name='BINSTRING',
12807db96d56Sopenharmony_ci      code='T',
12817db96d56Sopenharmony_ci      arg=string4,
12827db96d56Sopenharmony_ci      stack_before=[],
12837db96d56Sopenharmony_ci      stack_after=[pybytes_or_str],
12847db96d56Sopenharmony_ci      proto=1,
12857db96d56Sopenharmony_ci      doc="""Push a Python string object.
12867db96d56Sopenharmony_ci
12877db96d56Sopenharmony_ci      There are two arguments: the first is a 4-byte little-endian
12887db96d56Sopenharmony_ci      signed int giving the number of bytes in the string, and the
12897db96d56Sopenharmony_ci      second is that many bytes, which are taken literally as the string
      content.  These are usually decoded into a str instance using the
      encoding given to the Unpickler constructor, or the default,
      'ASCII'.  If the encoding given was 'bytes' however, they will be
      decoded as a bytes object instead.
12947db96d56Sopenharmony_ci      """),
12957db96d56Sopenharmony_ci
12967db96d56Sopenharmony_ci    I(name='SHORT_BINSTRING',
12977db96d56Sopenharmony_ci      code='U',
12987db96d56Sopenharmony_ci      arg=string1,
12997db96d56Sopenharmony_ci      stack_before=[],
13007db96d56Sopenharmony_ci      stack_after=[pybytes_or_str],
13017db96d56Sopenharmony_ci      proto=1,
13027db96d56Sopenharmony_ci      doc="""Push a Python string object.
13037db96d56Sopenharmony_ci
13047db96d56Sopenharmony_ci      There are two arguments: the first is a 1-byte unsigned int giving
13057db96d56Sopenharmony_ci      the number of bytes in the string, and the second is that many
      bytes, which are taken literally as the string content.  These are
      usually decoded into a str instance using the encoding given to
      the Unpickler constructor, or the default, 'ASCII'.  If the
      encoding given was 'bytes' however, they will be decoded as a bytes
      object instead.
13117db96d56Sopenharmony_ci      """),
13127db96d56Sopenharmony_ci
13137db96d56Sopenharmony_ci    # Bytes (protocol 3 and higher)
13147db96d56Sopenharmony_ci
13157db96d56Sopenharmony_ci    I(name='BINBYTES',
13167db96d56Sopenharmony_ci      code='B',
13177db96d56Sopenharmony_ci      arg=bytes4,
13187db96d56Sopenharmony_ci      stack_before=[],
13197db96d56Sopenharmony_ci      stack_after=[pybytes],
13207db96d56Sopenharmony_ci      proto=3,
13217db96d56Sopenharmony_ci      doc="""Push a Python bytes object.
13227db96d56Sopenharmony_ci
13237db96d56Sopenharmony_ci      There are two arguments:  the first is a 4-byte little-endian unsigned int
13247db96d56Sopenharmony_ci      giving the number of bytes, and the second is that many bytes, which are
13257db96d56Sopenharmony_ci      taken literally as the bytes content.
13267db96d56Sopenharmony_ci      """),
13277db96d56Sopenharmony_ci
13287db96d56Sopenharmony_ci    I(name='SHORT_BINBYTES',
13297db96d56Sopenharmony_ci      code='C',
13307db96d56Sopenharmony_ci      arg=bytes1,
13317db96d56Sopenharmony_ci      stack_before=[],
13327db96d56Sopenharmony_ci      stack_after=[pybytes],
13337db96d56Sopenharmony_ci      proto=3,
13347db96d56Sopenharmony_ci      doc="""Push a Python bytes object.
13357db96d56Sopenharmony_ci
13367db96d56Sopenharmony_ci      There are two arguments:  the first is a 1-byte unsigned int giving
13377db96d56Sopenharmony_ci      the number of bytes, and the second is that many bytes, which are taken
      literally as the bytes content.
13397db96d56Sopenharmony_ci      """),
13407db96d56Sopenharmony_ci
13417db96d56Sopenharmony_ci    I(name='BINBYTES8',
13427db96d56Sopenharmony_ci      code='\x8e',
13437db96d56Sopenharmony_ci      arg=bytes8,
13447db96d56Sopenharmony_ci      stack_before=[],
13457db96d56Sopenharmony_ci      stack_after=[pybytes],
13467db96d56Sopenharmony_ci      proto=4,
13477db96d56Sopenharmony_ci      doc="""Push a Python bytes object.
13487db96d56Sopenharmony_ci
13497db96d56Sopenharmony_ci      There are two arguments:  the first is an 8-byte unsigned int giving
      the number of bytes, and the second is that many bytes,
      which are taken literally as the bytes content.
13527db96d56Sopenharmony_ci      """),
13537db96d56Sopenharmony_ci
13547db96d56Sopenharmony_ci    # Bytearray (protocol 5 and higher)
13557db96d56Sopenharmony_ci
13567db96d56Sopenharmony_ci    I(name='BYTEARRAY8',
13577db96d56Sopenharmony_ci      code='\x96',
13587db96d56Sopenharmony_ci      arg=bytearray8,
13597db96d56Sopenharmony_ci      stack_before=[],
13607db96d56Sopenharmony_ci      stack_after=[pybytearray],
13617db96d56Sopenharmony_ci      proto=5,
13627db96d56Sopenharmony_ci      doc="""Push a Python bytearray object.
13637db96d56Sopenharmony_ci
13647db96d56Sopenharmony_ci      There are two arguments:  the first is an 8-byte unsigned int giving
13657db96d56Sopenharmony_ci      the number of bytes in the bytearray, and the second is that many bytes,
13667db96d56Sopenharmony_ci      which are taken literally as the bytearray content.
13677db96d56Sopenharmony_ci      """),
13687db96d56Sopenharmony_ci
13697db96d56Sopenharmony_ci    # Out-of-band buffer (protocol 5 and higher)
13707db96d56Sopenharmony_ci
13717db96d56Sopenharmony_ci    I(name='NEXT_BUFFER',
13727db96d56Sopenharmony_ci      code='\x97',
13737db96d56Sopenharmony_ci      arg=None,
13747db96d56Sopenharmony_ci      stack_before=[],
13757db96d56Sopenharmony_ci      stack_after=[pybuffer],
13767db96d56Sopenharmony_ci      proto=5,
13777db96d56Sopenharmony_ci      doc="Push an out-of-band buffer object."),
13787db96d56Sopenharmony_ci
13797db96d56Sopenharmony_ci    I(name='READONLY_BUFFER',
13807db96d56Sopenharmony_ci      code='\x98',
13817db96d56Sopenharmony_ci      arg=None,
13827db96d56Sopenharmony_ci      stack_before=[pybuffer],
13837db96d56Sopenharmony_ci      stack_after=[pybuffer],
13847db96d56Sopenharmony_ci      proto=5,
13857db96d56Sopenharmony_ci      doc="Make an out-of-band buffer object read-only."),
13867db96d56Sopenharmony_ci
13877db96d56Sopenharmony_ci    # Ways to spell None.
13887db96d56Sopenharmony_ci
13897db96d56Sopenharmony_ci    I(name='NONE',
13907db96d56Sopenharmony_ci      code='N',
13917db96d56Sopenharmony_ci      arg=None,
13927db96d56Sopenharmony_ci      stack_before=[],
13937db96d56Sopenharmony_ci      stack_after=[pynone],
13947db96d56Sopenharmony_ci      proto=0,
13957db96d56Sopenharmony_ci      doc="Push None on the stack."),
13967db96d56Sopenharmony_ci
13977db96d56Sopenharmony_ci    # Ways to spell bools, starting with proto 2.  See INT for how this was
13987db96d56Sopenharmony_ci    # done before proto 2.
13997db96d56Sopenharmony_ci
14007db96d56Sopenharmony_ci    I(name='NEWTRUE',
14017db96d56Sopenharmony_ci      code='\x88',
14027db96d56Sopenharmony_ci      arg=None,
14037db96d56Sopenharmony_ci      stack_before=[],
14047db96d56Sopenharmony_ci      stack_after=[pybool],
14057db96d56Sopenharmony_ci      proto=2,
14067db96d56Sopenharmony_ci      doc="Push True onto the stack."),
14077db96d56Sopenharmony_ci
14087db96d56Sopenharmony_ci    I(name='NEWFALSE',
14097db96d56Sopenharmony_ci      code='\x89',
14107db96d56Sopenharmony_ci      arg=None,
14117db96d56Sopenharmony_ci      stack_before=[],
14127db96d56Sopenharmony_ci      stack_after=[pybool],
14137db96d56Sopenharmony_ci      proto=2,
14147db96d56Sopenharmony_ci      doc="Push False onto the stack."),
14157db96d56Sopenharmony_ci
14167db96d56Sopenharmony_ci    # Ways to spell Unicode strings.
14177db96d56Sopenharmony_ci
14187db96d56Sopenharmony_ci    I(name='UNICODE',
14197db96d56Sopenharmony_ci      code='V',
14207db96d56Sopenharmony_ci      arg=unicodestringnl,
14217db96d56Sopenharmony_ci      stack_before=[],
14227db96d56Sopenharmony_ci      stack_after=[pyunicode],
14237db96d56Sopenharmony_ci      proto=0,  # this may be pure-text, but it's a later addition
14247db96d56Sopenharmony_ci      doc="""Push a Python Unicode string object.
14257db96d56Sopenharmony_ci
14267db96d56Sopenharmony_ci      The argument is a raw-unicode-escape encoding of a Unicode string,
14277db96d56Sopenharmony_ci      and so may contain embedded escape sequences.  The argument extends
14287db96d56Sopenharmony_ci      until the next newline character.
14297db96d56Sopenharmony_ci      """),
14307db96d56Sopenharmony_ci
14317db96d56Sopenharmony_ci    I(name='SHORT_BINUNICODE',
14327db96d56Sopenharmony_ci      code='\x8c',
14337db96d56Sopenharmony_ci      arg=unicodestring1,
14347db96d56Sopenharmony_ci      stack_before=[],
14357db96d56Sopenharmony_ci      stack_after=[pyunicode],
14367db96d56Sopenharmony_ci      proto=4,
14377db96d56Sopenharmony_ci      doc="""Push a Python Unicode string object.
14387db96d56Sopenharmony_ci
      There are two arguments:  the first is a 1-byte unsigned int
14407db96d56Sopenharmony_ci      giving the number of bytes in the string.  The second is that many
14417db96d56Sopenharmony_ci      bytes, and is the UTF-8 encoding of the Unicode string.
14427db96d56Sopenharmony_ci      """),
14437db96d56Sopenharmony_ci
14447db96d56Sopenharmony_ci    I(name='BINUNICODE',
14457db96d56Sopenharmony_ci      code='X',
14467db96d56Sopenharmony_ci      arg=unicodestring4,
14477db96d56Sopenharmony_ci      stack_before=[],
14487db96d56Sopenharmony_ci      stack_after=[pyunicode],
14497db96d56Sopenharmony_ci      proto=1,
14507db96d56Sopenharmony_ci      doc="""Push a Python Unicode string object.
14517db96d56Sopenharmony_ci
14527db96d56Sopenharmony_ci      There are two arguments:  the first is a 4-byte little-endian unsigned int
14537db96d56Sopenharmony_ci      giving the number of bytes in the string.  The second is that many
14547db96d56Sopenharmony_ci      bytes, and is the UTF-8 encoding of the Unicode string.
14557db96d56Sopenharmony_ci      """),
14567db96d56Sopenharmony_ci
14577db96d56Sopenharmony_ci    I(name='BINUNICODE8',
14587db96d56Sopenharmony_ci      code='\x8d',
14597db96d56Sopenharmony_ci      arg=unicodestring8,
14607db96d56Sopenharmony_ci      stack_before=[],
14617db96d56Sopenharmony_ci      stack_after=[pyunicode],
14627db96d56Sopenharmony_ci      proto=4,
14637db96d56Sopenharmony_ci      doc="""Push a Python Unicode string object.
14647db96d56Sopenharmony_ci
      There are two arguments:  the first is an 8-byte little-endian unsigned int
14667db96d56Sopenharmony_ci      giving the number of bytes in the string.  The second is that many
14677db96d56Sopenharmony_ci      bytes, and is the UTF-8 encoding of the Unicode string.
14687db96d56Sopenharmony_ci      """),
14697db96d56Sopenharmony_ci
14707db96d56Sopenharmony_ci    # Ways to spell floats.
14717db96d56Sopenharmony_ci
14727db96d56Sopenharmony_ci    I(name='FLOAT',
14737db96d56Sopenharmony_ci      code='F',
14747db96d56Sopenharmony_ci      arg=floatnl,
14757db96d56Sopenharmony_ci      stack_before=[],
14767db96d56Sopenharmony_ci      stack_after=[pyfloat],
14777db96d56Sopenharmony_ci      proto=0,
14787db96d56Sopenharmony_ci      doc="""Newline-terminated decimal float literal.
14797db96d56Sopenharmony_ci
14807db96d56Sopenharmony_ci      The argument is repr(a_float), and in general requires 17 significant
14817db96d56Sopenharmony_ci      digits for roundtrip conversion to be an identity (this is so for
14827db96d56Sopenharmony_ci      IEEE-754 double precision values, which is what Python float maps to
14837db96d56Sopenharmony_ci      on most boxes).
14847db96d56Sopenharmony_ci
14857db96d56Sopenharmony_ci      In general, FLOAT cannot be used to transport infinities, NaNs, or
14867db96d56Sopenharmony_ci      minus zero across boxes (or even on a single box, if the platform C
14877db96d56Sopenharmony_ci      library can't read the strings it produces for such things -- Windows
14887db96d56Sopenharmony_ci      is like that), but may do less damage than BINFLOAT on boxes with
14897db96d56Sopenharmony_ci      greater precision or dynamic range than IEEE-754 double.
14907db96d56Sopenharmony_ci      """),
14917db96d56Sopenharmony_ci
14927db96d56Sopenharmony_ci    I(name='BINFLOAT',
14937db96d56Sopenharmony_ci      code='G',
14947db96d56Sopenharmony_ci      arg=float8,
14957db96d56Sopenharmony_ci      stack_before=[],
14967db96d56Sopenharmony_ci      stack_after=[pyfloat],
14977db96d56Sopenharmony_ci      proto=1,
14987db96d56Sopenharmony_ci      doc="""Float stored in binary form, with 8 bytes of data.
14997db96d56Sopenharmony_ci
15007db96d56Sopenharmony_ci      This generally requires less than half the space of FLOAT encoding.
15017db96d56Sopenharmony_ci      In general, BINFLOAT cannot be used to transport infinities, NaNs, or
15027db96d56Sopenharmony_ci      minus zero, raises an exception if the exponent exceeds the range of
15037db96d56Sopenharmony_ci      an IEEE-754 double, and retains no more than 53 bits of precision (if
15047db96d56Sopenharmony_ci      there are more than that, "add a half and chop" rounding is used to
15057db96d56Sopenharmony_ci      cut it back to 53 significant bits).
15067db96d56Sopenharmony_ci      """),
15077db96d56Sopenharmony_ci
15087db96d56Sopenharmony_ci    # Ways to build lists.
15097db96d56Sopenharmony_ci
15107db96d56Sopenharmony_ci    I(name='EMPTY_LIST',
15117db96d56Sopenharmony_ci      code=']',
15127db96d56Sopenharmony_ci      arg=None,
15137db96d56Sopenharmony_ci      stack_before=[],
15147db96d56Sopenharmony_ci      stack_after=[pylist],
15157db96d56Sopenharmony_ci      proto=1,
15167db96d56Sopenharmony_ci      doc="Push an empty list."),
15177db96d56Sopenharmony_ci
15187db96d56Sopenharmony_ci    I(name='APPEND',
15197db96d56Sopenharmony_ci      code='a',
15207db96d56Sopenharmony_ci      arg=None,
15217db96d56Sopenharmony_ci      stack_before=[pylist, anyobject],
15227db96d56Sopenharmony_ci      stack_after=[pylist],
15237db96d56Sopenharmony_ci      proto=0,
15247db96d56Sopenharmony_ci      doc="""Append an object to a list.
15257db96d56Sopenharmony_ci
15267db96d56Sopenharmony_ci      Stack before:  ... pylist anyobject
15277db96d56Sopenharmony_ci      Stack after:   ... pylist+[anyobject]
15287db96d56Sopenharmony_ci
15297db96d56Sopenharmony_ci      although pylist is really extended in-place.
15307db96d56Sopenharmony_ci      """),
15317db96d56Sopenharmony_ci
15327db96d56Sopenharmony_ci    I(name='APPENDS',
15337db96d56Sopenharmony_ci      code='e',
15347db96d56Sopenharmony_ci      arg=None,
15357db96d56Sopenharmony_ci      stack_before=[pylist, markobject, stackslice],
15367db96d56Sopenharmony_ci      stack_after=[pylist],
15377db96d56Sopenharmony_ci      proto=1,
15387db96d56Sopenharmony_ci      doc="""Extend a list by a slice of stack objects.
15397db96d56Sopenharmony_ci
15407db96d56Sopenharmony_ci      Stack before:  ... pylist markobject stackslice
15417db96d56Sopenharmony_ci      Stack after:   ... pylist+stackslice
15427db96d56Sopenharmony_ci
15437db96d56Sopenharmony_ci      although pylist is really extended in-place.
15447db96d56Sopenharmony_ci      """),
15457db96d56Sopenharmony_ci
15467db96d56Sopenharmony_ci    I(name='LIST',
15477db96d56Sopenharmony_ci      code='l',
15487db96d56Sopenharmony_ci      arg=None,
15497db96d56Sopenharmony_ci      stack_before=[markobject, stackslice],
15507db96d56Sopenharmony_ci      stack_after=[pylist],
15517db96d56Sopenharmony_ci      proto=0,
15527db96d56Sopenharmony_ci      doc="""Build a list out of the topmost stack slice, after markobject.
15537db96d56Sopenharmony_ci
15547db96d56Sopenharmony_ci      All the stack entries following the topmost markobject are placed into
15557db96d56Sopenharmony_ci      a single Python list, which single list object replaces all of the
15567db96d56Sopenharmony_ci      stack from the topmost markobject onward.  For example,
15577db96d56Sopenharmony_ci
15587db96d56Sopenharmony_ci      Stack before: ... markobject 1 2 3 'abc'
15597db96d56Sopenharmony_ci      Stack after:  ... [1, 2, 3, 'abc']
15607db96d56Sopenharmony_ci      """),
15617db96d56Sopenharmony_ci
15627db96d56Sopenharmony_ci    # Ways to build tuples.
15637db96d56Sopenharmony_ci
15647db96d56Sopenharmony_ci    I(name='EMPTY_TUPLE',
15657db96d56Sopenharmony_ci      code=')',
15667db96d56Sopenharmony_ci      arg=None,
15677db96d56Sopenharmony_ci      stack_before=[],
15687db96d56Sopenharmony_ci      stack_after=[pytuple],
15697db96d56Sopenharmony_ci      proto=1,
15707db96d56Sopenharmony_ci      doc="Push an empty tuple."),
15717db96d56Sopenharmony_ci
15727db96d56Sopenharmony_ci    I(name='TUPLE',
15737db96d56Sopenharmony_ci      code='t',
15747db96d56Sopenharmony_ci      arg=None,
15757db96d56Sopenharmony_ci      stack_before=[markobject, stackslice],
15767db96d56Sopenharmony_ci      stack_after=[pytuple],
15777db96d56Sopenharmony_ci      proto=0,
15787db96d56Sopenharmony_ci      doc="""Build a tuple out of the topmost stack slice, after markobject.
15797db96d56Sopenharmony_ci
15807db96d56Sopenharmony_ci      All the stack entries following the topmost markobject are placed into
15817db96d56Sopenharmony_ci      a single Python tuple, which single tuple object replaces all of the
15827db96d56Sopenharmony_ci      stack from the topmost markobject onward.  For example,
15837db96d56Sopenharmony_ci
15847db96d56Sopenharmony_ci      Stack before: ... markobject 1 2 3 'abc'
15857db96d56Sopenharmony_ci      Stack after:  ... (1, 2, 3, 'abc')
15867db96d56Sopenharmony_ci      """),
15877db96d56Sopenharmony_ci
15887db96d56Sopenharmony_ci    I(name='TUPLE1',
15897db96d56Sopenharmony_ci      code='\x85',
15907db96d56Sopenharmony_ci      arg=None,
15917db96d56Sopenharmony_ci      stack_before=[anyobject],
15927db96d56Sopenharmony_ci      stack_after=[pytuple],
15937db96d56Sopenharmony_ci      proto=2,
15947db96d56Sopenharmony_ci      doc="""Build a one-tuple out of the topmost item on the stack.
15957db96d56Sopenharmony_ci
15967db96d56Sopenharmony_ci      This code pops one value off the stack and pushes a tuple of
15977db96d56Sopenharmony_ci      length 1 whose one item is that value back onto it.  In other
15987db96d56Sopenharmony_ci      words:
15997db96d56Sopenharmony_ci
16007db96d56Sopenharmony_ci          stack[-1] = tuple(stack[-1:])
16017db96d56Sopenharmony_ci      """),
16027db96d56Sopenharmony_ci
16037db96d56Sopenharmony_ci    I(name='TUPLE2',
16047db96d56Sopenharmony_ci      code='\x86',
16057db96d56Sopenharmony_ci      arg=None,
16067db96d56Sopenharmony_ci      stack_before=[anyobject, anyobject],
16077db96d56Sopenharmony_ci      stack_after=[pytuple],
16087db96d56Sopenharmony_ci      proto=2,
16097db96d56Sopenharmony_ci      doc="""Build a two-tuple out of the top two items on the stack.
16107db96d56Sopenharmony_ci
16117db96d56Sopenharmony_ci      This code pops two values off the stack and pushes a tuple of
16127db96d56Sopenharmony_ci      length 2 whose items are those values back onto it.  In other
16137db96d56Sopenharmony_ci      words:
16147db96d56Sopenharmony_ci
16157db96d56Sopenharmony_ci          stack[-2:] = [tuple(stack[-2:])]
16167db96d56Sopenharmony_ci      """),
16177db96d56Sopenharmony_ci
16187db96d56Sopenharmony_ci    I(name='TUPLE3',
16197db96d56Sopenharmony_ci      code='\x87',
16207db96d56Sopenharmony_ci      arg=None,
16217db96d56Sopenharmony_ci      stack_before=[anyobject, anyobject, anyobject],
16227db96d56Sopenharmony_ci      stack_after=[pytuple],
16237db96d56Sopenharmony_ci      proto=2,
16247db96d56Sopenharmony_ci      doc="""Build a three-tuple out of the top three items on the stack.
16257db96d56Sopenharmony_ci
16267db96d56Sopenharmony_ci      This code pops three values off the stack and pushes a tuple of
16277db96d56Sopenharmony_ci      length 3 whose items are those values back onto it.  In other
16287db96d56Sopenharmony_ci      words:
16297db96d56Sopenharmony_ci
16307db96d56Sopenharmony_ci          stack[-3:] = [tuple(stack[-3:])]
16317db96d56Sopenharmony_ci      """),
16327db96d56Sopenharmony_ci
16337db96d56Sopenharmony_ci    # Ways to build dicts.
16347db96d56Sopenharmony_ci
16357db96d56Sopenharmony_ci    I(name='EMPTY_DICT',
16367db96d56Sopenharmony_ci      code='}',
16377db96d56Sopenharmony_ci      arg=None,
16387db96d56Sopenharmony_ci      stack_before=[],
16397db96d56Sopenharmony_ci      stack_after=[pydict],
16407db96d56Sopenharmony_ci      proto=1,
16417db96d56Sopenharmony_ci      doc="Push an empty dict."),
16427db96d56Sopenharmony_ci
16437db96d56Sopenharmony_ci    I(name='DICT',
16447db96d56Sopenharmony_ci      code='d',
16457db96d56Sopenharmony_ci      arg=None,
16467db96d56Sopenharmony_ci      stack_before=[markobject, stackslice],
16477db96d56Sopenharmony_ci      stack_after=[pydict],
16487db96d56Sopenharmony_ci      proto=0,
16497db96d56Sopenharmony_ci      doc="""Build a dict out of the topmost stack slice, after markobject.
16507db96d56Sopenharmony_ci
16517db96d56Sopenharmony_ci      All the stack entries following the topmost markobject are placed into
16527db96d56Sopenharmony_ci      a single Python dict, which single dict object replaces all of the
16537db96d56Sopenharmony_ci      stack from the topmost markobject onward.  The stack slice alternates
16547db96d56Sopenharmony_ci      key, value, key, value, ....  For example,
16557db96d56Sopenharmony_ci
16567db96d56Sopenharmony_ci      Stack before: ... markobject 1 2 3 'abc'
16577db96d56Sopenharmony_ci      Stack after:  ... {1: 2, 3: 'abc'}
16587db96d56Sopenharmony_ci      """),
16597db96d56Sopenharmony_ci
16607db96d56Sopenharmony_ci    I(name='SETITEM',
16617db96d56Sopenharmony_ci      code='s',
16627db96d56Sopenharmony_ci      arg=None,
16637db96d56Sopenharmony_ci      stack_before=[pydict, anyobject, anyobject],
16647db96d56Sopenharmony_ci      stack_after=[pydict],
16657db96d56Sopenharmony_ci      proto=0,
16667db96d56Sopenharmony_ci      doc="""Add a key+value pair to an existing dict.
16677db96d56Sopenharmony_ci
16687db96d56Sopenharmony_ci      Stack before:  ... pydict key value
16697db96d56Sopenharmony_ci      Stack after:   ... pydict
16707db96d56Sopenharmony_ci
16717db96d56Sopenharmony_ci      where pydict has been modified via pydict[key] = value.
16727db96d56Sopenharmony_ci      """),
16737db96d56Sopenharmony_ci
16747db96d56Sopenharmony_ci    I(name='SETITEMS',
16757db96d56Sopenharmony_ci      code='u',
16767db96d56Sopenharmony_ci      arg=None,
16777db96d56Sopenharmony_ci      stack_before=[pydict, markobject, stackslice],
16787db96d56Sopenharmony_ci      stack_after=[pydict],
16797db96d56Sopenharmony_ci      proto=1,
16807db96d56Sopenharmony_ci      doc="""Add an arbitrary number of key+value pairs to an existing dict.
16817db96d56Sopenharmony_ci
16827db96d56Sopenharmony_ci      The slice of the stack following the topmost markobject is taken as
16837db96d56Sopenharmony_ci      an alternating sequence of keys and values, added to the dict
16847db96d56Sopenharmony_ci      immediately under the topmost markobject.  Everything at and after the
16857db96d56Sopenharmony_ci      topmost markobject is popped, leaving the mutated dict at the top
16867db96d56Sopenharmony_ci      of the stack.
16877db96d56Sopenharmony_ci
16887db96d56Sopenharmony_ci      Stack before:  ... pydict markobject key_1 value_1 ... key_n value_n
16897db96d56Sopenharmony_ci      Stack after:   ... pydict
16907db96d56Sopenharmony_ci
16917db96d56Sopenharmony_ci      where pydict has been modified via pydict[key_i] = value_i for i in
16927db96d56Sopenharmony_ci      1, 2, ..., n, and in that order.
16937db96d56Sopenharmony_ci      """),
16947db96d56Sopenharmony_ci
16957db96d56Sopenharmony_ci    # Ways to build sets
16967db96d56Sopenharmony_ci
16977db96d56Sopenharmony_ci    I(name='EMPTY_SET',
16987db96d56Sopenharmony_ci      code='\x8f',
16997db96d56Sopenharmony_ci      arg=None,
17007db96d56Sopenharmony_ci      stack_before=[],
17017db96d56Sopenharmony_ci      stack_after=[pyset],
17027db96d56Sopenharmony_ci      proto=4,
17037db96d56Sopenharmony_ci      doc="Push an empty set."),
17047db96d56Sopenharmony_ci
17057db96d56Sopenharmony_ci    I(name='ADDITEMS',
17067db96d56Sopenharmony_ci      code='\x90',
17077db96d56Sopenharmony_ci      arg=None,
17087db96d56Sopenharmony_ci      stack_before=[pyset, markobject, stackslice],
17097db96d56Sopenharmony_ci      stack_after=[pyset],
17107db96d56Sopenharmony_ci      proto=4,
17117db96d56Sopenharmony_ci      doc="""Add an arbitrary number of items to an existing set.
17127db96d56Sopenharmony_ci
17137db96d56Sopenharmony_ci      The slice of the stack following the topmost markobject is taken as
17147db96d56Sopenharmony_ci      a sequence of items, added to the set immediately under the topmost
17157db96d56Sopenharmony_ci      markobject.  Everything at and after the topmost markobject is popped,
17167db96d56Sopenharmony_ci      leaving the mutated set at the top of the stack.
17177db96d56Sopenharmony_ci
17187db96d56Sopenharmony_ci      Stack before:  ... pyset markobject item_1 ... item_n
17197db96d56Sopenharmony_ci      Stack after:   ... pyset
17207db96d56Sopenharmony_ci
      where pyset has been modified via pyset.add(item_i) for i in
17227db96d56Sopenharmony_ci      1, 2, ..., n, and in that order.
17237db96d56Sopenharmony_ci      """),
17247db96d56Sopenharmony_ci
17257db96d56Sopenharmony_ci    # Way to build frozensets
17267db96d56Sopenharmony_ci
17277db96d56Sopenharmony_ci    I(name='FROZENSET',
17287db96d56Sopenharmony_ci      code='\x91',
17297db96d56Sopenharmony_ci      arg=None,
17307db96d56Sopenharmony_ci      stack_before=[markobject, stackslice],
17317db96d56Sopenharmony_ci      stack_after=[pyfrozenset],
17327db96d56Sopenharmony_ci      proto=4,
17337db96d56Sopenharmony_ci      doc="""Build a frozenset out of the topmost slice, after markobject.
17347db96d56Sopenharmony_ci
17357db96d56Sopenharmony_ci      All the stack entries following the topmost markobject are placed into
17367db96d56Sopenharmony_ci      a single Python frozenset, which single frozenset object replaces all
17377db96d56Sopenharmony_ci      of the stack from the topmost markobject onward.  For example,
17387db96d56Sopenharmony_ci
17397db96d56Sopenharmony_ci      Stack before: ... markobject 1 2 3
17407db96d56Sopenharmony_ci      Stack after:  ... frozenset({1, 2, 3})
17417db96d56Sopenharmony_ci      """),
17427db96d56Sopenharmony_ci
17437db96d56Sopenharmony_ci    # Stack manipulation.
17447db96d56Sopenharmony_ci
17457db96d56Sopenharmony_ci    I(name='POP',
17467db96d56Sopenharmony_ci      code='0',
17477db96d56Sopenharmony_ci      arg=None,
17487db96d56Sopenharmony_ci      stack_before=[anyobject],
17497db96d56Sopenharmony_ci      stack_after=[],
17507db96d56Sopenharmony_ci      proto=0,
17517db96d56Sopenharmony_ci      doc="Discard the top stack item, shrinking the stack by one item."),
17527db96d56Sopenharmony_ci
17537db96d56Sopenharmony_ci    I(name='DUP',
17547db96d56Sopenharmony_ci      code='2',
17557db96d56Sopenharmony_ci      arg=None,
17567db96d56Sopenharmony_ci      stack_before=[anyobject],
17577db96d56Sopenharmony_ci      stack_after=[anyobject, anyobject],
17587db96d56Sopenharmony_ci      proto=0,
17597db96d56Sopenharmony_ci      doc="Push the top stack item onto the stack again, duplicating it."),
17607db96d56Sopenharmony_ci
17617db96d56Sopenharmony_ci    I(name='MARK',
17627db96d56Sopenharmony_ci      code='(',
17637db96d56Sopenharmony_ci      arg=None,
17647db96d56Sopenharmony_ci      stack_before=[],
17657db96d56Sopenharmony_ci      stack_after=[markobject],
17667db96d56Sopenharmony_ci      proto=0,
17677db96d56Sopenharmony_ci      doc="""Push markobject onto the stack.
17687db96d56Sopenharmony_ci
17697db96d56Sopenharmony_ci      markobject is a unique object, used by other opcodes to identify a
17707db96d56Sopenharmony_ci      region of the stack containing a variable number of objects for them
17717db96d56Sopenharmony_ci      to work on.  See markobject.doc for more detail.
17727db96d56Sopenharmony_ci      """),
17737db96d56Sopenharmony_ci
17747db96d56Sopenharmony_ci    I(name='POP_MARK',
17757db96d56Sopenharmony_ci      code='1',
17767db96d56Sopenharmony_ci      arg=None,
17777db96d56Sopenharmony_ci      stack_before=[markobject, stackslice],
17787db96d56Sopenharmony_ci      stack_after=[],
17797db96d56Sopenharmony_ci      proto=1,
17807db96d56Sopenharmony_ci      doc="""Pop all the stack objects at and above the topmost markobject.
17817db96d56Sopenharmony_ci
17827db96d56Sopenharmony_ci      When an opcode using a variable number of stack objects is done,
17837db96d56Sopenharmony_ci      POP_MARK is used to remove those objects, and to remove the markobject
17847db96d56Sopenharmony_ci      that delimited their starting position on the stack.
17857db96d56Sopenharmony_ci      """),
17867db96d56Sopenharmony_ci
17877db96d56Sopenharmony_ci    # Memo manipulation.  There are really only two operations (get and put),
17887db96d56Sopenharmony_ci    # each in all-text, "short binary", and "long binary" flavors.
17897db96d56Sopenharmony_ci
17907db96d56Sopenharmony_ci    I(name='GET',
17917db96d56Sopenharmony_ci      code='g',
17927db96d56Sopenharmony_ci      arg=decimalnl_short,
17937db96d56Sopenharmony_ci      stack_before=[],
17947db96d56Sopenharmony_ci      stack_after=[anyobject],
17957db96d56Sopenharmony_ci      proto=0,
17967db96d56Sopenharmony_ci      doc="""Read an object from the memo and push it on the stack.
17977db96d56Sopenharmony_ci
17987db96d56Sopenharmony_ci      The index of the memo object to push is given by the newline-terminated
17997db96d56Sopenharmony_ci      decimal string following.  BINGET and LONG_BINGET are space-optimized
18007db96d56Sopenharmony_ci      versions.
18017db96d56Sopenharmony_ci      """),
18027db96d56Sopenharmony_ci
18037db96d56Sopenharmony_ci    I(name='BINGET',
18047db96d56Sopenharmony_ci      code='h',
18057db96d56Sopenharmony_ci      arg=uint1,
18067db96d56Sopenharmony_ci      stack_before=[],
18077db96d56Sopenharmony_ci      stack_after=[anyobject],
18087db96d56Sopenharmony_ci      proto=1,
18097db96d56Sopenharmony_ci      doc="""Read an object from the memo and push it on the stack.
18107db96d56Sopenharmony_ci
18117db96d56Sopenharmony_ci      The index of the memo object to push is given by the 1-byte unsigned
18127db96d56Sopenharmony_ci      integer following.
18137db96d56Sopenharmony_ci      """),
18147db96d56Sopenharmony_ci
18157db96d56Sopenharmony_ci    I(name='LONG_BINGET',
18167db96d56Sopenharmony_ci      code='j',
18177db96d56Sopenharmony_ci      arg=uint4,
18187db96d56Sopenharmony_ci      stack_before=[],
18197db96d56Sopenharmony_ci      stack_after=[anyobject],
18207db96d56Sopenharmony_ci      proto=1,
18217db96d56Sopenharmony_ci      doc="""Read an object from the memo and push it on the stack.
18227db96d56Sopenharmony_ci
18237db96d56Sopenharmony_ci      The index of the memo object to push is given by the 4-byte unsigned
18247db96d56Sopenharmony_ci      little-endian integer following.
18257db96d56Sopenharmony_ci      """),
18267db96d56Sopenharmony_ci
18277db96d56Sopenharmony_ci    I(name='PUT',
18287db96d56Sopenharmony_ci      code='p',
18297db96d56Sopenharmony_ci      arg=decimalnl_short,
18307db96d56Sopenharmony_ci      stack_before=[],
18317db96d56Sopenharmony_ci      stack_after=[],
18327db96d56Sopenharmony_ci      proto=0,
18337db96d56Sopenharmony_ci      doc="""Store the stack top into the memo.  The stack is not popped.
18347db96d56Sopenharmony_ci
18357db96d56Sopenharmony_ci      The index of the memo location to write into is given by the newline-
18367db96d56Sopenharmony_ci      terminated decimal string following.  BINPUT and LONG_BINPUT are
18377db96d56Sopenharmony_ci      space-optimized versions.
18387db96d56Sopenharmony_ci      """),
18397db96d56Sopenharmony_ci
18407db96d56Sopenharmony_ci    I(name='BINPUT',
18417db96d56Sopenharmony_ci      code='q',
18427db96d56Sopenharmony_ci      arg=uint1,
18437db96d56Sopenharmony_ci      stack_before=[],
18447db96d56Sopenharmony_ci      stack_after=[],
18457db96d56Sopenharmony_ci      proto=1,
18467db96d56Sopenharmony_ci      doc="""Store the stack top into the memo.  The stack is not popped.
18477db96d56Sopenharmony_ci
18487db96d56Sopenharmony_ci      The index of the memo location to write into is given by the 1-byte
18497db96d56Sopenharmony_ci      unsigned integer following.
18507db96d56Sopenharmony_ci      """),
18517db96d56Sopenharmony_ci
18527db96d56Sopenharmony_ci    I(name='LONG_BINPUT',
18537db96d56Sopenharmony_ci      code='r',
18547db96d56Sopenharmony_ci      arg=uint4,
18557db96d56Sopenharmony_ci      stack_before=[],
18567db96d56Sopenharmony_ci      stack_after=[],
18577db96d56Sopenharmony_ci      proto=1,
18587db96d56Sopenharmony_ci      doc="""Store the stack top into the memo.  The stack is not popped.
18597db96d56Sopenharmony_ci
18607db96d56Sopenharmony_ci      The index of the memo location to write into is given by the 4-byte
18617db96d56Sopenharmony_ci      unsigned little-endian integer following.
18627db96d56Sopenharmony_ci      """),
18637db96d56Sopenharmony_ci
18647db96d56Sopenharmony_ci    I(name='MEMOIZE',
18657db96d56Sopenharmony_ci      code='\x94',
18667db96d56Sopenharmony_ci      arg=None,
18677db96d56Sopenharmony_ci      stack_before=[anyobject],
18687db96d56Sopenharmony_ci      stack_after=[anyobject],
18697db96d56Sopenharmony_ci      proto=4,
18707db96d56Sopenharmony_ci      doc="""Store the stack top into the memo.  The stack is not popped.
18717db96d56Sopenharmony_ci
18727db96d56Sopenharmony_ci      The index of the memo location to write is the number of
18737db96d56Sopenharmony_ci      elements currently present in the memo.
18747db96d56Sopenharmony_ci      """),
18757db96d56Sopenharmony_ci
18767db96d56Sopenharmony_ci    # Access the extension registry (predefined objects).  Akin to the GET
18777db96d56Sopenharmony_ci    # family.
18787db96d56Sopenharmony_ci
18797db96d56Sopenharmony_ci    I(name='EXT1',
18807db96d56Sopenharmony_ci      code='\x82',
18817db96d56Sopenharmony_ci      arg=uint1,
18827db96d56Sopenharmony_ci      stack_before=[],
18837db96d56Sopenharmony_ci      stack_after=[anyobject],
18847db96d56Sopenharmony_ci      proto=2,
18857db96d56Sopenharmony_ci      doc="""Extension code.
18867db96d56Sopenharmony_ci
18877db96d56Sopenharmony_ci      This code and the similar EXT2 and EXT4 allow using a registry
18887db96d56Sopenharmony_ci      of popular objects that are pickled by name, typically classes.
18897db96d56Sopenharmony_ci      It is envisioned that through a global negotiation and
18907db96d56Sopenharmony_ci      registration process, third parties can set up a mapping between
18917db96d56Sopenharmony_ci      ints and object names.
18927db96d56Sopenharmony_ci
18937db96d56Sopenharmony_ci      In order to guarantee pickle interchangeability, the extension
18947db96d56Sopenharmony_ci      code registry ought to be global, although a range of codes may
18957db96d56Sopenharmony_ci      be reserved for private use.
18967db96d56Sopenharmony_ci
18977db96d56Sopenharmony_ci      EXT1 has a 1-byte integer argument.  This is used to index into the
18987db96d56Sopenharmony_ci      extension registry, and the object at that index is pushed on the stack.
18997db96d56Sopenharmony_ci      """),
19007db96d56Sopenharmony_ci
19017db96d56Sopenharmony_ci    I(name='EXT2',
19027db96d56Sopenharmony_ci      code='\x83',
19037db96d56Sopenharmony_ci      arg=uint2,
19047db96d56Sopenharmony_ci      stack_before=[],
19057db96d56Sopenharmony_ci      stack_after=[anyobject],
19067db96d56Sopenharmony_ci      proto=2,
19077db96d56Sopenharmony_ci      doc="""Extension code.
19087db96d56Sopenharmony_ci
19097db96d56Sopenharmony_ci      See EXT1.  EXT2 has a two-byte integer argument.
19107db96d56Sopenharmony_ci      """),
19117db96d56Sopenharmony_ci
19127db96d56Sopenharmony_ci    I(name='EXT4',
19137db96d56Sopenharmony_ci      code='\x84',
19147db96d56Sopenharmony_ci      arg=int4,
19157db96d56Sopenharmony_ci      stack_before=[],
19167db96d56Sopenharmony_ci      stack_after=[anyobject],
19177db96d56Sopenharmony_ci      proto=2,
19187db96d56Sopenharmony_ci      doc="""Extension code.
19197db96d56Sopenharmony_ci
19207db96d56Sopenharmony_ci      See EXT1.  EXT4 has a four-byte integer argument.
19217db96d56Sopenharmony_ci      """),
19227db96d56Sopenharmony_ci
19237db96d56Sopenharmony_ci    # Push a class object, or module function, on the stack, via its module
19247db96d56Sopenharmony_ci    # and name.
19257db96d56Sopenharmony_ci
19267db96d56Sopenharmony_ci    I(name='GLOBAL',
19277db96d56Sopenharmony_ci      code='c',
19287db96d56Sopenharmony_ci      arg=stringnl_noescape_pair,
19297db96d56Sopenharmony_ci      stack_before=[],
19307db96d56Sopenharmony_ci      stack_after=[anyobject],
19317db96d56Sopenharmony_ci      proto=0,
19327db96d56Sopenharmony_ci      doc="""Push a global object (module.attr) on the stack.
19337db96d56Sopenharmony_ci
19347db96d56Sopenharmony_ci      Two newline-terminated strings follow the GLOBAL opcode.  The first is
19357db96d56Sopenharmony_ci      taken as a module name, and the second as a class name.  The class
19367db96d56Sopenharmony_ci      object module.class is pushed on the stack.  More accurately, the
19377db96d56Sopenharmony_ci      object returned by self.find_class(module, class) is pushed on the
19387db96d56Sopenharmony_ci      stack, so unpickling subclasses can override this form of lookup.
19397db96d56Sopenharmony_ci      """),
19407db96d56Sopenharmony_ci
19417db96d56Sopenharmony_ci    I(name='STACK_GLOBAL',
19427db96d56Sopenharmony_ci      code='\x93',
19437db96d56Sopenharmony_ci      arg=None,
19447db96d56Sopenharmony_ci      stack_before=[pyunicode, pyunicode],
19457db96d56Sopenharmony_ci      stack_after=[anyobject],
19467db96d56Sopenharmony_ci      proto=4,
19477db96d56Sopenharmony_ci      doc="""Push a global object (module.attr) on the stack.
19487db96d56Sopenharmony_ci      """),
19497db96d56Sopenharmony_ci
19507db96d56Sopenharmony_ci    # Ways to build objects of classes pickle doesn't know about directly
19517db96d56Sopenharmony_ci    # (user-defined classes).  I despair of documenting this accurately
19527db96d56Sopenharmony_ci    # and comprehensibly -- you really have to read the pickle code to
19537db96d56Sopenharmony_ci    # find all the special cases.
19547db96d56Sopenharmony_ci
19557db96d56Sopenharmony_ci    I(name='REDUCE',
19567db96d56Sopenharmony_ci      code='R',
19577db96d56Sopenharmony_ci      arg=None,
19587db96d56Sopenharmony_ci      stack_before=[anyobject, anyobject],
19597db96d56Sopenharmony_ci      stack_after=[anyobject],
19607db96d56Sopenharmony_ci      proto=0,
19617db96d56Sopenharmony_ci      doc="""Push an object built from a callable and an argument tuple.
19627db96d56Sopenharmony_ci
19637db96d56Sopenharmony_ci      The opcode is named to remind of the __reduce__() method.
19647db96d56Sopenharmony_ci
19657db96d56Sopenharmony_ci      Stack before: ... callable pytuple
19667db96d56Sopenharmony_ci      Stack after:  ... callable(*pytuple)
19677db96d56Sopenharmony_ci
19687db96d56Sopenharmony_ci      The callable and the argument tuple are the first two items returned
19697db96d56Sopenharmony_ci      by a __reduce__ method.  Applying the callable to the argtuple is
19707db96d56Sopenharmony_ci      supposed to reproduce the original object, or at least get it started.
19717db96d56Sopenharmony_ci      If the __reduce__ method returns a 3-tuple, the last component is an
19727db96d56Sopenharmony_ci      argument to be passed to the object's __setstate__, and then the REDUCE
19737db96d56Sopenharmony_ci      opcode is followed by code to create setstate's argument, and then a
19747db96d56Sopenharmony_ci      BUILD opcode to apply  __setstate__ to that argument.
19757db96d56Sopenharmony_ci
19767db96d56Sopenharmony_ci      If not isinstance(callable, type), REDUCE complains unless the
19777db96d56Sopenharmony_ci      callable has been registered with the copyreg module's
19787db96d56Sopenharmony_ci      safe_constructors dict, or the callable has a magic
19797db96d56Sopenharmony_ci      '__safe_for_unpickling__' attribute with a true value.  I'm not sure
19807db96d56Sopenharmony_ci      why it does this, but I've sure seen this complaint often enough when
19817db96d56Sopenharmony_ci      I didn't want to <wink>.
19827db96d56Sopenharmony_ci      """),
19837db96d56Sopenharmony_ci
19847db96d56Sopenharmony_ci    I(name='BUILD',
19857db96d56Sopenharmony_ci      code='b',
19867db96d56Sopenharmony_ci      arg=None,
19877db96d56Sopenharmony_ci      stack_before=[anyobject, anyobject],
19887db96d56Sopenharmony_ci      stack_after=[anyobject],
19897db96d56Sopenharmony_ci      proto=0,
19907db96d56Sopenharmony_ci      doc="""Finish building an object, via __setstate__ or dict update.
19917db96d56Sopenharmony_ci
19927db96d56Sopenharmony_ci      Stack before: ... anyobject argument
19937db96d56Sopenharmony_ci      Stack after:  ... anyobject
19947db96d56Sopenharmony_ci
19957db96d56Sopenharmony_ci      where anyobject may have been mutated, as follows:
19967db96d56Sopenharmony_ci
19977db96d56Sopenharmony_ci      If the object has a __setstate__ method,
19987db96d56Sopenharmony_ci
19997db96d56Sopenharmony_ci          anyobject.__setstate__(argument)
20007db96d56Sopenharmony_ci
20017db96d56Sopenharmony_ci      is called.
20027db96d56Sopenharmony_ci
20037db96d56Sopenharmony_ci      Else the argument must be a dict, the object must have a __dict__, and
20047db96d56Sopenharmony_ci      the object is updated via
20057db96d56Sopenharmony_ci
20067db96d56Sopenharmony_ci          anyobject.__dict__.update(argument)
20077db96d56Sopenharmony_ci      """),
20087db96d56Sopenharmony_ci
20097db96d56Sopenharmony_ci    I(name='INST',
20107db96d56Sopenharmony_ci      code='i',
20117db96d56Sopenharmony_ci      arg=stringnl_noescape_pair,
20127db96d56Sopenharmony_ci      stack_before=[markobject, stackslice],
20137db96d56Sopenharmony_ci      stack_after=[anyobject],
20147db96d56Sopenharmony_ci      proto=0,
20157db96d56Sopenharmony_ci      doc="""Build a class instance.
20167db96d56Sopenharmony_ci
20177db96d56Sopenharmony_ci      This is the protocol 0 version of protocol 1's OBJ opcode.
20187db96d56Sopenharmony_ci      INST is followed by two newline-terminated strings, giving a
20197db96d56Sopenharmony_ci      module and class name, just as for the GLOBAL opcode (and see
20207db96d56Sopenharmony_ci      GLOBAL for more details about that).  self.find_class(module, name)
20217db96d56Sopenharmony_ci      is used to get a class object.
20227db96d56Sopenharmony_ci
20237db96d56Sopenharmony_ci      In addition, all the objects on the stack following the topmost
20247db96d56Sopenharmony_ci      markobject are gathered into a tuple and popped (along with the
20257db96d56Sopenharmony_ci      topmost markobject), just as for the TUPLE opcode.
20267db96d56Sopenharmony_ci
20277db96d56Sopenharmony_ci      Now it gets complicated.  If all of these are true:
20287db96d56Sopenharmony_ci
20297db96d56Sopenharmony_ci        + The argtuple is empty (markobject was at the top of the stack
20307db96d56Sopenharmony_ci          at the start).
20317db96d56Sopenharmony_ci
20327db96d56Sopenharmony_ci        + The class object does not have a __getinitargs__ attribute.
20337db96d56Sopenharmony_ci
20347db96d56Sopenharmony_ci      then we want to create an old-style class instance without invoking
20357db96d56Sopenharmony_ci      its __init__() method (pickle has waffled on this over the years; not
20367db96d56Sopenharmony_ci      calling __init__() is current wisdom).  In this case, an instance of
20377db96d56Sopenharmony_ci      an old-style dummy class is created, and then we try to rebind its
20387db96d56Sopenharmony_ci      __class__ attribute to the desired class object.  If this succeeds,
20397db96d56Sopenharmony_ci      the new instance object is pushed on the stack, and we're done.
20407db96d56Sopenharmony_ci
20417db96d56Sopenharmony_ci      Else (the argtuple is not empty, it's not an old-style class object,
20427db96d56Sopenharmony_ci      or the class object does have a __getinitargs__ attribute), the code
20437db96d56Sopenharmony_ci      first insists that the class object have a __safe_for_unpickling__
20447db96d56Sopenharmony_ci      attribute.  Unlike as for the __safe_for_unpickling__ check in REDUCE,
20457db96d56Sopenharmony_ci      it doesn't matter whether this attribute has a true or false value, it
20467db96d56Sopenharmony_ci      only matters whether it exists (XXX this is a bug).  If
20477db96d56Sopenharmony_ci      __safe_for_unpickling__ doesn't exist, UnpicklingError is raised.
20487db96d56Sopenharmony_ci
20497db96d56Sopenharmony_ci      Else (the class object does have a __safe_for_unpickling__ attr),
20507db96d56Sopenharmony_ci      the class object obtained from INST's arguments is applied to the
20517db96d56Sopenharmony_ci      argtuple obtained from the stack, and the resulting instance object
20527db96d56Sopenharmony_ci      is pushed on the stack.
20537db96d56Sopenharmony_ci
20547db96d56Sopenharmony_ci      NOTE:  checks for __safe_for_unpickling__ went away in Python 2.3.
20557db96d56Sopenharmony_ci      NOTE:  the distinction between old-style and new-style classes does
20567db96d56Sopenharmony_ci             not make sense in Python 3.
20577db96d56Sopenharmony_ci      """),
20587db96d56Sopenharmony_ci
20597db96d56Sopenharmony_ci    I(name='OBJ',
20607db96d56Sopenharmony_ci      code='o',
20617db96d56Sopenharmony_ci      arg=None,
20627db96d56Sopenharmony_ci      stack_before=[markobject, anyobject, stackslice],
20637db96d56Sopenharmony_ci      stack_after=[anyobject],
20647db96d56Sopenharmony_ci      proto=1,
20657db96d56Sopenharmony_ci      doc="""Build a class instance.
20667db96d56Sopenharmony_ci
20677db96d56Sopenharmony_ci      This is the protocol 1 version of protocol 0's INST opcode, and is
20687db96d56Sopenharmony_ci      very much like it.  The major difference is that the class object
20697db96d56Sopenharmony_ci      is taken off the stack, allowing it to be retrieved from the memo
20707db96d56Sopenharmony_ci      repeatedly if several instances of the same class are created.  This
20717db96d56Sopenharmony_ci      can be much more efficient (in both time and space) than repeatedly
20727db96d56Sopenharmony_ci      embedding the module and class names in INST opcodes.
20737db96d56Sopenharmony_ci
20747db96d56Sopenharmony_ci      Unlike INST, OBJ takes no arguments from the opcode stream.  Instead
20757db96d56Sopenharmony_ci      the class object is taken off the stack, immediately above the
20767db96d56Sopenharmony_ci      topmost markobject:
20777db96d56Sopenharmony_ci
20787db96d56Sopenharmony_ci      Stack before: ... markobject classobject stackslice
20797db96d56Sopenharmony_ci      Stack after:  ... new_instance_object
20807db96d56Sopenharmony_ci
20817db96d56Sopenharmony_ci      As for INST, the remainder of the stack above the markobject is
20827db96d56Sopenharmony_ci      gathered into an argument tuple, and then the logic seems identical,
20837db96d56Sopenharmony_ci      except that no __safe_for_unpickling__ check is done (XXX this is
20847db96d56Sopenharmony_ci      a bug).  See INST for the gory details.
20857db96d56Sopenharmony_ci
20867db96d56Sopenharmony_ci      NOTE:  In Python 2.3, INST and OBJ are identical except for how they
20877db96d56Sopenharmony_ci      get the class object.  That was always the intent; the implementations
20887db96d56Sopenharmony_ci      had diverged for accidental reasons.
20897db96d56Sopenharmony_ci      """),
20907db96d56Sopenharmony_ci
20917db96d56Sopenharmony_ci    I(name='NEWOBJ',
20927db96d56Sopenharmony_ci      code='\x81',
20937db96d56Sopenharmony_ci      arg=None,
20947db96d56Sopenharmony_ci      stack_before=[anyobject, anyobject],
20957db96d56Sopenharmony_ci      stack_after=[anyobject],
20967db96d56Sopenharmony_ci      proto=2,
20977db96d56Sopenharmony_ci      doc="""Build an object instance.
20987db96d56Sopenharmony_ci
20997db96d56Sopenharmony_ci      The stack before should be thought of as containing a class
21007db96d56Sopenharmony_ci      object followed by an argument tuple (the tuple being the stack
21017db96d56Sopenharmony_ci      top).  Call these cls and args.  They are popped off the stack,
21027db96d56Sopenharmony_ci      and the value returned by cls.__new__(cls, *args) is pushed back
21037db96d56Sopenharmony_ci      onto the stack.
21047db96d56Sopenharmony_ci      """),
21057db96d56Sopenharmony_ci
21067db96d56Sopenharmony_ci    I(name='NEWOBJ_EX',
21077db96d56Sopenharmony_ci      code='\x92',
21087db96d56Sopenharmony_ci      arg=None,
21097db96d56Sopenharmony_ci      stack_before=[anyobject, anyobject, anyobject],
21107db96d56Sopenharmony_ci      stack_after=[anyobject],
21117db96d56Sopenharmony_ci      proto=4,
21127db96d56Sopenharmony_ci      doc="""Build an object instance.
21137db96d56Sopenharmony_ci
21147db96d56Sopenharmony_ci      The stack before should be thought of as containing a class
21157db96d56Sopenharmony_ci      object followed by an argument tuple and by a keyword argument dict
      (the dict being the stack top).  Call these cls, args, and kwargs.
      They are popped off the stack, and the value returned by
      cls.__new__(cls, *args, **kwargs) is pushed back onto the stack.
21197db96d56Sopenharmony_ci      """),
21207db96d56Sopenharmony_ci
21217db96d56Sopenharmony_ci    # Machine control.
21227db96d56Sopenharmony_ci
21237db96d56Sopenharmony_ci    I(name='PROTO',
21247db96d56Sopenharmony_ci      code='\x80',
21257db96d56Sopenharmony_ci      arg=uint1,
21267db96d56Sopenharmony_ci      stack_before=[],
21277db96d56Sopenharmony_ci      stack_after=[],
21287db96d56Sopenharmony_ci      proto=2,
21297db96d56Sopenharmony_ci      doc="""Protocol version indicator.
21307db96d56Sopenharmony_ci
21317db96d56Sopenharmony_ci      For protocol 2 and above, a pickle must start with this opcode.
21327db96d56Sopenharmony_ci      The argument is the protocol version, an int in range(2, 256).
21337db96d56Sopenharmony_ci      """),
21347db96d56Sopenharmony_ci
21357db96d56Sopenharmony_ci    I(name='STOP',
21367db96d56Sopenharmony_ci      code='.',
21377db96d56Sopenharmony_ci      arg=None,
21387db96d56Sopenharmony_ci      stack_before=[anyobject],
21397db96d56Sopenharmony_ci      stack_after=[],
21407db96d56Sopenharmony_ci      proto=0,
21417db96d56Sopenharmony_ci      doc="""Stop the unpickling machine.
21427db96d56Sopenharmony_ci
21437db96d56Sopenharmony_ci      Every pickle ends with this opcode.  The object at the top of the stack
21447db96d56Sopenharmony_ci      is popped, and that's the result of unpickling.  The stack should be
21457db96d56Sopenharmony_ci      empty then.
21467db96d56Sopenharmony_ci      """),
21477db96d56Sopenharmony_ci
21487db96d56Sopenharmony_ci    # Framing support.
21497db96d56Sopenharmony_ci
21507db96d56Sopenharmony_ci    I(name='FRAME',
21517db96d56Sopenharmony_ci      code='\x95',
21527db96d56Sopenharmony_ci      arg=uint8,
21537db96d56Sopenharmony_ci      stack_before=[],
21547db96d56Sopenharmony_ci      stack_after=[],
21557db96d56Sopenharmony_ci      proto=4,
21567db96d56Sopenharmony_ci      doc="""Indicate the beginning of a new frame.
21577db96d56Sopenharmony_ci
21587db96d56Sopenharmony_ci      The unpickler may use this opcode to safely prefetch data from its
21597db96d56Sopenharmony_ci      underlying stream.
21607db96d56Sopenharmony_ci      """),
21617db96d56Sopenharmony_ci
21627db96d56Sopenharmony_ci    # Ways to deal with persistent IDs.
21637db96d56Sopenharmony_ci
21647db96d56Sopenharmony_ci    I(name='PERSID',
21657db96d56Sopenharmony_ci      code='P',
21667db96d56Sopenharmony_ci      arg=stringnl_noescape,
21677db96d56Sopenharmony_ci      stack_before=[],
21687db96d56Sopenharmony_ci      stack_after=[anyobject],
21697db96d56Sopenharmony_ci      proto=0,
21707db96d56Sopenharmony_ci      doc="""Push an object identified by a persistent ID.
21717db96d56Sopenharmony_ci
21727db96d56Sopenharmony_ci      The pickle module doesn't define what a persistent ID means.  PERSID's
21737db96d56Sopenharmony_ci      argument is a newline-terminated str-style (no embedded escapes, no
21747db96d56Sopenharmony_ci      bracketing quote characters) string, which *is* "the persistent ID".
21757db96d56Sopenharmony_ci      The unpickler passes this string to self.persistent_load().  Whatever
21767db96d56Sopenharmony_ci      object that returns is pushed on the stack.  There is no implementation
21777db96d56Sopenharmony_ci      of persistent_load() in Python's unpickler:  it must be supplied by an
21787db96d56Sopenharmony_ci      unpickler subclass.
21797db96d56Sopenharmony_ci      """),
21807db96d56Sopenharmony_ci
21817db96d56Sopenharmony_ci    I(name='BINPERSID',
21827db96d56Sopenharmony_ci      code='Q',
21837db96d56Sopenharmony_ci      arg=None,
21847db96d56Sopenharmony_ci      stack_before=[anyobject],
21857db96d56Sopenharmony_ci      stack_after=[anyobject],
21867db96d56Sopenharmony_ci      proto=1,
21877db96d56Sopenharmony_ci      doc="""Push an object identified by a persistent ID.
21887db96d56Sopenharmony_ci
21897db96d56Sopenharmony_ci      Like PERSID, except the persistent ID is popped off the stack (instead
21907db96d56Sopenharmony_ci      of being a string embedded in the opcode bytestream).  The persistent
21917db96d56Sopenharmony_ci      ID is passed to self.persistent_load(), and whatever object that
21927db96d56Sopenharmony_ci      returns is pushed on the stack.  See PERSID for more detail.
21937db96d56Sopenharmony_ci      """),
21947db96d56Sopenharmony_ci]
21957db96d56Sopenharmony_cidel I
21967db96d56Sopenharmony_ci
# Sanity-check the opcode table: no two entries may share a .name, and no
# two entries may share a .code.  The bookkeeping names are temporary and
# are removed from the module namespace once the check has passed.
name2i, code2i = {}, {}

for i, d in enumerate(opcodes):
    # Indices come from enumerate(), so a None lookup result always means
    # "not seen before".
    _dup = name2i.get(d.name)
    if _dup is not None:
        raise ValueError("repeated name %r at indices %d and %d" %
                         (d.name, _dup, i))
    _dup = code2i.get(d.code)
    if _dup is not None:
        raise ValueError("repeated code %r at indices %d and %d" %
                         (d.code, _dup, i))

    name2i[d.name] = i
    code2i[d.code] = i

del name2i, code2i, i, d, _dup
22137db96d56Sopenharmony_ci
22147db96d56Sopenharmony_ci##############################################################################
22157db96d56Sopenharmony_ci# Build a code2op dict, mapping opcode characters to OpcodeInfo records.
22167db96d56Sopenharmony_ci# Also ensure we've got the same stuff as pickle.py, although the
22177db96d56Sopenharmony_ci# introspection here is dicey.
22187db96d56Sopenharmony_ci
# Map each one-character opcode string to its descriptor from `opcodes`.
# A comprehension keeps the loop variable out of the module namespace.
code2op = {d.code: d for d in opcodes}
22237db96d56Sopenharmony_ci
def assure_pickle_consistency(verbose=False):
    """Cross-check the code2op table against the names exported by pickle.

    Every name in pickle.__all__ that looks like an opcode constant (an
    upper-case identifier bound to a one-byte bytes object) must have an
    entry in code2op under the same name, and every code2op entry must be
    matched by such a constant.

    If 'verbose' is true, a line is printed for each name skipped or checked.

    Raises ValueError on any mismatch.
    """
    looks_like_opcode_name = re.compile("[A-Z][A-Z0-9_]+$").match

    # Entries are removed as they are matched; leftovers are reported below.
    unmatched = code2op.copy()
    for name in pickle.__all__:
        if looks_like_opcode_name(name) is None:
            if verbose:
                print("skipping %r: it doesn't look like an opcode name" % name)
            continue

        value = getattr(pickle, name)
        if not (isinstance(value, bytes) and len(value) == 1):
            if verbose:
                print(("skipping %r: value %r doesn't look like a pickle "
                       "code" % (name, value)))
            continue

        # code2op is keyed by str, so map the single byte to one character.
        picklecode = value.decode("latin-1")
        if picklecode not in unmatched:
            raise ValueError("pickle.py appears to have a pickle opcode with "
                             "name %r and code %r, but we don't" %
                             (name, picklecode))

        if verbose:
            print("checking name %r w/ code %r for consistency" % (
                  name, picklecode))
        info = unmatched.pop(picklecode)
        if info.name != name:
            raise ValueError("for pickle code %r, pickle.py uses name %r "
                             "but we're using name %r" % (picklecode,
                                                          name,
                                                          info.name))

    if unmatched:
        msg = ["we appear to have pickle opcodes that pickle.py doesn't have:"]
        msg.extend("    name %r with code %r" % (info.name, code)
                   for code, info in unmatched.items())
        raise ValueError("\n".join(msg))

# Run the check once at import time, then drop the helper from the namespace.
assure_pickle_consistency()
del assure_pickle_consistency
22647db96d56Sopenharmony_ci
22657db96d56Sopenharmony_ci##############################################################################
22667db96d56Sopenharmony_ci# A pickle opcode generator.
22677db96d56Sopenharmony_ci
def _genops(data, yield_end_pos=False):
    """Yield one tuple per opcode read from 'data', through the first STOP.

    'data' is either a bytes-like object (wrapped in a BytesIO) or an object
    with a read() method.  Each item is (opcode, arg, pos), or
    (opcode, arg, pos, end_pos) when 'yield_end_pos' is true.  Positions come
    from data.tell() and are None when the stream has no tell() method.

    Raises ValueError if the stream ends before STOP or an unknown opcode
    byte is read.
    """
    stream = io.BytesIO(data) if isinstance(data, bytes_types) else data

    if hasattr(stream, "tell"):
        getpos = stream.tell
    else:
        def getpos():
            return None

    while True:
        pos = getpos()
        code = stream.read(1)
        opcode = code2op.get(code.decode("latin-1"))
        if opcode is None:
            # An empty read also lands here: no opcode has an empty code.
            if code == b"":
                raise ValueError("pickle exhausted before seeing STOP")
            raise ValueError("at position %s, opcode %r unknown" % (
                             "<unknown>" if pos is None else pos,
                             code))

        arg = None if opcode.arg is None else opcode.arg.reader(stream)

        if yield_end_pos:
            yield opcode, arg, pos, getpos()
        else:
            yield opcode, arg, pos

        if code == b'.':
            assert opcode.name == 'STOP'
            break
22997db96d56Sopenharmony_ci
def genops(pickle):
    """Generate all the opcodes in a pickle.

    'pickle' is a bytes object or a file-like object holding the pickle.
    Reading starts at the current position and stops once a STOP opcode has
    been delivered.

    For each opcode a triple is generated:

        opcode, arg, pos

    opcode is the OpcodeInfo record describing the opcode just read.

    arg is the decoded value of the argument embedded in the pickle, as a
    Python object, or None for opcodes that take no argument.

    pos is the value pickle.tell() returned before the opcode was read.  A
    bytes object is wrapped in a BytesIO object first, so positions are
    available for it too.  If the object has no tell() method, pos is None.
    """
    return _genops(pickle, yield_end_pos=False)
23247db96d56Sopenharmony_ci
23257db96d56Sopenharmony_ci##############################################################################
23267db96d56Sopenharmony_ci# A pickle optimizer.
23277db96d56Sopenharmony_ci
def optimize(p):
    """Optimize a pickle string by removing unused PUT opcodes.

    'p' is sliced by byte position, so it must be a bytes-like object.
    Memo-storing opcodes (PUT variants and MEMOIZE) whose id is never read
    by a GET variant are dropped; the remaining ones are renumbered from 0
    and everything is re-emitted through a pickle._Pickler.  FRAME opcodes
    are discarded and framing is regenerated for protocol 4 and above.
    """
    STORE = 'PUT'
    FETCH = 'GET'
    seen_put_ids = set()    # every id stored by a PUT-like opcode
    fetched = {}            # old id -> new id (None until reassigned)
    items = []              # (STORE/FETCH, id) or (pos, end_pos)
    proto = 0
    protoheader = b''
    for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True):
        opname = opcode.name
        if 'PUT' in opname:
            seen_put_ids.add(arg)
            items.append((STORE, arg))
        elif opname == 'MEMOIZE':
            # MEMOIZE carries no argument; its id is the count so far.
            memo_id = len(seen_put_ids)
            seen_put_ids.add(memo_id)
            items.append((STORE, memo_id))
        elif 'FRAME' in opname:
            continue
        elif 'GET' in opname:
            proto = max(proto, opcode.proto)
            fetched[arg] = None
            items.append((FETCH, arg))
        elif opname == 'PROTO':
            proto = max(proto, arg)
            if pos == 0:
                # A leading PROTO is written verbatim ahead of any frame.
                protoheader = p[pos:end_pos]
            else:
                items.append((pos, end_pos))
        else:
            items.append((pos, end_pos))
    del seen_put_ids

    # Copy the opcodes except for PUTS without a corresponding GET
    out = io.BytesIO()
    # Write the PROTO header before any framing
    out.write(protoheader)
    pickler = pickle._Pickler(out, proto)
    framer = pickler.framer
    if proto >= 4:
        framer.start_framing()
    next_id = 0
    for first, second in items:
        if first is STORE:
            if second not in fetched:
                continue
            chunk = pickler.put(next_id)
            fetched[second] = next_id
            next_id += 1
            oversized = False
        elif first is FETCH:
            chunk = pickler.get(fetched[second])
            oversized = False
        else:
            # (pos, end_pos): copy the original bytes unchanged.
            chunk = p[first:second]
            oversized = len(chunk) > framer._FRAME_SIZE_TARGET
        framer.commit_frame(force=oversized)
        # Oversized chunks bypass the frame buffer and go straight to the file.
        emit = framer.file_write if oversized else pickler.write
        emit(chunk)
    framer.end_framing()
    return out.getvalue()
23917db96d56Sopenharmony_ci
23927db96d56Sopenharmony_ci##############################################################################
23937db96d56Sopenharmony_ci# A symbolic pickle disassembler.
23947db96d56Sopenharmony_ci
def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0):
    """Produce a symbolic disassembly of a pickle.

    'pickle' is a file-like object, or bytes object, containing a (at least
    one) pickle.  The pickle is disassembled from the current position,
    through the first STOP opcode encountered.

    Optional arg 'out' is a file-like object to which the disassembly is
    printed.  It defaults to sys.stdout.

    Optional arg 'memo' is a Python dict, used as the pickle's memo.  It
    may be mutated by dis(), if the pickle contains PUT, BINPUT, LONG_BINPUT
    or MEMOIZE opcodes.  Passing the same memo object to another dis() call
    then allows disassembly to proceed across multiple pickles that were all
    created by the same pickler with the same memo.  Ordinarily you don't
    need to worry about this.

    Optional arg 'indentlevel' is the number of blanks by which to indent
    a new MARK level.  It defaults to 4.

    Optional arg 'annotate' if nonzero instructs dis() to add short
    description of the opcode on each line of disassembled output.
    The value given to 'annotate' must be an integer and is used as a
    hint for the column where annotation should start.  The default
    value is 0, meaning no annotations.

    In addition to printing the disassembly, some sanity checks are made:

    + All embedded opcode arguments "make sense".

    + Explicit and implicit pop operations have enough items on the stack.

    + When an opcode implicitly refers to a markobject, a markobject is
      actually on the stack.

    + A memo entry isn't referenced before it's defined.

    + The markobject isn't stored in the memo.

    + A memo entry isn't redefined.

    A failed check raises ValueError, after the offending opcode's line has
    been printed.  ValueError is also raised if the emulated stack is not
    empty after STOP.
    """

    # Most of the hair here is for sanity checks, but most of it is needed
    # anyway to detect when a protocol 0 POP takes a MARK off the stack
    # (which in turn is needed to indent MARK blocks correctly).

    stack = []          # crude emulation of unpickler stack
    if memo is None:
        memo = {}       # crude emulation of unpickler memo
    maxproto = -1       # max protocol number seen
    markstack = []      # bytecode positions of MARK opcodes
    indentchunk = ' ' * indentlevel
    errormsg = None
    annocol = annotate  # column hint for annotations
    for opcode, arg, pos in genops(pickle):
        if pos is not None:
            print("%5d:" % pos, end=' ', file=out)

        # repr()[1:-1] strips the quotes but keeps escapes for
        # non-printable opcode characters.
        line = "%-4s %s%s" % (repr(opcode.code)[1:-1],
                              indentchunk * len(markstack),
                              opcode.name)

        maxproto = max(maxproto, opcode.proto)
        before = opcode.stack_before    # don't mutate
        after = opcode.stack_after      # don't mutate
        numtopop = len(before)

        # See whether a MARK should be popped.
        markmsg = None
        if markobject in before or (opcode.name == "POP" and
                                    stack and
                                    stack[-1] is markobject):
            assert markobject not in after
            if __debug__:
                if markobject in before:
                    assert before[-1] is stackslice
            if markstack:
                markpos = markstack.pop()
                if markpos is None:
                    markmsg = "(MARK at unknown opcode offset)"
                else:
                    markmsg = "(MARK at %d)" % markpos
                # Pop everything at and after the topmost markobject.
                while stack[-1] is not markobject:
                    stack.pop()
                stack.pop()
                # Stop later code from popping too much.
                try:
                    numtopop = before.index(markobject)
                except ValueError:
                    assert opcode.name == "POP"
                    numtopop = 0
            else:
                errormsg = markmsg = "no MARK exists on stack"

        # Check for correct memo usage.
        if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT", "MEMOIZE"):
            if opcode.name == "MEMOIZE":
                # MEMOIZE has no embedded argument; the key is implicit.
                memo_idx = len(memo)
                markmsg = "(as %d)" % memo_idx
            else:
                assert arg is not None
                memo_idx = arg
            if memo_idx in memo:
                # Report the effective key: 'arg' is None for MEMOIZE.
                errormsg = "memo key %r already defined" % memo_idx
            elif not stack:
                errormsg = "stack is empty -- can't store into memo"
            elif stack[-1] is markobject:
                errormsg = "can't store markobject in the memo"
            else:
                memo[memo_idx] = stack[-1]
        elif opcode.name in ("GET", "BINGET", "LONG_BINGET"):
            if arg in memo:
                assert len(after) == 1
                after = [memo[arg]]     # for better stack emulation
            else:
                errormsg = "memo key %r has never been stored into" % arg

        if arg is not None or markmsg:
            # make a mild effort to align arguments
            line += ' ' * (10 - len(opcode.name))
            if arg is not None:
                line += ' ' + repr(arg)
            if markmsg:
                line += ' ' + markmsg
        if annotate:
            line += ' ' * (annocol - len(line))
            # make a mild effort to align annotations
            annocol = len(line)
            if annocol > 50:
                annocol = annotate
            # Only the first line of the opcode's doc is used.
            line += ' ' + opcode.doc.split('\n', 1)[0]
        print(line, file=out)

        if errormsg:
            # Note that we delayed complaining until the offending opcode
            # was printed.
            raise ValueError(errormsg)

        # Emulate the stack effects.
        if len(stack) < numtopop:
            raise ValueError("tries to pop %d items from stack with "
                             "only %d items" % (numtopop, len(stack)))
        if numtopop:
            del stack[-numtopop:]
        if markobject in after:
            assert markobject not in before
            markstack.append(pos)

        stack.extend(after)

    print("highest protocol among opcodes =", maxproto, file=out)
    if stack:
        raise ValueError("stack not empty after STOP: %r" % stack)
25487db96d56Sopenharmony_ci
25497db96d56Sopenharmony_ci# For use in the doctest, simply as an example of a class to pickle.
25507db96d56Sopenharmony_ciclass _Example:
25517db96d56Sopenharmony_ci    def __init__(self, value):
25527db96d56Sopenharmony_ci        self.value = value
25537db96d56Sopenharmony_ci
25547db96d56Sopenharmony_ci_dis_test = r"""
25557db96d56Sopenharmony_ci>>> import pickle
25567db96d56Sopenharmony_ci>>> x = [1, 2, (3, 4), {b'abc': "def"}]
25577db96d56Sopenharmony_ci>>> pkl0 = pickle.dumps(x, 0)
25587db96d56Sopenharmony_ci>>> dis(pkl0)
25597db96d56Sopenharmony_ci    0: (    MARK
25607db96d56Sopenharmony_ci    1: l        LIST       (MARK at 0)
25617db96d56Sopenharmony_ci    2: p    PUT        0
25627db96d56Sopenharmony_ci    5: I    INT        1
25637db96d56Sopenharmony_ci    8: a    APPEND
25647db96d56Sopenharmony_ci    9: I    INT        2
25657db96d56Sopenharmony_ci   12: a    APPEND
25667db96d56Sopenharmony_ci   13: (    MARK
25677db96d56Sopenharmony_ci   14: I        INT        3
25687db96d56Sopenharmony_ci   17: I        INT        4
25697db96d56Sopenharmony_ci   20: t        TUPLE      (MARK at 13)
25707db96d56Sopenharmony_ci   21: p    PUT        1
25717db96d56Sopenharmony_ci   24: a    APPEND
25727db96d56Sopenharmony_ci   25: (    MARK
25737db96d56Sopenharmony_ci   26: d        DICT       (MARK at 25)
25747db96d56Sopenharmony_ci   27: p    PUT        2
25757db96d56Sopenharmony_ci   30: c    GLOBAL     '_codecs encode'
25767db96d56Sopenharmony_ci   46: p    PUT        3
25777db96d56Sopenharmony_ci   49: (    MARK
25787db96d56Sopenharmony_ci   50: V        UNICODE    'abc'
25797db96d56Sopenharmony_ci   55: p        PUT        4
25807db96d56Sopenharmony_ci   58: V        UNICODE    'latin1'
25817db96d56Sopenharmony_ci   66: p        PUT        5
25827db96d56Sopenharmony_ci   69: t        TUPLE      (MARK at 49)
25837db96d56Sopenharmony_ci   70: p    PUT        6
25847db96d56Sopenharmony_ci   73: R    REDUCE
25857db96d56Sopenharmony_ci   74: p    PUT        7
25867db96d56Sopenharmony_ci   77: V    UNICODE    'def'
25877db96d56Sopenharmony_ci   82: p    PUT        8
25887db96d56Sopenharmony_ci   85: s    SETITEM
25897db96d56Sopenharmony_ci   86: a    APPEND
25907db96d56Sopenharmony_ci   87: .    STOP
25917db96d56Sopenharmony_cihighest protocol among opcodes = 0
25927db96d56Sopenharmony_ci
25937db96d56Sopenharmony_ciTry again with a "binary" pickle.
25947db96d56Sopenharmony_ci
25957db96d56Sopenharmony_ci>>> pkl1 = pickle.dumps(x, 1)
25967db96d56Sopenharmony_ci>>> dis(pkl1)
25977db96d56Sopenharmony_ci    0: ]    EMPTY_LIST
25987db96d56Sopenharmony_ci    1: q    BINPUT     0
25997db96d56Sopenharmony_ci    3: (    MARK
26007db96d56Sopenharmony_ci    4: K        BININT1    1
26017db96d56Sopenharmony_ci    6: K        BININT1    2
26027db96d56Sopenharmony_ci    8: (        MARK
26037db96d56Sopenharmony_ci    9: K            BININT1    3
26047db96d56Sopenharmony_ci   11: K            BININT1    4
26057db96d56Sopenharmony_ci   13: t            TUPLE      (MARK at 8)
26067db96d56Sopenharmony_ci   14: q        BINPUT     1
26077db96d56Sopenharmony_ci   16: }        EMPTY_DICT
26087db96d56Sopenharmony_ci   17: q        BINPUT     2
26097db96d56Sopenharmony_ci   19: c        GLOBAL     '_codecs encode'
26107db96d56Sopenharmony_ci   35: q        BINPUT     3
26117db96d56Sopenharmony_ci   37: (        MARK
26127db96d56Sopenharmony_ci   38: X            BINUNICODE 'abc'
26137db96d56Sopenharmony_ci   46: q            BINPUT     4
26147db96d56Sopenharmony_ci   48: X            BINUNICODE 'latin1'
26157db96d56Sopenharmony_ci   59: q            BINPUT     5
26167db96d56Sopenharmony_ci   61: t            TUPLE      (MARK at 37)
26177db96d56Sopenharmony_ci   62: q        BINPUT     6
26187db96d56Sopenharmony_ci   64: R        REDUCE
26197db96d56Sopenharmony_ci   65: q        BINPUT     7
26207db96d56Sopenharmony_ci   67: X        BINUNICODE 'def'
26217db96d56Sopenharmony_ci   75: q        BINPUT     8
26227db96d56Sopenharmony_ci   77: s        SETITEM
26237db96d56Sopenharmony_ci   78: e        APPENDS    (MARK at 3)
26247db96d56Sopenharmony_ci   79: .    STOP
26257db96d56Sopenharmony_cihighest protocol among opcodes = 1
26267db96d56Sopenharmony_ci
26277db96d56Sopenharmony_ciExercise the INST/OBJ/BUILD family.
26287db96d56Sopenharmony_ci
26297db96d56Sopenharmony_ci>>> import pickletools
26307db96d56Sopenharmony_ci>>> dis(pickle.dumps(pickletools.dis, 0))
26317db96d56Sopenharmony_ci    0: c    GLOBAL     'pickletools dis'
26327db96d56Sopenharmony_ci   17: p    PUT        0
26337db96d56Sopenharmony_ci   20: .    STOP
26347db96d56Sopenharmony_cihighest protocol among opcodes = 0
26357db96d56Sopenharmony_ci
26367db96d56Sopenharmony_ci>>> from pickletools import _Example
26377db96d56Sopenharmony_ci>>> x = [_Example(42)] * 2
26387db96d56Sopenharmony_ci>>> dis(pickle.dumps(x, 0))
26397db96d56Sopenharmony_ci    0: (    MARK
26407db96d56Sopenharmony_ci    1: l        LIST       (MARK at 0)
26417db96d56Sopenharmony_ci    2: p    PUT        0
26427db96d56Sopenharmony_ci    5: c    GLOBAL     'copy_reg _reconstructor'
26437db96d56Sopenharmony_ci   30: p    PUT        1
26447db96d56Sopenharmony_ci   33: (    MARK
26457db96d56Sopenharmony_ci   34: c        GLOBAL     'pickletools _Example'
26467db96d56Sopenharmony_ci   56: p        PUT        2
26477db96d56Sopenharmony_ci   59: c        GLOBAL     '__builtin__ object'
26487db96d56Sopenharmony_ci   79: p        PUT        3
26497db96d56Sopenharmony_ci   82: N        NONE
26507db96d56Sopenharmony_ci   83: t        TUPLE      (MARK at 33)
26517db96d56Sopenharmony_ci   84: p    PUT        4
26527db96d56Sopenharmony_ci   87: R    REDUCE
26537db96d56Sopenharmony_ci   88: p    PUT        5
26547db96d56Sopenharmony_ci   91: (    MARK
26557db96d56Sopenharmony_ci   92: d        DICT       (MARK at 91)
26567db96d56Sopenharmony_ci   93: p    PUT        6
26577db96d56Sopenharmony_ci   96: V    UNICODE    'value'
26587db96d56Sopenharmony_ci  103: p    PUT        7
26597db96d56Sopenharmony_ci  106: I    INT        42
26607db96d56Sopenharmony_ci  110: s    SETITEM
26617db96d56Sopenharmony_ci  111: b    BUILD
26627db96d56Sopenharmony_ci  112: a    APPEND
26637db96d56Sopenharmony_ci  113: g    GET        5
26647db96d56Sopenharmony_ci  116: a    APPEND
26657db96d56Sopenharmony_ci  117: .    STOP
26667db96d56Sopenharmony_cihighest protocol among opcodes = 0
26677db96d56Sopenharmony_ci
26687db96d56Sopenharmony_ci>>> dis(pickle.dumps(x, 1))
26697db96d56Sopenharmony_ci    0: ]    EMPTY_LIST
26707db96d56Sopenharmony_ci    1: q    BINPUT     0
26717db96d56Sopenharmony_ci    3: (    MARK
26727db96d56Sopenharmony_ci    4: c        GLOBAL     'copy_reg _reconstructor'
26737db96d56Sopenharmony_ci   29: q        BINPUT     1
26747db96d56Sopenharmony_ci   31: (        MARK
26757db96d56Sopenharmony_ci   32: c            GLOBAL     'pickletools _Example'
26767db96d56Sopenharmony_ci   54: q            BINPUT     2
26777db96d56Sopenharmony_ci   56: c            GLOBAL     '__builtin__ object'
26787db96d56Sopenharmony_ci   76: q            BINPUT     3
26797db96d56Sopenharmony_ci   78: N            NONE
26807db96d56Sopenharmony_ci   79: t            TUPLE      (MARK at 31)
26817db96d56Sopenharmony_ci   80: q        BINPUT     4
26827db96d56Sopenharmony_ci   82: R        REDUCE
26837db96d56Sopenharmony_ci   83: q        BINPUT     5
26847db96d56Sopenharmony_ci   85: }        EMPTY_DICT
26857db96d56Sopenharmony_ci   86: q        BINPUT     6
26867db96d56Sopenharmony_ci   88: X        BINUNICODE 'value'
26877db96d56Sopenharmony_ci   98: q        BINPUT     7
26887db96d56Sopenharmony_ci  100: K        BININT1    42
26897db96d56Sopenharmony_ci  102: s        SETITEM
26907db96d56Sopenharmony_ci  103: b        BUILD
26917db96d56Sopenharmony_ci  104: h        BINGET     5
26927db96d56Sopenharmony_ci  106: e        APPENDS    (MARK at 3)
26937db96d56Sopenharmony_ci  107: .    STOP
26947db96d56Sopenharmony_cihighest protocol among opcodes = 1
26957db96d56Sopenharmony_ci
26967db96d56Sopenharmony_ciTry "the canonical" recursive-object test.
26977db96d56Sopenharmony_ci
26987db96d56Sopenharmony_ci>>> L = []
26997db96d56Sopenharmony_ci>>> T = L,
27007db96d56Sopenharmony_ci>>> L.append(T)
27017db96d56Sopenharmony_ci>>> L[0] is T
27027db96d56Sopenharmony_ciTrue
27037db96d56Sopenharmony_ci>>> T[0] is L
27047db96d56Sopenharmony_ciTrue
27057db96d56Sopenharmony_ci>>> L[0][0] is L
27067db96d56Sopenharmony_ciTrue
27077db96d56Sopenharmony_ci>>> T[0][0] is T
27087db96d56Sopenharmony_ciTrue
27097db96d56Sopenharmony_ci>>> dis(pickle.dumps(L, 0))
27107db96d56Sopenharmony_ci    0: (    MARK
27117db96d56Sopenharmony_ci    1: l        LIST       (MARK at 0)
27127db96d56Sopenharmony_ci    2: p    PUT        0
27137db96d56Sopenharmony_ci    5: (    MARK
27147db96d56Sopenharmony_ci    6: g        GET        0
27157db96d56Sopenharmony_ci    9: t        TUPLE      (MARK at 5)
27167db96d56Sopenharmony_ci   10: p    PUT        1
27177db96d56Sopenharmony_ci   13: a    APPEND
27187db96d56Sopenharmony_ci   14: .    STOP
27197db96d56Sopenharmony_cihighest protocol among opcodes = 0
27207db96d56Sopenharmony_ci
27217db96d56Sopenharmony_ci>>> dis(pickle.dumps(L, 1))
27227db96d56Sopenharmony_ci    0: ]    EMPTY_LIST
27237db96d56Sopenharmony_ci    1: q    BINPUT     0
27247db96d56Sopenharmony_ci    3: (    MARK
27257db96d56Sopenharmony_ci    4: h        BINGET     0
27267db96d56Sopenharmony_ci    6: t        TUPLE      (MARK at 3)
27277db96d56Sopenharmony_ci    7: q    BINPUT     1
27287db96d56Sopenharmony_ci    9: a    APPEND
27297db96d56Sopenharmony_ci   10: .    STOP
27307db96d56Sopenharmony_cihighest protocol among opcodes = 1
27317db96d56Sopenharmony_ci
27327db96d56Sopenharmony_ciNote that, in the protocol 0 pickle of the recursive tuple, the disassembler
27337db96d56Sopenharmony_cihas to emulate the stack in order to realize that the POP opcode at 16 gets
27347db96d56Sopenharmony_cirid of the MARK at 0.
27357db96d56Sopenharmony_ci
27367db96d56Sopenharmony_ci>>> dis(pickle.dumps(T, 0))
27377db96d56Sopenharmony_ci    0: (    MARK
27387db96d56Sopenharmony_ci    1: (        MARK
27397db96d56Sopenharmony_ci    2: l            LIST       (MARK at 1)
27407db96d56Sopenharmony_ci    3: p        PUT        0
27417db96d56Sopenharmony_ci    6: (        MARK
27427db96d56Sopenharmony_ci    7: g            GET        0
27437db96d56Sopenharmony_ci   10: t            TUPLE      (MARK at 6)
27447db96d56Sopenharmony_ci   11: p        PUT        1
27457db96d56Sopenharmony_ci   14: a        APPEND
27467db96d56Sopenharmony_ci   15: 0        POP
27477db96d56Sopenharmony_ci   16: 0        POP        (MARK at 0)
27487db96d56Sopenharmony_ci   17: g    GET        1
27497db96d56Sopenharmony_ci   20: .    STOP
27507db96d56Sopenharmony_cihighest protocol among opcodes = 0
27517db96d56Sopenharmony_ci
27527db96d56Sopenharmony_ci>>> dis(pickle.dumps(T, 1))
27537db96d56Sopenharmony_ci    0: (    MARK
27547db96d56Sopenharmony_ci    1: ]        EMPTY_LIST
27557db96d56Sopenharmony_ci    2: q        BINPUT     0
27567db96d56Sopenharmony_ci    4: (        MARK
27577db96d56Sopenharmony_ci    5: h            BINGET     0
27587db96d56Sopenharmony_ci    7: t            TUPLE      (MARK at 4)
27597db96d56Sopenharmony_ci    8: q        BINPUT     1
27607db96d56Sopenharmony_ci   10: a        APPEND
27617db96d56Sopenharmony_ci   11: 1        POP_MARK   (MARK at 0)
27627db96d56Sopenharmony_ci   12: h    BINGET     1
27637db96d56Sopenharmony_ci   14: .    STOP
27647db96d56Sopenharmony_cihighest protocol among opcodes = 1
27657db96d56Sopenharmony_ci
27667db96d56Sopenharmony_ciTry protocol 2.
27677db96d56Sopenharmony_ci
27687db96d56Sopenharmony_ci>>> dis(pickle.dumps(L, 2))
27697db96d56Sopenharmony_ci    0: \x80 PROTO      2
27707db96d56Sopenharmony_ci    2: ]    EMPTY_LIST
27717db96d56Sopenharmony_ci    3: q    BINPUT     0
27727db96d56Sopenharmony_ci    5: h    BINGET     0
27737db96d56Sopenharmony_ci    7: \x85 TUPLE1
27747db96d56Sopenharmony_ci    8: q    BINPUT     1
27757db96d56Sopenharmony_ci   10: a    APPEND
27767db96d56Sopenharmony_ci   11: .    STOP
27777db96d56Sopenharmony_cihighest protocol among opcodes = 2
27787db96d56Sopenharmony_ci
27797db96d56Sopenharmony_ci>>> dis(pickle.dumps(T, 2))
27807db96d56Sopenharmony_ci    0: \x80 PROTO      2
27817db96d56Sopenharmony_ci    2: ]    EMPTY_LIST
27827db96d56Sopenharmony_ci    3: q    BINPUT     0
27837db96d56Sopenharmony_ci    5: h    BINGET     0
27847db96d56Sopenharmony_ci    7: \x85 TUPLE1
27857db96d56Sopenharmony_ci    8: q    BINPUT     1
27867db96d56Sopenharmony_ci   10: a    APPEND
27877db96d56Sopenharmony_ci   11: 0    POP
27887db96d56Sopenharmony_ci   12: h    BINGET     1
27897db96d56Sopenharmony_ci   14: .    STOP
27907db96d56Sopenharmony_cihighest protocol among opcodes = 2
27917db96d56Sopenharmony_ci
27927db96d56Sopenharmony_ciTry protocol 3 with annotations:
27937db96d56Sopenharmony_ci
27947db96d56Sopenharmony_ci>>> dis(pickle.dumps(T, 3), annotate=1)
27957db96d56Sopenharmony_ci    0: \x80 PROTO      3 Protocol version indicator.
27967db96d56Sopenharmony_ci    2: ]    EMPTY_LIST   Push an empty list.
27977db96d56Sopenharmony_ci    3: q    BINPUT     0 Store the stack top into the memo.  The stack is not popped.
27987db96d56Sopenharmony_ci    5: h    BINGET     0 Read an object from the memo and push it on the stack.
27997db96d56Sopenharmony_ci    7: \x85 TUPLE1       Build a one-tuple out of the topmost item on the stack.
28007db96d56Sopenharmony_ci    8: q    BINPUT     1 Store the stack top into the memo.  The stack is not popped.
28017db96d56Sopenharmony_ci   10: a    APPEND       Append an object to a list.
28027db96d56Sopenharmony_ci   11: 0    POP          Discard the top stack item, shrinking the stack by one item.
28037db96d56Sopenharmony_ci   12: h    BINGET     1 Read an object from the memo and push it on the stack.
28047db96d56Sopenharmony_ci   14: .    STOP         Stop the unpickling machine.
28057db96d56Sopenharmony_cihighest protocol among opcodes = 2
28067db96d56Sopenharmony_ci
28077db96d56Sopenharmony_ci"""
28087db96d56Sopenharmony_ci
# Doctest for dis() with a caller-supplied memo: one Pickler dumps the same
# list twice into a single stream, so the second pickle consists only of a
# BINGET that refers back to the memo entry stored by the first.  Passing the
# same memo dict to both dis() calls lets the second call resolve it.
_memo_test = r"""
>>> import pickle
>>> import io
>>> f = io.BytesIO()
>>> p = pickle.Pickler(f, 2)
>>> x = [1, 2, 3]
>>> p.dump(x)
>>> p.dump(x)
>>> f.seek(0)
0
>>> memo = {}
>>> dis(f, memo=memo)
    0: \x80 PROTO      2
    2: ]    EMPTY_LIST
    3: q    BINPUT     0
    5: (    MARK
    6: K        BININT1    1
    8: K        BININT1    2
   10: K        BININT1    3
   12: e        APPENDS    (MARK at 5)
   13: .    STOP
highest protocol among opcodes = 2
>>> dis(f, memo=memo)
   14: \x80 PROTO      2
   16: h    BINGET     0
   18: .    STOP
highest protocol among opcodes = 2
"""

# doctest.testmod() also collects tests from a module-level __test__ mapping
# (name -> docstring-like text), which is how the long string-based tests
# above get run by _test().
__test__ = dict(
    disassembler_test=_dis_test,
    disassembler_memo_test=_memo_test,
)
28417db96d56Sopenharmony_ci
28427db96d56Sopenharmony_cidef _test():
28437db96d56Sopenharmony_ci    import doctest
28447db96d56Sopenharmony_ci    return doctest.testmod()
28457db96d56Sopenharmony_ci
if __name__ == "__main__":
    # Command-line driver: disassemble the pickle files named on the command
    # line, or run the module's self-tests when -t/--test is given.
    import argparse
    parser = argparse.ArgumentParser(
        description='disassemble one or more pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-o', '--output', default=sys.stdout, type=argparse.FileType('w'),
        help='the file where the output should be written')
    parser.add_argument(
        '-m', '--memo', action='store_true',
        help='preserve memo between disassemblies')
    parser.add_argument(
        '-l', '--indentlevel', default=4, type=int,
        help='the number of blanks by which to indent a new MARK level')
    parser.add_argument(
        '-a', '--annotate',  action='store_true',
        help='annotate each line with a short opcode description')
    parser.add_argument(
        '-p', '--preamble', default="==> {name} <==",
        help='if more than one pickle file is specified, print this before'
        ' each disassembly')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    try:
        if args.test:
            # The parsed -v flag is not forwarded: doctest.testmod() checks
            # sys.argv for '-v' itself.  Declaring the option here only keeps
            # argparse from rejecting it.
            _test()
        else:
            # dis() uses a nonzero annotate value as the column hint for
            # where the per-opcode descriptions start.
            annotate = 30 if args.annotate else 0
            if not args.pickle_file:
                parser.print_help()
            elif len(args.pickle_file) == 1:
                # A single file gets no preamble and no shared memo.
                dis(args.pickle_file[0], args.output, None,
                    args.indentlevel, annotate)
            else:
                # With --memo, one dict is shared by all disassemblies so
                # later pickles may refer to memo entries from earlier ones.
                memo = {} if args.memo else None
                for f in args.pickle_file:
                    preamble = args.preamble.format(name=f.name)
                    args.output.write(preamble + '\n')
                    dis(f, args.output, memo, args.indentlevel, annotate)
    finally:
        # argparse.FileType opened every input file and the output file
        # during parse_args() and never closes them.  Close them here so the
        # output is flushed deterministically and no handle leaks, but leave
        # the interpreter's own standard streams (what '-' maps to) alone.
        standard_streams = [sys.stdin, sys.stdout]
        standard_streams += [getattr(s, 'buffer', None)
                             for s in (sys.stdin, sys.stdout)]
        for stream in list(args.pickle_file) + [args.output]:
            if not any(stream is std for std in standard_streams):
                stream.close()
2891