xref: /third_party/python/Modules/_pickle.c (revision 7db96d56)
/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6
7#ifndef Py_BUILD_CORE_BUILTIN
8#  define Py_BUILD_CORE_MODULE 1
9#endif
10
11#include "Python.h"
12#include "pycore_ceval.h"         // _Py_EnterRecursiveCall()
13#include "pycore_moduleobject.h"  // _PyModule_GetState()
14#include "pycore_runtime.h"       // _Py_ID()
15#include "pycore_pystate.h"       // _PyThreadState_GET()
16#include "structmember.h"         // PyMemberDef
17
18#include <stdlib.h>               // strtol()
19
20PyDoc_STRVAR(pickle_module_doc,
21"Optimized C implementation for the Python pickle module.");
22
23/*[clinic input]
24module _pickle
25class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
26class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
27class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
28class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
29[clinic start generated code]*/
30/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
31
/* Pickle protocol versions known to this module.

   HIGHEST_PROTOCOL has to be raised whenever new opcodes are added to the
   pickle protocol.  DEFAULT_PROTOCOL may only be raised once the oldest
   Python version that is still supported already includes it. */
enum {
    DEFAULT_PROTOCOL = 4,
    HIGHEST_PROTOCOL = 5
};
39
#ifdef MS_WINDOWS
/* windows.h (pulled in via pycore_runtime.h) already declares typedefs with
   these names, so rename the enumerators below to avoid the clash. */
#define FLOAT FLOAT_
#define INT INT_
#define LONG LONG_
#endif

/* Pickle opcodes.

   Each enumerator is the single byte that identifies the opcode in a pickle
   stream.  This list must be kept up to date with pickle.py; extensive
   documentation of every opcode lives in pickletools.py. */
enum opcode {
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Opcodes added by protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Opcodes added by protocol 3 (Python 3.x). */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C',

    /* Opcodes added by protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8 = '\x8d',
    BINBYTES8 = '\x8e',
    EMPTY_SET = '\x8f',
    ADDITEMS = '\x90',
    FROZENSET = '\x91',
    NEWOBJ_EX = '\x92',
    STACK_GLOBAL = '\x93',
    MEMOIZE = '\x94',
    FRAME = '\x95',

    /* Opcodes added by protocol 5. */
    BYTEARRAY8 = '\x96',
    NEXT_BUFFER = '\x97',
    READONLY_BUFFER = '\x98'
};
127
/* Tunable sizes and limits used by the pickler and the unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing an
       APPENDS/SETITEMS.  Should be kept in synch with
       pickle.Pickler._BATCHSIZE; nothing breaks if the two drift apart, but
       it is unclear that a different value would help either. */
    BATCHSIZE = 1000,

    /* In "fast mode", nesting depth after which the Pickler starts checking
       for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size used when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16,

    /* Framing parameters. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
149
150/*************************************************************************/
151
152/* State of the pickle module, per PEP 3121. */
153typedef struct {
154    /* Exception classes for pickle. */
155    PyObject *PickleError;
156    PyObject *PicklingError;
157    PyObject *UnpicklingError;
158
159    /* copyreg.dispatch_table, {type_object: pickling_function} */
160    PyObject *dispatch_table;
161
162    /* For the extension opcodes EXT1, EXT2 and EXT4. */
163
164    /* copyreg._extension_registry, {(module_name, function_name): code} */
165    PyObject *extension_registry;
166    /* copyreg._extension_cache, {code: object} */
167    PyObject *extension_cache;
168    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
169    PyObject *inverted_registry;
170
171    /* Import mappings for compatibility with Python 2.x */
172
173    /* _compat_pickle.NAME_MAPPING,
174       {(oldmodule, oldname): (newmodule, newname)} */
175    PyObject *name_mapping_2to3;
176    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
177    PyObject *import_mapping_2to3;
178    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
179    PyObject *name_mapping_3to2;
180    PyObject *import_mapping_3to2;
181
182    /* codecs.encode, used for saving bytes in older protocols */
183    PyObject *codecs_encode;
184    /* builtins.getattr, used for saving nested names with protocol < 4 */
185    PyObject *getattr;
186    /* functools.partial, used for implementing __newobj_ex__ with protocols
187       2 and 3 */
188    PyObject *partial;
189} PickleState;
190
191/* Forward declaration of the _pickle module definition. */
192static struct PyModuleDef _picklemodule;
193
194/* Given a module object, get its per-module state. */
195static PickleState *
196_Pickle_GetState(PyObject *module)
197{
198    return (PickleState *)_PyModule_GetState(module);
199}
200
201/* Find the module instance imported in the currently running sub-interpreter
202   and get its state. */
203static PickleState *
204_Pickle_GetGlobalState(void)
205{
206    return _Pickle_GetState(PyState_FindModule(&_picklemodule));
207}
208
209/* Clear the given pickle module state. */
210static void
211_Pickle_ClearState(PickleState *st)
212{
213    Py_CLEAR(st->PickleError);
214    Py_CLEAR(st->PicklingError);
215    Py_CLEAR(st->UnpicklingError);
216    Py_CLEAR(st->dispatch_table);
217    Py_CLEAR(st->extension_registry);
218    Py_CLEAR(st->extension_cache);
219    Py_CLEAR(st->inverted_registry);
220    Py_CLEAR(st->name_mapping_2to3);
221    Py_CLEAR(st->import_mapping_2to3);
222    Py_CLEAR(st->name_mapping_3to2);
223    Py_CLEAR(st->import_mapping_3to2);
224    Py_CLEAR(st->codecs_encode);
225    Py_CLEAR(st->getattr);
226    Py_CLEAR(st->partial);
227}
228
229/* Initialize the given pickle module state. */
230static int
231_Pickle_InitState(PickleState *st)
232{
233    PyObject *copyreg = NULL;
234    PyObject *compat_pickle = NULL;
235    PyObject *codecs = NULL;
236    PyObject *functools = NULL;
237
238    st->getattr = _PyEval_GetBuiltin(&_Py_ID(getattr));
239    if (st->getattr == NULL)
240        goto error;
241
242    copyreg = PyImport_ImportModule("copyreg");
243    if (!copyreg)
244        goto error;
245    st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
246    if (!st->dispatch_table)
247        goto error;
248    if (!PyDict_CheckExact(st->dispatch_table)) {
249        PyErr_Format(PyExc_RuntimeError,
250                     "copyreg.dispatch_table should be a dict, not %.200s",
251                     Py_TYPE(st->dispatch_table)->tp_name);
252        goto error;
253    }
254    st->extension_registry = \
255        PyObject_GetAttrString(copyreg, "_extension_registry");
256    if (!st->extension_registry)
257        goto error;
258    if (!PyDict_CheckExact(st->extension_registry)) {
259        PyErr_Format(PyExc_RuntimeError,
260                     "copyreg._extension_registry should be a dict, "
261                     "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
262        goto error;
263    }
264    st->inverted_registry = \
265        PyObject_GetAttrString(copyreg, "_inverted_registry");
266    if (!st->inverted_registry)
267        goto error;
268    if (!PyDict_CheckExact(st->inverted_registry)) {
269        PyErr_Format(PyExc_RuntimeError,
270                     "copyreg._inverted_registry should be a dict, "
271                     "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
272        goto error;
273    }
274    st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
275    if (!st->extension_cache)
276        goto error;
277    if (!PyDict_CheckExact(st->extension_cache)) {
278        PyErr_Format(PyExc_RuntimeError,
279                     "copyreg._extension_cache should be a dict, "
280                     "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
281        goto error;
282    }
283    Py_CLEAR(copyreg);
284
285    /* Load the 2.x -> 3.x stdlib module mapping tables */
286    compat_pickle = PyImport_ImportModule("_compat_pickle");
287    if (!compat_pickle)
288        goto error;
289    st->name_mapping_2to3 = \
290        PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
291    if (!st->name_mapping_2to3)
292        goto error;
293    if (!PyDict_CheckExact(st->name_mapping_2to3)) {
294        PyErr_Format(PyExc_RuntimeError,
295                     "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
296                     Py_TYPE(st->name_mapping_2to3)->tp_name);
297        goto error;
298    }
299    st->import_mapping_2to3 = \
300        PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
301    if (!st->import_mapping_2to3)
302        goto error;
303    if (!PyDict_CheckExact(st->import_mapping_2to3)) {
304        PyErr_Format(PyExc_RuntimeError,
305                     "_compat_pickle.IMPORT_MAPPING should be a dict, "
306                     "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
307        goto error;
308    }
309    /* ... and the 3.x -> 2.x mapping tables */
310    st->name_mapping_3to2 = \
311        PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
312    if (!st->name_mapping_3to2)
313        goto error;
314    if (!PyDict_CheckExact(st->name_mapping_3to2)) {
315        PyErr_Format(PyExc_RuntimeError,
316                     "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
317                     "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
318        goto error;
319    }
320    st->import_mapping_3to2 = \
321        PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
322    if (!st->import_mapping_3to2)
323        goto error;
324    if (!PyDict_CheckExact(st->import_mapping_3to2)) {
325        PyErr_Format(PyExc_RuntimeError,
326                     "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
327                     "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
328        goto error;
329    }
330    Py_CLEAR(compat_pickle);
331
332    codecs = PyImport_ImportModule("codecs");
333    if (codecs == NULL)
334        goto error;
335    st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
336    if (st->codecs_encode == NULL) {
337        goto error;
338    }
339    if (!PyCallable_Check(st->codecs_encode)) {
340        PyErr_Format(PyExc_RuntimeError,
341                     "codecs.encode should be a callable, not %.200s",
342                     Py_TYPE(st->codecs_encode)->tp_name);
343        goto error;
344    }
345    Py_CLEAR(codecs);
346
347    functools = PyImport_ImportModule("functools");
348    if (!functools)
349        goto error;
350    st->partial = PyObject_GetAttrString(functools, "partial");
351    if (!st->partial)
352        goto error;
353    Py_CLEAR(functools);
354
355    return 0;
356
357  error:
358    Py_CLEAR(copyreg);
359    Py_CLEAR(compat_pickle);
360    Py_CLEAR(codecs);
361    Py_CLEAR(functools);
362    _Pickle_ClearState(st);
363    return -1;
364}
365
366/* Helper for calling a function with a single argument quickly.
367
368   This function steals the reference of the given argument. */
369static PyObject *
370_Pickle_FastCall(PyObject *func, PyObject *obj)
371{
372    PyObject *result;
373
374    result = PyObject_CallOneArg(func, obj);
375    Py_DECREF(obj);
376    return result;
377}
378
379/*************************************************************************/
380
381/* Retrieve and deconstruct a method for avoiding a reference cycle
382   (pickler -> bound method of pickler -> pickler) */
383static int
384init_method_ref(PyObject *self, PyObject *name,
385                PyObject **method_func, PyObject **method_self)
386{
387    PyObject *func, *func2;
388    int ret;
389
390    /* *method_func and *method_self should be consistent.  All refcount decrements
391       should be occurred after setting *method_self and *method_func. */
392    ret = _PyObject_LookupAttr(self, name, &func);
393    if (func == NULL) {
394        *method_self = NULL;
395        Py_CLEAR(*method_func);
396        return ret;
397    }
398
399    if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
400        /* Deconstruct a bound Python method */
401        func2 = PyMethod_GET_FUNCTION(func);
402        Py_INCREF(func2);
403        *method_self = self; /* borrowed */
404        Py_XSETREF(*method_func, func2);
405        Py_DECREF(func);
406        return 0;
407    }
408    else {
409        *method_self = NULL;
410        Py_XSETREF(*method_func, func);
411        return 0;
412    }
413}
414
415/* Bind a method if it was deconstructed */
416static PyObject *
417reconstruct_method(PyObject *func, PyObject *self)
418{
419    if (self) {
420        return PyMethod_New(func, self);
421    }
422    else {
423        Py_INCREF(func);
424        return func;
425    }
426}
427
428static PyObject *
429call_method(PyObject *func, PyObject *self, PyObject *obj)
430{
431    if (self) {
432        return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
433    }
434    else {
435        return PyObject_CallOneArg(func, obj);
436    }
437}
438
439/*************************************************************************/
440
441/* Internal data type used as the unpickling stack. */
442typedef struct {
443    PyObject_VAR_HEAD
444    PyObject **data;
445    int mark_set;          /* is MARK set? */
446    Py_ssize_t fence;      /* position of top MARK or 0 */
447    Py_ssize_t allocated;  /* number of slots in data allocated */
448} Pdata;
449
450static void
451Pdata_dealloc(Pdata *self)
452{
453    Py_ssize_t i = Py_SIZE(self);
454    while (--i >= 0) {
455        Py_DECREF(self->data[i]);
456    }
457    PyMem_Free(self->data);
458    PyObject_Free(self);
459}
460
461static PyTypeObject Pdata_Type = {
462    PyVarObject_HEAD_INIT(NULL, 0)
463    "_pickle.Pdata",              /*tp_name*/
464    sizeof(Pdata),                /*tp_basicsize*/
465    sizeof(PyObject *),           /*tp_itemsize*/
466    (destructor)Pdata_dealloc,    /*tp_dealloc*/
467};
468
469static PyObject *
470Pdata_New(void)
471{
472    Pdata *self;
473
474    if (!(self = PyObject_New(Pdata, &Pdata_Type)))
475        return NULL;
476    Py_SET_SIZE(self, 0);
477    self->mark_set = 0;
478    self->fence = 0;
479    self->allocated = 8;
480    self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
481    if (self->data)
482        return (PyObject *)self;
483    Py_DECREF(self);
484    return PyErr_NoMemory();
485}
486
487
488/* Retain only the initial clearto items.  If clearto >= the current
489 * number of items, this is a (non-erroneous) NOP.
490 */
491static int
492Pdata_clear(Pdata *self, Py_ssize_t clearto)
493{
494    Py_ssize_t i = Py_SIZE(self);
495
496    assert(clearto >= self->fence);
497    if (clearto >= i)
498        return 0;
499
500    while (--i >= clearto) {
501        Py_CLEAR(self->data[i]);
502    }
503    Py_SET_SIZE(self, clearto);
504    return 0;
505}
506
507static int
508Pdata_grow(Pdata *self)
509{
510    PyObject **data = self->data;
511    size_t allocated = (size_t)self->allocated;
512    size_t new_allocated;
513
514    new_allocated = (allocated >> 3) + 6;
515    /* check for integer overflow */
516    if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
517        goto nomemory;
518    new_allocated += allocated;
519    PyMem_RESIZE(data, PyObject *, new_allocated);
520    if (data == NULL)
521        goto nomemory;
522
523    self->data = data;
524    self->allocated = (Py_ssize_t)new_allocated;
525    return 0;
526
527  nomemory:
528    PyErr_NoMemory();
529    return -1;
530}
531
532static int
533Pdata_stack_underflow(Pdata *self)
534{
535    PickleState *st = _Pickle_GetGlobalState();
536    PyErr_SetString(st->UnpicklingError,
537                    self->mark_set ?
538                    "unexpected MARK found" :
539                    "unpickling stack underflow");
540    return -1;
541}
542
543/* D is a Pdata*.  Pop the topmost element and store it into V, which
544 * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
545 * is raised and V is set to NULL.
546 */
547static PyObject *
548Pdata_pop(Pdata *self)
549{
550    if (Py_SIZE(self) <= self->fence) {
551        Pdata_stack_underflow(self);
552        return NULL;
553    }
554    Py_SET_SIZE(self, Py_SIZE(self) - 1);
555    return self->data[Py_SIZE(self)];
556}
557#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
558
559static int
560Pdata_push(Pdata *self, PyObject *obj)
561{
562    if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
563        return -1;
564    }
565    self->data[Py_SIZE(self)] = obj;
566    Py_SET_SIZE(self, Py_SIZE(self) + 1);
567    return 0;
568}
569
/* Push O on stack D, transferring ownership of O to the stack.
   If the push fails, the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER)                                    \
    do {                                                        \
        if (Pdata_push((D), (O)) < 0)                           \
            return (ER);                                        \
    } while (0)

/* Push O on stack D, adding a new reference to O for the stack.
   If the push fails, the enclosing function returns ER (the reference
   taken here is not released in that case). */
#define PDATA_APPEND(D, O, ER)                                  \
    do {                                                        \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0)                           \
            return (ER);                                        \
    } while (0)
578
579static PyObject *
580Pdata_poptuple(Pdata *self, Py_ssize_t start)
581{
582    PyObject *tuple;
583    Py_ssize_t len, i, j;
584
585    if (start < self->fence) {
586        Pdata_stack_underflow(self);
587        return NULL;
588    }
589    len = Py_SIZE(self) - start;
590    tuple = PyTuple_New(len);
591    if (tuple == NULL)
592        return NULL;
593    for (i = start, j = 0; j < len; i++, j++)
594        PyTuple_SET_ITEM(tuple, j, self->data[i]);
595
596    Py_SET_SIZE(self, start);
597    return tuple;
598}
599
600static PyObject *
601Pdata_poplist(Pdata *self, Py_ssize_t start)
602{
603    PyObject *list;
604    Py_ssize_t len, i, j;
605
606    len = Py_SIZE(self) - start;
607    list = PyList_New(len);
608    if (list == NULL)
609        return NULL;
610    for (i = start, j = 0; j < len; i++, j++)
611        PyList_SET_ITEM(list, j, self->data[i]);
612
613    Py_SET_SIZE(self, start);
614    return list;
615}
616
617typedef struct {
618    PyObject *me_key;
619    Py_ssize_t me_value;
620} PyMemoEntry;
621
622typedef struct {
623    size_t mt_mask;
624    size_t mt_used;
625    size_t mt_allocated;
626    PyMemoEntry *mt_table;
627} PyMemoTable;
628
629typedef struct PicklerObject {
630    PyObject_HEAD
631    PyMemoTable *memo;          /* Memo table, keep track of the seen
632                                   objects to support self-referential objects
633                                   pickling. */
634    PyObject *pers_func;        /* persistent_id() method, can be NULL */
635    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
636                                   is an unbound method, NULL otherwise */
637    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
638    PyObject *reducer_override; /* hook for invoking user-defined callbacks
639                                   instead of save_global when pickling
640                                   functions and classes*/
641
642    PyObject *write;            /* write() method of the output stream. */
643    PyObject *output_buffer;    /* Write into a local bytearray buffer before
644                                   flushing to the stream. */
645    Py_ssize_t output_len;      /* Length of output_buffer. */
646    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
647    int proto;                  /* Pickle protocol number, >= 0 */
648    int bin;                    /* Boolean, true if proto > 0 */
649    int framing;                /* True when framing is enabled, proto >= 4 */
650    Py_ssize_t frame_start;     /* Position in output_buffer where the
651                                   current frame begins. -1 if there
652                                   is no frame currently open. */
653
654    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
655    int fast;                   /* Enable fast mode if set to a true value.
656                                   The fast mode disable the usage of memo,
657                                   therefore speeding the pickling process by
658                                   not generating superfluous PUT opcodes. It
659                                   should not be used if with self-referential
660                                   objects. */
661    int fast_nesting;
662    int fix_imports;            /* Indicate whether Pickler should fix
663                                   the name of globals for Python 2.x. */
664    PyObject *fast_memo;
665    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
666} PicklerObject;
667
668typedef struct UnpicklerObject {
669    PyObject_HEAD
670    Pdata *stack;               /* Pickle data stack, store unpickled objects. */
671
672    /* The unpickler memo is just an array of PyObject *s. Using a dict
673       is unnecessary, since the keys are contiguous ints. */
674    PyObject **memo;
675    size_t memo_size;       /* Capacity of the memo array */
676    size_t memo_len;        /* Number of objects in the memo */
677
678    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
679    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
680                                   is an unbound method, NULL otherwise */
681
682    Py_buffer buffer;
683    char *input_buffer;
684    char *input_line;
685    Py_ssize_t input_len;
686    Py_ssize_t next_read_idx;
687    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
688
689    PyObject *read;             /* read() method of the input stream. */
690    PyObject *readinto;         /* readinto() method of the input stream. */
691    PyObject *readline;         /* readline() method of the input stream. */
692    PyObject *peek;             /* peek() method of the input stream, or NULL */
693    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */
694
695    char *encoding;             /* Name of the encoding to be used for
696                                   decoding strings pickled using Python
697                                   2.x. The default value is "ASCII" */
698    char *errors;               /* Name of errors handling scheme to used when
699                                   decoding strings. The default value is
700                                   "strict". */
701    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
702                                   objects. */
703    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
704    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
705    int proto;                  /* Protocol of the pickle loaded. */
706    int fix_imports;            /* Indicate whether Unpickler should fix
707                                   the name of globals pickled by Python 2.x. */
708} UnpicklerObject;
709
710typedef struct {
711    PyObject_HEAD
712    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
713}  PicklerMemoProxyObject;
714
715typedef struct {
716    PyObject_HEAD
717    UnpicklerObject *unpickler;
718} UnpicklerMemoProxyObject;
719
720/* Forward declarations */
721static int save(PicklerObject *, PyObject *, int);
722static int save_reduce(PicklerObject *, PyObject *, PyObject *);
723static PyTypeObject Pickler_Type;
724static PyTypeObject Unpickler_Type;
725
726#include "clinic/_pickle.c.h"
727
/*************************************************************************
 A custom hashtable mapping void* to Python ints, used by the pickler for
 memoization.  Compared with a PyDict it avoids a lot of unnecessary object
 creation, which makes a huge performance difference. */

/* Initial (and minimum) number of slots in a memo table. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted by on each probe. */
#define PERTURB_SHIFT 5
736
737
738static PyMemoTable *
739PyMemoTable_New(void)
740{
741    PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
742    if (memo == NULL) {
743        PyErr_NoMemory();
744        return NULL;
745    }
746
747    memo->mt_used = 0;
748    memo->mt_allocated = MT_MINSIZE;
749    memo->mt_mask = MT_MINSIZE - 1;
750    memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
751    if (memo->mt_table == NULL) {
752        PyMem_Free(memo);
753        PyErr_NoMemory();
754        return NULL;
755    }
756    memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
757
758    return memo;
759}
760
761static PyMemoTable *
762PyMemoTable_Copy(PyMemoTable *self)
763{
764    PyMemoTable *new = PyMemoTable_New();
765    if (new == NULL)
766        return NULL;
767
768    new->mt_used = self->mt_used;
769    new->mt_allocated = self->mt_allocated;
770    new->mt_mask = self->mt_mask;
771    /* The table we get from _New() is probably smaller than we wanted.
772       Free it and allocate one that's the right size. */
773    PyMem_Free(new->mt_table);
774    new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
775    if (new->mt_table == NULL) {
776        PyMem_Free(new);
777        PyErr_NoMemory();
778        return NULL;
779    }
780    for (size_t i = 0; i < self->mt_allocated; i++) {
781        Py_XINCREF(self->mt_table[i].me_key);
782    }
783    memcpy(new->mt_table, self->mt_table,
784           sizeof(PyMemoEntry) * self->mt_allocated);
785
786    return new;
787}
788
789static Py_ssize_t
790PyMemoTable_Size(PyMemoTable *self)
791{
792    return self->mt_used;
793}
794
795static int
796PyMemoTable_Clear(PyMemoTable *self)
797{
798    Py_ssize_t i = self->mt_allocated;
799
800    while (--i >= 0) {
801        Py_XDECREF(self->mt_table[i].me_key);
802    }
803    self->mt_used = 0;
804    memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
805    return 0;
806}
807
808static void
809PyMemoTable_Del(PyMemoTable *self)
810{
811    if (self == NULL)
812        return;
813    PyMemoTable_Clear(self);
814
815    PyMem_Free(self->mt_table);
816    PyMem_Free(self);
817}
818
819/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
820   can be considerably simpler than dictobject.c's lookdict(). */
821static PyMemoEntry *
822_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
823{
824    size_t i;
825    size_t perturb;
826    size_t mask = self->mt_mask;
827    PyMemoEntry *table = self->mt_table;
828    PyMemoEntry *entry;
829    Py_hash_t hash = (Py_hash_t)key >> 3;
830
831    i = hash & mask;
832    entry = &table[i];
833    if (entry->me_key == NULL || entry->me_key == key)
834        return entry;
835
836    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
837        i = (i << 2) + i + perturb + 1;
838        entry = &table[i & mask];
839        if (entry->me_key == NULL || entry->me_key == key)
840            return entry;
841    }
842    Py_UNREACHABLE();
843}
844
845/* Returns -1 on failure, 0 on success. */
846static int
847_PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
848{
849    PyMemoEntry *oldtable = NULL;
850    PyMemoEntry *oldentry, *newentry;
851    size_t new_size = MT_MINSIZE;
852    size_t to_process;
853
854    assert(min_size > 0);
855
856    if (min_size > PY_SSIZE_T_MAX) {
857        PyErr_NoMemory();
858        return -1;
859    }
860
861    /* Find the smallest valid table size >= min_size. */
862    while (new_size < min_size) {
863        new_size <<= 1;
864    }
865    /* new_size needs to be a power of two. */
866    assert((new_size & (new_size - 1)) == 0);
867
868    /* Allocate new table. */
869    oldtable = self->mt_table;
870    self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
871    if (self->mt_table == NULL) {
872        self->mt_table = oldtable;
873        PyErr_NoMemory();
874        return -1;
875    }
876    self->mt_allocated = new_size;
877    self->mt_mask = new_size - 1;
878    memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
879
880    /* Copy entries from the old table. */
881    to_process = self->mt_used;
882    for (oldentry = oldtable; to_process > 0; oldentry++) {
883        if (oldentry->me_key != NULL) {
884            to_process--;
885            /* newentry is a pointer to a chunk of the new
886               mt_table, so we're setting the key:value pair
887               in-place. */
888            newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
889            newentry->me_key = oldentry->me_key;
890            newentry->me_value = oldentry->me_value;
891        }
892    }
893
894    /* Deallocate the old table. */
895    PyMem_Free(oldtable);
896    return 0;
897}
898
899/* Returns NULL on failure, a pointer to the value otherwise. */
900static Py_ssize_t *
901PyMemoTable_Get(PyMemoTable *self, PyObject *key)
902{
903    PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
904    if (entry->me_key == NULL)
905        return NULL;
906    return &entry->me_value;
907}
908
909/* Returns -1 on failure, 0 on success. */
910static int
911PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
912{
913    PyMemoEntry *entry;
914
915    assert(key != NULL);
916
917    entry = _PyMemoTable_Lookup(self, key);
918    if (entry->me_key != NULL) {
919        entry->me_value = value;
920        return 0;
921    }
922    Py_INCREF(key);
923    entry->me_key = key;
924    entry->me_value = value;
925    self->mt_used++;
926
927    /* If we added a key, we can safely resize. Otherwise just return!
928     * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
929     *
930     * Quadrupling the size improves average table sparseness
931     * (reducing collisions) at the cost of some memory. It also halves
932     * the number of expensive resize operations in a growing memo table.
933     *
934     * Very large memo tables (over 50K items) use doubling instead.
935     * This may help applications with severe memory constraints.
936     */
937    if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
938        return 0;
939    }
940    // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
941    size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
942    return _PyMemoTable_ResizeTable(self, desired_size);
943}
944
945#undef MT_MINSIZE
946#undef PERTURB_SHIFT
947
948/*************************************************************************/
949
950
951static int
952_Pickler_ClearBuffer(PicklerObject *self)
953{
954    Py_XSETREF(self->output_buffer,
955              PyBytes_FromStringAndSize(NULL, self->max_output_len));
956    if (self->output_buffer == NULL)
957        return -1;
958    self->output_len = 0;
959    self->frame_start = -1;
960    return 0;
961}
962
/* Serialize `value` into the 8 bytes at `out`, least significant byte
 * first.  When size_t is narrower than 64 bits the remaining high bytes
 * are written as zero. */
static void
_write_size64(char *out, size_t value)
{
    static_assert(sizeof(size_t) <= 8, "size_t is larger than 64-bit");

    /* Peel off one byte per iteration.  Each shift is by 8 bits only, so
     * it is well defined for any size_t width; once all significant bytes
     * are gone `value` is 0 and the remaining output bytes become 0. */
    for (int pos = 0; pos < 8; pos++) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
    }
}
977
978static int
979_Pickler_CommitFrame(PicklerObject *self)
980{
981    size_t frame_len;
982    char *qdata;
983
984    if (!self->framing || self->frame_start == -1)
985        return 0;
986    frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
987    qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
988    if (frame_len >= FRAME_SIZE_MIN) {
989        qdata[0] = FRAME;
990        _write_size64(qdata + 1, frame_len);
991    }
992    else {
993        memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
994        self->output_len -= FRAME_HEADER_SIZE;
995    }
996    self->frame_start = -1;
997    return 0;
998}
999
1000static PyObject *
1001_Pickler_GetString(PicklerObject *self)
1002{
1003    PyObject *output_buffer = self->output_buffer;
1004
1005    assert(self->output_buffer != NULL);
1006
1007    if (_Pickler_CommitFrame(self))
1008        return NULL;
1009
1010    self->output_buffer = NULL;
1011    /* Resize down to exact size */
1012    if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1013        return NULL;
1014    return output_buffer;
1015}
1016
1017static int
1018_Pickler_FlushToFile(PicklerObject *self)
1019{
1020    PyObject *output, *result;
1021
1022    assert(self->write != NULL);
1023
1024    /* This will commit the frame first */
1025    output = _Pickler_GetString(self);
1026    if (output == NULL)
1027        return -1;
1028
1029    result = _Pickle_FastCall(self->write, output);
1030    Py_XDECREF(result);
1031    return (result == NULL) ? -1 : 0;
1032}
1033
1034static int
1035_Pickler_OpcodeBoundary(PicklerObject *self)
1036{
1037    Py_ssize_t frame_len;
1038
1039    if (!self->framing || self->frame_start == -1) {
1040        return 0;
1041    }
1042    frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1043    if (frame_len >= FRAME_SIZE_TARGET) {
1044        if(_Pickler_CommitFrame(self)) {
1045            return -1;
1046        }
1047        /* Flush the content of the committed frame to the underlying
1048         * file and reuse the pickler buffer for the next frame so as
1049         * to limit memory usage when dumping large complex objects to
1050         * a file.
1051         *
1052         * self->write is NULL when called via dumps.
1053         */
1054        if (self->write != NULL) {
1055            if (_Pickler_FlushToFile(self) < 0) {
1056                return -1;
1057            }
1058            if (_Pickler_ClearBuffer(self) < 0) {
1059                return -1;
1060            }
1061        }
1062    }
1063    return 0;
1064}
1065
1066static Py_ssize_t
1067_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1068{
1069    Py_ssize_t i, n, required;
1070    char *buffer;
1071    int need_new_frame;
1072
1073    assert(s != NULL);
1074    need_new_frame = (self->framing && self->frame_start == -1);
1075
1076    if (need_new_frame)
1077        n = data_len + FRAME_HEADER_SIZE;
1078    else
1079        n = data_len;
1080
1081    required = self->output_len + n;
1082    if (required > self->max_output_len) {
1083        /* Make place in buffer for the pickle chunk */
1084        if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1085            PyErr_NoMemory();
1086            return -1;
1087        }
1088        self->max_output_len = (self->output_len + n) / 2 * 3;
1089        if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1090            return -1;
1091    }
1092    buffer = PyBytes_AS_STRING(self->output_buffer);
1093    if (need_new_frame) {
1094        /* Setup new frame */
1095        Py_ssize_t frame_start = self->output_len;
1096        self->frame_start = frame_start;
1097        for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1098            /* Write an invalid value, for debugging */
1099            buffer[frame_start + i] = 0xFE;
1100        }
1101        self->output_len += FRAME_HEADER_SIZE;
1102    }
1103    if (data_len < 8) {
1104        /* This is faster than memcpy when the string is short. */
1105        for (i = 0; i < data_len; i++) {
1106            buffer[self->output_len + i] = s[i];
1107        }
1108    }
1109    else {
1110        memcpy(buffer + self->output_len, s, data_len);
1111    }
1112    self->output_len += data_len;
1113    return data_len;
1114}
1115
1116static PicklerObject *
1117_Pickler_New(void)
1118{
1119    PicklerObject *self;
1120
1121    self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1122    if (self == NULL)
1123        return NULL;
1124
1125    self->pers_func = NULL;
1126    self->dispatch_table = NULL;
1127    self->buffer_callback = NULL;
1128    self->write = NULL;
1129    self->proto = 0;
1130    self->bin = 0;
1131    self->framing = 0;
1132    self->frame_start = -1;
1133    self->fast = 0;
1134    self->fast_nesting = 0;
1135    self->fix_imports = 0;
1136    self->fast_memo = NULL;
1137    self->max_output_len = WRITE_BUF_SIZE;
1138    self->output_len = 0;
1139    self->reducer_override = NULL;
1140
1141    self->memo = PyMemoTable_New();
1142    self->output_buffer = PyBytes_FromStringAndSize(NULL,
1143                                                    self->max_output_len);
1144
1145    if (self->memo == NULL || self->output_buffer == NULL) {
1146        Py_DECREF(self);
1147        return NULL;
1148    }
1149
1150    PyObject_GC_Track(self);
1151    return self;
1152}
1153
1154static int
1155_Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1156{
1157    long proto;
1158
1159    if (protocol == Py_None) {
1160        proto = DEFAULT_PROTOCOL;
1161    }
1162    else {
1163        proto = PyLong_AsLong(protocol);
1164        if (proto < 0) {
1165            if (proto == -1 && PyErr_Occurred())
1166                return -1;
1167            proto = HIGHEST_PROTOCOL;
1168        }
1169        else if (proto > HIGHEST_PROTOCOL) {
1170            PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1171                         HIGHEST_PROTOCOL);
1172            return -1;
1173        }
1174    }
1175    self->proto = (int)proto;
1176    self->bin = proto > 0;
1177    self->fix_imports = fix_imports && proto < 3;
1178    return 0;
1179}
1180
1181/* Returns -1 (with an exception set) on failure, 0 on success. This may
1182   be called once on a freshly created Pickler. */
1183static int
1184_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1185{
1186    assert(file != NULL);
1187    if (_PyObject_LookupAttr(file, &_Py_ID(write), &self->write) < 0) {
1188        return -1;
1189    }
1190    if (self->write == NULL) {
1191        PyErr_SetString(PyExc_TypeError,
1192                        "file must have a 'write' attribute");
1193        return -1;
1194    }
1195
1196    return 0;
1197}
1198
1199static int
1200_Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1201{
1202    if (buffer_callback == Py_None) {
1203        buffer_callback = NULL;
1204    }
1205    if (buffer_callback != NULL && self->proto < 5) {
1206        PyErr_SetString(PyExc_ValueError,
1207                        "buffer_callback needs protocol >= 5");
1208        return -1;
1209    }
1210
1211    Py_XINCREF(buffer_callback);
1212    self->buffer_callback = buffer_callback;
1213    return 0;
1214}
1215
1216/* Returns the size of the input on success, -1 on failure. This takes its
1217   own reference to `input`. */
1218static Py_ssize_t
1219_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1220{
1221    if (self->buffer.buf != NULL)
1222        PyBuffer_Release(&self->buffer);
1223    if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1224        return -1;
1225    self->input_buffer = self->buffer.buf;
1226    self->input_len = self->buffer.len;
1227    self->next_read_idx = 0;
1228    self->prefetched_idx = self->input_len;
1229    return self->input_len;
1230}
1231
1232static int
1233bad_readline(void)
1234{
1235    PickleState *st = _Pickle_GetGlobalState();
1236    PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1237    return -1;
1238}
1239
1240/* Skip any consumed data that was only prefetched using peek() */
1241static int
1242_Unpickler_SkipConsumed(UnpicklerObject *self)
1243{
1244    Py_ssize_t consumed;
1245    PyObject *r;
1246
1247    consumed = self->next_read_idx - self->prefetched_idx;
1248    if (consumed <= 0)
1249        return 0;
1250
1251    assert(self->peek);  /* otherwise we did something wrong */
1252    /* This makes a useless copy... */
1253    r = PyObject_CallFunction(self->read, "n", consumed);
1254    if (r == NULL)
1255        return -1;
1256    Py_DECREF(r);
1257
1258    self->prefetched_idx = self->next_read_idx;
1259    return 0;
1260}
1261
1262static const Py_ssize_t READ_WHOLE_LINE = -1;
1263
1264/* If reading from a file, we need to only pull the bytes we need, since there
1265   may be multiple pickle objects arranged contiguously in the same input
1266   buffer.
1267
1268   If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1269   bytes from the input stream/buffer.
1270
1271   Update the unpickler's input buffer with the newly-read data. Returns -1 on
1272   failure; on success, returns the number of bytes read from the file.
1273
1274   On success, self->input_len will be 0; this is intentional so that when
1275   unpickling from a file, the "we've run out of data" code paths will trigger,
1276   causing the Unpickler to go back to the file for more data. Use the returned
1277   size to tell you how much data you can process. */
1278static Py_ssize_t
1279_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1280{
1281    PyObject *data;
1282    Py_ssize_t read_size;
1283
1284    assert(self->read != NULL);
1285
1286    if (_Unpickler_SkipConsumed(self) < 0)
1287        return -1;
1288
1289    if (n == READ_WHOLE_LINE) {
1290        data = PyObject_CallNoArgs(self->readline);
1291    }
1292    else {
1293        PyObject *len;
1294        /* Prefetch some data without advancing the file pointer, if possible */
1295        if (self->peek && n < PREFETCH) {
1296            len = PyLong_FromSsize_t(PREFETCH);
1297            if (len == NULL)
1298                return -1;
1299            data = _Pickle_FastCall(self->peek, len);
1300            if (data == NULL) {
1301                if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1302                    return -1;
1303                /* peek() is probably not supported by the given file object */
1304                PyErr_Clear();
1305                Py_CLEAR(self->peek);
1306            }
1307            else {
1308                read_size = _Unpickler_SetStringInput(self, data);
1309                Py_DECREF(data);
1310                self->prefetched_idx = 0;
1311                if (n <= read_size)
1312                    return n;
1313            }
1314        }
1315        len = PyLong_FromSsize_t(n);
1316        if (len == NULL)
1317            return -1;
1318        data = _Pickle_FastCall(self->read, len);
1319    }
1320    if (data == NULL)
1321        return -1;
1322
1323    read_size = _Unpickler_SetStringInput(self, data);
1324    Py_DECREF(data);
1325    return read_size;
1326}
1327
1328/* Don't call it directly: use _Unpickler_Read() */
1329static Py_ssize_t
1330_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1331{
1332    Py_ssize_t num_read;
1333
1334    *s = NULL;
1335    if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1336        PickleState *st = _Pickle_GetGlobalState();
1337        PyErr_SetString(st->UnpicklingError,
1338                        "read would overflow (invalid bytecode)");
1339        return -1;
1340    }
1341
1342    /* This case is handled by the _Unpickler_Read() macro for efficiency */
1343    assert(self->next_read_idx + n > self->input_len);
1344
1345    if (!self->read)
1346        return bad_readline();
1347
1348    /* Extend the buffer to satisfy desired size */
1349    num_read = _Unpickler_ReadFromFile(self, n);
1350    if (num_read < 0)
1351        return -1;
1352    if (num_read < n)
1353        return bad_readline();
1354    *s = self->input_buffer;
1355    self->next_read_idx = n;
1356    return n;
1357}
1358
1359/* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1360 *
1361 * This should only be used for non-small data reads where potentially
1362 * avoiding a copy is beneficial.  This method does not try to prefetch
1363 * more data into the input buffer.
1364 *
1365 * _Unpickler_Read() is recommended in most cases.
1366 */
1367static Py_ssize_t
1368_Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1369{
1370    assert(n != READ_WHOLE_LINE);
1371
1372    /* Read from available buffer data, if any */
1373    Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1374    if (in_buffer > 0) {
1375        Py_ssize_t to_read = Py_MIN(in_buffer, n);
1376        memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1377        self->next_read_idx += to_read;
1378        buf += to_read;
1379        n -= to_read;
1380        if (n == 0) {
1381            /* Entire read was satisfied from buffer */
1382            return n;
1383        }
1384    }
1385
1386    /* Read from file */
1387    if (!self->read) {
1388        /* We're unpickling memory, this means the input is truncated */
1389        return bad_readline();
1390    }
1391    if (_Unpickler_SkipConsumed(self) < 0) {
1392        return -1;
1393    }
1394
1395    if (!self->readinto) {
1396        /* readinto() not supported on file-like object, fall back to read()
1397         * and copy into destination buffer (bpo-39681) */
1398        PyObject* len = PyLong_FromSsize_t(n);
1399        if (len == NULL) {
1400            return -1;
1401        }
1402        PyObject* data = _Pickle_FastCall(self->read, len);
1403        if (data == NULL) {
1404            return -1;
1405        }
1406        if (!PyBytes_Check(data)) {
1407            PyErr_Format(PyExc_ValueError,
1408                         "read() returned non-bytes object (%R)",
1409                         Py_TYPE(data));
1410            Py_DECREF(data);
1411            return -1;
1412        }
1413        Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1414        if (read_size < n) {
1415            Py_DECREF(data);
1416            return bad_readline();
1417        }
1418        memcpy(buf, PyBytes_AS_STRING(data), n);
1419        Py_DECREF(data);
1420        return n;
1421    }
1422
1423    /* Call readinto() into user buffer */
1424    PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1425    if (buf_obj == NULL) {
1426        return -1;
1427    }
1428    PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1429    if (read_size_obj == NULL) {
1430        return -1;
1431    }
1432    Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1433    Py_DECREF(read_size_obj);
1434
1435    if (read_size < 0) {
1436        if (!PyErr_Occurred()) {
1437            PyErr_SetString(PyExc_ValueError,
1438                            "readinto() returned negative size");
1439        }
1440        return -1;
1441    }
1442    if (read_size < n) {
1443        return bad_readline();
1444    }
1445    return n;
1446}
1447
/* Read `n` bytes from the unpickler's data source, storing a pointer to
   them in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly, because it also handles input that
   comes from a stream.

   Fast path: when the input buffer still holds at least `n` unread bytes,
   `*s` points into the buffer and next_read_idx is advanced by `n`.
   Slow path: _Unpickler_ReadImpl() is called to obtain the data.

   `n` is evaluated more than once, so it must not have side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((self)->input_len - (self)->next_read_idx >= (n))      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1467
1468static Py_ssize_t
1469_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1470                    char **result)
1471{
1472    char *input_line = PyMem_Realloc(self->input_line, len + 1);
1473    if (input_line == NULL) {
1474        PyErr_NoMemory();
1475        return -1;
1476    }
1477
1478    memcpy(input_line, line, len);
1479    input_line[len] = '\0';
1480    self->input_line = input_line;
1481    *result = self->input_line;
1482    return len;
1483}
1484
1485/* Read a line from the input stream/buffer. If we run off the end of the input
1486   before hitting \n, raise an error.
1487
1488   Returns the number of chars read, or -1 on failure. */
1489static Py_ssize_t
1490_Unpickler_Readline(UnpicklerObject *self, char **result)
1491{
1492    Py_ssize_t i, num_read;
1493
1494    for (i = self->next_read_idx; i < self->input_len; i++) {
1495        if (self->input_buffer[i] == '\n') {
1496            char *line_start = self->input_buffer + self->next_read_idx;
1497            num_read = i - self->next_read_idx + 1;
1498            self->next_read_idx = i + 1;
1499            return _Unpickler_CopyLine(self, line_start, num_read, result);
1500        }
1501    }
1502    if (!self->read)
1503        return bad_readline();
1504
1505    num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1506    if (num_read < 0)
1507        return -1;
1508    if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1509        return bad_readline();
1510    self->next_read_idx = num_read;
1511    return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1512}
1513
1514/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1515   will be modified in place. */
1516static int
1517_Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1518{
1519    size_t i;
1520
1521    assert(new_size > self->memo_size);
1522
1523    PyObject **memo_new = self->memo;
1524    PyMem_RESIZE(memo_new, PyObject *, new_size);
1525    if (memo_new == NULL) {
1526        PyErr_NoMemory();
1527        return -1;
1528    }
1529    self->memo = memo_new;
1530    for (i = self->memo_size; i < new_size; i++)
1531        self->memo[i] = NULL;
1532    self->memo_size = new_size;
1533    return 0;
1534}
1535
1536/* Returns NULL if idx is out of bounds. */
1537static PyObject *
1538_Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1539{
1540    if (idx >= self->memo_size)
1541        return NULL;
1542
1543    return self->memo[idx];
1544}
1545
1546/* Returns -1 (with an exception set) on failure, 0 on success.
1547   This takes its own reference to `value`. */
1548static int
1549_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1550{
1551    PyObject *old_item;
1552
1553    if (idx >= self->memo_size) {
1554        if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1555            return -1;
1556        assert(idx < self->memo_size);
1557    }
1558    Py_INCREF(value);
1559    old_item = self->memo[idx];
1560    self->memo[idx] = value;
1561    if (old_item != NULL) {
1562        Py_DECREF(old_item);
1563    }
1564    else {
1565        self->memo_len++;
1566    }
1567    return 0;
1568}
1569
1570static PyObject **
1571_Unpickler_NewMemo(Py_ssize_t new_size)
1572{
1573    PyObject **memo = PyMem_NEW(PyObject *, new_size);
1574    if (memo == NULL) {
1575        PyErr_NoMemory();
1576        return NULL;
1577    }
1578    memset(memo, 0, new_size * sizeof(PyObject *));
1579    return memo;
1580}
1581
1582/* Free the unpickler's memo, taking care to decref any items left in it. */
1583static void
1584_Unpickler_MemoCleanup(UnpicklerObject *self)
1585{
1586    Py_ssize_t i;
1587    PyObject **memo = self->memo;
1588
1589    if (self->memo == NULL)
1590        return;
1591    self->memo = NULL;
1592    i = self->memo_size;
1593    while (--i >= 0) {
1594        Py_XDECREF(memo[i]);
1595    }
1596    PyMem_Free(memo);
1597}
1598
1599static UnpicklerObject *
1600_Unpickler_New(void)
1601{
1602    UnpicklerObject *self;
1603
1604    self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1605    if (self == NULL)
1606        return NULL;
1607
1608    self->pers_func = NULL;
1609    self->input_buffer = NULL;
1610    self->input_line = NULL;
1611    self->input_len = 0;
1612    self->next_read_idx = 0;
1613    self->prefetched_idx = 0;
1614    self->read = NULL;
1615    self->readinto = NULL;
1616    self->readline = NULL;
1617    self->peek = NULL;
1618    self->buffers = NULL;
1619    self->encoding = NULL;
1620    self->errors = NULL;
1621    self->marks = NULL;
1622    self->num_marks = 0;
1623    self->marks_size = 0;
1624    self->proto = 0;
1625    self->fix_imports = 0;
1626    memset(&self->buffer, 0, sizeof(Py_buffer));
1627    self->memo_size = 32;
1628    self->memo_len = 0;
1629    self->memo = _Unpickler_NewMemo(self->memo_size);
1630    self->stack = (Pdata *)Pdata_New();
1631
1632    if (self->memo == NULL || self->stack == NULL) {
1633        Py_DECREF(self);
1634        return NULL;
1635    }
1636
1637    PyObject_GC_Track(self);
1638    return self;
1639}
1640
1641/* Returns -1 (with an exception set) on failure, 0 on success. This may
1642   be called once on a freshly created Unpickler. */
1643static int
1644_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1645{
1646    /* Optional file methods */
1647    if (_PyObject_LookupAttr(file, &_Py_ID(peek), &self->peek) < 0) {
1648        return -1;
1649    }
1650    if (_PyObject_LookupAttr(file, &_Py_ID(readinto), &self->readinto) < 0) {
1651        return -1;
1652    }
1653    (void)_PyObject_LookupAttr(file, &_Py_ID(read), &self->read);
1654    (void)_PyObject_LookupAttr(file, &_Py_ID(readline), &self->readline);
1655    if (!self->readline || !self->read) {
1656        if (!PyErr_Occurred()) {
1657            PyErr_SetString(PyExc_TypeError,
1658                            "file must have 'read' and 'readline' attributes");
1659        }
1660        Py_CLEAR(self->read);
1661        Py_CLEAR(self->readinto);
1662        Py_CLEAR(self->readline);
1663        Py_CLEAR(self->peek);
1664        return -1;
1665    }
1666    return 0;
1667}
1668
1669/* Returns -1 (with an exception set) on failure, 0 on success. This may
1670   be called once on a freshly created Unpickler. */
1671static int
1672_Unpickler_SetInputEncoding(UnpicklerObject *self,
1673                            const char *encoding,
1674                            const char *errors)
1675{
1676    if (encoding == NULL)
1677        encoding = "ASCII";
1678    if (errors == NULL)
1679        errors = "strict";
1680
1681    self->encoding = _PyMem_Strdup(encoding);
1682    self->errors = _PyMem_Strdup(errors);
1683    if (self->encoding == NULL || self->errors == NULL) {
1684        PyErr_NoMemory();
1685        return -1;
1686    }
1687    return 0;
1688}
1689
1690/* Returns -1 (with an exception set) on failure, 0 on success. This may
1691   be called once on a freshly created Unpickler. */
1692static int
1693_Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1694{
1695    if (buffers == NULL || buffers == Py_None) {
1696        self->buffers = NULL;
1697    }
1698    else {
1699        self->buffers = PyObject_GetIter(buffers);
1700        if (self->buffers == NULL) {
1701            return -1;
1702        }
1703    }
1704    return 0;
1705}
1706
1707/* Generate a GET opcode for an object stored in the memo. */
1708static int
1709memo_get(PicklerObject *self, PyObject *key)
1710{
1711    Py_ssize_t *value;
1712    char pdata[30];
1713    Py_ssize_t len;
1714
1715    value = PyMemoTable_Get(self->memo, key);
1716    if (value == NULL)  {
1717        PyErr_SetObject(PyExc_KeyError, key);
1718        return -1;
1719    }
1720
1721    if (!self->bin) {
1722        pdata[0] = GET;
1723        PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1724                      "%zd\n", *value);
1725        len = strlen(pdata);
1726    }
1727    else {
1728        if (*value < 256) {
1729            pdata[0] = BINGET;
1730            pdata[1] = (unsigned char)(*value & 0xff);
1731            len = 2;
1732        }
1733        else if ((size_t)*value <= 0xffffffffUL) {
1734            pdata[0] = LONG_BINGET;
1735            pdata[1] = (unsigned char)(*value & 0xff);
1736            pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1737            pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1738            pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1739            len = 5;
1740        }
1741        else { /* unlikely */
1742            PickleState *st = _Pickle_GetGlobalState();
1743            PyErr_SetString(st->PicklingError,
1744                            "memo id too large for LONG_BINGET");
1745            return -1;
1746        }
1747    }
1748
1749    if (_Pickler_Write(self, pdata, len) < 0)
1750        return -1;
1751
1752    return 0;
1753}
1754
1755/* Store an object in the memo, assign it a new unique ID based on the number
1756   of objects currently stored in the memo and generate a PUT opcode. */
1757static int
1758memo_put(PicklerObject *self, PyObject *obj)
1759{
1760    char pdata[30];
1761    Py_ssize_t len;
1762    Py_ssize_t idx;
1763
1764    const char memoize_op = MEMOIZE;
1765
1766    if (self->fast)
1767        return 0;
1768
1769    idx = PyMemoTable_Size(self->memo);
1770    if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1771        return -1;
1772
1773    if (self->proto >= 4) {
1774        if (_Pickler_Write(self, &memoize_op, 1) < 0)
1775            return -1;
1776        return 0;
1777    }
1778    else if (!self->bin) {
1779        pdata[0] = PUT;
1780        PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1781                      "%zd\n", idx);
1782        len = strlen(pdata);
1783    }
1784    else {
1785        if (idx < 256) {
1786            pdata[0] = BINPUT;
1787            pdata[1] = (unsigned char)idx;
1788            len = 2;
1789        }
1790        else if ((size_t)idx <= 0xffffffffUL) {
1791            pdata[0] = LONG_BINPUT;
1792            pdata[1] = (unsigned char)(idx & 0xff);
1793            pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1794            pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1795            pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1796            len = 5;
1797        }
1798        else { /* unlikely */
1799            PickleState *st = _Pickle_GetGlobalState();
1800            PyErr_SetString(st->PicklingError,
1801                            "memo id too large for LONG_BINPUT");
1802            return -1;
1803        }
1804    }
1805    if (_Pickler_Write(self, pdata, len) < 0)
1806        return -1;
1807
1808    return 0;
1809}
1810
1811static PyObject *
1812get_dotted_path(PyObject *obj, PyObject *name)
1813{
1814    PyObject *dotted_path;
1815    Py_ssize_t i, n;
1816    _Py_DECLARE_STR(dot, ".");
1817    dotted_path = PyUnicode_Split(name, &_Py_STR(dot), -1);
1818    if (dotted_path == NULL)
1819        return NULL;
1820    n = PyList_GET_SIZE(dotted_path);
1821    assert(n >= 1);
1822    for (i = 0; i < n; i++) {
1823        PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1824        if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1825            if (obj == NULL)
1826                PyErr_Format(PyExc_AttributeError,
1827                             "Can't pickle local object %R", name);
1828            else
1829                PyErr_Format(PyExc_AttributeError,
1830                             "Can't pickle local attribute %R on %R", name, obj);
1831            Py_DECREF(dotted_path);
1832            return NULL;
1833        }
1834    }
1835    return dotted_path;
1836}
1837
1838static PyObject *
1839get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1840{
1841    Py_ssize_t i, n;
1842    PyObject *parent = NULL;
1843
1844    assert(PyList_CheckExact(names));
1845    Py_INCREF(obj);
1846    n = PyList_GET_SIZE(names);
1847    for (i = 0; i < n; i++) {
1848        PyObject *name = PyList_GET_ITEM(names, i);
1849        Py_XDECREF(parent);
1850        parent = obj;
1851        (void)_PyObject_LookupAttr(parent, name, &obj);
1852        if (obj == NULL) {
1853            Py_DECREF(parent);
1854            return NULL;
1855        }
1856    }
1857    if (pparent != NULL)
1858        *pparent = parent;
1859    else
1860        Py_XDECREF(parent);
1861    return obj;
1862}
1863
1864
1865static PyObject *
1866getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1867{
1868    PyObject *dotted_path, *attr;
1869
1870    if (allow_qualname) {
1871        dotted_path = get_dotted_path(obj, name);
1872        if (dotted_path == NULL)
1873            return NULL;
1874        attr = get_deep_attribute(obj, dotted_path, NULL);
1875        Py_DECREF(dotted_path);
1876    }
1877    else {
1878        (void)_PyObject_LookupAttr(obj, name, &attr);
1879    }
1880    if (attr == NULL && !PyErr_Occurred()) {
1881        PyErr_Format(PyExc_AttributeError,
1882                     "Can't get attribute %R on %R", name, obj);
1883    }
1884    return attr;
1885}
1886
1887static int
1888_checkmodule(PyObject *module_name, PyObject *module,
1889             PyObject *global, PyObject *dotted_path)
1890{
1891    if (module == Py_None) {
1892        return -1;
1893    }
1894    if (PyUnicode_Check(module_name) &&
1895            _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1896        return -1;
1897    }
1898
1899    PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1900    if (candidate == NULL) {
1901        return -1;
1902    }
1903    if (candidate != global) {
1904        Py_DECREF(candidate);
1905        return -1;
1906    }
1907    Py_DECREF(candidate);
1908    return 0;
1909}
1910
1911static PyObject *
1912whichmodule(PyObject *global, PyObject *dotted_path)
1913{
1914    PyObject *module_name;
1915    PyObject *module = NULL;
1916    Py_ssize_t i;
1917    PyObject *modules;
1918
1919    if (_PyObject_LookupAttr(global, &_Py_ID(__module__), &module_name) < 0) {
1920        return NULL;
1921    }
1922    if (module_name) {
1923        /* In some rare cases (e.g., bound methods of extension types),
1924           __module__ can be None. If it is so, then search sys.modules for
1925           the module of global. */
1926        if (module_name != Py_None)
1927            return module_name;
1928        Py_CLEAR(module_name);
1929    }
1930    assert(module_name == NULL);
1931
1932    /* Fallback on walking sys.modules */
1933    PyThreadState *tstate = _PyThreadState_GET();
1934    modules = _PySys_GetAttr(tstate, &_Py_ID(modules));
1935    if (modules == NULL) {
1936        PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1937        return NULL;
1938    }
1939    if (PyDict_CheckExact(modules)) {
1940        i = 0;
1941        while (PyDict_Next(modules, &i, &module_name, &module)) {
1942            if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1943                Py_INCREF(module_name);
1944                return module_name;
1945            }
1946            if (PyErr_Occurred()) {
1947                return NULL;
1948            }
1949        }
1950    }
1951    else {
1952        PyObject *iterator = PyObject_GetIter(modules);
1953        if (iterator == NULL) {
1954            return NULL;
1955        }
1956        while ((module_name = PyIter_Next(iterator))) {
1957            module = PyObject_GetItem(modules, module_name);
1958            if (module == NULL) {
1959                Py_DECREF(module_name);
1960                Py_DECREF(iterator);
1961                return NULL;
1962            }
1963            if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1964                Py_DECREF(module);
1965                Py_DECREF(iterator);
1966                return module_name;
1967            }
1968            Py_DECREF(module);
1969            Py_DECREF(module_name);
1970            if (PyErr_Occurred()) {
1971                Py_DECREF(iterator);
1972                return NULL;
1973            }
1974        }
1975        Py_DECREF(iterator);
1976    }
1977
1978    /* If no module is found, use __main__. */
1979    module_name = &_Py_ID(__main__);
1980    Py_INCREF(module_name);
1981    return module_name;
1982}
1983
1984/* fast_save_enter() and fast_save_leave() are guards against recursive
1985   objects when Pickler is used with the "fast mode" (i.e., with object
1986   memoization disabled). If the nesting of a list or dict object exceed
1987   FAST_NESTING_LIMIT, these guards will start keeping an internal
1988   reference to the seen list or dict objects and check whether these objects
1989   are recursive. These are not strictly necessary, since save() has a
1990   hard-coded recursion limit, but they give a nicer error message than the
1991   typical RuntimeError. */
1992static int
1993fast_save_enter(PicklerObject *self, PyObject *obj)
1994{
1995    /* if fast_nesting < 0, we're doing an error exit. */
1996    if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1997        PyObject *key = NULL;
1998        if (self->fast_memo == NULL) {
1999            self->fast_memo = PyDict_New();
2000            if (self->fast_memo == NULL) {
2001                self->fast_nesting = -1;
2002                return 0;
2003            }
2004        }
2005        key = PyLong_FromVoidPtr(obj);
2006        if (key == NULL) {
2007            self->fast_nesting = -1;
2008            return 0;
2009        }
2010        int r = PyDict_Contains(self->fast_memo, key);
2011        if (r > 0) {
2012            PyErr_Format(PyExc_ValueError,
2013                         "fast mode: can't pickle cyclic objects "
2014                         "including object type %.200s at %p",
2015                         Py_TYPE(obj)->tp_name, obj);
2016        }
2017        else if (r == 0) {
2018            r = PyDict_SetItem(self->fast_memo, key, Py_None);
2019        }
2020        Py_DECREF(key);
2021        if (r != 0) {
2022            self->fast_nesting = -1;
2023            return 0;
2024        }
2025    }
2026    return 1;
2027}
2028
2029static int
2030fast_save_leave(PicklerObject *self, PyObject *obj)
2031{
2032    if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2033        PyObject *key = PyLong_FromVoidPtr(obj);
2034        if (key == NULL)
2035            return 0;
2036        if (PyDict_DelItem(self->fast_memo, key) < 0) {
2037            Py_DECREF(key);
2038            return 0;
2039        }
2040        Py_DECREF(key);
2041    }
2042    return 1;
2043}
2044
2045static int
2046save_none(PicklerObject *self, PyObject *obj)
2047{
2048    const char none_op = NONE;
2049    if (_Pickler_Write(self, &none_op, 1) < 0)
2050        return -1;
2051
2052    return 0;
2053}
2054
2055static int
2056save_bool(PicklerObject *self, PyObject *obj)
2057{
2058    if (self->proto >= 2) {
2059        const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2060        if (_Pickler_Write(self, &bool_op, 1) < 0)
2061            return -1;
2062    }
2063    else {
2064        /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2065         * so that unpicklers written before bools were introduced unpickle them
2066         * as ints, but unpicklers after can recognize that bools were intended.
2067         * Note that protocol 2 added direct ways to pickle bools.
2068         */
2069        const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2070        if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2071            return -1;
2072    }
2073    return 0;
2074}
2075
2076static int
2077save_long(PicklerObject *self, PyObject *obj)
2078{
2079    PyObject *repr = NULL;
2080    Py_ssize_t size;
2081    long val;
2082    int overflow;
2083    int status = 0;
2084
2085    val= PyLong_AsLongAndOverflow(obj, &overflow);
2086    if (!overflow && (sizeof(long) <= 4 ||
2087            (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2088    {
2089        /* result fits in a signed 4-byte integer.
2090
2091           Note: we can't use -0x80000000L in the above condition because some
2092           compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2093           before applying the unary minus when sizeof(long) <= 4. The
2094           resulting value stays unsigned which is commonly not what we want,
2095           so MSVC happily warns us about it.  However, that result would have
2096           been fine because we guard for sizeof(long) <= 4 which turns the
2097           condition true in that particular case. */
2098        char pdata[32];
2099        Py_ssize_t len = 0;
2100
2101        if (self->bin) {
2102            pdata[1] = (unsigned char)(val & 0xff);
2103            pdata[2] = (unsigned char)((val >> 8) & 0xff);
2104            pdata[3] = (unsigned char)((val >> 16) & 0xff);
2105            pdata[4] = (unsigned char)((val >> 24) & 0xff);
2106
2107            if ((pdata[4] != 0) || (pdata[3] != 0)) {
2108                pdata[0] = BININT;
2109                len = 5;
2110            }
2111            else if (pdata[2] != 0) {
2112                pdata[0] = BININT2;
2113                len = 3;
2114            }
2115            else {
2116                pdata[0] = BININT1;
2117                len = 2;
2118            }
2119        }
2120        else {
2121            sprintf(pdata, "%c%ld\n", INT,  val);
2122            len = strlen(pdata);
2123        }
2124        if (_Pickler_Write(self, pdata, len) < 0)
2125            return -1;
2126
2127        return 0;
2128    }
2129    assert(!PyErr_Occurred());
2130
2131    if (self->proto >= 2) {
2132        /* Linear-time pickling. */
2133        size_t nbits;
2134        size_t nbytes;
2135        unsigned char *pdata;
2136        char header[5];
2137        int i;
2138        int sign = _PyLong_Sign(obj);
2139
2140        if (sign == 0) {
2141            header[0] = LONG1;
2142            header[1] = 0;      /* It's 0 -- an empty bytestring. */
2143            if (_Pickler_Write(self, header, 2) < 0)
2144                goto error;
2145            return 0;
2146        }
2147        nbits = _PyLong_NumBits(obj);
2148        if (nbits == (size_t)-1 && PyErr_Occurred())
2149            goto error;
2150        /* How many bytes do we need?  There are nbits >> 3 full
2151         * bytes of data, and nbits & 7 leftover bits.  If there
2152         * are any leftover bits, then we clearly need another
2153         * byte.  What's not so obvious is that we *probably*
2154         * need another byte even if there aren't any leftovers:
2155         * the most-significant bit of the most-significant byte
2156         * acts like a sign bit, and it's usually got a sense
2157         * opposite of the one we need.  The exception is ints
2158         * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2159         * its own 256's-complement, so has the right sign bit
2160         * even without the extra byte.  That's a pain to check
2161         * for in advance, though, so we always grab an extra
2162         * byte at the start, and cut it back later if possible.
2163         */
2164        nbytes = (nbits >> 3) + 1;
2165        if (nbytes > 0x7fffffffL) {
2166            PyErr_SetString(PyExc_OverflowError,
2167                            "int too large to pickle");
2168            goto error;
2169        }
2170        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2171        if (repr == NULL)
2172            goto error;
2173        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2174        i = _PyLong_AsByteArray((PyLongObject *)obj,
2175                                pdata, nbytes,
2176                                1 /* little endian */ , 1 /* signed */ );
2177        if (i < 0)
2178            goto error;
2179        /* If the int is negative, this may be a byte more than
2180         * needed.  This is so iff the MSB is all redundant sign
2181         * bits.
2182         */
2183        if (sign < 0 &&
2184            nbytes > 1 &&
2185            pdata[nbytes - 1] == 0xff &&
2186            (pdata[nbytes - 2] & 0x80) != 0) {
2187            nbytes--;
2188        }
2189
2190        if (nbytes < 256) {
2191            header[0] = LONG1;
2192            header[1] = (unsigned char)nbytes;
2193            size = 2;
2194        }
2195        else {
2196            header[0] = LONG4;
2197            size = (Py_ssize_t) nbytes;
2198            for (i = 1; i < 5; i++) {
2199                header[i] = (unsigned char)(size & 0xff);
2200                size >>= 8;
2201            }
2202            size = 5;
2203        }
2204        if (_Pickler_Write(self, header, size) < 0 ||
2205            _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2206            goto error;
2207    }
2208    else {
2209        const char long_op = LONG;
2210        const char *string;
2211
2212        /* proto < 2: write the repr and newline.  This is quadratic-time (in
2213           the number of digits), in both directions.  We add a trailing 'L'
2214           to the repr, for compatibility with Python 2.x. */
2215
2216        repr = PyObject_Repr(obj);
2217        if (repr == NULL)
2218            goto error;
2219
2220        string = PyUnicode_AsUTF8AndSize(repr, &size);
2221        if (string == NULL)
2222            goto error;
2223
2224        if (_Pickler_Write(self, &long_op, 1) < 0 ||
2225            _Pickler_Write(self, string, size) < 0 ||
2226            _Pickler_Write(self, "L\n", 2) < 0)
2227            goto error;
2228    }
2229
2230    if (0) {
2231  error:
2232      status = -1;
2233    }
2234    Py_XDECREF(repr);
2235
2236    return status;
2237}
2238
2239static int
2240save_float(PicklerObject *self, PyObject *obj)
2241{
2242    double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2243
2244    if (self->bin) {
2245        char pdata[9];
2246        pdata[0] = BINFLOAT;
2247        if (PyFloat_Pack8(x, &pdata[1], 0) < 0)
2248            return -1;
2249        if (_Pickler_Write(self, pdata, 9) < 0)
2250            return -1;
2251   }
2252    else {
2253        int result = -1;
2254        char *buf = NULL;
2255        char op = FLOAT;
2256
2257        if (_Pickler_Write(self, &op, 1) < 0)
2258            goto done;
2259
2260        buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2261        if (!buf) {
2262            PyErr_NoMemory();
2263            goto done;
2264        }
2265
2266        if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2267            goto done;
2268
2269        if (_Pickler_Write(self, "\n", 1) < 0)
2270            goto done;
2271
2272        result = 0;
2273done:
2274        PyMem_Free(buf);
2275        return result;
2276    }
2277
2278    return 0;
2279}
2280
2281/* Perform direct write of the header and payload of the binary object.
2282
2283   The large contiguous data is written directly into the underlying file
2284   object, bypassing the output_buffer of the Pickler.  We intentionally
2285   do not insert a protocol 4 frame opcode to make it possible to optimize
2286   file.read calls in the loader.
2287 */
2288static int
2289_Pickler_write_bytes(PicklerObject *self,
2290                     const char *header, Py_ssize_t header_size,
2291                     const char *data, Py_ssize_t data_size,
2292                     PyObject *payload)
2293{
2294    int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2295    int framing = self->framing;
2296
2297    if (bypass_buffer) {
2298        assert(self->output_buffer != NULL);
2299        /* Commit the previous frame. */
2300        if (_Pickler_CommitFrame(self)) {
2301            return -1;
2302        }
2303        /* Disable framing temporarily */
2304        self->framing = 0;
2305    }
2306
2307    if (_Pickler_Write(self, header, header_size) < 0) {
2308        return -1;
2309    }
2310
2311    if (bypass_buffer && self->write != NULL) {
2312        /* Bypass the in-memory buffer to directly stream large data
2313           into the underlying file object. */
2314        PyObject *result, *mem = NULL;
2315        /* Dump the output buffer to the file. */
2316        if (_Pickler_FlushToFile(self) < 0) {
2317            return -1;
2318        }
2319
2320        /* Stream write the payload into the file without going through the
2321           output buffer. */
2322        if (payload == NULL) {
2323            /* TODO: It would be better to use a memoryview with a linked
2324               original string if this is possible. */
2325            payload = mem = PyBytes_FromStringAndSize(data, data_size);
2326            if (payload == NULL) {
2327                return -1;
2328            }
2329        }
2330        result = PyObject_CallOneArg(self->write, payload);
2331        Py_XDECREF(mem);
2332        if (result == NULL) {
2333            return -1;
2334        }
2335        Py_DECREF(result);
2336
2337        /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2338        if (_Pickler_ClearBuffer(self) < 0) {
2339            return -1;
2340        }
2341    }
2342    else {
2343        if (_Pickler_Write(self, data, data_size) < 0) {
2344            return -1;
2345        }
2346    }
2347
2348    /* Re-enable framing for subsequent calls to _Pickler_Write. */
2349    self->framing = framing;
2350
2351    return 0;
2352}
2353
2354static int
2355_save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2356                 Py_ssize_t size)
2357{
2358    assert(self->proto >= 3);
2359
2360    char header[9];
2361    Py_ssize_t len;
2362
2363    if (size < 0)
2364        return -1;
2365
2366    if (size <= 0xff) {
2367        header[0] = SHORT_BINBYTES;
2368        header[1] = (unsigned char)size;
2369        len = 2;
2370    }
2371    else if ((size_t)size <= 0xffffffffUL) {
2372        header[0] = BINBYTES;
2373        header[1] = (unsigned char)(size & 0xff);
2374        header[2] = (unsigned char)((size >> 8) & 0xff);
2375        header[3] = (unsigned char)((size >> 16) & 0xff);
2376        header[4] = (unsigned char)((size >> 24) & 0xff);
2377        len = 5;
2378    }
2379    else if (self->proto >= 4) {
2380        header[0] = BINBYTES8;
2381        _write_size64(header + 1, size);
2382        len = 9;
2383    }
2384    else {
2385        PyErr_SetString(PyExc_OverflowError,
2386                        "serializing a bytes object larger than 4 GiB "
2387                        "requires pickle protocol 4 or higher");
2388        return -1;
2389    }
2390
2391    if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2392        return -1;
2393    }
2394
2395    if (memo_put(self, obj) < 0) {
2396        return -1;
2397    }
2398
2399    return 0;
2400}
2401
2402static int
2403save_bytes(PicklerObject *self, PyObject *obj)
2404{
2405    if (self->proto < 3) {
2406        /* Older pickle protocols do not have an opcode for pickling bytes
2407           objects. Therefore, we need to fake the copy protocol (i.e.,
2408           the __reduce__ method) to permit bytes object unpickling.
2409
2410           Here we use a hack to be compatible with Python 2. Since in Python
2411           2 'bytes' is just an alias for 'str' (which has different
2412           parameters than the actual bytes object), we use codecs.encode
2413           to create the appropriate 'str' object when unpickled using
2414           Python 2 *and* the appropriate 'bytes' object when unpickled
2415           using Python 3. Again this is a hack and we don't need to do this
2416           with newer protocols. */
2417        PyObject *reduce_value;
2418        int status;
2419
2420        if (PyBytes_GET_SIZE(obj) == 0) {
2421            reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2422        }
2423        else {
2424            PickleState *st = _Pickle_GetGlobalState();
2425            PyObject *unicode_str =
2426                PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2427                                       PyBytes_GET_SIZE(obj),
2428                                       "strict");
2429
2430            if (unicode_str == NULL)
2431                return -1;
2432            reduce_value = Py_BuildValue("(O(OO))",
2433                                         st->codecs_encode, unicode_str,
2434                                         &_Py_ID(latin1));
2435            Py_DECREF(unicode_str);
2436        }
2437
2438        if (reduce_value == NULL)
2439            return -1;
2440
2441        /* save_reduce() will memoize the object automatically. */
2442        status = save_reduce(self, reduce_value, obj);
2443        Py_DECREF(reduce_value);
2444        return status;
2445    }
2446    else {
2447        return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2448                                PyBytes_GET_SIZE(obj));
2449    }
2450}
2451
2452static int
2453_save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2454                     Py_ssize_t size)
2455{
2456    assert(self->proto >= 5);
2457
2458    char header[9];
2459    Py_ssize_t len;
2460
2461    if (size < 0)
2462        return -1;
2463
2464    header[0] = BYTEARRAY8;
2465    _write_size64(header + 1, size);
2466    len = 9;
2467
2468    if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2469        return -1;
2470    }
2471
2472    if (memo_put(self, obj) < 0) {
2473        return -1;
2474    }
2475
2476    return 0;
2477}
2478
2479static int
2480save_bytearray(PicklerObject *self, PyObject *obj)
2481{
2482    if (self->proto < 5) {
2483        /* Older pickle protocols do not have an opcode for pickling
2484         * bytearrays. */
2485        PyObject *reduce_value = NULL;
2486        int status;
2487
2488        if (PyByteArray_GET_SIZE(obj) == 0) {
2489            reduce_value = Py_BuildValue("(O())",
2490                                         (PyObject *) &PyByteArray_Type);
2491        }
2492        else {
2493            PyObject *bytes_obj = PyBytes_FromObject(obj);
2494            if (bytes_obj != NULL) {
2495                reduce_value = Py_BuildValue("(O(O))",
2496                                             (PyObject *) &PyByteArray_Type,
2497                                             bytes_obj);
2498                Py_DECREF(bytes_obj);
2499            }
2500        }
2501        if (reduce_value == NULL)
2502            return -1;
2503
2504        /* save_reduce() will memoize the object automatically. */
2505        status = save_reduce(self, reduce_value, obj);
2506        Py_DECREF(reduce_value);
2507        return status;
2508    }
2509    else {
2510        return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2511                                    PyByteArray_GET_SIZE(obj));
2512    }
2513}
2514
2515static int
2516save_picklebuffer(PicklerObject *self, PyObject *obj)
2517{
2518    if (self->proto < 5) {
2519        PickleState *st = _Pickle_GetGlobalState();
2520        PyErr_SetString(st->PicklingError,
2521                        "PickleBuffer can only pickled with protocol >= 5");
2522        return -1;
2523    }
2524    const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2525    if (view == NULL) {
2526        return -1;
2527    }
2528    if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2529        PickleState *st = _Pickle_GetGlobalState();
2530        PyErr_SetString(st->PicklingError,
2531                        "PickleBuffer can not be pickled when "
2532                        "pointing to a non-contiguous buffer");
2533        return -1;
2534    }
2535    int in_band = 1;
2536    if (self->buffer_callback != NULL) {
2537        PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2538        if (ret == NULL) {
2539            return -1;
2540        }
2541        in_band = PyObject_IsTrue(ret);
2542        Py_DECREF(ret);
2543        if (in_band == -1) {
2544            return -1;
2545        }
2546    }
2547    if (in_band) {
2548        /* Write data in-band */
2549        if (view->readonly) {
2550            return _save_bytes_data(self, obj, (const char*) view->buf,
2551                                    view->len);
2552        }
2553        else {
2554            return _save_bytearray_data(self, obj, (const char*) view->buf,
2555                                        view->len);
2556        }
2557    }
2558    else {
2559        /* Write data out-of-band */
2560        const char next_buffer_op = NEXT_BUFFER;
2561        if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2562            return -1;
2563        }
2564        if (view->readonly) {
2565            const char readonly_buffer_op = READONLY_BUFFER;
2566            if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2567                return -1;
2568            }
2569        }
2570    }
2571    return 0;
2572}
2573
2574/* A copy of PyUnicode_AsRawUnicodeEscapeString() that also translates
2575   backslash and newline characters to \uXXXX escapes. */
2576static PyObject *
2577raw_unicode_escape(PyObject *obj)
2578{
2579    char *p;
2580    Py_ssize_t i, size;
2581    const void *data;
2582    unsigned int kind;
2583    _PyBytesWriter writer;
2584
2585    if (PyUnicode_READY(obj))
2586        return NULL;
2587
2588    _PyBytesWriter_Init(&writer);
2589
2590    size = PyUnicode_GET_LENGTH(obj);
2591    data = PyUnicode_DATA(obj);
2592    kind = PyUnicode_KIND(obj);
2593
2594    p = _PyBytesWriter_Alloc(&writer, size);
2595    if (p == NULL)
2596        goto error;
2597    writer.overallocate = 1;
2598
2599    for (i=0; i < size; i++) {
2600        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2601        /* Map 32-bit characters to '\Uxxxxxxxx' */
2602        if (ch >= 0x10000) {
2603            /* -1: subtract 1 preallocated byte */
2604            p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2605            if (p == NULL)
2606                goto error;
2607
2608            *p++ = '\\';
2609            *p++ = 'U';
2610            *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2611            *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2612            *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2613            *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2614            *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2615            *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2616            *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2617            *p++ = Py_hexdigits[ch & 15];
2618        }
2619        /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2620        else if (ch >= 256 ||
2621                 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2622                 ch == 0x1a)
2623        {
2624            /* -1: subtract 1 preallocated byte */
2625            p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2626            if (p == NULL)
2627                goto error;
2628
2629            *p++ = '\\';
2630            *p++ = 'u';
2631            *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2632            *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2633            *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2634            *p++ = Py_hexdigits[ch & 15];
2635        }
2636        /* Copy everything else as-is */
2637        else
2638            *p++ = (char) ch;
2639    }
2640
2641    return _PyBytesWriter_Finish(&writer, p);
2642
2643error:
2644    _PyBytesWriter_Dealloc(&writer);
2645    return NULL;
2646}
2647
2648static int
2649write_unicode_binary(PicklerObject *self, PyObject *obj)
2650{
2651    char header[9];
2652    Py_ssize_t len;
2653    PyObject *encoded = NULL;
2654    Py_ssize_t size;
2655    const char *data;
2656
2657    if (PyUnicode_READY(obj))
2658        return -1;
2659
2660    data = PyUnicode_AsUTF8AndSize(obj, &size);
2661    if (data == NULL) {
2662        /* Issue #8383: for strings with lone surrogates, fallback on the
2663           "surrogatepass" error handler. */
2664        PyErr_Clear();
2665        encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2666        if (encoded == NULL)
2667            return -1;
2668
2669        data = PyBytes_AS_STRING(encoded);
2670        size = PyBytes_GET_SIZE(encoded);
2671    }
2672
2673    assert(size >= 0);
2674    if (size <= 0xff && self->proto >= 4) {
2675        header[0] = SHORT_BINUNICODE;
2676        header[1] = (unsigned char)(size & 0xff);
2677        len = 2;
2678    }
2679    else if ((size_t)size <= 0xffffffffUL) {
2680        header[0] = BINUNICODE;
2681        header[1] = (unsigned char)(size & 0xff);
2682        header[2] = (unsigned char)((size >> 8) & 0xff);
2683        header[3] = (unsigned char)((size >> 16) & 0xff);
2684        header[4] = (unsigned char)((size >> 24) & 0xff);
2685        len = 5;
2686    }
2687    else if (self->proto >= 4) {
2688        header[0] = BINUNICODE8;
2689        _write_size64(header + 1, size);
2690        len = 9;
2691    }
2692    else {
2693        PyErr_SetString(PyExc_OverflowError,
2694                        "serializing a string larger than 4 GiB "
2695                        "requires pickle protocol 4 or higher");
2696        Py_XDECREF(encoded);
2697        return -1;
2698    }
2699
2700    if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2701        Py_XDECREF(encoded);
2702        return -1;
2703    }
2704    Py_XDECREF(encoded);
2705    return 0;
2706}
2707
2708static int
2709save_unicode(PicklerObject *self, PyObject *obj)
2710{
2711    if (self->bin) {
2712        if (write_unicode_binary(self, obj) < 0)
2713            return -1;
2714    }
2715    else {
2716        PyObject *encoded;
2717        Py_ssize_t size;
2718        const char unicode_op = UNICODE;
2719
2720        encoded = raw_unicode_escape(obj);
2721        if (encoded == NULL)
2722            return -1;
2723
2724        if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2725            Py_DECREF(encoded);
2726            return -1;
2727        }
2728
2729        size = PyBytes_GET_SIZE(encoded);
2730        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2731            Py_DECREF(encoded);
2732            return -1;
2733        }
2734        Py_DECREF(encoded);
2735
2736        if (_Pickler_Write(self, "\n", 1) < 0)
2737            return -1;
2738    }
2739    if (memo_put(self, obj) < 0)
2740        return -1;
2741
2742    return 0;
2743}
2744
2745/* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2746static int
2747store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2748{
2749    Py_ssize_t i;
2750
2751    assert(PyTuple_Size(t) == len);
2752
2753    for (i = 0; i < len; i++) {
2754        PyObject *element = PyTuple_GET_ITEM(t, i);
2755
2756        if (element == NULL)
2757            return -1;
2758        if (save(self, element, 0) < 0)
2759            return -1;
2760    }
2761
2762    return 0;
2763}
2764
2765/* Tuples are ubiquitous in the pickle protocols, so many techniques are
2766 * used across protocols to minimize the space needed to pickle them.
2767 * Tuples are also the only builtin immutable type that can be recursive
2768 * (a tuple can be reached from itself), and that requires some subtle
2769 * magic so that it works in all cases.  IOW, this is a long routine.
2770 */
2771static int
2772save_tuple(PicklerObject *self, PyObject *obj)
2773{
2774    Py_ssize_t len, i;
2775
2776    const char mark_op = MARK;
2777    const char tuple_op = TUPLE;
2778    const char pop_op = POP;
2779    const char pop_mark_op = POP_MARK;
2780    const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2781
2782    if ((len = PyTuple_Size(obj)) < 0)
2783        return -1;
2784
2785    if (len == 0) {
2786        char pdata[2];
2787
2788        if (self->proto) {
2789            pdata[0] = EMPTY_TUPLE;
2790            len = 1;
2791        }
2792        else {
2793            pdata[0] = MARK;
2794            pdata[1] = TUPLE;
2795            len = 2;
2796        }
2797        if (_Pickler_Write(self, pdata, len) < 0)
2798            return -1;
2799        return 0;
2800    }
2801
2802    /* The tuple isn't in the memo now.  If it shows up there after
2803     * saving the tuple elements, the tuple must be recursive, in
2804     * which case we'll pop everything we put on the stack, and fetch
2805     * its value from the memo.
2806     */
2807    if (len <= 3 && self->proto >= 2) {
2808        /* Use TUPLE{1,2,3} opcodes. */
2809        if (store_tuple_elements(self, obj, len) < 0)
2810            return -1;
2811
2812        if (PyMemoTable_Get(self->memo, obj)) {
2813            /* pop the len elements */
2814            for (i = 0; i < len; i++)
2815                if (_Pickler_Write(self, &pop_op, 1) < 0)
2816                    return -1;
2817            /* fetch from memo */
2818            if (memo_get(self, obj) < 0)
2819                return -1;
2820
2821            return 0;
2822        }
2823        else { /* Not recursive. */
2824            if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2825                return -1;
2826        }
2827        goto memoize;
2828    }
2829
2830    /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2831     * Generate MARK e1 e2 ... TUPLE
2832     */
2833    if (_Pickler_Write(self, &mark_op, 1) < 0)
2834        return -1;
2835
2836    if (store_tuple_elements(self, obj, len) < 0)
2837        return -1;
2838
2839    if (PyMemoTable_Get(self->memo, obj)) {
2840        /* pop the stack stuff we pushed */
2841        if (self->bin) {
2842            if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2843                return -1;
2844        }
2845        else {
2846            /* Note that we pop one more than len, to remove
2847             * the MARK too.
2848             */
2849            for (i = 0; i <= len; i++)
2850                if (_Pickler_Write(self, &pop_op, 1) < 0)
2851                    return -1;
2852        }
2853        /* fetch from memo */
2854        if (memo_get(self, obj) < 0)
2855            return -1;
2856
2857        return 0;
2858    }
2859    else { /* Not recursive. */
2860        if (_Pickler_Write(self, &tuple_op, 1) < 0)
2861            return -1;
2862    }
2863
2864  memoize:
2865    if (memo_put(self, obj) < 0)
2866        return -1;
2867
2868    return 0;
2869}
2870
2871/* iter is an iterator giving items, and we batch up chunks of
2872 *     MARK item item ... item APPENDS
2873 * opcode sequences.  Calling code should have arranged to first create an
2874 * empty list, or list-like object, for the APPENDS to operate on.
2875 * Returns 0 on success, <0 on error.
2876 */
2877static int
2878batch_list(PicklerObject *self, PyObject *iter)
2879{
2880    PyObject *obj = NULL;
2881    PyObject *firstitem = NULL;
2882    int i, n;
2883
2884    const char mark_op = MARK;
2885    const char append_op = APPEND;
2886    const char appends_op = APPENDS;
2887
2888    assert(iter != NULL);
2889
2890    /* XXX: I think this function could be made faster by avoiding the
2891       iterator interface and fetching objects directly from list using
2892       PyList_GET_ITEM.
2893    */
2894
2895    if (self->proto == 0) {
2896        /* APPENDS isn't available; do one at a time. */
2897        for (;;) {
2898            obj = PyIter_Next(iter);
2899            if (obj == NULL) {
2900                if (PyErr_Occurred())
2901                    return -1;
2902                break;
2903            }
2904            i = save(self, obj, 0);
2905            Py_DECREF(obj);
2906            if (i < 0)
2907                return -1;
2908            if (_Pickler_Write(self, &append_op, 1) < 0)
2909                return -1;
2910        }
2911        return 0;
2912    }
2913
2914    /* proto > 0:  write in batches of BATCHSIZE. */
2915    do {
2916        /* Get first item */
2917        firstitem = PyIter_Next(iter);
2918        if (firstitem == NULL) {
2919            if (PyErr_Occurred())
2920                goto error;
2921
2922            /* nothing more to add */
2923            break;
2924        }
2925
2926        /* Try to get a second item */
2927        obj = PyIter_Next(iter);
2928        if (obj == NULL) {
2929            if (PyErr_Occurred())
2930                goto error;
2931
2932            /* Only one item to write */
2933            if (save(self, firstitem, 0) < 0)
2934                goto error;
2935            if (_Pickler_Write(self, &append_op, 1) < 0)
2936                goto error;
2937            Py_CLEAR(firstitem);
2938            break;
2939        }
2940
2941        /* More than one item to write */
2942
2943        /* Pump out MARK, items, APPENDS. */
2944        if (_Pickler_Write(self, &mark_op, 1) < 0)
2945            goto error;
2946
2947        if (save(self, firstitem, 0) < 0)
2948            goto error;
2949        Py_CLEAR(firstitem);
2950        n = 1;
2951
2952        /* Fetch and save up to BATCHSIZE items */
2953        while (obj) {
2954            if (save(self, obj, 0) < 0)
2955                goto error;
2956            Py_CLEAR(obj);
2957            n += 1;
2958
2959            if (n == BATCHSIZE)
2960                break;
2961
2962            obj = PyIter_Next(iter);
2963            if (obj == NULL) {
2964                if (PyErr_Occurred())
2965                    goto error;
2966                break;
2967            }
2968        }
2969
2970        if (_Pickler_Write(self, &appends_op, 1) < 0)
2971            goto error;
2972
2973    } while (n == BATCHSIZE);
2974    return 0;
2975
2976  error:
2977    Py_XDECREF(firstitem);
2978    Py_XDECREF(obj);
2979    return -1;
2980}
2981
2982/* This is a variant of batch_list() above, specialized for lists (with no
2983 * support for list subclasses). Like batch_list(), we batch up chunks of
2984 *     MARK item item ... item APPENDS
2985 * opcode sequences.  Calling code should have arranged to first create an
2986 * empty list, or list-like object, for the APPENDS to operate on.
2987 * Returns 0 on success, -1 on error.
2988 *
2989 * This version is considerably faster than batch_list(), if less general.
2990 *
2991 * Note that this only works for protocols > 0.
2992 */
2993static int
2994batch_list_exact(PicklerObject *self, PyObject *obj)
2995{
2996    PyObject *item = NULL;
2997    Py_ssize_t this_batch, total;
2998
2999    const char append_op = APPEND;
3000    const char appends_op = APPENDS;
3001    const char mark_op = MARK;
3002
3003    assert(obj != NULL);
3004    assert(self->proto > 0);
3005    assert(PyList_CheckExact(obj));
3006
3007    if (PyList_GET_SIZE(obj) == 1) {
3008        item = PyList_GET_ITEM(obj, 0);
3009        Py_INCREF(item);
3010        int err = save(self, item, 0);
3011        Py_DECREF(item);
3012        if (err < 0)
3013            return -1;
3014        if (_Pickler_Write(self, &append_op, 1) < 0)
3015            return -1;
3016        return 0;
3017    }
3018
3019    /* Write in batches of BATCHSIZE. */
3020    total = 0;
3021    do {
3022        this_batch = 0;
3023        if (_Pickler_Write(self, &mark_op, 1) < 0)
3024            return -1;
3025        while (total < PyList_GET_SIZE(obj)) {
3026            item = PyList_GET_ITEM(obj, total);
3027            Py_INCREF(item);
3028            int err = save(self, item, 0);
3029            Py_DECREF(item);
3030            if (err < 0)
3031                return -1;
3032            total++;
3033            if (++this_batch == BATCHSIZE)
3034                break;
3035        }
3036        if (_Pickler_Write(self, &appends_op, 1) < 0)
3037            return -1;
3038
3039    } while (total < PyList_GET_SIZE(obj));
3040
3041    return 0;
3042}
3043
3044static int
3045save_list(PicklerObject *self, PyObject *obj)
3046{
3047    char header[3];
3048    Py_ssize_t len;
3049    int status = 0;
3050
3051    if (self->fast && !fast_save_enter(self, obj))
3052        goto error;
3053
3054    /* Create an empty list. */
3055    if (self->bin) {
3056        header[0] = EMPTY_LIST;
3057        len = 1;
3058    }
3059    else {
3060        header[0] = MARK;
3061        header[1] = LIST;
3062        len = 2;
3063    }
3064
3065    if (_Pickler_Write(self, header, len) < 0)
3066        goto error;
3067
3068    /* Get list length, and bow out early if empty. */
3069    if ((len = PyList_Size(obj)) < 0)
3070        goto error;
3071
3072    if (memo_put(self, obj) < 0)
3073        goto error;
3074
3075    if (len != 0) {
3076        /* Materialize the list elements. */
3077        if (PyList_CheckExact(obj) && self->proto > 0) {
3078            if (_Py_EnterRecursiveCall(" while pickling an object"))
3079                goto error;
3080            status = batch_list_exact(self, obj);
3081            _Py_LeaveRecursiveCall();
3082        } else {
3083            PyObject *iter = PyObject_GetIter(obj);
3084            if (iter == NULL)
3085                goto error;
3086
3087            if (_Py_EnterRecursiveCall(" while pickling an object")) {
3088                Py_DECREF(iter);
3089                goto error;
3090            }
3091            status = batch_list(self, iter);
3092            _Py_LeaveRecursiveCall();
3093            Py_DECREF(iter);
3094        }
3095    }
3096    if (0) {
3097  error:
3098        status = -1;
3099    }
3100
3101    if (self->fast && !fast_save_leave(self, obj))
3102        status = -1;
3103
3104    return status;
3105}
3106
3107/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3108 *     MARK key value ... key value SETITEMS
3109 * opcode sequences.  Calling code should have arranged to first create an
3110 * empty dict, or dict-like object, for the SETITEMS to operate on.
3111 * Returns 0 on success, <0 on error.
3112 *
3113 * This is very much like batch_list().  The difference between saving
3114 * elements directly, and picking apart two-tuples, is so long-winded at
3115 * the C level, though, that attempts to combine these routines were too
3116 * ugly to bear.
3117 */
3118static int
3119batch_dict(PicklerObject *self, PyObject *iter)
3120{
3121    PyObject *obj = NULL;
3122    PyObject *firstitem = NULL;
3123    int i, n;
3124
3125    const char mark_op = MARK;
3126    const char setitem_op = SETITEM;
3127    const char setitems_op = SETITEMS;
3128
3129    assert(iter != NULL);
3130
3131    if (self->proto == 0) {
3132        /* SETITEMS isn't available; do one at a time. */
3133        for (;;) {
3134            obj = PyIter_Next(iter);
3135            if (obj == NULL) {
3136                if (PyErr_Occurred())
3137                    return -1;
3138                break;
3139            }
3140            if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3141                PyErr_SetString(PyExc_TypeError, "dict items "
3142                                "iterator must return 2-tuples");
3143                return -1;
3144            }
3145            i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3146            if (i >= 0)
3147                i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3148            Py_DECREF(obj);
3149            if (i < 0)
3150                return -1;
3151            if (_Pickler_Write(self, &setitem_op, 1) < 0)
3152                return -1;
3153        }
3154        return 0;
3155    }
3156
3157    /* proto > 0:  write in batches of BATCHSIZE. */
3158    do {
3159        /* Get first item */
3160        firstitem = PyIter_Next(iter);
3161        if (firstitem == NULL) {
3162            if (PyErr_Occurred())
3163                goto error;
3164
3165            /* nothing more to add */
3166            break;
3167        }
3168        if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3169            PyErr_SetString(PyExc_TypeError, "dict items "
3170                                "iterator must return 2-tuples");
3171            goto error;
3172        }
3173
3174        /* Try to get a second item */
3175        obj = PyIter_Next(iter);
3176        if (obj == NULL) {
3177            if (PyErr_Occurred())
3178                goto error;
3179
3180            /* Only one item to write */
3181            if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3182                goto error;
3183            if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3184                goto error;
3185            if (_Pickler_Write(self, &setitem_op, 1) < 0)
3186                goto error;
3187            Py_CLEAR(firstitem);
3188            break;
3189        }
3190
3191        /* More than one item to write */
3192
3193        /* Pump out MARK, items, SETITEMS. */
3194        if (_Pickler_Write(self, &mark_op, 1) < 0)
3195            goto error;
3196
3197        if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3198            goto error;
3199        if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3200            goto error;
3201        Py_CLEAR(firstitem);
3202        n = 1;
3203
3204        /* Fetch and save up to BATCHSIZE items */
3205        while (obj) {
3206            if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3207                PyErr_SetString(PyExc_TypeError, "dict items "
3208                    "iterator must return 2-tuples");
3209                goto error;
3210            }
3211            if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3212                save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3213                goto error;
3214            Py_CLEAR(obj);
3215            n += 1;
3216
3217            if (n == BATCHSIZE)
3218                break;
3219
3220            obj = PyIter_Next(iter);
3221            if (obj == NULL) {
3222                if (PyErr_Occurred())
3223                    goto error;
3224                break;
3225            }
3226        }
3227
3228        if (_Pickler_Write(self, &setitems_op, 1) < 0)
3229            goto error;
3230
3231    } while (n == BATCHSIZE);
3232    return 0;
3233
3234  error:
3235    Py_XDECREF(firstitem);
3236    Py_XDECREF(obj);
3237    return -1;
3238}
3239
3240/* This is a variant of batch_dict() above that specializes for dicts, with no
3241 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3242 *     MARK key value ... key value SETITEMS
3243 * opcode sequences.  Calling code should have arranged to first create an
3244 * empty dict, or dict-like object, for the SETITEMS to operate on.
3245 * Returns 0 on success, -1 on error.
3246 *
3247 * Note that this currently doesn't work for protocol 0.
3248 */
3249static int
3250batch_dict_exact(PicklerObject *self, PyObject *obj)
3251{
3252    PyObject *key = NULL, *value = NULL;
3253    int i;
3254    Py_ssize_t dict_size, ppos = 0;
3255
3256    const char mark_op = MARK;
3257    const char setitem_op = SETITEM;
3258    const char setitems_op = SETITEMS;
3259
3260    assert(obj != NULL && PyDict_CheckExact(obj));
3261    assert(self->proto > 0);
3262
3263    dict_size = PyDict_GET_SIZE(obj);
3264
3265    /* Special-case len(d) == 1 to save space. */
3266    if (dict_size == 1) {
3267        PyDict_Next(obj, &ppos, &key, &value);
3268        Py_INCREF(key);
3269        Py_INCREF(value);
3270        if (save(self, key, 0) < 0) {
3271            goto error;
3272        }
3273        if (save(self, value, 0) < 0) {
3274            goto error;
3275        }
3276        Py_CLEAR(key);
3277        Py_CLEAR(value);
3278        if (_Pickler_Write(self, &setitem_op, 1) < 0)
3279            return -1;
3280        return 0;
3281    }
3282
3283    /* Write in batches of BATCHSIZE. */
3284    do {
3285        i = 0;
3286        if (_Pickler_Write(self, &mark_op, 1) < 0)
3287            return -1;
3288        while (PyDict_Next(obj, &ppos, &key, &value)) {
3289            Py_INCREF(key);
3290            Py_INCREF(value);
3291            if (save(self, key, 0) < 0) {
3292                goto error;
3293            }
3294            if (save(self, value, 0) < 0) {
3295                goto error;
3296            }
3297            Py_CLEAR(key);
3298            Py_CLEAR(value);
3299            if (++i == BATCHSIZE)
3300                break;
3301        }
3302        if (_Pickler_Write(self, &setitems_op, 1) < 0)
3303            return -1;
3304        if (PyDict_GET_SIZE(obj) != dict_size) {
3305            PyErr_Format(
3306                PyExc_RuntimeError,
3307                "dictionary changed size during iteration");
3308            return -1;
3309        }
3310
3311    } while (i == BATCHSIZE);
3312    return 0;
3313error:
3314    Py_XDECREF(key);
3315    Py_XDECREF(value);
3316    return -1;
3317}
3318
3319static int
3320save_dict(PicklerObject *self, PyObject *obj)
3321{
3322    PyObject *items, *iter;
3323    char header[3];
3324    Py_ssize_t len;
3325    int status = 0;
3326    assert(PyDict_Check(obj));
3327
3328    if (self->fast && !fast_save_enter(self, obj))
3329        goto error;
3330
3331    /* Create an empty dict. */
3332    if (self->bin) {
3333        header[0] = EMPTY_DICT;
3334        len = 1;
3335    }
3336    else {
3337        header[0] = MARK;
3338        header[1] = DICT;
3339        len = 2;
3340    }
3341
3342    if (_Pickler_Write(self, header, len) < 0)
3343        goto error;
3344
3345    if (memo_put(self, obj) < 0)
3346        goto error;
3347
3348    if (PyDict_GET_SIZE(obj)) {
3349        /* Save the dict items. */
3350        if (PyDict_CheckExact(obj) && self->proto > 0) {
3351            /* We can take certain shortcuts if we know this is a dict and
3352               not a dict subclass. */
3353            if (_Py_EnterRecursiveCall(" while pickling an object"))
3354                goto error;
3355            status = batch_dict_exact(self, obj);
3356            _Py_LeaveRecursiveCall();
3357        } else {
3358            items = PyObject_CallMethodNoArgs(obj, &_Py_ID(items));
3359            if (items == NULL)
3360                goto error;
3361            iter = PyObject_GetIter(items);
3362            Py_DECREF(items);
3363            if (iter == NULL)
3364                goto error;
3365            if (_Py_EnterRecursiveCall(" while pickling an object")) {
3366                Py_DECREF(iter);
3367                goto error;
3368            }
3369            status = batch_dict(self, iter);
3370            _Py_LeaveRecursiveCall();
3371            Py_DECREF(iter);
3372        }
3373    }
3374
3375    if (0) {
3376  error:
3377        status = -1;
3378    }
3379
3380    if (self->fast && !fast_save_leave(self, obj))
3381        status = -1;
3382
3383    return status;
3384}
3385
3386static int
3387save_set(PicklerObject *self, PyObject *obj)
3388{
3389    PyObject *item;
3390    int i;
3391    Py_ssize_t set_size, ppos = 0;
3392    Py_hash_t hash;
3393
3394    const char empty_set_op = EMPTY_SET;
3395    const char mark_op = MARK;
3396    const char additems_op = ADDITEMS;
3397
3398    if (self->proto < 4) {
3399        PyObject *items;
3400        PyObject *reduce_value;
3401        int status;
3402
3403        items = PySequence_List(obj);
3404        if (items == NULL) {
3405            return -1;
3406        }
3407        reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3408        Py_DECREF(items);
3409        if (reduce_value == NULL) {
3410            return -1;
3411        }
3412        /* save_reduce() will memoize the object automatically. */
3413        status = save_reduce(self, reduce_value, obj);
3414        Py_DECREF(reduce_value);
3415        return status;
3416    }
3417
3418    if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3419        return -1;
3420
3421    if (memo_put(self, obj) < 0)
3422        return -1;
3423
3424    set_size = PySet_GET_SIZE(obj);
3425    if (set_size == 0)
3426        return 0;  /* nothing to do */
3427
3428    /* Write in batches of BATCHSIZE. */
3429    do {
3430        i = 0;
3431        if (_Pickler_Write(self, &mark_op, 1) < 0)
3432            return -1;
3433        while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3434            Py_INCREF(item);
3435            int err = save(self, item, 0);
3436            Py_CLEAR(item);
3437            if (err < 0)
3438                return -1;
3439            if (++i == BATCHSIZE)
3440                break;
3441        }
3442        if (_Pickler_Write(self, &additems_op, 1) < 0)
3443            return -1;
3444        if (PySet_GET_SIZE(obj) != set_size) {
3445            PyErr_Format(
3446                PyExc_RuntimeError,
3447                "set changed size during iteration");
3448            return -1;
3449        }
3450    } while (i == BATCHSIZE);
3451
3452    return 0;
3453}
3454
3455static int
3456save_frozenset(PicklerObject *self, PyObject *obj)
3457{
3458    PyObject *iter;
3459
3460    const char mark_op = MARK;
3461    const char frozenset_op = FROZENSET;
3462
3463    if (self->fast && !fast_save_enter(self, obj))
3464        return -1;
3465
3466    if (self->proto < 4) {
3467        PyObject *items;
3468        PyObject *reduce_value;
3469        int status;
3470
3471        items = PySequence_List(obj);
3472        if (items == NULL) {
3473            return -1;
3474        }
3475        reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3476                                     items);
3477        Py_DECREF(items);
3478        if (reduce_value == NULL) {
3479            return -1;
3480        }
3481        /* save_reduce() will memoize the object automatically. */
3482        status = save_reduce(self, reduce_value, obj);
3483        Py_DECREF(reduce_value);
3484        return status;
3485    }
3486
3487    if (_Pickler_Write(self, &mark_op, 1) < 0)
3488        return -1;
3489
3490    iter = PyObject_GetIter(obj);
3491    if (iter == NULL) {
3492        return -1;
3493    }
3494    for (;;) {
3495        PyObject *item;
3496
3497        item = PyIter_Next(iter);
3498        if (item == NULL) {
3499            if (PyErr_Occurred()) {
3500                Py_DECREF(iter);
3501                return -1;
3502            }
3503            break;
3504        }
3505        if (save(self, item, 0) < 0) {
3506            Py_DECREF(item);
3507            Py_DECREF(iter);
3508            return -1;
3509        }
3510        Py_DECREF(item);
3511    }
3512    Py_DECREF(iter);
3513
3514    /* If the object is already in the memo, this means it is
3515       recursive. In this case, throw away everything we put on the
3516       stack, and fetch the object back from the memo. */
3517    if (PyMemoTable_Get(self->memo, obj)) {
3518        const char pop_mark_op = POP_MARK;
3519
3520        if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3521            return -1;
3522        if (memo_get(self, obj) < 0)
3523            return -1;
3524        return 0;
3525    }
3526
3527    if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3528        return -1;
3529    if (memo_put(self, obj) < 0)
3530        return -1;
3531
3532    return 0;
3533}
3534
3535static int
3536fix_imports(PyObject **module_name, PyObject **global_name)
3537{
3538    PyObject *key;
3539    PyObject *item;
3540    PickleState *st = _Pickle_GetGlobalState();
3541
3542    key = PyTuple_Pack(2, *module_name, *global_name);
3543    if (key == NULL)
3544        return -1;
3545    item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3546    Py_DECREF(key);
3547    if (item) {
3548        PyObject *fixed_module_name;
3549        PyObject *fixed_global_name;
3550
3551        if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3552            PyErr_Format(PyExc_RuntimeError,
3553                         "_compat_pickle.REVERSE_NAME_MAPPING values "
3554                         "should be 2-tuples, not %.200s",
3555                         Py_TYPE(item)->tp_name);
3556            return -1;
3557        }
3558        fixed_module_name = PyTuple_GET_ITEM(item, 0);
3559        fixed_global_name = PyTuple_GET_ITEM(item, 1);
3560        if (!PyUnicode_Check(fixed_module_name) ||
3561            !PyUnicode_Check(fixed_global_name)) {
3562            PyErr_Format(PyExc_RuntimeError,
3563                         "_compat_pickle.REVERSE_NAME_MAPPING values "
3564                         "should be pairs of str, not (%.200s, %.200s)",
3565                         Py_TYPE(fixed_module_name)->tp_name,
3566                         Py_TYPE(fixed_global_name)->tp_name);
3567            return -1;
3568        }
3569
3570        Py_CLEAR(*module_name);
3571        Py_CLEAR(*global_name);
3572        Py_INCREF(fixed_module_name);
3573        Py_INCREF(fixed_global_name);
3574        *module_name = fixed_module_name;
3575        *global_name = fixed_global_name;
3576        return 0;
3577    }
3578    else if (PyErr_Occurred()) {
3579        return -1;
3580    }
3581
3582    item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3583    if (item) {
3584        if (!PyUnicode_Check(item)) {
3585            PyErr_Format(PyExc_RuntimeError,
3586                         "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3587                         "should be strings, not %.200s",
3588                         Py_TYPE(item)->tp_name);
3589            return -1;
3590        }
3591        Py_INCREF(item);
3592        Py_XSETREF(*module_name, item);
3593    }
3594    else if (PyErr_Occurred()) {
3595        return -1;
3596    }
3597
3598    return 0;
3599}
3600
3601static int
3602save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3603{
3604    PyObject *global_name = NULL;
3605    PyObject *module_name = NULL;
3606    PyObject *module = NULL;
3607    PyObject *parent = NULL;
3608    PyObject *dotted_path = NULL;
3609    PyObject *lastname = NULL;
3610    PyObject *cls;
3611    PickleState *st = _Pickle_GetGlobalState();
3612    int status = 0;
3613
3614    const char global_op = GLOBAL;
3615
3616    if (name) {
3617        Py_INCREF(name);
3618        global_name = name;
3619    }
3620    else {
3621        if (_PyObject_LookupAttr(obj, &_Py_ID(__qualname__), &global_name) < 0)
3622            goto error;
3623        if (global_name == NULL) {
3624            global_name = PyObject_GetAttr(obj, &_Py_ID(__name__));
3625            if (global_name == NULL)
3626                goto error;
3627        }
3628    }
3629
3630    dotted_path = get_dotted_path(module, global_name);
3631    if (dotted_path == NULL)
3632        goto error;
3633    module_name = whichmodule(obj, dotted_path);
3634    if (module_name == NULL)
3635        goto error;
3636
3637    /* XXX: Change to use the import C API directly with level=0 to disallow
3638       relative imports.
3639
3640       XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3641       builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3642       custom import functions (IMHO, this would be a nice security
3643       feature). The import C API would need to be extended to support the
3644       extra parameters of __import__ to fix that. */
3645    module = PyImport_Import(module_name);
3646    if (module == NULL) {
3647        PyErr_Format(st->PicklingError,
3648                     "Can't pickle %R: import of module %R failed",
3649                     obj, module_name);
3650        goto error;
3651    }
3652    lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3653    Py_INCREF(lastname);
3654    cls = get_deep_attribute(module, dotted_path, &parent);
3655    Py_CLEAR(dotted_path);
3656    if (cls == NULL) {
3657        PyErr_Format(st->PicklingError,
3658                     "Can't pickle %R: attribute lookup %S on %S failed",
3659                     obj, global_name, module_name);
3660        goto error;
3661    }
3662    if (cls != obj) {
3663        Py_DECREF(cls);
3664        PyErr_Format(st->PicklingError,
3665                     "Can't pickle %R: it's not the same object as %S.%S",
3666                     obj, module_name, global_name);
3667        goto error;
3668    }
3669    Py_DECREF(cls);
3670
3671    if (self->proto >= 2) {
3672        /* See whether this is in the extension registry, and if
3673         * so generate an EXT opcode.
3674         */
3675        PyObject *extension_key;
3676        PyObject *code_obj;      /* extension code as Python object */
3677        long code;               /* extension code as C value */
3678        char pdata[5];
3679        Py_ssize_t n;
3680
3681        extension_key = PyTuple_Pack(2, module_name, global_name);
3682        if (extension_key == NULL) {
3683            goto error;
3684        }
3685        code_obj = PyDict_GetItemWithError(st->extension_registry,
3686                                           extension_key);
3687        Py_DECREF(extension_key);
3688        /* The object is not registered in the extension registry.
3689           This is the most likely code path. */
3690        if (code_obj == NULL) {
3691            if (PyErr_Occurred()) {
3692                goto error;
3693            }
3694            goto gen_global;
3695        }
3696
3697        /* XXX: pickle.py doesn't check neither the type, nor the range
3698           of the value returned by the extension_registry. It should for
3699           consistency. */
3700
3701        /* Verify code_obj has the right type and value. */
3702        if (!PyLong_Check(code_obj)) {
3703            PyErr_Format(st->PicklingError,
3704                         "Can't pickle %R: extension code %R isn't an integer",
3705                         obj, code_obj);
3706            goto error;
3707        }
3708        code = PyLong_AS_LONG(code_obj);
3709        if (code <= 0 || code > 0x7fffffffL) {
3710            if (!PyErr_Occurred())
3711                PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3712                             "code %ld is out of range", obj, code);
3713            goto error;
3714        }
3715
3716        /* Generate an EXT opcode. */
3717        if (code <= 0xff) {
3718            pdata[0] = EXT1;
3719            pdata[1] = (unsigned char)code;
3720            n = 2;
3721        }
3722        else if (code <= 0xffff) {
3723            pdata[0] = EXT2;
3724            pdata[1] = (unsigned char)(code & 0xff);
3725            pdata[2] = (unsigned char)((code >> 8) & 0xff);
3726            n = 3;
3727        }
3728        else {
3729            pdata[0] = EXT4;
3730            pdata[1] = (unsigned char)(code & 0xff);
3731            pdata[2] = (unsigned char)((code >> 8) & 0xff);
3732            pdata[3] = (unsigned char)((code >> 16) & 0xff);
3733            pdata[4] = (unsigned char)((code >> 24) & 0xff);
3734            n = 5;
3735        }
3736
3737        if (_Pickler_Write(self, pdata, n) < 0)
3738            goto error;
3739    }
3740    else {
3741  gen_global:
3742        if (parent == module) {
3743            Py_INCREF(lastname);
3744            Py_DECREF(global_name);
3745            global_name = lastname;
3746        }
3747        if (self->proto >= 4) {
3748            const char stack_global_op = STACK_GLOBAL;
3749
3750            if (save(self, module_name, 0) < 0)
3751                goto error;
3752            if (save(self, global_name, 0) < 0)
3753                goto error;
3754
3755            if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3756                goto error;
3757        }
3758        else if (parent != module) {
3759            PickleState *st = _Pickle_GetGlobalState();
3760            PyObject *reduce_value = Py_BuildValue("(O(OO))",
3761                                        st->getattr, parent, lastname);
3762            if (reduce_value == NULL)
3763                goto error;
3764            status = save_reduce(self, reduce_value, NULL);
3765            Py_DECREF(reduce_value);
3766            if (status < 0)
3767                goto error;
3768        }
3769        else {
3770            /* Generate a normal global opcode if we are using a pickle
3771               protocol < 4, or if the object is not registered in the
3772               extension registry. */
3773            PyObject *encoded;
3774            PyObject *(*unicode_encoder)(PyObject *);
3775
3776            if (_Pickler_Write(self, &global_op, 1) < 0)
3777                goto error;
3778
3779            /* For protocol < 3 and if the user didn't request against doing
3780               so, we convert module names to the old 2.x module names. */
3781            if (self->proto < 3 && self->fix_imports) {
3782                if (fix_imports(&module_name, &global_name) < 0) {
3783                    goto error;
3784                }
3785            }
3786
3787            /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3788               both the module name and the global name using UTF-8. We do so
3789               only when we are using the pickle protocol newer than version
3790               3. This is to ensure compatibility with older Unpickler running
3791               on Python 2.x. */
3792            if (self->proto == 3) {
3793                unicode_encoder = PyUnicode_AsUTF8String;
3794            }
3795            else {
3796                unicode_encoder = PyUnicode_AsASCIIString;
3797            }
3798            encoded = unicode_encoder(module_name);
3799            if (encoded == NULL) {
3800                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3801                    PyErr_Format(st->PicklingError,
3802                                 "can't pickle module identifier '%S' using "
3803                                 "pickle protocol %i",
3804                                 module_name, self->proto);
3805                goto error;
3806            }
3807            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3808                               PyBytes_GET_SIZE(encoded)) < 0) {
3809                Py_DECREF(encoded);
3810                goto error;
3811            }
3812            Py_DECREF(encoded);
3813            if(_Pickler_Write(self, "\n", 1) < 0)
3814                goto error;
3815
3816            /* Save the name of the module. */
3817            encoded = unicode_encoder(global_name);
3818            if (encoded == NULL) {
3819                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3820                    PyErr_Format(st->PicklingError,
3821                                 "can't pickle global identifier '%S' using "
3822                                 "pickle protocol %i",
3823                                 global_name, self->proto);
3824                goto error;
3825            }
3826            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3827                               PyBytes_GET_SIZE(encoded)) < 0) {
3828                Py_DECREF(encoded);
3829                goto error;
3830            }
3831            Py_DECREF(encoded);
3832            if (_Pickler_Write(self, "\n", 1) < 0)
3833                goto error;
3834        }
3835        /* Memoize the object. */
3836        if (memo_put(self, obj) < 0)
3837            goto error;
3838    }
3839
3840    if (0) {
3841  error:
3842        status = -1;
3843    }
3844    Py_XDECREF(module_name);
3845    Py_XDECREF(global_name);
3846    Py_XDECREF(module);
3847    Py_XDECREF(parent);
3848    Py_XDECREF(dotted_path);
3849    Py_XDECREF(lastname);
3850
3851    return status;
3852}
3853
3854static int
3855save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3856{
3857    PyObject *reduce_value;
3858    int status;
3859
3860    reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3861    if (reduce_value == NULL) {
3862        return -1;
3863    }
3864    status = save_reduce(self, reduce_value, obj);
3865    Py_DECREF(reduce_value);
3866    return status;
3867}
3868
3869static int
3870save_type(PicklerObject *self, PyObject *obj)
3871{
3872    if (obj == (PyObject *)&_PyNone_Type) {
3873        return save_singleton_type(self, obj, Py_None);
3874    }
3875    else if (obj == (PyObject *)&PyEllipsis_Type) {
3876        return save_singleton_type(self, obj, Py_Ellipsis);
3877    }
3878    else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3879        return save_singleton_type(self, obj, Py_NotImplemented);
3880    }
3881    return save_global(self, obj, NULL);
3882}
3883
3884static int
3885save_pers(PicklerObject *self, PyObject *obj)
3886{
3887    PyObject *pid = NULL;
3888    int status = 0;
3889
3890    const char persid_op = PERSID;
3891    const char binpersid_op = BINPERSID;
3892
3893    pid = call_method(self->pers_func, self->pers_func_self, obj);
3894    if (pid == NULL)
3895        return -1;
3896
3897    if (pid != Py_None) {
3898        if (self->bin) {
3899            if (save(self, pid, 1) < 0 ||
3900                _Pickler_Write(self, &binpersid_op, 1) < 0)
3901                goto error;
3902        }
3903        else {
3904            PyObject *pid_str;
3905
3906            pid_str = PyObject_Str(pid);
3907            if (pid_str == NULL)
3908                goto error;
3909
3910            /* XXX: Should it check whether the pid contains embedded
3911               newlines? */
3912            if (!PyUnicode_IS_ASCII(pid_str)) {
3913                PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3914                                "persistent IDs in protocol 0 must be "
3915                                "ASCII strings");
3916                Py_DECREF(pid_str);
3917                goto error;
3918            }
3919
3920            if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3921                _Pickler_Write(self, PyUnicode_DATA(pid_str),
3922                               PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3923                _Pickler_Write(self, "\n", 1) < 0) {
3924                Py_DECREF(pid_str);
3925                goto error;
3926            }
3927            Py_DECREF(pid_str);
3928        }
3929        status = 1;
3930    }
3931
3932    if (0) {
3933  error:
3934        status = -1;
3935    }
3936    Py_XDECREF(pid);
3937
3938    return status;
3939}
3940
3941static PyObject *
3942get_class(PyObject *obj)
3943{
3944    PyObject *cls;
3945
3946    if (_PyObject_LookupAttr(obj, &_Py_ID(__class__), &cls) == 0) {
3947        cls = (PyObject *) Py_TYPE(obj);
3948        Py_INCREF(cls);
3949    }
3950    return cls;
3951}
3952
/* We're saving obj, and args is the 2-through-6 item tuple returned by the
 * appropriate __reduce__ method for obj:
 *
 *     (callable, argtup[, state[, listitems[, dictitems[, state_setter]]]])
 *
 * Optional items that are None are treated as absent.  obj may be NULL, in
 * which case the reconstructed object is not memoized.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
{
    PyObject *callable;
    PyObject *argtup;
    PyObject *state = NULL;
    PyObject *listitems = Py_None;
    PyObject *dictitems = Py_None;
    PyObject *state_setter = Py_None;
    PickleState *st = _Pickle_GetGlobalState();
    Py_ssize_t size;
    int use_newobj = 0, use_newobj_ex = 0;

    const char reduce_op = REDUCE;
    const char build_op = BUILD;
    const char newobj_op = NEWOBJ;
    const char newobj_ex_op = NEWOBJ_EX;

    /* Validate the length first so a wrong-sized tuple is reported as a
       PicklingError rather than by PyArg_UnpackTuple(). */
    size = PyTuple_Size(args);
    if (size < 2 || size > 6) {
        PyErr_SetString(st->PicklingError, "tuple returned by "
                        "__reduce__ must contain 2 through 6 elements");
        return -1;
    }

    if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
                           &callable, &argtup, &state, &listitems, &dictitems,
                           &state_setter))
        return -1;

    if (!PyCallable_Check(callable)) {
        PyErr_SetString(st->PicklingError, "first item of the tuple "
                        "returned by __reduce__ must be callable");
        return -1;
    }
    if (!PyTuple_Check(argtup)) {
        PyErr_SetString(st->PicklingError, "second item of the tuple "
                        "returned by __reduce__ must be a tuple");
        return -1;
    }

    /* From here on, a NULL optional item means "not provided". */
    if (state == Py_None)
        state = NULL;

    if (listitems == Py_None)
        listitems = NULL;
    else if (!PyIter_Check(listitems)) {
        PyErr_Format(st->PicklingError, "fourth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(listitems)->tp_name);
        return -1;
    }

    if (dictitems == Py_None)
        dictitems = NULL;
    else if (!PyIter_Check(dictitems)) {
        PyErr_Format(st->PicklingError, "fifth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(dictitems)->tp_name);
        return -1;
    }

    if (state_setter == Py_None)
        state_setter = NULL;
    else if (!PyCallable_Check(state_setter)) {
        PyErr_Format(st->PicklingError, "sixth element of the tuple "
                     "returned by __reduce__ must be a function, not %s",
                     Py_TYPE(state_setter)->tp_name);
        return -1;
    }

    /* For protocol 2 and above, callables whose __name__ is __newobj_ex__ or
       __newobj__ are recognized and pickled specially below. */
    if (self->proto >= 2) {
        PyObject *name;

        if (_PyObject_LookupAttr(callable, &_Py_ID(__name__), &name) < 0) {
            return -1;
        }
        if (name != NULL && PyUnicode_Check(name)) {
            use_newobj_ex = _PyUnicode_Equal(name, &_Py_ID(__newobj_ex__));
            if (!use_newobj_ex) {
                use_newobj = _PyUnicode_Equal(name, &_Py_ID(__newobj__));
            }
        }
        Py_XDECREF(name);
    }

    if (use_newobj_ex) {
        PyObject *cls;
        PyObject *args;
        PyObject *kwargs;

        /* argtup must be exactly (cls, args, kwargs). */
        if (PyTuple_GET_SIZE(argtup) != 3) {
            PyErr_Format(st->PicklingError,
                         "length of the NEWOBJ_EX argument tuple must be "
                         "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_Format(st->PicklingError,
                         "first item from NEWOBJ_EX argument tuple must "
                         "be a class, not %.200s", Py_TYPE(cls)->tp_name);
            return -1;
        }
        args = PyTuple_GET_ITEM(argtup, 1);
        if (!PyTuple_Check(args)) {
            PyErr_Format(st->PicklingError,
                         "second item from NEWOBJ_EX argument tuple must "
                         "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
            return -1;
        }
        kwargs = PyTuple_GET_ITEM(argtup, 2);
        if (!PyDict_Check(kwargs)) {
            PyErr_Format(st->PicklingError,
                         "third item from NEWOBJ_EX argument tuple must "
                         "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
            return -1;
        }

        if (self->proto >= 4) {
            /* The NEWOBJ_EX opcode is only emitted for protocol 4+. */
            if (save(self, cls, 0) < 0 ||
                save(self, args, 0) < 0 ||
                save(self, kwargs, 0) < 0 ||
                _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
                return -1;
            }
        }
        else {
            /* Older protocols: pickle the equivalent call
               st->partial(cls.__new__, cls, *args, **kwargs)() using
               REDUCE with an empty argument tuple. */
            PyObject *newargs;
            PyObject *cls_new;
            Py_ssize_t i;

            newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
            if (newargs == NULL)
                return -1;

            cls_new = PyObject_GetAttr(cls, &_Py_ID(__new__));
            if (cls_new == NULL) {
                Py_DECREF(newargs);
                return -1;
            }
            PyTuple_SET_ITEM(newargs, 0, cls_new);
            Py_INCREF(cls);
            PyTuple_SET_ITEM(newargs, 1, cls);
            for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
                PyObject *item = PyTuple_GET_ITEM(args, i);
                Py_INCREF(item);
                PyTuple_SET_ITEM(newargs, i + 2, item);
            }

            /* callable now refers to a new object owned by this branch; it
               is released before leaving the branch. */
            callable = PyObject_Call(st->partial, newargs, kwargs);
            Py_DECREF(newargs);
            if (callable == NULL)
                return -1;

            newargs = PyTuple_New(0);
            if (newargs == NULL) {
                Py_DECREF(callable);
                return -1;
            }

            if (save(self, callable, 0) < 0 ||
                save(self, newargs, 0) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0) {
                Py_DECREF(newargs);
                Py_DECREF(callable);
                return -1;
            }
            Py_DECREF(newargs);
            Py_DECREF(callable);
        }
    }
    else if (use_newobj) {
        PyObject *cls;
        PyObject *newargtup;
        PyObject *obj_class;
        int p;

        /* Sanity checks. */
        if (PyTuple_GET_SIZE(argtup) < 1) {
            PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_SetString(st->PicklingError, "args[0] from "
                            "__newobj__ args is not a type");
            return -1;
        }

        /* When the object being pickled is known, its class must be the
           class that __newobj__ is asked to instantiate. */
        if (obj != NULL) {
            obj_class = get_class(obj);
            if (obj_class == NULL) {
                return -1;
            }
            p = obj_class != cls;
            Py_DECREF(obj_class);
            if (p) {
                PyErr_SetString(st->PicklingError, "args[0] from "
                                "__newobj__ args has the wrong class");
                return -1;
            }
        }
        /* XXX: These calls to save() are prone to infinite recursion. Imagine
           what happens if the value returned by the __reduce__() method of
           some extension type contains another object of the same type. Ouch!

           Here is a quick example, that I ran into, to illustrate what I
           mean:

             >>> import pickle, copyreg
             >>> copyreg.dispatch_table.pop(complex)
             >>> pickle.dumps(1+2j)
             Traceback (most recent call last):
               ...
             RecursionError: maximum recursion depth exceeded

           Removing the complex class from copyreg.dispatch_table made the
           __reduce_ex__() method emit another complex object:

             >>> (1+1j).__reduce_ex__(2)
             (<function __newobj__ at 0xb7b71c3c>,
               (<class 'complex'>, (1+1j)), None, None, None)

           Thus when save() was called on newargtup (the 2nd item) recursion
           ensued. Of course, the bug was in the complex class which had a
           broken __getnewargs__() that emitted another complex object. But,
           the point, here, is it is quite easy to end up with a broken reduce
           function. */

        /* Save the class and its __new__ arguments. */
        if (save(self, cls, 0) < 0)
            return -1;

        /* Everything after the class in argtup is passed to __new__. */
        newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
        if (newargtup == NULL)
            return -1;

        p = save(self, newargtup, 0);
        Py_DECREF(newargtup);
        if (p < 0)
            return -1;

        /* Add NEWOBJ opcode. */
        if (_Pickler_Write(self, &newobj_op, 1) < 0)
            return -1;
    }
    else { /* Not using NEWOBJ. */
        if (save(self, callable, 0) < 0 ||
            save(self, argtup, 0) < 0 ||
            _Pickler_Write(self, &reduce_op, 1) < 0)
            return -1;
    }

    /* obj can be NULL when save_reduce() is used directly. A NULL obj means
       the caller does not want to memoize the object. Not particularly
       useful, but that is to mimic the behavior of save_reduce() in
       pickle.py when obj is None. */
    if (obj != NULL) {
        /* If the object is already in the memo, this means it is
           recursive. In this case, throw away everything we put on the
           stack, and fetch the object back from the memo. */
        if (PyMemoTable_Get(self->memo, obj)) {
            const char pop_op = POP;

            if (_Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else if (memo_put(self, obj) < 0)
            return -1;
    }

    /* Optional list items, dict items and state, in that order. */
    if (listitems && batch_list(self, listitems) < 0)
        return -1;

    if (dictitems && batch_dict(self, dictitems) < 0)
        return -1;

    if (state) {
        if (state_setter == NULL) {
            if (save(self, state, 0) < 0 ||
                _Pickler_Write(self, &build_op, 1) < 0)
                return -1;
        }
        else {

            /* If a state_setter is specified, call it instead of load_build to
             * update obj with its previous state.
             * The first 4 save/write instructions push state_setter and its
             * tuple of expected arguments (obj, state) onto the stack. The
             * REDUCE opcode triggers the state_setter(obj, state) function
             * call. Finally, because state-updating routines only do in-place
             * modification, the whole operation has to be stack-transparent.
             * Thus, we finally pop the call's output from the stack.*/

            const char tupletwo_op = TUPLE2;
            const char pop_op = POP;
            if (save(self, state_setter, 0) < 0 ||
                save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
                _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0 ||
                _Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
        }
    }
    return 0;
}
4269
/* Pickle obj into the pickler's output.
 *
 * Dispatch order: persistent-ID hook (skipped when pers_save is nonzero),
 * atomic types, the memo, exact bytes/str, exact built-in containers,
 * reducer_override, types and functions, and finally the reduction
 * machinery (dispatch tables, __reduce_ex__, __reduce__).
 *
 * Returns a negative value on error.  Otherwise the status of whichever
 * helper handled obj is returned (for instance 1 when save_pers() wrote a
 * persistent ID); callers in this file only test for `< 0`.
 */
static int
save(PicklerObject *self, PyObject *obj, int pers_save)
{
    PyTypeObject *type;
    PyObject *reduce_func = NULL;
    PyObject *reduce_value = NULL;
    int status = 0;

    if (_Pickler_OpcodeBoundary(self) < 0)
        return -1;

    /* The extra pers_save argument is necessary to avoid calling save_pers()
       on its returned object. */
    if (!pers_save && self->pers_func) {
        /* save_pers() returns:
            -1   to signal an error;
             0   if it did nothing successfully;
             1   if a persistent id was saved.
         */
        if ((status = save_pers(self, obj)) != 0)
            return status;
    }

    type = Py_TYPE(obj);

    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This was removed
       since benchmarks showed that this optimization was actually slowing
       things down. */

    /* Atom types; these aren't memoized, so don't check the memo. */

    if (obj == Py_None) {
        return save_none(self, obj);
    }
    else if (obj == Py_False || obj == Py_True) {
        return save_bool(self, obj);
    }
    else if (type == &PyLong_Type) {
        return save_long(self, obj);
    }
    else if (type == &PyFloat_Type) {
        return save_float(self, obj);
    }

    /* Check the memo to see if it has the object. If so, generate
       a GET (or BINGET) opcode, instead of pickling the object
       once again. */
    if (PyMemoTable_Get(self->memo, obj)) {
        return memo_get(self, obj);
    }

    if (type == &PyBytes_Type) {
        return save_bytes(self, obj);
    }
    else if (type == &PyUnicode_Type) {
        return save_unicode(self, obj);
    }

    /* We're only calling _Py_EnterRecursiveCall here so that atomic
       types above are pickled faster.  Every exit below this point must go
       through the `done` label so that _Py_LeaveRecursiveCall() is run. */
    if (_Py_EnterRecursiveCall(" while pickling an object")) {
        return -1;
    }

    if (type == &PyDict_Type) {
        status = save_dict(self, obj);
        goto done;
    }
    else if (type == &PySet_Type) {
        status = save_set(self, obj);
        goto done;
    }
    else if (type == &PyFrozenSet_Type) {
        status = save_frozenset(self, obj);
        goto done;
    }
    else if (type == &PyList_Type) {
        status = save_list(self, obj);
        goto done;
    }
    else if (type == &PyTuple_Type) {
        status = save_tuple(self, obj);
        goto done;
    }
    else if (type == &PyByteArray_Type) {
        status = save_bytearray(self, obj);
        goto done;
    }
    else if (type == &PyPickleBuffer_Type) {
        status = save_picklebuffer(self, obj);
        goto done;
    }

    /* Now, check reducer_override.  If it returns NotImplemented,
     * fallback to save_type or save_global, and then perhaps to the
     * regular reduction mechanism.
     */
    if (self->reducer_override != NULL) {
        reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
        if (reduce_value == NULL) {
            goto error;
        }
        if (reduce_value != Py_NotImplemented) {
            goto reduce;
        }
        Py_DECREF(reduce_value);
        reduce_value = NULL;
    }

    if (type == &PyType_Type) {
        status = save_type(self, obj);
        goto done;
    }
    else if (type == &PyFunction_Type) {
        status = save_global(self, obj, NULL);
        goto done;
    }

    /* XXX: This part needs some unit tests. */

    /* Get a reduction callable, and call it.  This may come from
     * self.dispatch_table, copyreg.dispatch_table, the object's
     * __reduce_ex__ method, or the object's __reduce__ method.
     */
    if (self->dispatch_table == NULL) {
        PickleState *st = _Pickle_GetGlobalState();
        reduce_func = PyDict_GetItemWithError(st->dispatch_table,
                                              (PyObject *)type);
        if (reduce_func == NULL) {
            if (PyErr_Occurred()) {
                goto error;
            }
        } else {
            /* PyDict_GetItemWithError() returns a borrowed reference.
               Increase the reference count to be consistent with
               PyObject_GetItem and _PyObject_LookupAttr used below. */
            Py_INCREF(reduce_func);
        }
    } else {
        reduce_func = PyObject_GetItem(self->dispatch_table,
                                       (PyObject *)type);
        if (reduce_func == NULL) {
            /* A missing key just means "no custom reducer". */
            if (PyErr_ExceptionMatches(PyExc_KeyError))
                PyErr_Clear();
            else
                goto error;
        }
    }
    if (reduce_func != NULL) {
        /* NOTE(review): obj is INCREF'ed before the call and never DECREF'ed
           here, so _Pickle_FastCall() presumably consumes a reference to its
           argument -- confirm against its definition. */
        Py_INCREF(obj);
        reduce_value = _Pickle_FastCall(reduce_func, obj);
    }
    else if (PyType_IsSubtype(type, &PyType_Type)) {
        /* obj is an instance of a subclass of type: pickle it as a global. */
        status = save_global(self, obj, NULL);
        goto done;
    }
    else {
        /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
           automatically defined as __reduce__. While this is convenient, this
           makes it impossible to know which method was actually called. Of
           course, this is not a big deal. But still, it would be nice to let
           the user know which method was called when something goes
           wrong. Incidentally, this means if __reduce_ex__ is not defined, we
           don't actually have to check for a __reduce__ method. */

        /* Check for a __reduce_ex__ method. */
        if (_PyObject_LookupAttr(obj, &_Py_ID(__reduce_ex__), &reduce_func) < 0) {
            goto error;
        }
        if (reduce_func != NULL) {
            PyObject *proto;
            proto = PyLong_FromLong(self->proto);
            /* If proto is NULL, reduce_value stays NULL and the check after
               this if/else chain reports the error. */
            if (proto != NULL) {
                reduce_value = _Pickle_FastCall(reduce_func, proto);
            }
        }
        else {
            /* Check for a __reduce__ method. */
            if (_PyObject_LookupAttr(obj, &_Py_ID(__reduce__), &reduce_func) < 0) {
                goto error;
            }
            if (reduce_func != NULL) {
                reduce_value = PyObject_CallNoArgs(reduce_func);
            }
            else {
                PickleState *st = _Pickle_GetGlobalState();
                PyErr_Format(st->PicklingError,
                             "can't pickle '%.200s' object: %R",
                             type->tp_name, obj);
                goto error;
            }
        }
    }

    if (reduce_value == NULL)
        goto error;

  reduce:
    /* A string reduction names a global; a tuple goes to save_reduce(). */
    if (PyUnicode_Check(reduce_value)) {
        status = save_global(self, obj, reduce_value);
        goto done;
    }

    if (!PyTuple_Check(reduce_value)) {
        PickleState *st = _Pickle_GetGlobalState();
        PyErr_SetString(st->PicklingError,
                        "__reduce__ must return a string or tuple");
        goto error;
    }

    status = save_reduce(self, reduce_value, obj);

    /* The error label is only reachable through goto. */
    if (0) {
  error:
        status = -1;
    }
  done:

    _Py_LeaveRecursiveCall();
    Py_XDECREF(reduce_func);
    Py_XDECREF(reduce_value);

    return status;
}
4495
4496static int
4497dump(PicklerObject *self, PyObject *obj)
4498{
4499    const char stop_op = STOP;
4500    int status = -1;
4501    PyObject *tmp;
4502
4503    if (_PyObject_LookupAttr((PyObject *)self, &_Py_ID(reducer_override),
4504                             &tmp) < 0) {
4505      goto error;
4506    }
4507    /* Cache the reducer_override method, if it exists. */
4508    if (tmp != NULL) {
4509        Py_XSETREF(self->reducer_override, tmp);
4510    }
4511    else {
4512        Py_CLEAR(self->reducer_override);
4513    }
4514
4515    if (self->proto >= 2) {
4516        char header[2];
4517
4518        header[0] = PROTO;
4519        assert(self->proto >= 0 && self->proto < 256);
4520        header[1] = (unsigned char)self->proto;
4521        if (_Pickler_Write(self, header, 2) < 0)
4522            goto error;
4523        if (self->proto >= 4)
4524            self->framing = 1;
4525    }
4526
4527    if (save(self, obj, 0) < 0 ||
4528        _Pickler_Write(self, &stop_op, 1) < 0 ||
4529        _Pickler_CommitFrame(self) < 0)
4530        goto error;
4531
4532    // Success
4533    status = 0;
4534
4535  error:
4536    self->framing = 0;
4537
4538    /* Break the reference cycle we generated at the beginning this function
4539     * call when setting the reducer_override attribute of the Pickler instance
4540     * to a bound method of the same instance. This is important as the Pickler
4541     * instance holds a reference to each object it has pickled (through its
4542     * memo): thus, these objects won't be garbage-collected as long as the
4543     * Pickler itself is not collected. */
4544    Py_CLEAR(self->reducer_override);
4545    return status;
4546}
4547
4548/*[clinic input]
4549
4550_pickle.Pickler.clear_memo
4551
4552Clears the pickler's "memo".
4553
4554The memo is the data structure that remembers which objects the
4555pickler has already seen, so that shared or recursive objects are
4556pickled by reference and not by value.  This method is useful when
4557re-using picklers.
4558[clinic start generated code]*/
4559
4560static PyObject *
4561_pickle_Pickler_clear_memo_impl(PicklerObject *self)
4562/*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4563{
4564    if (self->memo)
4565        PyMemoTable_Clear(self->memo);
4566
4567    Py_RETURN_NONE;
4568}
4569
4570/*[clinic input]
4571
4572_pickle.Pickler.dump
4573
4574  obj: object
4575  /
4576
4577Write a pickled representation of the given object to the open file.
4578[clinic start generated code]*/
4579
4580static PyObject *
4581_pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4582/*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4583{
4584    /* Check whether the Pickler was initialized correctly (issue3664).
4585       Developers often forget to call __init__() in their subclasses, which
4586       would trigger a segfault without this check. */
4587    if (self->write == NULL) {
4588        PickleState *st = _Pickle_GetGlobalState();
4589        PyErr_Format(st->PicklingError,
4590                     "Pickler.__init__() was not called by %s.__init__()",
4591                     Py_TYPE(self)->tp_name);
4592        return NULL;
4593    }
4594
4595    if (_Pickler_ClearBuffer(self) < 0)
4596        return NULL;
4597
4598    if (dump(self, obj) < 0)
4599        return NULL;
4600
4601    if (_Pickler_FlushToFile(self) < 0)
4602        return NULL;
4603
4604    Py_RETURN_NONE;
4605}
4606
4607/*[clinic input]
4608
4609_pickle.Pickler.__sizeof__ -> Py_ssize_t
4610
4611Returns size in memory, in bytes.
4612[clinic start generated code]*/
4613
4614static Py_ssize_t
4615_pickle_Pickler___sizeof___impl(PicklerObject *self)
4616/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4617{
4618    Py_ssize_t res, s;
4619
4620    res = _PyObject_SIZE(Py_TYPE(self));
4621    if (self->memo != NULL) {
4622        res += sizeof(PyMemoTable);
4623        res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4624    }
4625    if (self->output_buffer != NULL) {
4626        s = _PySys_GetSizeOf(self->output_buffer);
4627        if (s == -1)
4628            return -1;
4629        res += s;
4630    }
4631    return res;
4632}
4633
/* Method table for Pickler objects: dump(), clear_memo() and __sizeof__().
 * NOTE(review): the *_METHODDEF macros are not defined in this part of the
 * file; presumably they are generated by Argument Clinic alongside the
 * implementation functions above -- confirm. */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4640
/* Destroy a Pickler: stop GC tracking, drop every object reference the
 * pickler holds, free the memo table, and finally free the object itself
 * through its type's tp_free. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first, before the fields below are torn down. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);
    Py_XDECREF(self->reducer_override);
    Py_XDECREF(self->buffer_callback);

    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4658
/* GC traversal for Pickler objects: report the object references held in
 * the pickler's fields to `visit`.
 *
 * NOTE(review): the memo table is not visited here, although the comment in
 * dump() says the pickler holds a reference to each pickled object through
 * its memo.  Confirm whether reference cycles that pass through the memo
 * need to be made visible to the collector. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    Py_VISIT(self->reducer_override);
    Py_VISIT(self->buffer_callback);
    return 0;
}
4670
/* Release everything the pickler owns and leave it in an empty state: all
 * object fields are cleared and the memo table is deleted.  Also used by
 * __init__() to discard previous content on re-initialization.
 * Always returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->output_buffer);
    Py_CLEAR(self->write);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->dispatch_table);
    Py_CLEAR(self->fast_memo);
    Py_CLEAR(self->reducer_override);
    Py_CLEAR(self->buffer_callback);

    if (self->memo != NULL) {
        /* Detach the table from self before deleting it, so self->memo never
           points at a table that is being torn down. */
        PyMemoTable *memo = self->memo;
        self->memo = NULL;
        PyMemoTable_Del(memo);
    }
    return 0;
}
4689
4690
4691/*[clinic input]
4692
4693_pickle.Pickler.__init__
4694
4695  file: object
4696  protocol: object = None
4697  fix_imports: bool = True
4698  buffer_callback: object = None
4699
4700This takes a binary file for writing a pickle data stream.
4701
4702The optional *protocol* argument tells the pickler to use the given
4703protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
4704protocol is 4. It was introduced in Python 3.4, and is incompatible
4705with previous versions.
4706
4707Specifying a negative protocol version selects the highest protocol
4708version supported.  The higher the protocol used, the more recent the
4709version of Python needed to read the pickle produced.
4710
4711The *file* argument must have a write() method that accepts a single
4712bytes argument. It can thus be a file object opened for binary
4713writing, an io.BytesIO instance, or any other custom object that meets
4714this interface.
4715
4716If *fix_imports* is True and protocol is less than 3, pickle will try
4717to map the new Python 3 names to the old module names used in Python
47182, so that the pickle data stream is readable with Python 2.
4719
4720If *buffer_callback* is None (the default), buffer views are
4721serialized into *file* as part of the pickle stream.
4722
4723If *buffer_callback* is not None, then it can be called any number
4724of times with a buffer view.  If the callback returns a false value
4725(such as None), the given buffer is out-of-band; otherwise the
4726buffer is serialized in-band, i.e. inside the pickle stream.
4727
4728It is an error if *buffer_callback* is not None and *protocol*
4729is None or smaller than 5.
4730
4731[clinic start generated code]*/
4732
4733static int
4734_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4735                              PyObject *protocol, int fix_imports,
4736                              PyObject *buffer_callback)
4737/*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4738{
4739    /* In case of multiple __init__() calls, clear previous content. */
4740    if (self->write != NULL)
4741        (void)Pickler_clear(self);
4742
4743    if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4744        return -1;
4745
4746    if (_Pickler_SetOutputStream(self, file) < 0)
4747        return -1;
4748
4749    if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4750        return -1;
4751
4752    /* memo and output_buffer may have already been created in _Pickler_New */
4753    if (self->memo == NULL) {
4754        self->memo = PyMemoTable_New();
4755        if (self->memo == NULL)
4756            return -1;
4757    }
4758    self->output_len = 0;
4759    if (self->output_buffer == NULL) {
4760        self->max_output_len = WRITE_BUF_SIZE;
4761        self->output_buffer = PyBytes_FromStringAndSize(NULL,
4762                                                        self->max_output_len);
4763        if (self->output_buffer == NULL)
4764            return -1;
4765    }
4766
4767    self->fast = 0;
4768    self->fast_nesting = 0;
4769    self->fast_memo = NULL;
4770
4771    if (init_method_ref((PyObject *)self, &_Py_ID(persistent_id),
4772                        &self->pers_func, &self->pers_func_self) < 0)
4773    {
4774        return -1;
4775    }
4776    if (self->dispatch_table != NULL) {
4777        return 0;
4778    }
4779    if (_PyObject_LookupAttr((PyObject *)self, &_Py_ID(dispatch_table),
4780                             &self->dispatch_table) < 0) {
4781        return -1;
4782    }
4783
4784    return 0;
4785}
4786
4787
4788/* Define a proxy object for the Pickler's internal memo object. This is to
4789 * avoid breaking code like:
4790 *  pickler.memo.clear()
4791 * and
4792 *  pickler.memo = saved_memo
4793 * Is this a good idea? Not really, but we don't want to break code that uses
4794 * it. Note that we don't implement the entire mapping API here. This is
4795 * intentional, as these should be treated as black-box implementation details.
4796 */
4797
4798/*[clinic input]
4799_pickle.PicklerMemoProxy.clear
4800
4801Remove all items from memo.
4802[clinic start generated code]*/
4803
4804static PyObject *
4805_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4806/*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4807{
4808    if (self->pickler->memo)
4809        PyMemoTable_Clear(self->pickler->memo);
4810    Py_RETURN_NONE;
4811}
4812
4813/*[clinic input]
4814_pickle.PicklerMemoProxy.copy
4815
4816Copy the memo to a new object.
4817[clinic start generated code]*/
4818
4819static PyObject *
4820_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4821/*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4822{
4823    PyMemoTable *memo;
4824    PyObject *new_memo = PyDict_New();
4825    if (new_memo == NULL)
4826        return NULL;
4827
4828    memo = self->pickler->memo;
4829    for (size_t i = 0; i < memo->mt_allocated; ++i) {
4830        PyMemoEntry entry = memo->mt_table[i];
4831        if (entry.me_key != NULL) {
4832            int status;
4833            PyObject *key, *value;
4834
4835            key = PyLong_FromVoidPtr(entry.me_key);
4836            value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4837
4838            if (key == NULL || value == NULL) {
4839                Py_XDECREF(key);
4840                Py_XDECREF(value);
4841                goto error;
4842            }
4843            status = PyDict_SetItem(new_memo, key, value);
4844            Py_DECREF(key);
4845            Py_DECREF(value);
4846            if (status < 0)
4847                goto error;
4848        }
4849    }
4850    return new_memo;
4851
4852  error:
4853    Py_XDECREF(new_memo);
4854    return NULL;
4855}
4856
4857/*[clinic input]
4858_pickle.PicklerMemoProxy.__reduce__
4859
4860Implement pickle support.
4861[clinic start generated code]*/
4862
4863static PyObject *
4864_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4865/*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4866{
4867    PyObject *reduce_value, *dict_args;
4868    PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4869    if (contents == NULL)
4870        return NULL;
4871
4872    reduce_value = PyTuple_New(2);
4873    if (reduce_value == NULL) {
4874        Py_DECREF(contents);
4875        return NULL;
4876    }
4877    dict_args = PyTuple_New(1);
4878    if (dict_args == NULL) {
4879        Py_DECREF(contents);
4880        Py_DECREF(reduce_value);
4881        return NULL;
4882    }
4883    PyTuple_SET_ITEM(dict_args, 0, contents);
4884    Py_INCREF((PyObject *)&PyDict_Type);
4885    PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4886    PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4887    return reduce_value;
4888}
4889
4890static PyMethodDef picklerproxy_methods[] = {
4891    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4892    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4893    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4894    {NULL, NULL} /* sentinel */
4895};
4896
4897static void
4898PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4899{
4900    PyObject_GC_UnTrack(self);
4901    Py_XDECREF(self->pickler);
4902    PyObject_GC_Del((PyObject *)self);
4903}
4904
/* tp_traverse for PicklerMemoProxy: the wrapped pickler is the only object
   reference the proxy reports to the garbage collector. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4912
/* tp_clear for PicklerMemoProxy: drop the reference to the wrapped pickler
   and leave the slot NULL.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4919
4920static PyTypeObject PicklerMemoProxyType = {
4921    PyVarObject_HEAD_INIT(NULL, 0)
4922    "_pickle.PicklerMemoProxy",                 /*tp_name*/
4923    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
4924    0,
4925    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
4926    0,                                          /* tp_vectorcall_offset */
4927    0,                                          /* tp_getattr */
4928    0,                                          /* tp_setattr */
4929    0,                                          /* tp_as_async */
4930    0,                                          /* tp_repr */
4931    0,                                          /* tp_as_number */
4932    0,                                          /* tp_as_sequence */
4933    0,                                          /* tp_as_mapping */
4934    PyObject_HashNotImplemented,                /* tp_hash */
4935    0,                                          /* tp_call */
4936    0,                                          /* tp_str */
4937    PyObject_GenericGetAttr,                    /* tp_getattro */
4938    PyObject_GenericSetAttr,                    /* tp_setattro */
4939    0,                                          /* tp_as_buffer */
4940    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4941    0,                                          /* tp_doc */
4942    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
4943    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
4944    0,                                          /* tp_richcompare */
4945    0,                                          /* tp_weaklistoffset */
4946    0,                                          /* tp_iter */
4947    0,                                          /* tp_iternext */
4948    picklerproxy_methods,                       /* tp_methods */
4949};
4950
4951static PyObject *
4952PicklerMemoProxy_New(PicklerObject *pickler)
4953{
4954    PicklerMemoProxyObject *self;
4955
4956    self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4957    if (self == NULL)
4958        return NULL;
4959    Py_INCREF(pickler);
4960    self->pickler = pickler;
4961    PyObject_GC_Track(self);
4962    return (PyObject *)self;
4963}
4964
4965/*****************************************************************************/
4966
/* Getter for Pickler.memo: every access returns a freshly created
   PicklerMemoProxy bound to this pickler (NULL if creating it fails). */
static PyObject *
Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
{
    return PicklerMemoProxy_New(self);
}
4972
4973static int
4974Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4975{
4976    PyMemoTable *new_memo = NULL;
4977
4978    if (obj == NULL) {
4979        PyErr_SetString(PyExc_TypeError,
4980                        "attribute deletion is not supported");
4981        return -1;
4982    }
4983
4984    if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
4985        PicklerObject *pickler =
4986            ((PicklerMemoProxyObject *)obj)->pickler;
4987
4988        new_memo = PyMemoTable_Copy(pickler->memo);
4989        if (new_memo == NULL)
4990            return -1;
4991    }
4992    else if (PyDict_Check(obj)) {
4993        Py_ssize_t i = 0;
4994        PyObject *key, *value;
4995
4996        new_memo = PyMemoTable_New();
4997        if (new_memo == NULL)
4998            return -1;
4999
5000        while (PyDict_Next(obj, &i, &key, &value)) {
5001            Py_ssize_t memo_id;
5002            PyObject *memo_obj;
5003
5004            if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
5005                PyErr_SetString(PyExc_TypeError,
5006                                "'memo' values must be 2-item tuples");
5007                goto error;
5008            }
5009            memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5010            if (memo_id == -1 && PyErr_Occurred())
5011                goto error;
5012            memo_obj = PyTuple_GET_ITEM(value, 1);
5013            if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5014                goto error;
5015        }
5016    }
5017    else {
5018        PyErr_Format(PyExc_TypeError,
5019                     "'memo' attribute must be a PicklerMemoProxy object "
5020                     "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5021        return -1;
5022    }
5023
5024    PyMemoTable_Del(self->memo);
5025    self->memo = new_memo;
5026
5027    return 0;
5028
5029  error:
5030    if (new_memo)
5031        PyMemoTable_Del(new_memo);
5032    return -1;
5033}
5034
5035static PyObject *
5036Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5037{
5038    if (self->pers_func == NULL) {
5039        PyErr_SetString(PyExc_AttributeError, "persistent_id");
5040        return NULL;
5041    }
5042    return reconstruct_method(self->pers_func, self->pers_func_self);
5043}
5044
5045static int
5046Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5047{
5048    if (value == NULL) {
5049        PyErr_SetString(PyExc_TypeError,
5050                        "attribute deletion is not supported");
5051        return -1;
5052    }
5053    if (!PyCallable_Check(value)) {
5054        PyErr_SetString(PyExc_TypeError,
5055                        "persistent_id must be a callable taking one argument");
5056        return -1;
5057    }
5058
5059    self->pers_func_self = NULL;
5060    Py_INCREF(value);
5061    Py_XSETREF(self->pers_func, value);
5062
5063    return 0;
5064}
5065
5066static PyMemberDef Pickler_members[] = {
5067    {"bin", T_INT, offsetof(PicklerObject, bin)},
5068    {"fast", T_INT, offsetof(PicklerObject, fast)},
5069    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5070    {NULL}
5071};
5072
5073static PyGetSetDef Pickler_getsets[] = {
5074    {"memo",          (getter)Pickler_get_memo,
5075                      (setter)Pickler_set_memo},
5076    {"persistent_id", (getter)Pickler_get_persid,
5077                      (setter)Pickler_set_persid},
5078    {NULL}
5079};
5080
5081static PyTypeObject Pickler_Type = {
5082    PyVarObject_HEAD_INIT(NULL, 0)
5083    "_pickle.Pickler"  ,                /*tp_name*/
5084    sizeof(PicklerObject),              /*tp_basicsize*/
5085    0,                                  /*tp_itemsize*/
5086    (destructor)Pickler_dealloc,        /*tp_dealloc*/
5087    0,                                  /*tp_vectorcall_offset*/
5088    0,                                  /*tp_getattr*/
5089    0,                                  /*tp_setattr*/
5090    0,                                  /*tp_as_async*/
5091    0,                                  /*tp_repr*/
5092    0,                                  /*tp_as_number*/
5093    0,                                  /*tp_as_sequence*/
5094    0,                                  /*tp_as_mapping*/
5095    0,                                  /*tp_hash*/
5096    0,                                  /*tp_call*/
5097    0,                                  /*tp_str*/
5098    0,                                  /*tp_getattro*/
5099    0,                                  /*tp_setattro*/
5100    0,                                  /*tp_as_buffer*/
5101    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5102    _pickle_Pickler___init____doc__,    /*tp_doc*/
5103    (traverseproc)Pickler_traverse,     /*tp_traverse*/
5104    (inquiry)Pickler_clear,             /*tp_clear*/
5105    0,                                  /*tp_richcompare*/
5106    0,                                  /*tp_weaklistoffset*/
5107    0,                                  /*tp_iter*/
5108    0,                                  /*tp_iternext*/
5109    Pickler_methods,                    /*tp_methods*/
5110    Pickler_members,                    /*tp_members*/
5111    Pickler_getsets,                    /*tp_getset*/
5112    0,                                  /*tp_base*/
5113    0,                                  /*tp_dict*/
5114    0,                                  /*tp_descr_get*/
5115    0,                                  /*tp_descr_set*/
5116    0,                                  /*tp_dictoffset*/
5117    _pickle_Pickler___init__,           /*tp_init*/
5118    PyType_GenericAlloc,                /*tp_alloc*/
5119    PyType_GenericNew,                  /*tp_new*/
5120    PyObject_GC_Del,                    /*tp_free*/
5121    0,                                  /*tp_is_gc*/
5122};
5123
5124/* Temporary helper for calling self.find_class().
5125
5126   XXX: It would be nice to able to avoid Python function call overhead, by
5127   using directly the C version of find_class(), when find_class() is not
5128   overridden by a subclass. Although, this could become rather hackish. A
5129   simpler optimization would be to call the C function when self is not a
5130   subclass instance. */
5131static PyObject *
5132find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5133{
5134    return PyObject_CallMethodObjArgs((PyObject *)self, &_Py_ID(find_class),
5135                                      module_name, global_name, NULL);
5136}
5137
5138static Py_ssize_t
5139marker(UnpicklerObject *self)
5140{
5141    Py_ssize_t mark;
5142
5143    if (self->num_marks < 1) {
5144        PickleState *st = _Pickle_GetGlobalState();
5145        PyErr_SetString(st->UnpicklingError, "could not find MARK");
5146        return -1;
5147    }
5148
5149    mark = self->marks[--self->num_marks];
5150    self->stack->mark_set = self->num_marks != 0;
5151    self->stack->fence = self->num_marks ?
5152            self->marks[self->num_marks - 1] : 0;
5153    return mark;
5154}
5155
/* Put None on the unpickler stack.  Returns 0 on success.
   NOTE(review): PDATA_APPEND is defined outside this section; its third
   argument looks like the value returned from this function when the push
   fails -- confirm against the macro. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5162
5163static int
5164load_int(UnpicklerObject *self)
5165{
5166    PyObject *value;
5167    char *endptr, *s;
5168    Py_ssize_t len;
5169    long x;
5170
5171    if ((len = _Unpickler_Readline(self, &s)) < 0)
5172        return -1;
5173    if (len < 2)
5174        return bad_readline();
5175
5176    errno = 0;
5177    /* XXX: Should the base argument of strtol() be explicitly set to 10?
5178       XXX(avassalotti): Should this uses PyOS_strtol()? */
5179    x = strtol(s, &endptr, 0);
5180
5181    if (errno || (*endptr != '\n' && *endptr != '\0')) {
5182        /* Hm, maybe we've got something long.  Let's try reading
5183         * it as a Python int object. */
5184        errno = 0;
5185        /* XXX: Same thing about the base here. */
5186        value = PyLong_FromString(s, NULL, 0);
5187        if (value == NULL) {
5188            PyErr_SetString(PyExc_ValueError,
5189                            "could not convert string to int");
5190            return -1;
5191        }
5192    }
5193    else {
5194        if (len == 3 && (x == 0 || x == 1)) {
5195            if ((value = PyBool_FromLong(x)) == NULL)
5196                return -1;
5197        }
5198        else {
5199            if ((value = PyLong_FromLong(x)) == NULL)
5200                return -1;
5201        }
5202    }
5203
5204    PDATA_PUSH(self->stack, value, -1);
5205    return 0;
5206}
5207
/* Put *boolean* on the unpickler stack; the caller must pass Py_True or
   Py_False (checked by the assert).  Returns 0 on success.
   NOTE(review): PDATA_APPEND is defined outside this section; its third
   argument looks like the value returned from this function when the push
   fails -- confirm against the macro. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5215
5216/* s contains x bytes of an unsigned little-endian integer.  Return its value
5217 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5218 */
5219static Py_ssize_t
5220calc_binsize(char *bytes, int nbytes)
5221{
5222    unsigned char *s = (unsigned char *)bytes;
5223    int i;
5224    size_t x = 0;
5225
5226    if (nbytes > (int)sizeof(size_t)) {
5227        /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
5228         * have 64-bit size that can't be represented on 32-bit platform.
5229         */
5230        for (i = (int)sizeof(size_t); i < nbytes; i++) {
5231            if (s[i])
5232                return -1;
5233        }
5234        nbytes = (int)sizeof(size_t);
5235    }
5236    for (i = 0; i < nbytes; i++) {
5237        x |= (size_t) s[i] << (8 * i);
5238    }
5239
5240    if (x > PY_SSIZE_T_MAX)
5241        return -1;
5242    else
5243        return (Py_ssize_t) x;
5244}
5245
/* bytes holds nbytes bytes of a little-endian integer.  Return its value as
 * a C long.  Obscure:  when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * Callers in this file pass nbytes of 1, 2 or 4.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    /* Accumulate in an unsigned type: shifting a byte >= 0x80 into bit 31
     * of a signed 32-bit long would be undefined behaviour.
     */
    for (i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is x - 2**32; compute it
     * from the low 31 bits so that no step leaves the range of a long,
     * whether long is 32 bits wide or wider.
     */
    if (nbytes == 4 && (x & 0x80000000UL) != 0) {
        return (long)(x & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)x;
}
5272
5273static int
5274load_binintx(UnpicklerObject *self, char *s, int size)
5275{
5276    PyObject *value;
5277    long x;
5278
5279    x = calc_binint(s, size);
5280
5281    if ((value = PyLong_FromLong(x)) == NULL)
5282        return -1;
5283
5284    PDATA_PUSH(self->stack, value, -1);
5285    return 0;
5286}
5287
5288static int
5289load_binint(UnpicklerObject *self)
5290{
5291    char *s;
5292
5293    if (_Unpickler_Read(self, &s, 4) < 0)
5294        return -1;
5295
5296    return load_binintx(self, s, 4);
5297}
5298
5299static int
5300load_binint1(UnpicklerObject *self)
5301{
5302    char *s;
5303
5304    if (_Unpickler_Read(self, &s, 1) < 0)
5305        return -1;
5306
5307    return load_binintx(self, s, 1);
5308}
5309
5310static int
5311load_binint2(UnpicklerObject *self)
5312{
5313    char *s;
5314
5315    if (_Unpickler_Read(self, &s, 2) < 0)
5316        return -1;
5317
5318    return load_binintx(self, s, 2);
5319}
5320
5321static int
5322load_long(UnpicklerObject *self)
5323{
5324    PyObject *value;
5325    char *s = NULL;
5326    Py_ssize_t len;
5327
5328    if ((len = _Unpickler_Readline(self, &s)) < 0)
5329        return -1;
5330    if (len < 2)
5331        return bad_readline();
5332
5333    /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5334       the 'L' before calling PyLong_FromString.  In order to maintain
5335       compatibility with Python 3.0.0, we don't actually *require*
5336       the 'L' to be present. */
5337    if (s[len-2] == 'L')
5338        s[len-2] = '\0';
5339    /* XXX: Should the base argument explicitly set to 10? */
5340    value = PyLong_FromString(s, NULL, 0);
5341    if (value == NULL)
5342        return -1;
5343
5344    PDATA_PUSH(self->stack, value, -1);
5345    return 0;
5346}
5347
5348/* 'size' bytes contain the # of bytes of little-endian 256's-complement
5349 * data following.
5350 */
5351static int
5352load_counted_long(UnpicklerObject *self, int size)
5353{
5354    PyObject *value;
5355    char *nbytes;
5356    char *pdata;
5357
5358    assert(size == 1 || size == 4);
5359    if (_Unpickler_Read(self, &nbytes, size) < 0)
5360        return -1;
5361
5362    size = calc_binint(nbytes, size);
5363    if (size < 0) {
5364        PickleState *st = _Pickle_GetGlobalState();
5365        /* Corrupt or hostile pickle -- we never write one like this */
5366        PyErr_SetString(st->UnpicklingError,
5367                        "LONG pickle has negative byte count");
5368        return -1;
5369    }
5370
5371    if (size == 0)
5372        value = PyLong_FromLong(0L);
5373    else {
5374        /* Read the raw little-endian bytes and convert. */
5375        if (_Unpickler_Read(self, &pdata, size) < 0)
5376            return -1;
5377        value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5378                                      1 /* little endian */ , 1 /* signed */ );
5379    }
5380    if (value == NULL)
5381        return -1;
5382    PDATA_PUSH(self->stack, value, -1);
5383    return 0;
5384}
5385
5386static int
5387load_float(UnpicklerObject *self)
5388{
5389    PyObject *value;
5390    char *endptr, *s;
5391    Py_ssize_t len;
5392    double d;
5393
5394    if ((len = _Unpickler_Readline(self, &s)) < 0)
5395        return -1;
5396    if (len < 2)
5397        return bad_readline();
5398
5399    errno = 0;
5400    d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5401    if (d == -1.0 && PyErr_Occurred())
5402        return -1;
5403    if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5404        PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5405        return -1;
5406    }
5407    value = PyFloat_FromDouble(d);
5408    if (value == NULL)
5409        return -1;
5410
5411    PDATA_PUSH(self->stack, value, -1);
5412    return 0;
5413}
5414
5415static int
5416load_binfloat(UnpicklerObject *self)
5417{
5418    PyObject *value;
5419    double x;
5420    char *s;
5421
5422    if (_Unpickler_Read(self, &s, 8) < 0)
5423        return -1;
5424
5425    x = PyFloat_Unpack8(s, 0);
5426    if (x == -1.0 && PyErr_Occurred())
5427        return -1;
5428
5429    if ((value = PyFloat_FromDouble(x)) == NULL)
5430        return -1;
5431
5432    PDATA_PUSH(self->stack, value, -1);
5433    return 0;
5434}
5435
5436static int
5437load_string(UnpicklerObject *self)
5438{
5439    PyObject *bytes;
5440    PyObject *obj;
5441    Py_ssize_t len;
5442    char *s, *p;
5443
5444    if ((len = _Unpickler_Readline(self, &s)) < 0)
5445        return -1;
5446    /* Strip the newline */
5447    len--;
5448    /* Strip outermost quotes */
5449    if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5450        p = s + 1;
5451        len -= 2;
5452    }
5453    else {
5454        PickleState *st = _Pickle_GetGlobalState();
5455        PyErr_SetString(st->UnpicklingError,
5456                        "the STRING opcode argument must be quoted");
5457        return -1;
5458    }
5459    assert(len >= 0);
5460
5461    /* Use the PyBytes API to decode the string, since that is what is used
5462       to encode, and then coerce the result to Unicode. */
5463    bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5464    if (bytes == NULL)
5465        return -1;
5466
5467    /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5468       Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5469    if (strcmp(self->encoding, "bytes") == 0) {
5470        obj = bytes;
5471    }
5472    else {
5473        obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5474        Py_DECREF(bytes);
5475        if (obj == NULL) {
5476            return -1;
5477        }
5478    }
5479
5480    PDATA_PUSH(self->stack, obj, -1);
5481    return 0;
5482}
5483
5484static int
5485load_counted_binstring(UnpicklerObject *self, int nbytes)
5486{
5487    PyObject *obj;
5488    Py_ssize_t size;
5489    char *s;
5490
5491    if (_Unpickler_Read(self, &s, nbytes) < 0)
5492        return -1;
5493
5494    size = calc_binsize(s, nbytes);
5495    if (size < 0) {
5496        PickleState *st = _Pickle_GetGlobalState();
5497        PyErr_Format(st->UnpicklingError,
5498                     "BINSTRING exceeds system's maximum size of %zd bytes",
5499                     PY_SSIZE_T_MAX);
5500        return -1;
5501    }
5502
5503    if (_Unpickler_Read(self, &s, size) < 0)
5504        return -1;
5505
5506    /* Convert Python 2.x strings to bytes if the *encoding* given to the
5507       Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5508    if (strcmp(self->encoding, "bytes") == 0) {
5509        obj = PyBytes_FromStringAndSize(s, size);
5510    }
5511    else {
5512        obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5513    }
5514    if (obj == NULL) {
5515        return -1;
5516    }
5517
5518    PDATA_PUSH(self->stack, obj, -1);
5519    return 0;
5520}
5521
5522static int
5523load_counted_binbytes(UnpicklerObject *self, int nbytes)
5524{
5525    PyObject *bytes;
5526    Py_ssize_t size;
5527    char *s;
5528
5529    if (_Unpickler_Read(self, &s, nbytes) < 0)
5530        return -1;
5531
5532    size = calc_binsize(s, nbytes);
5533    if (size < 0) {
5534        PyErr_Format(PyExc_OverflowError,
5535                     "BINBYTES exceeds system's maximum size of %zd bytes",
5536                     PY_SSIZE_T_MAX);
5537        return -1;
5538    }
5539
5540    bytes = PyBytes_FromStringAndSize(NULL, size);
5541    if (bytes == NULL)
5542        return -1;
5543    if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5544        Py_DECREF(bytes);
5545        return -1;
5546    }
5547
5548    PDATA_PUSH(self->stack, bytes, -1);
5549    return 0;
5550}
5551
5552static int
5553load_counted_bytearray(UnpicklerObject *self)
5554{
5555    PyObject *bytearray;
5556    Py_ssize_t size;
5557    char *s;
5558
5559    if (_Unpickler_Read(self, &s, 8) < 0) {
5560        return -1;
5561    }
5562
5563    size = calc_binsize(s, 8);
5564    if (size < 0) {
5565        PyErr_Format(PyExc_OverflowError,
5566                     "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5567                     PY_SSIZE_T_MAX);
5568        return -1;
5569    }
5570
5571    bytearray = PyByteArray_FromStringAndSize(NULL, size);
5572    if (bytearray == NULL) {
5573        return -1;
5574    }
5575    if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5576        Py_DECREF(bytearray);
5577        return -1;
5578    }
5579
5580    PDATA_PUSH(self->stack, bytearray, -1);
5581    return 0;
5582}
5583
5584static int
5585load_next_buffer(UnpicklerObject *self)
5586{
5587    if (self->buffers == NULL) {
5588        PickleState *st = _Pickle_GetGlobalState();
5589        PyErr_SetString(st->UnpicklingError,
5590                        "pickle stream refers to out-of-band data "
5591                        "but no *buffers* argument was given");
5592        return -1;
5593    }
5594    PyObject *buf = PyIter_Next(self->buffers);
5595    if (buf == NULL) {
5596        if (!PyErr_Occurred()) {
5597            PickleState *st = _Pickle_GetGlobalState();
5598            PyErr_SetString(st->UnpicklingError,
5599                            "not enough out-of-band buffers");
5600        }
5601        return -1;
5602    }
5603
5604    PDATA_PUSH(self->stack, buf, -1);
5605    return 0;
5606}
5607
5608static int
5609load_readonly_buffer(UnpicklerObject *self)
5610{
5611    Py_ssize_t len = Py_SIZE(self->stack);
5612    if (len <= self->stack->fence) {
5613        return Pdata_stack_underflow(self->stack);
5614    }
5615
5616    PyObject *obj = self->stack->data[len - 1];
5617    PyObject *view = PyMemoryView_FromObject(obj);
5618    if (view == NULL) {
5619        return -1;
5620    }
5621    if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5622        /* Original object is writable */
5623        PyMemoryView_GET_BUFFER(view)->readonly = 1;
5624        self->stack->data[len - 1] = view;
5625        Py_DECREF(obj);
5626    }
5627    else {
5628        /* Original object is read-only, no need to replace it */
5629        Py_DECREF(view);
5630    }
5631    return 0;
5632}
5633
5634static int
5635load_unicode(UnpicklerObject *self)
5636{
5637    PyObject *str;
5638    Py_ssize_t len;
5639    char *s = NULL;
5640
5641    if ((len = _Unpickler_Readline(self, &s)) < 0)
5642        return -1;
5643    if (len < 1)
5644        return bad_readline();
5645
5646    str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5647    if (str == NULL)
5648        return -1;
5649
5650    PDATA_PUSH(self->stack, str, -1);
5651    return 0;
5652}
5653
5654static int
5655load_counted_binunicode(UnpicklerObject *self, int nbytes)
5656{
5657    PyObject *str;
5658    Py_ssize_t size;
5659    char *s;
5660
5661    if (_Unpickler_Read(self, &s, nbytes) < 0)
5662        return -1;
5663
5664    size = calc_binsize(s, nbytes);
5665    if (size < 0) {
5666        PyErr_Format(PyExc_OverflowError,
5667                     "BINUNICODE exceeds system's maximum size of %zd bytes",
5668                     PY_SSIZE_T_MAX);
5669        return -1;
5670    }
5671
5672    if (_Unpickler_Read(self, &s, size) < 0)
5673        return -1;
5674
5675    str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5676    if (str == NULL)
5677        return -1;
5678
5679    PDATA_PUSH(self->stack, str, -1);
5680    return 0;
5681}
5682
5683static int
5684load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5685{
5686    PyObject *tuple;
5687
5688    if (Py_SIZE(self->stack) < len)
5689        return Pdata_stack_underflow(self->stack);
5690
5691    tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5692    if (tuple == NULL)
5693        return -1;
5694    PDATA_PUSH(self->stack, tuple, -1);
5695    return 0;
5696}
5697
5698static int
5699load_tuple(UnpicklerObject *self)
5700{
5701    Py_ssize_t i;
5702
5703    if ((i = marker(self)) < 0)
5704        return -1;
5705
5706    return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5707}
5708
5709static int
5710load_empty_list(UnpicklerObject *self)
5711{
5712    PyObject *list;
5713
5714    if ((list = PyList_New(0)) == NULL)
5715        return -1;
5716    PDATA_PUSH(self->stack, list, -1);
5717    return 0;
5718}
5719
5720static int
5721load_empty_dict(UnpicklerObject *self)
5722{
5723    PyObject *dict;
5724
5725    if ((dict = PyDict_New()) == NULL)
5726        return -1;
5727    PDATA_PUSH(self->stack, dict, -1);
5728    return 0;
5729}
5730
5731static int
5732load_empty_set(UnpicklerObject *self)
5733{
5734    PyObject *set;
5735
5736    if ((set = PySet_New(NULL)) == NULL)
5737        return -1;
5738    PDATA_PUSH(self->stack, set, -1);
5739    return 0;
5740}
5741
5742static int
5743load_list(UnpicklerObject *self)
5744{
5745    PyObject *list;
5746    Py_ssize_t i;
5747
5748    if ((i = marker(self)) < 0)
5749        return -1;
5750
5751    list = Pdata_poplist(self->stack, i);
5752    if (list == NULL)
5753        return -1;
5754    PDATA_PUSH(self->stack, list, -1);
5755    return 0;
5756}
5757
5758static int
5759load_dict(UnpicklerObject *self)
5760{
5761    PyObject *dict, *key, *value;
5762    Py_ssize_t i, j, k;
5763
5764    if ((i = marker(self)) < 0)
5765        return -1;
5766    j = Py_SIZE(self->stack);
5767
5768    if ((dict = PyDict_New()) == NULL)
5769        return -1;
5770
5771    if ((j - i) % 2 != 0) {
5772        PickleState *st = _Pickle_GetGlobalState();
5773        PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5774        Py_DECREF(dict);
5775        return -1;
5776    }
5777
5778    for (k = i + 1; k < j; k += 2) {
5779        key = self->stack->data[k - 1];
5780        value = self->stack->data[k];
5781        if (PyDict_SetItem(dict, key, value) < 0) {
5782            Py_DECREF(dict);
5783            return -1;
5784        }
5785    }
5786    Pdata_clear(self->stack, i);
5787    PDATA_PUSH(self->stack, dict, -1);
5788    return 0;
5789}
5790
5791static int
5792load_frozenset(UnpicklerObject *self)
5793{
5794    PyObject *items;
5795    PyObject *frozenset;
5796    Py_ssize_t i;
5797
5798    if ((i = marker(self)) < 0)
5799        return -1;
5800
5801    items = Pdata_poptuple(self->stack, i);
5802    if (items == NULL)
5803        return -1;
5804
5805    frozenset = PyFrozenSet_New(items);
5806    Py_DECREF(items);
5807    if (frozenset == NULL)
5808        return -1;
5809
5810    PDATA_PUSH(self->stack, frozenset, -1);
5811    return 0;
5812}
5813
5814static PyObject *
5815instantiate(PyObject *cls, PyObject *args)
5816{
5817    /* Caller must assure args are a tuple.  Normally, args come from
5818       Pdata_poptuple which packs objects from the top of the stack
5819       into a newly created tuple. */
5820    assert(PyTuple_Check(args));
5821    if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5822        PyObject *func;
5823        if (_PyObject_LookupAttr(cls, &_Py_ID(__getinitargs__), &func) < 0) {
5824            return NULL;
5825        }
5826        if (func == NULL) {
5827            return PyObject_CallMethodOneArg(cls, &_Py_ID(__new__), cls);
5828        }
5829        Py_DECREF(func);
5830    }
5831    return PyObject_CallObject(cls, args);
5832}
5833
5834static int
5835load_obj(UnpicklerObject *self)
5836{
5837    PyObject *cls, *args, *obj = NULL;
5838    Py_ssize_t i;
5839
5840    if ((i = marker(self)) < 0)
5841        return -1;
5842
5843    if (Py_SIZE(self->stack) - i < 1)
5844        return Pdata_stack_underflow(self->stack);
5845
5846    args = Pdata_poptuple(self->stack, i + 1);
5847    if (args == NULL)
5848        return -1;
5849
5850    PDATA_POP(self->stack, cls);
5851    if (cls) {
5852        obj = instantiate(cls, args);
5853        Py_DECREF(cls);
5854    }
5855    Py_DECREF(args);
5856    if (obj == NULL)
5857        return -1;
5858
5859    PDATA_PUSH(self->stack, obj, -1);
5860    return 0;
5861}
5862
5863static int
5864load_inst(UnpicklerObject *self)
5865{
5866    PyObject *cls = NULL;
5867    PyObject *args = NULL;
5868    PyObject *obj = NULL;
5869    PyObject *module_name;
5870    PyObject *class_name;
5871    Py_ssize_t len;
5872    Py_ssize_t i;
5873    char *s;
5874
5875    if ((i = marker(self)) < 0)
5876        return -1;
5877    if ((len = _Unpickler_Readline(self, &s)) < 0)
5878        return -1;
5879    if (len < 2)
5880        return bad_readline();
5881
5882    /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5883       identifiers are permitted in Python 3.0, since the INST opcode is only
5884       supported by older protocols on Python 2.x. */
5885    module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5886    if (module_name == NULL)
5887        return -1;
5888
5889    if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5890        if (len < 2) {
5891            Py_DECREF(module_name);
5892            return bad_readline();
5893        }
5894        class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5895        if (class_name != NULL) {
5896            cls = find_class(self, module_name, class_name);
5897            Py_DECREF(class_name);
5898        }
5899    }
5900    Py_DECREF(module_name);
5901
5902    if (cls == NULL)
5903        return -1;
5904
5905    if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5906        obj = instantiate(cls, args);
5907        Py_DECREF(args);
5908    }
5909    Py_DECREF(cls);
5910
5911    if (obj == NULL)
5912        return -1;
5913
5914    PDATA_PUSH(self->stack, obj, -1);
5915    return 0;
5916}
5917
5918static void
5919newobj_unpickling_error(const char * msg, int use_kwargs, PyObject *arg)
5920{
5921    PickleState *st = _Pickle_GetGlobalState();
5922    PyErr_Format(st->UnpicklingError, msg,
5923                 use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5924                 Py_TYPE(arg)->tp_name);
5925}
5926
5927static int
5928load_newobj(UnpicklerObject *self, int use_kwargs)
5929{
5930    PyObject *cls, *args, *kwargs = NULL;
5931    PyObject *obj;
5932
5933    /* Stack is ... cls args [kwargs], and we want to call
5934     * cls.__new__(cls, *args, **kwargs).
5935     */
5936    if (use_kwargs) {
5937        PDATA_POP(self->stack, kwargs);
5938        if (kwargs == NULL) {
5939            return -1;
5940        }
5941    }
5942    PDATA_POP(self->stack, args);
5943    if (args == NULL) {
5944        Py_XDECREF(kwargs);
5945        return -1;
5946    }
5947    PDATA_POP(self->stack, cls);
5948    if (cls == NULL) {
5949        Py_XDECREF(kwargs);
5950        Py_DECREF(args);
5951        return -1;
5952    }
5953
5954    if (!PyType_Check(cls)) {
5955        newobj_unpickling_error("%s class argument must be a type, not %.200s",
5956                                use_kwargs, cls);
5957        goto error;
5958    }
5959    if (((PyTypeObject *)cls)->tp_new == NULL) {
5960        newobj_unpickling_error("%s class argument '%.200s' doesn't have __new__",
5961                                use_kwargs, cls);
5962        goto error;
5963    }
5964    if (!PyTuple_Check(args)) {
5965        newobj_unpickling_error("%s args argument must be a tuple, not %.200s",
5966                                use_kwargs, args);
5967        goto error;
5968    }
5969    if (use_kwargs && !PyDict_Check(kwargs)) {
5970        newobj_unpickling_error("%s kwargs argument must be a dict, not %.200s",
5971                                use_kwargs, kwargs);
5972        goto error;
5973    }
5974
5975    obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5976    if (obj == NULL) {
5977        goto error;
5978    }
5979    Py_XDECREF(kwargs);
5980    Py_DECREF(args);
5981    Py_DECREF(cls);
5982    PDATA_PUSH(self->stack, obj, -1);
5983    return 0;
5984
5985error:
5986    Py_XDECREF(kwargs);
5987    Py_DECREF(args);
5988    Py_DECREF(cls);
5989    return -1;
5990}
5991
5992static int
5993load_global(UnpicklerObject *self)
5994{
5995    PyObject *global = NULL;
5996    PyObject *module_name;
5997    PyObject *global_name;
5998    Py_ssize_t len;
5999    char *s;
6000
6001    if ((len = _Unpickler_Readline(self, &s)) < 0)
6002        return -1;
6003    if (len < 2)
6004        return bad_readline();
6005    module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6006    if (!module_name)
6007        return -1;
6008
6009    if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6010        if (len < 2) {
6011            Py_DECREF(module_name);
6012            return bad_readline();
6013        }
6014        global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6015        if (global_name) {
6016            global = find_class(self, module_name, global_name);
6017            Py_DECREF(global_name);
6018        }
6019    }
6020    Py_DECREF(module_name);
6021
6022    if (global == NULL)
6023        return -1;
6024    PDATA_PUSH(self->stack, global, -1);
6025    return 0;
6026}
6027
6028static int
6029load_stack_global(UnpicklerObject *self)
6030{
6031    PyObject *global;
6032    PyObject *module_name;
6033    PyObject *global_name;
6034
6035    PDATA_POP(self->stack, global_name);
6036    PDATA_POP(self->stack, module_name);
6037    if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6038        global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6039        PickleState *st = _Pickle_GetGlobalState();
6040        PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6041        Py_XDECREF(global_name);
6042        Py_XDECREF(module_name);
6043        return -1;
6044    }
6045    global = find_class(self, module_name, global_name);
6046    Py_DECREF(global_name);
6047    Py_DECREF(module_name);
6048    if (global == NULL)
6049        return -1;
6050    PDATA_PUSH(self->stack, global, -1);
6051    return 0;
6052}
6053
6054static int
6055load_persid(UnpicklerObject *self)
6056{
6057    PyObject *pid, *obj;
6058    Py_ssize_t len;
6059    char *s;
6060
6061    if (self->pers_func) {
6062        if ((len = _Unpickler_Readline(self, &s)) < 0)
6063            return -1;
6064        if (len < 1)
6065            return bad_readline();
6066
6067        pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6068        if (pid == NULL) {
6069            if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6070                PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6071                                "persistent IDs in protocol 0 must be "
6072                                "ASCII strings");
6073            }
6074            return -1;
6075        }
6076
6077        obj = call_method(self->pers_func, self->pers_func_self, pid);
6078        Py_DECREF(pid);
6079        if (obj == NULL)
6080            return -1;
6081
6082        PDATA_PUSH(self->stack, obj, -1);
6083        return 0;
6084    }
6085    else {
6086        PickleState *st = _Pickle_GetGlobalState();
6087        PyErr_SetString(st->UnpicklingError,
6088                        "A load persistent id instruction was encountered,\n"
6089                        "but no persistent_load function was specified.");
6090        return -1;
6091    }
6092}
6093
6094static int
6095load_binpersid(UnpicklerObject *self)
6096{
6097    PyObject *pid, *obj;
6098
6099    if (self->pers_func) {
6100        PDATA_POP(self->stack, pid);
6101        if (pid == NULL)
6102            return -1;
6103
6104        obj = call_method(self->pers_func, self->pers_func_self, pid);
6105        Py_DECREF(pid);
6106        if (obj == NULL)
6107            return -1;
6108
6109        PDATA_PUSH(self->stack, obj, -1);
6110        return 0;
6111    }
6112    else {
6113        PickleState *st = _Pickle_GetGlobalState();
6114        PyErr_SetString(st->UnpicklingError,
6115                        "A load persistent id instruction was encountered,\n"
6116                        "but no persistent_load function was specified.");
6117        return -1;
6118    }
6119}
6120
6121static int
6122load_pop(UnpicklerObject *self)
6123{
6124    Py_ssize_t len = Py_SIZE(self->stack);
6125
6126    /* Note that we split the (pickle.py) stack into two stacks,
6127     * an object stack and a mark stack. We have to be clever and
6128     * pop the right one. We do this by looking at the top of the
6129     * mark stack first, and only signalling a stack underflow if
6130     * the object stack is empty and the mark stack doesn't match
6131     * our expectations.
6132     */
6133    if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6134        self->num_marks--;
6135        self->stack->mark_set = self->num_marks != 0;
6136        self->stack->fence = self->num_marks ?
6137                self->marks[self->num_marks - 1] : 0;
6138    } else if (len <= self->stack->fence)
6139        return Pdata_stack_underflow(self->stack);
6140    else {
6141        len--;
6142        Py_DECREF(self->stack->data[len]);
6143        Py_SET_SIZE(self->stack, len);
6144    }
6145    return 0;
6146}
6147
6148static int
6149load_pop_mark(UnpicklerObject *self)
6150{
6151    Py_ssize_t i;
6152
6153    if ((i = marker(self)) < 0)
6154        return -1;
6155
6156    Pdata_clear(self->stack, i);
6157
6158    return 0;
6159}
6160
6161static int
6162load_dup(UnpicklerObject *self)
6163{
6164    PyObject *last;
6165    Py_ssize_t len = Py_SIZE(self->stack);
6166
6167    if (len <= self->stack->fence)
6168        return Pdata_stack_underflow(self->stack);
6169    last = self->stack->data[len - 1];
6170    PDATA_APPEND(self->stack, last, -1);
6171    return 0;
6172}
6173
6174static int
6175load_get(UnpicklerObject *self)
6176{
6177    PyObject *key, *value;
6178    Py_ssize_t idx;
6179    Py_ssize_t len;
6180    char *s;
6181
6182    if ((len = _Unpickler_Readline(self, &s)) < 0)
6183        return -1;
6184    if (len < 2)
6185        return bad_readline();
6186
6187    key = PyLong_FromString(s, NULL, 10);
6188    if (key == NULL)
6189        return -1;
6190    idx = PyLong_AsSsize_t(key);
6191    if (idx == -1 && PyErr_Occurred()) {
6192        Py_DECREF(key);
6193        return -1;
6194    }
6195
6196    value = _Unpickler_MemoGet(self, idx);
6197    if (value == NULL) {
6198        if (!PyErr_Occurred()) {
6199           PickleState *st = _Pickle_GetGlobalState();
6200           PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6201        }
6202        Py_DECREF(key);
6203        return -1;
6204    }
6205    Py_DECREF(key);
6206
6207    PDATA_APPEND(self->stack, value, -1);
6208    return 0;
6209}
6210
6211static int
6212load_binget(UnpicklerObject *self)
6213{
6214    PyObject *value;
6215    Py_ssize_t idx;
6216    char *s;
6217
6218    if (_Unpickler_Read(self, &s, 1) < 0)
6219        return -1;
6220
6221    idx = Py_CHARMASK(s[0]);
6222
6223    value = _Unpickler_MemoGet(self, idx);
6224    if (value == NULL) {
6225        PyObject *key = PyLong_FromSsize_t(idx);
6226        if (key != NULL) {
6227            PickleState *st = _Pickle_GetGlobalState();
6228            PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6229            Py_DECREF(key);
6230        }
6231        return -1;
6232    }
6233
6234    PDATA_APPEND(self->stack, value, -1);
6235    return 0;
6236}
6237
6238static int
6239load_long_binget(UnpicklerObject *self)
6240{
6241    PyObject *value;
6242    Py_ssize_t idx;
6243    char *s;
6244
6245    if (_Unpickler_Read(self, &s, 4) < 0)
6246        return -1;
6247
6248    idx = calc_binsize(s, 4);
6249
6250    value = _Unpickler_MemoGet(self, idx);
6251    if (value == NULL) {
6252        PyObject *key = PyLong_FromSsize_t(idx);
6253        if (key != NULL) {
6254            PickleState *st = _Pickle_GetGlobalState();
6255            PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6256            Py_DECREF(key);
6257        }
6258        return -1;
6259    }
6260
6261    PDATA_APPEND(self->stack, value, -1);
6262    return 0;
6263}
6264
6265/* Push an object from the extension registry (EXT[124]).  nbytes is
6266 * the number of bytes following the opcode, holding the index (code) value.
6267 */
6268static int
6269load_extension(UnpicklerObject *self, int nbytes)
6270{
6271    char *codebytes;            /* the nbytes bytes after the opcode */
6272    long code;                  /* calc_binint returns long */
6273    PyObject *py_code;          /* code as a Python int */
6274    PyObject *obj;              /* the object to push */
6275    PyObject *pair;             /* (module_name, class_name) */
6276    PyObject *module_name, *class_name;
6277    PickleState *st = _Pickle_GetGlobalState();
6278
6279    assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6280    if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6281        return -1;
6282    code = calc_binint(codebytes, nbytes);
6283    if (code <= 0) {            /* note that 0 is forbidden */
6284        /* Corrupt or hostile pickle. */
6285        PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6286        return -1;
6287    }
6288
6289    /* Look for the code in the cache. */
6290    py_code = PyLong_FromLong(code);
6291    if (py_code == NULL)
6292        return -1;
6293    obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6294    if (obj != NULL) {
6295        /* Bingo. */
6296        Py_DECREF(py_code);
6297        PDATA_APPEND(self->stack, obj, -1);
6298        return 0;
6299    }
6300    if (PyErr_Occurred()) {
6301        Py_DECREF(py_code);
6302        return -1;
6303    }
6304
6305    /* Look up the (module_name, class_name) pair. */
6306    pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6307    if (pair == NULL) {
6308        Py_DECREF(py_code);
6309        if (!PyErr_Occurred()) {
6310            PyErr_Format(PyExc_ValueError, "unregistered extension "
6311                         "code %ld", code);
6312        }
6313        return -1;
6314    }
6315    /* Since the extension registry is manipulable via Python code,
6316     * confirm that pair is really a 2-tuple of strings.
6317     */
6318    if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6319        goto error;
6320    }
6321
6322    module_name = PyTuple_GET_ITEM(pair, 0);
6323    if (!PyUnicode_Check(module_name)) {
6324        goto error;
6325    }
6326
6327    class_name = PyTuple_GET_ITEM(pair, 1);
6328    if (!PyUnicode_Check(class_name)) {
6329        goto error;
6330    }
6331
6332    /* Load the object. */
6333    obj = find_class(self, module_name, class_name);
6334    if (obj == NULL) {
6335        Py_DECREF(py_code);
6336        return -1;
6337    }
6338    /* Cache code -> obj. */
6339    code = PyDict_SetItem(st->extension_cache, py_code, obj);
6340    Py_DECREF(py_code);
6341    if (code < 0) {
6342        Py_DECREF(obj);
6343        return -1;
6344    }
6345    PDATA_PUSH(self->stack, obj, -1);
6346    return 0;
6347
6348error:
6349    Py_DECREF(py_code);
6350    PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6351                 "isn't a 2-tuple of strings", code);
6352    return -1;
6353}
6354
6355static int
6356load_put(UnpicklerObject *self)
6357{
6358    PyObject *key, *value;
6359    Py_ssize_t idx;
6360    Py_ssize_t len;
6361    char *s = NULL;
6362
6363    if ((len = _Unpickler_Readline(self, &s)) < 0)
6364        return -1;
6365    if (len < 2)
6366        return bad_readline();
6367    if (Py_SIZE(self->stack) <= self->stack->fence)
6368        return Pdata_stack_underflow(self->stack);
6369    value = self->stack->data[Py_SIZE(self->stack) - 1];
6370
6371    key = PyLong_FromString(s, NULL, 10);
6372    if (key == NULL)
6373        return -1;
6374    idx = PyLong_AsSsize_t(key);
6375    Py_DECREF(key);
6376    if (idx < 0) {
6377        if (!PyErr_Occurred())
6378            PyErr_SetString(PyExc_ValueError,
6379                            "negative PUT argument");
6380        return -1;
6381    }
6382
6383    return _Unpickler_MemoPut(self, idx, value);
6384}
6385
6386static int
6387load_binput(UnpicklerObject *self)
6388{
6389    PyObject *value;
6390    Py_ssize_t idx;
6391    char *s;
6392
6393    if (_Unpickler_Read(self, &s, 1) < 0)
6394        return -1;
6395
6396    if (Py_SIZE(self->stack) <= self->stack->fence)
6397        return Pdata_stack_underflow(self->stack);
6398    value = self->stack->data[Py_SIZE(self->stack) - 1];
6399
6400    idx = Py_CHARMASK(s[0]);
6401
6402    return _Unpickler_MemoPut(self, idx, value);
6403}
6404
6405static int
6406load_long_binput(UnpicklerObject *self)
6407{
6408    PyObject *value;
6409    Py_ssize_t idx;
6410    char *s;
6411
6412    if (_Unpickler_Read(self, &s, 4) < 0)
6413        return -1;
6414
6415    if (Py_SIZE(self->stack) <= self->stack->fence)
6416        return Pdata_stack_underflow(self->stack);
6417    value = self->stack->data[Py_SIZE(self->stack) - 1];
6418
6419    idx = calc_binsize(s, 4);
6420    if (idx < 0) {
6421        PyErr_SetString(PyExc_ValueError,
6422                        "negative LONG_BINPUT argument");
6423        return -1;
6424    }
6425
6426    return _Unpickler_MemoPut(self, idx, value);
6427}
6428
6429static int
6430load_memoize(UnpicklerObject *self)
6431{
6432    PyObject *value;
6433
6434    if (Py_SIZE(self->stack) <= self->stack->fence)
6435        return Pdata_stack_underflow(self->stack);
6436    value = self->stack->data[Py_SIZE(self->stack) - 1];
6437
6438    return _Unpickler_MemoPut(self, self->memo_len, value);
6439}
6440
6441static int
6442do_append(UnpicklerObject *self, Py_ssize_t x)
6443{
6444    PyObject *value;
6445    PyObject *slice;
6446    PyObject *list;
6447    PyObject *result;
6448    Py_ssize_t len, i;
6449
6450    len = Py_SIZE(self->stack);
6451    if (x > len || x <= self->stack->fence)
6452        return Pdata_stack_underflow(self->stack);
6453    if (len == x)  /* nothing to do */
6454        return 0;
6455
6456    list = self->stack->data[x - 1];
6457
6458    if (PyList_CheckExact(list)) {
6459        Py_ssize_t list_len;
6460        int ret;
6461
6462        slice = Pdata_poplist(self->stack, x);
6463        if (!slice)
6464            return -1;
6465        list_len = PyList_GET_SIZE(list);
6466        ret = PyList_SetSlice(list, list_len, list_len, slice);
6467        Py_DECREF(slice);
6468        return ret;
6469    }
6470    else {
6471        PyObject *extend_func;
6472
6473        if (_PyObject_LookupAttr(list, &_Py_ID(extend), &extend_func) < 0) {
6474            return -1;
6475        }
6476        if (extend_func != NULL) {
6477            slice = Pdata_poplist(self->stack, x);
6478            if (!slice) {
6479                Py_DECREF(extend_func);
6480                return -1;
6481            }
6482            result = _Pickle_FastCall(extend_func, slice);
6483            Py_DECREF(extend_func);
6484            if (result == NULL)
6485                return -1;
6486            Py_DECREF(result);
6487        }
6488        else {
6489            PyObject *append_func;
6490
6491            /* Even if the PEP 307 requires extend() and append() methods,
6492               fall back on append() if the object has no extend() method
6493               for backward compatibility. */
6494            append_func = PyObject_GetAttr(list, &_Py_ID(append));
6495            if (append_func == NULL)
6496                return -1;
6497            for (i = x; i < len; i++) {
6498                value = self->stack->data[i];
6499                result = _Pickle_FastCall(append_func, value);
6500                if (result == NULL) {
6501                    Pdata_clear(self->stack, i + 1);
6502                    Py_SET_SIZE(self->stack, x);
6503                    Py_DECREF(append_func);
6504                    return -1;
6505                }
6506                Py_DECREF(result);
6507            }
6508            Py_SET_SIZE(self->stack, x);
6509            Py_DECREF(append_func);
6510        }
6511    }
6512
6513    return 0;
6514}
6515
6516static int
6517load_append(UnpicklerObject *self)
6518{
6519    if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6520        return Pdata_stack_underflow(self->stack);
6521    return do_append(self, Py_SIZE(self->stack) - 1);
6522}
6523
6524static int
6525load_appends(UnpicklerObject *self)
6526{
6527    Py_ssize_t i = marker(self);
6528    if (i < 0)
6529        return -1;
6530    return do_append(self, i);
6531}
6532
6533static int
6534do_setitems(UnpicklerObject *self, Py_ssize_t x)
6535{
6536    PyObject *value, *key;
6537    PyObject *dict;
6538    Py_ssize_t len, i;
6539    int status = 0;
6540
6541    len = Py_SIZE(self->stack);
6542    if (x > len || x <= self->stack->fence)
6543        return Pdata_stack_underflow(self->stack);
6544    if (len == x)  /* nothing to do */
6545        return 0;
6546    if ((len - x) % 2 != 0) {
6547        PickleState *st = _Pickle_GetGlobalState();
6548        /* Corrupt or hostile pickle -- we never write one like this. */
6549        PyErr_SetString(st->UnpicklingError,
6550                        "odd number of items for SETITEMS");
6551        return -1;
6552    }
6553
6554    /* Here, dict does not actually need to be a PyDict; it could be anything
6555       that supports the __setitem__ attribute. */
6556    dict = self->stack->data[x - 1];
6557
6558    for (i = x + 1; i < len; i += 2) {
6559        key = self->stack->data[i - 1];
6560        value = self->stack->data[i];
6561        if (PyObject_SetItem(dict, key, value) < 0) {
6562            status = -1;
6563            break;
6564        }
6565    }
6566
6567    Pdata_clear(self->stack, x);
6568    return status;
6569}
6570
6571static int
6572load_setitem(UnpicklerObject *self)
6573{
6574    return do_setitems(self, Py_SIZE(self->stack) - 2);
6575}
6576
6577static int
6578load_setitems(UnpicklerObject *self)
6579{
6580    Py_ssize_t i = marker(self);
6581    if (i < 0)
6582        return -1;
6583    return do_setitems(self, i);
6584}
6585
6586static int
6587load_additems(UnpicklerObject *self)
6588{
6589    PyObject *set;
6590    Py_ssize_t mark, len, i;
6591
6592    mark =  marker(self);
6593    if (mark < 0)
6594        return -1;
6595    len = Py_SIZE(self->stack);
6596    if (mark > len || mark <= self->stack->fence)
6597        return Pdata_stack_underflow(self->stack);
6598    if (len == mark)  /* nothing to do */
6599        return 0;
6600
6601    set = self->stack->data[mark - 1];
6602
6603    if (PySet_Check(set)) {
6604        PyObject *items;
6605        int status;
6606
6607        items = Pdata_poptuple(self->stack, mark);
6608        if (items == NULL)
6609            return -1;
6610
6611        status = _PySet_Update(set, items);
6612        Py_DECREF(items);
6613        return status;
6614    }
6615    else {
6616        PyObject *add_func;
6617
6618        add_func = PyObject_GetAttr(set, &_Py_ID(add));
6619        if (add_func == NULL)
6620            return -1;
6621        for (i = mark; i < len; i++) {
6622            PyObject *result;
6623            PyObject *item;
6624
6625            item = self->stack->data[i];
6626            result = _Pickle_FastCall(add_func, item);
6627            if (result == NULL) {
6628                Pdata_clear(self->stack, i + 1);
6629                Py_SET_SIZE(self->stack, mark);
6630                return -1;
6631            }
6632            Py_DECREF(result);
6633        }
6634        Py_SET_SIZE(self->stack, mark);
6635    }
6636
6637    return 0;
6638}
6639
6640static int
6641load_build(UnpicklerObject *self)
6642{
6643    PyObject *state, *inst, *slotstate;
6644    PyObject *setstate;
6645    int status = 0;
6646
6647    /* Stack is ... instance, state.  We want to leave instance at
6648     * the stack top, possibly mutated via instance.__setstate__(state).
6649     */
6650    if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6651        return Pdata_stack_underflow(self->stack);
6652
6653    PDATA_POP(self->stack, state);
6654    if (state == NULL)
6655        return -1;
6656
6657    inst = self->stack->data[Py_SIZE(self->stack) - 1];
6658
6659    if (_PyObject_LookupAttr(inst, &_Py_ID(__setstate__), &setstate) < 0) {
6660        Py_DECREF(state);
6661        return -1;
6662    }
6663    if (setstate != NULL) {
6664        PyObject *result;
6665
6666        /* The explicit __setstate__ is responsible for everything. */
6667        result = _Pickle_FastCall(setstate, state);
6668        Py_DECREF(setstate);
6669        if (result == NULL)
6670            return -1;
6671        Py_DECREF(result);
6672        return 0;
6673    }
6674
6675    /* A default __setstate__.  First see whether state embeds a
6676     * slot state dict too (a proto 2 addition).
6677     */
6678    if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6679        PyObject *tmp = state;
6680
6681        state = PyTuple_GET_ITEM(tmp, 0);
6682        slotstate = PyTuple_GET_ITEM(tmp, 1);
6683        Py_INCREF(state);
6684        Py_INCREF(slotstate);
6685        Py_DECREF(tmp);
6686    }
6687    else
6688        slotstate = NULL;
6689
6690    /* Set inst.__dict__ from the state dict (if any). */
6691    if (state != Py_None) {
6692        PyObject *dict;
6693        PyObject *d_key, *d_value;
6694        Py_ssize_t i;
6695
6696        if (!PyDict_Check(state)) {
6697            PickleState *st = _Pickle_GetGlobalState();
6698            PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6699            goto error;
6700        }
6701        dict = PyObject_GetAttr(inst, &_Py_ID(__dict__));
6702        if (dict == NULL)
6703            goto error;
6704
6705        i = 0;
6706        while (PyDict_Next(state, &i, &d_key, &d_value)) {
6707            /* normally the keys for instance attributes are
6708               interned.  we should try to do that here. */
6709            Py_INCREF(d_key);
6710            if (PyUnicode_CheckExact(d_key))
6711                PyUnicode_InternInPlace(&d_key);
6712            if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6713                Py_DECREF(d_key);
6714                goto error;
6715            }
6716            Py_DECREF(d_key);
6717        }
6718        Py_DECREF(dict);
6719    }
6720
6721    /* Also set instance attributes from the slotstate dict (if any). */
6722    if (slotstate != NULL) {
6723        PyObject *d_key, *d_value;
6724        Py_ssize_t i;
6725
6726        if (!PyDict_Check(slotstate)) {
6727            PickleState *st = _Pickle_GetGlobalState();
6728            PyErr_SetString(st->UnpicklingError,
6729                            "slot state is not a dictionary");
6730            goto error;
6731        }
6732        i = 0;
6733        while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6734            if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6735                goto error;
6736        }
6737    }
6738
6739    if (0) {
6740  error:
6741        status = -1;
6742    }
6743
6744    Py_DECREF(state);
6745    Py_XDECREF(slotstate);
6746    return status;
6747}
6748
6749static int
6750load_mark(UnpicklerObject *self)
6751{
6752
6753    /* Note that we split the (pickle.py) stack into two stacks, an
6754     * object stack and a mark stack. Here we push a mark onto the
6755     * mark stack.
6756     */
6757
6758    if (self->num_marks >= self->marks_size) {
6759        size_t alloc = ((size_t)self->num_marks << 1) + 20;
6760        Py_ssize_t *marks_new = self->marks;
6761        PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6762        if (marks_new == NULL) {
6763            PyErr_NoMemory();
6764            return -1;
6765        }
6766        self->marks = marks_new;
6767        self->marks_size = (Py_ssize_t)alloc;
6768    }
6769
6770    self->stack->mark_set = 1;
6771    self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6772
6773    return 0;
6774}
6775
6776static int
6777load_reduce(UnpicklerObject *self)
6778{
6779    PyObject *callable = NULL;
6780    PyObject *argtup = NULL;
6781    PyObject *obj = NULL;
6782
6783    PDATA_POP(self->stack, argtup);
6784    if (argtup == NULL)
6785        return -1;
6786    PDATA_POP(self->stack, callable);
6787    if (callable) {
6788        obj = PyObject_CallObject(callable, argtup);
6789        Py_DECREF(callable);
6790    }
6791    Py_DECREF(argtup);
6792
6793    if (obj == NULL)
6794        return -1;
6795
6796    PDATA_PUSH(self->stack, obj, -1);
6797    return 0;
6798}
6799
6800/* Just raises an error if we don't know the protocol specified.  PROTO
6801 * is the first opcode for protocols >= 2.
6802 */
6803static int
6804load_proto(UnpicklerObject *self)
6805{
6806    char *s;
6807    int i;
6808
6809    if (_Unpickler_Read(self, &s, 1) < 0)
6810        return -1;
6811
6812    i = (unsigned char)s[0];
6813    if (i <= HIGHEST_PROTOCOL) {
6814        self->proto = i;
6815        return 0;
6816    }
6817
6818    PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6819    return -1;
6820}
6821
6822static int
6823load_frame(UnpicklerObject *self)
6824{
6825    char *s;
6826    Py_ssize_t frame_len;
6827
6828    if (_Unpickler_Read(self, &s, 8) < 0)
6829        return -1;
6830
6831    frame_len = calc_binsize(s, 8);
6832    if (frame_len < 0) {
6833        PyErr_Format(PyExc_OverflowError,
6834                     "FRAME length exceeds system's maximum of %zd bytes",
6835                     PY_SSIZE_T_MAX);
6836        return -1;
6837    }
6838
6839    if (_Unpickler_Read(self, &s, frame_len) < 0)
6840        return -1;
6841
6842    /* Rewind to start of frame */
6843    self->next_read_idx -= frame_len;
6844    return 0;
6845}
6846
/* Main unpickling loop.
 *
 * Resets the per-load state of the unpickler, then reads one opcode byte at
 * a time and dispatches it to the matching load_* handler until STOP is
 * read or a handler reports failure.
 *
 * Returns the object popped from the unpickler stack after the loop, or
 * NULL on failure.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *value = NULL;
    char *s = NULL;

    /* Start from a clean state: no marks, no fence, protocol 0 and an
       empty stack. */
    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       A handler returning < 0 breaks out of the switch and reaches the
       loop-terminating break after it; otherwise `continue` moves on to
       the next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0) {
            /* An UnpicklingError raised while reading the opcode byte is
               replaced by EOFError; other exceptions are left as they are. */
            PickleState *st = _Pickle_GetGlobalState();
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            return NULL;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP(BYTEARRAY8, load_counted_bytearray)
        OP(NEXT_BUFFER, load_next_buffer)
        OP(READONLY_BUFFER, load_readonly_buffer)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP_ARG(NEWOBJ, load_newobj, 0)
        OP_ARG(NEWOBJ_EX, load_newobj, 1)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        /* STOP leaves the switch without an error set, so the break
           below ends the loop normally. */
        case STOP:
            break;

        default:
            {
                /* Unknown opcode: printable ASCII bytes other than quote
                   and backslash are shown as the character itself, all
                   others as a \xNN escape. */
                PickleState *st = _Pickle_GetGlobalState();
                unsigned char c = (unsigned char) *s;
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                return NULL;
            }
        }

        break;                  /* and we are done! */
    }

    /* The loop is left both on STOP and on a handler failure; a pending
       exception tells the two apart. */
    if (PyErr_Occurred()) {
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    PDATA_POP(self->stack, value);
    return value;
}
6977
6978/*[clinic input]
6979
6980_pickle.Unpickler.load
6981
6982Load a pickle.
6983
6984Read a pickled object representation from the open file object given
6985in the constructor, and return the reconstituted object hierarchy
6986specified therein.
6987[clinic start generated code]*/
6988
6989static PyObject *
6990_pickle_Unpickler_load_impl(UnpicklerObject *self)
6991/*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6992{
6993    UnpicklerObject *unpickler = (UnpicklerObject*)self;
6994
6995    /* Check whether the Unpickler was initialized correctly. This prevents
6996       segfaulting if a subclass overridden __init__ with a function that does
6997       not call Unpickler.__init__(). Here, we simply ensure that self->read
6998       is not NULL. */
6999    if (unpickler->read == NULL) {
7000        PickleState *st = _Pickle_GetGlobalState();
7001        PyErr_Format(st->UnpicklingError,
7002                     "Unpickler.__init__() was not called by %s.__init__()",
7003                     Py_TYPE(unpickler)->tp_name);
7004        return NULL;
7005    }
7006
7007    return load(unpickler);
7008}
7009
7010/* The name of find_class() is misleading. In newer pickle protocols, this
7011   function is used for loading any global (i.e., functions), not just
7012   classes. The name is kept only for backward compatibility. */
7013
7014/*[clinic input]
7015
7016_pickle.Unpickler.find_class
7017
7018  module_name: object
7019  global_name: object
7020  /
7021
7022Return an object from a specified module.
7023
7024If necessary, the module will be imported. Subclasses may override
7025this method (e.g. to restrict unpickling of arbitrary classes and
7026functions).
7027
7028This method is called whenever a class or a function object is
7029needed.  Both arguments passed are str objects.
7030[clinic start generated code]*/
7031
7032static PyObject *
7033_pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7034                                  PyObject *module_name,
7035                                  PyObject *global_name)
7036/*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7037{
7038    PyObject *global;
7039    PyObject *module;
7040
7041    if (PySys_Audit("pickle.find_class", "OO",
7042                    module_name, global_name) < 0) {
7043        return NULL;
7044    }
7045
7046    /* Try to map the old names used in Python 2.x to the new ones used in
7047       Python 3.x.  We do this only with old pickle protocols and when the
7048       user has not disabled the feature. */
7049    if (self->proto < 3 && self->fix_imports) {
7050        PyObject *key;
7051        PyObject *item;
7052        PickleState *st = _Pickle_GetGlobalState();
7053
7054        /* Check if the global (i.e., a function or a class) was renamed
7055           or moved to another module. */
7056        key = PyTuple_Pack(2, module_name, global_name);
7057        if (key == NULL)
7058            return NULL;
7059        item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7060        Py_DECREF(key);
7061        if (item) {
7062            if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7063                PyErr_Format(PyExc_RuntimeError,
7064                             "_compat_pickle.NAME_MAPPING values should be "
7065                             "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7066                return NULL;
7067            }
7068            module_name = PyTuple_GET_ITEM(item, 0);
7069            global_name = PyTuple_GET_ITEM(item, 1);
7070            if (!PyUnicode_Check(module_name) ||
7071                !PyUnicode_Check(global_name)) {
7072                PyErr_Format(PyExc_RuntimeError,
7073                             "_compat_pickle.NAME_MAPPING values should be "
7074                             "pairs of str, not (%.200s, %.200s)",
7075                             Py_TYPE(module_name)->tp_name,
7076                             Py_TYPE(global_name)->tp_name);
7077                return NULL;
7078            }
7079        }
7080        else if (PyErr_Occurred()) {
7081            return NULL;
7082        }
7083        else {
7084            /* Check if the module was renamed. */
7085            item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7086            if (item) {
7087                if (!PyUnicode_Check(item)) {
7088                    PyErr_Format(PyExc_RuntimeError,
7089                                "_compat_pickle.IMPORT_MAPPING values should be "
7090                                "strings, not %.200s", Py_TYPE(item)->tp_name);
7091                    return NULL;
7092                }
7093                module_name = item;
7094            }
7095            else if (PyErr_Occurred()) {
7096                return NULL;
7097            }
7098        }
7099    }
7100
7101    /*
7102     * we don't use PyImport_GetModule here, because it can return partially-
7103     * initialised modules, which then cause the getattribute to fail.
7104     */
7105    module = PyImport_Import(module_name);
7106    if (module == NULL) {
7107        return NULL;
7108    }
7109    global = getattribute(module, global_name, self->proto >= 4);
7110    Py_DECREF(module);
7111    return global;
7112}
7113
7114/*[clinic input]
7115
7116_pickle.Unpickler.__sizeof__ -> Py_ssize_t
7117
7118Returns size in memory, in bytes.
7119[clinic start generated code]*/
7120
7121static Py_ssize_t
7122_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7123/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7124{
7125    Py_ssize_t res;
7126
7127    res = _PyObject_SIZE(Py_TYPE(self));
7128    if (self->memo != NULL)
7129        res += self->memo_size * sizeof(PyObject *);
7130    if (self->marks != NULL)
7131        res += self->marks_size * sizeof(Py_ssize_t);
7132    if (self->input_line != NULL)
7133        res += strlen(self->input_line) + 1;
7134    if (self->encoding != NULL)
7135        res += strlen(self->encoding) + 1;
7136    if (self->errors != NULL)
7137        res += strlen(self->errors) + 1;
7138    return res;
7139}
7140
/* Method table installed as tp_methods of Unpickler_Type.  Each entry is a
   *_METHODDEF macro defined outside this section (presumably generated by
   Argument Clinic for the matching *_impl function -- confirm). */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
7147
/* tp_dealloc for Unpickler_Type: release every reference and buffer owned
   by the unpickler, then free the object itself. */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop GC tracking before the fields are torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->readinto);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->buffers);
    /* A non-NULL buf means a buffer view is still held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Release the memo and the raw PyMem allocations. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    PyMem_Free(self->encoding);
    PyMem_Free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
7172
/* tp_traverse for Unpickler_Type: report the object references held
   directly in the struct to the garbage collector.  The parameter names
   `visit` and `arg` are required by the Py_VISIT macro. */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->readinto);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->buffers);
    return 0;
}
7185
/* tp_clear for Unpickler_Type; also called from __init__() when the object
   is initialized a second time.  Drops every owned reference and
   allocation and resets the corresponding pointer to NULL.
   Always returns 0. */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->readinto);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->buffers);
    /* A non-NULL buf means a buffer view is still held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    /* Each freed pointer is reset right away so that a later clear or
       dealloc does not free it again. */
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    PyMem_Free(self->encoding);
    self->encoding = NULL;
    PyMem_Free(self->errors);
    self->errors = NULL;

    return 0;
}
7213
7214/*[clinic input]
7215
7216_pickle.Unpickler.__init__
7217
7218  file: object
7219  *
7220  fix_imports: bool = True
7221  encoding: str = 'ASCII'
7222  errors: str = 'strict'
7223  buffers: object(c_default="NULL") = ()
7224
7225This takes a binary file for reading a pickle data stream.
7226
7227The protocol version of the pickle is detected automatically, so no
7228protocol argument is needed.  Bytes past the pickled object's
7229representation are ignored.
7230
7231The argument *file* must have two methods, a read() method that takes
7232an integer argument, and a readline() method that requires no
7233arguments.  Both methods should return bytes.  Thus *file* can be a
7234binary file object opened for reading, an io.BytesIO object, or any
7235other custom object that meets this interface.
7236
7237Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7238which are used to control compatibility support for pickle stream
7239generated by Python 2.  If *fix_imports* is True, pickle will try to
7240map the old Python 2 names to the new names used in Python 3.  The
7241*encoding* and *errors* tell pickle how to decode 8-bit string
7242instances pickled by Python 2; these default to 'ASCII' and 'strict',
7243respectively.  The *encoding* can be 'bytes' to read these 8-bit
7244string instances as bytes objects.
7245[clinic start generated code]*/
7246
7247static int
7248_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7249                                int fix_imports, const char *encoding,
7250                                const char *errors, PyObject *buffers)
7251/*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7252{
7253    /* In case of multiple __init__() calls, clear previous content. */
7254    if (self->read != NULL)
7255        (void)Unpickler_clear(self);
7256
7257    if (_Unpickler_SetInputStream(self, file) < 0)
7258        return -1;
7259
7260    if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7261        return -1;
7262
7263    if (_Unpickler_SetBuffers(self, buffers) < 0)
7264        return -1;
7265
7266    self->fix_imports = fix_imports;
7267
7268    if (init_method_ref((PyObject *)self, &_Py_ID(persistent_load),
7269                        &self->pers_func, &self->pers_func_self) < 0)
7270    {
7271        return -1;
7272    }
7273
7274    self->stack = (Pdata *)Pdata_New();
7275    if (self->stack == NULL)
7276        return -1;
7277
7278    self->memo_size = 32;
7279    self->memo = _Unpickler_NewMemo(self->memo_size);
7280    if (self->memo == NULL)
7281        return -1;
7282
7283    self->proto = 0;
7284
7285    return 0;
7286}
7287
7288
7289/* Define a proxy object for the Unpickler's internal memo object. This is to
7290 * avoid breaking code like:
7291 *  unpickler.memo.clear()
7292 * and
7293 *  unpickler.memo = saved_memo
7294 * Is this a good idea? Not really, but we don't want to break code that uses
7295 * it. Note that we don't implement the entire mapping API here. This is
7296 * intentional, as these should be treated as black-box implementation details.
7297 *
7298 * We do, however, have to implement pickling/unpickling support because of
7299 * real-world code like cvs2svn.
7300 */
7301
7302/*[clinic input]
7303_pickle.UnpicklerMemoProxy.clear
7304
7305Remove all items from memo.
7306[clinic start generated code]*/
7307
7308static PyObject *
7309_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7310/*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7311{
7312    _Unpickler_MemoCleanup(self->unpickler);
7313    self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7314    if (self->unpickler->memo == NULL)
7315        return NULL;
7316    Py_RETURN_NONE;
7317}
7318
7319/*[clinic input]
7320_pickle.UnpicklerMemoProxy.copy
7321
7322Copy the memo to a new object.
7323[clinic start generated code]*/
7324
7325static PyObject *
7326_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7327/*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7328{
7329    size_t i;
7330    PyObject *new_memo = PyDict_New();
7331    if (new_memo == NULL)
7332        return NULL;
7333
7334    for (i = 0; i < self->unpickler->memo_size; i++) {
7335        int status;
7336        PyObject *key, *value;
7337
7338        value = self->unpickler->memo[i];
7339        if (value == NULL)
7340            continue;
7341
7342        key = PyLong_FromSsize_t(i);
7343        if (key == NULL)
7344            goto error;
7345        status = PyDict_SetItem(new_memo, key, value);
7346        Py_DECREF(key);
7347        if (status < 0)
7348            goto error;
7349    }
7350    return new_memo;
7351
7352error:
7353    Py_DECREF(new_memo);
7354    return NULL;
7355}
7356
7357/*[clinic input]
7358_pickle.UnpicklerMemoProxy.__reduce__
7359
7360Implement pickling support.
7361[clinic start generated code]*/
7362
7363static PyObject *
7364_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7365/*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7366{
7367    PyObject *reduce_value;
7368    PyObject *constructor_args;
7369    PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7370    if (contents == NULL)
7371        return NULL;
7372
7373    reduce_value = PyTuple_New(2);
7374    if (reduce_value == NULL) {
7375        Py_DECREF(contents);
7376        return NULL;
7377    }
7378    constructor_args = PyTuple_New(1);
7379    if (constructor_args == NULL) {
7380        Py_DECREF(contents);
7381        Py_DECREF(reduce_value);
7382        return NULL;
7383    }
7384    PyTuple_SET_ITEM(constructor_args, 0, contents);
7385    Py_INCREF((PyObject *)&PyDict_Type);
7386    PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7387    PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7388    return reduce_value;
7389}
7390
/* Method table installed as tp_methods of UnpicklerMemoProxyType.  Each
   entry is a *_METHODDEF macro defined outside this section (presumably
   generated by Argument Clinic -- confirm). */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
7397
/* tp_dealloc for UnpicklerMemoProxyType: untrack the proxy, drop its
   reference to the unpickler and free the proxy object. */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
7405
/* tp_traverse for UnpicklerMemoProxyType: the unpickler is the only object
   reference the proxy holds.  The parameter names `visit` and `arg` are
   required by the Py_VISIT macro. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
7413
/* tp_clear for UnpicklerMemoProxyType: drop the reference to the unpickler
   and reset the field to NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
7420
/* Type object for the memo proxy created by UnpicklerMemoProxy_New().  The
   initializer is positional, so the order of the entries must follow the
   PyTypeObject field order; slots after tp_methods are left zero. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /*tp_name*/
    sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    /* tp_flags */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
7451
7452static PyObject *
7453UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7454{
7455    UnpicklerMemoProxyObject *self;
7456
7457    self = PyObject_GC_New(UnpicklerMemoProxyObject,
7458                           &UnpicklerMemoProxyType);
7459    if (self == NULL)
7460        return NULL;
7461    Py_INCREF(unpickler);
7462    self->unpickler = unpickler;
7463    PyObject_GC_Track(self);
7464    return (PyObject *)self;
7465}
7466
7467/*****************************************************************************/
7468
7469
/* Getter for the `memo` attribute: returns a newly created
   UnpicklerMemoProxy bound to this unpickler (NULL on failure).  A new
   proxy object is built on every access. */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
{
    return UnpicklerMemoProxy_New(self);
}
7475
7476static int
7477Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7478{
7479    PyObject **new_memo;
7480    size_t new_memo_size = 0;
7481
7482    if (obj == NULL) {
7483        PyErr_SetString(PyExc_TypeError,
7484                        "attribute deletion is not supported");
7485        return -1;
7486    }
7487
7488    if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
7489        UnpicklerObject *unpickler =
7490            ((UnpicklerMemoProxyObject *)obj)->unpickler;
7491
7492        new_memo_size = unpickler->memo_size;
7493        new_memo = _Unpickler_NewMemo(new_memo_size);
7494        if (new_memo == NULL)
7495            return -1;
7496
7497        for (size_t i = 0; i < new_memo_size; i++) {
7498            Py_XINCREF(unpickler->memo[i]);
7499            new_memo[i] = unpickler->memo[i];
7500        }
7501    }
7502    else if (PyDict_Check(obj)) {
7503        Py_ssize_t i = 0;
7504        PyObject *key, *value;
7505
7506        new_memo_size = PyDict_GET_SIZE(obj);
7507        new_memo = _Unpickler_NewMemo(new_memo_size);
7508        if (new_memo == NULL)
7509            return -1;
7510
7511        while (PyDict_Next(obj, &i, &key, &value)) {
7512            Py_ssize_t idx;
7513            if (!PyLong_Check(key)) {
7514                PyErr_SetString(PyExc_TypeError,
7515                                "memo key must be integers");
7516                goto error;
7517            }
7518            idx = PyLong_AsSsize_t(key);
7519            if (idx == -1 && PyErr_Occurred())
7520                goto error;
7521            if (idx < 0) {
7522                PyErr_SetString(PyExc_ValueError,
7523                                "memo key must be positive integers.");
7524                goto error;
7525            }
7526            if (_Unpickler_MemoPut(self, idx, value) < 0)
7527                goto error;
7528        }
7529    }
7530    else {
7531        PyErr_Format(PyExc_TypeError,
7532                     "'memo' attribute must be an UnpicklerMemoProxy object "
7533                     "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7534        return -1;
7535    }
7536
7537    _Unpickler_MemoCleanup(self);
7538    self->memo_size = new_memo_size;
7539    self->memo = new_memo;
7540
7541    return 0;
7542
7543  error:
7544    if (new_memo_size) {
7545        for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7546            Py_XDECREF(new_memo[i]);
7547        }
7548        PyMem_Free(new_memo);
7549    }
7550    return -1;
7551}
7552
7553static PyObject *
7554Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7555{
7556    if (self->pers_func == NULL) {
7557        PyErr_SetString(PyExc_AttributeError, "persistent_load");
7558        return NULL;
7559    }
7560    return reconstruct_method(self->pers_func, self->pers_func_self);
7561}
7562
7563static int
7564Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7565{
7566    if (value == NULL) {
7567        PyErr_SetString(PyExc_TypeError,
7568                        "attribute deletion is not supported");
7569        return -1;
7570    }
7571    if (!PyCallable_Check(value)) {
7572        PyErr_SetString(PyExc_TypeError,
7573                        "persistent_load must be a callable taking "
7574                        "one argument");
7575        return -1;
7576    }
7577
7578    self->pers_func_self = NULL;
7579    Py_INCREF(value);
7580    Py_XSETREF(self->pers_func, value);
7581
7582    return 0;
7583}
7584
/* Computed attributes of Unpickler, installed as tp_getset of
   Unpickler_Type: `memo` and `persistent_load`, each with a getter and a
   setter. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}                      /* sentinel */
};
7591
/* Type object for _pickle.Unpickler.  The initializer is positional, so
   the order of the entries must follow the PyTypeObject field order. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_vectorcall_offset*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_as_async*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    /*tp_flags*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    _pickle_Unpickler___init____doc__,  /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Unpickler___init__,         /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
7634
7635/*[clinic input]
7636
7637_pickle.dump
7638
7639  obj: object
7640  file: object
7641  protocol: object = None
7642  *
7643  fix_imports: bool = True
7644  buffer_callback: object = None
7645
7646Write a pickled representation of obj to the open file object file.
7647
7648This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7649be more efficient.
7650
7651The optional *protocol* argument tells the pickler to use the given
7652protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7653protocol is 4. It was introduced in Python 3.4, and is incompatible
7654with previous versions.
7655
7656Specifying a negative protocol version selects the highest protocol
7657version supported.  The higher the protocol used, the more recent the
7658version of Python needed to read the pickle produced.
7659
7660The *file* argument must have a write() method that accepts a single
7661bytes argument.  It can thus be a file object opened for binary
7662writing, an io.BytesIO instance, or any other custom object that meets
7663this interface.
7664
7665If *fix_imports* is True and protocol is less than 3, pickle will try
7666to map the new Python 3 names to the old module names used in Python
76672, so that the pickle data stream is readable with Python 2.
7668
7669If *buffer_callback* is None (the default), buffer views are serialized
7670into *file* as part of the pickle stream.  It is an error if
7671*buffer_callback* is not None and *protocol* is None or smaller than 5.
7672
7673[clinic start generated code]*/
7674
7675static PyObject *
7676_pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7677                  PyObject *protocol, int fix_imports,
7678                  PyObject *buffer_callback)
7679/*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7680{
7681    PicklerObject *pickler = _Pickler_New();
7682
7683    if (pickler == NULL)
7684        return NULL;
7685
7686    if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7687        goto error;
7688
7689    if (_Pickler_SetOutputStream(pickler, file) < 0)
7690        goto error;
7691
7692    if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7693        goto error;
7694
7695    if (dump(pickler, obj) < 0)
7696        goto error;
7697
7698    if (_Pickler_FlushToFile(pickler) < 0)
7699        goto error;
7700
7701    Py_DECREF(pickler);
7702    Py_RETURN_NONE;
7703
7704  error:
7705    Py_XDECREF(pickler);
7706    return NULL;
7707}
7708
7709/*[clinic input]
7710
7711_pickle.dumps
7712
7713  obj: object
7714  protocol: object = None
7715  *
7716  fix_imports: bool = True
7717  buffer_callback: object = None
7718
7719Return the pickled representation of the object as a bytes object.
7720
7721The optional *protocol* argument tells the pickler to use the given
7722protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7723protocol is 4. It was introduced in Python 3.4, and is incompatible
7724with previous versions.
7725
7726Specifying a negative protocol version selects the highest protocol
7727version supported.  The higher the protocol used, the more recent the
7728version of Python needed to read the pickle produced.
7729
7730If *fix_imports* is True and *protocol* is less than 3, pickle will
7731try to map the new Python 3 names to the old module names used in
7732Python 2, so that the pickle data stream is readable with Python 2.
7733
7734If *buffer_callback* is None (the default), buffer views are serialized
7735into *file* as part of the pickle stream.  It is an error if
7736*buffer_callback* is not None and *protocol* is None or smaller than 5.
7737
7738[clinic start generated code]*/
7739
7740static PyObject *
7741_pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7742                   int fix_imports, PyObject *buffer_callback)
7743/*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7744{
7745    PyObject *result;
7746    PicklerObject *pickler = _Pickler_New();
7747
7748    if (pickler == NULL)
7749        return NULL;
7750
7751    if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7752        goto error;
7753
7754    if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7755        goto error;
7756
7757    if (dump(pickler, obj) < 0)
7758        goto error;
7759
7760    result = _Pickler_GetString(pickler);
7761    Py_DECREF(pickler);
7762    return result;
7763
7764  error:
7765    Py_XDECREF(pickler);
7766    return NULL;
7767}
7768
7769/*[clinic input]
7770
7771_pickle.load
7772
7773  file: object
7774  *
7775  fix_imports: bool = True
7776  encoding: str = 'ASCII'
7777  errors: str = 'strict'
7778  buffers: object(c_default="NULL") = ()
7779
7780Read and return an object from the pickle data stored in a file.
7781
7782This is equivalent to ``Unpickler(file).load()``, but may be more
7783efficient.
7784
7785The protocol version of the pickle is detected automatically, so no
7786protocol argument is needed.  Bytes past the pickled object's
7787representation are ignored.
7788
7789The argument *file* must have two methods, a read() method that takes
7790an integer argument, and a readline() method that requires no
7791arguments.  Both methods should return bytes.  Thus *file* can be a
7792binary file object opened for reading, an io.BytesIO object, or any
7793other custom object that meets this interface.
7794
7795Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7796which are used to control compatibility support for pickle stream
7797generated by Python 2.  If *fix_imports* is True, pickle will try to
7798map the old Python 2 names to the new names used in Python 3.  The
7799*encoding* and *errors* tell pickle how to decode 8-bit string
7800instances pickled by Python 2; these default to 'ASCII' and 'strict',
7801respectively.  The *encoding* can be 'bytes' to read these 8-bit
7802string instances as bytes objects.
7803[clinic start generated code]*/
7804
7805static PyObject *
7806_pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7807                  const char *encoding, const char *errors,
7808                  PyObject *buffers)
7809/*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7810{
7811    PyObject *result;
7812    UnpicklerObject *unpickler = _Unpickler_New();
7813
7814    if (unpickler == NULL)
7815        return NULL;
7816
7817    if (_Unpickler_SetInputStream(unpickler, file) < 0)
7818        goto error;
7819
7820    if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7821        goto error;
7822
7823    if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7824        goto error;
7825
7826    unpickler->fix_imports = fix_imports;
7827
7828    result = load(unpickler);
7829    Py_DECREF(unpickler);
7830    return result;
7831
7832  error:
7833    Py_XDECREF(unpickler);
7834    return NULL;
7835}
7836
7837/*[clinic input]
7838
7839_pickle.loads
7840
7841  data: object
7842  /
7843  *
7844  fix_imports: bool = True
7845  encoding: str = 'ASCII'
7846  errors: str = 'strict'
7847  buffers: object(c_default="NULL") = ()
7848
7849Read and return an object from the given pickle data.
7850
7851The protocol version of the pickle is detected automatically, so no
7852protocol argument is needed.  Bytes past the pickled object's
7853representation are ignored.
7854
7855Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7856which are used to control compatibility support for pickle stream
7857generated by Python 2.  If *fix_imports* is True, pickle will try to
7858map the old Python 2 names to the new names used in Python 3.  The
7859*encoding* and *errors* tell pickle how to decode 8-bit string
7860instances pickled by Python 2; these default to 'ASCII' and 'strict',
7861respectively.  The *encoding* can be 'bytes' to read these 8-bit
7862string instances as bytes objects.
7863[clinic start generated code]*/
7864
7865static PyObject *
7866_pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7867                   const char *encoding, const char *errors,
7868                   PyObject *buffers)
7869/*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7870{
7871    PyObject *result;
7872    UnpicklerObject *unpickler = _Unpickler_New();
7873
7874    if (unpickler == NULL)
7875        return NULL;
7876
7877    if (_Unpickler_SetStringInput(unpickler, data) < 0)
7878        goto error;
7879
7880    if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7881        goto error;
7882
7883    if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7884        goto error;
7885
7886    unpickler->fix_imports = fix_imports;
7887
7888    result = load(unpickler);
7889    Py_DECREF(unpickler);
7890    return result;
7891
7892  error:
7893    Py_XDECREF(unpickler);
7894    return NULL;
7895}
7896
/* Method table for the _pickle module; it is installed through the
   m_methods slot of _picklemodule.  Each *_METHODDEF macro contributes the
   entry for the like-named module function (dump, dumps, load, loads).
   The macros are defined outside this part of the file -- presumably by
   Argument Clinic, alongside the *_impl() wrappers; confirm in the
   generated header.  Note that the macro lines carry no trailing comma
   here, so each expansion must supply its own. */
static struct PyMethodDef pickle_methods[] = {
    _PICKLE_DUMP_METHODDEF
    _PICKLE_DUMPS_METHODDEF
    _PICKLE_LOAD_METHODDEF
    _PICKLE_LOADS_METHODDEF
    {NULL, NULL} /* sentinel */
};
7904
7905static int
7906pickle_clear(PyObject *m)
7907{
7908    _Pickle_ClearState(_Pickle_GetState(m));
7909    return 0;
7910}
7911
7912static void
7913pickle_free(PyObject *m)
7914{
7915    _Pickle_ClearState(_Pickle_GetState(m));
7916}
7917
7918static int
7919pickle_traverse(PyObject *m, visitproc visit, void *arg)
7920{
7921    PickleState *st = _Pickle_GetState(m);
7922    Py_VISIT(st->PickleError);
7923    Py_VISIT(st->PicklingError);
7924    Py_VISIT(st->UnpicklingError);
7925    Py_VISIT(st->dispatch_table);
7926    Py_VISIT(st->extension_registry);
7927    Py_VISIT(st->extension_cache);
7928    Py_VISIT(st->inverted_registry);
7929    Py_VISIT(st->name_mapping_2to3);
7930    Py_VISIT(st->import_mapping_2to3);
7931    Py_VISIT(st->name_mapping_3to2);
7932    Py_VISIT(st->import_mapping_3to2);
7933    Py_VISIT(st->codecs_encode);
7934    Py_VISIT(st->getattr);
7935    Py_VISIT(st->partial);
7936    return 0;
7937}
7938
/* Module definition for _pickle.  The initializer is positional, so the
   entries must stay in the declaration order of struct PyModuleDef.
   m_size reserves room for one PickleState per module object; the
   traverse/clear/free hooks below operate on that state. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    sizeof(PickleState),  /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_slots (formerly m_reload); unused, the module is created with PyModule_Create() */
    pickle_traverse,      /* m_traverse */
    pickle_clear,         /* m_clear */
    (freefunc)pickle_free /* m_free */
};
7950
7951PyMODINIT_FUNC
7952PyInit__pickle(void)
7953{
7954    PyObject *m;
7955    PickleState *st;
7956
7957    m = PyState_FindModule(&_picklemodule);
7958    if (m) {
7959        Py_INCREF(m);
7960        return m;
7961    }
7962
7963    if (PyType_Ready(&Pdata_Type) < 0)
7964        return NULL;
7965    if (PyType_Ready(&PicklerMemoProxyType) < 0)
7966        return NULL;
7967    if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7968        return NULL;
7969
7970    /* Create the module and add the functions. */
7971    m = PyModule_Create(&_picklemodule);
7972    if (m == NULL)
7973        return NULL;
7974
7975    /* Add types */
7976    if (PyModule_AddType(m, &Pickler_Type) < 0) {
7977        return NULL;
7978    }
7979    if (PyModule_AddType(m, &Unpickler_Type) < 0) {
7980        return NULL;
7981    }
7982    if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
7983        return NULL;
7984    }
7985
7986    st = _Pickle_GetState(m);
7987
7988    /* Initialize the exceptions. */
7989    st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7990    if (st->PickleError == NULL)
7991        return NULL;
7992    st->PicklingError = \
7993        PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7994    if (st->PicklingError == NULL)
7995        return NULL;
7996    st->UnpicklingError = \
7997        PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7998    if (st->UnpicklingError == NULL)
7999        return NULL;
8000
8001    if (PyModule_AddObjectRef(m, "PickleError", st->PickleError) < 0) {
8002        return NULL;
8003    }
8004    if (PyModule_AddObjectRef(m, "PicklingError", st->PicklingError) < 0) {
8005        return NULL;
8006    }
8007    if (PyModule_AddObjectRef(m, "UnpicklingError", st->UnpicklingError) < 0) {
8008        return NULL;
8009    }
8010    if (_Pickle_InitState(st) < 0)
8011        return NULL;
8012
8013    return m;
8014}
8015