xref: /third_party/python/Objects/obmalloc.c (revision 7db96d56)
1#include "Python.h"
2#include "pycore_pymem.h"         // _PyTraceMalloc_Config
3#include "pycore_code.h"         // stats
4
5#include <stdbool.h>
6#include <stdlib.h>               // malloc()
7
8
9/* Defined in tracemalloc.c */
10extern void _PyMem_DumpTraceback(int fd, const void *ptr);
11
12
13/* Python's malloc wrappers (see pymem.h) */
14
15#undef  uint
16#define uint    unsigned int    /* assuming >= 16 bits */
17
18/* Forward declaration */
19static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
20static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
21static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
22static void _PyMem_DebugRawFree(void *ctx, void *ptr);
23
24static void* _PyMem_DebugMalloc(void *ctx, size_t size);
25static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
26static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
27static void _PyMem_DebugFree(void *ctx, void *p);
28
29static void _PyObject_DebugDumpAddress(const void *p);
30static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
31
32static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
33
34#if defined(__has_feature)  /* Clang */
35#  if __has_feature(address_sanitizer) /* is ASAN enabled? */
36#    define _Py_NO_SANITIZE_ADDRESS \
37        __attribute__((no_sanitize("address")))
38#  endif
39#  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
40#    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
41#  endif
42#  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
43#    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
44#  endif
45#elif defined(__GNUC__)
46#  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
47#    define _Py_NO_SANITIZE_ADDRESS \
48        __attribute__((no_sanitize_address))
49#  endif
50   // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
51   // is provided only since GCC 7.
52#  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
53#    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
54#  endif
55#endif
56
57#ifndef _Py_NO_SANITIZE_ADDRESS
58#  define _Py_NO_SANITIZE_ADDRESS
59#endif
60#ifndef _Py_NO_SANITIZE_THREAD
61#  define _Py_NO_SANITIZE_THREAD
62#endif
63#ifndef _Py_NO_SANITIZE_MEMORY
64#  define _Py_NO_SANITIZE_MEMORY
65#endif
66
67#ifdef WITH_PYMALLOC
68
69#ifdef MS_WINDOWS
70#  include <windows.h>
71#elif defined(HAVE_MMAP)
72#  include <sys/mman.h>
73#  ifdef MAP_ANONYMOUS
74#    define ARENAS_USE_MMAP
75#  endif
76#endif
77
78/* Forward declaration */
79static void* _PyObject_Malloc(void *ctx, size_t size);
80static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
81static void _PyObject_Free(void *ctx, void *p);
82static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
83#endif
84
85
86/* bpo-35053: Declare tracemalloc configuration here rather than in
87   Modules/_tracemalloc.c because _tracemalloc can be compiled as a dynamic
88   library, whereas _Py_NewReference() requires it. */
89struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
90
91
92static void *
93_PyMem_RawMalloc(void *ctx, size_t size)
94{
95    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
96       for malloc(0), which would be treated as an error. Some platforms would
97       return a pointer with no memory behind it, which would break pymalloc.
98       To solve these problems, allocate an extra byte. */
99    if (size == 0)
100        size = 1;
101    return malloc(size);
102}
103
104static void *
105_PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
106{
107    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
108       for calloc(0, 0), which would be treated as an error. Some platforms
109       would return a pointer with no memory behind it, which would break
110       pymalloc.  To solve these problems, allocate an extra byte. */
111    if (nelem == 0 || elsize == 0) {
112        nelem = 1;
113        elsize = 1;
114    }
115    return calloc(nelem, elsize);
116}
117
118static void *
119_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
120{
121    if (size == 0)
122        size = 1;
123    return realloc(ptr, size);
124}
125
126static void
127_PyMem_RawFree(void *ctx, void *ptr)
128{
129    free(ptr);
130}
131
132
133#ifdef MS_WINDOWS
134static void *
135_PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
136{
137    return VirtualAlloc(NULL, size,
138                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
139}
140
141static void
142_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
143{
144    VirtualFree(ptr, 0, MEM_RELEASE);
145}
146
147#elif defined(ARENAS_USE_MMAP)
148static void *
149_PyObject_ArenaMmap(void *ctx, size_t size)
150{
151    void *ptr;
152    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
153               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
154    if (ptr == MAP_FAILED)
155        return NULL;
156    assert(ptr != NULL);
157    return ptr;
158}
159
160static void
161_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
162{
163    munmap(ptr, size);
164}
165
166#else
167static void *
168_PyObject_ArenaMalloc(void *ctx, size_t size)
169{
170    return malloc(size);
171}
172
173static void
174_PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
175{
176    free(ptr);
177}
178#endif
179
180#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
181#ifdef WITH_PYMALLOC
182#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
183#endif
184
185#define PYRAW_ALLOC MALLOC_ALLOC
186#ifdef WITH_PYMALLOC
187#  define PYOBJ_ALLOC PYMALLOC_ALLOC
188#else
189#  define PYOBJ_ALLOC MALLOC_ALLOC
190#endif
191#define PYMEM_ALLOC PYOBJ_ALLOC
192
193typedef struct {
194    /* We tag each block with an API ID in order to detect API violations */
195    char api_id;
196    PyMemAllocatorEx alloc;
197} debug_alloc_api_t;
198static struct {
199    debug_alloc_api_t raw;
200    debug_alloc_api_t mem;
201    debug_alloc_api_t obj;
202} _PyMem_Debug = {
203    {'r', PYRAW_ALLOC},
204    {'m', PYMEM_ALLOC},
205    {'o', PYOBJ_ALLOC}
206    };
207
208#define PYDBGRAW_ALLOC \
209    {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
210#define PYDBGMEM_ALLOC \
211    {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212#define PYDBGOBJ_ALLOC \
213    {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
214
215#ifdef Py_DEBUG
216static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
217static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
218static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
219#else
220static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
221static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
222static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
223#endif
224
225
226static int
227pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
228                            PyMemAllocatorEx *old_alloc)
229{
230    if (old_alloc != NULL) {
231        PyMem_GetAllocator(domain, old_alloc);
232    }
233
234
235    PyMemAllocatorEx new_alloc;
236    switch(domain)
237    {
238    case PYMEM_DOMAIN_RAW:
239        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
240        break;
241    case PYMEM_DOMAIN_MEM:
242        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
243        break;
244    case PYMEM_DOMAIN_OBJ:
245        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
246        break;
247    default:
248        /* unknown domain */
249        return -1;
250    }
251    PyMem_SetAllocator(domain, &new_alloc);
252    if (debug) {
253        _PyMem_SetupDebugHooksDomain(domain);
254    }
255    return 0;
256}
257
258
259int
260_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
261                           PyMemAllocatorEx *old_alloc)
262{
263#ifdef Py_DEBUG
264    const int debug = 1;
265#else
266    const int debug = 0;
267#endif
268    return pymem_set_default_allocator(domain, debug, old_alloc);
269}
270
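/* Illustrative sketch (not part of this file): the save/override/restore
 * pattern built on _PyMem_SetDefaultAllocator().  CPython internals use a
 * pattern like this when some code must run with the default raw allocator
 * no matter which hooks an embedder has installed:
 *
 *     PyMemAllocatorEx old_alloc;
 *     _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 *     // ... allocate/free with PyMem_RawMalloc()/PyMem_RawFree() ...
 *     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 */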
271
272int
273_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
274{
275    if (name == NULL || *name == '\0') {
276        /* PYTHONMALLOC is empty, not set, or ignored (-E/-I command line
277           options): use the default memory allocators */
278        *allocator = PYMEM_ALLOCATOR_DEFAULT;
279    }
280    else if (strcmp(name, "default") == 0) {
281        *allocator = PYMEM_ALLOCATOR_DEFAULT;
282    }
283    else if (strcmp(name, "debug") == 0) {
284        *allocator = PYMEM_ALLOCATOR_DEBUG;
285    }
286#ifdef WITH_PYMALLOC
287    else if (strcmp(name, "pymalloc") == 0) {
288        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
289    }
290    else if (strcmp(name, "pymalloc_debug") == 0) {
291        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
292    }
293#endif
294    else if (strcmp(name, "malloc") == 0) {
295        *allocator = PYMEM_ALLOCATOR_MALLOC;
296    }
297    else if (strcmp(name, "malloc_debug") == 0) {
298        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
299    }
300    else {
301        /* unknown allocator */
302        return -1;
303    }
304    return 0;
305}
306
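/* Illustrative sketch (roughly what preinitialization does, simplified): how
 * the helper above and _PyMem_SetupAllocators() below cooperate to apply the
 * PYTHONMALLOC environment variable:
 *
 *     PyMemAllocatorName allocator;
 *     if (_PyMem_GetAllocatorName(Py_GETENV("PYTHONMALLOC"), &allocator) < 0) {
 *         // unrecognized name, e.g. PYTHONMALLOC=bogus: report an error
 *     }
 *     else if (_PyMem_SetupAllocators(allocator) < 0) {
 *         // unknown PyMemAllocatorName value
 *     }
 */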
307
308int
309_PyMem_SetupAllocators(PyMemAllocatorName allocator)
310{
311    switch (allocator) {
312    case PYMEM_ALLOCATOR_NOT_SET:
313        /* do nothing */
314        break;
315
316    case PYMEM_ALLOCATOR_DEFAULT:
317        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
318        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
319        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
320        break;
321
322    case PYMEM_ALLOCATOR_DEBUG:
323        (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
324        (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
325        (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
326        break;
327
328#ifdef WITH_PYMALLOC
329    case PYMEM_ALLOCATOR_PYMALLOC:
330    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
331    {
332        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
333        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
334
335        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
336        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
337        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
338
339        if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
340            PyMem_SetupDebugHooks();
341        }
342        break;
343    }
344#endif
345
346    case PYMEM_ALLOCATOR_MALLOC:
347    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
348    {
349        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
350        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
351        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
352        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
353
354        if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
355            PyMem_SetupDebugHooks();
356        }
357        break;
358    }
359
360    default:
361        /* unknown allocator */
362        return -1;
363    }
364    return 0;
365}
366
367
368static int
369pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
370{
371    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
372}
373
374
375const char*
376_PyMem_GetCurrentAllocatorName(void)
377{
378    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
379#ifdef WITH_PYMALLOC
380    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
381#endif
382
383    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
384        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
385        pymemallocator_eq(&_PyObject, &malloc_alloc))
386    {
387        return "malloc";
388    }
389#ifdef WITH_PYMALLOC
390    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
391        pymemallocator_eq(&_PyMem, &pymalloc) &&
392        pymemallocator_eq(&_PyObject, &pymalloc))
393    {
394        return "pymalloc";
395    }
396#endif
397
398    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
399    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
400    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
401
402    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
403        pymemallocator_eq(&_PyMem, &dbg_mem) &&
404        pymemallocator_eq(&_PyObject, &dbg_obj))
405    {
406        /* Debug hooks installed */
407        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
408            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
409            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
410        {
411            return "malloc_debug";
412        }
413#ifdef WITH_PYMALLOC
414        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
415            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
416            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
417        {
418            return "pymalloc_debug";
419        }
420#endif
421    }
422    return NULL;
423}
424
425
426#undef MALLOC_ALLOC
427#undef PYMALLOC_ALLOC
428#undef PYRAW_ALLOC
429#undef PYMEM_ALLOC
430#undef PYOBJ_ALLOC
431#undef PYDBGRAW_ALLOC
432#undef PYDBGMEM_ALLOC
433#undef PYDBGOBJ_ALLOC
434
435
436static PyObjectArenaAllocator _PyObject_Arena = {NULL,
437#ifdef MS_WINDOWS
438    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
439#elif defined(ARENAS_USE_MMAP)
440    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
441#else
442    _PyObject_ArenaMalloc, _PyObject_ArenaFree
443#endif
444    };
445
446#ifdef WITH_PYMALLOC
447static int
448_PyMem_DebugEnabled(void)
449{
450    return (_PyObject.malloc == _PyMem_DebugMalloc);
451}
452
453static int
454_PyMem_PymallocEnabled(void)
455{
456    if (_PyMem_DebugEnabled()) {
457        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
458    }
459    else {
460        return (_PyObject.malloc == _PyObject_Malloc);
461    }
462}
463#endif
464
465
466static void
467_PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
468{
469    PyMemAllocatorEx alloc;
470
471    if (domain == PYMEM_DOMAIN_RAW) {
472        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
473            return;
474        }
475
476        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
477        alloc.ctx = &_PyMem_Debug.raw;
478        alloc.malloc = _PyMem_DebugRawMalloc;
479        alloc.calloc = _PyMem_DebugRawCalloc;
480        alloc.realloc = _PyMem_DebugRawRealloc;
481        alloc.free = _PyMem_DebugRawFree;
482        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
483    }
484    else if (domain == PYMEM_DOMAIN_MEM) {
485        if (_PyMem.malloc == _PyMem_DebugMalloc) {
486            return;
487        }
488
489        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
490        alloc.ctx = &_PyMem_Debug.mem;
491        alloc.malloc = _PyMem_DebugMalloc;
492        alloc.calloc = _PyMem_DebugCalloc;
493        alloc.realloc = _PyMem_DebugRealloc;
494        alloc.free = _PyMem_DebugFree;
495        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
496    }
497    else if (domain == PYMEM_DOMAIN_OBJ)  {
498        if (_PyObject.malloc == _PyMem_DebugMalloc) {
499            return;
500        }
501
502        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
503        alloc.ctx = &_PyMem_Debug.obj;
504        alloc.malloc = _PyMem_DebugMalloc;
505        alloc.calloc = _PyMem_DebugCalloc;
506        alloc.realloc = _PyMem_DebugRealloc;
507        alloc.free = _PyMem_DebugFree;
508        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
509    }
510}
511
512
513void
514PyMem_SetupDebugHooks(void)
515{
516    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
517    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
518    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
519}
520
521void
522PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
523{
524    switch(domain)
525    {
526    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
527    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
528    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
529    default:
530        /* unknown domain: set all attributes to NULL */
531        allocator->ctx = NULL;
532        allocator->malloc = NULL;
533        allocator->calloc = NULL;
534        allocator->realloc = NULL;
535        allocator->free = NULL;
536    }
537}
538
539void
540PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
541{
542    switch(domain)
543    {
544    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
545    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
546    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
547    /* ignore unknown domain */
548    }
549}
550
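/* Illustrative sketch (hypothetical embedder code, not part of CPython):
 * wrapping an allocator domain with the public API above.  The hook keeps the
 * previous allocator and delegates to it, so it composes with whatever was
 * installed before (including the debug hooks).  To observe every raw
 * allocation it must be installed before Py_Initialize().
 *
 *     static PyMemAllocatorEx orig_raw;
 *     static size_t alloc_count;
 *
 *     static void *
 *     counting_malloc(void *ctx, size_t size)
 *     {
 *         alloc_count++;                  // ctx here equals orig_raw.ctx
 *         return orig_raw.malloc(orig_raw.ctx, size);
 *     }
 *
 *     void
 *     install_counting_hook(void)
 *     {
 *         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &orig_raw);
 *         PyMemAllocatorEx alloc = orig_raw;   // keep ctx, calloc, realloc, free
 *         alloc.malloc = counting_malloc;
 *         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
 *     }
 */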
551void
552PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
553{
554    *allocator = _PyObject_Arena;
555}
556
557void *
558_PyObject_VirtualAlloc(size_t size)
559{
560    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
561}
562
563void
564_PyObject_VirtualFree(void *obj, size_t size)
565{
566    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
567}
568
569void
570PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
571{
572    _PyObject_Arena = *allocator;
573}
574
575void *
576PyMem_RawMalloc(size_t size)
577{
578    /*
579     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
580     * Most python internals blindly use a signed Py_ssize_t to track
581     * things without checking for overflows or negatives.
582     * As size_t is unsigned, checking for size < 0 is not required.
583     */
584    if (size > (size_t)PY_SSIZE_T_MAX)
585        return NULL;
586    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
587}
588
589void *
590PyMem_RawCalloc(size_t nelem, size_t elsize)
591{
592    /* see PyMem_RawMalloc() */
593    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
594        return NULL;
595    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
596}
597
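/* Worked example for the overflow guard above (added for illustration,
 * assuming a 64-bit build): PY_SSIZE_T_MAX is 2**63 - 1.  With nelem = 2**60
 * and elsize = 16, the product nelem * elsize is 2**64, which wraps to 0 in
 * size_t arithmetic, so a naive "nelem * elsize > PY_SSIZE_T_MAX" test would
 * pass.  Checking nelem > PY_SSIZE_T_MAX / elsize instead
 * (2**60 > (2**63 - 1) / 16) rejects the request before any multiplication
 * can overflow.
 */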
598void*
599PyMem_RawRealloc(void *ptr, size_t new_size)
600{
601    /* see PyMem_RawMalloc() */
602    if (new_size > (size_t)PY_SSIZE_T_MAX)
603        return NULL;
604    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
605}
606
607void PyMem_RawFree(void *ptr)
608{
609    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
610}
611
612
613void *
614PyMem_Malloc(size_t size)
615{
616    /* see PyMem_RawMalloc() */
617    if (size > (size_t)PY_SSIZE_T_MAX)
618        return NULL;
619    OBJECT_STAT_INC_COND(allocations512, size < 512);
620    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
621    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
622    OBJECT_STAT_INC(allocations);
623    return _PyMem.malloc(_PyMem.ctx, size);
624}
625
626void *
627PyMem_Calloc(size_t nelem, size_t elsize)
628{
629    /* see PyMem_RawMalloc() */
630    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
631        return NULL;
632    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
633    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
634    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
635    OBJECT_STAT_INC(allocations);
636    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
637}
638
639void *
640PyMem_Realloc(void *ptr, size_t new_size)
641{
642    /* see PyMem_RawMalloc() */
643    if (new_size > (size_t)PY_SSIZE_T_MAX)
644        return NULL;
645    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
646}
647
648void
649PyMem_Free(void *ptr)
650{
651    OBJECT_STAT_INC(frees);
652    _PyMem.free(_PyMem.ctx, ptr);
653}
654
655
656wchar_t*
657_PyMem_RawWcsdup(const wchar_t *str)
658{
659    assert(str != NULL);
660
661    size_t len = wcslen(str);
662    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
663        return NULL;
664    }
665
666    size_t size = (len + 1) * sizeof(wchar_t);
667    wchar_t *str2 = PyMem_RawMalloc(size);
668    if (str2 == NULL) {
669        return NULL;
670    }
671
672    memcpy(str2, str, size);
673    return str2;
674}
675
676char *
677_PyMem_RawStrdup(const char *str)
678{
679    assert(str != NULL);
680    size_t size = strlen(str) + 1;
681    char *copy = PyMem_RawMalloc(size);
682    if (copy == NULL) {
683        return NULL;
684    }
685    memcpy(copy, str, size);
686    return copy;
687}
688
689char *
690_PyMem_Strdup(const char *str)
691{
692    assert(str != NULL);
693    size_t size = strlen(str) + 1;
694    char *copy = PyMem_Malloc(size);
695    if (copy == NULL) {
696        return NULL;
697    }
698    memcpy(copy, str, size);
699    return copy;
700}
701
702void *
703PyObject_Malloc(size_t size)
704{
705    /* see PyMem_RawMalloc() */
706    if (size > (size_t)PY_SSIZE_T_MAX)
707        return NULL;
708    OBJECT_STAT_INC_COND(allocations512, size < 512);
709    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
710    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
711    OBJECT_STAT_INC(allocations);
712    return _PyObject.malloc(_PyObject.ctx, size);
713}
714
715void *
716PyObject_Calloc(size_t nelem, size_t elsize)
717{
718    /* see PyMem_RawMalloc() */
719    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
720        return NULL;
721    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
722    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
723    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
724    OBJECT_STAT_INC(allocations);
725    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
726}
727
728void *
729PyObject_Realloc(void *ptr, size_t new_size)
730{
731    /* see PyMem_RawMalloc() */
732    if (new_size > (size_t)PY_SSIZE_T_MAX)
733        return NULL;
734    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
735}
736
737void
738PyObject_Free(void *ptr)
739{
740    OBJECT_STAT_INC(frees);
741    _PyObject.free(_PyObject.ctx, ptr);
742}
743
744
745/* If we're using GCC, use __builtin_expect() to reduce overhead of
746   the valgrind checks */
747#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
748#  define UNLIKELY(value) __builtin_expect((value), 0)
749#  define LIKELY(value) __builtin_expect((value), 1)
750#else
751#  define UNLIKELY(value) (value)
752#  define LIKELY(value) (value)
753#endif
754
755#ifdef WITH_PYMALLOC
756
757#ifdef WITH_VALGRIND
758#include <valgrind/valgrind.h>
759
760/* -1 indicates that we haven't checked that we're running on valgrind yet. */
761static int running_on_valgrind = -1;
762#endif
763
764
765/* An object allocator for Python.
766
767   Here is an introduction to the layers of the Python memory architecture,
768   showing where the object allocator is actually used (layer +2). It is
769   called for every object allocation and deallocation (PyObject_New/Del),
770   unless the object-specific allocators implement a proprietary allocation
771   scheme (ex.: ints use a simple free list). This is also the place where
772   the cyclic garbage collector operates selectively on container objects.
773
774
775    Object-specific allocators
776    _____   ______   ______       ________
777   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
778+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
779    _______________________________       |                           |
780   [   Python's object allocator   ]      |                           |
781+2 | ####### Object memory ####### | <------ Internal buffers ------> |
782    ______________________________________________________________    |
783   [          Python's raw memory allocator (PyMem_ API)          ]   |
784+1 | <----- Python memory (under PyMem manager's control) ------> |   |
785    __________________________________________________________________
786   [    Underlying general-purpose allocator (ex: C library malloc)   ]
787 0 | <------ Virtual memory allocated for the python process -------> |
788
789   =========================================================================
790    _______________________________________________________________________
791   [                OS-specific Virtual Memory Manager (VMM)               ]
792-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
793    __________________________________   __________________________________
794   [                                  ] [                                  ]
795-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
796
797*/
798/*==========================================================================*/
799
800/* A fast, special-purpose memory allocator for small blocks, to be used
801   on top of a general-purpose malloc -- heavily based on previous art. */
802
803/* Vladimir Marangozov -- August 2000 */
804
805/*
806 * "Memory management is where the rubber meets the road -- if we do the wrong
807 * thing at any level, the results will not be good. And if we don't make the
808 * levels work well together, we are in serious trouble." (1)
809 *
810 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
811 *    "Dynamic Storage Allocation: A Survey and Critical Review",
812 *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
813 */
814
815/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
816
817/*==========================================================================*/
818
819/*
820 * Allocation strategy abstract:
821 *
822 * For small requests, the allocator sub-allocates <Big> blocks of memory.
823 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
824 * system's allocator.
825 *
826 * Small requests are grouped in size classes spaced 8 bytes apart, due
827 * to the required valid alignment of the returned address. Requests of
828 * a particular size are serviced from memory pools of 4K (one VMM page).
829 * Pools are fragmented on demand and contain free lists of blocks of one
830 * particular size class. In other words, there is a fixed-size allocator
831 * for each size class. Free pools are shared by the different allocators
832 * thus minimizing the space reserved for a particular size class.
833 *
834 * This allocation strategy is a variant of what is known as "simple
835 * segregated storage based on array of free lists". The main drawback of
836 * simple segregated storage is that we might end up with a lot of reserved
837 * memory for the different free lists, which can degenerate over time. To avoid
838 * this, we partition each free list into pools and dynamically share the
839 * reserved space between all free lists. This technique is quite efficient
840 * for memory intensive programs which allocate mainly small-sized blocks.
841 *
842 * For small requests we have the following table:
843 *
844 * Request in bytes     Size of allocated block      Size class idx
845 * ----------------------------------------------------------------
846 *        1-8                     8                       0
847 *        9-16                   16                       1
848 *       17-24                   24                       2
849 *       25-32                   32                       3
850 *       33-40                   40                       4
851 *       41-48                   48                       5
852 *       49-56                   56                       6
853 *       57-64                   64                       7
854 *       65-72                   72                       8
855 *        ...                   ...                     ...
856 *      497-504                 504                      62
857 *      505-512                 512                      63
858 *
859 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
860 *      allocator.
861 */
862
863/*==========================================================================*/
864
865/*
866 * -- Main tunable settings section --
867 */
868
869/*
870 * Alignment of addresses returned to the user. 8-byte alignment works
871 * on most current architectures (with 32-bit or 64-bit address buses).
872 * The alignment value is also used for grouping small requests in size
873 * classes spaced ALIGNMENT bytes apart.
874 *
875 * You shouldn't change this unless you know what you are doing.
876 */
877
878#if SIZEOF_VOID_P > 4
879#define ALIGNMENT              16               /* must be 2^N */
880#define ALIGNMENT_SHIFT         4
881#else
882#define ALIGNMENT               8               /* must be 2^N */
883#define ALIGNMENT_SHIFT         3
884#endif
885
886/* Return the number of bytes in size class I, as a uint. */
887#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
888
889/*
890 * Max size threshold below which malloc requests are considered to be
891 * small enough in order to use preallocated memory pools. You can tune
892 * this value according to your application behaviour and memory needs.
893 *
894 * Note: a size threshold of 512 guarantees that newly created dictionaries
895 * will be allocated from preallocated memory pools on 64-bit.
896 *
897 * The following invariants must hold:
898 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
899 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
900 *
901 * Although not required, for better performance and space efficiency,
902 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
903 */
904#define SMALL_REQUEST_THRESHOLD 512
905#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
906
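/* Worked numbers for the macros above (added for illustration), assuming a
 * 64-bit build where ALIGNMENT is 16 and ALIGNMENT_SHIFT is 4:
 *
 *     NB_SMALL_SIZE_CLASSES = 512 / 16 = 32 size classes
 *     INDEX2SIZE(0)  = (0 + 1)  << 4 =  16 bytes
 *     INDEX2SIZE(31) = (31 + 1) << 4 = 512 bytes
 *
 * A request of nbytes maps to its size class essentially the way the
 * allocator computes it later:
 *
 *     uint idx = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;  // e.g. 20 bytes -> idx 1 (32-byte blocks)
 */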
907/*
908 * The system's VMM page size can be obtained on most unices with a
909 * getpagesize() call or deduced from various header files. To make
910 * things simpler, we assume that it is 4K, which is OK for most systems.
911 * It is probably better if this is the native page size, but it doesn't
912 * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
913 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
914 * violation fault.  4K is apparently OK for all the platforms that python
915 * currently targets.
916 */
917#define SYSTEM_PAGE_SIZE        (4 * 1024)
918
919/*
920 * Maximum amount of memory managed by the allocator for small requests.
921 */
922#ifdef WITH_MEMORY_LIMITS
923#ifndef SMALL_MEMORY_LIMIT
924#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
925#endif
926#endif
927
928#if !defined(WITH_PYMALLOC_RADIX_TREE)
929/* Use radix-tree to track arena memory regions, for address_in_range().
930 * Enable by default since it allows larger pool sizes.  Can be disabled
931 * using -DWITH_PYMALLOC_RADIX_TREE=0 */
932#define WITH_PYMALLOC_RADIX_TREE 1
933#endif
934
935#if SIZEOF_VOID_P > 4
936/* on 64-bit platforms use larger pools and arenas if we can */
937#define USE_LARGE_ARENAS
938#if WITH_PYMALLOC_RADIX_TREE
939/* large pools only supported if radix-tree is enabled */
940#define USE_LARGE_POOLS
941#endif
942#endif
943
944/*
945 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
946 * on a page boundary. This is a reserved virtual address space for the
947 * current process (obtained through a malloc()/mmap() call). In no way this
948 * means that the memory arenas will be used entirely. A malloc(<Big>) is
949 * usually an address range reservation for <Big> bytes, unless all pages within
950 * this space are referenced subsequently. So malloc'ing big blocks and not
951 * using them does not mean "wasting memory". It's an addressable range
952 * wastage...
953 *
954 * Arenas are allocated with mmap() on systems supporting anonymous memory
955 * mappings to reduce heap fragmentation.
956 */
957#ifdef USE_LARGE_ARENAS
958#define ARENA_BITS              20                    /* 1 MiB */
959#else
960#define ARENA_BITS              18                    /* 256 KiB */
961#endif
962#define ARENA_SIZE              (1 << ARENA_BITS)
963#define ARENA_SIZE_MASK         (ARENA_SIZE - 1)
964
965#ifdef WITH_MEMORY_LIMITS
966#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
967#endif
968
969/*
970 * Size of the pools used for small blocks.  Must be a power of 2.
971 */
972#ifdef USE_LARGE_POOLS
973#define POOL_BITS               14                  /* 16 KiB */
974#else
975#define POOL_BITS               12                  /* 4 KiB */
976#endif
977#define POOL_SIZE               (1 << POOL_BITS)
978#define POOL_SIZE_MASK          (POOL_SIZE - 1)
979
980#if !WITH_PYMALLOC_RADIX_TREE
981#if POOL_SIZE != SYSTEM_PAGE_SIZE
982#   error "pool size must be equal to system page size"
983#endif
984#endif
985
986#define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
987#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
988#   error "arena size not an exact multiple of pool size"
989#endif
990
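/* Worked numbers (added for illustration): with large arenas and large pools
 * (the typical 64-bit build with the radix tree enabled), ARENA_SIZE is 1 MiB
 * and POOL_SIZE is 16 KiB, so MAX_POOLS_IN_ARENA is 1048576 / 16384 = 64.
 * The fallback configuration (256 KiB arenas, 4 KiB pools) also yields 64
 * pools per arena.
 */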
991/*
992 * -- End of tunable settings section --
993 */
994
995/*==========================================================================*/
996
997/* When you say memory, my mind reasons in terms of (pointers to) blocks */
998typedef uint8_t block;
999
1000/* Pool for small blocks. */
1001struct pool_header {
1002    union { block *_padding;
1003            uint count; } ref;          /* number of allocated blocks    */
1004    block *freeblock;                   /* pool's free list head         */
1005    struct pool_header *nextpool;       /* next pool of this size class  */
1006    struct pool_header *prevpool;       /* previous pool       ""        */
1007    uint arenaindex;                    /* index into arenas of base adr */
1008    uint szidx;                         /* block size class index        */
1009    uint nextoffset;                    /* bytes to virgin block         */
1010    uint maxnextoffset;                 /* largest valid nextoffset      */
1011};
1012
1013typedef struct pool_header *poolp;
1014
1015/* Record keeping for arenas. */
1016struct arena_object {
1017    /* The address of the arena, as returned by malloc.  Note that 0
1018     * will never be returned by a successful malloc, and is used
1019     * here to mark an arena_object that doesn't correspond to an
1020     * allocated arena.
1021     */
1022    uintptr_t address;
1023
1024    /* Pool-aligned pointer to the next pool to be carved off. */
1025    block* pool_address;
1026
1027    /* The number of available pools in the arena:  free pools + never-
1028     * allocated pools.
1029     */
1030    uint nfreepools;
1031
1032    /* The total number of pools in the arena, whether or not available. */
1033    uint ntotalpools;
1034
1035    /* Singly-linked list of available pools. */
1036    struct pool_header* freepools;
1037
1038    /* Whenever this arena_object is not associated with an allocated
1039     * arena, the nextarena member is used to link all unassociated
1040     * arena_objects in the singly-linked `unused_arena_objects` list.
1041     * The prevarena member is unused in this case.
1042     *
1043     * When this arena_object is associated with an allocated arena
1044     * with at least one available pool, both members are used in the
1045     * doubly-linked `usable_arenas` list, which is maintained in
1046     * increasing order of `nfreepools` values.
1047     *
1048     * Else this arena_object is associated with an allocated arena
1049     * all of whose pools are in use.  `nextarena` and `prevarena`
1050     * are both meaningless in this case.
1051     */
1052    struct arena_object* nextarena;
1053    struct arena_object* prevarena;
1054};
1055
1056#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
1057
1058#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
1059
1060/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
1061#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
1062
1063/* Return total number of blocks in pool of size index I, as a uint. */
1064#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
1065
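/* Worked example for the macros above (added for illustration), assuming a
 * typical 64-bit build: sizeof(struct pool_header) is 48 bytes, so
 * POOL_OVERHEAD rounds up to 48 with 16-byte ALIGNMENT.  With 16 KiB pools:
 *
 *     NUMBLOCKS(0)  = (16384 - 48) / 16  = 1021 blocks of 16 bytes
 *     NUMBLOCKS(31) = (16384 - 48) / 512 =   31 blocks of 512 bytes
 */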
1066/*==========================================================================*/
1067
1068/*
1069 * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1070
1071This is involved.  For an index i, usedpools[i+i] is the header for a list of
1072all partially used pools holding small blocks with "size class idx" i. So
1073usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
107416, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1075
1076Pools are carved off an arena's highwater mark (an arena_object's pool_address
1077member) as needed.  Once carved off, a pool is in one of three states forever
1078after:
1079
1080used == partially used, neither empty nor full
1081    At least one block in the pool is currently allocated, and at least one
1082    block in the pool is not currently allocated (note this implies a pool
1083    has room for at least two blocks).
1084    This is a pool's initial state, as a pool is created only when malloc
1085    needs space.
1086    The pool holds blocks of a fixed size, and is in the circular list headed
1087    at usedpools[i] (see above).  It's linked to the other used pools of the
1088    same size class via the pool_header's nextpool and prevpool members.
1089    If all but one block is currently allocated, a malloc can cause a
1090    transition to the full state.  If all but one block is not currently
1091    allocated, a free can cause a transition to the empty state.
1092
1093full == all the pool's blocks are currently allocated
1094    On transition to full, a pool is unlinked from its usedpools[] list.
1095    It's not linked to from anything then anymore, and its nextpool and
1096    prevpool members are meaningless until it transitions back to used.
1097    A free of a block in a full pool puts the pool back in the used state.
1098    Then it's linked in at the front of the appropriate usedpools[] list, so
1099    that the next allocation for its size class will reuse the freed block.
1100
1101empty == all the pool's blocks are currently available for allocation
1102    On transition to empty, a pool is unlinked from its usedpools[] list,
1103    and linked to the front of its arena_object's singly-linked freepools list,
1104    via its nextpool member.  The prevpool member has no meaning in this case.
1105    Empty pools have no inherent size class:  the next time a malloc finds
1106    an empty list in usedpools[], it takes the first pool off of freepools.
1107    If the size class needed happens to be the same as the size class the pool
1108    last had, some pool initialization can be skipped.
1109
1110
1111Block Management
1112
1113Blocks within pools are again carved out as needed.  pool->freeblock points to
1114the start of a singly-linked list of free blocks within the pool.  When a
1115block is freed, it's inserted at the front of its pool's freeblock list.  Note
1116that the available blocks in a pool are *not* linked all together when a pool
1117is initialized.  Instead only "the first two" (lowest addresses) blocks are
1118set up, returning the first such block, and setting pool->freeblock to a
1119one-block list holding the second such block.  This is consistent with the
1120fact that pymalloc strives at all levels (arena, pool, and block) never to
1121touch a piece of memory until it's actually needed.
1122
1123So long as a pool is in the used state, we're certain there *is* a block
1124available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
1125points to the end of the free list before we've carved the entire pool into
1126blocks, that means we simply haven't yet gotten to one of the higher-address
1127blocks.  The offset from the pool_header to the start of "the next" virgin
1128block is stored in the pool_header nextoffset member, and the largest value
1129of nextoffset that makes sense is stored in the maxnextoffset member when a
1130pool is initialized.  All the blocks in a pool have been passed out at least
1131once when and only when nextoffset > maxnextoffset.
1132
1133
1134Major obscurity:  While the usedpools vector is declared to have poolp
1135entries, it doesn't really.  It really contains two pointers per (conceptual)
1136poolp entry, the nextpool and prevpool members of a pool_header.  The
1137excruciating initialization code below fools C so that
1138
1139    usedpools[i+i]
1140
1141"acts like" a genuine poolp, but only so long as you only reference its
1142nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
1143compensating for that a pool_header's nextpool and prevpool members
1144immediately follow a pool_header's first two members:
1145
1146    union { block *_padding;
1147            uint count; } ref;
1148    block *freeblock;
1149
1150each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
1151contains is a fudged-up pointer p such that *if* C believes it's a poolp
1152pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1153circular list is empty).
1154
1155It's unclear why the usedpools setup is so convoluted.  It could be to
1156minimize the amount of cache required to hold this heavily-referenced table
1157(which only *needs* the two interpool pointer members of a pool_header). OTOH,
1158referencing code has to remember to "double the index" and doing so isn't
1159free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1160on the fact that C doesn't insert any padding anywhere in a pool_header at or
1161before the prevpool member.
1162**************************************************************************** */
1163
1164#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1165#define PT(x)   PTA(x), PTA(x)
1166
1167static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1168    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1169#if NB_SMALL_SIZE_CLASSES > 8
1170    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1171#if NB_SMALL_SIZE_CLASSES > 16
1172    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1173#if NB_SMALL_SIZE_CLASSES > 24
1174    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1175#if NB_SMALL_SIZE_CLASSES > 32
1176    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1177#if NB_SMALL_SIZE_CLASSES > 40
1178    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1179#if NB_SMALL_SIZE_CLASSES > 48
1180    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1181#if NB_SMALL_SIZE_CLASSES > 56
1182    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1183#if NB_SMALL_SIZE_CLASSES > 64
1184#error "NB_SMALL_SIZE_CLASSES should be at most 64"
1185#endif /* NB_SMALL_SIZE_CLASSES > 64 */
1186#endif /* NB_SMALL_SIZE_CLASSES > 56 */
1187#endif /* NB_SMALL_SIZE_CLASSES > 48 */
1188#endif /* NB_SMALL_SIZE_CLASSES > 40 */
1189#endif /* NB_SMALL_SIZE_CLASSES > 32 */
1190#endif /* NB_SMALL_SIZE_CLASSES > 24 */
1191#endif /* NB_SMALL_SIZE_CLASSES > 16 */
1192#endif /* NB_SMALL_SIZE_CLASSES >  8 */
1193};
1194
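/* Illustrative sketch (simplified from the allocator's fast path): how the
 * fudged-up usedpools entries are read.  Only the nextpool/prevpool members
 * of the fake pool_header may be touched, as explained above:
 *
 *     uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;   // size class index
 *     poolp pool = usedpools[size + size];
 *     if (pool != pool->nextpool) {
 *         // circular list non-empty: a partially used pool of this size
 *         // class exists and its first free block can be handed out
 *     }
 */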
1195/*==========================================================================
1196Arena management.
1197
1198`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
1199which may not be currently used (== they're arena_objects that aren't
1200currently associated with an allocated arena).  Note that arenas proper are
1201separately malloc'ed.
1202
1203Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
1204we do try to free() arenas, and use some mild heuristic strategies to increase
1205the likelihood that arenas eventually can be freed.
1206
1207unused_arena_objects
1208
1209    This is a singly-linked list of the arena_objects that are currently not
1210    being used (no arena is associated with them).  Objects are taken off the
1211    head of the list in new_arena(), and are pushed on the head of the list in
1212    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
1213    is on this list if and only if its .address member is 0.
1214
1215usable_arenas
1216
1217    This is a doubly-linked list of the arena_objects associated with arenas
1218    that have pools available.  These pools are either waiting to be reused,
1219    or have not been used before.  The list is sorted to have the most-
1220    allocated arenas first (ascending order based on the nfreepools member).
1221    This means that the next allocation will come from a heavily used arena,
1222    which gives the nearly empty arenas a chance to be returned to the system.
1223    In my unscientific tests this dramatically improved the number of arenas
1224    that could be freed.
1225
1226Note that an arena_object associated with an arena all of whose pools are
1227currently in use isn't on either list.
1228
1229Changed in Python 3.8:  keeping usable_arenas sorted by number of free pools
1230used to be done by one-at-a-time linear search when an arena's number of
1231free pools changed.  That could, overall, consume time quadratic in the
1232number of arenas.  That didn't really matter when there were only a few
1233hundred arenas (typical!), but could be a timing disaster when there were
1234hundreds of thousands.  See bpo-37029.
1235
1236Now we have a vector of "search fingers" to eliminate the need to search:
1237nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1238with nfp free pools.  This is NULL if and only if there is no arena with
1239nfp free pools in usable_arenas.
1240*/
1241
1242/* Array of objects used to track chunks of memory (arenas). */
1243static struct arena_object* arenas = NULL;
1244/* Number of slots currently allocated in the `arenas` vector. */
1245static uint maxarenas = 0;
1246
1247/* The head of the singly-linked, NULL-terminated list of available
1248 * arena_objects.
1249 */
1250static struct arena_object* unused_arena_objects = NULL;
1251
1252/* The head of the doubly-linked, NULL-terminated at each end, list of
1253 * arena_objects associated with arenas that have pools available.
1254 */
1255static struct arena_object* usable_arenas = NULL;
1256
1257/* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1258static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
1259
1260/* How many arena_objects do we initially allocate?
1261 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1262 * `arenas` vector.
1263 */
1264#define INITIAL_ARENA_OBJECTS 16
1265
1266/* Number of arenas allocated that haven't been free()'d. */
1267static size_t narenas_currently_allocated = 0;
1268
1269/* Total number of times malloc() called to allocate an arena. */
1270static size_t ntimes_arena_allocated = 0;
1271/* High water mark (max value ever seen) for narenas_currently_allocated. */
1272static size_t narenas_highwater = 0;
1273
1274static Py_ssize_t raw_allocated_blocks;
1275
1276Py_ssize_t
1277_Py_GetAllocatedBlocks(void)
1278{
1279    Py_ssize_t n = raw_allocated_blocks;
1280    /* add up allocated blocks for used pools */
1281    for (uint i = 0; i < maxarenas; ++i) {
1282        /* Skip arenas which are not allocated. */
1283        if (arenas[i].address == 0) {
1284            continue;
1285        }
1286
1287        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);
1288
1289        /* visit every pool in the arena */
1290        assert(base <= (uintptr_t) arenas[i].pool_address);
1291        for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
1292            poolp p = (poolp)base;
1293            n += p->ref.count;
1294        }
1295    }
1296    return n;
1297}
1298
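/* Note (added): this count is what sys.getallocatedblocks() reports at the
 * Python level.  A minimal sketch of how a C-level test might use it to look
 * for leaked blocks around a region of code:
 *
 *     Py_ssize_t before = _Py_GetAllocatedBlocks();
 *     // ... exercise code that should not leak pymalloc blocks ...
 *     assert(_Py_GetAllocatedBlocks() == before);
 */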
1299#if WITH_PYMALLOC_RADIX_TREE
1300/*==========================================================================*/
1301/* radix tree for tracking arena usage.  If enabled, used to implement
1302   address_in_range().
1303
1304   memory address bit allocation for keys
1305
1306   64-bit pointers, IGNORE_BITS=0 and 2^20 arena size:
1307     15 -> MAP_TOP_BITS
1308     15 -> MAP_MID_BITS
1309     14 -> MAP_BOT_BITS
1310     20 -> ideal aligned arena
1311   ----
1312     64
1313
1314   64-bit pointers, IGNORE_BITS=16, and 2^20 arena size:
1315     16 -> IGNORE_BITS
1316     10 -> MAP_TOP_BITS
1317     10 -> MAP_MID_BITS
1318      8 -> MAP_BOT_BITS
1319     20 -> ideal aligned arena
1320   ----
1321     64
1322
1323   32-bit pointers and 2^18 arena size:
1324     14 -> MAP_BOT_BITS
1325     18 -> ideal aligned arena
1326   ----
1327     32
1328
1329*/
1330
1331#if SIZEOF_VOID_P == 8
1332
1333/* number of bits in a pointer */
1334#define POINTER_BITS 64
1335
1336/* High bits of memory addresses that will be ignored when indexing into the
1337 * radix tree.  Setting this to zero is the safe default.  For most 64-bit
1338 * machines, setting this to 16 would be safe.  The kernel would not give
1339 * user-space virtual memory addresses that have significant information in
1340 * those high bits.  The main advantage to setting IGNORE_BITS > 0 is that less
1341 * virtual memory will be used for the top and middle radix tree arrays.  Those
1342 * arrays are allocated in the BSS segment and so will typically consume real
1343 * memory only if actually accessed.
1344 */
1345#define IGNORE_BITS 0
1346
1347/* use the top and mid layers of the radix tree */
1348#define USE_INTERIOR_NODES
1349
1350#elif SIZEOF_VOID_P == 4
1351
1352#define POINTER_BITS 32
1353#define IGNORE_BITS 0
1354
1355#else
1356
1357 /* Currently this code works for 64-bit or 32-bit pointers only.  */
1358#error "obmalloc radix tree requires 64-bit or 32-bit pointers."
1359
1360#endif /* SIZEOF_VOID_P */
1361
1362/* arena_coverage_t members require this to be true  */
1363#if ARENA_BITS >= 32
1364#   error "arena size must be < 2^32"
1365#endif
1366
1367/* the lower bits of the address that are not ignored */
1368#define ADDRESS_BITS (POINTER_BITS - IGNORE_BITS)
1369
1370#ifdef USE_INTERIOR_NODES
1371/* number of bits used for MAP_TOP and MAP_MID nodes */
1372#define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
1373#else
1374#define INTERIOR_BITS 0
1375#endif
1376
1377#define MAP_TOP_BITS INTERIOR_BITS
1378#define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
1379#define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)
1380
1381#define MAP_MID_BITS INTERIOR_BITS
1382#define MAP_MID_LENGTH (1 << MAP_MID_BITS)
1383#define MAP_MID_MASK (MAP_MID_LENGTH - 1)
1384
1385#define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
1386#define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
1387#define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
1388
1389#define MAP_BOT_SHIFT ARENA_BITS
1390#define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
1391#define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
1392
1393#define AS_UINT(p) ((uintptr_t)(p))
1394#define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
1395#define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
1396#define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
1397
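/* Worked example for the index macros above (added for illustration), using
 * the 64-bit layout shown earlier (IGNORE_BITS = 0, ARENA_BITS = 20, hence
 * MAP_TOP_BITS = MAP_MID_BITS = 15 and MAP_BOT_BITS = 14, giving
 * MAP_BOT_SHIFT = 20, MAP_MID_SHIFT = 34, MAP_TOP_SHIFT = 49):
 *
 *     block *p = ...;                 // some heap pointer
 *     int i1 = MAP_TOP_INDEX(p);      // bits 49..63 of the address
 *     int i2 = MAP_MID_INDEX(p);      // bits 34..48
 *     int i3 = MAP_BOT_INDEX(p);      // bits 20..33
 *     // the low ARENA_BITS (20) bits are the offset inside the arena
 */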
1398#if IGNORE_BITS > 0
1399/* Return the ignored part of the pointer address.  Those bits should be the
1400 * same for all valid pointers if IGNORE_BITS is set correctly.
1401 */
1402#define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
1403#else
1404#define HIGH_BITS(p) 0
1405#endif
1406
1407
1408/* This is the leaf of the radix tree.  See arena_map_mark_used() for the
1409 * meaning of these members. */
1410typedef struct {
1411    int32_t tail_hi;
1412    int32_t tail_lo;
1413} arena_coverage_t;
1414
1415typedef struct arena_map_bot {
1416    /* The members tail_hi and tail_lo are accessed together.  So, it is
1417     * better to have them as an array of structs, rather than two
1418     * arrays.
1419     */
1420    arena_coverage_t arenas[MAP_BOT_LENGTH];
1421} arena_map_bot_t;
1422
1423#ifdef USE_INTERIOR_NODES
1424typedef struct arena_map_mid {
1425    struct arena_map_bot *ptrs[MAP_MID_LENGTH];
1426} arena_map_mid_t;
1427
1428typedef struct arena_map_top {
1429    struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
1430} arena_map_top_t;
1431#endif
1432
1433/* The root of radix tree.  Note that by initializing like this, the memory
1434 * should be in the BSS.  The OS will only memory map pages as the MAP_MID
1435 * nodes get used (OS pages are demand loaded as needed).
1436 */
1437#ifdef USE_INTERIOR_NODES
1438static arena_map_top_t arena_map_root;
1439/* accounting for number of used interior nodes */
1440static int arena_map_mid_count;
1441static int arena_map_bot_count;
1442#else
1443static arena_map_bot_t arena_map_root;
1444#endif
1445
1446/* Return a pointer to a bottom tree node, returning NULL if it doesn't exist
1447 * or cannot be created */
1448static inline Py_ALWAYS_INLINE arena_map_bot_t *
1449arena_map_get(block *p, int create)
1450{
1451#ifdef USE_INTERIOR_NODES
1452    /* sanity check that IGNORE_BITS is correct */
1453    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1454    int i1 = MAP_TOP_INDEX(p);
1455    if (arena_map_root.ptrs[i1] == NULL) {
1456        if (!create) {
1457            return NULL;
1458        }
1459        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1460        if (n == NULL) {
1461            return NULL;
1462        }
1463        arena_map_root.ptrs[i1] = n;
1464        arena_map_mid_count++;
1465    }
1466    int i2 = MAP_MID_INDEX(p);
1467    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1468        if (!create) {
1469            return NULL;
1470        }
1471        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1472        if (n == NULL) {
1473            return NULL;
1474        }
1475        arena_map_root.ptrs[i1]->ptrs[i2] = n;
1476        arena_map_bot_count++;
1477    }
1478    return arena_map_root.ptrs[i1]->ptrs[i2];
1479#else
1480    return &arena_map_root;
1481#endif
1482}
1483
1484
1485/* The radix tree only tracks arenas, so the low ARENA_BITS bits of the address
1486 * (20 bits for the default 1 MiB arenas) are thrown away.  That reduces the space
1487 * requirement of the tree compared to similar radix tree page-map schemes.  In
1488 * exchange for slashing the space requirement, it needs more
1489 * computation to check an address.
1490 *
1491 * Tracking coverage is done by "ideal" arena address.  It is easier to
1492 * explain in decimal so let's say that the arena size is 100 bytes.
1493 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1494 * pointer address is inside an actual arena, we have to check two ideal
1495 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1496 * 300.  In the rare case that an arena is aligned in the ideal way
1497 * (e.g. base address of arena is 200) then we only have to check one
1498 * ideal address.
1499 *
1500 * The tree nodes for 200 and 300 both store the address of arena.
1501 * There are two cases: the arena starts at a lower ideal arena and
1502 * extends to this one, or the arena starts in this arena and extends to
1503 * the next ideal arena.  The tail_lo and tail_hi members correspond to
1504 * these two cases.
1505 */
1506
1507
1508/* mark or unmark addresses covered by arena */
1509static int
1510arena_map_mark_used(uintptr_t arena_base, int is_used)
1511{
1512    /* sanity check that IGNORE_BITS is correct */
1513    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1514    arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
1515    if (n_hi == NULL) {
1516        assert(is_used); /* otherwise node should already exist */
1517        return 0; /* failed to allocate space for node */
1518    }
1519    int i3 = MAP_BOT_INDEX((block *)arena_base);
1520    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1521    if (tail == 0) {
1522        /* is ideal arena address */
1523        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1524    }
1525    else {
1526        /* arena_base address is not ideal (aligned to arena size) and
1527         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1528         * for the next arena.  Note that it might be in different MAP_TOP
1529         * and MAP_MID nodes as well so we need to call arena_map_get()
1530         * again (do the full tree traversal).
1531         */
1532        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1533        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1534        /* If arena_base is a legit arena address, so is arena_base_next - 1
1535         * (last address in arena).  If arena_base_next overflows then it
1536         * must overflow to 0.  However, that would mean arena_base was
1537         * "ideal" and we should not be in this case. */
1538        assert(arena_base < arena_base_next);
1539        arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
1540        if (n_lo == NULL) {
1541            assert(is_used); /* otherwise should already exist */
1542            n_hi->arenas[i3].tail_hi = 0;
1543            return 0; /* failed to allocate space for node */
1544        }
1545        int i3_next = MAP_BOT_INDEX(arena_base_next);
1546        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1547    }
1548    return 1;
1549}
1550
1551/* Return true if 'p' is a pointer inside an obmalloc arena.
1552 * _PyObject_Free() calls this so it needs to be very fast. */
1553static int
1554arena_map_is_used(block *p)
1555{
1556    arena_map_bot_t *n = arena_map_get(p, 0);
1557    if (n == NULL) {
1558        return 0;
1559    }
1560    int i3 = MAP_BOT_INDEX(p);
1561    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1562    int32_t hi = n->arenas[i3].tail_hi;
1563    int32_t lo = n->arenas[i3].tail_lo;
1564    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1565    return (tail < lo) || (tail >= hi && hi != 0);
1566}
1567
1568/* end of radix tree logic */
1569/*==========================================================================*/
1570#endif /* WITH_PYMALLOC_RADIX_TREE */
1571
1572
1573/* Allocate a new arena.  If we run out of memory, return NULL.  Else
1574 * allocate a new arena, and return the address of an arena_object
1575 * describing the new arena.  It's expected that the caller will set
1576 * `usable_arenas` to the return value.
1577 */
1578static struct arena_object*
1579new_arena(void)
1580{
1581    struct arena_object* arenaobj;
1582    uint excess;        /* number of bytes above pool alignment */
1583    void *address;
1584    static int debug_stats = -1;
1585
1586    if (debug_stats == -1) {
1587        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1588        debug_stats = (opt != NULL && *opt != '\0');
1589    }
1590    if (debug_stats) {
1591        _PyObject_DebugMallocStats(stderr);
1592    }
1593
1594    if (unused_arena_objects == NULL) {
1595        uint i;
1596        uint numarenas;
1597        size_t nbytes;
1598
1599        /* Double the number of arena objects on each allocation.
1600         * Note that it's possible for `numarenas` to overflow.
1601         */
1602        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1603        if (numarenas <= maxarenas)
1604            return NULL;                /* overflow */
1605#if SIZEOF_SIZE_T <= SIZEOF_INT
1606        if (numarenas > SIZE_MAX / sizeof(*arenas))
1607            return NULL;                /* overflow */
1608#endif
1609        nbytes = numarenas * sizeof(*arenas);
1610        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1611        if (arenaobj == NULL)
1612            return NULL;
1613        arenas = arenaobj;
1614
1615        /* We might need to fix pointers that were copied.  However,
1616         * new_arena only gets called when all the pools in the
1617         * previous arenas are full.  Thus, there are *no* pointers
1618         * into the old array, so we don't have to worry about
1619         * invalid pointers.  Just to be sure, some asserts:
1620         */
1621        assert(usable_arenas == NULL);
1622        assert(unused_arena_objects == NULL);
1623
1624        /* Put the new arenas on the unused_arena_objects list. */
1625        for (i = maxarenas; i < numarenas; ++i) {
1626            arenas[i].address = 0;              /* mark as unassociated */
1627            arenas[i].nextarena = i < numarenas - 1 ?
1628                                   &arenas[i+1] : NULL;
1629        }
1630
1631        /* Update globals. */
1632        unused_arena_objects = &arenas[maxarenas];
1633        maxarenas = numarenas;
1634    }
1635
1636    /* Take the next available arena object off the head of the list. */
1637    assert(unused_arena_objects != NULL);
1638    arenaobj = unused_arena_objects;
1639    unused_arena_objects = arenaobj->nextarena;
1640    assert(arenaobj->address == 0);
1641    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1642#if WITH_PYMALLOC_RADIX_TREE
1643    if (address != NULL) {
1644        if (!arena_map_mark_used((uintptr_t)address, 1)) {
1645            /* marking arena in radix tree failed, abort */
1646            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1647            address = NULL;
1648        }
1649    }
1650#endif
1651    if (address == NULL) {
1652        /* The allocation failed: return NULL after putting the
1653         * arenaobj back.
1654         */
1655        arenaobj->nextarena = unused_arena_objects;
1656        unused_arena_objects = arenaobj;
1657        return NULL;
1658    }
1659    arenaobj->address = (uintptr_t)address;
1660
1661    ++narenas_currently_allocated;
1662    ++ntimes_arena_allocated;
1663    if (narenas_currently_allocated > narenas_highwater)
1664        narenas_highwater = narenas_currently_allocated;
1665    arenaobj->freepools = NULL;
1666    /* pool_address <- first pool-aligned address in the arena
1667       nfreepools <- number of whole pools that fit after alignment */
1668    arenaobj->pool_address = (block*)arenaobj->address;
1669    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1670    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1671    if (excess != 0) {
1672        --arenaobj->nfreepools;
1673        arenaobj->pool_address += POOL_SIZE - excess;
1674    }
1675    arenaobj->ntotalpools = arenaobj->nfreepools;
1676
1677    return arenaobj;
1678}
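
/* Illustrative sketch (not called by obmalloc) of the pool-alignment
 * arithmetic in new_arena() above: given an arbitrary arena base address,
 * return the first POOL_SIZE-aligned address inside the arena.  When the
 * base is misaligned, one pool's worth of space is given up, which is why
 * nfreepools is decremented in that case.
 */
static uintptr_t
example_first_pool_address(uintptr_t arena_base)
{
    uintptr_t excess = arena_base & POOL_SIZE_MASK;  /* bytes past a pool boundary */
    if (excess != 0) {
        return arena_base + (POOL_SIZE - excess);    /* round up to the next boundary */
    }
    return arena_base;                               /* already pool-aligned */
}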
1679
1680
1681
1682#if WITH_PYMALLOC_RADIX_TREE
1683/* Return true if and only if P is an address that was allocated by
1684   pymalloc.  When the radix tree is used, the 'pool' argument is unused.
1685 */
1686static bool
1687address_in_range(void *p, poolp pool)
1688{
1689    return arena_map_is_used(p);
1690}
1691#else
1692/*
1693address_in_range(P, POOL)
1694
1695Return true if and only if P is an address that was allocated by pymalloc.
1696POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1697(the caller is asked to compute this because the macro expands POOL more than
1698once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1699variable and pass the latter to the macro; because address_in_range is
1700called on every alloc/realloc/free, micro-efficiency is important here).
1701
1702Tricky:  Let B be the arena base address associated with the pool, B =
1703arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1704
1705    B <= P < B + ARENA_SIZE
1706
1707Subtracting B throughout, this is true iff
1708
1709    0 <= P-B < ARENA_SIZE
1710
1711By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1712
1713Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1714before the first arena has been allocated.  `arenas` is still NULL in that
1715case.  We're relying on the fact that maxarenas is also 0 in that case, so that
1716(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1717into a NULL arenas.
1718
1719Details:  given P and POOL, the arena_object corresponding to P is AO =
1720arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1721stores, etc), POOL is the correct address of P's pool, AO.address is the
1722correct base address of the pool's arena, and P must be within ARENA_SIZE of
1723AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1724(NULL)).  Therefore address_in_range correctly reports that obmalloc
1725controls P.
1726
1727Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1728call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1729in this case -- it may even be uninitialized trash.  If the trash arenaindex
1730is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1731control P.
1732
1733Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
1734allocated arena, obmalloc controls all the memory in slice AO.address :
1735AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1736so P doesn't lie in that slice, so the macro correctly reports that P is not
1737controlled by obmalloc.
1738
1739Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1740arena_object (one not currently associated with an allocated arena),
1741AO.address is 0, and the second test in the macro reduces to:
1742
1743    P < ARENA_SIZE
1744
1745If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1746that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1747of the test still passes, and the third clause (AO.address != 0) is necessary
1748to get the correct result:  AO.address is 0 in this case, so the macro
1749correctly reports that P is not controlled by obmalloc (despite that P lies in
1750slice AO.address : AO.address + ARENA_SIZE).
1751
1752Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
17532.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1754corresponded to a currently-allocated arena, so the "P is not controlled by
1755obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1756was impossible.
1757
1758Note that the logic is excruciating, and reading up possibly uninitialized
1759memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1760creates problems for some memory debuggers.  The overwhelming advantage is
1761that this test determines whether an arbitrary address is controlled by
1762obmalloc in a small constant time, independent of the number of arenas
1763obmalloc controls.  Since this test is needed at every entry point, it's
1764extremely desirable that it be this fast.
1765*/
1766
1767static bool _Py_NO_SANITIZE_ADDRESS
1768            _Py_NO_SANITIZE_THREAD
1769            _Py_NO_SANITIZE_MEMORY
1770address_in_range(void *p, poolp pool)
1771{
1772    // Since address_in_range may be reading from memory which was not allocated
1773    // by Python, it is important that pool->arenaindex is read only once, as
1774    // another thread may be concurrently modifying the value without holding
1775    // the GIL. The following dance forces the compiler to read pool->arenaindex
1776    // only once.
1777    uint arenaindex = *((volatile uint *)&pool->arenaindex);
1778    return arenaindex < maxarenas &&
1779        (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1780        arenas[arenaindex].address != 0;
1781}
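
/* Minimal sketch (illustrative only) of the unsigned-arithmetic trick used
 * above: because the subtraction is performed on unsigned values, a P below
 * B wraps around to a huge value, so the single comparison
 * "P - B < ARENA_SIZE" covers both "P >= B" and "P < B + ARENA_SIZE".
 */
static bool
example_in_unsigned_range(uintptr_t p, uintptr_t base)
{
    return p - base < ARENA_SIZE;   /* no separate "p >= base" test needed */
}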
1782
1783#endif /* !WITH_PYMALLOC_RADIX_TREE */
1784
1785/*==========================================================================*/
1786
1787// Called when freelist is exhausted.  Extend the freelist if there is
1788// space for a block.  Otherwise, remove this pool from usedpools.
1789static void
1790pymalloc_pool_extend(poolp pool, uint size)
1791{
1792    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1793        /* There is room for another block. */
1794        pool->freeblock = (block*)pool + pool->nextoffset;
1795        pool->nextoffset += INDEX2SIZE(size);
1796        *(block **)(pool->freeblock) = NULL;
1797        return;
1798    }
1799
1800    /* Pool is full, unlink from used pools. */
1801    poolp next;
1802    next = pool->nextpool;
1803    pool = pool->prevpool;
1804    next->prevpool = pool;
1805    pool->nextpool = next;
1806}
1807
1808/* Called when pymalloc_alloc cannot allocate a block from a usedpool.
1809 * This function takes a new pool and allocates a block from it.
1810 */
1811static void*
1812allocate_from_new_pool(uint size)
1813{
1814    /* There isn't a pool of the right size class immediately
1815     * available:  use a free pool.
1816     */
1817    if (UNLIKELY(usable_arenas == NULL)) {
1818        /* No arena has a free pool:  allocate a new arena. */
1819#ifdef WITH_MEMORY_LIMITS
1820        if (narenas_currently_allocated >= MAX_ARENAS) {
1821            return NULL;
1822        }
1823#endif
1824        usable_arenas = new_arena();
1825        if (usable_arenas == NULL) {
1826            return NULL;
1827        }
1828        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1829        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1830        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1831    }
1832    assert(usable_arenas->address != 0);
1833
1834    /* This arena already had the smallest nfreepools value, so decreasing
1835     * nfreepools doesn't change that, and we don't need to rearrange the
1836     * usable_arenas list.  However, if the arena becomes wholly allocated,
1837     * we need to remove its arena_object from usable_arenas.
1838     */
1839    assert(usable_arenas->nfreepools > 0);
1840    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1841        /* It's the last arena with this nfreepools value, so after the decrement none will have it. */
1842        nfp2lasta[usable_arenas->nfreepools] = NULL;
1843    }
1844    /* If any free pools will remain, it will be the new smallest. */
1845    if (usable_arenas->nfreepools > 1) {
1846        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1847        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1848    }
1849
1850    /* Try to get a cached free pool. */
1851    poolp pool = usable_arenas->freepools;
1852    if (LIKELY(pool != NULL)) {
1853        /* Unlink from cached pools. */
1854        usable_arenas->freepools = pool->nextpool;
1855        usable_arenas->nfreepools--;
1856        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1857            /* Wholly allocated:  remove. */
1858            assert(usable_arenas->freepools == NULL);
1859            assert(usable_arenas->nextarena == NULL ||
1860                   usable_arenas->nextarena->prevarena ==
1861                   usable_arenas);
1862            usable_arenas = usable_arenas->nextarena;
1863            if (usable_arenas != NULL) {
1864                usable_arenas->prevarena = NULL;
1865                assert(usable_arenas->address != 0);
1866            }
1867        }
1868        else {
1869            /* nfreepools > 0:  it must be that freepools
1870             * isn't NULL, or that we haven't yet carved
1871             * off all the arena's pools for the first
1872             * time.
1873             */
1874            assert(usable_arenas->freepools != NULL ||
1875                   usable_arenas->pool_address <=
1876                   (block*)usable_arenas->address +
1877                       ARENA_SIZE - POOL_SIZE);
1878        }
1879    }
1880    else {
1881        /* Carve off a new pool. */
1882        assert(usable_arenas->nfreepools > 0);
1883        assert(usable_arenas->freepools == NULL);
1884        pool = (poolp)usable_arenas->pool_address;
1885        assert((block*)pool <= (block*)usable_arenas->address +
1886                                 ARENA_SIZE - POOL_SIZE);
1887        pool->arenaindex = (uint)(usable_arenas - arenas);
1888        assert(&arenas[pool->arenaindex] == usable_arenas);
1889        pool->szidx = DUMMY_SIZE_IDX;
1890        usable_arenas->pool_address += POOL_SIZE;
1891        --usable_arenas->nfreepools;
1892
1893        if (usable_arenas->nfreepools == 0) {
1894            assert(usable_arenas->nextarena == NULL ||
1895                   usable_arenas->nextarena->prevarena ==
1896                   usable_arenas);
1897            /* Unlink the arena:  it is completely allocated. */
1898            usable_arenas = usable_arenas->nextarena;
1899            if (usable_arenas != NULL) {
1900                usable_arenas->prevarena = NULL;
1901                assert(usable_arenas->address != 0);
1902            }
1903        }
1904    }
1905
1906    /* Frontlink to used pools. */
1907    block *bp;
1908    poolp next = usedpools[size + size]; /* == prev */
1909    pool->nextpool = next;
1910    pool->prevpool = next;
1911    next->nextpool = pool;
1912    next->prevpool = pool;
1913    pool->ref.count = 1;
1914    if (pool->szidx == size) {
1915        /* Luckily, this pool last contained blocks
1916         * of the same size class, so its header
1917         * and free list are already initialized.
1918         */
1919        bp = pool->freeblock;
1920        assert(bp != NULL);
1921        pool->freeblock = *(block **)bp;
1922        return bp;
1923    }
1924    /*
1925     * Initialize the pool header, set up the free list to
1926     * contain just the second block, and return the first
1927     * block.
1928     */
1929    pool->szidx = size;
1930    size = INDEX2SIZE(size);
1931    bp = (block *)pool + POOL_OVERHEAD;
1932    pool->nextoffset = POOL_OVERHEAD + (size << 1);
1933    pool->maxnextoffset = POOL_SIZE - size;
1934    pool->freeblock = bp + size;
1935    *(block **)(pool->freeblock) = NULL;
1936    return bp;
1937}
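
/* Illustrative sketch (not called anywhere) of the carving arithmetic used
 * when initializing a fresh pool above: the first block starts right after
 * the pool header (POOL_OVERHEAD), the second one `blocksize` bytes later,
 * and blocks keep being carved until nextoffset would pass POOL_SIZE - size.
 * The return value is therefore the number of whole blocks a pool can hold
 * for a given block size in bytes.
 */
static uint
example_blocks_per_pool(uint blocksize)
{
    assert(blocksize > 0);
    return (uint)(POOL_SIZE - POOL_OVERHEAD) / blocksize;
}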
1938
1939/* pymalloc allocator
1940
1941   Return a pointer to newly allocated memory if pymalloc allocated memory.
1942
1943   Return NULL if pymalloc did not allocate the memory block: the request was
1944   zero bytes or bigger than SMALL_REQUEST_THRESHOLD, an internal allocation
1945   failed, or the max memory limit has been reached.
1946*/
1947static inline void*
1948pymalloc_alloc(void *ctx, size_t nbytes)
1949{
1950#ifdef WITH_VALGRIND
1951    if (UNLIKELY(running_on_valgrind == -1)) {
1952        running_on_valgrind = RUNNING_ON_VALGRIND;
1953    }
1954    if (UNLIKELY(running_on_valgrind)) {
1955        return NULL;
1956    }
1957#endif
1958
1959    if (UNLIKELY(nbytes == 0)) {
1960        return NULL;
1961    }
1962    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1963        return NULL;
1964    }
1965
1966    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1967    poolp pool = usedpools[size + size];
1968    block *bp;
1969
1970    if (LIKELY(pool != pool->nextpool)) {
1971        /*
1972         * There is a used pool for this size class.
1973         * Pick up the head block of its free list.
1974         */
1975        ++pool->ref.count;
1976        bp = pool->freeblock;
1977        assert(bp != NULL);
1978
1979        if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1980            // Reached the end of the free list, try to extend it.
1981            pymalloc_pool_extend(pool, size);
1982        }
1983    }
1984    else {
1985        /* There isn't a pool of the right size class immediately
1986         * available:  use a free pool.
1987         */
1988        bp = allocate_from_new_pool(size);
1989    }
1990
1991    return (void *)bp;
1992}
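
/* Minimal sketch (hypothetical helper, not used by obmalloc) of the
 * request-size to size-class mapping in pymalloc_alloc() above: a request
 * of nbytes maps to class index (nbytes - 1) >> ALIGNMENT_SHIFT, and
 * INDEX2SIZE() gives back the rounded-up block size carved out of a pool.
 */
static uint
example_size_class_of(size_t nbytes)
{
    assert(nbytes > 0 && nbytes <= SMALL_REQUEST_THRESHOLD);
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;  /* class index */
    assert(INDEX2SIZE(size) >= nbytes);   /* the class's block covers the request */
    return size;
}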
1993
1994
1995static void *
1996_PyObject_Malloc(void *ctx, size_t nbytes)
1997{
1998    void* ptr = pymalloc_alloc(ctx, nbytes);
1999    if (LIKELY(ptr != NULL)) {
2000        return ptr;
2001    }
2002
2003    ptr = PyMem_RawMalloc(nbytes);
2004    if (ptr != NULL) {
2005        raw_allocated_blocks++;
2006    }
2007    return ptr;
2008}
2009
2010
2011static void *
2012_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2013{
2014    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2015    size_t nbytes = nelem * elsize;
2016
2017    void* ptr = pymalloc_alloc(ctx, nbytes);
2018    if (LIKELY(ptr != NULL)) {
2019        memset(ptr, 0, nbytes);
2020        return ptr;
2021    }
2022
2023    ptr = PyMem_RawCalloc(nelem, elsize);
2024    if (ptr != NULL) {
2025        raw_allocated_blocks++;
2026    }
2027    return ptr;
2028}
2029
2030
2031static void
2032insert_to_usedpool(poolp pool)
2033{
2034    assert(pool->ref.count > 0);            /* else the pool is empty */
2035
2036    uint size = pool->szidx;
2037    poolp next = usedpools[size + size];
2038    poolp prev = next->prevpool;
2039
2040    /* insert pool before next:   prev <-> pool <-> next */
2041    pool->nextpool = next;
2042    pool->prevpool = prev;
2043    next->prevpool = pool;
2044    prev->nextpool = pool;
2045}
2046
2047static void
2048insert_to_freepool(poolp pool)
2049{
2050    poolp next = pool->nextpool;
2051    poolp prev = pool->prevpool;
2052    next->prevpool = prev;
2053    prev->nextpool = next;
2054
2055    /* Link the pool to freepools.  This is a singly-linked
2056     * list, and pool->prevpool isn't used there.
2057     */
2058    struct arena_object *ao = &arenas[pool->arenaindex];
2059    pool->nextpool = ao->freepools;
2060    ao->freepools = pool;
2061    uint nf = ao->nfreepools;
2062    /* If this is the rightmost arena with this number of free pools,
2063     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2064     * are no arenas in usable_arenas with that value.
2065     */
2066    struct arena_object* lastnf = nfp2lasta[nf];
2067    assert((nf == 0 && lastnf == NULL) ||
2068           (nf > 0 &&
2069            lastnf != NULL &&
2070            lastnf->nfreepools == nf &&
2071            (lastnf->nextarena == NULL ||
2072             nf < lastnf->nextarena->nfreepools)));
2073    if (lastnf == ao) {  /* it is the rightmost */
2074        struct arena_object* p = ao->prevarena;
2075        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2076    }
2077    ao->nfreepools = ++nf;
2078
2079    /* All the rest is arena management.  We just freed
2080     * a pool, and there are 4 cases for arena mgmt:
2081     * 1. If all the pools are free, return the arena to
2082     *    the system free().  Except if this is the last
2083     *    arena in the list, keep it to avoid thrashing:
2084     *    keeping one wholly free arena in the list avoids
2085     *    pathological cases where a simple loop would
2086     *    otherwise provoke needing to allocate and free an
2087     *    arena on every iteration.  See bpo-37257.
2088     * 2. If this is the only free pool in the arena,
2089     *    add the arena back to the `usable_arenas` list.
2090     * 3. If the "next" arena has a smaller count of free
2091     *    pools, we have to "slide this arena right" to
2092     *    restore the invariant that usable_arenas is sorted in order of
2093     *    nfreepools.
2094     * 4. Else there's nothing more to do.
2095     */
2096    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2097        /* Case 1.  First unlink ao from usable_arenas.
2098         */
2099        assert(ao->prevarena == NULL ||
2100               ao->prevarena->address != 0);
2101        assert(ao ->nextarena == NULL ||
2102               ao->nextarena->address != 0);
2103
2104        /* Fix the pointer in the prevarena, or the
2105         * usable_arenas pointer.
2106         */
2107        if (ao->prevarena == NULL) {
2108            usable_arenas = ao->nextarena;
2109            assert(usable_arenas == NULL ||
2110                   usable_arenas->address != 0);
2111        }
2112        else {
2113            assert(ao->prevarena->nextarena == ao);
2114            ao->prevarena->nextarena =
2115                ao->nextarena;
2116        }
2117        /* Fix the pointer in the nextarena. */
2118        if (ao->nextarena != NULL) {
2119            assert(ao->nextarena->prevarena == ao);
2120            ao->nextarena->prevarena =
2121                ao->prevarena;
2122        }
2123        /* Record that this arena_object slot is
2124         * available to be reused.
2125         */
2126        ao->nextarena = unused_arena_objects;
2127        unused_arena_objects = ao;
2128
2129#if WITH_PYMALLOC_RADIX_TREE
2130        /* mark arena region as not under control of obmalloc */
2131        arena_map_mark_used(ao->address, 0);
2132#endif
2133
2134        /* Free the entire arena. */
2135        _PyObject_Arena.free(_PyObject_Arena.ctx,
2136                             (void *)ao->address, ARENA_SIZE);
2137        ao->address = 0;                        /* mark unassociated */
2138        --narenas_currently_allocated;
2139
2140        return;
2141    }
2142
2143    if (nf == 1) {
2144        /* Case 2.  Put ao at the head of
2145         * usable_arenas.  Note that because
2146         * ao->nfreepools was 0 before, ao isn't
2147         * currently on the usable_arenas list.
2148         */
2149        ao->nextarena = usable_arenas;
2150        ao->prevarena = NULL;
2151        if (usable_arenas)
2152            usable_arenas->prevarena = ao;
2153        usable_arenas = ao;
2154        assert(usable_arenas->address != 0);
2155        if (nfp2lasta[1] == NULL) {
2156            nfp2lasta[1] = ao;
2157        }
2158
2159        return;
2160    }
2161
2162    /* If this arena is now out of order, we need to keep
2163     * the list sorted.  The list is kept sorted so that
2164     * the "most full" arenas are used first, which allows
2165     * the nearly empty arenas to be completely freed.  In
2166     * a few un-scientific tests, it seems like this
2167     * approach allowed a lot more memory to be freed.
2168     */
2169    /* If this is the only arena with nf, record that. */
2170    if (nfp2lasta[nf] == NULL) {
2171        nfp2lasta[nf] = ao;
2172    } /* else the rightmost with nf doesn't change */
2173    /* If this was the rightmost of the old size, it remains in place. */
2174    if (ao == lastnf) {
2175        /* Case 4.  Nothing to do. */
2176        return;
2177    }
2178    /* If ao were the only arena in the list, the last block would have
2179     * gotten us out.
2180     */
2181    assert(ao->nextarena != NULL);
2182
2183    /* Case 3:  We have to move the arena towards the end of the list,
2184     * because it has more free pools than the arena to its right.  It needs
2185     * to move to follow lastnf.
2186     * First unlink ao from usable_arenas.
2187     */
2188    if (ao->prevarena != NULL) {
2189        /* ao isn't at the head of the list */
2190        assert(ao->prevarena->nextarena == ao);
2191        ao->prevarena->nextarena = ao->nextarena;
2192    }
2193    else {
2194        /* ao is at the head of the list */
2195        assert(usable_arenas == ao);
2196        usable_arenas = ao->nextarena;
2197    }
2198    ao->nextarena->prevarena = ao->prevarena;
2199    /* And insert after lastnf. */
2200    ao->prevarena = lastnf;
2201    ao->nextarena = lastnf->nextarena;
2202    if (ao->nextarena != NULL) {
2203        ao->nextarena->prevarena = ao;
2204    }
2205    lastnf->nextarena = ao;
2206    /* Verify that the swaps worked. */
2207    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2208    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2209    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2210    assert((usable_arenas == ao && ao->prevarena == NULL)
2211           || ao->prevarena->nextarena == ao);
2212}
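
/* Debug-style sketch (hypothetical, not part of obmalloc): walk
 * usable_arenas and check the invariant that insert_to_freepool() above
 * works to maintain, namely that the list is sorted by nondecreasing
 * nfreepools so the "most full" arenas are handed out first.
 */
static int
example_usable_arenas_sorted(void)
{
    for (struct arena_object *a = usable_arenas;
         a != NULL && a->nextarena != NULL;
         a = a->nextarena) {
        if (a->nfreepools > a->nextarena->nfreepools) {
            return 0;   /* out of order */
        }
    }
    return 1;
}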
2213
2214/* Free a memory block allocated by pymalloc_alloc().
2215   Return 1 if it was freed.
2216   Return 0 if the block was not allocated by pymalloc_alloc(). */
2217static inline int
2218pymalloc_free(void *ctx, void *p)
2219{
2220    assert(p != NULL);
2221
2222#ifdef WITH_VALGRIND
2223    if (UNLIKELY(running_on_valgrind > 0)) {
2224        return 0;
2225    }
2226#endif
2227
2228    poolp pool = POOL_ADDR(p);
2229    if (UNLIKELY(!address_in_range(p, pool))) {
2230        return 0;
2231    }
2232    /* We allocated this address. */
2233
2234    /* Link p to the start of the pool's freeblock list.  Since
2235     * the pool had at least the p block outstanding, the pool
2236     * wasn't empty (so it's already in a usedpools[] list, or
2237     * was full and is in no list -- it's not in the freeblocks
2238     * list in any case).
2239     */
2240    assert(pool->ref.count > 0);            /* else it was empty */
2241    block *lastfree = pool->freeblock;
2242    *(block **)p = lastfree;
2243    pool->freeblock = (block *)p;
2244    pool->ref.count--;
2245
2246    if (UNLIKELY(lastfree == NULL)) {
2247        /* Pool was full, so doesn't currently live in any list:
2248         * link it to the front of the appropriate usedpools[] list.
2249         * This mimics LRU pool usage for new allocations and
2250         * targets optimal filling when several pools contain
2251         * blocks of the same size class.
2252         */
2253        insert_to_usedpool(pool);
2254        return 1;
2255    }
2256
2257    /* freeblock wasn't NULL, so the pool wasn't full,
2258     * and the pool is in a usedpools[] list.
2259     */
2260    if (LIKELY(pool->ref.count != 0)) {
2261        /* pool isn't empty:  leave it in usedpools */
2262        return 1;
2263    }
2264
2265    /* Pool is now empty:  unlink from usedpools, and
2266     * link to the front of freepools.  This ensures that
2267     * previously freed pools will be allocated later
2268     * (being not referenced, they are perhaps paged out).
2269     */
2270    insert_to_freepool(pool);
2271    return 1;
2272}
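
/* Minimal sketch (illustrative only) of the free-list threading used by
 * pymalloc_free() above: each free block stores the address of the next
 * free block in its own first word, so pushing a freed block is just two
 * pointer writes and needs no extra memory.
 */
static void
example_push_free_block(block **freelist, block *bp)
{
    *(block **)bp = *freelist;   /* link the old head into the freed block */
    *freelist = bp;              /* the freed block becomes the new head */
}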
2273
2274
2275static void
2276_PyObject_Free(void *ctx, void *p)
2277{
2278    /* PyObject_Free(NULL) has no effect */
2279    if (p == NULL) {
2280        return;
2281    }
2282
2283    if (UNLIKELY(!pymalloc_free(ctx, p))) {
2284        /* pymalloc didn't allocate this address */
2285        PyMem_RawFree(p);
2286        raw_allocated_blocks--;
2287    }
2288}
2289
2290
2291/* pymalloc realloc.
2292
2293   If nbytes==0, then as the Python docs promise, we do not treat this like
2294   free(p), and return a non-NULL result.
2295
2296   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2297   newptr_p.
2298
2299   Return 0 if pymalloc didn't allocate p. */
2300static int
2301pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
2302{
2303    void *bp;
2304    poolp pool;
2305    size_t size;
2306
2307    assert(p != NULL);
2308
2309#ifdef WITH_VALGRIND
2310    /* Treat running_on_valgrind == -1 the same as 0 */
2311    if (UNLIKELY(running_on_valgrind > 0)) {
2312        return 0;
2313    }
2314#endif
2315
2316    pool = POOL_ADDR(p);
2317    if (!address_in_range(p, pool)) {
2318        /* pymalloc is not managing this block.
2319
2320           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2321           over this block.  However, if we do, we need to copy the valid data
2322           from the C-managed block to one of our blocks, and there's no
2323           portable way to know how much of the memory space starting at p is
2324           valid.
2325
2326           As bug 1185883 pointed out the hard way, it's possible that the
2327           C-managed block is "at the end" of allocated VM space, so that a
2328           memory fault can occur if we try to copy nbytes bytes starting at p.
2329           Instead we punt: let C continue to manage this block. */
2330        return 0;
2331    }
2332
2333    /* pymalloc is in charge of this block */
2334    size = INDEX2SIZE(pool->szidx);
2335    if (nbytes <= size) {
2336        /* The block is staying the same or shrinking.
2337
2338           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2339           block to a smaller size class, but it wastes memory not to copy it.
2340
2341           The compromise here is to copy on shrink only if at least 25% of
2342           size can be shaved off. */
2343        if (4 * nbytes > 3 * size) {
2344            /* It's the same, or shrinking and new/old > 3/4. */
2345            *newptr_p = p;
2346            return 1;
2347        }
2348        size = nbytes;
2349    }
2350
2351    bp = _PyObject_Malloc(ctx, nbytes);
2352    if (bp != NULL) {
2353        memcpy(bp, p, size);
2354        _PyObject_Free(ctx, p);
2355    }
2356    *newptr_p = bp;
2357    return 1;
2358}
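
/* Minimal sketch (not used by obmalloc) of the shrink heuristic in
 * pymalloc_realloc() above: the block stays in place unless at least 25%
 * of its current size class could be given back, i.e. unless the new size
 * drops to 3/4 of the block size or less.
 */
static int
example_realloc_keeps_block(size_t nbytes, size_t blocksize)
{
    return nbytes <= blocksize && 4 * nbytes > 3 * blocksize;
}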
2359
2360
2361static void *
2362_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2363{
2364    void *ptr2;
2365
2366    if (ptr == NULL) {
2367        return _PyObject_Malloc(ctx, nbytes);
2368    }
2369
2370    if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
2371        return ptr2;
2372    }
2373
2374    return PyMem_RawRealloc(ptr, nbytes);
2375}
2376
2377#else   /* ! WITH_PYMALLOC */
2378
2379/*==========================================================================*/
2380/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2381 * only be used by extensions that are compiled with pymalloc enabled. */
2382
2383Py_ssize_t
2384_Py_GetAllocatedBlocks(void)
2385{
2386    return 0;
2387}
2388
2389#endif /* WITH_PYMALLOC */
2390
2391
2392/*==========================================================================*/
2393/* A cross-platform debugging allocator.  This doesn't manage memory directly;
2394 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2395 */
2396
2397/* Uncomment this define to add the "serialno" field */
2398/* #define PYMEM_DEBUG_SERIALNO */
2399
2400#ifdef PYMEM_DEBUG_SERIALNO
2401static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2402
2403/* serialno is always incremented via calling this routine.  The point is
2404 * to supply a single place to set a breakpoint.
2405 */
2406static void
2407bumpserialno(void)
2408{
2409    ++serialno;
2410}
2411#endif
2412
2413#define SST SIZEOF_SIZE_T
2414
2415#ifdef PYMEM_DEBUG_SERIALNO
2416#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2417#else
2418#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2419#endif
2420
2421/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2422static size_t
2423read_size_t(const void *p)
2424{
2425    const uint8_t *q = (const uint8_t *)p;
2426    size_t result = *q++;
2427    int i;
2428
2429    for (i = SST; --i > 0; ++q)
2430        result = (result << 8) | *q;
2431    return result;
2432}
2433
2434/* Write n as a big-endian size_t, MSB at address p, LSB at
2435 * p + sizeof(size_t) - 1.
2436 */
2437static void
2438write_size_t(void *p, size_t n)
2439{
2440    uint8_t *q = (uint8_t *)p + SST - 1;
2441    int i;
2442
2443    for (i = SST; --i >= 0; --q) {
2444        *q = (uint8_t)(n & 0xff);
2445        n >>= 8;
2446    }
2447}
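
/* Illustrative sketch (hypothetical helper) showing a round trip through
 * the two helpers above: write_size_t() stores the most significant byte
 * first, so a hex dump of a debug block shows the requested size in a
 * human-readable byte order, and read_size_t() recovers the exact value.
 */
static int
example_size_t_roundtrip(size_t n)
{
    uint8_t buf[SST];
    write_size_t(buf, n);
    return read_size_t(buf) == n;   /* always true */
}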
2448
2449/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2450   fills them with useful stuff, here calling the underlying malloc's result p:
2451
2452p[0: S]
2453    Number of bytes originally asked for.  This is a size_t, big-endian (easier
2454    to read in a memory dump).
2455p[S]
2456    API ID.  See PEP 445.  This is a character, but seems undocumented.
2457p[S+1: 2*S]
2458    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under-writes and under-reads.
2459p[2*S: 2*S+n]
2460    The requested memory, filled with copies of PYMEM_CLEANBYTE.
2461    Used to catch references to uninitialized memory.
2462    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2463    handled the request itself.
2464p[2*S+n: 2*S+n+S]
2465    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over-writes and over-reads.
2466p[2*S+n+S: 2*S+n+2*S]
2467    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2468    and _PyMem_DebugRealloc.
2469    This is a big-endian size_t.
2470    If "bad memory" is detected later, the serial number gives an
2471    excellent way to set a breakpoint on the next run, to capture the
2472    instant at which this block was passed out.
2473
2474If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2475for 3 * S extra bytes, and omits the last serialno field.
2476*/
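
/* Minimal sketch (illustrative only) of the layout described above: given
 * the data pointer handed back to the caller, the header fields sit at
 * fixed negative offsets.  This mirrors what _PyMem_DebugCheckAddress()
 * and _PyObject_DebugDumpAddress() do further below.
 */
static size_t
example_debug_requested_size(const void *data)
{
    const uint8_t *q = (const uint8_t *)data;
    /* q[-2*SST .. -SST-1] holds the requested size, stored big-endian */
    return read_size_t(q - 2*SST);
}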
2477
2478static void *
2479_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2480{
2481    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2482    uint8_t *p;           /* base address of malloc'ed pad block */
2483    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2484    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2485    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2486
2487    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2488        /* integer overflow: can't represent total as a Py_ssize_t */
2489        return NULL;
2490    }
2491    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2492
2493    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2494                ^--- p    ^--- data   ^--- tail
2495       S: nbytes stored as size_t
2496       I: API identifier (1 byte)
2497       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2498       C: Clean bytes used later to store actual data
2499       N: Serial number stored as size_t
2500
2501       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2502       is omitted. */
2503
2504    if (use_calloc) {
2505        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2506    }
2507    else {
2508        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2509    }
2510    if (p == NULL) {
2511        return NULL;
2512    }
2513    data = p + 2*SST;
2514
2515#ifdef PYMEM_DEBUG_SERIALNO
2516    bumpserialno();
2517#endif
2518
2519    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2520    write_size_t(p, nbytes);
2521    p[SST] = (uint8_t)api->api_id;
2522    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2523
2524    if (nbytes > 0 && !use_calloc) {
2525        memset(data, PYMEM_CLEANBYTE, nbytes);
2526    }
2527
2528    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2529    tail = data + nbytes;
2530    memset(tail, PYMEM_FORBIDDENBYTE, SST);
2531#ifdef PYMEM_DEBUG_SERIALNO
2532    write_size_t(tail + SST, serialno);
2533#endif
2534
2535    return data;
2536}
2537
2538static void *
2539_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2540{
2541    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2542}
2543
2544static void *
2545_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2546{
2547    size_t nbytes;
2548    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2549    nbytes = nelem * elsize;
2550    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2551}
2552
2553
2554/* The debug free first checks the 2*SST bytes on each end for sanity (in
2555   particular, that the FORBIDDENBYTEs with the api ID are still intact).
2556   Then fills the original bytes with PYMEM_DEADBYTE.
2557   Then calls the underlying free.
2558*/
2559static void
2560_PyMem_DebugRawFree(void *ctx, void *p)
2561{
2562    /* PyMem_Free(NULL) has no effect */
2563    if (p == NULL) {
2564        return;
2565    }
2566
2567    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2568    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2569    size_t nbytes;
2570
2571    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2572    nbytes = read_size_t(q);
2573    nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2574    memset(q, PYMEM_DEADBYTE, nbytes);
2575    api->alloc.free(api->alloc.ctx, q);
2576}
2577
2578
2579static void *
2580_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2581{
2582    if (p == NULL) {
2583        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2584    }
2585
2586    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2587    uint8_t *head;        /* base address of malloc'ed pad block */
2588    uint8_t *data;        /* pointer to data bytes */
2589    uint8_t *r;
2590    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2591    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2592    size_t original_nbytes;
2593#define ERASED_SIZE 64
2594    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2595
2596    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2597
2598    data = (uint8_t *)p;
2599    head = data - 2*SST;
2600    original_nbytes = read_size_t(head);
2601    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2602        /* integer overflow: can't represent total as a Py_ssize_t */
2603        return NULL;
2604    }
2605    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2606
2607    tail = data + original_nbytes;
2608#ifdef PYMEM_DEBUG_SERIALNO
2609    size_t block_serialno = read_size_t(tail + SST);
2610#endif
2611    /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2612       ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2613     */
2614    if (original_nbytes <= sizeof(save)) {
2615        memcpy(save, data, original_nbytes);
2616        memset(data - 2 * SST, PYMEM_DEADBYTE,
2617               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2618    }
2619    else {
2620        memcpy(save, data, ERASED_SIZE);
2621        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2622        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2623        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2624               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2625    }
2626
2627    /* Resize and add decorations. */
2628    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2629    if (r == NULL) {
2630        /* if realloc() failed: rewrite header and footer which have
2631           just been erased */
2632        nbytes = original_nbytes;
2633    }
2634    else {
2635        head = r;
2636#ifdef PYMEM_DEBUG_SERIALNO
2637        bumpserialno();
2638        block_serialno = serialno;
2639#endif
2640    }
2641    data = head + 2*SST;
2642
2643    write_size_t(head, nbytes);
2644    head[SST] = (uint8_t)api->api_id;
2645    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2646
2647    tail = data + nbytes;
2648    memset(tail, PYMEM_FORBIDDENBYTE, SST);
2649#ifdef PYMEM_DEBUG_SERIALNO
2650    write_size_t(tail + SST, block_serialno);
2651#endif
2652
2653    /* Restore saved bytes. */
2654    if (original_nbytes <= sizeof(save)) {
2655        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2656    }
2657    else {
2658        size_t i = original_nbytes - ERASED_SIZE;
2659        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2660        if (nbytes > i) {
2661            memcpy(data + i, &save[ERASED_SIZE],
2662                   Py_MIN(nbytes - i, ERASED_SIZE));
2663        }
2664    }
2665
2666    if (r == NULL) {
2667        return NULL;
2668    }
2669
2670    if (nbytes > original_nbytes) {
2671        /* growing: mark new extra memory clean */
2672        memset(data + original_nbytes, PYMEM_CLEANBYTE,
2673               nbytes - original_nbytes);
2674    }
2675
2676    return data;
2677}
2678
2679static inline void
2680_PyMem_DebugCheckGIL(const char *func)
2681{
2682    if (!PyGILState_Check()) {
2683        _Py_FatalErrorFunc(func,
2684                           "Python memory allocator called "
2685                           "without holding the GIL");
2686    }
2687}
2688
2689static void *
2690_PyMem_DebugMalloc(void *ctx, size_t nbytes)
2691{
2692    _PyMem_DebugCheckGIL(__func__);
2693    return _PyMem_DebugRawMalloc(ctx, nbytes);
2694}
2695
2696static void *
2697_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2698{
2699    _PyMem_DebugCheckGIL(__func__);
2700    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2701}
2702
2703
2704static void
2705_PyMem_DebugFree(void *ctx, void *ptr)
2706{
2707    _PyMem_DebugCheckGIL(__func__);
2708    _PyMem_DebugRawFree(ctx, ptr);
2709}
2710
2711
2712static void *
2713_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2714{
2715    _PyMem_DebugCheckGIL(__func__);
2716    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2717}
2718
2719/* Check the forbidden bytes on both ends of the memory allocated for p.
2720 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2721 * and call Py_FatalError to kill the program.
2722 * The API id is also checked.
2723 */
2724static void
2725_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2726{
2727    assert(p != NULL);
2728
2729    const uint8_t *q = (const uint8_t *)p;
2730    size_t nbytes;
2731    const uint8_t *tail;
2732    int i;
2733    char id;
2734
2735    /* Check the API id */
2736    id = (char)q[-SST];
2737    if (id != api) {
2738        _PyObject_DebugDumpAddress(p);
2739        _Py_FatalErrorFormat(func,
2740                             "bad ID: Allocated using API '%c', "
2741                             "verified using API '%c'",
2742                             id, api);
2743    }
2744
2745    /* Check the stuff at the start of p first:  if there's underwrite
2746     * corruption, the number-of-bytes field may be nuts, and checking
2747     * the tail could lead to a segfault then.
2748     */
2749    for (i = SST-1; i >= 1; --i) {
2750        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2751            _PyObject_DebugDumpAddress(p);
2752            _Py_FatalErrorFunc(func, "bad leading pad byte");
2753        }
2754    }
2755
2756    nbytes = read_size_t(q - 2*SST);
2757    tail = q + nbytes;
2758    for (i = 0; i < SST; ++i) {
2759        if (tail[i] != PYMEM_FORBIDDENBYTE) {
2760            _PyObject_DebugDumpAddress(p);
2761            _Py_FatalErrorFunc(func, "bad trailing pad byte");
2762        }
2763    }
2764}
2765
2766/* Display info to stderr about the memory block at p. */
2767static void
2768_PyObject_DebugDumpAddress(const void *p)
2769{
2770    const uint8_t *q = (const uint8_t *)p;
2771    const uint8_t *tail;
2772    size_t nbytes;
2773    int i;
2774    int ok;
2775    char id;
2776
2777    fprintf(stderr, "Debug memory block at address p=%p:", p);
2778    if (p == NULL) {
2779        fprintf(stderr, "\n");
2780        return;
2781    }
2782    id = (char)q[-SST];
2783    fprintf(stderr, " API '%c'\n", id);
2784
2785    nbytes = read_size_t(q - 2*SST);
2786    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
2787
2788    /* In case this is nuts, check the leading pad bytes first. */
2789    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2790    ok = 1;
2791    for (i = 1; i <= SST-1; ++i) {
2792        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2793            ok = 0;
2794            break;
2795        }
2796    }
2797    if (ok)
2798        fputs("FORBIDDENBYTE, as expected.\n", stderr);
2799    else {
2800        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2801            PYMEM_FORBIDDENBYTE);
2802        for (i = SST-1; i >= 1; --i) {
2803            const uint8_t byte = *(q-i);
2804            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2805            if (byte != PYMEM_FORBIDDENBYTE)
2806                fputs(" *** OUCH", stderr);
2807            fputc('\n', stderr);
2808        }
2809
2810        fputs("    Because memory is corrupted at the start, the "
2811              "count of bytes requested\n"
2812              "       may be bogus, and checking the trailing pad "
2813              "bytes may segfault.\n", stderr);
2814    }
2815
2816    tail = q + nbytes;
2817    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
2818    ok = 1;
2819    for (i = 0; i < SST; ++i) {
2820        if (tail[i] != PYMEM_FORBIDDENBYTE) {
2821            ok = 0;
2822            break;
2823        }
2824    }
2825    if (ok)
2826        fputs("FORBIDDENBYTE, as expected.\n", stderr);
2827    else {
2828        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2829                PYMEM_FORBIDDENBYTE);
2830        for (i = 0; i < SST; ++i) {
2831            const uint8_t byte = tail[i];
2832            fprintf(stderr, "        at tail+%d: 0x%02x",
2833                    i, byte);
2834            if (byte != PYMEM_FORBIDDENBYTE)
2835                fputs(" *** OUCH", stderr);
2836            fputc('\n', stderr);
2837        }
2838    }
2839
2840#ifdef PYMEM_DEBUG_SERIALNO
2841    size_t serial = read_size_t(tail + SST);
2842    fprintf(stderr,
2843            "    The block was made by call #%zu to debug malloc/realloc.\n",
2844            serial);
2845#endif
2846
2847    if (nbytes > 0) {
2848        i = 0;
2849        fputs("    Data at p:", stderr);
2850        /* print up to 8 bytes at the start */
2851        while (q < tail && i < 8) {
2852            fprintf(stderr, " %02x", *q);
2853            ++i;
2854            ++q;
2855        }
2856        /* and up to 8 at the end */
2857        if (q < tail) {
2858            if (tail - q > 8) {
2859                fputs(" ...", stderr);
2860                q = tail - 8;
2861            }
2862            while (q < tail) {
2863                fprintf(stderr, " %02x", *q);
2864                ++q;
2865            }
2866        }
2867        fputc('\n', stderr);
2868    }
2869    fputc('\n', stderr);
2870
2871    fflush(stderr);
2872    _PyMem_DumpTraceback(fileno(stderr), p);
2873}
2874
2875
2876static size_t
2877printone(FILE *out, const char* msg, size_t value)
2878{
2879    int i, k;
2880    char buf[100];
2881    size_t origvalue = value;
2882
2883    fputs(msg, out);
2884    for (i = (int)strlen(msg); i < 35; ++i)
2885        fputc(' ', out);
2886    fputc('=', out);
2887
2888    /* Write the value with commas. */
2889    i = 22;
2890    buf[i--] = '\0';
2891    buf[i--] = '\n';
2892    k = 3;
2893    do {
2894        size_t nextvalue = value / 10;
2895        unsigned int digit = (unsigned int)(value - nextvalue * 10);
2896        value = nextvalue;
2897        buf[i--] = (char)(digit + '0');
2898        --k;
2899        if (k == 0 && value && i >= 0) {
2900            k = 3;
2901            buf[i--] = ',';
2902        }
2903    } while (value && i >= 0);
2904
2905    while (i >= 0)
2906        buf[i--] = ' ';
2907    fputs(buf, out);
2908
2909    return origvalue;
2910}
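
/* Usage sketch (hypothetical): printone() pads the label out to column 35,
 * writes an '=', and then prints the value with thousands separators, so
 * the call below writes a line whose value reads "1,234,567".
 */
static void
example_printone_usage(FILE *out)
{
    (void)printone(out, "# example bytes", (size_t)1234567);
}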
2911
2912void
2913_PyDebugAllocatorStats(FILE *out,
2914                       const char *block_name, int num_blocks, size_t sizeof_block)
2915{
2916    char buf1[128];
2917    char buf2[128];
2918    PyOS_snprintf(buf1, sizeof(buf1),
2919                  "%d %ss * %zd bytes each",
2920                  num_blocks, block_name, sizeof_block);
2921    PyOS_snprintf(buf2, sizeof(buf2),
2922                  "%48s ", buf1);
2923    (void)printone(out, buf2, num_blocks * sizeof_block);
2924}
2925
2926
2927#ifdef WITH_PYMALLOC
2928
2929#ifdef Py_DEBUG
2930/* Is target in the list?  The list is traversed via the nextpool pointers.
2931 * The list may be NULL-terminated, or circular.  Return 1 if target is in
2932 * list, else 0.
2933 */
2934static int
2935pool_is_in_list(const poolp target, poolp list)
2936{
2937    poolp origlist = list;
2938    assert(target != NULL);
2939    if (list == NULL)
2940        return 0;
2941    do {
2942        if (target == list)
2943            return 1;
2944        list = list->nextpool;
2945    } while (list != NULL && list != origlist);
2946    return 0;
2947}
2948#endif
2949
2950/* Print summary info to "out" about the state of pymalloc's structures.
2951 * In Py_DEBUG mode, also perform some expensive internal consistency
2952 * checks.
2953 *
2954 * Return 0 if the memory debug hooks are not installed or no statistics were
2955 * written into out; return 1 otherwise.
2956 */
2957int
2958_PyObject_DebugMallocStats(FILE *out)
2959{
2960    if (!_PyMem_PymallocEnabled()) {
2961        return 0;
2962    }
2963
2964    uint i;
2965    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2966    /* # of pools, allocated blocks, and free blocks per class index */
2967    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2968    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2969    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2970    /* total # of allocated bytes in used and full pools */
2971    size_t allocated_bytes = 0;
2972    /* total # of available bytes in used pools */
2973    size_t available_bytes = 0;
2974    /* # of free pools + pools not yet carved out of current arena */
2975    uint numfreepools = 0;
2976    /* # of bytes for arena alignment padding */
2977    size_t arena_alignment = 0;
2978    /* # of bytes in used and full pools used for pool_headers */
2979    size_t pool_header_bytes = 0;
2980    /* # of bytes in used and full pools wasted due to quantization,
2981     * i.e. the necessarily leftover space at the ends of used and
2982     * full pools.
2983     */
2984    size_t quantization = 0;
2985    /* # of arenas actually allocated. */
2986    size_t narenas = 0;
2987    /* running total -- should equal narenas * ARENA_SIZE */
2988    size_t total;
2989    char buf[128];
2990
2991    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2992            SMALL_REQUEST_THRESHOLD, numclasses);
2993
2994    for (i = 0; i < numclasses; ++i)
2995        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2996
2997    /* Because full pools aren't linked to from anything, it's easiest
2998     * to march over all the arenas.  If we're lucky, most of the memory
2999     * will be living in full pools -- would be a shame to miss them.
3000     */
3001    for (i = 0; i < maxarenas; ++i) {
3002        uintptr_t base = arenas[i].address;
3003
3004        /* Skip arenas which are not allocated. */
3005        if (arenas[i].address == (uintptr_t)NULL)
3006            continue;
3007        narenas += 1;
3008
3009        numfreepools += arenas[i].nfreepools;
3010
3011        /* round up to pool alignment */
3012        if (base & (uintptr_t)POOL_SIZE_MASK) {
3013            arena_alignment += POOL_SIZE;
3014            base &= ~(uintptr_t)POOL_SIZE_MASK;
3015            base += POOL_SIZE;
3016        }
3017
3018        /* visit every pool in the arena */
3019        assert(base <= (uintptr_t) arenas[i].pool_address);
3020        for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
3021            poolp p = (poolp)base;
3022            const uint sz = p->szidx;
3023            uint freeblocks;
3024
3025            if (p->ref.count == 0) {
3026                /* currently unused */
3027#ifdef Py_DEBUG
3028                assert(pool_is_in_list(p, arenas[i].freepools));
3029#endif
3030                continue;
3031            }
3032            ++numpools[sz];
3033            numblocks[sz] += p->ref.count;
3034            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3035            numfreeblocks[sz] += freeblocks;
3036#ifdef Py_DEBUG
3037            if (freeblocks > 0)
3038                assert(pool_is_in_list(p, usedpools[sz + sz]));
3039#endif
3040        }
3041    }
3042    assert(narenas == narenas_currently_allocated);
3043
3044    fputc('\n', out);
3045    fputs("class   size   num pools   blocks in use  avail blocks\n"
3046          "-----   ----   ---------   -------------  ------------\n",
3047          out);
3048
3049    for (i = 0; i < numclasses; ++i) {
3050        size_t p = numpools[i];
3051        size_t b = numblocks[i];
3052        size_t f = numfreeblocks[i];
3053        uint size = INDEX2SIZE(i);
3054        if (p == 0) {
3055            assert(b == 0 && f == 0);
3056            continue;
3057        }
3058        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3059                i, size, p, b, f);
3060        allocated_bytes += b * size;
3061        available_bytes += f * size;
3062        pool_header_bytes += p * POOL_OVERHEAD;
3063        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3064    }
3065    fputc('\n', out);
3066#ifdef PYMEM_DEBUG_SERIALNO
3067    if (_PyMem_DebugEnabled()) {
3068        (void)printone(out, "# times object malloc called", serialno);
3069    }
3070#endif
3071    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3072    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3073    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3074    (void)printone(out, "# arenas allocated current", narenas);
3075
3076    PyOS_snprintf(buf, sizeof(buf),
3077                  "%zu arenas * %d bytes/arena",
3078                  narenas, ARENA_SIZE);
3079    (void)printone(out, buf, narenas * ARENA_SIZE);
3080
3081    fputc('\n', out);
3082
3083    /* Account for what all of those arena bytes are being used for. */
3084    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3085    total += printone(out, "# bytes in available blocks", available_bytes);
3086
3087    PyOS_snprintf(buf, sizeof(buf),
3088        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3089    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3090
3091    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3092    total += printone(out, "# bytes lost to quantization", quantization);
3093    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3094    (void)printone(out, "Total", total);
3095    assert(narenas * ARENA_SIZE == total);
3096
3097#if WITH_PYMALLOC_RADIX_TREE
3098    fputs("\narena map counts\n", out);
3099#ifdef USE_INTERIOR_NODES
3100    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3101    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3102    fputc('\n', out);
3103#endif
3104    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3105#ifdef USE_INTERIOR_NODES
3106    total += printone(out, "# bytes lost to arena map mid",
3107                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3108    total += printone(out, "# bytes lost to arena map bot",
3109                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3110    (void)printone(out, "Total", total);
3111#endif
3112#endif
3113
3114    return 1;
3115}
3116
3117#endif /* #ifdef WITH_PYMALLOC */
3118