1#include "Python.h" 2#include "pycore_pymem.h" // _PyTraceMalloc_Config 3#include "pycore_code.h" // stats 4 5#include <stdbool.h> 6#include <stdlib.h> // malloc() 7 8 9/* Defined in tracemalloc.c */ 10extern void _PyMem_DumpTraceback(int fd, const void *ptr); 11 12 13/* Python's malloc wrappers (see pymem.h) */ 14 15#undef uint 16#define uint unsigned int /* assuming >= 16 bits */ 17 18/* Forward declaration */ 19static void* _PyMem_DebugRawMalloc(void *ctx, size_t size); 20static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize); 21static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size); 22static void _PyMem_DebugRawFree(void *ctx, void *ptr); 23 24static void* _PyMem_DebugMalloc(void *ctx, size_t size); 25static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize); 26static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size); 27static void _PyMem_DebugFree(void *ctx, void *p); 28 29static void _PyObject_DebugDumpAddress(const void *p); 30static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p); 31 32static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain); 33 34#if defined(__has_feature) /* Clang */ 35# if __has_feature(address_sanitizer) /* is ASAN enabled? */ 36# define _Py_NO_SANITIZE_ADDRESS \ 37 __attribute__((no_sanitize("address"))) 38# endif 39# if __has_feature(thread_sanitizer) /* is TSAN enabled? */ 40# define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) 41# endif 42# if __has_feature(memory_sanitizer) /* is MSAN enabled? */ 43# define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory)) 44# endif 45#elif defined(__GNUC__) 46# if defined(__SANITIZE_ADDRESS__) /* GCC 4.8+, is ASAN enabled? */ 47# define _Py_NO_SANITIZE_ADDRESS \ 48 __attribute__((no_sanitize_address)) 49# endif 50 // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro 51 // is provided only since GCC 7. 52# if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1) 53# define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) 54# endif 55#endif 56 57#ifndef _Py_NO_SANITIZE_ADDRESS 58# define _Py_NO_SANITIZE_ADDRESS 59#endif 60#ifndef _Py_NO_SANITIZE_THREAD 61# define _Py_NO_SANITIZE_THREAD 62#endif 63#ifndef _Py_NO_SANITIZE_MEMORY 64# define _Py_NO_SANITIZE_MEMORY 65#endif 66 67#ifdef WITH_PYMALLOC 68 69#ifdef MS_WINDOWS 70# include <windows.h> 71#elif defined(HAVE_MMAP) 72# include <sys/mman.h> 73# ifdef MAP_ANONYMOUS 74# define ARENAS_USE_MMAP 75# endif 76#endif 77 78/* Forward declaration */ 79static void* _PyObject_Malloc(void *ctx, size_t size); 80static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize); 81static void _PyObject_Free(void *ctx, void *p); 82static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size); 83#endif 84 85 86/* bpo-35053: Declare tracemalloc configuration here rather than 87 Modules/_tracemalloc.c because _tracemalloc can be compiled as dynamic 88 library, whereas _Py_NewReference() requires it. */ 89struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT; 90 91 92static void * 93_PyMem_RawMalloc(void *ctx, size_t size) 94{ 95 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL 96 for malloc(0), which would be treated as an error. Some platforms would 97 return a pointer with no memory behind it, which would break pymalloc. 98 To solve these problems, allocate an extra byte. */ 99 if (size == 0) 100 size = 1; 101 return malloc(size); 102} 103 104static void * 105_PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) 106{ 107 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL 108 for calloc(0, 0), which would be treated as an error. Some platforms 109 would return a pointer with no memory behind it, which would break 110 pymalloc. To solve these problems, allocate an extra byte. */ 111 if (nelem == 0 || elsize == 0) { 112 nelem = 1; 113 elsize = 1; 114 } 115 return calloc(nelem, elsize); 116} 117 118static void * 119_PyMem_RawRealloc(void *ctx, void *ptr, size_t size) 120{ 121 if (size == 0) 122 size = 1; 123 return realloc(ptr, size); 124} 125 126static void 127_PyMem_RawFree(void *ctx, void *ptr) 128{ 129 free(ptr); 130} 131 132 133#ifdef MS_WINDOWS 134static void * 135_PyObject_ArenaVirtualAlloc(void *ctx, size_t size) 136{ 137 return VirtualAlloc(NULL, size, 138 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); 139} 140 141static void 142_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size) 143{ 144 VirtualFree(ptr, 0, MEM_RELEASE); 145} 146 147#elif defined(ARENAS_USE_MMAP) 148static void * 149_PyObject_ArenaMmap(void *ctx, size_t size) 150{ 151 void *ptr; 152 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, 153 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 154 if (ptr == MAP_FAILED) 155 return NULL; 156 assert(ptr != NULL); 157 return ptr; 158} 159 160static void 161_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size) 162{ 163 munmap(ptr, size); 164} 165 166#else 167static void * 168_PyObject_ArenaMalloc(void *ctx, size_t size) 169{ 170 return malloc(size); 171} 172 173static void 174_PyObject_ArenaFree(void *ctx, void *ptr, size_t size) 175{ 176 free(ptr); 177} 178#endif 179 180#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree} 181#ifdef WITH_PYMALLOC 182# define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free} 183#endif 184 185#define PYRAW_ALLOC MALLOC_ALLOC 186#ifdef WITH_PYMALLOC 187# define PYOBJ_ALLOC PYMALLOC_ALLOC 188#else 189# define PYOBJ_ALLOC MALLOC_ALLOC 190#endif 191#define PYMEM_ALLOC PYOBJ_ALLOC 192 193typedef struct { 194 /* We tag each block with an API ID in order to tag API violations */ 195 char api_id; 196 PyMemAllocatorEx alloc; 197} debug_alloc_api_t; 198static struct { 199 debug_alloc_api_t raw; 200 debug_alloc_api_t mem; 201 debug_alloc_api_t obj; 202} _PyMem_Debug = { 203 {'r', PYRAW_ALLOC}, 204 {'m', PYMEM_ALLOC}, 205 {'o', PYOBJ_ALLOC} 206 }; 207 208#define PYDBGRAW_ALLOC \ 209 {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree} 210#define PYDBGMEM_ALLOC \ 211 {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree} 212#define PYDBGOBJ_ALLOC \ 213 {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree} 214 215#ifdef Py_DEBUG 216static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC; 217static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC; 218static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC; 219#else 220static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC; 221static PyMemAllocatorEx _PyMem = PYMEM_ALLOC; 222static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC; 223#endif 224 225 226static int 227pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug, 228 PyMemAllocatorEx *old_alloc) 229{ 230 if (old_alloc != NULL) { 231 PyMem_GetAllocator(domain, old_alloc); 232 } 233 234 235 PyMemAllocatorEx new_alloc; 236 switch(domain) 237 { 238 case PYMEM_DOMAIN_RAW: 239 new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC; 240 break; 241 case PYMEM_DOMAIN_MEM: 242 new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC; 243 break; 244 case PYMEM_DOMAIN_OBJ: 245 new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC; 246 break; 247 default: 248 /* unknown domain */ 249 return -1; 250 } 251 PyMem_SetAllocator(domain, &new_alloc); 252 if (debug) { 253 _PyMem_SetupDebugHooksDomain(domain); 254 } 255 return 0; 256} 257 258 259int 260_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain, 261 PyMemAllocatorEx *old_alloc) 262{ 263#ifdef Py_DEBUG 264 const int debug = 1; 265#else 266 const int debug = 0; 267#endif 268 return pymem_set_default_allocator(domain, debug, old_alloc); 269} 270 271 272int 273_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator) 274{ 275 if (name == NULL || *name == '\0') { 276 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line 277 nameions): use default memory allocators */ 278 *allocator = PYMEM_ALLOCATOR_DEFAULT; 279 } 280 else if (strcmp(name, "default") == 0) { 281 *allocator = PYMEM_ALLOCATOR_DEFAULT; 282 } 283 else if (strcmp(name, "debug") == 0) { 284 *allocator = PYMEM_ALLOCATOR_DEBUG; 285 } 286#ifdef WITH_PYMALLOC 287 else if (strcmp(name, "pymalloc") == 0) { 288 *allocator = PYMEM_ALLOCATOR_PYMALLOC; 289 } 290 else if (strcmp(name, "pymalloc_debug") == 0) { 291 *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG; 292 } 293#endif 294 else if (strcmp(name, "malloc") == 0) { 295 *allocator = PYMEM_ALLOCATOR_MALLOC; 296 } 297 else if (strcmp(name, "malloc_debug") == 0) { 298 *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG; 299 } 300 else { 301 /* unknown allocator */ 302 return -1; 303 } 304 return 0; 305} 306 307 308int 309_PyMem_SetupAllocators(PyMemAllocatorName allocator) 310{ 311 switch (allocator) { 312 case PYMEM_ALLOCATOR_NOT_SET: 313 /* do nothing */ 314 break; 315 316 case PYMEM_ALLOCATOR_DEFAULT: 317 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL); 318 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL); 319 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL); 320 break; 321 322 case PYMEM_ALLOCATOR_DEBUG: 323 (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL); 324 (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL); 325 (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL); 326 break; 327 328#ifdef WITH_PYMALLOC 329 case PYMEM_ALLOCATOR_PYMALLOC: 330 case PYMEM_ALLOCATOR_PYMALLOC_DEBUG: 331 { 332 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC; 333 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc); 334 335 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC; 336 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc); 337 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc); 338 339 if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) { 340 PyMem_SetupDebugHooks(); 341 } 342 break; 343 } 344#endif 345 346 case PYMEM_ALLOCATOR_MALLOC: 347 case PYMEM_ALLOCATOR_MALLOC_DEBUG: 348 { 349 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC; 350 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc); 351 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc); 352 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc); 353 354 if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) { 355 PyMem_SetupDebugHooks(); 356 } 357 break; 358 } 359 360 default: 361 /* unknown allocator */ 362 return -1; 363 } 364 return 0; 365} 366 367 368static int 369pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b) 370{ 371 return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0); 372} 373 374 375const char* 376_PyMem_GetCurrentAllocatorName(void) 377{ 378 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC; 379#ifdef WITH_PYMALLOC 380 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC; 381#endif 382 383 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) && 384 pymemallocator_eq(&_PyMem, &malloc_alloc) && 385 pymemallocator_eq(&_PyObject, &malloc_alloc)) 386 { 387 return "malloc"; 388 } 389#ifdef WITH_PYMALLOC 390 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) && 391 pymemallocator_eq(&_PyMem, &pymalloc) && 392 pymemallocator_eq(&_PyObject, &pymalloc)) 393 { 394 return "pymalloc"; 395 } 396#endif 397 398 PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC; 399 PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC; 400 PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC; 401 402 if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) && 403 pymemallocator_eq(&_PyMem, &dbg_mem) && 404 pymemallocator_eq(&_PyObject, &dbg_obj)) 405 { 406 /* Debug hooks installed */ 407 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) && 408 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) && 409 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc)) 410 { 411 return "malloc_debug"; 412 } 413#ifdef WITH_PYMALLOC 414 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) && 415 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) && 416 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc)) 417 { 418 return "pymalloc_debug"; 419 } 420#endif 421 } 422 return NULL; 423} 424 425 426#undef MALLOC_ALLOC 427#undef PYMALLOC_ALLOC 428#undef PYRAW_ALLOC 429#undef PYMEM_ALLOC 430#undef PYOBJ_ALLOC 431#undef PYDBGRAW_ALLOC 432#undef PYDBGMEM_ALLOC 433#undef PYDBGOBJ_ALLOC 434 435 436static PyObjectArenaAllocator _PyObject_Arena = {NULL, 437#ifdef MS_WINDOWS 438 _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree 439#elif defined(ARENAS_USE_MMAP) 440 _PyObject_ArenaMmap, _PyObject_ArenaMunmap 441#else 442 _PyObject_ArenaMalloc, _PyObject_ArenaFree 443#endif 444 }; 445 446#ifdef WITH_PYMALLOC 447static int 448_PyMem_DebugEnabled(void) 449{ 450 return (_PyObject.malloc == _PyMem_DebugMalloc); 451} 452 453static int 454_PyMem_PymallocEnabled(void) 455{ 456 if (_PyMem_DebugEnabled()) { 457 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc); 458 } 459 else { 460 return (_PyObject.malloc == _PyObject_Malloc); 461 } 462} 463#endif 464 465 466static void 467_PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain) 468{ 469 PyMemAllocatorEx alloc; 470 471 if (domain == PYMEM_DOMAIN_RAW) { 472 if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) { 473 return; 474 } 475 476 PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc); 477 alloc.ctx = &_PyMem_Debug.raw; 478 alloc.malloc = _PyMem_DebugRawMalloc; 479 alloc.calloc = _PyMem_DebugRawCalloc; 480 alloc.realloc = _PyMem_DebugRawRealloc; 481 alloc.free = _PyMem_DebugRawFree; 482 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc); 483 } 484 else if (domain == PYMEM_DOMAIN_MEM) { 485 if (_PyMem.malloc == _PyMem_DebugMalloc) { 486 return; 487 } 488 489 PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc); 490 alloc.ctx = &_PyMem_Debug.mem; 491 alloc.malloc = _PyMem_DebugMalloc; 492 alloc.calloc = _PyMem_DebugCalloc; 493 alloc.realloc = _PyMem_DebugRealloc; 494 alloc.free = _PyMem_DebugFree; 495 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc); 496 } 497 else if (domain == PYMEM_DOMAIN_OBJ) { 498 if (_PyObject.malloc == _PyMem_DebugMalloc) { 499 return; 500 } 501 502 PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc); 503 alloc.ctx = &_PyMem_Debug.obj; 504 alloc.malloc = _PyMem_DebugMalloc; 505 alloc.calloc = _PyMem_DebugCalloc; 506 alloc.realloc = _PyMem_DebugRealloc; 507 alloc.free = _PyMem_DebugFree; 508 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc); 509 } 510} 511 512 513void 514PyMem_SetupDebugHooks(void) 515{ 516 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW); 517 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM); 518 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ); 519} 520 521void 522PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) 523{ 524 switch(domain) 525 { 526 case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break; 527 case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break; 528 case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break; 529 default: 530 /* unknown domain: set all attributes to NULL */ 531 allocator->ctx = NULL; 532 allocator->malloc = NULL; 533 allocator->calloc = NULL; 534 allocator->realloc = NULL; 535 allocator->free = NULL; 536 } 537} 538 539void 540PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) 541{ 542 switch(domain) 543 { 544 case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break; 545 case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break; 546 case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break; 547 /* ignore unknown domain */ 548 } 549} 550 551void 552PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator) 553{ 554 *allocator = _PyObject_Arena; 555} 556 557void * 558_PyObject_VirtualAlloc(size_t size) 559{ 560 return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size); 561} 562 563void 564_PyObject_VirtualFree(void *obj, size_t size) 565{ 566 _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size); 567} 568 569void 570PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator) 571{ 572 _PyObject_Arena = *allocator; 573} 574 575void * 576PyMem_RawMalloc(size_t size) 577{ 578 /* 579 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. 580 * Most python internals blindly use a signed Py_ssize_t to track 581 * things without checking for overflows or negatives. 582 * As size_t is unsigned, checking for size < 0 is not required. 583 */ 584 if (size > (size_t)PY_SSIZE_T_MAX) 585 return NULL; 586 return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); 587} 588 589void * 590PyMem_RawCalloc(size_t nelem, size_t elsize) 591{ 592 /* see PyMem_RawMalloc() */ 593 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) 594 return NULL; 595 return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize); 596} 597 598void* 599PyMem_RawRealloc(void *ptr, size_t new_size) 600{ 601 /* see PyMem_RawMalloc() */ 602 if (new_size > (size_t)PY_SSIZE_T_MAX) 603 return NULL; 604 return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); 605} 606 607void PyMem_RawFree(void *ptr) 608{ 609 _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); 610} 611 612 613void * 614PyMem_Malloc(size_t size) 615{ 616 /* see PyMem_RawMalloc() */ 617 if (size > (size_t)PY_SSIZE_T_MAX) 618 return NULL; 619 OBJECT_STAT_INC_COND(allocations512, size < 512); 620 OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094); 621 OBJECT_STAT_INC_COND(allocations_big, size >= 4094); 622 OBJECT_STAT_INC(allocations); 623 return _PyMem.malloc(_PyMem.ctx, size); 624} 625 626void * 627PyMem_Calloc(size_t nelem, size_t elsize) 628{ 629 /* see PyMem_RawMalloc() */ 630 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) 631 return NULL; 632 OBJECT_STAT_INC_COND(allocations512, elsize < 512); 633 OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094); 634 OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094); 635 OBJECT_STAT_INC(allocations); 636 return _PyMem.calloc(_PyMem.ctx, nelem, elsize); 637} 638 639void * 640PyMem_Realloc(void *ptr, size_t new_size) 641{ 642 /* see PyMem_RawMalloc() */ 643 if (new_size > (size_t)PY_SSIZE_T_MAX) 644 return NULL; 645 return _PyMem.realloc(_PyMem.ctx, ptr, new_size); 646} 647 648void 649PyMem_Free(void *ptr) 650{ 651 OBJECT_STAT_INC(frees); 652 _PyMem.free(_PyMem.ctx, ptr); 653} 654 655 656wchar_t* 657_PyMem_RawWcsdup(const wchar_t *str) 658{ 659 assert(str != NULL); 660 661 size_t len = wcslen(str); 662 if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { 663 return NULL; 664 } 665 666 size_t size = (len + 1) * sizeof(wchar_t); 667 wchar_t *str2 = PyMem_RawMalloc(size); 668 if (str2 == NULL) { 669 return NULL; 670 } 671 672 memcpy(str2, str, size); 673 return str2; 674} 675 676char * 677_PyMem_RawStrdup(const char *str) 678{ 679 assert(str != NULL); 680 size_t size = strlen(str) + 1; 681 char *copy = PyMem_RawMalloc(size); 682 if (copy == NULL) { 683 return NULL; 684 } 685 memcpy(copy, str, size); 686 return copy; 687} 688 689char * 690_PyMem_Strdup(const char *str) 691{ 692 assert(str != NULL); 693 size_t size = strlen(str) + 1; 694 char *copy = PyMem_Malloc(size); 695 if (copy == NULL) { 696 return NULL; 697 } 698 memcpy(copy, str, size); 699 return copy; 700} 701 702void * 703PyObject_Malloc(size_t size) 704{ 705 /* see PyMem_RawMalloc() */ 706 if (size > (size_t)PY_SSIZE_T_MAX) 707 return NULL; 708 OBJECT_STAT_INC_COND(allocations512, size < 512); 709 OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094); 710 OBJECT_STAT_INC_COND(allocations_big, size >= 4094); 711 OBJECT_STAT_INC(allocations); 712 return _PyObject.malloc(_PyObject.ctx, size); 713} 714 715void * 716PyObject_Calloc(size_t nelem, size_t elsize) 717{ 718 /* see PyMem_RawMalloc() */ 719 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) 720 return NULL; 721 OBJECT_STAT_INC_COND(allocations512, elsize < 512); 722 OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094); 723 OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094); 724 OBJECT_STAT_INC(allocations); 725 return _PyObject.calloc(_PyObject.ctx, nelem, elsize); 726} 727 728void * 729PyObject_Realloc(void *ptr, size_t new_size) 730{ 731 /* see PyMem_RawMalloc() */ 732 if (new_size > (size_t)PY_SSIZE_T_MAX) 733 return NULL; 734 return _PyObject.realloc(_PyObject.ctx, ptr, new_size); 735} 736 737void 738PyObject_Free(void *ptr) 739{ 740 OBJECT_STAT_INC(frees); 741 _PyObject.free(_PyObject.ctx, ptr); 742} 743 744 745/* If we're using GCC, use __builtin_expect() to reduce overhead of 746 the valgrind checks */ 747#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__) 748# define UNLIKELY(value) __builtin_expect((value), 0) 749# define LIKELY(value) __builtin_expect((value), 1) 750#else 751# define UNLIKELY(value) (value) 752# define LIKELY(value) (value) 753#endif 754 755#ifdef WITH_PYMALLOC 756 757#ifdef WITH_VALGRIND 758#include <valgrind/valgrind.h> 759 760/* -1 indicates that we haven't checked that we're running on valgrind yet. */ 761static int running_on_valgrind = -1; 762#endif 763 764 765/* An object allocator for Python. 766 767 Here is an introduction to the layers of the Python memory architecture, 768 showing where the object allocator is actually used (layer +2), It is 769 called for every object allocation and deallocation (PyObject_New/Del), 770 unless the object-specific allocators implement a proprietary allocation 771 scheme (ex.: ints use a simple free list). This is also the place where 772 the cyclic garbage collector operates selectively on container objects. 773 774 775 Object-specific allocators 776 _____ ______ ______ ________ 777 [ int ] [ dict ] [ list ] ... [ string ] Python core | 778+3 | <----- Object-specific memory -----> | <-- Non-object memory --> | 779 _______________________________ | | 780 [ Python's object allocator ] | | 781+2 | ####### Object memory ####### | <------ Internal buffers ------> | 782 ______________________________________________________________ | 783 [ Python's raw memory allocator (PyMem_ API) ] | 784+1 | <----- Python memory (under PyMem manager's control) ------> | | 785 __________________________________________________________________ 786 [ Underlying general-purpose allocator (ex: C library malloc) ] 787 0 | <------ Virtual memory allocated for the python process -------> | 788 789 ========================================================================= 790 _______________________________________________________________________ 791 [ OS-specific Virtual Memory Manager (VMM) ] 792-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> | 793 __________________________________ __________________________________ 794 [ ] [ ] 795-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> | 796 797*/ 798/*==========================================================================*/ 799 800/* A fast, special-purpose memory allocator for small blocks, to be used 801 on top of a general-purpose malloc -- heavily based on previous art. */ 802 803/* Vladimir Marangozov -- August 2000 */ 804 805/* 806 * "Memory management is where the rubber meets the road -- if we do the wrong 807 * thing at any level, the results will not be good. And if we don't make the 808 * levels work well together, we are in serious trouble." (1) 809 * 810 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles, 811 * "Dynamic Storage Allocation: A Survey and Critical Review", 812 * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995. 813 */ 814 815/* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */ 816 817/*==========================================================================*/ 818 819/* 820 * Allocation strategy abstract: 821 * 822 * For small requests, the allocator sub-allocates <Big> blocks of memory. 823 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the 824 * system's allocator. 825 * 826 * Small requests are grouped in size classes spaced 8 bytes apart, due 827 * to the required valid alignment of the returned address. Requests of 828 * a particular size are serviced from memory pools of 4K (one VMM page). 829 * Pools are fragmented on demand and contain free lists of blocks of one 830 * particular size class. In other words, there is a fixed-size allocator 831 * for each size class. Free pools are shared by the different allocators 832 * thus minimizing the space reserved for a particular size class. 833 * 834 * This allocation strategy is a variant of what is known as "simple 835 * segregated storage based on array of free lists". The main drawback of 836 * simple segregated storage is that we might end up with lot of reserved 837 * memory for the different free lists, which degenerate in time. To avoid 838 * this, we partition each free list in pools and we share dynamically the 839 * reserved space between all free lists. This technique is quite efficient 840 * for memory intensive programs which allocate mainly small-sized blocks. 841 * 842 * For small requests we have the following table: 843 * 844 * Request in bytes Size of allocated block Size class idx 845 * ---------------------------------------------------------------- 846 * 1-8 8 0 847 * 9-16 16 1 848 * 17-24 24 2 849 * 25-32 32 3 850 * 33-40 40 4 851 * 41-48 48 5 852 * 49-56 56 6 853 * 57-64 64 7 854 * 65-72 72 8 855 * ... ... ... 856 * 497-504 504 62 857 * 505-512 512 63 858 * 859 * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying 860 * allocator. 861 */ 862 863/*==========================================================================*/ 864 865/* 866 * -- Main tunable settings section -- 867 */ 868 869/* 870 * Alignment of addresses returned to the user. 8-bytes alignment works 871 * on most current architectures (with 32-bit or 64-bit address buses). 872 * The alignment value is also used for grouping small requests in size 873 * classes spaced ALIGNMENT bytes apart. 874 * 875 * You shouldn't change this unless you know what you are doing. 876 */ 877 878#if SIZEOF_VOID_P > 4 879#define ALIGNMENT 16 /* must be 2^N */ 880#define ALIGNMENT_SHIFT 4 881#else 882#define ALIGNMENT 8 /* must be 2^N */ 883#define ALIGNMENT_SHIFT 3 884#endif 885 886/* Return the number of bytes in size class I, as a uint. */ 887#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT) 888 889/* 890 * Max size threshold below which malloc requests are considered to be 891 * small enough in order to use preallocated memory pools. You can tune 892 * this value according to your application behaviour and memory needs. 893 * 894 * Note: a size threshold of 512 guarantees that newly created dictionaries 895 * will be allocated from preallocated memory pools on 64-bit. 896 * 897 * The following invariants must hold: 898 * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512 899 * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT 900 * 901 * Although not required, for better performance and space efficiency, 902 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2. 903 */ 904#define SMALL_REQUEST_THRESHOLD 512 905#define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT) 906 907/* 908 * The system's VMM page size can be obtained on most unices with a 909 * getpagesize() call or deduced from various header files. To make 910 * things simpler, we assume that it is 4K, which is OK for most systems. 911 * It is probably better if this is the native page size, but it doesn't 912 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page 913 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation 914 * violation fault. 4K is apparently OK for all the platforms that python 915 * currently targets. 916 */ 917#define SYSTEM_PAGE_SIZE (4 * 1024) 918 919/* 920 * Maximum amount of memory managed by the allocator for small requests. 921 */ 922#ifdef WITH_MEMORY_LIMITS 923#ifndef SMALL_MEMORY_LIMIT 924#define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */ 925#endif 926#endif 927 928#if !defined(WITH_PYMALLOC_RADIX_TREE) 929/* Use radix-tree to track arena memory regions, for address_in_range(). 930 * Enable by default since it allows larger pool sizes. Can be disabled 931 * using -DWITH_PYMALLOC_RADIX_TREE=0 */ 932#define WITH_PYMALLOC_RADIX_TREE 1 933#endif 934 935#if SIZEOF_VOID_P > 4 936/* on 64-bit platforms use larger pools and arenas if we can */ 937#define USE_LARGE_ARENAS 938#if WITH_PYMALLOC_RADIX_TREE 939/* large pools only supported if radix-tree is enabled */ 940#define USE_LARGE_POOLS 941#endif 942#endif 943 944/* 945 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned 946 * on a page boundary. This is a reserved virtual address space for the 947 * current process (obtained through a malloc()/mmap() call). In no way this 948 * means that the memory arenas will be used entirely. A malloc(<Big>) is 949 * usually an address range reservation for <Big> bytes, unless all pages within 950 * this space are referenced subsequently. So malloc'ing big blocks and not 951 * using them does not mean "wasting memory". It's an addressable range 952 * wastage... 953 * 954 * Arenas are allocated with mmap() on systems supporting anonymous memory 955 * mappings to reduce heap fragmentation. 956 */ 957#ifdef USE_LARGE_ARENAS 958#define ARENA_BITS 20 /* 1 MiB */ 959#else 960#define ARENA_BITS 18 /* 256 KiB */ 961#endif 962#define ARENA_SIZE (1 << ARENA_BITS) 963#define ARENA_SIZE_MASK (ARENA_SIZE - 1) 964 965#ifdef WITH_MEMORY_LIMITS 966#define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) 967#endif 968 969/* 970 * Size of the pools used for small blocks. Must be a power of 2. 971 */ 972#ifdef USE_LARGE_POOLS 973#define POOL_BITS 14 /* 16 KiB */ 974#else 975#define POOL_BITS 12 /* 4 KiB */ 976#endif 977#define POOL_SIZE (1 << POOL_BITS) 978#define POOL_SIZE_MASK (POOL_SIZE - 1) 979 980#if !WITH_PYMALLOC_RADIX_TREE 981#if POOL_SIZE != SYSTEM_PAGE_SIZE 982# error "pool size must be equal to system page size" 983#endif 984#endif 985 986#define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE) 987#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE 988# error "arena size not an exact multiple of pool size" 989#endif 990 991/* 992 * -- End of tunable settings section -- 993 */ 994 995/*==========================================================================*/ 996 997/* When you say memory, my mind reasons in terms of (pointers to) blocks */ 998typedef uint8_t block; 999 1000/* Pool for small blocks. */ 1001struct pool_header { 1002 union { block *_padding; 1003 uint count; } ref; /* number of allocated blocks */ 1004 block *freeblock; /* pool's free list head */ 1005 struct pool_header *nextpool; /* next pool of this size class */ 1006 struct pool_header *prevpool; /* previous pool "" */ 1007 uint arenaindex; /* index into arenas of base adr */ 1008 uint szidx; /* block size class index */ 1009 uint nextoffset; /* bytes to virgin block */ 1010 uint maxnextoffset; /* largest valid nextoffset */ 1011}; 1012 1013typedef struct pool_header *poolp; 1014 1015/* Record keeping for arenas. */ 1016struct arena_object { 1017 /* The address of the arena, as returned by malloc. Note that 0 1018 * will never be returned by a successful malloc, and is used 1019 * here to mark an arena_object that doesn't correspond to an 1020 * allocated arena. 1021 */ 1022 uintptr_t address; 1023 1024 /* Pool-aligned pointer to the next pool to be carved off. */ 1025 block* pool_address; 1026 1027 /* The number of available pools in the arena: free pools + never- 1028 * allocated pools. 1029 */ 1030 uint nfreepools; 1031 1032 /* The total number of pools in the arena, whether or not available. */ 1033 uint ntotalpools; 1034 1035 /* Singly-linked list of available pools. */ 1036 struct pool_header* freepools; 1037 1038 /* Whenever this arena_object is not associated with an allocated 1039 * arena, the nextarena member is used to link all unassociated 1040 * arena_objects in the singly-linked `unused_arena_objects` list. 1041 * The prevarena member is unused in this case. 1042 * 1043 * When this arena_object is associated with an allocated arena 1044 * with at least one available pool, both members are used in the 1045 * doubly-linked `usable_arenas` list, which is maintained in 1046 * increasing order of `nfreepools` values. 1047 * 1048 * Else this arena_object is associated with an allocated arena 1049 * all of whose pools are in use. `nextarena` and `prevarena` 1050 * are both meaningless in this case. 1051 */ 1052 struct arena_object* nextarena; 1053 struct arena_object* prevarena; 1054}; 1055 1056#define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT) 1057 1058#define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */ 1059 1060/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */ 1061#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE)) 1062 1063/* Return total number of blocks in pool of size index I, as a uint. */ 1064#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I)) 1065 1066/*==========================================================================*/ 1067 1068/* 1069 * Pool table -- headed, circular, doubly-linked lists of partially used pools. 1070 1071This is involved. For an index i, usedpools[i+i] is the header for a list of 1072all partially used pools holding small blocks with "size class idx" i. So 1073usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size 107416, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT. 1075 1076Pools are carved off an arena's highwater mark (an arena_object's pool_address 1077member) as needed. Once carved off, a pool is in one of three states forever 1078after: 1079 1080used == partially used, neither empty nor full 1081 At least one block in the pool is currently allocated, and at least one 1082 block in the pool is not currently allocated (note this implies a pool 1083 has room for at least two blocks). 1084 This is a pool's initial state, as a pool is created only when malloc 1085 needs space. 1086 The pool holds blocks of a fixed size, and is in the circular list headed 1087 at usedpools[i] (see above). It's linked to the other used pools of the 1088 same size class via the pool_header's nextpool and prevpool members. 1089 If all but one block is currently allocated, a malloc can cause a 1090 transition to the full state. If all but one block is not currently 1091 allocated, a free can cause a transition to the empty state. 1092 1093full == all the pool's blocks are currently allocated 1094 On transition to full, a pool is unlinked from its usedpools[] list. 1095 It's not linked to from anything then anymore, and its nextpool and 1096 prevpool members are meaningless until it transitions back to used. 1097 A free of a block in a full pool puts the pool back in the used state. 1098 Then it's linked in at the front of the appropriate usedpools[] list, so 1099 that the next allocation for its size class will reuse the freed block. 1100 1101empty == all the pool's blocks are currently available for allocation 1102 On transition to empty, a pool is unlinked from its usedpools[] list, 1103 and linked to the front of its arena_object's singly-linked freepools list, 1104 via its nextpool member. The prevpool member has no meaning in this case. 1105 Empty pools have no inherent size class: the next time a malloc finds 1106 an empty list in usedpools[], it takes the first pool off of freepools. 1107 If the size class needed happens to be the same as the size class the pool 1108 last had, some pool initialization can be skipped. 1109 1110 1111Block Management 1112 1113Blocks within pools are again carved out as needed. pool->freeblock points to 1114the start of a singly-linked list of free blocks within the pool. When a 1115block is freed, it's inserted at the front of its pool's freeblock list. Note 1116that the available blocks in a pool are *not* linked all together when a pool 1117is initialized. Instead only "the first two" (lowest addresses) blocks are 1118set up, returning the first such block, and setting pool->freeblock to a 1119one-block list holding the second such block. This is consistent with that 1120pymalloc strives at all levels (arena, pool, and block) never to touch a piece 1121of memory until it's actually needed. 1122 1123So long as a pool is in the used state, we're certain there *is* a block 1124available for allocating, and pool->freeblock is not NULL. If pool->freeblock 1125points to the end of the free list before we've carved the entire pool into 1126blocks, that means we simply haven't yet gotten to one of the higher-address 1127blocks. The offset from the pool_header to the start of "the next" virgin 1128block is stored in the pool_header nextoffset member, and the largest value 1129of nextoffset that makes sense is stored in the maxnextoffset member when a 1130pool is initialized. All the blocks in a pool have been passed out at least 1131once when and only when nextoffset > maxnextoffset. 1132 1133 1134Major obscurity: While the usedpools vector is declared to have poolp 1135entries, it doesn't really. It really contains two pointers per (conceptual) 1136poolp entry, the nextpool and prevpool members of a pool_header. The 1137excruciating initialization code below fools C so that 1138 1139 usedpool[i+i] 1140 1141"acts like" a genuine poolp, but only so long as you only reference its 1142nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is 1143compensating for that a pool_header's nextpool and prevpool members 1144immediately follow a pool_header's first two members: 1145 1146 union { block *_padding; 1147 uint count; } ref; 1148 block *freeblock; 1149 1150each of which consume sizeof(block *) bytes. So what usedpools[i+i] really 1151contains is a fudged-up pointer p such that *if* C believes it's a poolp 1152pointer, then p->nextpool and p->prevpool are both p (meaning that the headed 1153circular list is empty). 1154 1155It's unclear why the usedpools setup is so convoluted. It could be to 1156minimize the amount of cache required to hold this heavily-referenced table 1157(which only *needs* the two interpool pointer members of a pool_header). OTOH, 1158referencing code has to remember to "double the index" and doing so isn't 1159free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying 1160on that C doesn't insert any padding anywhere in a pool_header at or before 1161the prevpool member. 1162**************************************************************************** */ 1163 1164#define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *))) 1165#define PT(x) PTA(x), PTA(x) 1166 1167static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = { 1168 PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7) 1169#if NB_SMALL_SIZE_CLASSES > 8 1170 , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15) 1171#if NB_SMALL_SIZE_CLASSES > 16 1172 , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23) 1173#if NB_SMALL_SIZE_CLASSES > 24 1174 , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31) 1175#if NB_SMALL_SIZE_CLASSES > 32 1176 , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39) 1177#if NB_SMALL_SIZE_CLASSES > 40 1178 , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47) 1179#if NB_SMALL_SIZE_CLASSES > 48 1180 , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55) 1181#if NB_SMALL_SIZE_CLASSES > 56 1182 , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63) 1183#if NB_SMALL_SIZE_CLASSES > 64 1184#error "NB_SMALL_SIZE_CLASSES should be less than 64" 1185#endif /* NB_SMALL_SIZE_CLASSES > 64 */ 1186#endif /* NB_SMALL_SIZE_CLASSES > 56 */ 1187#endif /* NB_SMALL_SIZE_CLASSES > 48 */ 1188#endif /* NB_SMALL_SIZE_CLASSES > 40 */ 1189#endif /* NB_SMALL_SIZE_CLASSES > 32 */ 1190#endif /* NB_SMALL_SIZE_CLASSES > 24 */ 1191#endif /* NB_SMALL_SIZE_CLASSES > 16 */ 1192#endif /* NB_SMALL_SIZE_CLASSES > 8 */ 1193}; 1194 1195/*========================================================================== 1196Arena management. 1197 1198`arenas` is a vector of arena_objects. It contains maxarenas entries, some of 1199which may not be currently used (== they're arena_objects that aren't 1200currently associated with an allocated arena). Note that arenas proper are 1201separately malloc'ed. 1202 1203Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5, 1204we do try to free() arenas, and use some mild heuristic strategies to increase 1205the likelihood that arenas eventually can be freed. 1206 1207unused_arena_objects 1208 1209 This is a singly-linked list of the arena_objects that are currently not 1210 being used (no arena is associated with them). Objects are taken off the 1211 head of the list in new_arena(), and are pushed on the head of the list in 1212 PyObject_Free() when the arena is empty. Key invariant: an arena_object 1213 is on this list if and only if its .address member is 0. 1214 1215usable_arenas 1216 1217 This is a doubly-linked list of the arena_objects associated with arenas 1218 that have pools available. These pools are either waiting to be reused, 1219 or have not been used before. The list is sorted to have the most- 1220 allocated arenas first (ascending order based on the nfreepools member). 1221 This means that the next allocation will come from a heavily used arena, 1222 which gives the nearly empty arenas a chance to be returned to the system. 1223 In my unscientific tests this dramatically improved the number of arenas 1224 that could be freed. 1225 1226Note that an arena_object associated with an arena all of whose pools are 1227currently in use isn't on either list. 1228 1229Changed in Python 3.8: keeping usable_arenas sorted by number of free pools 1230used to be done by one-at-a-time linear search when an arena's number of 1231free pools changed. That could, overall, consume time quadratic in the 1232number of arenas. That didn't really matter when there were only a few 1233hundred arenas (typical!), but could be a timing disaster when there were 1234hundreds of thousands. See bpo-37029. 1235 1236Now we have a vector of "search fingers" to eliminate the need to search: 1237nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas 1238with nfp free pools. This is NULL if and only if there is no arena with 1239nfp free pools in usable_arenas. 1240*/ 1241 1242/* Array of objects used to track chunks of memory (arenas). */ 1243static struct arena_object* arenas = NULL; 1244/* Number of slots currently allocated in the `arenas` vector. */ 1245static uint maxarenas = 0; 1246 1247/* The head of the singly-linked, NULL-terminated list of available 1248 * arena_objects. 1249 */ 1250static struct arena_object* unused_arena_objects = NULL; 1251 1252/* The head of the doubly-linked, NULL-terminated at each end, list of 1253 * arena_objects associated with arenas that have pools available. 1254 */ 1255static struct arena_object* usable_arenas = NULL; 1256 1257/* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */ 1258static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL }; 1259 1260/* How many arena_objects do we initially allocate? 1261 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the 1262 * `arenas` vector. 1263 */ 1264#define INITIAL_ARENA_OBJECTS 16 1265 1266/* Number of arenas allocated that haven't been free()'d. */ 1267static size_t narenas_currently_allocated = 0; 1268 1269/* Total number of times malloc() called to allocate an arena. */ 1270static size_t ntimes_arena_allocated = 0; 1271/* High water mark (max value ever seen) for narenas_currently_allocated. */ 1272static size_t narenas_highwater = 0; 1273 1274static Py_ssize_t raw_allocated_blocks; 1275 1276Py_ssize_t 1277_Py_GetAllocatedBlocks(void) 1278{ 1279 Py_ssize_t n = raw_allocated_blocks; 1280 /* add up allocated blocks for used pools */ 1281 for (uint i = 0; i < maxarenas; ++i) { 1282 /* Skip arenas which are not allocated. */ 1283 if (arenas[i].address == 0) { 1284 continue; 1285 } 1286 1287 uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE); 1288 1289 /* visit every pool in the arena */ 1290 assert(base <= (uintptr_t) arenas[i].pool_address); 1291 for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) { 1292 poolp p = (poolp)base; 1293 n += p->ref.count; 1294 } 1295 } 1296 return n; 1297} 1298 1299#if WITH_PYMALLOC_RADIX_TREE 1300/*==========================================================================*/ 1301/* radix tree for tracking arena usage. If enabled, used to implement 1302 address_in_range(). 1303 1304 memory address bit allocation for keys 1305 1306 64-bit pointers, IGNORE_BITS=0 and 2^20 arena size: 1307 15 -> MAP_TOP_BITS 1308 15 -> MAP_MID_BITS 1309 14 -> MAP_BOT_BITS 1310 20 -> ideal aligned arena 1311 ---- 1312 64 1313 1314 64-bit pointers, IGNORE_BITS=16, and 2^20 arena size: 1315 16 -> IGNORE_BITS 1316 10 -> MAP_TOP_BITS 1317 10 -> MAP_MID_BITS 1318 8 -> MAP_BOT_BITS 1319 20 -> ideal aligned arena 1320 ---- 1321 64 1322 1323 32-bit pointers and 2^18 arena size: 1324 14 -> MAP_BOT_BITS 1325 18 -> ideal aligned arena 1326 ---- 1327 32 1328 1329*/ 1330 1331#if SIZEOF_VOID_P == 8 1332 1333/* number of bits in a pointer */ 1334#define POINTER_BITS 64 1335 1336/* High bits of memory addresses that will be ignored when indexing into the 1337 * radix tree. Setting this to zero is the safe default. For most 64-bit 1338 * machines, setting this to 16 would be safe. The kernel would not give 1339 * user-space virtual memory addresses that have significant information in 1340 * those high bits. The main advantage to setting IGNORE_BITS > 0 is that less 1341 * virtual memory will be used for the top and middle radix tree arrays. Those 1342 * arrays are allocated in the BSS segment and so will typically consume real 1343 * memory only if actually accessed. 1344 */ 1345#define IGNORE_BITS 0 1346 1347/* use the top and mid layers of the radix tree */ 1348#define USE_INTERIOR_NODES 1349 1350#elif SIZEOF_VOID_P == 4 1351 1352#define POINTER_BITS 32 1353#define IGNORE_BITS 0 1354 1355#else 1356 1357 /* Currently this code works for 64-bit or 32-bit pointers only. */ 1358#error "obmalloc radix tree requires 64-bit or 32-bit pointers." 1359 1360#endif /* SIZEOF_VOID_P */ 1361 1362/* arena_coverage_t members require this to be true */ 1363#if ARENA_BITS >= 32 1364# error "arena size must be < 2^32" 1365#endif 1366 1367/* the lower bits of the address that are not ignored */ 1368#define ADDRESS_BITS (POINTER_BITS - IGNORE_BITS) 1369 1370#ifdef USE_INTERIOR_NODES 1371/* number of bits used for MAP_TOP and MAP_MID nodes */ 1372#define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3) 1373#else 1374#define INTERIOR_BITS 0 1375#endif 1376 1377#define MAP_TOP_BITS INTERIOR_BITS 1378#define MAP_TOP_LENGTH (1 << MAP_TOP_BITS) 1379#define MAP_TOP_MASK (MAP_TOP_LENGTH - 1) 1380 1381#define MAP_MID_BITS INTERIOR_BITS 1382#define MAP_MID_LENGTH (1 << MAP_MID_BITS) 1383#define MAP_MID_MASK (MAP_MID_LENGTH - 1) 1384 1385#define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS) 1386#define MAP_BOT_LENGTH (1 << MAP_BOT_BITS) 1387#define MAP_BOT_MASK (MAP_BOT_LENGTH - 1) 1388 1389#define MAP_BOT_SHIFT ARENA_BITS 1390#define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT) 1391#define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT) 1392 1393#define AS_UINT(p) ((uintptr_t)(p)) 1394#define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK) 1395#define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK) 1396#define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK) 1397 1398#if IGNORE_BITS > 0 1399/* Return the ignored part of the pointer address. Those bits should be same 1400 * for all valid pointers if IGNORE_BITS is set correctly. 1401 */ 1402#define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS) 1403#else 1404#define HIGH_BITS(p) 0 1405#endif 1406 1407 1408/* This is the leaf of the radix tree. See arena_map_mark_used() for the 1409 * meaning of these members. */ 1410typedef struct { 1411 int32_t tail_hi; 1412 int32_t tail_lo; 1413} arena_coverage_t; 1414 1415typedef struct arena_map_bot { 1416 /* The members tail_hi and tail_lo are accessed together. So, it 1417 * better to have them as an array of structs, rather than two 1418 * arrays. 1419 */ 1420 arena_coverage_t arenas[MAP_BOT_LENGTH]; 1421} arena_map_bot_t; 1422 1423#ifdef USE_INTERIOR_NODES 1424typedef struct arena_map_mid { 1425 struct arena_map_bot *ptrs[MAP_MID_LENGTH]; 1426} arena_map_mid_t; 1427 1428typedef struct arena_map_top { 1429 struct arena_map_mid *ptrs[MAP_TOP_LENGTH]; 1430} arena_map_top_t; 1431#endif 1432 1433/* The root of radix tree. Note that by initializing like this, the memory 1434 * should be in the BSS. The OS will only memory map pages as the MAP_MID 1435 * nodes get used (OS pages are demand loaded as needed). 1436 */ 1437#ifdef USE_INTERIOR_NODES 1438static arena_map_top_t arena_map_root; 1439/* accounting for number of used interior nodes */ 1440static int arena_map_mid_count; 1441static int arena_map_bot_count; 1442#else 1443static arena_map_bot_t arena_map_root; 1444#endif 1445 1446/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or 1447 * it cannot be created */ 1448static inline Py_ALWAYS_INLINE arena_map_bot_t * 1449arena_map_get(block *p, int create) 1450{ 1451#ifdef USE_INTERIOR_NODES 1452 /* sanity check that IGNORE_BITS is correct */ 1453 assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root)); 1454 int i1 = MAP_TOP_INDEX(p); 1455 if (arena_map_root.ptrs[i1] == NULL) { 1456 if (!create) { 1457 return NULL; 1458 } 1459 arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t)); 1460 if (n == NULL) { 1461 return NULL; 1462 } 1463 arena_map_root.ptrs[i1] = n; 1464 arena_map_mid_count++; 1465 } 1466 int i2 = MAP_MID_INDEX(p); 1467 if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) { 1468 if (!create) { 1469 return NULL; 1470 } 1471 arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t)); 1472 if (n == NULL) { 1473 return NULL; 1474 } 1475 arena_map_root.ptrs[i1]->ptrs[i2] = n; 1476 arena_map_bot_count++; 1477 } 1478 return arena_map_root.ptrs[i1]->ptrs[i2]; 1479#else 1480 return &arena_map_root; 1481#endif 1482} 1483 1484 1485/* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw 1486 * away 24 bits of the address. That reduces the space requirement of 1487 * the tree compared to similar radix tree page-map schemes. In 1488 * exchange for slashing the space requirement, it needs more 1489 * computation to check an address. 1490 * 1491 * Tracking coverage is done by "ideal" arena address. It is easier to 1492 * explain in decimal so let's say that the arena size is 100 bytes. 1493 * Then, ideal addresses are 100, 200, 300, etc. For checking if a 1494 * pointer address is inside an actual arena, we have to check two ideal 1495 * arena addresses. E.g. if pointer is 357, we need to check 200 and 1496 * 300. In the rare case that an arena is aligned in the ideal way 1497 * (e.g. base address of arena is 200) then we only have to check one 1498 * ideal address. 1499 * 1500 * The tree nodes for 200 and 300 both store the address of arena. 1501 * There are two cases: the arena starts at a lower ideal arena and 1502 * extends to this one, or the arena starts in this arena and extends to 1503 * the next ideal arena. The tail_lo and tail_hi members correspond to 1504 * these two cases. 1505 */ 1506 1507 1508/* mark or unmark addresses covered by arena */ 1509static int 1510arena_map_mark_used(uintptr_t arena_base, int is_used) 1511{ 1512 /* sanity check that IGNORE_BITS is correct */ 1513 assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root)); 1514 arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used); 1515 if (n_hi == NULL) { 1516 assert(is_used); /* otherwise node should already exist */ 1517 return 0; /* failed to allocate space for node */ 1518 } 1519 int i3 = MAP_BOT_INDEX((block *)arena_base); 1520 int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK); 1521 if (tail == 0) { 1522 /* is ideal arena address */ 1523 n_hi->arenas[i3].tail_hi = is_used ? -1 : 0; 1524 } 1525 else { 1526 /* arena_base address is not ideal (aligned to arena size) and 1527 * so it potentially covers two MAP_BOT nodes. Get the MAP_BOT node 1528 * for the next arena. Note that it might be in different MAP_TOP 1529 * and MAP_MID nodes as well so we need to call arena_map_get() 1530 * again (do the full tree traversal). 1531 */ 1532 n_hi->arenas[i3].tail_hi = is_used ? tail : 0; 1533 uintptr_t arena_base_next = arena_base + ARENA_SIZE; 1534 /* If arena_base is a legit arena address, so is arena_base_next - 1 1535 * (last address in arena). If arena_base_next overflows then it 1536 * must overflow to 0. However, that would mean arena_base was 1537 * "ideal" and we should not be in this case. */ 1538 assert(arena_base < arena_base_next); 1539 arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used); 1540 if (n_lo == NULL) { 1541 assert(is_used); /* otherwise should already exist */ 1542 n_hi->arenas[i3].tail_hi = 0; 1543 return 0; /* failed to allocate space for node */ 1544 } 1545 int i3_next = MAP_BOT_INDEX(arena_base_next); 1546 n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; 1547 } 1548 return 1; 1549} 1550 1551/* Return true if 'p' is a pointer inside an obmalloc arena. 1552 * _PyObject_Free() calls this so it needs to be very fast. */ 1553static int 1554arena_map_is_used(block *p) 1555{ 1556 arena_map_bot_t *n = arena_map_get(p, 0); 1557 if (n == NULL) { 1558 return 0; 1559 } 1560 int i3 = MAP_BOT_INDEX(p); 1561 /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */ 1562 int32_t hi = n->arenas[i3].tail_hi; 1563 int32_t lo = n->arenas[i3].tail_lo; 1564 int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK); 1565 return (tail < lo) || (tail >= hi && hi != 0); 1566} 1567 1568/* end of radix tree logic */ 1569/*==========================================================================*/ 1570#endif /* WITH_PYMALLOC_RADIX_TREE */ 1571 1572 1573/* Allocate a new arena. If we run out of memory, return NULL. Else 1574 * allocate a new arena, and return the address of an arena_object 1575 * describing the new arena. It's expected that the caller will set 1576 * `usable_arenas` to the return value. 1577 */ 1578static struct arena_object* 1579new_arena(void) 1580{ 1581 struct arena_object* arenaobj; 1582 uint excess; /* number of bytes above pool alignment */ 1583 void *address; 1584 static int debug_stats = -1; 1585 1586 if (debug_stats == -1) { 1587 const char *opt = Py_GETENV("PYTHONMALLOCSTATS"); 1588 debug_stats = (opt != NULL && *opt != '\0'); 1589 } 1590 if (debug_stats) { 1591 _PyObject_DebugMallocStats(stderr); 1592 } 1593 1594 if (unused_arena_objects == NULL) { 1595 uint i; 1596 uint numarenas; 1597 size_t nbytes; 1598 1599 /* Double the number of arena objects on each allocation. 1600 * Note that it's possible for `numarenas` to overflow. 1601 */ 1602 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS; 1603 if (numarenas <= maxarenas) 1604 return NULL; /* overflow */ 1605#if SIZEOF_SIZE_T <= SIZEOF_INT 1606 if (numarenas > SIZE_MAX / sizeof(*arenas)) 1607 return NULL; /* overflow */ 1608#endif 1609 nbytes = numarenas * sizeof(*arenas); 1610 arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes); 1611 if (arenaobj == NULL) 1612 return NULL; 1613 arenas = arenaobj; 1614 1615 /* We might need to fix pointers that were copied. However, 1616 * new_arena only gets called when all the pages in the 1617 * previous arenas are full. Thus, there are *no* pointers 1618 * into the old array. Thus, we don't have to worry about 1619 * invalid pointers. Just to be sure, some asserts: 1620 */ 1621 assert(usable_arenas == NULL); 1622 assert(unused_arena_objects == NULL); 1623 1624 /* Put the new arenas on the unused_arena_objects list. */ 1625 for (i = maxarenas; i < numarenas; ++i) { 1626 arenas[i].address = 0; /* mark as unassociated */ 1627 arenas[i].nextarena = i < numarenas - 1 ? 1628 &arenas[i+1] : NULL; 1629 } 1630 1631 /* Update globals. */ 1632 unused_arena_objects = &arenas[maxarenas]; 1633 maxarenas = numarenas; 1634 } 1635 1636 /* Take the next available arena object off the head of the list. */ 1637 assert(unused_arena_objects != NULL); 1638 arenaobj = unused_arena_objects; 1639 unused_arena_objects = arenaobj->nextarena; 1640 assert(arenaobj->address == 0); 1641 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); 1642#if WITH_PYMALLOC_RADIX_TREE 1643 if (address != NULL) { 1644 if (!arena_map_mark_used((uintptr_t)address, 1)) { 1645 /* marking arena in radix tree failed, abort */ 1646 _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE); 1647 address = NULL; 1648 } 1649 } 1650#endif 1651 if (address == NULL) { 1652 /* The allocation failed: return NULL after putting the 1653 * arenaobj back. 1654 */ 1655 arenaobj->nextarena = unused_arena_objects; 1656 unused_arena_objects = arenaobj; 1657 return NULL; 1658 } 1659 arenaobj->address = (uintptr_t)address; 1660 1661 ++narenas_currently_allocated; 1662 ++ntimes_arena_allocated; 1663 if (narenas_currently_allocated > narenas_highwater) 1664 narenas_highwater = narenas_currently_allocated; 1665 arenaobj->freepools = NULL; 1666 /* pool_address <- first pool-aligned address in the arena 1667 nfreepools <- number of whole pools that fit after alignment */ 1668 arenaobj->pool_address = (block*)arenaobj->address; 1669 arenaobj->nfreepools = MAX_POOLS_IN_ARENA; 1670 excess = (uint)(arenaobj->address & POOL_SIZE_MASK); 1671 if (excess != 0) { 1672 --arenaobj->nfreepools; 1673 arenaobj->pool_address += POOL_SIZE - excess; 1674 } 1675 arenaobj->ntotalpools = arenaobj->nfreepools; 1676 1677 return arenaobj; 1678} 1679 1680 1681 1682#if WITH_PYMALLOC_RADIX_TREE 1683/* Return true if and only if P is an address that was allocated by 1684 pymalloc. When the radix tree is used, 'poolp' is unused. 1685 */ 1686static bool 1687address_in_range(void *p, poolp pool) 1688{ 1689 return arena_map_is_used(p); 1690} 1691#else 1692/* 1693address_in_range(P, POOL) 1694 1695Return true if and only if P is an address that was allocated by pymalloc. 1696POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P) 1697(the caller is asked to compute this because the macro expands POOL more than 1698once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a 1699variable and pass the latter to the macro; because address_in_range is 1700called on every alloc/realloc/free, micro-efficiency is important here). 1701 1702Tricky: Let B be the arena base address associated with the pool, B = 1703arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if 1704 1705 B <= P < B + ARENA_SIZE 1706 1707Subtracting B throughout, this is true iff 1708 1709 0 <= P-B < ARENA_SIZE 1710 1711By using unsigned arithmetic, the "0 <=" half of the test can be skipped. 1712 1713Obscure: A PyMem "free memory" function can call the pymalloc free or realloc 1714before the first arena has been allocated. `arenas` is still NULL in that 1715case. We're relying on that maxarenas is also 0 in that case, so that 1716(POOL)->arenaindex < maxarenas must be false, saving us from trying to index 1717into a NULL arenas. 1718 1719Details: given P and POOL, the arena_object corresponding to P is AO = 1720arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild 1721stores, etc), POOL is the correct address of P's pool, AO.address is the 1722correct base address of the pool's arena, and P must be within ARENA_SIZE of 1723AO.address. In addition, AO.address is not 0 (no arena can start at address 0 1724(NULL)). Therefore address_in_range correctly reports that obmalloc 1725controls P. 1726 1727Now suppose obmalloc does not control P (e.g., P was obtained via a direct 1728call to the system malloc() or realloc()). (POOL)->arenaindex may be anything 1729in this case -- it may even be uninitialized trash. If the trash arenaindex 1730is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't 1731control P. 1732 1733Else arenaindex is < maxarena, and AO is read up. If AO corresponds to an 1734allocated arena, obmalloc controls all the memory in slice AO.address : 1735AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc, 1736so P doesn't lie in that slice, so the macro correctly reports that P is not 1737controlled by obmalloc. 1738 1739Finally, if P is not controlled by obmalloc and AO corresponds to an unused 1740arena_object (one not currently associated with an allocated arena), 1741AO.address is 0, and the second test in the macro reduces to: 1742 1743 P < ARENA_SIZE 1744 1745If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes 1746that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part 1747of the test still passes, and the third clause (AO.address != 0) is necessary 1748to get the correct result: AO.address is 0 in this case, so the macro 1749correctly reports that P is not controlled by obmalloc (despite that P lies in 1750slice AO.address : AO.address + ARENA_SIZE). 1751 1752Note: The third (AO.address != 0) clause was added in Python 2.5. Before 17532.5, arenas were never free()'ed, and an arenaindex < maxarena always 1754corresponded to a currently-allocated arena, so the "P is not controlled by 1755obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case 1756was impossible. 1757 1758Note that the logic is excruciating, and reading up possibly uninitialized 1759memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex) 1760creates problems for some memory debuggers. The overwhelming advantage is 1761that this test determines whether an arbitrary address is controlled by 1762obmalloc in a small constant time, independent of the number of arenas 1763obmalloc controls. Since this test is needed at every entry point, it's 1764extremely desirable that it be this fast. 1765*/ 1766 1767static bool _Py_NO_SANITIZE_ADDRESS 1768 _Py_NO_SANITIZE_THREAD 1769 _Py_NO_SANITIZE_MEMORY 1770address_in_range(void *p, poolp pool) 1771{ 1772 // Since address_in_range may be reading from memory which was not allocated 1773 // by Python, it is important that pool->arenaindex is read only once, as 1774 // another thread may be concurrently modifying the value without holding 1775 // the GIL. The following dance forces the compiler to read pool->arenaindex 1776 // only once. 1777 uint arenaindex = *((volatile uint *)&pool->arenaindex); 1778 return arenaindex < maxarenas && 1779 (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE && 1780 arenas[arenaindex].address != 0; 1781} 1782 1783#endif /* !WITH_PYMALLOC_RADIX_TREE */ 1784 1785/*==========================================================================*/ 1786 1787// Called when freelist is exhausted. Extend the freelist if there is 1788// space for a block. Otherwise, remove this pool from usedpools. 1789static void 1790pymalloc_pool_extend(poolp pool, uint size) 1791{ 1792 if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) { 1793 /* There is room for another block. */ 1794 pool->freeblock = (block*)pool + pool->nextoffset; 1795 pool->nextoffset += INDEX2SIZE(size); 1796 *(block **)(pool->freeblock) = NULL; 1797 return; 1798 } 1799 1800 /* Pool is full, unlink from used pools. */ 1801 poolp next; 1802 next = pool->nextpool; 1803 pool = pool->prevpool; 1804 next->prevpool = pool; 1805 pool->nextpool = next; 1806} 1807 1808/* called when pymalloc_alloc can not allocate a block from usedpool. 1809 * This function takes new pool and allocate a block from it. 1810 */ 1811static void* 1812allocate_from_new_pool(uint size) 1813{ 1814 /* There isn't a pool of the right size class immediately 1815 * available: use a free pool. 1816 */ 1817 if (UNLIKELY(usable_arenas == NULL)) { 1818 /* No arena has a free pool: allocate a new arena. */ 1819#ifdef WITH_MEMORY_LIMITS 1820 if (narenas_currently_allocated >= MAX_ARENAS) { 1821 return NULL; 1822 } 1823#endif 1824 usable_arenas = new_arena(); 1825 if (usable_arenas == NULL) { 1826 return NULL; 1827 } 1828 usable_arenas->nextarena = usable_arenas->prevarena = NULL; 1829 assert(nfp2lasta[usable_arenas->nfreepools] == NULL); 1830 nfp2lasta[usable_arenas->nfreepools] = usable_arenas; 1831 } 1832 assert(usable_arenas->address != 0); 1833 1834 /* This arena already had the smallest nfreepools value, so decreasing 1835 * nfreepools doesn't change that, and we don't need to rearrange the 1836 * usable_arenas list. However, if the arena becomes wholly allocated, 1837 * we need to remove its arena_object from usable_arenas. 1838 */ 1839 assert(usable_arenas->nfreepools > 0); 1840 if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) { 1841 /* It's the last of this size, so there won't be any. */ 1842 nfp2lasta[usable_arenas->nfreepools] = NULL; 1843 } 1844 /* If any free pools will remain, it will be the new smallest. */ 1845 if (usable_arenas->nfreepools > 1) { 1846 assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL); 1847 nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas; 1848 } 1849 1850 /* Try to get a cached free pool. */ 1851 poolp pool = usable_arenas->freepools; 1852 if (LIKELY(pool != NULL)) { 1853 /* Unlink from cached pools. */ 1854 usable_arenas->freepools = pool->nextpool; 1855 usable_arenas->nfreepools--; 1856 if (UNLIKELY(usable_arenas->nfreepools == 0)) { 1857 /* Wholly allocated: remove. */ 1858 assert(usable_arenas->freepools == NULL); 1859 assert(usable_arenas->nextarena == NULL || 1860 usable_arenas->nextarena->prevarena == 1861 usable_arenas); 1862 usable_arenas = usable_arenas->nextarena; 1863 if (usable_arenas != NULL) { 1864 usable_arenas->prevarena = NULL; 1865 assert(usable_arenas->address != 0); 1866 } 1867 } 1868 else { 1869 /* nfreepools > 0: it must be that freepools 1870 * isn't NULL, or that we haven't yet carved 1871 * off all the arena's pools for the first 1872 * time. 1873 */ 1874 assert(usable_arenas->freepools != NULL || 1875 usable_arenas->pool_address <= 1876 (block*)usable_arenas->address + 1877 ARENA_SIZE - POOL_SIZE); 1878 } 1879 } 1880 else { 1881 /* Carve off a new pool. */ 1882 assert(usable_arenas->nfreepools > 0); 1883 assert(usable_arenas->freepools == NULL); 1884 pool = (poolp)usable_arenas->pool_address; 1885 assert((block*)pool <= (block*)usable_arenas->address + 1886 ARENA_SIZE - POOL_SIZE); 1887 pool->arenaindex = (uint)(usable_arenas - arenas); 1888 assert(&arenas[pool->arenaindex] == usable_arenas); 1889 pool->szidx = DUMMY_SIZE_IDX; 1890 usable_arenas->pool_address += POOL_SIZE; 1891 --usable_arenas->nfreepools; 1892 1893 if (usable_arenas->nfreepools == 0) { 1894 assert(usable_arenas->nextarena == NULL || 1895 usable_arenas->nextarena->prevarena == 1896 usable_arenas); 1897 /* Unlink the arena: it is completely allocated. */ 1898 usable_arenas = usable_arenas->nextarena; 1899 if (usable_arenas != NULL) { 1900 usable_arenas->prevarena = NULL; 1901 assert(usable_arenas->address != 0); 1902 } 1903 } 1904 } 1905 1906 /* Frontlink to used pools. */ 1907 block *bp; 1908 poolp next = usedpools[size + size]; /* == prev */ 1909 pool->nextpool = next; 1910 pool->prevpool = next; 1911 next->nextpool = pool; 1912 next->prevpool = pool; 1913 pool->ref.count = 1; 1914 if (pool->szidx == size) { 1915 /* Luckily, this pool last contained blocks 1916 * of the same size class, so its header 1917 * and free list are already initialized. 1918 */ 1919 bp = pool->freeblock; 1920 assert(bp != NULL); 1921 pool->freeblock = *(block **)bp; 1922 return bp; 1923 } 1924 /* 1925 * Initialize the pool header, set up the free list to 1926 * contain just the second block, and return the first 1927 * block. 1928 */ 1929 pool->szidx = size; 1930 size = INDEX2SIZE(size); 1931 bp = (block *)pool + POOL_OVERHEAD; 1932 pool->nextoffset = POOL_OVERHEAD + (size << 1); 1933 pool->maxnextoffset = POOL_SIZE - size; 1934 pool->freeblock = bp + size; 1935 *(block **)(pool->freeblock) = NULL; 1936 return bp; 1937} 1938 1939/* pymalloc allocator 1940 1941 Return a pointer to newly allocated memory if pymalloc allocated memory. 1942 1943 Return NULL if pymalloc failed to allocate the memory block: on bigger 1944 requests, on error in the code below (as a last chance to serve the request) 1945 or when the max memory limit has been reached. 1946*/ 1947static inline void* 1948pymalloc_alloc(void *ctx, size_t nbytes) 1949{ 1950#ifdef WITH_VALGRIND 1951 if (UNLIKELY(running_on_valgrind == -1)) { 1952 running_on_valgrind = RUNNING_ON_VALGRIND; 1953 } 1954 if (UNLIKELY(running_on_valgrind)) { 1955 return NULL; 1956 } 1957#endif 1958 1959 if (UNLIKELY(nbytes == 0)) { 1960 return NULL; 1961 } 1962 if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) { 1963 return NULL; 1964 } 1965 1966 uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT; 1967 poolp pool = usedpools[size + size]; 1968 block *bp; 1969 1970 if (LIKELY(pool != pool->nextpool)) { 1971 /* 1972 * There is a used pool for this size class. 1973 * Pick up the head block of its free list. 1974 */ 1975 ++pool->ref.count; 1976 bp = pool->freeblock; 1977 assert(bp != NULL); 1978 1979 if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) { 1980 // Reached the end of the free list, try to extend it. 1981 pymalloc_pool_extend(pool, size); 1982 } 1983 } 1984 else { 1985 /* There isn't a pool of the right size class immediately 1986 * available: use a free pool. 1987 */ 1988 bp = allocate_from_new_pool(size); 1989 } 1990 1991 return (void *)bp; 1992} 1993 1994 1995static void * 1996_PyObject_Malloc(void *ctx, size_t nbytes) 1997{ 1998 void* ptr = pymalloc_alloc(ctx, nbytes); 1999 if (LIKELY(ptr != NULL)) { 2000 return ptr; 2001 } 2002 2003 ptr = PyMem_RawMalloc(nbytes); 2004 if (ptr != NULL) { 2005 raw_allocated_blocks++; 2006 } 2007 return ptr; 2008} 2009 2010 2011static void * 2012_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize) 2013{ 2014 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize); 2015 size_t nbytes = nelem * elsize; 2016 2017 void* ptr = pymalloc_alloc(ctx, nbytes); 2018 if (LIKELY(ptr != NULL)) { 2019 memset(ptr, 0, nbytes); 2020 return ptr; 2021 } 2022 2023 ptr = PyMem_RawCalloc(nelem, elsize); 2024 if (ptr != NULL) { 2025 raw_allocated_blocks++; 2026 } 2027 return ptr; 2028} 2029 2030 2031static void 2032insert_to_usedpool(poolp pool) 2033{ 2034 assert(pool->ref.count > 0); /* else the pool is empty */ 2035 2036 uint size = pool->szidx; 2037 poolp next = usedpools[size + size]; 2038 poolp prev = next->prevpool; 2039 2040 /* insert pool before next: prev <-> pool <-> next */ 2041 pool->nextpool = next; 2042 pool->prevpool = prev; 2043 next->prevpool = pool; 2044 prev->nextpool = pool; 2045} 2046 2047static void 2048insert_to_freepool(poolp pool) 2049{ 2050 poolp next = pool->nextpool; 2051 poolp prev = pool->prevpool; 2052 next->prevpool = prev; 2053 prev->nextpool = next; 2054 2055 /* Link the pool to freepools. This is a singly-linked 2056 * list, and pool->prevpool isn't used there. 2057 */ 2058 struct arena_object *ao = &arenas[pool->arenaindex]; 2059 pool->nextpool = ao->freepools; 2060 ao->freepools = pool; 2061 uint nf = ao->nfreepools; 2062 /* If this is the rightmost arena with this number of free pools, 2063 * nfp2lasta[nf] needs to change. Caution: if nf is 0, there 2064 * are no arenas in usable_arenas with that value. 2065 */ 2066 struct arena_object* lastnf = nfp2lasta[nf]; 2067 assert((nf == 0 && lastnf == NULL) || 2068 (nf > 0 && 2069 lastnf != NULL && 2070 lastnf->nfreepools == nf && 2071 (lastnf->nextarena == NULL || 2072 nf < lastnf->nextarena->nfreepools))); 2073 if (lastnf == ao) { /* it is the rightmost */ 2074 struct arena_object* p = ao->prevarena; 2075 nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL; 2076 } 2077 ao->nfreepools = ++nf; 2078 2079 /* All the rest is arena management. We just freed 2080 * a pool, and there are 4 cases for arena mgmt: 2081 * 1. If all the pools are free, return the arena to 2082 * the system free(). Except if this is the last 2083 * arena in the list, keep it to avoid thrashing: 2084 * keeping one wholly free arena in the list avoids 2085 * pathological cases where a simple loop would 2086 * otherwise provoke needing to allocate and free an 2087 * arena on every iteration. See bpo-37257. 2088 * 2. If this is the only free pool in the arena, 2089 * add the arena back to the `usable_arenas` list. 2090 * 3. If the "next" arena has a smaller count of free 2091 * pools, we have to "slide this arena right" to 2092 * restore that usable_arenas is sorted in order of 2093 * nfreepools. 2094 * 4. Else there's nothing more to do. 2095 */ 2096 if (nf == ao->ntotalpools && ao->nextarena != NULL) { 2097 /* Case 1. First unlink ao from usable_arenas. 2098 */ 2099 assert(ao->prevarena == NULL || 2100 ao->prevarena->address != 0); 2101 assert(ao ->nextarena == NULL || 2102 ao->nextarena->address != 0); 2103 2104 /* Fix the pointer in the prevarena, or the 2105 * usable_arenas pointer. 2106 */ 2107 if (ao->prevarena == NULL) { 2108 usable_arenas = ao->nextarena; 2109 assert(usable_arenas == NULL || 2110 usable_arenas->address != 0); 2111 } 2112 else { 2113 assert(ao->prevarena->nextarena == ao); 2114 ao->prevarena->nextarena = 2115 ao->nextarena; 2116 } 2117 /* Fix the pointer in the nextarena. */ 2118 if (ao->nextarena != NULL) { 2119 assert(ao->nextarena->prevarena == ao); 2120 ao->nextarena->prevarena = 2121 ao->prevarena; 2122 } 2123 /* Record that this arena_object slot is 2124 * available to be reused. 2125 */ 2126 ao->nextarena = unused_arena_objects; 2127 unused_arena_objects = ao; 2128 2129#if WITH_PYMALLOC_RADIX_TREE 2130 /* mark arena region as not under control of obmalloc */ 2131 arena_map_mark_used(ao->address, 0); 2132#endif 2133 2134 /* Free the entire arena. */ 2135 _PyObject_Arena.free(_PyObject_Arena.ctx, 2136 (void *)ao->address, ARENA_SIZE); 2137 ao->address = 0; /* mark unassociated */ 2138 --narenas_currently_allocated; 2139 2140 return; 2141 } 2142 2143 if (nf == 1) { 2144 /* Case 2. Put ao at the head of 2145 * usable_arenas. Note that because 2146 * ao->nfreepools was 0 before, ao isn't 2147 * currently on the usable_arenas list. 2148 */ 2149 ao->nextarena = usable_arenas; 2150 ao->prevarena = NULL; 2151 if (usable_arenas) 2152 usable_arenas->prevarena = ao; 2153 usable_arenas = ao; 2154 assert(usable_arenas->address != 0); 2155 if (nfp2lasta[1] == NULL) { 2156 nfp2lasta[1] = ao; 2157 } 2158 2159 return; 2160 } 2161 2162 /* If this arena is now out of order, we need to keep 2163 * the list sorted. The list is kept sorted so that 2164 * the "most full" arenas are used first, which allows 2165 * the nearly empty arenas to be completely freed. In 2166 * a few un-scientific tests, it seems like this 2167 * approach allowed a lot more memory to be freed. 2168 */ 2169 /* If this is the only arena with nf, record that. */ 2170 if (nfp2lasta[nf] == NULL) { 2171 nfp2lasta[nf] = ao; 2172 } /* else the rightmost with nf doesn't change */ 2173 /* If this was the rightmost of the old size, it remains in place. */ 2174 if (ao == lastnf) { 2175 /* Case 4. Nothing to do. */ 2176 return; 2177 } 2178 /* If ao were the only arena in the list, the last block would have 2179 * gotten us out. 2180 */ 2181 assert(ao->nextarena != NULL); 2182 2183 /* Case 3: We have to move the arena towards the end of the list, 2184 * because it has more free pools than the arena to its right. It needs 2185 * to move to follow lastnf. 2186 * First unlink ao from usable_arenas. 2187 */ 2188 if (ao->prevarena != NULL) { 2189 /* ao isn't at the head of the list */ 2190 assert(ao->prevarena->nextarena == ao); 2191 ao->prevarena->nextarena = ao->nextarena; 2192 } 2193 else { 2194 /* ao is at the head of the list */ 2195 assert(usable_arenas == ao); 2196 usable_arenas = ao->nextarena; 2197 } 2198 ao->nextarena->prevarena = ao->prevarena; 2199 /* And insert after lastnf. */ 2200 ao->prevarena = lastnf; 2201 ao->nextarena = lastnf->nextarena; 2202 if (ao->nextarena != NULL) { 2203 ao->nextarena->prevarena = ao; 2204 } 2205 lastnf->nextarena = ao; 2206 /* Verify that the swaps worked. */ 2207 assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools); 2208 assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools); 2209 assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao); 2210 assert((usable_arenas == ao && ao->prevarena == NULL) 2211 || ao->prevarena->nextarena == ao); 2212} 2213 2214/* Free a memory block allocated by pymalloc_alloc(). 2215 Return 1 if it was freed. 2216 Return 0 if the block was not allocated by pymalloc_alloc(). */ 2217static inline int 2218pymalloc_free(void *ctx, void *p) 2219{ 2220 assert(p != NULL); 2221 2222#ifdef WITH_VALGRIND 2223 if (UNLIKELY(running_on_valgrind > 0)) { 2224 return 0; 2225 } 2226#endif 2227 2228 poolp pool = POOL_ADDR(p); 2229 if (UNLIKELY(!address_in_range(p, pool))) { 2230 return 0; 2231 } 2232 /* We allocated this address. */ 2233 2234 /* Link p to the start of the pool's freeblock list. Since 2235 * the pool had at least the p block outstanding, the pool 2236 * wasn't empty (so it's already in a usedpools[] list, or 2237 * was full and is in no list -- it's not in the freeblocks 2238 * list in any case). 2239 */ 2240 assert(pool->ref.count > 0); /* else it was empty */ 2241 block *lastfree = pool->freeblock; 2242 *(block **)p = lastfree; 2243 pool->freeblock = (block *)p; 2244 pool->ref.count--; 2245 2246 if (UNLIKELY(lastfree == NULL)) { 2247 /* Pool was full, so doesn't currently live in any list: 2248 * link it to the front of the appropriate usedpools[] list. 2249 * This mimics LRU pool usage for new allocations and 2250 * targets optimal filling when several pools contain 2251 * blocks of the same size class. 2252 */ 2253 insert_to_usedpool(pool); 2254 return 1; 2255 } 2256 2257 /* freeblock wasn't NULL, so the pool wasn't full, 2258 * and the pool is in a usedpools[] list. 2259 */ 2260 if (LIKELY(pool->ref.count != 0)) { 2261 /* pool isn't empty: leave it in usedpools */ 2262 return 1; 2263 } 2264 2265 /* Pool is now empty: unlink from usedpools, and 2266 * link to the front of freepools. This ensures that 2267 * previously freed pools will be allocated later 2268 * (being not referenced, they are perhaps paged out). 2269 */ 2270 insert_to_freepool(pool); 2271 return 1; 2272} 2273 2274 2275static void 2276_PyObject_Free(void *ctx, void *p) 2277{ 2278 /* PyObject_Free(NULL) has no effect */ 2279 if (p == NULL) { 2280 return; 2281 } 2282 2283 if (UNLIKELY(!pymalloc_free(ctx, p))) { 2284 /* pymalloc didn't allocate this address */ 2285 PyMem_RawFree(p); 2286 raw_allocated_blocks--; 2287 } 2288} 2289 2290 2291/* pymalloc realloc. 2292 2293 If nbytes==0, then as the Python docs promise, we do not treat this like 2294 free(p), and return a non-NULL result. 2295 2296 Return 1 if pymalloc reallocated memory and wrote the new pointer into 2297 newptr_p. 2298 2299 Return 0 if pymalloc didn't allocated p. */ 2300static int 2301pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes) 2302{ 2303 void *bp; 2304 poolp pool; 2305 size_t size; 2306 2307 assert(p != NULL); 2308 2309#ifdef WITH_VALGRIND 2310 /* Treat running_on_valgrind == -1 the same as 0 */ 2311 if (UNLIKELY(running_on_valgrind > 0)) { 2312 return 0; 2313 } 2314#endif 2315 2316 pool = POOL_ADDR(p); 2317 if (!address_in_range(p, pool)) { 2318 /* pymalloc is not managing this block. 2319 2320 If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take 2321 over this block. However, if we do, we need to copy the valid data 2322 from the C-managed block to one of our blocks, and there's no 2323 portable way to know how much of the memory space starting at p is 2324 valid. 2325 2326 As bug 1185883 pointed out the hard way, it's possible that the 2327 C-managed block is "at the end" of allocated VM space, so that a 2328 memory fault can occur if we try to copy nbytes bytes starting at p. 2329 Instead we punt: let C continue to manage this block. */ 2330 return 0; 2331 } 2332 2333 /* pymalloc is in charge of this block */ 2334 size = INDEX2SIZE(pool->szidx); 2335 if (nbytes <= size) { 2336 /* The block is staying the same or shrinking. 2337 2338 If it's shrinking, there's a tradeoff: it costs cycles to copy the 2339 block to a smaller size class, but it wastes memory not to copy it. 2340 2341 The compromise here is to copy on shrink only if at least 25% of 2342 size can be shaved off. */ 2343 if (4 * nbytes > 3 * size) { 2344 /* It's the same, or shrinking and new/old > 3/4. */ 2345 *newptr_p = p; 2346 return 1; 2347 } 2348 size = nbytes; 2349 } 2350 2351 bp = _PyObject_Malloc(ctx, nbytes); 2352 if (bp != NULL) { 2353 memcpy(bp, p, size); 2354 _PyObject_Free(ctx, p); 2355 } 2356 *newptr_p = bp; 2357 return 1; 2358} 2359 2360 2361static void * 2362_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes) 2363{ 2364 void *ptr2; 2365 2366 if (ptr == NULL) { 2367 return _PyObject_Malloc(ctx, nbytes); 2368 } 2369 2370 if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) { 2371 return ptr2; 2372 } 2373 2374 return PyMem_RawRealloc(ptr, nbytes); 2375} 2376 2377#else /* ! WITH_PYMALLOC */ 2378 2379/*==========================================================================*/ 2380/* pymalloc not enabled: Redirect the entry points to malloc. These will 2381 * only be used by extensions that are compiled with pymalloc enabled. */ 2382 2383Py_ssize_t 2384_Py_GetAllocatedBlocks(void) 2385{ 2386 return 0; 2387} 2388 2389#endif /* WITH_PYMALLOC */ 2390 2391 2392/*==========================================================================*/ 2393/* A x-platform debugging allocator. This doesn't manage memory directly, 2394 * it wraps a real allocator, adding extra debugging info to the memory blocks. 2395 */ 2396 2397/* Uncomment this define to add the "serialno" field */ 2398/* #define PYMEM_DEBUG_SERIALNO */ 2399 2400#ifdef PYMEM_DEBUG_SERIALNO 2401static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ 2402 2403/* serialno is always incremented via calling this routine. The point is 2404 * to supply a single place to set a breakpoint. 2405 */ 2406static void 2407bumpserialno(void) 2408{ 2409 ++serialno; 2410} 2411#endif 2412 2413#define SST SIZEOF_SIZE_T 2414 2415#ifdef PYMEM_DEBUG_SERIALNO 2416# define PYMEM_DEBUG_EXTRA_BYTES 4 * SST 2417#else 2418# define PYMEM_DEBUG_EXTRA_BYTES 3 * SST 2419#endif 2420 2421/* Read sizeof(size_t) bytes at p as a big-endian size_t. */ 2422static size_t 2423read_size_t(const void *p) 2424{ 2425 const uint8_t *q = (const uint8_t *)p; 2426 size_t result = *q++; 2427 int i; 2428 2429 for (i = SST; --i > 0; ++q) 2430 result = (result << 8) | *q; 2431 return result; 2432} 2433 2434/* Write n as a big-endian size_t, MSB at address p, LSB at 2435 * p + sizeof(size_t) - 1. 2436 */ 2437static void 2438write_size_t(void *p, size_t n) 2439{ 2440 uint8_t *q = (uint8_t *)p + SST - 1; 2441 int i; 2442 2443 for (i = SST; --i >= 0; --q) { 2444 *q = (uint8_t)(n & 0xff); 2445 n >>= 8; 2446 } 2447} 2448 2449/* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and 2450 fills them with useful stuff, here calling the underlying malloc's result p: 2451 2452p[0: S] 2453 Number of bytes originally asked for. This is a size_t, big-endian (easier 2454 to read in a memory dump). 2455p[S] 2456 API ID. See PEP 445. This is a character, but seems undocumented. 2457p[S+1: 2*S] 2458 Copies of PYMEM_FORBIDDENBYTE. Used to catch under- writes and reads. 2459p[2*S: 2*S+n] 2460 The requested memory, filled with copies of PYMEM_CLEANBYTE. 2461 Used to catch reference to uninitialized memory. 2462 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc 2463 handled the request itself. 2464p[2*S+n: 2*S+n+S] 2465 Copies of PYMEM_FORBIDDENBYTE. Used to catch over- writes and reads. 2466p[2*S+n+S: 2*S+n+2*S] 2467 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc 2468 and _PyMem_DebugRealloc. 2469 This is a big-endian size_t. 2470 If "bad memory" is detected later, the serial number gives an 2471 excellent way to set a breakpoint on the next run, to capture the 2472 instant at which this block was passed out. 2473 2474If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks 2475for 3 * S extra bytes, and omits the last serialno field. 2476*/ 2477 2478static void * 2479_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) 2480{ 2481 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 2482 uint8_t *p; /* base address of malloc'ed pad block */ 2483 uint8_t *data; /* p + 2*SST == pointer to data bytes */ 2484 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */ 2485 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */ 2486 2487 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) { 2488 /* integer overflow: can't represent total as a Py_ssize_t */ 2489 return NULL; 2490 } 2491 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES; 2492 2493 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN] 2494 ^--- p ^--- data ^--- tail 2495 S: nbytes stored as size_t 2496 I: API identifier (1 byte) 2497 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after) 2498 C: Clean bytes used later to store actual data 2499 N: Serial number stored as size_t 2500 2501 If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field 2502 is omitted. */ 2503 2504 if (use_calloc) { 2505 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total); 2506 } 2507 else { 2508 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total); 2509 } 2510 if (p == NULL) { 2511 return NULL; 2512 } 2513 data = p + 2*SST; 2514 2515#ifdef PYMEM_DEBUG_SERIALNO 2516 bumpserialno(); 2517#endif 2518 2519 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */ 2520 write_size_t(p, nbytes); 2521 p[SST] = (uint8_t)api->api_id; 2522 memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1); 2523 2524 if (nbytes > 0 && !use_calloc) { 2525 memset(data, PYMEM_CLEANBYTE, nbytes); 2526 } 2527 2528 /* at tail, write pad (SST bytes) and serialno (SST bytes) */ 2529 tail = data + nbytes; 2530 memset(tail, PYMEM_FORBIDDENBYTE, SST); 2531#ifdef PYMEM_DEBUG_SERIALNO 2532 write_size_t(tail + SST, serialno); 2533#endif 2534 2535 return data; 2536} 2537 2538static void * 2539_PyMem_DebugRawMalloc(void *ctx, size_t nbytes) 2540{ 2541 return _PyMem_DebugRawAlloc(0, ctx, nbytes); 2542} 2543 2544static void * 2545_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize) 2546{ 2547 size_t nbytes; 2548 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize); 2549 nbytes = nelem * elsize; 2550 return _PyMem_DebugRawAlloc(1, ctx, nbytes); 2551} 2552 2553 2554/* The debug free first checks the 2*SST bytes on each end for sanity (in 2555 particular, that the FORBIDDENBYTEs with the api ID are still intact). 2556 Then fills the original bytes with PYMEM_DEADBYTE. 2557 Then calls the underlying free. 2558*/ 2559static void 2560_PyMem_DebugRawFree(void *ctx, void *p) 2561{ 2562 /* PyMem_Free(NULL) has no effect */ 2563 if (p == NULL) { 2564 return; 2565 } 2566 2567 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 2568 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */ 2569 size_t nbytes; 2570 2571 _PyMem_DebugCheckAddress(__func__, api->api_id, p); 2572 nbytes = read_size_t(q); 2573 nbytes += PYMEM_DEBUG_EXTRA_BYTES; 2574 memset(q, PYMEM_DEADBYTE, nbytes); 2575 api->alloc.free(api->alloc.ctx, q); 2576} 2577 2578 2579static void * 2580_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) 2581{ 2582 if (p == NULL) { 2583 return _PyMem_DebugRawAlloc(0, ctx, nbytes); 2584 } 2585 2586 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 2587 uint8_t *head; /* base address of malloc'ed pad block */ 2588 uint8_t *data; /* pointer to data bytes */ 2589 uint8_t *r; 2590 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */ 2591 size_t total; /* 2 * SST + nbytes + 2 * SST */ 2592 size_t original_nbytes; 2593#define ERASED_SIZE 64 2594 uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */ 2595 2596 _PyMem_DebugCheckAddress(__func__, api->api_id, p); 2597 2598 data = (uint8_t *)p; 2599 head = data - 2*SST; 2600 original_nbytes = read_size_t(head); 2601 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) { 2602 /* integer overflow: can't represent total as a Py_ssize_t */ 2603 return NULL; 2604 } 2605 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES; 2606 2607 tail = data + original_nbytes; 2608#ifdef PYMEM_DEBUG_SERIALNO 2609 size_t block_serialno = read_size_t(tail + SST); 2610#endif 2611 /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and 2612 ERASED_SIZE bytes at the end as dead and save the copy of erased bytes. 2613 */ 2614 if (original_nbytes <= sizeof(save)) { 2615 memcpy(save, data, original_nbytes); 2616 memset(data - 2 * SST, PYMEM_DEADBYTE, 2617 original_nbytes + PYMEM_DEBUG_EXTRA_BYTES); 2618 } 2619 else { 2620 memcpy(save, data, ERASED_SIZE); 2621 memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST); 2622 memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE); 2623 memset(tail - ERASED_SIZE, PYMEM_DEADBYTE, 2624 ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST); 2625 } 2626 2627 /* Resize and add decorations. */ 2628 r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total); 2629 if (r == NULL) { 2630 /* if realloc() failed: rewrite header and footer which have 2631 just been erased */ 2632 nbytes = original_nbytes; 2633 } 2634 else { 2635 head = r; 2636#ifdef PYMEM_DEBUG_SERIALNO 2637 bumpserialno(); 2638 block_serialno = serialno; 2639#endif 2640 } 2641 data = head + 2*SST; 2642 2643 write_size_t(head, nbytes); 2644 head[SST] = (uint8_t)api->api_id; 2645 memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1); 2646 2647 tail = data + nbytes; 2648 memset(tail, PYMEM_FORBIDDENBYTE, SST); 2649#ifdef PYMEM_DEBUG_SERIALNO 2650 write_size_t(tail + SST, block_serialno); 2651#endif 2652 2653 /* Restore saved bytes. */ 2654 if (original_nbytes <= sizeof(save)) { 2655 memcpy(data, save, Py_MIN(nbytes, original_nbytes)); 2656 } 2657 else { 2658 size_t i = original_nbytes - ERASED_SIZE; 2659 memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE)); 2660 if (nbytes > i) { 2661 memcpy(data + i, &save[ERASED_SIZE], 2662 Py_MIN(nbytes - i, ERASED_SIZE)); 2663 } 2664 } 2665 2666 if (r == NULL) { 2667 return NULL; 2668 } 2669 2670 if (nbytes > original_nbytes) { 2671 /* growing: mark new extra memory clean */ 2672 memset(data + original_nbytes, PYMEM_CLEANBYTE, 2673 nbytes - original_nbytes); 2674 } 2675 2676 return data; 2677} 2678 2679static inline void 2680_PyMem_DebugCheckGIL(const char *func) 2681{ 2682 if (!PyGILState_Check()) { 2683 _Py_FatalErrorFunc(func, 2684 "Python memory allocator called " 2685 "without holding the GIL"); 2686 } 2687} 2688 2689static void * 2690_PyMem_DebugMalloc(void *ctx, size_t nbytes) 2691{ 2692 _PyMem_DebugCheckGIL(__func__); 2693 return _PyMem_DebugRawMalloc(ctx, nbytes); 2694} 2695 2696static void * 2697_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize) 2698{ 2699 _PyMem_DebugCheckGIL(__func__); 2700 return _PyMem_DebugRawCalloc(ctx, nelem, elsize); 2701} 2702 2703 2704static void 2705_PyMem_DebugFree(void *ctx, void *ptr) 2706{ 2707 _PyMem_DebugCheckGIL(__func__); 2708 _PyMem_DebugRawFree(ctx, ptr); 2709} 2710 2711 2712static void * 2713_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes) 2714{ 2715 _PyMem_DebugCheckGIL(__func__); 2716 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes); 2717} 2718 2719/* Check the forbidden bytes on both ends of the memory allocated for p. 2720 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress, 2721 * and call Py_FatalError to kill the program. 2722 * The API id, is also checked. 2723 */ 2724static void 2725_PyMem_DebugCheckAddress(const char *func, char api, const void *p) 2726{ 2727 assert(p != NULL); 2728 2729 const uint8_t *q = (const uint8_t *)p; 2730 size_t nbytes; 2731 const uint8_t *tail; 2732 int i; 2733 char id; 2734 2735 /* Check the API id */ 2736 id = (char)q[-SST]; 2737 if (id != api) { 2738 _PyObject_DebugDumpAddress(p); 2739 _Py_FatalErrorFormat(func, 2740 "bad ID: Allocated using API '%c', " 2741 "verified using API '%c'", 2742 id, api); 2743 } 2744 2745 /* Check the stuff at the start of p first: if there's underwrite 2746 * corruption, the number-of-bytes field may be nuts, and checking 2747 * the tail could lead to a segfault then. 2748 */ 2749 for (i = SST-1; i >= 1; --i) { 2750 if (*(q-i) != PYMEM_FORBIDDENBYTE) { 2751 _PyObject_DebugDumpAddress(p); 2752 _Py_FatalErrorFunc(func, "bad leading pad byte"); 2753 } 2754 } 2755 2756 nbytes = read_size_t(q - 2*SST); 2757 tail = q + nbytes; 2758 for (i = 0; i < SST; ++i) { 2759 if (tail[i] != PYMEM_FORBIDDENBYTE) { 2760 _PyObject_DebugDumpAddress(p); 2761 _Py_FatalErrorFunc(func, "bad trailing pad byte"); 2762 } 2763 } 2764} 2765 2766/* Display info to stderr about the memory block at p. */ 2767static void 2768_PyObject_DebugDumpAddress(const void *p) 2769{ 2770 const uint8_t *q = (const uint8_t *)p; 2771 const uint8_t *tail; 2772 size_t nbytes; 2773 int i; 2774 int ok; 2775 char id; 2776 2777 fprintf(stderr, "Debug memory block at address p=%p:", p); 2778 if (p == NULL) { 2779 fprintf(stderr, "\n"); 2780 return; 2781 } 2782 id = (char)q[-SST]; 2783 fprintf(stderr, " API '%c'\n", id); 2784 2785 nbytes = read_size_t(q - 2*SST); 2786 fprintf(stderr, " %zu bytes originally requested\n", nbytes); 2787 2788 /* In case this is nuts, check the leading pad bytes first. */ 2789 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1); 2790 ok = 1; 2791 for (i = 1; i <= SST-1; ++i) { 2792 if (*(q-i) != PYMEM_FORBIDDENBYTE) { 2793 ok = 0; 2794 break; 2795 } 2796 } 2797 if (ok) 2798 fputs("FORBIDDENBYTE, as expected.\n", stderr); 2799 else { 2800 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n", 2801 PYMEM_FORBIDDENBYTE); 2802 for (i = SST-1; i >= 1; --i) { 2803 const uint8_t byte = *(q-i); 2804 fprintf(stderr, " at p-%d: 0x%02x", i, byte); 2805 if (byte != PYMEM_FORBIDDENBYTE) 2806 fputs(" *** OUCH", stderr); 2807 fputc('\n', stderr); 2808 } 2809 2810 fputs(" Because memory is corrupted at the start, the " 2811 "count of bytes requested\n" 2812 " may be bogus, and checking the trailing pad " 2813 "bytes may segfault.\n", stderr); 2814 } 2815 2816 tail = q + nbytes; 2817 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, (void *)tail); 2818 ok = 1; 2819 for (i = 0; i < SST; ++i) { 2820 if (tail[i] != PYMEM_FORBIDDENBYTE) { 2821 ok = 0; 2822 break; 2823 } 2824 } 2825 if (ok) 2826 fputs("FORBIDDENBYTE, as expected.\n", stderr); 2827 else { 2828 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n", 2829 PYMEM_FORBIDDENBYTE); 2830 for (i = 0; i < SST; ++i) { 2831 const uint8_t byte = tail[i]; 2832 fprintf(stderr, " at tail+%d: 0x%02x", 2833 i, byte); 2834 if (byte != PYMEM_FORBIDDENBYTE) 2835 fputs(" *** OUCH", stderr); 2836 fputc('\n', stderr); 2837 } 2838 } 2839 2840#ifdef PYMEM_DEBUG_SERIALNO 2841 size_t serial = read_size_t(tail + SST); 2842 fprintf(stderr, 2843 " The block was made by call #%zu to debug malloc/realloc.\n", 2844 serial); 2845#endif 2846 2847 if (nbytes > 0) { 2848 i = 0; 2849 fputs(" Data at p:", stderr); 2850 /* print up to 8 bytes at the start */ 2851 while (q < tail && i < 8) { 2852 fprintf(stderr, " %02x", *q); 2853 ++i; 2854 ++q; 2855 } 2856 /* and up to 8 at the end */ 2857 if (q < tail) { 2858 if (tail - q > 8) { 2859 fputs(" ...", stderr); 2860 q = tail - 8; 2861 } 2862 while (q < tail) { 2863 fprintf(stderr, " %02x", *q); 2864 ++q; 2865 } 2866 } 2867 fputc('\n', stderr); 2868 } 2869 fputc('\n', stderr); 2870 2871 fflush(stderr); 2872 _PyMem_DumpTraceback(fileno(stderr), p); 2873} 2874 2875 2876static size_t 2877printone(FILE *out, const char* msg, size_t value) 2878{ 2879 int i, k; 2880 char buf[100]; 2881 size_t origvalue = value; 2882 2883 fputs(msg, out); 2884 for (i = (int)strlen(msg); i < 35; ++i) 2885 fputc(' ', out); 2886 fputc('=', out); 2887 2888 /* Write the value with commas. */ 2889 i = 22; 2890 buf[i--] = '\0'; 2891 buf[i--] = '\n'; 2892 k = 3; 2893 do { 2894 size_t nextvalue = value / 10; 2895 unsigned int digit = (unsigned int)(value - nextvalue * 10); 2896 value = nextvalue; 2897 buf[i--] = (char)(digit + '0'); 2898 --k; 2899 if (k == 0 && value && i >= 0) { 2900 k = 3; 2901 buf[i--] = ','; 2902 } 2903 } while (value && i >= 0); 2904 2905 while (i >= 0) 2906 buf[i--] = ' '; 2907 fputs(buf, out); 2908 2909 return origvalue; 2910} 2911 2912void 2913_PyDebugAllocatorStats(FILE *out, 2914 const char *block_name, int num_blocks, size_t sizeof_block) 2915{ 2916 char buf1[128]; 2917 char buf2[128]; 2918 PyOS_snprintf(buf1, sizeof(buf1), 2919 "%d %ss * %zd bytes each", 2920 num_blocks, block_name, sizeof_block); 2921 PyOS_snprintf(buf2, sizeof(buf2), 2922 "%48s ", buf1); 2923 (void)printone(out, buf2, num_blocks * sizeof_block); 2924} 2925 2926 2927#ifdef WITH_PYMALLOC 2928 2929#ifdef Py_DEBUG 2930/* Is target in the list? The list is traversed via the nextpool pointers. 2931 * The list may be NULL-terminated, or circular. Return 1 if target is in 2932 * list, else 0. 2933 */ 2934static int 2935pool_is_in_list(const poolp target, poolp list) 2936{ 2937 poolp origlist = list; 2938 assert(target != NULL); 2939 if (list == NULL) 2940 return 0; 2941 do { 2942 if (target == list) 2943 return 1; 2944 list = list->nextpool; 2945 } while (list != NULL && list != origlist); 2946 return 0; 2947} 2948#endif 2949 2950/* Print summary info to "out" about the state of pymalloc's structures. 2951 * In Py_DEBUG mode, also perform some expensive internal consistency 2952 * checks. 2953 * 2954 * Return 0 if the memory debug hooks are not installed or no statistics was 2955 * written into out, return 1 otherwise. 2956 */ 2957int 2958_PyObject_DebugMallocStats(FILE *out) 2959{ 2960 if (!_PyMem_PymallocEnabled()) { 2961 return 0; 2962 } 2963 2964 uint i; 2965 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT; 2966 /* # of pools, allocated blocks, and free blocks per class index */ 2967 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2968 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2969 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2970 /* total # of allocated bytes in used and full pools */ 2971 size_t allocated_bytes = 0; 2972 /* total # of available bytes in used pools */ 2973 size_t available_bytes = 0; 2974 /* # of free pools + pools not yet carved out of current arena */ 2975 uint numfreepools = 0; 2976 /* # of bytes for arena alignment padding */ 2977 size_t arena_alignment = 0; 2978 /* # of bytes in used and full pools used for pool_headers */ 2979 size_t pool_header_bytes = 0; 2980 /* # of bytes in used and full pools wasted due to quantization, 2981 * i.e. the necessarily leftover space at the ends of used and 2982 * full pools. 2983 */ 2984 size_t quantization = 0; 2985 /* # of arenas actually allocated. */ 2986 size_t narenas = 0; 2987 /* running total -- should equal narenas * ARENA_SIZE */ 2988 size_t total; 2989 char buf[128]; 2990 2991 fprintf(out, "Small block threshold = %d, in %u size classes.\n", 2992 SMALL_REQUEST_THRESHOLD, numclasses); 2993 2994 for (i = 0; i < numclasses; ++i) 2995 numpools[i] = numblocks[i] = numfreeblocks[i] = 0; 2996 2997 /* Because full pools aren't linked to from anything, it's easiest 2998 * to march over all the arenas. If we're lucky, most of the memory 2999 * will be living in full pools -- would be a shame to miss them. 3000 */ 3001 for (i = 0; i < maxarenas; ++i) { 3002 uintptr_t base = arenas[i].address; 3003 3004 /* Skip arenas which are not allocated. */ 3005 if (arenas[i].address == (uintptr_t)NULL) 3006 continue; 3007 narenas += 1; 3008 3009 numfreepools += arenas[i].nfreepools; 3010 3011 /* round up to pool alignment */ 3012 if (base & (uintptr_t)POOL_SIZE_MASK) { 3013 arena_alignment += POOL_SIZE; 3014 base &= ~(uintptr_t)POOL_SIZE_MASK; 3015 base += POOL_SIZE; 3016 } 3017 3018 /* visit every pool in the arena */ 3019 assert(base <= (uintptr_t) arenas[i].pool_address); 3020 for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) { 3021 poolp p = (poolp)base; 3022 const uint sz = p->szidx; 3023 uint freeblocks; 3024 3025 if (p->ref.count == 0) { 3026 /* currently unused */ 3027#ifdef Py_DEBUG 3028 assert(pool_is_in_list(p, arenas[i].freepools)); 3029#endif 3030 continue; 3031 } 3032 ++numpools[sz]; 3033 numblocks[sz] += p->ref.count; 3034 freeblocks = NUMBLOCKS(sz) - p->ref.count; 3035 numfreeblocks[sz] += freeblocks; 3036#ifdef Py_DEBUG 3037 if (freeblocks > 0) 3038 assert(pool_is_in_list(p, usedpools[sz + sz])); 3039#endif 3040 } 3041 } 3042 assert(narenas == narenas_currently_allocated); 3043 3044 fputc('\n', out); 3045 fputs("class size num pools blocks in use avail blocks\n" 3046 "----- ---- --------- ------------- ------------\n", 3047 out); 3048 3049 for (i = 0; i < numclasses; ++i) { 3050 size_t p = numpools[i]; 3051 size_t b = numblocks[i]; 3052 size_t f = numfreeblocks[i]; 3053 uint size = INDEX2SIZE(i); 3054 if (p == 0) { 3055 assert(b == 0 && f == 0); 3056 continue; 3057 } 3058 fprintf(out, "%5u %6u %11zu %15zu %13zu\n", 3059 i, size, p, b, f); 3060 allocated_bytes += b * size; 3061 available_bytes += f * size; 3062 pool_header_bytes += p * POOL_OVERHEAD; 3063 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size); 3064 } 3065 fputc('\n', out); 3066#ifdef PYMEM_DEBUG_SERIALNO 3067 if (_PyMem_DebugEnabled()) { 3068 (void)printone(out, "# times object malloc called", serialno); 3069 } 3070#endif 3071 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated); 3072 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas); 3073 (void)printone(out, "# arenas highwater mark", narenas_highwater); 3074 (void)printone(out, "# arenas allocated current", narenas); 3075 3076 PyOS_snprintf(buf, sizeof(buf), 3077 "%zu arenas * %d bytes/arena", 3078 narenas, ARENA_SIZE); 3079 (void)printone(out, buf, narenas * ARENA_SIZE); 3080 3081 fputc('\n', out); 3082 3083 /* Account for what all of those arena bytes are being used for. */ 3084 total = printone(out, "# bytes in allocated blocks", allocated_bytes); 3085 total += printone(out, "# bytes in available blocks", available_bytes); 3086 3087 PyOS_snprintf(buf, sizeof(buf), 3088 "%u unused pools * %d bytes", numfreepools, POOL_SIZE); 3089 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE); 3090 3091 total += printone(out, "# bytes lost to pool headers", pool_header_bytes); 3092 total += printone(out, "# bytes lost to quantization", quantization); 3093 total += printone(out, "# bytes lost to arena alignment", arena_alignment); 3094 (void)printone(out, "Total", total); 3095 assert(narenas * ARENA_SIZE == total); 3096 3097#if WITH_PYMALLOC_RADIX_TREE 3098 fputs("\narena map counts\n", out); 3099#ifdef USE_INTERIOR_NODES 3100 (void)printone(out, "# arena map mid nodes", arena_map_mid_count); 3101 (void)printone(out, "# arena map bot nodes", arena_map_bot_count); 3102 fputc('\n', out); 3103#endif 3104 total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root)); 3105#ifdef USE_INTERIOR_NODES 3106 total += printone(out, "# bytes lost to arena map mid", 3107 sizeof(arena_map_mid_t) * arena_map_mid_count); 3108 total += printone(out, "# bytes lost to arena map bot", 3109 sizeof(arena_map_bot_t) * arena_map_bot_count); 3110 (void)printone(out, "Total", total); 3111#endif 3112#endif 3113 3114 return 1; 3115} 3116 3117#endif /* #ifdef WITH_PYMALLOC */ 3118