/*

  Reference Cycle Garbage Collection
  ==================================

  Neil Schemenauer <nas@arctrix.com>

  Based on a post on the python-dev list.  Ideas from Guido van Rossum,
  Eric Tiedemann, and various others.

  http://www.arctrix.com/nas/python/gc/

  The following mailing list threads provide a historical perspective on
  the design of this module.  Note that a fair amount of refinement has
  occurred since those discussions.

  http://mail.python.org/pipermail/python-dev/2000-March/002385.html
  http://mail.python.org/pipermail/python-dev/2000-March/002434.html
  http://mail.python.org/pipermail/python-dev/2000-March/002497.html

  For a high-level view of the collection process, read the
  gc_collect_main() function.

*/

#include "Python.h"
#include "pycore_context.h"
#include "pycore_initconfig.h"
#include "pycore_interp.h"      // PyInterpreterState.gc
#include "pycore_object.h"
#include "pycore_pyerrors.h"
#include "pycore_pystate.h"     // _PyThreadState_GET()
#include "pydtrace.h"

typedef struct _gc_runtime_state GCState;

/*[clinic input]
module gc
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/


#ifdef Py_DEBUG
#  define GC_DEBUG
#endif

#define GC_NEXT _PyGCHead_NEXT
#define GC_PREV _PyGCHead_PREV

// update_refs() sets this bit for all objects in the generation being
// collected.  subtract_refs() and move_unreachable() use it to tell whether
// a visited object belongs to that generation.
//
// move_unreachable() removes this flag from reachable objects; only
// unreachable objects keep it.
//
// No object in the interpreter has this flag after a collection ends.
#define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING

// The lowest bit of _gc_next is used for the UNREACHABLE flag.
//
// This flag indicates that the object is in the "unreachable" list built by
// move_unreachable().
//
// Although the flag is only meaningful during move_unreachable(), that
// function does not clear it, to avoid an extra pass over the list;
// move_legacy_finalizers() removes it instead.  Between those two calls the
// unreachable list is not a normal list and most gc_list_* functions cannot
// be used on it.
#define NEXT_MASK_UNREACHABLE  (1)

/* Get an object's GC head */
#define AS_GC(o) ((PyGC_Head *)(((char *)(o))-sizeof(PyGC_Head)))

/* Get the object given the GC head */
#define FROM_GC(g) ((PyObject *)(((char *)(g))+sizeof(PyGC_Head)))
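/* Illustrative sketch (kept out of the build with #if 0): the two macros
 * above are exact inverses, because a tracked object is always laid out as a
 * PyGC_Head immediately followed by the PyObject itself.  `some_op` is a
 * hypothetical tracked object used only for this example. */
#if 0
static void
example_as_gc_round_trip(PyObject *some_op)
{
    PyGC_Head *gc = AS_GC(some_op);     /* step back over the GC header */
    PyObject *op = FROM_GC(gc);         /* step forward again           */
    assert(op == some_op);              /* the conversions cancel out   */
}
#endif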
static inline int
gc_is_collecting(PyGC_Head *g)
{
    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
}

static inline void
gc_clear_collecting(PyGC_Head *g)
{
    g->_gc_prev &= ~PREV_MASK_COLLECTING;
}

static inline Py_ssize_t
gc_get_refs(PyGC_Head *g)
{
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
}

static inline void
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
{
    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
}

static inline void
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
{
    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
        | PREV_MASK_COLLECTING
        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
}

static inline void
gc_decref(PyGC_Head *g)
{
    _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
                              gc_get_refs(g) > 0,
                              "refcount is too small");
    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
}

/* set for debugging information */
#define DEBUG_STATS             (1<<0) /* print collection statistics */
#define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
#define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
#define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
#define DEBUG_LEAK              DEBUG_COLLECTABLE | \
                DEBUG_UNCOLLECTABLE | \
                DEBUG_SAVEALL

#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)


static GCState *
get_gc_state(void)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
    return &interp->gc;
}


void
_PyGC_InitState(GCState *gcstate)
{
#define INIT_HEAD(GEN) \
    do { \
        GEN.head._gc_next = (uintptr_t)&GEN.head; \
        GEN.head._gc_prev = (uintptr_t)&GEN.head; \
    } while (0)

    for (int i = 0; i < NUM_GENERATIONS; i++) {
        assert(gcstate->generations[i].count == 0);
        INIT_HEAD(gcstate->generations[i]);
    };
    gcstate->generation0 = GEN_HEAD(gcstate, 0);
    INIT_HEAD(gcstate->permanent_generation);

#undef INIT_HEAD
}


PyStatus
_PyGC_Init(PyInterpreterState *interp)
{
    GCState *gcstate = &interp->gc;

    gcstate->garbage = PyList_New(0);
    if (gcstate->garbage == NULL) {
        return _PyStatus_NO_MEMORY();
    }

    gcstate->callbacks = PyList_New(0);
    if (gcstate->callbacks == NULL) {
        return _PyStatus_NO_MEMORY();
    }

    return _PyStatus_OK();
}


/*
_gc_prev values
---------------

Between collections, _gc_prev holds the "previous" pointer of a doubly linked
list.

The lowest two bits of _gc_prev are used for flags.
PREV_MASK_COLLECTING is used only while collecting, and is cleared before the
collection ends or when _PyObject_GC_UNTRACK() is called.

During a collection, _gc_prev is temporarily used to hold gc_refs, and the gc
list is singly linked until _gc_prev is restored.

gc_refs
    At the start of a collection, update_refs() copies the true refcount
    to gc_refs, for each object in the generation being collected.
    subtract_refs() then adjusts gc_refs so that it equals the number of
    times an object is referenced directly from outside the generation
    being collected.

PREV_MASK_COLLECTING
    Objects in the generation being collected are marked PREV_MASK_COLLECTING
    in update_refs().


_gc_next values
---------------

_gc_next takes these values:

0
    The object is not tracked

!= 0
    Pointer to the next object in the GC list.
    Additionally, the lowest bit is used temporarily for the
    NEXT_MASK_UNREACHABLE flag described below.

NEXT_MASK_UNREACHABLE
    move_unreachable() moves objects that are not reachable (directly or
    indirectly) from outside the generation into an "unreachable" set and
    sets this flag.

    Objects that are found to be reachable have gc_refs set to 1.
    If this flag is set on an object that turns out to be reachable, the
    object must currently be in the "unreachable" set; the flag is then
    cleared and the object is moved back to the "reachable" set.

    move_legacy_finalizers() removes this flag from the objects remaining in
    the "unreachable" set.
*/
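/* Illustrative sketch (kept out of the build with #if 0): how gc_refs and
 * the flag bits share _gc_prev during a collection.  It just restates what
 * gc_reset_refs()/gc_get_refs() above do, assuming the _PyGC_PREV_SHIFT and
 * _PyGC_PREV_MASK layout from the internal headers. */
#if 0
static void
example_gc_prev_packing(PyGC_Head *g)
{
    /* Pretend the object has 3 incoming references and is being collected:
     * the refcount copy lives in the high bits, the flags in the low bits. */
    g->_gc_prev = ((uintptr_t)3 << _PyGC_PREV_SHIFT) | PREV_MASK_COLLECTING;

    assert(gc_get_refs(g) == 3);        /* high bits decode back to 3   */
    assert(gc_is_collecting(g));        /* low bit reports the flag     */

    gc_decref(g);                       /* what subtract_refs() does    */
    assert(gc_get_refs(g) == 2);
}
#endif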
/*** list functions ***/

static inline void
gc_list_init(PyGC_Head *list)
{
    // List header must not have flags.
    // We can assign pointer by simple cast.
    list->_gc_prev = (uintptr_t)list;
    list->_gc_next = (uintptr_t)list;
}

static inline int
gc_list_is_empty(PyGC_Head *list)
{
    return (list->_gc_next == (uintptr_t)list);
}

/* Append `node` to `list`. */
static inline void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{
    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;

    // last <-> node
    _PyGCHead_SET_PREV(node, last);
    _PyGCHead_SET_NEXT(last, node);

    // node <-> list
    _PyGCHead_SET_NEXT(node, list);
    list->_gc_prev = (uintptr_t)node;
}

/* Remove `node` from the gc list it's currently in. */
static inline void
gc_list_remove(PyGC_Head *node)
{
    PyGC_Head *prev = GC_PREV(node);
    PyGC_Head *next = GC_NEXT(node);

    _PyGCHead_SET_NEXT(prev, next);
    _PyGCHead_SET_PREV(next, prev);

    node->_gc_next = 0; /* object is not currently tracked */
}

/* Move `node` from the gc list it's currently in (which is not explicitly
 * named here) to the end of `list`.  This is semantically the same as
 * gc_list_remove(node) followed by gc_list_append(node, list).
 */
static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
{
    /* Unlink from current list. */
    PyGC_Head *from_prev = GC_PREV(node);
    PyGC_Head *from_next = GC_NEXT(node);
    _PyGCHead_SET_NEXT(from_prev, from_next);
    _PyGCHead_SET_PREV(from_next, from_prev);

    /* Relink at end of new list. */
    // list must not have flags.  So we can skip the macros.
    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
    _PyGCHead_SET_PREV(node, to_prev);
    _PyGCHead_SET_NEXT(to_prev, node);
    list->_gc_prev = (uintptr_t)node;
    _PyGCHead_SET_NEXT(node, list);
}

/* append list `from` onto list `to`; `from` becomes an empty list */
static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
{
    assert(from != to);
    if (!gc_list_is_empty(from)) {
        PyGC_Head *to_tail = GC_PREV(to);
        PyGC_Head *from_head = GC_NEXT(from);
        PyGC_Head *from_tail = GC_PREV(from);
        assert(from_head != from);
        assert(from_tail != from);

        _PyGCHead_SET_NEXT(to_tail, from_head);
        _PyGCHead_SET_PREV(from_head, to_tail);

        _PyGCHead_SET_NEXT(from_tail, to);
        _PyGCHead_SET_PREV(to, from_tail);
    }
    gc_list_init(from);
}

static Py_ssize_t
gc_list_size(PyGC_Head *list)
{
    PyGC_Head *gc;
    Py_ssize_t n = 0;
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
        n++;
    }
    return n;
}

/* Walk the list and mark all objects as non-collecting */
static inline void
gc_list_clear_collecting(PyGC_Head *collectable)
{
    PyGC_Head *gc;
    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
        gc_clear_collecting(gc);
    }
}

/* Append objects in a GC list to a Python list.
 * Return 0 if all OK, < 0 if error (out of memory for list)
 */
static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
{
    PyGC_Head *gc;
    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
        PyObject *op = FROM_GC(gc);
        if (op != py_list) {
            if (PyList_Append(py_list, op)) {
                return -1; /* exception */
            }
        }
    }
    return 0;
}

// Constants for validate_list's flags argument.
enum flagstates {collecting_clear_unreachable_clear,
                 collecting_clear_unreachable_set,
                 collecting_set_unreachable_clear,
                 collecting_set_unreachable_set};

#ifdef GC_DEBUG
// validate_list checks list consistency, and also serves as documentation of
// when the flags are expected to be set / unset.
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
// the prev and next pointers are "polluted" with flags.
// What's checked:
// - The `head` pointers are not polluted.
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
//   set or clear, as specified by the 'flags' argument.
// - The prev and next pointers are mutually consistent.
static void
validate_list(PyGC_Head *head, enum flagstates flags)
{
    assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
    assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
    uintptr_t prev_value = 0, next_value = 0;
    switch (flags) {
        case collecting_clear_unreachable_clear:
            break;
        case collecting_set_unreachable_clear:
            prev_value = PREV_MASK_COLLECTING;
            break;
        case collecting_clear_unreachable_set:
            next_value = NEXT_MASK_UNREACHABLE;
            break;
        case collecting_set_unreachable_set:
            prev_value = PREV_MASK_COLLECTING;
            next_value = NEXT_MASK_UNREACHABLE;
            break;
        default:
            assert(! "bad internal flags argument");
    }
    PyGC_Head *prev = head;
    PyGC_Head *gc = GC_NEXT(head);
    while (gc != head) {
        PyGC_Head *trueprev = GC_PREV(gc);
        PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
        assert(truenext != NULL);
        assert(trueprev == prev);
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
        assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
        prev = gc;
        gc = truenext;
    }
    assert(prev == GC_PREV(head));
}
#else
#define validate_list(x, y) do{}while(0)
#endif

/*** end of list stuff ***/
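/* Illustrative sketch (kept out of the build with #if 0): the circular
 * doubly linked list discipline used by the helpers above.  A list head
 * links to itself when empty; appending N GC heads makes gc_list_size()
 * report N.  `a` and `b` are hypothetical GC heads that would normally
 * belong to tracked objects. */
#if 0
static void
example_gc_list_usage(PyGC_Head *a, PyGC_Head *b)
{
    PyGC_Head head;

    gc_list_init(&head);                /* head.next == head.prev == &head  */
    assert(gc_list_is_empty(&head));

    gc_list_append(a, &head);           /* head <-> a                       */
    gc_list_append(b, &head);           /* head <-> a <-> b                 */
    assert(gc_list_size(&head) == 2);

    gc_list_remove(a);                  /* a->_gc_next becomes 0: untracked */
    assert(gc_list_size(&head) == 1);
}
#endif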
"bad internal flags argument"); 393 } 394 PyGC_Head *prev = head; 395 PyGC_Head *gc = GC_NEXT(head); 396 while (gc != head) { 397 PyGC_Head *trueprev = GC_PREV(gc); 398 PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); 399 assert(truenext != NULL); 400 assert(trueprev == prev); 401 assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value); 402 assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value); 403 prev = gc; 404 gc = truenext; 405 } 406 assert(prev == GC_PREV(head)); 407} 408#else 409#define validate_list(x, y) do{}while(0) 410#endif 411 412/*** end of list stuff ***/ 413 414 415/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and 416 * PREV_MASK_COLLECTING bit is set for all objects in containers. 417 */ 418static void 419update_refs(PyGC_Head *containers) 420{ 421 PyGC_Head *gc = GC_NEXT(containers); 422 for (; gc != containers; gc = GC_NEXT(gc)) { 423 gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc))); 424 /* Python's cyclic gc should never see an incoming refcount 425 * of 0: if something decref'ed to 0, it should have been 426 * deallocated immediately at that time. 427 * Possible cause (if the assert triggers): a tp_dealloc 428 * routine left a gc-aware object tracked during its teardown 429 * phase, and did something-- or allowed something to happen -- 430 * that called back into Python. gc can trigger then, and may 431 * see the still-tracked dying object. Before this assert 432 * was added, such mistakes went on to allow gc to try to 433 * delete the object again. In a debug build, that caused 434 * a mysterious segfault, when _Py_ForgetReference tried 435 * to remove the object from the doubly-linked list of all 436 * objects a second time. In a release build, an actual 437 * double deallocation occurred, which leads to corruption 438 * of the allocator's internal bookkeeping pointers. That's 439 * so serious that maybe this should be a release-build 440 * check instead of an assert? 441 */ 442 _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0); 443 } 444} 445 446/* A traversal callback for subtract_refs. */ 447static int 448visit_decref(PyObject *op, void *parent) 449{ 450 _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op)); 451 452 if (_PyObject_IS_GC(op)) { 453 PyGC_Head *gc = AS_GC(op); 454 /* We're only interested in gc_refs for objects in the 455 * generation being collected, which can be recognized 456 * because only they have positive gc_refs. 457 */ 458 if (gc_is_collecting(gc)) { 459 gc_decref(gc); 460 } 461 } 462 return 0; 463} 464 465/* Subtract internal references from gc_refs. After this, gc_refs is >= 0 466 * for all objects in containers, and is GC_REACHABLE for all tracked gc 467 * objects not in containers. The ones with gc_refs > 0 are directly 468 * reachable from outside containers, and so can't be collected. 469 */ 470static void 471subtract_refs(PyGC_Head *containers) 472{ 473 traverseproc traverse; 474 PyGC_Head *gc = GC_NEXT(containers); 475 for (; gc != containers; gc = GC_NEXT(gc)) { 476 PyObject *op = FROM_GC(gc); 477 traverse = Py_TYPE(op)->tp_traverse; 478 (void) traverse(op, 479 (visitproc)visit_decref, 480 op); 481 } 482} 483 484/* A traversal callback for move_unreachable. */ 485static int 486visit_reachable(PyObject *op, PyGC_Head *reachable) 487{ 488 if (!_PyObject_IS_GC(op)) { 489 return 0; 490 } 491 492 PyGC_Head *gc = AS_GC(op); 493 const Py_ssize_t gc_refs = gc_get_refs(gc); 494 495 // Ignore objects in other generation. 
/* A traversal callback for move_unreachable. */
static int
visit_reachable(PyObject *op, PyGC_Head *reachable)
{
    if (!_PyObject_IS_GC(op)) {
        return 0;
    }

    PyGC_Head *gc = AS_GC(op);
    const Py_ssize_t gc_refs = gc_get_refs(gc);

    // Ignore objects in other generations.
    // This also skips objects "to the left" of the current position in
    // move_unreachable's scan of the 'young' list - they've already been
    // traversed, and no longer have the PREV_MASK_COLLECTING flag.
    if (! gc_is_collecting(gc)) {
        return 0;
    }
    // It would be a logic error elsewhere if the collecting flag were set on
    // an untracked object.
    assert(gc->_gc_next != 0);

    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
        /* This had gc_refs = 0 when move_unreachable got
         * to it, but turns out it's reachable after all.
         * Move it back to move_unreachable's 'young' list,
         * and move_unreachable will eventually get to it
         * again.
         */
        // Manually unlink gc from unreachable list because the list functions
        // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
        PyGC_Head *prev = GC_PREV(gc);
        PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
        _PyObject_ASSERT(FROM_GC(prev),
                         prev->_gc_next & NEXT_MASK_UNREACHABLE);
        _PyObject_ASSERT(FROM_GC(next),
                         next->_gc_next & NEXT_MASK_UNREACHABLE);
        prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
        _PyGCHead_SET_PREV(next, prev);

        gc_list_append(gc, reachable);
        gc_set_refs(gc, 1);
    }
    else if (gc_refs == 0) {
        /* This is in move_unreachable's 'young' list, but
         * the traversal hasn't yet gotten to it.  All
         * we need to do is tell move_unreachable that it's
         * reachable.
         */
        gc_set_refs(gc, 1);
    }
    /* Else there's nothing to do.
     * If gc_refs > 0, it must be in move_unreachable's 'young'
     * list, and move_unreachable will eventually get to it.
     */
    else {
        _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
    }
    return 0;
}

/* Move the unreachable objects from young to unreachable.  After this,
 * no object in young has the PREV_MASK_COLLECTING flag and every object in
 * unreachable does.
 * All objects in young after this are directly or indirectly reachable
 * from outside the original young, and all objects in unreachable are
 * not.
 *
 * This function restores the _gc_prev pointers; young and unreachable are
 * proper doubly linked lists after it returns.
 * However, _gc_next of the objects in unreachable still carries the
 * NEXT_MASK_UNREACHABLE flag, so the gc_list_* functions cannot be used on
 * unreachable until the flag is removed.
 */
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{
    // Previous element in the young list, used to restore _gc_prev.
    PyGC_Head *prev = young;
    PyGC_Head *gc = GC_NEXT(young);

    /* Invariants:  all objects "to the left" of us in young are reachable
     * (directly or indirectly) from outside the young list as it was at entry.
     *
     * All other objects from the original young "to the left" of us are in
     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
     * left of us in 'young' now have been scanned, and no objects here
     * or to the right have been scanned yet.
     */

    while (gc != young) {
        if (gc_get_refs(gc)) {
            /* gc is definitely reachable from outside the
             * original 'young'.  Mark it as such, and traverse
             * its pointers to find any other objects that may
             * be directly reachable from it.  Note that the
             * call to tp_traverse may append objects to young,
             * so we have to wait until it returns to determine
             * the next object to visit.
             */
            PyObject *op = FROM_GC(gc);
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
            _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
                                      "refcount is too small");
            // NOTE: visit_reachable may change gc->_gc_next when
            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
            (void) traverse(op,
                            (visitproc)visit_reachable,
                            (void *)young);
            // relink gc_prev to prev element.
            _PyGCHead_SET_PREV(gc, prev);
            // gc is no longer in the COLLECTING state after this point.
            gc_clear_collecting(gc);
            prev = gc;
        }
        else {
            /* This *may* be unreachable.  To make progress,
             * assume it is.  gc isn't directly reachable from
             * any object we've already traversed, but may be
             * reachable from an object we haven't gotten to yet.
             * visit_reachable will eventually move gc back into
             * young if that's so, and we'll see it again.
             */
            // Move gc to unreachable.
            // No need to set gc->next->prev = prev because the list is
            // singly linked at this point.
            prev->_gc_next = gc->_gc_next;

            // We can't use gc_list_append() here because we use
            // NEXT_MASK_UNREACHABLE here.
            PyGC_Head *last = GC_PREV(unreachable);
            // NOTE: Since all objects in the unreachable set have the
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
            // But this may pollute the unreachable list head's 'next' pointer
            // too.  That's semantically senseless but expedient here - the
            // damage is repaired when this function ends.
            last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
            _PyGCHead_SET_PREV(gc, last);
            gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
            unreachable->_gc_prev = (uintptr_t)gc;
        }
        gc = (PyGC_Head*)prev->_gc_next;
    }
    // young->_gc_prev must be the last element remaining in the list.
    young->_gc_prev = (uintptr_t)prev;
    // don't let the pollution of the list head's next pointer leak
    unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
}
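/* Illustrative sketch (kept out of the build with #if 0): while the
 * NEXT_MASK_UNREACHABLE flag is still set, the unreachable list can only be
 * walked by masking the flag off of each _gc_next by hand, which is what
 * validate_list() and clear_unreachable_mask() do.  The regular
 * GC_NEXT()/gc_list_size() helpers would misinterpret the tagged pointers. */
#if 0
static Py_ssize_t
example_count_tagged_unreachable(PyGC_Head *unreachable)
{
    Py_ssize_t n = 0;
    PyGC_Head *gc = (PyGC_Head *)(unreachable->_gc_next & ~NEXT_MASK_UNREACHABLE);
    while (gc != unreachable) {
        n++;
        /* Strip the flag before following the pointer. */
        gc = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
    }
    return n;
}
#endif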
static void
untrack_tuples(PyGC_Head *head)
{
    PyGC_Head *next, *gc = GC_NEXT(head);
    while (gc != head) {
        PyObject *op = FROM_GC(gc);
        next = GC_NEXT(gc);
        if (PyTuple_CheckExact(op)) {
            _PyTuple_MaybeUntrack(op);
        }
        gc = next;
    }
}

/* Try to untrack all currently tracked dictionaries */
static void
untrack_dicts(PyGC_Head *head)
{
    PyGC_Head *next, *gc = GC_NEXT(head);
    while (gc != head) {
        PyObject *op = FROM_GC(gc);
        next = GC_NEXT(gc);
        if (PyDict_CheckExact(op)) {
            _PyDict_MaybeUntrack(op);
        }
        gc = next;
    }
}

/* Return true if object has a pre-PEP 442 finalization method. */
static int
has_legacy_finalizer(PyObject *op)
{
    return Py_TYPE(op)->tp_del != NULL;
}

/* Move the objects in unreachable with tp_del slots into `finalizers`.
 *
 * This function also removes the NEXT_MASK_UNREACHABLE flag
 * from _gc_next in unreachable.
 */
static void
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
{
    PyGC_Head *gc, *next;
    assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);

    /* March over unreachable.  Move objects with finalizers into
     * `finalizers`.
     */
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
        PyObject *op = FROM_GC(gc);

        _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
        next = (PyGC_Head*)gc->_gc_next;

        if (has_legacy_finalizer(op)) {
            gc_clear_collecting(gc);
            gc_list_move(gc, finalizers);
        }
    }
}

static inline void
clear_unreachable_mask(PyGC_Head *unreachable)
{
    /* Check that the list head does not have the unreachable bit set */
    assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);

    PyGC_Head *gc, *next;
    assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
        _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
        next = (PyGC_Head*)gc->_gc_next;
    }
    validate_list(unreachable, collecting_set_unreachable_clear);
}

/* A traversal callback for move_legacy_finalizer_reachable. */
static int
visit_move(PyObject *op, PyGC_Head *tolist)
{
    if (_PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        if (gc_is_collecting(gc)) {
            gc_list_move(gc, tolist);
            gc_clear_collecting(gc);
        }
    }
    return 0;
}

/* Move objects that are reachable from finalizers, from the unreachable set
 * into the finalizers set.
 */
static void
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
{
    traverseproc traverse;
    PyGC_Head *gc = GC_NEXT(finalizers);
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
        /* Note that the finalizers list may grow during this. */
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_move,
                        (void *)finalizers);
    }
}

/* Clear all weakrefs to unreachable objects, and if such a weakref has a
 * callback, invoke it if necessary.  Note that it's possible for such
 * weakrefs to be outside the unreachable set -- indeed, those are precisely
 * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
 * overview & some details.  Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation.  Objects
 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
 * unreachable are left at GC_TENTATIVELY_UNREACHABLE.  When this returns,
 * no object in `unreachable` is weakly referenced anymore.
 */
static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
{
    PyGC_Head *gc;
    PyObject *op;               /* generally FROM_GC(gc) */
    PyWeakReference *wr;        /* generally a cast of op */
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
    PyGC_Head *next;
    int num_freed = 0;

    gc_list_init(&wrcb_to_call);

    /* Clear all weakrefs to the objects in unreachable.  If such a weakref
     * also has a callback, move it into `wrcb_to_call` if the callback
     * needs to be invoked.  Note that we cannot invoke any callbacks until
     * all weakrefs to unreachable objects are cleared, lest the callback
     * resurrect an unreachable object via a still-active weakref.  We
     * make another pass over wrcb_to_call, invoking callbacks, after this
     * pass completes.
     */
    for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
        PyWeakReference **wrlist;

        op = FROM_GC(gc);
        next = GC_NEXT(gc);

        if (PyWeakref_Check(op)) {
            /* A weakref inside the unreachable set must be cleared.  If we
             * allow its callback to execute inside delete_garbage(), it
             * could expose objects that have tp_clear already called on
             * them.  Or, it could resurrect unreachable objects.  One way
             * this can happen is if some container objects do not implement
             * tp_traverse.  Then, wr_object can be outside the unreachable
             * set but can be deallocated as a result of breaking the
             * reference cycle.  If we don't clear the weakref, the callback
             * will run and potentially cause a crash.  See bpo-38006 for
             * one example.
             */
            _PyWeakref_ClearRef((PyWeakReference *)op);
        }

        if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
            continue;

        /* It supports weakrefs.  Does it have any? */
        wrlist = (PyWeakReference **)
                                _PyObject_GET_WEAKREFS_LISTPTR(op);

        /* `op` may have some weakrefs.  March over the list, clear
         * all the weakrefs, and move the weakrefs with callbacks
         * that must be called into wrcb_to_call.
         */
        for (wr = *wrlist; wr != NULL; wr = *wrlist) {
            PyGC_Head *wrasgc;                  /* AS_GC(wr) */

            /* _PyWeakref_ClearRef clears the weakref but leaves
             * the callback pointer intact.  Obscure:  it also
             * changes *wrlist.
             */
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
            _PyWeakref_ClearRef(wr);
            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
            if (wr->wr_callback == NULL) {
                /* no callback */
                continue;
            }

            /* Headache time.  `op` is going away, and is weakly referenced by
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
             * is also trash, no:
             *
             * 1. There's no need to call it.  The object and the weakref are
             *    both going away, so it's legitimate to pretend the weakref is
             *    going away first.  The user has to ensure a weakref outlives its
             *    referent if they want a guarantee that the wr callback will get
             *    invoked.
             *
             * 2. It may be catastrophic to call it.  If the callback is also in
             *    cyclic trash (CT), then although the CT is unreachable from
             *    outside the current generation, CT may be reachable from the
             *    callback.  Then the callback could resurrect insane objects.
             *
             * Since the callback is never needed and may be unsafe in this case,
             * wr is simply left in the unreachable set.  Note that because we
             * already called _PyWeakref_ClearRef(wr), its callback will never
             * trigger.
             *
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
             * weakref outlived the trash.  Note that since wr isn't CT in this
             * case, its callback can't be CT either -- wr acted as an external
             * root to this generation, and therefore its callback did too.  So
             * nothing in CT is reachable from the callback either, so it's hard
             * to imagine how calling it later could create a problem for us.  wr
             * is moved to wrcb_to_call in this case.
             */
            if (gc_is_collecting(AS_GC(wr))) {
                /* it should already have been cleared above */
                assert(wr->wr_object == Py_None);
                continue;
            }

            /* Create a new reference so that wr can't go away
             * before we can process it again.
             */
            Py_INCREF(wr);

            /* Move wr to wrcb_to_call, for the next pass. */
            wrasgc = AS_GC(wr);
            assert(wrasgc != next); /* wrasgc is reachable, but
                                       next isn't, so they can't
                                       be the same */
            gc_list_move(wrasgc, &wrcb_to_call);
        }
    }

    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
     * because they can't reference unreachable objects.
     */
    while (! gc_list_is_empty(&wrcb_to_call)) {
        PyObject *temp;
        PyObject *callback;

        gc = (PyGC_Head*)wrcb_to_call._gc_next;
        op = FROM_GC(gc);
        _PyObject_ASSERT(op, PyWeakref_Check(op));
        wr = (PyWeakReference *)op;
        callback = wr->wr_callback;
        _PyObject_ASSERT(op, callback != NULL);

        /* copy-paste of weakrefobject.c's handle_callback() */
        temp = PyObject_CallOneArg(callback, (PyObject *)wr);
        if (temp == NULL)
            PyErr_WriteUnraisable(callback);
        else
            Py_DECREF(temp);

        /* Give up the reference we created in the first pass.  When
         * op's refcount hits 0 (which it may or may not do right now),
         * op's tp_dealloc will decref op->wr_callback too.  Note
         * that the refcount probably will hit 0 now, and because this
         * weakref was reachable to begin with, gc didn't already
         * add it to its count of freed objects.  Example:  a reachable
         * weak value dict maps some key to this reachable weakref.
         * The callback removes this key->weakref mapping from the
         * dict, leaving no other references to the weakref (excepting
         * ours).
         */
        Py_DECREF(op);
        if (wrcb_to_call._gc_next == (uintptr_t)gc) {
            /* object is still alive -- move it */
            gc_list_move(gc, old);
        }
        else {
            ++num_freed;
        }
    }

    return num_freed;
}

static void
debug_cycle(const char *msg, PyObject *op)
{
    PySys_FormatStderr("gc: %s <%s %p>\n",
                       msg, Py_TYPE(op)->tp_name, op);
}

/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
 * only from such cycles).
 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
 * garbage list (a Python list), else only the objects in finalizers with
 * __del__ methods are appended to garbage.  All objects in finalizers are
 * merged into the old list regardless.
 */
static void
handle_legacy_finalizers(PyThreadState *tstate,
                         GCState *gcstate,
                         PyGC_Head *finalizers, PyGC_Head *old)
{
    assert(!_PyErr_Occurred(tstate));
    assert(gcstate->garbage != NULL);

    PyGC_Head *gc = GC_NEXT(finalizers);
    for (; gc != finalizers; gc = GC_NEXT(gc)) {
        PyObject *op = FROM_GC(gc);

        if ((gcstate->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
            if (PyList_Append(gcstate->garbage, op) < 0) {
                _PyErr_Clear(tstate);
                break;
            }
        }
    }

    gc_list_merge(finalizers, old);
}

/* Run first-time finalizers (if any) on all the objects in collectable.
 * Note that this may remove some (or even all) of the objects from the
 * list, due to refcounts falling to 0.
 */
static void
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
{
    destructor finalize;
    PyGC_Head seen;

    /* While we're going through the loop, `finalize(op)` may cause op, or
     * other objects, to be reclaimed via refcounts falling to zero.  So
     * there's little we can rely on about the structure of the input
     * `collectable` list across iterations.  For safety, we always take the
     * first object in that list and move it to a temporary `seen` list.
     * If objects vanish from the `collectable` and `seen` lists we don't
     * care.
     */
    gc_list_init(&seen);

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = GC_NEXT(collectable);
        PyObject *op = FROM_GC(gc);
        gc_list_move(gc, &seen);
        if (!_PyGCHead_FINALIZED(gc) &&
                (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
            _PyGCHead_SET_FINALIZED(gc);
            Py_INCREF(op);
            finalize(op);
            assert(!_PyErr_Occurred(tstate));
            Py_DECREF(op);
        }
    }
    gc_list_merge(&seen, collectable);
}
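/* Illustrative sketch (kept out of the build with #if 0): finalize_garbage()
 * calls each object's PEP 442 tp_finalize slot at most once.  A minimal
 * finalizer for a hypothetical type could look like this; the
 * PyErr_Fetch()/PyErr_Restore() pair is the usual pattern for preserving any
 * live exception around user code run by the finalizer. */
#if 0
static void
exampletype_finalize(PyObject *self)
{
    PyObject *error_type, *error_value, *error_traceback;

    /* Save the current exception, if any. */
    PyErr_Fetch(&error_type, &error_value, &error_traceback);

    /* ... cleanup that may execute Python code goes here ... */

    /* Restore the saved exception. */
    PyErr_Restore(error_type, error_value, error_traceback);
}
#endif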
/* Break reference cycles by clearing the containers involved.  This is
 * tricky business as the lists can be changing and we don't know which
 * objects may be freed.  It is possible I screwed something up here.
 */
static void
delete_garbage(PyThreadState *tstate, GCState *gcstate,
               PyGC_Head *collectable, PyGC_Head *old)
{
    assert(!_PyErr_Occurred(tstate));

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = GC_NEXT(collectable);
        PyObject *op = FROM_GC(gc);

        _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
                                  "refcount is too small");

        if (gcstate->debug & DEBUG_SAVEALL) {
            assert(gcstate->garbage != NULL);
            if (PyList_Append(gcstate->garbage, op) < 0) {
                _PyErr_Clear(tstate);
            }
        }
        else {
            inquiry clear;
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
                Py_INCREF(op);
                (void) clear(op);
                if (_PyErr_Occurred(tstate)) {
                    _PyErr_WriteUnraisableMsg("in tp_clear of",
                                              (PyObject*)Py_TYPE(op));
                }
                Py_DECREF(op);
            }
        }
        if (GC_NEXT(collectable) == gc) {
            /* object is still alive, move it, it may die later */
            gc_clear_collecting(gc);
            gc_list_move(gc, old);
        }
    }
}
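/* Illustrative sketch (kept out of the build with #if 0): delete_garbage()
 * breaks cycles by calling tp_clear, so a container type's tp_clear must
 * drop every reference its tp_traverse reports.  For the hypothetical
 * single-member container sketched earlier, that is one Py_CLEAR(). */
#if 0
static int
examplecontainer_clear(examplecontainer *self)
{
    Py_CLEAR(self->item);   /* decref and NULL the member, breaking the cycle */
    return 0;
}
#endif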
/* Clear all free lists
 * All free lists are cleared during the collection of the highest generation.
 * Allocated items in the free list may keep a pymalloc arena occupied.
 * Clearing the free lists may give back memory to the OS earlier.
 */
static void
clear_freelists(PyInterpreterState *interp)
{
    _PyTuple_ClearFreeList(interp);
    _PyFloat_ClearFreeList(interp);
    _PyList_ClearFreeList(interp);
    _PyDict_ClearFreeList(interp);
    _PyAsyncGen_ClearFreeLists(interp);
    _PyContext_ClearFreeList(interp);
}

// Show stats for objects in each generation
static void
show_stats_each_generations(GCState *gcstate)
{
    char buf[100];
    size_t pos = 0;

    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
                             " %zd",
                             gc_list_size(GEN_HEAD(gcstate, i)));
    }

    PySys_FormatStderr(
        "gc: objects in each generation:%s\n"
        "gc: objects in permanent generation: %zd\n",
        buf, gc_list_size(&gcstate->permanent_generation.head));
}

/* Deduce which objects among "base" are unreachable from outside the list
   and move them to 'unreachable'.  The process consists of the following
   steps:

1. Copy all reference counts to a different field (gc_prev is used to hold
   this copy to save memory).
2. Traverse all objects in "base" and visit all referred objects using
   "tp_traverse", and for every visited object subtract 1 from the reference
   count (the copy made in the previous step).  After this step, all objects
   that can be reached directly from outside must have a strictly positive
   reference count, while all unreachable objects must have a count of
   exactly 0.
3. Identify all unreachable objects (the ones with 0 reference count) and move
   them to the "unreachable" list.  This step also needs to move back to "base"
   all objects that were initially marked as unreachable but are referred to
   transitively by the reachable objects (the ones with a strictly positive
   reference count).

Contracts:

    * "base" has to be a valid list with no mask set.

    * The "unreachable" list must be uninitialized (this function calls
      gc_list_init over 'unreachable').

IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
flag set, and does not clear it, to skip unnecessary iteration.  Before the
flag is cleared (for example, by the 'clear_unreachable_mask' function or by
a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
list and most gc_list_* functions cannot be used on it. */
static inline void
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
    validate_list(base, collecting_clear_unreachable_clear);
    /* Using ob_refcnt and gc_refs, calculate which objects in the
     * container set are reachable from outside the set (i.e., have a
     * refcount greater than 0 when all the references within the
     * set are taken into account).
     */
    update_refs(base);  // gc_prev is used for gc_refs
    subtract_refs(base);

    /* Leave everything reachable from outside base in base, and move
     * everything else (in base) to unreachable.
     *
     * NOTE:  This used to move the reachable objects into a reachable
     * set instead.  But most things usually turn out to be reachable,
     * so it's more efficient to move the unreachable things.  It "sounds slick"
     * to move the unreachable objects, until you think about it - the reason it
     * pays isn't actually obvious.
     *
     * Suppose we create objects A, B, C in that order.  They appear in the young
     * generation in the same order.  If B points to A, and C to B, and C is
     * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
     * respectively.
     *
     * When move_unreachable finds A, A is moved to the unreachable list.  The
     * same for B when it's first encountered.  Then C is traversed, B is moved
     * _back_ to the reachable list.  B is eventually traversed, and then A is
     * moved back to the reachable list.
     *
     * So instead of not moving at all, the reachable objects B and A are each
     * moved twice.  Why is this a win?  A straightforward algorithm to move the
     * reachable objects instead would move A, B, and C once each.
     *
     * The key is that this dance leaves the objects in order C, B, A - it's
     * reversed from the original order.  On all _subsequent_ scans, none of
     * them will move.  Since most objects aren't in cycles, this can save an
     * unbounded number of moves across an unbounded number of later collections.
     * It can cost more only the first time the chain is scanned.
     *
     * Drawback:  move_unreachable is also used to find out what's still trash
     * after finalizers may resurrect objects.  In _that_ case most unreachable
     * objects will remain unreachable, so it would be more efficient to move
     * the reachable objects instead.
     * But this is a one-time cost, probably not worth complicating the code
     * to speed just a little.
     */
    gc_list_init(unreachable);
    move_unreachable(base, unreachable);  // gc_prev is pointer again
    validate_list(base, collecting_clear_unreachable_clear);
    validate_list(unreachable, collecting_set_unreachable_set);
}

/* Handle objects that may have resurrected after a call to 'finalize_garbage',
   moving them to 'old_generation' and placing the rest on 'still_unreachable'.

   Contracts:
       * After this function 'unreachable' must not be used anymore and
         'still_unreachable' will contain the objects that did not resurrect.

       * The "still_unreachable" list must be uninitialized (this function
         calls gc_list_init over 'still_unreachable').

IMPORTANT: After a call to this function, the 'still_unreachable' set will have
the PREV_MASK_COLLECTING flag set, but the objects in this set are going to be
removed, so we can skip the expense of clearing the flag to avoid extra
iteration. */
static inline void
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
                           PyGC_Head *old_generation)
{
    // Remove the PREV_MASK_COLLECTING from unreachable
    // to prepare it for a new call to 'deduce_unreachable'
    gc_list_clear_collecting(unreachable);

    // After the call to deduce_unreachable, the 'still_unreachable' set will
    // have the PREV_MASK_COLLECTING set, but the objects are going to be
    // removed so we can skip the expense of clearing the flag.
    PyGC_Head* resurrected = unreachable;
    deduce_unreachable(resurrected, still_unreachable);
    clear_unreachable_mask(still_unreachable);

    // Move the resurrected objects to the old generation for future collection.
    gc_list_merge(resurrected, old_generation);
}

/* This is the main function.  Read this to understand how the
 * collection process works. */
static Py_ssize_t
gc_collect_main(PyThreadState *tstate, int generation,
                Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
                int nofail)
{
    int i;
    Py_ssize_t m = 0; /* # objects collected */
    Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
    PyGC_Head *young; /* the generation we are examining */
    PyGC_Head *old; /* next older generation */
    PyGC_Head unreachable; /* non-problematic unreachable trash */
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
    PyGC_Head *gc;
    _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
    GCState *gcstate = &tstate->interp->gc;

    // gc_collect_main() must not be called before _PyGC_Init
    // or after _PyGC_Fini()
    assert(gcstate->garbage != NULL);
    assert(!_PyErr_Occurred(tstate));

    if (gcstate->debug & DEBUG_STATS) {
        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
        show_stats_each_generations(gcstate);
        t1 = _PyTime_GetPerfCounter();
    }

    if (PyDTrace_GC_START_ENABLED())
        PyDTrace_GC_START(generation);

    /* update collection and allocation counters */
    if (generation+1 < NUM_GENERATIONS)
        gcstate->generations[generation+1].count += 1;
    for (i = 0; i <= generation; i++)
        gcstate->generations[i].count = 0;

    /* merge younger generations with one we are currently collecting */
    for (i = 0; i < generation; i++) {
        gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
    }

    /* handy references */
    young = GEN_HEAD(gcstate, generation);
    if (generation < NUM_GENERATIONS-1)
        old = GEN_HEAD(gcstate, generation+1);
    else
        old = young;
    validate_list(old, collecting_clear_unreachable_clear);

    deduce_unreachable(young, &unreachable);

    untrack_tuples(young);
    /* Move reachable objects to next generation. */
    if (young != old) {
        if (generation == NUM_GENERATIONS - 2) {
            gcstate->long_lived_pending += gc_list_size(young);
        }
        gc_list_merge(young, old);
    }
    else {
        /* We only un-track dicts in full collections, to avoid quadratic
           dict build-up. See issue #14775. */
        untrack_dicts(young);
        gcstate->long_lived_pending = 0;
        gcstate->long_lived_total = gc_list_size(young);
    }

    /* All objects in unreachable are trash, but objects reachable from
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
     */
    gc_list_init(&finalizers);
    // NEXT_MASK_UNREACHABLE is cleared here.
    // After move_legacy_finalizers(), unreachable is a normal list.
    move_legacy_finalizers(&unreachable, &finalizers);
    /* finalizers contains the unreachable objects with a legacy finalizer;
     * unreachable objects reachable *from* those are also uncollectable,
     * and we move those into the finalizers list too.
     */
    move_legacy_finalizer_reachable(&finalizers);

    validate_list(&finalizers, collecting_clear_unreachable_clear);
    validate_list(&unreachable, collecting_set_unreachable_clear);

    /* Print debugging information. */
    if (gcstate->debug & DEBUG_COLLECTABLE) {
        for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }

    /* Clear weakrefs and invoke callbacks as necessary. */
    m += handle_weakrefs(&unreachable, old);

    validate_list(old, collecting_clear_unreachable_clear);
    validate_list(&unreachable, collecting_set_unreachable_clear);

    /* Call tp_finalize on objects which have one. */
    finalize_garbage(tstate, &unreachable);

    /* Handle any objects that may have resurrected after the call
     * to 'finalize_garbage' and continue the collection with the
     * objects that are still unreachable */
    PyGC_Head final_unreachable;
    handle_resurrected_objects(&unreachable, &final_unreachable, old);

    /* Call tp_clear on objects in the final_unreachable set.  This will cause
     * the reference cycles to be broken.  It may also cause some objects
     * in finalizers to be freed.
     */
    m += gc_list_size(&final_unreachable);
    delete_garbage(tstate, gcstate, &final_unreachable, old);

    /* Collect statistics on uncollectable objects found and print
     * debugging information. */
    for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
        n++;
        if (gcstate->debug & DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
    if (gcstate->debug & DEBUG_STATS) {
        double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
        PySys_WriteStderr(
            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
            n+m, n, d);
    }

    /* Append instances in the uncollectable set to a Python
     * reachable list of garbage.  The programmer has to deal with
     * this if they insist on creating this type of structure.
     */
    handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
    validate_list(old, collecting_clear_unreachable_clear);

    /* Clear the free lists only during the collection of the highest
     * generation */
    if (generation == NUM_GENERATIONS-1) {
        clear_freelists(tstate->interp);
    }

    if (_PyErr_Occurred(tstate)) {
        if (nofail) {
            _PyErr_Clear(tstate);
        }
        else {
            _PyErr_WriteUnraisableMsg("in garbage collection", NULL);
        }
    }

    /* Update stats */
    if (n_collected) {
        *n_collected = m;
    }
    if (n_uncollectable) {
        *n_uncollectable = n;
    }

    struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
    stats->collections++;
    stats->collected += m;
    stats->uncollectable += n;

    if (PyDTrace_GC_DONE_ENABLED()) {
        PyDTrace_GC_DONE(n + m);
    }

    assert(!_PyErr_Occurred(tstate));
    return n + m;
}

/* Invoke progress callbacks to notify clients that garbage collection
 * is starting or stopping
 */
static void
invoke_gc_callback(PyThreadState *tstate, const char *phase,
                   int generation, Py_ssize_t collected,
                   Py_ssize_t uncollectable)
{
    assert(!_PyErr_Occurred(tstate));

    /* we may get called very early */
    GCState *gcstate = &tstate->interp->gc;
    if (gcstate->callbacks == NULL) {
        return;
    }

    /* The local variable cannot be rebound, check it for sanity */
    assert(PyList_CheckExact(gcstate->callbacks));
    PyObject *info = NULL;
    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
        info = Py_BuildValue("{sisnsn}",
            "generation", generation,
            "collected", collected,
            "uncollectable", uncollectable);
        if (info == NULL) {
            PyErr_WriteUnraisable(NULL);
            return;
        }
    }
    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
        Py_INCREF(cb); /* make sure cb doesn't go away */
        r = PyObject_CallFunction(cb, "sO", phase, info);
        if (r == NULL) {
            PyErr_WriteUnraisable(cb);
        }
        else {
            Py_DECREF(r);
        }
        Py_DECREF(cb);
    }
    Py_XDECREF(info);
    assert(!_PyErr_Occurred(tstate));
}

/* Perform garbage collection of a generation and invoke
 * progress callbacks.
 */
static Py_ssize_t
gc_collect_with_callback(PyThreadState *tstate, int generation)
{
    assert(!_PyErr_Occurred(tstate));
    Py_ssize_t result, collected, uncollectable;
    invoke_gc_callback(tstate, "start", generation, 0, 0);
    result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0);
    invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
    assert(!_PyErr_Occurred(tstate));
    return result;
}

static Py_ssize_t
gc_collect_generations(PyThreadState *tstate)
{
    GCState *gcstate = &tstate->interp->gc;
    /* Find the oldest generation (highest numbered) where the count
     * exceeds the threshold.  Objects in that generation and
     * generations younger than it will be collected. */
    Py_ssize_t n = 0;
    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
            /* Avoid quadratic performance degradation in number
               of tracked objects (see also issue #4074):

               To limit the cost of garbage collection, there are two strategies:
                 - make each collection faster, e.g. by scanning fewer objects
                 - do fewer collections
               This heuristic is about the latter strategy.

               In addition to the various configurable thresholds, we only trigger
               a full collection if the ratio

                long_lived_pending / long_lived_total

               is above a given value (hardwired to 25%).

               The reason is that, while "non-full" collections (i.e., collections
               of the young and middle generations) will always examine roughly the
               same number of objects -- determined by the aforementioned
               thresholds --, the cost of a full collection is proportional to the
               total number of long-lived objects, which is virtually unbounded.

               Indeed, it has been remarked that doing a full collection every
               <constant number> of object creations entails a dramatic performance
               degradation in workloads which consist of creating and storing lots
               of long-lived objects (e.g. building a large list of GC-tracked
               objects would show quadratic performance, instead of linear as
               expected: see issue #4074).

               Using the above ratio, instead, yields amortized linear performance
               in the total number of objects (the effect of which can be
               summarized thusly: "each full garbage collection is more and more
               costly as the number of objects grows, but we do fewer and fewer of
               them").

               This heuristic was suggested by Martin von Löwis on python-dev in
               June 2008.  His original analysis and proposal can be found at:
               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
            */
            if (i == NUM_GENERATIONS - 1
                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
                continue;
            n = gc_collect_with_callback(tstate, i);
            break;
        }
    }
    return n;
}
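/* Illustrative sketch (kept out of the build with #if 0): a worked instance
 * of the 25% rule above.  With 1,000,000 long-lived objects, a full
 * collection is postponed until at least 250,000 surviving objects are
 * pending promotion into the oldest generation, so full collections become
 * rarer as the heap grows and the total work stays roughly linear. */
#if 0
static int
example_full_collection_due(void)
{
    /* Say the oldest generation currently holds 1,000,000 objects and
     * 200,000 survivors are waiting in the middle generation. */
    Py_ssize_t long_lived_total = 1000000;    /* gcstate->long_lived_total   */
    Py_ssize_t long_lived_pending = 200000;   /* gcstate->long_lived_pending */

    /* 200,000 < 1,000,000 / 4, so gc_collect_generations() skips the full
     * collection this time around. */
    return long_lived_pending >= long_lived_total / 4;   /* 0 here */
}
#endif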
#include "clinic/gcmodule.c.h"

/*[clinic input]
gc.enable

Enable automatic garbage collection.
[clinic start generated code]*/

static PyObject *
gc_enable_impl(PyObject *module)
/*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
{
    PyGC_Enable();
    Py_RETURN_NONE;
}

/*[clinic input]
gc.disable

Disable automatic garbage collection.
[clinic start generated code]*/

static PyObject *
gc_disable_impl(PyObject *module)
/*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
{
    PyGC_Disable();
    Py_RETURN_NONE;
}

/*[clinic input]
gc.isenabled -> bool

Returns true if automatic garbage collection is enabled.
[clinic start generated code]*/

static int
gc_isenabled_impl(PyObject *module)
/*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
{
    return PyGC_IsEnabled();
}

/*[clinic input]
gc.collect -> Py_ssize_t

    generation: int(c_default="NUM_GENERATIONS - 1") = 2

Run the garbage collector.

With no arguments, run a full collection.  The optional argument
may be an integer specifying which generation to collect.  A ValueError
is raised if the generation number is invalid.

The number of unreachable objects is returned.
[clinic start generated code]*/

static Py_ssize_t
gc_collect_impl(PyObject *module, int generation)
/*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/
{
    PyThreadState *tstate = _PyThreadState_GET();

    if (generation < 0 || generation >= NUM_GENERATIONS) {
        _PyErr_SetString(tstate, PyExc_ValueError, "invalid generation");
        return -1;
    }

    GCState *gcstate = &tstate->interp->gc;
    Py_ssize_t n;
    if (gcstate->collecting) {
        /* already collecting, don't do anything */
        n = 0;
    }
    else {
        gcstate->collecting = 1;
        n = gc_collect_with_callback(tstate, generation);
        gcstate->collecting = 0;
    }
    return n;
}

/*[clinic input]
gc.set_debug

    flags: int
        An integer that can have the following bits turned on:
          DEBUG_STATS - Print statistics during collection.
          DEBUG_COLLECTABLE - Print collectable objects found.
          DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects
            found.
          DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.
          DEBUG_LEAK - Debug leaking programs (everything but STATS).
    /

Set the garbage collection debugging flags.

Debugging information is written to sys.stderr.
[clinic start generated code]*/

static PyObject *
gc_set_debug_impl(PyObject *module, int flags)
/*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
{
    GCState *gcstate = get_gc_state();
    gcstate->debug = flags;
    Py_RETURN_NONE;
}

/*[clinic input]
gc.get_debug -> int

Get the garbage collection debugging flags.
[clinic start generated code]*/

static int
gc_get_debug_impl(PyObject *module)
/*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
{
    GCState *gcstate = get_gc_state();
    return gcstate->debug;
}

PyDoc_STRVAR(gc_set_thresh__doc__,
"set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
"\n"
"Sets the collection thresholds.  Setting threshold0 to zero disables\n"
"collection.\n");

static PyObject *
gc_set_threshold(PyObject *self, PyObject *args)
{
    GCState *gcstate = get_gc_state();
    if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
                          &gcstate->generations[0].threshold,
                          &gcstate->generations[1].threshold,
                          &gcstate->generations[2].threshold))
        return NULL;
    for (int i = 3; i < NUM_GENERATIONS; i++) {
        /* generations higher than 2 get the same threshold */
        gcstate->generations[i].threshold = gcstate->generations[2].threshold;
    }
    Py_RETURN_NONE;
}

/*[clinic input]
gc.get_threshold

Return the current collection thresholds.
[clinic start generated code]*/

static PyObject *
gc_get_threshold_impl(PyObject *module)
/*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
{
    GCState *gcstate = get_gc_state();
    return Py_BuildValue("(iii)",
                         gcstate->generations[0].threshold,
                         gcstate->generations[1].threshold,
                         gcstate->generations[2].threshold);
}

/*[clinic input]
gc.get_count

Return a three-tuple of the current collection counts.
[clinic start generated code]*/

static PyObject *
gc_get_count_impl(PyObject *module)
/*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
{
    GCState *gcstate = get_gc_state();
    return Py_BuildValue("(iii)",
                         gcstate->generations[0].count,
                         gcstate->generations[1].count,
                         gcstate->generations[2].count);
}

static int
referrersvisit(PyObject* obj, PyObject *objs)
{
    Py_ssize_t i;
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
        if (PyTuple_GET_ITEM(objs, i) == obj)
            return 1;
    return 0;
}

static int
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
{
    PyGC_Head *gc;
    PyObject *obj;
    traverseproc traverse;
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
        obj = FROM_GC(gc);
        traverse = Py_TYPE(obj)->tp_traverse;
        if (obj == objs || obj == resultlist)
            continue;
        if (traverse(obj, (visitproc)referrersvisit, objs)) {
            if (PyList_Append(resultlist, obj) < 0)
                return 0; /* error */
        }
    }
    return 1; /* no error */
}

PyDoc_STRVAR(gc_get_referrers__doc__,
"get_referrers(*objs) -> list\n\
Return the list of objects that directly refer to any of objs.");

static PyObject *
gc_get_referrers(PyObject *self, PyObject *args)
{
    if (PySys_Audit("gc.get_referrers", "(O)", args) < 0) {
        return NULL;
    }

    PyObject *result = PyList_New(0);
    if (!result) {
        return NULL;
    }

    GCState *gcstate = get_gc_state();
    for (int i = 0; i < NUM_GENERATIONS; i++) {
        if (!(gc_referrers_for(args, GEN_HEAD(gcstate, i), result))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

/* Append obj to list; return true if error (out of memory), false if OK. */
/* Append obj to list; return true if error (out of memory), false if OK. */
static int
referentsvisit(PyObject *obj, PyObject *list)
{
    return PyList_Append(list, obj) < 0;
}

PyDoc_STRVAR(gc_get_referents__doc__,
"get_referents(*objs) -> list\n\
Return the list of objects that are directly referred to by objs.");

static PyObject *
gc_get_referents(PyObject *self, PyObject *args)
{
    Py_ssize_t i;
    if (PySys_Audit("gc.get_referents", "(O)", args) < 0) {
        return NULL;
    }
    PyObject *result = PyList_New(0);

    if (result == NULL)
        return NULL;

    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
        traverseproc traverse;
        PyObject *obj = PyTuple_GET_ITEM(args, i);

        if (!_PyObject_IS_GC(obj))
            continue;
        traverse = Py_TYPE(obj)->tp_traverse;
        if (! traverse)
            continue;
        if (traverse(obj, (visitproc)referentsvisit, result)) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

/*[clinic input]
gc.get_objects
    generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None
        Generation to extract the objects from.

Return a list of objects tracked by the collector (excluding the list returned).

If generation is not None, return only the objects tracked by the collector
that are in that generation.
[clinic start generated code]*/

static PyObject *
gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
/*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/
{
    PyThreadState *tstate = _PyThreadState_GET();
    int i;
    PyObject* result;
    GCState *gcstate = &tstate->interp->gc;

    if (PySys_Audit("gc.get_objects", "n", generation) < 0) {
        return NULL;
    }

    result = PyList_New(0);
    if (result == NULL) {
        return NULL;
    }

    /* If generation is passed, we extract only that generation */
    if (generation != -1) {
        if (generation >= NUM_GENERATIONS) {
            _PyErr_Format(tstate, PyExc_ValueError,
                          "generation parameter must be less than the number of "
                          "available generations (%i)",
                          NUM_GENERATIONS);
            goto error;
        }

        if (generation < 0) {
            _PyErr_SetString(tstate, PyExc_ValueError,
                             "generation parameter cannot be negative");
            goto error;
        }

        if (append_objects(result, GEN_HEAD(gcstate, generation))) {
            goto error;
        }

        return result;
    }

    /* If generation is not passed or None, get all objects from all generations */
    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (append_objects(result, GEN_HEAD(gcstate, i))) {
            goto error;
        }
    }
    return result;

error:
    Py_DECREF(result);
    return NULL;
}
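/* Illustrative sketch (not part of this module): the same query issued from C
 * through the module object.  Passing a generation index restricts the result
 * to that generation; omitting the argument returns every tracked object.
 * Error handling is abbreviated.
 *
 *     PyObject *gc_module = PyImport_ImportModule("gc");
 *     PyObject *youngest = NULL;
 *     if (gc_module != NULL) {
 *         youngest = PyObject_CallMethod(gc_module, "get_objects", "n",
 *                                        (Py_ssize_t)0);
 *         Py_DECREF(gc_module);
 *     }
 *     // ... use the list, then Py_XDECREF(youngest);
 */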
/*[clinic input]
gc.get_stats

Return a list of dictionaries containing per-generation statistics.
[clinic start generated code]*/

static PyObject *
gc_get_stats_impl(PyObject *module)
/*[clinic end generated code: output=a8ab1d8a5d26f3ab input=1ef4ed9d17b1a470]*/
{
    int i;
    struct gc_generation_stats stats[NUM_GENERATIONS], *st;

    /* To get consistent values despite allocations while constructing
       the result list, we use a snapshot of the running stats. */
    GCState *gcstate = get_gc_state();
    for (i = 0; i < NUM_GENERATIONS; i++) {
        stats[i] = gcstate->generation_stats[i];
    }

    PyObject *result = PyList_New(0);
    if (result == NULL)
        return NULL;

    for (i = 0; i < NUM_GENERATIONS; i++) {
        PyObject *dict;
        st = &stats[i];
        dict = Py_BuildValue("{snsnsn}",
                             "collections", st->collections,
                             "collected", st->collected,
                             "uncollectable", st->uncollectable
                            );
        if (dict == NULL)
            goto error;
        if (PyList_Append(result, dict)) {
            Py_DECREF(dict);
            goto error;
        }
        Py_DECREF(dict);
    }
    return result;

error:
    Py_XDECREF(result);
    return NULL;
}


/*[clinic input]
gc.is_tracked

    obj: object
    /

Returns true if the object is tracked by the garbage collector.

Simple atomic objects will return false.
[clinic start generated code]*/

static PyObject *
gc_is_tracked(PyObject *module, PyObject *obj)
/*[clinic end generated code: output=14f0103423b28e31 input=d83057f170ea2723]*/
{
    PyObject *result;

    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
        result = Py_True;
    else
        result = Py_False;
    Py_INCREF(result);
    return result;
}

/*[clinic input]
gc.is_finalized

    obj: object
    /

Returns true if the object has already been finalized by the GC.
[clinic start generated code]*/

static PyObject *
gc_is_finalized(PyObject *module, PyObject *obj)
/*[clinic end generated code: output=e1516ac119a918ed input=201d0c58f69ae390]*/
{
    if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}

/*[clinic input]
gc.freeze

Freeze all currently tracked objects and ignore them for future collections.

This can be used before a POSIX fork() call to make the gc copy-on-write friendly.
Note: collection before a POSIX fork() call may free pages for future allocation
which can cause copy-on-write.
[clinic start generated code]*/

static PyObject *
gc_freeze_impl(PyObject *module)
/*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/
{
    GCState *gcstate = get_gc_state();
    for (int i = 0; i < NUM_GENERATIONS; ++i) {
        gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
        gcstate->generations[i].count = 0;
    }
    Py_RETURN_NONE;
}

/*[clinic input]
gc.unfreeze

Unfreeze all objects in the permanent generation.

Put all objects in the permanent generation back into the oldest generation.
[clinic start generated code]*/

static PyObject *
gc_unfreeze_impl(PyObject *module)
/*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/
{
    GCState *gcstate = get_gc_state();
    gc_list_merge(&gcstate->permanent_generation.head,
                  GEN_HEAD(gcstate, NUM_GENERATIONS-1));
    Py_RETURN_NONE;
}
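/* Illustrative sketch (not part of this module): the pre-fork pattern the
 * freeze() docstring describes, driven from C through the module object.
 * Collect first so existing garbage is not frozen, then move the survivors to
 * the permanent generation so the collector will not touch (and dirty) their
 * pages in the child.  fork() handling and error checks are abbreviated.
 *
 *     PyObject *gc_module = PyImport_ImportModule("gc");
 *     if (gc_module != NULL) {
 *         Py_XDECREF(PyObject_CallMethod(gc_module, "collect", NULL));
 *         Py_XDECREF(PyObject_CallMethod(gc_module, "freeze", NULL));
 *         Py_DECREF(gc_module);
 *     }
 *     // ... fork() here; the child may call gc.unfreeze() once the
 *     // copy-on-write benefit no longer matters.
 */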
/*[clinic input]
gc.get_freeze_count -> Py_ssize_t

Return the number of objects in the permanent generation.
[clinic start generated code]*/

static Py_ssize_t
gc_get_freeze_count_impl(PyObject *module)
/*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/
{
    GCState *gcstate = get_gc_state();
    return gc_list_size(&gcstate->permanent_generation.head);
}


PyDoc_STRVAR(gc__doc__,
"This module provides access to the garbage collector for reference cycles.\n"
"\n"
"enable() -- Enable automatic garbage collection.\n"
"disable() -- Disable automatic garbage collection.\n"
"isenabled() -- Returns true if automatic collection is enabled.\n"
"collect() -- Do a full collection right now.\n"
"get_count() -- Return the current collection counts.\n"
"get_stats() -- Return list of dictionaries containing per-generation stats.\n"
"set_debug() -- Set debugging flags.\n"
"get_debug() -- Get debugging flags.\n"
"set_threshold() -- Set the collection thresholds.\n"
"get_threshold() -- Return the current collection thresholds.\n"
"get_objects() -- Return a list of all objects tracked by the collector.\n"
"is_tracked() -- Returns true if a given object is tracked.\n"
"is_finalized() -- Returns true if a given object has already been finalized.\n"
"get_referrers() -- Return the list of objects that refer to an object.\n"
"get_referents() -- Return the list of objects that an object refers to.\n"
"freeze() -- Freeze all tracked objects and ignore them for future collections.\n"
"unfreeze() -- Unfreeze all objects in the permanent generation.\n"
"get_freeze_count() -- Return the number of objects in the permanent generation.\n");

static PyMethodDef GcMethods[] = {
    GC_ENABLE_METHODDEF
    GC_DISABLE_METHODDEF
    GC_ISENABLED_METHODDEF
    GC_SET_DEBUG_METHODDEF
    GC_GET_DEBUG_METHODDEF
    GC_GET_COUNT_METHODDEF
    {"set_threshold", gc_set_threshold, METH_VARARGS, gc_set_thresh__doc__},
    GC_GET_THRESHOLD_METHODDEF
    GC_COLLECT_METHODDEF
    GC_GET_OBJECTS_METHODDEF
    GC_GET_STATS_METHODDEF
    GC_IS_TRACKED_METHODDEF
    GC_IS_FINALIZED_METHODDEF
    {"get_referrers", gc_get_referrers, METH_VARARGS,
        gc_get_referrers__doc__},
    {"get_referents", gc_get_referents, METH_VARARGS,
        gc_get_referents__doc__},
    GC_FREEZE_METHODDEF
    GC_UNFREEZE_METHODDEF
    GC_GET_FREEZE_COUNT_METHODDEF
    {NULL, NULL}  /* Sentinel */
};

static int
gcmodule_exec(PyObject *module)
{
    GCState *gcstate = get_gc_state();

    /* garbage and callbacks are initialized by _PyGC_Init() early in
     * the interpreter lifecycle. */
    assert(gcstate->garbage != NULL);
    if (PyModule_AddObjectRef(module, "garbage", gcstate->garbage) < 0) {
        return -1;
    }
    assert(gcstate->callbacks != NULL);
    if (PyModule_AddObjectRef(module, "callbacks", gcstate->callbacks) < 0) {
        return -1;
    }

#define ADD_INT(NAME) if (PyModule_AddIntConstant(module, #NAME, NAME) < 0) { return -1; }
    ADD_INT(DEBUG_STATS);
    ADD_INT(DEBUG_COLLECTABLE);
    ADD_INT(DEBUG_UNCOLLECTABLE);
    ADD_INT(DEBUG_SAVEALL);
    ADD_INT(DEBUG_LEAK);
#undef ADD_INT
    return 0;
}

static PyModuleDef_Slot gcmodule_slots[] = {
    {Py_mod_exec, gcmodule_exec},
    {0, NULL}
};

static struct PyModuleDef gcmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "gc",
    .m_doc = gc__doc__,
    .m_size = 0,  // per interpreter state, see: get_gc_state()
    .m_methods = GcMethods,
    .m_slots = gcmodule_slots
};

PyMODINIT_FUNC
PyInit_gc(void)
{
    return PyModuleDef_Init(&gcmodule);
}

/* C API for controlling the state of the garbage collector */
int
PyGC_Enable(void)
{
    GCState *gcstate = get_gc_state();
    int old_state = gcstate->enabled;
    gcstate->enabled = 1;
    return old_state;
}

int
PyGC_Disable(void)
{
    GCState *gcstate = get_gc_state();
    int old_state = gcstate->enabled;
    gcstate->enabled = 0;
    return old_state;
}

int
PyGC_IsEnabled(void)
{
    GCState *gcstate = get_gc_state();
    return gcstate->enabled;
}

/* Public API to invoke gc.collect() from C */
Py_ssize_t
PyGC_Collect(void)
{
    PyThreadState *tstate = _PyThreadState_GET();
    GCState *gcstate = &tstate->interp->gc;

    if (!gcstate->enabled) {
        return 0;
    }

    Py_ssize_t n;
    if (gcstate->collecting) {
        /* already collecting, don't do anything */
        n = 0;
    }
    else {
        PyObject *exc, *value, *tb;
        gcstate->collecting = 1;
        _PyErr_Fetch(tstate, &exc, &value, &tb);
        n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1);
        _PyErr_Restore(tstate, exc, value, tb);
        gcstate->collecting = 0;
    }

    return n;
}
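/* Illustrative sketch (not part of this module): an embedding application can
 * pause automatic collection around a latency-sensitive phase and force a
 * full collection afterwards using the public C API defined above.  The
 * run_latency_sensitive_phase() call is a hypothetical application hook.
 *
 *     int was_enabled = PyGC_Disable();
 *     run_latency_sensitive_phase();
 *     if (was_enabled) {
 *         PyGC_Enable();
 *     }
 *     Py_ssize_t collected = PyGC_Collect();   // returns 0 while GC is disabled
 *     (void)collected;
 */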
Py_ssize_t
_PyGC_CollectNoFail(PyThreadState *tstate)
{
    /* Ideally, this function is only called on interpreter shutdown,
       and therefore not recursively.  Unfortunately, when there are daemon
       threads, a daemon thread can start a cyclic garbage collection
       during interpreter shutdown (and then never finish it).
       See http://bugs.python.org/issue8713#msg195178 for an example.
       */
    GCState *gcstate = &tstate->interp->gc;
    if (gcstate->collecting) {
        return 0;
    }

    Py_ssize_t n;
    gcstate->collecting = 1;
    n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
    gcstate->collecting = 0;
    return n;
}

void
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
{
    GCState *gcstate = &interp->gc;
    if (!(gcstate->debug & DEBUG_SAVEALL)
        && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
        const char *message;
        if (gcstate->debug & DEBUG_UNCOLLECTABLE)
            message = "gc: %zd uncollectable objects at " \
                "shutdown";
        else
            message = "gc: %zd uncollectable objects at " \
                "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
        /* PyErr_WarnFormat does too many things, and since we are at shutdown
           the warnings module's dependencies (e.g. linecache) may be gone
           already. */
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                     "gc", NULL, message,
                                     PyList_GET_SIZE(gcstate->garbage)))
            PyErr_WriteUnraisable(NULL);
        if (gcstate->debug & DEBUG_UNCOLLECTABLE) {
            PyObject *repr = NULL, *bytes = NULL;
            repr = PyObject_Repr(gcstate->garbage);
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
                PyErr_WriteUnraisable(gcstate->garbage);
            else {
                PySys_WriteStderr(
                    " %s\n",
                    PyBytes_AS_STRING(bytes)
                    );
            }
            Py_XDECREF(repr);
            Py_XDECREF(bytes);
        }
    }
}


static void
gc_fini_untrack(PyGC_Head *list)
{
    PyGC_Head *gc;
    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(list)) {
        PyObject *op = FROM_GC(gc);
        _PyObject_GC_UNTRACK(op);
        // gh-92036: If a deallocator function expects the object to be tracked
        // by the GC (ex: func_dealloc()), it can crash if called on an object
        // which is no longer tracked by the GC. Leak one strong reference on
        // purpose so the object is never deleted and its deallocator is not
        // called.
        Py_INCREF(op);
    }
}


void
_PyGC_Fini(PyInterpreterState *interp)
{
    GCState *gcstate = &interp->gc;
    Py_CLEAR(gcstate->garbage);
    Py_CLEAR(gcstate->callbacks);

    if (!_Py_IsMainInterpreter(interp)) {
        // bpo-46070: Explicitly untrack all objects currently tracked by the
        // GC. Otherwise, if an object is used later by another interpreter,
        // calling PyObject_GC_UnTrack() on the object crashes if the previous
        // or the next object of the PyGC_Head structure became a dangling
        // pointer.
        for (int i = 0; i < NUM_GENERATIONS; i++) {
            PyGC_Head *gen = GEN_HEAD(gcstate, i);
            gc_fini_untrack(gen);
        }
    }
}

/* for debugging */
void
_PyGC_Dump(PyGC_Head *g)
{
    _PyObject_Dump(FROM_GC(g));
}


#ifdef Py_DEBUG
static int
visit_validate(PyObject *op, void *parent_raw)
{
    PyObject *parent = _PyObject_CAST(parent_raw);
    if (_PyObject_IsFreed(op)) {
        _PyObject_ASSERT_FAILED_MSG(parent,
                                    "PyObject_GC_Track() object is not valid");
    }
    return 0;
}
#endif


/* extension modules might be compiled with GC support so these
   functions must always be available */

void
PyObject_GC_Track(void *op_raw)
{
    PyObject *op = _PyObject_CAST(op_raw);
    if (_PyObject_GC_IS_TRACKED(op)) {
        _PyObject_ASSERT_FAILED_MSG(op,
                                    "object already tracked "
                                    "by the garbage collector");
    }
    _PyObject_GC_TRACK(op);

#ifdef Py_DEBUG
    /* Check that the object is valid: validate objects traversed
       by tp_traverse() */
    traverseproc traverse = Py_TYPE(op)->tp_traverse;
    (void)traverse(op, visit_validate, op);
#endif
}

void
PyObject_GC_UnTrack(void *op_raw)
{
    PyObject *op = _PyObject_CAST(op_raw);
    /* Obscure: the Py_TRASHCAN mechanism requires that we be able to
     * call PyObject_GC_UnTrack twice on an object.
     */
    if (_PyObject_GC_IS_TRACKED(op)) {
        _PyObject_GC_UNTRACK(op);
    }
}

int
PyObject_IS_GC(PyObject *obj)
{
    return _PyObject_IS_GC(obj);
}

void
_PyObject_GC_Link(PyObject *op)
{
    PyGC_Head *g = AS_GC(op);
    assert(((uintptr_t)g & (sizeof(uintptr_t)-1)) == 0);  // g must be correctly aligned

    PyThreadState *tstate = _PyThreadState_GET();
    GCState *gcstate = &tstate->interp->gc;
    g->_gc_next = 0;
    g->_gc_prev = 0;
    gcstate->generations[0].count++; /* number of allocated GC objects */
    if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
        gcstate->enabled &&
        gcstate->generations[0].threshold &&
        !gcstate->collecting &&
        !_PyErr_Occurred(tstate))
    {
        gcstate->collecting = 1;
        gc_collect_generations(tstate);
        gcstate->collecting = 0;
    }
}

static PyObject *
gc_alloc(size_t basicsize, size_t presize)
{
    PyThreadState *tstate = _PyThreadState_GET();
    if (basicsize > PY_SSIZE_T_MAX - presize) {
        return _PyErr_NoMemory(tstate);
    }
    size_t size = presize + basicsize;
    char *mem = PyObject_Malloc(size);
    if (mem == NULL) {
        return _PyErr_NoMemory(tstate);
    }
    ((PyObject **)mem)[0] = NULL;
    ((PyObject **)mem)[1] = NULL;
    PyObject *op = (PyObject *)(mem + presize);
    _PyObject_GC_Link(op);
    return op;
}

PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{
    size_t presize = _PyType_PreHeaderSize(tp);
    PyObject *op = gc_alloc(_PyObject_SIZE(tp), presize);
    if (op == NULL) {
        return NULL;
    }
    _PyObject_Init(op, tp);
    return op;
}

PyVarObject *
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
{
    size_t size;
    PyVarObject *op;

    if (nitems < 0) {
        PyErr_BadInternalCall();
        return NULL;
    }
    size_t presize = _PyType_PreHeaderSize(tp);
    size = _PyObject_VAR_SIZE(tp, nitems);
    op = (PyVarObject *)gc_alloc(size, presize);
    if (op == NULL) {
        return NULL;
    }
    _PyObject_InitVar(op, tp, nitems);
    return op;
}

PyVarObject *
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
{
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
        return (PyVarObject *)PyErr_NoMemory();
    }

    PyGC_Head *g = AS_GC(op);
    g = (PyGC_Head *)PyObject_Realloc(g, sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return (PyVarObject *)PyErr_NoMemory();
    op = (PyVarObject *) FROM_GC(g);
    Py_SET_SIZE(op, nitems);
    return op;
}

void
PyObject_GC_Del(void *op)
{
    size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type);
    PyGC_Head *g = AS_GC(op);
    if (_PyObject_GC_IS_TRACKED(op)) {
#ifdef Py_DEBUG
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                     "gc", NULL,
                                     "Object of type %s is not untracked before destruction",
                                     ((PyObject*)op)->ob_type->tp_name)) {
            PyErr_WriteUnraisable(NULL);
        }
#endif
        gc_list_remove(g);
    }
    GCState *gcstate = get_gc_state();
    if (gcstate->generations[0].count > 0) {
        gcstate->generations[0].count--;
    }
    PyObject_Free(((char *)op)-presize);
}

int
PyObject_GC_IsTracked(PyObject* obj)
{
    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
        return 1;
    }
    return 0;
}

int
PyObject_GC_IsFinalized(PyObject *obj)
{
    if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
        return 1;
    }
    return 0;
}
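/* Illustrative sketch (not part of this module): how an extension type's
 * allocation and deallocation line up with the functions above.  "thing" is a
 * hypothetical container type; its tp_traverse would look like the sketch
 * shown earlier near get_referrers(), and error handling is abbreviated.
 *
 *     typedef struct {
 *         PyObject_HEAD
 *         PyObject *payload;                 // owned reference, may be NULL
 *     } thingobject;
 *
 *     static PyObject *
 *     thing_create(PyTypeObject *type)
 *     {
 *         thingobject *self = PyObject_GC_New(thingobject, type);
 *         if (self == NULL) {
 *             return NULL;
 *         }
 *         self->payload = NULL;
 *         PyObject_GC_Track((PyObject *)self);   // start participating in GC
 *         return (PyObject *)self;
 *     }
 *
 *     static void
 *     thing_dealloc(PyObject *self)
 *     {
 *         PyObject_GC_UnTrack(self);             // untrack before tearing down
 *         Py_CLEAR(((thingobject *)self)->payload);
 *         PyObject_GC_Del(self);
 *     }
 */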