From b045b8ffc1f66ebdcaec1798f3d9292a8f389bd1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 10 Oct 2019 01:34:23 +0100 Subject: [PATCH 01/13] bpo-38379: Don't block collection of unreachable objects when some objects resurrect --- Lib/test/test_gc.py | 17 ++++++------ Modules/gcmodule.c | 67 +++++++++++++++++++++++++++++---------------- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index f52db1eab169cd..43d22a34d9015a 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -822,7 +822,7 @@ def test_get_objects_arguments(self): self.assertRaises(TypeError, gc.get_objects, "1") self.assertRaises(TypeError, gc.get_objects, 1.234) - def test_38379(self): + def test_resurrection_does_not_block_cleanup_of_other_objects(self): # When a finalizer resurrects objects, stats were reporting them as # having been collected. This affected both collect()'s return # value and the dicts returned by get_stats(). @@ -877,18 +877,18 @@ def getstats(): # Z() prevents anything from being collected. t = gc.collect() c, nc = getstats() - #self.assertEqual(t, 2*N + 2) # before - self.assertEqual(t, 0) # after - #self.assertEqual(c - oldc, 2*N + 2) # before - self.assertEqual(c - oldc, 0) # after + self.assertEqual(t, 2*N) # after + self.assertEqual(c - oldc, 2*N) # after self.assertEqual(nc - oldnc, 0) - # But the A() trash is reclaimed on the next run. + # The A() trash should have been eliminated and + # the Z objects should be removed now + zs.clear() oldc, oldnc = c, nc t = gc.collect() c, nc = getstats() - self.assertEqual(t, 2*N) - self.assertEqual(c - oldc, 2*N) + self.assertEqual(t, 4) + self.assertEqual(c - oldc, 4) self.assertEqual(nc - oldnc, 0) gc.enable() @@ -1213,6 +1213,7 @@ def __del__(self): # If __del__ resurrected c2, the instance would be damaged, with an # empty __dict__. self.assertEqual(x, None) + def test_main(): enabled = gc.isenabled() diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index a1cb323bd24cb2..0dda03691db598 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -613,7 +613,7 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) } } -/* A traversal callback for move_legacy_finalizer_reachable. */ +/* A traversal callback for move_recursive_reachable. */ static int visit_move(PyObject *op, PyGC_Head *tolist) { @@ -627,20 +627,20 @@ visit_move(PyObject *op, PyGC_Head *tolist) return 0; } -/* Move objects that are reachable from finalizers, from the unreachable set - * into finalizers set. +/* Move objects that are reachable from gc_list from their original place + into gc_list. */ static void -move_legacy_finalizer_reachable(PyGC_Head *finalizers) +move_recursive_reachable(PyGC_Head *gc_list) { traverseproc traverse; - PyGC_Head *gc = GC_NEXT(finalizers); - for (; gc != finalizers; gc = GC_NEXT(gc)) { + PyGC_Head *gc = GC_NEXT(gc_list); + for (; gc != gc_list; gc = GC_NEXT(gc)) { /* Note that the finalizers list may grow during this. */ traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; (void) traverse(FROM_GC(gc), (visitproc)visit_move, - (void *)finalizers); + (void *)gc_list); } } @@ -895,7 +895,7 @@ finalize_garbage(PyGC_Head *collectable) from the outside (some objects could have been resurrected by a finalizer). */ static int -check_garbage(PyGC_Head *collectable) +deduce_unreachable(PyGC_Head *collectable, PyGC_Head *still_reachable) { int ret = 0; PyGC_Head *gc; @@ -911,13 +911,28 @@ check_garbage(PyGC_Head *collectable) gc_get_refs(gc) >= 0, "refcount is too small"); if (gc_get_refs(gc) != 0) { + // We found a object that is reachable from the outside, + // Restore the previous link (it was removed when we + // called gc_set_refs). + _PyGCHead_SET_PREV(gc, prev); + // Mark it as not collecting (now is reachable). + gc_clear_collecting(gc); + // Move the object into the still_reachable set. + PyGC_Head *tmp = GC_PREV(gc); + gc_list_move(gc, still_reachable); + gc = tmp; + // Signal that we found some reachable objects. ret = -1; + } else { + _PyGCHead_SET_PREV(gc, prev); } - // Restore gc_prev here. - _PyGCHead_SET_PREV(gc, prev); - gc_clear_collecting(gc); prev = gc; } + // Move also all the objects that are reachable from the + // still_reachable objects into the same collection (when exiting + // this function, the still_reachable collection will correspond + // to everything that we cannot remove in this collection). + move_recursive_reachable(still_reachable); return ret; } @@ -1067,6 +1082,8 @@ collect(struct _gc_runtime_state *state, int generation, validate_list(young, 0); untrack_tuples(young); + + /* Move reachable objects to next generation. */ if (young != old) { if (generation == NUM_GENERATIONS - 2) { @@ -1093,7 +1110,7 @@ collect(struct _gc_runtime_state *state, int generation, * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. */ - move_legacy_finalizer_reachable(&finalizers); + move_recursive_reachable(&finalizers); validate_list(&finalizers, 0); validate_list(&unreachable, PREV_MASK_COLLECTING); @@ -1114,17 +1131,21 @@ collect(struct _gc_runtime_state *state, int generation, /* Call tp_finalize on objects which have one. */ finalize_garbage(&unreachable); - if (check_garbage(&unreachable)) { // clear PREV_MASK_COLLECTING here - gc_list_merge(&unreachable, old); - } - else { - /* Call tp_clear on objects in the unreachable set. This will cause - * the reference cycles to be broken. It may also cause some objects - * in finalizers to be freed. - */ - m += gc_list_size(&unreachable); - delete_garbage(state, &unreachable, old); - } + // The deduce_unreachable function will move all objects that are still + // reachable from the outside into the still_unreachable set. + PyGC_Head still_unreachable; + gc_list_init(&still_unreachable); + if (deduce_unreachable(&unreachable, &still_unreachable)) { // clear PREV_MASK_COLLECTING here + // Move every resurrected object (and the ones reachable from them) into + // the yoing list for future collection. + gc_list_merge(&still_unreachable, young); + } + /* Call tp_clear on objects in the unreachable set. This will cause + * the reference cycles to be broken. It may also cause some objects + * in finalizers to be freed. + */ + m += gc_list_size(&unreachable); + delete_garbage(state, &unreachable, old); /* Collect statistics on uncollectable objects found and print * debugging information. */ From e4c8654a321283bfa5037d793cb611bf7491f553 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 10 Oct 2019 01:41:11 +0100 Subject: [PATCH 02/13] Add News entry --- .../2019-10-10-01-41-02.bpo-38379._q4dhn.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-10-10-01-41-02.bpo-38379._q4dhn.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-10-10-01-41-02.bpo-38379._q4dhn.rst b/Misc/NEWS.d/next/Core and Builtins/2019-10-10-01-41-02.bpo-38379._q4dhn.rst new file mode 100644 index 00000000000000..86f908b67740b9 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-10-10-01-41-02.bpo-38379._q4dhn.rst @@ -0,0 +1,4 @@ +When the garbage collector makes a collection in which some objects +resurrect (they are reachable from outside the isolated cycles after the +finalizers have been executed), do not block the collection of all objects +that are still unreachable. Patch by Pablo Galindo and Tim Peters. From 60cffa6552a69483582f7207ad538248d969f3a3 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 10 Oct 2019 01:42:04 +0100 Subject: [PATCH 03/13] Fix indentation --- Lib/test/test_gc.py | 2 +- Modules/gcmodule.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 43d22a34d9015a..4aa7c4af8fae95 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1213,7 +1213,7 @@ def __del__(self): # If __del__ resurrected c2, the instance would be damaged, with an # empty __dict__. self.assertEqual(x, None) - + def test_main(): enabled = gc.isenabled() diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 0dda03691db598..d55be7ac846538 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1141,9 +1141,9 @@ collect(struct _gc_runtime_state *state, int generation, gc_list_merge(&still_unreachable, young); } /* Call tp_clear on objects in the unreachable set. This will cause - * the reference cycles to be broken. It may also cause some objects - * in finalizers to be freed. - */ + * the reference cycles to be broken. It may also cause some objects + * in finalizers to be freed. + */ m += gc_list_size(&unreachable); delete_garbage(state, &unreachable, old); From d1d0916415af0e88d72edb69250b8e67fb64bb63 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 10 Oct 2019 03:21:11 +0100 Subject: [PATCH 04/13] Use deduce_unreachable function --- Lib/test/test_gc.py | 23 ++++----- Modules/gcmodule.c | 113 +++++++++++++++++++++++--------------------- 2 files changed, 67 insertions(+), 69 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 4aa7c4af8fae95..99c7b1cb2e419a 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -823,6 +823,7 @@ def test_get_objects_arguments(self): self.assertRaises(TypeError, gc.get_objects, 1.234) def test_resurrection_does_not_block_cleanup_of_other_objects(self): + # When a finalizer resurrects objects, stats were reporting them as # having been collected. This affected both collect()'s return # value and the dicts returned by get_stats(). @@ -861,30 +862,25 @@ def getstats(): # Nothing is collected - Z() is merely resurrected. t = gc.collect() c, nc = getstats() - #self.assertEqual(t, 2) # before - self.assertEqual(t, 0) # after - #self.assertEqual(c - oldc, 2) # before - self.assertEqual(c - oldc, 0) # after + self.assertEqual(t, 0) # before + self.assertEqual(c - oldc, 0) # before self.assertEqual(nc - oldnc, 0) - # Unfortunately, a Z() prevents _anything_ from being collected. - # It should be possible to collect the A instances anyway, but - # that will require non-trivial code changes. + # Z() should not prevents anything from being collected. oldc, oldnc = c, nc for i in range(N): A() Z() - # Z() prevents anything from being collected. t = gc.collect() c, nc = getstats() - self.assertEqual(t, 2*N) # after - self.assertEqual(c - oldc, 2*N) # after + self.assertEqual(t, 2*N) # before + self.assertEqual(c - oldc, 2*N) # before self.assertEqual(nc - oldnc, 0) - # The A() trash should have been eliminated and - # the Z objects should be removed now - zs.clear() + # The A() trash should have been reclaimed already but the + # 2 copies of Z are still in zs (and the associated dicts). oldc, oldnc = c, nc + zs.clear() t = gc.collect() c, nc = getstats() self.assertEqual(t, 4) @@ -1214,7 +1210,6 @@ def __del__(self): # empty __dict__. self.assertEqual(x, None) - def test_main(): enabled = gc.isenabled() gc.disable() diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index d55be7ac846538..348a31c641f912 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -302,7 +302,7 @@ gc_list_size(PyGC_Head *list) } /* Append objects in a GC list to a Python list. - * Return 0 if all OK, < 0 if error (out of memory for list). + * Return 0 if all OK, < 0 if error (out of memory for list)deduce_unreachable. */ static int append_objects(PyObject *py_list, PyGC_Head *gc_list) @@ -613,7 +613,20 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) } } -/* A traversal callback for move_recursive_reachable. */ +static inline void +restore_unreachable_mask(PyGC_Head *unreachable) +{ + PyGC_Head *gc, *next; + unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyObject *op = FROM_GC(gc); + _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE); + gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; + } +} + +/* A traversal callback for move_legacy_finalizer_reachable. */ static int visit_move(PyObject *op, PyGC_Head *tolist) { @@ -627,20 +640,20 @@ visit_move(PyObject *op, PyGC_Head *tolist) return 0; } -/* Move objects that are reachable from gc_list from their original place - into gc_list. +/* Move objects that are reachable from finalizers, from the unreachable set + * into finalizers set. */ static void -move_recursive_reachable(PyGC_Head *gc_list) +move_legacy_finalizer_reachable(PyGC_Head *finalizers) { traverseproc traverse; - PyGC_Head *gc = GC_NEXT(gc_list); - for (; gc != gc_list; gc = GC_NEXT(gc)) { + PyGC_Head *gc = GC_NEXT(finalizers); + for (; gc != finalizers; gc = GC_NEXT(gc)) { /* Note that the finalizers list may grow during this. */ traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; (void) traverse(FROM_GC(gc), (visitproc)visit_move, - (void *)gc_list); + (void *)finalizers); } } @@ -895,7 +908,7 @@ finalize_garbage(PyGC_Head *collectable) from the outside (some objects could have been resurrected by a finalizer). */ static int -deduce_unreachable(PyGC_Head *collectable, PyGC_Head *still_reachable) +check_garbage(PyGC_Head *collectable) { int ret = 0; PyGC_Head *gc; @@ -911,28 +924,13 @@ deduce_unreachable(PyGC_Head *collectable, PyGC_Head *still_reachable) gc_get_refs(gc) >= 0, "refcount is too small"); if (gc_get_refs(gc) != 0) { - // We found a object that is reachable from the outside, - // Restore the previous link (it was removed when we - // called gc_set_refs). - _PyGCHead_SET_PREV(gc, prev); - // Mark it as not collecting (now is reachable). - gc_clear_collecting(gc); - // Move the object into the still_reachable set. - PyGC_Head *tmp = GC_PREV(gc); - gc_list_move(gc, still_reachable); - gc = tmp; - // Signal that we found some reachable objects. ret = -1; - } else { - _PyGCHead_SET_PREV(gc, prev); } + // Restore gc_prev here. + _PyGCHead_SET_PREV(gc, prev); + gc_clear_collecting(gc); prev = gc; } - // Move also all the objects that are reachable from the - // still_reachable objects into the same collection (when exiting - // this function, the still_reachable collection will correspond - // to everything that we cannot remove in this collection). - move_recursive_reachable(still_reachable); return ret; } @@ -1018,6 +1016,26 @@ show_stats_each_generations(struct _gc_runtime_state *state) buf, gc_list_size(&state->permanent_generation.head)); } +static void +deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { + /* Using ob_refcnt and gc_refs, calculate which objects in the + * container set are reachable from outside the set (i.e., have a + * refcount greater than 0 when all the references within the + * set are taken into account). + */ + update_refs(base); // gc_prev is used for gc_refs + subtract_refs(base); + + /* Leave everything reachable from outside young in young, and move + * everything else (in young) to unreachable. + * NOTE: This used to move the reachable objects into a reachable + * set instead. But most things usually turn out to be reachable, + * so it's more efficient to move the unreachable things. + */ + gc_list_init(unreachable); + move_unreachable(base, unreachable); // gc_prev is pointer again +} + /* This is the main function. Read this to understand how the * collection process works. */ static Py_ssize_t @@ -1063,27 +1081,10 @@ collect(struct _gc_runtime_state *state, int generation, validate_list(young, 0); validate_list(old, 0); - /* Using ob_refcnt and gc_refs, calculate which objects in the - * container set are reachable from outside the set (i.e., have a - * refcount greater than 0 when all the references within the - * set are taken into account). - */ - update_refs(young); // gc_prev is used for gc_refs - subtract_refs(young); - - /* Leave everything reachable from outside young in young, and move - * everything else (in young) to unreachable. - * NOTE: This used to move the reachable objects into a reachable - * set instead. But most things usually turn out to be reachable, - * so it's more efficient to move the unreachable things. - */ - gc_list_init(&unreachable); - move_unreachable(young, &unreachable); // gc_prev is pointer again + deduce_unreachable(young, &unreachable); validate_list(young, 0); untrack_tuples(young); - - /* Move reachable objects to next generation. */ if (young != old) { if (generation == NUM_GENERATIONS - 2) { @@ -1110,7 +1111,7 @@ collect(struct _gc_runtime_state *state, int generation, * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. */ - move_recursive_reachable(&finalizers); + move_legacy_finalizer_reachable(&finalizers); validate_list(&finalizers, 0); validate_list(&unreachable, PREV_MASK_COLLECTING); @@ -1131,21 +1132,23 @@ collect(struct _gc_runtime_state *state, int generation, /* Call tp_finalize on objects which have one. */ finalize_garbage(&unreachable); - // The deduce_unreachable function will move all objects that are still - // reachable from the outside into the still_unreachable set. PyGC_Head still_unreachable; - gc_list_init(&still_unreachable); - if (deduce_unreachable(&unreachable, &still_unreachable)) { // clear PREV_MASK_COLLECTING here - // Move every resurrected object (and the ones reachable from them) into - // the yoing list for future collection. - gc_list_merge(&still_unreachable, young); + PyGC_Head *final_unreachable = &unreachable; + if (check_garbage(&unreachable)) { + // Get what objects are still unreachable from the outside + // after the resurrections + deduce_unreachable(&unreachable, &still_unreachable); + restore_unreachable_mask(&still_unreachable); + // Move the resurrected objects to old + gc_list_merge(&unreachable, old); + final_unreachable = &still_unreachable; } /* Call tp_clear on objects in the unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. */ - m += gc_list_size(&unreachable); - delete_garbage(state, &unreachable, old); + m += gc_list_size(final_unreachable); + delete_garbage(state, final_unreachable, old); /* Collect statistics on uncollectable objects found and print * debugging information. */ From 5c92ff321b6b53a8dfc3566552805f1ee229628f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 10 Oct 2019 19:55:53 +0100 Subject: [PATCH 05/13] Address first round of feedback --- Modules/gcmodule.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 348a31c641f912..75b74fab3f471f 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -302,7 +302,7 @@ gc_list_size(PyGC_Head *list) } /* Append objects in a GC list to a Python list. - * Return 0 if all OK, < 0 if error (out of memory for list)deduce_unreachable. + * Return 0 if all OK, < 0 if error (out of memory for list) */ static int append_objects(PyObject *py_list, PyGC_Head *gc_list) @@ -614,8 +614,11 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) } static inline void -restore_unreachable_mask(PyGC_Head *unreachable) +clear_unreachable_mask(PyGC_Head *unreachable) { + /* Check that the list head does not have the unreachable bit set */ + assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0); + PyGC_Head *gc, *next; unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { @@ -624,6 +627,7 @@ restore_unreachable_mask(PyGC_Head *unreachable) gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; next = (PyGC_Head*)gc->_gc_next; } + validate_list(unreachable, PREV_MASK_COLLECTING); } /* A traversal callback for move_legacy_finalizer_reachable. */ @@ -1026,8 +1030,8 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { update_refs(base); // gc_prev is used for gc_refs subtract_refs(base); - /* Leave everything reachable from outside young in young, and move - * everything else (in young) to unreachable. + /* Leave everything reachable from outside base in base, and move + * everything else (in base) to unreachable. * NOTE: This used to move the reachable objects into a reachable * set instead. But most things usually turn out to be reachable, * so it's more efficient to move the unreachable things. @@ -1138,7 +1142,7 @@ collect(struct _gc_runtime_state *state, int generation, // Get what objects are still unreachable from the outside // after the resurrections deduce_unreachable(&unreachable, &still_unreachable); - restore_unreachable_mask(&still_unreachable); + clear_unreachable_mask(&still_unreachable); // Move the resurrected objects to old gc_list_merge(&unreachable, old); final_unreachable = &still_unreachable; From 0ed2f1b4650736ab7eced89d7070eb19e68bda28 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 10 Oct 2019 20:07:23 +0100 Subject: [PATCH 06/13] Use better variable names when dealing whit resurrected objects --- Lib/test/test_gc.py | 2 +- Modules/gcmodule.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 99c7b1cb2e419a..84d45ccaf1899e 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -866,7 +866,7 @@ def getstats(): self.assertEqual(c - oldc, 0) # before self.assertEqual(nc - oldnc, 0) - # Z() should not prevents anything from being collected. + # Z() should not prevent anything else from being collected. oldc, oldnc = c, nc for i in range(N): A() diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 75b74fab3f471f..e5bef6f6b896c5 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1139,12 +1139,14 @@ collect(struct _gc_runtime_state *state, int generation, PyGC_Head still_unreachable; PyGC_Head *final_unreachable = &unreachable; if (check_garbage(&unreachable)) { + PyGC_Head* resurrected = &unreachable; // Get what objects are still unreachable from the outside - // after the resurrections - deduce_unreachable(&unreachable, &still_unreachable); + // after the resurrections. After this call, the "resurrected" + // list has all objects that have been resurrected. + deduce_unreachable(resurrected, &still_unreachable); clear_unreachable_mask(&still_unreachable); // Move the resurrected objects to old - gc_list_merge(&unreachable, old); + gc_list_merge(resurrected, old); final_unreachable = &still_unreachable; } /* Call tp_clear on objects in the unreachable set. This will cause From 6c2559a404d81ea25800f0c20926e3cb706bcf46 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 10 Oct 2019 20:08:53 +0100 Subject: [PATCH 07/13] Make deduce_unreachable a 'static inline' function --- Modules/gcmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index e5bef6f6b896c5..41d5e4f8b656f1 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1020,7 +1020,7 @@ show_stats_each_generations(struct _gc_runtime_state *state) buf, gc_list_size(&state->permanent_generation.head)); } -static void +static inline void deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { /* Using ob_refcnt and gc_refs, calculate which objects in the * container set are reachable from outside the set (i.e., have a From 2d1db3b2139ffd0afa37d4c7f0aabcaf47bc6952 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 10 Oct 2019 20:34:57 +0100 Subject: [PATCH 08/13] Add documentation to 'deduce_unreachable' --- Modules/gcmodule.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 41d5e4f8b656f1..931b2c3f7a1f68 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1020,8 +1020,36 @@ show_stats_each_generations(struct _gc_runtime_state *state) buf, gc_list_size(&state->permanent_generation.head)); } +/* Deduce wich objects among "base" are unreachable from outside the list + and move them to 'unreachable'. The process consist in the following steps: + +1. Copy all references to a different field (gc_prev is used to hold this copy + to save memory). +2. Traverse all objects in "base" and visit all referred objects using + "tp_traverse" and for every visited object, substract 1 to the reference + count (the one that we copied in the previous step). After this step, all + objects that can be reached from outside must have positive reference count, + while all unreachable objects must have a count of exactly 0. +3. Indentify all unreachable objects (the ones with 0 reference count) and move + them to the "unreachable" list. This step also needs to move back to "base" all + objects that were initially marked as unreachable but are referred transitively + by the reachable objects (the ones with positive reference count). + +Contracts: + + * The "base" has to be a valid list with no mask set. + + * The "unreachable" list must be uninitialized (this function calls + gc_list_init over 'unreachable'). + +IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE +flag set but it does not clear it to skip unnecessary iteration. Before the +flag is cleared (for example, by using 'clear_unreachable_mask' function or +by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal +list and we can not use mostgc_list_* functions for it. */ static inline void deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { + validate_list(base, 0); /* Using ob_refcnt and gc_refs, calculate which objects in the * container set are reachable from outside the set (i.e., have a * refcount greater than 0 when all the references within the @@ -1038,6 +1066,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { */ gc_list_init(unreachable); move_unreachable(base, unreachable); // gc_prev is pointer again + validate_list(base, 0); } /* This is the main function. Read this to understand how the @@ -1082,11 +1111,9 @@ collect(struct _gc_runtime_state *state, int generation, old = GEN_HEAD(state, generation+1); else old = young; - - validate_list(young, 0); validate_list(old, 0); + deduce_unreachable(young, &unreachable); - validate_list(young, 0); untrack_tuples(young); /* Move reachable objects to next generation. */ From 3c115ad7f0bdfbec12776c38dff024c4fba5d6d5 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Fri, 11 Oct 2019 16:23:45 +0100 Subject: [PATCH 09/13] Fix comments and minor refactors --- Lib/test/test_gc.py | 8 ++++---- Modules/gcmodule.c | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 84d45ccaf1899e..35f63e45d1c36c 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -862,8 +862,8 @@ def getstats(): # Nothing is collected - Z() is merely resurrected. t = gc.collect() c, nc = getstats() - self.assertEqual(t, 0) # before - self.assertEqual(c - oldc, 0) # before + self.assertEqual(t, 0) + self.assertEqual(c - oldc, 0) self.assertEqual(nc - oldnc, 0) # Z() should not prevent anything else from being collected. @@ -873,8 +873,8 @@ def getstats(): Z() t = gc.collect() c, nc = getstats() - self.assertEqual(t, 2*N) # before - self.assertEqual(c - oldc, 2*N) # before + self.assertEqual(t, 2*N) + self.assertEqual(c - oldc, 2*N) self.assertEqual(nc - oldnc, 0) # The A() trash should have been reclaimed already but the diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 931b2c3f7a1f68..5d242dd3a847d9 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1023,17 +1023,17 @@ show_stats_each_generations(struct _gc_runtime_state *state) /* Deduce wich objects among "base" are unreachable from outside the list and move them to 'unreachable'. The process consist in the following steps: -1. Copy all references to a different field (gc_prev is used to hold this copy - to save memory). +1. Copy all reference counts to a different field (gc_prev is used to hold + this copy to save memory). 2. Traverse all objects in "base" and visit all referred objects using "tp_traverse" and for every visited object, substract 1 to the reference count (the one that we copied in the previous step). After this step, all - objects that can be reached from outside must have positive reference count, - while all unreachable objects must have a count of exactly 0. + objects that can be reached directly from outside must have strictly positive + reference count, while all unreachable objects must have a count of exactly 0. 3. Indentify all unreachable objects (the ones with 0 reference count) and move them to the "unreachable" list. This step also needs to move back to "base" all objects that were initially marked as unreachable but are referred transitively - by the reachable objects (the ones with positive reference count). + by the reachable objects (the ones with strictly positive reference count). Contracts: @@ -1046,7 +1046,7 @@ IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE flag set but it does not clear it to skip unnecessary iteration. Before the flag is cleared (for example, by using 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal -list and we can not use mostgc_list_* functions for it. */ +list and we can not use most gc_list_* functions for it. */ static inline void deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { validate_list(base, 0); @@ -1163,9 +1163,9 @@ collect(struct _gc_runtime_state *state, int generation, /* Call tp_finalize on objects which have one. */ finalize_garbage(&unreachable); - PyGC_Head still_unreachable; PyGC_Head *final_unreachable = &unreachable; if (check_garbage(&unreachable)) { + PyGC_Head still_unreachable; PyGC_Head* resurrected = &unreachable; // Get what objects are still unreachable from the outside // after the resurrections. After this call, the "resurrected" From 7ccc5644af9fcc1691518f98ba3f4d310cc329d8 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 12 Oct 2019 04:26:26 +0100 Subject: [PATCH 10/13] Remove check_garbage and create 'handle_resurrected_objects' --- Modules/gcmodule.c | 101 ++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 46 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 5d242dd3a847d9..b3019f48a96fa4 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -301,6 +301,16 @@ gc_list_size(PyGC_Head *list) return n; } +/* Walk the list and mark all objects as non-collecting */ +static inline void +gc_list_clear_collecting(PyGC_Head *collectable) +{ + PyGC_Head *gc; + for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { + gc_clear_collecting(gc); + } +} + /* Append objects in a GC list to a Python list. * Return 0 if all OK, < 0 if error (out of memory for list) */ @@ -908,36 +918,6 @@ finalize_garbage(PyGC_Head *collectable) gc_list_merge(&seen, collectable); } -/* Walk the collectable list and check that they are really unreachable - from the outside (some objects could have been resurrected by a - finalizer). */ -static int -check_garbage(PyGC_Head *collectable) -{ - int ret = 0; - PyGC_Head *gc; - for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { - // Use gc_refs and break gc_prev again. - gc_set_refs(gc, Py_REFCNT(FROM_GC(gc))); - _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0); - } - subtract_refs(collectable); - PyGC_Head *prev = collectable; - for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { - _PyObject_ASSERT_WITH_MSG(FROM_GC(gc), - gc_get_refs(gc) >= 0, - "refcount is too small"); - if (gc_get_refs(gc) != 0) { - ret = -1; - } - // Restore gc_prev here. - _PyGCHead_SET_PREV(gc, prev); - gc_clear_collecting(gc); - prev = gc; - } - return ret; -} - /* Break reference cycles by clearing the containers involved. This is * tricky business as the lists can be changing and we don't know which * objects may be freed. It is possible I screwed something up here. @@ -975,6 +955,7 @@ delete_garbage(struct _gc_runtime_state *state, } if (GC_NEXT(collectable) == gc) { /* object is still alive, move it, it may die later */ + gc_clear_collecting(gc); gc_list_move(gc, old); } } @@ -1069,6 +1050,41 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { validate_list(base, 0); } +/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving + them to 'old_generation' and placing the rest on 'still_unreachable'. + + Contracts: + * After this function 'unreachable' must not be used anymore and 'still_unreachable' + will contain the objects that did not resurrect. + + * The "still_unreachable" list must be uninitialized (this function calls + gc_list_init over 'still_unreachable'). + +IMPORTANT: After a call to this function, the 'still_unreachable' set will have the +PREV_MARK_COLLECTING set, but the objects in this set are going to be removed (and +'delete_garbage' handles special cases that will go back to 'old_generation') so +the removal of the flag can be skipped to avoid extra iteration. */ +static inline void +handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, + PyGC_Head *old_generation) +{ + // Remove the PREV_MASK_COLLECTING from unreachable + // to prepare it for a new call to 'deduce_unreachable' + gc_list_clear_collecting(unreachable); + + // After the call to deduce_unreachable, the 'still_unreachable' set will + // have the PREV_MARK_COLLECTING set, but the objects are going to be + // removed (and 'delete_garbage' handles special cases that will go + // back to 'old_generation') so we can not remove the flag to avoid + // extra iteration. + PyGC_Head* resurrected = unreachable; + deduce_unreachable(resurrected, still_unreachable); + clear_unreachable_mask(still_unreachable); + + // Move the resurrected objects to the old generation for future collection. + gc_list_merge(resurrected, old_generation); +} + /* This is the main function. Read this to understand how the * collection process works. */ static Py_ssize_t @@ -1163,25 +1179,18 @@ collect(struct _gc_runtime_state *state, int generation, /* Call tp_finalize on objects which have one. */ finalize_garbage(&unreachable); - PyGC_Head *final_unreachable = &unreachable; - if (check_garbage(&unreachable)) { - PyGC_Head still_unreachable; - PyGC_Head* resurrected = &unreachable; - // Get what objects are still unreachable from the outside - // after the resurrections. After this call, the "resurrected" - // list has all objects that have been resurrected. - deduce_unreachable(resurrected, &still_unreachable); - clear_unreachable_mask(&still_unreachable); - // Move the resurrected objects to old - gc_list_merge(resurrected, old); - final_unreachable = &still_unreachable; - } - /* Call tp_clear on objects in the unreachable set. This will cause + /* Handle any objects that may have resurrected after the call + * to 'finalize_garbage' and continue the collection with the + * objects that are still unreachable */ + PyGC_Head final_unreachable; + handle_resurrected_objects(&unreachable, &final_unreachable, old); + + /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. */ - m += gc_list_size(final_unreachable); - delete_garbage(state, final_unreachable, old); + m += gc_list_size(&final_unreachable); + delete_garbage(state, &final_unreachable, old); /* Collect statistics on uncollectable objects found and print * debugging information. */ From a19016331f639f112efc81bd119c0b2712a20fff Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 12 Oct 2019 05:02:55 +0100 Subject: [PATCH 11/13] Fix compiler warnings in release mode --- Modules/gcmodule.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index b3019f48a96fa4..38c3de32c58fff 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -632,8 +632,7 @@ clear_unreachable_mask(PyGC_Head *unreachable) PyGC_Head *gc, *next; unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - PyObject *op = FROM_GC(gc); - _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE); + _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE); gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; next = (PyGC_Head*)gc->_gc_next; } From 6aadc40310ee64f17a78cdc6a697d115522289ef Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 12 Oct 2019 19:21:10 +0100 Subject: [PATCH 12/13] Add more tests for resurrection --- Lib/test/test_gc.py | 70 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 35f63e45d1c36c..fe9d6c2f766ccb 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -822,6 +822,76 @@ def test_get_objects_arguments(self): self.assertRaises(TypeError, gc.get_objects, "1") self.assertRaises(TypeError, gc.get_objects, 1.234) + def test_resurrection_only_happens_once_per_object(self): + class A: # simple self-loop + def __init__(self): + self.me = self + + class Lazarus(A): + resurrected = 0 + resurrected_instances = [] + + def __del__(self): + Lazarus.resurrected += 1 + Lazarus.resurrected_instances.append(self) + + gc.collect() + gc.disable() + + # We start with 0 resurrections + laz = Lazarus() + self.assertEqual(Lazarus.resurrected, 0) + + # Deleting the instance and triggering a collection + # resurrects the object + del laz + gc.collect() + self.assertEqual(Lazarus.resurrected, 1) + self.assertEqual(len(Lazarus.resurrected_instances), 1) + + # Clearing the references and forcing a collection + # should not resurrect the object again. + Lazarus.resurrected_instances.clear() + self.assertEqual(Lazarus.resurrected, 1) + gc.collect() + self.assertEqual(Lazarus.resurrected, 1) + + gc.enable() + + def test_resurrection_is_transitive(self): + class Cargo: + def __init__(self): + self.me = self + + class Lazarus: + resurrected_instances = [] + + def __del__(self): + Lazarus.resurrected_instances.append(self) + + gc.collect() + gc.disable() + + laz = Lazarus() + cargo = Cargo() + cargo_id = id(cargo) + + # Create a cycle between cargo and laz + laz.cargo = cargo + cargo.laz = laz + + # Drop the references, force a collection and check that + # everything was resurrected. + del laz, cargo + gc.collect() + self.assertEqual(len(Lazarus.resurrected_instances), 1) + instance = Lazarus.resurrected_instances.pop() + self.assertTrue(hasattr(instance, "cargo")) + self.assertEqual(id(instance.cargo), cargo_id) + + gc.collect() + gc.enable() + def test_resurrection_does_not_block_cleanup_of_other_objects(self): # When a finalizer resurrects objects, stats were reporting them as From 07d3f8a6730c3a57b6721e3216fca5041bd7f364 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 12 Oct 2019 19:46:51 +0100 Subject: [PATCH 13/13] Remove comment about delete_garbage --- Modules/gcmodule.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 38c3de32c58fff..0fb9ac2ac1ff55 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1060,9 +1060,8 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { gc_list_init over 'still_unreachable'). IMPORTANT: After a call to this function, the 'still_unreachable' set will have the -PREV_MARK_COLLECTING set, but the objects in this set are going to be removed (and -'delete_garbage' handles special cases that will go back to 'old_generation') so -the removal of the flag can be skipped to avoid extra iteration. */ +PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so +we can skip the expense of clearing the flag to avoid extra iteration. */ static inline void handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, PyGC_Head *old_generation) @@ -1073,9 +1072,7 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, // After the call to deduce_unreachable, the 'still_unreachable' set will // have the PREV_MARK_COLLECTING set, but the objects are going to be - // removed (and 'delete_garbage' handles special cases that will go - // back to 'old_generation') so we can not remove the flag to avoid - // extra iteration. + // removed so we can skip the expense of clearing the flag. PyGC_Head* resurrected = unreachable; deduce_unreachable(resurrected, still_unreachable); clear_unreachable_mask(still_unreachable);