mirror of
https://github.com/python/cpython.git
synced 2025-09-27 02:39:58 +00:00
GH-106176, GH-104702: Fix reference leak when importing across multiple threads (#108497)
This commit is contained in:
parent
c780698e9b
commit
5f85b443f7
2 changed files with 107 additions and 12 deletions
|
@ -51,18 +51,106 @@ def _new_module(name):
|
||||||
|
|
||||||
# Module-level locking ########################################################
|
# Module-level locking ########################################################
|
||||||
|
|
||||||
# A dict mapping module names to weakrefs of _ModuleLock instances
|
# For a list that can have a weakref to it.
|
||||||
# Dictionary protected by the global import lock
|
class _List(list):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Copied from weakref.py with some simplifications and modifications unique to
|
||||||
|
# bootstrapping importlib. Many methods were simply deleting for simplicity, so if they
|
||||||
|
# are needed in the future they may work if simply copied back in.
|
||||||
|
class _WeakValueDictionary:
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self_weakref = _weakref.ref(self)
|
||||||
|
|
||||||
|
# Inlined to avoid issues with inheriting from _weakref.ref before _weakref is
|
||||||
|
# set by _setup(). Since there's only one instance of this class, this is
|
||||||
|
# not expensive.
|
||||||
|
class KeyedRef(_weakref.ref):
|
||||||
|
|
||||||
|
__slots__ = "key",
|
||||||
|
|
||||||
|
def __new__(type, ob, key):
|
||||||
|
self = super().__new__(type, ob, type.remove)
|
||||||
|
self.key = key
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __init__(self, ob, key):
|
||||||
|
super().__init__(ob, self.remove)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def remove(wr):
|
||||||
|
nonlocal self_weakref
|
||||||
|
|
||||||
|
self = self_weakref()
|
||||||
|
if self is not None:
|
||||||
|
if self._iterating:
|
||||||
|
self._pending_removals.append(wr.key)
|
||||||
|
else:
|
||||||
|
_weakref._remove_dead_weakref(self.data, wr.key)
|
||||||
|
|
||||||
|
self._KeyedRef = KeyedRef
|
||||||
|
self.clear()
|
||||||
|
|
||||||
|
def clear(self):
|
||||||
|
self._pending_removals = []
|
||||||
|
self._iterating = set()
|
||||||
|
self.data = {}
|
||||||
|
|
||||||
|
def _commit_removals(self):
|
||||||
|
pop = self._pending_removals.pop
|
||||||
|
d = self.data
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
key = pop()
|
||||||
|
except IndexError:
|
||||||
|
return
|
||||||
|
_weakref._remove_dead_weakref(d, key)
|
||||||
|
|
||||||
|
def get(self, key, default=None):
|
||||||
|
if self._pending_removals:
|
||||||
|
self._commit_removals()
|
||||||
|
try:
|
||||||
|
wr = self.data[key]
|
||||||
|
except KeyError:
|
||||||
|
return default
|
||||||
|
else:
|
||||||
|
if (o := wr()) is None:
|
||||||
|
return default
|
||||||
|
else:
|
||||||
|
return o
|
||||||
|
|
||||||
|
def setdefault(self, key, default=None):
|
||||||
|
try:
|
||||||
|
o = self.data[key]()
|
||||||
|
except KeyError:
|
||||||
|
o = None
|
||||||
|
if o is None:
|
||||||
|
if self._pending_removals:
|
||||||
|
self._commit_removals()
|
||||||
|
self.data[key] = self._KeyedRef(default, key)
|
||||||
|
return default
|
||||||
|
else:
|
||||||
|
return o
|
||||||
|
|
||||||
|
|
||||||
|
# A dict mapping module names to weakrefs of _ModuleLock instances.
|
||||||
|
# Dictionary protected by the global import lock.
|
||||||
_module_locks = {}
|
_module_locks = {}
|
||||||
|
|
||||||
# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a
|
# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances.
|
||||||
# thread to the module locks it is blocking on acquiring. The values are
|
# This maps a thread to the module locks it is blocking on acquiring. The
|
||||||
# lists because a single thread could perform a re-entrant import and be "in
|
# values are lists because a single thread could perform a re-entrant import
|
||||||
# the process" of blocking on locks for more than one module. A thread can
|
# and be "in the process" of blocking on locks for more than one module. A
|
||||||
# be "in the process" because a thread cannot actually block on acquiring
|
# thread can be "in the process" because a thread cannot actually block on
|
||||||
# more than one lock but it can have set up bookkeeping that reflects that
|
# acquiring more than one lock but it can have set up bookkeeping that reflects
|
||||||
# it intends to block on acquiring more than one lock.
|
# that it intends to block on acquiring more than one lock.
|
||||||
_blocking_on = {}
|
#
|
||||||
|
# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary
|
||||||
|
# lists around, regardless of GC runs. This way there's no memory leak if
|
||||||
|
# the list is no longer needed (GH-106176).
|
||||||
|
_blocking_on = None
|
||||||
|
|
||||||
|
|
||||||
class _BlockingOnManager:
|
class _BlockingOnManager:
|
||||||
|
@ -79,7 +167,7 @@ class _BlockingOnManager:
|
||||||
# re-entrant (i.e., a single thread may take it more than once) so it
|
# re-entrant (i.e., a single thread may take it more than once) so it
|
||||||
# wouldn't help us be correct in the face of re-entrancy either.
|
# wouldn't help us be correct in the face of re-entrancy either.
|
||||||
|
|
||||||
self.blocked_on = _blocking_on.setdefault(self.thread_id, [])
|
self.blocked_on = _blocking_on.setdefault(self.thread_id, _List())
|
||||||
self.blocked_on.append(self.lock)
|
self.blocked_on.append(self.lock)
|
||||||
|
|
||||||
def __exit__(self, *args, **kwargs):
|
def __exit__(self, *args, **kwargs):
|
||||||
|
@ -1409,7 +1497,7 @@ def _setup(sys_module, _imp_module):
|
||||||
modules, those two modules must be explicitly passed in.
|
modules, those two modules must be explicitly passed in.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
global _imp, sys
|
global _imp, sys, _blocking_on
|
||||||
_imp = _imp_module
|
_imp = _imp_module
|
||||||
sys = sys_module
|
sys = sys_module
|
||||||
|
|
||||||
|
@ -1437,6 +1525,9 @@ def _setup(sys_module, _imp_module):
|
||||||
builtin_module = sys.modules[builtin_name]
|
builtin_module = sys.modules[builtin_name]
|
||||||
setattr(self_module, builtin_name, builtin_module)
|
setattr(self_module, builtin_name, builtin_module)
|
||||||
|
|
||||||
|
# Instantiation requires _weakref to have been set.
|
||||||
|
_blocking_on = _WeakValueDictionary()
|
||||||
|
|
||||||
|
|
||||||
def _install(sys_module, _imp_module):
|
def _install(sys_module, _imp_module):
|
||||||
"""Install importers for builtin and frozen modules"""
|
"""Install importers for builtin and frozen modules"""
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
Use a ``WeakValueDictionary`` to track the lists containing the modules each
|
||||||
|
thread is currently importing. This helps avoid a reference leak from
|
||||||
|
keeping the list around longer than necessary. Weakrefs are used as GC can't
|
||||||
|
interrupt the cleanup.
|
Loading…
Add table
Add a link
Reference in a new issue