mirror of
https://github.com/python/cpython.git
synced 2025-10-03 05:35:59 +00:00
[3.12] gh-114763: Protect lazy loading modules from attribute access races (GH-114781) (GH-115870)
gh-114763: Protect lazy loading modules from attribute access races (GH-114781)
Setting the __class__ attribute of a lazy-loading module to ModuleType enables other threads to attempt to access attributes before the loading is complete. Now that is protected by a lock.
(cherry picked from commit 200271c61d
)
Co-authored-by: Chris Markiewicz <effigies@gmail.com>
This commit is contained in:
parent
53b84e772c
commit
7b91b9001a
3 changed files with 94 additions and 32 deletions
|
@ -13,6 +13,7 @@ from ._bootstrap_external import spec_from_file_location
|
||||||
|
|
||||||
import _imp
|
import _imp
|
||||||
import sys
|
import sys
|
||||||
|
import threading
|
||||||
import types
|
import types
|
||||||
|
|
||||||
|
|
||||||
|
@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType):
|
||||||
|
|
||||||
def __getattribute__(self, attr):
|
def __getattribute__(self, attr):
|
||||||
"""Trigger the load of the module and return the attribute."""
|
"""Trigger the load of the module and return the attribute."""
|
||||||
# All module metadata must be garnered from __spec__ in order to avoid
|
__spec__ = object.__getattribute__(self, '__spec__')
|
||||||
# using mutated values.
|
loader_state = __spec__.loader_state
|
||||||
# Stop triggering this method.
|
with loader_state['lock']:
|
||||||
self.__class__ = types.ModuleType
|
# Only the first thread to get the lock should trigger the load
|
||||||
# Get the original name to make sure no object substitution occurred
|
# and reset the module's class. The rest can now getattr().
|
||||||
# in sys.modules.
|
if object.__getattribute__(self, '__class__') is _LazyModule:
|
||||||
original_name = self.__spec__.name
|
# The first thread comes here multiple times as it descends the
|
||||||
# Figure out exactly what attributes were mutated between the creation
|
# call stack. The first time, it sets is_loading and triggers
|
||||||
# of the module and now.
|
# exec_module(), which will access module.__dict__, module.__name__,
|
||||||
attrs_then = self.__spec__.loader_state['__dict__']
|
# and/or module.__spec__, reentering this method. These accesses
|
||||||
attrs_now = self.__dict__
|
# need to be allowed to proceed without triggering the load again.
|
||||||
attrs_updated = {}
|
if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'):
|
||||||
for key, value in attrs_now.items():
|
return object.__getattribute__(self, attr)
|
||||||
# Code that set the attribute may have kept a reference to the
|
loader_state['is_loading'] = True
|
||||||
# assigned object, making identity more important than equality.
|
|
||||||
if key not in attrs_then:
|
__dict__ = object.__getattribute__(self, '__dict__')
|
||||||
attrs_updated[key] = value
|
|
||||||
elif id(attrs_now[key]) != id(attrs_then[key]):
|
# All module metadata must be gathered from __spec__ in order to avoid
|
||||||
attrs_updated[key] = value
|
# using mutated values.
|
||||||
self.__spec__.loader.exec_module(self)
|
# Get the original name to make sure no object substitution occurred
|
||||||
# If exec_module() was used directly there is no guarantee the module
|
# in sys.modules.
|
||||||
# object was put into sys.modules.
|
original_name = __spec__.name
|
||||||
if original_name in sys.modules:
|
# Figure out exactly what attributes were mutated between the creation
|
||||||
if id(self) != id(sys.modules[original_name]):
|
# of the module and now.
|
||||||
raise ValueError(f"module object for {original_name!r} "
|
attrs_then = loader_state['__dict__']
|
||||||
"substituted in sys.modules during a lazy "
|
attrs_now = __dict__
|
||||||
"load")
|
attrs_updated = {}
|
||||||
# Update after loading since that's what would happen in an eager
|
for key, value in attrs_now.items():
|
||||||
# loading situation.
|
# Code that set an attribute may have kept a reference to the
|
||||||
self.__dict__.update(attrs_updated)
|
# assigned object, making identity more important than equality.
|
||||||
|
if key not in attrs_then:
|
||||||
|
attrs_updated[key] = value
|
||||||
|
elif id(attrs_now[key]) != id(attrs_then[key]):
|
||||||
|
attrs_updated[key] = value
|
||||||
|
__spec__.loader.exec_module(self)
|
||||||
|
# If exec_module() was used directly there is no guarantee the module
|
||||||
|
# object was put into sys.modules.
|
||||||
|
if original_name in sys.modules:
|
||||||
|
if id(self) != id(sys.modules[original_name]):
|
||||||
|
raise ValueError(f"module object for {original_name!r} "
|
||||||
|
"substituted in sys.modules during a lazy "
|
||||||
|
"load")
|
||||||
|
# Update after loading since that's what would happen in an eager
|
||||||
|
# loading situation.
|
||||||
|
__dict__.update(attrs_updated)
|
||||||
|
# Finally, stop triggering this method.
|
||||||
|
self.__class__ = types.ModuleType
|
||||||
|
|
||||||
return getattr(self, attr)
|
return getattr(self, attr)
|
||||||
|
|
||||||
def __delattr__(self, attr):
|
def __delattr__(self, attr):
|
||||||
|
@ -244,5 +263,7 @@ class LazyLoader(Loader):
|
||||||
loader_state = {}
|
loader_state = {}
|
||||||
loader_state['__dict__'] = module.__dict__.copy()
|
loader_state['__dict__'] = module.__dict__.copy()
|
||||||
loader_state['__class__'] = module.__class__
|
loader_state['__class__'] = module.__class__
|
||||||
|
loader_state['lock'] = threading.RLock()
|
||||||
|
loader_state['is_loading'] = False
|
||||||
module.__spec__.loader_state = loader_state
|
module.__spec__.loader_state = loader_state
|
||||||
module.__class__ = _LazyModule
|
module.__class__ = _LazyModule
|
||||||
|
|
|
@ -2,9 +2,12 @@ import importlib
|
||||||
from importlib import abc
|
from importlib import abc
|
||||||
from importlib import util
|
from importlib import util
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
import types
|
import types
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
from test.support import threading_helper
|
||||||
from test.test_importlib import util as test_util
|
from test.test_importlib import util as test_util
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
|
||||||
module_name = 'lazy_loader_test'
|
module_name = 'lazy_loader_test'
|
||||||
mutated_name = 'changed'
|
mutated_name = 'changed'
|
||||||
loaded = None
|
loaded = None
|
||||||
|
load_count = 0
|
||||||
source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)
|
source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)
|
||||||
|
|
||||||
def find_spec(self, name, path, target=None):
|
def find_spec(self, name, path, target=None):
|
||||||
|
@ -48,8 +52,10 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
|
||||||
return util.spec_from_loader(name, util.LazyLoader(self))
|
return util.spec_from_loader(name, util.LazyLoader(self))
|
||||||
|
|
||||||
def exec_module(self, module):
|
def exec_module(self, module):
|
||||||
|
time.sleep(0.01) # Simulate a slow load.
|
||||||
exec(self.source_code, module.__dict__)
|
exec(self.source_code, module.__dict__)
|
||||||
self.loaded = module
|
self.loaded = module
|
||||||
|
self.load_count += 1
|
||||||
|
|
||||||
|
|
||||||
class LazyLoaderTests(unittest.TestCase):
|
class LazyLoaderTests(unittest.TestCase):
|
||||||
|
@ -59,8 +65,9 @@ class LazyLoaderTests(unittest.TestCase):
|
||||||
# Classes that don't define exec_module() trigger TypeError.
|
# Classes that don't define exec_module() trigger TypeError.
|
||||||
util.LazyLoader(object)
|
util.LazyLoader(object)
|
||||||
|
|
||||||
def new_module(self, source_code=None):
|
def new_module(self, source_code=None, loader=None):
|
||||||
loader = TestingImporter()
|
if loader is None:
|
||||||
|
loader = TestingImporter()
|
||||||
if source_code is not None:
|
if source_code is not None:
|
||||||
loader.source_code = source_code
|
loader.source_code = source_code
|
||||||
spec = util.spec_from_loader(TestingImporter.module_name,
|
spec = util.spec_from_loader(TestingImporter.module_name,
|
||||||
|
@ -140,6 +147,37 @@ class LazyLoaderTests(unittest.TestCase):
|
||||||
# Force the load; just care that no exception is raised.
|
# Force the load; just care that no exception is raised.
|
||||||
module.__name__
|
module.__name__
|
||||||
|
|
||||||
|
@threading_helper.requires_working_threading()
|
||||||
|
def test_module_load_race(self):
|
||||||
|
with test_util.uncache(TestingImporter.module_name):
|
||||||
|
loader = TestingImporter()
|
||||||
|
module = self.new_module(loader=loader)
|
||||||
|
self.assertEqual(loader.load_count, 0)
|
||||||
|
|
||||||
|
class RaisingThread(threading.Thread):
|
||||||
|
exc = None
|
||||||
|
def run(self):
|
||||||
|
try:
|
||||||
|
super().run()
|
||||||
|
except Exception as exc:
|
||||||
|
self.exc = exc
|
||||||
|
|
||||||
|
def access_module():
|
||||||
|
return module.attr
|
||||||
|
|
||||||
|
threads = []
|
||||||
|
for _ in range(2):
|
||||||
|
threads.append(thread := RaisingThread(target=access_module))
|
||||||
|
thread.start()
|
||||||
|
|
||||||
|
# Races could cause errors
|
||||||
|
for thread in threads:
|
||||||
|
thread.join()
|
||||||
|
self.assertIsNone(thread.exc)
|
||||||
|
|
||||||
|
# Or multiple load attempts
|
||||||
|
self.assertEqual(loader.load_count, 1)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Protect modules loaded with :class:`importlib.util.LazyLoader` from race
|
||||||
|
conditions when multiple threads try to access attributes before the loading
|
||||||
|
is complete.
|
Loading…
Add table
Add a link
Reference in a new issue