[3.12] gh-114763: Protect lazy loading modules from attribute access races (GH-114781) (GH-115870)

gh-114763: Protect lazy loading modules from attribute access races (GH-114781)

Setting the __class__ attribute of a lazy-loading module to ModuleType enables other threads to attempt to access attributes before the loading is complete. Now that is protected by a lock.
(cherry picked from commit 200271c61d)

Co-authored-by: Chris Markiewicz <effigies@gmail.com>
This commit is contained in:
Miss Islington (bot) 2024-02-26 21:43:44 +01:00 committed by GitHub
parent 53b84e772c
commit 7b91b9001a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 94 additions and 32 deletions

View file

@ -13,6 +13,7 @@ from ._bootstrap_external import spec_from_file_location
import _imp import _imp
import sys import sys
import threading
import types import types
@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType):
def __getattribute__(self, attr): def __getattribute__(self, attr):
"""Trigger the load of the module and return the attribute.""" """Trigger the load of the module and return the attribute."""
# All module metadata must be garnered from __spec__ in order to avoid __spec__ = object.__getattribute__(self, '__spec__')
# using mutated values. loader_state = __spec__.loader_state
# Stop triggering this method. with loader_state['lock']:
self.__class__ = types.ModuleType # Only the first thread to get the lock should trigger the load
# Get the original name to make sure no object substitution occurred # and reset the module's class. The rest can now getattr().
# in sys.modules. if object.__getattribute__(self, '__class__') is _LazyModule:
original_name = self.__spec__.name # The first thread comes here multiple times as it descends the
# Figure out exactly what attributes were mutated between the creation # call stack. The first time, it sets is_loading and triggers
# of the module and now. # exec_module(), which will access module.__dict__, module.__name__,
attrs_then = self.__spec__.loader_state['__dict__'] # and/or module.__spec__, reentering this method. These accesses
attrs_now = self.__dict__ # need to be allowed to proceed without triggering the load again.
attrs_updated = {} if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'):
for key, value in attrs_now.items(): return object.__getattribute__(self, attr)
# Code that set the attribute may have kept a reference to the loader_state['is_loading'] = True
# assigned object, making identity more important than equality.
if key not in attrs_then: __dict__ = object.__getattribute__(self, '__dict__')
attrs_updated[key] = value
elif id(attrs_now[key]) != id(attrs_then[key]): # All module metadata must be gathered from __spec__ in order to avoid
attrs_updated[key] = value # using mutated values.
self.__spec__.loader.exec_module(self) # Get the original name to make sure no object substitution occurred
# If exec_module() was used directly there is no guarantee the module # in sys.modules.
# object was put into sys.modules. original_name = __spec__.name
if original_name in sys.modules: # Figure out exactly what attributes were mutated between the creation
if id(self) != id(sys.modules[original_name]): # of the module and now.
raise ValueError(f"module object for {original_name!r} " attrs_then = loader_state['__dict__']
"substituted in sys.modules during a lazy " attrs_now = __dict__
"load") attrs_updated = {}
# Update after loading since that's what would happen in an eager for key, value in attrs_now.items():
# loading situation. # Code that set an attribute may have kept a reference to the
self.__dict__.update(attrs_updated) # assigned object, making identity more important than equality.
if key not in attrs_then:
attrs_updated[key] = value
elif id(attrs_now[key]) != id(attrs_then[key]):
attrs_updated[key] = value
__spec__.loader.exec_module(self)
# If exec_module() was used directly there is no guarantee the module
# object was put into sys.modules.
if original_name in sys.modules:
if id(self) != id(sys.modules[original_name]):
raise ValueError(f"module object for {original_name!r} "
"substituted in sys.modules during a lazy "
"load")
# Update after loading since that's what would happen in an eager
# loading situation.
__dict__.update(attrs_updated)
# Finally, stop triggering this method.
self.__class__ = types.ModuleType
return getattr(self, attr) return getattr(self, attr)
def __delattr__(self, attr): def __delattr__(self, attr):
@ -244,5 +263,7 @@ class LazyLoader(Loader):
loader_state = {} loader_state = {}
loader_state['__dict__'] = module.__dict__.copy() loader_state['__dict__'] = module.__dict__.copy()
loader_state['__class__'] = module.__class__ loader_state['__class__'] = module.__class__
loader_state['lock'] = threading.RLock()
loader_state['is_loading'] = False
module.__spec__.loader_state = loader_state module.__spec__.loader_state = loader_state
module.__class__ = _LazyModule module.__class__ = _LazyModule

View file

@ -2,9 +2,12 @@ import importlib
from importlib import abc from importlib import abc
from importlib import util from importlib import util
import sys import sys
import time
import threading
import types import types
import unittest import unittest
from test.support import threading_helper
from test.test_importlib import util as test_util from test.test_importlib import util as test_util
@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
module_name = 'lazy_loader_test' module_name = 'lazy_loader_test'
mutated_name = 'changed' mutated_name = 'changed'
loaded = None loaded = None
load_count = 0
source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name) source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)
def find_spec(self, name, path, target=None): def find_spec(self, name, path, target=None):
@ -48,8 +52,10 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
return util.spec_from_loader(name, util.LazyLoader(self)) return util.spec_from_loader(name, util.LazyLoader(self))
def exec_module(self, module): def exec_module(self, module):
time.sleep(0.01) # Simulate a slow load.
exec(self.source_code, module.__dict__) exec(self.source_code, module.__dict__)
self.loaded = module self.loaded = module
self.load_count += 1
class LazyLoaderTests(unittest.TestCase): class LazyLoaderTests(unittest.TestCase):
@ -59,8 +65,9 @@ class LazyLoaderTests(unittest.TestCase):
# Classes that don't define exec_module() trigger TypeError. # Classes that don't define exec_module() trigger TypeError.
util.LazyLoader(object) util.LazyLoader(object)
def new_module(self, source_code=None): def new_module(self, source_code=None, loader=None):
loader = TestingImporter() if loader is None:
loader = TestingImporter()
if source_code is not None: if source_code is not None:
loader.source_code = source_code loader.source_code = source_code
spec = util.spec_from_loader(TestingImporter.module_name, spec = util.spec_from_loader(TestingImporter.module_name,
@ -140,6 +147,37 @@ class LazyLoaderTests(unittest.TestCase):
# Force the load; just care that no exception is raised. # Force the load; just care that no exception is raised.
module.__name__ module.__name__
@threading_helper.requires_working_threading()
def test_module_load_race(self):
with test_util.uncache(TestingImporter.module_name):
loader = TestingImporter()
module = self.new_module(loader=loader)
self.assertEqual(loader.load_count, 0)
class RaisingThread(threading.Thread):
exc = None
def run(self):
try:
super().run()
except Exception as exc:
self.exc = exc
def access_module():
return module.attr
threads = []
for _ in range(2):
threads.append(thread := RaisingThread(target=access_module))
thread.start()
# Races could cause errors
for thread in threads:
thread.join()
self.assertIsNone(thread.exc)
# Or multiple load attempts
self.assertEqual(loader.load_count, 1)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -0,0 +1,3 @@
Protect modules loaded with :class:`importlib.util.LazyLoader` from race
conditions when multiple threads try to access attributes before the loading
is complete.