gh-124694: Add concurrent.futures.InterpreterPoolExecutor (gh-124548)

This is an implementation of InterpreterPoolExecutor that builds on ThreadPoolExecutor.

(Note that this is not tied to PEP 734, which is strictly about adding a new stdlib module.)

Possible future improvements:

* support passing a script for the initializer or to submit()
* support passing (most) arbitrary functions without pickling
* support passing closures
* optionally exec functions against __main__ instead of the their original module
This commit is contained in:
Eric Snow 2024-10-16 16:50:46 -06:00 committed by GitHub
parent a38fef4439
commit a5a7f5e16d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 828 additions and 40 deletions

View file

@ -29,6 +29,7 @@ __all__ = (
'Executor',
'wait',
'as_completed',
'InterpreterPoolExecutor',
'ProcessPoolExecutor',
'ThreadPoolExecutor',
)
@ -39,7 +40,7 @@ def __dir__():
def __getattr__(name):
global ProcessPoolExecutor, ThreadPoolExecutor
global ProcessPoolExecutor, ThreadPoolExecutor, InterpreterPoolExecutor
if name == 'ProcessPoolExecutor':
from .process import ProcessPoolExecutor as pe
@ -51,4 +52,13 @@ def __getattr__(name):
ThreadPoolExecutor = te
return te
if name == 'InterpreterPoolExecutor':
try:
from .interpreter import InterpreterPoolExecutor as ie
except ModuleNotFoundError:
ie = InterpreterPoolExecutor = None
else:
InterpreterPoolExecutor = ie
return ie
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

View file

@ -0,0 +1,241 @@
"""Implements InterpreterPoolExecutor."""
import contextlib
import pickle
import textwrap
from . import thread as _thread
import _interpreters
import _interpqueues
class ExecutionFailed(_interpreters.InterpreterError):
"""An unhandled exception happened during execution."""
def __init__(self, excinfo):
msg = excinfo.formatted
if not msg:
if excinfo.type and excinfo.msg:
msg = f'{excinfo.type.__name__}: {excinfo.msg}'
else:
msg = excinfo.type.__name__ or excinfo.msg
super().__init__(msg)
self.excinfo = excinfo
def __str__(self):
try:
formatted = self.excinfo.errdisplay
except Exception:
return super().__str__()
else:
return textwrap.dedent(f"""
{super().__str__()}
Uncaught in the interpreter:
{formatted}
""".strip())
UNBOUND = 2 # error; this should not happen.
class WorkerContext(_thread.WorkerContext):
@classmethod
def prepare(cls, initializer, initargs, shared):
def resolve_task(fn, args, kwargs):
if isinstance(fn, str):
# XXX Circle back to this later.
raise TypeError('scripts not supported')
if args or kwargs:
raise ValueError(f'a script does not take args or kwargs, got {args!r} and {kwargs!r}')
data = textwrap.dedent(fn)
kind = 'script'
# Make sure the script compiles.
# Ideally we wouldn't throw away the resulting code
# object. However, there isn't much to be done until
# code objects are shareable and/or we do a better job
# of supporting code objects in _interpreters.exec().
compile(data, '<string>', 'exec')
else:
# Functions defined in the __main__ module can't be pickled,
# so they can't be used here. In the future, we could possibly
# borrow from multiprocessing to work around this.
data = pickle.dumps((fn, args, kwargs))
kind = 'function'
return (data, kind)
if initializer is not None:
try:
initdata = resolve_task(initializer, initargs, {})
except ValueError:
if isinstance(initializer, str) and initargs:
raise ValueError(f'an initializer script does not take args, got {initargs!r}')
raise # re-raise
else:
initdata = None
def create_context():
return cls(initdata, shared)
return create_context, resolve_task
@classmethod
@contextlib.contextmanager
def _capture_exc(cls, resultsid):
try:
yield
except BaseException as exc:
# Send the captured exception out on the results queue,
# but still leave it unhandled for the interpreter to handle.
err = pickle.dumps(exc)
_interpqueues.put(resultsid, (None, err), 1, UNBOUND)
raise # re-raise
@classmethod
def _send_script_result(cls, resultsid):
_interpqueues.put(resultsid, (None, None), 0, UNBOUND)
@classmethod
def _call(cls, func, args, kwargs, resultsid):
with cls._capture_exc(resultsid):
res = func(*args or (), **kwargs or {})
# Send the result back.
try:
_interpqueues.put(resultsid, (res, None), 0, UNBOUND)
except _interpreters.NotShareableError:
res = pickle.dumps(res)
_interpqueues.put(resultsid, (res, None), 1, UNBOUND)
@classmethod
def _call_pickled(cls, pickled, resultsid):
fn, args, kwargs = pickle.loads(pickled)
cls._call(fn, args, kwargs, resultsid)
def __init__(self, initdata, shared=None):
self.initdata = initdata
self.shared = dict(shared) if shared else None
self.interpid = None
self.resultsid = None
def __del__(self):
if self.interpid is not None:
self.finalize()
def _exec(self, script):
assert self.interpid is not None
excinfo = _interpreters.exec(self.interpid, script, restrict=True)
if excinfo is not None:
raise ExecutionFailed(excinfo)
def initialize(self):
assert self.interpid is None, self.interpid
self.interpid = _interpreters.create(reqrefs=True)
try:
_interpreters.incref(self.interpid)
maxsize = 0
fmt = 0
self.resultsid = _interpqueues.create(maxsize, fmt, UNBOUND)
self._exec(f'from {__name__} import WorkerContext')
if self.shared:
_interpreters.set___main___attrs(
self.interpid, self.shared, restrict=True)
if self.initdata:
self.run(self.initdata)
except BaseException:
self.finalize()
raise # re-raise
def finalize(self):
interpid = self.interpid
resultsid = self.resultsid
self.resultsid = None
self.interpid = None
if resultsid is not None:
try:
_interpqueues.destroy(resultsid)
except _interpqueues.QueueNotFoundError:
pass
if interpid is not None:
try:
_interpreters.decref(interpid)
except _interpreters.InterpreterNotFoundError:
pass
def run(self, task):
data, kind = task
if kind == 'script':
raise NotImplementedError('script kind disabled')
script = f"""
with WorkerContext._capture_exc({self.resultsid}):
{textwrap.indent(data, ' ')}
WorkerContext._send_script_result({self.resultsid})"""
elif kind == 'function':
script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})'
else:
raise NotImplementedError(kind)
try:
self._exec(script)
except ExecutionFailed as exc:
exc_wrapper = exc
else:
exc_wrapper = None
# Return the result, or raise the exception.
while True:
try:
obj = _interpqueues.get(self.resultsid)
except _interpqueues.QueueNotFoundError:
raise # re-raise
except _interpqueues.QueueError:
continue
except ModuleNotFoundError:
# interpreters.queues doesn't exist, which means
# QueueEmpty doesn't. Act as though it does.
continue
else:
break
(res, excdata), pickled, unboundop = obj
assert unboundop is None, unboundop
if excdata is not None:
assert res is None, res
assert pickled
assert exc_wrapper is not None
exc = pickle.loads(excdata)
raise exc from exc_wrapper
return pickle.loads(res) if pickled else res
class BrokenInterpreterPool(_thread.BrokenThreadPool):
"""
Raised when a worker thread in an InterpreterPoolExecutor failed initializing.
"""
class InterpreterPoolExecutor(_thread.ThreadPoolExecutor):
BROKEN = BrokenInterpreterPool
@classmethod
def prepare_context(cls, initializer, initargs, shared):
return WorkerContext.prepare(initializer, initargs, shared)
def __init__(self, max_workers=None, thread_name_prefix='',
initializer=None, initargs=(), shared=None):
"""Initializes a new InterpreterPoolExecutor instance.
Args:
max_workers: The maximum number of interpreters that can be used to
execute the given calls.
thread_name_prefix: An optional name prefix to give our threads.
initializer: A callable or script used to initialize
each worker interpreter.
initargs: A tuple of arguments to pass to the initializer.
shared: A mapping of shareabled objects to be inserted into
each worker interpreter.
"""
super().__init__(max_workers, thread_name_prefix,
initializer, initargs, shared=shared)

View file

@ -43,19 +43,46 @@ if hasattr(os, 'register_at_fork'):
after_in_parent=_global_shutdown_lock.release)
class _WorkItem:
def __init__(self, future, fn, args, kwargs):
self.future = future
self.fn = fn
self.args = args
self.kwargs = kwargs
class WorkerContext:
def run(self):
@classmethod
def prepare(cls, initializer, initargs):
if initializer is not None:
if not callable(initializer):
raise TypeError("initializer must be a callable")
def create_context():
return cls(initializer, initargs)
def resolve_task(fn, args, kwargs):
return (fn, args, kwargs)
return create_context, resolve_task
def __init__(self, initializer, initargs):
self.initializer = initializer
self.initargs = initargs
def initialize(self):
if self.initializer is not None:
self.initializer(*self.initargs)
def finalize(self):
pass
def run(self, task):
fn, args, kwargs = task
return fn(*args, **kwargs)
class _WorkItem:
def __init__(self, future, task):
self.future = future
self.task = task
def run(self, ctx):
if not self.future.set_running_or_notify_cancel():
return
try:
result = self.fn(*self.args, **self.kwargs)
result = ctx.run(self.task)
except BaseException as exc:
self.future.set_exception(exc)
# Break a reference cycle with the exception 'exc'
@ -66,16 +93,15 @@ class _WorkItem:
__class_getitem__ = classmethod(types.GenericAlias)
def _worker(executor_reference, work_queue, initializer, initargs):
if initializer is not None:
try:
initializer(*initargs)
except BaseException:
_base.LOGGER.critical('Exception in initializer:', exc_info=True)
executor = executor_reference()
if executor is not None:
executor._initializer_failed()
return
def _worker(executor_reference, ctx, work_queue):
try:
ctx.initialize()
except BaseException:
_base.LOGGER.critical('Exception in initializer:', exc_info=True)
executor = executor_reference()
if executor is not None:
executor._initializer_failed()
return
try:
while True:
try:
@ -89,7 +115,7 @@ def _worker(executor_reference, work_queue, initializer, initargs):
work_item = work_queue.get(block=True)
if work_item is not None:
work_item.run()
work_item.run(ctx)
# Delete references to object. See GH-60488
del work_item
continue
@ -110,6 +136,8 @@ def _worker(executor_reference, work_queue, initializer, initargs):
del executor
except BaseException:
_base.LOGGER.critical('Exception in worker', exc_info=True)
finally:
ctx.finalize()
class BrokenThreadPool(_base.BrokenExecutor):
@ -120,11 +148,17 @@ class BrokenThreadPool(_base.BrokenExecutor):
class ThreadPoolExecutor(_base.Executor):
BROKEN = BrokenThreadPool
# Used to assign unique thread names when thread_name_prefix is not supplied.
_counter = itertools.count().__next__
@classmethod
def prepare_context(cls, initializer, initargs):
return WorkerContext.prepare(initializer, initargs)
def __init__(self, max_workers=None, thread_name_prefix='',
initializer=None, initargs=()):
initializer=None, initargs=(), **ctxkwargs):
"""Initializes a new ThreadPoolExecutor instance.
Args:
@ -133,6 +167,7 @@ class ThreadPoolExecutor(_base.Executor):
thread_name_prefix: An optional name prefix to give our threads.
initializer: A callable used to initialize worker threads.
initargs: A tuple of arguments to pass to the initializer.
ctxkwargs: Additional arguments to cls.prepare_context().
"""
if max_workers is None:
# ThreadPoolExecutor is often used to:
@ -146,8 +181,9 @@ class ThreadPoolExecutor(_base.Executor):
if max_workers <= 0:
raise ValueError("max_workers must be greater than 0")
if initializer is not None and not callable(initializer):
raise TypeError("initializer must be a callable")
(self._create_worker_context,
self._resolve_work_item_task,
) = type(self).prepare_context(initializer, initargs, **ctxkwargs)
self._max_workers = max_workers
self._work_queue = queue.SimpleQueue()
@ -158,13 +194,11 @@ class ThreadPoolExecutor(_base.Executor):
self._shutdown_lock = threading.Lock()
self._thread_name_prefix = (thread_name_prefix or
("ThreadPoolExecutor-%d" % self._counter()))
self._initializer = initializer
self._initargs = initargs
def submit(self, fn, /, *args, **kwargs):
with self._shutdown_lock, _global_shutdown_lock:
if self._broken:
raise BrokenThreadPool(self._broken)
raise self.BROKEN(self._broken)
if self._shutdown:
raise RuntimeError('cannot schedule new futures after shutdown')
@ -173,7 +207,8 @@ class ThreadPoolExecutor(_base.Executor):
'interpreter shutdown')
f = _base.Future()
w = _WorkItem(f, fn, args, kwargs)
task = self._resolve_work_item_task(fn, args, kwargs)
w = _WorkItem(f, task)
self._work_queue.put(w)
self._adjust_thread_count()
@ -196,9 +231,8 @@ class ThreadPoolExecutor(_base.Executor):
num_threads)
t = threading.Thread(name=thread_name, target=_worker,
args=(weakref.ref(self, weakref_cb),
self._work_queue,
self._initializer,
self._initargs))
self._create_worker_context(),
self._work_queue))
t.start()
self._threads.add(t)
_threads_queues[t] = self._work_queue
@ -214,7 +248,7 @@ class ThreadPoolExecutor(_base.Executor):
except queue.Empty:
break
if work_item is not None:
work_item.future.set_exception(BrokenThreadPool(self._broken))
work_item.future.set_exception(self.BROKEN(self._broken))
def shutdown(self, wait=True, *, cancel_futures=False):
with self._shutdown_lock: