Mirror of https://github.com/python/cpython.git (synced 2025-08-04 17:08:35 +00:00)
gh-124694: Add concurrent.futures.InterpreterPoolExecutor (gh-124548)
This is an implementation of InterpreterPoolExecutor that builds on ThreadPoolExecutor. (Note that this is not tied to PEP 734, which is strictly about adding a new stdlib module.) Possible future improvements:
* support passing a script for the initializer or to submit()
* support passing (most) arbitrary functions without pickling
* support passing closures
* optionally exec functions against __main__ instead of their original module
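[Editor's note] A minimal usage sketch, not part of the commit: the new executor is used like ThreadPoolExecutor, but each worker runs tasks in its own subinterpreter. Because tasks are pickled by reference (see interpreter.py below), submitted callables must be importable in the worker interpreter; functions defined in __main__ won't work yet.

    from concurrent.futures import InterpreterPoolExecutor

    # pow is a builtin, so it pickles by reference and resolves cleanly
    # inside each worker interpreter.
    with InterpreterPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(pow, 2, n) for n in range(8)]
        print([f.result() for f in futures])  # [1, 2, 4, 8, 16, 32, 64, 128]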
parent a38fef4439
commit a5a7f5e16d
12 changed files with 828 additions and 40 deletions
Lib/concurrent/futures/__init__.py

@@ -29,6 +29,7 @@ __all__ = (
     'Executor',
     'wait',
     'as_completed',
+    'InterpreterPoolExecutor',
     'ProcessPoolExecutor',
     'ThreadPoolExecutor',
 )
@@ -39,7 +40,7 @@ def __dir__():


 def __getattr__(name):
-    global ProcessPoolExecutor, ThreadPoolExecutor
+    global ProcessPoolExecutor, ThreadPoolExecutor, InterpreterPoolExecutor

     if name == 'ProcessPoolExecutor':
         from .process import ProcessPoolExecutor as pe
@@ -51,4 +52,13 @@ def __getattr__(name):
         ThreadPoolExecutor = te
         return te

+    if name == 'InterpreterPoolExecutor':
+        try:
+            from .interpreter import InterpreterPoolExecutor as ie
+        except ModuleNotFoundError:
+            ie = InterpreterPoolExecutor = None
+        else:
+            InterpreterPoolExecutor = ie
+        return ie
+
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

Lib/concurrent/futures/interpreter.py (new file, 241 lines)

@@ -0,0 +1,241 @@
"""Implements InterpreterPoolExecutor."""
|
||||
|
||||
import contextlib
|
||||
import pickle
|
||||
import textwrap
|
||||
from . import thread as _thread
|
||||
import _interpreters
|
||||
import _interpqueues
|
||||
|
||||
|
||||
class ExecutionFailed(_interpreters.InterpreterError):
|
||||
"""An unhandled exception happened during execution."""
|
||||
|
||||
def __init__(self, excinfo):
|
||||
msg = excinfo.formatted
|
||||
if not msg:
|
||||
if excinfo.type and excinfo.msg:
|
||||
msg = f'{excinfo.type.__name__}: {excinfo.msg}'
|
||||
else:
|
||||
msg = excinfo.type.__name__ or excinfo.msg
|
||||
super().__init__(msg)
|
||||
self.excinfo = excinfo
|
||||
|
||||
def __str__(self):
|
||||
try:
|
||||
formatted = self.excinfo.errdisplay
|
||||
except Exception:
|
||||
return super().__str__()
|
||||
else:
|
||||
return textwrap.dedent(f"""
|
||||
{super().__str__()}
|
||||
|
||||
Uncaught in the interpreter:
|
||||
|
||||
{formatted}
|
||||
""".strip())
|
||||
|
||||
|
||||
UNBOUND = 2 # error; this should not happen.
|
||||
|
||||
|
||||
class WorkerContext(_thread.WorkerContext):
|
||||
|
||||
@classmethod
|
||||
def prepare(cls, initializer, initargs, shared):
|
||||
def resolve_task(fn, args, kwargs):
|
||||
if isinstance(fn, str):
|
||||
# XXX Circle back to this later.
|
||||
raise TypeError('scripts not supported')
|
||||
if args or kwargs:
|
||||
raise ValueError(f'a script does not take args or kwargs, got {args!r} and {kwargs!r}')
|
||||
data = textwrap.dedent(fn)
|
||||
kind = 'script'
|
||||
# Make sure the script compiles.
|
||||
# Ideally we wouldn't throw away the resulting code
|
||||
# object. However, there isn't much to be done until
|
||||
# code objects are shareable and/or we do a better job
|
||||
# of supporting code objects in _interpreters.exec().
|
||||
compile(data, '<string>', 'exec')
|
||||
else:
|
||||
# Functions defined in the __main__ module can't be pickled,
|
||||
# so they can't be used here. In the future, we could possibly
|
||||
# borrow from multiprocessing to work around this.
|
||||
data = pickle.dumps((fn, args, kwargs))
|
||||
kind = 'function'
|
||||
return (data, kind)
|
||||
|
||||
if initializer is not None:
|
||||
try:
|
||||
initdata = resolve_task(initializer, initargs, {})
|
||||
except ValueError:
|
||||
if isinstance(initializer, str) and initargs:
|
||||
raise ValueError(f'an initializer script does not take args, got {initargs!r}')
|
||||
raise # re-raise
|
||||
else:
|
||||
initdata = None
|
||||
def create_context():
|
||||
return cls(initdata, shared)
|
||||
return create_context, resolve_task
|
||||
|
||||
@classmethod
|
||||
@contextlib.contextmanager
|
||||
def _capture_exc(cls, resultsid):
|
||||
try:
|
||||
yield
|
||||
except BaseException as exc:
|
||||
# Send the captured exception out on the results queue,
|
||||
# but still leave it unhandled for the interpreter to handle.
|
||||
err = pickle.dumps(exc)
|
||||
_interpqueues.put(resultsid, (None, err), 1, UNBOUND)
|
||||
raise # re-raise
|
||||
|
||||
@classmethod
|
||||
def _send_script_result(cls, resultsid):
|
||||
_interpqueues.put(resultsid, (None, None), 0, UNBOUND)
|
||||
|
||||
@classmethod
|
||||
def _call(cls, func, args, kwargs, resultsid):
|
||||
with cls._capture_exc(resultsid):
|
||||
res = func(*args or (), **kwargs or {})
|
||||
# Send the result back.
|
||||
try:
|
||||
_interpqueues.put(resultsid, (res, None), 0, UNBOUND)
|
||||
except _interpreters.NotShareableError:
|
||||
res = pickle.dumps(res)
|
||||
_interpqueues.put(resultsid, (res, None), 1, UNBOUND)
|
||||
|
||||
@classmethod
|
||||
def _call_pickled(cls, pickled, resultsid):
|
||||
fn, args, kwargs = pickle.loads(pickled)
|
||||
cls._call(fn, args, kwargs, resultsid)
|
||||
|
||||
def __init__(self, initdata, shared=None):
|
||||
self.initdata = initdata
|
||||
self.shared = dict(shared) if shared else None
|
||||
self.interpid = None
|
||||
self.resultsid = None
|
||||
|
||||
def __del__(self):
|
||||
if self.interpid is not None:
|
||||
self.finalize()
|
||||
|
||||
def _exec(self, script):
|
||||
assert self.interpid is not None
|
||||
excinfo = _interpreters.exec(self.interpid, script, restrict=True)
|
||||
if excinfo is not None:
|
||||
raise ExecutionFailed(excinfo)
|
||||
|
||||
def initialize(self):
|
||||
assert self.interpid is None, self.interpid
|
||||
self.interpid = _interpreters.create(reqrefs=True)
|
||||
try:
|
||||
_interpreters.incref(self.interpid)
|
||||
|
||||
maxsize = 0
|
||||
fmt = 0
|
||||
self.resultsid = _interpqueues.create(maxsize, fmt, UNBOUND)
|
||||
|
||||
self._exec(f'from {__name__} import WorkerContext')
|
||||
|
||||
if self.shared:
|
||||
_interpreters.set___main___attrs(
|
||||
self.interpid, self.shared, restrict=True)
|
||||
|
||||
if self.initdata:
|
||||
self.run(self.initdata)
|
||||
except BaseException:
|
||||
self.finalize()
|
||||
raise # re-raise
|
||||
|
||||
def finalize(self):
|
||||
interpid = self.interpid
|
||||
resultsid = self.resultsid
|
||||
self.resultsid = None
|
||||
self.interpid = None
|
||||
if resultsid is not None:
|
||||
try:
|
||||
_interpqueues.destroy(resultsid)
|
||||
except _interpqueues.QueueNotFoundError:
|
||||
pass
|
||||
if interpid is not None:
|
||||
try:
|
||||
_interpreters.decref(interpid)
|
||||
except _interpreters.InterpreterNotFoundError:
|
||||
pass
|
||||
|
||||
def run(self, task):
|
||||
data, kind = task
|
||||
if kind == 'script':
|
||||
raise NotImplementedError('script kind disabled')
|
||||
script = f"""
|
||||
with WorkerContext._capture_exc({self.resultsid}):
|
||||
{textwrap.indent(data, ' ')}
|
||||
WorkerContext._send_script_result({self.resultsid})"""
|
||||
elif kind == 'function':
|
||||
script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})'
|
||||
else:
|
||||
raise NotImplementedError(kind)
|
||||
|
||||
try:
|
||||
self._exec(script)
|
||||
except ExecutionFailed as exc:
|
||||
exc_wrapper = exc
|
||||
else:
|
||||
exc_wrapper = None
|
||||
|
||||
# Return the result, or raise the exception.
|
||||
while True:
|
||||
try:
|
||||
obj = _interpqueues.get(self.resultsid)
|
||||
except _interpqueues.QueueNotFoundError:
|
||||
raise # re-raise
|
||||
except _interpqueues.QueueError:
|
||||
continue
|
||||
except ModuleNotFoundError:
|
||||
# interpreters.queues doesn't exist, which means
|
||||
# QueueEmpty doesn't. Act as though it does.
|
||||
continue
|
||||
else:
|
||||
break
|
||||
(res, excdata), pickled, unboundop = obj
|
||||
assert unboundop is None, unboundop
|
||||
if excdata is not None:
|
||||
assert res is None, res
|
||||
assert pickled
|
||||
assert exc_wrapper is not None
|
||||
exc = pickle.loads(excdata)
|
||||
raise exc from exc_wrapper
|
||||
return pickle.loads(res) if pickled else res
|
||||
|
||||
|
||||
class BrokenInterpreterPool(_thread.BrokenThreadPool):
|
||||
"""
|
||||
Raised when a worker thread in an InterpreterPoolExecutor failed initializing.
|
||||
"""
|
||||
|
||||
|
||||
class InterpreterPoolExecutor(_thread.ThreadPoolExecutor):
|
||||
|
||||
BROKEN = BrokenInterpreterPool
|
||||
|
||||
@classmethod
|
||||
def prepare_context(cls, initializer, initargs, shared):
|
||||
return WorkerContext.prepare(initializer, initargs, shared)
|
||||
|
||||
def __init__(self, max_workers=None, thread_name_prefix='',
|
||||
initializer=None, initargs=(), shared=None):
|
||||
"""Initializes a new InterpreterPoolExecutor instance.
|
||||
|
||||
Args:
|
||||
max_workers: The maximum number of interpreters that can be used to
|
||||
execute the given calls.
|
||||
thread_name_prefix: An optional name prefix to give our threads.
|
||||
initializer: A callable or script used to initialize
|
||||
each worker interpreter.
|
||||
initargs: A tuple of arguments to pass to the initializer.
|
||||
shared: A mapping of shareabled objects to be inserted into
|
||||
each worker interpreter.
|
||||
"""
|
||||
super().__init__(max_workers, thread_name_prefix,
|
||||
initializer, initargs, shared=shared)
|
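[Editor's note] For illustration, a hedged sketch of how the shared parameter above behaves: initialize() installs the mapping's entries as attributes of each worker interpreter's __main__ module via _interpreters.set___main___attrs(), so a task can read them back through __main__. Values must be shareable across interpreters (str is).

    from concurrent.futures import InterpreterPoolExecutor

    # 'greeting' becomes __main__.greeting inside every worker interpreter.
    with InterpreterPoolExecutor(shared={'greeting': 'hello'}) as executor:
        fut = executor.submit(eval, "__import__('__main__').greeting")
        print(fut.result())  # hello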

Lib/concurrent/futures/thread.py

@@ -43,19 +43,46 @@ if hasattr(os, 'register_at_fork'):
                          after_in_parent=_global_shutdown_lock.release)


-class _WorkItem:
-    def __init__(self, future, fn, args, kwargs):
-        self.future = future
-        self.fn = fn
-        self.args = args
-        self.kwargs = kwargs
+class WorkerContext:

-    def run(self):
+    @classmethod
+    def prepare(cls, initializer, initargs):
+        if initializer is not None:
+            if not callable(initializer):
+                raise TypeError("initializer must be a callable")
+        def create_context():
+            return cls(initializer, initargs)
+        def resolve_task(fn, args, kwargs):
+            return (fn, args, kwargs)
+        return create_context, resolve_task
+
+    def __init__(self, initializer, initargs):
+        self.initializer = initializer
+        self.initargs = initargs
+
+    def initialize(self):
+        if self.initializer is not None:
+            self.initializer(*self.initargs)
+
+    def finalize(self):
+        pass
+
+    def run(self, task):
+        fn, args, kwargs = task
+        return fn(*args, **kwargs)
+
+
+class _WorkItem:
+    def __init__(self, future, task):
+        self.future = future
+        self.task = task
+
+    def run(self, ctx):
         if not self.future.set_running_or_notify_cancel():
             return

         try:
-            result = self.fn(*self.args, **self.kwargs)
+            result = ctx.run(self.task)
         except BaseException as exc:
             self.future.set_exception(exc)
             # Break a reference cycle with the exception 'exc'
@@ -66,16 +93,15 @@ class _WorkItem:
     __class_getitem__ = classmethod(types.GenericAlias)


-def _worker(executor_reference, work_queue, initializer, initargs):
-    if initializer is not None:
-        try:
-            initializer(*initargs)
-        except BaseException:
-            _base.LOGGER.critical('Exception in initializer:', exc_info=True)
-            executor = executor_reference()
-            if executor is not None:
-                executor._initializer_failed()
-            return
+def _worker(executor_reference, ctx, work_queue):
+    try:
+        ctx.initialize()
+    except BaseException:
+        _base.LOGGER.critical('Exception in initializer:', exc_info=True)
+        executor = executor_reference()
+        if executor is not None:
+            executor._initializer_failed()
+        return
     try:
         while True:
             try:
@@ -89,7 +115,7 @@ def _worker(executor_reference, work_queue, initializer, initargs):
             work_item = work_queue.get(block=True)

             if work_item is not None:
-                work_item.run()
+                work_item.run(ctx)
                 # Delete references to object. See GH-60488
                 del work_item
                 continue
@@ -110,6 +136,8 @@ def _worker(executor_reference, work_queue, initializer, initargs):
                 del executor
     except BaseException:
         _base.LOGGER.critical('Exception in worker', exc_info=True)
+    finally:
+        ctx.finalize()


 class BrokenThreadPool(_base.BrokenExecutor):
@@ -120,11 +148,17 @@ class BrokenThreadPool(_base.BrokenExecutor):

 class ThreadPoolExecutor(_base.Executor):

+    BROKEN = BrokenThreadPool
+
     # Used to assign unique thread names when thread_name_prefix is not supplied.
     _counter = itertools.count().__next__

+    @classmethod
+    def prepare_context(cls, initializer, initargs):
+        return WorkerContext.prepare(initializer, initargs)
+
     def __init__(self, max_workers=None, thread_name_prefix='',
-                 initializer=None, initargs=()):
+                 initializer=None, initargs=(), **ctxkwargs):
         """Initializes a new ThreadPoolExecutor instance.

         Args:
@@ -133,6 +167,7 @@ class ThreadPoolExecutor(_base.Executor):
             thread_name_prefix: An optional name prefix to give our threads.
             initializer: A callable used to initialize worker threads.
             initargs: A tuple of arguments to pass to the initializer.
+            ctxkwargs: Additional arguments to cls.prepare_context().
         """
         if max_workers is None:
             # ThreadPoolExecutor is often used to:
@@ -146,8 +181,9 @@ class ThreadPoolExecutor(_base.Executor):
         if max_workers <= 0:
             raise ValueError("max_workers must be greater than 0")

-        if initializer is not None and not callable(initializer):
-            raise TypeError("initializer must be a callable")
+        (self._create_worker_context,
+         self._resolve_work_item_task,
+        ) = type(self).prepare_context(initializer, initargs, **ctxkwargs)

         self._max_workers = max_workers
         self._work_queue = queue.SimpleQueue()
@@ -158,13 +194,11 @@ class ThreadPoolExecutor(_base.Executor):
         self._shutdown_lock = threading.Lock()
         self._thread_name_prefix = (thread_name_prefix or
                                     ("ThreadPoolExecutor-%d" % self._counter()))
-        self._initializer = initializer
-        self._initargs = initargs

     def submit(self, fn, /, *args, **kwargs):
         with self._shutdown_lock, _global_shutdown_lock:
             if self._broken:
-                raise BrokenThreadPool(self._broken)
+                raise self.BROKEN(self._broken)

             if self._shutdown:
                 raise RuntimeError('cannot schedule new futures after shutdown')
@@ -173,7 +207,8 @@ class ThreadPoolExecutor(_base.Executor):
                                  'interpreter shutdown')

             f = _base.Future()
-            w = _WorkItem(f, fn, args, kwargs)
+            task = self._resolve_work_item_task(fn, args, kwargs)
+            w = _WorkItem(f, task)

             self._work_queue.put(w)
             self._adjust_thread_count()
@@ -196,9 +231,8 @@ class ThreadPoolExecutor(_base.Executor):
                                      num_threads)
             t = threading.Thread(name=thread_name, target=_worker,
                                  args=(weakref.ref(self, weakref_cb),
-                                       self._work_queue,
-                                       self._initializer,
-                                       self._initargs))
+                                       self._create_worker_context(),
+                                       self._work_queue))
             t.start()
             self._threads.add(t)
             _threads_queues[t] = self._work_queue
@@ -214,7 +248,7 @@ class ThreadPoolExecutor(_base.Executor):
             except queue.Empty:
                 break
             if work_item is not None:
-                work_item.future.set_exception(BrokenThreadPool(self._broken))
+                work_item.future.set_exception(self.BROKEN(self._broken))

     def shutdown(self, wait=True, *, cancel_futures=False):
         with self._shutdown_lock:
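[Editor's note] Finally, a sketch of how worker exceptions surface through this plumbing (again an illustration, not part of the diff): _capture_exc() pickles the exception onto the results queue, WorkerContext.run() re-raises it from the ExecutionFailed wrapper, and _WorkItem.run() stores it on the future.

    from concurrent.futures import InterpreterPoolExecutor

    with InterpreterPoolExecutor() as executor:
        fut = executor.submit(divmod, 1, 0)  # ZeroDivisionError in the worker
        try:
            fut.result()
        except ZeroDivisionError as exc:
            # exc.__cause__ is the ExecutionFailed raised by _exec()
            print('caught from worker:', exc)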