mirror of
https://github.com/python/cpython.git
synced 2025-11-25 21:11:09 +00:00
gh-74028: add buffersize parameter to concurrent.futures.Executor.map for lazier behavior (#125663)
`concurrent.futures.Executor.map` now supports limiting the number of submitted tasks whose results have not yet been yielded, via the new `buffersize` parameter.

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
parent e98d321bef
commit a005835f69
6 changed files with 128 additions and 7 deletions
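For orientation before the diff, a minimal usage sketch of the new parameter (illustrative only, not part of this commit; it assumes an interpreter that already ships this change). With a bounded *buffersize*, `Executor.map` can stream results from an unbounded input while keeping at most *buffersize* submitted tasks whose results have not yet been yielded:

    import itertools
    from concurrent.futures import ThreadPoolExecutor

    def square(x):
        return x * x

    with ThreadPoolExecutor(max_workers=4) as executor:
        # At most 8 submitted tasks are waiting to have their results yielded;
        # iteration over itertools.count() pauses whenever the buffer is full.
        results = executor.map(square, itertools.count(), buffersize=8)
        for value in itertools.islice(results, 5):
            print(value)  # 0, 1, 4, 9, 16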
@@ -40,11 +40,14 @@ Executor Objects
          future = executor.submit(pow, 323, 1235)
          print(future.result())
 
-   .. method:: map(fn, *iterables, timeout=None, chunksize=1)
+   .. method:: map(fn, *iterables, timeout=None, chunksize=1, buffersize=None)
 
      Similar to :func:`map(fn, *iterables) <map>` except:
 
-     * the *iterables* are collected immediately rather than lazily;
+     * The *iterables* are collected immediately rather than lazily, unless a
+       *buffersize* is specified to limit the number of submitted tasks whose
+       results have not yet been yielded. If the buffer is full, iteration over
+       the *iterables* pauses until a result is yielded from the buffer.
 
      * *fn* is executed asynchronously and several calls to
        *fn* may be made concurrently.
@@ -68,7 +71,10 @@ Executor Objects
      *chunksize* has no effect.
 
      .. versionchanged:: 3.5
-        Added the *chunksize* argument.
+        Added the *chunksize* parameter.
+
+     .. versionchanged:: next
+        Added the *buffersize* parameter.
 
   .. method:: shutdown(wait=True, *, cancel_futures=False)
 
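To make the documented eager-versus-lazy distinction concrete, a small illustrative sketch (not part of this commit; it assumes an interpreter that includes the new parameter). Without *buffersize* the input iterator is consumed in full as soon as `map()` is called; with *buffersize* only that many items are pulled up front, which the new `test_map_buffersize_when_buffer_is_full` test further down also verifies:

    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=2) as executor:
        eager_input = iter(range(100))
        executor.map(str, eager_input)           # default: input collected immediately
        print(next(eager_input, None))           # None -- the iterator is exhausted

        lazy_input = iter(range(100))
        executor.map(str, lazy_input, buffersize=2)
        print(next(lazy_input, None))            # 2 -- only two items were buffered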
@@ -465,6 +465,13 @@ contextvars
 * Support context manager protocol by :class:`contextvars.Token`.
   (Contributed by Andrew Svetlov in :gh:`129889`.)
 
+* Add the optional ``buffersize`` parameter to
+  :meth:`concurrent.futures.Executor.map` to limit the number of submitted
+  tasks whose results have not yet been yielded. If the buffer is full,
+  iteration over the *iterables* pauses until a result is yielded from the
+  buffer.
+  (Contributed by Enzo Bonnal and Josh Rosenberg in :gh:`74028`.)
+
 
 ctypes
 ------
@@ -8,6 +8,8 @@ import logging
 import threading
 import time
 import types
+import weakref
+from itertools import islice
 
 FIRST_COMPLETED = 'FIRST_COMPLETED'
 FIRST_EXCEPTION = 'FIRST_EXCEPTION'
@@ -572,7 +574,7 @@ class Executor(object):
         """
         raise NotImplementedError()
 
-    def map(self, fn, *iterables, timeout=None, chunksize=1):
+    def map(self, fn, *iterables, timeout=None, chunksize=1, buffersize=None):
         """Returns an iterator equivalent to map(fn, iter).
 
         Args:
@@ -584,6 +586,11 @@ class Executor(object):
                 before being passed to a child process. This argument is only
                 used by ProcessPoolExecutor; it is ignored by
                 ThreadPoolExecutor.
+            buffersize: The number of submitted tasks whose results have not
+                yet been yielded. If the buffer is full, iteration over the
+                iterables pauses until a result is yielded from the buffer.
+                If None, all input elements are eagerly collected, and a task is
+                submitted for each.
 
         Returns:
             An iterator equivalent to: map(func, *iterables) but the calls may
@@ -594,10 +601,25 @@ class Executor(object):
                 before the given timeout.
             Exception: If fn(*args) raises for any values.
         """
+        if buffersize is not None and not isinstance(buffersize, int):
+            raise TypeError("buffersize must be an integer or None")
+        if buffersize is not None and buffersize < 1:
+            raise ValueError("buffersize must be None or > 0")
+
         if timeout is not None:
             end_time = timeout + time.monotonic()
 
-        fs = [self.submit(fn, *args) for args in zip(*iterables)]
+        zipped_iterables = zip(*iterables)
+        if buffersize:
+            fs = collections.deque(
+                self.submit(fn, *args) for args in islice(zipped_iterables, buffersize)
+            )
+        else:
+            fs = [self.submit(fn, *args) for args in zipped_iterables]
 
+        # Use a weak reference to ensure that the executor can be garbage
+        # collected independently of the result_iterator closure.
+        executor_weakref = weakref.ref(self)
+
         # Yield must be hidden in closure so that the futures are submitted
         # before the first iterator value is required.
@@ -606,6 +628,12 @@
                 # reverse to keep finishing order
                 fs.reverse()
                 while fs:
+                    if (
+                        buffersize
+                        and (executor := executor_weakref())
+                        and (args := next(zipped_iterables, None))
+                    ):
+                        fs.appendleft(executor.submit(fn, *args))
                     # Careful not to keep a reference to the popped future
                     if timeout is None:
                         yield _result_or_cancel(fs.pop())
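The refill logic above can be easier to follow outside the `Executor` machinery (weak references, timeouts, cancellation). Below is a simplified standalone sketch of the same bounded-buffer pattern; the helper name `bounded_map` and its structure are illustrative and not part of the stdlib. The idea: seed a deque with at most *buffersize* futures via `islice`, then submit one more task each time a result is about to be yielded.

    import collections
    from itertools import islice
    from concurrent.futures import ThreadPoolExecutor

    def bounded_map(executor, fn, iterable, buffersize):
        # Illustrative sketch: keep at most `buffersize` submitted futures
        # whose results have not been yielded yet (assumes the input never
        # yields None, a shortcut the real implementation does not need).
        args_iter = iter(iterable)
        buffer = collections.deque(
            executor.submit(fn, arg) for arg in islice(args_iter, buffersize)
        )
        while buffer:
            # Top the buffer back up before yielding, so the pool stays busy.
            arg = next(args_iter, None)
            if arg is not None:
                buffer.append(executor.submit(fn, arg))
            yield buffer.popleft().result()

    with ThreadPoolExecutor(max_workers=4) as pool:
        print(list(bounded_map(pool, str, range(6), buffersize=2)))
        # ['0', '1', '2', '3', '4', '5']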
@@ -813,7 +813,7 @@ class ProcessPoolExecutor(_base.Executor):
             return f
     submit.__doc__ = _base.Executor.submit.__doc__
 
-    def map(self, fn, *iterables, timeout=None, chunksize=1):
+    def map(self, fn, *iterables, timeout=None, chunksize=1, buffersize=None):
         """Returns an iterator equivalent to map(fn, iter).
 
         Args:
@@ -824,6 +824,11 @@ class ProcessPoolExecutor(_base.Executor):
             chunksize: If greater than one, the iterables will be chopped into
                 chunks of size chunksize and submitted to the process pool.
                 If set to one, the items in the list will be sent one at a time.
+            buffersize: The number of submitted tasks whose results have not
+                yet been yielded. If the buffer is full, iteration over the
+                iterables pauses until a result is yielded from the buffer.
+                If None, all input elements are eagerly collected, and a task is
+                submitted for each.
 
         Returns:
             An iterator equivalent to: map(func, *iterables) but the calls may
@@ -839,7 +844,8 @@ class ProcessPoolExecutor(_base.Executor):
 
         results = super().map(partial(_process_chunk, fn),
                               itertools.batched(zip(*iterables), chunksize),
-                              timeout=timeout)
+                              timeout=timeout,
+                              buffersize=buffersize)
         return _chain_from_iterable_of_lists(results)
 
     def shutdown(self, wait=True, *, cancel_futures=False):
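A note on the `ProcessPoolExecutor` path above: *buffersize* is forwarded to `super().map()` over the chunked input, so the buffer counts pending chunks rather than individual items, and roughly `chunksize * buffersize` input elements may be consumed ahead of the yielded results. An illustrative sketch (the parameter values are arbitrary):

    import itertools
    from concurrent.futures import ProcessPoolExecutor

    def square(x):
        return x * x

    if __name__ == "__main__":
        with ProcessPoolExecutor(max_workers=4) as executor:
            # Each submitted task covers a chunk of 16 items, and at most 8 such
            # chunks (about 128 input elements) are buffered ahead of the results.
            results = executor.map(
                square, itertools.count(), chunksize=16, buffersize=8
            )
            for value in itertools.islice(results, 3):
                print(value)  # 0, 1, 4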
@@ -1,7 +1,9 @@
+import itertools
 import threading
 import time
 import weakref
 from concurrent import futures
+from operator import add
 from test import support
 from test.support import Py_GIL_DISABLED
 
@@ -73,6 +75,74 @@ class ExecutorTest:
         # take longer than the specified timeout.
         self.assertIn(results, ([None, None], [None], []))
 
+    def test_map_buffersize_type_validation(self):
+        for buffersize in ("foo", 2.0):
+            with self.subTest(buffersize=buffersize):
+                with self.assertRaisesRegex(
+                    TypeError,
+                    "buffersize must be an integer or None",
+                ):
+                    self.executor.map(str, range(4), buffersize=buffersize)
+
+    def test_map_buffersize_value_validation(self):
+        for buffersize in (0, -1):
+            with self.subTest(buffersize=buffersize):
+                with self.assertRaisesRegex(
+                    ValueError,
+                    "buffersize must be None or > 0",
+                ):
+                    self.executor.map(str, range(4), buffersize=buffersize)
+
+    def test_map_buffersize(self):
+        ints = range(4)
+        for buffersize in (1, 2, len(ints), len(ints) * 2):
+            with self.subTest(buffersize=buffersize):
+                res = self.executor.map(str, ints, buffersize=buffersize)
+                self.assertListEqual(list(res), ["0", "1", "2", "3"])
+
+    def test_map_buffersize_on_multiple_iterables(self):
+        ints = range(4)
+        for buffersize in (1, 2, len(ints), len(ints) * 2):
+            with self.subTest(buffersize=buffersize):
+                res = self.executor.map(add, ints, ints, buffersize=buffersize)
+                self.assertListEqual(list(res), [0, 2, 4, 6])
+
+    def test_map_buffersize_on_infinite_iterable(self):
+        res = self.executor.map(str, itertools.count(), buffersize=2)
+        self.assertEqual(next(res, None), "0")
+        self.assertEqual(next(res, None), "1")
+        self.assertEqual(next(res, None), "2")
+
+    def test_map_buffersize_on_multiple_infinite_iterables(self):
+        res = self.executor.map(
+            add,
+            itertools.count(),
+            itertools.count(),
+            buffersize=2
+        )
+        self.assertEqual(next(res, None), 0)
+        self.assertEqual(next(res, None), 2)
+        self.assertEqual(next(res, None), 4)
+
+    def test_map_buffersize_on_empty_iterable(self):
+        res = self.executor.map(str, [], buffersize=2)
+        self.assertIsNone(next(res, None))
+
+    def test_map_buffersize_without_iterable(self):
+        res = self.executor.map(str, buffersize=2)
+        self.assertIsNone(next(res, None))
+
+    def test_map_buffersize_when_buffer_is_full(self):
+        ints = iter(range(4))
+        buffersize = 2
+        self.executor.map(str, ints, buffersize=buffersize)
+        self.executor.shutdown(wait=True)  # wait for tasks to complete
+        self.assertEqual(
+            next(ints),
+            buffersize,
+            msg="should have fetched only `buffersize` elements from `ints`.",
+        )
+
     def test_shutdown_race_issue12456(self):
         # Issue #12456: race condition at shutdown where trying to post a
         # sentinel in the call queue blocks (the queue is full while processes
@@ -0,0 +1,4 @@
+Add the optional ``buffersize`` parameter to
+:meth:`concurrent.futures.Executor.map` to limit the number of submitted tasks
+whose results have not yet been yielded. If the buffer is full, iteration over
+the *iterables* pauses until a result is yielded from the buffer.