Mirror of https://github.com/python/cpython.git, synced 2025-08-04 17:08:35 +00:00
Close #19946: use runpy as needed in multiprocessing
- handles main files without a suffix
- handles main submodules properly
- adds test cases for the various kinds of __main__
parent 7cff4cd7e9
commit 9a76735b1d
4 changed files with 375 additions and 54 deletions
Doc/whatsnew/3.4.rst
@@ -624,13 +624,22 @@ mmap objects can now be weakref'ed.
 multiprocessing
 ---------------
 
-On Unix two new *start methods* have been added for starting processes
-using :mod:`multiprocessing`. These make the mixing of processes with
-threads more robust. See :issue:`8713`.
+On Unix, two new *start methods* (``spawn`` and ``forkserver``) have been
+added for starting processes using :mod:`multiprocessing`. These make
+the mixing of processes with threads more robust, and the ``spawn``
+method matches the semantics that multiprocessing has always used on
+Windows. (Contributed by Richard Oudkerk in :issue:`8713`).
 
 Also, except when using the old *fork* start method, child processes
 will no longer inherit unneeded handles/file descriptors from their parents.
 
+:mod:`multiprocessing` now relies on :mod:`runpy` (which implements the
+``-m`` switch) to initialise ``__main__`` appropriately in child processes
+when using the ``spawn`` or ``forkserver`` start methods. This resolves some
+edge cases where combining multiprocessing, the ``-m`` command line switch
+and explicit relative imports could cause obscure failures in child
+processes. (Contributed by Nick Coghlan in :issue:`19946`)
 
 
 os
 --
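For readers skimming the documentation change above, here is a minimal, illustrative sketch of the start-method API it describes; the function name square and the choice of 'spawn' are assumptions, not part of the patch:

import multiprocessing as mp

def square(x):
    # Module-level so that 'spawn'/'forkserver' children can look it up in
    # the re-initialised __main__ (aliased to __mp_main__ in the child).
    return x * x

if __name__ == '__main__':
    # The guard matters: spawn and forkserver re-execute __main__ in each child.
    mp.set_start_method('spawn')      # 'fork' and 'forkserver' also exist on Unix
    with mp.Pool(2) as pool:
        print(pool.map(square, [1, 2, 3]))   # -> [1, 4, 9]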
Lib/multiprocessing/spawn.py
@@ -11,6 +11,8 @@
 import os
 import pickle
 import sys
+import runpy
+import types
 
 from . import get_start_method, set_start_method
 from . import process
@@ -157,15 +159,19 @@ def get_preparation_data(name):
         start_method=get_start_method(),
         )
 
-    if sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
-        main_path = getattr(sys.modules['__main__'], '__file__', None)
-        if not main_path and sys.argv[0] not in ('', '-c'):
-            main_path = sys.argv[0]
+    # Figure out whether to initialise main in the subprocess as a module
+    # or through direct execution (or to leave it alone entirely)
+    main_module = sys.modules['__main__']
+    main_mod_name = getattr(main_module.__spec__, "name", None)
+    if main_mod_name is not None:
+        d['init_main_from_name'] = main_mod_name
+    elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
+        main_path = getattr(main_module, '__file__', None)
         if main_path is not None:
             if (not os.path.isabs(main_path) and
                     process.ORIGINAL_DIR is not None):
                 main_path = os.path.join(process.ORIGINAL_DIR, main_path)
-            d['main_path'] = os.path.normpath(main_path)
+            d['init_main_from_path'] = os.path.normpath(main_path)
 
     return d
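The net effect of the hunk above is that the parent now records either init_main_from_name (when __main__ has a module __spec__, e.g. it was started with -m) or init_main_from_path (an ordinary script). A minimal sketch of that distinction, illustrative rather than taken from the patch:

import sys

main_module = sys.modules['__main__']
spec_name = getattr(getattr(main_module, '__spec__', None), 'name', None)
if spec_name is not None:
    # started as "python -m pkg.mod": the child re-imports __main__ by name
    print('init_main_from_name would be', spec_name)
else:
    # started as a plain script: the child re-runs __main__ from its path
    print('init_main_from_path would be', getattr(main_module, '__file__', None))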
@@ -206,55 +212,68 @@ def prepare(data):
     if 'start_method' in data:
         set_start_method(data['start_method'])
 
-    if 'main_path' in data:
-        import_main_path(data['main_path'])
+    if 'init_main_from_name' in data:
+        _fixup_main_from_name(data['init_main_from_name'])
+    elif 'init_main_from_path' in data:
+        _fixup_main_from_path(data['init_main_from_path'])
 
+# Multiprocessing module helpers to fix up the main module in
+# spawned subprocesses
+def _fixup_main_from_name(mod_name):
+    # __main__.py files for packages, directories, zip archives, etc, run
+    # their "main only" code unconditionally, so we don't even try to
+    # populate anything in __main__, nor do we make any changes to
+    # __main__ attributes
+    current_main = sys.modules['__main__']
+    if mod_name == "__main__" or mod_name.endswith(".__main__"):
+        return
+
+    # If this process was forked, __main__ may already be populated
+    if getattr(current_main.__spec__, "name", None) == mod_name:
+        return
+
+    # Otherwise, __main__ may contain some non-main code where we need to
+    # support unpickling it properly. We rerun it as __mp_main__ and make
+    # the normal __main__ an alias to that
+    old_main_modules.append(current_main)
+    main_module = types.ModuleType("__mp_main__")
+    main_content = runpy.run_module(mod_name,
+                                    run_name="__mp_main__",
+                                    alter_sys=True)
+    main_module.__dict__.update(main_content)
+    sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
+
+
+def _fixup_main_from_path(main_path):
+    # If this process was forked, __main__ may already be populated
+    current_main = sys.modules['__main__']
+
+    # Unfortunately, the main ipython launch script historically had no
+    # "if __name__ == '__main__'" guard, so we work around that
+    # by treating it like a __main__.py file
+    # See https://github.com/ipython/ipython/issues/4698
+    main_name = os.path.splitext(os.path.basename(main_path))[0]
+    if main_name == 'ipython':
+        return
+
+    # Otherwise, if __file__ already has the setting we expect,
+    # there's nothing more to do
+    if getattr(current_main, '__file__', None) == main_path:
+        return
+
+    # If the parent process has sent a path through rather than a module
+    # name we assume it is an executable script that may contain
+    # non-main code that needs to be executed
+    old_main_modules.append(current_main)
+    main_module = types.ModuleType("__mp_main__")
+    main_content = runpy.run_path(main_path,
+                                  run_name="__mp_main__")
+    main_module.__dict__.update(main_content)
+    sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
+
 
 def import_main_path(main_path):
     '''
     Set sys.modules['__main__'] to module at main_path
     '''
-    # XXX (ncoghlan): The following code makes several bogus
-    # assumptions regarding the relationship between __file__
-    # and a module's real name. See PEP 302 and issue #10845
-    if getattr(sys.modules['__main__'], '__file__', None) == main_path:
-        return
-
-    main_name = os.path.splitext(os.path.basename(main_path))[0]
-    if main_name == '__init__':
-        main_name = os.path.basename(os.path.dirname(main_path))
-
-    if main_name == '__main__':
-        main_module = sys.modules['__main__']
-        main_module.__file__ = main_path
-    elif main_name != 'ipython':
-        # Main modules not actually called __main__.py may
-        # contain additional code that should still be executed
-        import importlib
-        import types
-
-        if main_path is None:
-            dirs = None
-        elif os.path.basename(main_path).startswith('__init__.py'):
-            dirs = [os.path.dirname(os.path.dirname(main_path))]
-        else:
-            dirs = [os.path.dirname(main_path)]
-
-        assert main_name not in sys.modules, main_name
-        sys.modules.pop('__mp_main__', None)
-        # We should not try to load __main__
-        # since that would execute 'if __name__ == "__main__"'
-        # clauses, potentially causing a psuedo fork bomb.
-        main_module = types.ModuleType(main_name)
-        # XXX Use a target of main_module?
-        spec = importlib.find_spec(main_name, path=dirs)
-        if spec is None:
-            raise ImportError(name=main_name)
-        methods = importlib._bootstrap._SpecMethods(spec)
-        methods.init_module_attrs(main_module)
-        main_module.__name__ = '__mp_main__'
-        code = spec.loader.get_code(main_name)
-        exec(code, main_module.__dict__)
-
-    old_main_modules.append(sys.modules['__main__'])
-    sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
+    _fixup_main_from_path(main_path)
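For context, the helpers added above both funnel into runpy; the sketch below shows that re-execution pattern on its own, with a hypothetical module name standing in for the value the parent process sends:

import runpy
import sys
import types

mod_name = 'mypkg.cli'   # hypothetical: in the patch this comes from the parent
main_module = types.ModuleType('__mp_main__')
namespace = runpy.run_module(mod_name, run_name='__mp_main__', alter_sys=True)
main_module.__dict__.update(namespace)
# __main__ and __mp_main__ become aliases, so pickled references such as
# "__main__.f" can be resolved in the child process.
sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module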
287 Lib/test/test_multiprocessing_main_handling.py (new file)
@@ -0,0 +1,287 @@
# tests __main__ module handling in multiprocessing

import importlib
import importlib.machinery
import zipimport
import unittest
import sys
import os
import os.path
import py_compile

from test import support
from test.script_helper import (
    make_pkg, make_script, make_zip_pkg, make_zip_script,
    assert_python_ok, assert_python_failure, temp_dir,
    spawn_python, kill_python)

# We look inside the context module to find out which
# start methods we can check
from multiprocessing.context import _concrete_contexts

verbose = support.verbose

test_source = """\
# multiprocessing includes all sorts of shenanigans to make __main__
# attributes accessible in the subprocess in a pickle compatible way.

# We run the "doesn't work in the interactive interpreter" example from
# the docs to make sure it *does* work from an executed __main__,
# regardless of the invocation mechanism

import sys
import time
from multiprocessing import Pool, set_start_method

# We use this __main__ defined function in the map call below in order to
# check that multiprocessing is correctly running the unguarded
# code in child processes and then making it available as __main__
def f(x):
    return x*x

# Check explicit relative imports
if "check_sibling" in __file__:
    # We're inside a package and not in a __main__.py file
    # so make sure explicit relative imports work correctly
    from . import sibling

if __name__ == '__main__':
    start_method = sys.argv[1]
    set_start_method(start_method)
    p = Pool(5)
    results = []
    p.map_async(f, [1, 2, 3], callback=results.extend)
    deadline = time.time() + 2 # up to 2 s to report the results
    while not results:
        time.sleep(0.05)
        if time.time() > deadline:
            raise RuntimeError("Timed out waiting for results")
    results.sort()
    print(start_method, "->", results)
"""

test_source_main_skipped_in_children = """\
# __main__.py files have an implied "if __name__ == '__main__'" so
# multiprocessing should always skip running them in child processes

# This means we can't use __main__ defined functions in child processes,
# so we just use "int" as a passthrough operation below

if __name__ != "__main__":
    raise RuntimeError("Should only be called as __main__!")

import sys
import time
from multiprocessing import Pool, set_start_method

start_method = sys.argv[1]
set_start_method(start_method)
p = Pool(5)
results = []
p.map_async(int, [1, 4, 9], callback=results.extend)
deadline = time.time() + 2 # up to 2 s to report the results
while not results:
    time.sleep(0.05)
    if time.time() > deadline:
        raise RuntimeError("Timed out waiting for results")
results.sort()
print(start_method, "->", results)
"""

# These helpers were copied from test_cmd_line_script & tweaked a bit...

def _make_test_script(script_dir, script_basename,
                      source=test_source, omit_suffix=False):
    to_return = make_script(script_dir, script_basename,
                            source, omit_suffix)
    # Hack to check explicit relative imports
    if script_basename == "check_sibling":
        make_script(script_dir, "sibling", "")
    importlib.invalidate_caches()
    return to_return

def _make_test_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
                       source=test_source, depth=1):
    to_return = make_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
                             source, depth)
    importlib.invalidate_caches()
    return to_return

# There's no easy way to pass the script directory in to get
# -m to work (avoiding that is the whole point of making
# directories and zipfiles executable!)
# So we fake it for testing purposes with a custom launch script
launch_source = """\
import sys, os.path, runpy
sys.path.insert(0, %s)
runpy._run_module_as_main(%r)
"""

def _make_launch_script(script_dir, script_basename, module_name, path=None):
    if path is None:
        path = "os.path.dirname(__file__)"
    else:
        path = repr(path)
    source = launch_source % (path, module_name)
    to_return = make_script(script_dir, script_basename, source)
    importlib.invalidate_caches()
    return to_return

class MultiProcessingCmdLineMixin():
    maxDiff = None # Show full tracebacks on subprocess failure

    def setupClass(cls):
        if cls.start_method not in _concrete_contexts:
            raise unittest.SkipTest("%r start method not available" %
                                    cls.start_method)

    def _check_output(self, script_name, exit_code, out, err):
        if verbose > 1:
            print("Output from test script %r:" % script_name)
            print(out)
        self.assertEqual(exit_code, 0)
        self.assertEqual(err.decode('utf-8'), '')
        expected_results = "%s -> [1, 4, 9]" % self.start_method
        self.assertEqual(out.decode('utf-8').strip(), expected_results)

    def _check_script(self, script_name, *cmd_line_switches):
        if not __debug__:
            cmd_line_switches += ('-' + 'O' * sys.flags.optimize,)
        run_args = cmd_line_switches + (script_name, self.start_method)
        rc, out, err = assert_python_ok(*run_args, __isolated=False)
        self._check_output(script_name, rc, out, err)

    def test_basic_script(self):
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'script')
            self._check_script(script_name)

    def test_basic_script_no_suffix(self):
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'script',
                                            omit_suffix=True)
            self._check_script(script_name)

    def test_ipython_workaround(self):
        # Some versions of the IPython launch script are missing the
        # __name__ = "__main__" guard, and multiprocessing has long had
        # a workaround for that case
        # See https://github.com/ipython/ipython/issues/4698
        source = test_source_main_skipped_in_children
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'ipython',
                                            source=source)
            self._check_script(script_name)
            script_no_suffix = _make_test_script(script_dir, 'ipython',
                                                 source=source,
                                                 omit_suffix=True)
            self._check_script(script_no_suffix)

    def test_script_compiled(self):
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'script')
            py_compile.compile(script_name, doraise=True)
            os.remove(script_name)
            pyc_file = support.make_legacy_pyc(script_name)
            self._check_script(pyc_file)

    def test_directory(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, '__main__',
                                            source=source)
            self._check_script(script_dir)

    def test_directory_compiled(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, '__main__',
                                            source=source)
            py_compile.compile(script_name, doraise=True)
            os.remove(script_name)
            pyc_file = support.make_legacy_pyc(script_name)
            self._check_script(script_dir)

    def test_zipfile(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, '__main__',
                                            source=source)
            zip_name, run_name = make_zip_script(script_dir, 'test_zip', script_name)
            self._check_script(zip_name)

    def test_zipfile_compiled(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, '__main__',
                                            source=source)
            compiled_name = py_compile.compile(script_name, doraise=True)
            zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name)
            self._check_script(zip_name)

    def test_module_in_package(self):
        with temp_dir() as script_dir:
            pkg_dir = os.path.join(script_dir, 'test_pkg')
            make_pkg(pkg_dir)
            script_name = _make_test_script(pkg_dir, 'check_sibling')
            launch_name = _make_launch_script(script_dir, 'launch',
                                              'test_pkg.check_sibling')
            self._check_script(launch_name)

    def test_module_in_package_in_zipfile(self):
        with temp_dir() as script_dir:
            zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script')
            launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.script', zip_name)
            self._check_script(launch_name)

    def test_module_in_subpackage_in_zipfile(self):
        with temp_dir() as script_dir:
            zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script', depth=2)
            launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.test_pkg.script', zip_name)
            self._check_script(launch_name)

    def test_package(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            pkg_dir = os.path.join(script_dir, 'test_pkg')
            make_pkg(pkg_dir)
            script_name = _make_test_script(pkg_dir, '__main__',
                                            source=source)
            launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg')
            self._check_script(launch_name)

    def test_package_compiled(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            pkg_dir = os.path.join(script_dir, 'test_pkg')
            make_pkg(pkg_dir)
            script_name = _make_test_script(pkg_dir, '__main__',
                                            source=source)
            compiled_name = py_compile.compile(script_name, doraise=True)
            os.remove(script_name)
            pyc_file = support.make_legacy_pyc(script_name)
            launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg')
            self._check_script(launch_name)

# Test all supported start methods (setupClass skips as appropriate)

class SpawnCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
    start_method = 'spawn'
    main_in_children_source = test_source_main_skipped_in_children

class ForkCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
    start_method = 'fork'
    main_in_children_source = test_source

class ForkServerCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
    start_method = 'forkserver'
    main_in_children_source = test_source_main_skipped_in_children

def test_main():
    support.run_unittest(SpawnCmdLineTest,
                         ForkCmdLineTest,
                         ForkServerCmdLineTest)
    support.reap_children()

if __name__ == '__main__':
    test_main()
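Not part of the patch, but a small sketch of driving the new tests directly with unittest (CPython's own runner would be "python -m test test_multiprocessing_main_handling"); the loader call simply targets the module added above:

import unittest

suite = unittest.defaultTestLoader.loadTestsFromName(
    'test.test_multiprocessing_main_handling')
unittest.TextTestRunner(verbosity=2).run(suite)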
Misc/NEWS
@@ -44,6 +44,12 @@ Core and Builtins
 Library
 -------
 
+- Issue #19946: multiprocessing now uses runpy to initialize __main__ in
+  child processes when necessary, allowing it to correctly handle scripts
+  without suffixes and submodules that use explicit relative imports or
+  otherwise rely on parent modules being correctly imported prior to
+  execution.
+
 - Issue #19921: When Path.mkdir() is called with parents=True, any missing
   parent is created with the default permissions, ignoring the mode argument
   (mimicking the POSIX "mkdir -p" command).