Use the patch_multiprocessing argument of pydevd.settrace() instead of calling patch_new_process_functions() directly to enable multiprocess debugging.

Add multiprocess= argument to attach() and enable_attach().

Refactor common code in attach() and enable_attach().

Remove broken legacy multiproc implementation.
This commit is contained in:
Pavel Minaev 2019-08-23 15:52:05 -07:00 committed by Pavel Minaev
parent 915515d910
commit 5f2cb304a6
7 changed files with 94 additions and 448 deletions

View file

@ -45,7 +45,7 @@ def wait_for_attach():
return api.wait_for_attach()
def enable_attach(address, log_dir=None):
def enable_attach(address, log_dir=None, multiprocess=True):
"""Starts a DAP (Debug Adapter Protocol) server in this process,
listening for incoming socket connection from the IDE on the
specified address.
@ -59,6 +59,11 @@ def enable_attach(address, log_dir=None):
scenarios involving multiple processes. The log file for a process
with process ID <pid> will be named "ptvsd_<pid>.log".
If multiprocess is true, ptvsd will also intercept child processes
spawned by this process, inject a debug server into them, and
configure it to attach to the same IDE before the child process
starts running any user code.
Returns the interface and the port on which the debug server is
actually listening, in the same format as address. This may be
different from address if port was 0 in the latter, in which case
@ -73,7 +78,7 @@ def enable_attach(address, log_dir=None):
return api.enable_attach(address, log_dir)
def attach(address, log_dir=None):
def attach(address, log_dir=None, multiprocess=True):
"""Starts a DAP (Debug Adapter Protocol) server in this process,
and connects it to the IDE that is listening for an incoming
connection on a socket with the specified address.
@ -87,6 +92,11 @@ def attach(address, log_dir=None):
scenarios involving multiple processes. The log file for a process
with process ID <pid> will be named "ptvsd_<pid>.log".
If multiprocess is true, ptvsd will also intercept child processes
spawned by this process, inject a debug server into them, and
configure it to attach to the same IDE before the child process
starts running any user code.
This function doesn't return until connection to the IDE has been
established.
"""

View file

@ -61,10 +61,3 @@ pydevd.install_breakpointhook(ptvsd_breakpointhook)
from _pydevd_bundle import pydevd_constants
from _pydevd_bundle import pydevd_defaults
pydevd_defaults.PydevdCustomization.DEFAULT_PROTOCOL = pydevd_constants.HTTP_JSON_PROTOCOL
# Ensure our patch args is used. This is invoked when a child process is spawned
# with multiproc debugging enabled.
from _pydev_bundle import pydev_monkey
from ptvsd.server import multiproc
pydev_monkey.patch_args = multiproc.patch_and_quote_args
pydev_monkey.patch_new_process_functions()

View file

@ -2113,6 +2113,7 @@ def _enable_attach(
address,
dont_trace_start_patterns=(),
dont_trace_end_paterns=(),
patch_multiprocessing=False,
):
'''
Starts accepting connections at the given host/port. The debugger will not be initialized nor
@ -2138,6 +2139,7 @@ def _enable_attach(
block_until_connected=False,
dont_trace_start_patterns=dont_trace_start_patterns,
dont_trace_end_paterns=dont_trace_end_paterns,
patch_multiprocessing=patch_multiprocessing,
)
py_db = get_global_debugger()
py_db.wait_for_server_socket_ready()

View file

@ -19,20 +19,6 @@ from pydevd_file_utils import (
)
def _get_dont_trace_patterns():
    """Compute the filename patterns that should be excluded from tracing.

    Returns a pair ``(start_patterns, end_patterns)``: a 1-tuple holding the
    directory that contains the ptvsd package, and a 1-tuple holding the
    launcher script name. Both tuples are also written to the log.
    """
    package_file, _, _ = get_abs_path_real_path_and_base_from_file(ptvsd.__file__)
    package_dir = os.path.dirname(package_file)

    prefixes = (package_dir,)
    suffixes = ("ptvsd_launcher.py",)

    log.info(
        "Won't trace filenames starting with: {0!j}\n"
        "Won't trace filenames ending with: {1!j}",
        prefixes,
        suffixes,
    )
    return prefixes, suffixes
def wait_for_attach():
log.info("wait_for_attach()")
dbg = get_global_debugger()
@ -46,59 +32,62 @@ def wait_for_attach():
pydevd._wait_for_attach(cancel=cancel_event)
def enable_attach(address, log_dir=None):
if log_dir:
common_opts.log_dir = log_dir
log.to_file()
log.info("enable_attach{0!r}", (address,))
def _starts_debugging(func):
def debug(address, log_dir=None, multiprocess=True):
if log_dir:
common_opts.log_dir = log_dir
if is_attached():
log.info("enable_attach() ignored - already attached.")
return None, None
log.to_file()
log.info("{0}{1!r}", func.__name__, (address, log_dir, multiprocess))
# Ensure port is int
host, port = address
address = (host, int(port))
if is_attached():
log.info("{0}() ignored - already attached.", func.__name__)
return server_opts.host, server_opts.port
start_patterns, end_patterns = _get_dont_trace_patterns()
# Ensure port is int
if address is not server_opts:
host, port = address
server_opts.host, server_opts.port = (host, int(port))
if multiprocess is not server_opts:
server_opts.multiprocess = multiprocess
ptvsd_path, _, _ = get_abs_path_real_path_and_base_from_file(ptvsd.__file__)
ptvsd_path = os.path.dirname(ptvsd_path)
start_patterns = (ptvsd_path,)
end_patterns = ("ptvsd_launcher.py",)
log.info(
"Won't trace filenames starting with: {0!j}\n"
"Won't trace filenames ending with: {1!j}",
start_patterns,
end_patterns,
)
return func(start_patterns, end_patterns)
return debug
@_starts_debugging
def enable_attach(dont_trace_start_patterns, dont_trace_end_patterns):
server_opts.host, server_opts.port = pydevd._enable_attach(
address,
dont_trace_start_patterns=start_patterns,
dont_trace_end_paterns=end_patterns,
(server_opts.host, server_opts.port),
dont_trace_start_patterns=dont_trace_start_patterns,
dont_trace_end_paterns=dont_trace_end_patterns,
patch_multiprocessing=server_opts.multiprocess,
)
if server_opts.subprocess_notify:
from ptvsd.server import multiproc
multiproc.notify_root(server_opts.port)
return (server_opts.host, server_opts.port)
return server_opts.host, server_opts.port
def attach(address, log_dir=None):
if log_dir:
common_opts.log_dir = log_dir
log.to_file()
log.info("attach{0!r}", (address,))
if is_attached():
log.info("attach() ignored - already attached.")
return
# Ensure port is int
host, port = address
address = (host, int(port))
server_opts.host, server_opts.port = address
start_patterns, end_patterns = _get_dont_trace_patterns()
log.debug("pydevd.settrace()")
@_starts_debugging
def attach(dont_trace_start_patterns, dont_trace_end_patterns):
pydevd.settrace(
host=host,
port=port,
host=server_opts.host,
port=server_opts.port,
suspend=False,
patch_multiprocessing=server_opts.multiprocess,
dont_trace_start_patterns=start_patterns,
dont_trace_end_paterns=end_patterns,
dont_trace_start_patterns=dont_trace_start_patterns,
dont_trace_end_paterns=dont_trace_end_patterns,
)
@ -119,8 +108,7 @@ def break_into_debugger():
while (
stop_at_frame is not None
and global_debugger.get_file_type(
stop_at_frame,
get_abs_path_real_path_and_base_from_frame(stop_at_frame)
stop_at_frame, get_abs_path_real_path_and_base_from_frame(stop_at_frame)
)
== global_debugger.PYDEV_FILE
):

View file

@ -16,7 +16,7 @@ import pydevd
import ptvsd
from ptvsd.common import compat, fmt, log, options as common_opts
from ptvsd.server import multiproc, options
from ptvsd.server import options
TARGET = "<filename> | -m <module> | -c <code> | --pid <pid>"
@ -172,63 +172,22 @@ def parse(args):
return it
def setup_connection():
    """Push the parsed command-line options into pydevd and, when
    multiprocess mode is on, start listening for subprocess notifications.
    """
    debugger_options = {
        "server": not options.client,
        "client": options.host,
        "port": options.port,
        "multiprocess": options.multiprocess,
    }
    pydevd.apply_debugger_options(debugger_options)

    if not options.multiprocess:
        return
    multiproc.listen_for_subprocesses()
def setup_debug_server(argv_0):
# We need to set up sys.argv[0] before invoking attach() or enable_attach(),
# because they use it to report the 'process' event. Thus, we can't rely on
# run_path() and run_module() doing that, even though they will eventually.
if options.target_kind == "code":
sys.argv[0] = "-c"
elif options.target_kind == "file":
sys.argv[0] = options.target
elif options.target_kind == "module":
# Add current directory to path, like Python itself does for -m. This must
# be in place before trying to use find_spec below to resolve submodules.
sys.path.insert(0, "")
# We want to do the same thing that run_module() would do here, without
# actually invoking it. On Python 3, it's exposed as a public API, but
# on Python 2, we have to invoke a private function in runpy for this.
# Either way, if it fails to resolve for any reason, just leave argv as is.
try:
if sys.version_info >= (3,):
from importlib.util import find_spec
spec = find_spec(options.target)
if spec is not None:
sys.argv[0] = spec.origin
else:
_, _, _, sys.argv[0] = runpy._get_module_details(options.target)
except Exception:
log.exception("Error determining module path for sys.argv")
else:
assert False
sys.argv[0] = compat.filename(argv_0)
log.debug("sys.argv after patching: {0!r}", sys.argv)
addr = (options.host, options.port)
if options.client:
ptvsd.attach(addr)
else:
ptvsd.enable_attach(addr)
debug = ptvsd.attach if options.client else ptvsd.enable_attach
debug(address=options, multiprocess=options)
if options.wait:
ptvsd.wait_for_attach()
def run_file():
setup_connection()
setup_debug_server(options.target)
# run_path has one difference with invoking Python from command-line:
# if the target is a file (rather than a directory), it does not add its
@ -246,7 +205,27 @@ def run_file():
def run_module():
setup_connection()
# Add current directory to path, like Python itself does for -m. This must
# be in place before trying to use find_spec below to resolve submodules.
sys.path.insert(0, "")
# We want to do the same thing that run_module() would do here, without
# actually invoking it. On Python 3, it's exposed as a public API, but
# on Python 2, we have to invoke a private function in runpy for this.
# Either way, if it fails to resolve for any reason, just leave argv as is.
try:
if sys.version_info >= (3,):
from importlib.util import find_spec
spec = find_spec(options.target)
if spec is not None:
argv_0 = spec.origin
else:
_, _, _, argv_0 = runpy._get_module_details(options.target)
except Exception:
log.exception("Error determining module path for sys.argv")
setup_debug_server(argv_0)
# On Python 2, module name must be a non-Unicode string, because it ends up
# a part of module's __package__, and Python will refuse to run the module
@ -281,7 +260,8 @@ def run_code():
# Add current directory to path, like Python itself does for -c.
sys.path.insert(0, "")
code = compile(options.target, "<string>", "exec")
setup_connection()
setup_debug_server("-c")
eval(code, {})
@ -303,6 +283,7 @@ def attach_to_pid():
host = quoted_str(options.host)
port = options.port
client = options.client
multiprocess = options.multiprocess
log_dir = quoted_str(ptvsd.common.options.log_dir)
ptvsd_path = os.path.abspath(os.path.join(ptvsd.server.__file__, "../.."))
@ -325,10 +306,8 @@ options.log_dir = {log_dir}
log.to_file()
log.info("Bootstrapping injected debugger.")
if {client}:
ptvsd.attach(({host}, {port}))
else:
ptvsd.enable_attach(({host}, {port}))
debug = ptvsd.attach if {client} else ptvsd.enable_attach
debug(({host}, {port}), multiprocess={multiprocess})
""".format(
**locals()
)

View file

@ -1,315 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in the project root
# for license information.
from __future__ import absolute_import, print_function, unicode_literals
import atexit
import itertools
import os
import re
import signal
import sys
import threading
try:
import queue
except ImportError:
import Queue as queue
import ptvsd
from ptvsd.common import log, messaging, options as common_opts, sockets, util
from ptvsd.server import options as server_opts
from _pydev_bundle import pydev_monkey
subprocess_lock = threading.Lock()
subprocess_listener_socket = None
subprocesses = {}
"""List of known subprocesses. Keys are process IDs, values are JsonMessageChannel
instances; subprocess_lock must be used to synchronize access.
"""
subprocess_queue = queue.Queue()
"""A queue of incoming 'ptvsd_subprocess' notifications. Whenever a new request
is received, a tuple of (subprocess_request, subprocess_response) is placed in the
queue.
subprocess_request is the body of the 'ptvsd_subprocess' notification request that
was received, with additional information about the root process added.
subprocess_response is the body of the response that will be sent to respond to the
request. It contains a single item 'incomingConnection', which is initially set to
False. If, as a result of processing the entry, the subprocess shall receive an
incoming DAP connection on the port it specified in the request, its value should be
set to True, indicating that the subprocess should wait for that connection before
proceeding. If no incoming connection is expected, it is set to False, indicating
that the subprocess shall proceed executing user code immediately.
subprocess_queue.task_done() must be invoked for every subprocess_queue.get(), for
the corresponding subprocess_response to be delivered back to the subprocess.
"""
root_start_request = None
"""The 'launch' or 'attach' request that started debugging in this process, in its
entirety (i.e. dict representation of JSON request). This information is added to
'ptvsd_subprocess' notifications before they're placed in subprocess_queue.
"""
def listen_for_subprocesses():
    """Open the subprocess listener socket and start servicing it.

    Creates a server socket on localhost with an OS-assigned port, stores it
    in the module-level subprocess_listener_socket, registers the atexit
    cleanup handlers, and starts a hidden 'SubprocessListener' thread running
    _subprocess_listener. Must not be called while a listener socket already
    exists (asserted).
    """
    global subprocess_listener_socket
    assert subprocess_listener_socket is None

    subprocess_listener_socket = sockets.create_server('localhost', 0)
    port = subprocess_listener_port()
    log.debug('Listening for subprocess notifications on port {0}.', port)

    # Registration order is kept as-is: atexit runs handlers in reverse
    # order of registration.
    atexit.register(stop_listening_for_subprocesses)
    atexit.register(kill_subprocesses)

    listener_thread = util.new_hidden_thread('SubprocessListener', _subprocess_listener)
    listener_thread.start()
def stop_listening_for_subprocesses():
    """Shut down the subprocess listener socket, if there is one.

    Does nothing (beyond logging) when no listener socket exists. Shutdown is
    best-effort: a failure never propagates to the caller, but it is recorded
    in the log at debug level instead of being silently discarded. The
    module-level subprocess_listener_socket is reset to None either way.
    """
    global subprocess_listener_socket

    log.debug('Stopping listening for subprocess notifications.')
    if subprocess_listener_socket is None:
        return

    try:
        sockets.shut_down(subprocess_listener_socket)
    except Exception:
        # Still best-effort, but leave a trace for diagnostics - same
        # reporting style as kill_subprocesses().
        log.exception('Failed to shut down subprocess listener socket.', level='debug')
    subprocess_listener_socket = None
def kill_subprocesses():
    """Send SIGTERM to every subprocess still present in `subprocesses`.

    The set of PIDs is snapshotted under subprocess_lock; each PID is then
    removed from the registry and signalled. A failure to signal one process
    is logged at debug level and does not stop the remaining ones from being
    processed.
    """
    with subprocess_lock:
        remaining = list(subprocesses)
    log.debug('Killing remaining subprocesses: PID={0}', remaining)

    for pid in remaining:
        log.debug('Killing subprocess with PID={0}.', pid)

        # Drop the registry entry first, holding the lock only for the pop.
        with subprocess_lock:
            subprocesses.pop(pid, None)

        try:
            os.kill(pid, signal.SIGTERM)
        except Exception:
            log.exception('Failed to kill process with PID={0}.', pid, level='debug')
def subprocess_listener_port():
    """Return the port the subprocess listener socket is bound to, or None
    if there is no listener socket.
    """
    if subprocess_listener_socket is not None:
        _host, port = subprocess_listener_socket.getsockname()
        return port
    return None
def _subprocess_listener():
    """Accept connections on the subprocess listener socket until it goes away.

    Each accepted connection gets a sequential number (starting at 1), is
    wrapped into a JSON stream named 'subprocess-<n>', and is handed to
    _handle_subprocess. The loop ends when the listener socket is unset or
    when accept() raises.
    """
    for n in itertools.count(1):
        if not subprocess_listener_socket:
            break

        try:
            sock, _ = subprocess_listener_socket.accept()
        except Exception:
            # Any accept() failure ends the loop.
            break

        name = 'subprocess-{}'.format(n)
        log.debug('Accepted incoming connection from {0}', name)

        stream = messaging.JsonIOStream.from_socket(sock, name=name)
        _handle_subprocess(n, stream)
def _handle_subprocess(n, stream):
    """Start a message channel named 'subprocess-<n>' over the given stream.

    The channel's handlers register the subprocess in `subprocesses` when its
    'ptvsd_subprocess' request arrives, forward that request (augmented with
    root process information) through subprocess_queue, and unregister the
    subprocess again on disconnect.
    """

    class Handlers(object):
        # PID reported by the subprocess; None until its request is received.
        _pid = None

        def ptvsd_subprocess_request(self, request):
            # When child process is spawned, the notification it sends only
            # contains information about itself and its immediate parent.
            # Add information about the root process before passing it on.
            payload = dict(request.arguments)
            payload['rootProcessId'] = os.getpid()
            payload['rootStartRequest'] = root_start_request

            pid = payload['processId']
            self._pid = pid
            with subprocess_lock:
                subprocesses[pid] = channel

            log.debug(
                'Subprocess {0} (PID={1}) registered, notifying IDE.',
                stream.name,
                self._pid)

            # The very same dict object is queued and returned, so whatever
            # the queue consumer writes into it before calling task_done()
            # is what gets sent back; join() blocks until then.
            response = {'incomingConnection': False}
            subprocess_queue.put((payload, response))
            subprocess_queue.join()
            return response

        def disconnect(self):
            log.debug('Subprocess {0} disconnected, presumed to have terminated.', self._pid)
            if self._pid is None:
                # Never registered, so there is nothing to remove.
                return
            with subprocess_lock:
                subprocesses.pop(self._pid, None)

    channel_name = 'subprocess-%d' % n
    # `channel` is referenced from inside Handlers via closure.
    channel = messaging.JsonMessageChannel(stream, Handlers(), channel_name)
    channel.start()
def notify_root(port):
    """Announce this subprocess to the root process and act on its reply.

    Connects to localhost on the port given by server_opts.subprocess_notify,
    sends a 'ptvsd_subprocess' request carrying the parent PID, this process's
    PID and `port`, and waits for the response. If waiting for the response
    fails, the process exits with status 0. If the response says no incoming
    connection is expected, a pending ptvsd.wait_for_attach() is cancelled
    when it exposes a `cancel` attribute.
    """
    assert server_opts.subprocess_of

    notify_port = server_opts.subprocess_notify
    log.debug('Subprocess (PID={0}) notifying root process at port {1}', os.getpid(), notify_port)

    conn = sockets.create_client()
    conn.connect(('localhost', notify_port))
    stream = messaging.JsonIOStream.from_socket(conn, 'root-process')
    channel = messaging.JsonMessageChannel(stream)
    channel.start()

    # Send the notification about ourselves to root, and wait for it to tell us
    # whether an incoming connection is anticipated. This will be true if root
    # had successfully propagated the notification to the IDE, and false if it
    # couldn't do so (e.g. because the IDE is not attached). There's also the
    # possibility that connection to root will just drop, e.g. if it crashes -
    # in that case, just exit immediately.
    notification = {
        'parentProcessId': server_opts.subprocess_of,
        'processId': os.getpid(),
        'port': port,
    }
    request = channel.send_request('ptvsd_subprocess', notification)

    try:
        response = request.wait_for_response()
    except Exception:
        log.exception('Failed to send subprocess notification; exiting')
        sys.exit(0)

    # Keep the channel open until we exit - root process uses open channels to keep
    # track of which subprocesses are alive and which are not.
    atexit.register(lambda: channel.close())

    if response['incomingConnection']:
        return

    log.debug('No IDE connection is expected for this subprocess; unpausing.')
    if hasattr(ptvsd.wait_for_attach, "cancel"):
        ptvsd.wait_for_attach.cancel()
def patch_args(args):
    """
    Patches a command line invoking Python such that it has the same meaning, but
    the process runs under ptvsd.server. In general, this means that given something like:

        python -R -Q warn -m app

    the result should be:

        python -R -Q warn .../ptvsd/__main__.py --host localhost --port 0 ... -m app

    args is the command line as a sequence of strings, with the Python binary
    as its first element.

    Returns args itself, untouched, if server_opts.multiprocess is false.
    Otherwise returns a new list: the patched command line, or an unmodified
    copy of args if the command line cannot be patched (reads code from stdin,
    has no recognizable target, or the target is a filename that does not end
    in '.py').
    """

    if not server_opts.multiprocess:
        return args

    args = list(args)
    log.debug('Patching subprocess command line: {0!r}', args)

    # First, let's find the target of the invocation. This is one of:
    #
    #   filename.py
    #   -m module_name
    #   -c "code"
    #   -
    #
    # This needs to take into account other switches that have values:
    #
    #   -Q -W -X --check-hash-based-pycs
    #
    # because in something like "-X -c", -c is a value, not a switch.
    expect_value = False
    for i, arg in enumerate(args):
        # Skip Python binary.
        if i == 0:
            continue

        if arg == '-':
            # We do not support debugging while reading from stdin, so just let this
            # process run without debugging.
            return args

        if expect_value:
            # Consume the value and move on.
            expect_value = False
            continue

        if not arg.startswith('-') or arg in ('-c', '-m'):
            # This is the target.
            break

        if arg.startswith('--'):
            expect_value = (arg == '--check-hash-based-pycs')
            continue

        # All short switches other than -c and -m can be combined together, including
        # those with values. So, instead of -R -B -v -Q old, we might see -RBvQ old.
        # Furthermore, the value itself can be concatenated with the switch, so rather
        # than -Q old, we might have -Qold. When switches are combined, any switch that
        # has a value "eats" the rest of the argument; for example, -RBQv is treated as
        # -R -B -Qv, and not as -R -B -Q -v. So, we need to check whether one of 'Q',
        # 'W' or 'X' was present somewhere in the arg, and whether there was anything
        # following it in the arg. If it was there but nothing followed after it, then
        # the switch is expecting a value.
        split = re.split(r'[QWX]', arg, maxsplit=1)
        # len(split) > 1 means one of Q/W/X was found; an empty tail means its
        # value was not glued onto the switch, so the value is the next argument.
        expect_value = (len(split) > 1 and split[-1] == '')

    else:
        # Didn't find the target, so we don't know how to patch this command line; let
        # it run without debugging.
        return args

    if not args[i].startswith('-'):
        # If it was a filename, it can be a Python file, a directory, or a zip archive
        # that is treated as if it were a directory. However, ptvsd only supports the
        # first scenario. Distinguishing between these can be tricky, and getting it
        # wrong means that process fails to launch, so be conservative.
        if not args[i].endswith('.py'):
            return args

    # Now we need to inject the ptvsd invocation right before the target. The target
    # itself can remain as is, because ptvsd is compatible with Python in that respect.
    from ptvsd import __main__
    ptvsd_args = [
        __main__.__file__,
        '--host', server_opts.host,
        '--port', '0',
        '--wait',
        '--multiprocess',
        '--subprocess-of', str(os.getpid()),
        '--subprocess-notify', str(server_opts.subprocess_notify or subprocess_listener_port()),
    ]
    if common_opts.log_dir:
        ptvsd_args += ['--log-dir', common_opts.log_dir]
    args[i:i] = ptvsd_args

    log.debug('Patched subprocess command line: {0!r}', args)
    return args
def patch_and_quote_args(args):
    """Run args through patch_args(), then through pydev_monkey.quote_args()."""
    # On Windows, pydevd expects arguments to be quoted and escaped as necessary, such
    # that simply concatenating them via ' ' produces a valid command line. Quoting is
    # applied here (quote_args contains platform check), so that the implementation of
    # patch_args can be kept simple.
    patched = patch_args(args)
    return pydev_monkey.quote_args(patched)

View file

@ -49,18 +49,7 @@ wait = False
"""If True, wait until the debugger is connected before running any code.
"""
multiprocess = False
multiprocess = True
"""Whether this ptvsd instance is running in multiprocess mode, detouring creation
of new processes and enabling debugging for them.
"""
subprocess_of = None
"""If not None, the process ID of the parent process (running in multiprocess mode)
that spawned this subprocess.
"""
subprocess_notify = None
"""The port number of the subprocess listener. If specified, a 'ptvsd_subprocess'
notification must be sent to that port once this ptvsd is initialized and ready to
accept a connection from the client.
"""