From 346b97bf696341b539c41b948b9bf290cc92755a Mon Sep 17 00:00:00 2001 From: Pavel Minaev Date: Wed, 27 Nov 2019 14:18:35 -0800 Subject: [PATCH] Fix #1938: Debuggee output is UTF-8 regardless of locale Fix #1954: "redirectOutput" property is not respected If "redirectOutput" is specified, always capture output as UTF-8, and encode it according to locale and Python settings. Change the default for "redirectOutput" to be false when "console" is not set to "internalConsole", to minimize differences in behavior when running under debugger for most common scenarios. Refactor ptvsd.launcher package to minimize circular dependencies between modules. --- .vscode/launch.json | 5 +- src/ptvsd/adapter/launchers.py | 2 +- src/ptvsd/launcher/__init__.py | 19 ++++ src/ptvsd/launcher/__main__.py | 8 +- src/ptvsd/launcher/adapter.py | 154 --------------------------------- src/ptvsd/launcher/debuggee.py | 18 ++-- src/ptvsd/launcher/handlers.py | 153 ++++++++++++++++++++++++++++++++ src/ptvsd/launcher/output.py | 92 +++++++++----------- 8 files changed, 230 insertions(+), 221 deletions(-) delete mode 100644 src/ptvsd/launcher/adapter.py create mode 100644 src/ptvsd/launcher/handlers.py diff --git a/.vscode/launch.json b/.vscode/launch.json index bd5d5f74..0c340448 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -25,8 +25,8 @@ "console": "integratedTerminal", //"console": "externalTerminal", "consoleTitle": "ptvsd.server", - //"program": "${file}", - "program": "${workspaceFolder}/tests/test_data/testpkgs/pkg1/__main__.py", + "program": "${file}", + "logToFile": true, }, { "name": "Attach [debugServer]", @@ -34,6 +34,7 @@ "request": "attach", "host": "localhost", "port": 5678, + "logToFile": true, }, ] } \ No newline at end of file diff --git a/src/ptvsd/adapter/launchers.py b/src/ptvsd/adapter/launchers.py index 8d610ba0..f11cd441 100644 --- a/src/ptvsd/adapter/launchers.py +++ b/src/ptvsd/adapter/launchers.py @@ -58,7 +58,7 @@ def spawn_debuggee(session, start_request, sudo, args, console, console_title): cmdline = ["sudo"] if sudo else [] cmdline += [sys.executable, os.path.dirname(ptvsd.launcher.__file__)] cmdline += args - env = {str("PTVSD_SESSION_ID"): str(session.id)} + env = {} def spawn_launcher(): with session.accept_connection_from_launcher() as (_, launcher_port): diff --git a/src/ptvsd/launcher/__init__.py b/src/ptvsd/launcher/__init__.py index 2a6b5e3d..130c4ffb 100644 --- a/src/ptvsd/launcher/__init__.py +++ b/src/ptvsd/launcher/__init__.py @@ -10,3 +10,22 @@ import os # Force absolute path on Python 2. __file__ = os.path.abspath(__file__) + + +channel = None +"""DAP message channel to the adapter.""" + + +def connect(launcher_port): + from ptvsd.common import messaging, sockets + from ptvsd.launcher import handlers + + global channel + assert channel is None + + sock = sockets.create_client() + sock.connect(("127.0.0.1", launcher_port)) + + stream = messaging.JsonIOStream.from_socket(sock, "Adapter") + channel = messaging.JsonMessageChannel(stream, handlers=handlers) + channel.start() diff --git a/src/ptvsd/launcher/__main__.py b/src/ptvsd/launcher/__main__.py index d088d13f..e4d793a3 100644 --- a/src/ptvsd/launcher/__main__.py +++ b/src/ptvsd/launcher/__main__.py @@ -19,7 +19,8 @@ __file__ = os.path.abspath(__file__) def main(): from ptvsd.common import log - from ptvsd.launcher import adapter, debuggee + from ptvsd import launcher + from ptvsd.launcher import debuggee log.to_file(prefix="ptvsd.launcher") log.describe_environment("ptvsd.launcher startup environment:") @@ -30,11 +31,10 @@ def main(): except Exception: raise log.exception("Error parsing {0!r}:", name) - session_id = option("PTVSD_SESSION_ID", int) launcher_port = option("PTVSD_LAUNCHER_PORT", int) - adapter.connect(session_id, launcher_port) - adapter.channel.wait() + launcher.connect(launcher_port) + launcher.channel.wait() if debuggee.process is not None: sys.exit(debuggee.process.returncode) diff --git a/src/ptvsd/launcher/adapter.py b/src/ptvsd/launcher/adapter.py deleted file mode 100644 index d473d979..00000000 --- a/src/ptvsd/launcher/adapter.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See LICENSE in the project root -# for license information. - -from __future__ import absolute_import, division, print_function, unicode_literals - -import functools -import os -import sys - -import ptvsd -from ptvsd.common import compat, fmt, json, messaging, sockets -from ptvsd.common.compat import unicode -from ptvsd.launcher import debuggee - - -channel = None -"""DAP message channel to the adapter.""" - - -def connect(session_id, launcher_port): - global channel - assert channel is None - - sock = sockets.create_client() - sock.connect(("127.0.0.1", launcher_port)) - - stream = messaging.JsonIOStream.from_socket(sock, fmt("Adapter-{0}", session_id)) - channel = messaging.JsonMessageChannel(stream, handlers=Handlers()) - channel.start() - - -class Handlers(object): - def launch_request(self, request): - debug_options = set(request("debugOptions", json.array(unicode))) - - # Handling of properties that can also be specified as legacy "debugOptions" flags. - # If property is explicitly set to false, but the flag is in "debugOptions", treat - # it as an error. - def property_or_debug_option(prop_name, flag_name): - assert prop_name[0].islower() and flag_name[0].isupper() - value = request(prop_name, json.default(flag_name in debug_options)) - if value is False and flag_name in debug_options: - raise request.isnt_valid( - '{0!r}:false and "debugOptions":[{1!r}] are mutually exclusive', - prop_name, - flag_name, - ) - return value - - cmdline = [] - if property_or_debug_option("sudo", "Sudo"): - if sys.platform == "win32": - raise request.cant_handle('"sudo":true is not supported on Windows.') - else: - cmdline += ["sudo"] - - # "pythonPath" is a deprecated legacy spelling. If "python" is missing, then try - # the alternative. But if both are missing, the error message should say "python". - python_key = "python" - if python_key in request: - if "pythonPath" in request: - raise request.isnt_valid( - '"pythonPath" is not valid if "python" is specified' - ) - elif "pythonPath" in request: - python_key = "pythonPath" - python = request(python_key, json.array(unicode, vectorize=True, size=(1,))) - if not len(python): - python = [compat.filename(sys.executable)] - cmdline += python - - if not request("noDebug", json.default(False)): - port = request("port", int) - ptvsd_args = request("ptvsdArgs", json.array(unicode)) - cmdline += [ - compat.filename(os.path.dirname(ptvsd.__file__)), - "--client", - "--host", - "127.0.0.1", - "--port", - str(port), - ] + ptvsd_args - - program = module = code = () - if "program" in request: - program = request("program", json.array(unicode, vectorize=True, size=(1,))) - cmdline += program - process_name = program[0] - if "module" in request: - module = request("module", json.array(unicode, vectorize=True, size=(1,))) - cmdline += ["-m"] + module - process_name = module[0] - if "code" in request: - code = request("code", json.array(unicode, vectorize=True, size=(1,))) - cmdline += ["-c"] + code - process_name = python[0] - - num_targets = len([x for x in (program, module, code) if x != ()]) - if num_targets == 0: - raise request.isnt_valid( - 'either "program", "module", or "code" must be specified' - ) - elif num_targets != 1: - raise request.isnt_valid( - '"program", "module", and "code" are mutually exclusive' - ) - - cmdline += request("args", json.array(unicode)) - - cwd = request("cwd", unicode, optional=True) - if cwd == (): - # If it's not specified, but we're launching a file rather than a module, - # and the specified path has a directory in it, use that. - cwd = None if program == () else (os.path.dirname(program[0]) or None) - - env = os.environ.copy() - if "PTVSD_TEST" in env: - # If we're running as part of a ptvsd test, make sure that codecov is not - # applied to the debuggee, since it will conflict with pydevd. - env.pop("COV_CORE_SOURCE", None) - env.update(request("env", json.object(unicode))) - - if request("gevent", False): - env["GEVENT_SUPPORT"] = "True" - - redirect_output = "RedirectOutput" in debug_options - if redirect_output: - # sys.stdout buffering must be disabled - otherwise we won't see the output - # at all until the buffer fills up. - env["PYTHONUNBUFFERED"] = "1" - - if property_or_debug_option("waitOnNormalExit", "WaitOnNormalExit"): - debuggee.wait_on_exit_predicates.append(lambda code: code == 0) - if property_or_debug_option("waitOnAbnormalExit", "WaitOnAbnormalExit"): - debuggee.wait_on_exit_predicates.append(lambda code: code != 0) - - if sys.version_info < (3,): - # Popen() expects command line and environment to be bytes, not Unicode. - # Assume that values are filenames - it's usually either that, or numbers - - # but don't allow encoding to fail if we guessed wrong. - encode = functools.partial(compat.filename_bytes, errors="replace") - cmdline = [encode(s) for s in cmdline] - env = {encode(k): encode(v) for k, v in env.items()} - - debuggee.spawn(process_name, cmdline, cwd, env, redirect_output) - return {} - - def terminate_request(self, request): - request.respond({}) - debuggee.kill() - - def disconnect(self): - debuggee.kill() diff --git a/src/ptvsd/launcher/debuggee.py b/src/ptvsd/launcher/debuggee.py index 5982ac29..1d85a533 100644 --- a/src/ptvsd/launcher/debuggee.py +++ b/src/ptvsd/launcher/debuggee.py @@ -5,14 +5,15 @@ from __future__ import absolute_import, division, print_function, unicode_literals import atexit -import locale import os import struct import subprocess import sys import threading +from ptvsd import launcher from ptvsd.common import fmt, log, messaging, compat +from ptvsd.launcher import output process = None @@ -27,12 +28,10 @@ returns True, the launcher pauses and waits for user input before exiting. def describe(): - return fmt("debuggee process with PID={0}", process.pid) + return fmt("Debuggee[PID={0}]", process.pid) def spawn(process_name, cmdline, cwd, env, redirect_output): - from ptvsd.launcher import adapter, output - log.info( "Spawning debuggee process:\n\n" "Current directory: {0!j}\n\n" @@ -65,7 +64,7 @@ def spawn(process_name, cmdline, cwd, env, redirect_output): log.info("Spawned {0}.", describe()) atexit.register(kill) - adapter.channel.send_event( + launcher.channel.send_event( "process", { "startMethod": "launch", @@ -77,12 +76,11 @@ def spawn(process_name, cmdline, cwd, env, redirect_output): ) if redirect_output: - encoding = env.get("PYTHONIOENCODING", locale.getpreferredencoding()) for category, fd, tee in [ ("stdout", stdout_r, sys.stdout), ("stderr", stderr_r, sys.stderr), ]: - output.CaptureOutput(category, fd, tee.fileno(), encoding) + output.CaptureOutput(describe(), category, fd, tee) close_fds.remove(fd) wait_thread = threading.Thread(target=wait_for_exit, name="wait_for_exit()") @@ -109,8 +107,6 @@ def kill(): def wait_for_exit(): - from ptvsd.launcher import adapter, output - try: code = process.wait() if sys.platform != "win32" and code < 0: @@ -126,7 +122,7 @@ def wait_for_exit(): log.info("{0} exited with code {1}", describe(), code) output.wait_for_remaining_output() try: - adapter.channel.send_event("exited", {"exitCode": code}) + launcher.channel.send_event("exited", {"exitCode": code}) except Exception: pass @@ -134,7 +130,7 @@ def wait_for_exit(): _wait_for_user_input() try: - adapter.channel.send_event("terminated") + launcher.channel.send_event("terminated") except Exception: pass diff --git a/src/ptvsd/launcher/handlers.py b/src/ptvsd/launcher/handlers.py new file mode 100644 index 00000000..451fc455 --- /dev/null +++ b/src/ptvsd/launcher/handlers.py @@ -0,0 +1,153 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in the project root +# for license information. + +from __future__ import absolute_import, division, print_function, unicode_literals + +import functools +import os +import sys + +import ptvsd +from ptvsd.common import compat, json +from ptvsd.common.compat import unicode +from ptvsd.launcher import debuggee + + +def launch_request(request): + debug_options = set(request("debugOptions", json.array(unicode))) + + # Handling of properties that can also be specified as legacy "debugOptions" flags. + # If property is explicitly set to false, but the flag is in "debugOptions", treat + # it as an error. Returns None if the property wasn't explicitly set either way. + def property_or_debug_option(prop_name, flag_name): + assert prop_name[0].islower() and flag_name[0].isupper() + + value = request(prop_name, bool, optional=True) + if value == (): + value = None + + if flag_name in debug_options: + if value is False: + raise request.isnt_valid( + '{0!j}:false and "debugOptions":[{1!j}] are mutually exclusive', + prop_name, + flag_name, + ) + value = True + + return value + + cmdline = [] + if property_or_debug_option("sudo", "Sudo"): + if sys.platform == "win32": + raise request.cant_handle('"sudo":true is not supported on Windows.') + else: + cmdline += ["sudo"] + + # "pythonPath" is a deprecated legacy spelling. If "python" is missing, then try + # the alternative. But if both are missing, the error message should say "python". + python_key = "python" + if python_key in request: + if "pythonPath" in request: + raise request.isnt_valid( + '"pythonPath" is not valid if "python" is specified' + ) + elif "pythonPath" in request: + python_key = "pythonPath" + python = request(python_key, json.array(unicode, vectorize=True, size=(1,))) + if not len(python): + python = [compat.filename(sys.executable)] + cmdline += python + + if not request("noDebug", json.default(False)): + port = request("port", int) + ptvsd_args = request("ptvsdArgs", json.array(unicode)) + cmdline += [ + compat.filename(os.path.dirname(ptvsd.__file__)), + "--client", + "--host", + "127.0.0.1", + "--port", + str(port), + ] + ptvsd_args + + program = module = code = () + if "program" in request: + program = request("program", json.array(unicode, vectorize=True, size=(1,))) + cmdline += program + process_name = program[0] + if "module" in request: + module = request("module", json.array(unicode, vectorize=True, size=(1,))) + cmdline += ["-m"] + module + process_name = module[0] + if "code" in request: + code = request("code", json.array(unicode, vectorize=True, size=(1,))) + cmdline += ["-c"] + code + process_name = python[0] + + num_targets = len([x for x in (program, module, code) if x != ()]) + if num_targets == 0: + raise request.isnt_valid( + 'either "program", "module", or "code" must be specified' + ) + elif num_targets != 1: + raise request.isnt_valid( + '"program", "module", and "code" are mutually exclusive' + ) + + cmdline += request("args", json.array(unicode)) + + cwd = request("cwd", unicode, optional=True) + if cwd == (): + # If it's not specified, but we're launching a file rather than a module, + # and the specified path has a directory in it, use that. + cwd = None if program == () else (os.path.dirname(program[0]) or None) + + env = os.environ.copy() + if "PTVSD_TEST" in env: + # If we're running as part of a ptvsd test, make sure that codecov is not + # applied to the debuggee, since it will conflict with pydevd. + env.pop("COV_CORE_SOURCE", None) + env.update(request("env", json.object(unicode))) + + if request("gevent", False): + env["GEVENT_SUPPORT"] = "True" + + redirect_output = property_or_debug_option("redirectOutput", "RedirectOutput") + if redirect_output is None: + # If neither the property nor the option were specified explicitly, choose + # the default depending on console type - "internalConsole" needs it to + # provide any output at all, but it's unnecessary for the terminals. + redirect_output = (request("console", unicode) == "internalConsole") + if redirect_output: + # sys.stdout buffering must be disabled - otherwise we won't see the output + # at all until the buffer fills up. + env["PYTHONUNBUFFERED"] = "1" + # Force UTF-8 output to minimize data loss due to re-encoding. + env["PYTHONIOENCODING"] = "utf-8" + + if property_or_debug_option("waitOnNormalExit", "WaitOnNormalExit"): + debuggee.wait_on_exit_predicates.append(lambda code: code == 0) + if property_or_debug_option("waitOnAbnormalExit", "WaitOnAbnormalExit"): + debuggee.wait_on_exit_predicates.append(lambda code: code != 0) + + if sys.version_info < (3,): + # Popen() expects command line and environment to be bytes, not Unicode. + # Assume that values are filenames - it's usually either that, or numbers - + # but don't allow encoding to fail if we guessed wrong. + encode = functools.partial(compat.filename_bytes, errors="replace") + cmdline = [encode(s) for s in cmdline] + env = {encode(k): encode(v) for k, v in env.items()} + + debuggee.spawn(process_name, cmdline, cwd, env, redirect_output) + return {} + + +def terminate_request(request): + request.respond({}) + debuggee.kill() + + +def disconnect(): + debuggee.kill() diff --git a/src/ptvsd/launcher/output.py b/src/ptvsd/launcher/output.py index e9520840..31000dd0 100644 --- a/src/ptvsd/launcher/output.py +++ b/src/ptvsd/launcher/output.py @@ -6,10 +6,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera import codecs import os +import sys import threading +from ptvsd import launcher from ptvsd.common import log -from ptvsd.launcher import adapter, debuggee class CaptureOutput(object): @@ -20,24 +21,19 @@ class CaptureOutput(object): instances = {} """Keys are output categories, values are CaptureOutput instances.""" - def __init__(self, category, fd, tee_fd, encoding): + def __init__(self, whose, category, fd, stream): assert category not in self.instances self.instances[category] = self - log.info("Capturing {0} of {1}.", category, debuggee.describe()) + log.info("Capturing {0} of {1}.", category, whose) self.category = category + self._whose = whose self._fd = fd - self._tee_fd = tee_fd - - try: - self._decoder = codecs.getincrementaldecoder(encoding)(errors="replace") - except LookupError: - self._decoder = None - log.warning( - 'Unable to generate "output" events for {0} - unknown encoding {1!r}', - category, - encoding, - ) + self._stream = stream if sys.version_info < (3,) else stream.buffer + self._decoder = codecs.getincrementaldecoder("utf-8")(errors="surrogateescape") + self._encode = codecs.getencoder( + "utf-8" if stream.encoding is None else stream.encoding + ) self._worker_thread = threading.Thread(target=self._worker, name=category) self._worker_thread.start() @@ -50,54 +46,52 @@ class CaptureOutput(object): except Exception: pass - def _send_output_event(self, s, final=False): - if self._decoder is None: - return - - s = self._decoder.decode(s, final=final) - if len(s) == 0: - return - s = s.replace("\r\n", "\n") - - try: - adapter.channel.send_event( - "output", {"category": self.category, "output": s} - ) - except Exception: - pass # channel to adapter is already closed - def _worker(self): while self._fd is not None: try: s = os.read(self._fd, 0x1000) except Exception: break - - size = len(s) - if size == 0: + if not len(s): break - - # Tee the output first, before sending the "output" event. - i = 0 - while i < size: - written = os.write(self._tee_fd, s[i:]) - i += written - if not written: - # This means that the output stream was closed from the other end. - # Do the same to the debuggee, so that it knows as well. - os.close(self._fd) - self._fd = None - break - - self._send_output_event(s) + self._process_chunk(s) # Flush any remaining data in the incremental decoder. - self._send_output_event(b"", final=True) + self._process_chunk(b"", final=True) + + def _process_chunk(self, s, final=False): + s = self._decoder.decode(s, final=final) + if len(s) == 0: + return + + try: + launcher.channel.send_event( + "output", {"category": self.category, "output": s.replace("\r\n", "\n")} + ) + except Exception: + pass # channel to adapter is already closed + + s, _ = self._encode(s, "surrogateescape") + size = len(s) + i = 0 + while i < size: + # On Python 2, all writes are full writes, and write() returns None. + # On Python 3, writes can be partial, and write() returns the count. + written = self._stream.write(s[i:]) + if written is None: # full write + break + elif written == 0: + # This means that the output stream was closed from the other end. + # Do the same to the debuggee, so that it knows as well. + os.close(self._fd) + self._fd = None + break + i += written def wait_for_remaining_output(): """Waits for all remaining output to be captured and propagated. """ for category, instance in CaptureOutput.instances.items(): - log.info("Waiting for remaining {0} of {1}.", category, debuggee.describe()) + log.info("Waiting for remaining {0} of {1}.", category, instance._whose) instance._worker_thread.join()