Fix #1938: Debuggee output is UTF-8 regardless of locale

Fix #1954: "redirectOutput" property is not respected

If "redirectOutput" is specified, always capture output as UTF-8, and encode it according to locale and Python settings.

Change the default for "redirectOutput" to be false when "console" is not set to "internalConsole", to minimize differences in behavior when running under debugger for most common scenarios.

Refactor ptvsd.launcher package to minimize circular dependencies between modules.
This commit is contained in:
Pavel Minaev 2019-11-27 14:18:35 -08:00 committed by Pavel Minaev
parent 5d674c49e5
commit 346b97bf69
8 changed files with 230 additions and 221 deletions

5
.vscode/launch.json vendored
View file

@ -25,8 +25,8 @@
"console": "integratedTerminal",
//"console": "externalTerminal",
"consoleTitle": "ptvsd.server",
//"program": "${file}",
"program": "${workspaceFolder}/tests/test_data/testpkgs/pkg1/__main__.py",
"program": "${file}",
"logToFile": true,
},
{
"name": "Attach [debugServer]",
@ -34,6 +34,7 @@
"request": "attach",
"host": "localhost",
"port": 5678,
"logToFile": true,
},
]
}

View file

@ -58,7 +58,7 @@ def spawn_debuggee(session, start_request, sudo, args, console, console_title):
cmdline = ["sudo"] if sudo else []
cmdline += [sys.executable, os.path.dirname(ptvsd.launcher.__file__)]
cmdline += args
env = {str("PTVSD_SESSION_ID"): str(session.id)}
env = {}
def spawn_launcher():
with session.accept_connection_from_launcher() as (_, launcher_port):

View file

@ -10,3 +10,22 @@ import os
# Force absolute path on Python 2.
__file__ = os.path.abspath(__file__)
channel = None
"""DAP message channel to the adapter."""
def connect(launcher_port):
from ptvsd.common import messaging, sockets
from ptvsd.launcher import handlers
global channel
assert channel is None
sock = sockets.create_client()
sock.connect(("127.0.0.1", launcher_port))
stream = messaging.JsonIOStream.from_socket(sock, "Adapter")
channel = messaging.JsonMessageChannel(stream, handlers=handlers)
channel.start()

View file

@ -19,7 +19,8 @@ __file__ = os.path.abspath(__file__)
def main():
from ptvsd.common import log
from ptvsd.launcher import adapter, debuggee
from ptvsd import launcher
from ptvsd.launcher import debuggee
log.to_file(prefix="ptvsd.launcher")
log.describe_environment("ptvsd.launcher startup environment:")
@ -30,11 +31,10 @@ def main():
except Exception:
raise log.exception("Error parsing {0!r}:", name)
session_id = option("PTVSD_SESSION_ID", int)
launcher_port = option("PTVSD_LAUNCHER_PORT", int)
adapter.connect(session_id, launcher_port)
adapter.channel.wait()
launcher.connect(launcher_port)
launcher.channel.wait()
if debuggee.process is not None:
sys.exit(debuggee.process.returncode)

View file

@ -1,154 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in the project root
# for license information.
from __future__ import absolute_import, division, print_function, unicode_literals
import functools
import os
import sys
import ptvsd
from ptvsd.common import compat, fmt, json, messaging, sockets
from ptvsd.common.compat import unicode
from ptvsd.launcher import debuggee
channel = None
"""DAP message channel to the adapter."""
def connect(session_id, launcher_port):
global channel
assert channel is None
sock = sockets.create_client()
sock.connect(("127.0.0.1", launcher_port))
stream = messaging.JsonIOStream.from_socket(sock, fmt("Adapter-{0}", session_id))
channel = messaging.JsonMessageChannel(stream, handlers=Handlers())
channel.start()
class Handlers(object):
def launch_request(self, request):
debug_options = set(request("debugOptions", json.array(unicode)))
# Handling of properties that can also be specified as legacy "debugOptions" flags.
# If property is explicitly set to false, but the flag is in "debugOptions", treat
# it as an error.
def property_or_debug_option(prop_name, flag_name):
assert prop_name[0].islower() and flag_name[0].isupper()
value = request(prop_name, json.default(flag_name in debug_options))
if value is False and flag_name in debug_options:
raise request.isnt_valid(
'{0!r}:false and "debugOptions":[{1!r}] are mutually exclusive',
prop_name,
flag_name,
)
return value
cmdline = []
if property_or_debug_option("sudo", "Sudo"):
if sys.platform == "win32":
raise request.cant_handle('"sudo":true is not supported on Windows.')
else:
cmdline += ["sudo"]
# "pythonPath" is a deprecated legacy spelling. If "python" is missing, then try
# the alternative. But if both are missing, the error message should say "python".
python_key = "python"
if python_key in request:
if "pythonPath" in request:
raise request.isnt_valid(
'"pythonPath" is not valid if "python" is specified'
)
elif "pythonPath" in request:
python_key = "pythonPath"
python = request(python_key, json.array(unicode, vectorize=True, size=(1,)))
if not len(python):
python = [compat.filename(sys.executable)]
cmdline += python
if not request("noDebug", json.default(False)):
port = request("port", int)
ptvsd_args = request("ptvsdArgs", json.array(unicode))
cmdline += [
compat.filename(os.path.dirname(ptvsd.__file__)),
"--client",
"--host",
"127.0.0.1",
"--port",
str(port),
] + ptvsd_args
program = module = code = ()
if "program" in request:
program = request("program", json.array(unicode, vectorize=True, size=(1,)))
cmdline += program
process_name = program[0]
if "module" in request:
module = request("module", json.array(unicode, vectorize=True, size=(1,)))
cmdline += ["-m"] + module
process_name = module[0]
if "code" in request:
code = request("code", json.array(unicode, vectorize=True, size=(1,)))
cmdline += ["-c"] + code
process_name = python[0]
num_targets = len([x for x in (program, module, code) if x != ()])
if num_targets == 0:
raise request.isnt_valid(
'either "program", "module", or "code" must be specified'
)
elif num_targets != 1:
raise request.isnt_valid(
'"program", "module", and "code" are mutually exclusive'
)
cmdline += request("args", json.array(unicode))
cwd = request("cwd", unicode, optional=True)
if cwd == ():
# If it's not specified, but we're launching a file rather than a module,
# and the specified path has a directory in it, use that.
cwd = None if program == () else (os.path.dirname(program[0]) or None)
env = os.environ.copy()
if "PTVSD_TEST" in env:
# If we're running as part of a ptvsd test, make sure that codecov is not
# applied to the debuggee, since it will conflict with pydevd.
env.pop("COV_CORE_SOURCE", None)
env.update(request("env", json.object(unicode)))
if request("gevent", False):
env["GEVENT_SUPPORT"] = "True"
redirect_output = "RedirectOutput" in debug_options
if redirect_output:
# sys.stdout buffering must be disabled - otherwise we won't see the output
# at all until the buffer fills up.
env["PYTHONUNBUFFERED"] = "1"
if property_or_debug_option("waitOnNormalExit", "WaitOnNormalExit"):
debuggee.wait_on_exit_predicates.append(lambda code: code == 0)
if property_or_debug_option("waitOnAbnormalExit", "WaitOnAbnormalExit"):
debuggee.wait_on_exit_predicates.append(lambda code: code != 0)
if sys.version_info < (3,):
# Popen() expects command line and environment to be bytes, not Unicode.
# Assume that values are filenames - it's usually either that, or numbers -
# but don't allow encoding to fail if we guessed wrong.
encode = functools.partial(compat.filename_bytes, errors="replace")
cmdline = [encode(s) for s in cmdline]
env = {encode(k): encode(v) for k, v in env.items()}
debuggee.spawn(process_name, cmdline, cwd, env, redirect_output)
return {}
def terminate_request(self, request):
request.respond({})
debuggee.kill()
def disconnect(self):
debuggee.kill()

View file

@ -5,14 +5,15 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import atexit
import locale
import os
import struct
import subprocess
import sys
import threading
from ptvsd import launcher
from ptvsd.common import fmt, log, messaging, compat
from ptvsd.launcher import output
process = None
@ -27,12 +28,10 @@ returns True, the launcher pauses and waits for user input before exiting.
def describe():
return fmt("debuggee process with PID={0}", process.pid)
return fmt("Debuggee[PID={0}]", process.pid)
def spawn(process_name, cmdline, cwd, env, redirect_output):
from ptvsd.launcher import adapter, output
log.info(
"Spawning debuggee process:\n\n"
"Current directory: {0!j}\n\n"
@ -65,7 +64,7 @@ def spawn(process_name, cmdline, cwd, env, redirect_output):
log.info("Spawned {0}.", describe())
atexit.register(kill)
adapter.channel.send_event(
launcher.channel.send_event(
"process",
{
"startMethod": "launch",
@ -77,12 +76,11 @@ def spawn(process_name, cmdline, cwd, env, redirect_output):
)
if redirect_output:
encoding = env.get("PYTHONIOENCODING", locale.getpreferredencoding())
for category, fd, tee in [
("stdout", stdout_r, sys.stdout),
("stderr", stderr_r, sys.stderr),
]:
output.CaptureOutput(category, fd, tee.fileno(), encoding)
output.CaptureOutput(describe(), category, fd, tee)
close_fds.remove(fd)
wait_thread = threading.Thread(target=wait_for_exit, name="wait_for_exit()")
@ -109,8 +107,6 @@ def kill():
def wait_for_exit():
from ptvsd.launcher import adapter, output
try:
code = process.wait()
if sys.platform != "win32" and code < 0:
@ -126,7 +122,7 @@ def wait_for_exit():
log.info("{0} exited with code {1}", describe(), code)
output.wait_for_remaining_output()
try:
adapter.channel.send_event("exited", {"exitCode": code})
launcher.channel.send_event("exited", {"exitCode": code})
except Exception:
pass
@ -134,7 +130,7 @@ def wait_for_exit():
_wait_for_user_input()
try:
adapter.channel.send_event("terminated")
launcher.channel.send_event("terminated")
except Exception:
pass

View file

@ -0,0 +1,153 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in the project root
# for license information.
from __future__ import absolute_import, division, print_function, unicode_literals
import functools
import os
import sys
import ptvsd
from ptvsd.common import compat, json
from ptvsd.common.compat import unicode
from ptvsd.launcher import debuggee
def launch_request(request):
debug_options = set(request("debugOptions", json.array(unicode)))
# Handling of properties that can also be specified as legacy "debugOptions" flags.
# If property is explicitly set to false, but the flag is in "debugOptions", treat
# it as an error. Returns None if the property wasn't explicitly set either way.
def property_or_debug_option(prop_name, flag_name):
assert prop_name[0].islower() and flag_name[0].isupper()
value = request(prop_name, bool, optional=True)
if value == ():
value = None
if flag_name in debug_options:
if value is False:
raise request.isnt_valid(
'{0!j}:false and "debugOptions":[{1!j}] are mutually exclusive',
prop_name,
flag_name,
)
value = True
return value
cmdline = []
if property_or_debug_option("sudo", "Sudo"):
if sys.platform == "win32":
raise request.cant_handle('"sudo":true is not supported on Windows.')
else:
cmdline += ["sudo"]
# "pythonPath" is a deprecated legacy spelling. If "python" is missing, then try
# the alternative. But if both are missing, the error message should say "python".
python_key = "python"
if python_key in request:
if "pythonPath" in request:
raise request.isnt_valid(
'"pythonPath" is not valid if "python" is specified'
)
elif "pythonPath" in request:
python_key = "pythonPath"
python = request(python_key, json.array(unicode, vectorize=True, size=(1,)))
if not len(python):
python = [compat.filename(sys.executable)]
cmdline += python
if not request("noDebug", json.default(False)):
port = request("port", int)
ptvsd_args = request("ptvsdArgs", json.array(unicode))
cmdline += [
compat.filename(os.path.dirname(ptvsd.__file__)),
"--client",
"--host",
"127.0.0.1",
"--port",
str(port),
] + ptvsd_args
program = module = code = ()
if "program" in request:
program = request("program", json.array(unicode, vectorize=True, size=(1,)))
cmdline += program
process_name = program[0]
if "module" in request:
module = request("module", json.array(unicode, vectorize=True, size=(1,)))
cmdline += ["-m"] + module
process_name = module[0]
if "code" in request:
code = request("code", json.array(unicode, vectorize=True, size=(1,)))
cmdline += ["-c"] + code
process_name = python[0]
num_targets = len([x for x in (program, module, code) if x != ()])
if num_targets == 0:
raise request.isnt_valid(
'either "program", "module", or "code" must be specified'
)
elif num_targets != 1:
raise request.isnt_valid(
'"program", "module", and "code" are mutually exclusive'
)
cmdline += request("args", json.array(unicode))
cwd = request("cwd", unicode, optional=True)
if cwd == ():
# If it's not specified, but we're launching a file rather than a module,
# and the specified path has a directory in it, use that.
cwd = None if program == () else (os.path.dirname(program[0]) or None)
env = os.environ.copy()
if "PTVSD_TEST" in env:
# If we're running as part of a ptvsd test, make sure that codecov is not
# applied to the debuggee, since it will conflict with pydevd.
env.pop("COV_CORE_SOURCE", None)
env.update(request("env", json.object(unicode)))
if request("gevent", False):
env["GEVENT_SUPPORT"] = "True"
redirect_output = property_or_debug_option("redirectOutput", "RedirectOutput")
if redirect_output is None:
# If neither the property nor the option were specified explicitly, choose
# the default depending on console type - "internalConsole" needs it to
# provide any output at all, but it's unnecessary for the terminals.
redirect_output = (request("console", unicode) == "internalConsole")
if redirect_output:
# sys.stdout buffering must be disabled - otherwise we won't see the output
# at all until the buffer fills up.
env["PYTHONUNBUFFERED"] = "1"
# Force UTF-8 output to minimize data loss due to re-encoding.
env["PYTHONIOENCODING"] = "utf-8"
if property_or_debug_option("waitOnNormalExit", "WaitOnNormalExit"):
debuggee.wait_on_exit_predicates.append(lambda code: code == 0)
if property_or_debug_option("waitOnAbnormalExit", "WaitOnAbnormalExit"):
debuggee.wait_on_exit_predicates.append(lambda code: code != 0)
if sys.version_info < (3,):
# Popen() expects command line and environment to be bytes, not Unicode.
# Assume that values are filenames - it's usually either that, or numbers -
# but don't allow encoding to fail if we guessed wrong.
encode = functools.partial(compat.filename_bytes, errors="replace")
cmdline = [encode(s) for s in cmdline]
env = {encode(k): encode(v) for k, v in env.items()}
debuggee.spawn(process_name, cmdline, cwd, env, redirect_output)
return {}
def terminate_request(request):
request.respond({})
debuggee.kill()
def disconnect():
debuggee.kill()

View file

@ -6,10 +6,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import codecs
import os
import sys
import threading
from ptvsd import launcher
from ptvsd.common import log
from ptvsd.launcher import adapter, debuggee
class CaptureOutput(object):
@ -20,23 +21,18 @@ class CaptureOutput(object):
instances = {}
"""Keys are output categories, values are CaptureOutput instances."""
def __init__(self, category, fd, tee_fd, encoding):
def __init__(self, whose, category, fd, stream):
assert category not in self.instances
self.instances[category] = self
log.info("Capturing {0} of {1}.", category, debuggee.describe())
log.info("Capturing {0} of {1}.", category, whose)
self.category = category
self._whose = whose
self._fd = fd
self._tee_fd = tee_fd
try:
self._decoder = codecs.getincrementaldecoder(encoding)(errors="replace")
except LookupError:
self._decoder = None
log.warning(
'Unable to generate "output" events for {0} - unknown encoding {1!r}',
category,
encoding,
self._stream = stream if sys.version_info < (3,) else stream.buffer
self._decoder = codecs.getincrementaldecoder("utf-8")(errors="surrogateescape")
self._encode = codecs.getencoder(
"utf-8" if stream.encoding is None else stream.encoding
)
self._worker_thread = threading.Thread(target=self._worker, name=category)
@ -50,54 +46,52 @@ class CaptureOutput(object):
except Exception:
pass
def _send_output_event(self, s, final=False):
if self._decoder is None:
return
s = self._decoder.decode(s, final=final)
if len(s) == 0:
return
s = s.replace("\r\n", "\n")
try:
adapter.channel.send_event(
"output", {"category": self.category, "output": s}
)
except Exception:
pass # channel to adapter is already closed
def _worker(self):
while self._fd is not None:
try:
s = os.read(self._fd, 0x1000)
except Exception:
break
size = len(s)
if size == 0:
if not len(s):
break
self._process_chunk(s)
# Tee the output first, before sending the "output" event.
# Flush any remaining data in the incremental decoder.
self._process_chunk(b"", final=True)
def _process_chunk(self, s, final=False):
s = self._decoder.decode(s, final=final)
if len(s) == 0:
return
try:
launcher.channel.send_event(
"output", {"category": self.category, "output": s.replace("\r\n", "\n")}
)
except Exception:
pass # channel to adapter is already closed
s, _ = self._encode(s, "surrogateescape")
size = len(s)
i = 0
while i < size:
written = os.write(self._tee_fd, s[i:])
i += written
if not written:
# On Python 2, all writes are full writes, and write() returns None.
# On Python 3, writes can be partial, and write() returns the count.
written = self._stream.write(s[i:])
if written is None: # full write
break
elif written == 0:
# This means that the output stream was closed from the other end.
# Do the same to the debuggee, so that it knows as well.
os.close(self._fd)
self._fd = None
break
self._send_output_event(s)
# Flush any remaining data in the incremental decoder.
self._send_output_event(b"", final=True)
i += written
def wait_for_remaining_output():
"""Waits for all remaining output to be captured and propagated.
"""
for category, instance in CaptureOutput.instances.items():
log.info("Waiting for remaining {0} of {1}.", category, debuggee.describe())
log.info("Waiting for remaining {0} of {1}.", category, instance._whose)
instance._worker_thread.join()