Fix #1938: Debuggee output is UTF-8 regardless of locale

Fix #1954: "redirectOutput" property is not respected

If "redirectOutput" is specified, always capture output as UTF-8, and encode it according to locale and Python settings.

Change the default for "redirectOutput" to be false when "console" is not set to "internalConsole", to minimize differences in behavior when running under debugger for most common scenarios.

Refactor ptvsd.launcher package to minimize circular dependencies between modules.
This commit is contained in:
Pavel Minaev 2019-11-27 14:18:35 -08:00 committed by Pavel Minaev
parent 5d674c49e5
commit 346b97bf69
8 changed files with 230 additions and 221 deletions

5
.vscode/launch.json vendored
View file

@ -25,8 +25,8 @@
"console": "integratedTerminal", "console": "integratedTerminal",
//"console": "externalTerminal", //"console": "externalTerminal",
"consoleTitle": "ptvsd.server", "consoleTitle": "ptvsd.server",
//"program": "${file}", "program": "${file}",
"program": "${workspaceFolder}/tests/test_data/testpkgs/pkg1/__main__.py", "logToFile": true,
}, },
{ {
"name": "Attach [debugServer]", "name": "Attach [debugServer]",
@ -34,6 +34,7 @@
"request": "attach", "request": "attach",
"host": "localhost", "host": "localhost",
"port": 5678, "port": 5678,
"logToFile": true,
}, },
] ]
} }

View file

@ -58,7 +58,7 @@ def spawn_debuggee(session, start_request, sudo, args, console, console_title):
cmdline = ["sudo"] if sudo else [] cmdline = ["sudo"] if sudo else []
cmdline += [sys.executable, os.path.dirname(ptvsd.launcher.__file__)] cmdline += [sys.executable, os.path.dirname(ptvsd.launcher.__file__)]
cmdline += args cmdline += args
env = {str("PTVSD_SESSION_ID"): str(session.id)} env = {}
def spawn_launcher(): def spawn_launcher():
with session.accept_connection_from_launcher() as (_, launcher_port): with session.accept_connection_from_launcher() as (_, launcher_port):

View file

@ -10,3 +10,22 @@ import os
# Force absolute path on Python 2. # Force absolute path on Python 2.
__file__ = os.path.abspath(__file__) __file__ = os.path.abspath(__file__)
channel = None
"""DAP message channel to the adapter."""
def connect(launcher_port):
from ptvsd.common import messaging, sockets
from ptvsd.launcher import handlers
global channel
assert channel is None
sock = sockets.create_client()
sock.connect(("127.0.0.1", launcher_port))
stream = messaging.JsonIOStream.from_socket(sock, "Adapter")
channel = messaging.JsonMessageChannel(stream, handlers=handlers)
channel.start()

View file

@ -19,7 +19,8 @@ __file__ = os.path.abspath(__file__)
def main(): def main():
from ptvsd.common import log from ptvsd.common import log
from ptvsd.launcher import adapter, debuggee from ptvsd import launcher
from ptvsd.launcher import debuggee
log.to_file(prefix="ptvsd.launcher") log.to_file(prefix="ptvsd.launcher")
log.describe_environment("ptvsd.launcher startup environment:") log.describe_environment("ptvsd.launcher startup environment:")
@ -30,11 +31,10 @@ def main():
except Exception: except Exception:
raise log.exception("Error parsing {0!r}:", name) raise log.exception("Error parsing {0!r}:", name)
session_id = option("PTVSD_SESSION_ID", int)
launcher_port = option("PTVSD_LAUNCHER_PORT", int) launcher_port = option("PTVSD_LAUNCHER_PORT", int)
adapter.connect(session_id, launcher_port) launcher.connect(launcher_port)
adapter.channel.wait() launcher.channel.wait()
if debuggee.process is not None: if debuggee.process is not None:
sys.exit(debuggee.process.returncode) sys.exit(debuggee.process.returncode)

View file

@ -1,154 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in the project root
# for license information.
from __future__ import absolute_import, division, print_function, unicode_literals
import functools
import os
import sys
import ptvsd
from ptvsd.common import compat, fmt, json, messaging, sockets
from ptvsd.common.compat import unicode
from ptvsd.launcher import debuggee
channel = None
"""DAP message channel to the adapter."""
def connect(session_id, launcher_port):
global channel
assert channel is None
sock = sockets.create_client()
sock.connect(("127.0.0.1", launcher_port))
stream = messaging.JsonIOStream.from_socket(sock, fmt("Adapter-{0}", session_id))
channel = messaging.JsonMessageChannel(stream, handlers=Handlers())
channel.start()
class Handlers(object):
def launch_request(self, request):
debug_options = set(request("debugOptions", json.array(unicode)))
# Handling of properties that can also be specified as legacy "debugOptions" flags.
# If property is explicitly set to false, but the flag is in "debugOptions", treat
# it as an error.
def property_or_debug_option(prop_name, flag_name):
assert prop_name[0].islower() and flag_name[0].isupper()
value = request(prop_name, json.default(flag_name in debug_options))
if value is False and flag_name in debug_options:
raise request.isnt_valid(
'{0!r}:false and "debugOptions":[{1!r}] are mutually exclusive',
prop_name,
flag_name,
)
return value
cmdline = []
if property_or_debug_option("sudo", "Sudo"):
if sys.platform == "win32":
raise request.cant_handle('"sudo":true is not supported on Windows.')
else:
cmdline += ["sudo"]
# "pythonPath" is a deprecated legacy spelling. If "python" is missing, then try
# the alternative. But if both are missing, the error message should say "python".
python_key = "python"
if python_key in request:
if "pythonPath" in request:
raise request.isnt_valid(
'"pythonPath" is not valid if "python" is specified'
)
elif "pythonPath" in request:
python_key = "pythonPath"
python = request(python_key, json.array(unicode, vectorize=True, size=(1,)))
if not len(python):
python = [compat.filename(sys.executable)]
cmdline += python
if not request("noDebug", json.default(False)):
port = request("port", int)
ptvsd_args = request("ptvsdArgs", json.array(unicode))
cmdline += [
compat.filename(os.path.dirname(ptvsd.__file__)),
"--client",
"--host",
"127.0.0.1",
"--port",
str(port),
] + ptvsd_args
program = module = code = ()
if "program" in request:
program = request("program", json.array(unicode, vectorize=True, size=(1,)))
cmdline += program
process_name = program[0]
if "module" in request:
module = request("module", json.array(unicode, vectorize=True, size=(1,)))
cmdline += ["-m"] + module
process_name = module[0]
if "code" in request:
code = request("code", json.array(unicode, vectorize=True, size=(1,)))
cmdline += ["-c"] + code
process_name = python[0]
num_targets = len([x for x in (program, module, code) if x != ()])
if num_targets == 0:
raise request.isnt_valid(
'either "program", "module", or "code" must be specified'
)
elif num_targets != 1:
raise request.isnt_valid(
'"program", "module", and "code" are mutually exclusive'
)
cmdline += request("args", json.array(unicode))
cwd = request("cwd", unicode, optional=True)
if cwd == ():
# If it's not specified, but we're launching a file rather than a module,
# and the specified path has a directory in it, use that.
cwd = None if program == () else (os.path.dirname(program[0]) or None)
env = os.environ.copy()
if "PTVSD_TEST" in env:
# If we're running as part of a ptvsd test, make sure that codecov is not
# applied to the debuggee, since it will conflict with pydevd.
env.pop("COV_CORE_SOURCE", None)
env.update(request("env", json.object(unicode)))
if request("gevent", False):
env["GEVENT_SUPPORT"] = "True"
redirect_output = "RedirectOutput" in debug_options
if redirect_output:
# sys.stdout buffering must be disabled - otherwise we won't see the output
# at all until the buffer fills up.
env["PYTHONUNBUFFERED"] = "1"
if property_or_debug_option("waitOnNormalExit", "WaitOnNormalExit"):
debuggee.wait_on_exit_predicates.append(lambda code: code == 0)
if property_or_debug_option("waitOnAbnormalExit", "WaitOnAbnormalExit"):
debuggee.wait_on_exit_predicates.append(lambda code: code != 0)
if sys.version_info < (3,):
# Popen() expects command line and environment to be bytes, not Unicode.
# Assume that values are filenames - it's usually either that, or numbers -
# but don't allow encoding to fail if we guessed wrong.
encode = functools.partial(compat.filename_bytes, errors="replace")
cmdline = [encode(s) for s in cmdline]
env = {encode(k): encode(v) for k, v in env.items()}
debuggee.spawn(process_name, cmdline, cwd, env, redirect_output)
return {}
def terminate_request(self, request):
request.respond({})
debuggee.kill()
def disconnect(self):
debuggee.kill()

View file

@ -5,14 +5,15 @@
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
import atexit import atexit
import locale
import os import os
import struct import struct
import subprocess import subprocess
import sys import sys
import threading import threading
from ptvsd import launcher
from ptvsd.common import fmt, log, messaging, compat from ptvsd.common import fmt, log, messaging, compat
from ptvsd.launcher import output
process = None process = None
@ -27,12 +28,10 @@ returns True, the launcher pauses and waits for user input before exiting.
def describe(): def describe():
return fmt("debuggee process with PID={0}", process.pid) return fmt("Debuggee[PID={0}]", process.pid)
def spawn(process_name, cmdline, cwd, env, redirect_output): def spawn(process_name, cmdline, cwd, env, redirect_output):
from ptvsd.launcher import adapter, output
log.info( log.info(
"Spawning debuggee process:\n\n" "Spawning debuggee process:\n\n"
"Current directory: {0!j}\n\n" "Current directory: {0!j}\n\n"
@ -65,7 +64,7 @@ def spawn(process_name, cmdline, cwd, env, redirect_output):
log.info("Spawned {0}.", describe()) log.info("Spawned {0}.", describe())
atexit.register(kill) atexit.register(kill)
adapter.channel.send_event( launcher.channel.send_event(
"process", "process",
{ {
"startMethod": "launch", "startMethod": "launch",
@ -77,12 +76,11 @@ def spawn(process_name, cmdline, cwd, env, redirect_output):
) )
if redirect_output: if redirect_output:
encoding = env.get("PYTHONIOENCODING", locale.getpreferredencoding())
for category, fd, tee in [ for category, fd, tee in [
("stdout", stdout_r, sys.stdout), ("stdout", stdout_r, sys.stdout),
("stderr", stderr_r, sys.stderr), ("stderr", stderr_r, sys.stderr),
]: ]:
output.CaptureOutput(category, fd, tee.fileno(), encoding) output.CaptureOutput(describe(), category, fd, tee)
close_fds.remove(fd) close_fds.remove(fd)
wait_thread = threading.Thread(target=wait_for_exit, name="wait_for_exit()") wait_thread = threading.Thread(target=wait_for_exit, name="wait_for_exit()")
@ -109,8 +107,6 @@ def kill():
def wait_for_exit(): def wait_for_exit():
from ptvsd.launcher import adapter, output
try: try:
code = process.wait() code = process.wait()
if sys.platform != "win32" and code < 0: if sys.platform != "win32" and code < 0:
@ -126,7 +122,7 @@ def wait_for_exit():
log.info("{0} exited with code {1}", describe(), code) log.info("{0} exited with code {1}", describe(), code)
output.wait_for_remaining_output() output.wait_for_remaining_output()
try: try:
adapter.channel.send_event("exited", {"exitCode": code}) launcher.channel.send_event("exited", {"exitCode": code})
except Exception: except Exception:
pass pass
@ -134,7 +130,7 @@ def wait_for_exit():
_wait_for_user_input() _wait_for_user_input()
try: try:
adapter.channel.send_event("terminated") launcher.channel.send_event("terminated")
except Exception: except Exception:
pass pass

View file

@ -0,0 +1,153 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in the project root
# for license information.
from __future__ import absolute_import, division, print_function, unicode_literals
import functools
import os
import sys
import ptvsd
from ptvsd.common import compat, json
from ptvsd.common.compat import unicode
from ptvsd.launcher import debuggee
def launch_request(request):
debug_options = set(request("debugOptions", json.array(unicode)))
# Handling of properties that can also be specified as legacy "debugOptions" flags.
# If property is explicitly set to false, but the flag is in "debugOptions", treat
# it as an error. Returns None if the property wasn't explicitly set either way.
def property_or_debug_option(prop_name, flag_name):
assert prop_name[0].islower() and flag_name[0].isupper()
value = request(prop_name, bool, optional=True)
if value == ():
value = None
if flag_name in debug_options:
if value is False:
raise request.isnt_valid(
'{0!j}:false and "debugOptions":[{1!j}] are mutually exclusive',
prop_name,
flag_name,
)
value = True
return value
cmdline = []
if property_or_debug_option("sudo", "Sudo"):
if sys.platform == "win32":
raise request.cant_handle('"sudo":true is not supported on Windows.')
else:
cmdline += ["sudo"]
# "pythonPath" is a deprecated legacy spelling. If "python" is missing, then try
# the alternative. But if both are missing, the error message should say "python".
python_key = "python"
if python_key in request:
if "pythonPath" in request:
raise request.isnt_valid(
'"pythonPath" is not valid if "python" is specified'
)
elif "pythonPath" in request:
python_key = "pythonPath"
python = request(python_key, json.array(unicode, vectorize=True, size=(1,)))
if not len(python):
python = [compat.filename(sys.executable)]
cmdline += python
if not request("noDebug", json.default(False)):
port = request("port", int)
ptvsd_args = request("ptvsdArgs", json.array(unicode))
cmdline += [
compat.filename(os.path.dirname(ptvsd.__file__)),
"--client",
"--host",
"127.0.0.1",
"--port",
str(port),
] + ptvsd_args
program = module = code = ()
if "program" in request:
program = request("program", json.array(unicode, vectorize=True, size=(1,)))
cmdline += program
process_name = program[0]
if "module" in request:
module = request("module", json.array(unicode, vectorize=True, size=(1,)))
cmdline += ["-m"] + module
process_name = module[0]
if "code" in request:
code = request("code", json.array(unicode, vectorize=True, size=(1,)))
cmdline += ["-c"] + code
process_name = python[0]
num_targets = len([x for x in (program, module, code) if x != ()])
if num_targets == 0:
raise request.isnt_valid(
'either "program", "module", or "code" must be specified'
)
elif num_targets != 1:
raise request.isnt_valid(
'"program", "module", and "code" are mutually exclusive'
)
cmdline += request("args", json.array(unicode))
cwd = request("cwd", unicode, optional=True)
if cwd == ():
# If it's not specified, but we're launching a file rather than a module,
# and the specified path has a directory in it, use that.
cwd = None if program == () else (os.path.dirname(program[0]) or None)
env = os.environ.copy()
if "PTVSD_TEST" in env:
# If we're running as part of a ptvsd test, make sure that codecov is not
# applied to the debuggee, since it will conflict with pydevd.
env.pop("COV_CORE_SOURCE", None)
env.update(request("env", json.object(unicode)))
if request("gevent", False):
env["GEVENT_SUPPORT"] = "True"
redirect_output = property_or_debug_option("redirectOutput", "RedirectOutput")
if redirect_output is None:
# If neither the property nor the option were specified explicitly, choose
# the default depending on console type - "internalConsole" needs it to
# provide any output at all, but it's unnecessary for the terminals.
redirect_output = (request("console", unicode) == "internalConsole")
if redirect_output:
# sys.stdout buffering must be disabled - otherwise we won't see the output
# at all until the buffer fills up.
env["PYTHONUNBUFFERED"] = "1"
# Force UTF-8 output to minimize data loss due to re-encoding.
env["PYTHONIOENCODING"] = "utf-8"
if property_or_debug_option("waitOnNormalExit", "WaitOnNormalExit"):
debuggee.wait_on_exit_predicates.append(lambda code: code == 0)
if property_or_debug_option("waitOnAbnormalExit", "WaitOnAbnormalExit"):
debuggee.wait_on_exit_predicates.append(lambda code: code != 0)
if sys.version_info < (3,):
# Popen() expects command line and environment to be bytes, not Unicode.
# Assume that values are filenames - it's usually either that, or numbers -
# but don't allow encoding to fail if we guessed wrong.
encode = functools.partial(compat.filename_bytes, errors="replace")
cmdline = [encode(s) for s in cmdline]
env = {encode(k): encode(v) for k, v in env.items()}
debuggee.spawn(process_name, cmdline, cwd, env, redirect_output)
return {}
def terminate_request(request):
request.respond({})
debuggee.kill()
def disconnect():
debuggee.kill()

View file

@ -6,10 +6,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import codecs import codecs
import os import os
import sys
import threading import threading
from ptvsd import launcher
from ptvsd.common import log from ptvsd.common import log
from ptvsd.launcher import adapter, debuggee
class CaptureOutput(object): class CaptureOutput(object):
@ -20,24 +21,19 @@ class CaptureOutput(object):
instances = {} instances = {}
"""Keys are output categories, values are CaptureOutput instances.""" """Keys are output categories, values are CaptureOutput instances."""
def __init__(self, category, fd, tee_fd, encoding): def __init__(self, whose, category, fd, stream):
assert category not in self.instances assert category not in self.instances
self.instances[category] = self self.instances[category] = self
log.info("Capturing {0} of {1}.", category, debuggee.describe()) log.info("Capturing {0} of {1}.", category, whose)
self.category = category self.category = category
self._whose = whose
self._fd = fd self._fd = fd
self._tee_fd = tee_fd self._stream = stream if sys.version_info < (3,) else stream.buffer
self._decoder = codecs.getincrementaldecoder("utf-8")(errors="surrogateescape")
try: self._encode = codecs.getencoder(
self._decoder = codecs.getincrementaldecoder(encoding)(errors="replace") "utf-8" if stream.encoding is None else stream.encoding
except LookupError: )
self._decoder = None
log.warning(
'Unable to generate "output" events for {0} - unknown encoding {1!r}',
category,
encoding,
)
self._worker_thread = threading.Thread(target=self._worker, name=category) self._worker_thread = threading.Thread(target=self._worker, name=category)
self._worker_thread.start() self._worker_thread.start()
@ -50,54 +46,52 @@ class CaptureOutput(object):
except Exception: except Exception:
pass pass
def _send_output_event(self, s, final=False):
if self._decoder is None:
return
s = self._decoder.decode(s, final=final)
if len(s) == 0:
return
s = s.replace("\r\n", "\n")
try:
adapter.channel.send_event(
"output", {"category": self.category, "output": s}
)
except Exception:
pass # channel to adapter is already closed
def _worker(self): def _worker(self):
while self._fd is not None: while self._fd is not None:
try: try:
s = os.read(self._fd, 0x1000) s = os.read(self._fd, 0x1000)
except Exception: except Exception:
break break
if not len(s):
size = len(s)
if size == 0:
break break
self._process_chunk(s)
# Tee the output first, before sending the "output" event.
i = 0
while i < size:
written = os.write(self._tee_fd, s[i:])
i += written
if not written:
# This means that the output stream was closed from the other end.
# Do the same to the debuggee, so that it knows as well.
os.close(self._fd)
self._fd = None
break
self._send_output_event(s)
# Flush any remaining data in the incremental decoder. # Flush any remaining data in the incremental decoder.
self._send_output_event(b"", final=True) self._process_chunk(b"", final=True)
def _process_chunk(self, s, final=False):
s = self._decoder.decode(s, final=final)
if len(s) == 0:
return
try:
launcher.channel.send_event(
"output", {"category": self.category, "output": s.replace("\r\n", "\n")}
)
except Exception:
pass # channel to adapter is already closed
s, _ = self._encode(s, "surrogateescape")
size = len(s)
i = 0
while i < size:
# On Python 2, all writes are full writes, and write() returns None.
# On Python 3, writes can be partial, and write() returns the count.
written = self._stream.write(s[i:])
if written is None: # full write
break
elif written == 0:
# This means that the output stream was closed from the other end.
# Do the same to the debuggee, so that it knows as well.
os.close(self._fd)
self._fd = None
break
i += written
def wait_for_remaining_output(): def wait_for_remaining_output():
"""Waits for all remaining output to be captured and propagated. """Waits for all remaining output to be captured and propagated.
""" """
for category, instance in CaptureOutput.instances.items(): for category, instance in CaptureOutput.instances.items():
log.info("Waiting for remaining {0} of {1}.", category, debuggee.describe()) log.info("Waiting for remaining {0} of {1}.", category, instance._whose)
instance._worker_thread.join() instance._worker_thread.join()