From 2b19f8fe6ff8495e40d169f505fd4d178184df75 Mon Sep 17 00:00:00 2001 From: Pavel Minaev Date: Mon, 1 Apr 2024 13:40:51 -0700 Subject: [PATCH] Generic implementation for streaming truncatable "safe" repr for str-like and builtin collection types. Parse and propagate "valueFormat". --- src/debugpy/adapter/__main__.py | 2 +- src/debugpy/common/log.py | 2 +- src/debugpy/common/messaging.py | 12 +- src/debugpy/server/adapters.py | 36 +++- src/debugpy/server/eval.py | 53 +++-- src/debugpy/server/inspect/__init__.py | 255 ++++++++++++++++++++----- src/debugpy/server/inspect/stdlib.py | 168 +++++++++++++++- 7 files changed, 445 insertions(+), 83 deletions(-) diff --git a/src/debugpy/adapter/__main__.py b/src/debugpy/adapter/__main__.py index 4413fcf4..0630c38f 100644 --- a/src/debugpy/adapter/__main__.py +++ b/src/debugpy/adapter/__main__.py @@ -46,7 +46,7 @@ def main(args): if args.log_dir is not None: log.log_dir = args.log_dir - log.to_file(prefix="debugpy.adapter", levels=("info", "warning", "error")) + log.to_file(prefix="debugpy.adapter") log.describe_environment("debugpy.adapter startup environment:") servers.access_token = args.server_access_token diff --git a/src/debugpy/common/log.py b/src/debugpy/common/log.py index 099e93c7..4f5e581a 100644 --- a/src/debugpy/common/log.py +++ b/src/debugpy/common/log.py @@ -223,7 +223,7 @@ def reraise_exception(format_string="", *args, **kwargs): raise -def to_file(filename=None, prefix=None, levels=LEVELS): +def to_file(filename=None, prefix=None, levels=("error", "warning", "info")): """Starts logging all messages at the specified levels to the designated file. Either filename or prefix must be specified, but not both. diff --git a/src/debugpy/common/messaging.py b/src/debugpy/common/messaging.py index 07cdbdb0..ae66bcbc 100644 --- a/src/debugpy/common/messaging.py +++ b/src/debugpy/common/messaging.py @@ -140,7 +140,7 @@ class JsonIOStream(object): return self._closed = True - log.debug("Closing {0} message stream", self.name) + log.info("Closing {0} message stream", self.name) try: try: # Close the writer first, so that the other end of the connection has @@ -158,7 +158,7 @@ class JsonIOStream(object): except Exception: # pragma: no cover log.reraise_exception("Error while closing {0} message stream", self.name) - def _log_message(self, dir, data, logger=log.debug): + def _log_message(self, dir, data, logger=log.info): return logger("{0} {1} {2}", self.name, dir, data) def _read_line(self, reader): @@ -1163,13 +1163,13 @@ class JsonMessageChannel(object): if parser_thread is not None: parser_thread.join() except AssertionError: - log.debug("Handled error joining parser thread.") + log.info("Handled error joining parser thread.") try: handler_thread = self._handler_thread if handler_thread is not None: handler_thread.join() except AssertionError: - log.debug("Handled error joining handler thread.") + log.info("Handled error joining handler thread.") # Order of keys for _prettify() - follows the order of properties in # https://microsoft.github.io/debug-adapter-protocol/specification @@ -1289,13 +1289,13 @@ class JsonMessageChannel(object): exc.propagate(message) def _parse_incoming_messages(self): - log.debug("Starting message loop for channel {0}", self) + log.info("Starting message loop for channel {0}", self) try: while True: self._parse_incoming_message() except NoMoreMessages as exc: - log.debug("Exiting message loop for channel {0}: {1}", self, exc) + log.info("Exiting message loop for channel {0}: {1}", self, exc) with self: # Generate dummy responses for all outstanding requests. err_message = str(exc) diff --git a/src/debugpy/server/adapters.py b/src/debugpy/server/adapters.py index ad31576b..55941f55 100644 --- a/src/debugpy/server/adapters.py +++ b/src/debugpy/server/adapters.py @@ -368,7 +368,33 @@ class Adapter: return request.isnt_valid(f'Invalid "frameId": {frame_id}', silent=True) return {"scopes": frame.scopes()} + def _parse_value_format(self, request: Request) -> eval.ValueFormat: + result = eval.ValueFormat( + hex=False, + max_length=1024, # VSCode limit for tooltips + truncation_suffix="⌇⋯", + ) + + format = request("format", json.object()) + if format == {}: + return result + + hex = format("hex", bool, optional=True) + if hex != (): + result.hex = hex + + max_length = format("debugpy.maxLength", int, optional=True) + if max_length != (): + result.max_length = max_length + + truncation_suffix = format("debugpy.truncationSuffix", str, optional=True) + if truncation_suffix != (): + result.truncation_suffix = truncation_suffix + + return result + def variables_request(self, request: Request): + format = self._parse_value_format(request) start = request("start", 0) count = request("count", int, optional=True) @@ -389,9 +415,10 @@ class Adapter: if container is None: raise request.isnt_valid(f'Invalid "variablesReference": {container_id}') - return {"variables": list(container.variables(filter, start, count))} + return {"variables": list(container.variables(filter, format, start, count))} def evaluate_request(self, request: Request): + format = self._parse_value_format(request) expr = request("expression", str) frame_id = request("frameId", int) frame = StackFrame.get(frame_id) @@ -401,9 +428,10 @@ class Adapter: result = frame.evaluate(expr) except BaseException as exc: result = exc - return eval.Result(frame, result) + return eval.Result(frame, result, format) def setVariable_request(self, request: Request): + format = self._parse_value_format(request) name = request("name", str) value = request("value", str) container_id = request("variablesReference", int) @@ -411,7 +439,7 @@ class Adapter: if container is None: raise request.isnt_valid(f'Invalid "variablesReference": {container_id}') try: - return container.set_variable(name, value) + return container.set_variable(name, value, format) except BaseException as exc: raise request.cant_handle(str(exc)) @@ -427,7 +455,7 @@ class Adapter: result = frame.evaluate(expr) except BaseException as exc: raise request.cant_handle(str(exc)) - return eval.Result(frame, result) + return eval.Result(frame, result, format) def disconnect_request(self, request: Request): Breakpoint.clear() diff --git a/src/debugpy/server/eval.py b/src/debugpy/server/eval.py index 40697b2a..01b2b80f 100644 --- a/src/debugpy/server/eval.py +++ b/src/debugpy/server/eval.py @@ -2,13 +2,21 @@ # Licensed under the MIT License. See LICENSE in the project root # for license information. +""" +DAP entities related to expression evaluation and inspection of variables and scopes. + +Classes here are mostly wrappers around the actual object inspection logic implemented +in debugpy.server.inspect which adapts it to DAP, allowing debugpy.server.inspect to be +unit-tested in isolation. +""" + import ctypes import itertools import debugpy import threading from collections.abc import Iterable, Set from debugpy.common import log -from debugpy.server.inspect import ObjectInspector, inspect +from debugpy.server.inspect import ObjectInspector, ValueFormat, inspect from typing import ClassVar, Literal, Optional, Self type StackFrame = "debugpy.server.tracing.StackFrame" @@ -44,11 +52,15 @@ class VariableContainer: return cls._all.get(id) def variables( - self, filter: VariableFilter, start: int = 0, count: Optional[int] = None + self, + filter: VariableFilter, + format: ValueFormat, + start: int = 0, + count: Optional[int] = None, ) -> Iterable["Variable"]: raise NotImplementedError - def set_variable(self, name: str, value: str) -> "Value": + def set_variable(self, name: str, value: str, format: ValueFormat) -> "Value": raise NotImplementedError @classmethod @@ -65,13 +77,13 @@ class VariableContainer: class Value(VariableContainer): value: object - inspector: ObjectInspector + format: ValueFormat # TODO: memoryReference, presentationHint - def __init__(self, frame: StackFrame, value: object): + def __init__(self, frame: StackFrame, value: object, format: ValueFormat): super().__init__(frame) self.value = value - self.inspector = inspect(value) + self.format = format def __getstate__(self) -> dict[str, object]: state = super().__getstate__() @@ -85,6 +97,10 @@ class Value(VariableContainer): ) return state + @property + def inspector(self) -> ObjectInspector: + return inspect(self.value, self.format) + @property def typename(self) -> str: try: @@ -93,10 +109,14 @@ class Value(VariableContainer): return "" def repr(self) -> str: - return "".join(self.inspector.repr()) + return self.inspector.repr() def variables( - self, filter: VariableFilter, start: int = 0, count: Optional[int] = None + self, + filter: VariableFilter, + format: ValueFormat, + start: int = 0, + count: Optional[int] = None, ) -> Iterable["Variable"]: stop = None if count is None else start + count log.info( @@ -107,15 +127,16 @@ class Value(VariableContainer): stop, ) + inspector = inspect(self.value, format) children = itertools.chain( - self.inspector.named_children() if "named" in filter else (), - self.inspector.indexed_children() if "indexed" in filter else (), + inspector.named_children() if "named" in filter else (), + inspector.indexed_children() if "indexed" in filter else (), ) children = itertools.islice(children, start, stop) for child in children: - yield Variable(self.frame, child.key, child.value) + yield Variable(self.frame, child.accessor(format), child.value, format) - def set_variable(self, name: str, value_expr: str) -> "Value": + def set_variable(self, name: str, value_expr: str, format: ValueFormat) -> "Value": value = self.frame.evaluate(value_expr) if name.startswith("[") and name.endswith("]"): key_expr = name[1:-1] @@ -125,7 +146,7 @@ class Value(VariableContainer): else: setattr(self.value, name, value) result = getattr(self.value, name) - return Value(self.frame, result) + return Value(self.frame, result, format) class Result(Value): @@ -139,8 +160,8 @@ class Variable(Value): name: str # TODO: evaluateName - def __init__(self, frame: StackFrame, name: str, value: object): - super().__init__(frame, value) + def __init__(self, frame: StackFrame, name: str, value: object, format: ValueFormat): + super().__init__(frame, value, format) self.name = name def __getstate__(self) -> dict[str, object]: @@ -171,4 +192,4 @@ class Scope(Variable): ctypes.py_object(frame.python_frame), ctypes.c_int(0) ) - super().__init__(frame, name, ScopeObject()) + super().__init__(frame, name, ScopeObject(), ValueFormat()) diff --git a/src/debugpy/server/inspect/__init__.py b/src/debugpy/server/inspect/__init__.py index 3de0dd0c..1d8bd25a 100644 --- a/src/debugpy/server/inspect/__init__.py +++ b/src/debugpy/server/inspect/__init__.py @@ -4,36 +4,105 @@ """ Object inspection: rendering values, enumerating children etc. + +This module provides a generic non-DAP-aware API with minimal dependencies, so that +it can be unit-tested in isolation without requiring a live debugpy session. + +debugpy.server.eval then wraps it in DAP-specific adapter classes that expose the +same functionality in DAP terms. """ -from collections.abc import Iterable +import io +import sys +from array import array +from collections import deque +from collections.abc import Iterable, Mapping + + +class ValueFormat: + hex: bool + """Whether integers should be rendered in hexadecimal.""" + + max_length: int + """ + Maximum length of the string representation of variable values, including values + of indices returned by IndexedChildObject.accessor(). + """ + + truncation_suffix: str + """Suffix to append to truncated string representations; counts towards max_length.""" + + def __init__( + self, + *, + hex: bool = False, + max_length: int = sys.maxsize, + truncation_suffix: str = "", + ): + assert max_length >= len(truncation_suffix) + self.hex = hex + self.max_length = max_length + self.truncation_suffix = truncation_suffix class ChildObject: - key: str + """ + Represents an object that is a child of another object that is accessible in some way. + """ + value: object def __init__(self, value: object): self.value = value + self.format = format + + def accessor(self, format: ValueFormat) -> str: + """ + Accessor used to retrieve this object. + + This is a display string and is not intended to be used for eval, but it should + generally correlate to the expression that can be used to retrieve the object in + some clear and obvious way. Some examples of accessors: + + "attr" - value.attr + "[key]" - value[key] + "len()" - len(value) + """ + raise NotImplementedError def expr(self, parent_expr: str) -> str: + """ + Returns an expression that can be used to retrieve this object from its parent, + given the expression to compute the parent. + """ raise NotImplementedError class NamedChildObject(ChildObject): + """ + Child object that has a predefined accessor used to access it. + + This includes not just attributes, but all children that do not require repr() of + index, key etc to compute the accessor. + """ + def __init__(self, name: str, value: object): super().__init__(value) - self.key = name + self.name = name - @property - def name(self) -> str: - return self.key + def accessor(self, format: ValueFormat) -> str: + return self.name def expr(self, parent_expr: str) -> str: - return f"({parent_expr}).{self.name}" + accessor = self.accessor(ValueFormat()) + return f"({parent_expr}).{accessor}" class LenChildObject(NamedChildObject): + """ + A synthetic child object that represents the return value of len(). + """ + def __init__(self, parent: object): super().__init__("len()", len(parent)) @@ -42,44 +111,69 @@ class LenChildObject(NamedChildObject): class IndexedChildObject(ChildObject): - key_object: object - indexer: str + """ + Child object that has a computed accessor. + """ + + key: object def __init__(self, key: object, value: object): super().__init__(value) - self.key_object = key + self.key = key self.indexer = None - @property - def key(self) -> str: - if self.indexer is None: - key_repr = "".join(inspect(self.key_object).repr()) - self.indexer = f"[{key_repr}]" - return self.indexer + def accessor(self, format: ValueFormat) -> str: + key_repr = inspect(self.key, format).repr() + return f"[{key_repr}]" def expr(self, parent_expr: str) -> str: - return f"({parent_expr}){self.key}" + accessor = self.accessor(ValueFormat()) + return f"({parent_expr}){accessor}" class ObjectInspector: """ - Inspects a generic object. Uses builtins.repr() to render values and dir() to enumerate children. + Inspects a generic object, providing access to its string representation and children. """ - obj: object + class ReprContext: + """ + Context for ObjectInspector.iter_repr(). + """ - def __init__(self, obj: object): - self.obj = obj + format: ValueFormat - def repr(self) -> Iterable[str]: - try: - result = repr(self.obj) - except BaseException as exc: - try: - result = f"" - except: - result = "" - yield result + chars_remaining: int + """ + How many more characters are allowed in the output. + + Implementations of ObjectInspector.iter_repr() can use this to optimize by yielding + larger chunks if there is enough space left for them. + """ + + nesting_level: int + """ + Nesting level of the current object being inspected. This is 0 for the top-level + object on which ObjectInspector.iter_repr() was called, and increases by 1 for each + call to ObjectInspector.nest(). + """ + + def __init__(self, inspector: "ObjectInspector"): + self.format = inspector.format + self.chars_remaining = self.format.max_length + self.nesting_level = 0 + + def nest(self, value: object): + self.nesting_level += 1 + yield from inspect(value, self.format).iter_repr(self) + self.nesting_level -= 1 + + value: object + format: ValueFormat + + def __init__(self, value: object, format: ValueFormat): + self.value = value + self.format = format def children(self) -> Iterable[ChildObject]: yield from self.named_children() @@ -87,7 +181,7 @@ class ObjectInspector: def indexed_children_count(self) -> int: try: - return len(self.obj) + return len(self.value) except: return 0 @@ -100,7 +194,7 @@ class ObjectInspector: def named_children(self) -> Iterable[NamedChildObject]: def attrs(): try: - names = dir(self.obj) + names = dir(self.value) except: names = () @@ -109,7 +203,7 @@ class ObjectInspector: if name.startswith("__"): continue try: - value = getattr(self.obj, name) + value = getattr(self.value, name) except BaseException as exc: value = exc try: @@ -120,23 +214,94 @@ class ObjectInspector: yield NamedChildObject(name, value) try: - yield LenChildObject(self.obj) + yield LenChildObject(self.value) except: pass return sorted(attrs(), key=lambda var: var.name) + def repr(self) -> str: + """ + repr() of the inspected object. Like builtins.repr(), but with additional + formatting options and size limit. + """ + context = self.ReprContext(self) + output = io.StringIO() + for chunk in self.iter_repr(context): + output.write(chunk) + context.chars_remaining -= len(chunk) + if context.chars_remaining < 0: + output.seek(self.format.max_length - len(self.format.truncation_suffix)) + output.truncate() + output.write(self.format.truncation_suffix) + break + return output.getvalue() -def inspect(obj: object) -> ObjectInspector: + def iter_repr(self, context: ReprContext) -> Iterable[str]: + """ + Streaming repr of the inspected object. Like builtins.repr(), but instead + of computing and returning the whole string right away, returns an iterator + that yields chunks of the repr as they are computed. + + When object being inspected contains other objects that it needs to include + in its own repr, it should pass the nested objects to context.nest() and + yield from the returned iterator. This will dispatch the nested repr to the + correct inspector, and make sure that context.nesting_level is updated as + needed while nested repr is being computed. + + When possible, implementations should use context.chars_remaining as a hint + to yield larger chunks. However, there is no obligation for iter_repr() to + yield chunks smaller than chars_remaining. + + The default implementation delegates to builtins.repr(), which will always + produce the correct result, but without any streaming. Derived inspectors + should always override this method to stream repr if possible. + """ + try: + result = repr(self.value) + except BaseException as exc: + try: + result = f"" + except: + result = "" + yield result + + +def inspect(value: object, format: ValueFormat) -> ObjectInspector: from debugpy.server.inspect import stdlib - # TODO: proper extensible registry - match obj: - case list(): - return stdlib.ListInspector(obj) - case {}: - return stdlib.MappingInspector(obj) - case [*_] | set() | frozenset() | str() | bytes() | bytearray(): - return stdlib.SequenceInspector(obj) - case _: - return ObjectInspector(obj) + # TODO: proper extensible registry with public API for debugpy plugins. + def get_inspector(): + # TODO: should subtypes of standard collections be treated the same? This works + # for fetching items, but gets repr() wrong - might have to split the two. + match value: + case int(): + return stdlib.IntInspector + case str(): + return stdlib.StrInspector + case bytes(): + return stdlib.BytesInspector + case bytearray(): + return stdlib.ByteArrayInspector + case tuple(): + return stdlib.TupleInspector + case list(): + return stdlib.ListInspector + case set(): + return stdlib.SetInspector + case frozenset(): + return stdlib.FrozenSetInspector + case array(): + return stdlib.ArrayInspector + case deque(): + return stdlib.DequeInspector + case dict(): + return stdlib.DictInspector + case Mapping(): + return stdlib.MappingInspector + case Iterable(): + return stdlib.IterableInspector + case _: + return ObjectInspector + + return get_inspector()(value, format) diff --git a/src/debugpy/server/inspect/stdlib.py b/src/debugpy/server/inspect/stdlib.py index a92be110..e583b518 100644 --- a/src/debugpy/server/inspect/stdlib.py +++ b/src/debugpy/server/inspect/stdlib.py @@ -4,19 +4,20 @@ """Object inspection for builtin Python types.""" -from collections.abc import Iterable +from collections.abc import Iterable, Mapping from itertools import count from debugpy.common import log from debugpy.server.inspect import ObjectInspector, IndexedChildObject -from debugpy.server.safe_repr import SafeRepr -class SequenceInspector(ObjectInspector): +class IterableInspector(ObjectInspector): + value: Iterable + def indexed_children(self) -> Iterable[IndexedChildObject]: yield from super().indexed_children() try: - it = iter(self.obj) + it = iter(self.value) except: return for i in count(): @@ -31,10 +32,12 @@ class SequenceInspector(ObjectInspector): class MappingInspector(ObjectInspector): + value: Mapping + def indexed_children(self) -> Iterable[IndexedChildObject]: yield from super().indexed_children() try: - keys = self.obj.keys() + keys = self.value.keys() except: return it = iter(keys) @@ -46,13 +49,158 @@ class MappingInspector(ObjectInspector): except: break try: - value = self.obj[key] + value = self.value[key] except BaseException as exc: value = exc yield IndexedChildObject(key, value) -class ListInspector(SequenceInspector): - def repr(self) -> Iterable[str]: - # TODO: move logic from SafeRepr here - yield SafeRepr()(self.obj) +class IterableInspectorWithRepr(IterableInspector): + def repr_prefix(self) -> str: + return type(self.value).__name__ + "((" + + def repr_suffix(self) -> str: + return "))" + + def repr_items(self) -> Iterable[object]: + return self.value + + def iter_repr(self, context: ObjectInspector.ReprContext) -> Iterable[str]: + yield self.repr_prefix() + for i, item in enumerate(self.value): + if i > 0: + yield ", " + yield from context.nest(item) + yield self.repr_suffix() + + +class MappingInspectorWithRepr(MappingInspector): + def repr_prefix(self) -> str: + return type(self.value).__name__ + "({" + + def repr_suffix(self) -> str: + return "})" + + def iter_repr(self, context: ObjectInspector.ReprContext) -> Iterable[str]: + yield self.repr_prefix() + for i, (key, value) in enumerate(self.value.items()): + if i > 0: + yield ", " + yield from context.nest(key) + yield ": " + yield from context.nest(value) + yield self.repr_suffix() + + +class StrLikeInspector(IterableInspector): + value: str | bytes | bytearray + + def repr_prefix(self) -> str: + return "'" + + def repr_suffix(self) -> str: + return "'" + + def indexed_children(self) -> Iterable[IndexedChildObject]: + if isinstance(self.value, str): + # Indexing str yields str, which is not very useful for debugging. + # What we want is to show the ordinal character values, similar + # to how it works for bytes & bytearray. + for i, ch in enumerate(self.value): + yield IndexedChildObject(i, ord(ch)) + else: + yield from super().indexed_children() + + def iter_repr(self, context: ObjectInspector.ReprContext) -> Iterable[str]: + prefix = self.repr_prefix() + suffix = self.repr_suffix() + yield prefix + i = 0 + while i < len(self.value): + # Optimistically assume that no escaping will be needed. + chunk_size = max(1, context.chars_remaining) + chunk = repr(self.value[i : i + chunk_size]) + yield chunk[len(prefix) : -len(suffix)] + i += chunk_size + yield suffix + + +class IntInspector(ObjectInspector): + value: int + + def iter_repr(self, context: ObjectInspector.ReprContext) -> Iterable[str]: + fs = "{:#x}" if self.format.hex else "{}" + yield fs.format(self.value) + + +class BytesInspector(StrLikeInspector): + def repr_prefix(self) -> str: + return "b'" + + +class ByteArrayInspector(StrLikeInspector): + def repr_prefix(self) -> str: + return "bytearray(b'" + + def repr_suffix(self) -> str: + return "')" + + +class StrInspector(StrLikeInspector): + def indexed_children(self) -> Iterable[IndexedChildObject]: + # Indexing str yields str, which is not very useful for debugging. We want + # to show the ordinal character values, similar to how it works for bytes. + for i, ch in enumerate(self.value): + yield IndexedChildObject(i, ord(ch)) + + +class ListInspector(IterableInspectorWithRepr): + def repr_prefix(self) -> str: + return "[" + + def repr_suffix(self) -> str: + return "]" + + +class TupleInspector(IterableInspectorWithRepr): + def repr_prefix(self) -> str: + return "(" + + def repr_suffix(self) -> str: + return ",)" if len(self.value) == 1 else ")" + + +class SetInspector(IterableInspectorWithRepr): + def repr_prefix(self) -> str: + return "{" + + def repr_suffix(self) -> str: + return "}" + + +class FrozenSetInspector(IterableInspectorWithRepr): + def repr_prefix(self) -> str: + return "frozenset({" + + def repr_suffix(self) -> str: + return "})" + + +class ArrayInspector(IterableInspectorWithRepr): + def repr_prefix(self) -> str: + return f"array({self.value.typecode!r}, (" + + def repr_suffix(self) -> str: + return "))" + + +class DequeInspector(IterableInspectorWithRepr): + pass + + +class DictInspector(MappingInspectorWithRepr): + def repr_prefix(self) -> str: + return "{" + + def repr_suffix(self) -> str: + return "}"