From 4692041e6a09e91ef611836870d9f4097b2a8ae1 Mon Sep 17 00:00:00 2001 From: Pavel Minaev Date: Tue, 6 Feb 2018 16:41:09 -0800 Subject: [PATCH] Fix #19: Variable value includes type Use SafeRepr implementation from old version of ptvsd to format values. --- ptvsd/safe_repr.py | 456 +++++++++++++++++++++++++++++++++++++++++++++ ptvsd/wrapper.py | 20 ++ 2 files changed, 476 insertions(+) create mode 100644 ptvsd/safe_repr.py diff --git a/ptvsd/safe_repr.py b/ptvsd/safe_repr.py new file mode 100644 index 00000000..cf3456f7 --- /dev/null +++ b/ptvsd/safe_repr.py @@ -0,0 +1,456 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in the project root +# for license information. + +__author__ = "Microsoft Corporation " +__version__ = "4.0.0a1" + +import sys + + +# Py3 compat - alias unicode to str, and xrange to range +try: + unicode +except: + unicode = str +try: + xrange +except: + xrange = range + + +class SafeRepr(object): + # String types are truncated to maxstring_outer when at the outer- + # most level, and truncated to maxstring_inner characters inside + # collections. + maxstring_outer = 2 ** 16 + maxstring_inner = 30 + if sys.version_info >= (3, 0): + string_types = (str, bytes) + set_info = (set, '{', '}', False) + frozenset_info = (frozenset, 'frozenset({', '})', False) + else: + string_types = (str, unicode) + set_info = (set, 'set([', '])', False) + frozenset_info = (frozenset, 'frozenset([', '])', False) + + # Collection types are recursively iterated for each limit in + # maxcollection. + maxcollection = (15, 10) + + # Specifies type, prefix string, suffix string, and whether to include a + # comma if there is only one element. (Using a sequence rather than a + # mapping because we use isinstance() to determine the matching type.) + collection_types = [ + (tuple, '(', ')', True), + (list, '[', ']', False), + frozenset_info, + set_info, + ] + try: + from collections import deque + collection_types.append((deque, 'deque([', '])', False)) + except: + pass + + # type, prefix string, suffix string, item prefix string, item key/value separator, item suffix string + dict_types = [(dict, '{', '}', '', ': ', '')] + try: + from collections import OrderedDict + dict_types.append((OrderedDict, 'OrderedDict([', '])', '(', ', ', ')')) + except: + pass + + # All other types are treated identically to strings, but using + # different limits. + maxother_outer = 2 ** 16 + maxother_inner = 30 + + def __call__(self, obj): + try: + return ''.join(self._repr(obj, 0)) + except: + try: + return 'An exception was raised: %r' % sys.exc_info()[1] + except: + return 'An exception was raised' + + def _repr(self, obj, level): + '''Returns an iterable of the parts in the final repr string.''' + + try: + obj_repr = type(obj).__repr__ + except: + obj_repr = None + + def has_obj_repr(t): + r = t.__repr__ + try: + return obj_repr == r + except: + return obj_repr is r + + for t, prefix, suffix, comma in self.collection_types: + if isinstance(obj, t) and has_obj_repr(t): + return self._repr_iter(obj, level, prefix, suffix, comma) + + for t, prefix, suffix, item_prefix, item_sep, item_suffix in self.dict_types: + if isinstance(obj, t) and has_obj_repr(t): + return self._repr_dict(obj, level, prefix, suffix, item_prefix, item_sep, item_suffix) + + for t in self.string_types: + if isinstance(obj, t) and has_obj_repr(t): + return self._repr_str(obj, level) + + if self._is_long_iter(obj): + return self._repr_long_iter(obj) + + return self._repr_other(obj, level) + + # Determines whether an iterable exceeds the limits set in maxlimits, and is therefore unsafe to repr(). + def _is_long_iter(self, obj, level = 0): + try: + # Strings have their own limits (and do not nest). Because they don't have __iter__ in 2.x, this + # check goes before the next one. + if isinstance(obj, self.string_types): + return len(obj) > self.maxstring_inner + + # If it's not an iterable (and not a string), it's fine. + if not hasattr(obj, '__iter__'): + return False + + # Iterable is its own iterator - this is a one-off iterable like generator or enumerate(). We can't + # really count that, but repr() for these should not include any elements anyway, so we can treat it + # the same as non-iterables. + if obj is iter(obj): + return False + + # xrange reprs fine regardless of length. + if isinstance(obj, xrange): + return False + + # numpy and scipy collections (ndarray etc) have self-truncating repr, so they're always safe. + try: + module = type(obj).__module__.partition('.')[0] + if module in ('numpy', 'scipy'): + return False + except: + pass + + # Iterables that nest too deep are considered long. + if level >= len(self.maxcollection): + return True + + # It is too long if the length exceeds the limit, or any of its elements are long iterables. + if hasattr(obj, '__len__'): + try: + l = len(obj) + except: + l = None + if l is not None and l > self.maxcollection[level]: + return True + return any((self._is_long_iter(item, level + 1) for item in obj)) + return any(i > self.maxcollection[level] or self._is_long_iter(item, level + 1) for i, item in enumerate(obj)) + + except: + # If anything breaks, assume the worst case. + return True + + def _repr_iter(self, obj, level, prefix, suffix, comma_after_single_element = False): + yield prefix + + if level >= len(self.maxcollection): + yield '...' + else: + count = self.maxcollection[level] + yield_comma = False + for item in obj: + if yield_comma: + yield ', ' + yield_comma = True + + count -= 1 + if count <= 0: + yield '...' + break + + for p in self._repr(item, 100 if item is obj else level + 1): + yield p + else: + if comma_after_single_element and count == self.maxcollection[level] - 1: + yield ',' + yield suffix + + def _repr_long_iter(self, obj): + try: + obj_repr = '<%s, len() = %s>' % (type(obj).__name__, len(obj)) + except: + try: + obj_repr = '<' + type(obj).__name__ + '>' + except: + obj_repr = '' + yield obj_repr + + def _repr_dict(self, obj, level, prefix, suffix, item_prefix, item_sep, item_suffix): + if not obj: + yield prefix + suffix + return + if level >= len(self.maxcollection): + yield prefix + '...' + suffix + return + + yield prefix + + count = self.maxcollection[level] + yield_comma = False + + try: + sorted_keys = sorted(obj) + except Exception: + sorted_keys = list(obj) + + for key in sorted_keys: + if yield_comma: + yield ', ' + yield_comma = True + + count -= 1 + if count <= 0: + yield '...' + break + + yield item_prefix + for p in self._repr(key, level + 1): + yield p + + yield item_sep + + try: + item = obj[key] + except Exception: + yield '' + else: + for p in self._repr(item, 100 if item is obj else level + 1): + yield p + yield item_suffix + + yield suffix + + def _repr_str(self, obj, level): + return self._repr_obj(obj, level, self.maxstring_inner, self.maxstring_outer) + + def _repr_other(self, obj, level): + return self._repr_obj(obj, level, self.maxother_inner, self.maxother_outer) + + def _repr_obj(self, obj, level, limit_inner, limit_outer): + try: + obj_repr = repr(obj) + except: + try: + obj_repr = object.__repr__(obj) + except: + try: + obj_repr = '' + except: + obj_repr = '' + + limit = limit_inner if level > 0 else limit_outer + + if limit >= len(obj_repr): + yield obj_repr + return + + # Slightly imprecise calculations - we may end up with a string that is + # up to 3 characters longer than limit. If you need precise formatting, + # you are using the wrong class. + left_count, right_count = max(1, int(2 * limit / 3)), max(1, int(limit / 3)) + + yield obj_repr[:left_count] + yield '...' + yield obj_repr[-right_count:] + + + def _selftest(self): + # Test the string limiting somewhat automatically + tests = [] + tests.append((7, 9, 'A' * (5))) + tests.append((self.maxstring_outer + 3, self.maxstring_inner + 3 + 2, 'A' * (self.maxstring_outer + 10))) + if sys.version_info >= (3, 0): + tests.append((self.maxstring_outer + 4, self.maxstring_inner + 4 + 2, bytes('A', 'ascii') * (self.maxstring_outer + 10))) + else: + tests.append((self.maxstring_outer + 4, self.maxstring_inner + 4 + 2, unicode('A') * (self.maxstring_outer + 10))) + + for limit1, limit2, value in tests: + assert len(self(value)) <= limit1 <= len(repr(value)), (len(self(value)), limit1, len(repr(value)), value) + assert len(self([value])) <= limit2 <= len(repr([value])), (len(self([value])), limit2, len(repr([value])), self([value])) + + def test(source, expected): + actual = self(source) + if actual != expected: + print("Source " + repr(source)) + print("Expect " + expected) + print("Actual " + actual) + print("") + assert False + + def re_test(source, pattern): + import re + actual = self(source) + if not re.match(pattern, actual): + print("Source " + repr(source)) + print("Pattern " + pattern) + print("Actual " + actual) + print("") + assert False + + for ctype, _prefix, _suffix, comma in self.collection_types: + for i in range(len(self.maxcollection)): + prefix = _prefix * (i + 1) + if comma: + suffix = _suffix + ("," + _suffix) * i + else: + suffix = _suffix * (i + 1) + #print("ctype = " + ctype.__name__ + ", maxcollection[" + str(i) + "] == " + str(self.maxcollection[i])) + c1 = ctype(range(self.maxcollection[i] - 1)) + inner_repr = prefix + ', '.join(str(j) for j in c1) + c2 = ctype(range(self.maxcollection[i])) + c3 = ctype(range(self.maxcollection[i] + 1)) + for j in range(i): + c1, c2, c3 = ctype((c1,)), ctype((c2,)), ctype((c3,)) + test(c1, inner_repr + suffix) + test(c2, inner_repr + ", ..." + suffix) + test(c3, inner_repr + ", ..." + suffix) + + if ctype is set: + # Cannot recursively add sets to sets + break + + # Assume that all tests apply equally to all iterable types and only + # test with lists. + c1 = list(range(self.maxcollection[0] * 2)) + c2 = [c1 for _ in range(self.maxcollection[0] * 2)] + c1_expect = '[' + ', '.join(str(j) for j in range(self.maxcollection[0] - 1)) + ', ...]' + test(c1, c1_expect) + c1_expect2 = '[' + ', '.join(str(j) for j in range(self.maxcollection[1] - 1)) + ', ...]' + c2_expect = '[' + ', '.join(c1_expect2 for _ in range(self.maxcollection[0] - 1)) + ', ...]' + test(c2, c2_expect) + + # Ensure dict keys and values are limited correctly + d1 = {} + d1_key = 'a' * self.maxstring_inner * 2 + d1[d1_key] = d1_key + re_test(d1, "{'a+\.\.\.a+': 'a+\.\.\.a+'}") + d2 = {d1_key : d1} + re_test(d2, "{'a+\.\.\.a+': {'a+\.\.\.a+': 'a+\.\.\.a+'}}") + d3 = {d1_key : d2} + if len(self.maxcollection) == 2: + re_test(d3, "{'a+\.\.\.a+': {'a+\.\.\.a+': {\.\.\.}}}") + else: + re_test(d3, "{'a+\.\.\.a+': {'a+\.\.\.a+': {'a+\.\.\.a+': 'a+\.\.\.a+'}}}") + + # Ensure empty dicts work + test({}, '{}') + + # Ensure dict keys are sorted + d1 = {} + d1['c'] = None + d1['b'] = None + d1['a'] = None + test(d1, "{'a': None, 'b': None, 'c': None}") + + if sys.version_info >= (3, 0): + # Ensure dicts with unsortable keys do not crash + d1 = {} + for _ in range(100): + d1[object()] = None + try: + list(sorted(d1)) + assert False, "d1.keys() should be unorderable" + except TypeError: + pass + self(d1) + + # Test with objects with broken repr implementations + class TestClass(object): + def __repr__(self): + raise NameError + try: + repr(TestClass()) + assert False, "TestClass().__repr__ should have thrown" + except NameError: + pass + self(TestClass()) + + # Test with objects with long repr implementations + class TestClass(object): + repr_str = '<' + 'A' * self.maxother_outer * 2 + '>' + def __repr__(self): + return self.repr_str + re_test(TestClass(), r'\') + + # Test collections that don't override repr + class TestClass(dict): pass + test(TestClass(), '{}') + class TestClass(list): pass + test(TestClass(), '[]') + + # Test collections that override repr + class TestClass(dict): + def __repr__(self): return 'MyRepr' + test(TestClass(), 'MyRepr') + class TestClass(list): + def __init__(self, iter = ()): list.__init__(self, iter) + def __repr__(self): return 'MyRepr' + test(TestClass(), 'MyRepr') + + # Test collections and iterables with long repr + test(TestClass(xrange(0, 15)), 'MyRepr') + test(TestClass(xrange(0, 16)), '') + test(TestClass([TestClass(xrange(0, 10))]), 'MyRepr') + test(TestClass([TestClass(xrange(0, 11))]), '') + + # Test strings inside long iterables + test(TestClass(['a' * (self.maxcollection[1] + 1)]), 'MyRepr') + test(TestClass(['a' * (self.maxstring_inner + 1)]), '') + + # Test range + if sys.version[0] == '2': + range_name = 'xrange' + else: + range_name = 'range' + test(xrange(1, self.maxcollection[0] + 1), '%s(1, %s)' % (range_name, self.maxcollection[0] + 1)) + + # Test directly recursive collections + c1 = [1, 2] + c1.append(c1) + test(c1, '[1, 2, [...]]') + d1 = {1: None} + d1[2] = d1 + test(d1, '{1: None, 2: {...}}') + + # Find the largest possible repr and ensure it is below our arbitrary + # limit (8KB). + coll = '-' * (self.maxstring_outer * 2) + for limit in reversed(self.maxcollection[1:]): + coll = [coll] * (limit * 2) + dcoll = {} + for i in range(self.maxcollection[0]): + dcoll[str(i) * self.maxstring_outer] = coll + text = self(dcoll) + #try: + # text_repr = repr(dcoll) + #except MemoryError: + # print('Memory error raised while creating repr of test data') + # text_repr = '' + #print('len(SafeRepr()(dcoll)) = ' + str(len(text)) + ', len(repr(coll)) = ' + str(len(text_repr))) + assert len(text) < 8192 + + # Test numpy types - they should all use their native reprs, even arrays exceeding limits + try: + import numpy as np + except ImportError: + print('WARNING! could not import numpy - skipping all numpy tests.') + else: + test(np.int32(123), repr(np.int32(123))) + test(np.float64(123.456), repr(np.float64(123.456))) + test(np.zeros(self.maxcollection[0] + 1), repr(np.zeros(self.maxcollection[0] + 1))); diff --git a/ptvsd/wrapper.py b/ptvsd/wrapper.py index a2a9d63a..09b2f13e 100644 --- a/ptvsd/wrapper.py +++ b/ptvsd/wrapper.py @@ -22,6 +22,8 @@ except Exception: import urllib.parse as urllib import _pydevd_bundle.pydevd_comm as pydevd_comm +import _pydevd_bundle.pydevd_extension_api as pydevd_extapi +import _pydevd_bundle.pydevd_extension_utils as pydevd_extutil #from _pydevd_bundle.pydevd_comm import pydevd_log import ptvsd.ipcjson as ipcjson @@ -825,3 +827,21 @@ def start_client(host, port): # These are the functions pydevd invokes to get a socket to the client. pydevd_comm.start_server = start_server pydevd_comm.start_client = start_client + + +class SafeReprPresentationProvider(pydevd_extapi.StrPresentationProvider): + """Computes string representation of Python values by delegating them to SafeRepr.""" + + def __init__(self): + from ptvsd.safe_repr import SafeRepr + self.safe_repr = SafeRepr() + + def can_provide(self, type_object, type_name): + return True + + def get_str(self, val): + return self.safe_repr(val) + +# Register our presentation provider as the first item on the list, so that we're in full control of presentation. +str_handlers = pydevd_extutil.EXTENSION_MANAGER_INSTANCE.type_to_instance.setdefault(pydevd_extapi.StrPresentationProvider, []) +str_handlers.insert(0, SafeReprPresentationProvider())