mirror of
https://github.com/python/cpython.git
synced 2025-07-24 19:54:21 +00:00

Integer to and from text conversions via CPython's bignum `int` type are not safe against denial of service attacks due to malicious input. Very large input strings with hundreds of thousands of digits can consume several CPU seconds. This PR comes fresh from a pile of work done in our private PSRT security response team repo. Signed-off-by: Christian Heimes [Red Hat] <christian@python.org> Tons-of-polishing-up-by: Gregory P. Smith [Google] <greg@krypto.org> Reviews via the private PSRT repo by many others (see the NEWS entry in the PR). <!-- gh-issue-number: gh-95778 --> * Issue: gh-95778 <!-- /gh-issue-number --> I wrote up [a one pager for the release managers](https://docs.google.com/document/d/1KjuF_aXlzPUxTK4BMgezGJ2Pn7uevfX7g0_mvgHlL7Y/edit#). Much of that text wound up in the Issue. Backport PRs already exist. See the issue for links.
108 lines
4.4 KiB
Python
108 lines
4.4 KiB
Python
import decimal
|
|
from io import StringIO
|
|
from collections import OrderedDict
|
|
from test.test_json import PyTest, CTest
|
|
from test import support
|
|
|
|
|
|
class TestDecode:
    """Decoder tests shared by the concrete test classes in this module.

    This class is a mixin: its methods rely on ``self.loads``, ``self.json``
    and ``self.JSONDecodeError`` as well as the unittest assertion methods,
    all of which must be supplied by the other base classes of the concrete
    subclasses.
    """

    def test_decimal(self):
        # parse_float must be used to build the value of a float literal.
        rval = self.loads('1.1', parse_float=decimal.Decimal)
        self.assertIsInstance(rval, decimal.Decimal)
        self.assertEqual(rval, decimal.Decimal('1.1'))

    def test_float(self):
        # parse_int must be used to build the value of an integer literal.
        rval = self.loads('1', parse_int=float)
        self.assertIsInstance(rval, float)
        self.assertEqual(rval, 1.0)

    def test_empty_objects(self):
        self.assertEqual(self.loads('{}'), {})
        self.assertEqual(self.loads('[]'), [])
        self.assertEqual(self.loads('""'), "")

    def test_object_pairs_hook(self):
        s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
        p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
             ("qrt", 5), ("pad", 6), ("hoy", 7)]
        # dict(p) is the same mapping that evaluating ``s`` as a Python
        # literal would produce, without having to call eval().
        self.assertEqual(self.loads(s), dict(p))
        self.assertEqual(self.loads(s, object_pairs_hook=lambda x: x), p)
        self.assertEqual(self.json.load(StringIO(s),
                                        object_pairs_hook=lambda x: x), p)
        od = self.loads(s, object_pairs_hook=OrderedDict)
        self.assertEqual(od, OrderedDict(p))
        self.assertIs(type(od), OrderedDict)
        # the object_pairs_hook takes priority over the object_hook
        self.assertEqual(self.loads(s, object_pairs_hook=OrderedDict,
                                    object_hook=lambda x: None),
                         OrderedDict(p))
        # check that empty object literals work (see #17368)
        self.assertEqual(self.loads('{}', object_pairs_hook=OrderedDict),
                         OrderedDict())
        self.assertEqual(self.loads('{"empty": {}}',
                                    object_pairs_hook=OrderedDict),
                         OrderedDict([('empty', OrderedDict())]))

    def test_decoder_optimizations(self):
        # Several optimizations were made that skip over calls to
        # the whitespace regex, so this test is designed to try and
        # exercise the uncommon cases. The array cases are already covered.
        rval = self.loads('{   "key"    :    "value"    ,  "k":"v"    }')
        self.assertEqual(rval, {"key":"value", "k":"v"})

    def check_keys_reuse(self, source, loads):
        # Helper: decode ``source`` (an array of two objects that have the
        # same two keys) with ``loads`` and check that both decoded dicts
        # share the very same key objects.
        rval = loads(source)
        (a, b), (c, d) = sorted(rval[0]), sorted(rval[1])
        self.assertIs(a, c)
        self.assertIs(b, d)

    def test_keys_reuse(self):
        s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
        self.check_keys_reuse(s, self.loads)
        decoder = self.json.decoder.JSONDecoder()
        self.check_keys_reuse(s, decoder.decode)
        # The decoder's memo must be empty again once decoding is done.
        self.assertFalse(decoder.memo)

    def test_extra_data(self):
        s = '[1, 2, 3]5'
        msg = 'Extra data'
        self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)

    def test_invalid_escape(self):
        s = '["abc\\y"]'
        msg = 'escape'
        self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)

    def test_invalid_input_type(self):
        msg = 'the JSON object must be str'
        for value in [1, 3.14, [], {}, None]:
            # subTest reports every offending value instead of stopping
            # at the first failure.
            with self.subTest(value=value):
                self.assertRaisesRegex(TypeError, msg, self.loads, value)

    def test_string_with_utf8_bom(self):
        # see #18958
        bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')
        with self.assertRaises(self.JSONDecodeError) as cm:
            self.loads(bom_json)
        self.assertIn('BOM', str(cm.exception))
        with self.assertRaises(self.JSONDecodeError) as cm:
            self.json.load(StringIO(bom_json))
        self.assertIn('BOM', str(cm.exception))
        # make sure that the BOM is not detected in the middle of a string
        bom_in_str = '"{}"'.format(''.encode('utf-8-sig').decode('utf-8'))
        self.assertEqual(self.loads(bom_in_str), '\ufeff')
        self.assertEqual(self.json.load(StringIO(bom_in_str)), '\ufeff')

    def test_negative_index(self):
        d = self.json.JSONDecoder()
        self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000)

    def test_limit_int(self):
        maxdigits = 5000
        # Both checks run inside the adjusted limit: exactly ``maxdigits``
        # digits must decode, one digit more must be rejected.
        with support.adjust_int_max_str_digits(maxdigits):
            self.loads('1' * maxdigits)
            with self.assertRaises(ValueError):
                self.loads('1' * (maxdigits + 1))
|
|
|
|
|
|
class TestPyDecode(TestDecode, PyTest):
    """Run the shared TestDecode cases on top of the PyTest base class."""
|
|
class TestCDecode(TestDecode, CTest):
    """Run the shared TestDecode cases on top of the CTest base class."""
|