Issue #17909: Accept binary input in json.loads

json.loads (and hence json.load) now supports binary input
encoded as UTF-8, UTF-16 or UTF-32.

Patch by Serhiy Storchaka.
This commit is contained in:
Nick Coghlan 2016-09-10 20:16:18 +10:00
parent 457fc9a69e
commit b161562f72
6 changed files with 70 additions and 16 deletions

View file

@ -1,3 +1,4 @@
import codecs
from collections import OrderedDict
from test.test_json import PyTest, CTest
@ -52,9 +53,18 @@ class TestUnicode:
self.assertRaises(TypeError, self.dumps, [b"hi"])
def test_bytes_decode(self):
self.assertRaises(TypeError, self.loads, b'"hi"')
self.assertRaises(TypeError, self.loads, b'["hi"]')
for encoding, bom in [
('utf-8', codecs.BOM_UTF8),
('utf-16be', codecs.BOM_UTF16_BE),
('utf-16le', codecs.BOM_UTF16_LE),
('utf-32be', codecs.BOM_UTF32_BE),
('utf-32le', codecs.BOM_UTF32_LE),
]:
data = ["a\xb5\u20ac\U0001d120"]
encoded = self.dumps(data).encode(encoding)
self.assertEqual(self.loads(bom + encoded), data)
self.assertEqual(self.loads(encoded), data)
self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
def test_object_pairs_hook_with_unicode(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'