mirror of
https://github.com/python/cpython.git
synced 2025-08-02 16:13:13 +00:00
Issue #17909: Accept binary input in json.loads
json.loads (and hence json.load) now supports binary input encoded as UTF-8, UTF-16 or UTF-32. Patch by Serhiy Storchaka.
This commit is contained in:
parent
457fc9a69e
commit
b161562f72
6 changed files with 70 additions and 16 deletions
|
@ -268,8 +268,9 @@ Basic Usage
|
||||||
|
|
||||||
.. function:: loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
|
.. function:: loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
|
||||||
|
|
||||||
Deserialize *s* (a :class:`str` instance containing a JSON document) to a
|
Deserialize *s* (a :class:`str`, :class:`bytes` or :class:`bytearray`
|
||||||
Python object using this :ref:`conversion table <json-to-py-table>`.
|
instance containing a JSON document) to a Python object using this
|
||||||
|
:ref:`conversion table <json-to-py-table>`.
|
||||||
|
|
||||||
The other arguments have the same meaning as in :func:`load`, except
|
The other arguments have the same meaning as in :func:`load`, except
|
||||||
*encoding* which is ignored and deprecated.
|
*encoding* which is ignored and deprecated.
|
||||||
|
|
|
@ -680,6 +680,14 @@ restriction that :class:`importlib.machinery.BuiltinImporter` and
|
||||||
:term:`path-like object`.
|
:term:`path-like object`.
|
||||||
|
|
||||||
|
|
||||||
|
json
|
||||||
|
----
|
||||||
|
|
||||||
|
:func:`json.load` and :func:`json.loads` now support binary input. Encoded
|
||||||
|
JSON should be represented using either UTF-8, UTF-16, or UTF-32.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`17909`.)
|
||||||
|
|
||||||
|
|
||||||
os
|
os
|
||||||
--
|
--
|
||||||
|
|
||||||
|
|
|
@ -105,6 +105,7 @@ __author__ = 'Bob Ippolito <bob@redivi.com>'
|
||||||
|
|
||||||
from .decoder import JSONDecoder, JSONDecodeError
|
from .decoder import JSONDecoder, JSONDecodeError
|
||||||
from .encoder import JSONEncoder
|
from .encoder import JSONEncoder
|
||||||
|
import codecs
|
||||||
|
|
||||||
_default_encoder = JSONEncoder(
|
_default_encoder = JSONEncoder(
|
||||||
skipkeys=False,
|
skipkeys=False,
|
||||||
|
@ -240,6 +241,35 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
|
||||||
_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)
|
_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_encoding(b):
    """Return the name of the codec to use for decoding the JSON bytes *b*.

    *b* is a ``bytes`` or ``bytearray`` object.  A leading byte order mark
    decides the answer when present; otherwise the position of zero bytes
    among the first bytes is used to tell UTF-16 and UTF-32 (either byte
    order) apart from UTF-8.  The zero-byte heuristic assumes the document
    starts with an ASCII character.  ``'utf-8'`` is returned when nothing
    else matches, including for empty input.
    """
    bstartswith = b.startswith
    # The UTF-32 BOMs must be tested before the UTF-16 ones: the UTF-32-LE
    # BOM (FF FE 00 00) begins with the UTF-16-LE BOM (FF FE).
    if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
        return 'utf-32'
    if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
        return 'utf-16'
    if bstartswith(codecs.BOM_UTF8):
        return 'utf-8-sig'

    if len(b) >= 4:
        if not b[0]:
            # 00 00 -- -- - utf-32-be
            # 00 XX -- -- - utf-16-be
            return 'utf-16-be' if b[1] else 'utf-32-be'
        if not b[1]:
            # XX 00 00 00 - utf-32-le
            # XX 00 00 XX - utf-16-le
            # XX 00 XX -- - utf-16-le
            return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
    elif len(b) == 2:
        # Exactly one UTF-16 code unit; too short to be UTF-32.
        if not b[0]:
            # 00 XX - utf-16-be
            return 'utf-16-be'
        if not b[1]:
            # XX 00 - utf-16-le
            return 'utf-16-le'
    # default
    return 'utf-8'
|
||||||
|
|
||||||
|
|
||||||
def load(fp, *, cls=None, object_hook=None, parse_float=None,
|
def load(fp, *, cls=None, object_hook=None, parse_float=None,
|
||||||
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
|
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
|
||||||
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
|
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
|
||||||
|
@ -270,8 +300,8 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None,
|
||||||
|
|
||||||
def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None,
|
def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None,
|
||||||
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
|
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
|
||||||
"""Deserialize ``s`` (a ``str`` instance containing a JSON
|
"""Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
|
||||||
document) to a Python object.
|
containing a JSON document) to a Python object.
|
||||||
|
|
||||||
``object_hook`` is an optional function that will be called with the
|
``object_hook`` is an optional function that will be called with the
|
||||||
result of any object literal decode (a ``dict``). The return value of
|
result of any object literal decode (a ``dict``). The return value of
|
||||||
|
@ -307,12 +337,16 @@ def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None,
|
||||||
The ``encoding`` argument is ignored and deprecated.
|
The ``encoding`` argument is ignored and deprecated.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not isinstance(s, str):
|
if isinstance(s, str):
|
||||||
raise TypeError('the JSON object must be str, not {!r}'.format(
|
if s.startswith('\ufeff'):
|
||||||
s.__class__.__name__))
|
raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
|
||||||
if s.startswith(u'\ufeff'):
|
s, 0)
|
||||||
raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
|
else:
|
||||||
s, 0)
|
if not isinstance(s, (bytes, bytearray)):
|
||||||
|
raise TypeError('the JSON object must be str, bytes or bytearray, '
|
||||||
|
'not {!r}'.format(s.__class__.__name__))
|
||||||
|
s = s.decode(detect_encoding(s), 'surrogatepass')
|
||||||
|
|
||||||
if (cls is None and object_hook is None and
|
if (cls is None and object_hook is None and
|
||||||
parse_int is None and parse_float is None and
|
parse_int is None and parse_float is None and
|
||||||
parse_constant is None and object_pairs_hook is None and not kw):
|
parse_constant is None and object_pairs_hook is None and not kw):
|
||||||
|
|
|
@ -72,10 +72,8 @@ class TestDecode:
|
||||||
|
|
||||||
def test_invalid_input_type(self):
|
def test_invalid_input_type(self):
|
||||||
msg = 'the JSON object must be str'
|
msg = 'the JSON object must be str'
|
||||||
for value in [1, 3.14, b'bytes', b'\xff\x00', [], {}, None]:
|
for value in [1, 3.14, [], {}, None]:
|
||||||
self.assertRaisesRegex(TypeError, msg, self.loads, value)
|
self.assertRaisesRegex(TypeError, msg, self.loads, value)
|
||||||
with self.assertRaisesRegex(TypeError, msg):
|
|
||||||
self.json.load(BytesIO(b'[1,2,3]'))
|
|
||||||
|
|
||||||
def test_string_with_utf8_bom(self):
|
def test_string_with_utf8_bom(self):
|
||||||
# see #18958
|
# see #18958
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import codecs
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from test.test_json import PyTest, CTest
|
from test.test_json import PyTest, CTest
|
||||||
|
|
||||||
|
@ -52,9 +53,18 @@ class TestUnicode:
|
||||||
self.assertRaises(TypeError, self.dumps, [b"hi"])
|
self.assertRaises(TypeError, self.dumps, [b"hi"])
|
||||||
|
|
||||||
def test_bytes_decode(self):
|
def test_bytes_decode(self):
|
||||||
self.assertRaises(TypeError, self.loads, b'"hi"')
|
for encoding, bom in [
|
||||||
self.assertRaises(TypeError, self.loads, b'["hi"]')
|
('utf-8', codecs.BOM_UTF8),
|
||||||
|
('utf-16be', codecs.BOM_UTF16_BE),
|
||||||
|
('utf-16le', codecs.BOM_UTF16_LE),
|
||||||
|
('utf-32be', codecs.BOM_UTF32_BE),
|
||||||
|
('utf-32le', codecs.BOM_UTF32_LE),
|
||||||
|
]:
|
||||||
|
data = ["a\xb5\u20ac\U0001d120"]
|
||||||
|
encoded = self.dumps(data).encode(encoding)
|
||||||
|
self.assertEqual(self.loads(bom + encoded), data)
|
||||||
|
self.assertEqual(self.loads(encoded), data)
|
||||||
|
self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
|
||||||
|
|
||||||
def test_object_pairs_hook_with_unicode(self):
|
def test_object_pairs_hook_with_unicode(self):
|
||||||
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
|
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
|
||||||
|
|
|
@ -135,6 +135,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #17909: ``json.load`` and ``json.loads`` now support binary input
|
||||||
|
encoded as UTF-8, UTF-16 or UTF-32. Patch by Serhiy Storchaka.
|
||||||
|
|
||||||
- Issue #27137: the pure Python fallback implementation of ``functools.partial``
|
- Issue #27137: the pure Python fallback implementation of ``functools.partial``
|
||||||
now matches the behaviour of its accelerated C counterpart for subclassing,
|
now matches the behaviour of its accelerated C counterpart for subclassing,
|
||||||
pickling and text representation purposes. Patch by Emanuel Barry and
|
pickling and text representation purposes. Patch by Emanuel Barry and
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue