mirror of
https://github.com/python/cpython.git
synced 2025-11-25 04:34:37 +00:00
[3.14] gh-140793: Improve documentation and tests for the ensure_ascii option in the json module (GH-140906) (GH-141227)
* Document that ensure_ascii=True forces escaping not only non-ASCII, but also
non-printable characters (the only affected ASCII character is U+007F).
* Ensure that the help output for the json module does not exceed 80
columns (except one long line in an example and generated lines).
* Add more tests.
(cherry picked from commit 7e90bac3cc)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
de00dde9de
commit
24619962e0
6 changed files with 89 additions and 43 deletions
|
|
@ -183,8 +183,10 @@ Basic Usage
|
|||
|
||||
:param bool ensure_ascii:
|
||||
If ``True`` (the default), the output is guaranteed to
|
||||
have all incoming non-ASCII characters escaped.
|
||||
If ``False``, these characters will be outputted as-is.
|
||||
have all incoming non-ASCII and non-printable characters escaped.
|
||||
If ``False``, all characters will be output as-is, except for
|
||||
the characters that must be escaped: quotation mark, reverse solidus,
|
||||
and the control characters U+0000 through U+001F.
|
||||
|
||||
:param bool check_circular:
|
||||
If ``False``, the circular reference check for container types is skipped
|
||||
|
|
@ -495,8 +497,10 @@ Encoders and Decoders
|
|||
:class:`bool` or ``None``. If *skipkeys* is true, such items are simply skipped.
|
||||
|
||||
If *ensure_ascii* is true (the default), the output is guaranteed to
|
||||
have all incoming non-ASCII characters escaped. If *ensure_ascii* is
|
||||
false, these characters will be output as-is.
|
||||
have all incoming non-ASCII and non-printable characters escaped.
|
||||
If *ensure_ascii* is false, all characters will be output as-is, except for
|
||||
the characters that must be escaped: quotation mark, reverse solidus,
|
||||
and the control characters U+0000 through U+001F.
|
||||
|
||||
If *check_circular* is true (the default), then lists, dicts, and custom
|
||||
encoded objects will be checked for circular references during encoding to
|
||||
|
|
@ -636,7 +640,7 @@ UTF-32, with UTF-8 being the recommended default for maximum interoperability.
|
|||
|
||||
As permitted, though not required, by the RFC, this module's serializer sets
|
||||
*ensure_ascii=True* by default, thus escaping the output so that the resulting
|
||||
strings only contain ASCII characters.
|
||||
strings only contain printable ASCII characters.
|
||||
|
||||
Other than the *ensure_ascii* parameter, this module is defined strictly in
|
||||
terms of conversion between Python objects and
|
||||
|
|
|
|||
|
|
@ -128,8 +128,9 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
|
|||
instead of raising a ``TypeError``.
|
||||
|
||||
If ``ensure_ascii`` is false, then the strings written to ``fp`` can
|
||||
contain non-ASCII characters if they appear in strings contained in
|
||||
``obj``. Otherwise, all such characters are escaped in JSON strings.
|
||||
contain non-ASCII and non-printable characters if they appear in strings
|
||||
contained in ``obj``. Otherwise, all such characters are escaped in JSON
|
||||
strings.
|
||||
|
||||
If ``check_circular`` is false, then the circular reference check
|
||||
for container types will be skipped and a circular reference will
|
||||
|
|
@ -145,10 +146,11 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
|
|||
level of 0 will only insert newlines. ``None`` is the most compact
|
||||
representation.
|
||||
|
||||
If specified, ``separators`` should be an ``(item_separator, key_separator)``
|
||||
tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and
|
||||
``(',', ': ')`` otherwise. To get the most compact JSON representation,
|
||||
you should specify ``(',', ':')`` to eliminate whitespace.
|
||||
If specified, ``separators`` should be an ``(item_separator,
|
||||
key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is
|
||||
``None`` and ``(',', ': ')`` otherwise. To get the most compact JSON
|
||||
representation, you should specify ``(',', ':')`` to eliminate
|
||||
whitespace.
|
||||
|
||||
``default(obj)`` is a function that should return a serializable version
|
||||
of obj or raise TypeError. The default simply raises TypeError.
|
||||
|
|
@ -189,9 +191,10 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
|
|||
(``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
|
||||
instead of raising a ``TypeError``.
|
||||
|
||||
If ``ensure_ascii`` is false, then the return value can contain non-ASCII
|
||||
characters if they appear in strings contained in ``obj``. Otherwise, all
|
||||
such characters are escaped in JSON strings.
|
||||
If ``ensure_ascii`` is false, then the return value can contain
|
||||
non-ASCII and non-printable characters if they appear in strings
|
||||
contained in ``obj``. Otherwise, all such characters are escaped in
|
||||
JSON strings.
|
||||
|
||||
If ``check_circular`` is false, then the circular reference check
|
||||
for container types will be skipped and a circular reference will
|
||||
|
|
@ -207,10 +210,11 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
|
|||
level of 0 will only insert newlines. ``None`` is the most compact
|
||||
representation.
|
||||
|
||||
If specified, ``separators`` should be an ``(item_separator, key_separator)``
|
||||
tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and
|
||||
``(',', ': ')`` otherwise. To get the most compact JSON representation,
|
||||
you should specify ``(',', ':')`` to eliminate whitespace.
|
||||
If specified, ``separators`` should be an ``(item_separator,
|
||||
key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is
|
||||
``None`` and ``(',', ': ')`` otherwise. To get the most compact JSON
|
||||
representation, you should specify ``(',', ':')`` to eliminate
|
||||
whitespace.
|
||||
|
||||
``default(obj)`` is a function that should return a serializable version
|
||||
of obj or raise TypeError. The default simply raises TypeError.
|
||||
|
|
@ -281,11 +285,12 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None,
|
|||
``object_hook`` will be used instead of the ``dict``. This feature
|
||||
can be used to implement custom decoders (e.g. JSON-RPC class hinting).
|
||||
|
||||
``object_pairs_hook`` is an optional function that will be called with the
|
||||
result of any object literal decoded with an ordered list of pairs. The
|
||||
return value of ``object_pairs_hook`` will be used instead of the ``dict``.
|
||||
This feature can be used to implement custom decoders. If ``object_hook``
|
||||
is also defined, the ``object_pairs_hook`` takes priority.
|
||||
``object_pairs_hook`` is an optional function that will be called with
|
||||
the result of any object literal decoded with an ordered list of pairs.
|
||||
The return value of ``object_pairs_hook`` will be used instead of the
|
||||
``dict``. This feature can be used to implement custom decoders. If
|
||||
``object_hook`` is also defined, the ``object_pairs_hook`` takes
|
||||
priority.
|
||||
|
||||
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
|
||||
kwarg; otherwise ``JSONDecoder`` is used.
|
||||
|
|
@ -306,11 +311,12 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
|
|||
``object_hook`` will be used instead of the ``dict``. This feature
|
||||
can be used to implement custom decoders (e.g. JSON-RPC class hinting).
|
||||
|
||||
``object_pairs_hook`` is an optional function that will be called with the
|
||||
result of any object literal decoded with an ordered list of pairs. The
|
||||
return value of ``object_pairs_hook`` will be used instead of the ``dict``.
|
||||
This feature can be used to implement custom decoders. If ``object_hook``
|
||||
is also defined, the ``object_pairs_hook`` takes priority.
|
||||
``object_pairs_hook`` is an optional function that will be called with
|
||||
the result of any object literal decoded with an ordered list of pairs.
|
||||
The return value of ``object_pairs_hook`` will be used instead of the
|
||||
``dict``. This feature can be used to implement custom decoders. If
|
||||
``object_hook`` is also defined, the ``object_pairs_hook`` takes
|
||||
priority.
|
||||
|
||||
``parse_float``, if specified, will be called with the string
|
||||
of every JSON float to be decoded. By default this is equivalent to
|
||||
|
|
|
|||
|
|
@ -297,10 +297,10 @@ class JSONDecoder(object):
|
|||
place of the given ``dict``. This can be used to provide custom
|
||||
deserializations (e.g. to support JSON-RPC class hinting).
|
||||
|
||||
``object_pairs_hook``, if specified will be called with the result of
|
||||
every JSON object decoded with an ordered list of pairs. The return
|
||||
value of ``object_pairs_hook`` will be used instead of the ``dict``.
|
||||
This feature can be used to implement custom decoders.
|
||||
``object_pairs_hook``, if specified, will be called with the result
|
||||
of every JSON object decoded with an ordered list of pairs. The
|
||||
return value of ``object_pairs_hook`` will be used instead of the
|
||||
``dict``. This feature can be used to implement custom decoders.
|
||||
If ``object_hook`` is also defined, the ``object_pairs_hook`` takes
|
||||
priority.
|
||||
|
||||
|
|
|
|||
|
|
@ -111,9 +111,10 @@ class JSONEncoder(object):
|
|||
encoding of keys that are not str, int, float, bool or None.
|
||||
If skipkeys is True, such items are simply skipped.
|
||||
|
||||
If ensure_ascii is true, the output is guaranteed to be str
|
||||
objects with all incoming non-ASCII characters escaped. If
|
||||
ensure_ascii is false, the output can contain non-ASCII characters.
|
||||
If ensure_ascii is true, the output is guaranteed to be str objects
|
||||
with all incoming non-ASCII and non-printable characters escaped.
|
||||
If ensure_ascii is false, the output can contain non-ASCII and
|
||||
non-printable characters.
|
||||
|
||||
If check_circular is true, then lists, dicts, and custom encoded
|
||||
objects will be checked for circular references during encoding to
|
||||
|
|
@ -134,14 +135,15 @@ class JSONEncoder(object):
|
|||
indent level. An indent level of 0 will only insert newlines.
|
||||
None is the most compact representation.
|
||||
|
||||
If specified, separators should be an (item_separator, key_separator)
|
||||
tuple. The default is (', ', ': ') if *indent* is ``None`` and
|
||||
(',', ': ') otherwise. To get the most compact JSON representation,
|
||||
you should specify (',', ':') to eliminate whitespace.
|
||||
If specified, separators should be an (item_separator,
|
||||
key_separator) tuple. The default is (', ', ': ') if *indent* is
|
||||
``None`` and (',', ': ') otherwise. To get the most compact JSON
|
||||
representation, you should specify (',', ':') to eliminate
|
||||
whitespace.
|
||||
|
||||
If specified, default is a function that gets called for objects
|
||||
that can't otherwise be serialized. It should return a JSON encodable
|
||||
version of the object or raise a ``TypeError``.
|
||||
that can't otherwise be serialized. It should return a JSON
|
||||
encodable version of the object or raise a ``TypeError``.
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -8,13 +8,12 @@ CASES = [
|
|||
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
|
||||
('controls', '"controls"'),
|
||||
('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
|
||||
('\x00\x1f\x7f', '"\\u0000\\u001f\\u007f"'),
|
||||
('{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
|
||||
(' s p a c e d ', '" s p a c e d "'),
|
||||
('\U0001d120', '"\\ud834\\udd20"'),
|
||||
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
|
||||
("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
|
||||
('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
|
||||
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
|
||||
]
|
||||
|
||||
class TestEncodeBasestringAscii:
|
||||
|
|
|
|||
|
|
@ -32,6 +32,29 @@ class TestUnicode:
|
|||
j = self.dumps(u + "\n", ensure_ascii=False)
|
||||
self.assertEqual(j, f'"{u}\\n"')
|
||||
|
||||
def test_ascii_non_printable_encode(self):
|
||||
u = '\b\t\n\f\r\x00\x1f\x7f'
|
||||
self.assertEqual(self.dumps(u),
|
||||
'"\\b\\t\\n\\f\\r\\u0000\\u001f\\u007f"')
|
||||
self.assertEqual(self.dumps(u, ensure_ascii=False),
|
||||
'"\\b\\t\\n\\f\\r\\u0000\\u001f\x7f"')
|
||||
|
||||
def test_ascii_non_printable_decode(self):
|
||||
self.assertEqual(self.loads('"\\b\\t\\n\\f\\r"'),
|
||||
'\b\t\n\f\r')
|
||||
s = ''.join(map(chr, range(32)))
|
||||
for c in s:
|
||||
self.assertRaises(self.JSONDecodeError, self.loads, f'"{c}"')
|
||||
self.assertEqual(self.loads(f'"{s}"', strict=False), s)
|
||||
self.assertEqual(self.loads('"\x7f"'), '\x7f')
|
||||
|
||||
def test_escaped_decode(self):
|
||||
self.assertEqual(self.loads('"\\b\\t\\n\\f\\r"'), '\b\t\n\f\r')
|
||||
self.assertEqual(self.loads('"\\"\\\\\\/"'), '"\\/')
|
||||
for c in set(map(chr, range(0x100))) - set('"\\/bfnrt'):
|
||||
self.assertRaises(self.JSONDecodeError, self.loads, f'"\\{c}"')
|
||||
self.assertRaises(self.JSONDecodeError, self.loads, f'"\\{c}"', strict=False)
|
||||
|
||||
def test_big_unicode_encode(self):
|
||||
u = '\U0001d120'
|
||||
self.assertEqual(self.dumps(u), '"\\ud834\\udd20"')
|
||||
|
|
@ -48,6 +71,18 @@ class TestUnicode:
|
|||
s = f'"\\u{i:04x}"'
|
||||
self.assertEqual(self.loads(s), u)
|
||||
|
||||
def test_single_surrogate_encode(self):
|
||||
self.assertEqual(self.dumps('\uD83D'), '"\\ud83d"')
|
||||
self.assertEqual(self.dumps('\uD83D', ensure_ascii=False), '"\ud83d"')
|
||||
self.assertEqual(self.dumps('\uDC0D'), '"\\udc0d"')
|
||||
self.assertEqual(self.dumps('\uDC0D', ensure_ascii=False), '"\udc0d"')
|
||||
|
||||
def test_single_surrogate_decode(self):
|
||||
self.assertEqual(self.loads('"\uD83D"'), '\ud83d')
|
||||
self.assertEqual(self.loads('"\\uD83D"'), '\ud83d')
|
||||
self.assertEqual(self.loads('"\udc0d"'), '\udc0d')
|
||||
self.assertEqual(self.loads('"\\udc0d"'), '\udc0d')
|
||||
|
||||
def test_unicode_preservation(self):
|
||||
self.assertEqual(type(self.loads('""')), str)
|
||||
self.assertEqual(type(self.loads('"a"')), str)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue