mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 10:26:02 +00:00 
			
		
		
		
	 e4ca8156ef
			
		
	
	
		e4ca8156ef
		
	
	
	
	
		
			
			svn+ssh://pythondev@svn.python.org/python/trunk ........ r62805 | christian.heimes | 2008-05-07 01:59:53 +0200 (Wed, 07 May 2008) | 1 line Re-added getbuildinfo.c solution item ........ r62811 | benjamin.peterson | 2008-05-07 04:23:43 +0200 (Wed, 07 May 2008) | 2 lines update .bzrignore ........ r62841 | christian.heimes | 2008-05-08 00:54:17 +0200 (Thu, 08 May 2008) | 1 line Replace more float hacks with correct math functions ........ r62842 | benjamin.peterson | 2008-05-08 01:11:54 +0200 (Thu, 08 May 2008) | 2 lines Practice EAFP, and revert 62787 ........ r62848 | raymond.hettinger | 2008-05-08 06:35:20 +0200 (Thu, 08 May 2008) | 1 line Frozensets do not benefit from autoconversion. ........ r62849 | raymond.hettinger | 2008-05-08 06:36:12 +0200 (Thu, 08 May 2008) | 1 line The __all__ variable forgot to expose the gcd() function. ........ r62853 | raymond.hettinger | 2008-05-08 09:23:30 +0200 (Thu, 08 May 2008) | 1 line Fix-up the enumerate type example and move it to the end. ........ r62854 | ronald.oussoren | 2008-05-08 12:34:39 +0200 (Thu, 08 May 2008) | 3 lines Fix for issue 1770190: platform.mac_ver() now returns the right version on OSX 10.4.10 ........
		
			
				
	
	
		
			384 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			384 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Implementation of JSONEncoder
 | |
| """
 | |
| 
 | |
| import re
 | |
| import math
 | |
| 
 | |
| try:
 | |
|     from _json import encode_basestring_ascii as c_encode_basestring_ascii
 | |
| except ImportError:
 | |
|     c_encode_basestring_ascii = None
 | |
| 
 | |
| __all__ = ['JSONEncoder']
 | |
| 
 | |
# Characters that need escaping inside a JSON string when non-ASCII text may
# be emitted verbatim: control characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')

# Characters that need escaping when the output must be pure ASCII: the
# backslash, the double quote, and anything outside the range space..tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')

# Matches any character in the range \x80-\xff.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a character to its JSON escape sequence.  The short, readable escapes
# are listed explicitly; the loop below fills in the remaining control
# characters with the generic \uXXXX form.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)

# Callable used by floatstr() to render ordinary (finite) floats.
FLOAT_REPR = repr
 | |
| 
 | |
def floatstr(o, allow_nan=True):
    """Return the text used to represent the float ``o`` in JSON output.

    Finite values are rendered with ``FLOAT_REPR``.  NaN and the two
    infinities are rendered as ``NaN``, ``Infinity`` and ``-Infinity``;
    when ``allow_nan`` is false these special values raise ``ValueError``
    instead.
    """
    # Classify with the math module rather than by comparing against
    # literal values, so the test does not depend on platform internals.
    if math.isnan(o):
        special = 'NaN'
    elif math.isinf(o):
        special = 'Infinity' if math.copysign(1., o) == 1. else '-Infinity'
    else:
        special = None

    if special is None:
        return FLOAT_REPR(o)
    if allow_nan:
        return special
    raise ValueError(
        "Out of range float values are not JSON compliant: " + repr(o))
 | |
| 
 | |
| 
 | |
def encode_basestring(s):
    """Return ``s`` as a double-quoted JSON string.

    Only the characters matched by ``ESCAPE`` are replaced (using the
    sequences in ``ESCAPE_DCT``); every other character is kept as-is.
    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
 | |
| 
 | |
| 
 | |
def py_encode_basestring_ascii(s):
    """Return ``s`` as a double-quoted, ASCII-only JSON string.

    ``bytes`` input is first decoded as UTF-8.  Characters with a dedicated
    entry in ``ESCAPE_DCT`` use that escape; every other character matched
    by ``ESCAPE_ASCII`` becomes ``\\uXXXX``, with code points at or above
    0x10000 written as a UTF-16 surrogate pair.
    """
    if isinstance(s, bytes):
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        code = ord(char)
        if code < 0x10000:
            return '\\u{0:04x}'.format(code)
        # Outside the Basic Multilingual Plane: split into a surrogate pair.
        code -= 0x10000
        high = 0xd800 | ((code >> 10) & 0x3ff)
        low = 0xdc00 | (code & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'
 | |
| 
 | |
| 
 | |
# Use the C accelerator when it could be imported, otherwise fall back to
# the pure-Python implementation.
encode_basestring_ascii = (
    c_encode_basestring_ascii
    if c_encode_basestring_ascii is not None
    else py_encode_basestring_ascii
)
 | |
| 
 | |
| 
 | |
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, bytes        | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, encoding='utf-8',
                 default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, bytes, int, float, bool or
        None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, every non-ASCII character in the output
        is escaped, so the result contains only ASCII characters.  If
        ensure_ascii is false, such characters are emitted as-is.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion; a ValueError is raised when one is
        found.  Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all bytes input (values and
        dictionary keys) will be decoded to str using that encoding prior
        to JSON-encoding.  The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Instance attribute shadows the default() method below.
            self.default = default
        self.encoding = encoding

    def _to_text(self, s):
        """Decode bytes ``s`` with ``self.encoding``; return anything else
        (including bytes when no encoding is configured) unchanged."""
        if isinstance(s, bytes) and self.encoding is not None:
            return s.decode(self.encoding)
        return s

    def _newline_indent(self):
        """Return a newline followed by the indentation for the current
        nesting level.  Only called when ``self.indent`` is not None."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """Yield the JSON chunks for the list or tuple ``lst``.

        ``markers`` is the dict of ids of containers currently being
        encoded (or None when circular checking is disabled).
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """Yield the JSON chunks for the dict ``dct``.

        Keys that are str (or bytes, when an encoding is configured) are
        used directly; True, False, None, floats and ints are converted to
        their JSON text.  Any other key raises ``TypeError`` unless
        ``self.skipkeys`` is true, in which case the item is dropped.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            items = [(k, dct[k]) for k in sorted(dct)]
        else:
            items = dct.items()
        for key, value in items:
            # bytes keys become str first so they share the str path below.
            key = self._to_text(key)
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            # The bool checks must come before the int check: bool is a
            # subclass of int, and str(True) would give 'True', not 'true'.
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            elif isinstance(key, int):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key {0!r} is not a string".format(key))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """Yield the JSON chunks for an arbitrary object ``o``, dispatching
        on its type and falling back to ``self.default`` for unknown types."""
        if isinstance(o, (str, bytes)):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            yield encoder(self._to_text(o))
        elif o is None:
            yield 'null'
        # The identity checks for True/False precede the int check because
        # bool is a subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with ``self.default`` and encode the result."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """Implement this method in a subclass such that it returns a serializable
        object for ``o``, or calls the base implementation (to raise a
        ``TypeError``).

        For example, to support arbitrary iterators, you could implement
        default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, (str, bytes)):
            o = self._to_text(o)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """Encode the given object and yield each string representation as
        available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
 |