mirror of
https://github.com/python/cpython.git
synced 2025-07-08 03:45:36 +00:00

and str (unicode) patterns get full unicode matching by default. The re.ASCII flag is also introduced to ask for ASCII matching instead.
339 lines
11 KiB
Python
339 lines
11 KiB
Python
"""Implementation of JSONDecoder
|
|
"""
|
|
|
|
import re
|
|
import sys
|
|
|
|
from json.scanner import Scanner, pattern
|
|
try:
|
|
from _json import scanstring as c_scanstring
|
|
except ImportError:
|
|
c_scanstring = None
|
|
|
|
__all__ = ['JSONDecoder']
|
|
|
|
# Regex flags shared by the patterns compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Float objects used for the non-standard NaN / Infinity / -Infinity literals.
NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')
|
|
|
|
|
|
def linecol(doc, pos):
    """Translate a character offset into a ``(lineno, colno)`` pair.

    ``doc`` is the full document text and ``pos`` an index into it.  Both
    the line number and the column number are 1-based.

    The column used to be reported 0-based on the first line but 1-based on
    every following line; it is now 1-based everywhere so positions are
    comparable across lines.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        # No newline precedes pos: the column is the offset itself, 1-based.
        colno = pos + 1
    else:
        # Distance from the last newline before pos is already 1-based.
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno
|
|
|
|
|
|
def errmsg(msg, doc, pos, end=None):
    """Build an error message that locates a problem inside ``doc``.

    ``msg`` is the leading text, ``pos`` the character offset of the
    problem.  When ``end`` is given, the message describes the range from
    ``pos`` to ``end``; otherwise only the single position is reported.
    Line and column numbers are obtained from ``linecol``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
        return template.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)
|
|
|
|
|
|
# Python value for every literal that JSONConstant's pattern can match.
_CONSTANTS = {
    # Non-standard float literals.
    '-Infinity': NegInf, 'Infinity': PosInf, 'NaN': NaN,
    # Standard JSON literals.
    'true': True, 'false': False, 'null': None,
}
|
|
|
|
|
|
def JSONConstant(match, context, c=_CONSTANTS):
    """Scanner action for the literal constants.

    ``match`` is the regex match of the literal.  If ``context`` has a
    ``parse_constant`` attribute that is not None, it is called with the
    matched text and its result is used; otherwise the value is looked up
    in ``c``.

    Returns ``(value, None)``.
    """
    literal = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    value = c[literal] if hook is None else hook(literal)
    return value, None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
|
|
|
|
|
|
def JSONNumber(match, context):
    """Scanner action for numbers.

    A number with a fraction or exponent part is converted with
    ``context.parse_float`` (falling back to ``float``); any other number
    is converted with ``context.parse_int`` (falling back to ``int``).

    Returns ``(value, None)``.
    """
    # Re-match the same span with this action's own regex to get the
    # integer / fraction / exponent groups.
    # NOTE(review): ``regex`` is presumably attached by ``pattern()`` below.
    span_start, span_end = match.span()
    number = JSONNumber.regex.match(match.string, span_start, span_end)
    integer, frac, exp = number.groups()
    if not (frac or exp):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(integer), None
    to_float = getattr(context, 'parse_float', None) or float
    return to_float(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?')(JSONNumber)
|
|
|
|
|
|
# Lazily matches a run of ordinary characters (group 1) followed by the
# character that ends the run (group 2): a double quote, a backslash or a
# control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character after the backslash -> its value.
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}

# Encoding assumed by py_scanstring when the caller passes None.
DEFAULT_ENCODING = "utf-8"
|
|
|
|
|
|
# Exactly four hexadecimal digits, as required after ``\u``.
_HEX4 = re.compile(r'[0-9A-Fa-f]{4}')


def _decode_uXXXX(s, pos):
    """Return the integer value of the four hex digits at ``s[pos:pos + 4]``.

    Raises ValueError if fewer than four characters remain or if any of
    them is not a hexadecimal digit.  A bare ``int(text, 16)`` is not
    strict enough here: it also accepts four-character forms such as
    ``'0x1f'``, ``' 1f '`` or ``'+1f0'``.
    """
    digits = _HEX4.match(s, pos)
    if digits is None:
        raise ValueError
    return int(digits.group(), 16)


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan a JSON string literal and return its decoded value.

    ``s`` is the document and ``end`` the index of the first character
    after the opening quote (``end - 1`` is reported as the start of the
    string in "unterminated" errors).  ``encoding`` is used to decode any
    chunk that is not already a ``str`` and defaults to DEFAULT_ENCODING.
    If ``strict`` is true, a raw control character inside the string is an
    error; otherwise it is kept as-is.  ``_b`` and ``_m`` are internal
    default-argument bindings of the escape table and the chunk matcher.

    Returns ``(text, index)`` where ``index`` is the position just past
    the closing quote.

    Raises ValueError for an unterminated string, an invalid control
    character (strict mode), an unknown backslash escape, a ``\\u`` escape
    that is not followed by exactly four hex digits, or a high surrogate
    that is not followed by a ``\\u`` escape holding a low surrogate.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, str):
                content = str(content, encoding)
            _append(content)
        if terminator == '"':
            # Closing quote: the literal is complete.
            break
        elif terminator != '\\':
            # The chunk stopped on a raw control character.
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        # The chunk stopped on a backslash: decode the escape that follows.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # s[end] is 'u'; the four hex digits start at end + 1.
            msg = "Invalid \\uXXXX escape"
            next_end = end + 5
            try:
                uni = _decode_uXXXX(s, end + 1)
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    # High surrogate: a second \uXXXX escape carrying the
                    # low surrogate must follow immediately.
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if s[end + 5:end + 7] != '\\u':
                        raise ValueError
                    uni2 = _decode_uXXXX(s, end + 7)
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        # Combining with a non-low-surrogate value would
                        # silently produce an unrelated character.
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = chr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return ''.join(chunks), end
|
|
|
|
|
|
# Prefer the C implementation when it is available, otherwise fall back to
# the pure-Python scanner.
scanstring = py_scanstring if c_scanstring is None else c_scanstring
|
|
|
|
def JSONString(match, context):
    """Scanner action for strings.

    ``match`` covers the opening quote.  The rest of the literal is
    scanned by ``scanstring``, using ``context.encoding`` (default None)
    and ``context.strict`` (default True).  The result of ``scanstring``
    is returned unchanged.
    """
    text_encoding = getattr(context, 'encoding', None)
    is_strict = getattr(context, 'strict', True)
    document = match.string
    body_start = match.end()
    return scanstring(document, body_start, text_encoding, is_strict)
pattern(r'"')(JSONString)
|
|
|
|
|
|
# JSON allows only space, tab, line feed and carriage return between tokens.
# ``\s`` on a str pattern would additionally accept form feed, vertical tab
# and Unicode whitespace.  The literal space is inside a character class, so
# it is kept even though FLAGS includes re.VERBOSE.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
|
|
|
|
|
|
def JSONObject(match, context, _w=WHITESPACE.match):
    """Scanner action for objects.

    ``match`` covers the opening brace.  Keys are read with ``scanstring``
    (using ``context.encoding`` / ``context.strict``) and values with
    ``JSONScanner.iterscan``.  If ``context.object_hook`` is not None, it
    is called with the finished dict and its return value replaces the
    dict.  An empty object is returned directly, without consulting
    ``object_hook``.

    Returns ``(result, index)`` where ``index`` is the position just past
    the closing brace.

    Raises ValueError when a property name, a ``:`` or ``,`` delimiter, or
    a value is missing.
    """
    members = {}
    s = match.string
    idx = _w(s, match.end()).end()
    first = s[idx:idx + 1]
    # Trivial empty object.
    if first == '}':
        return members, idx + 1
    if first != '"':
        raise ValueError(errmsg("Expecting property name", s, idx))
    # Step over the opening quote of the first key.
    idx += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        key, idx = scanstring(s, idx, encoding, strict)
        idx = _w(s, idx).end()
        if s[idx:idx + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, idx))
        idx = _w(s, idx + 1).end()
        try:
            value, idx = next(iterscan(s, idx=idx, context=context))
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, idx))
        members[key] = value
        idx = _w(s, idx).end()
        delimiter = s[idx:idx + 1]
        if delimiter == '}':
            idx += 1
            break
        if delimiter != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, idx))
        # Skip the comma and any whitespace; the next key must start here.
        idx = _w(s, idx + 1).end()
        if s[idx:idx + 1] != '"':
            raise ValueError(errmsg("Expecting property name", s, idx))
        idx += 1
    object_hook = getattr(context, 'object_hook', None)
    if object_hook is not None:
        members = object_hook(members)
    return members, idx
pattern(r'{')(JSONObject)
|
|
|
|
|
|
def JSONArray(match, context, _w=WHITESPACE.match):
    """Scanner action for arrays.

    ``match`` covers the opening bracket.  Elements are read with
    ``JSONScanner.iterscan`` and collected into a list.

    Returns ``(values, index)`` where ``index`` is the position just past
    the closing bracket.

    Raises ValueError when an element or a ``,`` delimiter is missing.
    The delimiter error reports the index of the offending character
    itself, matching JSONObject; it used to report one position past it.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array.
    if s[end:end + 1] == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = next(iterscan(s, idx=end, context=context))
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        if nextchar == ']':
            end += 1
            break
        if nextchar != ',':
            # ``end`` still indexes the unexpected character here.
            raise ValueError(errmsg("Expecting , delimiter", s, end))
        # Skip the comma and any whitespace before the next element.
        end = _w(s, end + 1).end()
    return values, end
pattern(r'\[')(JSONArray)
|
|
|
|
|
|
# Every scanner action defined in this module, in the original order.
# NOTE(review): whether Scanner is sensitive to this order is not visible
# from this file -- keep the order unchanged.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Scanner used by JSONObject and JSONArray to read nested values.
JSONScanner = Scanner(ANYTHING)
|
|
|
|
|
|
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding translates JSON values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded
    to the corresponding ``float`` values, although they are outside the
    JSON spec.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """Store the decoding options on the instance.

        The scanner actions read these attributes from the decoder, which
        is passed to them as their ``context``.

        ``encoding`` is handed to the string scanner.  The pure-Python
        scanner uses it to decode any chunk that is not already a ``str``
        and falls back to utf-8 when it is None.

        ``object_hook``, if specified, is called with the dict built for
        every non-empty JSON object, and its return value is used in place
        of that dict.  This can be used for custom deserializations (e.g.
        to support JSON-RPC class hinting).

        ``parse_float``, if specified, is called with the string of every
        JSON number that has a fraction or exponent part.  The default is
        ``float``.  This can be used to plug in another datatype or parser
        (e.g. decimal.Decimal).

        ``parse_int``, if specified, is called with the string of every
        other JSON number.  The default is ``int``.  This can be used to
        plug in another datatype or parser (e.g. float).

        ``parse_constant``, if specified, is called with one of the
        following strings: -Infinity, Infinity, NaN, null, true, false.
        This can be used to raise an exception when invalid JSON numbers
        are encountered.

        ``strict`` is handed to the string scanner.  When it is true, the
        pure-Python scanner rejects raw control characters inside strings.
        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant
        self.strict = strict

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s``, a string holding
        exactly one JSON document.

        Leading and trailing whitespace is skipped.  Raises ValueError
        ("Extra data") if anything else follows the document.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """Decode a JSON document from the beginning of ``s`` and return a
        2-tuple of the Python representation and the index in ``s`` where
        the document ended.

        Unlike ``decode``, this accepts a string with extraneous data after
        the document.  Keyword arguments are forwarded to the scanner's
        ``iterscan``; ``context`` defaults to this decoder.

        Raises ValueError if the scanner yields nothing.
        """
        if 'context' not in kw:
            kw['context'] = self
        try:
            obj, end = next(self._scanner.iterscan(s, **kw))
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, end