cpython/Modules/_json.c
Georg Brandl 4aef703c45 Merged revisions 66801,66803-66804,66813,66854-66856,66866,66870-66872,66874,66887,66903,66905,66911,66913,66927,66932,66938,66942,66962,66964,66973-66974,66977,66992,66998-66999,67002,67005,67007,67028,67040-67041,67044,67070,67089,67091,67101,67117-67119,67123-67124 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

................
  r66801 | andrew.kuchling | 2008-10-04 23:51:59 +0200 (Sat, 04 Oct 2008) | 1 line

  Punctuation fix; expand dict.update docstring to be clearer
................
  r66803 | benjamin.peterson | 2008-10-05 00:15:31 +0200 (Sun, 05 Oct 2008) | 1 line

  fix typo
................
  r66804 | andrew.kuchling | 2008-10-05 02:11:56 +0200 (Sun, 05 Oct 2008) | 1 line

  #1415508 from Rocky Bernstein: add docstrings for enable_interspersed_args(), disable_interspersed_args()
................
  r66813 | andrew.kuchling | 2008-10-06 14:07:04 +0200 (Mon, 06 Oct 2008) | 3 lines

  Per Greg Ward, optparse is no longer being externally maintained.
  I'll look at the bugs in the Optik bug tracker and copy them to the Python bug
  tracker if they're still relevant.
................
  r66854 | georg.brandl | 2008-10-08 19:20:20 +0200 (Wed, 08 Oct 2008) | 2 lines

  #4059: patch up some sqlite docs.
................
  r66855 | georg.brandl | 2008-10-08 19:30:55 +0200 (Wed, 08 Oct 2008) | 2 lines

  #4058: fix some whatsnew markup.
................
  r66856 | georg.brandl | 2008-10-08 20:47:17 +0200 (Wed, 08 Oct 2008) | 3 lines

  #3935: properly support list subclasses in the C impl. of bisect.
  Patch reviewed by Raymond.
................
  r66866 | benjamin.peterson | 2008-10-09 22:54:43 +0200 (Thu, 09 Oct 2008) | 1 line

  update paragraph about __future__ for 2.6
................
  r66870 | armin.rigo | 2008-10-10 10:40:44 +0200 (Fri, 10 Oct 2008) | 2 lines

  Typo: "ThreadError" is the name in the C source.
................
  r66871 | benjamin.peterson | 2008-10-10 22:38:49 +0200 (Fri, 10 Oct 2008) | 1 line

  fix a small typo
................
  r66872 | benjamin.peterson | 2008-10-10 22:51:37 +0200 (Fri, 10 Oct 2008) | 1 line

  talk about how you can unzip with zip
................
  r66874 | benjamin.peterson | 2008-10-11 00:23:41 +0200 (Sat, 11 Oct 2008) | 1 line

  PyGILState_Acquire -> PyGILState_Ensure
................
  r66887 | benjamin.peterson | 2008-10-13 23:51:40 +0200 (Mon, 13 Oct 2008) | 1 line

  document how to disable fixers
................
  r66903 | benjamin.peterson | 2008-10-15 22:34:09 +0200 (Wed, 15 Oct 2008) | 1 line

  don't recurse into directories that start with '.'
................
  r66905 | benjamin.peterson | 2008-10-15 23:05:55 +0200 (Wed, 15 Oct 2008) | 1 line

  support the optional line argument for idle
................
  r66911 | benjamin.peterson | 2008-10-16 01:10:28 +0200 (Thu, 16 Oct 2008) | 41 lines

  Merged revisions 66805,66841,66860,66884-66886,66893,66907,66910 via svnmerge from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3

  ........
    r66805 | benjamin.peterson | 2008-10-04 20:11:02 -0500 (Sat, 04 Oct 2008) | 1 line

    mention what the fixes directory is for
  ........
    r66841 | benjamin.peterson | 2008-10-07 17:48:12 -0500 (Tue, 07 Oct 2008) | 1 line

    use assertFalse and assertTrue
  ........
    r66860 | benjamin.peterson | 2008-10-08 16:05:07 -0500 (Wed, 08 Oct 2008) | 1 line

    instead of abusing the pattern matcher, use start_tree to find a next binding
  ........
    r66884 | benjamin.peterson | 2008-10-13 15:50:30 -0500 (Mon, 13 Oct 2008) | 1 line

    don't print tokens to stdout when -v is given
  ........
    r66885 | benjamin.peterson | 2008-10-13 16:28:57 -0500 (Mon, 13 Oct 2008) | 1 line

    add the -x option to disable fixers
  ........
    r66886 | benjamin.peterson | 2008-10-13 16:33:53 -0500 (Mon, 13 Oct 2008) | 1 line

    cut down on some crud
  ........
    r66893 | benjamin.peterson | 2008-10-14 17:16:54 -0500 (Tue, 14 Oct 2008) | 1 line

    add an optional set literal fixer
  ........
    r66907 | benjamin.peterson | 2008-10-15 16:59:41 -0500 (Wed, 15 Oct 2008) | 1 line

    don't write backup files by default
  ........
    r66910 | benjamin.peterson | 2008-10-15 17:43:10 -0500 (Wed, 15 Oct 2008) | 1 line

    add the -n option; it stops backupfiles from being written
  ........
................
  r66913 | benjamin.peterson | 2008-10-16 20:52:14 +0200 (Thu, 16 Oct 2008) | 1 line

  document that deque indexing is O(n) #4123
................
  r66927 | andrew.kuchling | 2008-10-16 22:15:47 +0200 (Thu, 16 Oct 2008) | 1 line

  Fix wording (2.6.1 backport candidate)
................
  r66932 | benjamin.peterson | 2008-10-16 23:09:28 +0200 (Thu, 16 Oct 2008) | 1 line

  check for error conditions in _json #3623
................
  r66938 | benjamin.peterson | 2008-10-16 23:27:54 +0200 (Thu, 16 Oct 2008) | 1 line

  fix possible ref leak
................
  r66942 | benjamin.peterson | 2008-10-16 23:48:06 +0200 (Thu, 16 Oct 2008) | 1 line

  fix more possible ref leaks in _json and use Py_CLEAR
................
  r66962 | benjamin.peterson | 2008-10-17 22:01:01 +0200 (Fri, 17 Oct 2008) | 1 line

  clarify CALL_FUNCTION #4141
................
  r66964 | georg.brandl | 2008-10-17 23:41:49 +0200 (Fri, 17 Oct 2008) | 2 lines

  Fix duplicate word.
................
  r66973 | armin.ronacher | 2008-10-19 10:27:43 +0200 (Sun, 19 Oct 2008) | 3 lines

  Fixed #4067 by implementing _attributes and _fields for the AST root node.
................
  r66974 | benjamin.peterson | 2008-10-19 15:59:01 +0200 (Sun, 19 Oct 2008) | 1 line

  fix compiler warning
................
  r66977 | benjamin.peterson | 2008-10-19 21:39:16 +0200 (Sun, 19 Oct 2008) | 1 line

  mention -n
................
  r66992 | benjamin.peterson | 2008-10-21 22:51:13 +0200 (Tue, 21 Oct 2008) | 1 line

  make sure to call iteritems()
................
  r66998 | benjamin.peterson | 2008-10-22 22:57:43 +0200 (Wed, 22 Oct 2008) | 1 line

  fix a few typos
................
  r66999 | benjamin.peterson | 2008-10-22 23:05:30 +0200 (Wed, 22 Oct 2008) | 1 line

  and another typo...
................
  r67002 | hirokazu.yamamoto | 2008-10-23 02:37:33 +0200 (Thu, 23 Oct 2008) | 1 line

  Issue #4183: Some tests didn't run with pickle.HIGHEST_PROTOCOL.
................
  r67005 | walter.doerwald | 2008-10-23 15:11:39 +0200 (Thu, 23 Oct 2008) | 2 lines

  Use the correct names of the stateless codec functions (Fixes issue 4178).
................
  r67007 | benjamin.peterson | 2008-10-23 23:43:48 +0200 (Thu, 23 Oct 2008) | 1 line

  only nonempty __slots__ don't work
................
  r67028 | benjamin.peterson | 2008-10-26 01:27:07 +0200 (Sun, 26 Oct 2008) | 1 line

  don't use a catch-all
................
  r67040 | armin.rigo | 2008-10-28 18:01:21 +0100 (Tue, 28 Oct 2008) | 5 lines

  Fix one of the tests: it relied on being present in an "output test" in
  order to actually test what it was supposed to test, i.e. that the code
  in the __del__ method did not crash.  Use instead the new helper
  test_support.captured_output().
................
  r67041 | benjamin.peterson | 2008-10-29 21:33:00 +0100 (Wed, 29 Oct 2008) | 1 line

  mention the version gettempdir() was added
................
  r67044 | amaury.forgeotdarc | 2008-10-30 00:15:57 +0100 (Thu, 30 Oct 2008) | 3 lines

  Correct error message in io.open():
  closefd=True is the only accepted value with a file name.
................
  r67070 | benjamin.peterson | 2008-10-31 21:41:44 +0100 (Fri, 31 Oct 2008) | 1 line

  rephrase has_key doc
................
  r67089 | benjamin.peterson | 2008-11-03 21:43:20 +0100 (Mon, 03 Nov 2008) | 1 line

  clarify by splitting into multiple paragraphs
................
  r67091 | benjamin.peterson | 2008-11-03 23:34:57 +0100 (Mon, 03 Nov 2008) | 1 line

  move a FileIO test to test_fileio
................
  r67101 | georg.brandl | 2008-11-04 21:49:35 +0100 (Tue, 04 Nov 2008) | 2 lines

  #4167: fix markup glitches.
................
  r67117 | georg.brandl | 2008-11-06 11:17:58 +0100 (Thu, 06 Nov 2008) | 2 lines

  #4268: Use correct module for two toplevel functions.
................
  r67118 | georg.brandl | 2008-11-06 11:19:11 +0100 (Thu, 06 Nov 2008) | 2 lines

  #4267: small fixes in sqlite3 docs.
................
  r67119 | georg.brandl | 2008-11-06 11:20:49 +0100 (Thu, 06 Nov 2008) | 2 lines

  #4245: move Thread section to the top.
................
  r67123 | georg.brandl | 2008-11-06 19:49:15 +0100 (Thu, 06 Nov 2008) | 2 lines

  #4247: add "pass" examples to tutorial.
................
  r67124 | andrew.kuchling | 2008-11-06 20:23:02 +0100 (Thu, 06 Nov 2008) | 1 line

  Fix grammar error; reword two paragraphs
................
2008-11-07 08:56:27 +00:00

621 lines
20 KiB
C

#include "Python.h"
#define DEFAULT_ENCODING "utf-8"
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
{
Py_UNICODE x;
output[chars++] = '\\';
switch (c) {
case '\\': output[chars++] = (char)c; break;
case '"': output[chars++] = (char)c; break;
case '\b': output[chars++] = 'b'; break;
case '\f': output[chars++] = 'f'; break;
case '\n': output[chars++] = 'n'; break;
case '\r': output[chars++] = 'r'; break;
case '\t': output[chars++] = 't'; break;
default:
#ifdef Py_UNICODE_WIDE
if (c >= 0x10000) {
/* UTF-16 surrogate pair */
Py_UNICODE v = c - 0x10000;
c = 0xd800 | ((v >> 10) & 0x3ff);
output[chars++] = 'u';
x = (c & 0xf000) >> 12;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x0f00) >> 8;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x00f0) >> 4;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x000f);
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
c = 0xdc00 | (v & 0x3ff);
output[chars++] = '\\';
}
#endif
output[chars++] = 'u';
x = (c & 0xf000) >> 12;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x0f00) >> 8;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x00f0) >> 4;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x000f);
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
}
return chars;
}
static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
Py_UNICODE *input_unicode;
input_chars = PyUnicode_GET_SIZE(pystr);
input_unicode = PyUnicode_AS_UNICODE(pystr);
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
Py_UNICODE c = input_unicode[i];
if (S_CHAR(c)) {
output[chars++] = (char)c;
}
else {
chars = ascii_escape_char(c, output, chars);
}
if (output_size - chars < (1 + MAX_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
output_size *= 2;
/* This is an upper bound */
if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
output_size = 2 + (input_chars * MAX_EXPANSION);
}
if (_PyString_Resize(&rval, output_size) == -1) {
return NULL;
}
output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
if (_PyString_Resize(&rval, chars) == -1) {
return NULL;
}
return rval;
}
static PyObject *
ascii_escape_str(PyObject *pystr)
{
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
char *input_str;
input_chars = PyString_GET_SIZE(pystr);
input_str = PyString_AS_STRING(pystr);
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
Py_UNICODE c = (Py_UNICODE)input_str[i];
if (S_CHAR(c)) {
output[chars++] = (char)c;
}
else if (c > 0x7F) {
/* We hit a non-ASCII character, bail to unicode mode */
PyObject *uni;
Py_DECREF(rval);
uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
if (uni == NULL) {
return NULL;
}
rval = ascii_escape_unicode(uni);
Py_DECREF(uni);
return rval;
}
else {
chars = ascii_escape_char(c, output, chars);
}
/* An ASCII char can't possibly expand to a surrogate! */
if (output_size - chars < (1 + MIN_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
output_size *= 2;
if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
output_size = 2 + (input_chars * MIN_EXPANSION);
}
if (_PyString_Resize(&rval, output_size) == -1) {
return NULL;
}
output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
if (_PyString_Resize(&rval, chars) == -1) {
return NULL;
}
return rval;
}
void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
static PyObject *errmsg_fn = NULL;
PyObject *pymsg;
if (errmsg_fn == NULL) {
PyObject *decoder = PyImport_ImportModule("json.decoder");
if (decoder == NULL)
return;
errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
if (errmsg_fn == NULL)
return;
Py_DECREF(decoder);
}
pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);
if (pymsg) {
PyErr_SetObject(PyExc_ValueError, pymsg);
Py_DECREF(pymsg);
}
/*
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos)
if end is None:
return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
msg, lineno, colno, endlineno, endcolno, pos, end)
*/
}
static PyObject *
join_list_unicode(PyObject *lst)
{
static PyObject *ustr = NULL;
static PyObject *joinstr = NULL;
if (ustr == NULL) {
Py_UNICODE c = 0;
ustr = PyUnicode_FromUnicode(&c, 0);
}
if (joinstr == NULL) {
joinstr = PyString_InternFromString("join");
}
if (joinstr == NULL || ustr == NULL) {
return NULL;
}
return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL);
}
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict)
{
PyObject *rval;
Py_ssize_t len = PyString_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
char *buf = PyString_AS_STRING(pystr);
PyObject *chunks = PyList_New(0);
if (chunks == NULL) {
goto bail;
}
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = buf[next];
if (c == '"' || c == '\\') {
break;
}
else if (strict && c <= 0x1f) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
}
}
if (!(c == '"' || c == '\\')) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end);
if (strchunk == NULL) {
goto bail;
}
chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
Py_DECREF(strchunk);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
next++;
if (c == '"') {
end = next;
break;
}
if (next == len) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
c = buf[next];
if (c != 'u') {
/* Non-unicode backslash escapes */
end = next + 1;
switch (c) {
case '"': break;
case '\\': break;
case '/': break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
default: c = 0;
}
if (c == 0) {
raise_errmsg("Invalid \\escape", pystr, end - 2);
goto bail;
}
}
else {
c = 0;
next++;
end = next + 4;
if (end >= len) {
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
#ifdef Py_UNICODE_WIDE
/* Surrogate pair */
if (c >= 0xd800 && c <= 0xdbff) {
Py_UNICODE c2 = 0;
if (end + 6 >= len) {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
if (buf[next++] != '\\' || buf[next++] != 'u') {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
end += 6;
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c2 |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c2 |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
#endif
}
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
rval = join_list_unicode(chunks);
if (rval == NULL) {
goto bail;
}
Py_CLEAR(chunks);
return Py_BuildValue("(Nn)", rval, end);
bail:
Py_XDECREF(chunks);
return NULL;
}
static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict)
{
PyObject *rval;
Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
PyObject *chunks = PyList_New(0);
if (chunks == NULL) {
goto bail;
}
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = buf[next];
if (c == '"' || c == '\\') {
break;
}
else if (strict && c <= 0x1f) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
}
}
if (!(c == '"' || c == '\\')) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
next++;
if (c == '"') {
end = next;
break;
}
if (next == len) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
c = buf[next];
if (c != 'u') {
/* Non-unicode backslash escapes */
end = next + 1;
switch (c) {
case '"': break;
case '\\': break;
case '/': break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
default: c = 0;
}
if (c == 0) {
raise_errmsg("Invalid \\escape", pystr, end - 2);
goto bail;
}
}
else {
c = 0;
next++;
end = next + 4;
if (end >= len) {
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
#ifdef Py_UNICODE_WIDE
/* Surrogate pair */
if (c >= 0xd800 && c <= 0xdbff) {
Py_UNICODE c2 = 0;
if (end + 6 >= len) {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
if (buf[next++] != '\\' || buf[next++] != 'u') {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
end += 6;
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c2 |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c2 |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
#endif
}
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
rval = join_list_unicode(chunks);
if (rval == NULL) {
goto bail;
}
Py_CLEAR(chunks);
return Py_BuildValue("(Nn)", rval, end);
bail:
Py_XDECREF(chunks);
return NULL;
}
PyDoc_STRVAR(pydoc_scanstring,
"scanstring(basestring, end, encoding) -> (str, end)\n");
static PyObject *
py_scanstring(PyObject* self, PyObject *args)
{
PyObject *pystr;
Py_ssize_t end;
char *encoding = NULL;
int strict = 0;
if (!PyArg_ParseTuple(args, "On|zi:scanstring", &pystr, &end, &encoding, &strict)) {
return NULL;
}
if (encoding == NULL) {
encoding = DEFAULT_ENCODING;
}
if (PyString_Check(pystr)) {
return scanstring_str(pystr, end, encoding, strict);
}
else if (PyUnicode_Check(pystr)) {
return scanstring_unicode(pystr, end, strict);
}
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string or unicode, not %.80s",
Py_TYPE(pystr)->tp_name);
return NULL;
}
}
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
"encode_basestring_ascii(basestring) -> str\n");
static PyObject *
py_encode_basestring_ascii(PyObject* self, PyObject *pystr)
{
/* METH_O */
if (PyString_Check(pystr)) {
return ascii_escape_str(pystr);
}
else if (PyUnicode_Check(pystr)) {
return ascii_escape_unicode(pystr);
}
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string or unicode, not %.80s",
Py_TYPE(pystr)->tp_name);
return NULL;
}
}
static PyMethodDef json_methods[] = {
{"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii,
METH_O, pydoc_encode_basestring_ascii},
{"scanstring", (PyCFunction)py_scanstring, METH_VARARGS,
pydoc_scanstring},
{NULL, NULL, 0, NULL}
};
PyDoc_STRVAR(module_doc,
"json speedups\n");
void
init_json(void)
{
PyObject *m;
m = Py_InitModule3("_json", json_methods, module_doc);
}