Merged revisions 75928 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r75928 | benjamin.peterson | 2009-10-28 16:59:39 -0500 (Wed, 28 Oct 2009) | 5 lines

  In wide builds, avoid storing Unicode characters outside the Basic Multilingual Plane from source code as surrogate pairs.

  This is accomplished by decoding with utf-32 instead of utf-16 on all builds.
  The patch is by Adam Olsen.
........
This commit is contained in:
Benjamin Peterson 2009-10-29 01:22:38 +00:00
parent 1531f528b3
commit 7dc5ac5ec6
3 changed files with 25 additions and 9 deletions

View file

@ -36,6 +36,14 @@ class PEP263Test(unittest.TestCase):
exec(c, d) exec(c, d)
self.assertEquals(d['\xc6'], '\xc6') self.assertEquals(d['\xc6'], '\xc6')
# Regression test for issue 3297: a character outside the Basic Multilingual
# Plane written literally in source code must compile to the same string as
# the equivalent \U escape sequence.
def test_issue3297(self):
# In the compiled source, `a` is assigned the literal character U+0001010F,
# while `b` is assigned the escape sequence \U0001010F (the doubled backslash
# keeps the escape intact until the inner compile() sees it).
c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
d = {}
exec(c, d)
# Both spellings must yield equal strings ...
self.assertEqual(d['a'], d['b'])
# ... of the same length ...
self.assertEqual(len(d['a']), len(d['b']))
# ... and with the same ascii() representation.
self.assertEqual(ascii(d['a']), ascii(d['b']))
def test_main(): def test_main():
support.run_unittest(PEP263Test) support.run_unittest(PEP263Test)

View file

@ -12,6 +12,9 @@ What's New in Python 3.1.2?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #3297: On wide Unicode builds, do not split non-BMP characters from
source code into surrogate pairs.
- Issue #1722344: threading._shutdown() is now called in Py_Finalize(), which - Issue #1722344: threading._shutdown() is now called in Py_Finalize(), which
fixes the problem of some exceptions being thrown at shutdown when the fixes the problem of some exceptions being thrown at shutdown when the
interpreter is killed. Patch by Adam Olsen. interpreter is killed. Patch by Adam Olsen.

View file

@ -3217,10 +3217,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
u = NULL; u = NULL;
} else { } else {
/* check for integer overflow */ /* check for integer overflow */
if (len > PY_SIZE_MAX / 4) if (len > PY_SIZE_MAX / 6)
return NULL; return NULL;
/* "\XX" may become "\u005c\uHHLL" (12 bytes) */ /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
u = PyBytes_FromStringAndSize((char *)NULL, len * 4); "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
if (u == NULL) if (u == NULL)
return NULL; return NULL;
p = buf = PyBytes_AsString(u); p = buf = PyBytes_AsString(u);
@ -3237,20 +3238,24 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
PyObject *w; PyObject *w;
char *r; char *r;
Py_ssize_t rn, i; Py_ssize_t rn, i;
w = decode_utf8(c, &s, end, "utf-16-be"); w = decode_utf8(c, &s, end, "utf-32-be");
if (w == NULL) { if (w == NULL) {
Py_DECREF(u); Py_DECREF(u);
return NULL; return NULL;
} }
r = PyBytes_AS_STRING(w); r = PyBytes_AS_STRING(w);
rn = Py_SIZE(w); rn = Py_SIZE(w);
assert(rn % 2 == 0); assert(rn % 4 == 0);
for (i = 0; i < rn; i += 2) { for (i = 0; i < rn; i += 4) {
sprintf(p, "\\u%02x%02x", sprintf(p, "\\U%02x%02x%02x%02x",
r[i + 0] & 0xFF, r[i + 0] & 0xFF,
r[i + 1] & 0xFF); r[i + 1] & 0xFF,
p += 6; r[i + 2] & 0xFF,
r[i + 3] & 0xFF);
p += 10;
} }
/* Should be impossible to overflow */
assert(p - buf <= Py_SIZE(u));
Py_DECREF(w); Py_DECREF(w);
} else { } else {
*p++ = *s++; *p++ = *s++;