mirror of
https://github.com/python/cpython.git
synced 2025-07-23 11:15:24 +00:00
Merged revisions 84655 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r84655 | antoine.pitrou | 2010-09-09 22:30:23 +0200 (jeu., 09 sept. 2010) | 6 lines

  Issue #9804: ascii() now always represents unicode surrogate pairs as
  a single `\UXXXXXXXX`, regardless of whether the character is printable
  or not. Also, the "backslashreplace" error handler now joins surrogate
  pairs into a single character on UCS-2 builds.
........
This commit is contained in:
parent
8e0bb6a1e2
commit
c9a8df24cc
4 changed files with 72 additions and 17 deletions
|
@ -678,6 +678,13 @@ static Py_UNICODE hexdigits[] = {
|
|||
|
||||
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
||||
{
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
#define IS_SURROGATE_PAIR(p, end) \
|
||||
(*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \
|
||||
*(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF)
|
||||
#else
|
||||
#define IS_SURROGATE_PAIR(p, end) 0
|
||||
#endif
|
||||
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
|
||||
PyObject *restuple;
|
||||
PyObject *object;
|
||||
|
@ -702,7 +709,12 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
|||
else
|
||||
#endif
|
||||
if (*p >= 0x100) {
|
||||
ressize += 1+1+4;
|
||||
if (IS_SURROGATE_PAIR(p, startp+end)) {
|
||||
ressize += 1+1+8;
|
||||
++p;
|
||||
}
|
||||
else
|
||||
ressize += 1+1+4;
|
||||
}
|
||||
else
|
||||
ressize += 1+1+2;
|
||||
|
@ -712,9 +724,12 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
|||
return NULL;
|
||||
for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
|
||||
p < startp+end; ++p) {
|
||||
Py_UNICODE c = *p;
|
||||
Py_UCS4 c = (Py_UCS4) *p;
|
||||
*outp++ = '\\';
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
if (IS_SURROGATE_PAIR(p, startp+end)) {
|
||||
c = ((*p & 0x3FF) << 10) + (*(p + 1) & 0x3FF) + 0x10000;
|
||||
++p;
|
||||
}
|
||||
if (c >= 0x00010000) {
|
||||
*outp++ = 'U';
|
||||
*outp++ = hexdigits[(c>>28)&0xf];
|
||||
|
@ -724,9 +739,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
|||
*outp++ = hexdigits[(c>>12)&0xf];
|
||||
*outp++ = hexdigits[(c>>8)&0xf];
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (c >= 0x100) {
|
||||
else if (c >= 0x100) {
|
||||
*outp++ = 'u';
|
||||
*outp++ = hexdigits[(c>>12)&0xf];
|
||||
*outp++ = hexdigits[(c>>8)&0xf];
|
||||
|
@ -746,6 +759,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
|||
wrong_exception_type(exc);
|
||||
return NULL;
|
||||
}
|
||||
#undef IS_SURROGATE_PAIR
|
||||
}
|
||||
|
||||
/* This handler is declared static until someone demonstrates
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue