mirror of
https://github.com/python/cpython.git
synced 2025-08-31 14:07:50 +00:00
Consider \U-escapes in raw-unicode-escape. Fixes #444514.
This commit is contained in:
parent
9e46abed50
commit
9a3a9f7791
2 changed files with 49 additions and 3 deletions
|
@ -697,6 +697,13 @@ class UnicodeTest(
|
||||||
print >>out, u'def\n'
|
print >>out, u'def\n'
|
||||||
print >>out, u'def\n'
|
print >>out, u'def\n'
|
||||||
|
|
||||||
|
def test_ucs4(self):
|
||||||
|
if sys.maxunicode == 0xFFFF:
|
||||||
|
return
|
||||||
|
x = u'\U00100000'
|
||||||
|
y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
|
||||||
|
self.assertEqual(x, y)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test_support.run_unittest(UnicodeTest)
|
test_support.run_unittest(UnicodeTest)
|
||||||
|
|
||||||
|
|
|
@ -2030,6 +2030,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
Py_UCS4 x;
|
Py_UCS4 x;
|
||||||
int i;
|
int i;
|
||||||
|
int count;
|
||||||
|
|
||||||
/* Non-escape characters are interpreted as Unicode ordinals */
|
/* Non-escape characters are interpreted as Unicode ordinals */
|
||||||
if (*s != '\\') {
|
if (*s != '\\') {
|
||||||
|
@ -2048,15 +2049,16 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
}
|
}
|
||||||
if (((s - bs) & 1) == 0 ||
|
if (((s - bs) & 1) == 0 ||
|
||||||
s >= end ||
|
s >= end ||
|
||||||
*s != 'u') {
|
(*s != 'u' && *s != 'U')) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
p--;
|
p--;
|
||||||
|
count = *s=='u' ? 4 : 8;
|
||||||
s++;
|
s++;
|
||||||
|
|
||||||
/* \uXXXX with 4 hex digits */
|
/* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
|
||||||
outpos = p-PyUnicode_AS_UNICODE(v);
|
outpos = p-PyUnicode_AS_UNICODE(v);
|
||||||
for (x = 0, i = 0; i < 4; ++i, ++s) {
|
for (x = 0, i = 0; i < count; ++i, ++s) {
|
||||||
c = (unsigned char)*s;
|
c = (unsigned char)*s;
|
||||||
if (!isxdigit(c)) {
|
if (!isxdigit(c)) {
|
||||||
endinpos = s-starts;
|
endinpos = s-starts;
|
||||||
|
@ -2076,6 +2078,16 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
else
|
else
|
||||||
x += 10 + c - 'A';
|
x += 10 + c - 'A';
|
||||||
}
|
}
|
||||||
|
#ifndef Py_UNICODE_WIDE
|
||||||
|
if (x > 0x10000) {
|
||||||
|
if (unicode_decode_call_errorhandler(
|
||||||
|
errors, &errorHandler,
|
||||||
|
"rawunicodeescape", "\\Uxxxxxxxx out of range",
|
||||||
|
starts, size, &startinpos, &endinpos, &exc, &s,
|
||||||
|
(PyObject **)&v, &outpos, &p))
|
||||||
|
goto onError;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
*p++ = x;
|
*p++ = x;
|
||||||
nextByte:
|
nextByte:
|
||||||
;
|
;
|
||||||
|
@ -2102,7 +2114,11 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
||||||
|
|
||||||
static const char *hexdigit = "0123456789abcdef";
|
static const char *hexdigit = "0123456789abcdef";
|
||||||
|
|
||||||
|
#ifdef Py_UNICODE_WIDE
|
||||||
|
repr = PyString_FromStringAndSize(NULL, 10 * size);
|
||||||
|
#else
|
||||||
repr = PyString_FromStringAndSize(NULL, 6 * size);
|
repr = PyString_FromStringAndSize(NULL, 6 * size);
|
||||||
|
#endif
|
||||||
if (repr == NULL)
|
if (repr == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
|
@ -2111,6 +2127,22 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
||||||
p = q = PyString_AS_STRING(repr);
|
p = q = PyString_AS_STRING(repr);
|
||||||
while (size-- > 0) {
|
while (size-- > 0) {
|
||||||
Py_UNICODE ch = *s++;
|
Py_UNICODE ch = *s++;
|
||||||
|
#ifdef Py_UNICODE_WIDE
|
||||||
|
/* Map 32-bit characters to '\Uxxxxxxxx' */
|
||||||
|
if (ch >= 0x10000) {
|
||||||
|
*p++ = '\\';
|
||||||
|
*p++ = 'U';
|
||||||
|
*p++ = hexdigit[(ch >> 28) & 0xf];
|
||||||
|
*p++ = hexdigit[(ch >> 24) & 0xf];
|
||||||
|
*p++ = hexdigit[(ch >> 20) & 0xf];
|
||||||
|
*p++ = hexdigit[(ch >> 16) & 0xf];
|
||||||
|
*p++ = hexdigit[(ch >> 12) & 0xf];
|
||||||
|
*p++ = hexdigit[(ch >> 8) & 0xf];
|
||||||
|
*p++ = hexdigit[(ch >> 4) & 0xf];
|
||||||
|
*p++ = hexdigit[ch & 15];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
/* Map 16-bit characters to '\uxxxx' */
|
/* Map 16-bit characters to '\uxxxx' */
|
||||||
if (ch >= 256) {
|
if (ch >= 256) {
|
||||||
*p++ = '\\';
|
*p++ = '\\';
|
||||||
|
@ -6769,3 +6801,10 @@ _PyUnicode_Fini(void)
|
||||||
unicode_freelist = NULL;
|
unicode_freelist = NULL;
|
||||||
unicode_freelist_size = 0;
|
unicode_freelist_size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Local variables:
|
||||||
|
c-basic-offset: 4
|
||||||
|
indent-tabs-mode: nil
|
||||||
|
End:
|
||||||
|
*/
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue