mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Do not insert characters for unicode-escape decoders if the error mode
is "ignore". Fixes #529104.
This commit is contained in:
parent
bdf1f19fee
commit
047c05ebc4
2 changed files with 32 additions and 14 deletions
|
@ -541,6 +541,14 @@ else:
|
||||||
verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
|
verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
|
||||||
verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
|
verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
|
||||||
|
|
||||||
|
verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
|
||||||
|
try:
|
||||||
|
"\\".decode("unicode-escape")
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise TestFailed, '"\\".decode("unicode-escape") should fail'
|
||||||
|
|
||||||
verify(u'hello'.encode('ascii') == 'hello')
|
verify(u'hello'.encode('ascii') == 'hello')
|
||||||
verify(u'hello'.encode('utf-7') == 'hello')
|
verify(u'hello'.encode('utf-7') == 'hello')
|
||||||
verify(u'hello'.encode('utf-8') == 'hello')
|
verify(u'hello'.encode('utf-8') == 'hello')
|
||||||
|
|
|
@ -1514,8 +1514,7 @@ PyObject *PyUnicode_AsUTF16String(PyObject *unicode)
|
||||||
/* --- Unicode Escape Codec ----------------------------------------------- */
|
/* --- Unicode Escape Codec ----------------------------------------------- */
|
||||||
|
|
||||||
static
|
static
|
||||||
int unicodeescape_decoding_error(const char **source,
|
int unicodeescape_decoding_error(Py_UNICODE **x,
|
||||||
Py_UNICODE *x,
|
|
||||||
const char *errors,
|
const char *errors,
|
||||||
const char *details)
|
const char *details)
|
||||||
{
|
{
|
||||||
|
@ -1530,7 +1529,8 @@ int unicodeescape_decoding_error(const char **source,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else if (strcmp(errors,"replace") == 0) {
|
else if (strcmp(errors,"replace") == 0) {
|
||||||
*x = Py_UNICODE_REPLACEMENT_CHARACTER;
|
**x = Py_UNICODE_REPLACEMENT_CHARACTER;
|
||||||
|
(*x)++;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -1628,9 +1628,9 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
for (i = 0; i < digits; i++) {
|
for (i = 0; i < digits; i++) {
|
||||||
c = (unsigned char) s[i];
|
c = (unsigned char) s[i];
|
||||||
if (!isxdigit(c)) {
|
if (!isxdigit(c)) {
|
||||||
if (unicodeescape_decoding_error(&s, &x, errors, message))
|
if (unicodeescape_decoding_error(&p, errors, message))
|
||||||
goto onError;
|
goto onError;
|
||||||
chr = x;
|
chr = 0xffffffff;
|
||||||
i++;
|
i++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1643,6 +1643,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
chr += 10 + c - 'A';
|
chr += 10 + c - 'A';
|
||||||
}
|
}
|
||||||
s += i;
|
s += i;
|
||||||
|
if (chr == 0xffffffff)
|
||||||
|
/* _decoding_error will have already written into the
|
||||||
|
target buffer. */
|
||||||
|
break;
|
||||||
store:
|
store:
|
||||||
/* when we get here, chr is a 32-bit unicode character */
|
/* when we get here, chr is a 32-bit unicode character */
|
||||||
if (chr <= 0xffff)
|
if (chr <= 0xffff)
|
||||||
|
@ -1660,11 +1664,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
if (unicodeescape_decoding_error(
|
if (unicodeescape_decoding_error(
|
||||||
&s, &x, errors,
|
&p, errors,
|
||||||
"illegal Unicode character")
|
"illegal Unicode character")
|
||||||
)
|
)
|
||||||
goto onError;
|
goto onError;
|
||||||
*p++ = x; /* store replacement character */
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1699,14 +1702,19 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
goto store;
|
goto store;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (unicodeescape_decoding_error(&s, &x, errors, message))
|
if (unicodeescape_decoding_error(&p, errors, message))
|
||||||
goto onError;
|
goto onError;
|
||||||
*p++ = x;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
*p++ = '\\';
|
if (s > end) {
|
||||||
*p++ = (unsigned char)s[-1];
|
if (unicodeescape_decoding_error(&p, errors, "\\ at end of string"))
|
||||||
|
goto onError;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*p++ = '\\';
|
||||||
|
*p++ = (unsigned char)s[-1];
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1909,7 +1917,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
end = s + size;
|
end = s + size;
|
||||||
while (s < end) {
|
while (s < end) {
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
Py_UNICODE x;
|
Py_UCS4 x;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* Non-escape characters are interpreted as Unicode ordinals */
|
/* Non-escape characters are interpreted as Unicode ordinals */
|
||||||
|
@ -1938,9 +1946,10 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
for (x = 0, i = 0; i < 4; i++) {
|
for (x = 0, i = 0; i < 4; i++) {
|
||||||
c = (unsigned char)s[i];
|
c = (unsigned char)s[i];
|
||||||
if (!isxdigit(c)) {
|
if (!isxdigit(c)) {
|
||||||
if (unicodeescape_decoding_error(&s, &x, errors,
|
if (unicodeescape_decoding_error(&p, errors,
|
||||||
"truncated \\uXXXX"))
|
"truncated \\uXXXX"))
|
||||||
goto onError;
|
goto onError;
|
||||||
|
x = 0xffffffff;
|
||||||
i++;
|
i++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1953,7 +1962,8 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
x += 10 + c - 'A';
|
x += 10 + c - 'A';
|
||||||
}
|
}
|
||||||
s += i;
|
s += i;
|
||||||
*p++ = x;
|
if (x != 0xffffffff)
|
||||||
|
*p++ = x;
|
||||||
}
|
}
|
||||||
if (_PyUnicode_Resize(&v, (int)(p - buf)))
|
if (_PyUnicode_Resize(&v, (int)(p - buf)))
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue