(Merge 3.2) Issue #13093: Fix error handling on PyUnicode_EncodeDecimal()

This commit is contained in:
Victor Stinner 2011-11-22 01:50:07 +01:00
parent cfed46e00a
commit b84d723509
2 changed files with 14 additions and 4 deletions

View file

@@ -1824,6 +1824,12 @@ class UnicodeTest(string_tests.CommonTest,
b'123&#8364;') b'123&#8364;')
self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"), self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
b'123\\u20ac') b'123\\u20ac')
self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
b'123? ')
self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
b'123??')
self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
b'123?0')
def test_transform_decimal(self): def test_transform_decimal(self):
from _testcapi import unicode_transformdecimaltoascii as transform_decimal from _testcapi import unicode_transformdecimaltoascii as transform_decimal

View file

@@ -8875,22 +8875,25 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
kind = PyUnicode_KIND(unicode); kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode); data = PyUnicode_DATA(unicode);
for (i=0; i < length; i++) { for (i=0; i < length; ) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i); Py_UCS4 ch = PyUnicode_READ(kind, data, i);
int decimal; int decimal;
Py_ssize_t startpos, endpos; Py_ssize_t startpos, endpos;
if (Py_UNICODE_ISSPACE(ch)) { if (Py_UNICODE_ISSPACE(ch)) {
*output++ = ' '; *output++ = ' ';
i++;
continue; continue;
} }
decimal = Py_UNICODE_TODECIMAL(ch); decimal = Py_UNICODE_TODECIMAL(ch);
if (decimal >= 0) { if (decimal >= 0) {
*output++ = '0' + decimal; *output++ = '0' + decimal;
i++;
continue; continue;
} }
if (0 < ch && ch < 256) { if (0 < ch && ch < 256) {
*output++ = (char)ch; *output++ = (char)ch;
i++;
continue; continue;
} }
/* All other characters are considered unencodable */ /* All other characters are considered unencodable */
@@ -8899,8 +8902,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
for (; endpos < length; endpos++) { for (; endpos < length; endpos++) {
ch = PyUnicode_READ(kind, data, endpos); ch = PyUnicode_READ(kind, data, endpos);
if ((0 < ch && ch < 256) || if ((0 < ch && ch < 256) ||
!Py_UNICODE_ISSPACE(ch) || Py_UNICODE_ISSPACE(ch) ||
Py_UNICODE_TODECIMAL(ch)) 0 <= Py_UNICODE_TODECIMAL(ch))
break; break;
} }
/* cache callback name lookup /* cache callback name lookup
@ -8924,7 +8927,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
case 2: /* replace */ case 2: /* replace */
for (j=startpos; j < endpos; j++) for (j=startpos; j < endpos; j++)
*output++ = '?'; *output++ = '?';
/* fall through */ i = endpos;
break;
case 3: /* ignore */ case 3: /* ignore */
i = endpos; i = endpos;
break; break;