mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
Close #13093: PyUnicode_EncodeDecimal() no longer supports error handlers
other than "strict". The caller was unable to compute the size of the output buffer, because that size depends on the error handler.
This commit is contained in:
parent
e7ede06757
commit
6345be9a14
3 changed files with 26 additions and 127 deletions
|
@@ -1816,20 +1816,10 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
b' 3.14 ')
|
b' 3.14 ')
|
||||||
self.assertRaises(UnicodeEncodeError,
|
self.assertRaises(UnicodeEncodeError,
|
||||||
unicode_encodedecimal, "123\u20ac", "strict")
|
unicode_encodedecimal, "123\u20ac", "strict")
|
||||||
self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
|
self.assertRaisesRegex(
|
||||||
b'123?')
|
ValueError,
|
||||||
self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
|
"^'decimal' codec can't encode character",
|
||||||
b'123')
|
unicode_encodedecimal, "123\u20ac", "replace")
|
||||||
self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
|
|
||||||
b'123&#8364;')
|
|
||||||
self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
|
|
||||||
b'123\\u20ac')
|
|
||||||
self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
|
|
||||||
b'123? ')
|
|
||||||
self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
|
|
||||||
b'123??')
|
|
||||||
self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
|
|
||||||
b'123?0')
|
|
||||||
|
|
||||||
def test_transform_decimal(self):
|
def test_transform_decimal(self):
|
||||||
from _testcapi import unicode_transformdecimaltoascii as transform_decimal
|
from _testcapi import unicode_transformdecimaltoascii as transform_decimal
|
||||||
|
|
|
@@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #13093: PyUnicode_EncodeDecimal() no longer supports error handlers
|
||||||
|
other than "strict". The caller was unable to compute the size of the
|
||||||
|
output buffer, because that size depends on the error handler.
|
||||||
|
|
||||||
- PEP 3155 / issue #13448: Qualified name for classes and functions.
|
- PEP 3155 / issue #13448: Qualified name for classes and functions.
|
||||||
|
|
||||||
- Issue #13436: Fix a bogus error message when an AST object was passed
|
- Issue #13436: Fix a bogus error message when an AST object was passed
|
||||||
|
|
|
@@ -8839,15 +8839,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
||||||
char *output,
|
char *output,
|
||||||
const char *errors)
|
const char *errors)
|
||||||
{
|
{
|
||||||
PyObject *errorHandler = NULL;
|
|
||||||
PyObject *exc = NULL;
|
|
||||||
PyObject *unicode;
|
PyObject *unicode;
|
||||||
const char *encoding = "decimal";
|
Py_ssize_t i;
|
||||||
const char *reason = "invalid decimal Unicode string";
|
|
||||||
/* the following variable is used for caching string comparisons
|
|
||||||
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
|
|
||||||
int known_errorHandler = -1;
|
|
||||||
Py_ssize_t i, j;
|
|
||||||
enum PyUnicode_Kind kind;
|
enum PyUnicode_Kind kind;
|
||||||
void *data;
|
void *data;
|
||||||
|
|
||||||
|
@@ -8860,15 +8853,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
||||||
if (unicode == NULL)
|
if (unicode == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (PyUnicode_READY(unicode) < 0)
|
if (PyUnicode_READY(unicode) < 0) {
|
||||||
goto onError;
|
Py_DECREF(unicode);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
kind = PyUnicode_KIND(unicode);
|
kind = PyUnicode_KIND(unicode);
|
||||||
data = PyUnicode_DATA(unicode);
|
data = PyUnicode_DATA(unicode);
|
||||||
|
|
||||||
for (i=0; i < length; ) {
|
for (i=0; i < length; ) {
|
||||||
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
PyObject *exc;
|
||||||
|
Py_UCS4 ch;
|
||||||
int decimal;
|
int decimal;
|
||||||
Py_ssize_t startpos, endpos;
|
Py_ssize_t startpos;
|
||||||
|
|
||||||
|
ch = PyUnicode_READ(kind, data, i);
|
||||||
|
|
||||||
if (Py_UNICODE_ISSPACE(ch)) {
|
if (Py_UNICODE_ISSPACE(ch)) {
|
||||||
*output++ = ' ';
|
*output++ = ' ';
|
||||||
|
@@ -8886,113 +8884,20 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
||||||
i++;
|
i++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* All other characters are considered unencodable */
|
|
||||||
startpos = i;
|
startpos = i;
|
||||||
endpos = i+1;
|
exc = NULL;
|
||||||
for (; endpos < length; endpos++) {
|
raise_encode_exception(&exc, "decimal", unicode,
|
||||||
ch = PyUnicode_READ(kind, data, endpos);
|
startpos, startpos+1,
|
||||||
if ((0 < ch && ch < 256) ||
|
"invalid decimal Unicode string");
|
||||||
Py_UNICODE_ISSPACE(ch) ||
|
Py_XDECREF(exc);
|
||||||
0 <= Py_UNICODE_TODECIMAL(ch))
|
Py_DECREF(unicode);
|
||||||
break;
|
return -1;
|
||||||
}
|
|
||||||
/* cache callback name lookup
|
|
||||||
* (if not done yet, i.e. it's the first error) */
|
|
||||||
if (known_errorHandler==-1) {
|
|
||||||
if ((errors==NULL) || (!strcmp(errors, "strict")))
|
|
||||||
known_errorHandler = 1;
|
|
||||||
else if (!strcmp(errors, "replace"))
|
|
||||||
known_errorHandler = 2;
|
|
||||||
else if (!strcmp(errors, "ignore"))
|
|
||||||
known_errorHandler = 3;
|
|
||||||
else if (!strcmp(errors, "xmlcharrefreplace"))
|
|
||||||
known_errorHandler = 4;
|
|
||||||
else
|
|
||||||
known_errorHandler = 0;
|
|
||||||
}
|
|
||||||
switch (known_errorHandler) {
|
|
||||||
case 1: /* strict */
|
|
||||||
raise_encode_exception(&exc, encoding, unicode, startpos, endpos, reason);
|
|
||||||
goto onError;
|
|
||||||
case 2: /* replace */
|
|
||||||
for (j=startpos; j < endpos; j++)
|
|
||||||
*output++ = '?';
|
|
||||||
i = endpos;
|
|
||||||
break;
|
|
||||||
case 3: /* ignore */
|
|
||||||
i = endpos;
|
|
||||||
break;
|
|
||||||
case 4: /* xmlcharrefreplace */
|
|
||||||
/* generate replacement */
|
|
||||||
for (j=startpos; j < endpos; j++) {
|
|
||||||
ch = PyUnicode_READ(kind, data, i);
|
|
||||||
output += sprintf(output, "&#%d;", (int)ch);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
PyObject *repunicode;
|
|
||||||
Py_ssize_t repsize, newpos, k;
|
|
||||||
enum PyUnicode_Kind repkind;
|
|
||||||
void *repdata;
|
|
||||||
|
|
||||||
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
|
|
||||||
encoding, reason, unicode, &exc,
|
|
||||||
startpos, endpos, &newpos);
|
|
||||||
if (repunicode == NULL)
|
|
||||||
goto onError;
|
|
||||||
if (!PyUnicode_Check(repunicode)) {
|
|
||||||
/* Byte results not supported, since they have no decimal property. */
|
|
||||||
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
|
|
||||||
Py_DECREF(repunicode);
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
if (PyUnicode_READY(repunicode) < 0) {
|
|
||||||
Py_DECREF(repunicode);
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
repkind = PyUnicode_KIND(repunicode);
|
|
||||||
repdata = PyUnicode_DATA(repunicode);
|
|
||||||
|
|
||||||
/* generate replacement */
|
|
||||||
repsize = PyUnicode_GET_SIZE(repunicode);
|
|
||||||
for (k=0; k<repsize; k++) {
|
|
||||||
ch = PyUnicode_READ(repkind, repdata, k);
|
|
||||||
if (Py_UNICODE_ISSPACE(ch))
|
|
||||||
*output++ = ' ';
|
|
||||||
else {
|
|
||||||
decimal = Py_UNICODE_TODECIMAL(ch);
|
|
||||||
if (decimal >= 0)
|
|
||||||
*output++ = '0' + decimal;
|
|
||||||
else if (0 < ch && ch < 256)
|
|
||||||
*output++ = (char)ch;
|
|
||||||
else {
|
|
||||||
Py_DECREF(repunicode);
|
|
||||||
raise_encode_exception(&exc, encoding,
|
|
||||||
unicode, startpos, endpos,
|
|
||||||
reason);
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
i = newpos;
|
|
||||||
Py_DECREF(repunicode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/* 0-terminate the output string */
|
/* 0-terminate the output string */
|
||||||
*output++ = '\0';
|
*output++ = '\0';
|
||||||
Py_XDECREF(exc);
|
|
||||||
Py_XDECREF(errorHandler);
|
|
||||||
Py_DECREF(unicode);
|
Py_DECREF(unicode);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
onError:
|
|
||||||
Py_XDECREF(exc);
|
|
||||||
Py_XDECREF(errorHandler);
|
|
||||||
Py_DECREF(unicode);
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* --- Helpers ------------------------------------------------------------ */
|
/* --- Helpers ------------------------------------------------------------ */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue