mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Issue #25227: Clean up unicode_encode_ucs1() error handler
* Change the type of limit from unsigned int to Py_UCS4, so that it has the same type as the "ch" variable (a Unicode character).
* Reuse the ch variable for _Py_ERROR_XMLCHARREFREPLACE.
* Add some newlines for readability.
This commit is contained in:
parent
1e5fcc3dea
commit
0030cd52da
1 changed file with 13 additions and 9 deletions
|
@ -6415,7 +6415,7 @@ unicode_encode_call_errorhandler(const char *errors,
|
||||||
static PyObject *
|
static PyObject *
|
||||||
unicode_encode_ucs1(PyObject *unicode,
|
unicode_encode_ucs1(PyObject *unicode,
|
||||||
const char *errors,
|
const char *errors,
|
||||||
unsigned int limit)
|
const Py_UCS4 limit)
|
||||||
{
|
{
|
||||||
/* input state */
|
/* input state */
|
||||||
Py_ssize_t pos=0, size;
|
Py_ssize_t pos=0, size;
|
||||||
|
@ -6449,12 +6449,12 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
ressize = size;
|
ressize = size;
|
||||||
|
|
||||||
while (pos < size) {
|
while (pos < size) {
|
||||||
Py_UCS4 c = PyUnicode_READ(kind, data, pos);
|
Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
|
||||||
|
|
||||||
/* can we encode this? */
|
/* can we encode this? */
|
||||||
if (c<limit) {
|
if (ch < limit) {
|
||||||
/* no overflow check, because we know that the space is enough */
|
/* no overflow check, because we know that the space is enough */
|
||||||
*str++ = (char)c;
|
*str++ = (char)ch;
|
||||||
++pos;
|
++pos;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -6481,7 +6481,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
case _Py_ERROR_REPLACE:
|
case _Py_ERROR_REPLACE:
|
||||||
while (collstart++ < collend)
|
while (collstart++ < collend)
|
||||||
*str++ = '?';
|
*str++ = '?';
|
||||||
/* fall through */
|
/* fall through ignore error handler */
|
||||||
case _Py_ERROR_IGNORE:
|
case _Py_ERROR_IGNORE:
|
||||||
pos = collend;
|
pos = collend;
|
||||||
break;
|
break;
|
||||||
|
@ -6491,8 +6491,9 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
requiredsize = respos;
|
requiredsize = respos;
|
||||||
/* determine replacement size */
|
/* determine replacement size */
|
||||||
for (i = collstart; i < collend; ++i) {
|
for (i = collstart; i < collend; ++i) {
|
||||||
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
|
||||||
Py_ssize_t incr;
|
Py_ssize_t incr;
|
||||||
|
|
||||||
|
ch = PyUnicode_READ(kind, data, i);
|
||||||
if (ch < 10)
|
if (ch < 10)
|
||||||
incr = 2+1+1;
|
incr = 2+1+1;
|
||||||
else if (ch < 100)
|
else if (ch < 100)
|
||||||
|
@ -6538,6 +6539,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
|
if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
|
||||||
PyUnicode_READY(repunicode) == -1))
|
PyUnicode_READY(repunicode) == -1))
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
||||||
if (PyBytes_Check(repunicode)) {
|
if (PyBytes_Check(repunicode)) {
|
||||||
/* Directly copy bytes result to output. */
|
/* Directly copy bytes result to output. */
|
||||||
repsize = PyBytes_Size(repunicode);
|
repsize = PyBytes_Size(repunicode);
|
||||||
|
@ -6561,6 +6563,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* need more space? (at least enough for what we
|
/* need more space? (at least enough for what we
|
||||||
have+the replacement+the rest of the string, so
|
have+the replacement+the rest of the string, so
|
||||||
we won't have to check space for encodable characters) */
|
we won't have to check space for encodable characters) */
|
||||||
|
@ -6583,17 +6586,18 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
str = PyBytes_AS_STRING(res) + respos;
|
str = PyBytes_AS_STRING(res) + respos;
|
||||||
ressize = requiredsize;
|
ressize = requiredsize;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if there is anything unencodable in the replacement
|
/* check if there is anything unencodable in the replacement
|
||||||
and copy it to the output */
|
and copy it to the output */
|
||||||
for (i = 0; repsize-->0; ++i, ++str) {
|
for (i = 0; repsize-->0; ++i, ++str) {
|
||||||
c = PyUnicode_READ_CHAR(repunicode, i);
|
ch = PyUnicode_READ_CHAR(repunicode, i);
|
||||||
if (c >= limit) {
|
if (ch >= limit) {
|
||||||
raise_encode_exception(&exc, encoding, unicode,
|
raise_encode_exception(&exc, encoding, unicode,
|
||||||
pos, pos+1, reason);
|
pos, pos+1, reason);
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
*str = (char)c;
|
*str = (char)ch;
|
||||||
}
|
}
|
||||||
pos = newpos;
|
pos = newpos;
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue