mirror of
https://github.com/python/cpython.git
synced 2025-07-16 07:45:20 +00:00
Issue #25227: Cleanup unicode_encode_ucs1() error handler
* Change limit type from unsigned int to Py_UCS4, to use the same type as the "ch" variable (a Unicode character). * Reuse the ch variable for _Py_ERROR_XMLCHARREFREPLACE * Add some newlines for readability
This commit is contained in:
parent
1e5fcc3dea
commit
0030cd52da
1 changed files with 13 additions and 9 deletions
|
@ -6415,7 +6415,7 @@ unicode_encode_call_errorhandler(const char *errors,
|
|||
static PyObject *
|
||||
unicode_encode_ucs1(PyObject *unicode,
|
||||
const char *errors,
|
||||
unsigned int limit)
|
||||
const Py_UCS4 limit)
|
||||
{
|
||||
/* input state */
|
||||
Py_ssize_t pos=0, size;
|
||||
|
@ -6449,12 +6449,12 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
ressize = size;
|
||||
|
||||
while (pos < size) {
|
||||
Py_UCS4 c = PyUnicode_READ(kind, data, pos);
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
|
||||
|
||||
/* can we encode this? */
|
||||
if (c<limit) {
|
||||
if (ch < limit) {
|
||||
/* no overflow check, because we know that the space is enough */
|
||||
*str++ = (char)c;
|
||||
*str++ = (char)ch;
|
||||
++pos;
|
||||
}
|
||||
else {
|
||||
|
@ -6481,7 +6481,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
case _Py_ERROR_REPLACE:
|
||||
while (collstart++ < collend)
|
||||
*str++ = '?';
|
||||
/* fall through */
|
||||
/* fall through ignore error handler */
|
||||
case _Py_ERROR_IGNORE:
|
||||
pos = collend;
|
||||
break;
|
||||
|
@ -6491,8 +6491,9 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
requiredsize = respos;
|
||||
/* determine replacement size */
|
||||
for (i = collstart; i < collend; ++i) {
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
||||
Py_ssize_t incr;
|
||||
|
||||
ch = PyUnicode_READ(kind, data, i);
|
||||
if (ch < 10)
|
||||
incr = 2+1+1;
|
||||
else if (ch < 100)
|
||||
|
@ -6538,6 +6539,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
|
||||
PyUnicode_READY(repunicode) == -1))
|
||||
goto onError;
|
||||
|
||||
if (PyBytes_Check(repunicode)) {
|
||||
/* Directly copy bytes result to output. */
|
||||
repsize = PyBytes_Size(repunicode);
|
||||
|
@ -6561,6 +6563,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
Py_DECREF(repunicode);
|
||||
break;
|
||||
}
|
||||
|
||||
/* need more space? (at least enough for what we
|
||||
have+the replacement+the rest of the string, so
|
||||
we won't have to check space for encodable characters) */
|
||||
|
@ -6583,17 +6586,18 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
str = PyBytes_AS_STRING(res) + respos;
|
||||
ressize = requiredsize;
|
||||
}
|
||||
|
||||
/* check if there is anything unencodable in the replacement
|
||||
and copy it to the output */
|
||||
for (i = 0; repsize-->0; ++i, ++str) {
|
||||
c = PyUnicode_READ_CHAR(repunicode, i);
|
||||
if (c >= limit) {
|
||||
ch = PyUnicode_READ_CHAR(repunicode, i);
|
||||
if (ch >= limit) {
|
||||
raise_encode_exception(&exc, encoding, unicode,
|
||||
pos, pos+1, reason);
|
||||
Py_DECREF(repunicode);
|
||||
goto onError;
|
||||
}
|
||||
*str = (char)c;
|
||||
*str = (char)ch;
|
||||
}
|
||||
pos = newpos;
|
||||
Py_DECREF(repunicode);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue