gh-129173: refactor PyCodec_BackslashReplaceErrors into separate functions (#129895)

The logic of `PyCodec_BackslashReplaceErrors` is now split into separate functions,
each of which handling a specific exception type.
This commit is contained in:
Bénédikt Tran 2025-03-03 13:58:15 +01:00 committed by GitHub
parent 04091c0833
commit 3146a25e97
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -956,49 +956,18 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
return restuple;
}
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
// --- handler: 'backslashreplace' --------------------------------------------
static PyObject *
_PyCodec_BackslashReplaceUnicodeEncodeError(PyObject *exc)
{
PyObject *obj;
Py_ssize_t objlen, start, end, slen;
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
if (_PyUnicodeError_GetParams(exc,
&obj, &objlen,
&start, &end, &slen, true) < 0)
{
return NULL;
}
PyObject *res = PyUnicode_New(4 * slen, 127);
if (res == NULL) {
Py_DECREF(obj);
return NULL;
}
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
for (Py_ssize_t i = start; i < end; i++, outp += 4) {
const unsigned char ch = p[i];
outp[0] = '\\';
outp[1] = 'x';
outp[2] = Py_hexdigits[(ch >> 4) & 0xf];
outp[3] = Py_hexdigits[ch & 0xf];
}
assert(_PyUnicode_CheckConsistency(res, 1));
Py_DECREF(obj);
return Py_BuildValue("(Nn)", res, end);
}
if (
PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)
|| PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)
) {
if (_PyUnicodeError_GetParams(exc,
&obj, &objlen,
&start, &end, &slen, false) < 0)
{
return NULL;
}
}
else {
wrong_exception_type(exc);
if (_PyUnicodeError_GetParams(exc,
&obj, &objlen,
&start, &end, &slen, false) < 0)
{
return NULL;
}
@ -1035,6 +1004,65 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
}
static PyObject *
_PyCodec_BackslashReplaceUnicodeDecodeError(PyObject *exc)
{
PyObject *obj;
Py_ssize_t objlen, start, end, slen;
if (_PyUnicodeError_GetParams(exc,
&obj, &objlen,
&start, &end, &slen, true) < 0)
{
return NULL;
}
PyObject *res = PyUnicode_New(4 * slen, 127);
if (res == NULL) {
Py_DECREF(obj);
return NULL;
}
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
for (Py_ssize_t i = start; i < end; i++, outp += 4) {
const unsigned char ch = p[i];
outp[0] = '\\';
outp[1] = 'x';
outp[2] = Py_hexdigits[(ch >> 4) & 0xf];
outp[3] = Py_hexdigits[ch & 0xf];
}
assert(_PyUnicode_CheckConsistency(res, 1));
Py_DECREF(obj);
return Py_BuildValue("(Nn)", res, end);
}
static inline PyObject *
_PyCodec_BackslashReplaceUnicodeTranslateError(PyObject *exc)
{
// Same implementation as for UnicodeEncodeError objects.
return _PyCodec_BackslashReplaceUnicodeEncodeError(exc);
}
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
if (_PyIsUnicodeEncodeError(exc)) {
return _PyCodec_BackslashReplaceUnicodeEncodeError(exc);
}
else if (_PyIsUnicodeDecodeError(exc)) {
return _PyCodec_BackslashReplaceUnicodeDecodeError(exc);
}
else if (_PyIsUnicodeTranslateError(exc)) {
return _PyCodec_BackslashReplaceUnicodeTranslateError(exc);
}
else {
wrong_exception_type(exc);
return NULL;
}
}
// --- handler: 'namereplace' -------------------------------------------------
PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
@ -1502,7 +1530,8 @@ xmlcharrefreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
}
static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
static inline PyObject *
backslashreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
{
return PyCodec_BackslashReplaceErrors(exc);
}