mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00
gh-129173: refactor PyCodec_ReplaceErrors
into separate functions (#129893)
The logic of `PyCodec_ReplaceErrors` is now split into separate functions, each of which handles a specific exception type.
This commit is contained in:
parent
4d3a7ea354
commit
fa6a8140dd
1 changed files with 83 additions and 40 deletions
123
Python/codecs.c
123
Python/codecs.c
|
@ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* Create a Unicode string containing 'count' copies of the official
|
||||
* Unicode REPLACEMENT CHARACTER (0xFFFD).
|
||||
*/
|
||||
static PyObject *
|
||||
codec_handler_unicode_replacement_character(Py_ssize_t count)
|
||||
{
|
||||
PyObject *res = PyUnicode_New(count, Py_UNICODE_REPLACEMENT_CHARACTER);
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
assert(count == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
|
||||
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
|
||||
for (Py_ssize_t i = 0; i < count; ++i) {
|
||||
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
// --- handler: 'strict' ------------------------------------------------------
|
||||
|
||||
PyObject *PyCodec_StrictErrors(PyObject *exc)
|
||||
|
@ -774,50 +795,71 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
|
|||
}
|
||||
|
||||
|
||||
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
|
||||
// --- handler: 'replace' -----------------------------------------------------
|
||||
|
||||
/*
 * 'replace' handling for a UnicodeEncodeError.
 *
 * Queries the exception via _PyUnicodeError_GetParams() and returns a
 * (replacement, end) tuple, where 'replacement' is a string of 'slen'
 * question marks and 'end' is the end position reported by the helper.
 *
 * Returns NULL if the parameters cannot be obtained or if the
 * replacement string cannot be allocated.
 */
static PyObject *
_PyCodec_ReplaceUnicodeEncodeError(PyObject *exc)
{
    Py_ssize_t start, end, slen;
    int status = _PyUnicodeError_GetParams(exc, NULL, NULL,
                                           &start, &end, &slen, false);
    if (status < 0) {
        return NULL;
    }

    PyObject *marks = PyUnicode_New(slen, '?');
    if (marks == NULL) {
        return NULL;
    }
    assert(PyUnicode_KIND(marks) == PyUnicode_1BYTE_KIND);

    // One byte per character, so the buffer can be filled in one call.
    Py_UCS1 *buffer = PyUnicode_1BYTE_DATA(marks);
    memset(buffer, '?', sizeof(Py_UCS1) * slen);
    assert(_PyUnicode_CheckConsistency(marks, 1));

    // The "N" format unit passes our reference to 'marks' to the tuple.
    return Py_BuildValue("(Nn)", marks, end);
}
|
||||
|
||||
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
|
||||
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
|
||||
&start, &end, &slen, false) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
PyObject *res = PyUnicode_New(slen, '?');
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND);
|
||||
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
|
||||
memset(outp, '?', sizeof(Py_UCS1) * slen);
|
||||
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||
return Py_BuildValue("(Nn)", res, end);
|
||||
|
||||
static PyObject *
|
||||
_PyCodec_ReplaceUnicodeDecodeError(PyObject *exc)
|
||||
{
|
||||
Py_ssize_t end;
|
||||
if (PyUnicodeDecodeError_GetEnd(exc, &end) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
|
||||
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
|
||||
NULL, &end, NULL, true) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
return Py_BuildValue("(Cn)",
|
||||
(int)Py_UNICODE_REPLACEMENT_CHARACTER,
|
||||
end);
|
||||
PyObject *res = codec_handler_unicode_replacement_character(1);
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
|
||||
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
|
||||
&start, &end, &slen, false) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER);
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
|
||||
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
|
||||
for (Py_ssize_t i = 0; i < slen; ++i) {
|
||||
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||
return Py_BuildValue("(Nn)", res, end);
|
||||
return Py_BuildValue("(Nn)", res, end);
|
||||
}
|
||||
|
||||
|
||||
/*
 * 'replace' handling for a UnicodeTranslateError.
 *
 * Queries the exception via _PyUnicodeError_GetParams() and returns a
 * (replacement, end) tuple, where 'replacement' holds 'slen' copies of
 * the Unicode REPLACEMENT CHARACTER and 'end' is the end position
 * reported by the helper.
 *
 * Returns NULL if the parameters cannot be obtained or if the
 * replacement string cannot be created.
 */
static PyObject *
_PyCodec_ReplaceUnicodeTranslateError(PyObject *exc)
{
    Py_ssize_t start, end, slen;
    int status = _PyUnicodeError_GetParams(exc, NULL, NULL,
                                           &start, &end, &slen, false);
    if (status < 0) {
        return NULL;
    }

    PyObject *replacement = codec_handler_unicode_replacement_character(slen);
    if (replacement == NULL) {
        return NULL;
    }

    // The "N" format unit passes our reference to 'replacement' to the tuple.
    return Py_BuildValue("(Nn)", replacement, end);
}
|
||||
|
||||
|
||||
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
|
||||
{
|
||||
if (_PyIsUnicodeEncodeError(exc)) {
|
||||
return _PyCodec_ReplaceUnicodeEncodeError(exc);
|
||||
}
|
||||
else if (_PyIsUnicodeDecodeError(exc)) {
|
||||
return _PyCodec_ReplaceUnicodeDecodeError(exc);
|
||||
}
|
||||
else if (_PyIsUnicodeTranslateError(exc)) {
|
||||
return _PyCodec_ReplaceUnicodeTranslateError(exc);
|
||||
}
|
||||
else {
|
||||
wrong_exception_type(exc);
|
||||
|
@ -1468,7 +1510,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc)
|
|||
}
|
||||
|
||||
|
||||
static PyObject *replace_errors(PyObject *self, PyObject *exc)
|
||||
/*
 * Thin wrapper around PyCodec_ReplaceErrors().
 *
 * The 'self' argument is ignored; 'exc' is passed through unchanged and
 * the callee's result is returned as-is.
 */
static inline PyObject *
replace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
{
    PyObject *outcome = PyCodec_ReplaceErrors(exc);
    return outcome;
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue