mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Issue #24870: Reuse the new _Py_error_handler enum
Factorize code with the new get_error_handler() function. Add some empty lines for readability.
This commit is contained in:
parent
f96418de05
commit
5014920cb7
1 changed files with 77 additions and 87 deletions
|
@ -293,6 +293,34 @@ static unsigned char ascii_linebreak[] = {
|
||||||
|
|
||||||
#include "clinic/unicodeobject.c.h"
|
#include "clinic/unicodeobject.c.h"
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
_Py_ERROR_UNKNOWN=0,
|
||||||
|
_Py_ERROR_STRICT,
|
||||||
|
_Py_ERROR_SURROGATEESCAPE,
|
||||||
|
_Py_ERROR_REPLACE,
|
||||||
|
_Py_ERROR_IGNORE,
|
||||||
|
_Py_ERROR_XMLCHARREFREPLACE,
|
||||||
|
_Py_ERROR_OTHER
|
||||||
|
} _Py_error_handler;
|
||||||
|
|
||||||
|
static _Py_error_handler
|
||||||
|
get_error_handler(const char *errors)
|
||||||
|
{
|
||||||
|
if (errors == NULL)
|
||||||
|
return _Py_ERROR_STRICT;
|
||||||
|
if (strcmp(errors, "strict") == 0)
|
||||||
|
return _Py_ERROR_STRICT;
|
||||||
|
if (strcmp(errors, "surrogateescape") == 0)
|
||||||
|
return _Py_ERROR_SURROGATEESCAPE;
|
||||||
|
if (strcmp(errors, "ignore") == 0)
|
||||||
|
return _Py_ERROR_IGNORE;
|
||||||
|
if (strcmp(errors, "replace") == 0)
|
||||||
|
return _Py_ERROR_REPLACE;
|
||||||
|
if (strcmp(errors, "xmlcharrefreplace") == 0)
|
||||||
|
return _Py_ERROR_XMLCHARREFREPLACE;
|
||||||
|
return _Py_ERROR_OTHER;
|
||||||
|
}
|
||||||
|
|
||||||
/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
|
/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
|
||||||
This function is kept for backward compatibility with the old API. */
|
This function is kept for backward compatibility with the old API. */
|
||||||
Py_UNICODE
|
Py_UNICODE
|
||||||
|
@ -3163,24 +3191,22 @@ wcstombs_errorpos(const wchar_t *wstr)
|
||||||
static int
|
static int
|
||||||
locale_error_handler(const char *errors, int *surrogateescape)
|
locale_error_handler(const char *errors, int *surrogateescape)
|
||||||
{
|
{
|
||||||
if (errors == NULL) {
|
_Py_error_handler error_handler = get_error_handler(errors);
|
||||||
|
switch (error_handler)
|
||||||
|
{
|
||||||
|
case _Py_ERROR_STRICT:
|
||||||
*surrogateescape = 0;
|
*surrogateescape = 0;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
case _Py_ERROR_SURROGATEESCAPE:
|
||||||
|
|
||||||
if (strcmp(errors, "strict") == 0) {
|
|
||||||
*surrogateescape = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (strcmp(errors, "surrogateescape") == 0) {
|
|
||||||
*surrogateescape = 1;
|
*surrogateescape = 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
default:
|
||||||
PyErr_Format(PyExc_ValueError,
|
PyErr_Format(PyExc_ValueError,
|
||||||
"only 'strict' and 'surrogateescape' error handlers "
|
"only 'strict' and 'surrogateescape' error handlers "
|
||||||
"are supported, not '%s'",
|
"are supported, not '%s'",
|
||||||
errors);
|
errors);
|
||||||
return -1;
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
@ -6403,11 +6429,9 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
Py_ssize_t ressize;
|
Py_ssize_t ressize;
|
||||||
const char *encoding = (limit == 256) ? "latin-1" : "ascii";
|
const char *encoding = (limit == 256) ? "latin-1" : "ascii";
|
||||||
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
|
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
|
||||||
PyObject *errorHandler = NULL;
|
PyObject *error_handler_obj = NULL;
|
||||||
PyObject *exc = NULL;
|
PyObject *exc = NULL;
|
||||||
/* the following variable is used for caching string comparisons
|
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
|
||||||
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
|
|
||||||
int known_errorHandler = -1;
|
|
||||||
|
|
||||||
if (PyUnicode_READY(unicode) == -1)
|
if (PyUnicode_READY(unicode) == -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -6441,32 +6465,28 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
Py_ssize_t collstart = pos;
|
Py_ssize_t collstart = pos;
|
||||||
Py_ssize_t collend = pos;
|
Py_ssize_t collend = pos;
|
||||||
/* find all unencodable characters */
|
/* find all unencodable characters */
|
||||||
|
|
||||||
while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
|
while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
|
||||||
++collend;
|
++collend;
|
||||||
|
|
||||||
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
|
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
|
||||||
if (known_errorHandler==-1) {
|
if (error_handler == _Py_ERROR_UNKNOWN)
|
||||||
if ((errors==NULL) || (!strcmp(errors, "strict")))
|
error_handler = get_error_handler(errors);
|
||||||
known_errorHandler = 1;
|
|
||||||
else if (!strcmp(errors, "replace"))
|
switch (error_handler) {
|
||||||
known_errorHandler = 2;
|
case _Py_ERROR_STRICT:
|
||||||
else if (!strcmp(errors, "ignore"))
|
|
||||||
known_errorHandler = 3;
|
|
||||||
else if (!strcmp(errors, "xmlcharrefreplace"))
|
|
||||||
known_errorHandler = 4;
|
|
||||||
else
|
|
||||||
known_errorHandler = 0;
|
|
||||||
}
|
|
||||||
switch (known_errorHandler) {
|
|
||||||
case 1: /* strict */
|
|
||||||
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
|
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
|
||||||
goto onError;
|
goto onError;
|
||||||
case 2: /* replace */
|
|
||||||
|
case _Py_ERROR_REPLACE:
|
||||||
while (collstart++ < collend)
|
while (collstart++ < collend)
|
||||||
*str++ = '?'; /* fall through */
|
*str++ = '?';
|
||||||
case 3: /* ignore */
|
/* fall through */
|
||||||
|
case _Py_ERROR_IGNORE:
|
||||||
pos = collend;
|
pos = collend;
|
||||||
break;
|
break;
|
||||||
case 4: /* xmlcharrefreplace */
|
|
||||||
|
case _Py_ERROR_XMLCHARREFREPLACE:
|
||||||
respos = str - PyBytes_AS_STRING(res);
|
respos = str - PyBytes_AS_STRING(res);
|
||||||
requiredsize = respos;
|
requiredsize = respos;
|
||||||
/* determine replacement size */
|
/* determine replacement size */
|
||||||
|
@ -6510,8 +6530,9 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
}
|
}
|
||||||
pos = collend;
|
pos = collend;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
|
repunicode = unicode_encode_call_errorhandler(errors, &error_handler_obj,
|
||||||
encoding, reason, unicode, &exc,
|
encoding, reason, unicode, &exc,
|
||||||
collstart, collend, &newpos);
|
collstart, collend, &newpos);
|
||||||
if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
|
if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
|
||||||
|
@ -6587,7 +6608,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_XDECREF(errorHandler);
|
Py_XDECREF(error_handler_obj);
|
||||||
Py_XDECREF(exc);
|
Py_XDECREF(exc);
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
|
@ -6597,7 +6618,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
Py_XDECREF(res);
|
Py_XDECREF(res);
|
||||||
Py_XDECREF(errorHandler);
|
Py_XDECREF(error_handler_obj);
|
||||||
Py_XDECREF(exc);
|
Py_XDECREF(exc);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -6644,28 +6665,6 @@ PyUnicode_AsLatin1String(PyObject *unicode)
|
||||||
|
|
||||||
/* --- 7-bit ASCII Codec -------------------------------------------------- */
|
/* --- 7-bit ASCII Codec -------------------------------------------------- */
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
_Py_ERROR_UNKNOWN=0,
|
|
||||||
_Py_ERROR_SURROGATEESCAPE,
|
|
||||||
_Py_ERROR_REPLACE,
|
|
||||||
_Py_ERROR_IGNORE,
|
|
||||||
_Py_ERROR_OTHER
|
|
||||||
} _Py_error_handler;
|
|
||||||
|
|
||||||
static _Py_error_handler
|
|
||||||
get_error_handler(const char *errors)
|
|
||||||
{
|
|
||||||
if (errors == NULL)
|
|
||||||
return _Py_ERROR_OTHER;
|
|
||||||
if (strcmp(errors, "surrogateescape") == 0)
|
|
||||||
return _Py_ERROR_SURROGATEESCAPE;
|
|
||||||
if (strcmp(errors, "ignore") == 0)
|
|
||||||
return _Py_ERROR_IGNORE;
|
|
||||||
if (strcmp(errors, "replace") == 0)
|
|
||||||
return _Py_ERROR_REPLACE;
|
|
||||||
return _Py_ERROR_OTHER;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_DecodeASCII(const char *s,
|
PyUnicode_DecodeASCII(const char *s,
|
||||||
Py_ssize_t size,
|
Py_ssize_t size,
|
||||||
|
@ -8129,7 +8128,7 @@ static int
|
||||||
charmap_encoding_error(
|
charmap_encoding_error(
|
||||||
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
|
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
|
||||||
PyObject **exceptionObject,
|
PyObject **exceptionObject,
|
||||||
int *known_errorHandler, PyObject **errorHandler, const char *errors,
|
_Py_error_handler *error_handler, PyObject **error_handler_obj, const char *errors,
|
||||||
PyObject **res, Py_ssize_t *respos)
|
PyObject **res, Py_ssize_t *respos)
|
||||||
{
|
{
|
||||||
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
|
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
|
||||||
|
@ -8176,23 +8175,15 @@ charmap_encoding_error(
|
||||||
}
|
}
|
||||||
/* cache callback name lookup
|
/* cache callback name lookup
|
||||||
* (if not done yet, i.e. it's the first error) */
|
* (if not done yet, i.e. it's the first error) */
|
||||||
if (*known_errorHandler==-1) {
|
if (*error_handler == _Py_ERROR_UNKNOWN)
|
||||||
if ((errors==NULL) || (!strcmp(errors, "strict")))
|
*error_handler = get_error_handler(errors);
|
||||||
*known_errorHandler = 1;
|
|
||||||
else if (!strcmp(errors, "replace"))
|
switch (*error_handler) {
|
||||||
*known_errorHandler = 2;
|
case _Py_ERROR_STRICT:
|
||||||
else if (!strcmp(errors, "ignore"))
|
|
||||||
*known_errorHandler = 3;
|
|
||||||
else if (!strcmp(errors, "xmlcharrefreplace"))
|
|
||||||
*known_errorHandler = 4;
|
|
||||||
else
|
|
||||||
*known_errorHandler = 0;
|
|
||||||
}
|
|
||||||
switch (*known_errorHandler) {
|
|
||||||
case 1: /* strict */
|
|
||||||
raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
|
raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
|
||||||
return -1;
|
return -1;
|
||||||
case 2: /* replace */
|
|
||||||
|
case _Py_ERROR_REPLACE:
|
||||||
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
|
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
|
||||||
x = charmapencode_output('?', mapping, res, respos);
|
x = charmapencode_output('?', mapping, res, respos);
|
||||||
if (x==enc_EXCEPTION) {
|
if (x==enc_EXCEPTION) {
|
||||||
|
@ -8204,10 +8195,11 @@ charmap_encoding_error(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* fall through */
|
/* fall through */
|
||||||
case 3: /* ignore */
|
case _Py_ERROR_IGNORE:
|
||||||
*inpos = collendpos;
|
*inpos = collendpos;
|
||||||
break;
|
break;
|
||||||
case 4: /* xmlcharrefreplace */
|
|
||||||
|
case _Py_ERROR_XMLCHARREFREPLACE:
|
||||||
/* generate replacement (temporarily (mis)uses p) */
|
/* generate replacement (temporarily (mis)uses p) */
|
||||||
for (collpos = collstartpos; collpos < collendpos; ++collpos) {
|
for (collpos = collstartpos; collpos < collendpos; ++collpos) {
|
||||||
char buffer[2+29+1+1];
|
char buffer[2+29+1+1];
|
||||||
|
@ -8225,8 +8217,9 @@ charmap_encoding_error(
|
||||||
}
|
}
|
||||||
*inpos = collendpos;
|
*inpos = collendpos;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
|
repunicode = unicode_encode_call_errorhandler(errors, error_handler_obj,
|
||||||
encoding, reason, unicode, exceptionObject,
|
encoding, reason, unicode, exceptionObject,
|
||||||
collstartpos, collendpos, &newpos);
|
collstartpos, collendpos, &newpos);
|
||||||
if (repunicode == NULL)
|
if (repunicode == NULL)
|
||||||
|
@ -8289,12 +8282,9 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
|
||||||
Py_ssize_t size;
|
Py_ssize_t size;
|
||||||
/* current output position */
|
/* current output position */
|
||||||
Py_ssize_t respos = 0;
|
Py_ssize_t respos = 0;
|
||||||
PyObject *errorHandler = NULL;
|
PyObject *error_handler_obj = NULL;
|
||||||
PyObject *exc = NULL;
|
PyObject *exc = NULL;
|
||||||
/* the following variable is used for caching string comparisons
|
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
|
||||||
* -1=not initialized, 0=unknown, 1=strict, 2=replace,
|
|
||||||
* 3=ignore, 4=xmlcharrefreplace */
|
|
||||||
int known_errorHandler = -1;
|
|
||||||
void *data;
|
void *data;
|
||||||
int kind;
|
int kind;
|
||||||
|
|
||||||
|
@ -8325,7 +8315,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
|
||||||
if (x==enc_FAILED) { /* unencodable character */
|
if (x==enc_FAILED) { /* unencodable character */
|
||||||
if (charmap_encoding_error(unicode, &inpos, mapping,
|
if (charmap_encoding_error(unicode, &inpos, mapping,
|
||||||
&exc,
|
&exc,
|
||||||
&known_errorHandler, &errorHandler, errors,
|
&error_handler, &error_handler_obj, errors,
|
||||||
&res, &respos)) {
|
&res, &respos)) {
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -8341,13 +8331,13 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
||||||
Py_XDECREF(exc);
|
Py_XDECREF(exc);
|
||||||
Py_XDECREF(errorHandler);
|
Py_XDECREF(error_handler_obj);
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
Py_XDECREF(res);
|
Py_XDECREF(res);
|
||||||
Py_XDECREF(exc);
|
Py_XDECREF(exc);
|
||||||
Py_XDECREF(errorHandler);
|
Py_XDECREF(error_handler_obj);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue