Issue #24870: Reuse the new _Py_error_handler enum

Factor out duplicated code by using the new get_error_handler() function.

Add some empty lines for readability.
This commit is contained in:
Victor Stinner 2015-09-22 00:26:54 +02:00
parent f96418de05
commit 5014920cb7

View file

@ -293,6 +293,34 @@ static unsigned char ascii_linebreak[] = {
#include "clinic/unicodeobject.c.h" #include "clinic/unicodeobject.c.h"
/* Codec error handlers identified by enum value rather than by name.

   _Py_ERROR_UNKNOWN is 0 and is never returned by get_error_handler();
   the encoders in this file initialize their cached handler to it and
   call get_error_handler() only while the cache still holds that value.
   _Py_ERROR_OTHER stands for every name without a dedicated constant. */
typedef enum {
    _Py_ERROR_UNKNOWN=0,
    _Py_ERROR_STRICT,
    _Py_ERROR_SURROGATEESCAPE,
    _Py_ERROR_REPLACE,
    _Py_ERROR_IGNORE,
    _Py_ERROR_XMLCHARREFREPLACE,
    _Py_ERROR_OTHER
} _Py_error_handler;

/* Map an error handler name to its _Py_error_handler constant.

   errors: NUL-terminated handler name, or NULL.

   Returns _Py_ERROR_STRICT when errors is NULL, the matching constant
   when errors is exactly one of the names in the table below (the
   comparison is case-sensitive), and _Py_ERROR_OTHER for anything else. */
static _Py_error_handler
get_error_handler(const char *errors)
{
    /* Name -> constant table, terminated by an entry whose name is NULL. */
    static const struct {
        const char *name;
        _Py_error_handler handler;
    } known_handlers[] = {
        {"strict", _Py_ERROR_STRICT},
        {"surrogateescape", _Py_ERROR_SURROGATEESCAPE},
        {"ignore", _Py_ERROR_IGNORE},
        {"replace", _Py_ERROR_REPLACE},
        {"xmlcharrefreplace", _Py_ERROR_XMLCHARREFREPLACE},
        {NULL, _Py_ERROR_OTHER}
    };
    int idx;

    /* No handler name given: treated the same as "strict". */
    if (errors == NULL) {
        return _Py_ERROR_STRICT;
    }

    for (idx = 0; known_handlers[idx].name != NULL; idx++) {
        if (strcmp(errors, known_handlers[idx].name) == 0) {
            return known_handlers[idx].handler;
        }
    }

    /* Not one of the built-in names handled inline. */
    return _Py_ERROR_OTHER;
}
/* The max unicode value is always 0x10FFFF while using the PEP-393 API. /* The max unicode value is always 0x10FFFF while using the PEP-393 API.
This function is kept for backward compatibility with the old API. */ This function is kept for backward compatibility with the old API. */
Py_UNICODE Py_UNICODE
@ -3163,24 +3191,22 @@ wcstombs_errorpos(const wchar_t *wstr)
static int static int
locale_error_handler(const char *errors, int *surrogateescape) locale_error_handler(const char *errors, int *surrogateescape)
{ {
if (errors == NULL) { _Py_error_handler error_handler = get_error_handler(errors);
switch (error_handler)
{
case _Py_ERROR_STRICT:
*surrogateescape = 0; *surrogateescape = 0;
return 0; return 0;
} case _Py_ERROR_SURROGATEESCAPE:
if (strcmp(errors, "strict") == 0) {
*surrogateescape = 0;
return 0;
}
if (strcmp(errors, "surrogateescape") == 0) {
*surrogateescape = 1; *surrogateescape = 1;
return 0; return 0;
} default:
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"only 'strict' and 'surrogateescape' error handlers " "only 'strict' and 'surrogateescape' error handlers "
"are supported, not '%s'", "are supported, not '%s'",
errors); errors);
return -1; return -1;
}
} }
PyObject * PyObject *
@ -6403,11 +6429,9 @@ unicode_encode_ucs1(PyObject *unicode,
Py_ssize_t ressize; Py_ssize_t ressize;
const char *encoding = (limit == 256) ? "latin-1" : "ascii"; const char *encoding = (limit == 256) ? "latin-1" : "ascii";
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)"; const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
PyObject *errorHandler = NULL; PyObject *error_handler_obj = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
/* the following variable is used for caching string comparisons _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1;
if (PyUnicode_READY(unicode) == -1) if (PyUnicode_READY(unicode) == -1)
return NULL; return NULL;
@ -6441,32 +6465,28 @@ unicode_encode_ucs1(PyObject *unicode,
Py_ssize_t collstart = pos; Py_ssize_t collstart = pos;
Py_ssize_t collend = pos; Py_ssize_t collend = pos;
/* find all unencodable characters */ /* find all unencodable characters */
while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit)) while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
++collend; ++collend;
/* cache callback name lookup (if not done yet, i.e. it's the first error) */ /* cache callback name lookup (if not done yet, i.e. it's the first error) */
if (known_errorHandler==-1) { if (error_handler == _Py_ERROR_UNKNOWN)
if ((errors==NULL) || (!strcmp(errors, "strict"))) error_handler = get_error_handler(errors);
known_errorHandler = 1;
else if (!strcmp(errors, "replace")) switch (error_handler) {
known_errorHandler = 2; case _Py_ERROR_STRICT:
else if (!strcmp(errors, "ignore"))
known_errorHandler = 3;
else if (!strcmp(errors, "xmlcharrefreplace"))
known_errorHandler = 4;
else
known_errorHandler = 0;
}
switch (known_errorHandler) {
case 1: /* strict */
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
goto onError; goto onError;
case 2: /* replace */
case _Py_ERROR_REPLACE:
while (collstart++ < collend) while (collstart++ < collend)
*str++ = '?'; /* fall through */ *str++ = '?';
case 3: /* ignore */ /* fall through */
case _Py_ERROR_IGNORE:
pos = collend; pos = collend;
break; break;
case 4: /* xmlcharrefreplace */
case _Py_ERROR_XMLCHARREFREPLACE:
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
requiredsize = respos; requiredsize = respos;
/* determine replacement size */ /* determine replacement size */
@ -6510,8 +6530,9 @@ unicode_encode_ucs1(PyObject *unicode,
} }
pos = collend; pos = collend;
break; break;
default: default:
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, repunicode = unicode_encode_call_errorhandler(errors, &error_handler_obj,
encoding, reason, unicode, &exc, encoding, reason, unicode, &exc,
collstart, collend, &newpos); collstart, collend, &newpos);
if (repunicode == NULL || (PyUnicode_Check(repunicode) && if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
@ -6587,7 +6608,7 @@ unicode_encode_ucs1(PyObject *unicode,
goto onError; goto onError;
} }
Py_XDECREF(errorHandler); Py_XDECREF(error_handler_obj);
Py_XDECREF(exc); Py_XDECREF(exc);
return res; return res;
@ -6597,7 +6618,7 @@ unicode_encode_ucs1(PyObject *unicode,
onError: onError:
Py_XDECREF(res); Py_XDECREF(res);
Py_XDECREF(errorHandler); Py_XDECREF(error_handler_obj);
Py_XDECREF(exc); Py_XDECREF(exc);
return NULL; return NULL;
} }
@ -6644,28 +6665,6 @@ PyUnicode_AsLatin1String(PyObject *unicode)
/* --- 7-bit ASCII Codec -------------------------------------------------- */ /* --- 7-bit ASCII Codec -------------------------------------------------- */
/* Codec error handlers identified by enum value rather than by name.
   _Py_ERROR_OTHER covers every handler without a dedicated constant. */
typedef enum {
    _Py_ERROR_UNKNOWN=0,
    _Py_ERROR_SURROGATEESCAPE,
    _Py_ERROR_REPLACE,
    _Py_ERROR_IGNORE,
    _Py_ERROR_OTHER
} _Py_error_handler;

/* Map an error handler name to its _Py_error_handler constant.

   errors: NUL-terminated handler name, or NULL.

   Only "surrogateescape", "ignore" and "replace" are recognized (exact,
   case-sensitive match). NULL and every other name, "strict" included,
   yield _Py_ERROR_OTHER. */
static _Py_error_handler
get_error_handler(const char *errors)
{
    _Py_error_handler result = _Py_ERROR_OTHER;

    if (errors != NULL) {
        if (strcmp(errors, "surrogateescape") == 0) {
            result = _Py_ERROR_SURROGATEESCAPE;
        }
        else if (strcmp(errors, "ignore") == 0) {
            result = _Py_ERROR_IGNORE;
        }
        else if (strcmp(errors, "replace") == 0) {
            result = _Py_ERROR_REPLACE;
        }
    }
    return result;
}
PyObject * PyObject *
PyUnicode_DecodeASCII(const char *s, PyUnicode_DecodeASCII(const char *s,
Py_ssize_t size, Py_ssize_t size,
@ -8129,7 +8128,7 @@ static int
charmap_encoding_error( charmap_encoding_error(
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping, PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
PyObject **exceptionObject, PyObject **exceptionObject,
int *known_errorHandler, PyObject **errorHandler, const char *errors, _Py_error_handler *error_handler, PyObject **error_handler_obj, const char *errors,
PyObject **res, Py_ssize_t *respos) PyObject **res, Py_ssize_t *respos)
{ {
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
@ -8176,23 +8175,15 @@ charmap_encoding_error(
} }
/* cache callback name lookup /* cache callback name lookup
* (if not done yet, i.e. it's the first error) */ * (if not done yet, i.e. it's the first error) */
if (*known_errorHandler==-1) { if (*error_handler == _Py_ERROR_UNKNOWN)
if ((errors==NULL) || (!strcmp(errors, "strict"))) *error_handler = get_error_handler(errors);
*known_errorHandler = 1;
else if (!strcmp(errors, "replace")) switch (*error_handler) {
*known_errorHandler = 2; case _Py_ERROR_STRICT:
else if (!strcmp(errors, "ignore"))
*known_errorHandler = 3;
else if (!strcmp(errors, "xmlcharrefreplace"))
*known_errorHandler = 4;
else
*known_errorHandler = 0;
}
switch (*known_errorHandler) {
case 1: /* strict */
raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason); raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1; return -1;
case 2: /* replace */
case _Py_ERROR_REPLACE:
for (collpos = collstartpos; collpos<collendpos; ++collpos) { for (collpos = collstartpos; collpos<collendpos; ++collpos) {
x = charmapencode_output('?', mapping, res, respos); x = charmapencode_output('?', mapping, res, respos);
if (x==enc_EXCEPTION) { if (x==enc_EXCEPTION) {
@ -8204,10 +8195,11 @@ charmap_encoding_error(
} }
} }
/* fall through */ /* fall through */
case 3: /* ignore */ case _Py_ERROR_IGNORE:
*inpos = collendpos; *inpos = collendpos;
break; break;
case 4: /* xmlcharrefreplace */
case _Py_ERROR_XMLCHARREFREPLACE:
/* generate replacement (temporarily (mis)uses p) */ /* generate replacement (temporarily (mis)uses p) */
for (collpos = collstartpos; collpos < collendpos; ++collpos) { for (collpos = collstartpos; collpos < collendpos; ++collpos) {
char buffer[2+29+1+1]; char buffer[2+29+1+1];
@ -8225,8 +8217,9 @@ charmap_encoding_error(
} }
*inpos = collendpos; *inpos = collendpos;
break; break;
default: default:
repunicode = unicode_encode_call_errorhandler(errors, errorHandler, repunicode = unicode_encode_call_errorhandler(errors, error_handler_obj,
encoding, reason, unicode, exceptionObject, encoding, reason, unicode, exceptionObject,
collstartpos, collendpos, &newpos); collstartpos, collendpos, &newpos);
if (repunicode == NULL) if (repunicode == NULL)
@ -8289,12 +8282,9 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
Py_ssize_t size; Py_ssize_t size;
/* current output position */ /* current output position */
Py_ssize_t respos = 0; Py_ssize_t respos = 0;
PyObject *errorHandler = NULL; PyObject *error_handler_obj = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
/* the following variable is used for caching string comparisons _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
* -1=not initialized, 0=unknown, 1=strict, 2=replace,
* 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1;
void *data; void *data;
int kind; int kind;
@ -8325,7 +8315,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
if (x==enc_FAILED) { /* unencodable character */ if (x==enc_FAILED) { /* unencodable character */
if (charmap_encoding_error(unicode, &inpos, mapping, if (charmap_encoding_error(unicode, &inpos, mapping,
&exc, &exc,
&known_errorHandler, &errorHandler, errors, &error_handler, &error_handler_obj, errors,
&res, &respos)) { &res, &respos)) {
goto onError; goto onError;
} }
@ -8341,13 +8331,13 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
goto onError; goto onError;
Py_XDECREF(exc); Py_XDECREF(exc);
Py_XDECREF(errorHandler); Py_XDECREF(error_handler_obj);
return res; return res;
onError: onError:
Py_XDECREF(res); Py_XDECREF(res);
Py_XDECREF(exc); Py_XDECREF(exc);
Py_XDECREF(errorHandler); Py_XDECREF(error_handler_obj);
return NULL; return NULL;
} }