mirror of
https://github.com/python/cpython.git
synced 2025-07-31 23:23:11 +00:00
Slightly revised version of patch #1538956:
Replace UnicodeDecodeErrors raised during == and != compares of Unicode and other objects with a new UnicodeWarning. All other comparisons continue to raise exceptions. Exceptions other than UnicodeDecodeErrors are also left untouched.
This commit is contained in:
parent
e6dd31c50b
commit
040f76b79c
11 changed files with 170 additions and 36 deletions
|
@ -1560,6 +1560,31 @@ They all return \NULL{} or \code{-1} if an exception occurs.
|
||||||
greater than, respectively.
|
greater than, respectively.
|
||||||
\end{cfuncdesc}
|
\end{cfuncdesc}
|
||||||
|
|
||||||
|
\begin{cfuncdesc}{PyObject*}{PyUnicode_RichCompare}{PyObject *left,
|
||||||
|
PyObject *right,
|
||||||
|
int op}
|
||||||
|
|
||||||
|
% This entry could use some polishing - my TeX is too
|
||||||
|
% rusty these days... (MAL)
|
||||||
|
|
||||||
|
Rich compare two strings and return one of the following:
|
||||||
|
\begin{verbatim}
|
||||||
|
- NULL in case an exception was raised
|
||||||
|
- Py_True or Py_False for successful comparisons
|
||||||
|
- Py_NotImplemented in case the type combination is unknown
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
|
||||||
|
case the conversion of the arguments to Unicode fails with a
|
||||||
|
UnicodeDecodeError.
|
||||||
|
|
||||||
|
Possible values for \var{op}:
|
||||||
|
\begin{verbatim}
|
||||||
|
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\end{cfuncdesc}
|
||||||
|
|
||||||
\begin{cfuncdesc}{PyObject*}{PyUnicode_Format}{PyObject *format,
|
\begin{cfuncdesc}{PyObject*}{PyUnicode_Format}{PyObject *format,
|
||||||
PyObject *args}
|
PyObject *args}
|
||||||
Return a new string object from \var{format} and \var{args}; this
|
Return a new string object from \var{format} and \var{args}; this
|
||||||
|
|
|
@ -288,10 +288,11 @@ for each thread.
|
||||||
names are \samp{PyExc_} followed by the Python exception name.
|
names are \samp{PyExc_} followed by the Python exception name.
|
||||||
These have the type \ctype{PyObject*}; they are all class objects.
|
These have the type \ctype{PyObject*}; they are all class objects.
|
||||||
Their names are \cdata{PyExc_Warning}, \cdata{PyExc_UserWarning},
|
Their names are \cdata{PyExc_Warning}, \cdata{PyExc_UserWarning},
|
||||||
\cdata{PyExc_DeprecationWarning}, \cdata{PyExc_SyntaxWarning},
|
\cdata{PyExc_UnicodeWarning}, \cdata{PyExc_DeprecationWarning},
|
||||||
\cdata{PyExc_RuntimeWarning}, and \cdata{PyExc_FutureWarning}.
|
\cdata{PyExc_SyntaxWarning}, \cdata{PyExc_RuntimeWarning}, and
|
||||||
\cdata{PyExc_Warning} is a subclass of \cdata{PyExc_Exception}; the
|
\cdata{PyExc_FutureWarning}. \cdata{PyExc_Warning} is a subclass of
|
||||||
other warning categories are subclasses of \cdata{PyExc_Warning}.
|
\cdata{PyExc_Exception}; the other warning categories are subclasses
|
||||||
|
of \cdata{PyExc_Warning}.
|
||||||
|
|
||||||
For information about warning control, see the documentation for the
|
For information about warning control, see the documentation for the
|
||||||
\module{warnings} module and the \programopt{-W} option in the
|
\module{warnings} module and the \programopt{-W} option in the
|
||||||
|
|
|
@ -456,6 +456,11 @@ Base class for warnings about probable mistakes in module imports.
|
||||||
\versionadded{2.5}
|
\versionadded{2.5}
|
||||||
\end{excdesc}
|
\end{excdesc}
|
||||||
|
|
||||||
|
\begin{excdesc}{UnicodeWarning}
|
||||||
|
Base class for warnings related to Unicode.
|
||||||
|
\versionadded{2.5}
|
||||||
|
\end{excdesc}
|
||||||
|
|
||||||
The class hierarchy for built-in exceptions is:
|
The class hierarchy for built-in exceptions is:
|
||||||
|
|
||||||
\verbatiminput{../../Lib/test/exception_hierarchy.txt}
|
\verbatiminput{../../Lib/test/exception_hierarchy.txt}
|
||||||
|
|
|
@ -76,6 +76,9 @@ features that will be deprecated in the future (ignored by default).}
|
||||||
|
|
||||||
\lineii{ImportWarning}{Base category for warnings triggered during the
|
\lineii{ImportWarning}{Base category for warnings triggered during the
|
||||||
process of importing a module (ignored by default).}
|
process of importing a module (ignored by default).}
|
||||||
|
|
||||||
|
\lineii{UnicodeWarning}{Base category for warnings related to Unicode.}
|
||||||
|
|
||||||
\end{tableii}
|
\end{tableii}
|
||||||
|
|
||||||
While these are technically built-in exceptions, they are documented
|
While these are technically built-in exceptions, they are documented
|
||||||
|
|
|
@ -173,6 +173,7 @@ PyAPI_DATA(PyObject *) PyExc_SyntaxWarning;
|
||||||
PyAPI_DATA(PyObject *) PyExc_RuntimeWarning;
|
PyAPI_DATA(PyObject *) PyExc_RuntimeWarning;
|
||||||
PyAPI_DATA(PyObject *) PyExc_FutureWarning;
|
PyAPI_DATA(PyObject *) PyExc_FutureWarning;
|
||||||
PyAPI_DATA(PyObject *) PyExc_ImportWarning;
|
PyAPI_DATA(PyObject *) PyExc_ImportWarning;
|
||||||
|
PyAPI_DATA(PyObject *) PyExc_UnicodeWarning;
|
||||||
|
|
||||||
|
|
||||||
/* Convenience functions */
|
/* Convenience functions */
|
||||||
|
|
|
@ -189,6 +189,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||||
# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
|
# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
|
||||||
# define PyUnicode_Replace PyUnicodeUCS2_Replace
|
# define PyUnicode_Replace PyUnicodeUCS2_Replace
|
||||||
# define PyUnicode_Resize PyUnicodeUCS2_Resize
|
# define PyUnicode_Resize PyUnicodeUCS2_Resize
|
||||||
|
# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
|
||||||
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
|
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
|
||||||
# define PyUnicode_Split PyUnicodeUCS2_Split
|
# define PyUnicode_Split PyUnicodeUCS2_Split
|
||||||
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
|
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
|
||||||
|
@ -266,6 +267,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||||
# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
|
# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
|
||||||
# define PyUnicode_Replace PyUnicodeUCS4_Replace
|
# define PyUnicode_Replace PyUnicodeUCS4_Replace
|
||||||
# define PyUnicode_Resize PyUnicodeUCS4_Resize
|
# define PyUnicode_Resize PyUnicodeUCS4_Resize
|
||||||
|
# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
|
||||||
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
|
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
|
||||||
# define PyUnicode_Split PyUnicodeUCS4_Split
|
# define PyUnicode_Split PyUnicodeUCS4_Split
|
||||||
# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
|
# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
|
||||||
|
@ -1139,6 +1141,28 @@ PyAPI_FUNC(int) PyUnicode_Compare(
|
||||||
PyObject *right /* Right string */
|
PyObject *right /* Right string */
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/* Rich compare two strings and return one of the following:
|
||||||
|
|
||||||
|
- NULL in case an exception was raised
|
||||||
|
- Py_True or Py_False for successful comparisons
|
||||||
|
- Py_NotImplemented in case the type combination is unknown
|
||||||
|
|
||||||
|
Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
|
||||||
|
case the conversion of the arguments to Unicode fails with a
|
||||||
|
UnicodeDecodeError.
|
||||||
|
|
||||||
|
Possible values for op:
|
||||||
|
|
||||||
|
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
|
||||||
|
PyObject *left, /* Left string */
|
||||||
|
PyObject *right, /* Right string */
|
||||||
|
int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
|
||||||
|
);
|
||||||
|
|
||||||
/* Apply an argument tuple or dictionary to a format string and return
|
/* Apply an argument tuple or dictionary to a format string and return
|
||||||
the resulting Unicode string. */
|
the resulting Unicode string. */
|
||||||
|
|
||||||
|
|
|
@ -45,3 +45,4 @@ BaseException
|
||||||
+-- UserWarning
|
+-- UserWarning
|
||||||
+-- FutureWarning
|
+-- FutureWarning
|
||||||
+-- ImportWarning
|
+-- ImportWarning
|
||||||
|
+-- UnicodeWarning
|
||||||
|
|
24
Misc/NEWS
24
Misc/NEWS
|
@ -12,18 +12,18 @@ What's New in Python 2.5 release candidate 1?
|
||||||
Core and builtins
|
Core and builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
- Fix segfault when doing string formatting on subclasses of long.
|
- Unicode objects will no longer raise an exception when being
|
||||||
|
compared equal or unequal to a string if the comparison causes a
|
||||||
|
UnicodeDecodeError exception, e.g. as the result of a decoding failure.
|
||||||
|
|
||||||
- Fix bug related to __len__ functions using values > 2**32 on 64-bit machines
|
Instead, the equal (==) and unequal (!=) comparison operators will
|
||||||
with new-style classes.
|
now issue a UnicodeWarning and interpret the two objects as
|
||||||
|
unequal. The UnicodeWarning can be filtered as desired using
|
||||||
- Fix bug related to __len__ functions returning negative values with
|
the warning framework, e.g. silenced completely, turned into an
|
||||||
classic classes.
|
exception, logged, etc.
|
||||||
|
|
||||||
- Patch #1538606, Fix __index__() clipping. There were some problems
|
Note that compare operators other than equal and unequal will still
|
||||||
discovered with the API and how integers that didn't fit into Py_ssize_t
|
raise UnicodeDecodeError exceptions as they've always done.
|
||||||
were handled. This patch attempts to provide enough alternatives
|
|
||||||
to effectively use __index__.
|
|
||||||
|
|
||||||
- Bug #1536021: __hash__ may now return long int; the final hash
|
- Bug #1536021: __hash__ may now return long int; the final hash
|
||||||
value is obtained by invoking hash on the long int.
|
value is obtained by invoking hash on the long int.
|
||||||
|
@ -99,6 +99,8 @@ Build
|
||||||
C API
|
C API
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
- New API for Unicode rich comparisons: PyUnicode_RichCompare()
|
||||||
|
|
||||||
- Bug #1069160. Internal correctness changes were made to
|
- Bug #1069160. Internal correctness changes were made to
|
||||||
``PyThreadState_SetAsyncExc()``. A test case was added, and
|
``PyThreadState_SetAsyncExc()``. A test case was added, and
|
||||||
the documentation was changed to state that the return value
|
the documentation was changed to state that the return value
|
||||||
|
|
|
@ -1948,6 +1948,14 @@ SimpleExtendsException(PyExc_Warning, ImportWarning,
|
||||||
"Base class for warnings about probable mistakes in module imports");
|
"Base class for warnings about probable mistakes in module imports");
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UnicodeWarning extends Warning
|
||||||
|
*/
|
||||||
|
SimpleExtendsException(PyExc_Warning, UnicodeWarning,
|
||||||
|
"Base class for warnings about Unicode related problems, mostly\n"
|
||||||
|
"related to conversion problems.");
|
||||||
|
|
||||||
|
|
||||||
/* Pre-computed MemoryError instance. Best to create this as early as
|
/* Pre-computed MemoryError instance. Best to create this as early as
|
||||||
* possible and not wait until a MemoryError is actually raised!
|
* possible and not wait until a MemoryError is actually raised!
|
||||||
*/
|
*/
|
||||||
|
@ -2048,6 +2056,7 @@ _PyExc_Init(void)
|
||||||
PRE_INIT(RuntimeWarning)
|
PRE_INIT(RuntimeWarning)
|
||||||
PRE_INIT(FutureWarning)
|
PRE_INIT(FutureWarning)
|
||||||
PRE_INIT(ImportWarning)
|
PRE_INIT(ImportWarning)
|
||||||
|
PRE_INIT(UnicodeWarning)
|
||||||
|
|
||||||
m = Py_InitModule4("exceptions", functions, exceptions_doc,
|
m = Py_InitModule4("exceptions", functions, exceptions_doc,
|
||||||
(PyObject *)NULL, PYTHON_API_VERSION);
|
(PyObject *)NULL, PYTHON_API_VERSION);
|
||||||
|
@ -2113,6 +2122,7 @@ _PyExc_Init(void)
|
||||||
POST_INIT(RuntimeWarning)
|
POST_INIT(RuntimeWarning)
|
||||||
POST_INIT(FutureWarning)
|
POST_INIT(FutureWarning)
|
||||||
POST_INIT(ImportWarning)
|
POST_INIT(ImportWarning)
|
||||||
|
POST_INIT(UnicodeWarning)
|
||||||
|
|
||||||
PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL);
|
PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL);
|
||||||
if (!PyExc_MemoryErrorInst)
|
if (!PyExc_MemoryErrorInst)
|
||||||
|
|
|
@ -731,23 +731,6 @@ default_3way_compare(PyObject *v, PyObject *w)
|
||||||
return (vv < ww) ? -1 : (vv > ww) ? 1 : 0;
|
return (vv < ww) ? -1 : (vv > ww) ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef Py_USING_UNICODE
|
|
||||||
/* Special case for Unicode */
|
|
||||||
if (PyUnicode_Check(v) || PyUnicode_Check(w)) {
|
|
||||||
c = PyUnicode_Compare(v, w);
|
|
||||||
if (!PyErr_Occurred())
|
|
||||||
return c;
|
|
||||||
/* TypeErrors are ignored: if Unicode coercion fails due
|
|
||||||
to one of the arguments not having the right type, we
|
|
||||||
continue as defined by the coercion protocol (see
|
|
||||||
above). Luckily, decoding errors are reported as
|
|
||||||
ValueErrors and are not masked by this technique. */
|
|
||||||
if (!PyErr_ExceptionMatches(PyExc_TypeError))
|
|
||||||
return -2;
|
|
||||||
PyErr_Clear();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* None is smaller than anything */
|
/* None is smaller than anything */
|
||||||
if (v == Py_None)
|
if (v == Py_None)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
@ -5405,6 +5405,82 @@ onError:
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject *PyUnicode_RichCompare(PyObject *left,
|
||||||
|
PyObject *right,
|
||||||
|
int op)
|
||||||
|
{
|
||||||
|
int result;
|
||||||
|
|
||||||
|
result = PyUnicode_Compare(left, right);
|
||||||
|
if (result == -1 && PyErr_Occurred())
|
||||||
|
goto onError;
|
||||||
|
|
||||||
|
/* Convert the return value to a Boolean */
|
||||||
|
switch (op) {
|
||||||
|
case Py_EQ:
|
||||||
|
result = (result == 0);
|
||||||
|
break;
|
||||||
|
case Py_NE:
|
||||||
|
result = (result != 0);
|
||||||
|
break;
|
||||||
|
case Py_LE:
|
||||||
|
result = (result <= 0);
|
||||||
|
break;
|
||||||
|
case Py_GE:
|
||||||
|
result = (result >= 0);
|
||||||
|
break;
|
||||||
|
case Py_LT:
|
||||||
|
result = (result == -1);
|
||||||
|
break;
|
||||||
|
case Py_GT:
|
||||||
|
result = (result == 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return PyBool_FromLong(result);
|
||||||
|
|
||||||
|
onError:
|
||||||
|
|
||||||
|
/* Standard case
|
||||||
|
|
||||||
|
Type errors mean that PyUnicode_FromObject() could not convert
|
||||||
|
one of the arguments (usually the right hand side) to Unicode,
|
||||||
|
i.e. we can't handle the comparison request. However, it is
|
||||||
|
possible that the other object knows a comparison method, which
|
||||||
|
is why we return Py_NotImplemented to give the other object a
|
||||||
|
chance.
|
||||||
|
|
||||||
|
*/
|
||||||
|
if (PyErr_ExceptionMatches(PyExc_TypeError)) {
|
||||||
|
PyErr_Clear();
|
||||||
|
Py_INCREF(Py_NotImplemented);
|
||||||
|
return Py_NotImplemented;
|
||||||
|
}
|
||||||
|
if (op != Py_EQ && op != Py_NE)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* Equality comparison.
|
||||||
|
|
||||||
|
This is a special case: we silence any PyExc_UnicodeDecodeError
|
||||||
|
and instead turn it into a PyExc_UnicodeWarning.
|
||||||
|
|
||||||
|
*/
|
||||||
|
if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))
|
||||||
|
return NULL;
|
||||||
|
PyErr_Clear();
|
||||||
|
if (PyErr_Warn(PyExc_UnicodeWarning,
|
||||||
|
(op == Py_EQ) ?
|
||||||
|
"Unicode equal comparison "
|
||||||
|
"failed to convert both arguments to Unicode - "
|
||||||
|
"interpreting them as being unequal" :
|
||||||
|
"Unicode unequal comparison "
|
||||||
|
"failed to convert both arguments to Unicode - "
|
||||||
|
"interpreting them as being unequal"
|
||||||
|
) < 0)
|
||||||
|
return NULL;
|
||||||
|
result = (op == Py_NE);
|
||||||
|
return PyBool_FromLong(result);
|
||||||
|
}
|
||||||
|
|
||||||
int PyUnicode_Contains(PyObject *container,
|
int PyUnicode_Contains(PyObject *container,
|
||||||
PyObject *element)
|
PyObject *element)
|
||||||
{
|
{
|
||||||
|
@ -6985,11 +7061,14 @@ static PySequenceMethods unicode_as_sequence = {
|
||||||
PyUnicode_Contains, /* sq_contains */
|
PyUnicode_Contains, /* sq_contains */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
unicode_subscript(PyUnicodeObject* self, PyObject* item)
|
unicode_subscript(PyUnicodeObject* self, PyObject* item)
|
||||||
{
|
{
|
||||||
if (PyIndex_Check(item)) {
|
PyNumberMethods *nb = item->ob_type->tp_as_number;
|
||||||
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
|
if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
|
||||||
|
Py_ssize_t i = nb->nb_index(item);
|
||||||
if (i == -1 && PyErr_Occurred())
|
if (i == -1 && PyErr_Occurred())
|
||||||
return NULL;
|
return NULL;
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
|
@ -7859,7 +7938,7 @@ PyTypeObject PyUnicode_Type = {
|
||||||
0, /* tp_print */
|
0, /* tp_print */
|
||||||
0, /* tp_getattr */
|
0, /* tp_getattr */
|
||||||
0, /* tp_setattr */
|
0, /* tp_setattr */
|
||||||
(cmpfunc) unicode_compare, /* tp_compare */
|
0, /* tp_compare */
|
||||||
unicode_repr, /* tp_repr */
|
unicode_repr, /* tp_repr */
|
||||||
&unicode_as_number, /* tp_as_number */
|
&unicode_as_number, /* tp_as_number */
|
||||||
&unicode_as_sequence, /* tp_as_sequence */
|
&unicode_as_sequence, /* tp_as_sequence */
|
||||||
|
@ -7875,7 +7954,7 @@ PyTypeObject PyUnicode_Type = {
|
||||||
unicode_doc, /* tp_doc */
|
unicode_doc, /* tp_doc */
|
||||||
0, /* tp_traverse */
|
0, /* tp_traverse */
|
||||||
0, /* tp_clear */
|
0, /* tp_clear */
|
||||||
0, /* tp_richcompare */
|
PyUnicode_RichCompare, /* tp_richcompare */
|
||||||
0, /* tp_weaklistoffset */
|
0, /* tp_weaklistoffset */
|
||||||
0, /* tp_iter */
|
0, /* tp_iter */
|
||||||
0, /* tp_iternext */
|
0, /* tp_iternext */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue