mirror of https://github.com/python/cpython.git, synced 2025-07-15 23:35:23 +00:00
Compare and hash unicode objects like their UTF-8 representations.
Accept Unicode characters < 256 for 'c' format.
commit 09dc34fc9c
parent f15a29f975

2 changed files with 26 additions and 50 deletions
Objects/unicodeobject.c

@@ -5406,33 +5406,23 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
 int PyUnicode_Compare(PyObject *left,
                       PyObject *right)
 {
-    PyUnicodeObject *u = NULL, *v = NULL;
-    int result;
-
-    /* Coerce the two arguments */
-    u = (PyUnicodeObject *)PyUnicode_FromObject(left);
-    if (u == NULL)
-        goto onError;
-    v = (PyUnicodeObject *)PyUnicode_FromObject(right);
-    if (v == NULL)
-        goto onError;
-
-    /* Shortcut for empty or interned objects */
-    if (v == u) {
-        Py_DECREF(u);
-        Py_DECREF(v);
-        return 0;
+    if (PyUnicode_Check(left) && PyUnicode_Check(right))
+        return unicode_compare((PyUnicodeObject *)left,
+                               (PyUnicodeObject *)right);
+    if ((PyString_Check(left) && PyUnicode_Check(right)) ||
+        (PyUnicode_Check(left) && PyString_Check(right))) {
+        if (PyUnicode_Check(left))
+            left = _PyUnicode_AsDefaultEncodedString(left, NULL);
+        if (PyUnicode_Check(right))
+            right = _PyUnicode_AsDefaultEncodedString(right, NULL);
+        assert(PyString_Check(left));
+        assert(PyString_Check(right));
+        return PyObject_Compare(left, right);
     }
-
-    result = unicode_compare(u, v);
-
-    Py_DECREF(u);
-    Py_DECREF(v);
-    return result;
-
-onError:
-    Py_XDECREF(u);
-    Py_XDECREF(v);
+    PyErr_Format(PyExc_TypeError,
+                 "Can't compare %.100s and %.100s",
+                 left->ob_type->tp_name,
+                 right->ob_type->tp_name);
     return -1;
 }
 
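Note: the rewritten PyUnicode_Compare above short-circuits the pure-Unicode case and, for mixed str/unicode operands, encodes the Unicode side with _PyUnicode_AsDefaultEncodedString and compares the resulting byte strings. A minimal sketch (not part of the commit) of what this means for C callers, assuming a py3k-era build where PyString and PyUnicode coexist and the default encoding is UTF-8; the function name compare_demo is made up:

    #include "Python.h"

    /* Sketch only: relies on the default encoding being UTF-8. */
    static int
    compare_demo(void)
    {
        PyObject *s = PyString_FromString("caf\xc3\xa9");        /* UTF-8 bytes */
        PyObject *u = PyUnicode_DecodeUTF8("caf\xc3\xa9", 5, NULL);
        int result;

        if (s == NULL || u == NULL) {
            Py_XDECREF(s);
            Py_XDECREF(u);
            return -1;
        }
        /* Mixed comparison: the Unicode operand is encoded to its default
           (UTF-8) string and the two byte strings are compared, so equal
           content compares equal (result == 0). */
        result = PyUnicode_Compare(u, s);
        Py_DECREF(s);
        Py_DECREF(u);
        return result;
    }
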
@@ -5802,30 +5792,12 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
 }
 
 static long
-unicode_hash(PyUnicodeObject *self)
+unicode_hash(PyObject *self)
 {
-    /* Since Unicode objects compare equal to their ASCII string
-       counterparts, they should use the individual character values
-       as basis for their hash value. This is needed to assure that
-       strings and Unicode objects behave in the same way as
-       dictionary keys. */
-
-    register Py_ssize_t len;
-    register Py_UNICODE *p;
-    register long x;
-
-    if (self->hash != -1)
-        return self->hash;
-    len = PyUnicode_GET_SIZE(self);
-    p = PyUnicode_AS_UNICODE(self);
-    x = *p << 7;
-    while (--len >= 0)
-        x = (1000003*x) ^ *p++;
-    x ^= PyUnicode_GET_SIZE(self);
-    if (x == -1)
-        x = -2;
-    self->hash = x;
-    return x;
+    /* Since Unicode objects compare equal to their UTF-8 string
+       counterparts, we hash the UTF-8 string. */
+    PyObject *v = _PyUnicode_AsDefaultEncodedString(self, NULL);
+    return PyObject_Hash(v);
 }
 
 PyDoc_STRVAR(index__doc__,
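Note: with unicode_hash delegating to the hash of the default-encoded (UTF-8) string, a Unicode object and the str holding its UTF-8 bytes hash alike and can be used interchangeably as dictionary keys. A short sketch of that invariant (not part of the commit), under the same py3k-era assumptions as above; hash_demo is a made-up name:

    #include <assert.h>
    #include "Python.h"

    /* Sketch only: assumes the default encoding is UTF-8. */
    static void
    hash_demo(void)
    {
        PyObject *s = PyString_FromString("caf\xc3\xa9");        /* UTF-8 bytes */
        PyObject *u = PyUnicode_DecodeUTF8("caf\xc3\xa9", 5, NULL);

        assert(s != NULL && u != NULL);
        /* unicode_hash() returns PyObject_Hash() of the UTF-8 string,
           so both objects produce the same hash value. */
        assert(PyObject_Hash(u) == PyObject_Hash(s));
        Py_DECREF(s);
        Py_DECREF(u);
    }
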
Python/getargs.c

@@ -764,8 +764,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
         char *p = va_arg(*p_va, char *);
         if (PyString_Check(arg) && PyString_Size(arg) == 1)
             *p = PyString_AS_STRING(arg)[0];
+        else if (PyUnicode_Check(arg) &&
+                 PyUnicode_GET_SIZE(arg) == 1 &&
+                 PyUnicode_AS_UNICODE(arg)[0] < 256)
+            *p = PyUnicode_AS_UNICODE(arg)[0];
         else
-            return converterr("char", arg, msgbuf, bufsize);
+            return converterr("char < 256", arg, msgbuf, bufsize);
         break;
     }
 
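Note: the getargs.c change lets the "c" format accept a length-1 Unicode object whose ordinal is below 256 in addition to a length-1 str; anything else now fails with the "char < 256" converter error. A minimal sketch (not part of the commit) of a C function using this format; demo_takes_char is hypothetical:

    #include "Python.h"

    /* Sketch only: demo_takes_char is not part of the commit. */
    static PyObject *
    demo_takes_char(PyObject *self, PyObject *args)
    {
        char ch;

        /* Accepts "a" as well as u"a"; a one-character Unicode argument
           with an ordinal of 256 or more is rejected by converterr(). */
        if (!PyArg_ParseTuple(args, "c", &ch))
            return NULL;
        return PyInt_FromLong((unsigned char)ch);
    }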