mirror of
https://github.com/python/cpython.git
synced 2025-10-21 14:12:27 +00:00
Optimize unicode_hash() by not calling _PyUnicode_AsDefaultEncodedString() at all.
This saves two object allocations (three block allocations!) and a lot of redundant work. Because unicode_hash() now uses the same hash algorithm as string_hash(), we maintain the invariant that the hash of an ASCII string is the same whether it is represented as a PyString or a PyUnicode.
This commit is contained in:
parent
e4a9e788d3
commit
c2504931ee
1 changed file with 18 additions and 12 deletions
|
@@ -6587,21 +6587,27 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
|
||||||
return (PyObject*) PyUnicode_FromUnicode(&self->str[index], 1);
|
return (PyObject*) PyUnicode_FromUnicode(&self->str[index], 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Believe it or not, this produces the same value for ASCII strings
|
||||||
|
as string_hash(). */
|
||||||
static long
|
static long
|
||||||
unicode_hash(PyUnicodeObject *self)
|
unicode_hash(PyUnicodeObject *self)
|
||||||
{
|
{
|
||||||
if (self->hash != -1) {
|
Py_ssize_t len;
|
||||||
return self->hash;
|
Py_UNICODE *p;
|
||||||
}
|
long x;
|
||||||
else {
|
|
||||||
/* Since Unicode objects compare equal to their UTF-8 string
|
if (self->hash != -1)
|
||||||
counterparts, we hash the UTF-8 string. */
|
return self->hash;
|
||||||
PyObject *v = _PyUnicode_AsDefaultEncodedString((PyObject*)self, NULL);
|
len = Py_Size(self);
|
||||||
if (v == NULL)
|
p = self->str;
|
||||||
return -1;
|
x = *p << 7;
|
||||||
assert(PyString_CheckExact(v));
|
while (--len >= 0)
|
||||||
return self->hash = v->ob_type->tp_hash(v);
|
x = (1000003*x) ^ *p++;
|
||||||
}
|
x ^= Py_Size(self);
|
||||||
|
if (x == -1)
|
||||||
|
x = -2;
|
||||||
|
self->hash = x;
|
||||||
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(index__doc__,
|
PyDoc_STRVAR(index__doc__,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue