Optimize unicode_hash() by not calling _PyUnicode_AsDefaultEncodedString()
at all -- this saves two object allocations (three block allocations!) and
lots of redundant work.  By using the same hash algorithm as string_hash(),
we maintain the invariant that the hash of an ASCII string is the same
whether represented as a PyString or a PyUnicode.
Guido van Rossum 2007-09-18 19:42:40 +00:00
parent e4a9e788d3
commit c2504931ee

@@ -6587,21 +6587,27 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
     return (PyObject*) PyUnicode_FromUnicode(&self->str[index], 1);
 }
 
+/* Believe it or not, this produces the same value for ASCII strings
+   as string_hash(). */
 static long
 unicode_hash(PyUnicodeObject *self)
 {
-    if (self->hash != -1) {
-        return self->hash;
-    }
-    else {
-        /* Since Unicode objects compare equal to their UTF-8 string
-           counterparts, we hash the UTF-8 string. */
-        PyObject *v = _PyUnicode_AsDefaultEncodedString((PyObject*)self, NULL);
-        if (v == NULL)
-            return -1;
-        assert(PyString_CheckExact(v));
-        return self->hash = v->ob_type->tp_hash(v);
-    }
+    Py_ssize_t len;
+    Py_UNICODE *p;
+    long x;
+
+    if (self->hash != -1)
+        return self->hash;
+    len = Py_Size(self);
+    p = self->str;
+    x = *p << 7;
+    while (--len >= 0)
+        x = (1000003*x) ^ *p++;
+    x ^= Py_Size(self);
+    if (x == -1)
+        x = -2;
+    self->hash = x;
+    return x;
 }
 
 PyDoc_STRVAR(index__doc__,
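The recurrence both hash functions share is x = (1000003*x) ^ c over each
code unit, seeded with the first unit shifted left by 7 and finally XORed
with the length.  The following is a minimal standalone sketch (not CPython
source; hash_bytes, hash_wide, and the use of unsigned arithmetic are
illustrative choices) showing why, for pure-ASCII text, running the
recurrence over bytes and over wide code units yields the same value --
the invariant the commit message relies on.

    #include <stdio.h>
    #include <string.h>
    #include <wchar.h>

    /* Hash byte-sized code units with the multiply-and-XOR recurrence. */
    static long
    hash_bytes(const unsigned char *p, size_t len)
    {
        /* Unsigned arithmetic avoids signed-overflow undefined behaviour;
           the CPython code of this era did the same math in plain long. */
        unsigned long x;
        size_t n;
        long h;

        x = (unsigned long)*p << 7;                /* seed: first unit << 7 */
        for (n = len; n > 0; n--)
            x = (1000003UL * x) ^ (unsigned long)*p++;
        x ^= (unsigned long)len;                   /* fold in the length */
        h = (long)x;
        if (h == -1)                               /* -1 means "not cached" */
            h = -2;
        return h;
    }

    /* The same recurrence over wide code units, analogous to what the new
       unicode_hash() does over Py_UNICODE. */
    static long
    hash_wide(const wchar_t *p, size_t len)
    {
        unsigned long x;
        size_t n;
        long h;

        x = (unsigned long)*p << 7;
        for (n = len; n > 0; n--)
            x = (1000003UL * x) ^ (unsigned long)*p++;
        x ^= (unsigned long)len;
        h = (long)x;
        if (h == -1)
            h = -2;
        return h;
    }

    int
    main(void)
    {
        const char *s = "hello";
        const wchar_t *w = L"hello";
        long hb = hash_bytes((const unsigned char *)s, strlen(s));
        long hw = hash_wide(w, wcslen(w));

        /* For pure-ASCII text every byte equals the corresponding wide
           code unit, so the two recurrences walk through identical values. */
        printf("bytes: %ld  wide: %ld  equal: %s\n",
               hb, hw, hb == hw ? "yes" : "no");
        return 0;
    }

Compiled and run, the sketch prints the same hash for both representations
of "hello".  On common two's-complement platforms the unsigned wrap-around
produces the same bit pattern as the signed wrap the original code relies
on, so the demonstration of the byte/wide equality carries over unchanged.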