mirror of
https://github.com/python/cpython.git
synced 2025-07-29 06:05:00 +00:00
Convert relevant dict internals to Py_ssize_t.
I don't have a box with nearly enough RAM, or an OS, that could get close to tickling this, though (requires a dict w/ at least 2**31 entries).
This commit is contained in:
parent
1e44ca94ac
commit
9b10f7e0cb
2 changed files with 64 additions and 48 deletions
|
@ -8,7 +8,7 @@ extern "C" {
|
|||
/* Dictionary object type -- mapping from hashable object to object */
|
||||
|
||||
/* The distribution includes a separate file, Objects/dictnotes.txt,
|
||||
describing explorations into dictionary design and optimization.
|
||||
describing explorations into dictionary design and optimization.
|
||||
It covers typical dictionary use patterns, the parameters for
|
||||
tuning dictionaries, and several ideas for possible optimizations.
|
||||
*/
|
||||
|
@ -48,7 +48,11 @@ meaning otherwise.
|
|||
#define PyDict_MINSIZE 8
|
||||
|
||||
typedef struct {
|
||||
long me_hash; /* cached hash code of me_key */
|
||||
/* Cached hash code of me_key. Note that hash codes are C longs.
|
||||
* We have to use Py_ssize_t instead because dict_popitem() abuses
|
||||
* me_hash to hold a search finger.
|
||||
*/
|
||||
Py_ssize_t me_hash;
|
||||
PyObject *me_key;
|
||||
PyObject *me_value;
|
||||
} PyDictEntry;
|
||||
|
@ -65,14 +69,14 @@ it's two-thirds full.
|
|||
typedef struct _dictobject PyDictObject;
|
||||
struct _dictobject {
|
||||
PyObject_HEAD
|
||||
int ma_fill; /* # Active + # Dummy */
|
||||
int ma_used; /* # Active */
|
||||
Py_ssize_t ma_fill; /* # Active + # Dummy */
|
||||
Py_ssize_t ma_used; /* # Active */
|
||||
|
||||
/* The table contains ma_mask + 1 slots, and that's a power of 2.
|
||||
* We store the mask instead of the size because the mask is more
|
||||
* frequently needed.
|
||||
*/
|
||||
int ma_mask;
|
||||
Py_ssize_t ma_mask;
|
||||
|
||||
/* ma_table points to ma_smalltable for small tables, else to
|
||||
* additional malloc'ed memory. ma_table is never NULL! This rule
|
||||
|
|
|
@ -110,6 +110,16 @@ above, and then shifting perturb can be done while the table index is being
|
|||
masked); and the dictobject struct required a member to hold the table's
|
||||
polynomial. In Tim's experiments the current scheme ran faster, produced
|
||||
equally good collision statistics, needed less code & used less memory.
|
||||
|
||||
Theoretical Python 2.5 headache: hash codes are only C "long", but
|
||||
sizeof(Py_ssize_t) > sizeof(long) may be possible. In that case, and if a
|
||||
dict is genuinely huge, then only the slots directly reachable via indexing
|
||||
by a C long can be the first slot in a probe sequence. The probe sequence
|
||||
will still eventually reach every slot in the table, but the collision rate
|
||||
on initial probes may be much higher than this scheme was designed for.
|
||||
Getting a hash code as fat as Py_ssize_t is the only real cure. But in
|
||||
practice, this probably won't make a lick of difference for many years (at
|
||||
which point everyone will have terabytes of RAM on 64-bit boxes).
|
||||
*/
|
||||
|
||||
/* Object used as dummy key to fill deleted entries */
|
||||
|
@ -228,7 +238,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
|
|||
register Py_ssize_t i;
|
||||
register size_t perturb;
|
||||
register dictentry *freeslot;
|
||||
register unsigned int mask = mp->ma_mask;
|
||||
register Py_ssize_t mask = mp->ma_mask;
|
||||
dictentry *ep0 = mp->ma_table;
|
||||
register dictentry *ep;
|
||||
register int restore_error;
|
||||
|
@ -339,7 +349,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
|
|||
register Py_ssize_t i;
|
||||
register size_t perturb;
|
||||
register dictentry *freeslot;
|
||||
register unsigned int mask = mp->ma_mask;
|
||||
register Py_ssize_t mask = mp->ma_mask;
|
||||
dictentry *ep0 = mp->ma_table;
|
||||
register dictentry *ep;
|
||||
|
||||
|
@ -413,7 +423,7 @@ insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
|
|||
Py_DECREF(dummy);
|
||||
}
|
||||
ep->me_key = key;
|
||||
ep->me_hash = hash;
|
||||
ep->me_hash = (Py_ssize_t)hash;
|
||||
ep->me_value = value;
|
||||
mp->ma_used++;
|
||||
}
|
||||
|
@ -425,11 +435,11 @@ items again. When entries have been deleted, the new table may
|
|||
actually be smaller than the old one.
|
||||
*/
|
||||
static int
|
||||
dictresize(dictobject *mp, int minused)
|
||||
dictresize(dictobject *mp, Py_ssize_t minused)
|
||||
{
|
||||
int newsize;
|
||||
Py_ssize_t newsize;
|
||||
dictentry *oldtable, *newtable, *ep;
|
||||
int i;
|
||||
Py_ssize_t i;
|
||||
int is_oldtable_malloced;
|
||||
dictentry small_copy[PyDict_MINSIZE];
|
||||
|
||||
|
@ -537,7 +547,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
|
|||
{
|
||||
register dictobject *mp;
|
||||
register long hash;
|
||||
register int n_used;
|
||||
register Py_ssize_t n_used;
|
||||
|
||||
if (!PyDict_Check(op)) {
|
||||
PyErr_BadInternalCall();
|
||||
|
@ -568,14 +578,14 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
|
|||
* Quadrupling the size improves average dictionary sparseness
|
||||
* (reducing collisions) at the cost of some memory and iteration
|
||||
* speed (which loops over every possible entry). It also halves
|
||||
* the number of expensive resize operations in a growing dictionary.
|
||||
| * the number of expensive resize operations in a growing dictionary.
|
||||
*
|
||||
* Very large dictionaries (over 50K items) use doubling instead.
|
||||
* This may help applications with severe memory constraints.
|
||||
*/
|
||||
if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
|
||||
return 0;
|
||||
return dictresize(mp, (mp->ma_used>50000 ? mp->ma_used*2 : mp->ma_used*4));
|
||||
return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -619,10 +629,10 @@ PyDict_Clear(PyObject *op)
|
|||
dictobject *mp;
|
||||
dictentry *ep, *table;
|
||||
int table_is_malloced;
|
||||
int fill;
|
||||
Py_ssize_t fill;
|
||||
dictentry small_copy[PyDict_MINSIZE];
|
||||
#ifdef Py_DEBUG
|
||||
int i, n;
|
||||
Py_ssize_t i, n;
|
||||
#endif
|
||||
|
||||
if (!PyDict_Check(op))
|
||||
|
@ -685,7 +695,7 @@ PyDict_Clear(PyObject *op)
|
|||
/*
|
||||
* Iterate over a dict. Use like so:
|
||||
*
|
||||
* int i;
|
||||
* Py_ssize_t i;
|
||||
* PyObject *key, *value;
|
||||
* i = 0; # important! i should not otherwise be changed by you
|
||||
* while (PyDict_Next(yourdict, &i, &key, &value)) {
|
||||
|
@ -701,7 +711,7 @@ int
|
|||
PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
|
||||
{
|
||||
register Py_ssize_t i;
|
||||
register int mask;
|
||||
register Py_ssize_t mask;
|
||||
register dictentry *ep;
|
||||
|
||||
if (!PyDict_Check(op))
|
||||
|
@ -729,7 +739,7 @@ static void
|
|||
dict_dealloc(register dictobject *mp)
|
||||
{
|
||||
register dictentry *ep;
|
||||
int fill = mp->ma_fill;
|
||||
Py_ssize_t fill = mp->ma_fill;
|
||||
PyObject_GC_UnTrack(mp);
|
||||
Py_TRASHCAN_SAFE_BEGIN(mp)
|
||||
for (ep = mp->ma_table; fill > 0; ep++) {
|
||||
|
@ -751,10 +761,10 @@ dict_dealloc(register dictobject *mp)
|
|||
static int
|
||||
dict_print(register dictobject *mp, register FILE *fp, register int flags)
|
||||
{
|
||||
register int i;
|
||||
register int any;
|
||||
register Py_ssize_t i;
|
||||
register Py_ssize_t any;
|
||||
|
||||
i = Py_ReprEnter((PyObject*)mp);
|
||||
i = (int)Py_ReprEnter((PyObject*)mp);
|
||||
if (i != 0) {
|
||||
if (i < 0)
|
||||
return i;
|
||||
|
@ -896,7 +906,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
|
|||
PyObject *missing;
|
||||
static PyObject *missing_str = NULL;
|
||||
if (missing_str == NULL)
|
||||
missing_str =
|
||||
missing_str =
|
||||
PyString_InternFromString("__missing__");
|
||||
missing = _PyType_Lookup(mp->ob_type, missing_str);
|
||||
if (missing != NULL)
|
||||
|
@ -930,9 +940,9 @@ static PyObject *
|
|||
dict_keys(register dictobject *mp)
|
||||
{
|
||||
register PyObject *v;
|
||||
register int i, j;
|
||||
register Py_ssize_t i, j;
|
||||
dictentry *ep;
|
||||
int mask, n;
|
||||
Py_ssize_t mask, n;
|
||||
|
||||
again:
|
||||
n = mp->ma_used;
|
||||
|
@ -964,9 +974,9 @@ static PyObject *
|
|||
dict_values(register dictobject *mp)
|
||||
{
|
||||
register PyObject *v;
|
||||
register int i, j;
|
||||
register Py_ssize_t i, j;
|
||||
dictentry *ep;
|
||||
int mask, n;
|
||||
Py_ssize_t mask, n;
|
||||
|
||||
again:
|
||||
n = mp->ma_used;
|
||||
|
@ -998,8 +1008,8 @@ static PyObject *
|
|||
dict_items(register dictobject *mp)
|
||||
{
|
||||
register PyObject *v;
|
||||
register int i, j, n;
|
||||
int mask;
|
||||
register Py_ssize_t i, j, n;
|
||||
Py_ssize_t mask;
|
||||
PyObject *item, *key, *value;
|
||||
dictentry *ep;
|
||||
|
||||
|
@ -1132,7 +1142,7 @@ int
|
|||
PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
|
||||
{
|
||||
PyObject *it; /* iter(seq2) */
|
||||
int i; /* index into seq2 of current element */
|
||||
Py_ssize_t i; /* index into seq2 of current element */
|
||||
PyObject *item; /* seq2[i] */
|
||||
PyObject *fast; /* item as a 2-tuple or 2-list */
|
||||
|
||||
|
@ -1195,7 +1205,7 @@ Fail:
|
|||
i = -1;
|
||||
Return:
|
||||
Py_DECREF(it);
|
||||
return i;
|
||||
return (int)i;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -1208,7 +1218,7 @@ int
|
|||
PyDict_Merge(PyObject *a, PyObject *b, int override)
|
||||
{
|
||||
register PyDictObject *mp, *other;
|
||||
register int i;
|
||||
register Py_ssize_t i;
|
||||
dictentry *entry;
|
||||
|
||||
/* We accept for the argument either a concrete dictionary object,
|
||||
|
@ -1247,7 +1257,8 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
|
|||
PyDict_GetItem(a, entry->me_key) == NULL)) {
|
||||
Py_INCREF(entry->me_key);
|
||||
Py_INCREF(entry->me_value);
|
||||
insertdict(mp, entry->me_key, entry->me_hash,
|
||||
insertdict(mp, entry->me_key,
|
||||
(long)entry->me_hash,
|
||||
entry->me_value);
|
||||
}
|
||||
}
|
||||
|
@ -1376,7 +1387,8 @@ characterize(dictobject *a, dictobject *b, PyObject **pval)
|
|||
{
|
||||
PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
|
||||
PyObject *aval = NULL; /* a[akey] */
|
||||
int i, cmp;
|
||||
Py_ssize_t i;
|
||||
int cmp;
|
||||
|
||||
for (i = 0; i <= a->ma_mask; i++) {
|
||||
PyObject *thiskey, *thisaval, *thisbval;
|
||||
|
@ -1399,7 +1411,7 @@ characterize(dictobject *a, dictobject *b, PyObject **pval)
|
|||
* find its associated value anymore; or
|
||||
* maybe it is but the compare deleted the
|
||||
* a[thiskey] entry.
|
||||
*/
|
||||
| */
|
||||
Py_DECREF(thiskey);
|
||||
continue;
|
||||
}
|
||||
|
@ -1499,7 +1511,7 @@ Finished:
|
|||
static int
|
||||
dict_equal(dictobject *a, dictobject *b)
|
||||
{
|
||||
int i;
|
||||
Py_ssize_t i;
|
||||
|
||||
if (a->ma_used != b->ma_used)
|
||||
/* can't be equal if # of entries differ */
|
||||
|
@ -1673,7 +1685,7 @@ dict_pop(dictobject *mp, PyObject *args)
|
|||
static PyObject *
|
||||
dict_popitem(dictobject *mp)
|
||||
{
|
||||
int i = 0;
|
||||
Py_ssize_t i = 0;
|
||||
dictentry *ep;
|
||||
PyObject *res;
|
||||
|
||||
|
@ -1683,7 +1695,7 @@ dict_popitem(dictobject *mp)
|
|||
* happened, the result would be an infinite loop (searching for an
|
||||
* entry that no longer exists). Note that the usual popitem()
|
||||
* idiom is "while d: k, v = d.popitem()". so needing to throw the
|
||||
* tuple away if the dict *is* empty isn't a significant
|
||||
* tuple away if the dict *is* empty isn't a significant
|
||||
* inefficiency -- possible, but unlikely in practice.
|
||||
*/
|
||||
res = PyTuple_New(2);
|
||||
|
@ -1699,11 +1711,11 @@ dict_popitem(dictobject *mp)
|
|||
* field of slot 0 to hold a search finger:
|
||||
* If slot 0 has a value, use slot 0.
|
||||
* Else slot 0 is being used to hold a search finger,
|
||||
* and we use its hash value as the first index to look.
|
||||
| * and we use its hash value as the first index to look.
|
||||
*/
|
||||
ep = &mp->ma_table[0];
|
||||
if (ep->me_value == NULL) {
|
||||
i = (int)ep->me_hash;
|
||||
i = ep->me_hash;
|
||||
/* The hash field may be a real hash value, or it may be a
|
||||
* legit search finger, or it may be a once-legit search
|
||||
* finger that's out of bounds now because it wrapped around
|
||||
|
@ -2035,10 +2047,10 @@ PyDict_DelItemString(PyObject *v, const char *key)
|
|||
typedef struct {
|
||||
PyObject_HEAD
|
||||
dictobject *di_dict; /* Set to NULL when iterator is exhausted */
|
||||
int di_used;
|
||||
int di_pos;
|
||||
Py_ssize_t di_used;
|
||||
Py_ssize_t di_pos;
|
||||
PyObject* di_result; /* reusable result tuple for iteritems */
|
||||
long len;
|
||||
Py_ssize_t len;
|
||||
} dictiterobject;
|
||||
|
||||
static PyObject *
|
||||
|
@ -2076,10 +2088,10 @@ dictiter_dealloc(dictiterobject *di)
|
|||
static PyObject *
|
||||
dictiter_len(dictiterobject *di)
|
||||
{
|
||||
long len = 0;
|
||||
Py_ssize_t len = 0;
|
||||
if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
|
||||
len = di->len;
|
||||
return PyInt_FromLong(len);
|
||||
return PyInt_FromSize_t(len);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
|
||||
|
@ -2092,7 +2104,7 @@ static PyMethodDef dictiter_methods[] = {
|
|||
static PyObject *dictiter_iternextkey(dictiterobject *di)
|
||||
{
|
||||
PyObject *key;
|
||||
register int i, mask;
|
||||
register Py_ssize_t i, mask;
|
||||
register dictentry *ep;
|
||||
dictobject *d = di->di_dict;
|
||||
|
||||
|
@ -2165,7 +2177,7 @@ PyTypeObject PyDictIterKey_Type = {
|
|||
static PyObject *dictiter_iternextvalue(dictiterobject *di)
|
||||
{
|
||||
PyObject *value;
|
||||
register int i, mask;
|
||||
register Py_ssize_t i, mask;
|
||||
register dictentry *ep;
|
||||
dictobject *d = di->di_dict;
|
||||
|
||||
|
@ -2238,7 +2250,7 @@ PyTypeObject PyDictIterValue_Type = {
|
|||
static PyObject *dictiter_iternextitem(dictiterobject *di)
|
||||
{
|
||||
PyObject *key, *value, *result = di->di_result;
|
||||
register int i, mask;
|
||||
register Py_ssize_t i, mask;
|
||||
register dictentry *ep;
|
||||
dictobject *d = di->di_dict;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue