mirror of
https://github.com/python/cpython.git
synced 2025-09-02 15:07:53 +00:00
Bring comments up to date (e.g., they still said the table had to be
a prime size, which is in fact never true anymore ...).
This commit is contained in:
parent
8152d32375
commit
ea8f2bf9ca
1 changed files with 40 additions and 23 deletions
|
@ -15,7 +15,7 @@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Table of irreducible polynomials to efficiently cycle through
|
Table of irreducible polynomials to efficiently cycle through
|
||||||
GF(2^n)-{0}, 2<=n<=30.
|
GF(2^n)-{0}, 2<=n<=30. A table size is always a power of 2.
|
||||||
*/
|
*/
|
||||||
static long polys[] = {
|
static long polys[] = {
|
||||||
4 + 3,
|
4 + 3,
|
||||||
|
@ -54,13 +54,26 @@ static long polys[] = {
|
||||||
static PyObject *dummy; /* Initialized by first call to newdictobject() */
|
static PyObject *dummy; /* Initialized by first call to newdictobject() */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Invariant for entries: when in use, me_value is not NULL and me_key is
|
There are three kinds of slots in the table:
|
||||||
not NULL and not dummy; when not in use, me_value is NULL and me_key
|
|
||||||
is either NULL or dummy. A dummy key value cannot be replaced by
|
1. Unused. me_key == me_value == NULL
|
||||||
NULL, since otherwise other keys may be lost.
|
Does not hold an active (key, value) pair now and never did. Unused can
|
||||||
|
transition to Active upon key insertion. This is the only case in which
|
||||||
|
me_key is NULL, and is each slot's initial state.
|
||||||
|
|
||||||
|
2. Active. me_key != NULL and me_key != dummy and me_value != NULL
|
||||||
|
Holds an active (key, value) pair. Active can transition to Dummy upon
|
||||||
|
key deletion. This is the only case in which me_value != NULL.
|
||||||
|
|
||||||
|
3. Dummy. me_key == dummy && me_value == NULL
|
||||||
|
Previously held an active (key, value) pair, but that was deleted and an
|
||||||
|
active pair has not yet overwritten the slot. Dummy can transition to
|
||||||
|
Active upon key insertion. Dummy slots cannot be made Unused again
|
||||||
|
(cannot have me_key set to NULL), else the probe sequence in case of
|
||||||
|
collision would have no way to know they were once active.
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
long me_hash;
|
long me_hash; /* cached hash code of me_key */
|
||||||
PyObject *me_key;
|
PyObject *me_key;
|
||||||
PyObject *me_value;
|
PyObject *me_value;
|
||||||
#ifdef USE_CACHE_ALIGNED
|
#ifdef USE_CACHE_ALIGNED
|
||||||
|
@ -69,20 +82,21 @@ typedef struct {
|
||||||
} dictentry;
|
} dictentry;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
To ensure the lookup algorithm terminates, the table size must be a
|
To ensure the lookup algorithm terminates, there must be at least one Unused
|
||||||
prime number and there must be at least one NULL key in the table.
|
slot (NULL key) in the table.
|
||||||
The value ma_fill is the number of non-NULL keys; ma_used is the number
|
The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
|
||||||
of non-NULL, non-dummy keys.
|
ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
|
||||||
To avoid slowing down lookups on a near-full table, we resize the table
|
values == the number of Active items).
|
||||||
when it is more than half filled.
|
To avoid slowing down lookups on a near-full table, we resize the table when
|
||||||
|
it is more than half filled.
|
||||||
*/
|
*/
|
||||||
typedef struct dictobject dictobject;
|
typedef struct dictobject dictobject;
|
||||||
struct dictobject {
|
struct dictobject {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
int ma_fill;
|
int ma_fill; /* # Active + # Dummy */
|
||||||
int ma_used;
|
int ma_used; /* # Active */
|
||||||
int ma_size;
|
int ma_size; /* total # slots in ma_table */
|
||||||
int ma_poly;
|
int ma_poly; /* appropriate entry from polys vector */
|
||||||
dictentry *ma_table;
|
dictentry *ma_table;
|
||||||
dictentry *(*ma_lookup)(dictobject *mp, PyObject *key, long hash);
|
dictentry *(*ma_lookup)(dictobject *mp, PyObject *key, long hash);
|
||||||
};
|
};
|
||||||
|
@ -138,12 +152,12 @@ This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
|
||||||
Open addressing is preferred over chaining since the link overhead for
|
Open addressing is preferred over chaining since the link overhead for
|
||||||
chaining would be substantial (100% with typical malloc overhead).
|
chaining would be substantial (100% with typical malloc overhead).
|
||||||
However, instead of going through the table at constant steps, we cycle
|
However, instead of going through the table at constant steps, we cycle
|
||||||
through the values of GF(2^n)-{0}. This avoids modulo computations, being
|
through the values of GF(2^n). This avoids modulo computations, being
|
||||||
much cheaper on RISC machines, without leading to clustering.
|
much cheaper on RISC machines, without leading to clustering.
|
||||||
|
|
||||||
The initial probe index is computed as hash mod the table size.
|
The initial probe index is computed as hash mod the table size.
|
||||||
Subsequent probe indices use the values of x^i in GF(2^n) as an offset,
|
Subsequent probe indices use the values of x^i in GF(2^n)-{0} as an offset,
|
||||||
where x is a root. The initial value is derived from hash, too.
|
where x is a root. The initial offset is derived from hash, too.
|
||||||
|
|
||||||
All arithmetic on hash should ignore overflow.
|
All arithmetic on hash should ignore overflow.
|
||||||
|
|
||||||
|
@ -168,11 +182,14 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
|
||||||
register int cmp;
|
register int cmp;
|
||||||
PyObject *err_type, *err_value, *err_tb;
|
PyObject *err_type, *err_value, *err_tb;
|
||||||
/* We must come up with (i, incr) such that 0 <= i < ma_size
|
/* We must come up with (i, incr) such that 0 <= i < ma_size
|
||||||
and 0 < incr < ma_size and both are a function of hash */
|
and 0 < incr < ma_size and both are a function of hash.
|
||||||
|
i is the initial table index and incr the initial probe offset. */
|
||||||
i = (~hash) & mask;
|
i = (~hash) & mask;
|
||||||
/* We use ~hash instead of hash, as degenerate hash functions, such
|
/* We use ~hash instead of hash, as degenerate hash functions, such
|
||||||
as for ints <sigh>, can have lots of leading zeros. It's not
|
as for ints <sigh>, can have lots of leading zeros. It's not
|
||||||
really a performance risk, but better safe than sorry. */
|
really a performance risk, but better safe than sorry.
|
||||||
|
12-Dec-00 tim: so ~hash produces lots of leading ones instead --
|
||||||
|
what's the gain? */
|
||||||
ep = &ep0[i];
|
ep = &ep0[i];
|
||||||
if (ep->me_key == NULL || ep->me_key == key)
|
if (ep->me_key == NULL || ep->me_key == key)
|
||||||
return ep;
|
return ep;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue