mirror of
https://github.com/python/cpython.git
synced 2025-12-15 21:44:50 +00:00
Implement PEP 412: Key-sharing dictionaries (closes #13903)
Patch from Mark Shannon.
This commit is contained in:
parent
80d07f8251
commit
7d95e40721
12 changed files with 1353 additions and 904 deletions
|
|
@ -13,78 +13,20 @@ extern "C" {
|
|||
tuning dictionaries, and several ideas for possible optimizations.
|
||||
*/
|
||||
|
||||
/*
|
||||
There are three kinds of slots in the table:
|
||||
|
||||
1. Unused. me_key == me_value == NULL
|
||||
Does not hold an active (key, value) pair now and never did. Unused can
|
||||
transition to Active upon key insertion. This is the only case in which
|
||||
me_key is NULL, and is each slot's initial state.
|
||||
|
||||
2. Active. me_key != NULL and me_key != dummy and me_value != NULL
|
||||
Holds an active (key, value) pair. Active can transition to Dummy upon
|
||||
key deletion. This is the only case in which me_value != NULL.
|
||||
|
||||
3. Dummy. me_key == dummy and me_value == NULL
|
||||
Previously held an active (key, value) pair, but that was deleted and an
|
||||
active pair has not yet overwritten the slot. Dummy can transition to
|
||||
Active upon key insertion. Dummy slots cannot be made Unused again
|
||||
(cannot have me_key set to NULL), else the probe sequence in case of
|
||||
collision would have no way to know they were once active.
|
||||
|
||||
Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
|
||||
hold a search finger. The me_hash field of Unused or Dummy slots has no
|
||||
meaning otherwise.
|
||||
*/
|
||||
|
||||
/* PyDict_MINSIZE is the minimum size of a dictionary. This many slots are
|
||||
* allocated directly in the dict object (in the ma_smalltable member).
|
||||
* It must be a power of 2, and at least 4. 8 allows dicts with no more
|
||||
* than 5 active entries to live in ma_smalltable (and so avoid an
|
||||
* additional malloc); instrumentation suggested this suffices for the
|
||||
* majority of dicts (consisting mostly of usually-small instance dicts and
|
||||
* usually-small dicts created to pass keyword arguments).
|
||||
*/
|
||||
#ifndef Py_LIMITED_API
|
||||
#define PyDict_MINSIZE 8
|
||||
|
||||
/* Forward declaration of the dict keys object type.  Only the typedef name
 * is introduced here; the members of struct _dictkeysobject are not defined
 * in the portion of this header under review, so code that includes it can
 * only hold pointers to the type (as the ma_keys field does).
 */
typedef struct _dictkeysobject PyDictKeysObject;
|
||||
|
||||
/* The ma_values pointer is NULL for a combined table
|
||||
* or points to an array of PyObject* for a split table
|
||||
*/
|
||||
typedef struct {
|
||||
/* Cached hash code of me_key. */
|
||||
Py_hash_t me_hash;
|
||||
PyObject *me_key;
|
||||
PyObject *me_value;
|
||||
} PyDictEntry;
|
||||
|
||||
/*
|
||||
To ensure the lookup algorithm terminates, there must be at least one Unused
|
||||
slot (NULL key) in the table.
|
||||
The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
|
||||
ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
|
||||
values == the number of Active items).
|
||||
To avoid slowing down lookups on a near-full table, we resize the table when
|
||||
it's two-thirds full.
|
||||
*/
|
||||
typedef struct _dictobject PyDictObject;
|
||||
struct _dictobject {
|
||||
PyObject_HEAD
|
||||
Py_ssize_t ma_fill; /* # Active + # Dummy */
|
||||
Py_ssize_t ma_used; /* # Active */
|
||||
Py_ssize_t ma_used;
|
||||
PyDictKeysObject *ma_keys;
|
||||
PyObject **ma_values;
|
||||
} PyDictObject;
|
||||
|
||||
/* The table contains ma_mask + 1 slots, and that's a power of 2.
|
||||
* We store the mask instead of the size because the mask is more
|
||||
* frequently needed.
|
||||
*/
|
||||
Py_ssize_t ma_mask;
|
||||
|
||||
/* ma_table points to ma_smalltable for small tables, else to
|
||||
* additional malloc'ed memory. ma_table is never NULL! This rule
|
||||
* saves repeated runtime null-tests in the workhorse getitem and
|
||||
* setitem calls.
|
||||
*/
|
||||
PyDictEntry *ma_table;
|
||||
PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, Py_hash_t hash);
|
||||
PyDictEntry ma_smalltable[PyDict_MINSIZE];
|
||||
};
|
||||
#endif /* Py_LIMITED_API */
|
||||
|
||||
PyAPI_DATA(PyTypeObject) PyDict_Type;
|
||||
|
|
@ -117,6 +59,8 @@ PyAPI_FUNC(void) PyDict_Clear(PyObject *mp);
|
|||
PyAPI_FUNC(int) PyDict_Next(
|
||||
PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value);
|
||||
#ifndef Py_LIMITED_API
|
||||
/* Takes no arguments and returns a pointer to a PyDictKeysObject.
 * Only declared when Py_LIMITED_API is not defined.
 * NOTE(review): presumably creates the keys table shared by the instances
 * of a class (PEP 412 key-sharing) -- confirm against the definition,
 * including what is returned on failure.
 * NOTE(review): unlike the neighbouring prototypes this one is not wrapped
 * in PyAPI_FUNC -- confirm that is intentional.
 */
PyDictKeysObject *_PyDict_NewKeysForClass(void);
|
||||
PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *);
|
||||
PyAPI_FUNC(int) _PyDict_Next(
|
||||
PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash);
|
||||
#endif
|
||||
|
|
@ -131,6 +75,7 @@ PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, Py_hash_t hash);
|
|||
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
|
||||
PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
|
||||
PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp);
|
||||
/* Evaluates to non-zero when d->ma_values is non-NULL, i.e. when the dict
 * uses a split table; ma_values is NULL for a combined table.
 * d is dereferenced directly, so it must be a pointer to a structure that
 * has an ma_values member (a PyDictObject *, not a bare PyObject *).
 * The argument is evaluated exactly once.
 */
#define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL)
|
||||
|
||||
PyAPI_FUNC(int) PyDict_ClearFreeList(void);
|
||||
#endif
|
||||
|
|
@ -162,6 +107,11 @@ PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *it
|
|||
PyAPI_FUNC(int) _PyDict_SetItemId(PyObject *dp, struct _Py_Identifier *key, PyObject *item);
|
||||
PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key);
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
/* Only declared when Py_LIMITED_API is not defined.
 * NOTE(review): judging by the name and signature, this sets `name` to
 * `value` in the dict reached through *dictptr for an object of type `tp`;
 * dictptr is a pointer-to-pointer, so the callee can presumably install a
 * new dict there -- confirm against the definition.
 * NOTE(review): the int result presumably follows the usual 0 on success /
 * -1 on error convention -- confirm.
 */
int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
|
||||
/* Only declared when Py_LIMITED_API is not defined.
 * Takes two dict objects and one further object, and returns a PyObject *.
 * The parameters are unnamed in this prototype.
 * NOTE(review): presumably looks the third argument up as a key in the
 * first dict (globals) and then in the second (builtins) -- confirm the
 * lookup order, whether the returned reference is borrowed, and how a
 * missing key is distinguished from an error.
 */
PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue