mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
bpo-46675: Allow object value arrays and split key dictionaries larger than 16 (GH-31191)
This commit is contained in:
parent
328fe3fd20
commit
25db2b361b
6 changed files with 90 additions and 50 deletions
|
@ -99,11 +99,17 @@ struct _dictkeysobject {
|
||||||
see the DK_ENTRIES() macro */
|
see the DK_ENTRIES() macro */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* This must be no more than 16, for the order vector to fit in 64 bits */
|
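/* This must be small enough that the values prefix (size + 2 bytes, rounded up to a multiple of sizeof(PyObject *)) stays below 256 and so fits in one byte: at most 246 with 8-byte pointers, 250 with 4-byte pointers. */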
|
||||||
#define SHARED_KEYS_MAX_SIZE 16
|
#define SHARED_KEYS_MAX_SIZE 30
|
||||||
|
#define NEXT_LOG2_SHARED_KEYS_MAX_SIZE 6
|
||||||
|
|
||||||
|
/* Layout of dict values:
|
||||||
|
*
|
||||||
|
* The PyObject *values are preceded by an array of bytes holding
|
||||||
|
* the insertion order and size.
|
||||||
|
* [-1] = prefix size. [-2] = used size. [-3-n] = index of the n-th inserted value (n counted from 0).
|
||||||
|
*/
|
||||||
/* Values array of a split-table dict. The insertion-order bytes described in
 * the layout comment above are stored in memory immediately before this
 * struct and are reached with negative byte offsets from its address.
 * Shown here as a rendered diff: the mv_order member is the removed side. */
struct _dictvalues {
|
struct _dictvalues {
|
||||||
uint64_t mv_order;
|
|
||||||
PyObject *values[1];
|
PyObject *values[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -131,6 +137,18 @@ extern uint64_t _pydict_global_version;
|
||||||
|
|
||||||
PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
|
PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
_PyDictValues_AddToInsertionOrder(PyDictValues *values, Py_ssize_t ix)
|
||||||
|
{
|
||||||
|
assert(ix < SHARED_KEYS_MAX_SIZE);
|
||||||
|
uint8_t *size_ptr = ((uint8_t *)values)-2;
|
||||||
|
int size = *size_ptr;
|
||||||
|
assert(size+2 < ((uint8_t *)values)[-1]);
|
||||||
|
size++;
|
||||||
|
size_ptr[-size] = (uint8_t)ix;
|
||||||
|
*size_ptr = size;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -5505,7 +5505,7 @@ class SharedKeyTests(unittest.TestCase):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
#Shrink keys by repeatedly creating instances
|
#Shrink keys by repeatedly creating instances
|
||||||
[(A(), B()) for _ in range(20)]
|
[(A(), B()) for _ in range(30)]
|
||||||
|
|
||||||
a, b = A(), B()
|
a, b = A(), B()
|
||||||
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
|
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
|
||||||
|
|
|
@ -1504,15 +1504,16 @@ class SizeofTest(unittest.TestCase):
|
||||||
'6P')
|
'6P')
|
||||||
class newstyleclass(object): pass
|
class newstyleclass(object): pass
|
||||||
# Separate block for PyDictKeysObject with 8 keys and 5 entries
|
# Separate block for PyDictKeysObject with 64 keys and 42 entries
|
||||||
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 32 + 21*calcsize("n2P"))
|
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
|
||||||
# dict with shared keys
|
# dict with shared keys
|
||||||
check(newstyleclass().__dict__, size('nQ2P') + 15*self.P)
|
[newstyleclass() for _ in range(100)]
|
||||||
|
check(newstyleclass().__dict__, size('nQ2P') + self.P)
|
||||||
o = newstyleclass()
|
o = newstyleclass()
|
||||||
o.a = o.b = o.c = o.d = o.e = o.f = o.g = o.h = 1
|
o.a = o.b = o.c = o.d = o.e = o.f = o.g = o.h = 1
|
||||||
# Separate block for PyDictKeysObject with 16 keys and 10 entries
|
# Separate block for PyDictKeysObject with 64 keys and 42 entries
|
||||||
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 32 + 21*calcsize("n2P"))
|
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
|
||||||
# dict with shared keys
|
# dict with shared keys
|
||||||
check(newstyleclass().__dict__, size('nQ2P') + 13*self.P)
|
check(newstyleclass().__dict__, size('nQ2P') + self.P)
|
||||||
# unicode
|
# unicode
|
||||||
# each tuple contains a string and its expected character size
|
# each tuple contains a string and its expected character size
|
||||||
# don't put any static strings here, as they may contain
|
# don't put any static strings here, as they may contain
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Allow more than 16 items in a split dict before it is combined. The limit is
|
||||||
|
now 30 (``SHARED_KEYS_MAX_SIZE``).
|
|
@ -453,8 +453,14 @@ static PyDictKeysObject empty_keys_struct = {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static PyDictValues empty_values_struct = { 0, { NULL }};
|
struct {
|
||||||
#define empty_values (&empty_values_struct)
|
uint8_t prefix[sizeof(PyObject *)];
|
||||||
|
PyDictValues values;
|
||||||
|
} empty_values_struct = {
|
||||||
|
{ [sizeof(PyObject *)-1] = sizeof(PyObject *) },
|
||||||
|
{{NULL}}
|
||||||
|
};
|
||||||
|
#define empty_values (&empty_values_struct.values)
|
||||||
|
|
||||||
#define Py_EMPTY_KEYS &empty_keys_struct
|
#define Py_EMPTY_KEYS &empty_keys_struct
|
||||||
|
|
||||||
|
@ -470,9 +476,9 @@ static PyDictValues empty_values_struct = { 0, { NULL }};
|
||||||
static inline int
|
static inline int
|
||||||
get_index_from_order(PyDictObject *mp, Py_ssize_t i)
|
get_index_from_order(PyDictObject *mp, Py_ssize_t i)
|
||||||
{
|
{
|
||||||
assert(mp->ma_used <= 16);
|
assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
|
||||||
int shift = (int)(mp->ma_used-1-i)*4;
|
assert(i < (((char *)mp->ma_values)[-2]));
|
||||||
return (int)(mp->ma_values->mv_order >> shift) & 15;
|
return ((char *)mp->ma_values)[-3-i];
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -636,11 +642,25 @@ free_keys_object(PyDictKeysObject *keys)
|
||||||
static inline PyDictValues*
|
static inline PyDictValues*
|
||||||
new_values(Py_ssize_t size)
|
new_values(Py_ssize_t size)
|
||||||
{
|
{
|
||||||
Py_ssize_t n = sizeof(PyDictValues) + sizeof(PyObject *) * (size-1);
|
assert(size > 0);
|
||||||
return (PyDictValues*)PyMem_Malloc(n);
|
size_t prefix_size = _Py_SIZE_ROUND_UP(size+2, sizeof(PyObject *));
|
||||||
|
assert(prefix_size < 256);
|
||||||
|
size_t n = prefix_size + size * sizeof(PyObject *);
|
||||||
|
uint8_t *mem = PyMem_Malloc(n);
|
||||||
|
if (mem == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
assert(prefix_size % sizeof(PyObject *) == 0);
|
||||||
|
mem[prefix_size-1] = (uint8_t)prefix_size;
|
||||||
|
return (PyDictValues*)(mem + prefix_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define free_values(values) PyMem_Free(values)
|
static inline void
|
||||||
|
free_values(PyDictValues *values)
|
||||||
|
{
|
||||||
|
int prefix_size = ((uint8_t *)values)[-1];
|
||||||
|
PyMem_Free(((char *)values)-prefix_size);
|
||||||
|
}
|
||||||
|
|
||||||
/* Consumes a reference to the keys object */
|
/* Consumes a reference to the keys object */
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -699,7 +719,7 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
|
||||||
dictkeys_decref(keys);
|
dictkeys_decref(keys);
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
}
|
}
|
||||||
values->mv_order = 0;
|
((char *)values)[-2] = 0;
|
||||||
for (i = 0; i < size; i++) {
|
for (i = 0; i < size; i++) {
|
||||||
values->values[i] = NULL;
|
values->values[i] = NULL;
|
||||||
}
|
}
|
||||||
|
@ -1017,7 +1037,7 @@ insertion_resize(PyDictObject *mp)
|
||||||
return dictresize(mp, calculate_log2_keysize(GROWTH_RATE(mp)));
|
return dictresize(mp, calculate_log2_keysize(GROWTH_RATE(mp)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static Py_ssize_t
|
||||||
insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name)
|
insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name)
|
||||||
{
|
{
|
||||||
assert(PyUnicode_CheckExact(name));
|
assert(PyUnicode_CheckExact(name));
|
||||||
|
@ -1048,7 +1068,7 @@ insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name)
|
||||||
keys->dk_nentries++;
|
keys->dk_nentries++;
|
||||||
}
|
}
|
||||||
assert (ix < SHARED_KEYS_MAX_SIZE);
|
assert (ix < SHARED_KEYS_MAX_SIZE);
|
||||||
return (int)ix;
|
return ix;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1093,9 +1113,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
|
||||||
ep->me_hash = hash;
|
ep->me_hash = hash;
|
||||||
if (mp->ma_values) {
|
if (mp->ma_values) {
|
||||||
Py_ssize_t index = mp->ma_keys->dk_nentries;
|
Py_ssize_t index = mp->ma_keys->dk_nentries;
|
||||||
assert(index < SHARED_KEYS_MAX_SIZE);
|
_PyDictValues_AddToInsertionOrder(mp->ma_values, index);
|
||||||
assert((mp->ma_values->mv_order >> 60) == 0);
|
|
||||||
mp->ma_values->mv_order = ((mp->ma_values->mv_order)<<4) | index;
|
|
||||||
assert (mp->ma_values->values[index] == NULL);
|
assert (mp->ma_values->values[index] == NULL);
|
||||||
mp->ma_values->values[index] = value;
|
mp->ma_values->values[index] = value;
|
||||||
}
|
}
|
||||||
|
@ -1115,7 +1133,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
|
||||||
if (_PyDict_HasSplitTable(mp)) {
|
if (_PyDict_HasSplitTable(mp)) {
|
||||||
mp->ma_values->values[ix] = value;
|
mp->ma_values->values[ix] = value;
|
||||||
if (old_value == NULL) {
|
if (old_value == NULL) {
|
||||||
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
|
_PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
|
||||||
mp->ma_used++;
|
mp->ma_used++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1598,19 +1616,20 @@ _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
|
||||||
return insertdict(mp, key, hash, value);
|
return insertdict(mp, key, hash, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t
|
static void
|
||||||
delete_index_from_order(uint64_t order, Py_ssize_t ix)
|
delete_index_from_values(PyDictValues *values, Py_ssize_t ix)
|
||||||
{ /* Update order */
|
{
|
||||||
for (int i = 0;; i+= 4) {
|
uint8_t *size_ptr = ((uint8_t *)values)-2;
|
||||||
assert (i < 64);
|
int size = *size_ptr;
|
||||||
if (((order >> i) & 15) == (uint64_t)ix) {
|
int i;
|
||||||
/* Remove 4 bits at ith position */
|
for (i = 1; size_ptr[-i] != ix; i++) {
|
||||||
uint64_t high = ((order>>i)>>4)<<i;
|
assert(i <= size);
|
||||||
uint64_t low = order & ((((uint64_t)1)<<i)-1);
|
|
||||||
return high | low;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Py_UNREACHABLE();
|
assert(i <= size);
|
||||||
|
for (; i < size; i++) {
|
||||||
|
size_ptr[-i] = size_ptr[-i-1];
|
||||||
|
}
|
||||||
|
*size_ptr = size -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -1631,8 +1650,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
|
||||||
mp->ma_values->values[ix] = NULL;
|
mp->ma_values->values[ix] = NULL;
|
||||||
assert(ix < SHARED_KEYS_MAX_SIZE);
|
assert(ix < SHARED_KEYS_MAX_SIZE);
|
||||||
/* Update order */
|
/* Update order */
|
||||||
mp->ma_values->mv_order =
|
delete_index_from_values(mp->ma_values, ix);
|
||||||
delete_index_from_order(mp->ma_values->mv_order, ix);
|
|
||||||
ASSERT_CONSISTENT(mp);
|
ASSERT_CONSISTENT(mp);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -2729,7 +2747,8 @@ PyDict_Copy(PyObject *o)
|
||||||
free_values(newvalues);
|
free_values(newvalues);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
newvalues->mv_order = mp->ma_values->mv_order;
|
size_t prefix_size = ((uint8_t *)newvalues)[-1];
|
||||||
|
memcpy(((char *)newvalues)-prefix_size, ((char *)mp->ma_values)-prefix_size, prefix_size-1);
|
||||||
split_copy->ma_values = newvalues;
|
split_copy->ma_values = newvalues;
|
||||||
split_copy->ma_keys = mp->ma_keys;
|
split_copy->ma_keys = mp->ma_keys;
|
||||||
split_copy->ma_used = mp->ma_used;
|
split_copy->ma_used = mp->ma_used;
|
||||||
|
@ -3031,11 +3050,11 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
|
||||||
ep->me_key = key;
|
ep->me_key = key;
|
||||||
ep->me_hash = hash;
|
ep->me_hash = hash;
|
||||||
if (_PyDict_HasSplitTable(mp)) {
|
if (_PyDict_HasSplitTable(mp)) {
|
||||||
int index = (int)mp->ma_keys->dk_nentries;
|
Py_ssize_t index = (int)mp->ma_keys->dk_nentries;
|
||||||
assert(index < SHARED_KEYS_MAX_SIZE);
|
assert(index < SHARED_KEYS_MAX_SIZE);
|
||||||
assert(mp->ma_values->values[index] == NULL);
|
assert(mp->ma_values->values[index] == NULL);
|
||||||
mp->ma_values->values[index] = value;
|
mp->ma_values->values[index] = value;
|
||||||
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | index;
|
_PyDictValues_AddToInsertionOrder(mp->ma_values, index);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ep->me_value = value;
|
ep->me_value = value;
|
||||||
|
@ -3053,7 +3072,7 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
|
||||||
Py_INCREF(value);
|
Py_INCREF(value);
|
||||||
MAINTAIN_TRACKING(mp, key, value);
|
MAINTAIN_TRACKING(mp, key, value);
|
||||||
mp->ma_values->values[ix] = value;
|
mp->ma_values->values[ix] = value;
|
||||||
mp->ma_values->mv_order = (mp->ma_values->mv_order << 4) | ix;
|
_PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
|
||||||
mp->ma_used++;
|
mp->ma_used++;
|
||||||
mp->ma_version_tag = DICT_NEXT_VERSION();
|
mp->ma_version_tag = DICT_NEXT_VERSION();
|
||||||
}
|
}
|
||||||
|
@ -4941,7 +4960,7 @@ dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
|
||||||
PyDictKeysObject *
|
PyDictKeysObject *
|
||||||
_PyDict_NewKeysForClass(void)
|
_PyDict_NewKeysForClass(void)
|
||||||
{
|
{
|
||||||
PyDictKeysObject *keys = new_keys_object(5); /* log2(32) */
|
PyDictKeysObject *keys = new_keys_object(NEXT_LOG2_SHARED_KEYS_MAX_SIZE);
|
||||||
if (keys == NULL) {
|
if (keys == NULL) {
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
}
|
}
|
||||||
|
@ -4974,7 +4993,8 @@ init_inline_values(PyObject *obj, PyTypeObject *tp)
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
values->mv_order = 0;
|
assert(((uint8_t *)values)[-1] >= size+2);
|
||||||
|
((uint8_t *)values)[-2] = 0;
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
values->values[i] = NULL;
|
values->values[i] = NULL;
|
||||||
}
|
}
|
||||||
|
@ -5047,14 +5067,14 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
|
||||||
assert(keys != NULL);
|
assert(keys != NULL);
|
||||||
assert(values != NULL);
|
assert(values != NULL);
|
||||||
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
|
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
|
||||||
int ix = insert_into_dictkeys(keys, name);
|
Py_ssize_t ix = insert_into_dictkeys(keys, name);
|
||||||
if (ix == DKIX_EMPTY) {
|
if (ix == DKIX_EMPTY) {
|
||||||
if (value == NULL) {
|
if (value == NULL) {
|
||||||
PyErr_SetObject(PyExc_AttributeError, name);
|
PyErr_SetObject(PyExc_AttributeError, name);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#ifdef Py_STATS
|
#ifdef Py_STATS
|
||||||
if (shared_keys_usable_size(keys) > 14) {
|
if (shared_keys_usable_size(keys) == SHARED_KEYS_MAX_SIZE) {
|
||||||
OBJECT_STAT_INC(dict_materialized_too_big);
|
OBJECT_STAT_INC(dict_materialized_too_big);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -5077,11 +5097,11 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
|
||||||
PyErr_SetObject(PyExc_AttributeError, name);
|
PyErr_SetObject(PyExc_AttributeError, name);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
values->mv_order = (values->mv_order << 4) | ix;
|
_PyDictValues_AddToInsertionOrder(values, ix);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (value == NULL) {
|
if (value == NULL) {
|
||||||
values->mv_order = delete_index_from_order(values->mv_order, ix);
|
delete_index_from_values(values, ix);
|
||||||
}
|
}
|
||||||
Py_DECREF(old_value);
|
Py_DECREF(old_value);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3536,14 +3536,13 @@ handle_eval_breaker:
|
||||||
PyDictValues *values = *_PyObject_ValuesPointer(owner);
|
PyDictValues *values = *_PyObject_ValuesPointer(owner);
|
||||||
DEOPT_IF(values == NULL, STORE_ATTR);
|
DEOPT_IF(values == NULL, STORE_ATTR);
|
||||||
STAT_INC(STORE_ATTR, hit);
|
STAT_INC(STORE_ATTR, hit);
|
||||||
int index = cache0->index;
|
Py_ssize_t index = cache0->index;
|
||||||
STACK_SHRINK(1);
|
STACK_SHRINK(1);
|
||||||
PyObject *value = POP();
|
PyObject *value = POP();
|
||||||
PyObject *old_value = values->values[index];
|
PyObject *old_value = values->values[index];
|
||||||
values->values[index] = value;
|
values->values[index] = value;
|
||||||
if (old_value == NULL) {
|
if (old_value == NULL) {
|
||||||
assert(index < 16);
|
_PyDictValues_AddToInsertionOrder(values, index);
|
||||||
values->mv_order = (values->mv_order << 4) | index;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Py_DECREF(old_value);
|
Py_DECREF(old_value);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue