Dict.insert WIP

This commit is contained in:
Folkert 2021-02-13 23:32:01 +01:00
parent 84e4cbc859
commit b2e626ab0c
2 changed files with 141 additions and 32 deletions

View file

@ -6,6 +6,8 @@ const Allocator = mem.Allocator;
const level_size = 32;
const INITIAL_SEED = 0xc70f6907;
const InPlace = packed enum(u8) {
InPlace,
Clone,
@ -25,6 +27,10 @@ const MaybeIndex = union(MaybeIndexTag) {
index: usize, not_found: void
};
fn nextSeed(seed: u64) u64 {
return seed + 1;
}
// aligmnent of elements. The number (16 or 8) indicates the maximum
// alignment of the key and value. The tag furthermore indicates
// which has the biggest aligmnent. If both are the same, we put
@ -156,6 +162,68 @@ pub const RocDict = extern struct {
};
}
pub fn reallocate(
self: RocDict,
allocator: *Allocator,
for_level: usize,
alignment: usize,
key_width: usize,
value_width: usize,
) RocDict {
const first_slot = switch (alignment) {
8 => blk: {
const slot_size = slotSize(key_width, value_width);
const number_of_slots = 8 + 16;
const length = @sizeOf(usize) + (number_of_slots * slot_size);
var new_bytes: []align(8) u8 = allocator.alignedAlloc(u8, 8, length) catch unreachable;
var as_usize_array = @ptrCast([*]usize, new_bytes);
const v: isize = std.math.minInt(isize);
as_usize_array[0] = @bitCast(usize, v);
var as_u8_array = @ptrCast([*]u8, new_bytes);
const first_slot = as_u8_array + @sizeOf(usize);
break :blk first_slot;
},
else => unreachable,
};
// transfer the memory
// number of slots we currently have (before reallocating)
const number_of_elements = 8;
const next_number_of_elements = 2 * number_of_elements;
var source_ptr = self.dict_bytes orelse unreachable;
var dest_ptr = first_slot;
var source_offset: usize = 0;
var dest_offset: usize = 0;
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * key_width);
source_offset += number_of_elements * key_width;
dest_offset += number_of_elements * key_width + (next_number_of_elements * key_width);
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * value_width);
source_offset += number_of_elements * value_width;
dest_offset += number_of_elements * value_width + (next_number_of_elements * value_width);
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * @sizeOf(Slot));
var i: usize = 0;
while (i < next_number_of_elements) : (i += 1) {
(dest_ptr + dest_offset + number_of_elements * @sizeOf(Slot))[i] = @enumToInt(Slot.Empty);
}
return RocDict{
.dict_bytes = first_slot,
.dict_slot_len = 8 + 16,
.dict_entries_len = self.dict_entries_len,
};
}
pub fn asU8ptr(self: RocDict) [*]u8 {
return @ptrCast([*]u8, self.dict_bytes);
}
@ -187,12 +255,12 @@ pub const RocDict = extern struct {
// unfortunately, we have to clone
var new_dict = RocDict.allocate(allocator, in_place, self.dict_slot_len, self.dict_entries_len, alignment.toUsize(), key_width, value_width);
var new_dict = RocDict.allocate(allocator, in_place, 8, self.dict_entries_len, alignment.toUsize(), key_width, value_width);
var old_bytes: [*]u8 = @ptrCast([*]u8, self.dict_bytes);
var new_bytes: [*]u8 = @ptrCast([*]u8, new_dict.dict_bytes);
const number_of_bytes = self.dict_slot_len * (@sizeOf(Slot) + key_width + value_width);
const number_of_bytes = 8 * (@sizeOf(Slot) + key_width + value_width);
@memcpy(new_bytes, old_bytes, number_of_bytes);
// we copied potentially-refcounted values; make sure to increment
@ -353,7 +421,7 @@ const Dec = fn (?[*]u8) callconv(.C) void;
// Dict.insert : Dict k v, k, v -> Dict k v
pub fn dictInsert(input: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value: Opaque, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_key: Inc, dec_key: Dec, inc_value: Inc, dec_value: Dec, output: *RocDict) callconv(.C) void {
const n: usize = std.math.max(input.dict_slot_len, 8);
const seed: u64 = 0;
const seed: u64 = INITIAL_SEED;
var result: RocDict = blk: {
if (input.isEmpty()) {
@ -382,51 +450,92 @@ pub fn dictInsert(input: RocDict, alignment: Alignment, key: Opaque, key_width:
var temp = input.makeUnique(std.heap.c_allocator, in_place, alignment, key_width, value_width, inc_key, inc_value);
break :blk temp;
// break :blk input;
}
};
// hash the key, and modulo by the maximum size
// (so we get an in-bounds index)
const hash = hash_fn(seed, key);
const index = hash % n;
var index = hash % n;
switch (result.getSlot(n, index, key_width, value_width)) {
Slot.Empty, Slot.PreviouslyFilled => {
result.setSlot(n, index, key_width, value_width, Slot.Filled);
result.setKey(n, index, alignment, key_width, value_width, key);
result.setValue(n, index, alignment, key_width, value_width, value);
result.dict_entries_len += 1;
},
Slot.Filled => {
// is this the same key, or a new key?
const current_key = result.getKey(n, index, alignment, key_width, value_width);
if (is_eq(key, current_key)) {
// we will override the old value, but first have to decrement its refcount
const current_value = result.getValue(n, index, alignment, key_width, value_width);
dec_value(current_value);
// we must consume the key argument!
dec_key(key);
var current_level: usize = 1;
var current_level_size: usize = 8;
var next_level_size: usize = 16;
while (true) {
switch (result.getSlot(n, index, key_width, value_width)) {
Slot.Empty, Slot.PreviouslyFilled => {
result.setSlot(n, index, key_width, value_width, Slot.Filled);
result.setKey(n, index, alignment, key_width, value_width, key);
result.setValue(n, index, alignment, key_width, value_width, value);
} else {
// TODO rehash, possibly grow the allocation?
unreachable;
}
},
result.dict_entries_len += 1;
break;
},
Slot.Filled => {
// is this the same key, or a new key?
const current_key = result.getKey(n, index, alignment, key_width, value_width);
if (is_eq(key, current_key)) {
// we will override the old value, but first have to decrement its refcount
const current_value = result.getValue(n, index, alignment, key_width, value_width);
dec_value(current_value);
// we must consume the key argument!
dec_key(key);
result.setValue(n, index, alignment, key_width, value_width, value);
break;
} else {
const next_layer_exists = false;
if (next_layer_exists) {
// rehash key with next seed
const next_level_seed = nextSeed(seed);
const next_level_index = hash_fn(next_level_seed, key) % 16;
index = (current_level_size + next_level_index);
current_level += 1;
current_level_size *= 2;
next_level_size *= 2;
continue;
} else {
// 8, 16, 32 ..
result = result.reallocate(std.heap.c_allocator, current_level, alignment.toUsize(), key_width, value_width);
const next_level_seed = nextSeed(seed);
const next_level_index = hash_fn(next_level_seed, key) % 16;
const new_index = (current_level_size + next_level_index);
const capacity = 8 + 16;
result.setSlot(capacity, new_index, key_width, value_width, Slot.Filled);
result.setKey(capacity, new_index, alignment, key_width, value_width, key);
result.setValue(capacity, new_index, alignment, key_width, value_width, value);
result.dict_entries_len += 1;
break;
}
}
},
}
}
// write result into pointer
output.* = result;
}
// { ptr, length, level: u8 }
// [ key1 .. key8, value1, ...
// Dict.remove : Dict k v, k -> Dict k v
pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_key: Inc, dec_key: Dec, inc_value: Inc, dec_value: Dec, output: *RocDict) callconv(.C) void {
const capacity: usize = input.dict_slot_len;
const n = capacity;
const seed: u64 = 0;
const seed: u64 = INITIAL_SEED;
switch (input.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
MaybeIndex.not_found => {
@ -455,7 +564,7 @@ pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width:
// Dict.contains : Dict k v, k -> Bool
pub fn dictContains(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) callconv(.C) bool {
const capacity: usize = dict.dict_slot_len;
const seed: u64 = 0;
const seed: u64 = INITIAL_SEED;
switch (dict.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
MaybeIndex.not_found => {
@ -471,7 +580,7 @@ pub fn dictContains(dict: RocDict, alignment: Alignment, key: Opaque, key_width:
pub fn dictGet(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_value: Inc) callconv(.C) extern struct { value: Opaque, flag: bool } {
const capacity: usize = dict.dict_slot_len;
const n: usize = capacity;
const seed: u64 = 0;
const seed: u64 = INITIAL_SEED;
switch (dict.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
MaybeIndex.not_found => {