refactor findIndex and insert

This commit is contained in:
Folkert 2021-02-14 00:25:44 +01:00
parent b2e626ab0c
commit 49bba11e06

View file

@ -31,6 +31,36 @@ fn nextSeed(seed: u64) u64 {
return seed + 1;
}
/// Returns the slot capacity used for a dictionary that has `input` levels.
/// Zero levels yields zero slots; one level yields 8; every further level
/// triples the running total (8, 24, 72, ...).
// NOTE(review): per-level sizes elsewhere double (8, 16, 32, ...), whose sum
// would be 8, 24, 56, ...; the tripling here matches that sum only up to
// level 2 — confirm the over-allocation from level 3 onward is intended.
fn total_slots_at_level(input: usize) usize {
    if (input == 0) return 0;

    var total: usize = 8;
    var level: usize = 1;
    while (level < input) : (level += 1) {
        total *= 3;
    }
    return total;
}
/// Returns the number of slots belonging to level `input` alone.
/// Level 0 has no slots, level 1 has 8, and each later level has twice as
/// many as the one before it (8, 16, 32, ...).
fn slots_at_level(input: usize) usize {
    if (input == 0) return 0;

    var level_slots: usize = 8;
    var level: usize = 1;
    while (level < input) : (level += 1) {
        level_slots *= 2;
    }
    return level_slots;
}
// alignment of elements. The number (16 or 8) indicates the maximum
// alignment of the key and value. The tag furthermore indicates
// which has the biggest alignment. If both are the same, we put
@ -63,39 +93,23 @@ const Alignment = packed enum(u8) {
pub const RocDict = extern struct {
dict_bytes: ?[*]u8,
dict_entries_len: usize,
dict_slot_len: usize,
number_of_levels: usize,
/// Returns a dictionary value with no backing storage: the bytes pointer is
/// null and every counter field is zero.
pub fn empty() RocDict {
return RocDict{
.dict_entries_len = 0,
// NOTE(review): this listing is a commit diff view, so the next two
// initializers are presumably the replaced line (`dict_slot_len`) and its
// replacement (`number_of_levels`) — confirm which field the struct keeps.
.dict_slot_len = 0,
.number_of_levels = 0,
.dict_bytes = null,
};
}
/// Creates a dictionary by calling `RocDict.allocate` in clone mode (the
/// literal 8 lines up with allocate's `alignment` parameter) and then copies
/// from `bytes_ptr` into the freshly allocated result.
pub fn init(allocator: *Allocator, bytes_ptr: [*]const u8, number_of_slots: usize, number_of_entries: usize, key_size: usize, value_size: usize) RocDict {
var result = RocDict.allocate(
allocator,
InPlace.Clone,
number_of_slots,
number_of_entries,
8,
key_size,
value_size,
);
// NOTE(review): the copy length passed here is the slot count itself, not a
// size derived from key_size/value_size — confirm against the caller's
// buffer layout.
@memcpy(result.asU8ptr(), bytes_ptr, number_of_slots);
return result;
}
/// Releases the dictionary's backing bytes through `allocator`. Does nothing
/// when `isEmpty()` reports true.
pub fn deinit(self: RocDict, allocator: *Allocator, key_size: usize, value_size: usize) void {
if (!self.isEmpty()) {
// NOTE(review): slot_size is computed but not used in the lines visible here.
const slot_size = slotSize(key_size, value_size);
// A null bytes pointer on a non-empty dictionary is treated as impossible.
const dict_bytes_ptr: [*]u8 = self.dict_bytes orelse unreachable;
// NOTE(review): `dict_bytes` is declared twice below; this listing is a
// commit diff view, so these are presumably the replaced line and its
// replacement. In the second form the slice length is a level count rather
// than a byte or slot count — confirm that is what `allocator.free` should see.
const dict_bytes: []u8 = dict_bytes_ptr[0..(self.dict_slot_len)];
const dict_bytes: []u8 = dict_bytes_ptr[0..(self.number_of_levels)];
allocator.free(dict_bytes);
}
}
@ -103,12 +117,13 @@ pub const RocDict = extern struct {
pub fn allocate(
allocator: *Allocator,
result_in_place: InPlace,
number_of_slots: usize,
number_of_levels: usize,
number_of_entries: usize,
alignment: usize,
key_size: usize,
value_size: usize,
) RocDict {
const number_of_slots = total_slots_at_level(number_of_levels);
const first_slot = switch (alignment) {
8 => blk: {
const slot_size = slotSize(key_size, value_size);
@ -157,7 +172,7 @@ pub const RocDict = extern struct {
return RocDict{
.dict_bytes = first_slot,
.dict_slot_len = number_of_slots,
.number_of_levels = number_of_levels,
.dict_entries_len = number_of_entries,
};
}
@ -165,17 +180,19 @@ pub const RocDict = extern struct {
pub fn reallocate(
self: RocDict,
allocator: *Allocator,
for_level: usize,
alignment: usize,
key_width: usize,
value_width: usize,
) RocDict {
const new_level = self.number_of_levels + 1;
const slot_size = slotSize(key_width, value_width);
const old_capacity = self.capacity();
const new_capacity = total_slots_at_level(new_level);
const first_slot = switch (alignment) {
8 => blk: {
const slot_size = slotSize(key_width, value_width);
const number_of_slots = 8 + 16;
const length = @sizeOf(usize) + (number_of_slots * slot_size);
const length = @sizeOf(usize) + (new_capacity * slot_size);
var new_bytes: []align(8) u8 = allocator.alignedAlloc(u8, 8, length) catch unreachable;
@ -193,33 +210,32 @@ pub const RocDict = extern struct {
// transfer the memory
// number of slots we currently have (before reallocating)
const number_of_elements = 8;
const next_number_of_elements = 2 * number_of_elements;
var source_ptr = self.dict_bytes orelse unreachable;
var dest_ptr = first_slot;
if (old_capacity > 0) {
var source_offset: usize = 0;
var dest_offset: usize = 0;
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * key_width);
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, old_capacity * key_width);
source_offset += number_of_elements * key_width;
dest_offset += number_of_elements * key_width + (next_number_of_elements * key_width);
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * value_width);
source_offset += old_capacity * key_width;
dest_offset += old_capacity * key_width + (new_capacity * key_width);
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, old_capacity * value_width);
source_offset += number_of_elements * value_width;
dest_offset += number_of_elements * value_width + (next_number_of_elements * value_width);
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * @sizeOf(Slot));
source_offset += old_capacity * value_width;
dest_offset += old_capacity * value_width + (new_capacity * value_width);
@memcpy(dest_ptr + dest_offset, source_ptr + source_offset, old_capacity * @sizeOf(Slot));
}
var i: usize = 0;
while (i < next_number_of_elements) : (i += 1) {
(dest_ptr + dest_offset + number_of_elements * @sizeOf(Slot))[i] = @enumToInt(Slot.Empty);
const first_new_slot_value = dest_ptr + old_capacity * slot_size + new_capacity * (key_width + value_width);
while (i < (new_capacity - old_capacity)) : (i += 1) {
(first_new_slot_value)[i] = @enumToInt(Slot.Empty);
}
return RocDict{
.dict_bytes = first_slot,
.dict_slot_len = 8 + 16,
.number_of_levels = self.number_of_levels + 1,
.dict_entries_len = self.dict_entries_len,
};
}
@ -244,6 +260,10 @@ pub const RocDict = extern struct {
return false;
}
/// Total slot capacity of this dictionary, derived from its current
/// `number_of_levels` via `total_slots_at_level`.
pub fn capacity(self: RocDict) usize {
    const levels = self.number_of_levels;
    return total_slots_at_level(levels);
}
pub fn makeUnique(self: RocDict, allocator: *Allocator, in_place: InPlace, alignment: Alignment, key_width: usize, value_width: usize, inc_key: Inc, inc_value: Inc) RocDict {
if (self.isEmpty()) {
return self;
@ -265,24 +285,24 @@ pub const RocDict = extern struct {
// we copied potentially-refcounted values; make sure to increment
const size = new_dict.dict_entries_len;
const n = new_dict.dict_slot_len;
const n = new_dict.number_of_levels;
var i: usize = 0;
i = 0;
while (i < size) : (i += 1) {
inc_key(new_dict.getKey(n, i, alignment, key_width, value_width));
inc_key(new_dict.getKey(i, alignment, key_width, value_width));
}
i = 0;
while (i < size) : (i += 1) {
inc_value(new_dict.getValue(n, i, alignment, key_width, value_width));
inc_value(new_dict.getValue(i, alignment, key_width, value_width));
}
return new_dict;
}
fn getSlot(self: *const RocDict, capacity: usize, index: usize, key_width: usize, value_width: usize) Slot {
const offset = capacity * (key_width + value_width) + index * @sizeOf(Slot);
fn getSlot(self: *const RocDict, index: usize, key_width: usize, value_width: usize) Slot {
const offset = self.capacity() * (key_width + value_width) + index * @sizeOf(Slot);
if (self.dict_bytes) |u8_ptr| {
return @intToEnum(Slot, u8_ptr[offset]);
@ -291,8 +311,8 @@ pub const RocDict = extern struct {
}
}
fn setSlot(self: *RocDict, capacity: usize, index: usize, key_width: usize, value_width: usize, slot: Slot) void {
const offset = capacity * (key_width + value_width) + index * @sizeOf(Slot);
fn setSlot(self: *RocDict, index: usize, key_width: usize, value_width: usize, slot: Slot) void {
const offset = self.capacity() * (key_width + value_width) + index * @sizeOf(Slot);
if (self.dict_bytes) |u8_ptr| {
u8_ptr[offset] = @enumToInt(slot);
@ -301,12 +321,12 @@ pub const RocDict = extern struct {
}
}
fn setKey(self: *RocDict, capacity: usize, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void {
fn setKey(self: *RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void {
const offset = blk: {
if (alignment.keyFirst()) {
break :blk (index * key_width);
} else {
break :blk (capacity * value_width) + (index * key_width);
break :blk (self.capacity() * value_width) + (index * key_width);
}
};
@ -318,12 +338,12 @@ pub const RocDict = extern struct {
}
}
fn getKey(self: *const RocDict, capacity: usize, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque {
fn getKey(self: *const RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque {
const offset = blk: {
if (alignment.keyFirst()) {
break :blk (index * key_width);
} else {
break :blk (capacity * value_width) + (index * key_width);
break :blk (self.capacity() * value_width) + (index * key_width);
}
};
@ -334,10 +354,10 @@ pub const RocDict = extern struct {
}
}
fn setValue(self: *RocDict, capacity: usize, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void {
fn setValue(self: *RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void {
const offset = blk: {
if (alignment.keyFirst()) {
break :blk (capacity * key_width) + (index * value_width);
break :blk (self.capacity() * key_width) + (index * value_width);
} else {
break :blk (index * value_width);
}
@ -351,10 +371,10 @@ pub const RocDict = extern struct {
}
}
fn getValue(self: *const RocDict, capacity: usize, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque {
fn getValue(self: *const RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque {
const offset = blk: {
if (alignment.keyFirst()) {
break :blk (capacity * key_width) + (index * value_width);
break :blk (self.capacity() * key_width) + (index * value_width);
} else {
break :blk (index * value_width);
}
@ -367,33 +387,45 @@ pub const RocDict = extern struct {
}
}
/// Looks up `key` and returns `MaybeIndex{ .index = ... }` when a filled slot
/// holds an equal key (per `is_eq`), or `MaybeIndex.not_found` otherwise.
/// An empty dictionary always yields `not_found`.
// NOTE(review): this listing is a commit diff view with the +/- markers
// stripped, so replaced and replacement lines are interleaved below (two
// headers, two `index` declarations, two `switch` openers, two `getKey`
// calls). Presumably the first of each pair is the removed line — confirm.
fn findIndex(self: *const RocDict, capacity: usize, seed: u64, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) MaybeIndex {
fn findIndex(self: *const RocDict, seed: u64, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) MaybeIndex {
if (self.isEmpty()) {
return MaybeIndex.not_found;
}
const n = capacity;
// Level bookkeeping: level 1 is probed first and holds 8 slots; sizes
// double as the level counter advances.
var current_level: usize = 1;
var current_level_size: usize = 8;
var next_level_size: usize = 2 * current_level_size;
// hash the key, and modulo by the maximum size
// (so we get an in-bounds index)
const hash = hash_fn(seed, key);
const index = hash % n;
var index = hash % current_level_size;
switch (self.getSlot(n, index, key_width, value_width)) {
while (true) {
// Ran past the last allocated level without a match.
if (current_level > self.number_of_levels) {
return MaybeIndex.not_found;
}
switch (self.getSlot(index, key_width, value_width)) {
// A slot that is not currently filled ends the search.
Slot.Empty, Slot.PreviouslyFilled => {
return MaybeIndex.not_found;
},
Slot.Filled => {
// is this the same key, or a new key?
const current_key = self.getKey(n, index, alignment, key_width, value_width);
const current_key = self.getKey(index, alignment, key_width, value_width);
if (is_eq(key, current_key)) {
return MaybeIndex{ .index = index };
} else {
unreachable;
// NOTE(review): the level counters advance here, but `index` and the hash
// are not recomputed in the visible lines, so the next iteration inspects
// the same slot again. dictInsert rehashes with `nextSeed` per level —
// confirm whether lookup should do the same.
current_level += 1;
current_level_size *= 2;
next_level_size *= 2;
continue;
}
},
}
}
}
};
// Dict.empty
@ -420,105 +452,63 @@ const Dec = fn (?[*]u8) callconv(.C) void;
// Dict.insert : Dict k v, k, v -> Dict k v
pub fn dictInsert(input: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value: Opaque, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_key: Inc, dec_key: Dec, inc_value: Inc, dec_value: Dec, output: *RocDict) callconv(.C) void {
const n: usize = std.math.max(input.dict_slot_len, 8);
const seed: u64 = INITIAL_SEED;
var seed: u64 = INITIAL_SEED;
var result: RocDict = blk: {
if (input.isEmpty()) {
var temp = RocDict.allocate(
std.heap.c_allocator,
InPlace.Clone,
n, // number_of_slots,
0, // number_of_entries,
alignment.toUsize(),
key_width,
value_width,
);
{
var i: usize = 0;
while (i < n) {
temp.setSlot(n, i, key_width, value_width, Slot.Empty);
i += 1;
}
}
break :blk temp;
break :blk input;
} else {
const in_place = InPlace.Clone;
var temp = input.makeUnique(std.heap.c_allocator, in_place, alignment, key_width, value_width, inc_key, inc_value);
break :blk temp;
// break :blk input;
}
};
// hash the key, and modulo by the maximum size
// (so we get an in-bounds index)
const hash = hash_fn(seed, key);
var index = hash % n;
var current_level: usize = 1;
var current_level_size: usize = 8;
var next_level_size: usize = 16;
var next_level_size: usize = 2 * current_level_size;
while (true) {
switch (result.getSlot(n, index, key_width, value_width)) {
if (current_level > result.number_of_levels) {
result = result.reallocate(std.heap.c_allocator, alignment.toUsize(), key_width, value_width);
}
const hash = hash_fn(seed, key);
const index = hash % current_level_size;
switch (result.getSlot(index, key_width, value_width)) {
Slot.Empty, Slot.PreviouslyFilled => {
result.setSlot(n, index, key_width, value_width, Slot.Filled);
result.setKey(n, index, alignment, key_width, value_width, key);
result.setValue(n, index, alignment, key_width, value_width, value);
result.setSlot(index, key_width, value_width, Slot.Filled);
result.setKey(index, alignment, key_width, value_width, key);
result.setValue(index, alignment, key_width, value_width, value);
result.dict_entries_len += 1;
break;
},
Slot.Filled => {
// is this the same key, or a new key?
const current_key = result.getKey(n, index, alignment, key_width, value_width);
const current_key = result.getKey(index, alignment, key_width, value_width);
if (is_eq(key, current_key)) {
// we will override the old value, but first have to decrement its refcount
const current_value = result.getValue(n, index, alignment, key_width, value_width);
const current_value = result.getValue(index, alignment, key_width, value_width);
dec_value(current_value);
// we must consume the key argument!
dec_key(key);
result.setValue(n, index, alignment, key_width, value_width, value);
result.setValue(index, alignment, key_width, value_width, value);
break;
} else {
const next_layer_exists = false;
seed = nextSeed(seed);
if (next_layer_exists) {
// rehash key with next seed
const next_level_seed = nextSeed(seed);
const next_level_index = hash_fn(next_level_seed, key) % 16;
index = (current_level_size + next_level_index);
current_level += 1;
current_level_size *= 2;
next_level_size *= 2;
continue;
} else {
// 8, 16, 32 ..
result = result.reallocate(std.heap.c_allocator, current_level, alignment.toUsize(), key_width, value_width);
const next_level_seed = nextSeed(seed);
const next_level_index = hash_fn(next_level_seed, key) % 16;
const new_index = (current_level_size + next_level_index);
const capacity = 8 + 16;
result.setSlot(capacity, new_index, key_width, value_width, Slot.Filled);
result.setKey(capacity, new_index, alignment, key_width, value_width, key);
result.setValue(capacity, new_index, alignment, key_width, value_width, value);
result.dict_entries_len += 1;
break;
}
}
},
}
@ -533,11 +523,9 @@ pub fn dictInsert(input: RocDict, alignment: Alignment, key: Opaque, key_width:
// Dict.remove : Dict k v, k -> Dict k v
pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_key: Inc, dec_key: Dec, inc_value: Inc, dec_value: Dec, output: *RocDict) callconv(.C) void {
const capacity: usize = input.dict_slot_len;
const n = capacity;
const seed: u64 = INITIAL_SEED;
switch (input.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
switch (input.findIndex(seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
MaybeIndex.not_found => {
// the key was not found; we're done
output.* = input;
@ -547,9 +535,9 @@ pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width:
// TODO make sure input is unique (or duplicate otherwise)
var dict = input;
dict.setSlot(n, index, key_width, value_width, Slot.PreviouslyFilled);
const old_key = dict.getKey(n, index, alignment, key_width, value_width);
const old_value = dict.getValue(n, index, alignment, key_width, value_width);
dict.setSlot(index, key_width, value_width, Slot.PreviouslyFilled);
const old_key = dict.getKey(index, alignment, key_width, value_width);
const old_value = dict.getValue(index, alignment, key_width, value_width);
dec_key(old_key);
dec_value(old_value);
@ -563,10 +551,9 @@ pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width:
// Dict.contains : Dict k v, k -> Bool
pub fn dictContains(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) callconv(.C) bool {
const capacity: usize = dict.dict_slot_len;
const seed: u64 = INITIAL_SEED;
switch (dict.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
switch (dict.findIndex(seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
MaybeIndex.not_found => {
return false;
},
@ -578,16 +565,14 @@ pub fn dictContains(dict: RocDict, alignment: Alignment, key: Opaque, key_width:
// Dict.get : Dict k v, k -> { flag: bool, value: Opaque }
pub fn dictGet(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_value: Inc) callconv(.C) extern struct { value: Opaque, flag: bool } {
const capacity: usize = dict.dict_slot_len;
const n: usize = capacity;
const seed: u64 = INITIAL_SEED;
switch (dict.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
switch (dict.findIndex(seed, alignment, key, key_width, value_width, hash_fn, is_eq)) {
MaybeIndex.not_found => {
return .{ .flag = false, .value = null };
},
MaybeIndex.index => |index| {
var value = dict.getValue(n, index, alignment, key_width, value_width);
var value = dict.getValue(index, alignment, key_width, value_width);
inc_value(value);
return .{ .flag = true, .value = value };
},
@ -598,17 +583,17 @@ pub fn dictGet(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usiz
// Increment or decrement all dict elements (but not the dict's allocation itself).
// Applies `modify_key` to the key at each index in 0..dict_entries_len, then
// `modify_value` to the value at each of those same indices.
// NOTE(review): this listing is a commit diff view with the +/- markers
// stripped; the paired `const n` lines and the paired getKey/getValue calls
// are presumably the replaced line followed by its replacement — confirm.
pub fn elementsRc(dict: RocDict, alignment: Alignment, key_width: usize, value_width: usize, modify_key: Inc, modify_value: Inc) callconv(.C) void {
const size = dict.dict_entries_len;
const n = dict.dict_slot_len;
const n = dict.number_of_levels;
var i: usize = 0;
i = 0;
// NOTE(review): both loops walk the first `size` indices without consulting
// slot state; dictInsert places entries at hash-derived indices, so filled
// slots may not be contiguous from 0 — verify.
while (i < size) : (i += 1) {
modify_key(dict.getKey(n, i, alignment, key_width, value_width));
modify_key(dict.getKey(i, alignment, key_width, value_width));
}
i = 0;
while (i < size) : (i += 1) {
modify_value(dict.getValue(n, i, alignment, key_width, value_width));
modify_value(dict.getValue(i, alignment, key_width, value_width));
}
}