From 49bba11e06daffeed78ece639d07eb17a6a6f691 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 14 Feb 2021 00:25:44 +0100 Subject: [PATCH] refactor findIndex and insert --- compiler/builtins/bitcode/src/dict.zig | 287 ++++++++++++------------- 1 file changed, 136 insertions(+), 151 deletions(-) diff --git a/compiler/builtins/bitcode/src/dict.zig b/compiler/builtins/bitcode/src/dict.zig index 9974c28e8e..4cc91984b8 100644 --- a/compiler/builtins/bitcode/src/dict.zig +++ b/compiler/builtins/bitcode/src/dict.zig @@ -31,6 +31,36 @@ fn nextSeed(seed: u64) u64 { return seed + 1; } +fn total_slots_at_level(input: usize) usize { + if (input == 0) { + return 0; + } + + var n = input; + var slots: usize = 8; + + while (n > 1) : (n -= 1) { + slots = slots * 2 + slots; + } + + return slots; +} + +fn slots_at_level(input: usize) usize { + if (input == 0) { + return 0; + } + + var n = input; + var slots: usize = 8; + + while (n > 1) : (n -= 1) { + slots = slots * 2; + } + + return slots; +} + // aligmnent of elements. The number (16 or 8) indicates the maximum // alignment of the key and value. The tag furthermore indicates // which has the biggest aligmnent. If both are the same, we put @@ -63,39 +93,23 @@ const Alignment = packed enum(u8) { pub const RocDict = extern struct { dict_bytes: ?[*]u8, dict_entries_len: usize, - dict_slot_len: usize, + number_of_levels: usize, pub fn empty() RocDict { return RocDict{ .dict_entries_len = 0, - .dict_slot_len = 0, + .number_of_levels = 0, .dict_bytes = null, }; } - pub fn init(allocator: *Allocator, bytes_ptr: [*]const u8, number_of_slots: usize, number_of_entries: usize, key_size: usize, value_size: usize) RocDict { - var result = RocDict.allocate( - allocator, - InPlace.Clone, - number_of_slots, - number_of_entries, - 8, - key_size, - value_size, - ); - - @memcpy(result.asU8ptr(), bytes_ptr, number_of_slots); - - return result; - } - pub fn deinit(self: RocDict, allocator: *Allocator, key_size: usize, value_size: usize) void { if (!self.isEmpty()) { const slot_size = slotSize(key_size, value_size); const dict_bytes_ptr: [*]u8 = self.dict_bytes orelse unreachable; - const dict_bytes: []u8 = dict_bytes_ptr[0..(self.dict_slot_len)]; + const dict_bytes: []u8 = dict_bytes_ptr[0..(self.number_of_levels)]; allocator.free(dict_bytes); } } @@ -103,12 +117,13 @@ pub const RocDict = extern struct { pub fn allocate( allocator: *Allocator, result_in_place: InPlace, - number_of_slots: usize, + number_of_levels: usize, number_of_entries: usize, alignment: usize, key_size: usize, value_size: usize, ) RocDict { + const number_of_slots = total_slots_at_level(number_of_levels); const first_slot = switch (alignment) { 8 => blk: { const slot_size = slotSize(key_size, value_size); @@ -157,7 +172,7 @@ pub const RocDict = extern struct { return RocDict{ .dict_bytes = first_slot, - .dict_slot_len = number_of_slots, + .number_of_levels = number_of_levels, .dict_entries_len = number_of_entries, }; } @@ -165,17 +180,19 @@ pub const RocDict = extern struct { pub fn reallocate( self: RocDict, allocator: *Allocator, - for_level: usize, alignment: usize, key_width: usize, value_width: usize, ) RocDict { + const new_level = self.number_of_levels + 1; + const slot_size = slotSize(key_width, value_width); + + const old_capacity = self.capacity(); + const new_capacity = total_slots_at_level(new_level); + const first_slot = switch (alignment) { 8 => blk: { - const slot_size = slotSize(key_width, value_width); - const number_of_slots = 8 + 16; - - const length = @sizeOf(usize) + (number_of_slots * slot_size); + const length = @sizeOf(usize) + (new_capacity * slot_size); var new_bytes: []align(8) u8 = allocator.alignedAlloc(u8, 8, length) catch unreachable; @@ -193,33 +210,32 @@ pub const RocDict = extern struct { // transfer the memory - // number of slots we currently have (before reallocating) - const number_of_elements = 8; - const next_number_of_elements = 2 * number_of_elements; - var source_ptr = self.dict_bytes orelse unreachable; var dest_ptr = first_slot; - var source_offset: usize = 0; - var dest_offset: usize = 0; - @memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * key_width); + if (old_capacity > 0) { + var source_offset: usize = 0; + var dest_offset: usize = 0; + @memcpy(dest_ptr + dest_offset, source_ptr + source_offset, old_capacity * key_width); - source_offset += number_of_elements * key_width; - dest_offset += number_of_elements * key_width + (next_number_of_elements * key_width); - @memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * value_width); + source_offset += old_capacity * key_width; + dest_offset += old_capacity * key_width + (new_capacity * key_width); + @memcpy(dest_ptr + dest_offset, source_ptr + source_offset, old_capacity * value_width); - source_offset += number_of_elements * value_width; - dest_offset += number_of_elements * value_width + (next_number_of_elements * value_width); - @memcpy(dest_ptr + dest_offset, source_ptr + source_offset, number_of_elements * @sizeOf(Slot)); + source_offset += old_capacity * value_width; + dest_offset += old_capacity * value_width + (new_capacity * value_width); + @memcpy(dest_ptr + dest_offset, source_ptr + source_offset, old_capacity * @sizeOf(Slot)); + } var i: usize = 0; - while (i < next_number_of_elements) : (i += 1) { - (dest_ptr + dest_offset + number_of_elements * @sizeOf(Slot))[i] = @enumToInt(Slot.Empty); + const first_new_slot_value = dest_ptr + old_capacity * slot_size + new_capacity * (key_width + value_width); + while (i < (new_capacity - old_capacity)) : (i += 1) { + (first_new_slot_value)[i] = @enumToInt(Slot.Empty); } return RocDict{ .dict_bytes = first_slot, - .dict_slot_len = 8 + 16, + .number_of_levels = self.number_of_levels + 1, .dict_entries_len = self.dict_entries_len, }; } @@ -244,6 +260,10 @@ pub const RocDict = extern struct { return false; } + pub fn capacity(self: RocDict) usize { + return total_slots_at_level(self.number_of_levels); + } + pub fn makeUnique(self: RocDict, allocator: *Allocator, in_place: InPlace, alignment: Alignment, key_width: usize, value_width: usize, inc_key: Inc, inc_value: Inc) RocDict { if (self.isEmpty()) { return self; @@ -265,24 +285,24 @@ pub const RocDict = extern struct { // we copied potentially-refcounted values; make sure to increment const size = new_dict.dict_entries_len; - const n = new_dict.dict_slot_len; + const n = new_dict.number_of_levels; var i: usize = 0; i = 0; while (i < size) : (i += 1) { - inc_key(new_dict.getKey(n, i, alignment, key_width, value_width)); + inc_key(new_dict.getKey(i, alignment, key_width, value_width)); } i = 0; while (i < size) : (i += 1) { - inc_value(new_dict.getValue(n, i, alignment, key_width, value_width)); + inc_value(new_dict.getValue(i, alignment, key_width, value_width)); } return new_dict; } - fn getSlot(self: *const RocDict, capacity: usize, index: usize, key_width: usize, value_width: usize) Slot { - const offset = capacity * (key_width + value_width) + index * @sizeOf(Slot); + fn getSlot(self: *const RocDict, index: usize, key_width: usize, value_width: usize) Slot { + const offset = self.capacity() * (key_width + value_width) + index * @sizeOf(Slot); if (self.dict_bytes) |u8_ptr| { return @intToEnum(Slot, u8_ptr[offset]); @@ -291,8 +311,8 @@ pub const RocDict = extern struct { } } - fn setSlot(self: *RocDict, capacity: usize, index: usize, key_width: usize, value_width: usize, slot: Slot) void { - const offset = capacity * (key_width + value_width) + index * @sizeOf(Slot); + fn setSlot(self: *RocDict, index: usize, key_width: usize, value_width: usize, slot: Slot) void { + const offset = self.capacity() * (key_width + value_width) + index * @sizeOf(Slot); if (self.dict_bytes) |u8_ptr| { u8_ptr[offset] = @enumToInt(slot); @@ -301,12 +321,12 @@ pub const RocDict = extern struct { } } - fn setKey(self: *RocDict, capacity: usize, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void { + fn setKey(self: *RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void { const offset = blk: { if (alignment.keyFirst()) { break :blk (index * key_width); } else { - break :blk (capacity * value_width) + (index * key_width); + break :blk (self.capacity() * value_width) + (index * key_width); } }; @@ -318,12 +338,12 @@ pub const RocDict = extern struct { } } - fn getKey(self: *const RocDict, capacity: usize, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque { + fn getKey(self: *const RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque { const offset = blk: { if (alignment.keyFirst()) { break :blk (index * key_width); } else { - break :blk (capacity * value_width) + (index * key_width); + break :blk (self.capacity() * value_width) + (index * key_width); } }; @@ -334,10 +354,10 @@ pub const RocDict = extern struct { } } - fn setValue(self: *RocDict, capacity: usize, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void { + fn setValue(self: *RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void { const offset = blk: { if (alignment.keyFirst()) { - break :blk (capacity * key_width) + (index * value_width); + break :blk (self.capacity() * key_width) + (index * value_width); } else { break :blk (index * value_width); } @@ -351,10 +371,10 @@ pub const RocDict = extern struct { } } - fn getValue(self: *const RocDict, capacity: usize, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque { + fn getValue(self: *const RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque { const offset = blk: { if (alignment.keyFirst()) { - break :blk (capacity * key_width) + (index * value_width); + break :blk (self.capacity() * key_width) + (index * value_width); } else { break :blk (index * value_width); } @@ -367,31 +387,43 @@ pub const RocDict = extern struct { } } - fn findIndex(self: *const RocDict, capacity: usize, seed: u64, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) MaybeIndex { + fn findIndex(self: *const RocDict, seed: u64, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) MaybeIndex { if (self.isEmpty()) { return MaybeIndex.not_found; } - const n = capacity; + var current_level: usize = 1; + var current_level_size: usize = 8; + var next_level_size: usize = 2 * current_level_size; + // hash the key, and modulo by the maximum size // (so we get an in-bounds index) const hash = hash_fn(seed, key); - const index = hash % n; + var index = hash % current_level_size; - switch (self.getSlot(n, index, key_width, value_width)) { - Slot.Empty, Slot.PreviouslyFilled => { + while (true) { + if (current_level > self.number_of_levels) { return MaybeIndex.not_found; - }, - Slot.Filled => { - // is this the same key, or a new key? - const current_key = self.getKey(n, index, alignment, key_width, value_width); + } - if (is_eq(key, current_key)) { - return MaybeIndex{ .index = index }; - } else { - unreachable; - } - }, + switch (self.getSlot(index, key_width, value_width)) { + Slot.Empty, Slot.PreviouslyFilled => { + return MaybeIndex.not_found; + }, + Slot.Filled => { + // is this the same key, or a new key? + const current_key = self.getKey(index, alignment, key_width, value_width); + + if (is_eq(key, current_key)) { + return MaybeIndex{ .index = index }; + } else { + current_level += 1; + current_level_size *= 2; + next_level_size *= 2; + continue; + } + }, + } } } }; @@ -420,105 +452,63 @@ const Dec = fn (?[*]u8) callconv(.C) void; // Dict.insert : Dict k v, k, v -> Dict k v pub fn dictInsert(input: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value: Opaque, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_key: Inc, dec_key: Dec, inc_value: Inc, dec_value: Dec, output: *RocDict) callconv(.C) void { - const n: usize = std.math.max(input.dict_slot_len, 8); - const seed: u64 = INITIAL_SEED; + var seed: u64 = INITIAL_SEED; var result: RocDict = blk: { if (input.isEmpty()) { - var temp = RocDict.allocate( - std.heap.c_allocator, - InPlace.Clone, - n, // number_of_slots, - 0, // number_of_entries, - alignment.toUsize(), - key_width, - value_width, - ); - - { - var i: usize = 0; - while (i < n) { - temp.setSlot(n, i, key_width, value_width, Slot.Empty); - i += 1; - } - } - - break :blk temp; + break :blk input; } else { const in_place = InPlace.Clone; var temp = input.makeUnique(std.heap.c_allocator, in_place, alignment, key_width, value_width, inc_key, inc_value); break :blk temp; - // break :blk input; } }; - // hash the key, and modulo by the maximum size - // (so we get an in-bounds index) - const hash = hash_fn(seed, key); - var index = hash % n; - var current_level: usize = 1; var current_level_size: usize = 8; - var next_level_size: usize = 16; + var next_level_size: usize = 2 * current_level_size; while (true) { - switch (result.getSlot(n, index, key_width, value_width)) { + if (current_level > result.number_of_levels) { + result = result.reallocate(std.heap.c_allocator, alignment.toUsize(), key_width, value_width); + } + + const hash = hash_fn(seed, key); + const index = hash % current_level_size; + + switch (result.getSlot(index, key_width, value_width)) { Slot.Empty, Slot.PreviouslyFilled => { - result.setSlot(n, index, key_width, value_width, Slot.Filled); - result.setKey(n, index, alignment, key_width, value_width, key); - result.setValue(n, index, alignment, key_width, value_width, value); + result.setSlot(index, key_width, value_width, Slot.Filled); + result.setKey(index, alignment, key_width, value_width, key); + result.setValue(index, alignment, key_width, value_width, value); result.dict_entries_len += 1; break; }, Slot.Filled => { // is this the same key, or a new key? - const current_key = result.getKey(n, index, alignment, key_width, value_width); + const current_key = result.getKey(index, alignment, key_width, value_width); if (is_eq(key, current_key)) { // we will override the old value, but first have to decrement its refcount - const current_value = result.getValue(n, index, alignment, key_width, value_width); + const current_value = result.getValue(index, alignment, key_width, value_width); dec_value(current_value); // we must consume the key argument! dec_key(key); - result.setValue(n, index, alignment, key_width, value_width, value); + result.setValue(index, alignment, key_width, value_width, value); break; } else { - const next_layer_exists = false; + seed = nextSeed(seed); - if (next_layer_exists) { - // rehash key with next seed - const next_level_seed = nextSeed(seed); - const next_level_index = hash_fn(next_level_seed, key) % 16; + current_level += 1; + current_level_size *= 2; + next_level_size *= 2; - index = (current_level_size + next_level_index); - current_level += 1; - - current_level_size *= 2; - next_level_size *= 2; - - continue; - } else { - // 8, 16, 32 .. - result = result.reallocate(std.heap.c_allocator, current_level, alignment.toUsize(), key_width, value_width); - - const next_level_seed = nextSeed(seed); - const next_level_index = hash_fn(next_level_seed, key) % 16; - - const new_index = (current_level_size + next_level_index); - - const capacity = 8 + 16; - result.setSlot(capacity, new_index, key_width, value_width, Slot.Filled); - result.setKey(capacity, new_index, alignment, key_width, value_width, key); - result.setValue(capacity, new_index, alignment, key_width, value_width, value); - - result.dict_entries_len += 1; - break; - } + continue; } }, } @@ -533,11 +523,9 @@ pub fn dictInsert(input: RocDict, alignment: Alignment, key: Opaque, key_width: // Dict.remove : Dict k v, k -> Dict k v pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_key: Inc, dec_key: Dec, inc_value: Inc, dec_value: Dec, output: *RocDict) callconv(.C) void { - const capacity: usize = input.dict_slot_len; - const n = capacity; const seed: u64 = INITIAL_SEED; - switch (input.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) { + switch (input.findIndex(seed, alignment, key, key_width, value_width, hash_fn, is_eq)) { MaybeIndex.not_found => { // the key was not found; we're done output.* = input; @@ -547,9 +535,9 @@ pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width: // TODO make sure input is unique (or duplicate otherwise) var dict = input; - dict.setSlot(n, index, key_width, value_width, Slot.PreviouslyFilled); - const old_key = dict.getKey(n, index, alignment, key_width, value_width); - const old_value = dict.getValue(n, index, alignment, key_width, value_width); + dict.setSlot(index, key_width, value_width, Slot.PreviouslyFilled); + const old_key = dict.getKey(index, alignment, key_width, value_width); + const old_value = dict.getValue(index, alignment, key_width, value_width); dec_key(old_key); dec_value(old_value); @@ -563,10 +551,9 @@ pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width: // Dict.contains : Dict k v, k -> Bool pub fn dictContains(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) callconv(.C) bool { - const capacity: usize = dict.dict_slot_len; const seed: u64 = INITIAL_SEED; - switch (dict.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) { + switch (dict.findIndex(seed, alignment, key, key_width, value_width, hash_fn, is_eq)) { MaybeIndex.not_found => { return false; }, @@ -578,16 +565,14 @@ pub fn dictContains(dict: RocDict, alignment: Alignment, key: Opaque, key_width: // Dict.get : Dict k v, k -> { flag: bool, value: Opaque } pub fn dictGet(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_value: Inc) callconv(.C) extern struct { value: Opaque, flag: bool } { - const capacity: usize = dict.dict_slot_len; - const n: usize = capacity; const seed: u64 = INITIAL_SEED; - switch (dict.findIndex(capacity, seed, alignment, key, key_width, value_width, hash_fn, is_eq)) { + switch (dict.findIndex(seed, alignment, key, key_width, value_width, hash_fn, is_eq)) { MaybeIndex.not_found => { return .{ .flag = false, .value = null }; }, MaybeIndex.index => |index| { - var value = dict.getValue(n, index, alignment, key_width, value_width); + var value = dict.getValue(index, alignment, key_width, value_width); inc_value(value); return .{ .flag = true, .value = value }; }, @@ -598,17 +583,17 @@ pub fn dictGet(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usiz // increment or decrement all dict elements (but not the dict's allocation itself) pub fn elementsRc(dict: RocDict, alignment: Alignment, key_width: usize, value_width: usize, modify_key: Inc, modify_value: Inc) callconv(.C) void { const size = dict.dict_entries_len; - const n = dict.dict_slot_len; + const n = dict.number_of_levels; var i: usize = 0; i = 0; while (i < size) : (i += 1) { - modify_key(dict.getKey(n, i, alignment, key_width, value_width)); + modify_key(dict.getKey(i, alignment, key_width, value_width)); } i = 0; while (i < size) : (i += 1) { - modify_value(dict.getValue(n, i, alignment, key_width, value_width)); + modify_value(dict.getValue(i, alignment, key_width, value_width)); } }