remove dict/hash stuff from the zig builtins

This commit is contained in:
Folkert 2022-07-13 11:59:46 +02:00
parent 4d55b756bb
commit 6c26d8812f
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
8 changed files with 3 additions and 1737 deletions

View file

@ -1,815 +0,0 @@
const std = @import("std");
const testing = std.testing;
const expectEqual = testing.expectEqual;
const mem = std.mem;
const assert = std.debug.assert;
const utils = @import("utils.zig");
const RocList = @import("list.zig").RocList;
// Seed handed to the hash function for the first probe level; every deeper
// level uses `nextSeed` of the previous level's seed.
const INITIAL_SEED = 0xc70f6907;
// NOTE(review): not referenced anywhere in the visible part of this file.
const InPlace = enum(u8) {
InPlace,
Clone,
};
// Per-slot state byte. The state bytes are stored after all key and value
// bytes of the allocation (see `getSlot` / `setSlot`).
const Slot = enum(u8) {
// never held an entry (written for every newly added slot by `reallocate`)
Empty,
// currently holds a key/value pair
Filled,
// held an entry that `dictRemove` has since removed
PreviouslyFilled,
};
// Result of `findIndex`: either the slot index of the key, or "not found".
const MaybeIndexTag = enum { index, not_found };
const MaybeIndex = union(MaybeIndexTag) { index: usize, not_found: void };
// Derive the hash seed for the next probe level from the current seed.
// TODO is this a valid way to get a new seed? are there better ways?
fn nextSeed(seed: u64) u64 {
    const following = seed + 1;
    return following;
}
// Number of slots allocated for a dict with `input` levels.
// Zero levels means zero slots; one level is 8 slots, and every further
// level triples the count (8, 24, 72, ...).
fn totalCapacityAtLevel(input: usize) usize {
    if (input == 0) return 0;

    var slots: usize = 8;
    var level: usize = 1;
    while (level < input) : (level += 1) {
        slots *= 3;
    }
    return slots;
}
// Slot count attributed to level `input`: 0 for level zero, 8 for level
// one, doubling for every level after that (8, 16, 32, ...).
// Callers use `capacityOfLevel(level - 1)` as the index offset of a level.
fn capacityOfLevel(input: usize) usize {
    if (input == 0) return 0;

    var slots: usize = 8;
    var level: usize = 1;
    while (level < input) : (level += 1) {
        slots *= 2;
    }
    return slots;
}
// Alignment of the dict's elements, packed into one byte.
// The numeric part (16 or 8) is the maximum alignment of the key and the
// value. The top bit additionally records which of the two has the bigger
// alignment and is therefore stored first: when it is set, values come
// before keys. If both alignments are the same, the key goes first.
const Alignment = extern struct {
    bits: u8,

    const VALUE_BEFORE_KEY_FLAG: u8 = 0b1000_0000;

    // The alignment with the ordering flag stripped.
    fn toU32(self: Alignment) u32 {
        const flag_is_set = self.bits >= VALUE_BEFORE_KEY_FLAG;
        return if (flag_is_set) self.bits ^ Alignment.VALUE_BEFORE_KEY_FLAG else self.bits;
    }

    // True when keys are laid out before values (ordering flag clear).
    fn keyFirst(self: Alignment) bool {
        return (self.bits & Alignment.VALUE_BEFORE_KEY_FLAG) == 0;
    }
};
// Forward to `utils.decref`, translating the packed `Alignment` into its
// plain numeric form first.
pub fn decref(
    bytes_or_null: ?[*]u8,
    data_bytes: usize,
    alignment: Alignment,
) void {
    const alignment_bytes = alignment.toU32();
    utils.decref(bytes_or_null, data_bytes, alignment_bytes);
}
// Forward to `utils.allocateWithRefcount`, translating the packed
// `Alignment` into its plain numeric form first.
pub fn allocateWithRefcount(
    data_bytes: usize,
    alignment: Alignment,
) [*]u8 {
    const alignment_bytes = alignment.toU32();
    return utils.allocateWithRefcount(data_bytes, alignment_bytes);
}
pub const RocDict = extern struct {
dict_bytes: ?[*]u8,
dict_entries_len: usize,
number_of_levels: usize,
// A dict with no allocation, no levels and no entries.
pub fn empty() RocDict {
    const blank = RocDict{
        .dict_bytes = null,
        .dict_entries_len = 0,
        .number_of_levels = 0,
    };
    return blank;
}
// Allocate refcounted storage big enough for every slot of a dict with
// `number_of_levels` levels. The storage contents are not initialised here;
// `number_of_entries` is only recorded in the returned header.
pub fn allocate(
    number_of_levels: usize,
    number_of_entries: usize,
    alignment: Alignment,
    key_size: usize,
    value_size: usize,
) RocDict {
    const total_slots = totalCapacityAtLevel(number_of_levels);
    const bytes_needed = total_slots * slotSize(key_size, value_size);
    const storage = allocateWithRefcount(bytes_needed, alignment);

    return RocDict{
        .dict_bytes = storage,
        .dict_entries_len = number_of_entries,
        .number_of_levels = number_of_levels,
    };
}
// Grow the dict by one level.
//
// A new allocation sized for `number_of_levels + 1` is created, the three
// regions of the old allocation (first element region, second element
// region, slot-state bytes) are copied to their new offsets, every added
// slot is marked `Slot.Empty`, and the old allocation is decref'd.
// Slot indices are unchanged by the move.
pub fn reallocate(
    self: RocDict,
    alignment: Alignment,
    key_width: usize,
    value_width: usize,
) RocDict {
    const old_capacity = self.capacity();
    const grown_levels = self.number_of_levels + 1;
    const new_capacity = totalCapacityAtLevel(grown_levels);
    const added_capacity = new_capacity - old_capacity;
    const entry_width = key_width + value_width;

    const new_bytes = allocateWithRefcount(new_capacity * slotSize(key_width, value_width), alignment);

    // transfer the memory (nothing to transfer when there was no allocation)
    if (self.dict_bytes) |old_bytes| {
        // whichever of key/value is stored first, then the other one
        const key_first = alignment.keyFirst();
        const leading_width = if (key_first) key_width else value_width;
        const trailing_width = if (key_first) value_width else key_width;

        @memcpy(new_bytes, old_bytes, old_capacity * leading_width);
        @memcpy(
            new_bytes + new_capacity * leading_width,
            old_bytes + old_capacity * leading_width,
            old_capacity * trailing_width,
        );
        // slot-state bytes live after all keys and values
        @memcpy(
            new_bytes + new_capacity * entry_width,
            old_bytes + old_capacity * entry_width,
            old_capacity * @sizeOf(Slot),
        );
    }

    // state bytes of the slots that did not exist before
    const fresh_slots = new_bytes + new_capacity * entry_width + old_capacity;
    for (fresh_slots[0..added_capacity]) |*slot| {
        slot.* = @enumToInt(Slot.Empty);
    }

    // NOTE we fuse an increment of all keys/values with a decrement of the input dict
    decref(self.dict_bytes, old_capacity * slotSize(key_width, value_width), alignment);

    return RocDict{
        .dict_bytes = new_bytes,
        .number_of_levels = grown_levels,
        .dict_entries_len = self.dict_entries_len,
    };
}
// The dict's storage as a non-optional byte pointer.
// The cast does not handle a null `dict_bytes`.
pub fn asU8ptr(self: RocDict) [*]u8 {
    const maybe_bytes = self.dict_bytes;
    return @ptrCast([*]u8, maybe_bytes);
}
// Number of entries recorded in the dict header.
pub fn len(self: RocDict) usize {
    const entry_count = self.dict_entries_len;
    return entry_count;
}
// True when the dict holds no entries.
pub fn isEmpty(self: RocDict) bool {
    const entry_count = self.len();
    return entry_count == 0;
}
// Whether this dict may be mutated in place.
// An empty dict counts as unique (copying it cannot leak memory); any other
// dict is unique when the word stored directly before its data equals
// `utils.REFCOUNT_ONE`.
pub fn isUnique(self: RocDict) bool {
    if (!self.isEmpty()) {
        const words = @ptrCast([*]usize, @alignCast(@alignOf(usize), self.dict_bytes));
        const refcount = (words - 1)[0];
        return refcount == utils.REFCOUNT_ONE;
    }
    return true;
}
// Total number of slots backing this dict, derived from its level count.
pub fn capacity(self: RocDict) usize {
    const levels = self.number_of_levels;
    return totalCapacityAtLevel(levels);
}
// Return a dict that is safe to mutate in place.
// Empty and already-unique dicts are returned unchanged. Otherwise the
// whole allocation (keys, values and slot states) is copied byte-for-byte
// into fresh storage and the input allocation is decref'd.
pub fn makeUnique(self: RocDict, alignment: Alignment, key_width: usize, value_width: usize) RocDict {
    if (self.isEmpty() or self.isUnique()) {
        return self;
    }

    // unfortunately, we have to clone
    const copy = RocDict.allocate(self.number_of_levels, self.dict_entries_len, alignment, key_width, value_width);
    const total_bytes = self.capacity() * (@sizeOf(Slot) + key_width + value_width);
    @memcpy(@ptrCast([*]u8, copy.dict_bytes), @ptrCast([*]u8, self.dict_bytes), total_bytes);

    // NOTE we fuse an increment of all keys/values with a decrement of the input dict
    decref(self.dict_bytes, self.capacity() * slotSize(key_width, value_width), alignment);

    return copy;
}
// Read the state of slot `index`. State bytes start after the
// `capacity() * (key_width + value_width)` bytes of element storage.
// The dict must have an allocation.
fn getSlot(self: *const RocDict, index: usize, key_width: usize, value_width: usize) Slot {
    const states_start = self.capacity() * (key_width + value_width);
    const bytes = self.dict_bytes orelse unreachable;
    const raw_state = bytes[states_start + index * @sizeOf(Slot)];
    return @intToEnum(Slot, raw_state);
}
// Overwrite the state of slot `index`. State bytes start after the
// `capacity() * (key_width + value_width)` bytes of element storage.
// The dict must have an allocation.
fn setSlot(self: *RocDict, index: usize, key_width: usize, value_width: usize, slot: Slot) void {
    const states_start = self.capacity() * (key_width + value_width);
    const bytes = self.dict_bytes orelse unreachable;
    bytes[states_start + index * @sizeOf(Slot)] = @enumToInt(slot);
}
// Copy `key_width` bytes from `data` into the key storage of slot `index`.
// Does nothing for zero-width keys. Keys start at offset 0 when
// `alignment.keyFirst()`, otherwise after all values.
fn setKey(self: *RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void {
    if (key_width == 0) return;

    const keys_start = if (alignment.keyFirst()) 0 else self.capacity() * value_width;
    const offset = keys_start + (index * key_width);

    const bytes = self.dict_bytes orelse unreachable;
    const source = data orelse unreachable;
    @memcpy(bytes + offset, source, key_width);
}
// Pointer to the key stored in slot `index`, or null for zero-width keys.
// Keys start at offset 0 when `alignment.keyFirst()`, otherwise after all
// values.
fn getKey(self: *const RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque {
    if (key_width == 0) return null;

    const keys_start = if (alignment.keyFirst()) 0 else self.capacity() * value_width;
    const offset = keys_start + (index * key_width);

    const bytes = self.dict_bytes orelse unreachable;
    return bytes + offset;
}
// Copy `value_width` bytes from `data` into the value storage of slot
// `index`. Does nothing for zero-width values. Values start after all keys
// when `alignment.keyFirst()`, otherwise at offset 0.
fn setValue(self: *RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize, data: Opaque) void {
    if (value_width == 0) return;

    const values_start = if (alignment.keyFirst()) self.capacity() * key_width else 0;
    const offset = values_start + (index * value_width);

    const bytes = self.dict_bytes orelse unreachable;
    const source = data orelse unreachable;
    @memcpy(bytes + offset, source, value_width);
}
// Pointer to the value stored in slot `index`, or null for zero-width
// values. Values start after all keys when `alignment.keyFirst()`,
// otherwise at offset 0.
fn getValue(self: *const RocDict, index: usize, alignment: Alignment, key_width: usize, value_width: usize) Opaque {
    if (value_width == 0) return null;

    const values_start = if (alignment.keyFirst()) self.capacity() * key_width else 0;
    const offset = values_start + (index * value_width);

    const bytes = self.dict_bytes orelse unreachable;
    return bytes + offset;
}
// Locate `key` and return its slot index, or `not_found`.
//
// The key is probed once per level, with a fresh seed for every level:
//   * `Empty`            - nothing was ever stored here, so the key cannot
//                          have been pushed to a deeper level: not found.
//   * `PreviouslyFilled` - an entry was removed from this slot. The key may
//                          have been pushed deeper while that entry was
//                          still present, so probing must continue.
//   * `Filled`           - match ends the search, mismatch probes deeper.
// Running out of levels means the key is not present.
fn findIndex(self: *const RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) MaybeIndex {
    if (self.isEmpty()) {
        return MaybeIndex.not_found;
    }

    var seed: u64 = INITIAL_SEED;
    var current_level: usize = 1;
    var current_level_size: usize = 8;

    while (current_level <= self.number_of_levels) {
        // hash the key, and modulo by the maximum size
        // (so we get an in-bounds index)
        const hash = hash_fn(seed, key);
        const index = capacityOfLevel(current_level - 1) + @intCast(usize, (hash % current_level_size));

        switch (self.getSlot(index, key_width, value_width)) {
            Slot.Empty => return MaybeIndex.not_found,
            Slot.PreviouslyFilled => {
                // tombstone: keep looking at the next level
            },
            Slot.Filled => {
                // is this the same key, or a new key?
                const current_key = self.getKey(index, alignment, key_width, value_width);
                if (is_eq(key, current_key)) {
                    return MaybeIndex{ .index = index };
                }
            },
        }

        current_level += 1;
        current_level_size *= 2;
        seed = nextSeed(seed);
    }

    return MaybeIndex.not_found;
}
};
// Dict.empty
// Writes an empty dict (no allocation, no entries) through `dict`.
pub fn dictEmpty(dict: *RocDict) callconv(.C) void {
    const blank = RocDict.empty();
    dict.* = blank;
}
// Bytes one slot occupies in the allocation: its key, its value and its
// one `Slot` state tag.
pub fn slotSize(key_size: usize, value_size: usize) usize {
    const tag_size = @sizeOf(Slot);
    return tag_size + key_size + value_size;
}
// Dict.len
// Number of entries recorded in the dict header.
pub fn dictLen(dict: RocDict) callconv(.C) usize {
    return dict.len();
}
// commonly used type aliases
// Untyped, possibly-null pointer to a key, a value or other caller data.
const Opaque = ?[*]u8;
// Called as hash_fn(seed, key) to hash a key for one probe level.
const HashFn = fn (u64, ?[*]u8) callconv(.C) u64;
// Called as is_eq(key, stored_key) to compare two keys.
const EqFn = fn (?[*]u8, ?[*]u8) callconv(.C) bool;
// Refcount callbacks, each invoked with a pointer to a single element.
const Inc = fn (?[*]u8) callconv(.C) void;
// Called as inc_n_data(data, n) by dictWalk.
const IncN = fn (?[*]u8, usize) callconv(.C) void;
const Dec = fn (?[*]u8) callconv(.C) void;
// dictWalk's stepper, called as caller(data, accumulator_in, key, value, accumulator_out).
const Caller3 = fn (?[*]u8, ?[*]u8, ?[*]u8, ?[*]u8, ?[*]u8) callconv(.C) void;
// Dict.insert : Dict k v, k, v -> Dict k v
//
// Inserts `key`/`value` into a unique copy of `input` and writes the result
// through `output`.
//
// The key is probed once per level (fresh seed per level). Probing does NOT
// stop at the first removed (`PreviouslyFilled`) slot: the key may already
// live at a deeper level, and inserting into the tombstone right away would
// store the key twice and over-count `dict_entries_len`. Instead the first
// tombstone is remembered and only used once the key is known to be absent,
// i.e. when an `Empty` slot is reached or all levels have been probed.
//
// Refcounts: when the key is already present, `dec_value` is called on the
// value being overwritten and `dec_key` on the `key` argument (the stored
// key is kept). A new level is allocated only when every existing level is
// occupied by other keys.
pub fn dictInsert(
    input: RocDict,
    alignment: Alignment,
    key: Opaque,
    key_width: usize,
    value: Opaque,
    value_width: usize,
    hash_fn: HashFn,
    is_eq: EqFn,
    dec_key: Dec,
    dec_value: Dec,
    output: *RocDict,
) callconv(.C) void {
    var seed: u64 = INITIAL_SEED;
    var result = input.makeUnique(alignment, key_width, value_width);
    var current_level: usize = 1;
    var current_level_size: usize = 8;
    // first free slot (tombstone or empty) seen along the probe sequence
    var free_index: ?usize = null;

    while (true) {
        if (current_level > result.number_of_levels) {
            // every level was probed without finding the key
            if (free_index != null) break;
            result = result.reallocate(alignment, key_width, value_width);
        }

        const hash = hash_fn(seed, key);
        const index = capacityOfLevel(current_level - 1) + @intCast(usize, (hash % current_level_size));
        assert(index < result.capacity());

        switch (result.getSlot(index, key_width, value_width)) {
            Slot.Empty => {
                // the key cannot live any deeper than a never-used slot
                if (free_index == null) free_index = index;
                break;
            },
            Slot.PreviouslyFilled => {
                // reusable, but the key may still exist at a deeper level
                if (free_index == null) free_index = index;
            },
            Slot.Filled => {
                // is this the same key, or a new key?
                const current_key = result.getKey(index, alignment, key_width, value_width);
                if (is_eq(key, current_key)) {
                    // we will override the old value, but first have to decrement its refcount
                    const current_value = result.getValue(index, alignment, key_width, value_width);
                    dec_value(current_value);
                    // we must consume the key argument!
                    dec_key(key);
                    result.setValue(index, alignment, key_width, value_width, value);
                    output.* = result;
                    return;
                }
            },
        }

        seed = nextSeed(seed);
        current_level += 1;
        current_level_size *= 2;
    }

    // the key is new: store it in the first free slot of its probe sequence
    const target = free_index orelse unreachable;
    result.setSlot(target, key_width, value_width, Slot.Filled);
    result.setKey(target, alignment, key_width, value_width, key);
    result.setValue(target, alignment, key_width, value_width, value);
    result.dict_entries_len += 1;

    // write result into pointer
    output.* = result;
}
// Dict.remove : Dict k v, k -> Dict k v
//
// When `key` is not present the input is written to `output` unchanged.
// Otherwise the slot is marked `PreviouslyFilled` in a unique copy, the
// stored key and value are released through `dec_key` / `dec_value`, and
// the entry count drops by one. Removing the last entry also releases the
// allocation and yields the empty dict.
pub fn dictRemove(input: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, dec_key: Dec, dec_value: Dec, output: *RocDict) callconv(.C) void {
    const lookup = input.findIndex(alignment, key, key_width, value_width, hash_fn, is_eq);
    switch (lookup) {
        MaybeIndex.not_found => {
            // the key was not found; we're done
            output.* = input;
        },
        MaybeIndex.index => |index| {
            var dict = input.makeUnique(alignment, key_width, value_width);
            assert(index < dict.capacity());

            dict.setSlot(index, key_width, value_width, Slot.PreviouslyFilled);

            const removed_key = dict.getKey(index, alignment, key_width, value_width);
            const removed_value = dict.getValue(index, alignment, key_width, value_width);
            dec_key(removed_key);
            dec_value(removed_value);
            dict.dict_entries_len -= 1;

            if (dict.dict_entries_len != 0) {
                output.* = dict;
            } else {
                // the dict is now completely empty: free its allocation
                const data_bytes = dict.capacity() * slotSize(key_width, value_width);
                decref(dict.dict_bytes, data_bytes, alignment);
                output.* = RocDict.empty();
            }
        },
    }
}
// Dict.contains : Dict k v, k -> Bool
// True exactly when `findIndex` locates the key.
pub fn dictContains(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn) callconv(.C) bool {
    const lookup = dict.findIndex(alignment, key, key_width, value_width, hash_fn, is_eq);
    return switch (lookup) {
        MaybeIndex.index => true,
        MaybeIndex.not_found => false,
    };
}
// Dict.get : Dict k v, k -> { flag: bool, value: Opaque }
// On a hit, `inc_value` is called on the stored value and a pointer to it
// is returned with `flag = true`. On a miss the result is
// `{ flag = false, value = null }`.
pub fn dictGet(dict: RocDict, alignment: Alignment, key: Opaque, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, inc_value: Inc) callconv(.C) extern struct { value: Opaque, flag: bool } {
    const lookup = dict.findIndex(alignment, key, key_width, value_width, hash_fn, is_eq);
    switch (lookup) {
        MaybeIndex.index => |index| {
            const stored_value = dict.getValue(index, alignment, key_width, value_width);
            inc_value(stored_value);
            return .{ .flag = true, .value = stored_value };
        },
        MaybeIndex.not_found => {
            return .{ .flag = false, .value = null };
        },
    }
}
// Dict.elementsRc
// increment or decrement all dict elements (but not the dict's allocation itself)
// `modify_key` and `modify_value` are applied, in that order, to every
// filled slot.
pub fn elementsRc(dict: RocDict, alignment: Alignment, key_width: usize, value_width: usize, modify_key: Inc, modify_value: Inc) callconv(.C) void {
    const slot_count = dict.capacity();
    var index: usize = 0;
    while (index < slot_count) : (index += 1) {
        if (dict.getSlot(index, key_width, value_width) != Slot.Filled) continue;

        modify_key(dict.getKey(index, alignment, key_width, value_width));
        modify_value(dict.getValue(index, alignment, key_width, value_width));
    }
}
// Collect the keys of every filled slot into a newly allocated list.
//
// Returns the empty list when no slot is filled. Otherwise `inc_key` is
// called once per key and the key bytes are copied, in slot order, into
// storage of exactly `length * key_width` bytes.
//
// Zero-width keys: `getKey` returns null for them. A null optional pointer
// must not be cast to a non-optional pointer, so the copy is skipped in
// that case (there are no bytes to copy anyway).
pub fn dictKeys(
    dict: RocDict,
    alignment: Alignment,
    key_width: usize,
    value_width: usize,
    inc_key: Inc,
) callconv(.C) RocList {
    const size = dict.capacity();

    // first pass: count the filled slots
    var length: usize = 0;
    var i: usize = 0;
    while (i < size) : (i += 1) {
        if (dict.getSlot(i, key_width, value_width) == Slot.Filled) {
            length += 1;
        }
    }

    if (length == 0) {
        return RocList.empty();
    }

    const data_bytes = length * key_width;
    const ptr = allocateWithRefcount(data_bytes, alignment);

    // second pass: copy the keys out
    var copied: usize = 0;
    i = 0;
    while (i < size) : (i += 1) {
        if (dict.getSlot(i, key_width, value_width) != Slot.Filled) continue;

        const key = dict.getKey(i, alignment, key_width, value_width);
        inc_key(key);
        if (key) |key_ptr| {
            @memcpy(ptr + (copied * key_width), key_ptr, key_width);
        }
        copied += 1;
    }

    return RocList{ .bytes = ptr, .length = length, .capacity = length };
}
// Collect the values of every filled slot into a newly allocated list.
//
// Returns the empty list when no slot is filled. Otherwise `inc_value` is
// called once per value and the value bytes are copied, in slot order, into
// storage of exactly `length * value_width` bytes.
//
// Zero-width values: `getValue` returns null for them. A null optional
// pointer must not be cast to a non-optional pointer, so the copy is
// skipped in that case (there are no bytes to copy anyway).
pub fn dictValues(
    dict: RocDict,
    alignment: Alignment,
    key_width: usize,
    value_width: usize,
    inc_value: Inc,
) callconv(.C) RocList {
    const size = dict.capacity();

    // first pass: count the filled slots
    var length: usize = 0;
    var i: usize = 0;
    while (i < size) : (i += 1) {
        if (dict.getSlot(i, key_width, value_width) == Slot.Filled) {
            length += 1;
        }
    }

    if (length == 0) {
        return RocList.empty();
    }

    const data_bytes = length * value_width;
    const ptr = allocateWithRefcount(data_bytes, alignment);

    // second pass: copy the values out
    var copied: usize = 0;
    i = 0;
    while (i < size) : (i += 1) {
        if (dict.getSlot(i, key_width, value_width) != Slot.Filled) continue;

        const value = dict.getValue(i, alignment, key_width, value_width);
        inc_value(value);
        if (value) |value_ptr| {
            @memcpy(ptr + (copied * value_width), value_ptr, value_width);
        }
        copied += 1;
    }

    return RocList{ .bytes = ptr, .length = length, .capacity = length };
}
// Refcount callback that deliberately has no effect; passed where a
// `Dec` is required but nothing must be released.
fn doNothing(_: Opaque) callconv(.C) void {}
// Writes the union of `dict1` and `dict2` through `output`.
//
// Starts from a unique copy of `dict1` and inserts every entry of `dict2`
// whose key is not already present; entries of `dict1` win on duplicate
// keys.
pub fn dictUnion(
    dict1: RocDict,
    dict2: RocDict,
    alignment: Alignment,
    key_width: usize,
    value_width: usize,
    hash_fn: HashFn,
    is_eq: EqFn,
    inc_key: Inc,
    inc_value: Inc,
    output: *RocDict,
) callconv(.C) void {
    output.* = dict1.makeUnique(alignment, key_width, value_width);

    var i: usize = 0;
    while (i < dict2.capacity()) : (i += 1) {
        if (dict2.getSlot(i, key_width, value_width) != Slot.Filled) continue;

        const key = dict2.getKey(i, alignment, key_width, value_width);
        switch (output.findIndex(alignment, key, key_width, value_width, hash_fn, is_eq)) {
            MaybeIndex.index => {
                // the key is already in the output dict
            },
            MaybeIndex.not_found => {
                const value = dict2.getValue(i, alignment, key_width, value_width);
                inc_value(value);

                // we need an extra RC token for the key
                inc_key(key);
                // NOTE(review): the value is incremented a second time here;
                // confirm against the caller's ownership rules that this is
                // intended and not a leak.
                inc_value(value);

                // we know the newly added key is not a duplicate, so the `dec`s are unreachable
                dictInsert(output.*, alignment, key, key_width, value, value_width, hash_fn, is_eq, doNothing, doNothing, output);
            },
        }
    }
}
// Writes the intersection of `dict1` and `dict2` through `output`.
//
// Starts from a unique copy of `dict1` and removes (via `dictRemove`) every
// entry whose key is not found in `dict2`.
//
// `dictRemove` frees the allocation and resets `output` to the empty dict
// as soon as the last entry is removed. Iteration has to stop at that
// point: reading another slot of `output` would dereference its null
// `dict_bytes`.
pub fn dictIntersection(dict1: RocDict, dict2: RocDict, alignment: Alignment, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, dec_key: Inc, dec_value: Inc, output: *RocDict) callconv(.C) void {
    output.* = dict1.makeUnique(alignment, key_width, value_width);

    var i: usize = 0;
    const size = dict1.capacity();
    while (i < size) : (i += 1) {
        switch (output.getSlot(i, key_width, value_width)) {
            Slot.Filled => {
                const key = dict1.getKey(i, alignment, key_width, value_width);
                switch (dict2.findIndex(alignment, key, key_width, value_width, hash_fn, is_eq)) {
                    MaybeIndex.not_found => {
                        dictRemove(output.*, alignment, key, key_width, value_width, hash_fn, is_eq, dec_key, dec_value, output);
                        // nothing left to inspect once the output has been emptied and freed
                        if (output.isEmpty()) return;
                    },
                    MaybeIndex.index => |_| {
                        // keep this key/value
                    },
                }
            },
            else => {},
        }
    }
}
// Writes `dict1` minus `dict2` through `output`.
//
// Starts from a unique copy of `dict1` and removes (via `dictRemove`) every
// entry whose key is also found in `dict2`.
//
// `dictRemove` frees the allocation and resets `output` to the empty dict
// as soon as the last entry is removed. Iteration has to stop at that
// point: reading another slot of `output` would dereference its null
// `dict_bytes`.
pub fn dictDifference(dict1: RocDict, dict2: RocDict, alignment: Alignment, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, dec_key: Dec, dec_value: Dec, output: *RocDict) callconv(.C) void {
    output.* = dict1.makeUnique(alignment, key_width, value_width);

    var i: usize = 0;
    const size = dict1.capacity();
    while (i < size) : (i += 1) {
        switch (output.getSlot(i, key_width, value_width)) {
            Slot.Filled => {
                const key = dict1.getKey(i, alignment, key_width, value_width);
                switch (dict2.findIndex(alignment, key, key_width, value_width, hash_fn, is_eq)) {
                    MaybeIndex.not_found => {
                        // keep this key/value
                    },
                    MaybeIndex.index => |_| {
                        dictRemove(output.*, alignment, key, key_width, value_width, hash_fn, is_eq, dec_key, dec_value, output);
                        // nothing left to inspect once the output has been emptied and freed
                        if (output.isEmpty()) return;
                    },
                }
            },
            else => {},
        }
    }
}
// Build a set (a dict whose values are passed as null) from the elements
// of `list`, writing it through `output`, then decref the list's storage.
//
// Each element is inserted with `dictInsert`; `dec_key` is what
// `dictInsert` calls on an element whose key is already present.
//
// An empty list has no storage. Its null `bytes` must not be cast to a
// non-optional pointer, so the element loop only runs when storage exists.
pub fn setFromList(list: RocList, alignment: Alignment, key_width: usize, value_width: usize, hash_fn: HashFn, is_eq: EqFn, dec_key: Dec, output: *RocDict) callconv(.C) void {
    output.* = RocDict.empty();

    const dec_value = doNothing;
    const value = null;
    const size = list.length;

    if (list.bytes) |bytes| {
        const ptr = @ptrCast([*]u8, bytes);
        var i: usize = 0;
        while (i < size) : (i += 1) {
            const key = ptr + i * key_width;
            dictInsert(output.*, alignment, key, key_width, value, value_width, hash_fn, is_eq, dec_key, dec_value, output);
        }
    }

    // NOTE: decref checks for the empty case
    const data_bytes = size * key_width;
    decref(list.bytes, data_bytes, alignment);
}
// Fold over every filled slot, in slot order.
//
// For each entry `caller(data, accumulator_in, key, value, accumulator_out)`
// is invoked. Two buffers of `accum_width` bytes are used in alternation
// (the caller's `output` and a temporary allocation) so the stepper never
// reads and writes the same buffer. The final accumulator ends up in
// `output`.
//
// When `data_is_owned`, `inc_n_data(data, dict.len())` is called up front.
// `accum` and `output` must be non-null.
pub fn dictWalk(
    dict: RocDict,
    caller: Caller3,
    data: Opaque,
    inc_n_data: IncN,
    data_is_owned: bool,
    accum: Opaque,
    alignment: Alignment,
    key_width: usize,
    value_width: usize,
    accum_width: usize,
    output: Opaque,
) callconv(.C) void {
    const alignment_u32 = alignment.toU32();

    // allocate space to write the result of the stepper into
    // experimentally aliasing the accum and output pointers is not a good idea
    // TODO handle alloc failing!
    const bytes_ptr: [*]u8 = utils.alloc(accum_width, alignment_u32) orelse unreachable;
    const output_ptr = output orelse unreachable;

    // b2 always holds the current accumulator, b1 receives the next one
    var b1 = output_ptr;
    var b2 = bytes_ptr;

    if (data_is_owned) {
        inc_n_data(data, dict.len());
    }

    @memcpy(b2, accum orelse unreachable, accum_width);

    var i: usize = 0;
    const size = dict.capacity();
    while (i < size) : (i += 1) {
        switch (dict.getSlot(i, key_width, value_width)) {
            Slot.Filled => {
                const key = dict.getKey(i, alignment, key_width, value_width);
                const value = dict.getValue(i, alignment, key_width, value_width);
                caller(data, b2, key, value, b1);
                std.mem.swap([*]u8, &b1, &b2);
            },
            else => {},
        }
    }

    // After an odd number of steps the result already sits in `output`;
    // @memcpy must not be given overlapping source and destination.
    if (b2 != output_ptr) {
        @memcpy(output_ptr, b2, accum_width);
    }
    utils.dealloc(bytes_ptr, alignment_u32);
}

View file

@ -53,31 +53,6 @@ comptime {
exportListFn(list.listIsUnique, "is_unique");
}
// Dict Module
const dict = @import("dict.zig");
const hash = @import("hash.zig");
// Registers every dict builtin, plus the two hash functions, through
// `exportDictFn` under the given short name.
// NOTE(review): `exportDictFn` is defined outside this view; presumably it
// prefixes the name and exports the symbol - confirm there.
comptime {
exportDictFn(dict.dictLen, "len");
exportDictFn(dict.dictEmpty, "empty");
exportDictFn(dict.dictInsert, "insert");
exportDictFn(dict.dictRemove, "remove");
exportDictFn(dict.dictContains, "contains");
exportDictFn(dict.dictGet, "get");
exportDictFn(dict.elementsRc, "elementsRc");
exportDictFn(dict.dictKeys, "keys");
exportDictFn(dict.dictValues, "values");
exportDictFn(dict.dictUnion, "union");
exportDictFn(dict.dictIntersection, "intersection");
exportDictFn(dict.dictDifference, "difference");
exportDictFn(dict.dictWalk, "walk");
exportDictFn(dict.setFromList, "set_from_list");
// the hash helpers are exported alongside the dict functions
exportDictFn(hash.wyhash, "hash");
exportDictFn(hash.wyhash_rocstr, "hash_str");
}
// Num Module
const num = @import("num.zig");