From c8c3944673b106eea491e44e5a1ccdfce9e88f23 Mon Sep 17 00:00:00 2001 From: Chadtech Date: Tue, 29 Dec 2020 03:27:01 -0500 Subject: [PATCH] Dict and Hash zig modules --- compiler/builtins/bitcode/src/dict.zig | 285 +++++++++++++++++++++++++ compiler/builtins/bitcode/src/hash.zig | 139 ++++++++++++ 2 files changed, 424 insertions(+) create mode 100644 compiler/builtins/bitcode/src/dict.zig create mode 100644 compiler/builtins/bitcode/src/hash.zig diff --git a/compiler/builtins/bitcode/src/dict.zig b/compiler/builtins/bitcode/src/dict.zig new file mode 100644 index 0000000000..26ce5dbd69 --- /dev/null +++ b/compiler/builtins/bitcode/src/dict.zig @@ -0,0 +1,285 @@ +const std = @import("std"); +const mem = std.mem; +const Allocator = mem.Allocator; +const testing = std.testing; +const expectEqual = testing.expectEqual; +const assert = std.debug.assert; + +const print = std.debug.print; + + +const hash = @import("hash.zig"); + +const size = 32; + +pub fn RocDict( + comptime Key: type, + comptime Value: type, +) type { + return struct { + const Self = @This(); + + pub const Entry = struct { + key: Key, + value: Value, + + pub fn eq(self: Entry, other: Entry) bool { + const same_keys = self.key == other.key; + const same_value = self.value == other.value; + return same_keys and same_value; + } + + pub fn set_value(self: *Entry, value: Value) void { + self.*.value = value; + } + }; + + + pub const Query = struct { + maybe_entry: ?Entry + }; + + len: u64, + entries: [size]?Entry, + + fn query(self: Self, key: Key, level: u64) Query { + const index = key_to_index_at_level(key, level); + + const maybe_entry = self.entries[index]; + + if (maybe_entry == null) { + return Query { .maybe_entry = null }; + } else { + var entry = maybe_entry.?; + + if (entry.key == key) { + return Query { .maybe_entry = entry }; + } else { + return self.query(key, level + 1); + } + } + } + + fn key_to_index_at_level(key: Key, level: u64) u64 { + const index = hash.hash(std.mem.asBytes(&key), level) % size; + + return index; + } + + pub fn init(allocator: *Allocator) Self { + const roc_dict_size = @sizeOf(Self); + + var init_entries: [size]?Entry = undefined; + + for (init_entries) |*entry, i| { + entry.* = null; + } + + return Self { + .len = 0, + .entries = init_entries, + }; + } + + + pub fn get(self: Self, key: Key) ?Value { + + const q = self.query(key, 0); + + if (q.maybe_entry == null) { + return null; + } else { + const entry = q.maybe_entry.?; + + return entry.value; + } + } + + pub fn insert(self: *Self, key: Key, value: Value) void { + const level = 0; + + const q = self.query(key, level); + + const index = key_to_index_at_level(key, level); + + if (q.maybe_entry == null) { + + var new_entry = Entry { + .value = value, + .key = key + }; + + self.entries[index] = new_entry; + + self.len += 1; + } else { + var entry = q.maybe_entry.?; + + var entry_ptr = &entry; + + entry_ptr.set_value(value); + + self.entries[index] = entry; + } + } + + pub fn get_len(self: Self) u64 { + return self.len; + } + + pub fn eq(self: Self, other: Self) bool { + if (self.get_len() != other.get_len()) { + return false; + } + + var levels_count : u64 = self.entries.len; + var are_same = true; + + var i : u64 = 0; + while ((i < size) and are_same) { + const maybe_entry = self.entries[i]; + const maybe_other_entry = other.entries[i]; + + if (maybe_entry == null) { + if (maybe_other_entry != null) { + are_same = false; + } else { + i += 1; + } + } else { + if (maybe_other_entry == null) { + are_same = false; + } else { + const entry = maybe_entry.?; + const other_entry = maybe_other_entry.?; + if (entry.eq(other_entry)) { + i += 1; + } else { + are_same = false; + } + } + } + } + + return are_same; + } + }; +} + +test "RocDict.insert with hash collisions" { + var dict = RocDict(u64,u64).init(testing.allocator); + + var i : u64 = 0; + + while (i < (size * 2)) { + dict.insert(i, i); + + i += 1; + } + + i = 0; + while (i < (size * 2)) { + const entry = dict.get(i); + + expectEqual(i, entry.?); + + i += 1; + } +} + +test "repeated RocDict.insert" { + var dict = RocDict(u64,u64).init(testing.allocator); + + const index = 0; + dict.insert(index, 17); + dict.insert(index, 49); + + var value : ?u64 = dict.get(index); + + if (value == null) { + unreachable; + } else { + const result : ?u64 = 49; + expectEqual(result, value); + } +} + +test "RocDict.eq" { + var fst = RocDict(u64,u64).init(testing.allocator); + var snd = RocDict(u64,u64).init(testing.allocator); + + const key = 0; + const value = 30; + + fst.insert(key, value); + snd.insert(key, value); + assert(fst.eq(snd)); + + var empty = RocDict(u64,u64).init(testing.allocator); + assert(!fst.eq(empty)); + + var trd = RocDict(u64,u64).init(testing.allocator); + trd.insert(key, value + 1); + + assert(!fst.eq(trd)); +} + + + +test "RocDict.get_len" { + var dict = RocDict(u64,u64).init(testing.allocator); + + const index = 0; + + dict.insert(index, 16); + + const expect_len : u64 = 1; + expectEqual(expect_len, dict.get_len()); + + dict.insert(index, 16); + + expectEqual(expect_len, dict.get_len()); + + dict.insert(index + 1, 3); + + expectEqual(expect_len + 1, dict.get_len()); +} + + +test "RocDict.insert" { + var dict = RocDict(u64,u64).init(testing.allocator); + + const index = 0; + const value : u64 = 30; + + dict.insert(index, value); + + var result_value : ?u64 = dict.get(index); + expectEqual(value, result_value.?); + + var expect_len : u64 = 1; + expectEqual(expect_len, dict.get_len()); +} + + +test "RocDict.get" { + const empty = RocDict(u64, u64).init(testing.allocator); + + const expect : ?u64 = null; + expectEqual(expect, empty.get(29)); +} + + +test "RocDict.init" { + const empty = RocDict(u64, u64).init(testing.allocator); + expectEqual(empty.get_len(), 0); + + const MadeUpType = struct { + oneField: u64 + }; + + const empty_made_up = RocDict(u64, MadeUpType).init(testing.allocator); + + const expect : u64 = 0; + expectEqual(expect, empty_made_up.get_len()); +} \ No newline at end of file diff --git a/compiler/builtins/bitcode/src/hash.zig b/compiler/builtins/bitcode/src/hash.zig new file mode 100644 index 0000000000..5ee8be977f --- /dev/null +++ b/compiler/builtins/bitcode/src/hash.zig @@ -0,0 +1,139 @@ +const std = @import("std"); +const mem = std.mem; +const expectEqual = std.testing.expectEqual; + +const primes = [6]u64{ + 0xa0761d6478bd642f, + 0xe7037ed1a0b428db, + 0x8ebc6af09c88c6e3, + 0x589965cc75374cc3, + 0x1d8e4e27c47d124f, + 0xeb44accab455d165, +}; + +fn read_bytes(comptime bytes: u8, data: []const u8) u64 { + return mem.readVarInt(u64, data[0..bytes], @import("builtin").endian); +} + +fn read_8bytes_swapped(data: []const u8) u64 { + return (read_bytes(4, data) << 32 | read_bytes(4, data[4 .. ])); +} + +fn mum(a: u64, b :u64) u64 { + var r : u128 = @intCast(u128, a) * @intCast(u128, b); + r = (r >> 64) ^ r; + + return @truncate(u64, r); +} + +pub fn hash(key: []const u8, init_seed: u64) u64 { + const len = key.len; + + + var seed = init_seed; + + var i: usize = 0; + + while (i + 32 <= len) { + seed = mum( + seed ^ primes[0], + mum( + read_bytes(8, key[i ..]) ^ primes[1], + read_bytes(8, key[i + 8 ..]) ^ primes[2] + ) ^ + mum( + read_bytes(8, key[i + 16 ..]) ^ primes[3], + read_bytes(8, key[i + 24 ..]) ^ primes[4] + ) + ); + + i += 32; + } + + seed ^= primes[0]; + + const rem_len = @truncate(u5, len); + + if (rem_len != 0) { + + const rem_bits = @truncate(u3, rem_len % 8); + const rem_bytes = @truncate(u2, (len - 1) / 8); + const rem_key = key[ i + @intCast(usize, rem_bytes) * 8 ..]; + + const rest = switch (rem_bits) { + 0 => read_8bytes_swapped(rem_key), + 1 => read_bytes(1, rem_key), + 2 => read_bytes(2, rem_key), + 3 => read_bytes(2, rem_key) << 8 | read_bytes(1, rem_key[2..]), + 4 => read_bytes(4, rem_key), + 5 => read_bytes(4, rem_key) << 8 | read_bytes(1, rem_key[4..]), + 6 => read_bytes(4, rem_key) << 16 | read_bytes(2, rem_key[4..]), + 7 => read_bytes(4, rem_key) << 24 | read_bytes(2, rem_key[4..]) << 8 | read_bytes(1, rem_key[6..]), + } ^ + primes[@intCast(usize, rem_bytes) + 1]; + + seed = switch (rem_bytes) { + 0 => mum(seed, rest), + 1 => mum(read_8bytes_swapped(key[i .. ]) ^ seed, rest), + 2 => mum ( + read_8bytes_swapped(key[ i .. ]) ^ seed, + read_8bytes_swapped(key[ i + 8 .. ]) & primes[2] + ) ^ + mum(seed, rest), + 3 => mum( + read_8bytes_swapped(key[ i .. ]) ^ seed, + read_8bytes_swapped(key[ i + 8 ..]) ^ primes[2] + ) ^ + mum(read_8bytes_swapped(key[ i + 16 .. ]) ^ seed, rest), + }; + } + + return mum(seed, len ^ primes[5]); +} + +test "test hash" { + const fst_key : []const u8 = &[1]u8 { 0 }; + const fst_result : u64 = hash(fst_key, 0); + const fst_expectation : u64 = 10120618241204775652; + + expectEqual(fst_expectation, fst_result); + + const snd_key : []const u8 = &[1]u8 { 1 }; + const snd_result : u64 = hash(snd_key, 0); + const snd_expectation : u64 = 16604119901607610318; + + expectEqual(snd_expectation, snd_result); + + const thd_key : []const u8 = ""; + const thd_result : u64 = hash(thd_key, 0); + const thd_expectation : u64 = 17969918002310452037; + + expectEqual(thd_expectation, thd_result); + + const frth_key : []const u8 = &[42]u8 { + 1,1,1,1,1,1, + 2,2,2,2,2,2, + 3,3,3,3,3,3, + 4,4,4,4,4,4, + 5,5,5,5,5,5, + 6,6,6,6,6,6, + 7,7,7,7,7,7 + }; + const frth_result : u64 = hash(frth_key, 0); + const frth_expectation : u64 = 10505276342277112336; + + expectEqual(frth_expectation, frth_result); +} + +//test "Render Indices" { +// const print = std.debug.print; +// +// const limit = 32; +// +// var i : u8 = 0; +// +// while (i < (2 * limit)) { +// print("Index {} hashes to {}\n", .{ i, hash(std.mem.asBytes(&i), 0) % limit}); +// i += 1; +// } +//} \ No newline at end of file