Dict and Hash zig modules

This commit is contained in:
Chadtech 2020-12-29 03:27:01 -05:00
parent b1bf03e7a0
commit c8c3944673
2 changed files with 424 additions and 0 deletions

View file

@ -0,0 +1,285 @@
const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const testing = std.testing;
const expectEqual = testing.expectEqual;
const assert = std.debug.assert;
const print = std.debug.print;
const hash = @import("hash.zig");
const size = 32;
/// Builds a dictionary type that maps `Key` to `Value`.
///
/// Entries are stored inline in a fixed array of `size` optional slots, so a
/// dictionary never allocates and can hold at most `size` distinct keys.
/// A key's home slot is `hash(key bytes, 0) % size`; collisions are resolved by
/// linear probing (home, home + 1, ... wrapping around), which visits every
/// slot exactly once. Entries are never removed, so a probe can stop at the
/// first empty slot.
///
/// Keys are hashed through `std.mem.asBytes`, i.e. by their in-memory bytes.
/// NOTE(review): key types with padding bytes or pointers may therefore hash
/// inconsistently with `std.meta.eql` — confirm before using non-integer keys.
pub fn RocDict(
    comptime Key: type,
    comptime Value: type,
) type {
    return struct {
        const Self = @This();

        /// A stored key/value pair.
        pub const Entry = struct {
            key: Key,
            value: Value,

            /// Returns true when both the keys and the values are equal.
            /// Uses `std.meta.eql` so that struct keys/values compile too.
            pub fn eq(self: Entry, other: Entry) bool {
                return std.meta.eql(self.key, other.key) and
                    std.meta.eql(self.value, other.value);
            }

            /// Overwrites the value held by this entry.
            pub fn set_value(self: *Entry, value: Value) void {
                self.value = value;
            }
        };

        /// Result of a lookup: the matching entry, or null when the key is absent.
        pub const Query = struct {
            maybe_entry: ?Entry,
        };

        /// Number of distinct keys currently stored.
        len: u64,
        /// Slot array; null marks an empty slot.
        entries: [size]?Entry,

        /// Maps `key` to a slot index by hashing its bytes with `level` as seed.
        fn key_to_index_at_level(key: Key, level: u64) u64 {
            return hash.hash(std.mem.asBytes(&key), level) % size;
        }

        /// Walks the probe sequence of `key` and returns the index of either
        /// the slot that already holds `key`, or the first empty slot met.
        /// Returns null when every slot is occupied by some other key.
        fn find_slot(self: Self, key: Key) ?u64 {
            const home = key_to_index_at_level(key, 0);
            var step: u64 = 0;
            // `size` steps cover the whole table, so the loop always terminates.
            while (step < size) : (step += 1) {
                const index = (home + step) % size;
                if (self.entries[index]) |entry| {
                    if (std.meta.eql(entry.key, key)) {
                        return index;
                    }
                } else {
                    return index;
                }
            }
            return null;
        }

        /// Looks `key` up and wraps the outcome in a `Query`.
        fn query(self: Self, key: Key) Query {
            const index = self.find_slot(key) orelse return Query{ .maybe_entry = null };
            // The slot is either the match or an empty (null) slot.
            return Query{ .maybe_entry = self.entries[index] };
        }

        /// Creates an empty dictionary.
        /// `allocator` is accepted for interface compatibility but is not
        /// used: all storage lives inside the returned value.
        pub fn init(allocator: *Allocator) Self {
            _ = allocator;
            return Self{
                .len = 0,
                .entries = [_]?Entry{null} ** size,
            };
        }

        /// Returns the value stored under `key`, or null when it is absent.
        pub fn get(self: Self, key: Key) ?Value {
            const entry = self.query(key).maybe_entry orelse return null;
            return entry.value;
        }

        /// Stores `value` under `key`, replacing any previous value.
        /// `len` grows only when `key` was not present before.
        /// Panics when `key` is new and all `size` slots are already taken,
        /// because the `void` return type leaves no way to report the failure.
        pub fn insert(self: *Self, key: Key, value: Value) void {
            const index = self.find_slot(key) orelse
                @panic("RocDict is full: cannot insert a new key");
            if (self.entries[index]) |*entry| {
                // Existing key: update it in the slot where it actually lives.
                entry.set_value(value);
            } else {
                self.entries[index] = Entry{
                    .key = key,
                    .value = value,
                };
                self.len += 1;
            }
        }

        /// Returns the number of distinct keys stored.
        pub fn get_len(self: Self) u64 {
            return self.len;
        }

        /// Returns true when both dictionaries hold exactly the same
        /// key/value pairs, regardless of the order they were inserted in
        /// (and therefore regardless of which slots the entries occupy).
        pub fn eq(self: Self, other: Self) bool {
            if (self.get_len() != other.get_len()) {
                return false;
            }
            // Lengths match and keys are unique, so it is enough to check that
            // every entry of `self` has an equal counterpart in `other`.
            for (self.entries) |maybe_entry| {
                const entry = maybe_entry orelse continue;
                const other_entry = other.query(entry.key).maybe_entry orelse return false;
                if (!entry.eq(other_entry)) {
                    return false;
                }
            }
            return true;
        }
    };
}
// The dictionary keeps its entries in a fixed array of `size` slots, so at
// most `size` distinct keys can be retrievable at the same time. This test
// fills the table to exactly that limit; unless the hash happens to spread the
// keys perfectly, several of them share a home slot and exercise the collision
// path. Every key must still map to its own value afterwards.
test "RocDict.insert with hash collisions" {
    var dict = RocDict(u64, u64).init(testing.allocator);

    var i: u64 = 0;
    while (i < size) : (i += 1) {
        dict.insert(i, i);
    }

    // No insert may have overwritten another key.
    expectEqual(@as(u64, size), dict.get_len());

    i = 0;
    while (i < size) : (i += 1) {
        expectEqual(@as(?u64, i), dict.get(i));
    }
}
// Inserting twice under the same key must leave the second value in place.
test "repeated RocDict.insert" {
    var dict = RocDict(u64, u64).init(testing.allocator);
    const key = 0;

    dict.insert(key, 17);
    dict.insert(key, 49);

    const stored: ?u64 = dict.get(key);
    if (stored) |_| {
        const wanted: ?u64 = 49;
        expectEqual(wanted, stored);
    } else {
        // The key was just inserted, so a miss is impossible.
        unreachable;
    }
}
// Two dictionaries are equal only when they hold the same key/value pairs.
test "RocDict.eq" {
    const Dict = RocDict(u64, u64);
    const key = 0;
    const value = 30;

    // Same single entry on both sides: equal.
    var fst = Dict.init(testing.allocator);
    var snd = Dict.init(testing.allocator);
    fst.insert(key, value);
    snd.insert(key, value);
    assert(fst.eq(snd));

    // Different number of entries: not equal.
    const empty = Dict.init(testing.allocator);
    assert(!fst.eq(empty));

    // Same key but a different value: not equal.
    var trd = Dict.init(testing.allocator);
    trd.insert(key, value + 1);
    assert(!fst.eq(trd));
}
// The length counts distinct keys: re-inserting a key must not change it.
test "RocDict.get_len" {
    var dict = RocDict(u64, u64).init(testing.allocator);
    const first_key = 0;

    dict.insert(first_key, 16);
    expectEqual(@as(u64, 1), dict.get_len());

    // Same key again: still one entry.
    dict.insert(first_key, 16);
    expectEqual(@as(u64, 1), dict.get_len());

    // A second, different key: two entries.
    dict.insert(first_key + 1, 3);
    expectEqual(@as(u64, 2), dict.get_len());
}
// A single insert makes the value retrievable and sets the length to one.
test "RocDict.insert" {
    var dict = RocDict(u64, u64).init(testing.allocator);
    const key = 0;
    const stored: u64 = 30;

    dict.insert(key, stored);

    const fetched: ?u64 = dict.get(key);
    expectEqual(stored, fetched.?);
    expectEqual(@as(u64, 1), dict.get_len());
}
// Looking up a key in an empty dictionary yields null.
test "RocDict.get" {
    const dict = RocDict(u64, u64).init(testing.allocator);
    expectEqual(@as(?u64, null), dict.get(29));
}
// A freshly initialised dictionary is empty, whatever the value type is.
test "RocDict.init" {
    const expected_len: u64 = 0;

    // `expectEqual` takes the expected value first; keeping that order makes a
    // failure report the right "expected"/"found" values.
    const empty = RocDict(u64, u64).init(testing.allocator);
    expectEqual(expected_len, empty.get_len());

    // The value type may also be a user-defined struct.
    const MadeUpType = struct {
        oneField: u64,
    };
    const empty_made_up = RocDict(u64, MadeUpType).init(testing.allocator);
    expectEqual(expected_len, empty_made_up.get_len());
}

View file

@ -0,0 +1,139 @@
const std = @import("std");
const mem = std.mem;
const expectEqual = std.testing.expectEqual;
// Six 64-bit constants that `hash` mixes into the seed, the data words and the
// key length before they are multiplied together by `mum`.
// NOTE(review): the values look like the constants of the wyhash algorithm —
// confirm against the reference implementation before changing any of them;
// the expected values in "test hash" depend on every one of them.
const primes = [6]u64{
0xa0761d6478bd642f,
0xe7037ed1a0b428db,
0x8ebc6af09c88c6e3,
0x589965cc75374cc3,
0x1d8e4e27c47d124f,
0xeb44accab455d165,
};
/// Reads the first `bytes` bytes of `data` as an unsigned integer in the
/// host's native byte order and widens the result to `u64`.
/// `data` must hold at least `bytes` bytes.
fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
    const native_endian = @import("builtin").endian;
    const prefix = data[0..bytes];
    return mem.readVarInt(u64, prefix, native_endian);
}
/// Reads eight bytes as two 4-byte words and combines them with the first
/// word in the upper 32 bits and the second word in the lower 32 bits.
/// `data` must hold at least eight bytes.
fn read_8bytes_swapped(data: []const u8) u64 {
    const first_word = read_bytes(4, data);
    const second_word = read_bytes(4, data[4..]);
    return (first_word << 32) | second_word;
}
/// Multiplies `a` and `b` into a full 128-bit product, XORs the upper 64 bits
/// into the lower 64 bits and returns those lower 64 bits.
fn mum(a: u64, b: u64) u64 {
    const product = @as(u128, a) * @as(u128, b);
    const folded = product ^ (product >> 64);
    return @truncate(u64, folded);
}
/// Hashes `key` to a 64-bit value, starting from `init_seed`.
///
/// The key is consumed in 32-byte blocks, each folded into the seed with three
/// `mum` multiplications. The remaining `len % 32` bytes are split into up to
/// three complete 8-byte words followed by a final word of 1 to 8 bytes
/// (`rest`), which are mixed in next. The result is finalised with the key
/// length, so keys of different lengths hash differently even when their
/// bytes agree.
///
/// Multi-byte reads go through `read_bytes`, which uses the host's native byte
/// order, so the result depends on the target's endianness.
pub fn hash(key: []const u8, init_seed: u64) u64 {
    const len = key.len;
    var seed = init_seed;

    // Whole 32-byte blocks: four 8-byte words per round.
    var i: usize = 0;
    while (i + 32 <= len) : (i += 32) {
        seed = mum(
            seed ^ primes[0],
            mum(
                read_bytes(8, key[i..]) ^ primes[1],
                read_bytes(8, key[i + 8 ..]) ^ primes[2],
            ) ^ mum(
                read_bytes(8, key[i + 16 ..]) ^ primes[3],
                read_bytes(8, key[i + 24 ..]) ^ primes[4],
            ),
        );
    }
    seed ^= primes[0];

    // Tail: the 1..31 bytes left after the last whole block (if any).
    const rem_len = @truncate(u5, len);
    if (rem_len != 0) {
        // Size of the final word in bytes; 0 stands for a full 8-byte word.
        const rem_bits = @truncate(u3, rem_len % 8);
        // Number of complete 8-byte words in the tail before the final word.
        // Truncating to two bits drops the whole blocks (32 / 8 == 4 words each).
        const rem_bytes = @truncate(u2, (len - 1) / 8);
        // Start of the final word.
        const rem_key = key[i + @intCast(usize, rem_bytes) * 8 ..];

        const rest = switch (rem_bits) {
            0 => read_8bytes_swapped(rem_key),
            1 => read_bytes(1, rem_key),
            2 => read_bytes(2, rem_key),
            3 => read_bytes(2, rem_key) << 8 | read_bytes(1, rem_key[2..]),
            4 => read_bytes(4, rem_key),
            5 => read_bytes(4, rem_key) << 8 | read_bytes(1, rem_key[4..]),
            6 => read_bytes(4, rem_key) << 16 | read_bytes(2, rem_key[4..]),
            7 => read_bytes(4, rem_key) << 24 | read_bytes(2, rem_key[4..]) << 8 | read_bytes(1, rem_key[6..]),
        } ^ primes[@intCast(usize, rem_bytes) + 1];

        seed = switch (rem_bytes) {
            0 => mum(seed, rest),
            1 => mum(read_8bytes_swapped(key[i..]) ^ seed, rest),
            // The second word is XORed with primes[2], exactly as in the
            // three-word case below and in the block loop above. Masking it
            // with AND instead would ignore every key bit where the constant
            // has a zero and make such keys collide.
            2 => mum(
                read_8bytes_swapped(key[i..]) ^ seed,
                read_8bytes_swapped(key[i + 8 ..]) ^ primes[2],
            ) ^ mum(seed, rest),
            3 => mum(
                read_8bytes_swapped(key[i..]) ^ seed,
                read_8bytes_swapped(key[i + 8 ..]) ^ primes[2],
            ) ^ mum(read_8bytes_swapped(key[i + 16 ..]) ^ seed, rest),
        };
    }

    // Fold the length in so that prefixes of a key do not share its hash.
    return mum(seed, len ^ primes[5]);
}
// Pins the hash of a few fixed keys (seed 0) so that accidental changes to the
// algorithm or its constants are caught.
test "test hash" {
    const seed: u64 = 0;

    // One-byte keys.
    const zero_byte = [_]u8{0};
    expectEqual(@as(u64, 10120618241204775652), hash(&zero_byte, seed));

    const one_byte = [_]u8{1};
    expectEqual(@as(u64, 16604119901607610318), hash(&one_byte, seed));

    // The empty key.
    expectEqual(@as(u64, 17969918002310452037), hash("", seed));

    // 42 bytes: one whole 32-byte block followed by a 10-byte tail.
    const long_key = ([_]u8{1} ** 6) ++ ([_]u8{2} ** 6) ++ ([_]u8{3} ** 6) ++
        ([_]u8{4} ** 6) ++ ([_]u8{5} ** 6) ++ ([_]u8{6} ** 6) ++ ([_]u8{7} ** 6);
    expectEqual(@as(u64, 10505276342277112336), hash(&long_key, seed));
}
//test "Render Indices" {
// const print = std.debug.print;
//
// const limit = 32;
//
// var i : u8 = 0;
//
// while (i < (2 * limit)) {
// print("Index {} hashes to {}\n", .{ i, hash(std.mem.asBytes(&i), 0) % limit});
// i += 1;
// }
//}