Add an indexmap like dict to roc

This commit is contained in:
Brendan Hansknecht 2022-10-25 23:09:36 -07:00
parent 9becbedd22
commit cb08dca6f1
No known key found for this signature in database
GPG key ID: 0EA784685083E75B
3 changed files with 466 additions and 152 deletions

4
Cargo.lock generated
View file

@ -1982,7 +1982,7 @@ checksum = "adab1eaa3408fb7f0c777a73e7465fd5656136fc93b670eb6df3c88c2c1344e3"
[[package]] [[package]]
name = "inkwell" name = "inkwell"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/roc-lang/inkwell?branch=master#accd406858a40ca2a1463ff77d79f3c5e4c96f4e" source = "git+https://github.com/roc-lang/inkwell?branch=master#9b63d543eaf996aa91fdeb20a2bc8b8558775648"
dependencies = [ dependencies = [
"either", "either",
"inkwell_internals", "inkwell_internals",
@ -1995,7 +1995,7 @@ dependencies = [
[[package]] [[package]]
name = "inkwell_internals" name = "inkwell_internals"
version = "0.5.0" version = "0.5.0"
source = "git+https://github.com/roc-lang/inkwell?branch=master#accd406858a40ca2a1463ff77d79f3c5e4c96f4e" source = "git+https://github.com/roc-lang/inkwell?branch=master#9b63d543eaf996aa91fdeb20a2bc8b8558775648"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",

View file

@ -4,13 +4,17 @@ interface Dict
empty, empty,
withCapacity, withCapacity,
single, single,
get, clear,
walk, capacity,
insert,
len, len,
get,
contains,
insert,
remove, remove,
update, update,
contains, walk,
toList,
fromList,
keys, keys,
values, values,
insertAll, insertAll,
@ -22,8 +26,8 @@ interface Dict
Result.{ Result }, Result.{ Result },
List, List,
Str, Str,
Num.{ Nat, U64, U8 }, Num.{ Nat, U64, U8, I8 },
Hash.{ Hasher }, Hash.{ Hasher, Hash },
] ]
## A [dictionary](https://en.wikipedia.org/wiki/Associative_array) that lets you ## A [dictionary](https://en.wikipedia.org/wiki/Associative_array) that lets you
@ -74,45 +78,89 @@ interface Dict
## does. It removes an element and moves the most recent insertion into the ## does. It removes an element and moves the most recent insertion into the
## vacated spot. ## vacated spot.
## ##
## This move is done as a performance optimization, and it lets [Dict.remove] ## This move is done as a performance optimization, and it lets [remove] have
## have [constant time complexity](https://en.wikipedia.org/wiki/Time_complexity#Constant_time). ## [constant time complexity](https://en.wikipedia.org/wiki/Time_complexity#Constant_time). ##
## ##
## ### Equality ## Dict is inspired by [IndexMap](https://docs.rs/indexmap/latest/indexmap/map/struct.IndexMap.html).
## ## The internal implementation of a dictionary is similar to [absl::flat_hash_map](https://abseil.io/docs/cpp/guides/container).
## Two dictionaries are equal when their contents and orderings match. This ## It has a list of key-value pairs that is ordered based on insertion.
## means that when `dict1 == dict2`, the expression `fn dict1 == fn dict2` will ## It uses a list of indices into the data as the backing of a hash map.
## also evaluate to `Bool.true`. The function `fn` can count on the ordering of Dict k v := {
## values in the dictionary to also match. # TODO: Add hashflooding ordered map fall back.
Dict k v := List [Pair k v] has [Eq] # TODO: Add Groups and SIMD h1 key comparison (initial tests were slower, but with proper SIMD should be fast).
# TODO: As an optimization, we can make all of these lists in one allocation
# TODO: Grow data with the rest of the hashmap. This will require creating a list of garbage data.
# TODO: Change remove to use tombstones. Store the tombstones in a bitmap.
# TODO: define Eq and Hash that are unordered. Only if value has hash/eq?
metadata : List I8,
dataIndices : List Nat,
data : List (T k v),
size : Nat,
} | k has Hash & Eq
## Return an empty dictionary. ## Return an empty dictionary.
empty : Dict k v empty : Dict k v | k has Hash & Eq
empty = @Dict [] empty =
@Dict {
metadata: List.repeat emptySlot 8,
dataIndices: List.repeat 0 8,
data: [],
size: 0,
}
## Returns how many elements the dictionary can hold before it has to rehash.
## This is 7/8 of the number of slots in the index table.
capacity : Dict k v -> Nat | k has Hash & Eq
capacity = \@Dict { dataIndices } ->
    slotCount = List.len dataIndices
    headroom = Num.shiftRightZfBy slotCount 3

    slotCount - headroom
## Return a dictionary with space allocated for a number of entries. This ## Return a dictionary with space allocated for a number of entries. This
## may provide a performance optimisation if you know how many entries will be ## may provide a performance optimisation if you know how many entries will be
## inserted. ## inserted.
withCapacity : Nat -> Dict k v withCapacity : Nat -> Dict k v | k has Hash & Eq
withCapacity = \n -> @Dict (List.withCapacity n) withCapacity = \_ ->
# TODO power of 2 * 8 and actual implementation
empty
## Get the value for a given key. If there is a value for the specified key it ## Returns a dictionary containing the key and value provided as input.
## will return [Ok value], otherwise return [Err KeyNotFound].
## ##
## dictionary = ## expect
## Dict.single "A" "B"
## |> Bool.isEq (Dict.insert Dict.empty "A" "B")
single : k, v -> Dict k v | k has Hash & Eq
single = \k, v ->
insert empty k v
## Returns the number of values in the dictionary.
##
## expect
## Dict.empty ## Dict.empty
## |> Dict.insert 1 "Apple" ## |> Dict.insert "One" "A Song"
## |> Dict.insert 2 "Orange" ## |> Dict.insert "Two" "Candy Canes"
## ## |> Dict.insert "Three" "Boughs of Holly"
## expect Dict.get dictionary 1 == Ok "Apple" ## |> Dict.len
## expect Dict.get dictionary 2000 == Err KeyNotFound ## |> Bool.isEq 3
get : Dict k v, k -> Result v [KeyNotFound] | k has Eq len : Dict k v -> Nat | k has Hash & Eq
get = \@Dict list, needle -> len = \@Dict { size } ->
when List.findFirst list (\Pair key _ -> key == needle) is size
Ok (Pair _ v) ->
Ok v
Err NotFound -> clear : Dict k v -> Dict k v | k has Hash & Eq
Err KeyNotFound clear = \@Dict { metadata, dataIndices, data } ->
cap = List.len dataIndices
# Only clear large allocations.
if cap > 128 * 8 then
empty
else
@Dict {
metadata: List.map metadata (\_ -> emptySlot),
# just leave data indices as garbage, no need to clear.
dataIndices,
# use takeFirst to keep around the capacity.
data: List.takeFirst data 0,
size: 0,
}
## Iterate through the keys and values in the dictionary and call the provided ## Iterate through the keys and values in the dictionary and call the provided
## function with signature `state, k, v -> state` for each value, with an ## function with signature `state, k, v -> state` for each value, with an
@ -124,9 +172,63 @@ get = \@Dict list, needle ->
## |> Dict.insert "Orange" 24 ## |> Dict.insert "Orange" 24
## |> Dict.walk 0 (\count, _, qty -> count + qty) ## |> Dict.walk 0 (\count, _, qty -> count + qty)
## |> Bool.isEq 36 ## |> Bool.isEq 36
walk : Dict k v, state, (state, k, v -> state) -> state walk : Dict k v, state, (state, k, v -> state) -> state | k has Hash & Eq
walk = \@Dict list, initialState, transform -> walk = \@Dict { data }, initialState, transform ->
List.walk list initialState (\state, Pair k v -> transform state k v) List.walk data initialState (\state, T k v -> transform state k v)
## Look up the value stored under a key. Returns [Ok value] when the key is
## present and [Err KeyNotFound] when it is not.
##
##     dictionary =
##         Dict.empty
##         |> Dict.insert 1 "Apple"
##         |> Dict.insert 2 "Orange"
##
##     expect Dict.get dictionary 1 == Ok "Apple"
##     expect Dict.get dictionary 2000 == Err KeyNotFound
get : Dict k v, k -> Result v [KeyNotFound]* | k has Hash & Eq
get = \@Dict { metadata, dataIndices, data }, key ->
    hashed = complete (Hash.hash (createLowLevelHasher {}) key)
    groupCount = div8 (List.len metadata)
    start = newProbe (h1 hashed) groupCount

    when findIndexHelper metadata dataIndices data (h2 hashed) key start 0 is
        Err NotFound ->
            Err KeyNotFound

        Ok slot ->
            # The slot holds a position in `data`, not the pair itself.
            when listGetUnsafe data (listGetUnsafe dataIndices slot) is
                T _ found -> Ok found
## Report whether the dictionary holds a value for the given key.
##
##     expect
##         Dict.empty
##         |> Dict.insert 1234 "5678"
##         |> Dict.contains 1234
##         |> Bool.isEq Bool.true
contains : Dict k v, k -> Bool | k has Hash & Eq
contains = \@Dict { metadata, dataIndices, data }, key ->
    hashed = complete (Hash.hash (createLowLevelHasher {}) key)
    start = newProbe (h1 hashed) (div8 (List.len metadata))

    # Only the presence of a matching slot matters; its position is discarded.
    findIndexHelper metadata dataIndices data (h2 hashed) key start 0
    |> Result.isOk
## Insert a value into the dictionary at a specified key. ## Insert a value into the dictionary at a specified key.
## ##
@ -135,29 +237,42 @@ walk = \@Dict list, initialState, transform ->
## |> Dict.insert "Apples" 12 ## |> Dict.insert "Apples" 12
## |> Dict.get "Apples" ## |> Dict.get "Apples"
## |> Bool.isEq (Ok 12) ## |> Bool.isEq (Ok 12)
insert : Dict k v, k, v -> Dict k v | k has Eq insert : Dict k v, k, v -> Dict k v | k has Hash & Eq
insert = \@Dict list, k, v -> insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
when List.findFirstIndex list (\Pair key _ -> key == k) is hashKey =
Err NotFound -> createLowLevelHasher {}
insertFresh (@Dict list) k v |> Hash.hash key
|> complete
h1Key = h1 hashKey
h2Key = h2 hashKey
probe = newProbe h1Key (div8 (List.len metadata))
when findIndexHelper metadata dataIndices data h2Key key probe 0 is
Ok index -> Ok index ->
list dataIndex = listGetUnsafe dataIndices index
|> List.set index (Pair k v)
|> @Dict
## Returns the number of values in the dictionary. @Dict {
## metadata,
## expect dataIndices,
## Dict.empty data: List.set data dataIndex (T key value),
## |> Dict.insert "One" "A Song" size,
## |> Dict.insert "Two" "Candy Canes" }
## |> Dict.insert "Three" "Boughs of Holly"
## |> Dict.len Err NotFound ->
## |> Bool.isEq 3 # The dictionary has grown, it might need to rehash.
len : Dict k v -> Nat rehashedDict =
len = \@Dict list -> maybeRehash
List.len list (
@Dict {
metadata,
dataIndices,
data,
size: size + 1,
}
)
# Need to rescan searching for the first empty or deleted cell.
insertNotFoundHelper rehashedDict key value h1Key h2Key
## Remove a value from the dictionary for a specified key. ## Remove a value from the dictionary for a specified key.
## ##
@ -167,19 +282,49 @@ len = \@Dict list ->
## |> Dict.remove "Some" ## |> Dict.remove "Some"
## |> Dict.len ## |> Dict.len
## |> Bool.isEq 0 ## |> Bool.isEq 0
remove : Dict k v, k -> Dict k v | k has Eq remove : Dict k v, k -> Dict k v | k has Hash & Eq
remove = \@Dict list, key -> remove = \@Dict { metadata, dataIndices, data, size }, key ->
when List.findFirstIndex list (\Pair k _ -> k == key) is # TODO: change this from swap remove to tombstone and test if performance is still good.
Err NotFound -> hashKey =
@Dict list createLowLevelHasher {}
|> Hash.hash key
|> complete
h1Key = h1 hashKey
h2Key = h2 hashKey
probe = newProbe h1Key (div8 (List.len metadata))
when findIndexHelper metadata dataIndices data h2Key key probe 0 is
Ok index -> Ok index ->
lastIndex = List.len list - 1 last = List.len data - 1
list if index == last then
|> List.swap index lastIndex @Dict {
|> List.dropLast metadata: List.set metadata index deletedSlot,
|> @Dict dataIndices,
data: List.dropLast data,
size: size - 1,
}
else
# Swap with last and update index of value that used to be last.
dataIndex = listGetUnsafe dataIndices index
(T lastKey _) = listGetUnsafe data last
nextData =
data
|> List.swap dataIndex last
|> List.dropLast
nextDict =
@Dict {
metadata: List.set metadata index deletedSlot,
dataIndices,
data: nextData,
size: size - 1,
}
updateDataIndex nextDict lastKey dataIndex
Err NotFound ->
@Dict { metadata, dataIndices, data, size }
## Insert or remove a value for a specified key. This function enables a ## Insert or remove a value for a specified key. This function enables a
## performance optimisation for the use case of providing a default when a value ## performance optimisation for the use case of providing a default when a value
@ -195,8 +340,9 @@ remove = \@Dict list, key ->
## expect Dict.update Dict.empty "a" alterValue == Dict.single "a" Bool.false ## expect Dict.update Dict.empty "a" alterValue == Dict.single "a" Bool.false
## expect Dict.update (Dict.single "a" Bool.false) "a" alterValue == Dict.single "a" Bool.true ## expect Dict.update (Dict.single "a" Bool.false) "a" alterValue == Dict.single "a" Bool.true
## expect Dict.update (Dict.single "a" Bool.true) "a" alterValue == Dict.empty ## expect Dict.update (Dict.single "a" Bool.true) "a" alterValue == Dict.empty
update : Dict k v, k, ([Present v, Missing] -> [Present v, Missing]) -> Dict k v | k has Eq update : Dict k v, k, ([Present v, Missing] -> [Present v, Missing]) -> Dict k v | k has Hash & Eq
update = \dict, key, alter -> update = \dict, key, alter ->
# TODO: look into optimizing by merging substeps and reducing lookups.
possibleValue = possibleValue =
get dict key get dict key
|> Result.map Present |> Result.map Present
@ -206,46 +352,35 @@ update = \dict, key, alter ->
Present value -> insert dict key value Present value -> insert dict key value
Missing -> remove dict key Missing -> remove dict key
# Internal for testing only ## Returns dictionary with the keys and values specified by the input [List].
alterValue : [Present Bool, Missing] -> [Present Bool, Missing]
alterValue = \possibleValue ->
when possibleValue is
Missing -> Present Bool.false
Present value -> if value then Missing else Present Bool.true
expect update empty "a" alterValue == single "a" Bool.false
expect update (single "a" Bool.false) "a" alterValue == single "a" Bool.true
expect update (single "a" Bool.true) "a" alterValue == empty
## Check if the dictionary has a value for a specified key.
## ##
## expect ## expect
## Dict.empty ## Dict.single 1 "One"
## |> Dict.insert 1234 "5678" ## |> Dict.insert 2 "Two"
## |> Dict.contains 1234 ## |> Dict.insert 3 "Three"
contains : Dict k v, k -> Bool | k has Eq ## |> Dict.insert 4 "Four"
contains = \@Dict list, needle -> ## |> Bool.isEq (Dict.fromList [T 1 "One", T 2 "Two", T 3 "Three", T 4 "Four"])
List.any list \Pair key _val -> key == needle fromList : List (T k v) -> Dict k v | k has Hash & Eq
fromList = \data ->
# TODO: make this efficient. Should just set data and then set all indices in the hashmap.
List.walk data empty (\dict, T k v -> insert dict k v)
expect contains empty "a" == Bool.false ## Returns the keys and values of a dictionary as a [List].
expect contains (single "a" {}) "a" == Bool.true ## This returns the dictionary's internal list of pairs as-is, so no temporary list is allocated.
expect contains (single "b" {}) "a" == Bool.false
expect
Dict.empty
|> Dict.insert 1234 "5678"
|> Dict.contains 1234
|> Bool.isEq Bool.true
## Returns a dictionary containing the key and value provided as input.
## ##
## expect ## expect
## Dict.single "A" "B" ## Dict.single 1 "One"
## |> Bool.isEq (Dict.insert Dict.empty "A" "B") ## |> Dict.insert 2 "Two"
single : k, v -> Dict k v ## |> Dict.insert 3 "Three"
single = \key, value -> ## |> Dict.insert 4 "Four"
@Dict [Pair key value] ## |> Dict.toList
## |> Bool.isEq [T 1 "One", T 2 "Two", T 3 "Three", T 4 "Four"]
toList : Dict k v -> List (T k v) | k has Hash & Eq
toList = \@Dict { data } ->
data
## Returns the keys of a dictionary as a [List]. ## Returns the keys of a dictionary as a [List].
## This requires allocating a temporary list, prefer using [Dict.toList] or [Dict.walk] instead.
## ##
## expect ## expect
## Dict.single 1 "One" ## Dict.single 1 "One"
@ -254,11 +389,12 @@ single = \key, value ->
## |> Dict.insert 4 "Four" ## |> Dict.insert 4 "Four"
## |> Dict.keys ## |> Dict.keys
## |> Bool.isEq [1,2,3,4] ## |> Bool.isEq [1,2,3,4]
keys : Dict k v -> List k keys : Dict k v -> List k | k has Hash & Eq
keys = \@Dict list -> keys = \@Dict { data } ->
List.map list (\Pair k _ -> k) List.map data (\T k _ -> k)
## Returns the values of a dictionary as a [List]. ## Returns the values of a dictionary as a [List].
## This requires allocating a temporary list, prefer using [Dict.toList] or [Dict.walk] instead.
## ##
## expect ## expect
## Dict.single 1 "One" ## Dict.single 1 "One"
@ -267,22 +403,22 @@ keys = \@Dict list ->
## |> Dict.insert 4 "Four" ## |> Dict.insert 4 "Four"
## |> Dict.values ## |> Dict.values
## |> Bool.isEq ["One","Two","Three","Four"] ## |> Bool.isEq ["One","Two","Three","Four"]
values : Dict k v -> List v values : Dict k v -> List v | k has Hash & Eq
values = \@Dict list -> values = \@Dict { data } ->
List.map list (\Pair _ v -> v) List.map data (\T _ v -> v)
## Combine two dictionaries by keeping the [union](https://en.wikipedia.org/wiki/Union_(set_theory)) ## Combine two dictionaries by keeping the [union](https://en.wikipedia.org/wiki/Union_(set_theory))
## of all the key-value pairs. This means that all the key-value pairs in ## of all the key-value pairs. This means that all the key-value pairs in
## both dictionaries will be combined. Note that where there are pairs ## both dictionaries will be combined. Note that where there are pairs
## with the same key, the value contained in the first input will be ## with the same key, the value contained in the second input will be
## retained, and the value in the second input will be removed. ## retained, and the value in the first input will be removed.
## ##
## first = ## first =
## Dict.single 1 "Keep Me" ## Dict.single 1 "Not Me"
## |> Dict.insert 2 "And Me" ## |> Dict.insert 2 "And Me"
## ##
## second = ## second =
## Dict.single 1 "Not Me" ## Dict.single 1 "Keep Me"
## |> Dict.insert 3 "Me Too" ## |> Dict.insert 3 "Me Too"
## |> Dict.insert 4 "And Also Me" ## |> Dict.insert 4 "And Also Me"
## ##
@ -294,9 +430,9 @@ values = \@Dict list ->
## ##
## expect ## expect
## Dict.insertAll first second == expected ## Dict.insertAll first second == expected
insertAll : Dict k v, Dict k v -> Dict k v | k has Eq insertAll : Dict k v, Dict k v -> Dict k v | k has Hash & Eq
insertAll = \xs, @Dict ys -> insertAll = \xs, ys ->
List.walk ys xs (\state, Pair k v -> Dict.insertIfVacant state k v) walk ys xs insert
## Combine two dictionaries by keeping the [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory)) ## Combine two dictionaries by keeping the [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory))
## of all the key-value pairs. This means that we keep only those pairs ## of all the key-value pairs. This means that we keep only those pairs
@ -315,10 +451,17 @@ insertAll = \xs, @Dict ys ->
## |> Dict.insert 4 "Or Me" ## |> Dict.insert 4 "Or Me"
## ##
## expect Dict.keepShared first second == first ## expect Dict.keepShared first second == first
keepShared : Dict k v, Dict k v -> Dict k v | k has Eq keepShared : Dict k v, Dict k v -> Dict k v | k has Hash & Eq
keepShared = \@Dict xs, ys -> keepShared = \xs, ys ->
List.keepIf xs (\Pair k _ -> Dict.contains ys k) walk
|> @Dict ys
xs
(\state, k, _ ->
if contains state k then
state
else
remove state k
)
## Remove the key-value pairs in the first input that are also in the second ## Remove the key-value pairs in the first input that are also in the second
## using the [set difference](https://en.wikipedia.org/wiki/Complement_(set_theory)#Relative_complement) ## using the [set difference](https://en.wikipedia.org/wiki/Complement_(set_theory)#Relative_complement)
@ -339,25 +482,193 @@ keepShared = \@Dict xs, ys ->
## |> Dict.insert 2 "And Me" ## |> Dict.insert 2 "And Me"
## ##
## expect Dict.removeAll first second == expected ## expect Dict.removeAll first second == expected
removeAll : Dict k v, Dict k v -> Dict k v | k has Eq removeAll : Dict k v, Dict k v -> Dict k v | k has Hash & Eq
removeAll = \xs, @Dict ys -> removeAll = \xs, ys ->
List.walk ys xs (\state, Pair k _ -> Dict.remove state k) walk ys xs (\state, k, _ -> remove state k)
## Internal helper function to insert a new association # TODO: re-add type definition once #4408 is fixed
## # updateDataIndex : Dict k v, k, Nat -> Dict k v | k has Hash & Eq
## Precondition: `k` should not exist in the Dict yet. updateDataIndex = \@Dict { metadata, dataIndices, data, size }, key, dataIndex ->
insertFresh : Dict k v, k, v -> Dict k v hashKey =
insertFresh = \@Dict list, k, v -> createLowLevelHasher {}
list |> Hash.hash key
|> List.append (Pair k v) |> complete
|> @Dict h1Key = h1 hashKey
h2Key = h2 hashKey
probe = newProbe h1Key (div8 (List.len metadata))
insertIfVacant : Dict k v, k, v -> Dict k v | k has Eq when findIndexHelper metadata dataIndices data h2Key key probe 0 is
insertIfVacant = \dict, key, value -> Ok index ->
if Dict.contains dict key then @Dict {
dict metadata,
dataIndices: List.set dataIndices index dataIndex,
data,
size,
}
Err NotFound ->
# This should be impossible.
@Dict { metadata, dataIndices, data, size }
# Inserts a key that is known not to be in the dictionary yet.
#
# The pair is appended to `data`, and the first empty or deleted slot on the
# key's probe sequence is pointed at it. `size` is stored unchanged: the caller
# (`insert`) has already incremented it before calling `maybeRehash`.
#
# `h1Key` selects the starting probe group and `h2Key` is the 7-bit tag written
# into `metadata` for the chosen slot.
insertNotFoundHelper : Dict k v, k, v, U64, I8 -> Dict k v
insertNotFoundHelper = \@Dict { metadata, dataIndices, data, size }, key, value, h1Key, h2Key ->
    probe = newProbe h1Key (div8 (List.len metadata))
    index = nextEmptyOrDeletedHelper metadata probe 0
    # The new pair goes at the end of `data`, so its position is the length of
    # `data` *before* the append. (Using `List.len data - 1` here would point at
    # the previous element and underflow when the dictionary is empty.)
    dataIndex = List.len data
    nextData = List.append data (T key value)

    @Dict {
        metadata: List.set metadata index h2Key,
        dataIndices: List.set dataIndices index dataIndex,
        data: nextData,
        size,
    }
nextEmptyOrDeletedHelper : List I8, Probe, Nat -> Nat
nextEmptyOrDeletedHelper = \metadata, probe, offset ->
# For inserting, we can use deleted indices.
index = Num.addWrap (mul8 probe.slotIndex) offset
md = listGetUnsafe metadata index
if md < 0 then
# Empty or deleted slot, no possibility of the element.
index
else if offset == 7 then
nextEmptyOrDeletedHelper metadata (nextProbe probe) 0
else else
Dict.insert dict key value nextEmptyOrDeletedHelper metadata probe (Num.addWrap offset 1)
# TODO: investigate if this needs to be split into more specific helper functions.
# There is a chance that returning specific sub-info like the value would be faster.
#
# Walks the probe sequence looking for the slot whose key equals `key`.
# Returns the slot's index into `metadata`/`dataIndices`, or `Err NotFound`
# as soon as an empty slot is reached. Deleted slots do not stop the search.
findIndexHelper : List I8, List Nat, List (T k v), I8, k, Probe, Nat -> Result Nat [NotFound] | k has Hash & Eq
findIndexHelper = \metadata, dataIndices, data, h2Key, key, probe, offset ->
    # For finding a value, we must search past all deleted element tombstones.
    index = Num.addWrap (mul8 probe.slotIndex) offset
    md = listGetUnsafe metadata index

    if md == emptySlot then
        # Empty slot, no possibility of the element.
        Err NotFound
    else
        isMatch =
            if md == h2Key then
                # Tag matches; confirm by comparing the stored key itself.
                dataIndex = listGetUnsafe dataIndices index
                (T k _) = listGetUnsafe data dataIndex

                k == key
            else
                Bool.false

        if isMatch then
            Ok index
        else if offset == 7 then
            # Last slot of this group, move on to the next group.
            findIndexHelper metadata dataIndices data h2Key key (nextProbe probe) 0
        else
            findIndexHelper metadata dataIndices data h2Key key probe (Num.addWrap offset 1)
# Grows the container when needed.
# Below the load factor this returns the dictionary unchanged.
# `size` must already include any elements that are about to be inserted.
maybeRehash : Dict k v -> Dict k v | k has Hash & Eq
maybeRehash = \@Dict { metadata, dataIndices, data, size } ->
    slotCount = List.len dataIndices
    # 7/8 of the slot count is the max load factor.
    loadLimit = slotCount - Num.shiftRightZfBy slotCount 3
    dict = @Dict { metadata, dataIndices, data, size }

    if size <= loadLimit then
        dict
    else
        rehash dict
# TODO: switch rehash to iterate data and eventually clear out tombstones as well.
#
# Doubles the size of the index table and re-registers every live element in it.
# `data` and `size` are carried over untouched; only `metadata` and
# `dataIndices` are rebuilt.
rehash : Dict k v -> Dict k v | k has Hash & Eq
rehash = \@Dict { metadata, dataIndices, data, size } ->
    # Double the number of slots, not the number of stored elements: the probe
    # mask requires the slot count to stay a power of two (and a multiple of 8).
    newLen = 2 * List.len dataIndices
    newDict =
        @Dict {
            metadata: List.repeat emptySlot newLen,
            dataIndices: List.repeat 0 newLen,
            data,
            size,
        }

    rehashHelper newDict metadata dataIndices data 0
# Copies every occupied slot of the old table into `dict`, one slot per step.
#
# `metadata`, `dataIndices` and `data` are the lists of the table being
# replaced; `index` is the old slot currently being examined. Returns `dict`
# once every old slot has been visited.
rehashHelper : Dict k v, List I8, List Nat, List (T k v), Nat -> Dict k v | k has Hash & Eq
rehashHelper = \dict, metadata, dataIndices, data, index ->
    if index >= List.len metadata then
        # Every old slot has been visited. Without this check the recursion
        # would read past the end of `metadata` and never terminate.
        dict
    else
        md = listGetUnsafe metadata index
        nextDict =
            if md >= 0 then
                # We have an actual element here
                dataIndex = listGetUnsafe dataIndices index
                (T k _) = listGetUnsafe data dataIndex

                insertForRehash dict k dataIndex
            else
                # Empty or deleted data
                dict

        rehashHelper nextDict metadata dataIndices data (index + 1)
# Registers an element that already lives at `dataIndex` in `data` into the
# index table: the key is hashed again and the first empty or deleted slot on
# its probe sequence is pointed at `dataIndex`. `data` and `size` are unchanged.
insertForRehash : Dict k v, k, Nat -> Dict k v | k has Hash & Eq
insertForRehash = \@Dict { metadata, dataIndices, data, size }, key, dataIndex ->
    hashed = complete (Hash.hash (createLowLevelHasher {}) key)
    start = newProbe (h1 hashed) (div8 (List.len metadata))
    slot = nextEmptyOrDeletedHelper metadata start 0

    @Dict {
        metadata: List.set metadata slot (h2 hashed),
        dataIndices: List.set dataIndices slot dataIndex,
        data,
        size,
    }
# Metadata marker for a slot that has never held an element.
# `findIndexHelper` stops searching when it reaches one.
emptySlot : I8
emptySlot = -128
# Metadata marker written by `remove` for a slot whose element was removed.
# Lookups continue past it; `nextEmptyOrDeletedHelper` treats any negative
# metadata value (empty or deleted) as free for reuse.
deletedSlot : I8
deletedSlot = -2
# A key/value pair as stored in the dictionary's `data` list.
T k v : [T k v]
# State of a probe sequence over the index table.
# Capacity must be a power of 2.
# We still will use slots of 8 even though this version has no true slots.
# We just move an element at a time.
# Thus, the true index is slotIndex * 8 + offset.
#
# slotIndex: the group of 8 slots currently being examined.
# probeI: the step added to slotIndex by the next `nextProbe` call; it starts
#         at 1 and grows by 1 on every call.
# mask: number of groups minus 1, used to wrap slotIndex with a bitwise and.
Probe : { slotIndex : Nat, probeI : Nat, mask : Nat }
# Starts a probe sequence for the hash bits `h1Key` over a table that has
# `slots` groups of 8. The first group is `h1Key` wrapped by `slots - 1`.
newProbe : U64, Nat -> Probe
newProbe = \h1Key, slots ->
    mask = Num.subSaturated slots 1
    slotIndex =
        h1Key
        |> Num.toNat
        |> Num.bitwiseAnd mask

    { slotIndex, probeI: 1, mask }
# Advances a probe to the next group: the group index moves forward by the
# current step (wrapped by the mask) and the step grows by one.
nextProbe : Probe -> Probe
nextProbe = \probe ->
    advanced = Num.addWrap probe.slotIndex probe.probeI

    {
        slotIndex: Num.bitwiseAnd advanced probe.mask,
        probeI: Num.addWrap probe.probeI 1,
        mask: probe.mask,
    }
# Multiplies by 8 using a left shift of 3 bits (group index -> first slot index).
mul8 = \val -> Num.shiftLeftBy val 3
# Divides by 8 using a zero-filling right shift of 3 bits (slot count -> group count).
div8 = \val -> Num.shiftRightZfBy val 3
# The part of the hash used to pick the starting probe group:
# everything above the low 7 bits.
h1 : U64 -> U64
h1 = \hashKey ->
    hashKey |> Num.shiftRightZfBy 7
# The part of the hash stored in `metadata` as a slot tag: the low 7 bits,
# so the result is never negative and cannot collide with the empty or
# deleted markers.
h2 : U64 -> I8
h2 = \hashKey ->
    hashKey
    |> Num.bitwiseAnd 0b0111_1111
    |> Num.toI8
# We have decided not to expose the standard roc hashing algorithm. # We have decided not to expose the standard roc hashing algorithm.
# This is to avoid external dependence and the need for versioning. # This is to avoid external dependence and the need for versioning.

View file

@ -1400,26 +1400,29 @@ define_builtins! {
0 DICT_DICT: "Dict" exposed_type=true // the Dict.Dict type alias 0 DICT_DICT: "Dict" exposed_type=true // the Dict.Dict type alias
1 DICT_EMPTY: "empty" 1 DICT_EMPTY: "empty"
2 DICT_SINGLE: "single" 2 DICT_SINGLE: "single"
3 DICT_GET: "get" 3 DICT_CLEAR: "clear"
4 DICT_GET_RESULT: "#get_result" // symbol used in the definition of Dict.get 4 DICT_LEN: "len"
5 DICT_WALK: "walk" 5 DICT_GET: "get"
6 DICT_INSERT: "insert" 6 DICT_GET_RESULT: "#get_result" // symbol used in the definition of Dict.get
7 DICT_LEN: "len" 7 DICT_CONTAINS: "contains"
8 DICT_INSERT: "insert"
9 DICT_REMOVE: "remove"
8 DICT_REMOVE: "remove" 10 DICT_WALK: "walk"
9 DICT_CONTAINS: "contains" 11 DICT_FROM_LIST: "fromList"
10 DICT_KEYS: "keys" 12 DICT_TO_LIST: "toList"
11 DICT_VALUES: "values" 13 DICT_KEYS: "keys"
14 DICT_VALUES: "values"
12 DICT_INSERT_ALL: "insertAll" // union 15 DICT_INSERT_ALL: "insertAll" // union
13 DICT_KEEP_SHARED: "keepShared" // intersection 16 DICT_KEEP_SHARED: "keepShared" // intersection
14 DICT_REMOVE_ALL: "removeAll" // difference 17 DICT_REMOVE_ALL: "removeAll" // difference
15 DICT_WITH_CAPACITY: "withCapacity" 18 DICT_WITH_CAPACITY: "withCapacity"
16 DICT_CAPACITY: "capacity" 19 DICT_CAPACITY: "capacity"
17 DICT_UPDATE: "update" 20 DICT_UPDATE: "update"
18 DICT_LIST_GET_UNSAFE: "listGetUnsafe" 21 DICT_LIST_GET_UNSAFE: "listGetUnsafe"
} }
9 SET: "Set" => { 9 SET: "Set" => {
0 SET_SET: "Set" exposed_type=true // the Set.Set type alias 0 SET_SET: "Set" exposed_type=true // the Set.Set type alias