Merge pull request #6256 from roc-lang/dict-update-fix

Correct Dict.update to use proper index
This commit is contained in:
Brendan Hansknecht 2023-12-12 01:52:35 -08:00 committed by GitHub
commit 012d93574a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 872 additions and 808 deletions

View file

@ -538,20 +538,39 @@ update = \@Dict { buckets, data, maxBucketCapacity, maxLoadFactor, shifts }, key
Err KeyNotFound ->
when alter Missing is
Present newValue ->
if maxBucketCapacity == 0 then
if List.len data >= (Num.toNat maxBucketCapacity) then
# Need to reallocate let regular insert handle that.
insert (@Dict { buckets, data, maxBucketCapacity, maxLoadFactor, shifts }) key newValue
else
# Can skip work by jumping staight to the found bucket.
# That will be the location we want to insert in.
hash = hashKey key
distAndFingerprint = distAndFingerprintFromHash hash
baseDistAndFingerprint = distAndFingerprintFromHash hash
baseBucketIndex = bucketIndexFromHash hash shifts
insertHelper buckets data bucketIndex distAndFingerprint key newValue maxBucketCapacity maxLoadFactor shifts
# Due to the unrolling of loops in find along with loop optimizations,
# The bucketIndex is not guaranteed to be correct here.
# It is only correct if we have traversed past the number of find unrolls.
dist = circularDist baseBucketIndex bucketIndex (List.len buckets)
if dist <= findManualUnrolls then
insertHelper buckets data baseBucketIndex baseDistAndFingerprint key newValue maxBucketCapacity maxLoadFactor shifts
else
distAndFingerprint = incrementDistN baseDistAndFingerprint (Num.toU32 dist)
insertHelper buckets data bucketIndex distAndFingerprint key newValue maxBucketCapacity maxLoadFactor shifts
Missing ->
@Dict { buckets, data, maxBucketCapacity, maxLoadFactor, shifts }
circularDist = \start, end, size ->
correction =
if start > end then
size
else
0
end
|> Num.subWrap start
|> Num.addWrap correction
## Returns the keys and values of a dictionary as a [List].
## This requires allocating a temporary list, prefer using [Dict.toList] or [Dict.walk] instead.
## ```
@ -709,21 +728,26 @@ maxBucketCount = maxSize
incrementDist = \distAndFingerprint ->
distAndFingerprint + distInc
incrementDistN = \distAndFingerprint, n ->
distAndFingerprint + (n * distInc)
decrementDist = \distAndFingerprint ->
distAndFingerprint - distInc
find : Dict k v, k -> { bucketIndex : Nat, result : Result v [KeyNotFound] }
find = \@Dict { buckets, data, shifts }, key ->
if !(List.isEmpty data) then
hash = hashKey key
distAndFingerprint = distAndFingerprintFromHash hash
bucketIndex = bucketIndexFromHash hash shifts
hash = hashKey key
distAndFingerprint = distAndFingerprintFromHash hash
bucketIndex = bucketIndexFromHash hash shifts
if !(List.isEmpty data) then
# TODO: this is true in the C++ code, confirm it in Roc as well.
# unrolled loop. *Always* check a few directly, then enter the loop. This is faster.
findFirstUnroll buckets bucketIndex distAndFingerprint data key
else
{ bucketIndex: 0, result: Err KeyNotFound }
{ bucketIndex, result: Err KeyNotFound }
findManualUnrolls = 2
findFirstUnroll : List Bucket, Nat, U32, List (k, v), k -> { bucketIndex : Nat, result : Result v [KeyNotFound] } where k implements Eq
findFirstUnroll = \buckets, bucketIndex, distAndFingerprint, data, key ->
@ -890,16 +914,16 @@ nextWhileLessHelper = \buckets, bucketIndex, distAndFingerprint ->
else
(bucketIndex, distAndFingerprint)
placeAndShiftUp = \buckets0, bucket0, bucketIndex ->
placeAndShiftUp = \buckets0, bucket, bucketIndex ->
loaded = listGetUnsafe buckets0 (Num.toNat bucketIndex)
if loaded.distAndFingerprint != 0 then
{ list: buckets1, value: bucket1 } = List.replace buckets0 (Num.toNat bucketIndex) bucket0
buckets1 = List.set buckets0 (Num.toNat bucketIndex) bucket
placeAndShiftUp
buckets1
{ bucket1 & distAndFingerprint: incrementDist bucket1.distAndFingerprint }
{ loaded & distAndFingerprint: incrementDist loaded.distAndFingerprint }
(nextBucketIndex bucketIndex (List.len buckets1))
else
List.set buckets0 (Num.toNat bucketIndex) bucket0
List.set buckets0 (Num.toNat bucketIndex) bucket
nextBucketIndex = \bucketIndex, maxBuckets ->
# I just ported this impl directly.
@ -1168,6 +1192,48 @@ expect
|> get 7
|> Bool.isEq (Ok "Testing")
# All BadKey's hash to the same location.
# This is needed to test some robinhood logic.
BadKey := U64 implements [
Eq,
Hash {
hash: hashBadKey,
},
]
hashBadKey : hasher, BadKey -> hasher where hasher implements Hasher
hashBadKey = \hasher, _ -> Hash.hash hasher 0
expect
badKeys = [
@BadKey 0,
@BadKey 1,
@BadKey 2,
@BadKey 3,
@BadKey 4,
@BadKey 5,
@BadKey 6,
@BadKey 5,
@BadKey 4,
@BadKey 3,
@BadKey 3,
@BadKey 3,
@BadKey 10,
]
dict =
acc, k <- List.walk badKeys (Dict.empty {})
Dict.update acc k \val ->
when val is
Present p -> Present (p + 1)
Missing -> Present 0
allInsertedCorrectly =
acc, k <- List.walk badKeys Bool.true
acc && Dict.contains dict k
allInsertedCorrectly
# Note, there are a number of places we should probably use set and replace unsafe.
# unsafe primitive that does not perform a bounds check
listGetUnsafe : List a, Nat -> a

View file

@ -1,29 +1,29 @@
procedure Dict.1 (Dict.692):
let Dict.701 : List {U32, U32} = Array [];
let Dict.702 : List {[], []} = Array [];
let Dict.703 : U64 = 0i64;
let Dict.42 : Float32 = CallByName Dict.42;
let Dict.43 : U8 = CallByName Dict.43;
let Dict.700 : {List {U32, U32}, List {[], []}, U64, Float32, U8} = Struct {Dict.701, Dict.702, Dict.703, Dict.42, Dict.43};
ret Dict.700;
procedure Dict.1 (Dict.726):
let Dict.735 : List {U32, U32} = Array [];
let Dict.736 : List {[], []} = Array [];
let Dict.737 : U64 = 0i64;
let Dict.44 : Float32 = CallByName Dict.44;
let Dict.45 : U8 = CallByName Dict.45;
let Dict.734 : {List {U32, U32}, List {[], []}, U64, Float32, U8} = Struct {Dict.735, Dict.736, Dict.737, Dict.44, Dict.45};
ret Dict.734;
procedure Dict.4 (Dict.698):
let Dict.150 : List {[], []} = StructAtIndex 1 Dict.698;
let #Derived_gen.0 : List {U32, U32} = StructAtIndex 0 Dict.698;
procedure Dict.4 (Dict.732):
let Dict.157 : List {[], []} = StructAtIndex 1 Dict.732;
let #Derived_gen.0 : List {U32, U32} = StructAtIndex 0 Dict.732;
dec #Derived_gen.0;
let Dict.699 : U64 = CallByName List.6 Dict.150;
dec Dict.150;
ret Dict.699;
let Dict.733 : U64 = CallByName List.6 Dict.157;
dec Dict.157;
ret Dict.733;
procedure Dict.42 ():
let Dict.707 : Float32 = 0.8f64;
ret Dict.707;
procedure Dict.44 ():
let Dict.741 : Float32 = 0.8f64;
ret Dict.741;
procedure Dict.43 ():
let Dict.705 : U8 = 64i64;
let Dict.706 : U8 = 3i64;
let Dict.704 : U8 = CallByName Num.20 Dict.705 Dict.706;
ret Dict.704;
procedure Dict.45 ():
let Dict.739 : U8 = 64i64;
let Dict.740 : U8 = 3i64;
let Dict.738 : U8 = CallByName Num.20 Dict.739 Dict.740;
ret Dict.738;
procedure List.6 (#Attr.2):
let List.553 : U64 = lowlevel ListLen #Attr.2;

File diff suppressed because it is too large Load diff

View file

@ -3,22 +3,22 @@
app "test" provides [main] to "./platform"
f = \{} ->
#^{-1} <2918><117>{} -<120>[[f(1)]]-> <116>[Ok <2926>{}]<80>*
#^{-1} <2937><117>{} -<120>[[f(1)]]-> <116>[Ok <2945>{}]<80>*
when g {} is
# ^ <2908><2926>{} -<2916>[[g(2)]]-> <72>[Ok <2926>{}]<102>*
# ^ <2927><2945>{} -<2935>[[g(2)]]-> <72>[Ok <2945>{}]<102>*
_ -> Ok {}
g = \{} ->
#^{-1} <2908><2926>{} -<2916>[[g(2)]]-> <72>[Ok <2926>{}]<102>*
#^{-1} <2927><2945>{} -<2935>[[g(2)]]-> <72>[Ok <2945>{}]<102>*
when h {} is
# ^ <2913><2926>{} -<2921>[[h(3)]]-> <94>[Ok <2926>{}]<124>*
# ^ <2932><2945>{} -<2940>[[h(3)]]-> <94>[Ok <2945>{}]<124>*
_ -> Ok {}
h = \{} ->
#^{-1} <2913><2926>{} -<2921>[[h(3)]]-> <94>[Ok <2926>{}]<124>*
#^{-1} <2932><2945>{} -<2940>[[h(3)]]-> <94>[Ok <2945>{}]<124>*
when f {} is
# ^ <2918><117>{} -<120>[[f(1)]]-> <116>[Ok <2926>{}]<80>*
# ^ <2937><117>{} -<120>[[f(1)]]-> <116>[Ok <2945>{}]<80>*
_ -> Ok {}
main = f {}
# ^ <2928><133>{} -<136>[[f(1)]]-> <138>[Ok <2926>{}]<2927>w_a
# ^ <2947><133>{} -<136>[[f(1)]]-> <138>[Ok <2945>{}]<2946>w_a