Merge branch 'main' into list-walk-with-index-until

2025-08-04 04:08:19 +00:00 · 2023-12-12 07:54:42 -05:00 · 2023-12-12 07:54:42 -05:00 · 2db4cae9d1
commit 2db4cae9d1
parent 80dd0b6b93 012d93574a
4 changed files with 609 additions and 506 deletions
--- a/crates/compiler/builtins/roc/Dict.roc
+++ b/crates/compiler/builtins/roc/Dict.roc
@ -538,20 +538,39 @@ update = \@Dict { buckets, data, maxBucketCapacity, maxLoadFactor, shifts }, key
        Err KeyNotFound ->
            when alter Missing is
                Present newValue ->
-                    if maxBucketCapacity == 0 then
+                    if List.len data >= (Num.toNat maxBucketCapacity) then
                        # Need to reallocate let regular insert handle that.
                        insert (@Dict { buckets, data, maxBucketCapacity, maxLoadFactor, shifts }) key newValue
                    else
                        # Can skip work by jumping staight to the found bucket.
                        # That will be the location we want to insert in.
                        hash = hashKey key
-                        distAndFingerprint = distAndFingerprintFromHash hash
+                        baseDistAndFingerprint = distAndFingerprintFromHash hash
+                        baseBucketIndex = bucketIndexFromHash hash shifts

-                        insertHelper buckets data bucketIndex distAndFingerprint key newValue maxBucketCapacity maxLoadFactor shifts
+                        # Due to the unrolling of loops in find along with loop optimizations,
+                        # The bucketIndex is not guaranteed to be correct here.
+                        # It is only correct if we have traversed past the number of find unrolls.
+                        dist = circularDist baseBucketIndex bucketIndex (List.len buckets)
+                        if dist <= findManualUnrolls then
+                            insertHelper buckets data baseBucketIndex baseDistAndFingerprint key newValue maxBucketCapacity maxLoadFactor shifts
+                        else
+                            distAndFingerprint = incrementDistN baseDistAndFingerprint (Num.toU32 dist)
+                            insertHelper buckets data bucketIndex distAndFingerprint key newValue maxBucketCapacity maxLoadFactor shifts

                Missing ->
                    @Dict { buckets, data, maxBucketCapacity, maxLoadFactor, shifts }

+circularDist = \start, end, size ->
+    correction =
+        if start > end then
+            size
+        else
+            0
+    end
+    |> Num.subWrap start
+    |> Num.addWrap correction
+
 ## Returns the keys and values of a dictionary as a [List].
 ## This requires allocating a temporary list, prefer using [Dict.toList] or [Dict.walk] instead.
 ## ```
@ -709,21 +728,26 @@ maxBucketCount = maxSize
 incrementDist = \distAndFingerprint ->
    distAndFingerprint + distInc

+incrementDistN = \distAndFingerprint, n ->
+    distAndFingerprint + (n * distInc)
+
 decrementDist = \distAndFingerprint ->
    distAndFingerprint - distInc

 find : Dict k v, k -> { bucketIndex : Nat, result : Result v [KeyNotFound] }
 find = \@Dict { buckets, data, shifts }, key ->
-    if !(List.isEmpty data) then
-        hash = hashKey key
-        distAndFingerprint = distAndFingerprintFromHash hash
-        bucketIndex = bucketIndexFromHash hash shifts
+    hash = hashKey key
+    distAndFingerprint = distAndFingerprintFromHash hash
+    bucketIndex = bucketIndexFromHash hash shifts

+    if !(List.isEmpty data) then
        # TODO: this is true in the C++ code, confirm it in Roc as well.
        # unrolled loop. *Always* check a few directly, then enter the loop. This is faster.
        findFirstUnroll buckets bucketIndex distAndFingerprint data key
    else
-        { bucketIndex: 0, result: Err KeyNotFound }
+        { bucketIndex, result: Err KeyNotFound }
+
+findManualUnrolls = 2

 findFirstUnroll : List Bucket, Nat, U32, List (k, v), k -> { bucketIndex : Nat, result : Result v [KeyNotFound] } where k implements Eq
 findFirstUnroll = \buckets, bucketIndex, distAndFingerprint, data, key ->
@ -890,16 +914,16 @@ nextWhileLessHelper = \buckets, bucketIndex, distAndFingerprint ->
    else
        (bucketIndex, distAndFingerprint)

-placeAndShiftUp = \buckets0, bucket0, bucketIndex ->
+placeAndShiftUp = \buckets0, bucket, bucketIndex ->
    loaded = listGetUnsafe buckets0 (Num.toNat bucketIndex)
    if loaded.distAndFingerprint != 0 then
-        { list: buckets1, value: bucket1 } = List.replace buckets0 (Num.toNat bucketIndex) bucket0
+        buckets1 = List.set buckets0 (Num.toNat bucketIndex) bucket
        placeAndShiftUp
            buckets1
-            { bucket1 & distAndFingerprint: incrementDist bucket1.distAndFingerprint }
+            { loaded & distAndFingerprint: incrementDist loaded.distAndFingerprint }
            (nextBucketIndex bucketIndex (List.len buckets1))
    else
-        List.set buckets0 (Num.toNat bucketIndex) bucket0
+        List.set buckets0 (Num.toNat bucketIndex) bucket

 nextBucketIndex = \bucketIndex, maxBuckets ->
    # I just ported this impl directly.
@ -1168,6 +1192,48 @@ expect
    |> get 7
    |> Bool.isEq (Ok "Testing")

+# All BadKey's hash to the same location.
+# This is needed to test some robinhood logic.
+BadKey := U64 implements [
+        Eq,
+        Hash {
+            hash: hashBadKey,
+        },
+    ]
+
+hashBadKey : hasher, BadKey -> hasher where hasher implements Hasher
+hashBadKey = \hasher, _ -> Hash.hash hasher 0
+
+expect
+    badKeys = [
+        @BadKey 0,
+        @BadKey 1,
+        @BadKey 2,
+        @BadKey 3,
+        @BadKey 4,
+        @BadKey 5,
+        @BadKey 6,
+        @BadKey 5,
+        @BadKey 4,
+        @BadKey 3,
+        @BadKey 3,
+        @BadKey 3,
+        @BadKey 10,
+    ]
+
+    dict =
+        acc, k <- List.walk badKeys (Dict.empty {})
+        Dict.update acc k \val ->
+            when val is
+                Present p -> Present (p + 1)
+                Missing -> Present 0
+
+    allInsertedCorrectly =
+        acc, k <- List.walk badKeys Bool.true
+        acc && Dict.contains dict k
+
+    allInsertedCorrectly
+
 # Note, there are a number of places we should probably use set and replace unsafe.
 # unsafe primitive that does not perform a bounds check
 listGetUnsafe : List a, Nat -> a