mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-13 07:16:18 +00:00
Merge branch 'roc-lang:main' into updating-docs
This commit is contained in:
commit
a99fe32bff
15 changed files with 914 additions and 72 deletions
|
@ -21,7 +21,9 @@ interface Dict
|
|||
Bool.{ Bool, Eq },
|
||||
Result.{ Result },
|
||||
List,
|
||||
Num.{ Nat },
|
||||
Str,
|
||||
Num.{ Nat, U64, U8 },
|
||||
Hash.{ Hasher },
|
||||
]
|
||||
|
||||
## A [dictionary](https://en.wikipedia.org/wiki/Associative_array) that lets you associate keys with values.
|
||||
|
@ -206,3 +208,412 @@ insertIfVacant = \dict, key, value ->
|
|||
dict
|
||||
else
|
||||
Dict.insert dict key value
|
||||
|
||||
# The standard Roc hashing algorithm is deliberately not exposed.
# Keeping it private avoids external dependence on it and any need for versioning.
# The current implementation is a form of [Wyhash final3](https://github.com/wangyi-fudan/wyhash/blob/a5995b98ebfa7bd38bfadc0919326d2e7aabb805/wyhash.h).
# It is currently specific to 64bit, little endian targets.
# TODO: wyhash is slow for large keys, use something like cityhash if the keys are too long.
# TODO: Add a builtin to distinguish big endian systems and change loading orders.
# TODO: Switch out Wymum on systems with slow 128bit multiplication.
#
# `originalSeed` is the seed the hasher was created with; the `add*` functions
# below carry it through unchanged. `state` is the running hash, which
# `complete` returns.
LowLevelHasher := { originalSeed : U64, state : U64 } has [
        Hasher {
            addBytes,
            addU8,
            addU16,
            addU32,
            addU64,
            addU128,
            addI8,
            addI16,
            addI32,
            addI64,
            addI128,
            complete,
        },
    ]
|
||||
|
||||
# Unsafe primitive: reads the element at the given index WITHOUT a bounds check.
# Callers are responsible for passing an index that is inside the list.
# TODO hide behind an InternalList.roc module
listGetUnsafe : List a, Nat -> a
|
||||
|
||||
# Builds a fresh hasher. Both the remembered seed and the initial running state
# start out as `seed`. When `seed` is omitted it defaults to
# 0x526F_6352_616E_643F (the ASCII bytes of "RocRand?").
createLowLevelHasher : { seed ?U64 } -> LowLevelHasher
createLowLevelHasher = \{ seed ? 0x526F_6352_616E_643F } ->
    @LowLevelHasher { state: seed, originalSeed: seed }
|
||||
|
||||
# Folds one already-loaded value into the hasher.
#
# `a` and `b` are the words read from the value, `seed` is the seed used for
# this value, and `length` is the value's size in bytes. The value is first
# hashed on its own (two `wymix` rounds), and that hash is then mixed into the
# running `state`. `originalSeed` is passed through untouched.
combineState : LowLevelHasher, { a : U64, b : U64, seed : U64, length : U64 } -> LowLevelHasher
combineState = \@LowLevelHasher { originalSeed, state }, { a, b, seed, length } ->
    mixedWords =
        Num.bitwiseXor wyp1 a
        |> wymix (Num.bitwiseXor seed b)
    valueHash =
        Num.bitwiseXor wyp1 length
        |> wymix mixedWords

    @LowLevelHasher { originalSeed, state: wymix state valueHash }
|
||||
|
||||
# Finishes hashing: the result is simply the hasher's running state.
complete = \@LowLevelHasher hasher -> hasher.state
|
||||
|
||||
# Hashes an I8 by converting it with `Num.toU8` and delegating to `addU8`.
addI8 = \hasher, i8 ->
    unsigned = Num.toU8 i8

    addU8 hasher unsigned
|
||||
# Hashes an I16 by converting it with `Num.toU16` and delegating to `addU16`.
addI16 = \hasher, i16 ->
    unsigned = Num.toU16 i16

    addU16 hasher unsigned
|
||||
# Hashes an I32 by converting it with `Num.toU32` and delegating to `addU32`.
addI32 = \hasher, i32 ->
    unsigned = Num.toU32 i32

    addU32 hasher unsigned
|
||||
# Hashes an I64 by converting it with `Num.toU64` and delegating to `addU64`.
addI64 = \hasher, i64 ->
    unsigned = Num.toU64 i64

    addU64 hasher unsigned
|
||||
# Hashes an I128 by converting it with `Num.toU128` and delegating to `addU128`.
addI128 = \hasher, i128 ->
    unsigned = Num.toU128 i128

    addU128 hasher unsigned
|
||||
|
||||
# These implementations hash each value individually with the seed and then mix
|
||||
# the resulting hash with the state. There are other options that may be faster
|
||||
# like using the output of the last hash as the seed to the current hash.
|
||||
# I am simply not sure of the tradeoffs here. Theoretically this method is more sound.
|
||||
# Either way, the performance will be similar and we can change this later.
|
||||
# Hashes a single byte.
# The byte is packed as (p << 16) | (p << 8) | p, which is what `wyr3` yields
# for a one-byte list, so this agrees with `addBytes` on the same byte.
addU8 = \@LowLevelHasher { originalSeed, state }, u8 ->
    byte = Num.toU64 u8
    high = Num.shiftLeftBy byte 16
    middle = Num.shiftLeftBy byte 8
    packed = Num.bitwiseOr (Num.bitwiseOr high middle) byte
    seed = Num.bitwiseXor originalSeed wyp0

    combineState (@LowLevelHasher { originalSeed, state }) { a: packed, b: 0, seed, length: 1 }
|
||||
|
||||
# Hashes a U16 as its two bytes in little endian order (low byte first).
# The packing is (p[0] << 16) | (p[1] << 8) | p[1], which is what `wyr3`
# yields for a two-byte list; the high byte appearing twice is intentional.
addU16 = \@LowLevelHasher { originalSeed, state }, u16 ->
    lowByte = Num.toU64 (Num.bitwiseAnd u16 0xFF)
    highByte = Num.toU64 (Num.shiftRightZfBy u16 8)
    packed =
        Num.bitwiseOr (Num.shiftLeftBy lowByte 16) (Num.shiftLeftBy highByte 8)
        |> Num.bitwiseOr highByte
    seed = Num.bitwiseXor originalSeed wyp0

    combineState (@LowLevelHasher { originalSeed, state }) { a: packed, b: 0, seed, length: 2 }
|
||||
|
||||
# Hashes a U32.
# Both input words are the 32-bit value repeated in the upper and lower halves
# of a U64, matching what `addBytes` builds for a four-byte list.
addU32 = \@LowLevelHasher { originalSeed, state }, u32 ->
    word = Num.toU64 u32
    doubled = Num.bitwiseOr (Num.shiftLeftBy word 32) word
    seed = Num.bitwiseXor originalSeed wyp0

    combineState (@LowLevelHasher { originalSeed, state }) { a: doubled, b: doubled, seed, length: 4 }
|
||||
|
||||
# Hashes a U64.
# The value is split into its low and high 32-bit words; `a` is (low << 32) | high
# and `b` is (high << 32) | low, matching what `addBytes` builds for an
# eight-byte little endian list.
addU64 = \@LowLevelHasher { originalSeed, state }, u64 ->
    lowWord = Num.bitwiseAnd 0xFFFF_FFFF u64
    highWord = Num.shiftRightZfBy u64 32
    a = Num.bitwiseOr (Num.shiftLeftBy lowWord 32) highWord
    b = Num.bitwiseOr (Num.shiftLeftBy highWord 32) lowWord
    seed = Num.bitwiseXor originalSeed wyp0

    combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 8 }
|
||||
|
||||
# Hashes a U128.
# The value is split into four 32-bit words, word0 (least significant) through
# word3 (most significant). `a` is (word0 << 32) | word2 and `b` is
# (word3 << 32) | word1, matching what `addBytes` builds for a sixteen-byte
# little endian list.
addU128 = \@LowLevelHasher { originalSeed, state }, u128 ->
    lowHalf = Num.toU64 u128
    highHalf = Num.toU64 (Num.shiftRightZfBy u128 64)
    word0 = Num.bitwiseAnd 0xFFFF_FFFF lowHalf
    word1 = Num.bitwiseAnd (Num.shiftRightZfBy lowHalf 32) 0xFFFF_FFFF
    word2 = Num.bitwiseAnd 0xFFFF_FFFF highHalf
    word3 = Num.bitwiseAnd (Num.shiftRightZfBy highHalf 32) 0xFFFF_FFFF
    a = Num.bitwiseOr (Num.shiftLeftBy word0 32) word2
    b = Num.bitwiseOr (Num.shiftLeftBy word3 32) word1
    seed = Num.bitwiseXor originalSeed wyp0

    combineState (@LowLevelHasher { originalSeed, state }) { a, b, seed, length: 16 }
|
||||
|
||||
# Hashes a whole list of bytes as one value.
#
# The loading strategy depends on the length of the list:
#   - more than 48 bytes: 48-byte rounds (`hashBytesHelper48`)
#   - 17 to 48 bytes:     16-byte rounds (`hashBytesHelper16`)
#   - 4 to 16 bytes:      four overlapping 4-byte reads (`wyr4`)
#   - 1 to 3 bytes:       a single `wyr3` read
#   - empty:              both words are 0
# Whatever the path, the resulting words, seed and length go to `combineState`.
addBytes : LowLevelHasher, List U8 -> LowLevelHasher
addBytes = \@LowLevelHasher { originalSeed, state }, list ->
    length = List.len list
    seed = Num.bitwiseXor originalSeed wyp0
    loaded =
        if length > 48 then
            hashBytesHelper48 seed seed seed list 0 length
        else if length > 16 then
            hashBytesHelper16 seed list 0 length
        else if length >= 4 then
            # Offset of the second read: 0 for lengths 4-7, 4 for 8-15, 8 for 16.
            midOffset = Num.shiftLeftBy (Num.shiftRightZfBy length 3) 2
            tailStart = Num.subWrap length 4
            a =
                Num.shiftLeftBy (wyr4 list 0) 32
                |> Num.bitwiseOr (wyr4 list midOffset)
            b =
                Num.shiftLeftBy (wyr4 list tailStart) 32
                |> Num.bitwiseOr (wyr4 list (Num.subWrap tailStart midOffset))

            { a, b, seed }
        else if length > 0 then
            { a: wyr3 list 0 length, b: 0, seed }
        else
            { a: 0, b: 0, seed }

    combineState (@LowLevelHasher { originalSeed, state }) { a: loaded.a, b: loaded.b, seed: loaded.seed, length: Num.toU64 length }
|
||||
|
||||
# Consumes the list in 48-byte rounds, keeping three independent seeds.
#
# Each round reads six 8-byte words starting at `index` and mixes them pairwise
# into `seed`, `see1` and `see2`. Once 48 or fewer bytes remain, the three seeds
# are xor-ed together; any remainder larger than 16 bytes is handed to
# `hashBytesHelper16`, otherwise the last 16 bytes of the input are read as the
# final `a` and `b` words.
# `addBytes` only calls this with more than 48 bytes remaining; the reads are
# unchecked, so that precondition matters.
hashBytesHelper48 : U64, U64, U64, List U8, Nat, Nat -> { a : U64, b : U64, seed : U64 }
hashBytesHelper48 = \seed, see1, see2, list, index, remaining ->
    wordAt = \offset -> wyr8 list (Num.addWrap index offset)
    lane0 = wymix (Num.bitwiseXor (wordAt 0) wyp1) (Num.bitwiseXor (wordAt 8) seed)
    lane1 = wymix (Num.bitwiseXor (wordAt 16) wyp2) (Num.bitwiseXor (wordAt 24) see1)
    lane2 = wymix (Num.bitwiseXor (wordAt 32) wyp3) (Num.bitwiseXor (wordAt 40) see2)
    bytesLeft = Num.subWrap remaining 48
    nextIndex = Num.addWrap index 48

    if bytesLeft > 48 then
        hashBytesHelper48 lane0 lane1 lane2 list nextIndex bytesLeft
    else
        mergedSeed = Num.bitwiseXor lane2 (Num.bitwiseXor lane1 lane0)

        if bytesLeft > 16 then
            hashBytesHelper16 mergedSeed list nextIndex bytesLeft
        else
            # bytesLeft - 16 may wrap below zero; adding nextIndex wraps it back
            # to the position 16 (resp. 8) bytes before the end of the input.
            aIndex = Num.addWrap (Num.subWrap bytesLeft 16) nextIndex
            bIndex = Num.addWrap (Num.subWrap bytesLeft 8) nextIndex

            { a: wyr8 list aIndex, b: wyr8 list bIndex, seed: mergedSeed }
|
||||
|
||||
# Consumes the list in 16-byte rounds.
#
# Each round reads two 8-byte words starting at `index` and mixes them into the
# seed. Once 16 or fewer bytes remain, the last 16 bytes of the input are read
# as the final `a` and `b` words (these may overlap bytes already consumed).
# Callers in this file only invoke it with more than 16 bytes remaining; the
# reads are unchecked, so that precondition matters.
hashBytesHelper16 : U64, List U8, Nat, Nat -> { a : U64, b : U64, seed : U64 }
hashBytesHelper16 = \seed, list, index, remaining ->
    firstWord = wyr8 list index
    secondWord = wyr8 list (Num.addWrap index 8)
    mixedSeed = wymix (Num.bitwiseXor firstWord wyp1) (Num.bitwiseXor secondWord seed)
    bytesLeft = Num.subWrap remaining 16
    nextIndex = Num.addWrap index 16

    if bytesLeft > 16 then
        hashBytesHelper16 mixedSeed list nextIndex bytesLeft
    else
        # bytesLeft - 16 may wrap below zero; adding nextIndex wraps it back
        # to the position 16 (resp. 8) bytes before the end of the input.
        aIndex = Num.addWrap (Num.subWrap bytesLeft 16) nextIndex
        bIndex = Num.addWrap (Num.subWrap bytesLeft 8) nextIndex

        { a: wyr8 list aIndex, b: wyr8 list bIndex, seed: mixedSeed }
|
||||
|
||||
# Fixed 64-bit mixing constants used throughout the hasher.
# They match the default secret of the wyhash reference implementation.
wyp0 : U64
wyp0 = 0xA076_1D64_78BD_642F
wyp1 : U64
wyp1 = 0xE703_7ED1_A0B4_28DB
wyp2 : U64
wyp2 = 0x8EBC_6AF0_9C88_C6E3
wyp3 : U64
wyp3 = 0x5899_65CC_7537_4CC3
|
||||
|
||||
# Mixes two words: multiplies them to 128 bits (`wymum`) and xors the two
# 64-bit halves of the product together.
wymix : U64, U64 -> U64
wymix = \a, b ->
    product = wymum a b

    Num.bitwiseXor product.lower product.upper
|
||||
|
||||
# Full 64x64 -> 128 bit multiplication, returned as its low and high halves.
wymum : U64, U64 -> { lower : U64, upper : U64 }
wymum = \a, b ->
    # Widen first so the product cannot overflow.
    product = Num.toU128 a * Num.toU128 b

    # This is the more robust form.
    # { lower: Num.bitwiseXor a lower, upper: Num.bitwiseXor b upper }
    {
        lower: Num.toU64 product,
        upper: Num.toU64 (Num.shiftRightZfBy product 64),
    }
|
||||
|
||||
# Reads the 8 bytes starting at `index` as a little endian U64.
# The reads are unchecked: the caller must guarantee that `index` through
# `index + 7` are inside the list.
wyr8 : List U8, Nat -> U64
wyr8 = \list, index ->
    # With seamless slices and Num.fromBytes, this should be possible to make faster and nicer.
    # It would also deal with the fact that on big endian systems we want to invert the order here.
    # Without seamless slices, we would need fromBytes to take an index.
    byteAt = \offset, shift ->
        listGetUnsafe list (Num.addWrap index offset)
        |> Num.toU64
        |> Num.shiftLeftBy shift

    byteAt 0 0
    |> Num.bitwiseOr (byteAt 1 8)
    |> Num.bitwiseOr (byteAt 2 16)
    |> Num.bitwiseOr (byteAt 3 24)
    |> Num.bitwiseOr (byteAt 4 32)
    |> Num.bitwiseOr (byteAt 5 40)
    |> Num.bitwiseOr (byteAt 6 48)
    |> Num.bitwiseOr (byteAt 7 56)
|
||||
|
||||
# Reads the 4 bytes starting at `index` as a little endian value, widened to U64.
# The reads are unchecked: the caller must guarantee that `index` through
# `index + 3` are inside the list.
wyr4 : List U8, Nat -> U64
wyr4 = \list, index ->
    byteAt = \offset -> Num.toU64 (listGetUnsafe list (Num.addWrap index offset))
    lowPair = Num.bitwiseOr (byteAt 0) (Num.shiftLeftBy (byteAt 1) 8)
    highPair = Num.bitwiseOr (Num.shiftLeftBy (byteAt 2) 16) (Num.shiftLeftBy (byteAt 3) 24)

    Num.bitwiseOr lowPair highPair
|
||||
|
||||
# Packs up to 3 bytes starting at `index` into one U64.
# K must be 3 or less, and at least 1: the last read is at `index + k - 1`,
# and none of the reads are bounds checked.
wyr3 : List U8, Nat, Nat -> U64
wyr3 = \list, index, k ->
    # ((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1]
    byteAt = \offset -> Num.toU64 (listGetUnsafe list (Num.addWrap offset index))
    first = Num.toU64 (listGetUnsafe list index)
    middle = byteAt (Num.shiftRightZfBy k 1)
    last = byteAt (Num.subWrap k 1)

    Num.shiftLeftBy first 16
    |> Num.bitwiseOr (Num.shiftLeftBy middle 8)
    |> Num.bitwiseOr last
|
||||
|
||||
# TODO: would be great to have table driven expects for this.
# Would also be great to have some sort of property based hasher
# where we can compare `addU*` functions to the `addBytes` function.

# Empty input.
expect
    actual = createLowLevelHasher {} |> addBytes [] |> complete

    actual == 0x1C3F_F8BF_07F9_B0B3

# One byte: `addBytes` and `addU8` must agree.
expect
    actual = createLowLevelHasher {} |> addBytes [0x42] |> complete

    actual == 0x8F9F_0A1E_E06F_0D52

expect
    actual = createLowLevelHasher {} |> addU8 0x42 |> complete

    actual == 0x8F9F_0A1E_E06F_0D52

# Two bytes: `addBytes` and `addU16` must agree (little endian byte order).
expect
    actual = createLowLevelHasher {} |> addBytes [0xFF, 0xFF] |> complete

    actual == 0x86CC_8B71_563F_F084

expect
    actual = createLowLevelHasher {} |> addU16 0xFFFF |> complete

    actual == 0x86CC_8B71_563F_F084

expect
    actual = createLowLevelHasher {} |> addBytes [0x36, 0xA7] |> complete

    actual == 0xD1A5_0F24_2536_84F8

expect
    actual = createLowLevelHasher {} |> addU16 0xA736 |> complete

    actual == 0xD1A5_0F24_2536_84F8

# Four bytes: `addBytes` and `addU32` must agree.
expect
    actual = createLowLevelHasher {} |> addBytes [0x00, 0x00, 0x00, 0x00] |> complete

    actual == 0x3762_ACB1_7604_B541

expect
    actual = createLowLevelHasher {} |> addU32 0x0000_0000 |> complete

    actual == 0x3762_ACB1_7604_B541

expect
    actual = createLowLevelHasher {} |> addBytes [0xA9, 0x2F, 0xEE, 0x21] |> complete

    actual == 0x20F3_3FD7_D32E_C7A9

expect
    actual = createLowLevelHasher {} |> addU32 0x21EE_2FA9 |> complete

    actual == 0x20F3_3FD7_D32E_C7A9

# Sixteen bytes: `addBytes` and `addU128` must agree.
expect
    actual =
        createLowLevelHasher {}
        |> addBytes [0x5D, 0x66, 0xB1, 0x8F, 0x68, 0x44, 0xC7, 0x03, 0xE1, 0xDD, 0x23, 0x34, 0xBB, 0x9A, 0x42, 0xA7]
        |> complete

    actual == 0xA16F_DDAA_C167_74C7

expect
    actual = createLowLevelHasher {} |> addU128 0xA742_9ABB_3423_DDE1_03C7_4468_8FB1_665D |> complete

    actual == 0xA16F_DDAA_C167_74C7
|
||||
|
||||
# Strings of 26, 62 and 70 bytes hashed through `Hash.hashStrBytes`.
expect
    actual = createLowLevelHasher {} |> Hash.hashStrBytes "abcdefghijklmnopqrstuvwxyz" |> complete

    actual == 0xBEE0_A8FD_E990_D285

expect
    actual =
        createLowLevelHasher {}
        |> Hash.hashStrBytes "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
        |> complete

    actual == 0xB3C5_8528_9D82_A6EF

expect
    actual =
        createLowLevelHasher {}
        |> Hash.hashStrBytes "1234567890123456789012345678901234567890123456789012345678901234567890"
        |> complete

    actual == 0xDB6B_7997_7A55_BA03

# 100 identical bytes: long enough for `addBytes` to take its 48-byte-round path.
expect
    actual = createLowLevelHasher {} |> addBytes (List.repeat 0x77 100) |> complete

    actual == 0x171F_EEE2_B764_8E5E
|
||||
|
||||
# Note, had to specify u8 in the lists below to avoid ability type resolution error.
# Apparently it won't pick the default integer.

# Unordered hashing yields a fixed, known value.
expect
    actual = createLowLevelHasher {} |> Hash.hashUnordered [8u8, 82u8, 3u8, 8u8, 24u8] List.walk |> complete

    actual == 0x999F_B530_3529_F17D

# The same elements in a different order hash identically.
expect
    ascending = createLowLevelHasher {} |> Hash.hashUnordered ([0u8, 1u8, 2u8, 3u8, 4u8]) List.walk |> complete
    descending = createLowLevelHasher {} |> Hash.hashUnordered [4u8, 3u8, 2u8, 1u8, 0u8] List.walk |> complete

    ascending == descending

# An extra (duplicate) element changes the hash.
expect
    fiveElems = createLowLevelHasher {} |> Hash.hashUnordered [0u8, 1u8, 2u8, 3u8, 4u8] List.walk |> complete
    sixElems = createLowLevelHasher {} |> Hash.hashUnordered [4u8, 3u8, 2u8, 1u8, 0u8, 0u8] List.walk |> complete

    fiveElems != sixElems
|
||||
|
|
|
@ -17,6 +17,7 @@ interface Hash
|
|||
complete,
|
||||
hashStrBytes,
|
||||
hashList,
|
||||
hashUnordered,
|
||||
] imports [
|
||||
List,
|
||||
Str,
|
||||
|
@ -75,10 +76,33 @@ Hasher has
|
|||
|
||||
## Adds a string into a [Hasher] by hashing its UTF-8 bytes.
hashStrBytes = \hasher, s ->
    # Hand the whole UTF-8 byte list to the hasher in a single `addBytes` call.
    # The earlier per-byte `Str.walkUtf8WithIndex` / `addU8` walk has been
    # superseded by this form and must not be kept alongside it: a definition
    # has exactly one result expression.
    addBytes hasher (Str.toUtf8 s)
|
||||
|
||||
## Adds a list of [Hash]able elements to a [Hasher] by hashing each element.
## Elements are fed to the hasher one after another, in list order.
hashList = \hasher, lst ->
    lst
    |> List.walk hasher (\acc, item -> hash acc item)
|
||||
|
||||
## Adds a container of [Hash]able elements to a [Hasher] by hashing each element.
## The container is iterated using the walk method passed in.
## The order of the elements does not affect the final hash.
hashUnordered = \hasher, container, walk ->
    combined =
        walk container 0 \total, elem ->
            # Note, we intentionally copy the hasher in every iteration.
            # Having the same base state is required for unordered hashing.
            elemHash = complete (hash hasher elem)
            sum = Num.addWrap total elemHash

            if sum >= total then
                sum
            else
                # The addition wrapped around. We don't want to lose a bit of
                # entropy on overflow, so add it back in.
                Num.addWrap sum 1

    addU64 hasher combined
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue