Merge pull request #5445 from roc-lang/dict-pseudo-seed

Seed Dict and Set
This commit is contained in:
Brendan Hansknecht 2023-05-26 16:19:15 +00:00 committed by GitHub
commit 0b475ae979
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 99 additions and 57 deletions

View file

@ -188,6 +188,7 @@ comptime {
exportUtilsFn(utils.isUnique, "is_unique");
exportUtilsFn(utils.decrefCheckNullC, "decref_check_null");
exportUtilsFn(utils.allocateWithRefcountC, "allocate_with_refcount");
exportUtilsFn(utils.dictPseudoSeed, "dict_pseudo_seed");
@export(panic_utils.panic, .{ .name = "roc_builtins.utils." ++ "panic", .linkage = .Weak });

View file

@ -436,3 +436,15 @@ test "increfC, static data" {
increfRcPtrC(ptr_to_refcount, 2);
try std.testing.expectEqual(mock_rc, REFCOUNT_MAX_ISIZE);
}
// Produces the pseudo random seed used for dictionaries; it depends on the compilation.
// The seed is simply the address of this function.
// Without it, every roc Dict would use one known seed and be trivial to DOS.
// It is still weaker than a truly random seed, but a lot better than a fixed one.
// The value must stay the same between calls unless Dict is changed to store the seed on creation.
// Note: On essentially all OSes, ASLR affects this address, so the seed differs on each run.
// In wasm, the value is constant for the build as a whole.
// Either way, an attacker cannot know it unless they get access to the executable.
pub fn dictPseudoSeed() callconv(.C) u64 {
    // Take the function's own address as a pointer-sized integer first...
    const address: usize = @ptrToInt(dictPseudoSeed);
    // ...then convert it to the fixed 64-bit width of the return type.
    return @intCast(u64, address);
}

View file

@ -286,7 +286,7 @@ walkUntil = \@Dict { data }, initialState, transform ->
get : Dict k v, k -> Result v [KeyNotFound] | k has Hash & Eq
get = \@Dict { metadata, dataIndices, data }, key ->
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -314,7 +314,7 @@ get = \@Dict { metadata, dataIndices, data }, key ->
contains : Dict k v, k -> Bool | k has Hash & Eq
contains = \@Dict { metadata, dataIndices, data }, key ->
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -339,7 +339,7 @@ contains = \@Dict { metadata, dataIndices, data }, key ->
insert : Dict k v, k, v -> Dict k v | k has Hash & Eq
insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -386,7 +386,7 @@ remove : Dict k v, k -> Dict k v | k has Hash & Eq
remove = \@Dict { metadata, dataIndices, data, size }, key ->
# TODO: change this from swap remove to tombstone and test if performance is still good.
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -569,7 +569,7 @@ swapAndUpdateDataIndex : Dict k v, Nat, Nat -> Dict k v | k has Hash & Eq
swapAndUpdateDataIndex = \@Dict { metadata, dataIndices, data, size }, removedIndex, lastIndex ->
(T key _) = listGetUnsafe data lastIndex
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -711,7 +711,7 @@ rehashHelper = \dict, oldMetadata, oldDataIndices, oldData, index ->
insertForRehash : Dict k v, k, Nat -> Dict k v | k has Hash & Eq
insertForRehash = \@Dict { metadata, dataIndices, data, size }, key, dataIndex ->
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -993,8 +993,16 @@ LowLevelHasher := { originalSeed : U64, state : U64 } has [
# TODO hide behind an InternalList.roc module
listGetUnsafe : List a, Nat -> a
createLowLevelHasher : { seed ? U64 } -> LowLevelHasher
createLowLevelHasher = \{ seed ? 0x526F_6352_616E_643F } ->
# Returns an application specific pseudo random seed for Dict.
# This avoids trivial DOS attacks.
pseudoSeed : {} -> U64
# Builds a new hasher from the requested seed source.
# `PseudoRandSeed` takes the seed from `pseudoSeed {}`; `WithSeed s` uses `s` as given.
# The chosen seed is stored as both `originalSeed` and the initial `state`.
createLowLevelHasher : [PseudoRandSeed, WithSeed U64] -> LowLevelHasher
createLowLevelHasher = \seedOpt ->
    when seedOpt is
        PseudoRandSeed ->
            seed = pseudoSeed {}

            @LowLevelHasher { originalSeed: seed, state: seed }

        WithSeed seed ->
            @LowLevelHasher { originalSeed: seed, state: seed }
combineState : LowLevelHasher, { a : U64, b : U64, seed : U64, length : U64 } -> LowLevelHasher
@ -1188,12 +1196,14 @@ wyr3 = \list, index, k ->
Num.bitwiseOr a p3
testSeed = WithSeed 0x526F_6352_616E_643F
# TODO: would be great to have table driven expects for this.
# Would also be great to have some sort of property based hasher
# where we can compare `addU*` functions to the `addBytes` function.
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes []
|> complete
@ -1201,7 +1211,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0x42]
|> complete
@ -1209,7 +1219,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU8 0x42
|> complete
@ -1217,7 +1227,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0xFF, 0xFF]
|> complete
@ -1225,7 +1235,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU16 0xFFFF
|> complete
@ -1233,7 +1243,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0x36, 0xA7]
|> complete
@ -1241,7 +1251,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU16 0xA736
|> complete
@ -1249,7 +1259,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0x00, 0x00, 0x00, 0x00]
|> complete
@ -1257,7 +1267,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU32 0x0000_0000
|> complete
@ -1265,7 +1275,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0xA9, 0x2F, 0xEE, 0x21]
|> complete
@ -1273,7 +1283,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU32 0x21EE_2FA9
|> complete
@ -1281,7 +1291,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0x5D, 0x66, 0xB1, 0x8F, 0x68, 0x44, 0xC7, 0x03, 0xE1, 0xDD, 0x23, 0x34, 0xBB, 0x9A, 0x42, 0xA7]
|> complete
@ -1289,7 +1299,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU128 0xA742_9ABB_3423_DDE1_03C7_4468_8FB1_665D
|> complete
@ -1297,7 +1307,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashStrBytes "abcdefghijklmnopqrstuvwxyz"
|> complete
@ -1305,7 +1315,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashStrBytes "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
|> complete
@ -1313,7 +1323,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashStrBytes "1234567890123456789012345678901234567890123456789012345678901234567890"
|> complete
@ -1321,7 +1331,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes (List.repeat 0x77 100)
|> complete
@ -1331,7 +1341,7 @@ expect
# Apparently it won't pick the default integer.
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered [8u8, 82u8, 3u8, 8u8, 24u8] List.walk
|> complete
@ -1339,12 +1349,12 @@ expect
expect
hash1 =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered ([0u8, 1u8, 2u8, 3u8, 4u8]) List.walk
|> complete
hash2 =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered [4u8, 3u8, 2u8, 1u8, 0u8] List.walk
|> complete
@ -1352,12 +1362,12 @@ expect
expect
hash1 =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered [0u8, 1u8, 2u8, 3u8, 4u8] List.walk
|> complete
hash2 =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered [4u8, 3u8, 2u8, 1u8, 0u8, 0u8] List.walk
|> complete

View file

@ -390,6 +390,7 @@ pub const UTILS_INCREF_DATA_PTR: &str = "roc_builtins.utils.incref_data_ptr";
pub const UTILS_DECREF_DATA_PTR: &str = "roc_builtins.utils.decref_data_ptr";
pub const UTILS_IS_UNIQUE: &str = "roc_builtins.utils.is_unique";
pub const UTILS_DECREF_CHECK_NULL: &str = "roc_builtins.utils.decref_check_null";
pub const UTILS_DICT_PSEUDO_SEED: &str = "roc_builtins.utils.dict_pseudo_seed";
pub const UTILS_EXPECT_FAILED_START_SHARED_BUFFER: &str =
"roc_builtins.utils.expect_failed_start_shared_buffer";

View file

@ -217,6 +217,7 @@ map_symbol_to_lowlevel_and_arity! {
BoxExpr; BOX_BOX_FUNCTION; 1,
UnboxExpr; BOX_UNBOX; 1,
Unreachable; LIST_UNREACHABLE; 1,
DictPseudoSeed; DICT_PSEUDO_SEED; 1,
}
/// Some builtins cannot be constructed in code gen alone, and need to be defined

View file

@ -1548,6 +1548,13 @@ trait Backend<'a> {
arg_layouts,
ret_layout,
),
LowLevel::DictPseudoSeed => self.build_fn_call(
sym,
bitcode::UTILS_DICT_PSEUDO_SEED.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::NumToStr => {
let arg_layout = arg_layouts[0];
let intrinsic = match self.interner().get(arg_layout).repr {

View file

@ -1310,6 +1310,11 @@ pub(crate) fn run_low_level<'a, 'ctx>(
ptr.into()
}
},
DictPseudoSeed => {
// Dict.pseudoSeed : {} -> u64
call_bitcode_fn(env, &[], bitcode::UTILS_DICT_PSEUDO_SEED)
}
}
}

View file

@ -1982,6 +1982,7 @@ impl<'a> LowLevelCall<'a> {
},
StoredValue::StackMemory { .. } => { /* do nothing */ }
},
DictPseudoSeed => self.load_args_and_call_zig(backend, bitcode::UTILS_DICT_PSEUDO_SEED),
}
}

View file

@ -126,6 +126,7 @@ pub enum LowLevel {
BoxExpr,
UnboxExpr,
Unreachable,
DictPseudoSeed,
}
macro_rules! higher_order {
@ -345,4 +346,5 @@ map_symbol_to_lowlevel! {
Or <= BOOL_OR,
Not <= BOOL_NOT,
Unreachable <= LIST_UNREACHABLE,
DictPseudoSeed <= DICT_PSEUDO_SEED,
}

View file

@ -1463,6 +1463,7 @@ define_builtins! {
21 DICT_UPDATE: "update"
22 DICT_LIST_GET_UNSAFE: "listGetUnsafe"
23 DICT_PSEUDO_SEED: "pseudoSeed"
}
9 SET: "Set" => {
0 SET_SET: "Set" exposed_type=true // the Set.Set type alias

View file

@ -947,6 +947,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[Ownership] {
// - other refcounted arguments are Borrowed
match op {
Unreachable => arena.alloc_slice_copy(&[irrelevant]),
DictPseudoSeed => arena.alloc_slice_copy(&[irrelevant]),
ListLen | StrIsEmpty | StrToScalars | StrCountGraphemes | StrGraphemes
| StrCountUtf8Bytes | StrGetCapacity | ListGetCapacity => {
arena.alloc_slice_copy(&[borrowed])

View file

@ -1,28 +1,28 @@
procedure Dict.1 (Dict.536):
let Dict.546 : List {[], []} = Array [];
let Dict.553 : U64 = 0i64;
let Dict.554 : U64 = 8i64;
let Dict.547 : List U64 = CallByName List.11 Dict.553 Dict.554;
let Dict.550 : I8 = CallByName Dict.36;
let Dict.551 : U64 = 8i64;
let Dict.548 : List I8 = CallByName List.11 Dict.550 Dict.551;
let Dict.549 : U64 = 0i64;
let Dict.545 : {List {[], []}, List U64, List I8, U64} = Struct {Dict.546, Dict.547, Dict.548, Dict.549};
ret Dict.545;
procedure Dict.1 (Dict.547):
let Dict.556 : List {[], []} = Array [];
let Dict.563 : U64 = 0i64;
let Dict.564 : U64 = 8i64;
let Dict.557 : List U64 = CallByName List.11 Dict.563 Dict.564;
let Dict.560 : I8 = CallByName Dict.37;
let Dict.561 : U64 = 8i64;
let Dict.558 : List I8 = CallByName List.11 Dict.560 Dict.561;
let Dict.559 : U64 = 0i64;
let Dict.555 : {List {[], []}, List U64, List I8, U64} = Struct {Dict.556, Dict.557, Dict.558, Dict.559};
ret Dict.555;
procedure Dict.36 ():
let Dict.552 : I8 = -128i64;
ret Dict.552;
procedure Dict.37 ():
let Dict.562 : I8 = -128i64;
ret Dict.562;
procedure Dict.4 (Dict.543):
let Dict.97 : U64 = StructAtIndex 3 Dict.543;
let #Derived_gen.2 : List {[], []} = StructAtIndex 0 Dict.543;
procedure Dict.4 (Dict.553):
let Dict.99 : U64 = StructAtIndex 3 Dict.553;
let #Derived_gen.2 : List {[], []} = StructAtIndex 0 Dict.553;
dec #Derived_gen.2;
let #Derived_gen.1 : List U64 = StructAtIndex 1 Dict.543;
let #Derived_gen.1 : List U64 = StructAtIndex 1 Dict.553;
dec #Derived_gen.1;
let #Derived_gen.0 : List I8 = StructAtIndex 2 Dict.543;
let #Derived_gen.0 : List I8 = StructAtIndex 2 Dict.553;
dec #Derived_gen.0;
ret Dict.97;
ret Dict.99;
procedure List.11 (List.115, List.116):
let List.495 : List I8 = CallByName List.68 List.116;

View file

@ -3,22 +3,22 @@
app "test" provides [main] to "./platform"
f = \{} ->
#^{-1} <1606><116>{} -<119>[[f(1)]]-> <115>[Ok <1614>{}]<79>*
#^{-1} <1599><116>{} -<119>[[f(1)]]-> <115>[Ok <1607>{}]<79>*
when g {} is
# ^ <1596><1614>{} -<1604>[[g(2)]]-> <71>[Ok <1614>{}]<101>*
# ^ <1589><1607>{} -<1597>[[g(2)]]-> <71>[Ok <1607>{}]<101>*
_ -> Ok {}
g = \{} ->
#^{-1} <1596><1614>{} -<1604>[[g(2)]]-> <71>[Ok <1614>{}]<101>*
#^{-1} <1589><1607>{} -<1597>[[g(2)]]-> <71>[Ok <1607>{}]<101>*
when h {} is
# ^ <1601><1614>{} -<1609>[[h(3)]]-> <93>[Ok <1614>{}]<123>*
# ^ <1594><1607>{} -<1602>[[h(3)]]-> <93>[Ok <1607>{}]<123>*
_ -> Ok {}
h = \{} ->
#^{-1} <1601><1614>{} -<1609>[[h(3)]]-> <93>[Ok <1614>{}]<123>*
#^{-1} <1594><1607>{} -<1602>[[h(3)]]-> <93>[Ok <1607>{}]<123>*
when f {} is
# ^ <1606><116>{} -<119>[[f(1)]]-> <115>[Ok <1614>{}]<79>*
# ^ <1599><116>{} -<119>[[f(1)]]-> <115>[Ok <1607>{}]<79>*
_ -> Ok {}
main = f {}
# ^ <1616><132>{} -<135>[[f(1)]]-> <137>[Ok <1614>{}]<1615>w_a
# ^ <1609><132>{} -<135>[[f(1)]]-> <137>[Ok <1607>{}]<1608>w_a