Seed Dict and Set

This changes Dict and Set to have a compilation-dependent seed.
The seed is not exposed to userland in any way.
This gets a much more DoS-resistant Dict and Set at no cost.
This commit is contained in:
Brendan Hansknecht 2023-05-25 09:39:52 -07:00
parent 8ecbd8c071
commit b4c359588e
No known key found for this signature in database
GPG key ID: 0EA784685083E75B
11 changed files with 69 additions and 30 deletions

View file

@ -188,6 +188,7 @@ comptime {
exportUtilsFn(utils.isUnique, "is_unique");
exportUtilsFn(utils.decrefCheckNullC, "decref_check_null");
exportUtilsFn(utils.allocateWithRefcountC, "allocate_with_refcount");
exportUtilsFn(utils.dictPseudoSeed, "dict_pseudo_seed");
@export(panic_utils.panic, .{ .name = "roc_builtins.utils." ++ "panic", .linkage = .Weak });

View file

@ -436,3 +436,12 @@ test "increfC, static data" {
increfRcPtrC(ptr_to_refcount, 2);
try std.testing.expectEqual(mock_rc, REFCOUNT_MAX_ISIZE);
}
/// Produces the pseudo random seed used by dictionaries.
///
/// The seed is this function's own address widened to `u64`, which makes it
/// compilation dependent instead of a constant shared by every roc program.
/// That keeps all roc Dicts from hashing with one known seed, which would make
/// them trivial to DOS. It is still weaker than a truly random seed, but a lot
/// better than a fixed one.
///
/// The value must stay the same from call to call unless Dict is changed to
/// store its seed on creation.
pub fn dictPseudoSeed() callconv(.C) u64 {
    const own_address: usize = @ptrToInt(dictPseudoSeed);
    return @intCast(u64, own_address);
}

View file

@ -286,7 +286,7 @@ walkUntil = \@Dict { data }, initialState, transform ->
get : Dict k v, k -> Result v [KeyNotFound] | k has Hash & Eq
get = \@Dict { metadata, dataIndices, data }, key ->
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -314,7 +314,7 @@ get = \@Dict { metadata, dataIndices, data }, key ->
contains : Dict k v, k -> Bool | k has Hash & Eq
contains = \@Dict { metadata, dataIndices, data }, key ->
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -339,7 +339,7 @@ contains = \@Dict { metadata, dataIndices, data }, key ->
insert : Dict k v, k, v -> Dict k v | k has Hash & Eq
insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -386,7 +386,7 @@ remove : Dict k v, k -> Dict k v | k has Hash & Eq
remove = \@Dict { metadata, dataIndices, data, size }, key ->
# TODO: change this from swap remove to tombstone and test if performance is still good.
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -569,7 +569,7 @@ swapAndUpdateDataIndex : Dict k v, Nat, Nat -> Dict k v | k has Hash & Eq
swapAndUpdateDataIndex = \@Dict { metadata, dataIndices, data, size }, removedIndex, lastIndex ->
(T key _) = listGetUnsafe data lastIndex
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -711,7 +711,7 @@ rehashHelper = \dict, oldMetadata, oldDataIndices, oldData, index ->
insertForRehash : Dict k v, k, Nat -> Dict k v | k has Hash & Eq
insertForRehash = \@Dict { metadata, dataIndices, data, size }, key, dataIndex ->
hashKey =
createLowLevelHasher {}
createLowLevelHasher PseudoRandSeed
|> Hash.hash key
|> complete
h1Key = h1 hashKey
@ -993,8 +993,16 @@ LowLevelHasher := { originalSeed : U64, state : U64 } has [
# TODO hide behind an InternalList.roc module
listGetUnsafe : List a, Nat -> a
createLowLevelHasher : { seed ? U64 } -> LowLevelHasher
createLowLevelHasher = \{ seed ? 0x526F_6352_616E_643F } ->
# Returns an application-specific pseudo random seed for Dict.
# This avoids trivial DOS attacks.
pseudoSeed : {} -> U64
# Builds a fresh LowLevelHasher whose `originalSeed` and `state` both start at
# the chosen seed:
# - `WithSeed s` uses the caller-supplied seed `s`.
# - `PseudoRandSeed` uses the value returned by `pseudoSeed {}`.
createLowLevelHasher : [PseudoRandSeed, WithSeed U64] -> LowLevelHasher
createLowLevelHasher = \seedOpt ->
    initialSeed =
        when seedOpt is
            WithSeed explicitSeed -> explicitSeed
            PseudoRandSeed -> pseudoSeed {}

    @LowLevelHasher { originalSeed: initialSeed, state: initialSeed }
combineState : LowLevelHasher, { a : U64, b : U64, seed : U64, length : U64 } -> LowLevelHasher
@ -1188,12 +1196,14 @@ wyr3 = \list, index, k ->
Num.bitwiseOr a p3
testSeed = WithSeed 0x526F_6352_616E_643F
# TODO: would be great to have table driven expects for this.
# Would also be great to have some sort of property based hasher
# where we can compare `addU*` functions to the `addBytes` function.
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes []
|> complete
@ -1201,7 +1211,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0x42]
|> complete
@ -1209,7 +1219,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU8 0x42
|> complete
@ -1217,7 +1227,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0xFF, 0xFF]
|> complete
@ -1225,7 +1235,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU16 0xFFFF
|> complete
@ -1233,7 +1243,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0x36, 0xA7]
|> complete
@ -1241,7 +1251,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU16 0xA736
|> complete
@ -1249,7 +1259,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0x00, 0x00, 0x00, 0x00]
|> complete
@ -1257,7 +1267,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU32 0x0000_0000
|> complete
@ -1265,7 +1275,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0xA9, 0x2F, 0xEE, 0x21]
|> complete
@ -1273,7 +1283,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU32 0x21EE_2FA9
|> complete
@ -1281,7 +1291,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes [0x5D, 0x66, 0xB1, 0x8F, 0x68, 0x44, 0xC7, 0x03, 0xE1, 0xDD, 0x23, 0x34, 0xBB, 0x9A, 0x42, 0xA7]
|> complete
@ -1289,7 +1299,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addU128 0xA742_9ABB_3423_DDE1_03C7_4468_8FB1_665D
|> complete
@ -1297,7 +1307,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashStrBytes "abcdefghijklmnopqrstuvwxyz"
|> complete
@ -1305,7 +1315,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashStrBytes "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
|> complete
@ -1313,7 +1323,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashStrBytes "1234567890123456789012345678901234567890123456789012345678901234567890"
|> complete
@ -1321,7 +1331,7 @@ expect
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> addBytes (List.repeat 0x77 100)
|> complete
@ -1331,7 +1341,7 @@ expect
# Apparently it won't pick the default integer.
expect
hash =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered [8u8, 82u8, 3u8, 8u8, 24u8] List.walk
|> complete
@ -1339,12 +1349,12 @@ expect
expect
hash1 =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered ([0u8, 1u8, 2u8, 3u8, 4u8]) List.walk
|> complete
hash2 =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered [4u8, 3u8, 2u8, 1u8, 0u8] List.walk
|> complete
@ -1352,12 +1362,12 @@ expect
expect
hash1 =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered [0u8, 1u8, 2u8, 3u8, 4u8] List.walk
|> complete
hash2 =
createLowLevelHasher {}
createLowLevelHasher testSeed
|> Hash.hashUnordered [4u8, 3u8, 2u8, 1u8, 0u8, 0u8] List.walk
|> complete

View file

@ -390,6 +390,7 @@ pub const UTILS_INCREF_DATA_PTR: &str = "roc_builtins.utils.incref_data_ptr";
pub const UTILS_DECREF_DATA_PTR: &str = "roc_builtins.utils.decref_data_ptr";
pub const UTILS_IS_UNIQUE: &str = "roc_builtins.utils.is_unique";
pub const UTILS_DECREF_CHECK_NULL: &str = "roc_builtins.utils.decref_check_null";
pub const UTILS_DICT_PSEUDO_SEED: &str = "roc_builtins.utils.dict_pseudo_seed";
pub const UTILS_EXPECT_FAILED_START_SHARED_BUFFER: &str =
"roc_builtins.utils.expect_failed_start_shared_buffer";

View file

@ -217,6 +217,7 @@ map_symbol_to_lowlevel_and_arity! {
BoxExpr; BOX_BOX_FUNCTION; 1,
UnboxExpr; BOX_UNBOX; 1,
Unreachable; LIST_UNREACHABLE; 1,
DictPseudoSeed; DICT_PSEUDO_SEED; 1,
}
/// Some builtins cannot be constructed in code gen alone, and need to be defined

View file

@ -1548,6 +1548,13 @@ trait Backend<'a> {
arg_layouts,
ret_layout,
),
LowLevel::DictPseudoSeed => self.build_fn_call(
sym,
bitcode::UTILS_DICT_PSEUDO_SEED.to_string(),
args,
arg_layouts,
ret_layout,
),
LowLevel::NumToStr => {
let arg_layout = arg_layouts[0];
let intrinsic = match self.interner().get(arg_layout).repr {

View file

@ -1310,6 +1310,11 @@ pub(crate) fn run_low_level<'a, 'ctx>(
ptr.into()
}
},
DictPseudoSeed => {
// Dict.pseudoSeed : {} -> U64
call_bitcode_fn(env, &[], bitcode::UTILS_DICT_PSEUDO_SEED)
}
}
}

View file

@ -1982,6 +1982,7 @@ impl<'a> LowLevelCall<'a> {
},
StoredValue::StackMemory { .. } => { /* do nothing */ }
},
DictPseudoSeed => self.load_args_and_call_zig(backend, bitcode::UTILS_DICT_PSEUDO_SEED),
}
}

View file

@ -126,6 +126,7 @@ pub enum LowLevel {
BoxExpr,
UnboxExpr,
Unreachable,
DictPseudoSeed,
}
macro_rules! higher_order {
@ -345,4 +346,5 @@ map_symbol_to_lowlevel! {
Or <= BOOL_OR,
Not <= BOOL_NOT,
Unreachable <= LIST_UNREACHABLE,
DictPseudoSeed <= DICT_PSEUDO_SEED,
}

View file

@ -1463,6 +1463,7 @@ define_builtins! {
21 DICT_UPDATE: "update"
22 DICT_LIST_GET_UNSAFE: "listGetUnsafe"
23 DICT_PSEUDO_SEED: "pseudoSeed"
}
9 SET: "Set" => {
0 SET_SET: "Set" exposed_type=true // the Set.Set type alias

View file

@ -947,6 +947,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[Ownership] {
// - other refcounted arguments are Borrowed
match op {
Unreachable => arena.alloc_slice_copy(&[irrelevant]),
DictPseudoSeed => arena.alloc_slice_copy(&[irrelevant]),
ListLen | StrIsEmpty | StrToScalars | StrCountGraphemes | StrGraphemes
| StrCountUtf8Bytes | StrGetCapacity | ListGetCapacity => {
arena.alloc_slice_copy(&[borrowed])