From 89bf22598ecbe3fac6ba62944b76630501ac28bd Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 14:41:08 +0100 Subject: [PATCH 01/26] call by pointer wrappers need to call by pointer for closures --- compiler/mono/src/ir.rs | 65 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 9e6f2ccd47..79628b27ac 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -5751,8 +5751,13 @@ fn call_by_pointer<'a>( let is_specialized = procs.specialized.keys().any(|(s, _)| *s == symbol); if env.is_imported_symbol(symbol) || procs.partial_procs.contains_key(&symbol) || is_specialized { + // TODO we should be able to call by name in this wrapper for "normal" functions + // but closures, specifically top-level values that are closures (by unification) + // cause issues. The caller (which is here) doesn't know whether the called is a closure + // so we're safe rather than sorry for now. 
Hopefully we can figure out how to call by name + // more in the future match layout { - Layout::FunctionPointer(arg_layouts, ret_layout) => { + Layout::FunctionPointer(arg_layouts, ret_layout) if false => { if arg_layouts.iter().any(|l| l.contains_refcounted()) { let name = env.unique_symbol(); let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena); @@ -5766,6 +5771,7 @@ fn call_by_pointer<'a>( let args = args.into_bump_slice(); let call_symbol = env.unique_symbol(); + debug_assert_eq!(arg_layouts.len(), arg_symbols.len()); let call_type = CallType::ByName { name: symbol, full_layout: layout.clone(), @@ -5804,6 +5810,63 @@ fn call_by_pointer<'a>( Expr::FunctionPointer(symbol, layout) } } + Layout::FunctionPointer(arg_layouts, ret_layout) => { + if arg_layouts.iter().any(|l| l.contains_refcounted()) { + let name = env.unique_symbol(); + let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena); + let mut arg_symbols = Vec::with_capacity_in(arg_layouts.len(), env.arena); + + for layout in arg_layouts { + let symbol = env.unique_symbol(); + args.push((layout.clone(), symbol)); + arg_symbols.push(symbol); + } + let args = args.into_bump_slice(); + + let call_symbol = env.unique_symbol(); + let fpointer_symbol = env.unique_symbol(); + debug_assert_eq!(arg_layouts.len(), arg_symbols.len()); + let call_type = CallType::ByPointer { + name: fpointer_symbol, + full_layout: layout.clone(), + ret_layout: ret_layout.clone(), + arg_layouts, + }; + let call = Call { + call_type, + arguments: arg_symbols.into_bump_slice(), + }; + let expr = Expr::Call(call); + + let mut body = Stmt::Ret(call_symbol); + + body = Stmt::Let(call_symbol, expr, ret_layout.clone(), env.arena.alloc(body)); + + let expr = Expr::FunctionPointer(symbol, layout.clone()); + body = Stmt::Let(fpointer_symbol, expr, layout.clone(), env.arena.alloc(body)); + + let closure_data_layout = None; + let proc = Proc { + name, + args, + body, + closure_data_layout, + ret_layout: ret_layout.clone(), 
+ is_self_recursive: SelfRecursive::NotSelfRecursive, + must_own_arguments: true, + host_exposed_layouts: HostExposedLayouts::NotHostExposed, + }; + + procs + .specialized + .insert((name, layout.clone()), InProgressProc::Done(proc)); + Expr::FunctionPointer(name, layout) + } else { + // if none of the arguments is refcounted, then owning the arguments has no + // meaning + Expr::FunctionPointer(symbol, layout) + } + } _ => { // e.g. Num.maxInt or other constants Expr::FunctionPointer(symbol, layout) From ea76578e0694e68284a084c882509cd1d41e13ce Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 15:15:16 +0100 Subject: [PATCH 02/26] add Num.bitwiseOr and Num.shiftLeftBy --- compiler/builtins/src/std.rs | 18 ++++++++++++++++++ compiler/can/src/builtins.rs | 16 ++++++++++++++++ compiler/gen/src/llvm/build.rs | 26 +++++++++++++++++++++++++- compiler/module/src/low_level.rs | 2 ++ compiler/module/src/symbol.rs | 20 +++++++++++--------- compiler/mono/src/borrow.rs | 12 +++++++++++- 6 files changed, 83 insertions(+), 11 deletions(-) diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index 42eb034581..5156fadc0b 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -324,6 +324,24 @@ pub fn types() -> MutMap { ), ); + // bitwiseOr : Int a, Int a -> Int a + add_type( + Symbol::NUM_BITWISE_OR, + top_level_function( + vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))], + Box::new(int_type(flex(TVAR1))), + ), + ); + + // shiftLeftBy : Nat, Int a -> Int a + add_type( + Symbol::NUM_SHIFT_LEFT, + top_level_function( + vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))], + Box::new(int_type(flex(TVAR1))), + ), + ); + // rem : Int a, Int a -> Result (Int a) [ DivByZero ]* add_type( Symbol::NUM_REM, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index fdff0a4e2c..f3e1ff4a32 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -151,6 +151,8 @@ pub fn builtin_defs_map(symbol: 
Symbol, var_store: &mut VarStore) -> Option NUM_MIN_INT => num_min_int, NUM_BITWISE_AND => num_bitwise_and, NUM_BITWISE_XOR => num_bitwise_xor, + NUM_BITWISE_OR => num_bitwise_or, + NUM_SHIFT_LEFT=> num_shift_left_by, RESULT_MAP => result_map, RESULT_MAP_ERR => result_map_err, RESULT_WITH_DEFAULT => result_with_default, @@ -273,6 +275,10 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap { Symbol::NUM_ASIN => num_asin, Symbol::NUM_MAX_INT => num_max_int, Symbol::NUM_MIN_INT => num_min_int, + Symbol::NUM_BITWISE_AND => num_bitwise_and, + Symbol::NUM_BITWISE_XOR => num_bitwise_xor, + Symbol::NUM_BITWISE_OR => num_bitwise_or, + Symbol::NUM_SHIFT_LEFT=> num_shift_left_by, Symbol::RESULT_MAP => result_map, Symbol::RESULT_MAP_ERR => result_map_err, Symbol::RESULT_WITH_DEFAULT => result_with_default, @@ -1299,6 +1305,16 @@ fn num_bitwise_xor(symbol: Symbol, var_store: &mut VarStore) -> Def { num_binop(symbol, var_store, LowLevel::NumBitwiseXor) } +/// Num.bitwiseOr: Int, Int -> Int +fn num_bitwise_or(symbol: Symbol, var_store: &mut VarStore) -> Def { + num_binop(symbol, var_store, LowLevel::NumBitwiseOr) +} + +/// Num.shiftLeftBy: Int a, Int a -> Int a (shift amount first) +fn num_shift_left_by(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_2(symbol, LowLevel::NumShiftLeftBy, var_store) +} + /// List.isEmpty : List * -> Bool fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def { let list_var = var_store.fresh(); diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 5e65bc2cee..e77ae31500 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -3943,7 +3943,23 @@ fn run_low_level<'a, 'ctx, 'env>( build_num_binop(env, parent, lhs_arg, lhs_layout, rhs_arg, rhs_layout, op) } - NumBitwiseAnd | NumBitwiseXor => { + NumBitwiseAnd | NumBitwiseOr | NumBitwiseXor => { + debug_assert_eq!(args.len(), 2); + + let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]); + let (rhs_arg, rhs_layout) = 
load_symbol_and_layout(scope, &args[1]); + + build_int_binop( + env, + parent, + lhs_arg.into_int_value(), + lhs_layout, + rhs_arg.into_int_value(), + rhs_layout, + op, + ) + } + NumShiftLeftBy => { debug_assert_eq!(args.len(), 2); let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]); @@ -4585,6 +4601,14 @@ fn build_int_binop<'a, 'ctx, 'env>( NumPowInt => call_bitcode_fn(env, &[lhs.into(), rhs.into()], &bitcode::NUM_POW_INT), NumBitwiseAnd => bd.build_and(lhs, rhs, "int_bitwise_and").into(), NumBitwiseXor => bd.build_xor(lhs, rhs, "int_bitwise_xor").into(), + NumBitwiseOr => bd.build_or(lhs, rhs, "int_bitwise_or").into(), + NumShiftLeftBy => { + // NOTE arguments are flipped; + // we write `assert_eq!(0b0000_0001 << 0, 0b0000_0001);` + // as `Num.shiftLeftBy 0 0b0000_0001 + bd.build_left_shift(rhs, lhs, "int_bitwise_or").into() + } + _ => { unreachable!("Unrecognized int binary operation: {:?}", op); } diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index e69fa0dd02..05a20c72c5 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -78,6 +78,8 @@ pub enum LowLevel { NumAsin, NumBitwiseAnd, NumBitwiseXor, + NumBitwiseOr, + NumShiftLeftBy, Eq, NotEq, And, diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index 54700dd492..64717e405b 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -841,15 +841,17 @@ define_builtins! 
{ 80 NUM_BINARY32: "Binary32" imported 81 NUM_BITWISE_AND: "bitwiseAnd" 82 NUM_BITWISE_XOR: "bitwiseXor" - 83 NUM_SUB_WRAP: "subWrap" - 84 NUM_SUB_CHECKED: "subChecked" - 85 NUM_MUL_WRAP: "mulWrap" - 86 NUM_MUL_CHECKED: "mulChecked" - 87 NUM_INT: "Int" imported - 88 NUM_FLOAT: "Float" imported - 89 NUM_AT_NATURAL: "@Natural" - 90 NUM_NATURAL: "Natural" imported - 91 NUM_NAT: "Nat" imported + 83 NUM_BITWISE_OR: "bitwiseOr" + 84 NUM_SHIFT_LEFT: "shiftLeftBy" + 85 NUM_SUB_WRAP: "subWrap" + 86 NUM_SUB_CHECKED: "subChecked" + 87 NUM_MUL_WRAP: "mulWrap" + 88 NUM_MUL_CHECKED: "mulChecked" + 89 NUM_INT: "Int" imported + 90 NUM_FLOAT: "Float" imported + 91 NUM_AT_NATURAL: "@Natural" + 92 NUM_NATURAL: "Natural" imported + 93 NUM_NAT: "Nat" imported } 2 BOOL: "Bool" => { 0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index ba652c0c7b..2fa8893d4c 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -373,6 +373,14 @@ impl<'a> BorrowInfState<'a> { self.own_var(z); // if the function exects an owned argument (ps), the argument must be owned (args) + debug_assert_eq!( + arguments.len(), + ps.len(), + "{:?} has {} parameters, but was applied to {} arguments", + name, + ps.len(), + arguments.len() + ); self.own_args_using_params(arguments, ps); } None => { @@ -658,7 +666,9 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { And | Or | NumAdd | NumAddWrap | NumAddChecked | NumSub | NumSubWrap | NumSubChecked | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare | NumDivUnchecked | NumRemUnchecked | NumPow | NumPowInt | NumBitwiseAnd - | NumBitwiseXor => arena.alloc_slice_copy(&[irrelevant, irrelevant]), + | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy => { + arena.alloc_slice_copy(&[irrelevant, irrelevant]) + } NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumRound | NumCeiling | NumFloor | NumToFloat | Not | 
NumIsFinite | NumAtan | NumAcos | NumAsin => { From 128741e5856adebc368836c54b40a1eaf50979fd Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 16:01:01 +0100 Subject: [PATCH 03/26] add right shift operators --- compiler/builtins/src/std.rs | 20 +++++++++++++++++++- compiler/can/src/builtins.rs | 16 +++++++++++++++- compiler/gen/src/llvm/build.rs | 14 ++++++++++++-- compiler/module/src/low_level.rs | 2 ++ compiler/module/src/symbol.rs | 20 +++++++++++--------- compiler/mono/src/borrow.rs | 2 +- 6 files changed, 60 insertions(+), 14 deletions(-) diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index e7e7eea3c1..084edc1cd4 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -333,7 +333,7 @@ pub fn types() -> MutMap { ), ); - // shiftLeftBy : Nat, Int a -> Int a + // shiftLeftBy : Int a, Int a -> Int a add_type( Symbol::NUM_SHIFT_LEFT, top_level_function( @@ -342,6 +342,24 @@ pub fn types() -> MutMap { ), ); + // shiftRightBy : Int a, Int a -> Int a + add_type( + Symbol::NUM_SHIFT_RIGHT, + top_level_function( + vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))], + Box::new(int_type(flex(TVAR1))), + ), + ); + + // shiftRightZfBy : Int a, Int a -> Int a + add_type( + Symbol::NUM_SHIFT_RIGHT_ZERO_FILL, + top_level_function( + vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))], + Box::new(int_type(flex(TVAR1))), + ), + ); + // rem : Int a, Int a -> Result (Int a) [ DivByZero ]* add_type( Symbol::NUM_REM, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index 6c8f7a9cd3..c8516d26ca 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -154,6 +154,8 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option NUM_BITWISE_XOR => num_bitwise_xor, NUM_BITWISE_OR => num_bitwise_or, NUM_SHIFT_LEFT=> num_shift_left_by, + NUM_SHIFT_RIGHT => num_shift_right_by, + NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by, RESULT_MAP => result_map, 
RESULT_MAP_ERR => result_map_err, RESULT_WITH_DEFAULT => result_with_default, @@ -280,7 +282,9 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap { Symbol::NUM_BITWISE_AND => num_bitwise_and, Symbol::NUM_BITWISE_XOR => num_bitwise_xor, Symbol::NUM_BITWISE_OR => num_bitwise_or, - Symbol::NUM_SHIFT_LEFT=> num_shift_left_by, + Symbol::NUM_SHIFT_LEFT => num_shift_left_by, + Symbol::NUM_SHIFT_RIGHT => num_shift_right_by, + Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by, Symbol::RESULT_MAP => result_map, Symbol::RESULT_MAP_ERR => result_map_err, Symbol::RESULT_WITH_DEFAULT => result_with_default, @@ -1317,6 +1321,16 @@ fn num_shift_left_by(symbol: Symbol, var_store: &mut VarStore) -> Def { lowlevel_2(symbol, LowLevel::NumShiftLeftBy, var_store) } +/// Num.shiftRightBy: Int a, Int a -> Int a (shift amount first; sign-preserving) +fn num_shift_right_by(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_2(symbol, LowLevel::NumShiftRightBy, var_store) +} + +/// Num.shiftRightZfBy: Int a, Int a -> Int a (shift amount first; zero-filling) +fn num_shift_right_zf_by(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_2(symbol, LowLevel::NumShiftRightZfBy, var_store) +} + /// List.isEmpty : List * -> Bool fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def { let list_var = var_store.fresh(); diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 250c903014..ed149b77a4 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -3967,7 +3967,7 @@ fn run_low_level<'a, 'ctx, 'env>( op, ) } - NumShiftLeftBy => { + NumShiftLeftBy | NumShiftRightBy | NumShiftRightZfBy => { debug_assert_eq!(args.len(), 2); let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]); @@ -4614,7 +4614,17 @@ fn build_int_binop<'a, 'ctx, 'env>( // NOTE arguments are flipped; // we write `assert_eq!(0b0000_0001 << 0, 0b0000_0001);` // as `Num.shiftLeftBy 0 0b0000_0001 - bd.build_left_shift(rhs, lhs, "int_bitwise_or").into() + bd.build_left_shift(rhs, lhs, 
"int_shift_left").into() + } + NumShiftRightBy => { + // NOTE arguments are flipped; `true` = sign-extend (arithmetic shift) + bd.build_right_shift(rhs, lhs, true, "int_shift_right") + .into() + } + NumShiftRightZfBy => { + // NOTE arguments are flipped; `false` = zero-fill (logical shift) + bd.build_right_shift(rhs, lhs, false, "int_shift_right_zf") + .into() } _ => { diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index 9740c80d80..7279a4ae76 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -81,6 +81,8 @@ pub enum LowLevel { NumBitwiseXor, NumBitwiseOr, NumShiftLeftBy, + NumShiftRightBy, + NumShiftRightZfBy, Eq, NotEq, And, diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index 23fa1837f0..a7d19116dc 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -843,15 +843,17 @@ define_builtins! { 82 NUM_BITWISE_XOR: "bitwiseXor" 83 NUM_BITWISE_OR: "bitwiseOr" 84 NUM_SHIFT_LEFT: "shiftLeftBy" - 85 NUM_SUB_WRAP: "subWrap" - 86 NUM_SUB_CHECKED: "subChecked" - 87 NUM_MUL_WRAP: "mulWrap" - 88 NUM_MUL_CHECKED: "mulChecked" - 89 NUM_INT: "Int" imported - 90 NUM_FLOAT: "Float" imported - 91 NUM_AT_NATURAL: "@Natural" - 92 NUM_NATURAL: "Natural" imported - 93 NUM_NAT: "Nat" imported + 85 NUM_SHIFT_RIGHT: "shiftRightBy" + 86 NUM_SHIFT_RIGHT_ZERO_FILL: "shiftRightZfBy" + 87 NUM_SUB_WRAP: "subWrap" + 88 NUM_SUB_CHECKED: "subChecked" + 89 NUM_MUL_WRAP: "mulWrap" + 90 NUM_MUL_CHECKED: "mulChecked" + 91 NUM_INT: "Int" imported + 92 NUM_FLOAT: "Float" imported + 93 NUM_AT_NATURAL: "@Natural" + 94 NUM_NATURAL: "Natural" imported + 95 NUM_NAT: "Nat" imported } 2 BOOL: "Bool" => { 0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index a06f9f7e37..294e2582c0 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -666,7 +666,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { And | Or | NumAdd | NumAddWrap | 
NumAddChecked | NumSub | NumSubWrap | NumSubChecked | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare | NumDivUnchecked | NumRemUnchecked | NumPow | NumPowInt | NumBitwiseAnd - | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy => { + | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy | NumShiftRightBy | NumShiftRightZfBy => { arena.alloc_slice_copy(&[irrelevant, irrelevant]) } From 43e71f2ee933e25f5a1f45243af8be6c8290724c Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 16:07:35 +0100 Subject: [PATCH 04/26] int cast --- compiler/builtins/src/std.rs | 6 ++++++ compiler/can/src/builtins.rs | 7 +++++++ compiler/gen/src/llvm/build.rs | 10 ++++++++++ compiler/module/src/low_level.rs | 1 + compiler/module/src/symbol.rs | 2 ++ compiler/mono/src/borrow.rs | 2 +- 6 files changed, 27 insertions(+), 1 deletion(-) diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index 084edc1cd4..5a3b499dd7 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -360,6 +360,12 @@ pub fn types() -> MutMap { ), ); + // intCast : Int a -> Int b + add_type( + Symbol::NUM_INT_CAST, + top_level_function(vec![int_type(flex(TVAR1))], Box::new(int_type(flex(TVAR2)))), + ); + // rem : Int a, Int a -> Result (Int a) [ DivByZero ]* add_type( Symbol::NUM_REM, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index c8516d26ca..af38527fcc 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -156,6 +156,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option NUM_SHIFT_LEFT=> num_shift_left_by, NUM_SHIFT_RIGHT => num_shift_right_by, NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by, + NUM_INT_CAST=> num_int_cast, RESULT_MAP => result_map, RESULT_MAP_ERR => result_map_err, RESULT_WITH_DEFAULT => result_with_default, @@ -285,6 +286,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap { Symbol::NUM_SHIFT_LEFT => num_shift_left_by, 
Symbol::NUM_SHIFT_RIGHT => num_shift_right_by, Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by, + Symbol::NUM_INT_CAST=> num_int_cast, Symbol::RESULT_MAP => result_map, Symbol::RESULT_MAP_ERR => result_map_err, Symbol::RESULT_WITH_DEFAULT => result_with_default, @@ -1331,6 +1333,11 @@ fn num_shift_right_zf_by(symbol: Symbol, var_store: &mut VarStore) -> Def { lowlevel_2(symbol, LowLevel::NumShiftRightZfBy, var_store) } +/// Num.intCast: Int a -> Int b +fn num_int_cast(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_1(symbol, LowLevel::NumIntCast, var_store) +} + /// List.isEmpty : List * -> Bool fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def { let list_var = var_store.fresh(); diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index ed149b77a4..9ba362841f 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -3983,6 +3983,16 @@ fn run_low_level<'a, 'ctx, 'env>( op, ) } + NumIntCast => { + debug_assert_eq!(args.len(), 1); + + let arg = load_symbol(scope, &args[0]).into_int_value(); + + let to = basic_type_from_layout(env.arena, env.context, layout, env.ptr_bytes) + .into_int_type(); + + env.builder.build_int_cast(arg, to, "inc_cast").into() + } Eq => { debug_assert_eq!(args.len(), 2); diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index 7279a4ae76..640b8c8bca 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -83,6 +83,7 @@ pub enum LowLevel { NumShiftLeftBy, NumShiftRightBy, NumShiftRightZfBy, + NumIntCast, Eq, NotEq, And, diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index a7d19116dc..62f5a9d457 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -854,6 +854,8 @@ define_builtins! 
{ 93 NUM_AT_NATURAL: "@Natural" 94 NUM_NATURAL: "Natural" imported 95 NUM_NAT: "Nat" imported + 96 NUM_INT_CAST: "intCast" + } 2 BOOL: "Bool" => { 0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index 294e2582c0..c0d4f1e091 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -671,7 +671,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { } NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumRound | NumCeiling | NumFloor - | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin => { + | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin | NumIntCast => { arena.alloc_slice_copy(&[irrelevant]) } StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]), From bcbef5d3aac61fea000a9c9f8425fc630e638b6e Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 19:10:09 +0100 Subject: [PATCH 05/26] generalize add and sub intrinsics to all integer types --- compiler/gen/src/llvm/build.rs | 103 ++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 3 deletions(-) diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 9ba362841f..31fc0ce6c3 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -296,8 +296,10 @@ fn add_intrinsics<'ctx>(ctx: &'ctx Context, module: &Module<'ctx>) { let void_type = ctx.void_type(); let i1_type = ctx.bool_type(); let f64_type = ctx.f64_type(); + let i128_type = ctx.i128_type(); let i64_type = ctx.i64_type(); let i32_type = ctx.i32_type(); + let i16_type = ctx.i16_type(); let i8_type = ctx.i8_type(); let i8_ptr_type = i8_type.ptr_type(AddressSpace::Generic); @@ -377,18 +379,72 @@ fn add_intrinsics<'ctx>(ctx: &'ctx Context, module: &Module<'ctx>) { f64_type.fn_type(&[f64_type.into()], false), ); + // add with overflow + + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I8, { + let fields = [i8_type.into(), 
i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i8_type.into(), i8_type.into()], false) + }); + + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I16, { + let fields = [i16_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i16_type.into(), i16_type.into()], false) + }); + + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I32, { + let fields = [i32_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i32_type.into(), i32_type.into()], false) + }); + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I64, { let fields = [i64_type.into(), i1_type.into()]; ctx.struct_type(&fields, false) .fn_type(&[i64_type.into(), i64_type.into()], false) }); + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I128, { + let fields = [i128_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i128_type.into(), i128_type.into()], false) + }); + + // sub with overflow + + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I8, { + let fields = [i8_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i8_type.into(), i8_type.into()], false) + }); + + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I16, { + let fields = [i16_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i16_type.into(), i16_type.into()], false) + }); + + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I32, { + let fields = [i32_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i32_type.into(), i32_type.into()], false) + }); + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I64, { let fields = [i64_type.into(), i1_type.into()]; ctx.struct_type(&fields, false) .fn_type(&[i64_type.into(), i64_type.into()], false) }); + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I128, { + let fields = [i128_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i128_type.into(), i128_type.into()], false) + }); + + // mul with overflow + add_intrinsic(module, 
LLVM_SMUL_WITH_OVERFLOW_I64, { let fields = [i64_type.into(), i1_type.into()]; ctx.struct_type(&fields, false) @@ -406,8 +462,19 @@ static LLVM_COS_F64: &str = "llvm.cos.f64"; static LLVM_POW_F64: &str = "llvm.pow.f64"; static LLVM_CEILING_F64: &str = "llvm.ceil.f64"; static LLVM_FLOOR_F64: &str = "llvm.floor.f64"; + +pub static LLVM_SADD_WITH_OVERFLOW_I8: &str = "llvm.sadd.with.overflow.i8"; +pub static LLVM_SADD_WITH_OVERFLOW_I16: &str = "llvm.sadd.with.overflow.i16"; +pub static LLVM_SADD_WITH_OVERFLOW_I32: &str = "llvm.sadd.with.overflow.i32"; pub static LLVM_SADD_WITH_OVERFLOW_I64: &str = "llvm.sadd.with.overflow.i64"; +pub static LLVM_SADD_WITH_OVERFLOW_I128: &str = "llvm.sadd.with.overflow.i128"; + +pub static LLVM_SSUB_WITH_OVERFLOW_I8: &str = "llvm.ssub.with.overflow.i8"; +pub static LLVM_SSUB_WITH_OVERFLOW_I16: &str = "llvm.ssub.with.overflow.i16"; +pub static LLVM_SSUB_WITH_OVERFLOW_I32: &str = "llvm.ssub.with.overflow.i32"; pub static LLVM_SSUB_WITH_OVERFLOW_I64: &str = "llvm.ssub.with.overflow.i64"; +pub static LLVM_SSUB_WITH_OVERFLOW_I128: &str = "llvm.ssub.with.overflow.i128"; + pub static LLVM_SMUL_WITH_OVERFLOW_I64: &str = "llvm.smul.with.overflow.i64"; fn add_intrinsic<'ctx>( @@ -4506,7 +4573,7 @@ fn build_int_binop<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, parent: FunctionValue<'ctx>, lhs: IntValue<'ctx>, - _lhs_layout: &Layout<'a>, + lhs_layout: &Layout<'a>, rhs: IntValue<'ctx>, _rhs_layout: &Layout<'a>, op: LowLevel, @@ -4519,8 +4586,23 @@ fn build_int_binop<'a, 'ctx, 'env>( match op { NumAdd => { let context = env.context; + + let intrinsic = match lhs_layout { + Layout::Builtin(Builtin::Int8) => LLVM_SADD_WITH_OVERFLOW_I8, + Layout::Builtin(Builtin::Int16) => LLVM_SADD_WITH_OVERFLOW_I16, + Layout::Builtin(Builtin::Int32) => LLVM_SADD_WITH_OVERFLOW_I32, + Layout::Builtin(Builtin::Int64) => LLVM_SADD_WITH_OVERFLOW_I64, + Layout::Builtin(Builtin::Int128) => LLVM_SADD_WITH_OVERFLOW_I128, + Layout::Builtin(Builtin::Usize) => match env.ptr_bytes 
{ + 4 => LLVM_SADD_WITH_OVERFLOW_I32, + 8 => LLVM_SADD_WITH_OVERFLOW_I64, + other => panic!("invalid ptr_bytes {}", other), + }, + _ => unreachable!(), + }; + let result = env - .call_intrinsic(LLVM_SADD_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()]) + .call_intrinsic(intrinsic, &[lhs.into(), rhs.into()]) .into_struct_value(); let add_result = bd.build_extract_value(result, 0, "add_result").unwrap(); @@ -4550,8 +4632,23 @@ fn build_int_binop<'a, 'ctx, 'env>( NumAddChecked => env.call_intrinsic(LLVM_SADD_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()]), NumSub => { let context = env.context; + + let intrinsic = match lhs_layout { + Layout::Builtin(Builtin::Int8) => LLVM_SSUB_WITH_OVERFLOW_I8, + Layout::Builtin(Builtin::Int16) => LLVM_SSUB_WITH_OVERFLOW_I16, + Layout::Builtin(Builtin::Int32) => LLVM_SSUB_WITH_OVERFLOW_I32, + Layout::Builtin(Builtin::Int64) => LLVM_SSUB_WITH_OVERFLOW_I64, + Layout::Builtin(Builtin::Int128) => LLVM_SSUB_WITH_OVERFLOW_I128, + Layout::Builtin(Builtin::Usize) => match env.ptr_bytes { + 4 => LLVM_SSUB_WITH_OVERFLOW_I32, + 8 => LLVM_SSUB_WITH_OVERFLOW_I64, + other => panic!("invalid ptr_bytes {}", other), + }, + _ => unreachable!("invalid layout {:?}", lhs_layout), + }; + let result = env - .call_intrinsic(LLVM_SSUB_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()]) + .call_intrinsic(intrinsic, &[lhs.into(), rhs.into()]) .into_struct_value(); let sub_result = bd.build_extract_value(result, 0, "sub_result").unwrap(); From 9116e9e8c96cfdeadd7c9cd7b78096c75d5b5b10 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 19:52:32 +0100 Subject: [PATCH 06/26] add Str.toBytes --- compiler/builtins/bitcode/src/main.zig | 1 + compiler/builtins/bitcode/src/str.zig | 21 +++++++++++++++++++++ compiler/builtins/src/bitcode.rs | 1 + compiler/builtins/src/std.rs | 6 ++++++ compiler/can/src/builtins.rs | 7 +++++++ compiler/gen/src/llvm/build.rs | 14 ++++++++++++-- compiler/gen/src/llvm/build_str.rs | 22 ++++++++++++++++++++++ compiler/module/src/low_level.rs 
| 1 + compiler/module/src/symbol.rs | 1 + compiler/mono/src/borrow.rs | 1 + 10 files changed, 73 insertions(+), 2 deletions(-) diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig index 7df2061ed9..e124afe62c 100644 --- a/compiler/builtins/bitcode/src/main.zig +++ b/compiler/builtins/bitcode/src/main.zig @@ -68,6 +68,7 @@ comptime { exportStrFn(str.strFromFloatC, "from_float"); exportStrFn(str.strEqual, "equal"); exportStrFn(str.validateUtf8Bytes, "validate_utf8_bytes"); + exportStrFn(str.strToBytesC, "to_bytes"); } // Export helpers - Must be run inside a comptime diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index 32057d35d0..a752a37f0e 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -1,4 +1,5 @@ const utils = @import("utils.zig"); +const RocList = @import("list.zig").RocList; const std = @import("std"); const mem = std.mem; const always_inline = std.builtin.CallOptions.Modifier.always_inline; @@ -961,6 +962,26 @@ test "RocStr.joinWith: result is big" { expect(roc_result.eq(result)); } +// Str.toBytes +pub fn strToBytesC(arg: RocStr) callconv(.C) RocList { + return @call(.{ .modifier = always_inline }, strToBytes, .{ std.heap.c_allocator, arg }); +} + +fn strToBytes(allocator: *Allocator, arg: RocStr) RocList { + if (arg.isEmpty()) { + return RocList.empty(); + } else if (arg.isSmallStr()) { + const length = arg.len(); + const ptr = utils.allocateWithRefcount(allocator, @alignOf(usize), length); + + @memcpy(ptr, arg.asU8ptr(), length); + + return RocList{ .length = length, .bytes = ptr }; + } else { + return RocList{ .length = arg.len(), .bytes = arg.str_bytes }; + } +} + pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool { const bytes: []u8 = ptr[0..len]; return @call(.{ .modifier = always_inline }, unicode.utf8ValidateSlice, .{bytes}); diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs 
index 134bce68ad..b19117e106 100644 --- a/compiler/builtins/src/bitcode.rs +++ b/compiler/builtins/src/bitcode.rs @@ -42,6 +42,7 @@ pub const STR_FROM_INT: &str = "roc_builtins.str.from_int"; pub const STR_FROM_FLOAT: &str = "roc_builtins.str.from_float"; pub const STR_EQUAL: &str = "roc_builtins.str.equal"; pub const STR_VALIDATE_UTF_BYTES: &str = "roc_builtins.str.validate_utf8_bytes"; +pub const STR_TO_BYTES: &str = "roc_builtins.str.to_bytes"; pub const DICT_HASH: &str = "roc_builtins.dict.hash"; pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str"; diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index 5a3b499dd7..5de65ca3dd 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -623,6 +623,12 @@ pub fn types() -> MutMap { ), ); + // toBytes : Str -> List U8 + add_type( + Symbol::STR_TO_BYTES, + top_level_function(vec![str_type()], Box::new(list_type(u8_type()))), + ); + // fromFloat : Float a -> Str add_type( Symbol::STR_FROM_FLOAT, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index af38527fcc..7db64b296f 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -62,6 +62,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option STR_COUNT_GRAPHEMES => str_count_graphemes, STR_FROM_INT => str_from_int, STR_FROM_UTF8 => str_from_utf8, + STR_TO_BYTES => str_to_bytes, STR_FROM_FLOAT=> str_from_float, LIST_LEN => list_len, LIST_GET => list_get, @@ -196,6 +197,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap { Symbol::STR_COUNT_GRAPHEMES => str_count_graphemes, Symbol::STR_FROM_INT => str_from_int, Symbol::STR_FROM_UTF8 => str_from_utf8, + Symbol::STR_TO_BYTES => str_to_bytes, Symbol::STR_FROM_FLOAT=> str_from_float, Symbol::LIST_LEN => list_len, Symbol::LIST_GET => list_get, @@ -1655,6 +1657,11 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def { ) } +/// Str.toBytes : Str -> List U8 +fn 
str_to_bytes(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_1(symbol, LowLevel::StrToBytes, var_store) +} + /// Str.fromFloat : Float * -> Str fn str_from_float(symbol: Symbol, var_store: &mut VarStore) -> Def { let float_var = var_store.fresh(); diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 31fc0ce6c3..506dec1253 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -12,7 +12,7 @@ use crate::llvm::build_list::{ }; use crate::llvm::build_str::{ str_concat, str_count_graphemes, str_ends_with, str_from_float, str_from_int, str_from_utf8, - str_join_with, str_number_of_bytes, str_split, str_starts_with, CHAR_LAYOUT, + str_join_with, str_number_of_bytes, str_split, str_starts_with, str_to_bytes, CHAR_LAYOUT, }; use crate::llvm::compare::{generic_eq, generic_neq}; use crate::llvm::convert::{ @@ -3611,13 +3611,23 @@ fn run_low_level<'a, 'ctx, 'env>( str_from_float(env, scope, args[0]) } StrFromUtf8 => { - // Str.fromInt : Int -> Str + // Str.fromUtf8 : List U8 -> Result Str Utf8Problem debug_assert_eq!(args.len(), 1); let original_wrapper = load_symbol(scope, &args[0]).into_struct_value(); str_from_utf8(env, parent, original_wrapper) } + StrToBytes => { + // Str.toBytes : Str -> List U8 + debug_assert_eq!(args.len(), 1); + + // this is an identity conversion + // we just implement it here to subvert the type system + let string = load_symbol(scope, &args[0]); + + str_to_bytes(env, string.into_struct_value()) + } StrSplit => { // Str.split : Str, Str -> List Str debug_assert_eq!(args.len(), 2); diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs index be27698163..301b726fb3 100644 --- a/compiler/gen/src/llvm/build_str.rs +++ b/compiler/gen/src/llvm/build_str.rs @@ -275,6 +275,28 @@ pub fn str_from_int<'a, 'ctx, 'env>( zig_str_to_struct(env, zig_result).into() } +/// Str.toBytes : Str -> List U8 +pub fn str_to_bytes<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx,
'env>, + original_wrapper: StructValue<'ctx>, +) -> BasicValueEnum<'ctx> { + let string = complex_bitcast( + env.builder, + original_wrapper.into(), + env.context.i128_type().into(), + "to_bytes", + ); + + let zig_result = call_bitcode_fn(env, &[string], &bitcode::STR_TO_BYTES); + + complex_bitcast( + env.builder, + zig_result, + collection(env.context, env.ptr_bytes).into(), + "to_bytes", + ) +} + /// Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 } pub fn str_from_utf8<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index 640b8c8bca..07422cd4d0 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -12,6 +12,7 @@ pub enum LowLevel { StrCountGraphemes, StrFromInt, StrFromUtf8, + StrToBytes, StrFromFloat, ListLen, ListGetUnsafe, diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index 62f5a9d457..2497e9cfd3 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -882,6 +882,7 @@ define_builtins! 
{ 12 STR_FROM_UTF8: "fromUtf8" 13 STR_UT8_PROBLEM: "Utf8Problem" // the Utf8Problem type alias 14 STR_UT8_BYTE_PROBLEM: "Utf8ByteProblem" // the Utf8ByteProblem type alias + 15 STR_TO_BYTES: "toBytes" } 4 LIST: "List" => { 0 LIST_LIST: "List" imported // the List.List type alias diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index c0d4f1e091..c87f0b4e05 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -676,6 +676,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { } StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]), StrFromUtf8 => arena.alloc_slice_copy(&[owned]), + StrToBytes => arena.alloc_slice_copy(&[owned]), StrFromInt | StrFromFloat => arena.alloc_slice_copy(&[irrelevant]), Hash => arena.alloc_slice_copy(&[borrowed, irrelevant]), DictSize => arena.alloc_slice_copy(&[borrowed]), From e218279f42e473689222200bc1ca67649d757391 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 19:54:32 +0100 Subject: [PATCH 07/26] add example files --- examples/benchmarks/Base64.roc | 164 ++++++++++++++++++++++++++++ examples/benchmarks/BytesDecode.roc | 106 ++++++++++++++++++ 2 files changed, 270 insertions(+) create mode 100644 examples/benchmarks/Base64.roc create mode 100644 examples/benchmarks/BytesDecode.roc diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc new file mode 100644 index 0000000000..e9dc460cb3 --- /dev/null +++ b/examples/benchmarks/Base64.roc @@ -0,0 +1,164 @@ +app "base64" + packages { base: "platform" } + imports [base.Task, BytesDecode.{Decoder} ] + provides [ main ] to base + +IO a : Task.Task a [] + +Decoder a : BytesDecode.Decoder a + +main : IO {} +main = + # when fromBytes [ 0 ] is + when fromBytes (Str.toBytes "Hello World") is + Ok str -> + Task.putLine str + + Err _ -> + Task.putLine "sadness" + + + + + +# ------ + + +fromBytes : List U8 -> Result Str BytesDecode.DecodeError +fromBytes = \bytes -> + 
BytesDecode.decode bytes (decodeBase64 (List.len bytes)) + + +decodeBase64 : Nat -> BytesDecode.Decoder Str +decodeBase64 = \width -> BytesDecode.loop loopHelp { remaining: width, string: "" } + +loopHelp : { remaining : Nat, string : Str } -> Decoder (BytesDecode.Step { remaining : Nat, string : Str } Str) +loopHelp = \{ remaining, string } -> + if remaining >= 3 then + helper = \x, y, z -> + a : U32 + a = Num.intCast x + b : U32 + b = Num.intCast y + c : U32 + c = Num.intCast z + combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c + Loop + { + remaining: remaining - 3, + string: Str.concat string (bitsToChars combined 0) + } + + BytesDecode.map3 helper + BytesDecode.u8 + BytesDecode.u8 + BytesDecode.u8 + + else if remaining == 0 then + BytesDecode.succeed (Done string) + + else if remaining == 2 then + helperX = \x, y -> + a : U32 + a = Num.intCast x + b : U32 + b = Num.intCast y + combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b) + Done (Str.concat string (bitsToChars combined 1)) + + BytesDecode.map2 helperX + BytesDecode.u8 + BytesDecode.u8 + else + # remaining = 1 + BytesDecode.u8 + |> BytesDecode.map (\x -> + a : U32 + a = Num.intCast x + Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))) + + +bitsToChars : U32, Int * -> Str +bitsToChars = \bits, missing -> + when Str.fromUtf8 (bitsToCharsHelp bits missing) is + Ok str -> str + Err _ -> "" + +# Mask that can be used to get the lowest 6 bits of a binary number +lowest6BitsMask : Int * +lowest6BitsMask = 63 + + +bitsToCharsHelp : U32, Int * -> List U8 +bitsToCharsHelp = \bits, missing -> + # Performance Notes + # `String.cons` proved to be the fastest way of combining characters into a string + # see also https://github.com/danfishgold/base64-bytes/pull/3#discussion_r342321940 + # The input is 24 bits, which we have to partition into 4 6-bit segments. 
We achieve this by + # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd` + # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left. + + # any 6-bit number is a valid base64 digit, so this is actually safe + p = + Num.shiftRightZfBy 18 bits + |> Num.intCast + |> unsafeToChar + + q = + Num.bitwiseAnd (Num.shiftRightZfBy 12 bits) lowest6BitsMask + |> Num.intCast + |> unsafeToChar + + r = + Num.bitwiseAnd (Num.shiftRightZfBy 6 bits) lowest6BitsMask + |> Num.intCast + |> unsafeToChar + + s = + Num.bitwiseAnd bits lowest6BitsMask + |> Num.intCast + |> unsafeToChar + + equals : U8 + equals = 61 + + when missing is + 0 -> + [ p, q, r, s ] + 1 -> + [ p, q, r, equals ] + 2 -> + [ p, q, equals , equals ] + _ -> + # unreachable + [] + +# Base64 index to character/digit +unsafeToChar : U8 -> U8 +unsafeToChar = \n -> + if n <= 25 then + # uppercase characters + 65 + n + + else if n <= 51 then + # lowercase characters + 97 + (n - 26) + + else if n <= 61 then + # digit characters + 48 + (n - 52) + + else + # special cases + when n is + 62 -> + # '+' + 43 + + 63 -> + # '/' + 47 + + _ -> + # anything else is invalid '\u{0000}' + 0 diff --git a/examples/benchmarks/BytesDecode.roc b/examples/benchmarks/BytesDecode.roc new file mode 100644 index 0000000000..03a341c4ff --- /dev/null +++ b/examples/benchmarks/BytesDecode.roc @@ -0,0 +1,106 @@ +interface BytesDecode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports [] + +State : { bytes: List U8, cursor : Nat } + +DecodeError : [ OutOfBytes ] + + +Decoder a : [ @Decoder (State -> [Good State a, Bad DecodeError]) ] + +decode : List U8, Decoder a -> Result a DecodeError +decode = \bytes, @Decoder decoder -> + when decoder { bytes, cursor: 0 } is + Good _ value -> + Ok value + + Bad e -> + Err e + +succeed : a -> Decoder a +succeed = \value -> @Decoder \state -> Good state value + +map : Decoder a, (a -> b) 
-> Decoder b +map = \@Decoder decoder, transform -> + @Decoder \state -> + when decoder state is + Good state1 value -> + Good state1 (transform value) + + Bad e -> + Bad e + + +map2 : (a,b -> c), Decoder a, Decoder b -> Decoder c +map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> + @Decoder \state1 -> + when decoder1 state1 is + Good state2 a -> + when decoder2 state2 is + Good state3 b -> + Good state3 (transform a b) + + Bad e -> + Bad e + + Bad e -> + Bad e + +map3 : (a, b, c -> d), Decoder a, Decoder b, Decoder c -> Decoder d +map3 = \transform, @Decoder decoder1, @Decoder decoder2, @Decoder decoder3 -> + @Decoder \state1 -> + when decoder1 state1 is + Good state2 a -> + when decoder2 state2 is + Good state3 b -> + when decoder3 state3 is + Good state4 c -> + Good state4 (transform a b c) + + Bad e -> + Bad e + + Bad e -> + Bad e + + Bad e -> + Bad e + +after : Decoder a, (a -> Decoder b) -> Decoder b +after = \@Decoder decoder, transform -> + @Decoder \state -> + when decoder state is + Good state1 value -> + (@Decoder decoder1) = transform value + decoder1 state1 + + + Bad e -> + Bad e + +u8 : Decoder U8 +u8 = @Decoder \state -> + when List.get state.bytes state.cursor is + Ok b -> + Good { state & cursor: state.cursor + 1 } b + + Err _ -> + Bad OutOfBytes + +Step state b : [ Loop state, Done b ] + +loop : (state -> Decoder (Step state a)), state -> Decoder a +loop = \stepper, initial -> + @Decoder \state -> + loopHelp stepper initial state + +loopHelp = \stepper, accum, state -> + (@Decoder stepper1) = stepper accum + when stepper1 state is + Good newState (Done value) -> + Good newState value + + Good newState (Loop newAccum) -> + loopHelp stepper newAccum newState + + Bad e -> + Bad e From c4ddeefed96e741b6208d38ad47d5ee78253e515 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 20:45:17 +0100 Subject: [PATCH 08/26] add test of integer type inference let polymorphism --- compiler/solve/tests/solve_expr.rs | 22 ++++++++++++++++++++++ 1 
file changed, 22 insertions(+) diff --git a/compiler/solve/tests/solve_expr.rs b/compiler/solve/tests/solve_expr.rs index 229ae772a7..ba5d0519c3 100644 --- a/compiler/solve/tests/solve_expr.rs +++ b/compiler/solve/tests/solve_expr.rs @@ -4315,4 +4315,26 @@ mod solve_expr { "Str", ); } + + #[test] + fn int_type_let_polymorphism() { + infer_eq_without_problem( + indoc!( + r#" + app "test" provides [ main ] to "./platform" + + x = 4 + + f : U8 -> U32 + f = \z -> Num.intCast z + + y = f x + + main = + x + "# + ), + "Num *", + ); + } } From ad96d1ae24202c8cbe7ce170b051b9cd78c692e9 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 20:45:26 +0100 Subject: [PATCH 09/26] trim comment --- examples/benchmarks/Base64.roc | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index e9dc460cb3..cfcc364334 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -91,9 +91,6 @@ lowest6BitsMask = 63 bitsToCharsHelp : U32, Int * -> List U8 bitsToCharsHelp = \bits, missing -> - # Performance Notes - # `String.cons` proved to be the fastest way of combining characters into a string - # see also https://github.com/danfishgold/base64-bytes/pull/3#discussion_r342321940 # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd` # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left. 
From 86cf7cd983f92cb2a9bf8d9b443528bf8040f3c3 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 20:52:06 +0100 Subject: [PATCH 10/26] move file --- examples/benchmarks/Base64.roc | 34 +++++++++---------- .../{BytesDecode.roc => Bytes/Decode.roc} | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) rename examples/benchmarks/{BytesDecode.roc => Bytes/Decode.roc} (95%) diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index cfcc364334..92b30f1555 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -1,11 +1,11 @@ app "base64" packages { base: "platform" } - imports [base.Task, BytesDecode.{Decoder} ] + imports [base.Task, Bytes.Decode.{Decoder} ] provides [ main ] to base IO a : Task.Task a [] -Decoder a : BytesDecode.Decoder a +Decoder a : Bytes.Decode.Decoder a main : IO {} main = @@ -24,15 +24,15 @@ main = # ------ -fromBytes : List U8 -> Result Str BytesDecode.DecodeError +fromBytes : List U8 -> Result Str Bytes.Decode.DecodeError fromBytes = \bytes -> - BytesDecode.decode bytes (decodeBase64 (List.len bytes)) + Bytes.Decode.decode bytes (decodeBase64 (List.len bytes)) -decodeBase64 : Nat -> BytesDecode.Decoder Str -decodeBase64 = \width -> BytesDecode.loop loopHelp { remaining: width, string: "" } +decodeBase64 : Nat -> Bytes.Decode.Decoder Str +decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string: "" } -loopHelp : { remaining : Nat, string : Str } -> Decoder (BytesDecode.Step { remaining : Nat, string : Str } Str) +loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str) loopHelp = \{ remaining, string } -> if remaining >= 3 then helper = \x, y, z -> @@ -49,13 +49,13 @@ loopHelp = \{ remaining, string } -> string: Str.concat string (bitsToChars combined 0) } - BytesDecode.map3 helper - BytesDecode.u8 - BytesDecode.u8 - BytesDecode.u8 + Bytes.Decode.map3 helper + Bytes.Decode.u8 + Bytes.Decode.u8 + 
Bytes.Decode.u8 else if remaining == 0 then - BytesDecode.succeed (Done string) + Bytes.Decode.succeed (Done string) else if remaining == 2 then helperX = \x, y -> @@ -66,13 +66,13 @@ loopHelp = \{ remaining, string } -> combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b) Done (Str.concat string (bitsToChars combined 1)) - BytesDecode.map2 helperX - BytesDecode.u8 - BytesDecode.u8 + Bytes.Decode.map2 helperX + Bytes.Decode.u8 + Bytes.Decode.u8 else # remaining = 1 - BytesDecode.u8 - |> BytesDecode.map (\x -> + Bytes.Decode.u8 + |> Bytes.Decode.map (\x -> a : U32 a = Num.intCast x Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))) diff --git a/examples/benchmarks/BytesDecode.roc b/examples/benchmarks/Bytes/Decode.roc similarity index 95% rename from examples/benchmarks/BytesDecode.roc rename to examples/benchmarks/Bytes/Decode.roc index 03a341c4ff..db14d857a6 100644 --- a/examples/benchmarks/BytesDecode.roc +++ b/examples/benchmarks/Bytes/Decode.roc @@ -1,4 +1,4 @@ -interface BytesDecode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports [] +interface Bytes.Decode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports [] State : { bytes: List U8, cursor : Nat } From c4972f45baaa1dae44736079e644594a2081d4b8 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 21:12:47 +0100 Subject: [PATCH 11/26] bit shift tests --- compiler/gen/tests/gen_num.rs | 25 +++++++++++++++++++++++++ examples/benchmarks/Base64.roc | 4 ---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs index 262b8495b9..5fb442927c 100644 --- a/compiler/gen/tests/gen_num.rs +++ b/compiler/gen/tests/gen_num.rs @@ -1343,4 +1343,29 @@ mod gen_num { f64 ); } + + #[test] + fn shift_left_by() { + assert_evals_to!("Num.shiftLeftBy 0 0b0000_0001", 0b0000_0001, i64); + assert_evals_to!("Num.shiftLeftBy 1 
0b0000_0001", 0b0000_0010, i64); + assert_evals_to!("Num.shiftLeftBy 2 0b0000_0011", 0b0000_1100, i64); + } + + #[test] + #[ignore] + fn shift_right_by() { + // Sign Extended Right Shift + assert_evals_to!("Num.shiftRightBy 0 0b0100_0000i8", 0b0001_0000, i8); + assert_evals_to!("Num.shiftRightBy 1 0b1110_0000u8", 0b1111_0000u8 as i8, i8); + assert_evals_to!("Num.shiftRightBy 2 0b1100_0000u8", 0b1111_0000u8 as i8, i8); + } + + #[test] + #[ignore] + fn shift_right_zf_by() { + // Logical Right Shift + assert_evals_to!("Num.shiftRightBy 1 0b1100_0000u8", 0b0011_0000, i64); + assert_evals_to!("Num.shiftRightBy 2 0b0000_0010u8", 0b0000_0001, i64); + assert_evals_to!("Num.shiftRightBy 3 0b0000_1100u8", 0b0000_0011, i64); + } } diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index 92b30f1555..1fed392288 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -17,10 +17,6 @@ main = Err _ -> Task.putLine "sadness" - - - - # ------ From 1746b8da6f5cb3662bf26941b2ffef162eebbe29 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 21:14:20 +0100 Subject: [PATCH 12/26] bitwise or test --- compiler/gen/tests/gen_num.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs index 5fb442927c..f6976de9e0 100644 --- a/compiler/gen/tests/gen_num.rs +++ b/compiler/gen/tests/gen_num.rs @@ -750,6 +750,12 @@ mod gen_num { assert_evals_to!("Num.bitwiseXor 200 0", 200, i64); } + #[test] + fn bitwise_or() { + assert_evals_to!("Num.bitwiseOr 1 1", 1, i64); + assert_evals_to!("Num.bitwiseOr 1 2", 3, i64); + } + #[test] fn lt_i64() { assert_evals_to!("1 < 2", true, bool); From 63091392f55398924d7b688de9ca20af230f3db9 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 21:18:15 +0100 Subject: [PATCH 13/26] toBytes test --- compiler/gen/tests/gen_str.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compiler/gen/tests/gen_str.rs 
b/compiler/gen/tests/gen_str.rs index dafdbf4b60..53bfa1f53e 100644 --- a/compiler/gen/tests/gen_str.rs +++ b/compiler/gen/tests/gen_str.rs @@ -816,4 +816,17 @@ mod gen_str { fn str_from_float() { assert_evals_to!(r#"Str.fromFloat 3.14"#, RocStr::from("3.140000"), RocStr); } + + #[test] + fn str_to_bytes() { + assert_evals_to!(r#"Str.toBytes "hello""#, &[104, 101, 108, 108, 111], &[u8]); + assert_evals_to!( + r#"Str.toBytes "this is a long string""#, + &[ + 116, 104, 105, 115, 32, 105, 115, 32, 97, 32, 108, 111, 110, 103, 32, 115, 116, + 114, 105, 110, 103 + ], + &[u8] + ); + } } From c24d51e69d39326343827d5b94f826fc6a2d8868 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 15:14:52 +0100 Subject: [PATCH 14/26] remove old function --- compiler/mono/src/ir.rs | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 79628b27ac..9d4d41efd3 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -302,39 +302,6 @@ pub enum InProgressProc<'a> { } impl<'a> Procs<'a> { - /// Absorb the contents of another Procs into this one. 
- pub fn absorb(&mut self, mut other: Procs<'a>) { - debug_assert!(self.pending_specializations.is_some()); - debug_assert!(other.pending_specializations.is_some()); - - match self.pending_specializations { - Some(ref mut pending_specializations) => { - for (k, v) in other.pending_specializations.unwrap().drain() { - pending_specializations.insert(k, v); - } - } - None => { - unreachable!(); - } - } - - for (k, v) in other.partial_procs.drain() { - self.partial_procs.insert(k, v); - } - - for (k, v) in other.specialized.drain() { - self.specialized.insert(k, v); - } - - for (k, v) in other.runtime_errors.drain() { - self.runtime_errors.insert(k, v); - } - - for symbol in other.module_thunks.drain() { - self.module_thunks.insert(symbol); - } - } - pub fn get_specialized_procs_without_rc( self, arena: &'a Bump, From 092db87474a1ed13457d3aa647e07a66934b141e Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:05:44 +0100 Subject: [PATCH 15/26] add import dependencies to module cache --- compiler/load/src/file.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs index 44fe5bfb0b..058b6f6821 100644 --- a/compiler/load/src/file.rs +++ b/compiler/load/src/file.rs @@ -358,6 +358,7 @@ struct ModuleCache<'a> { external_specializations_requested: MutMap, /// Various information + imports: MutMap>, documentation: MutMap, can_problems: MutMap>, type_problems: MutMap>, @@ -1641,6 +1642,18 @@ fn update<'a>( .exposed_symbols_by_module .insert(home, exposed_symbols); + state + .module_cache + .imports + .entry(header.module_id) + .or_default() + .extend( + header + .package_qualified_imported_modules + .iter() + .map(|x| *x.as_inner()), + ); + work.extend(state.dependencies.add_module( header.module_id, &header.package_qualified_imported_modules, From 64955f23ff0b29bb1fa7ec9393a09d49f516597c Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:07:39 +0100 Subject: [PATCH 16/26] store module 
thunks --- compiler/load/src/file.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs index 058b6f6821..cabeef9470 100644 --- a/compiler/load/src/file.rs +++ b/compiler/load/src/file.rs @@ -359,6 +359,7 @@ struct ModuleCache<'a> { /// Various information imports: MutMap>, + top_level_thunks: MutMap>, documentation: MutMap, can_problems: MutMap>, type_problems: MutMap>, @@ -1917,6 +1918,13 @@ fn update<'a>( } } + state + .module_cache + .top_level_thunks + .entry(module_id) + .or_default() + .extend(procs.module_thunks.iter().copied()); + let found_specializations_module = FoundSpecializationsModule { layout_cache, module_id, From a361148380d64a54b5e42f64c88b484f2d718e3a Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:09:47 +0100 Subject: [PATCH 17/26] add imported_module_thunks --- compiler/mono/src/ir.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 9d4d41efd3..aaf86934ab 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -273,6 +273,7 @@ impl ExternalSpecializations { #[derive(Clone, Debug)] pub struct Procs<'a> { pub partial_procs: MutMap>, + pub imported_module_thunks: MutSet, pub module_thunks: MutSet, pub pending_specializations: Option, PendingSpecialization>>>, pub specialized: MutMap<(Symbol, Layout<'a>), InProgressProc<'a>>, @@ -285,6 +286,7 @@ impl<'a> Default for Procs<'a> { fn default() -> Self { Self { partial_procs: MutMap::default(), + imported_module_thunks: MutSet::default(), module_thunks: MutSet::default(), pending_specializations: Some(MutMap::default()), specialized: MutMap::default(), From 6bd10ddc050a8ef2d3568401c2b492beeea4b5ca Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:23:35 +0100 Subject: [PATCH 18/26] use imported module thunks for pointer calling --- compiler/load/src/file.rs | 20 ++++++++++++++++++++ compiler/mono/src/ir.rs | 5 ++++- 2 files changed, 24 
insertions(+), 1 deletion(-) diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs index cabeef9470..283be55b16 100644 --- a/compiler/load/src/file.rs +++ b/compiler/load/src/file.rs @@ -546,11 +546,24 @@ fn start_phase<'a>(module_id: ModuleId, phase: Phase, state: &mut State<'a>) -> ident_ids, } = typechecked; + let mut imported_module_thunks = MutSet::default(); + + if let Some(imports) = state.module_cache.imports.get(&module_id) { + for imported in imports.iter() { + imported_module_thunks.extend( + state.module_cache.top_level_thunks[imported] + .iter() + .copied(), + ); + } + } + BuildTask::BuildPendingSpecializations { layout_cache, module_id, module_timing, solved_subs, + imported_module_thunks, decls, ident_ids, exposed_to_host: state.exposed_to_host.clone(), @@ -950,6 +963,7 @@ enum BuildTask<'a> { module_timing: ModuleTiming, layout_cache: LayoutCache<'a>, solved_subs: Solved, + imported_module_thunks: MutSet, module_id: ModuleId, ident_ids: IdentIds, decls: Vec, @@ -3666,6 +3680,7 @@ fn make_specializations<'a>( fn build_pending_specializations<'a>( arena: &'a Bump, solved_subs: Solved, + imported_module_thunks: MutSet, home: ModuleId, mut ident_ids: IdentIds, decls: Vec, @@ -3678,6 +3693,9 @@ fn build_pending_specializations<'a>( let find_specializations_start = SystemTime::now(); let mut procs = Procs::default(); + debug_assert!(procs.imported_module_thunks.is_empty()); + procs.imported_module_thunks = imported_module_thunks; + let mut mono_problems = std::vec::Vec::new(); let mut subs = solved_subs.into_inner(); let mut mono_env = roc_mono::ir::Env { @@ -3959,10 +3977,12 @@ where module_timing, layout_cache, solved_subs, + imported_module_thunks, exposed_to_host, } => Ok(build_pending_specializations( arena, solved_subs, + imported_module_thunks, module_id, ident_ids, decls, diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index aaf86934ab..991249ee8e 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ 
-5725,8 +5725,11 @@ fn call_by_pointer<'a>( // cause issues. The caller (which is here) doesn't know whether the called is a closure // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name // more in the future + let is_thunk = + procs.module_thunks.contains(&symbol) || procs.imported_module_thunks.contains(&symbol); + match layout { - Layout::FunctionPointer(arg_layouts, ret_layout) if false => { + Layout::FunctionPointer(arg_layouts, ret_layout) if !is_thunk => { if arg_layouts.iter().any(|l| l.contains_refcounted()) { let name = env.unique_symbol(); let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena); From 1c1c53ba950f3a8ecc9dfd1fc44bc4826369f717 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:38:10 +0100 Subject: [PATCH 19/26] flip map argument order --- examples/benchmarks/Base64.roc | 58 ++++++++++++++-------------- examples/benchmarks/Bytes/Decode.roc | 8 ++-- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index 1fed392288..1f2ecad216 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -31,47 +31,47 @@ decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string: loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str) loopHelp = \{ remaining, string } -> if remaining >= 3 then - helper = \x, y, z -> - a : U32 - a = Num.intCast x - b : U32 - b = Num.intCast y - c : U32 - c = Num.intCast z - combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c - Loop - { - remaining: remaining - 3, - string: Str.concat string (bitsToChars combined 0) - } - - Bytes.Decode.map3 helper + Bytes.Decode.map3 Bytes.Decode.u8 Bytes.Decode.u8 Bytes.Decode.u8 + \x, y, z -> + a : U32 + a = Num.intCast x + b : U32 + b = Num.intCast y + c : U32 + c = Num.intCast z + combined = Num.bitwiseOr 
(Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c + Loop + { + remaining: remaining - 3, + string: Str.concat string (bitsToChars combined 0) + } else if remaining == 0 then Bytes.Decode.succeed (Done string) else if remaining == 2 then - helperX = \x, y -> - a : U32 - a = Num.intCast x - b : U32 - b = Num.intCast y - combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b) - Done (Str.concat string (bitsToChars combined 1)) + Bytes.Decode.map2 + Bytes.Decode.u8 + Bytes.Decode.u8 + \x, y -> + a : U32 + a = Num.intCast x + b : U32 + b = Num.intCast y + combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b) + Done (Str.concat string (bitsToChars combined 1)) - Bytes.Decode.map2 helperX - Bytes.Decode.u8 - Bytes.Decode.u8 else # remaining = 1 + Bytes.Decode.map Bytes.Decode.u8 - |> Bytes.Decode.map (\x -> - a : U32 - a = Num.intCast x - Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))) + \x -> + a : U32 + a = Num.intCast x + Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)) bitsToChars : U32, Int * -> Str diff --git a/examples/benchmarks/Bytes/Decode.roc b/examples/benchmarks/Bytes/Decode.roc index db14d857a6..f1da59e389 100644 --- a/examples/benchmarks/Bytes/Decode.roc +++ b/examples/benchmarks/Bytes/Decode.roc @@ -30,8 +30,8 @@ map = \@Decoder decoder, transform -> Bad e -map2 : (a,b -> c), Decoder a, Decoder b -> Decoder c -map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> +map2 : Decoder a, Decoder b, (a, b -> c) -> Decoder c +map2 = \@Decoder decoder1, @Decoder decoder2, transform -> @Decoder \state1 -> when decoder1 state1 is Good state2 a -> @@ -45,8 +45,8 @@ map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> Bad e -> Bad e -map3 : (a, b, c -> d), Decoder a, Decoder b, Decoder c -> Decoder d -map3 = \transform, @Decoder decoder1, @Decoder decoder2, @Decoder decoder3 -> +map3 : Decoder a, Decoder b, Decoder c, (a, b, c -> d) -> Decoder d +map3 = \@Decoder decoder1, @Decoder 
decoder2, @Decoder decoder3, transform -> @Decoder \state1 -> when decoder1 state1 is Good state2 a -> From aff8266f0f6526bad8836628b528274f39bf6c55 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:40:46 +0100 Subject: [PATCH 20/26] move astar test --- cli/tests/cli_run.rs | 4 ++-- examples/benchmarks/{AStarTests.roc => TestAStar.roc} | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename examples/benchmarks/{AStarTests.roc => TestAStar.roc} (98%) diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index 6ad19aed58..2f81acaec2 100644 --- a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -231,9 +231,9 @@ mod cli_run { #[serial(astar)] fn run_astar_optimized_1() { check_output_with_stdin( - &example_file("benchmarks", "AStarTests.roc"), + &example_file("benchmarks", "TestAStar.roc"), "1", - "astar-tests", + "test-astar", &[], "True\n", false, diff --git a/examples/benchmarks/AStarTests.roc b/examples/benchmarks/TestAStar.roc similarity index 98% rename from examples/benchmarks/AStarTests.roc rename to examples/benchmarks/TestAStar.roc index 1cb5909c8f..0401566b6a 100644 --- a/examples/benchmarks/AStarTests.roc +++ b/examples/benchmarks/TestAStar.roc @@ -1,4 +1,4 @@ -app "astar-tests" +app "test-astar" packages { base: "platform" } imports [base.Task, AStar] provides [ main ] to base From 17a44aab024877acf1d2822c155da97f96257baf Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:40:56 +0100 Subject: [PATCH 21/26] fix whitespace --- examples/benchmarks/Base64.roc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index 1f2ecad216..c2be2d31fa 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -31,7 +31,7 @@ decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string: loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } 
Str) loopHelp = \{ remaining, string } -> if remaining >= 3 then - Bytes.Decode.map3 + Bytes.Decode.map3 Bytes.Decode.u8 Bytes.Decode.u8 Bytes.Decode.u8 @@ -53,7 +53,7 @@ loopHelp = \{ remaining, string } -> Bytes.Decode.succeed (Done string) else if remaining == 2 then - Bytes.Decode.map2 + Bytes.Decode.map2 Bytes.Decode.u8 Bytes.Decode.u8 \x, y -> @@ -66,9 +66,9 @@ loopHelp = \{ remaining, string } -> else # remaining = 1 - Bytes.Decode.map + Bytes.Decode.map Bytes.Decode.u8 - \x -> + \x -> a : U32 a = Num.intCast x Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)) @@ -90,33 +90,33 @@ bitsToCharsHelp = \bits, missing -> # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd` # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left. - + # any 6-bit number is a valid base64 digit, so this is actually safe p = Num.shiftRightZfBy 18 bits |> Num.intCast - |> unsafeToChar + |> unsafeToChar q = Num.bitwiseAnd (Num.shiftRightZfBy 12 bits) lowest6BitsMask |> Num.intCast - |> unsafeToChar + |> unsafeToChar r = Num.bitwiseAnd (Num.shiftRightZfBy 6 bits) lowest6BitsMask |> Num.intCast - |> unsafeToChar + |> unsafeToChar s = Num.bitwiseAnd bits lowest6BitsMask |> Num.intCast - |> unsafeToChar + |> unsafeToChar equals : U8 equals = 61 when missing is - 0 -> + 0 -> [ p, q, r, s ] 1 -> [ p, q, r, equals ] From a6edc58323f1d11e6544abd070afe728c6568ab7 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 21:25:14 +0100 Subject: [PATCH 22/26] make fromUtf8 do RC --- cli/tests/cli_run.rs | 12 ++ compiler/builtins/bitcode/src/main.zig | 2 +- compiler/builtins/bitcode/src/str.zig | 158 +++++++++++++++++-------- compiler/builtins/src/bitcode.rs | 2 +- compiler/can/src/builtins.rs | 8 +- compiler/gen/src/llvm/build_str.rs | 122 ++++--------------- examples/benchmarks/Base64.roc | 19 +-- 
examples/benchmarks/TestBase64.roc | 17 +++ 8 files changed, 172 insertions(+), 168 deletions(-) create mode 100644 examples/benchmarks/TestBase64.roc diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index 2f81acaec2..f3889ae374 100644 --- a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -240,6 +240,18 @@ mod cli_run { ); } + #[test] + #[serial(base64)] + fn base64() { + check_output( + &example_file("benchmarks", "TestBase64.roc"), + "test-base64", + &[], + "SGVsbG8gV29ybGQ=", + true, + ); + } + #[test] #[serial(closure)] fn closure() { diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig index e124afe62c..e20d376b88 100644 --- a/compiler/builtins/bitcode/src/main.zig +++ b/compiler/builtins/bitcode/src/main.zig @@ -67,8 +67,8 @@ comptime { exportStrFn(str.strFromIntC, "from_int"); exportStrFn(str.strFromFloatC, "from_float"); exportStrFn(str.strEqual, "equal"); - exportStrFn(str.validateUtf8Bytes, "validate_utf8_bytes"); exportStrFn(str.strToBytesC, "to_bytes"); + exportStrFn(str.fromUtf8C, "from_utf8"); } // Export helpers - Must be run inside a comptime diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index a752a37f0e..572eaa14e0 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -15,6 +15,7 @@ const InPlace = packed enum(u8) { Clone, }; +const SMALL_STR_MAX_LENGTH = small_string_size - 1; const small_string_size = 2 * @sizeOf(usize); const blank_small_string: [16]u8 = init_blank_small_string(small_string_size); @@ -982,6 +983,71 @@ fn strToBytes(allocator: *Allocator, arg: RocStr) RocList { } } +const FromUtf8Result = extern struct { + byte_index: usize, + string: RocStr, + is_ok: bool, + problem_code: Utf8ByteProblem, +}; + +pub fn fromUtf8C(arg: RocList, output: *FromUtf8Result) callconv(.C) void { + output.* = @call(.{ .modifier = always_inline }, fromUtf8, .{ std.heap.c_allocator, arg }); +} + +fn fromUtf8(allocator: 
*Allocator, arg: RocList) FromUtf8Result { + const bytes = @ptrCast([*]const u8, arg.bytes)[0..arg.length]; + + if (unicode.utf8ValidateSlice(bytes)) { + // the output will be correct. Now we need to take ownership of the input + if (arg.len() <= SMALL_STR_MAX_LENGTH) { + // turn the bytes into a small string + const string = RocStr.init(allocator, @ptrCast([*]u8, arg.bytes), arg.len()); + + // then decrement the input list + const data_bytes = arg.len(); + utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes); + + return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte }; + } else { + const byte_list = arg.makeUnique(allocator, @alignOf(usize), @sizeOf(u8)); + + const string = RocStr{ .str_bytes = byte_list.bytes, .str_len = byte_list.length }; + + return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte }; + } + } else { + const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length); + + // TODO what should we do RC-wise here + // const data_bytes = arg.len(); + // utils.decref(allocator, @alignOf(usize), arg.list_bytes, data_bytes); + + return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, .problem_code = temp.problem }; + } +} + +fn errorToProblem(bytes: [*]u8, length: usize) struct { index: usize, problem: Utf8ByteProblem } { + var index: usize = 0; + + while (index < length) { + const nextNumBytes = numberOfNextCodepointBytes(bytes, length, index) catch |err| { + switch (err) { + error.UnexpectedEof => { + return .{ .index = index, .problem = Utf8ByteProblem.UnexpectedEndOfSequence }; + }, + error.Utf8InvalidStartByte => return .{ .index = index, .problem = Utf8ByteProblem.InvalidStartByte }, + error.Utf8ExpectedContinuation => return .{ .index = index, .problem = Utf8ByteProblem.ExpectedContinuation }, + error.Utf8OverlongEncoding => return .{ .index = index, .problem = 
Utf8ByteProblem.OverlongEncoding }, + error.Utf8EncodesSurrogateHalf => return .{ .index = index, .problem = Utf8ByteProblem.EncodesSurrogateHalf }, + error.Utf8CodepointTooLarge => return .{ .index = index, .problem = Utf8ByteProblem.CodepointTooLarge }, + } + }; + index += nextNumBytes; + } + + unreachable; +} + pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool { const bytes: []u8 = ptr[0..len]; return @call(.{ .modifier = always_inline }, unicode.utf8ValidateSlice, .{bytes}); @@ -1019,76 +1085,74 @@ pub const Utf8ByteProblem = packed enum(u8) { OverlongEncoding = 4, UnexpectedEndOfSequence = 5, }; -pub const ValidateUtf8BytesResult = extern struct { - is_ok: bool, byte_index: usize, problem_code: Utf8ByteProblem -}; -const is_ok_utf8_byte_response = - ValidateUtf8BytesResult{ .is_ok = true, .byte_index = 0, .problem_code = Utf8ByteProblem.UnexpectedEndOfSequence }; -inline fn toErrUtf8ByteResponse(byte_index: usize, problem_code: Utf8ByteProblem) ValidateUtf8BytesResult { - return ValidateUtf8BytesResult{ .is_ok = false, .byte_index = byte_index, .problem_code = problem_code }; +fn validateUtf8Bytes(bytes: [*]u8, length: usize) FromUtf8Result { + return fromUtf8(std.testing.allocator, RocList{ .bytes = bytes, .length = length }); } -// Validate that an array of bytes is valid UTF-8, but if it fails catch & return the error & byte index -pub fn validateUtf8Bytes(ptr: [*]u8, len: usize) callconv(.C) ValidateUtf8BytesResult { - var index: usize = 0; - while (index < len) { - const nextNumBytes = numberOfNextCodepointBytes(ptr, len, index) catch |err| { - return toErrUtf8ByteResponse( - index, - switch (err) { - error.UnexpectedEof => Utf8ByteProblem.UnexpectedEndOfSequence, - error.Utf8InvalidStartByte => Utf8ByteProblem.InvalidStartByte, - error.Utf8ExpectedContinuation => Utf8ByteProblem.ExpectedContinuation, - error.Utf8OverlongEncoding => Utf8ByteProblem.OverlongEncoding, - error.Utf8EncodesSurrogateHalf => Utf8ByteProblem.EncodesSurrogateHalf, - 
error.Utf8CodepointTooLarge => Utf8ByteProblem.CodepointTooLarge, - }, - ); - }; - index += nextNumBytes; - } - return is_ok_utf8_byte_response; +fn validateUtf8BytesX(str: RocList) FromUtf8Result { + return fromUtf8(std.testing.allocator, str); } +fn expectOk(result: FromUtf8Result) void { + expectEqual(result.is_ok, true); +} + +fn sliceHelp(bytes: [*]const u8, length: usize) RocList { + var list = RocList.allocate(testing.allocator, @alignOf(usize), length, @sizeOf(u8)); + @memcpy(list.bytes orelse unreachable, bytes, length); + list.length = length; + + return list; +} + +fn toErrUtf8ByteResponse(index: usize, problem: Utf8ByteProblem) FromUtf8Result { + return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = index, .problem_code = problem }; +} + +// NOTE on memory: the validate function consumes a RC token of the input. Since +// we freshly created it (in `sliceHelp`), it has only one RC token, and input list will be deallocated. +// +// If we tested with big strings, we'd have to deallocate the output string, but never the input list + test "validateUtf8Bytes: ascii" { - const str_len = 3; - var str: [str_len]u8 = "abc".*; - const str_ptr: [*]u8 = &str; + const raw = "abc"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: unicode œ" { - const str_len = 2; - var str: [str_len]u8 = "œ".*; - const str_ptr: [*]u8 = &str; + const raw = "œ"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: unicode ∆" { - const str_len = 3; - var str: [str_len]u8 = "∆".*; - const str_ptr: [*]u8 = &str; + const raw = "∆"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list 
= sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: emoji" { - const str_len = 4; - var str: [str_len]u8 = "💖".*; - const str_ptr: [*]u8 = &str; + const raw = "💖"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: unicode ∆ in middle of array" { - const str_len = 9; - var str: [str_len]u8 = "œb∆c¬".*; - const str_ptr: [*]u8 = &str; + const raw = "œb∆c¬"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: invalid start byte" { diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs index b19117e106..125099af96 100644 --- a/compiler/builtins/src/bitcode.rs +++ b/compiler/builtins/src/bitcode.rs @@ -41,8 +41,8 @@ pub const STR_NUMBER_OF_BYTES: &str = "roc_builtins.str.number_of_bytes"; pub const STR_FROM_INT: &str = "roc_builtins.str.from_int"; pub const STR_FROM_FLOAT: &str = "roc_builtins.str.from_float"; pub const STR_EQUAL: &str = "roc_builtins.str.equal"; -pub const STR_VALIDATE_UTF_BYTES: &str = "roc_builtins.str.validate_utf8_bytes"; pub const STR_TO_BYTES: &str = "roc_builtins.str.to_bytes"; +pub const STR_FROM_UTF8: &str = "roc_builtins.str.from_utf8"; pub const DICT_HASH: &str = "roc_builtins.dict.hash"; pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str"; diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index 7db64b296f..68cceaacf6 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -1598,7 +1598,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def { Access { record_var, 
ext_var: var_store.fresh(), - field: "isOk".into(), + field: "c_isOk".into(), field_var: var_store.fresh(), loc_expr: Box::new(no_region(Var(Symbol::ARG_2))), }, @@ -1610,7 +1610,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def { vec![Access { record_var, ext_var: var_store.fresh(), - field: "str".into(), + field: "b_str".into(), field_var: var_store.fresh(), loc_expr: Box::new(no_region(Var(Symbol::ARG_2))), }], @@ -1627,14 +1627,14 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def { Access { record_var, ext_var: var_store.fresh(), - field: "problem".into(), + field: "d_problem".into(), field_var: var_store.fresh(), loc_expr: Box::new(no_region(Var(Symbol::ARG_2))), }, Access { record_var, ext_var: var_store.fresh(), - field: "byteIndex".into(), + field: "a_byteIndex".into(), field_var: var_store.fresh(), loc_expr: Box::new(no_region(Var(Symbol::ARG_2))), }, diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs index 301b726fb3..514d483c06 100644 --- a/compiler/gen/src/llvm/build_str.rs +++ b/compiler/gen/src/llvm/build_str.rs @@ -1,13 +1,11 @@ use crate::llvm::bitcode::{call_bitcode_fn, call_void_bitcode_fn}; use crate::llvm::build::{complex_bitcast, Env, InPlace, Scope}; -use crate::llvm::build_list::{ - allocate_list, build_basic_phi2, empty_polymorphic_list, list_len, load_list_ptr, store_list, -}; -use crate::llvm::convert::{collection, get_ptr_type}; +use crate::llvm::build_list::{allocate_list, store_list}; +use crate::llvm::convert::collection; use inkwell::builder::Builder; -use inkwell::types::{BasicTypeEnum, StructType}; +use inkwell::types::BasicTypeEnum; use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue}; -use inkwell::{AddressSpace, IntPredicate}; +use inkwell::AddressSpace; use roc_builtins::bitcode; use roc_module::symbol::Symbol; use roc_mono::layout::{Builtin, Layout}; @@ -300,43 +298,28 @@ pub fn str_to_bytes<'a, 'ctx, 'env>( /// 
Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 } pub fn str_from_utf8<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, - parent: FunctionValue<'ctx>, + _parent: FunctionValue<'ctx>, original_wrapper: StructValue<'ctx>, ) -> BasicValueEnum<'ctx> { let builder = env.builder; let ctx = env.context; - let list_len = list_len(builder, original_wrapper); - let ptr_type = get_ptr_type(&ctx.i8_type().into(), AddressSpace::Generic); - let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type); - - let result_type = env - .module - .get_struct_type("str.ValidateUtf8BytesResult") - .unwrap(); + let result_type = env.module.get_struct_type("str.FromUtf8Result").unwrap(); let result_ptr = builder.build_alloca(result_type, "alloca_utf8_validate_bytes_result"); call_void_bitcode_fn( env, - &[result_ptr.into(), list_ptr.into(), list_len.into()], - &bitcode::STR_VALIDATE_UTF_BYTES, + &[ + complex_bitcast( + env.builder, + original_wrapper.into(), + env.context.i128_type().into(), + "to_i128", + ), + result_ptr.into(), + ], + &bitcode::STR_FROM_UTF8, ); - let utf8_validate_bytes_result = builder - .build_load(result_ptr, "load_utf8_validate_bytes_result") - .into_struct_value(); - - let is_ok = builder - .build_extract_value(utf8_validate_bytes_result, 0, "extract_extract_is_ok") - .unwrap() - .into_int_value(); - let byte_index = builder - .build_extract_value(utf8_validate_bytes_result, 1, "extract_byte_index") - .unwrap() - .into_int_value(); - let problem_code = builder - .build_extract_value(utf8_validate_bytes_result, 2, "extract_problem_code") - .unwrap() - .into_int_value(); let record_type = env.context.struct_type( &[ @@ -348,71 +331,16 @@ pub fn str_from_utf8<'a, 'ctx, 'env>( false, ); - let comparison = builder.build_int_compare( - IntPredicate::EQ, - is_ok, - ctx.bool_type().const_int(1, false), - "compare_is_ok", - ); + let result_ptr_cast = env + .builder + .build_bitcast( + result_ptr, + record_type.ptr_type(AddressSpace::Generic), + "to_unnamed", + 
) + .into_pointer_value(); - build_basic_phi2( - env, - parent, - comparison, - || { - // We have a valid utf8 byte sequence - // TODO: Should we do something different here if we're doing this in place? - let zig_str = - call_bitcode_fn(env, &[list_ptr.into(), list_len.into()], &bitcode::STR_INIT) - .into_struct_value(); - build_struct( - builder, - record_type, - vec![ - ( - env.ptr_int().const_int(0, false).into(), - "insert_zeroed_byte_index", - ), - (zig_str_to_struct(env, zig_str).into(), "insert_str"), - (ctx.bool_type().const_int(1, false).into(), "insert_is_ok"), - ( - ctx.i8_type().const_int(0, false).into(), - "insert_zeroed_problem", - ), - ], - ) - .into() - }, - || { - // We do not have a valid utf8 byte sequence - build_struct( - builder, - record_type, - vec![ - (byte_index.into(), "insert_byte_index"), - (empty_polymorphic_list(env), "insert_zeroed_str"), - (ctx.bool_type().const_int(0, false).into(), "insert_is_ok"), - (problem_code.into(), "insert_problem"), - ], - ) - .into() - }, - BasicTypeEnum::StructType(record_type), - ) -} - -fn build_struct<'env, 'ctx>( - builder: &'env Builder<'ctx>, - struct_type: StructType<'ctx>, - values: Vec<(BasicValueEnum<'ctx>, &str)>, -) -> StructValue<'ctx> { - let mut val = struct_type.get_undef().into(); - for (index, (value, name)) in values.iter().enumerate() { - val = builder - .build_insert_value(val, *value, index as u32, name) - .unwrap(); - } - val.into_struct_value() + builder.build_load(result_ptr_cast, "load_utf8_validate_bytes_result") } /// Str.fromInt : Int -> Str diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index c2be2d31fa..c8d7c586f7 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -1,24 +1,7 @@ -app "base64" - packages { base: "platform" } - imports [base.Task, Bytes.Decode.{Decoder} ] - provides [ main ] to base - -IO a : Task.Task a [] +interface Base64 exposes [ fromBytes ] imports [ Bytes.Decode ] Decoder a : 
Bytes.Decode.Decoder a -main : IO {} -main = - # when fromBytes [ 0 ] is - when fromBytes (Str.toBytes "Hello World") is - Ok str -> - Task.putLine str - - Err _ -> - Task.putLine "sadness" - -# ------ - fromBytes : List U8 -> Result Str Bytes.Decode.DecodeError fromBytes = \bytes -> diff --git a/examples/benchmarks/TestBase64.roc b/examples/benchmarks/TestBase64.roc new file mode 100644 index 0000000000..75adbca8be --- /dev/null +++ b/examples/benchmarks/TestBase64.roc @@ -0,0 +1,17 @@ +app "test-base64" + packages { base: "platform" } + imports [base.Task, Base64 ] + provides [ main ] to base + +IO a : Task.Task a [] + +main : IO {} +main = + # when fromBytes [ 0 ] is + when Base64.fromBytes (Str.toBytes "Hello World") is + Ok str -> + Task.putLine str + + Err _ -> + Task.putLine "sadness" + From 75ee81db883b1e76e276b596807af6668867ea0f Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 21:27:02 +0100 Subject: [PATCH 23/26] fix base64 test output --- cli/tests/cli_run.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index f3889ae374..251259b490 100644 --- a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -247,7 +247,7 @@ mod cli_run { &example_file("benchmarks", "TestBase64.roc"), "test-base64", &[], - "SGVsbG8gV29ybGQ=", + "SGVsbG8gV29ybGQ=\n", true, ); } From 7304154452271f86c835a681a30112f8fe0483e2 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 21:36:16 +0100 Subject: [PATCH 24/26] update comment --- compiler/mono/src/ir.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 991249ee8e..2ad3995fe0 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -5720,11 +5720,13 @@ fn call_by_pointer<'a>( let is_specialized = procs.specialized.keys().any(|(s, _)| *s == symbol); if env.is_imported_symbol(symbol) || procs.partial_procs.contains_key(&symbol) || is_specialized { - // 
TODO we should be able to call by name in this wrapper for "normal" functions - // but closures, specifically top-level values that are closures (by unification) - // cause issues. The caller (which is here) doesn't know whether the called is a closure - // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name - // more in the future + // anything that is not a thunk can be called by-value in the wrapper + // (the above condition guarantees we're dealing with a top-level symbol) + // + // But thunks cannot be called by-value, since they are not really functions to all parts + // of the system (notably RC insertion). So we still call those by-pointer. + // Luckily such values were top-level originally (in the user code), and can therefore + // not be closures let is_thunk = procs.module_thunks.contains(&symbol) || procs.imported_module_thunks.contains(&symbol); From 3537fa57d2d1463d737270281c2054db85c5db0a Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 21:46:58 +0100 Subject: [PATCH 25/26] decrement when the input is invalid utf8 --- compiler/builtins/bitcode/src/str.zig | 6 +++--- examples/benchmarks/TestBase64.roc | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index 572eaa14e0..e18eca4306 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -1018,9 +1018,9 @@ fn fromUtf8(allocator: *Allocator, arg: RocList) FromUtf8Result { } else { const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length); - // TODO what should we do RC-wise here - // const data_bytes = arg.len(); - // utils.decref(allocator, @alignOf(usize), arg.list_bytes, data_bytes); + // consume the input list + const data_bytes = arg.len(); + utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes); return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, 
.problem_code = temp.problem }; } diff --git a/examples/benchmarks/TestBase64.roc b/examples/benchmarks/TestBase64.roc index 75adbca8be..27c5617ebf 100644 --- a/examples/benchmarks/TestBase64.roc +++ b/examples/benchmarks/TestBase64.roc @@ -7,7 +7,6 @@ IO a : Task.Task a [] main : IO {} main = - # when fromBytes [ 0 ] is when Base64.fromBytes (Str.toBytes "Hello World") is Ok str -> Task.putLine str From 134f8a15e9e9baaa204990ff127352d4577bd5ba Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 22:26:18 +0100 Subject: [PATCH 26/26] fix zig tests --- compiler/builtins/bitcode/src/str.zig | 98 +++++++++++++-------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index e18eca4306..62a88058e0 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -1155,102 +1155,100 @@ test "validateUtf8Bytes: unicode ∆ in middle of array" { expectOk(validateUtf8BytesX(list)); } +fn expectErr(list: RocList, index: usize, err: Utf8DecodeError, problem: Utf8ByteProblem) void { + const str_ptr = @ptrCast([*]u8, list.bytes); + const str_len = list.length; + + expectError(err, numberOfNextCodepointBytes(str_ptr, str_len, index)); + expectEqual(toErrUtf8ByteResponse(index, problem), validateUtf8Bytes(str_ptr, str_len)); +} + test "validateUtf8Bytes: invalid start byte" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426 - const str_len = 4; - var str: [str_len]u8 = "ab\x80c".*; - const str_ptr: [*]u8 = &str; + const raw = "ab\x80c"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8InvalidStartByte, numberOfNextCodepointBytes(str_ptr, str_len, 2)); - expectEqual(toErrUtf8ByteResponse(2, Utf8ByteProblem.InvalidStartByte), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 2, error.Utf8InvalidStartByte, Utf8ByteProblem.InvalidStartByte); } test 
"validateUtf8Bytes: unexpected eof for 2 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426 - const str_len = 4; - var str: [str_len]u8 = "abc\xc2".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xc2"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence); } test "validateUtf8Bytes: expected continuation for 2 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426 - const str_len = 5; - var str: [str_len]u8 = "abc\xc2\x00".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xc2\x00"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation); } test "validateUtf8Bytes: unexpected eof for 3 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L430 - const str_len = 5; - var str: [str_len]u8 = "abc\xe0\x00".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xe0\x00"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence); } test "validateUtf8Bytes: expected continuation for 3 byte 
sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L430 - const str_len = 6; - var str: [str_len]u8 = "abc\xe0\xa0\xc0".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xe0\xa0\xc0"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation); } test "validateUtf8Bytes: unexpected eof for 4 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L437 - const str_len = 6; - var str: [str_len]u8 = "abc\xf0\x90\x00".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xf0\x90\x00"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence); } test "validateUtf8Bytes: expected continuation for 4 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L437 - const str_len = 7; - var str: [str_len]u8 = "abc\xf0\x90\x80\x00".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xf0\x90\x80\x00"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation); } test "validateUtf8Bytes: overlong" { // 
https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L451 - const str_len = 7; - var str: [str_len]u8 = "abc\xf0\x80\x80\x80".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xf0\x80\x80\x80"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8OverlongEncoding, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.OverlongEncoding), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8OverlongEncoding, Utf8ByteProblem.OverlongEncoding); } test "validateUtf8Bytes: codepoint out too large" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L465 - const str_len = 7; - var str: [str_len]u8 = "abc\xf4\x90\x80\x80".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xf4\x90\x80\x80"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8CodepointTooLarge, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.CodepointTooLarge), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8CodepointTooLarge, Utf8ByteProblem.CodepointTooLarge); } test "validateUtf8Bytes: surrogate halves" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L468 - const str_len = 6; - var str: [str_len]u8 = "abc\xed\xa0\x80".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xed\xa0\x80"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8EncodesSurrogateHalf, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.EncodesSurrogateHalf), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8EncodesSurrogateHalf, Utf8ByteProblem.EncodesSurrogateHalf); }