From 89bf22598ecbe3fac6ba62944b76630501ac28bd Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 14:41:08 +0100 Subject: [PATCH 01/33] call by pointer wrappers need to call by pointer for closures --- compiler/mono/src/ir.rs | 65 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 9e6f2ccd47..79628b27ac 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -5751,8 +5751,13 @@ fn call_by_pointer<'a>( let is_specialized = procs.specialized.keys().any(|(s, _)| *s == symbol); if env.is_imported_symbol(symbol) || procs.partial_procs.contains_key(&symbol) || is_specialized { + // TODO we should be able to call by name in this wrapper for "normal" functions + // but closures, specifically top-level values that are closures (by unification) + // cause issues. The caller (which is here) doesn't know whether the callee is a closure + // so we're safe rather than sorry for now. 
Hopefully we can figure out how to call by name + // more in the future match layout { - Layout::FunctionPointer(arg_layouts, ret_layout) => { + Layout::FunctionPointer(arg_layouts, ret_layout) if false => { if arg_layouts.iter().any(|l| l.contains_refcounted()) { let name = env.unique_symbol(); let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena); @@ -5766,6 +5771,7 @@ fn call_by_pointer<'a>( let args = args.into_bump_slice(); let call_symbol = env.unique_symbol(); + debug_assert_eq!(arg_layouts.len(), arg_symbols.len()); let call_type = CallType::ByName { name: symbol, full_layout: layout.clone(), @@ -5804,6 +5810,63 @@ fn call_by_pointer<'a>( Expr::FunctionPointer(symbol, layout) } } + Layout::FunctionPointer(arg_layouts, ret_layout) => { + if arg_layouts.iter().any(|l| l.contains_refcounted()) { + let name = env.unique_symbol(); + let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena); + let mut arg_symbols = Vec::with_capacity_in(arg_layouts.len(), env.arena); + + for layout in arg_layouts { + let symbol = env.unique_symbol(); + args.push((layout.clone(), symbol)); + arg_symbols.push(symbol); + } + let args = args.into_bump_slice(); + + let call_symbol = env.unique_symbol(); + let fpointer_symbol = env.unique_symbol(); + debug_assert_eq!(arg_layouts.len(), arg_symbols.len()); + let call_type = CallType::ByPointer { + name: fpointer_symbol, + full_layout: layout.clone(), + ret_layout: ret_layout.clone(), + arg_layouts, + }; + let call = Call { + call_type, + arguments: arg_symbols.into_bump_slice(), + }; + let expr = Expr::Call(call); + + let mut body = Stmt::Ret(call_symbol); + + body = Stmt::Let(call_symbol, expr, ret_layout.clone(), env.arena.alloc(body)); + + let expr = Expr::FunctionPointer(symbol, layout.clone()); + body = Stmt::Let(fpointer_symbol, expr, layout.clone(), env.arena.alloc(body)); + + let closure_data_layout = None; + let proc = Proc { + name, + args, + body, + closure_data_layout, + ret_layout: ret_layout.clone(), 
+ is_self_recursive: SelfRecursive::NotSelfRecursive, + must_own_arguments: true, + host_exposed_layouts: HostExposedLayouts::NotHostExposed, + }; + + procs + .specialized + .insert((name, layout.clone()), InProgressProc::Done(proc)); + Expr::FunctionPointer(name, layout) + } else { + // if none of the arguments is refcounted, then owning the arguments has no + // meaning + Expr::FunctionPointer(symbol, layout) + } + } _ => { // e.g. Num.maxInt or other constants Expr::FunctionPointer(symbol, layout) From ea76578e0694e68284a084c882509cd1d41e13ce Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 15:15:16 +0100 Subject: [PATCH 02/33] add Num.bitwiseOr and Num.shiftLeftBy --- compiler/builtins/src/std.rs | 18 ++++++++++++++++++ compiler/can/src/builtins.rs | 16 ++++++++++++++++ compiler/gen/src/llvm/build.rs | 26 +++++++++++++++++++++++++- compiler/module/src/low_level.rs | 2 ++ compiler/module/src/symbol.rs | 20 +++++++++++--------- compiler/mono/src/borrow.rs | 12 +++++++++++- 6 files changed, 83 insertions(+), 11 deletions(-) diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index 42eb034581..5156fadc0b 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -324,6 +324,24 @@ pub fn types() -> MutMap { ), ); + // bitwiseOr : Int a, Int a -> Int a + add_type( + Symbol::NUM_BITWISE_OR, + top_level_function( + vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))], + Box::new(int_type(flex(TVAR1))), + ), + ); + + // shiftLeftBy : Nat, Int a -> Int a + add_type( + Symbol::NUM_SHIFT_LEFT, + top_level_function( + vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))], + Box::new(int_type(flex(TVAR1))), + ), + ); + // rem : Int a, Int a -> Result (Int a) [ DivByZero ]* add_type( Symbol::NUM_REM, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index fdff0a4e2c..f3e1ff4a32 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -151,6 +151,8 @@ pub fn builtin_defs_map(symbol: 
Symbol, var_store: &mut VarStore) -> Option NUM_MIN_INT => num_min_int, NUM_BITWISE_AND => num_bitwise_and, NUM_BITWISE_XOR => num_bitwise_xor, + NUM_BITWISE_OR => num_bitwise_or, + NUM_SHIFT_LEFT=> num_shift_left_by, RESULT_MAP => result_map, RESULT_MAP_ERR => result_map_err, RESULT_WITH_DEFAULT => result_with_default, @@ -273,6 +275,10 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap { Symbol::NUM_ASIN => num_asin, Symbol::NUM_MAX_INT => num_max_int, Symbol::NUM_MIN_INT => num_min_int, + Symbol::NUM_BITWISE_AND => num_bitwise_and, + Symbol::NUM_BITWISE_XOR => num_bitwise_xor, + Symbol::NUM_BITWISE_OR => num_bitwise_or, + Symbol::NUM_SHIFT_LEFT=> num_shift_left_by, Symbol::RESULT_MAP => result_map, Symbol::RESULT_MAP_ERR => result_map_err, Symbol::RESULT_WITH_DEFAULT => result_with_default, @@ -1299,6 +1305,16 @@ fn num_bitwise_xor(symbol: Symbol, var_store: &mut VarStore) -> Def { num_binop(symbol, var_store, LowLevel::NumBitwiseXor) } +/// Num.bitwiseOr: Int, Int -> Int +fn num_bitwise_or(symbol: Symbol, var_store: &mut VarStore) -> Def { + num_binop(symbol, var_store, LowLevel::NumBitwiseOr) +} + +/// Num.shiftLeftBy: Nat, Int a -> Int a +fn num_shift_left_by(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_2(symbol, LowLevel::NumShiftLeftBy, var_store) +} + /// List.isEmpty : List * -> Bool fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def { let list_var = var_store.fresh(); diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 5e65bc2cee..e77ae31500 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -3943,7 +3943,23 @@ fn run_low_level<'a, 'ctx, 'env>( build_num_binop(env, parent, lhs_arg, lhs_layout, rhs_arg, rhs_layout, op) } - NumBitwiseAnd | NumBitwiseXor => { + NumBitwiseAnd | NumBitwiseOr | NumBitwiseXor => { + debug_assert_eq!(args.len(), 2); + + let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]); + let (rhs_arg, rhs_layout) = 
load_symbol_and_layout(scope, &args[1]); + + build_int_binop( + env, + parent, + lhs_arg.into_int_value(), + lhs_layout, + rhs_arg.into_int_value(), + rhs_layout, + op, + ) + } + NumShiftLeftBy => { debug_assert_eq!(args.len(), 2); let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]); @@ -4585,6 +4601,14 @@ fn build_int_binop<'a, 'ctx, 'env>( NumPowInt => call_bitcode_fn(env, &[lhs.into(), rhs.into()], &bitcode::NUM_POW_INT), NumBitwiseAnd => bd.build_and(lhs, rhs, "int_bitwise_and").into(), NumBitwiseXor => bd.build_xor(lhs, rhs, "int_bitwise_xor").into(), + NumBitwiseOr => bd.build_or(lhs, rhs, "int_bitwise_or").into(), + NumShiftLeftBy => { + // NOTE arguments are flipped; + // we write `assert_eq!(0b0000_0001 << 0, 0b0000_0001);` + // as `Num.shiftLeftBy 0 0b0000_0001 + bd.build_left_shift(rhs, lhs, "int_bitwise_or").into() + } + _ => { unreachable!("Unrecognized int binary operation: {:?}", op); } diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index e69fa0dd02..05a20c72c5 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -78,6 +78,8 @@ pub enum LowLevel { NumAsin, NumBitwiseAnd, NumBitwiseXor, + NumBitwiseOr, + NumShiftLeftBy, Eq, NotEq, And, diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index 54700dd492..64717e405b 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -841,15 +841,17 @@ define_builtins! 
{ 80 NUM_BINARY32: "Binary32" imported 81 NUM_BITWISE_AND: "bitwiseAnd" 82 NUM_BITWISE_XOR: "bitwiseXor" - 83 NUM_SUB_WRAP: "subWrap" - 84 NUM_SUB_CHECKED: "subChecked" - 85 NUM_MUL_WRAP: "mulWrap" - 86 NUM_MUL_CHECKED: "mulChecked" - 87 NUM_INT: "Int" imported - 88 NUM_FLOAT: "Float" imported - 89 NUM_AT_NATURAL: "@Natural" - 90 NUM_NATURAL: "Natural" imported - 91 NUM_NAT: "Nat" imported + 83 NUM_BITWISE_OR: "bitwiseOr" + 84 NUM_SHIFT_LEFT: "shiftLeftBy" + 85 NUM_SUB_WRAP: "subWrap" + 86 NUM_SUB_CHECKED: "subChecked" + 87 NUM_MUL_WRAP: "mulWrap" + 88 NUM_MUL_CHECKED: "mulChecked" + 89 NUM_INT: "Int" imported + 90 NUM_FLOAT: "Float" imported + 91 NUM_AT_NATURAL: "@Natural" + 92 NUM_NATURAL: "Natural" imported + 93 NUM_NAT: "Nat" imported } 2 BOOL: "Bool" => { 0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index ba652c0c7b..2fa8893d4c 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -373,6 +373,14 @@ impl<'a> BorrowInfState<'a> { self.own_var(z); // if the function exects an owned argument (ps), the argument must be owned (args) + debug_assert_eq!( + arguments.len(), + ps.len(), + "{:?} has {} parameters, but was applied to {} arguments", + name, + ps.len(), + arguments.len() + ); self.own_args_using_params(arguments, ps); } None => { @@ -658,7 +666,9 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { And | Or | NumAdd | NumAddWrap | NumAddChecked | NumSub | NumSubWrap | NumSubChecked | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare | NumDivUnchecked | NumRemUnchecked | NumPow | NumPowInt | NumBitwiseAnd - | NumBitwiseXor => arena.alloc_slice_copy(&[irrelevant, irrelevant]), + | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy => { + arena.alloc_slice_copy(&[irrelevant, irrelevant]) + } NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumRound | NumCeiling | NumFloor | NumToFloat | Not | 
NumIsFinite | NumAtan | NumAcos | NumAsin => { From 128741e5856adebc368836c54b40a1eaf50979fd Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 16:01:01 +0100 Subject: [PATCH 03/33] add right shift operators --- compiler/builtins/src/std.rs | 20 +++++++++++++++++++- compiler/can/src/builtins.rs | 16 +++++++++++++++- compiler/gen/src/llvm/build.rs | 14 ++++++++++++-- compiler/module/src/low_level.rs | 2 ++ compiler/module/src/symbol.rs | 20 +++++++++++--------- compiler/mono/src/borrow.rs | 2 +- 6 files changed, 60 insertions(+), 14 deletions(-) diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index e7e7eea3c1..084edc1cd4 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -333,7 +333,7 @@ pub fn types() -> MutMap { ), ); - // shiftLeftBy : Nat, Int a -> Int a + // shiftLeftBy : Int a, Int a -> Int a add_type( Symbol::NUM_SHIFT_LEFT, top_level_function( @@ -342,6 +342,24 @@ pub fn types() -> MutMap { ), ); + // shiftRightBy : Int a, Int a -> Int a + add_type( + Symbol::NUM_SHIFT_RIGHT, + top_level_function( + vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))], + Box::new(int_type(flex(TVAR1))), + ), + ); + + // shiftRightZfBy : Int a, Int a -> Int a + add_type( + Symbol::NUM_SHIFT_RIGHT_ZERO_FILL, + top_level_function( + vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))], + Box::new(int_type(flex(TVAR1))), + ), + ); + // rem : Int a, Int a -> Result (Int a) [ DivByZero ]* add_type( Symbol::NUM_REM, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index 6c8f7a9cd3..c8516d26ca 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -154,6 +154,8 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option NUM_BITWISE_XOR => num_bitwise_xor, NUM_BITWISE_OR => num_bitwise_or, NUM_SHIFT_LEFT=> num_shift_left_by, + NUM_SHIFT_RIGHT => num_shift_right_by, + NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by, RESULT_MAP => result_map, 
RESULT_MAP_ERR => result_map_err, RESULT_WITH_DEFAULT => result_with_default, @@ -280,7 +282,9 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap { Symbol::NUM_BITWISE_AND => num_bitwise_and, Symbol::NUM_BITWISE_XOR => num_bitwise_xor, Symbol::NUM_BITWISE_OR => num_bitwise_or, - Symbol::NUM_SHIFT_LEFT=> num_shift_left_by, + Symbol::NUM_SHIFT_LEFT => num_shift_left_by, + Symbol::NUM_SHIFT_RIGHT => num_shift_right_by, + Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by, Symbol::RESULT_MAP => result_map, Symbol::RESULT_MAP_ERR => result_map_err, Symbol::RESULT_WITH_DEFAULT => result_with_default, @@ -1317,6 +1321,16 @@ fn num_shift_left_by(symbol: Symbol, var_store: &mut VarStore) -> Def { lowlevel_2(symbol, LowLevel::NumShiftLeftBy, var_store) } +/// Num.shiftRightBy: Nat, Int a -> Int a +fn num_shift_right_by(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_2(symbol, LowLevel::NumShiftRightBy, var_store) +} + +/// Num.shiftRightZfBy: Nat, Int a -> Int a +fn num_shift_right_zf_by(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_2(symbol, LowLevel::NumShiftRightZfBy, var_store) +} + /// List.isEmpty : List * -> Bool fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def { let list_var = var_store.fresh(); diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 250c903014..ed149b77a4 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -3967,7 +3967,7 @@ fn run_low_level<'a, 'ctx, 'env>( op, ) } - NumShiftLeftBy => { + NumShiftLeftBy | NumShiftRightBy | NumShiftRightZfBy => { debug_assert_eq!(args.len(), 2); let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]); @@ -4614,7 +4614,17 @@ fn build_int_binop<'a, 'ctx, 'env>( // NOTE arguments are flipped; // we write `assert_eq!(0b0000_0001 << 0, 0b0000_0001);` // as `Num.shiftLeftBy 0 0b0000_0001 - bd.build_left_shift(rhs, lhs, "int_bitwise_or").into() + bd.build_left_shift(rhs, lhs, 
"int_shift_left").into() + } + NumShiftRightBy => { + // NOTE arguments are flipped; + bd.build_right_shift(rhs, lhs, false, "int_shift_right") + .into() + } + NumShiftRightZfBy => { + // NOTE arguments are flipped; + bd.build_right_shift(rhs, lhs, true, "int_shift_right_zf") + .into() } _ => { diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index 9740c80d80..7279a4ae76 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -81,6 +81,8 @@ pub enum LowLevel { NumBitwiseXor, NumBitwiseOr, NumShiftLeftBy, + NumShiftRightBy, + NumShiftRightZfBy, Eq, NotEq, And, diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index 23fa1837f0..a7d19116dc 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -843,15 +843,17 @@ define_builtins! { 82 NUM_BITWISE_XOR: "bitwiseXor" 83 NUM_BITWISE_OR: "bitwiseOr" 84 NUM_SHIFT_LEFT: "shiftLeftBy" - 85 NUM_SUB_WRAP: "subWrap" - 86 NUM_SUB_CHECKED: "subChecked" - 87 NUM_MUL_WRAP: "mulWrap" - 88 NUM_MUL_CHECKED: "mulChecked" - 89 NUM_INT: "Int" imported - 90 NUM_FLOAT: "Float" imported - 91 NUM_AT_NATURAL: "@Natural" - 92 NUM_NATURAL: "Natural" imported - 93 NUM_NAT: "Nat" imported + 85 NUM_SHIFT_RIGHT: "shiftRightBy" + 86 NUM_SHIFT_RIGHT_ZERO_FILL: "shiftRightZfBy" + 87 NUM_SUB_WRAP: "subWrap" + 88 NUM_SUB_CHECKED: "subChecked" + 89 NUM_MUL_WRAP: "mulWrap" + 90 NUM_MUL_CHECKED: "mulChecked" + 91 NUM_INT: "Int" imported + 92 NUM_FLOAT: "Float" imported + 93 NUM_AT_NATURAL: "@Natural" + 94 NUM_NATURAL: "Natural" imported + 95 NUM_NAT: "Nat" imported } 2 BOOL: "Bool" => { 0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index a06f9f7e37..294e2582c0 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -666,7 +666,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { And | Or | NumAdd | NumAddWrap | 
NumAddChecked | NumSub | NumSubWrap | NumSubChecked | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare | NumDivUnchecked | NumRemUnchecked | NumPow | NumPowInt | NumBitwiseAnd - | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy => { + | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy | NumShiftRightBy | NumShiftRightZfBy => { arena.alloc_slice_copy(&[irrelevant, irrelevant]) } From 43e71f2ee933e25f5a1f45243af8be6c8290724c Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 16:07:35 +0100 Subject: [PATCH 04/33] int cast --- compiler/builtins/src/std.rs | 6 ++++++ compiler/can/src/builtins.rs | 7 +++++++ compiler/gen/src/llvm/build.rs | 10 ++++++++++ compiler/module/src/low_level.rs | 1 + compiler/module/src/symbol.rs | 2 ++ compiler/mono/src/borrow.rs | 2 +- 6 files changed, 27 insertions(+), 1 deletion(-) diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index 084edc1cd4..5a3b499dd7 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -360,6 +360,12 @@ pub fn types() -> MutMap { ), ); + // intCast : Int a -> Int b + add_type( + Symbol::NUM_INT_CAST, + top_level_function(vec![int_type(flex(TVAR1))], Box::new(int_type(flex(TVAR2)))), + ); + // rem : Int a, Int a -> Result (Int a) [ DivByZero ]* add_type( Symbol::NUM_REM, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index c8516d26ca..af38527fcc 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -156,6 +156,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option NUM_SHIFT_LEFT=> num_shift_left_by, NUM_SHIFT_RIGHT => num_shift_right_by, NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by, + NUM_INT_CAST=> num_int_cast, RESULT_MAP => result_map, RESULT_MAP_ERR => result_map_err, RESULT_WITH_DEFAULT => result_with_default, @@ -285,6 +286,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap { Symbol::NUM_SHIFT_LEFT => num_shift_left_by, 
Symbol::NUM_SHIFT_RIGHT => num_shift_right_by, Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by, + Symbol::NUM_INT_CAST=> num_int_cast, Symbol::RESULT_MAP => result_map, Symbol::RESULT_MAP_ERR => result_map_err, Symbol::RESULT_WITH_DEFAULT => result_with_default, @@ -1331,6 +1333,11 @@ fn num_shift_right_zf_by(symbol: Symbol, var_store: &mut VarStore) -> Def { lowlevel_2(symbol, LowLevel::NumShiftRightZfBy, var_store) } +/// Num.intCast: Int a -> Int b +fn num_int_cast(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_1(symbol, LowLevel::NumIntCast, var_store) +} + /// List.isEmpty : List * -> Bool fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def { let list_var = var_store.fresh(); diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index ed149b77a4..9ba362841f 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -3983,6 +3983,16 @@ fn run_low_level<'a, 'ctx, 'env>( op, ) } + NumIntCast => { + debug_assert_eq!(args.len(), 1); + + let arg = load_symbol(scope, &args[0]).into_int_value(); + + let to = basic_type_from_layout(env.arena, env.context, layout, env.ptr_bytes) + .into_int_type(); + + env.builder.build_int_cast(arg, to, "inc_cast").into() + } Eq => { debug_assert_eq!(args.len(), 2); diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index 7279a4ae76..640b8c8bca 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -83,6 +83,7 @@ pub enum LowLevel { NumShiftLeftBy, NumShiftRightBy, NumShiftRightZfBy, + NumIntCast, Eq, NotEq, And, diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index a7d19116dc..62f5a9d457 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -854,6 +854,8 @@ define_builtins! 
{ 93 NUM_AT_NATURAL: "@Natural" 94 NUM_NATURAL: "Natural" imported 95 NUM_NAT: "Nat" imported + 96 NUM_INT_CAST: "intCast" + } 2 BOOL: "Bool" => { 0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index 294e2582c0..c0d4f1e091 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -671,7 +671,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { } NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumRound | NumCeiling | NumFloor - | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin => { + | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin | NumIntCast => { arena.alloc_slice_copy(&[irrelevant]) } StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]), From bcbef5d3aac61fea000a9c9f8425fc630e638b6e Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 19:10:09 +0100 Subject: [PATCH 05/33] generalize add and sub intrinsics to all integer types --- compiler/gen/src/llvm/build.rs | 103 ++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 3 deletions(-) diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 9ba362841f..31fc0ce6c3 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -296,8 +296,10 @@ fn add_intrinsics<'ctx>(ctx: &'ctx Context, module: &Module<'ctx>) { let void_type = ctx.void_type(); let i1_type = ctx.bool_type(); let f64_type = ctx.f64_type(); + let i128_type = ctx.i128_type(); let i64_type = ctx.i64_type(); let i32_type = ctx.i32_type(); + let i16_type = ctx.i16_type(); let i8_type = ctx.i8_type(); let i8_ptr_type = i8_type.ptr_type(AddressSpace::Generic); @@ -377,18 +379,72 @@ fn add_intrinsics<'ctx>(ctx: &'ctx Context, module: &Module<'ctx>) { f64_type.fn_type(&[f64_type.into()], false), ); + // add with overflow + + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I8, { + let fields = [i8_type.into(), 
i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i8_type.into(), i8_type.into()], false) + }); + + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I16, { + let fields = [i16_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i16_type.into(), i16_type.into()], false) + }); + + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I32, { + let fields = [i32_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i32_type.into(), i32_type.into()], false) + }); + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I64, { let fields = [i64_type.into(), i1_type.into()]; ctx.struct_type(&fields, false) .fn_type(&[i64_type.into(), i64_type.into()], false) }); + add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I128, { + let fields = [i128_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i128_type.into(), i128_type.into()], false) + }); + + // sub with overflow + + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I8, { + let fields = [i8_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i8_type.into(), i8_type.into()], false) + }); + + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I16, { + let fields = [i16_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i16_type.into(), i16_type.into()], false) + }); + + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I32, { + let fields = [i32_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i32_type.into(), i32_type.into()], false) + }); + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I64, { let fields = [i64_type.into(), i1_type.into()]; ctx.struct_type(&fields, false) .fn_type(&[i64_type.into(), i64_type.into()], false) }); + add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I128, { + let fields = [i128_type.into(), i1_type.into()]; + ctx.struct_type(&fields, false) + .fn_type(&[i128_type.into(), i128_type.into()], false) + }); + + // mul with overflow + add_intrinsic(module, 
LLVM_SMUL_WITH_OVERFLOW_I64, { let fields = [i64_type.into(), i1_type.into()]; ctx.struct_type(&fields, false) @@ -406,8 +462,19 @@ static LLVM_COS_F64: &str = "llvm.cos.f64"; static LLVM_POW_F64: &str = "llvm.pow.f64"; static LLVM_CEILING_F64: &str = "llvm.ceil.f64"; static LLVM_FLOOR_F64: &str = "llvm.floor.f64"; + +pub static LLVM_SADD_WITH_OVERFLOW_I8: &str = "llvm.sadd.with.overflow.i8"; +pub static LLVM_SADD_WITH_OVERFLOW_I16: &str = "llvm.sadd.with.overflow.i16"; +pub static LLVM_SADD_WITH_OVERFLOW_I32: &str = "llvm.sadd.with.overflow.i32"; pub static LLVM_SADD_WITH_OVERFLOW_I64: &str = "llvm.sadd.with.overflow.i64"; +pub static LLVM_SADD_WITH_OVERFLOW_I128: &str = "llvm.sadd.with.overflow.i128"; + +pub static LLVM_SSUB_WITH_OVERFLOW_I8: &str = "llvm.ssub.with.overflow.i8"; +pub static LLVM_SSUB_WITH_OVERFLOW_I16: &str = "llvm.ssub.with.overflow.i16"; +pub static LLVM_SSUB_WITH_OVERFLOW_I32: &str = "llvm.ssub.with.overflow.i32"; pub static LLVM_SSUB_WITH_OVERFLOW_I64: &str = "llvm.ssub.with.overflow.i64"; +pub static LLVM_SSUB_WITH_OVERFLOW_I128: &str = "llvm.ssub.with.overflow.i128"; + pub static LLVM_SMUL_WITH_OVERFLOW_I64: &str = "llvm.smul.with.overflow.i64"; fn add_intrinsic<'ctx>( @@ -4506,7 +4573,7 @@ fn build_int_binop<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, parent: FunctionValue<'ctx>, lhs: IntValue<'ctx>, - _lhs_layout: &Layout<'a>, + lhs_layout: &Layout<'a>, rhs: IntValue<'ctx>, _rhs_layout: &Layout<'a>, op: LowLevel, @@ -4519,8 +4586,23 @@ fn build_int_binop<'a, 'ctx, 'env>( match op { NumAdd => { let context = env.context; + + let intrinsic = match lhs_layout { + Layout::Builtin(Builtin::Int8) => LLVM_SADD_WITH_OVERFLOW_I8, + Layout::Builtin(Builtin::Int16) => LLVM_SADD_WITH_OVERFLOW_I16, + Layout::Builtin(Builtin::Int32) => LLVM_SADD_WITH_OVERFLOW_I32, + Layout::Builtin(Builtin::Int64) => LLVM_SADD_WITH_OVERFLOW_I64, + Layout::Builtin(Builtin::Int128) => LLVM_SADD_WITH_OVERFLOW_I128, + Layout::Builtin(Builtin::Usize) => match env.ptr_bytes 
{ + 4 => LLVM_SADD_WITH_OVERFLOW_I32, + 8 => LLVM_SADD_WITH_OVERFLOW_I64, + other => panic!("invalid ptr_bytes {}", other), + }, + _ => unreachable!(), + }; + let result = env - .call_intrinsic(LLVM_SADD_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()]) + .call_intrinsic(intrinsic, &[lhs.into(), rhs.into()]) .into_struct_value(); let add_result = bd.build_extract_value(result, 0, "add_result").unwrap(); @@ -4550,8 +4632,23 @@ fn build_int_binop<'a, 'ctx, 'env>( NumAddChecked => env.call_intrinsic(LLVM_SADD_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()]), NumSub => { let context = env.context; + + let intrinsic = match lhs_layout { + Layout::Builtin(Builtin::Int8) => LLVM_SSUB_WITH_OVERFLOW_I8, + Layout::Builtin(Builtin::Int16) => LLVM_SSUB_WITH_OVERFLOW_I16, + Layout::Builtin(Builtin::Int32) => LLVM_SSUB_WITH_OVERFLOW_I32, + Layout::Builtin(Builtin::Int64) => LLVM_SSUB_WITH_OVERFLOW_I64, + Layout::Builtin(Builtin::Int128) => LLVM_SSUB_WITH_OVERFLOW_I128, + Layout::Builtin(Builtin::Usize) => match env.ptr_bytes { + 4 => LLVM_SSUB_WITH_OVERFLOW_I32, + 8 => LLVM_SSUB_WITH_OVERFLOW_I64, + other => panic!("invalid ptr_bytes {}", other), + }, + _ => unreachable!("invalid layout {:?}", lhs_layout), + }; + let result = env - .call_intrinsic(LLVM_SSUB_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()]) + .call_intrinsic(intrinsic, &[lhs.into(), rhs.into()]) .into_struct_value(); let sub_result = bd.build_extract_value(result, 0, "sub_result").unwrap(); From 9116e9e8c96cfdeadd7c9cd7b78096c75d5b5b10 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 19:52:32 +0100 Subject: [PATCH 06/33] add Str.toBytes --- compiler/builtins/bitcode/src/main.zig | 1 + compiler/builtins/bitcode/src/str.zig | 21 +++++++++++++++++++++ compiler/builtins/src/bitcode.rs | 1 + compiler/builtins/src/std.rs | 6 ++++++ compiler/can/src/builtins.rs | 7 +++++++ compiler/gen/src/llvm/build.rs | 14 ++++++++++++-- compiler/gen/src/llvm/build_str.rs | 22 ++++++++++++++++++++++ compiler/module/src/low_level.rs 
| 1 + compiler/module/src/symbol.rs | 1 + compiler/mono/src/borrow.rs | 1 + 10 files changed, 73 insertions(+), 2 deletions(-) diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig index 7df2061ed9..e124afe62c 100644 --- a/compiler/builtins/bitcode/src/main.zig +++ b/compiler/builtins/bitcode/src/main.zig @@ -68,6 +68,7 @@ comptime { exportStrFn(str.strFromFloatC, "from_float"); exportStrFn(str.strEqual, "equal"); exportStrFn(str.validateUtf8Bytes, "validate_utf8_bytes"); + exportStrFn(str.strToBytesC, "to_bytes"); } // Export helpers - Must be run inside a comptime diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index 32057d35d0..a752a37f0e 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -1,4 +1,5 @@ const utils = @import("utils.zig"); +const RocList = @import("list.zig").RocList; const std = @import("std"); const mem = std.mem; const always_inline = std.builtin.CallOptions.Modifier.always_inline; @@ -961,6 +962,26 @@ test "RocStr.joinWith: result is big" { expect(roc_result.eq(result)); } +// Str.toBytes +pub fn strToBytesC(arg: RocStr) callconv(.C) RocList { + return @call(.{ .modifier = always_inline }, strToBytes, .{ std.heap.c_allocator, arg }); +} + +fn strToBytes(allocator: *Allocator, arg: RocStr) RocList { + if (arg.isEmpty()) { + return RocList.empty(); + } else if (arg.isSmallStr()) { + const length = arg.len(); + const ptr = utils.allocateWithRefcount(allocator, @alignOf(usize), length); + + @memcpy(ptr, arg.asU8ptr(), length); + + return RocList{ .length = length, .bytes = ptr }; + } else { + return RocList{ .length = arg.len(), .bytes = arg.str_bytes }; + } +} + pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool { const bytes: []u8 = ptr[0..len]; return @call(.{ .modifier = always_inline }, unicode.utf8ValidateSlice, .{bytes}); diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs 
index 134bce68ad..b19117e106 100644 --- a/compiler/builtins/src/bitcode.rs +++ b/compiler/builtins/src/bitcode.rs @@ -42,6 +42,7 @@ pub const STR_FROM_INT: &str = "roc_builtins.str.from_int"; pub const STR_FROM_FLOAT: &str = "roc_builtins.str.from_float"; pub const STR_EQUAL: &str = "roc_builtins.str.equal"; pub const STR_VALIDATE_UTF_BYTES: &str = "roc_builtins.str.validate_utf8_bytes"; +pub const STR_TO_BYTES: &str = "roc_builtins.str.to_bytes"; pub const DICT_HASH: &str = "roc_builtins.dict.hash"; pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str"; diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index 5a3b499dd7..5de65ca3dd 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -623,6 +623,12 @@ pub fn types() -> MutMap { ), ); + // toBytes : Str -> List U8 + add_type( + Symbol::STR_TO_BYTES, + top_level_function(vec![str_type()], Box::new(list_type(u8_type()))), + ); + // fromFloat : Float a -> Str add_type( Symbol::STR_FROM_FLOAT, diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index af38527fcc..7db64b296f 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -62,6 +62,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option STR_COUNT_GRAPHEMES => str_count_graphemes, STR_FROM_INT => str_from_int, STR_FROM_UTF8 => str_from_utf8, + STR_TO_BYTES => str_to_bytes, STR_FROM_FLOAT=> str_from_float, LIST_LEN => list_len, LIST_GET => list_get, @@ -196,6 +197,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap { Symbol::STR_COUNT_GRAPHEMES => str_count_graphemes, Symbol::STR_FROM_INT => str_from_int, Symbol::STR_FROM_UTF8 => str_from_utf8, + Symbol::STR_TO_BYTES => str_to_bytes, Symbol::STR_FROM_FLOAT=> str_from_float, Symbol::LIST_LEN => list_len, Symbol::LIST_GET => list_get, @@ -1655,6 +1657,11 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def { ) } +/// Str.toBytes : Str -> List U8 +fn 
str_to_bytes(symbol: Symbol, var_store: &mut VarStore) -> Def { + lowlevel_1(symbol, LowLevel::StrToBytes, var_store) +} + /// Str.fromFloat : Float * -> Str fn str_from_float(symbol: Symbol, var_store: &mut VarStore) -> Def { let float_var = var_store.fresh(); diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 31fc0ce6c3..506dec1253 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -12,7 +12,7 @@ use crate::llvm::build_list::{ }; use crate::llvm::build_str::{ str_concat, str_count_graphemes, str_ends_with, str_from_float, str_from_int, str_from_utf8, - str_join_with, str_number_of_bytes, str_split, str_starts_with, CHAR_LAYOUT, + str_join_with, str_number_of_bytes, str_split, str_starts_with, str_to_bytes, CHAR_LAYOUT, }; use crate::llvm::compare::{generic_eq, generic_neq}; use crate::llvm::convert::{ @@ -3611,13 +3611,23 @@ fn run_low_level<'a, 'ctx, 'env>( str_from_float(env, scope, args[0]) } StrFromUtf8 => { - // Str.fromInt : Int -> Str + // Str.fromUtf8 : List U8 -> Result Str Utf8Problem debug_assert_eq!(args.len(), 1); let original_wrapper = load_symbol(scope, &args[0]).into_struct_value(); str_from_utf8(env, parent, original_wrapper) } + StrToBytes => { + // Str.toBytes : Str -> List U8 + debug_assert_eq!(args.len(), 1); + + // this is an identity conversion + // we just implement it here to subvert the type system + let string = load_symbol(scope, &args[0]); + + str_to_bytes(env, string.into_struct_value()) + } StrSplit => { // Str.split : Str, Str -> List Str debug_assert_eq!(args.len(), 2); diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs index be27698163..301b726fb3 100644 --- a/compiler/gen/src/llvm/build_str.rs +++ b/compiler/gen/src/llvm/build_str.rs @@ -275,6 +275,28 @@ pub fn str_from_int<'a, 'ctx, 'env>( zig_str_to_struct(env, zig_result).into() } +/// Str.toBytes : Str -> List U8 +pub fn str_to_bytes<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 
'env>, + original_wrapper: StructValue<'ctx>, +) -> BasicValueEnum<'ctx> { + let string = complex_bitcast( + env.builder, + original_wrapper.into(), + env.context.i128_type().into(), + "to_bytes", + ); + + let zig_result = call_bitcode_fn(env, &[string], &bitcode::STR_TO_BYTES); + + complex_bitcast( + env.builder, + zig_result, + collection(env.context, env.ptr_bytes).into(), + "to_bytes", + ) +} + /// Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 } pub fn str_from_utf8<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs index 640b8c8bca..07422cd4d0 100644 --- a/compiler/module/src/low_level.rs +++ b/compiler/module/src/low_level.rs @@ -12,6 +12,7 @@ pub enum LowLevel { StrCountGraphemes, StrFromInt, StrFromUtf8, + StrToBytes, StrFromFloat, ListLen, ListGetUnsafe, diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index 62f5a9d457..2497e9cfd3 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -882,6 +882,7 @@ define_builtins! 
{ 12 STR_FROM_UTF8: "fromUtf8" 13 STR_UT8_PROBLEM: "Utf8Problem" // the Utf8Problem type alias 14 STR_UT8_BYTE_PROBLEM: "Utf8ByteProblem" // the Utf8ByteProblem type alias + 15 STR_TO_BYTES: "toBytes" } 4 LIST: "List" => { 0 LIST_LIST: "List" imported // the List.List type alias diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index c0d4f1e091..c87f0b4e05 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -676,6 +676,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { } StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]), StrFromUtf8 => arena.alloc_slice_copy(&[owned]), + StrToBytes => arena.alloc_slice_copy(&[owned]), StrFromInt | StrFromFloat => arena.alloc_slice_copy(&[irrelevant]), Hash => arena.alloc_slice_copy(&[borrowed, irrelevant]), DictSize => arena.alloc_slice_copy(&[borrowed]), From e218279f42e473689222200bc1ca67649d757391 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 19:54:32 +0100 Subject: [PATCH 07/33] add example files --- examples/benchmarks/Base64.roc | 164 ++++++++++++++++++++++++++++ examples/benchmarks/BytesDecode.roc | 106 ++++++++++++++++++ 2 files changed, 270 insertions(+) create mode 100644 examples/benchmarks/Base64.roc create mode 100644 examples/benchmarks/BytesDecode.roc diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc new file mode 100644 index 0000000000..e9dc460cb3 --- /dev/null +++ b/examples/benchmarks/Base64.roc @@ -0,0 +1,164 @@ +app "base64" + packages { base: "platform" } + imports [base.Task, BytesDecode.{Decoder} ] + provides [ main ] to base + +IO a : Task.Task a [] + +Decoder a : BytesDecode.Decoder a + +main : IO {} +main = + # when fromBytes [ 0 ] is + when fromBytes (Str.toBytes "Hello World") is + Ok str -> + Task.putLine str + + Err _ -> + Task.putLine "sadness" + + + + + +# ------ + + +fromBytes : List U8 -> Result Str BytesDecode.DecodeError +fromBytes = \bytes -> + 
BytesDecode.decode bytes (decodeBase64 (List.len bytes)) + + +decodeBase64 : Nat -> BytesDecode.Decoder Str +decodeBase64 = \width -> BytesDecode.loop loopHelp { remaining: width, string: "" } + +loopHelp : { remaining : Nat, string : Str } -> Decoder (BytesDecode.Step { remaining : Nat, string : Str } Str) +loopHelp = \{ remaining, string } -> + if remaining >= 3 then + helper = \x, y, z -> + a : U32 + a = Num.intCast x + b : U32 + b = Num.intCast y + c : U32 + c = Num.intCast z + combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c + Loop + { + remaining: remaining - 3, + string: Str.concat string (bitsToChars combined 0) + } + + BytesDecode.map3 helper + BytesDecode.u8 + BytesDecode.u8 + BytesDecode.u8 + + else if remaining == 0 then + BytesDecode.succeed (Done string) + + else if remaining == 2 then + helperX = \x, y -> + a : U32 + a = Num.intCast x + b : U32 + b = Num.intCast y + combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b) + Done (Str.concat string (bitsToChars combined 1)) + + BytesDecode.map2 helperX + BytesDecode.u8 + BytesDecode.u8 + else + # remaining = 1 + BytesDecode.u8 + |> BytesDecode.map (\x -> + a : U32 + a = Num.intCast x + Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))) + + +bitsToChars : U32, Int * -> Str +bitsToChars = \bits, missing -> + when Str.fromUtf8 (bitsToCharsHelp bits missing) is + Ok str -> str + Err _ -> "" + +# Mask that can be used to get the lowest 6 bits of a binary number +lowest6BitsMask : Int * +lowest6BitsMask = 63 + + +bitsToCharsHelp : U32, Int * -> List U8 +bitsToCharsHelp = \bits, missing -> + # Performance Notes + # `String.cons` proved to be the fastest way of combining characters into a string + # see also https://github.com/danfishgold/base64-bytes/pull/3#discussion_r342321940 + # The input is 24 bits, which we have to partition into 4 6-bit segments. 
We achieve this by + # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd` + # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left. + + # any 6-bit number is a valid base64 digit, so this is actually safe + p = + Num.shiftRightZfBy 18 bits + |> Num.intCast + |> unsafeToChar + + q = + Num.bitwiseAnd (Num.shiftRightZfBy 12 bits) lowest6BitsMask + |> Num.intCast + |> unsafeToChar + + r = + Num.bitwiseAnd (Num.shiftRightZfBy 6 bits) lowest6BitsMask + |> Num.intCast + |> unsafeToChar + + s = + Num.bitwiseAnd bits lowest6BitsMask + |> Num.intCast + |> unsafeToChar + + equals : U8 + equals = 61 + + when missing is + 0 -> + [ p, q, r, s ] + 1 -> + [ p, q, r, equals ] + 2 -> + [ p, q, equals , equals ] + _ -> + # unreachable + [] + +# Base64 index to character/digit +unsafeToChar : U8 -> U8 +unsafeToChar = \n -> + if n <= 25 then + # uppercase characters + 65 + n + + else if n <= 51 then + # lowercase characters + 97 + (n - 26) + + else if n <= 61 then + # digit characters + 48 + (n - 52) + + else + # special cases + when n is + 62 -> + # '+' + 43 + + 63 -> + # '/' + 47 + + _ -> + # anything else is invalid '\u{0000}' + 0 diff --git a/examples/benchmarks/BytesDecode.roc b/examples/benchmarks/BytesDecode.roc new file mode 100644 index 0000000000..03a341c4ff --- /dev/null +++ b/examples/benchmarks/BytesDecode.roc @@ -0,0 +1,106 @@ +interface BytesDecode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports [] + +State : { bytes: List U8, cursor : Nat } + +DecodeError : [ OutOfBytes ] + + +Decoder a : [ @Decoder (State -> [Good State a, Bad DecodeError]) ] + +decode : List U8, Decoder a -> Result a DecodeError +decode = \bytes, @Decoder decoder -> + when decoder { bytes, cursor: 0 } is + Good _ value -> + Ok value + + Bad e -> + Err e + +succeed : a -> Decoder a +succeed = \value -> @Decoder \state -> Good state value + +map : Decoder a, (a -> b) 
-> Decoder b +map = \@Decoder decoder, transform -> + @Decoder \state -> + when decoder state is + Good state1 value -> + Good state1 (transform value) + + Bad e -> + Bad e + + +map2 : (a,b -> c), Decoder a, Decoder b -> Decoder c +map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> + @Decoder \state1 -> + when decoder1 state1 is + Good state2 a -> + when decoder2 state2 is + Good state3 b -> + Good state3 (transform a b) + + Bad e -> + Bad e + + Bad e -> + Bad e + +map3 : (a, b, c -> d), Decoder a, Decoder b, Decoder c -> Decoder d +map3 = \transform, @Decoder decoder1, @Decoder decoder2, @Decoder decoder3 -> + @Decoder \state1 -> + when decoder1 state1 is + Good state2 a -> + when decoder2 state2 is + Good state3 b -> + when decoder3 state3 is + Good state4 c -> + Good state4 (transform a b c) + + Bad e -> + Bad e + + Bad e -> + Bad e + + Bad e -> + Bad e + +after : Decoder a, (a -> Decoder b) -> Decoder b +after = \@Decoder decoder, transform -> + @Decoder \state -> + when decoder state is + Good state1 value -> + (@Decoder decoder1) = transform value + decoder1 state1 + + + Bad e -> + Bad e + +u8 : Decoder U8 +u8 = @Decoder \state -> + when List.get state.bytes state.cursor is + Ok b -> + Good { state & cursor: state.cursor + 1 } b + + Err _ -> + Bad OutOfBytes + +Step state b : [ Loop state, Done b ] + +loop : (state -> Decoder (Step state a)), state -> Decoder a +loop = \stepper, initial -> + @Decoder \state -> + loopHelp stepper initial state + +loopHelp = \stepper, accum, state -> + (@Decoder stepper1) = stepper accum + when stepper1 state is + Good newState (Done value) -> + Good newState value + + Good newState (Loop newAccum) -> + loopHelp stepper newAccum newState + + Bad e -> + Bad e From c4ddeefed96e741b6208d38ad47d5ee78253e515 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 20:45:17 +0100 Subject: [PATCH 08/33] add test of integer type inference let polymorphism --- compiler/solve/tests/solve_expr.rs | 22 ++++++++++++++++++++++ 1 
file changed, 22 insertions(+) diff --git a/compiler/solve/tests/solve_expr.rs b/compiler/solve/tests/solve_expr.rs index 229ae772a7..ba5d0519c3 100644 --- a/compiler/solve/tests/solve_expr.rs +++ b/compiler/solve/tests/solve_expr.rs @@ -4315,4 +4315,26 @@ mod solve_expr { "Str", ); } + + #[test] + fn int_type_let_polymorphism() { + infer_eq_without_problem( + indoc!( + r#" + app "test" provides [ main ] to "./platform" + + x = 4 + + f : U8 -> U32 + f = \z -> Num.intCast z + + y = f x + + main = + x + "# + ), + "Num *", + ); + } } From ad96d1ae24202c8cbe7ce170b051b9cd78c692e9 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 20:45:26 +0100 Subject: [PATCH 09/33] trim comment --- examples/benchmarks/Base64.roc | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index e9dc460cb3..cfcc364334 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -91,9 +91,6 @@ lowest6BitsMask = 63 bitsToCharsHelp : U32, Int * -> List U8 bitsToCharsHelp = \bits, missing -> - # Performance Notes - # `String.cons` proved to be the fastest way of combining characters into a string - # see also https://github.com/danfishgold/base64-bytes/pull/3#discussion_r342321940 # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd` # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left. 
From 86cf7cd983f92cb2a9bf8d9b443528bf8040f3c3 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 20:52:06 +0100 Subject: [PATCH 10/33] move file --- examples/benchmarks/Base64.roc | 34 +++++++++---------- .../{BytesDecode.roc => Bytes/Decode.roc} | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) rename examples/benchmarks/{BytesDecode.roc => Bytes/Decode.roc} (95%) diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index cfcc364334..92b30f1555 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -1,11 +1,11 @@ app "base64" packages { base: "platform" } - imports [base.Task, BytesDecode.{Decoder} ] + imports [base.Task, Bytes.Decode.{Decoder} ] provides [ main ] to base IO a : Task.Task a [] -Decoder a : BytesDecode.Decoder a +Decoder a : Bytes.Decode.Decoder a main : IO {} main = @@ -24,15 +24,15 @@ main = # ------ -fromBytes : List U8 -> Result Str BytesDecode.DecodeError +fromBytes : List U8 -> Result Str Bytes.Decode.DecodeError fromBytes = \bytes -> - BytesDecode.decode bytes (decodeBase64 (List.len bytes)) + Bytes.Decode.decode bytes (decodeBase64 (List.len bytes)) -decodeBase64 : Nat -> BytesDecode.Decoder Str -decodeBase64 = \width -> BytesDecode.loop loopHelp { remaining: width, string: "" } +decodeBase64 : Nat -> Bytes.Decode.Decoder Str +decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string: "" } -loopHelp : { remaining : Nat, string : Str } -> Decoder (BytesDecode.Step { remaining : Nat, string : Str } Str) +loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str) loopHelp = \{ remaining, string } -> if remaining >= 3 then helper = \x, y, z -> @@ -49,13 +49,13 @@ loopHelp = \{ remaining, string } -> string: Str.concat string (bitsToChars combined 0) } - BytesDecode.map3 helper - BytesDecode.u8 - BytesDecode.u8 - BytesDecode.u8 + Bytes.Decode.map3 helper + Bytes.Decode.u8 + Bytes.Decode.u8 + 
Bytes.Decode.u8 else if remaining == 0 then - BytesDecode.succeed (Done string) + Bytes.Decode.succeed (Done string) else if remaining == 2 then helperX = \x, y -> @@ -66,13 +66,13 @@ loopHelp = \{ remaining, string } -> combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b) Done (Str.concat string (bitsToChars combined 1)) - BytesDecode.map2 helperX - BytesDecode.u8 - BytesDecode.u8 + Bytes.Decode.map2 helperX + Bytes.Decode.u8 + Bytes.Decode.u8 else # remaining = 1 - BytesDecode.u8 - |> BytesDecode.map (\x -> + Bytes.Decode.u8 + |> Bytes.Decode.map (\x -> a : U32 a = Num.intCast x Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))) diff --git a/examples/benchmarks/BytesDecode.roc b/examples/benchmarks/Bytes/Decode.roc similarity index 95% rename from examples/benchmarks/BytesDecode.roc rename to examples/benchmarks/Bytes/Decode.roc index 03a341c4ff..db14d857a6 100644 --- a/examples/benchmarks/BytesDecode.roc +++ b/examples/benchmarks/Bytes/Decode.roc @@ -1,4 +1,4 @@ -interface BytesDecode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports [] +interface Bytes.Decode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports [] State : { bytes: List U8, cursor : Nat } From c4972f45baaa1dae44736079e644594a2081d4b8 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 21:12:47 +0100 Subject: [PATCH 11/33] bit shift tests --- compiler/gen/tests/gen_num.rs | 25 +++++++++++++++++++++++++ examples/benchmarks/Base64.roc | 4 ---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs index 262b8495b9..5fb442927c 100644 --- a/compiler/gen/tests/gen_num.rs +++ b/compiler/gen/tests/gen_num.rs @@ -1343,4 +1343,29 @@ mod gen_num { f64 ); } + + #[test] + fn shift_left_by() { + assert_evals_to!("Num.shiftLeftBy 0 0b0000_0001", 0b0000_0001, i64); + assert_evals_to!("Num.shiftLeftBy 1 
0b0000_0001", 0b0000_0010, i64); + assert_evals_to!("Num.shiftLeftBy 2 0b0000_0011", 0b0000_1100, i64); + } + + #[test] + #[ignore] + fn shift_right_by() { + // Sign Extended Right Shift + assert_evals_to!("Num.shiftRightBy 0 0b0100_0000i8", 0b0001_0000, i8); + assert_evals_to!("Num.shiftRightBy 1 0b1110_0000u8", 0b1111_0000u8 as i8, i8); + assert_evals_to!("Num.shiftRightBy 2 0b1100_0000u8", 0b1111_0000u8 as i8, i8); + } + + #[test] + #[ignore] + fn shift_right_zf_by() { + // Logical Right Shift + assert_evals_to!("Num.shiftRightBy 1 0b1100_0000u8", 0b0011_0000, i64); + assert_evals_to!("Num.shiftRightBy 2 0b0000_0010u8", 0b0000_0001, i64); + assert_evals_to!("Num.shiftRightBy 3 0b0000_1100u8", 0b0000_0011, i64); + } } diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index 92b30f1555..1fed392288 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -17,10 +17,6 @@ main = Err _ -> Task.putLine "sadness" - - - - # ------ From 1746b8da6f5cb3662bf26941b2ffef162eebbe29 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 21:14:20 +0100 Subject: [PATCH 12/33] bitwise or test --- compiler/gen/tests/gen_num.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs index 5fb442927c..f6976de9e0 100644 --- a/compiler/gen/tests/gen_num.rs +++ b/compiler/gen/tests/gen_num.rs @@ -750,6 +750,12 @@ mod gen_num { assert_evals_to!("Num.bitwiseXor 200 0", 200, i64); } + #[test] + fn bitwise_or() { + assert_evals_to!("Num.bitwiseOr 1 1", 1, i64); + assert_evals_to!("Num.bitwiseOr 1 2", 3, i64); + } + #[test] fn lt_i64() { assert_evals_to!("1 < 2", true, bool); From 63091392f55398924d7b688de9ca20af230f3db9 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 21 Feb 2021 21:18:15 +0100 Subject: [PATCH 13/33] toBytes test --- compiler/gen/tests/gen_str.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compiler/gen/tests/gen_str.rs 
b/compiler/gen/tests/gen_str.rs index dafdbf4b60..53bfa1f53e 100644 --- a/compiler/gen/tests/gen_str.rs +++ b/compiler/gen/tests/gen_str.rs @@ -816,4 +816,17 @@ mod gen_str { fn str_from_float() { assert_evals_to!(r#"Str.fromFloat 3.14"#, RocStr::from("3.140000"), RocStr); } + + #[test] + fn str_to_bytes() { + assert_evals_to!(r#"Str.toBytes "hello""#, &[104, 101, 108, 108, 111], &[u8]); + assert_evals_to!( + r#"Str.toBytes "this is a long string""#, + &[ + 116, 104, 105, 115, 32, 105, 115, 32, 97, 32, 108, 111, 110, 103, 32, 115, 116, + 114, 105, 110, 103 + ], + &[u8] + ); + } } From 30ecd378a07e63ffa52a0a5efe47e54f446a7057 Mon Sep 17 00:00:00 2001 From: Folkert Date: Tue, 23 Feb 2021 14:31:48 +0100 Subject: [PATCH 14/33] refactor parse AST to allow multiple if branches --- compiler/can/src/expr.rs | 39 ++++++--- compiler/can/src/operator.rs | 21 +++-- compiler/fmt/src/expr.rs | 164 +++++++++++++++++++---------------- compiler/parse/src/ast.rs | 2 +- compiler/parse/src/expr.rs | 5 +- editor/src/lang/expr.rs | 23 +++-- 6 files changed, 145 insertions(+), 109 deletions(-) diff --git a/compiler/can/src/expr.rs b/compiler/can/src/expr.rs index a2248b7391..d1718703e6 100644 --- a/compiler/can/src/expr.rs +++ b/compiler/can/src/expr.rs @@ -674,32 +674,43 @@ pub fn canonicalize_expr<'a>( Output::default(), ) } - ast::Expr::If(cond, then_branch, else_branch) => { - let (loc_cond, mut output) = - canonicalize_expr(env, var_store, scope, cond.region, &cond.value); - let (loc_then, then_output) = canonicalize_expr( - env, - var_store, - scope, - then_branch.region, - &then_branch.value, - ); + ast::Expr::If(if_thens, final_else_branch) => { + let mut branches = Vec::with_capacity(1); + let mut output = Output::default(); + + for (condition, then_branch) in if_thens.iter() { + let (loc_cond, cond_output) = + canonicalize_expr(env, var_store, scope, condition.region, &condition.value); + + let (loc_then, then_output) = canonicalize_expr( + env, + var_store, + scope, + 
then_branch.region, + &then_branch.value, + ); + + branches.push((loc_cond, loc_then)); + + output.references = output.references.union(cond_output.references); + output.references = output.references.union(then_output.references); + } + let (loc_else, else_output) = canonicalize_expr( env, var_store, scope, - else_branch.region, - &else_branch.value, + final_else_branch.region, + &final_else_branch.value, ); - output.references = output.references.union(then_output.references); output.references = output.references.union(else_output.references); ( If { cond_var: var_store.fresh(), branch_var: var_store.fresh(), - branches: vec![(loc_cond, loc_then)], + branches, final_else: Box::new(loc_else), }, output, diff --git a/compiler/can/src/operator.rs b/compiler/can/src/operator.rs index 5f9d73bb70..048c2b9d83 100644 --- a/compiler/can/src/operator.rs +++ b/compiler/can/src/operator.rs @@ -290,16 +290,21 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located>) -> &'a }), ) } - If(condition, then_branch, else_branch) - | Nested(If(condition, then_branch, else_branch)) => { - // If does not get desugared yet so we can give more targetted error messages during - // type checking. - let desugared_cond = &*arena.alloc(desugar_expr(arena, &condition)); - let desugared_then = &*arena.alloc(desugar_expr(arena, &then_branch)); - let desugared_else = &*arena.alloc(desugar_expr(arena, &else_branch)); + If(if_thens, final_else_branch) | Nested(If(if_thens, final_else_branch)) => { + // If does not get desugared into `when` so we can give more targeted error messages during type checking. 
+ let desugared_final_else = &*arena.alloc(desugar_expr(arena, &final_else_branch)); + + let mut desugared_if_thens = Vec::with_capacity_in(if_thens.len(), arena); + + for (condition, then_branch) in if_thens.iter() { + desugared_if_thens.push(( + desugar_expr(arena, condition).clone(), + desugar_expr(arena, then_branch).clone(), + )); + } arena.alloc(Located { - value: If(desugared_cond, desugared_then, desugared_else), + value: If(desugared_if_thens.into_bump_slice(), desugared_final_else), region: loc_expr.region, }) } diff --git a/compiler/fmt/src/expr.rs b/compiler/fmt/src/expr.rs index 1be56ee514..80056cce7f 100644 --- a/compiler/fmt/src/expr.rs +++ b/compiler/fmt/src/expr.rs @@ -58,8 +58,11 @@ impl<'a> Formattable<'a> for Expr<'a> { loc_expr.is_multiline() || args.iter().any(|loc_arg| loc_arg.is_multiline()) } - If(loc_cond, loc_if_true, loc_if_false) => { - loc_cond.is_multiline() || loc_if_true.is_multiline() || loc_if_false.is_multiline() + If(branches, final_else) => { + final_else.is_multiline() + || branches + .iter() + .any(|(c, t)| c.is_multiline() || t.is_multiline()) } BinOp((loc_left, _, loc_right)) => { @@ -257,8 +260,8 @@ impl<'a> Formattable<'a> for Expr<'a> { // still print the return value. 
ret.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent); } - If(loc_condition, loc_then, loc_else) => { - fmt_if(buf, loc_condition, loc_then, loc_else, indent); + If(branches, final_else) => { + fmt_if(buf, branches, final_else, self.is_multiline(), indent); } When(loc_condition, branches) => fmt_when(buf, loc_condition, branches, indent), List { @@ -629,15 +632,15 @@ fn fmt_when<'a>( fn fmt_if<'a>( buf: &mut String<'a>, - loc_condition: &'a Located>, - loc_then: &'a Located>, - loc_else: &'a Located>, + branches: &'a [(Located>, Located>)], + final_else: &'a Located>, + is_multiline: bool, indent: u16, ) { - let is_multiline_then = loc_then.is_multiline(); - let is_multiline_else = loc_else.is_multiline(); - let is_multiline_condition = loc_condition.is_multiline(); - let is_multiline = is_multiline_then || is_multiline_else || is_multiline_condition; + // let is_multiline_then = loc_then.is_multiline(); + // let is_multiline_else = final_else.is_multiline(); + // let is_multiline_condition = loc_condition.is_multiline(); + // let is_multiline = is_multiline_then || is_multiline_else || is_multiline_condition; let return_indent = if is_multiline { indent + INDENT @@ -645,80 +648,89 @@ fn fmt_if<'a>( indent }; - buf.push_str("if"); + for (loc_condition, loc_then) in branches.iter() { + let is_multiline_condition = loc_condition.is_multiline(); - if is_multiline_condition { - match &loc_condition.value { - Expr::SpaceBefore(expr_below, spaces_above_expr) => { - fmt_comments_only(buf, spaces_above_expr.iter(), NewlineAt::Top, return_indent); - newline(buf, return_indent); + buf.push_str("if"); - match &expr_below { - Expr::SpaceAfter(expr_above, spaces_below_expr) => { - expr_above.format(buf, return_indent); - fmt_comments_only( - buf, - spaces_below_expr.iter(), - NewlineAt::Top, - return_indent, - ); - newline(buf, indent); - } + if is_multiline_condition { + match &loc_condition.value { + Expr::SpaceBefore(expr_below, spaces_above_expr) => { + 
fmt_comments_only(buf, spaces_above_expr.iter(), NewlineAt::Top, return_indent); + newline(buf, return_indent); - _ => { - expr_below.format(buf, return_indent); + match &expr_below { + Expr::SpaceAfter(expr_above, spaces_below_expr) => { + expr_above.format(buf, return_indent); + fmt_comments_only( + buf, + spaces_below_expr.iter(), + NewlineAt::Top, + return_indent, + ); + newline(buf, indent); + } + + _ => { + expr_below.format(buf, return_indent); + } } } - } - Expr::SpaceAfter(expr_above, spaces_below_expr) => { - newline(buf, return_indent); - expr_above.format(buf, return_indent); - fmt_comments_only(buf, spaces_below_expr.iter(), NewlineAt::Top, return_indent); - newline(buf, indent); - } + Expr::SpaceAfter(expr_above, spaces_below_expr) => { + newline(buf, return_indent); + expr_above.format(buf, return_indent); + fmt_comments_only(buf, spaces_below_expr.iter(), NewlineAt::Top, return_indent); + newline(buf, indent); + } - _ => { - newline(buf, return_indent); - loc_condition.format(buf, return_indent); - newline(buf, indent); - } - } - } else { - buf.push(' '); - loc_condition.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent); - buf.push(' '); - } - - buf.push_str("then"); - - if is_multiline { - match &loc_then.value { - Expr::SpaceBefore(expr_below, spaces_below) => { - // we want exactly one newline, user-inserted extra newlines are ignored. 
- newline(buf, return_indent); - fmt_comments_only(buf, spaces_below.iter(), NewlineAt::Bottom, return_indent); - - match &expr_below { - Expr::SpaceAfter(expr_above, spaces_above) => { - expr_above.format(buf, return_indent); - - fmt_comments_only(buf, spaces_above.iter(), NewlineAt::Top, return_indent); - newline(buf, indent); - } - - _ => { - expr_below.format(buf, return_indent); - } + _ => { + newline(buf, return_indent); + loc_condition.format(buf, return_indent); + newline(buf, indent); } } - _ => { - loc_condition.format(buf, return_indent); - } + } else { + buf.push(' '); + loc_condition.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent); + buf.push(' '); + } + + buf.push_str("then"); + + if is_multiline { + match &loc_then.value { + Expr::SpaceBefore(expr_below, spaces_below) => { + // we want exactly one newline, user-inserted extra newlines are ignored. + newline(buf, return_indent); + fmt_comments_only(buf, spaces_below.iter(), NewlineAt::Bottom, return_indent); + + match &expr_below { + Expr::SpaceAfter(expr_above, spaces_above) => { + expr_above.format(buf, return_indent); + + fmt_comments_only( + buf, + spaces_above.iter(), + NewlineAt::Top, + return_indent, + ); + newline(buf, indent); + } + + _ => { + expr_below.format(buf, return_indent); + } + } + } + _ => { + loc_condition.format(buf, return_indent); + } + } + } else { + buf.push_str(" "); + loc_then.format(buf, return_indent); } - } else { - buf.push_str(" "); - loc_then.format(buf, return_indent); } if is_multiline { @@ -728,7 +740,7 @@ fn fmt_if<'a>( buf.push_str(" else "); } - loc_else.format(buf, return_indent); + final_else.format(buf, return_indent); } pub fn fmt_closure<'a>( diff --git a/compiler/parse/src/ast.rs b/compiler/parse/src/ast.rs index 70964e246e..f33ba8149b 100644 --- a/compiler/parse/src/ast.rs +++ b/compiler/parse/src/ast.rs @@ -127,7 +127,7 @@ pub enum Expr<'a> { UnaryOp(&'a Loc>, Loc), // Conditionals - If(&'a Loc>, &'a Loc>, &'a Loc>), + If(&'a [(Loc>, 
Loc>)], &'a Loc>), When( /// The condition &'a Loc>, diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs index 8d187f10fb..5f05d89b17 100644 --- a/compiler/parse/src/expr.rs +++ b/compiler/parse/src/expr.rs @@ -324,7 +324,7 @@ pub fn expr_to_pattern<'a>( | Expr::Closure(_, _) | Expr::BinOp(_) | Expr::Defs(_, _) - | Expr::If(_, _, _) + | Expr::If(_, _) | Expr::When(_, _) | Expr::MalformedClosure | Expr::PrecedenceConflict(_, _, _, _) @@ -1264,8 +1264,7 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a> ), |arena: &'a Bump, (condition, (then_branch, else_branch))| { Expr::If( - &*arena.alloc(condition), - &*arena.alloc(then_branch), + arena.alloc([(condition, then_branch)]), &*arena.alloc(else_branch), ) } diff --git a/editor/src/lang/expr.rs b/editor/src/lang/expr.rs index 83791facb1..3cc1a4f912 100644 --- a/editor/src/lang/expr.rs +++ b/editor/src/lang/expr.rs @@ -508,22 +508,31 @@ pub fn to_expr2<'a>( Output::default(), ), - If(cond, then_branch, else_branch) => { - let (cond, mut output) = to_expr2(env, scope, &cond.value, cond.region); + If(branches, final_else) => { + let mut new_branches = Vec::with_capacity(branches.len()); + let mut output = Output::default(); - let (then_expr, then_output) = - to_expr2(env, scope, &then_branch.value, then_branch.region); + for (condition, then_branch) in branches.iter() { + let (cond, cond_output) = to_expr2(env, scope, &condition.value, condition.region); + + let (then_expr, then_output) = + to_expr2(env, scope, &then_branch.value, then_branch.region); + + output.references.union_mut(cond_output.references); + output.references.union_mut(then_output.references); + + new_branches.push((cond, then_expr)); + } let (else_expr, else_output) = - to_expr2(env, scope, &else_branch.value, else_branch.region); + to_expr2(env, scope, &final_else.value, final_else.region); - output.references.union_mut(then_output.references); output.references.union_mut(else_output.references); let 
expr = Expr2::If { cond_var: env.var_store.fresh(), expr_var: env.var_store.fresh(), - branches: PoolVec::new(vec![(cond, then_expr)].into_iter(), env.pool), + branches: PoolVec::new(new_branches.into_iter(), env.pool), final_else: env.pool.add(else_expr), }; From 5d8944fc6a4a9ca6910219bf66d6e46369fe334b Mon Sep 17 00:00:00 2001 From: Folkert Date: Tue, 23 Feb 2021 15:05:25 +0100 Subject: [PATCH 15/33] use new parser for If --- compiler/parse/src/expr.rs | 90 ++++++++++++++++++++++-------------- compiler/parse/src/parser.rs | 20 ++++++++ 2 files changed, 76 insertions(+), 34 deletions(-) diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs index 5f05d89b17..7fe80a1b02 100644 --- a/compiler/parse/src/expr.rs +++ b/compiler/parse/src/expr.rs @@ -11,7 +11,7 @@ use crate::number_literal::number_literal; use crate::parser::{ self, allocated, and_then_with_indent_level, ascii_char, ascii_string, attempt, backtrackable, fail, map, newline_char, not, not_followed_by, optional, sep_by1, specialize, specialize_ref, - then, unexpected, unexpected_eof, word1, word2, EExpr, Either, ParseResult, Parser, State, + then, unexpected, unexpected_eof, word1, word2, EExpr, Either, If, ParseResult, Parser, State, SyntaxError, When, }; use crate::pattern::loc_closure_param; @@ -1234,40 +1234,62 @@ mod when { } } -pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - map_with_arena!( - and!( - skip_first!( - parser::keyword(keyword::IF, min_indent), - space1_around( - loc!(move |arena, state| parse_expr(min_indent, arena, state)), - min_indent, - ) +pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { + move |arena: &'a Bump, state| { + let (_, _, state) = parser::keyword_e(keyword::IF, If::If).parse(arena, state)?; + + let mut branches = Vec::with_capacity_in(1, arena); + + let (_, cond, state) = space0_around_e( + specialize_ref( + If::Syntax, + loc!(move |arena, state| parse_expr(min_indent, arena, state)), ), - 
and!( - skip_first!( - parser::keyword(keyword::THEN, min_indent), - space1_around( - loc!(move |arena, state| parse_expr(min_indent, arena, state)), - min_indent, - ) - ), - skip_first!( - parser::keyword(keyword::ELSE, min_indent), - // NOTE changed this from space1_around to space1_before - space1_before( - loc!(move |arena, state| parse_expr(min_indent, arena, state)), - min_indent, - ) - ) - ) - ), - |arena: &'a Bump, (condition, (then_branch, else_branch))| { - Expr::If( - arena.alloc([(condition, then_branch)]), - &*arena.alloc(else_branch), - ) - } + min_indent, + If::Space, + If::IndentCondition, + ) + .parse(arena, state)?; + + let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then).parse(arena, state)?; + + let (_, then_branch, state) = space0_around_e( + specialize_ref( + If::Syntax, + loc!(move |arena, state| parse_expr(min_indent, arena, state)), + ), + min_indent, + If::Space, + If::IndentThen, + ) + .parse(arena, state)?; + + let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else).parse(arena, state)?; + + branches.push((cond, then_branch)); + + let (_, else_branch, state) = space0_before_e( + specialize_ref( + If::Syntax, + loc!(move |arena, state| parse_expr(min_indent, arena, state)), + ), + min_indent, + If::Space, + If::IndentElse, + ) + .parse(arena, state)?; + + // parse the final else + let expr = Expr::If(branches.into_bump_slice(), arena.alloc(else_branch)); + + Ok((MadeProgress, expr, state)) + } +} + +pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { + specialize( + |e, r, c| SyntaxError::Expr(EExpr::If(e, r, c)), + if_expr_help(min_indent), ) } diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs index 71d2259bc1..a9da276780 100644 --- a/compiler/parse/src/parser.rs +++ b/compiler/parse/src/parser.rs @@ -378,6 +378,7 @@ pub enum EExpr<'a> { Space(BadInputError, Row, Col), When(When<'a>, Row, Col), + If(If<'a>, Row, Col), // EInParens(PInParens<'a>, Row, Col), 
IndentStart(Row, Col), @@ -408,6 +409,25 @@ pub enum When<'a> { PatternAlignment(u16, Row, Col), } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum If<'a> { + Space(BadInputError, Row, Col), + If(Row, Col), + Then(Row, Col), + Else(Row, Col), + // TODO make EEXpr + Condition(&'a EExpr<'a>, Row, Col), + ThenBranch(&'a EExpr<'a>, Row, Col), + ElseBranch(&'a EExpr<'a>, Row, Col), + Syntax(&'a SyntaxError<'a>, Row, Col), + + IndentCondition(Row, Col), + IndentThen(Row, Col), + IndentElse(Row, Col), + + PatternAlignment(u16, Row, Col), +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum EPattern<'a> { Record(PRecord<'a>, Row, Col), From 3907680536dd608505395a3b82a3d842932cf3ad Mon Sep 17 00:00:00 2001 From: Folkert Date: Tue, 23 Feb 2021 15:21:19 +0100 Subject: [PATCH 16/33] parse multiple if-then-else pairs into one AST node --- compiler/parse/src/expr.rs | 82 ++++++++++++++-------- compiler/parse/src/parser.rs | 1 + compiler/reporting/tests/test_reporting.rs | 59 ++++++++-------- 3 files changed, 85 insertions(+), 57 deletions(-) diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs index 7fe80a1b02..84964e164b 100644 --- a/compiler/parse/src/expr.rs +++ b/compiler/parse/src/expr.rs @@ -3,7 +3,7 @@ use crate::ast::{ }; use crate::blankspace::{ line_comment, space0, space0_after, space0_after_e, space0_around, space0_around_e, - space0_before, space0_before_e, space0_e, space1, space1_around, space1_before, spaces_exactly, + space0_before, space0_before_e, space0_e, space1, space1_before, spaces_exactly, }; use crate::ident::{global_tag_or_ident, ident, lowercase_ident, Ident}; use crate::keyword; @@ -1240,33 +1240,59 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { let mut branches = Vec::with_capacity_in(1, arena); - let (_, cond, state) = space0_around_e( - specialize_ref( - If::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)), - ), - min_indent, - If::Space, - If::IndentCondition, - ) - 
.parse(arena, state)?; + let mut loop_state = state; - let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then).parse(arena, state)?; + let state_final_else = loop { + let state = loop_state; + let (_, cond, state) = space0_around_e( + specialize_ref( + If::Syntax, + loc!(move |arena, state| parse_expr(min_indent, arena, state)), + ), + min_indent, + If::Space, + If::IndentCondition, + ) + .parse(arena, state) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; - let (_, then_branch, state) = space0_around_e( - specialize_ref( - If::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)), - ), - min_indent, - If::Space, - If::IndentThen, - ) - .parse(arena, state)?; + let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then) + .parse(arena, state) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; - let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else).parse(arena, state)?; + let (_, then_branch, state) = space0_around_e( + specialize_ref( + If::Syntax, + loc!(move |arena, state| parse_expr(min_indent, arena, state)), + ), + min_indent, + If::Space, + If::IndentThen, + ) + .parse(arena, state) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; - branches.push((cond, then_branch)); + let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else) + .parse(arena, state) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; + + branches.push((cond, then_branch)); + + // try to parse another `if` + // NOTE this drops spaces between the `else` and the `if` + let optional_if = and!( + backtrackable(space0_e(min_indent, If::Space, If::IndentIf)), + parser::keyword_e(keyword::IF, If::If) + ); + + match optional_if.parse(arena, state) { + Err((_, _, state)) => break state, + Ok((_, _, state)) => { + loop_state = state; + continue; + } + } + }; let (_, else_branch, state) = space0_before_e( specialize_ref( @@ -1277,9 +1303,9 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { If::Space, If::IndentElse, ) - .parse(arena, state)?; 
+ .parse(arena, state_final_else) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; - // parse the final else let expr = Expr::If(branches.into_bump_slice(), arena.alloc(else_branch)); Ok((MadeProgress, expr, state)) @@ -1287,10 +1313,10 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { } pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - specialize( + debug!(specialize( |e, r, c| SyntaxError::Expr(EExpr::If(e, r, c)), if_expr_help(min_indent), - ) + )) } /// This is a helper function for parsing function args. diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs index a9da276780..9f6eba16bd 100644 --- a/compiler/parse/src/parser.rs +++ b/compiler/parse/src/parser.rs @@ -422,6 +422,7 @@ pub enum If<'a> { Syntax(&'a SyntaxError<'a>, Row, Col), IndentCondition(Row, Col), + IndentIf(Row, Col), IndentThen(Row, Col), IndentElse(Row, Col), diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs index 435f092b1d..66be20f5ca 100644 --- a/compiler/reporting/tests/test_reporting.rs +++ b/compiler/reporting/tests/test_reporting.rs @@ -801,35 +801,36 @@ mod test_reporting { ) } - // #[test] - // fn if_3_branch_mismatch() { - // report_problem_as( - // indoc!( - // r#" - // if True then 2 else if False then 2 else "foo" - // "# - // ), - // indoc!( - // r#" - // ── TYPE MISMATCH ─────────────────────────────────────────────────────────────── - - // The 2nd branch of this `if` does not match all the previous branches: - - // 1│ if True then 2 else "foo" - // ^^^^^ - - // The 2nd branch is a string of type - - // Str - - // But all the previous branches have the type - - // Num a - - // "# - // ), - // ) - // } + #[test] + fn if_3_branch_mismatch() { + report_problem_as( + indoc!( + r#" + if True then 2 else if False then 2 else "foo" + "# + ), + indoc!( + r#" + ── TYPE MISMATCH ─────────────────────────────────────────────────────────────── + + The 
3rd branch of this `if` does not match all the previous branches: + + 1│ if True then 2 else if False then 2 else "foo" + ^^^^^ + + The 3rd branch is a string of type: + + Str + + But all the previous branches have type: + + Num a + + I need all branches in an `if` to have the same type! + "# + ), + ) + } #[test] fn when_branch_mismatch() { From 6eab8abe9e4b75e2fd5289ca64bbdc4546ede774 Mon Sep 17 00:00:00 2001 From: Folkert Date: Tue, 23 Feb 2021 18:34:08 +0100 Subject: [PATCH 17/33] improve message for outdented then --- compiler/parse/src/blankspace.rs | 12 +- compiler/parse/src/expr.rs | 88 +++++++------ compiler/parse/src/parser.rs | 11 +- compiler/parse/src/pattern.rs | 5 +- compiler/parse/src/type_annotation.rs | 10 +- compiler/reporting/src/error/parse.rs | 141 ++++++++++++++++++++- compiler/reporting/tests/test_reporting.rs | 60 ++++++++- 7 files changed, 270 insertions(+), 57 deletions(-) diff --git a/compiler/parse/src/blankspace.rs b/compiler/parse/src/blankspace.rs index 10b256b9b8..f2234ebed0 100644 --- a/compiler/parse/src/blankspace.rs +++ b/compiler/parse/src/blankspace.rs @@ -60,11 +60,12 @@ where ) } -pub fn space0_around_e<'a, P, S, E>( +pub fn space0_around_ee<'a, P, S, E>( parser: P, min_indent: u16, space_problem: fn(BadInputError, Row, Col) -> E, - indent_problem: fn(Row, Col) -> E, + indent_before_problem: fn(Row, Col) -> E, + indent_after_problem: fn(Row, Col) -> E, ) -> impl Parser<'a, Located, E> where S: Spaceable<'a>, @@ -75,8 +76,11 @@ where { parser::map_with_arena( and( - space0_e(min_indent, space_problem, indent_problem), - and(parser, space0_e(min_indent, space_problem, indent_problem)), + space0_e(min_indent, space_problem, indent_before_problem), + and( + parser, + space0_e(min_indent, space_problem, indent_after_problem), + ), ), move |arena: &'a Bump, tuples: ( diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs index 84964e164b..1b01a08edb 100644 --- a/compiler/parse/src/expr.rs +++ 
b/compiler/parse/src/expr.rs @@ -2,7 +2,7 @@ use crate::ast::{ AssignedField, Attempting, CommentOrNewline, Def, Expr, Pattern, Spaceable, TypeAnnotation, }; use crate::blankspace::{ - line_comment, space0, space0_after, space0_after_e, space0_around, space0_around_e, + line_comment, space0, space0_after, space0_after_e, space0_around, space0_around_ee, space0_before, space0_before_e, space0_e, space1, space1_before, spaces_exactly, }; use crate::ident::{global_tag_or_ident, ident, lowercase_ident, Ident}; @@ -1029,14 +1029,15 @@ mod when { and!( when_with_indent(), skip_second!( - space0_around_e( + space0_around_ee( loc!(specialize_ref( When::Syntax, move |arena, state| parse_expr(min_indent, arena, state) )), min_indent, When::Space, - When::IndentCondition + When::IndentCondition, + When::IndentIs, ), parser::keyword_e(keyword::IS, When::Is) ) @@ -1182,13 +1183,14 @@ mod when { skip_first!( parser::keyword_e(keyword::IF, When::IfToken), // TODO we should require space before the expression but not after - space0_around_e( + space0_around_ee( loc!(specialize_ref(When::IfGuard, move |arena, state| { parse_expr(min_indent, arena, state) })), min_indent, When::Space, When::IndentIfGuard, + When::IndentArrow, ) ), Some @@ -1234,6 +1236,49 @@ mod when { } } +fn if_branch<'a>( + min_indent: u16, +) -> impl Parser<'a, (Located>, Located>), If<'a>> { + move |arena, state| { + // NOTE: only parse spaces before the expression + let (_, cond, state) = space0_around_ee( + specialize_ref( + If::Syntax, + loc!(move |arena, state| parse_expr(min_indent, arena, state)), + ), + min_indent, + If::Space, + If::IndentCondition, + If::IndentThenToken, + ) + .parse(arena, state) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; + + let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then) + .parse(arena, state) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; + + let (_, then_branch, state) = space0_around_ee( + specialize_ref( + If::Syntax, + loc!(move |arena, state| 
parse_expr(min_indent, arena, state)), + ), + min_indent, + If::Space, + If::IndentThenBranch, + If::IndentElseToken, + ) + .parse(arena, state) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; + + let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else) + .parse(arena, state) + .map_err(|(_, f, s)| (MadeProgress, f, s))?; + + Ok((MadeProgress, (cond, then_branch), state)) + } +} + pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { move |arena: &'a Bump, state| { let (_, _, state) = parser::keyword_e(keyword::IF, If::If).parse(arena, state)?; @@ -1243,38 +1288,7 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { let mut loop_state = state; let state_final_else = loop { - let state = loop_state; - let (_, cond, state) = space0_around_e( - specialize_ref( - If::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)), - ), - min_indent, - If::Space, - If::IndentCondition, - ) - .parse(arena, state) - .map_err(|(_, f, s)| (MadeProgress, f, s))?; - - let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then) - .parse(arena, state) - .map_err(|(_, f, s)| (MadeProgress, f, s))?; - - let (_, then_branch, state) = space0_around_e( - specialize_ref( - If::Syntax, - loc!(move |arena, state| parse_expr(min_indent, arena, state)), - ), - min_indent, - If::Space, - If::IndentThen, - ) - .parse(arena, state) - .map_err(|(_, f, s)| (MadeProgress, f, s))?; - - let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else) - .parse(arena, state) - .map_err(|(_, f, s)| (MadeProgress, f, s))?; + let (_, (cond, then_branch), state) = if_branch(min_indent).parse(arena, loop_state)?; branches.push((cond, then_branch)); @@ -1301,7 +1315,7 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> { ), min_indent, If::Space, - If::IndentElse, + If::IndentElseBranch, ) .parse(arena, state_final_else) .map_err(|(_, f, s)| (MadeProgress, f, s))?; diff --git 
a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs index 9f6eba16bd..b1d3d3545a 100644 --- a/compiler/parse/src/parser.rs +++ b/compiler/parse/src/parser.rs @@ -423,10 +423,10 @@ pub enum If<'a> { IndentCondition(Row, Col), IndentIf(Row, Col), - IndentThen(Row, Col), - IndentElse(Row, Col), - - PatternAlignment(u16, Row, Col), + IndentThenToken(Row, Col), + IndentElseToken(Row, Col), + IndentThenBranch(Row, Col), + IndentElseBranch(Row, Col), } #[derive(Debug, Clone, PartialEq, Eq)] @@ -1452,10 +1452,11 @@ macro_rules! collection_trailing_sep_e { and!( $crate::parser::trailing_sep_by0( $delimiter, - $crate::blankspace::space0_around_e( + $crate::blankspace::space0_around_ee( $elem, $min_indent, $space_problem, + $indent_problem, $indent_problem ) ), diff --git a/compiler/parse/src/pattern.rs b/compiler/parse/src/pattern.rs index b9b3f576e6..02daa8c5a9 100644 --- a/compiler/parse/src/pattern.rs +++ b/compiler/parse/src/pattern.rs @@ -1,5 +1,5 @@ use crate::ast::Pattern; -use crate::blankspace::{space0_around_e, space0_before_e, space0_e}; +use crate::blankspace::{space0_around_ee, space0_before_e, space0_e}; use crate::ident::{ident, lowercase_ident, Ident}; use crate::number_literal::number_literal; use crate::parser::Progress::{self, *}; @@ -133,11 +133,12 @@ fn loc_pattern_in_parens_help<'a>( ) -> impl Parser<'a, Located>, PInParens<'a>> { between!( word1(b'(', PInParens::Open), - space0_around_e( + space0_around_ee( move |arena, state| specialize_ref(PInParens::Syntax, loc_pattern(min_indent)) .parse(arena, state), min_indent, PInParens::Space, + PInParens::IndentOpen, PInParens::IndentEnd, ), word1(b')', PInParens::End) diff --git a/compiler/parse/src/type_annotation.rs b/compiler/parse/src/type_annotation.rs index 974ec7f94b..88181f0908 100644 --- a/compiler/parse/src/type_annotation.rs +++ b/compiler/parse/src/type_annotation.rs @@ -1,5 +1,5 @@ use crate::ast::{AssignedField, Tag, TypeAnnotation}; -use crate::blankspace::{space0_around_e, 
space0_before_e, space0_e}; +use crate::blankspace::{space0_around_ee, space0_before_e, space0_e}; use crate::ident::join_module_parts; use crate::keyword; use crate::parser::{ @@ -146,11 +146,12 @@ fn loc_type_in_parens<'a>( ) -> impl Parser<'a, Located>, TInParens<'a>> { between!( word1(b'(', TInParens::Open), - space0_around_e( + space0_around_ee( move |arena, state| specialize_ref(TInParens::Type, expression(min_indent)) .parse(arena, state), min_indent, TInParens::Space, + TInParens::IndentOpen, TInParens::IndentEnd, ), word1(b')', TInParens::End) @@ -436,11 +437,12 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located let (p2, rest, state) = zero_or_more!(skip_first!( word1(b',', Type::TFunctionArgument), one_of![ - space0_around_e( + space0_around_ee( term(min_indent), min_indent, Type::TSpace, - Type::TIndentStart + Type::TIndentStart, + Type::TIndentEnd ), |_, state: State<'a>| Err(( NoProgress, diff --git a/compiler/reporting/src/error/parse.rs b/compiler/reporting/src/error/parse.rs index 3431c87379..33f09653c1 100644 --- a/compiler/reporting/src/error/parse.rs +++ b/compiler/reporting/src/error/parse.rs @@ -158,7 +158,9 @@ enum Context { enum Node { WhenCondition, WhenBranch, - // WhenIfGuard, + IfCondition, + IfThenBranch, + IfElseBranch, } fn to_expr_report<'a>( @@ -173,10 +175,130 @@ fn to_expr_report<'a>( match parse_problem { EExpr::When(when, row, col) => to_when_report(alloc, filename, context, &when, *row, *col), + EExpr::If(when, row, col) => to_if_report(alloc, filename, context, &when, *row, *col), _ => todo!("unhandled parse error: {:?}", parse_problem), } } +fn to_if_report<'a>( + alloc: &'a RocDocAllocator<'a>, + filename: PathBuf, + context: Context, + parse_problem: &roc_parse::parser::If<'a>, + start_row: Row, + start_col: Col, +) -> Report<'a> { + use roc_parse::parser::If; + + match *parse_problem { + If::Syntax(syntax, row, col) => to_syntax_report(alloc, filename, syntax, row, col), + If::Space(error, row, col) => 
to_space_report(alloc, filename, &error, row, col), + + If::Condition(expr, row, col) => to_expr_report( + alloc, + filename, + Context::InNode(Node::IfCondition, start_row, start_col, Box::new(context)), + expr, + row, + col, + ), + + If::ThenBranch(expr, row, col) => to_expr_report( + alloc, + filename, + Context::InNode(Node::IfThenBranch, start_row, start_col, Box::new(context)), + expr, + row, + col, + ), + + If::ElseBranch(expr, row, col) => to_expr_report( + alloc, + filename, + Context::InNode(Node::IfElseBranch, start_row, start_col, Box::new(context)), + expr, + row, + col, + ), + + If::If(_row, _col) => unreachable!("another branch would be taken"), + If::IndentIf(_row, _col) => unreachable!("another branch would be taken"), + + If::Then(row, col) | If::IndentThenBranch(row, col) | If::IndentThenToken(row, col) => { + to_unfinished_if_report( + alloc, + filename, + row, + col, + start_row, + start_col, + alloc.concat(vec![ + alloc.reflow(r"I was expecting to see the "), + alloc.keyword("then"), + alloc.reflow(r" keyword next."), + ]), + ) + } + + If::Else(row, col) | If::IndentElseBranch(row, col) | If::IndentElseToken(row, col) => { + to_unfinished_if_report( + alloc, + filename, + row, + col, + start_row, + start_col, + alloc.concat(vec![ + alloc.reflow(r"I was expecting to see the "), + alloc.keyword("else"), + alloc.reflow(r" keyword next."), + ]), + ) + } + + If::IndentCondition(row, col) => to_unfinished_if_report( + alloc, + filename, + row, + col, + start_row, + start_col, + alloc.concat(vec![ + alloc.reflow(r"I was expecting to see a expression next") + ]), + ), + } +} + +fn to_unfinished_if_report<'a>( + alloc: &'a RocDocAllocator<'a>, + filename: PathBuf, + row: Row, + col: Col, + start_row: Row, + start_col: Col, + message: RocDocBuilder<'a>, +) -> Report<'a> { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.concat(vec![ + 
alloc.reflow(r"I was partway through parsing an "), + alloc.keyword("if"), + alloc.reflow(r" expression, but I got stuck here:"), + ]), + alloc.region_with_subregion(surroundings, region), + message, + ]); + + Report { + filename, + doc, + title: "UNFINISHED IF".to_string(), + } +} + fn to_when_report<'a>( alloc: &'a RocDocAllocator<'a>, filename: PathBuf, @@ -792,6 +914,23 @@ fn to_type_report<'a>( } } + Type::TIndentEnd(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I am partway through parsing a type, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.note("I may be confused by indentation"), + ]); + + Report { + filename, + doc, + title: "UNFINISHED TYPE".to_string(), + } + } + Type::TAsIndentStart(row, col) => { let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); let region = Region::from_row_col(*row, *col); diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs index 66be20f5ca..aa54cccaae 100644 --- a/compiler/reporting/tests/test_reporting.rs +++ b/compiler/reporting/tests/test_reporting.rs @@ -4636,12 +4636,12 @@ mod test_reporting { indoc!( r#" ── UNFINISHED TYPE ───────────────────────────────────────────────────────────── - - I just started parsing a type, but I got stuck here: - + + I am partway through parsing a type, but I got stuck here: + 1│ f : I64, I64 ^ - + Note: I may be confused by indentation "# ), @@ -4950,4 +4950,56 @@ mod test_reporting { ), ) } + + #[test] + fn if_outdented_then() { + // TODO I think we can do better here + report_problem_as( + indoc!( + r#" + x = + if 5 == 5 + then 2 else 3 + + x + "# + ), + indoc!( + r#" + ── UNFINISHED IF ─────────────────────────────────────────────────────────────── + + I was partway through parsing an `if` expression, but I got stuck here: + + 2│ if 5 
== 5 + ^ + + I was expecting to see the `then` keyword next. + "# + ), + ) + } + + #[test] + fn if_missing_else() { + // this should get better with time + report_problem_as( + indoc!( + r#" + if 5 == 5 then 2 + "# + ), + indoc!( + r#" + ── UNFINISHED IF ─────────────────────────────────────────────────────────────── + + I was partway through parsing an `if` expression, but I got stuck here: + + 1│ if 5 == 5 then 2 + ^ + + I was expecting to see the `else` keyword next. + "# + ), + ) + } } From f3234e002ab8ee6c74be55c148dda5aca711ef28 Mon Sep 17 00:00:00 2001 From: Folkert Date: Tue, 23 Feb 2021 20:05:58 +0100 Subject: [PATCH 18/33] change list over --- compiler/parse/src/expr.rs | 65 +++++++++++++++++++----------------- compiler/parse/src/parser.rs | 14 ++++++++ 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs index 1b01a08edb..aebe5044b7 100644 --- a/compiler/parse/src/expr.rs +++ b/compiler/parse/src/expr.rs @@ -11,8 +11,8 @@ use crate::number_literal::number_literal; use crate::parser::{ self, allocated, and_then_with_indent_level, ascii_char, ascii_string, attempt, backtrackable, fail, map, newline_char, not, not_followed_by, optional, sep_by1, specialize, specialize_ref, - then, unexpected, unexpected_eof, word1, word2, EExpr, Either, If, ParseResult, Parser, State, - SyntaxError, When, + then, unexpected, unexpected_eof, word1, word2, BadInputError, EExpr, Either, If, List, + ParseResult, Parser, State, SyntaxError, When, }; use crate::pattern::loc_closure_param; use crate::type_annotation; @@ -1693,37 +1693,42 @@ fn binop<'a>() -> impl Parser<'a, BinOp, SyntaxError<'a>> { map!(ascii_char(b'%'), |_| BinOp::Percent) ) } - -pub fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { - let elems = collection_trailing_sep!( - ascii_char(b'['), - loc!(expr(min_indent)), - ascii_char(b','), - ascii_char(b']'), - min_indent - ); - - parser::attempt( - 
Attempting::List, - map_with_arena!(elems, |arena, - (parsed_elems, final_comments): ( - Vec<'a, Located>>, - &'a [CommentOrNewline<'a>] - )| { - let mut allocated = Vec::with_capacity_in(parsed_elems.len(), arena); - - for parsed_elem in parsed_elems { - allocated.push(&*arena.alloc(parsed_elem)); - } - - Expr::List { - items: allocated.into_bump_slice(), - final_comments, - } - }), +fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { + specialize( + |e, r, c| SyntaxError::Expr(EExpr::List(e, r, c)), + list_literal_help(min_indent), ) } +fn list_literal_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, List<'a>> { + move |arena, state| { + let (_, (parsed_elems, final_comments), state) = collection_trailing_sep_e!( + word1(b'[', List::Open), + specialize_ref(List::Syntax, loc!(expr(min_indent))), + word1(b',', List::End), + word1(b']', List::End), + min_indent, + List::Open, + List::Space, + List::IndentEnd + ) + .parse(arena, state)?; + + let mut allocated = Vec::with_capacity_in(parsed_elems.len(), arena); + + for parsed_elem in parsed_elems { + allocated.push(&*arena.alloc(parsed_elem)); + } + + let expr = Expr::List { + items: allocated.into_bump_slice(), + final_comments, + }; + + Ok((MadeProgress, expr, state)) + } +} + // Parser<'a, Vec<'a, Located>>> fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { then( diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs index b1d3d3545a..1b2ea71cce 100644 --- a/compiler/parse/src/parser.rs +++ b/compiler/parse/src/parser.rs @@ -380,11 +380,25 @@ pub enum EExpr<'a> { When(When<'a>, Row, Col), If(If<'a>, Row, Col), + List(List<'a>, Row, Col), + // EInParens(PInParens<'a>, Row, Col), IndentStart(Row, Col), IndentEnd(Row, Col), } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum List<'a> { + Open(Row, Col), + End(Row, Col), + Space(BadInputError, Row, Col), + + Syntax(&'a SyntaxError<'a>, Row, Col), + + IndentStart(Row, Col), + 
IndentEnd(Row, Col), +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum When<'a> { Space(BadInputError, Row, Col), From 80b64b42ff348ec22d9aec5950a65a76dce5e4aa Mon Sep 17 00:00:00 2001 From: Folkert Date: Tue, 23 Feb 2021 23:57:17 +0100 Subject: [PATCH 19/33] tests and list error messages --- compiler/parse/src/parser.rs | 3 +- compiler/reporting/src/error/parse.rs | 114 ++++++++++++++++++++- compiler/reporting/tests/test_reporting.rs | 82 +++++++++++++++ 3 files changed, 197 insertions(+), 2 deletions(-) diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs index 1b2ea71cce..ab980a5b38 100644 --- a/compiler/parse/src/parser.rs +++ b/compiler/parse/src/parser.rs @@ -394,8 +394,9 @@ pub enum List<'a> { Space(BadInputError, Row, Col), Syntax(&'a SyntaxError<'a>, Row, Col), + Expr(&'a EExpr<'a>, Row, Col), - IndentStart(Row, Col), + IndentOpen(Row, Col), IndentEnd(Row, Col), } diff --git a/compiler/reporting/src/error/parse.rs b/compiler/reporting/src/error/parse.rs index 33f09653c1..37c155e2cd 100644 --- a/compiler/reporting/src/error/parse.rs +++ b/compiler/reporting/src/error/parse.rs @@ -161,6 +161,7 @@ enum Node { IfCondition, IfThenBranch, IfElseBranch, + ListElement, } fn to_expr_report<'a>( @@ -175,11 +176,121 @@ fn to_expr_report<'a>( match parse_problem { EExpr::When(when, row, col) => to_when_report(alloc, filename, context, &when, *row, *col), - EExpr::If(when, row, col) => to_if_report(alloc, filename, context, &when, *row, *col), + EExpr::If(if_, row, col) => to_if_report(alloc, filename, context, &if_, *row, *col), + EExpr::List(list, row, col) => to_list_report(alloc, filename, context, &list, *row, *col), _ => todo!("unhandled parse error: {:?}", parse_problem), } } +fn to_list_report<'a>( + alloc: &'a RocDocAllocator<'a>, + filename: PathBuf, + context: Context, + parse_problem: &roc_parse::parser::List<'a>, + start_row: Row, + start_col: Col, +) -> Report<'a> { + use roc_parse::parser::List; + + match *parse_problem { + 
List::Syntax(syntax, row, col) => to_syntax_report(alloc, filename, syntax, row, col), + List::Space(error, row, col) => to_space_report(alloc, filename, &error, row, col), + + List::Expr(expr, row, col) => to_expr_report( + alloc, + filename, + Context::InNode(Node::ListElement, start_row, start_col, Box::new(context)), + expr, + row, + col, + ), + + List::Open(row, col) | List::End(row, col) => { + match dbg!(what_is_next(alloc.src_lines, row, col)) { + Next::Other(Some(',')) => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow( + r"I am partway through started parsing a list, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc + .reflow(r"I was expecting to see a list entry before this comma, "), + alloc.reflow(r"so try adding a list entry"), + alloc.reflow(r" and see if that helps?"), + ]), + ]); + Report { + filename, + doc, + title: "UNFINISHED LIST".to_string(), + } + } + _ => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow( + r"I am partway through started parsing a list, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow( + r"I was expecting to see a closing square bracket before this, ", + ), + alloc.reflow(r"so try adding a "), + alloc.parser_suggestion("]"), + alloc.reflow(r" and see if that helps?"), + ]), + alloc.concat(vec![ + alloc.note("When "), + alloc.reflow(r"I get stuck like this, "), + alloc.reflow(r"it usually means that there is a missing parenthesis "), + alloc.reflow(r"or bracket somewhere earlier. 
"), + alloc.reflow(r"It could also be a stray keyword or operator."), + ]), + ]); + + Report { + filename, + doc, + title: "UNFINISHED LIST".to_string(), + } + } + } + } + + List::IndentOpen(row, col) | List::IndentEnd(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I cannot find the end of this list:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"You could change it to something like "), + alloc.parser_suggestion("[ 1, 2, 3 ]"), + alloc.reflow(" or even just "), + alloc.parser_suggestion("[]"), + alloc.reflow(". Anything where there is an open and a close square bracket, "), + alloc.reflow("and where the elements of the list are separated by commas."), + ]), + note_for_tag_union_type_indent(alloc), + ]); + + Report { + filename, + doc, + title: "UNFINISHED LIST".to_string(), + } + } + } +} + fn to_if_report<'a>( alloc: &'a RocDocAllocator<'a>, filename: PathBuf, @@ -1745,6 +1856,7 @@ fn to_space_report<'a>( } } +#[derive(Debug)] enum Next<'a> { Keyword(&'a str), // Operator(&'a str), diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs index aa54cccaae..7ab6eefa14 100644 --- a/compiler/reporting/tests/test_reporting.rs +++ b/compiler/reporting/tests/test_reporting.rs @@ -5002,4 +5002,86 @@ mod test_reporting { ), ) } + + #[test] + fn list_double_comma() { + report_problem_as( + indoc!( + r#" + [ 1, 2, , 3 ] + "# + ), + indoc!( + r#" + ── UNFINISHED LIST ───────────────────────────────────────────────────────────── + + I am partway through started parsing a list, but I got stuck here: + + 1│ [ 1, 2, , 3 ] + ^ + + I was expecting to see a list entry before this comma, so try adding a + list entry and see if that helps? 
+ "# + ), + ) + } + + #[test] + fn list_without_end() { + report_problem_as( + indoc!( + r#" + [ 1, 2, + "# + ), + indoc!( + r#" + ── UNFINISHED LIST ───────────────────────────────────────────────────────────── + + I am partway through started parsing a list, but I got stuck here: + + 1│ [ 1, 2, + ^ + + I was expecting to see a closing square bracket before this, so try + adding a ] and see if that helps? + + Note: When I get stuck like this, it usually means that there is a + missing parenthesis or bracket somewhere earlier. It could also be a + stray keyword or operator. + "# + ), + ) + } + + #[test] + fn list_bad_indent() { + report_problem_as( + indoc!( + r#" + x = [ 1, 2, + ] + + x + "# + ), + indoc!( + r#" + ── UNFINISHED LIST ───────────────────────────────────────────────────────────── + + I cannot find the end of this list: + + 1│ x = [ 1, 2, + ^ + + You could change it to something like [ 1, 2, 3 ] or even just []. + Anything where there is an open and a close square bracket, and where + the elements of the list are separated by commas. 
+ + Note: I may be confused by indentation + "# + ), + ) + } } From 1c98bca071b71bc7db4985ccd1974f526dbb3ee3 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 00:56:27 +0100 Subject: [PATCH 20/33] astar test does not use stdin --- cli/tests/cli_run.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index 6ad19aed58..49388b27b0 100644 --- a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -230,9 +230,8 @@ mod cli_run { #[test] #[serial(astar)] fn run_astar_optimized_1() { - check_output_with_stdin( + check_output( &example_file("benchmarks", "AStarTests.roc"), - "1", "astar-tests", &[], "True\n", From c24d51e69d39326343827d5b94f826fc6a2d8868 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 15:14:52 +0100 Subject: [PATCH 21/33] remove old function --- compiler/mono/src/ir.rs | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 79628b27ac..9d4d41efd3 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -302,39 +302,6 @@ pub enum InProgressProc<'a> { } impl<'a> Procs<'a> { - /// Absorb the contents of another Procs into this one. 
- pub fn absorb(&mut self, mut other: Procs<'a>) { - debug_assert!(self.pending_specializations.is_some()); - debug_assert!(other.pending_specializations.is_some()); - - match self.pending_specializations { - Some(ref mut pending_specializations) => { - for (k, v) in other.pending_specializations.unwrap().drain() { - pending_specializations.insert(k, v); - } - } - None => { - unreachable!(); - } - } - - for (k, v) in other.partial_procs.drain() { - self.partial_procs.insert(k, v); - } - - for (k, v) in other.specialized.drain() { - self.specialized.insert(k, v); - } - - for (k, v) in other.runtime_errors.drain() { - self.runtime_errors.insert(k, v); - } - - for symbol in other.module_thunks.drain() { - self.module_thunks.insert(symbol); - } - } - pub fn get_specialized_procs_without_rc( self, arena: &'a Bump, From 092db87474a1ed13457d3aa647e07a66934b141e Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:05:44 +0100 Subject: [PATCH 22/33] add import dependencies to module cache --- compiler/load/src/file.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs index 44fe5bfb0b..058b6f6821 100644 --- a/compiler/load/src/file.rs +++ b/compiler/load/src/file.rs @@ -358,6 +358,7 @@ struct ModuleCache<'a> { external_specializations_requested: MutMap, /// Various information + imports: MutMap>, documentation: MutMap, can_problems: MutMap>, type_problems: MutMap>, @@ -1641,6 +1642,18 @@ fn update<'a>( .exposed_symbols_by_module .insert(home, exposed_symbols); + state + .module_cache + .imports + .entry(header.module_id) + .or_default() + .extend( + header + .package_qualified_imported_modules + .iter() + .map(|x| *x.as_inner()), + ); + work.extend(state.dependencies.add_module( header.module_id, &header.package_qualified_imported_modules, From 64955f23ff0b29bb1fa7ec9393a09d49f516597c Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:07:39 +0100 Subject: [PATCH 23/33] store module 
thunks --- compiler/load/src/file.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs index 058b6f6821..cabeef9470 100644 --- a/compiler/load/src/file.rs +++ b/compiler/load/src/file.rs @@ -359,6 +359,7 @@ struct ModuleCache<'a> { /// Various information imports: MutMap>, + top_level_thunks: MutMap>, documentation: MutMap, can_problems: MutMap>, type_problems: MutMap>, @@ -1917,6 +1918,13 @@ fn update<'a>( } } + state + .module_cache + .top_level_thunks + .entry(module_id) + .or_default() + .extend(procs.module_thunks.iter().copied()); + let found_specializations_module = FoundSpecializationsModule { layout_cache, module_id, From a361148380d64a54b5e42f64c88b484f2d718e3a Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:09:47 +0100 Subject: [PATCH 24/33] add imported_module_thunks --- compiler/mono/src/ir.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 9d4d41efd3..aaf86934ab 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -273,6 +273,7 @@ impl ExternalSpecializations { #[derive(Clone, Debug)] pub struct Procs<'a> { pub partial_procs: MutMap>, + pub imported_module_thunks: MutSet, pub module_thunks: MutSet, pub pending_specializations: Option, PendingSpecialization>>>, pub specialized: MutMap<(Symbol, Layout<'a>), InProgressProc<'a>>, @@ -285,6 +286,7 @@ impl<'a> Default for Procs<'a> { fn default() -> Self { Self { partial_procs: MutMap::default(), + imported_module_thunks: MutSet::default(), module_thunks: MutSet::default(), pending_specializations: Some(MutMap::default()), specialized: MutMap::default(), From 6bd10ddc050a8ef2d3568401c2b492beeea4b5ca Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:23:35 +0100 Subject: [PATCH 25/33] use imported module thunks for pointer calling --- compiler/load/src/file.rs | 20 ++++++++++++++++++++ compiler/mono/src/ir.rs | 5 ++++- 2 files changed, 24 
insertions(+), 1 deletion(-) diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs index cabeef9470..283be55b16 100644 --- a/compiler/load/src/file.rs +++ b/compiler/load/src/file.rs @@ -546,11 +546,24 @@ fn start_phase<'a>(module_id: ModuleId, phase: Phase, state: &mut State<'a>) -> ident_ids, } = typechecked; + let mut imported_module_thunks = MutSet::default(); + + if let Some(imports) = state.module_cache.imports.get(&module_id) { + for imported in imports.iter() { + imported_module_thunks.extend( + state.module_cache.top_level_thunks[imported] + .iter() + .copied(), + ); + } + } + BuildTask::BuildPendingSpecializations { layout_cache, module_id, module_timing, solved_subs, + imported_module_thunks, decls, ident_ids, exposed_to_host: state.exposed_to_host.clone(), @@ -950,6 +963,7 @@ enum BuildTask<'a> { module_timing: ModuleTiming, layout_cache: LayoutCache<'a>, solved_subs: Solved, + imported_module_thunks: MutSet, module_id: ModuleId, ident_ids: IdentIds, decls: Vec, @@ -3666,6 +3680,7 @@ fn make_specializations<'a>( fn build_pending_specializations<'a>( arena: &'a Bump, solved_subs: Solved, + imported_module_thunks: MutSet, home: ModuleId, mut ident_ids: IdentIds, decls: Vec, @@ -3678,6 +3693,9 @@ fn build_pending_specializations<'a>( let find_specializations_start = SystemTime::now(); let mut procs = Procs::default(); + debug_assert!(procs.imported_module_thunks.is_empty()); + procs.imported_module_thunks = imported_module_thunks; + let mut mono_problems = std::vec::Vec::new(); let mut subs = solved_subs.into_inner(); let mut mono_env = roc_mono::ir::Env { @@ -3959,10 +3977,12 @@ where module_timing, layout_cache, solved_subs, + imported_module_thunks, exposed_to_host, } => Ok(build_pending_specializations( arena, solved_subs, + imported_module_thunks, module_id, ident_ids, decls, diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index aaf86934ab..991249ee8e 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ 
-5725,8 +5725,11 @@ fn call_by_pointer<'a>( // cause issues. The caller (which is here) doesn't know whether the called is a closure // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name // more in the future + let is_thunk = + procs.module_thunks.contains(&symbol) || procs.imported_module_thunks.contains(&symbol); + match layout { - Layout::FunctionPointer(arg_layouts, ret_layout) if false => { + Layout::FunctionPointer(arg_layouts, ret_layout) if !is_thunk => { if arg_layouts.iter().any(|l| l.contains_refcounted()) { let name = env.unique_symbol(); let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena); From 1c1c53ba950f3a8ecc9dfd1fc44bc4826369f717 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:38:10 +0100 Subject: [PATCH 26/33] flip map argument order --- examples/benchmarks/Base64.roc | 58 ++++++++++++++-------------- examples/benchmarks/Bytes/Decode.roc | 8 ++-- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index 1fed392288..1f2ecad216 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -31,47 +31,47 @@ decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string: loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str) loopHelp = \{ remaining, string } -> if remaining >= 3 then - helper = \x, y, z -> - a : U32 - a = Num.intCast x - b : U32 - b = Num.intCast y - c : U32 - c = Num.intCast z - combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c - Loop - { - remaining: remaining - 3, - string: Str.concat string (bitsToChars combined 0) - } - - Bytes.Decode.map3 helper + Bytes.Decode.map3 Bytes.Decode.u8 Bytes.Decode.u8 Bytes.Decode.u8 + \x, y, z -> + a : U32 + a = Num.intCast x + b : U32 + b = Num.intCast y + c : U32 + c = Num.intCast z + combined = Num.bitwiseOr 
(Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c + Loop + { + remaining: remaining - 3, + string: Str.concat string (bitsToChars combined 0) + } else if remaining == 0 then Bytes.Decode.succeed (Done string) else if remaining == 2 then - helperX = \x, y -> - a : U32 - a = Num.intCast x - b : U32 - b = Num.intCast y - combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b) - Done (Str.concat string (bitsToChars combined 1)) + Bytes.Decode.map2 + Bytes.Decode.u8 + Bytes.Decode.u8 + \x, y -> + a : U32 + a = Num.intCast x + b : U32 + b = Num.intCast y + combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b) + Done (Str.concat string (bitsToChars combined 1)) - Bytes.Decode.map2 helperX - Bytes.Decode.u8 - Bytes.Decode.u8 else # remaining = 1 + Bytes.Decode.map Bytes.Decode.u8 - |> Bytes.Decode.map (\x -> - a : U32 - a = Num.intCast x - Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))) + \x -> + a : U32 + a = Num.intCast x + Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)) bitsToChars : U32, Int * -> Str diff --git a/examples/benchmarks/Bytes/Decode.roc b/examples/benchmarks/Bytes/Decode.roc index db14d857a6..f1da59e389 100644 --- a/examples/benchmarks/Bytes/Decode.roc +++ b/examples/benchmarks/Bytes/Decode.roc @@ -30,8 +30,8 @@ map = \@Decoder decoder, transform -> Bad e -map2 : (a,b -> c), Decoder a, Decoder b -> Decoder c -map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> +map2 : Decoder a, Decoder b, (a, b -> c) -> Decoder c +map2 = \@Decoder decoder1, @Decoder decoder2, transform -> @Decoder \state1 -> when decoder1 state1 is Good state2 a -> @@ -45,8 +45,8 @@ map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> Bad e -> Bad e -map3 : (a, b, c -> d), Decoder a, Decoder b, Decoder c -> Decoder d -map3 = \transform, @Decoder decoder1, @Decoder decoder2, @Decoder decoder3 -> +map3 : Decoder a, Decoder b, Decoder c, (a, b, c -> d) -> Decoder d +map3 = \@Decoder decoder1, @Decoder 
decoder2, @Decoder decoder3, transform -> @Decoder \state1 -> when decoder1 state1 is Good state2 a -> From aff8266f0f6526bad8836628b528274f39bf6c55 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:40:46 +0100 Subject: [PATCH 27/33] move astar test --- cli/tests/cli_run.rs | 4 ++-- examples/benchmarks/{AStarTests.roc => TestAStar.roc} | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename examples/benchmarks/{AStarTests.roc => TestAStar.roc} (98%) diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index 6ad19aed58..2f81acaec2 100644 --- a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -231,9 +231,9 @@ mod cli_run { #[serial(astar)] fn run_astar_optimized_1() { check_output_with_stdin( - &example_file("benchmarks", "AStarTests.roc"), + &example_file("benchmarks", "TestAStar.roc"), "1", - "astar-tests", + "test-astar", &[], "True\n", false, diff --git a/examples/benchmarks/AStarTests.roc b/examples/benchmarks/TestAStar.roc similarity index 98% rename from examples/benchmarks/AStarTests.roc rename to examples/benchmarks/TestAStar.roc index 1cb5909c8f..0401566b6a 100644 --- a/examples/benchmarks/AStarTests.roc +++ b/examples/benchmarks/TestAStar.roc @@ -1,4 +1,4 @@ -app "astar-tests" +app "test-astar" packages { base: "platform" } imports [base.Task, AStar] provides [ main ] to base From 17a44aab024877acf1d2822c155da97f96257baf Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 16:40:56 +0100 Subject: [PATCH 28/33] fix whitespace --- examples/benchmarks/Base64.roc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index 1f2ecad216..c2be2d31fa 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -31,7 +31,7 @@ decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string: loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } 
Str) loopHelp = \{ remaining, string } -> if remaining >= 3 then - Bytes.Decode.map3 + Bytes.Decode.map3 Bytes.Decode.u8 Bytes.Decode.u8 Bytes.Decode.u8 @@ -53,7 +53,7 @@ loopHelp = \{ remaining, string } -> Bytes.Decode.succeed (Done string) else if remaining == 2 then - Bytes.Decode.map2 + Bytes.Decode.map2 Bytes.Decode.u8 Bytes.Decode.u8 \x, y -> @@ -66,9 +66,9 @@ loopHelp = \{ remaining, string } -> else # remaining = 1 - Bytes.Decode.map + Bytes.Decode.map Bytes.Decode.u8 - \x -> + \x -> a : U32 a = Num.intCast x Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)) @@ -90,33 +90,33 @@ bitsToCharsHelp = \bits, missing -> # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd` # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left. - + # any 6-bit number is a valid base64 digit, so this is actually safe p = Num.shiftRightZfBy 18 bits |> Num.intCast - |> unsafeToChar + |> unsafeToChar q = Num.bitwiseAnd (Num.shiftRightZfBy 12 bits) lowest6BitsMask |> Num.intCast - |> unsafeToChar + |> unsafeToChar r = Num.bitwiseAnd (Num.shiftRightZfBy 6 bits) lowest6BitsMask |> Num.intCast - |> unsafeToChar + |> unsafeToChar s = Num.bitwiseAnd bits lowest6BitsMask |> Num.intCast - |> unsafeToChar + |> unsafeToChar equals : U8 equals = 61 when missing is - 0 -> + 0 -> [ p, q, r, s ] 1 -> [ p, q, r, equals ] From a6edc58323f1d11e6544abd070afe728c6568ab7 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 21:25:14 +0100 Subject: [PATCH 29/33] make fromUtf8 do RC --- cli/tests/cli_run.rs | 12 ++ compiler/builtins/bitcode/src/main.zig | 2 +- compiler/builtins/bitcode/src/str.zig | 158 +++++++++++++++++-------- compiler/builtins/src/bitcode.rs | 2 +- compiler/can/src/builtins.rs | 8 +- compiler/gen/src/llvm/build_str.rs | 122 ++++--------------- examples/benchmarks/Base64.roc | 19 +-- 
examples/benchmarks/TestBase64.roc | 17 +++ 8 files changed, 172 insertions(+), 168 deletions(-) create mode 100644 examples/benchmarks/TestBase64.roc diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index 2f81acaec2..f3889ae374 100644 --- a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -240,6 +240,18 @@ mod cli_run { ); } + #[test] + #[serial(base64)] + fn base64() { + check_output( + &example_file("benchmarks", "TestBase64.roc"), + "test-base64", + &[], + "SGVsbG8gV29ybGQ=", + true, + ); + } + #[test] #[serial(closure)] fn closure() { diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig index e124afe62c..e20d376b88 100644 --- a/compiler/builtins/bitcode/src/main.zig +++ b/compiler/builtins/bitcode/src/main.zig @@ -67,8 +67,8 @@ comptime { exportStrFn(str.strFromIntC, "from_int"); exportStrFn(str.strFromFloatC, "from_float"); exportStrFn(str.strEqual, "equal"); - exportStrFn(str.validateUtf8Bytes, "validate_utf8_bytes"); exportStrFn(str.strToBytesC, "to_bytes"); + exportStrFn(str.fromUtf8C, "from_utf8"); } // Export helpers - Must be run inside a comptime diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index a752a37f0e..572eaa14e0 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -15,6 +15,7 @@ const InPlace = packed enum(u8) { Clone, }; +const SMALL_STR_MAX_LENGTH = small_string_size - 1; const small_string_size = 2 * @sizeOf(usize); const blank_small_string: [16]u8 = init_blank_small_string(small_string_size); @@ -982,6 +983,71 @@ fn strToBytes(allocator: *Allocator, arg: RocStr) RocList { } } +const FromUtf8Result = extern struct { + byte_index: usize, + string: RocStr, + is_ok: bool, + problem_code: Utf8ByteProblem, +}; + +pub fn fromUtf8C(arg: RocList, output: *FromUtf8Result) callconv(.C) void { + output.* = @call(.{ .modifier = always_inline }, fromUtf8, .{ std.heap.c_allocator, arg }); +} + +fn fromUtf8(allocator: 
*Allocator, arg: RocList) FromUtf8Result { + const bytes = @ptrCast([*]const u8, arg.bytes)[0..arg.length]; + + if (unicode.utf8ValidateSlice(bytes)) { + // the output will be correct. Now we need to take ownership of the input + if (arg.len() <= SMALL_STR_MAX_LENGTH) { + // turn the bytes into a small string + const string = RocStr.init(allocator, @ptrCast([*]u8, arg.bytes), arg.len()); + + // then decrement the input list + const data_bytes = arg.len(); + utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes); + + return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte }; + } else { + const byte_list = arg.makeUnique(allocator, @alignOf(usize), @sizeOf(u8)); + + const string = RocStr{ .str_bytes = byte_list.bytes, .str_len = byte_list.length }; + + return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte }; + } + } else { + const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length); + + // TODO what should we do RC-wise here + // const data_bytes = arg.len(); + // utils.decref(allocator, @alignOf(usize), arg.list_bytes, data_bytes); + + return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, .problem_code = temp.problem }; + } +} + +fn errorToProblem(bytes: [*]u8, length: usize) struct { index: usize, problem: Utf8ByteProblem } { + var index: usize = 0; + + while (index < length) { + const nextNumBytes = numberOfNextCodepointBytes(bytes, length, index) catch |err| { + switch (err) { + error.UnexpectedEof => { + return .{ .index = index, .problem = Utf8ByteProblem.UnexpectedEndOfSequence }; + }, + error.Utf8InvalidStartByte => return .{ .index = index, .problem = Utf8ByteProblem.InvalidStartByte }, + error.Utf8ExpectedContinuation => return .{ .index = index, .problem = Utf8ByteProblem.ExpectedContinuation }, + error.Utf8OverlongEncoding => return .{ .index = index, .problem = 
Utf8ByteProblem.OverlongEncoding }, + error.Utf8EncodesSurrogateHalf => return .{ .index = index, .problem = Utf8ByteProblem.EncodesSurrogateHalf }, + error.Utf8CodepointTooLarge => return .{ .index = index, .problem = Utf8ByteProblem.CodepointTooLarge }, + } + }; + index += nextNumBytes; + } + + unreachable; +} + pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool { const bytes: []u8 = ptr[0..len]; return @call(.{ .modifier = always_inline }, unicode.utf8ValidateSlice, .{bytes}); @@ -1019,76 +1085,74 @@ pub const Utf8ByteProblem = packed enum(u8) { OverlongEncoding = 4, UnexpectedEndOfSequence = 5, }; -pub const ValidateUtf8BytesResult = extern struct { - is_ok: bool, byte_index: usize, problem_code: Utf8ByteProblem -}; -const is_ok_utf8_byte_response = - ValidateUtf8BytesResult{ .is_ok = true, .byte_index = 0, .problem_code = Utf8ByteProblem.UnexpectedEndOfSequence }; -inline fn toErrUtf8ByteResponse(byte_index: usize, problem_code: Utf8ByteProblem) ValidateUtf8BytesResult { - return ValidateUtf8BytesResult{ .is_ok = false, .byte_index = byte_index, .problem_code = problem_code }; +fn validateUtf8Bytes(bytes: [*]u8, length: usize) FromUtf8Result { + return fromUtf8(std.testing.allocator, RocList{ .bytes = bytes, .length = length }); } -// Validate that an array of bytes is valid UTF-8, but if it fails catch & return the error & byte index -pub fn validateUtf8Bytes(ptr: [*]u8, len: usize) callconv(.C) ValidateUtf8BytesResult { - var index: usize = 0; - while (index < len) { - const nextNumBytes = numberOfNextCodepointBytes(ptr, len, index) catch |err| { - return toErrUtf8ByteResponse( - index, - switch (err) { - error.UnexpectedEof => Utf8ByteProblem.UnexpectedEndOfSequence, - error.Utf8InvalidStartByte => Utf8ByteProblem.InvalidStartByte, - error.Utf8ExpectedContinuation => Utf8ByteProblem.ExpectedContinuation, - error.Utf8OverlongEncoding => Utf8ByteProblem.OverlongEncoding, - error.Utf8EncodesSurrogateHalf => Utf8ByteProblem.EncodesSurrogateHalf, - 
error.Utf8CodepointTooLarge => Utf8ByteProblem.CodepointTooLarge, - }, - ); - }; - index += nextNumBytes; - } - return is_ok_utf8_byte_response; +fn validateUtf8BytesX(str: RocList) FromUtf8Result { + return fromUtf8(std.testing.allocator, str); } +fn expectOk(result: FromUtf8Result) void { + expectEqual(result.is_ok, true); +} + +fn sliceHelp(bytes: [*]const u8, length: usize) RocList { + var list = RocList.allocate(testing.allocator, @alignOf(usize), length, @sizeOf(u8)); + @memcpy(list.bytes orelse unreachable, bytes, length); + list.length = length; + + return list; +} + +fn toErrUtf8ByteResponse(index: usize, problem: Utf8ByteProblem) FromUtf8Result { + return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = index, .problem_code = problem }; +} + +// NOTE on memory: the validate function consumes a RC token of the input. Since +// we freshly created it (in `sliceHelp`), it has only one RC token, and input list will be deallocated. +// +// If we tested with big strings, we'd have to deallocate the output string, but never the input list + test "validateUtf8Bytes: ascii" { - const str_len = 3; - var str: [str_len]u8 = "abc".*; - const str_ptr: [*]u8 = &str; + const raw = "abc"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: unicode œ" { - const str_len = 2; - var str: [str_len]u8 = "œ".*; - const str_ptr: [*]u8 = &str; + const raw = "œ"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: unicode ∆" { - const str_len = 3; - var str: [str_len]u8 = "∆".*; - const str_ptr: [*]u8 = &str; + const raw = "∆"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list 
= sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: emoji" { - const str_len = 4; - var str: [str_len]u8 = "💖".*; - const str_ptr: [*]u8 = &str; + const raw = "💖"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: unicode ∆ in middle of array" { - const str_len = 9; - var str: [str_len]u8 = "œb∆c¬".*; - const str_ptr: [*]u8 = &str; + const raw = "œb∆c¬"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len)); + expectOk(validateUtf8BytesX(list)); } test "validateUtf8Bytes: invalid start byte" { diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs index b19117e106..125099af96 100644 --- a/compiler/builtins/src/bitcode.rs +++ b/compiler/builtins/src/bitcode.rs @@ -41,8 +41,8 @@ pub const STR_NUMBER_OF_BYTES: &str = "roc_builtins.str.number_of_bytes"; pub const STR_FROM_INT: &str = "roc_builtins.str.from_int"; pub const STR_FROM_FLOAT: &str = "roc_builtins.str.from_float"; pub const STR_EQUAL: &str = "roc_builtins.str.equal"; -pub const STR_VALIDATE_UTF_BYTES: &str = "roc_builtins.str.validate_utf8_bytes"; pub const STR_TO_BYTES: &str = "roc_builtins.str.to_bytes"; +pub const STR_FROM_UTF8: &str = "roc_builtins.str.from_utf8"; pub const DICT_HASH: &str = "roc_builtins.dict.hash"; pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str"; diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs index 7db64b296f..68cceaacf6 100644 --- a/compiler/can/src/builtins.rs +++ b/compiler/can/src/builtins.rs @@ -1598,7 +1598,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def { Access { record_var, 
ext_var: var_store.fresh(), - field: "isOk".into(), + field: "c_isOk".into(), field_var: var_store.fresh(), loc_expr: Box::new(no_region(Var(Symbol::ARG_2))), }, @@ -1610,7 +1610,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def { vec![Access { record_var, ext_var: var_store.fresh(), - field: "str".into(), + field: "b_str".into(), field_var: var_store.fresh(), loc_expr: Box::new(no_region(Var(Symbol::ARG_2))), }], @@ -1627,14 +1627,14 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def { Access { record_var, ext_var: var_store.fresh(), - field: "problem".into(), + field: "d_problem".into(), field_var: var_store.fresh(), loc_expr: Box::new(no_region(Var(Symbol::ARG_2))), }, Access { record_var, ext_var: var_store.fresh(), - field: "byteIndex".into(), + field: "a_byteIndex".into(), field_var: var_store.fresh(), loc_expr: Box::new(no_region(Var(Symbol::ARG_2))), }, diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs index 301b726fb3..514d483c06 100644 --- a/compiler/gen/src/llvm/build_str.rs +++ b/compiler/gen/src/llvm/build_str.rs @@ -1,13 +1,11 @@ use crate::llvm::bitcode::{call_bitcode_fn, call_void_bitcode_fn}; use crate::llvm::build::{complex_bitcast, Env, InPlace, Scope}; -use crate::llvm::build_list::{ - allocate_list, build_basic_phi2, empty_polymorphic_list, list_len, load_list_ptr, store_list, -}; -use crate::llvm::convert::{collection, get_ptr_type}; +use crate::llvm::build_list::{allocate_list, store_list}; +use crate::llvm::convert::collection; use inkwell::builder::Builder; -use inkwell::types::{BasicTypeEnum, StructType}; +use inkwell::types::BasicTypeEnum; use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue}; -use inkwell::{AddressSpace, IntPredicate}; +use inkwell::AddressSpace; use roc_builtins::bitcode; use roc_module::symbol::Symbol; use roc_mono::layout::{Builtin, Layout}; @@ -300,43 +298,28 @@ pub fn str_to_bytes<'a, 'ctx, 'env>( /// 
Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 } pub fn str_from_utf8<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, - parent: FunctionValue<'ctx>, + _parent: FunctionValue<'ctx>, original_wrapper: StructValue<'ctx>, ) -> BasicValueEnum<'ctx> { let builder = env.builder; let ctx = env.context; - let list_len = list_len(builder, original_wrapper); - let ptr_type = get_ptr_type(&ctx.i8_type().into(), AddressSpace::Generic); - let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type); - - let result_type = env - .module - .get_struct_type("str.ValidateUtf8BytesResult") - .unwrap(); + let result_type = env.module.get_struct_type("str.FromUtf8Result").unwrap(); let result_ptr = builder.build_alloca(result_type, "alloca_utf8_validate_bytes_result"); call_void_bitcode_fn( env, - &[result_ptr.into(), list_ptr.into(), list_len.into()], - &bitcode::STR_VALIDATE_UTF_BYTES, + &[ + complex_bitcast( + env.builder, + original_wrapper.into(), + env.context.i128_type().into(), + "to_i128", + ), + result_ptr.into(), + ], + &bitcode::STR_FROM_UTF8, ); - let utf8_validate_bytes_result = builder - .build_load(result_ptr, "load_utf8_validate_bytes_result") - .into_struct_value(); - - let is_ok = builder - .build_extract_value(utf8_validate_bytes_result, 0, "extract_extract_is_ok") - .unwrap() - .into_int_value(); - let byte_index = builder - .build_extract_value(utf8_validate_bytes_result, 1, "extract_byte_index") - .unwrap() - .into_int_value(); - let problem_code = builder - .build_extract_value(utf8_validate_bytes_result, 2, "extract_problem_code") - .unwrap() - .into_int_value(); let record_type = env.context.struct_type( &[ @@ -348,71 +331,16 @@ pub fn str_from_utf8<'a, 'ctx, 'env>( false, ); - let comparison = builder.build_int_compare( - IntPredicate::EQ, - is_ok, - ctx.bool_type().const_int(1, false), - "compare_is_ok", - ); + let result_ptr_cast = env + .builder + .build_bitcast( + result_ptr, + record_type.ptr_type(AddressSpace::Generic), + "to_unnamed", + 
) + .into_pointer_value(); - build_basic_phi2( - env, - parent, - comparison, - || { - // We have a valid utf8 byte sequence - // TODO: Should we do something different here if we're doing this in place? - let zig_str = - call_bitcode_fn(env, &[list_ptr.into(), list_len.into()], &bitcode::STR_INIT) - .into_struct_value(); - build_struct( - builder, - record_type, - vec![ - ( - env.ptr_int().const_int(0, false).into(), - "insert_zeroed_byte_index", - ), - (zig_str_to_struct(env, zig_str).into(), "insert_str"), - (ctx.bool_type().const_int(1, false).into(), "insert_is_ok"), - ( - ctx.i8_type().const_int(0, false).into(), - "insert_zeroed_problem", - ), - ], - ) - .into() - }, - || { - // We do not have a valid utf8 byte sequence - build_struct( - builder, - record_type, - vec![ - (byte_index.into(), "insert_byte_index"), - (empty_polymorphic_list(env), "insert_zeroed_str"), - (ctx.bool_type().const_int(0, false).into(), "insert_is_ok"), - (problem_code.into(), "insert_problem"), - ], - ) - .into() - }, - BasicTypeEnum::StructType(record_type), - ) -} - -fn build_struct<'env, 'ctx>( - builder: &'env Builder<'ctx>, - struct_type: StructType<'ctx>, - values: Vec<(BasicValueEnum<'ctx>, &str)>, -) -> StructValue<'ctx> { - let mut val = struct_type.get_undef().into(); - for (index, (value, name)) in values.iter().enumerate() { - val = builder - .build_insert_value(val, *value, index as u32, name) - .unwrap(); - } - val.into_struct_value() + builder.build_load(result_ptr_cast, "load_utf8_validate_bytes_result") } /// Str.fromInt : Int -> Str diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc index c2be2d31fa..c8d7c586f7 100644 --- a/examples/benchmarks/Base64.roc +++ b/examples/benchmarks/Base64.roc @@ -1,24 +1,7 @@ -app "base64" - packages { base: "platform" } - imports [base.Task, Bytes.Decode.{Decoder} ] - provides [ main ] to base - -IO a : Task.Task a [] +interface Base64 exposes [ fromBytes ] imports [ Bytes.Decode ] Decoder a : 
Bytes.Decode.Decoder a -main : IO {} -main = - # when fromBytes [ 0 ] is - when fromBytes (Str.toBytes "Hello World") is - Ok str -> - Task.putLine str - - Err _ -> - Task.putLine "sadness" - -# ------ - fromBytes : List U8 -> Result Str Bytes.Decode.DecodeError fromBytes = \bytes -> diff --git a/examples/benchmarks/TestBase64.roc b/examples/benchmarks/TestBase64.roc new file mode 100644 index 0000000000..75adbca8be --- /dev/null +++ b/examples/benchmarks/TestBase64.roc @@ -0,0 +1,17 @@ +app "test-base64" + packages { base: "platform" } + imports [base.Task, Base64 ] + provides [ main ] to base + +IO a : Task.Task a [] + +main : IO {} +main = + # when fromBytes [ 0 ] is + when Base64.fromBytes (Str.toBytes "Hello World") is + Ok str -> + Task.putLine str + + Err _ -> + Task.putLine "sadness" + From 75ee81db883b1e76e276b596807af6668867ea0f Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 21:27:02 +0100 Subject: [PATCH 30/33] fix base64 test output --- cli/tests/cli_run.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index f3889ae374..251259b490 100644 --- a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -247,7 +247,7 @@ mod cli_run { &example_file("benchmarks", "TestBase64.roc"), "test-base64", &[], - "SGVsbG8gV29ybGQ=", + "SGVsbG8gV29ybGQ=\n", true, ); } From 7304154452271f86c835a681a30112f8fe0483e2 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 21:36:16 +0100 Subject: [PATCH 31/33] update comment --- compiler/mono/src/ir.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 991249ee8e..2ad3995fe0 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -5720,11 +5720,13 @@ fn call_by_pointer<'a>( let is_specialized = procs.specialized.keys().any(|(s, _)| *s == symbol); if env.is_imported_symbol(symbol) || procs.partial_procs.contains_key(&symbol) || is_specialized { - // 
TODO we should be able to call by name in this wrapper for "normal" functions - // but closures, specifically top-level values that are closures (by unification) - // cause issues. The caller (which is here) doesn't know whether the called is a closure - // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name - // more in the future + // anything that is not a thunk can be called by-value in the wrapper + // (the above condition guarantees we're dealing with a top-level symbol) + // + // But thunks cannot be called by-value, since they are not really functions to all parts + // of the system (notably RC insertion). So we still call those by-pointer. + // Luckily such values were top-level originally (in the user code), and can therefore + // not be closures let is_thunk = procs.module_thunks.contains(&symbol) || procs.imported_module_thunks.contains(&symbol); From 3537fa57d2d1463d737270281c2054db85c5db0a Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 21:46:58 +0100 Subject: [PATCH 32/33] decrement when the input is invalid utf8 --- compiler/builtins/bitcode/src/str.zig | 6 +++--- examples/benchmarks/TestBase64.roc | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index 572eaa14e0..e18eca4306 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -1018,9 +1018,9 @@ fn fromUtf8(allocator: *Allocator, arg: RocList) FromUtf8Result { } else { const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length); - // TODO what should we do RC-wise here - // const data_bytes = arg.len(); - // utils.decref(allocator, @alignOf(usize), arg.list_bytes, data_bytes); + // consume the input list + const data_bytes = arg.len(); + utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes); return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, 
.problem_code = temp.problem }; } diff --git a/examples/benchmarks/TestBase64.roc b/examples/benchmarks/TestBase64.roc index 75adbca8be..27c5617ebf 100644 --- a/examples/benchmarks/TestBase64.roc +++ b/examples/benchmarks/TestBase64.roc @@ -7,7 +7,6 @@ IO a : Task.Task a [] main : IO {} main = - # when fromBytes [ 0 ] is when Base64.fromBytes (Str.toBytes "Hello World") is Ok str -> Task.putLine str From 134f8a15e9e9baaa204990ff127352d4577bd5ba Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 24 Feb 2021 22:26:18 +0100 Subject: [PATCH 33/33] fix zig tests --- compiler/builtins/bitcode/src/str.zig | 98 +++++++++++++-------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index e18eca4306..62a88058e0 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -1155,102 +1155,100 @@ test "validateUtf8Bytes: unicode ∆ in middle of array" { expectOk(validateUtf8BytesX(list)); } +fn expectErr(list: RocList, index: usize, err: Utf8DecodeError, problem: Utf8ByteProblem) void { + const str_ptr = @ptrCast([*]u8, list.bytes); + const str_len = list.length; + + expectError(err, numberOfNextCodepointBytes(str_ptr, str_len, index)); + expectEqual(toErrUtf8ByteResponse(index, problem), validateUtf8Bytes(str_ptr, str_len)); +} + test "validateUtf8Bytes: invalid start byte" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426 - const str_len = 4; - var str: [str_len]u8 = "ab\x80c".*; - const str_ptr: [*]u8 = &str; + const raw = "ab\x80c"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8InvalidStartByte, numberOfNextCodepointBytes(str_ptr, str_len, 2)); - expectEqual(toErrUtf8ByteResponse(2, Utf8ByteProblem.InvalidStartByte), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 2, error.Utf8InvalidStartByte, Utf8ByteProblem.InvalidStartByte); } test 
"validateUtf8Bytes: unexpected eof for 2 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426 - const str_len = 4; - var str: [str_len]u8 = "abc\xc2".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xc2"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence); } test "validateUtf8Bytes: expected continuation for 2 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426 - const str_len = 5; - var str: [str_len]u8 = "abc\xc2\x00".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xc2\x00"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation); } test "validateUtf8Bytes: unexpected eof for 3 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L430 - const str_len = 5; - var str: [str_len]u8 = "abc\xe0\x00".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xe0\x00"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence); } test "validateUtf8Bytes: expected continuation for 3 byte 
sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L430 - const str_len = 6; - var str: [str_len]u8 = "abc\xe0\xa0\xc0".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xe0\xa0\xc0"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation); } test "validateUtf8Bytes: unexpected eof for 4 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L437 - const str_len = 6; - var str: [str_len]u8 = "abc\xf0\x90\x00".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xf0\x90\x00"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence); } test "validateUtf8Bytes: expected continuation for 4 byte sequence" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L437 - const str_len = 7; - var str: [str_len]u8 = "abc\xf0\x90\x80\x00".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xf0\x90\x80\x00"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation); } test "validateUtf8Bytes: overlong" { // 
https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L451 - const str_len = 7; - var str: [str_len]u8 = "abc\xf0\x80\x80\x80".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xf0\x80\x80\x80"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8OverlongEncoding, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.OverlongEncoding), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8OverlongEncoding, Utf8ByteProblem.OverlongEncoding); } test "validateUtf8Bytes: codepoint out too large" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L465 - const str_len = 7; - var str: [str_len]u8 = "abc\xf4\x90\x80\x80".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xf4\x90\x80\x80"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8CodepointTooLarge, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.CodepointTooLarge), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8CodepointTooLarge, Utf8ByteProblem.CodepointTooLarge); } test "validateUtf8Bytes: surrogate halves" { // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L468 - const str_len = 6; - var str: [str_len]u8 = "abc\xed\xa0\x80".*; - const str_ptr: [*]u8 = &str; + const raw = "abc\xed\xa0\x80"; + const ptr: [*]const u8 = @ptrCast([*]const u8, raw); + const list = sliceHelp(ptr, raw.len); - expectError(error.Utf8EncodesSurrogateHalf, numberOfNextCodepointBytes(str_ptr, str_len, 3)); - expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.EncodesSurrogateHalf), validateUtf8Bytes(str_ptr, str_len)); + expectErr(list, 3, error.Utf8EncodesSurrogateHalf, Utf8ByteProblem.EncodesSurrogateHalf); }