From 89bf22598ecbe3fac6ba62944b76630501ac28bd Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 14:41:08 +0100
Subject: [PATCH 01/26] call by pointer wrappers need to call by pointer for
 closures

---
 compiler/mono/src/ir.rs | 65 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index 9e6f2ccd47..79628b27ac 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -5751,8 +5751,13 @@ fn call_by_pointer<'a>(
     let is_specialized = procs.specialized.keys().any(|(s, _)| *s == symbol);
     if env.is_imported_symbol(symbol) || procs.partial_procs.contains_key(&symbol) || is_specialized
     {
+        // TODO we should be able to call by name in this wrapper for "normal" functions
+        // but closures, specifically top-level values that are closures (by unification)
+        // cause issues. The caller (which is here) doesn't know whether the called is a closure
+        // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name
+        // more in the future
         match layout {
-            Layout::FunctionPointer(arg_layouts, ret_layout) => {
+            Layout::FunctionPointer(arg_layouts, ret_layout) if false => {
                 if arg_layouts.iter().any(|l| l.contains_refcounted()) {
                     let name = env.unique_symbol();
                     let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena);
@@ -5766,6 +5771,7 @@ fn call_by_pointer<'a>(
                     let args = args.into_bump_slice();
 
                     let call_symbol = env.unique_symbol();
+                    debug_assert_eq!(arg_layouts.len(), arg_symbols.len());
                     let call_type = CallType::ByName {
                         name: symbol,
                         full_layout: layout.clone(),
@@ -5804,6 +5810,63 @@ fn call_by_pointer<'a>(
                     Expr::FunctionPointer(symbol, layout)
                 }
             }
+            Layout::FunctionPointer(arg_layouts, ret_layout) => {
+                if arg_layouts.iter().any(|l| l.contains_refcounted()) {
+                    let name = env.unique_symbol();
+                    let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena);
+                    let mut arg_symbols = Vec::with_capacity_in(arg_layouts.len(), env.arena);
+
+                    for layout in arg_layouts {
+                        let symbol = env.unique_symbol();
+                        args.push((layout.clone(), symbol));
+                        arg_symbols.push(symbol);
+                    }
+                    let args = args.into_bump_slice();
+
+                    let call_symbol = env.unique_symbol();
+                    let fpointer_symbol = env.unique_symbol();
+                    debug_assert_eq!(arg_layouts.len(), arg_symbols.len());
+                    let call_type = CallType::ByPointer {
+                        name: fpointer_symbol,
+                        full_layout: layout.clone(),
+                        ret_layout: ret_layout.clone(),
+                        arg_layouts,
+                    };
+                    let call = Call {
+                        call_type,
+                        arguments: arg_symbols.into_bump_slice(),
+                    };
+                    let expr = Expr::Call(call);
+
+                    let mut body = Stmt::Ret(call_symbol);
+
+                    body = Stmt::Let(call_symbol, expr, ret_layout.clone(), env.arena.alloc(body));
+
+                    let expr = Expr::FunctionPointer(symbol, layout.clone());
+                    body = Stmt::Let(fpointer_symbol, expr, layout.clone(), env.arena.alloc(body));
+
+                    let closure_data_layout = None;
+                    let proc = Proc {
+                        name,
+                        args,
+                        body,
+                        closure_data_layout,
+                        ret_layout: ret_layout.clone(),
+                        is_self_recursive: SelfRecursive::NotSelfRecursive,
+                        must_own_arguments: true,
+                        host_exposed_layouts: HostExposedLayouts::NotHostExposed,
+                    };
+
+                    procs
+                        .specialized
+                        .insert((name, layout.clone()), InProgressProc::Done(proc));
+                    Expr::FunctionPointer(name, layout)
+                } else {
+                    // if none of the arguments is refcounted, then owning the arguments has no
+                    // meaning
+                    Expr::FunctionPointer(symbol, layout)
+                }
+            }
             _ => {
                 // e.g. Num.maxInt or other constants
                 Expr::FunctionPointer(symbol, layout)

From ea76578e0694e68284a084c882509cd1d41e13ce Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 15:15:16 +0100
Subject: [PATCH 02/26] add Num.bitwiseOr and Num.shiftLeftBy

---
 compiler/builtins/src/std.rs     | 18 ++++++++++++++++++
 compiler/can/src/builtins.rs     | 16 ++++++++++++++++
 compiler/gen/src/llvm/build.rs   | 26 +++++++++++++++++++++++++-
 compiler/module/src/low_level.rs |  2 ++
 compiler/module/src/symbol.rs    | 20 +++++++++++---------
 compiler/mono/src/borrow.rs      | 12 +++++++++++-
 6 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs
index 42eb034581..5156fadc0b 100644
--- a/compiler/builtins/src/std.rs
+++ b/compiler/builtins/src/std.rs
@@ -324,6 +324,24 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
+    // bitwiseOr : Int a, Int a -> Int a
+    add_type(
+        Symbol::NUM_BITWISE_OR,
+        top_level_function(
+            vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))],
+            Box::new(int_type(flex(TVAR1))),
+        ),
+    );
+
+    // shiftLeftBy : Nat, Int a -> Int a
+    add_type(
+        Symbol::NUM_SHIFT_LEFT,
+        top_level_function(
+            vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))],
+            Box::new(int_type(flex(TVAR1))),
+        ),
+    );
+
     // rem : Int a, Int a -> Result (Int a) [ DivByZero ]*
     add_type(
         Symbol::NUM_REM,
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index fdff0a4e2c..f3e1ff4a32 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -151,6 +151,8 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         NUM_MIN_INT => num_min_int,
         NUM_BITWISE_AND => num_bitwise_and,
         NUM_BITWISE_XOR => num_bitwise_xor,
+        NUM_BITWISE_OR => num_bitwise_or,
+        NUM_SHIFT_LEFT=> num_shift_left_by,
         RESULT_MAP => result_map,
         RESULT_MAP_ERR => result_map_err,
         RESULT_WITH_DEFAULT => result_with_default,
@@ -273,6 +275,10 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
         Symbol::NUM_ASIN => num_asin,
         Symbol::NUM_MAX_INT => num_max_int,
         Symbol::NUM_MIN_INT => num_min_int,
+        Symbol::NUM_BITWISE_AND => num_bitwise_and,
+        Symbol::NUM_BITWISE_XOR => num_bitwise_xor,
+        Symbol::NUM_BITWISE_OR => num_bitwise_or,
+        Symbol::NUM_SHIFT_LEFT=> num_shift_left_by,
         Symbol::RESULT_MAP => result_map,
         Symbol::RESULT_MAP_ERR => result_map_err,
         Symbol::RESULT_WITH_DEFAULT => result_with_default,
@@ -1299,6 +1305,16 @@ fn num_bitwise_xor(symbol: Symbol, var_store: &mut VarStore) -> Def {
     num_binop(symbol, var_store, LowLevel::NumBitwiseXor)
 }
 
+/// Num.bitwiseOr: Int, Int -> Int
+fn num_bitwise_or(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    num_binop(symbol, var_store, LowLevel::NumBitwiseOr)
+}
+
+/// Num.shiftLeftBy: Int a, Int a -> Int a
+fn num_shift_left_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_2(symbol, LowLevel::NumShiftLeftBy, var_store)
+}
+
 /// List.isEmpty : List * -> Bool
 fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let list_var = var_store.fresh();
diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index 5e65bc2cee..e77ae31500 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -3943,7 +3943,23 @@ fn run_low_level<'a, 'ctx, 'env>(
 
             build_num_binop(env, parent, lhs_arg, lhs_layout, rhs_arg, rhs_layout, op)
         }
-        NumBitwiseAnd | NumBitwiseXor => {
+        NumBitwiseAnd | NumBitwiseOr | NumBitwiseXor => {
+            debug_assert_eq!(args.len(), 2);
+
+            let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]);
+            let (rhs_arg, rhs_layout) = load_symbol_and_layout(scope, &args[1]);
+
+            build_int_binop(
+                env,
+                parent,
+                lhs_arg.into_int_value(),
+                lhs_layout,
+                rhs_arg.into_int_value(),
+                rhs_layout,
+                op,
+            )
+        }
+        NumShiftLeftBy => {
             debug_assert_eq!(args.len(), 2);
 
             let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]);
@@ -4585,6 +4601,14 @@ fn build_int_binop<'a, 'ctx, 'env>(
         NumPowInt => call_bitcode_fn(env, &[lhs.into(), rhs.into()], &bitcode::NUM_POW_INT),
         NumBitwiseAnd => bd.build_and(lhs, rhs, "int_bitwise_and").into(),
         NumBitwiseXor => bd.build_xor(lhs, rhs, "int_bitwise_xor").into(),
+        NumBitwiseOr => bd.build_or(lhs, rhs, "int_bitwise_or").into(),
+        NumShiftLeftBy => {
+            // NOTE arguments are flipped;
+            // we write `assert_eq!(0b0000_0001 << 0, 0b0000_0001);`
+            // as `Num.shiftLeftBy 0 0b0000_0001
+            bd.build_left_shift(rhs, lhs, "int_bitwise_or").into()
+        }
+
         _ => {
             unreachable!("Unrecognized int binary operation: {:?}", op);
         }
diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs
index e69fa0dd02..05a20c72c5 100644
--- a/compiler/module/src/low_level.rs
+++ b/compiler/module/src/low_level.rs
@@ -78,6 +78,8 @@ pub enum LowLevel {
     NumAsin,
     NumBitwiseAnd,
     NumBitwiseXor,
+    NumBitwiseOr,
+    NumShiftLeftBy,
     Eq,
     NotEq,
     And,
diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs
index 54700dd492..64717e405b 100644
--- a/compiler/module/src/symbol.rs
+++ b/compiler/module/src/symbol.rs
@@ -841,15 +841,17 @@ define_builtins! {
         80 NUM_BINARY32: "Binary32" imported
         81 NUM_BITWISE_AND: "bitwiseAnd"
         82 NUM_BITWISE_XOR: "bitwiseXor"
-        83 NUM_SUB_WRAP: "subWrap"
-        84 NUM_SUB_CHECKED: "subChecked"
-        85 NUM_MUL_WRAP: "mulWrap"
-        86 NUM_MUL_CHECKED: "mulChecked"
-        87 NUM_INT: "Int" imported
-        88 NUM_FLOAT: "Float" imported
-        89 NUM_AT_NATURAL: "@Natural"
-        90 NUM_NATURAL: "Natural" imported
-        91 NUM_NAT: "Nat" imported
+        83 NUM_BITWISE_OR: "bitwiseOr"
+        84 NUM_SHIFT_LEFT: "shiftLeftBy"
+        85 NUM_SUB_WRAP: "subWrap"
+        86 NUM_SUB_CHECKED: "subChecked"
+        87 NUM_MUL_WRAP: "mulWrap"
+        88 NUM_MUL_CHECKED: "mulChecked"
+        89 NUM_INT: "Int" imported
+        90 NUM_FLOAT: "Float" imported
+        91 NUM_AT_NATURAL: "@Natural"
+        92 NUM_NATURAL: "Natural" imported
+        93 NUM_NAT: "Nat" imported
     }
     2 BOOL: "Bool" => {
         0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias
diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs
index ba652c0c7b..2fa8893d4c 100644
--- a/compiler/mono/src/borrow.rs
+++ b/compiler/mono/src/borrow.rs
@@ -373,6 +373,14 @@ impl<'a> BorrowInfState<'a> {
                         self.own_var(z);
 
                         // if the function exects an owned argument (ps), the argument must be owned (args)
+                        debug_assert_eq!(
+                            arguments.len(),
+                            ps.len(),
+                            "{:?} has {} parameters, but was applied to {} arguments",
+                            name,
+                            ps.len(),
+                            arguments.len()
+                        );
                         self.own_args_using_params(arguments, ps);
                     }
                     None => {
@@ -658,7 +666,9 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         And | Or | NumAdd | NumAddWrap | NumAddChecked | NumSub | NumSubWrap | NumSubChecked
         | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare
         | NumDivUnchecked | NumRemUnchecked | NumPow | NumPowInt | NumBitwiseAnd
-        | NumBitwiseXor => arena.alloc_slice_copy(&[irrelevant, irrelevant]),
+        | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy => {
+            arena.alloc_slice_copy(&[irrelevant, irrelevant])
+        }
 
         NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumRound | NumCeiling | NumFloor
         | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin => {

From 128741e5856adebc368836c54b40a1eaf50979fd Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 16:01:01 +0100
Subject: [PATCH 03/26] add right shift operators

---
 compiler/builtins/src/std.rs     | 20 +++++++++++++++++++-
 compiler/can/src/builtins.rs     | 16 +++++++++++++++-
 compiler/gen/src/llvm/build.rs   | 14 ++++++++++++--
 compiler/module/src/low_level.rs |  2 ++
 compiler/module/src/symbol.rs    | 20 +++++++++++---------
 compiler/mono/src/borrow.rs      |  2 +-
 6 files changed, 60 insertions(+), 14 deletions(-)

diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs
index e7e7eea3c1..084edc1cd4 100644
--- a/compiler/builtins/src/std.rs
+++ b/compiler/builtins/src/std.rs
@@ -333,7 +333,7 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
-    // shiftLeftBy : Nat, Int a -> Int a
+    // shiftLeftBy : Int a, Int a -> Int a
     add_type(
         Symbol::NUM_SHIFT_LEFT,
         top_level_function(
@@ -342,6 +342,24 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
+    // shiftRightBy : Int a, Int a -> Int a
+    add_type(
+        Symbol::NUM_SHIFT_RIGHT,
+        top_level_function(
+            vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))],
+            Box::new(int_type(flex(TVAR1))),
+        ),
+    );
+
+    // shiftRightZfBy : Int a, Int a -> Int a
+    add_type(
+        Symbol::NUM_SHIFT_RIGHT_ZERO_FILL,
+        top_level_function(
+            vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))],
+            Box::new(int_type(flex(TVAR1))),
+        ),
+    );
+
     // rem : Int a, Int a -> Result (Int a) [ DivByZero ]*
     add_type(
         Symbol::NUM_REM,
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index 6c8f7a9cd3..c8516d26ca 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -154,6 +154,8 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         NUM_BITWISE_XOR => num_bitwise_xor,
         NUM_BITWISE_OR => num_bitwise_or,
         NUM_SHIFT_LEFT=> num_shift_left_by,
+        NUM_SHIFT_RIGHT => num_shift_right_by,
+        NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by,
         RESULT_MAP => result_map,
         RESULT_MAP_ERR => result_map_err,
         RESULT_WITH_DEFAULT => result_with_default,
@@ -280,7 +282,9 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
         Symbol::NUM_BITWISE_AND => num_bitwise_and,
         Symbol::NUM_BITWISE_XOR => num_bitwise_xor,
         Symbol::NUM_BITWISE_OR => num_bitwise_or,
-        Symbol::NUM_SHIFT_LEFT=> num_shift_left_by,
+        Symbol::NUM_SHIFT_LEFT => num_shift_left_by,
+        Symbol::NUM_SHIFT_RIGHT => num_shift_right_by,
+        Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by,
         Symbol::RESULT_MAP => result_map,
         Symbol::RESULT_MAP_ERR => result_map_err,
         Symbol::RESULT_WITH_DEFAULT => result_with_default,
@@ -1317,6 +1321,16 @@ fn num_shift_left_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
     lowlevel_2(symbol, LowLevel::NumShiftLeftBy, var_store)
 }
 
+/// Num.shiftRightBy: Int a, Int a -> Int a
+fn num_shift_right_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_2(symbol, LowLevel::NumShiftRightBy, var_store)
+}
+
+/// Num.shiftRightZfBy: Int a, Int a -> Int a
+fn num_shift_right_zf_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_2(symbol, LowLevel::NumShiftRightZfBy, var_store)
+}
+
 /// List.isEmpty : List * -> Bool
 fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let list_var = var_store.fresh();
diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index 250c903014..ed149b77a4 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -3967,7 +3967,7 @@ fn run_low_level<'a, 'ctx, 'env>(
                 op,
             )
         }
-        NumShiftLeftBy => {
+        NumShiftLeftBy | NumShiftRightBy | NumShiftRightZfBy => {
             debug_assert_eq!(args.len(), 2);
 
             let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]);
@@ -4614,7 +4614,17 @@ fn build_int_binop<'a, 'ctx, 'env>(
             // NOTE arguments are flipped;
             // we write `assert_eq!(0b0000_0001 << 0, 0b0000_0001);`
             // as `Num.shiftLeftBy 0 0b0000_0001
-            bd.build_left_shift(rhs, lhs, "int_bitwise_or").into()
+            bd.build_left_shift(rhs, lhs, "int_shift_left").into()
+        }
+        NumShiftRightBy => {
+            // NOTE arguments are flipped;
+            bd.build_right_shift(rhs, lhs, false, "int_shift_right")
+                .into()
+        }
+        NumShiftRightZfBy => {
+            // NOTE arguments are flipped;
+            bd.build_right_shift(rhs, lhs, true, "int_shift_right_zf")
+                .into()
         }
 
         _ => {
diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs
index 9740c80d80..7279a4ae76 100644
--- a/compiler/module/src/low_level.rs
+++ b/compiler/module/src/low_level.rs
@@ -81,6 +81,8 @@ pub enum LowLevel {
     NumBitwiseXor,
     NumBitwiseOr,
     NumShiftLeftBy,
+    NumShiftRightBy,
+    NumShiftRightZfBy,
     Eq,
     NotEq,
     And,
diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs
index 23fa1837f0..a7d19116dc 100644
--- a/compiler/module/src/symbol.rs
+++ b/compiler/module/src/symbol.rs
@@ -843,15 +843,17 @@ define_builtins! {
         82 NUM_BITWISE_XOR: "bitwiseXor"
         83 NUM_BITWISE_OR: "bitwiseOr"
         84 NUM_SHIFT_LEFT: "shiftLeftBy"
-        85 NUM_SUB_WRAP: "subWrap"
-        86 NUM_SUB_CHECKED: "subChecked"
-        87 NUM_MUL_WRAP: "mulWrap"
-        88 NUM_MUL_CHECKED: "mulChecked"
-        89 NUM_INT: "Int" imported
-        90 NUM_FLOAT: "Float" imported
-        91 NUM_AT_NATURAL: "@Natural"
-        92 NUM_NATURAL: "Natural" imported
-        93 NUM_NAT: "Nat" imported
+        85 NUM_SHIFT_RIGHT: "shiftRightBy"
+        86 NUM_SHIFT_RIGHT_ZERO_FILL: "shiftRightZfBy"
+        87 NUM_SUB_WRAP: "subWrap"
+        88 NUM_SUB_CHECKED: "subChecked"
+        89 NUM_MUL_WRAP: "mulWrap"
+        90 NUM_MUL_CHECKED: "mulChecked"
+        91 NUM_INT: "Int" imported
+        92 NUM_FLOAT: "Float" imported
+        93 NUM_AT_NATURAL: "@Natural"
+        94 NUM_NATURAL: "Natural" imported
+        95 NUM_NAT: "Nat" imported
     }
     2 BOOL: "Bool" => {
         0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias
diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs
index a06f9f7e37..294e2582c0 100644
--- a/compiler/mono/src/borrow.rs
+++ b/compiler/mono/src/borrow.rs
@@ -666,7 +666,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         And | Or | NumAdd | NumAddWrap | NumAddChecked | NumSub | NumSubWrap | NumSubChecked
         | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare
         | NumDivUnchecked | NumRemUnchecked | NumPow | NumPowInt | NumBitwiseAnd
-        | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy => {
+        | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy | NumShiftRightBy | NumShiftRightZfBy => {
             arena.alloc_slice_copy(&[irrelevant, irrelevant])
         }
 

From 43e71f2ee933e25f5a1f45243af8be6c8290724c Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 16:07:35 +0100
Subject: [PATCH 04/26] int cast

---
 compiler/builtins/src/std.rs     |  6 ++++++
 compiler/can/src/builtins.rs     |  7 +++++++
 compiler/gen/src/llvm/build.rs   | 10 ++++++++++
 compiler/module/src/low_level.rs |  1 +
 compiler/module/src/symbol.rs    |  2 ++
 compiler/mono/src/borrow.rs      |  2 +-
 6 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs
index 084edc1cd4..5a3b499dd7 100644
--- a/compiler/builtins/src/std.rs
+++ b/compiler/builtins/src/std.rs
@@ -360,6 +360,12 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
+    // intCast : Int a -> Int b
+    add_type(
+        Symbol::NUM_INT_CAST,
+        top_level_function(vec![int_type(flex(TVAR1))], Box::new(int_type(flex(TVAR2)))),
+    );
+
     // rem : Int a, Int a -> Result (Int a) [ DivByZero ]*
     add_type(
         Symbol::NUM_REM,
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index c8516d26ca..af38527fcc 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -156,6 +156,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         NUM_SHIFT_LEFT=> num_shift_left_by,
         NUM_SHIFT_RIGHT => num_shift_right_by,
         NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by,
+        NUM_INT_CAST=> num_int_cast,
         RESULT_MAP => result_map,
         RESULT_MAP_ERR => result_map_err,
         RESULT_WITH_DEFAULT => result_with_default,
@@ -285,6 +286,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
         Symbol::NUM_SHIFT_LEFT => num_shift_left_by,
         Symbol::NUM_SHIFT_RIGHT => num_shift_right_by,
         Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by,
+        Symbol::NUM_INT_CAST=> num_int_cast,
         Symbol::RESULT_MAP => result_map,
         Symbol::RESULT_MAP_ERR => result_map_err,
         Symbol::RESULT_WITH_DEFAULT => result_with_default,
@@ -1331,6 +1333,11 @@ fn num_shift_right_zf_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
     lowlevel_2(symbol, LowLevel::NumShiftRightZfBy, var_store)
 }
 
+/// Num.intCast: Int a -> Int b
+fn num_int_cast(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_1(symbol, LowLevel::NumIntCast, var_store)
+}
+
 /// List.isEmpty : List * -> Bool
 fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let list_var = var_store.fresh();
diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index ed149b77a4..9ba362841f 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -3983,6 +3983,16 @@ fn run_low_level<'a, 'ctx, 'env>(
                 op,
             )
         }
+        NumIntCast => {
+            debug_assert_eq!(args.len(), 1);
+
+            let arg = load_symbol(scope, &args[0]).into_int_value();
+
+            let to = basic_type_from_layout(env.arena, env.context, layout, env.ptr_bytes)
+                .into_int_type();
+
+            env.builder.build_int_cast(arg, to, "inc_cast").into()
+        }
         Eq => {
             debug_assert_eq!(args.len(), 2);
 
diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs
index 7279a4ae76..640b8c8bca 100644
--- a/compiler/module/src/low_level.rs
+++ b/compiler/module/src/low_level.rs
@@ -83,6 +83,7 @@ pub enum LowLevel {
     NumShiftLeftBy,
     NumShiftRightBy,
     NumShiftRightZfBy,
+    NumIntCast,
     Eq,
     NotEq,
     And,
diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs
index a7d19116dc..62f5a9d457 100644
--- a/compiler/module/src/symbol.rs
+++ b/compiler/module/src/symbol.rs
@@ -854,6 +854,8 @@ define_builtins! {
         93 NUM_AT_NATURAL: "@Natural"
         94 NUM_NATURAL: "Natural" imported
         95 NUM_NAT: "Nat" imported
+        96 NUM_INT_CAST: "intCast"
+
     }
     2 BOOL: "Bool" => {
         0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias
diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs
index 294e2582c0..c0d4f1e091 100644
--- a/compiler/mono/src/borrow.rs
+++ b/compiler/mono/src/borrow.rs
@@ -671,7 +671,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         }
 
         NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumRound | NumCeiling | NumFloor
-        | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin => {
+        | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin | NumIntCast => {
             arena.alloc_slice_copy(&[irrelevant])
         }
         StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]),

From bcbef5d3aac61fea000a9c9f8425fc630e638b6e Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 19:10:09 +0100
Subject: [PATCH 05/26] generalize add and sub intrinsics to all integer types

---
 compiler/gen/src/llvm/build.rs | 103 ++++++++++++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 3 deletions(-)

diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index 9ba362841f..31fc0ce6c3 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -296,8 +296,10 @@ fn add_intrinsics<'ctx>(ctx: &'ctx Context, module: &Module<'ctx>) {
     let void_type = ctx.void_type();
     let i1_type = ctx.bool_type();
     let f64_type = ctx.f64_type();
+    let i128_type = ctx.i128_type();
     let i64_type = ctx.i64_type();
     let i32_type = ctx.i32_type();
+    let i16_type = ctx.i16_type();
     let i8_type = ctx.i8_type();
     let i8_ptr_type = i8_type.ptr_type(AddressSpace::Generic);
 
@@ -377,18 +379,72 @@ fn add_intrinsics<'ctx>(ctx: &'ctx Context, module: &Module<'ctx>) {
         f64_type.fn_type(&[f64_type.into()], false),
     );
 
+    // add with overflow
+
+    add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I8, {
+        let fields = [i8_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i8_type.into(), i8_type.into()], false)
+    });
+
+    add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I16, {
+        let fields = [i16_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i16_type.into(), i16_type.into()], false)
+    });
+
+    add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I32, {
+        let fields = [i32_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i32_type.into(), i32_type.into()], false)
+    });
+
     add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I64, {
         let fields = [i64_type.into(), i1_type.into()];
         ctx.struct_type(&fields, false)
             .fn_type(&[i64_type.into(), i64_type.into()], false)
     });
 
+    add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I128, {
+        let fields = [i128_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i128_type.into(), i128_type.into()], false)
+    });
+
+    // sub with overflow
+
+    add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I8, {
+        let fields = [i8_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i8_type.into(), i8_type.into()], false)
+    });
+
+    add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I16, {
+        let fields = [i16_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i16_type.into(), i16_type.into()], false)
+    });
+
+    add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I32, {
+        let fields = [i32_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i32_type.into(), i32_type.into()], false)
+    });
+
     add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I64, {
         let fields = [i64_type.into(), i1_type.into()];
         ctx.struct_type(&fields, false)
             .fn_type(&[i64_type.into(), i64_type.into()], false)
     });
 
+    add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I128, {
+        let fields = [i128_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i128_type.into(), i128_type.into()], false)
+    });
+
+    // mul with overflow
+
     add_intrinsic(module, LLVM_SMUL_WITH_OVERFLOW_I64, {
         let fields = [i64_type.into(), i1_type.into()];
         ctx.struct_type(&fields, false)
@@ -406,8 +462,19 @@ static LLVM_COS_F64: &str = "llvm.cos.f64";
 static LLVM_POW_F64: &str = "llvm.pow.f64";
 static LLVM_CEILING_F64: &str = "llvm.ceil.f64";
 static LLVM_FLOOR_F64: &str = "llvm.floor.f64";
+
+pub static LLVM_SADD_WITH_OVERFLOW_I8: &str = "llvm.sadd.with.overflow.i8";
+pub static LLVM_SADD_WITH_OVERFLOW_I16: &str = "llvm.sadd.with.overflow.i16";
+pub static LLVM_SADD_WITH_OVERFLOW_I32: &str = "llvm.sadd.with.overflow.i32";
 pub static LLVM_SADD_WITH_OVERFLOW_I64: &str = "llvm.sadd.with.overflow.i64";
+pub static LLVM_SADD_WITH_OVERFLOW_I128: &str = "llvm.sadd.with.overflow.i128";
+
+pub static LLVM_SSUB_WITH_OVERFLOW_I8: &str = "llvm.ssub.with.overflow.i8";
+pub static LLVM_SSUB_WITH_OVERFLOW_I16: &str = "llvm.ssub.with.overflow.i16";
+pub static LLVM_SSUB_WITH_OVERFLOW_I32: &str = "llvm.ssub.with.overflow.i32";
 pub static LLVM_SSUB_WITH_OVERFLOW_I64: &str = "llvm.ssub.with.overflow.i64";
+pub static LLVM_SSUB_WITH_OVERFLOW_I128: &str = "llvm.ssub.with.overflow.i128";
+
 pub static LLVM_SMUL_WITH_OVERFLOW_I64: &str = "llvm.smul.with.overflow.i64";
 
 fn add_intrinsic<'ctx>(
@@ -4506,7 +4573,7 @@ fn build_int_binop<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
     parent: FunctionValue<'ctx>,
     lhs: IntValue<'ctx>,
-    _lhs_layout: &Layout<'a>,
+    lhs_layout: &Layout<'a>,
     rhs: IntValue<'ctx>,
     _rhs_layout: &Layout<'a>,
     op: LowLevel,
@@ -4519,8 +4586,23 @@ fn build_int_binop<'a, 'ctx, 'env>(
     match op {
         NumAdd => {
             let context = env.context;
+
+            let intrinsic = match lhs_layout {
+                Layout::Builtin(Builtin::Int8) => LLVM_SADD_WITH_OVERFLOW_I8,
+                Layout::Builtin(Builtin::Int16) => LLVM_SADD_WITH_OVERFLOW_I16,
+                Layout::Builtin(Builtin::Int32) => LLVM_SADD_WITH_OVERFLOW_I32,
+                Layout::Builtin(Builtin::Int64) => LLVM_SADD_WITH_OVERFLOW_I64,
+                Layout::Builtin(Builtin::Int128) => LLVM_SADD_WITH_OVERFLOW_I128,
+                Layout::Builtin(Builtin::Usize) => match env.ptr_bytes {
+                    4 => LLVM_SADD_WITH_OVERFLOW_I32,
+                    8 => LLVM_SADD_WITH_OVERFLOW_I64,
+                    other => panic!("invalid ptr_bytes {}", other),
+                },
+                _ => unreachable!(),
+            };
+
             let result = env
-                .call_intrinsic(LLVM_SADD_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()])
+                .call_intrinsic(intrinsic, &[lhs.into(), rhs.into()])
                 .into_struct_value();
 
             let add_result = bd.build_extract_value(result, 0, "add_result").unwrap();
@@ -4550,8 +4632,23 @@ fn build_int_binop<'a, 'ctx, 'env>(
         NumAddChecked => env.call_intrinsic(LLVM_SADD_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()]),
         NumSub => {
             let context = env.context;
+
+            let intrinsic = match lhs_layout {
+                Layout::Builtin(Builtin::Int8) => LLVM_SSUB_WITH_OVERFLOW_I8,
+                Layout::Builtin(Builtin::Int16) => LLVM_SSUB_WITH_OVERFLOW_I16,
+                Layout::Builtin(Builtin::Int32) => LLVM_SSUB_WITH_OVERFLOW_I32,
+                Layout::Builtin(Builtin::Int64) => LLVM_SSUB_WITH_OVERFLOW_I64,
+                Layout::Builtin(Builtin::Int128) => LLVM_SSUB_WITH_OVERFLOW_I128,
+                Layout::Builtin(Builtin::Usize) => match env.ptr_bytes {
+                    4 => LLVM_SSUB_WITH_OVERFLOW_I32,
+                    8 => LLVM_SSUB_WITH_OVERFLOW_I64,
+                    other => panic!("invalid ptr_bytes {}", other),
+                },
+                _ => unreachable!("invalid layout {:?}", lhs_layout),
+            };
+
             let result = env
-                .call_intrinsic(LLVM_SSUB_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()])
+                .call_intrinsic(intrinsic, &[lhs.into(), rhs.into()])
                 .into_struct_value();
 
             let sub_result = bd.build_extract_value(result, 0, "sub_result").unwrap();

From 9116e9e8c96cfdeadd7c9cd7b78096c75d5b5b10 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 19:52:32 +0100
Subject: [PATCH 06/26] add Str.toBytes

---
 compiler/builtins/bitcode/src/main.zig |  1 +
 compiler/builtins/bitcode/src/str.zig  | 21 +++++++++++++++++++++
 compiler/builtins/src/bitcode.rs       |  1 +
 compiler/builtins/src/std.rs           |  6 ++++++
 compiler/can/src/builtins.rs           |  7 +++++++
 compiler/gen/src/llvm/build.rs         | 14 ++++++++++++--
 compiler/gen/src/llvm/build_str.rs     | 22 ++++++++++++++++++++++
 compiler/module/src/low_level.rs       |  1 +
 compiler/module/src/symbol.rs          |  1 +
 compiler/mono/src/borrow.rs            |  1 +
 10 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig
index 7df2061ed9..e124afe62c 100644
--- a/compiler/builtins/bitcode/src/main.zig
+++ b/compiler/builtins/bitcode/src/main.zig
@@ -68,6 +68,7 @@ comptime {
     exportStrFn(str.strFromFloatC, "from_float");
     exportStrFn(str.strEqual, "equal");
     exportStrFn(str.validateUtf8Bytes, "validate_utf8_bytes");
+    exportStrFn(str.strToBytesC, "to_bytes");
 }
 
 // Export helpers - Must be run inside a comptime
diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig
index 32057d35d0..a752a37f0e 100644
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@@ -1,4 +1,5 @@
 const utils = @import("utils.zig");
+const RocList = @import("list.zig").RocList;
 const std = @import("std");
 const mem = std.mem;
 const always_inline = std.builtin.CallOptions.Modifier.always_inline;
@@ -961,6 +962,26 @@ test "RocStr.joinWith: result is big" {
     expect(roc_result.eq(result));
 }
 
+// Str.toBytes
+pub fn strToBytesC(arg: RocStr) callconv(.C) RocList {
+    return @call(.{ .modifier = always_inline }, strToBytes, .{ std.heap.c_allocator, arg });
+}
+
+fn strToBytes(allocator: *Allocator, arg: RocStr) RocList {
+    if (arg.isEmpty()) {
+        return RocList.empty();
+    } else if (arg.isSmallStr()) {
+        const length = arg.len();
+        const ptr = utils.allocateWithRefcount(allocator, @alignOf(usize), length);
+
+        @memcpy(ptr, arg.asU8ptr(), length);
+
+        return RocList{ .length = length, .bytes = ptr };
+    } else {
+        return RocList{ .length = arg.len(), .bytes = arg.str_bytes };
+    }
+}
+
 pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool {
     const bytes: []u8 = ptr[0..len];
     return @call(.{ .modifier = always_inline }, unicode.utf8ValidateSlice, .{bytes});
diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs
index 134bce68ad..b19117e106 100644
--- a/compiler/builtins/src/bitcode.rs
+++ b/compiler/builtins/src/bitcode.rs
@@ -42,6 +42,7 @@ pub const STR_FROM_INT: &str = "roc_builtins.str.from_int";
 pub const STR_FROM_FLOAT: &str = "roc_builtins.str.from_float";
 pub const STR_EQUAL: &str = "roc_builtins.str.equal";
 pub const STR_VALIDATE_UTF_BYTES: &str = "roc_builtins.str.validate_utf8_bytes";
+pub const STR_TO_BYTES: &str = "roc_builtins.str.to_bytes";
 
 pub const DICT_HASH: &str = "roc_builtins.dict.hash";
 pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";
diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs
index 5a3b499dd7..5de65ca3dd 100644
--- a/compiler/builtins/src/std.rs
+++ b/compiler/builtins/src/std.rs
@@ -623,6 +623,12 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
+    // toBytes : Str -> List U8
+    add_type(
+        Symbol::STR_TO_BYTES,
+        top_level_function(vec![str_type()], Box::new(list_type(u8_type()))),
+    );
+
     // fromFloat : Float a -> Str
     add_type(
         Symbol::STR_FROM_FLOAT,
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index af38527fcc..7db64b296f 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -62,6 +62,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         STR_COUNT_GRAPHEMES => str_count_graphemes,
         STR_FROM_INT => str_from_int,
         STR_FROM_UTF8 => str_from_utf8,
+        STR_TO_BYTES => str_to_bytes,
         STR_FROM_FLOAT=> str_from_float,
         LIST_LEN => list_len,
         LIST_GET => list_get,
@@ -196,6 +197,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
         Symbol::STR_COUNT_GRAPHEMES => str_count_graphemes,
         Symbol::STR_FROM_INT => str_from_int,
         Symbol::STR_FROM_UTF8 => str_from_utf8,
+        Symbol::STR_TO_BYTES => str_to_bytes,
         Symbol::STR_FROM_FLOAT=> str_from_float,
         Symbol::LIST_LEN => list_len,
         Symbol::LIST_GET => list_get,
@@ -1655,6 +1657,11 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
     )
 }
 
+/// Str.toBytes : Str -> List U8
+fn str_to_bytes(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_1(symbol, LowLevel::StrToBytes, var_store)
+}
+
 /// Str.fromFloat : Float * -> Str
 fn str_from_float(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let float_var = var_store.fresh();
diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index 31fc0ce6c3..506dec1253 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -12,7 +12,7 @@ use crate::llvm::build_list::{
 };
 use crate::llvm::build_str::{
     str_concat, str_count_graphemes, str_ends_with, str_from_float, str_from_int, str_from_utf8,
-    str_join_with, str_number_of_bytes, str_split, str_starts_with, CHAR_LAYOUT,
+    str_join_with, str_number_of_bytes, str_split, str_starts_with, str_to_bytes, CHAR_LAYOUT,
 };
 use crate::llvm::compare::{generic_eq, generic_neq};
 use crate::llvm::convert::{
@@ -3611,13 +3611,23 @@ fn run_low_level<'a, 'ctx, 'env>(
             str_from_float(env, scope, args[0])
         }
         StrFromUtf8 => {
-            // Str.fromInt : Int -> Str
+            // Str.fromUtf8 : List U8 -> Result Str Utf8Problem
             debug_assert_eq!(args.len(), 1);
 
             let original_wrapper = load_symbol(scope, &args[0]).into_struct_value();
 
             str_from_utf8(env, parent, original_wrapper)
         }
+        StrToBytes => {
+            // Str.toBytes : Str -> List U8
+            debug_assert_eq!(args.len(), 1);
+
+            // not always an identity conversion: the builtin copies small strings into
+            // a fresh allocation; other non-empty strings have their bytes reused as-is
+            let string = load_symbol(scope, &args[0]);
+
+            str_to_bytes(env, string.into_struct_value())
+        }
         StrSplit => {
             // Str.split : Str, Str -> List Str
             debug_assert_eq!(args.len(), 2);
diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs
index be27698163..301b726fb3 100644
--- a/compiler/gen/src/llvm/build_str.rs
+++ b/compiler/gen/src/llvm/build_str.rs
@@ -275,6 +275,28 @@ pub fn str_from_int<'a, 'ctx, 'env>(
     zig_str_to_struct(env, zig_result).into()
 }
 
+/// Str.toBytes : Str -> List U8
+pub fn str_to_bytes<'a, 'ctx, 'env>(
+    env: &Env<'a, 'ctx, 'env>,
+    original_wrapper: StructValue<'ctx>,
+) -> BasicValueEnum<'ctx> {
+    let string = complex_bitcast(
+        env.builder,
+        original_wrapper.into(),
+        env.context.i128_type().into(),
+        "to_bytes",
+    );
+
+    let zig_result = call_bitcode_fn(env, &[string], &bitcode::STR_TO_BYTES);
+
+    complex_bitcast(
+        env.builder,
+        zig_result,
+        collection(env.context, env.ptr_bytes).into(),
+        "to_bytes",
+    )
+}
+
 /// Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 }
 pub fn str_from_utf8<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs
index 640b8c8bca..07422cd4d0 100644
--- a/compiler/module/src/low_level.rs
+++ b/compiler/module/src/low_level.rs
@@ -12,6 +12,7 @@ pub enum LowLevel {
     StrCountGraphemes,
     StrFromInt,
     StrFromUtf8,
+    StrToBytes,
     StrFromFloat,
     ListLen,
     ListGetUnsafe,
diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs
index 62f5a9d457..2497e9cfd3 100644
--- a/compiler/module/src/symbol.rs
+++ b/compiler/module/src/symbol.rs
@@ -882,6 +882,7 @@ define_builtins! {
         12 STR_FROM_UTF8: "fromUtf8"
         13 STR_UT8_PROBLEM: "Utf8Problem" // the Utf8Problem type alias
         14 STR_UT8_BYTE_PROBLEM: "Utf8ByteProblem" // the Utf8ByteProblem type alias
+        15 STR_TO_BYTES: "toBytes"
     }
     4 LIST: "List" => {
         0 LIST_LIST: "List" imported // the List.List type alias
diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs
index c0d4f1e091..c87f0b4e05 100644
--- a/compiler/mono/src/borrow.rs
+++ b/compiler/mono/src/borrow.rs
@@ -676,6 +676,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         }
         StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]),
         StrFromUtf8 => arena.alloc_slice_copy(&[owned]),
+        StrToBytes => arena.alloc_slice_copy(&[owned]),
         StrFromInt | StrFromFloat => arena.alloc_slice_copy(&[irrelevant]),
         Hash => arena.alloc_slice_copy(&[borrowed, irrelevant]),
         DictSize => arena.alloc_slice_copy(&[borrowed]),

From e218279f42e473689222200bc1ca67649d757391 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 19:54:32 +0100
Subject: [PATCH 07/26] add example files

---
 examples/benchmarks/Base64.roc      | 164 ++++++++++++++++++++++++++++
 examples/benchmarks/BytesDecode.roc | 106 ++++++++++++++++++
 2 files changed, 270 insertions(+)
 create mode 100644 examples/benchmarks/Base64.roc
 create mode 100644 examples/benchmarks/BytesDecode.roc

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
new file mode 100644
index 0000000000..e9dc460cb3
--- /dev/null
+++ b/examples/benchmarks/Base64.roc
@@ -0,0 +1,164 @@
+app "base64"
+    packages { base: "platform" }
+    imports [base.Task, BytesDecode.{Decoder} ]
+    provides [ main ] to base
+
+IO a : Task.Task a []
+
+Decoder a : BytesDecode.Decoder a
+
+main : IO {}
+main =
+    # when fromBytes [ 0 ] is
+    when fromBytes (Str.toBytes "Hello World") is
+        Ok str ->
+            Task.putLine str
+
+        Err _ ->
+            Task.putLine "sadness"
+
+
+
+
+
+# ------
+
+
+fromBytes : List U8 -> Result Str BytesDecode.DecodeError
+fromBytes = \bytes ->
+    BytesDecode.decode  bytes (decodeBase64 (List.len bytes))
+
+
+decodeBase64 : Nat -> BytesDecode.Decoder Str
+decodeBase64 = \width -> BytesDecode.loop loopHelp { remaining: width, string:  "" }
+
+loopHelp : { remaining : Nat, string : Str } -> Decoder (BytesDecode.Step { remaining : Nat, string : Str } Str)
+loopHelp = \{ remaining, string } ->
+    if remaining >= 3 then
+        helper = \x, y, z ->
+            a : U32
+            a = Num.intCast x
+            b : U32
+            b = Num.intCast y
+            c : U32
+            c = Num.intCast z
+            combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c
+            Loop
+                {
+                    remaining: remaining - 3,
+                    string: Str.concat string (bitsToChars combined 0)
+                }
+
+        BytesDecode.map3 helper
+            BytesDecode.u8
+            BytesDecode.u8
+            BytesDecode.u8
+
+    else if remaining == 0 then
+        BytesDecode.succeed (Done string)
+
+    else if remaining == 2 then
+        helperX = \x, y ->
+            a : U32
+            a = Num.intCast x
+            b : U32
+            b = Num.intCast y
+            combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)
+            Done (Str.concat string (bitsToChars combined 1))
+
+        BytesDecode.map2 helperX
+            BytesDecode.u8
+            BytesDecode.u8
+    else
+        # remaining = 1
+            BytesDecode.u8
+                |> BytesDecode.map (\x -> 
+                    a : U32
+                    a = Num.intCast x
+                    Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)))
+
+
+bitsToChars : U32, Int * -> Str
+bitsToChars = \bits, missing ->
+    when Str.fromUtf8 (bitsToCharsHelp bits missing) is
+        Ok str -> str
+        Err _ -> ""
+
+# Mask that can be used to get the lowest 6 bits of a binary number
+lowest6BitsMask : Int *
+lowest6BitsMask = 63
+
+
+bitsToCharsHelp : U32, Int * -> List U8
+bitsToCharsHelp = \bits, missing ->
+    # Performance Notes
+    # `String.cons` proved to be the fastest way of combining characters into a string
+    # see also https://github.com/danfishgold/base64-bytes/pull/3#discussion_r342321940
+    # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by
+    # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd`
+    # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left.
+        
+    # any 6-bit number is a valid base64 digit, so this is actually safe
+    p =
+        Num.shiftRightZfBy 18 bits
+            |> Num.intCast
+            |> unsafeToChar 
+
+    q =
+        Num.bitwiseAnd (Num.shiftRightZfBy 12 bits) lowest6BitsMask
+            |> Num.intCast
+            |> unsafeToChar 
+
+    r =
+        Num.bitwiseAnd (Num.shiftRightZfBy 6 bits) lowest6BitsMask
+            |> Num.intCast
+            |> unsafeToChar 
+
+    s =
+        Num.bitwiseAnd bits lowest6BitsMask
+            |> Num.intCast
+            |> unsafeToChar 
+
+    equals : U8
+    equals = 61
+
+    when missing is
+        0 -> 
+            [ p, q, r, s ]
+        1 ->
+            [ p, q, r, equals ]
+        2 ->
+            [ p, q, equals , equals ]
+        _ ->
+            # unreachable
+            []
+
+# Base64 index to character/digit
+unsafeToChar : U8 -> U8
+unsafeToChar = \n ->
+    if n <= 25 then
+        # uppercase characters
+        65 + n
+
+    else if n <= 51 then
+        # lowercase characters
+        97 + (n - 26)
+
+    else if n <= 61 then
+        # digit characters
+        48 + (n - 52)
+
+    else
+        # special cases
+        when n is
+            62 ->
+                # '+'
+                43
+
+            63 ->
+                # '/'
+                47
+
+            _ ->
+                # anything else is invalid '\u{0000}'
+                0
diff --git a/examples/benchmarks/BytesDecode.roc b/examples/benchmarks/BytesDecode.roc
new file mode 100644
index 0000000000..03a341c4ff
--- /dev/null
+++ b/examples/benchmarks/BytesDecode.roc
@@ -0,0 +1,106 @@
+interface BytesDecode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports []
+
+State : { bytes: List U8, cursor : Nat }
+
+DecodeError : [ OutOfBytes ]
+
+
+Decoder a : [ @Decoder (State -> [Good State a, Bad DecodeError]) ]
+
+decode : List U8, Decoder a -> Result a DecodeError
+decode = \bytes, @Decoder decoder ->
+    when decoder { bytes, cursor: 0 } is
+        Good _ value ->
+            Ok value
+
+        Bad e ->
+            Err e
+
+succeed : a -> Decoder a
+succeed = \value -> @Decoder \state -> Good state value 
+
+map : Decoder a, (a -> b) -> Decoder b
+map = \@Decoder decoder, transform -> 
+    @Decoder \state -> 
+        when decoder state is
+            Good state1 value ->
+                Good state1 (transform value)
+
+            Bad e ->
+                Bad e
+
+
+map2 : (a,b -> c), Decoder a, Decoder b -> Decoder c
+map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> 
+    @Decoder \state1 -> 
+        when decoder1 state1 is
+            Good state2 a ->
+                when decoder2 state2 is
+                    Good state3 b ->
+                        Good state3 (transform a b)
+
+                    Bad e ->
+                        Bad e
+
+            Bad e ->
+                Bad e
+
+map3 : (a, b, c -> d), Decoder a, Decoder b, Decoder c -> Decoder d
+map3 = \transform, @Decoder decoder1, @Decoder decoder2, @Decoder decoder3 -> 
+    @Decoder \state1 -> 
+        when decoder1 state1 is
+            Good state2 a ->
+                when decoder2 state2 is
+                    Good state3 b ->
+                        when decoder3 state3 is
+                            Good state4 c ->
+                                Good state4 (transform a b c)
+
+                            Bad e ->
+                                Bad e
+
+                    Bad e ->
+                        Bad e
+
+            Bad e ->
+                Bad e
+
+after : Decoder a, (a -> Decoder b) -> Decoder b
+after = \@Decoder decoder, transform -> 
+    @Decoder \state -> 
+        when decoder state is
+            Good state1 value ->
+                (@Decoder decoder1) = transform value
+                decoder1 state1
+
+
+            Bad e ->
+                Bad e
+
+u8 : Decoder U8
+u8 = @Decoder \state ->
+    when List.get state.bytes state.cursor is
+        Ok b ->
+            Good { state & cursor: state.cursor + 1 } b
+
+        Err _ ->
+            Bad OutOfBytes
+
+Step state b : [ Loop state, Done b ]
+
+loop : (state -> Decoder (Step state a)), state -> Decoder a
+loop = \stepper, initial ->
+    @Decoder \state -> 
+        loopHelp stepper initial state
+
+loopHelp = \stepper, accum, state -> 
+    (@Decoder stepper1) = stepper accum
+    when stepper1 state is
+        Good newState (Done value) ->
+            Good newState value
+
+        Good newState (Loop newAccum) ->
+            loopHelp stepper newAccum newState
+
+        Bad e ->
+            Bad e

From c4ddeefed96e741b6208d38ad47d5ee78253e515 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 20:45:17 +0100
Subject: [PATCH 08/26] add test of integer type inference let polymorphism

---
 compiler/solve/tests/solve_expr.rs | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/compiler/solve/tests/solve_expr.rs b/compiler/solve/tests/solve_expr.rs
index 229ae772a7..ba5d0519c3 100644
--- a/compiler/solve/tests/solve_expr.rs
+++ b/compiler/solve/tests/solve_expr.rs
@@ -4315,4 +4315,26 @@ mod solve_expr {
             "Str",
         );
     }
+
+    #[test]
+    fn int_type_let_polymorphism() {
+        infer_eq_without_problem(
+            indoc!(
+                r#"
+                app "test" provides [ main ] to "./platform"
+
+                x = 4
+
+                f : U8 -> U32
+                f = \z -> Num.intCast z
+
+                y = f x
+
+                main =
+                    x
+                "#
+            ),
+            "Num *",
+        );
+    }
 }

From ad96d1ae24202c8cbe7ce170b051b9cd78c692e9 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 20:45:26 +0100
Subject: [PATCH 09/26] trim comment

---
 examples/benchmarks/Base64.roc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index e9dc460cb3..cfcc364334 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -91,9 +91,6 @@ lowest6BitsMask = 63
 
 bitsToCharsHelp : U32, Int * -> List U8
 bitsToCharsHelp = \bits, missing ->
-    # Performance Notes
-    # `String.cons` proved to be the fastest way of combining characters into a string
-    # see also https://github.com/danfishgold/base64-bytes/pull/3#discussion_r342321940
     # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by
     # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd`
     # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left.

From 86cf7cd983f92cb2a9bf8d9b443528bf8040f3c3 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 20:52:06 +0100
Subject: [PATCH 10/26] move file

---
 examples/benchmarks/Base64.roc                | 34 +++++++++----------
 .../{BytesDecode.roc => Bytes/Decode.roc}     |  2 +-
 2 files changed, 18 insertions(+), 18 deletions(-)
 rename examples/benchmarks/{BytesDecode.roc => Bytes/Decode.roc} (95%)

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index cfcc364334..92b30f1555 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -1,11 +1,11 @@
 app "base64"
     packages { base: "platform" }
-    imports [base.Task, BytesDecode.{Decoder} ]
+    imports [base.Task, Bytes.Decode.{Decoder} ]
     provides [ main ] to base
 
 IO a : Task.Task a []
 
-Decoder a : BytesDecode.Decoder a
+Decoder a : Bytes.Decode.Decoder a
 
 main : IO {}
 main =
@@ -24,15 +24,15 @@ main =
 # ------
 
 
-fromBytes : List U8 -> Result Str BytesDecode.DecodeError
+fromBytes : List U8 -> Result Str Bytes.Decode.DecodeError
 fromBytes = \bytes ->
-    BytesDecode.decode  bytes (decodeBase64 (List.len bytes))
+    Bytes.Decode.decode  bytes (decodeBase64 (List.len bytes))
 
 
-decodeBase64 : Nat -> BytesDecode.Decoder Str
-decodeBase64 = \width -> BytesDecode.loop loopHelp { remaining: width, string:  "" }
+decodeBase64 : Nat -> Bytes.Decode.Decoder Str
+decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string:  "" }
 
-loopHelp : { remaining : Nat, string : Str } -> Decoder (BytesDecode.Step { remaining : Nat, string : Str } Str)
+loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str)
 loopHelp = \{ remaining, string } ->
     if remaining >= 3 then
         helper = \x, y, z ->
@@ -49,13 +49,13 @@ loopHelp = \{ remaining, string } ->
                     string: Str.concat string (bitsToChars combined 0)
                 }
 
-        BytesDecode.map3 helper
-            BytesDecode.u8
-            BytesDecode.u8
-            BytesDecode.u8
+        Bytes.Decode.map3 helper
+            Bytes.Decode.u8
+            Bytes.Decode.u8
+            Bytes.Decode.u8
 
     else if remaining == 0 then
-        BytesDecode.succeed (Done string)
+        Bytes.Decode.succeed (Done string)
 
     else if remaining == 2 then
         helperX = \x, y ->
@@ -66,13 +66,13 @@ loopHelp = \{ remaining, string } ->
             combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)
             Done (Str.concat string (bitsToChars combined 1))
 
-        BytesDecode.map2 helperX
-            BytesDecode.u8
-            BytesDecode.u8
+        Bytes.Decode.map2 helperX
+            Bytes.Decode.u8
+            Bytes.Decode.u8
     else
         # remaining = 1
-            BytesDecode.u8
-                |> BytesDecode.map (\x -> 
+            Bytes.Decode.u8
+                |> Bytes.Decode.map (\x -> 
                     a : U32
                     a = Num.intCast x
                     Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)))
diff --git a/examples/benchmarks/BytesDecode.roc b/examples/benchmarks/Bytes/Decode.roc
similarity index 95%
rename from examples/benchmarks/BytesDecode.roc
rename to examples/benchmarks/Bytes/Decode.roc
index 03a341c4ff..db14d857a6 100644
--- a/examples/benchmarks/BytesDecode.roc
+++ b/examples/benchmarks/Bytes/Decode.roc
@@ -1,4 +1,4 @@
-interface BytesDecode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports []
+interface Bytes.Decode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports []
 
 State : { bytes: List U8, cursor : Nat }
 

From c4972f45baaa1dae44736079e644594a2081d4b8 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 21:12:47 +0100
Subject: [PATCH 11/26] bit shift tests

---
 compiler/gen/tests/gen_num.rs  | 25 +++++++++++++++++++++++++
 examples/benchmarks/Base64.roc |  4 ----
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs
index 262b8495b9..5fb442927c 100644
--- a/compiler/gen/tests/gen_num.rs
+++ b/compiler/gen/tests/gen_num.rs
@@ -1343,4 +1343,29 @@ mod gen_num {
             f64
         );
     }
+
+    #[test]
+    fn shift_left_by() {
+        assert_evals_to!("Num.shiftLeftBy 0 0b0000_0001", 0b0000_0001, i64);
+        assert_evals_to!("Num.shiftLeftBy 1 0b0000_0001", 0b0000_0010, i64);
+        assert_evals_to!("Num.shiftLeftBy 2 0b0000_0011", 0b0000_1100, i64);
+    }
+
+    #[test]
+    #[ignore]
+    fn shift_right_by() {
+        // Sign Extended Right Shift; shifting by 0 must return the input unchanged
+        assert_evals_to!("Num.shiftRightBy 0 0b0100_0000i8", 0b0100_0000, i8);
+        assert_evals_to!("Num.shiftRightBy 1 0b1110_0000u8", 0b1111_0000u8 as i8, i8);
+        assert_evals_to!("Num.shiftRightBy 2 0b1100_0000u8", 0b1111_0000u8 as i8, i8);
+    }
+
+    #[test]
+    #[ignore]
+    fn shift_right_zf_by() {
+        // Logical Right Shift (zero-fill): vacated high bits are always 0
+        assert_evals_to!("Num.shiftRightZfBy 1 0b1100_0000u8", 0b0110_0000, u8);
+        assert_evals_to!("Num.shiftRightZfBy 2 0b0000_0010u8", 0b0000_0000, u8);
+        assert_evals_to!("Num.shiftRightZfBy 3 0b0000_1100u8", 0b0000_0001, u8);
+    }
 }
diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index 92b30f1555..1fed392288 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -17,10 +17,6 @@ main =
         Err _ ->
             Task.putLine "sadness"
 
-
-
-
-
 # ------
 
 

From 1746b8da6f5cb3662bf26941b2ffef162eebbe29 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 21:14:20 +0100
Subject: [PATCH 12/26] bitwise or test

---
 compiler/gen/tests/gen_num.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs
index 5fb442927c..f6976de9e0 100644
--- a/compiler/gen/tests/gen_num.rs
+++ b/compiler/gen/tests/gen_num.rs
@@ -750,6 +750,12 @@ mod gen_num {
         assert_evals_to!("Num.bitwiseXor 200 0", 200, i64);
     }
 
+    #[test]
+    fn bitwise_or() {
+        assert_evals_to!("Num.bitwiseOr 1 1", 1, i64);
+        assert_evals_to!("Num.bitwiseOr 1 2", 3, i64);
+    }
+
     #[test]
     fn lt_i64() {
         assert_evals_to!("1 < 2", true, bool);

From 63091392f55398924d7b688de9ca20af230f3db9 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 21:18:15 +0100
Subject: [PATCH 13/26] toBytes test

---
 compiler/gen/tests/gen_str.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/compiler/gen/tests/gen_str.rs b/compiler/gen/tests/gen_str.rs
index dafdbf4b60..53bfa1f53e 100644
--- a/compiler/gen/tests/gen_str.rs
+++ b/compiler/gen/tests/gen_str.rs
@@ -816,4 +816,17 @@ mod gen_str {
     fn str_from_float() {
         assert_evals_to!(r#"Str.fromFloat 3.14"#, RocStr::from("3.140000"), RocStr);
     }
+
+    #[test]
+    fn str_to_bytes() {
+        assert_evals_to!(r#"Str.toBytes "hello""#, &[104, 101, 108, 108, 111], &[u8]);
+        assert_evals_to!(
+            r#"Str.toBytes "this is a long string""#,
+            &[
+                116, 104, 105, 115, 32, 105, 115, 32, 97, 32, 108, 111, 110, 103, 32, 115, 116,
+                114, 105, 110, 103
+            ],
+            &[u8]
+        );
+    }
 }

From c24d51e69d39326343827d5b94f826fc6a2d8868 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 15:14:52 +0100
Subject: [PATCH 14/26] remove old function

---
 compiler/mono/src/ir.rs | 33 ---------------------------------
 1 file changed, 33 deletions(-)

diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index 79628b27ac..9d4d41efd3 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -302,39 +302,6 @@ pub enum InProgressProc<'a> {
 }
 
 impl<'a> Procs<'a> {
-    /// Absorb the contents of another Procs into this one.
-    pub fn absorb(&mut self, mut other: Procs<'a>) {
-        debug_assert!(self.pending_specializations.is_some());
-        debug_assert!(other.pending_specializations.is_some());
-
-        match self.pending_specializations {
-            Some(ref mut pending_specializations) => {
-                for (k, v) in other.pending_specializations.unwrap().drain() {
-                    pending_specializations.insert(k, v);
-                }
-            }
-            None => {
-                unreachable!();
-            }
-        }
-
-        for (k, v) in other.partial_procs.drain() {
-            self.partial_procs.insert(k, v);
-        }
-
-        for (k, v) in other.specialized.drain() {
-            self.specialized.insert(k, v);
-        }
-
-        for (k, v) in other.runtime_errors.drain() {
-            self.runtime_errors.insert(k, v);
-        }
-
-        for symbol in other.module_thunks.drain() {
-            self.module_thunks.insert(symbol);
-        }
-    }
-
     pub fn get_specialized_procs_without_rc(
         self,
         arena: &'a Bump,

From 092db87474a1ed13457d3aa647e07a66934b141e Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:05:44 +0100
Subject: [PATCH 15/26] add import dependencies to module cache

---
 compiler/load/src/file.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs
index 44fe5bfb0b..058b6f6821 100644
--- a/compiler/load/src/file.rs
+++ b/compiler/load/src/file.rs
@@ -358,6 +358,7 @@ struct ModuleCache<'a> {
     external_specializations_requested: MutMap<ModuleId, ExternalSpecializations>,
 
     /// Various information
+    imports: MutMap<ModuleId, MutSet<ModuleId>>,
     documentation: MutMap<ModuleId, ModuleDocumentation>,
     can_problems: MutMap<ModuleId, Vec<roc_problem::can::Problem>>,
     type_problems: MutMap<ModuleId, Vec<solve::TypeError>>,
@@ -1641,6 +1642,18 @@ fn update<'a>(
                 .exposed_symbols_by_module
                 .insert(home, exposed_symbols);
 
+            state
+                .module_cache
+                .imports
+                .entry(header.module_id)
+                .or_default()
+                .extend(
+                    header
+                        .package_qualified_imported_modules
+                        .iter()
+                        .map(|x| *x.as_inner()),
+                );
+
             work.extend(state.dependencies.add_module(
                 header.module_id,
                 &header.package_qualified_imported_modules,

From 64955f23ff0b29bb1fa7ec9393a09d49f516597c Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:07:39 +0100
Subject: [PATCH 16/26] store module thunks

---
 compiler/load/src/file.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs
index 058b6f6821..cabeef9470 100644
--- a/compiler/load/src/file.rs
+++ b/compiler/load/src/file.rs
@@ -359,6 +359,7 @@ struct ModuleCache<'a> {
 
     /// Various information
     imports: MutMap<ModuleId, MutSet<ModuleId>>,
+    top_level_thunks: MutMap<ModuleId, MutSet<Symbol>>,
     documentation: MutMap<ModuleId, ModuleDocumentation>,
     can_problems: MutMap<ModuleId, Vec<roc_problem::can::Problem>>,
     type_problems: MutMap<ModuleId, Vec<solve::TypeError>>,
@@ -1917,6 +1918,13 @@ fn update<'a>(
                 }
             }
 
+            state
+                .module_cache
+                .top_level_thunks
+                .entry(module_id)
+                .or_default()
+                .extend(procs.module_thunks.iter().copied());
+
             let found_specializations_module = FoundSpecializationsModule {
                 layout_cache,
                 module_id,

From a361148380d64a54b5e42f64c88b484f2d718e3a Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:09:47 +0100
Subject: [PATCH 17/26] add imported_module_thunks

---
 compiler/mono/src/ir.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index 9d4d41efd3..aaf86934ab 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -273,6 +273,7 @@ impl ExternalSpecializations {
 #[derive(Clone, Debug)]
 pub struct Procs<'a> {
     pub partial_procs: MutMap<Symbol, PartialProc<'a>>,
+    pub imported_module_thunks: MutSet<Symbol>,
     pub module_thunks: MutSet<Symbol>,
     pub pending_specializations: Option<MutMap<Symbol, MutMap<Layout<'a>, PendingSpecialization>>>,
     pub specialized: MutMap<(Symbol, Layout<'a>), InProgressProc<'a>>,
@@ -285,6 +286,7 @@ impl<'a> Default for Procs<'a> {
     fn default() -> Self {
         Self {
             partial_procs: MutMap::default(),
+            imported_module_thunks: MutSet::default(),
             module_thunks: MutSet::default(),
             pending_specializations: Some(MutMap::default()),
             specialized: MutMap::default(),

From 6bd10ddc050a8ef2d3568401c2b492beeea4b5ca Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:23:35 +0100
Subject: [PATCH 18/26] use imported module thunks for pointer calling

---
 compiler/load/src/file.rs | 20 ++++++++++++++++++++
 compiler/mono/src/ir.rs   |  5 ++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs
index cabeef9470..283be55b16 100644
--- a/compiler/load/src/file.rs
+++ b/compiler/load/src/file.rs
@@ -546,11 +546,24 @@ fn start_phase<'a>(module_id: ModuleId, phase: Phase, state: &mut State<'a>) ->
                     ident_ids,
                 } = typechecked;
 
+                let mut imported_module_thunks = MutSet::default();
+
+                if let Some(imports) = state.module_cache.imports.get(&module_id) {
+                    for imported in imports.iter() {
+                        imported_module_thunks.extend(
+                            state.module_cache.top_level_thunks[imported]
+                                .iter()
+                                .copied(),
+                        );
+                    }
+                }
+
                 BuildTask::BuildPendingSpecializations {
                     layout_cache,
                     module_id,
                     module_timing,
                     solved_subs,
+                    imported_module_thunks,
                     decls,
                     ident_ids,
                     exposed_to_host: state.exposed_to_host.clone(),
@@ -950,6 +963,7 @@ enum BuildTask<'a> {
         module_timing: ModuleTiming,
         layout_cache: LayoutCache<'a>,
         solved_subs: Solved<Subs>,
+        imported_module_thunks: MutSet<Symbol>,
         module_id: ModuleId,
         ident_ids: IdentIds,
         decls: Vec<Declaration>,
@@ -3666,6 +3680,7 @@ fn make_specializations<'a>(
 fn build_pending_specializations<'a>(
     arena: &'a Bump,
     solved_subs: Solved<Subs>,
+    imported_module_thunks: MutSet<Symbol>,
     home: ModuleId,
     mut ident_ids: IdentIds,
     decls: Vec<Declaration>,
@@ -3678,6 +3693,9 @@ fn build_pending_specializations<'a>(
     let find_specializations_start = SystemTime::now();
     let mut procs = Procs::default();
 
+    debug_assert!(procs.imported_module_thunks.is_empty());
+    procs.imported_module_thunks = imported_module_thunks;
+
     let mut mono_problems = std::vec::Vec::new();
     let mut subs = solved_subs.into_inner();
     let mut mono_env = roc_mono::ir::Env {
@@ -3959,10 +3977,12 @@ where
             module_timing,
             layout_cache,
             solved_subs,
+            imported_module_thunks,
             exposed_to_host,
         } => Ok(build_pending_specializations(
             arena,
             solved_subs,
+            imported_module_thunks,
             module_id,
             ident_ids,
             decls,
diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index aaf86934ab..991249ee8e 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -5725,8 +5725,11 @@ fn call_by_pointer<'a>(
         // cause issues. The caller (which is here) doesn't know whether the called is a closure
         // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name
         // more in the future
+        let is_thunk =
+            procs.module_thunks.contains(&symbol) || procs.imported_module_thunks.contains(&symbol);
+
         match layout {
-            Layout::FunctionPointer(arg_layouts, ret_layout) if false => {
+            Layout::FunctionPointer(arg_layouts, ret_layout) if !is_thunk => {
                 if arg_layouts.iter().any(|l| l.contains_refcounted()) {
                     let name = env.unique_symbol();
                     let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena);

From 1c1c53ba950f3a8ecc9dfd1fc44bc4826369f717 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:38:10 +0100
Subject: [PATCH 19/26] flip map argument order

---
 examples/benchmarks/Base64.roc       | 58 ++++++++++++++--------------
 examples/benchmarks/Bytes/Decode.roc |  8 ++--
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index 1fed392288..1f2ecad216 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -31,47 +31,47 @@ decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string:
 loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str)
 loopHelp = \{ remaining, string } ->
     if remaining >= 3 then
-        helper = \x, y, z ->
-            a : U32
-            a = Num.intCast x
-            b : U32
-            b = Num.intCast y
-            c : U32
-            c = Num.intCast z
-            combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c
-            Loop
-                {
-                    remaining: remaining - 3,
-                    string: Str.concat string (bitsToChars combined 0)
-                }
-
-        Bytes.Decode.map3 helper
+        Bytes.Decode.map3 
             Bytes.Decode.u8
             Bytes.Decode.u8
             Bytes.Decode.u8
+            \x, y, z ->
+                a : U32
+                a = Num.intCast x
+                b : U32
+                b = Num.intCast y
+                c : U32
+                c = Num.intCast z
+                combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c
+                Loop
+                    {
+                        remaining: remaining - 3,
+                        string: Str.concat string (bitsToChars combined 0)
+                    }
 
     else if remaining == 0 then
         Bytes.Decode.succeed (Done string)
 
     else if remaining == 2 then
-        helperX = \x, y ->
-            a : U32
-            a = Num.intCast x
-            b : U32
-            b = Num.intCast y
-            combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)
-            Done (Str.concat string (bitsToChars combined 1))
+        Bytes.Decode.map2 
+            Bytes.Decode.u8
+            Bytes.Decode.u8
+            \x, y ->
+                a : U32
+                a = Num.intCast x
+                b : U32
+                b = Num.intCast y
+                combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)
+                Done (Str.concat string (bitsToChars combined 1))
 
-        Bytes.Decode.map2 helperX
-            Bytes.Decode.u8
-            Bytes.Decode.u8
     else
         # remaining = 1
+        Bytes.Decode.map 
             Bytes.Decode.u8
-                |> Bytes.Decode.map (\x -> 
-                    a : U32
-                    a = Num.intCast x
-                    Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)))
+            \x -> 
+                a : U32
+                a = Num.intCast x
+                Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))
 
 
 bitsToChars : U32, Int * -> Str
diff --git a/examples/benchmarks/Bytes/Decode.roc b/examples/benchmarks/Bytes/Decode.roc
index db14d857a6..f1da59e389 100644
--- a/examples/benchmarks/Bytes/Decode.roc
+++ b/examples/benchmarks/Bytes/Decode.roc
@@ -30,8 +30,8 @@ map = \@Decoder decoder, transform ->
                 Bad e
 
 
-map2 : (a,b -> c), Decoder a, Decoder b -> Decoder c
-map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> 
+map2 : Decoder a, Decoder b, (a, b -> c) -> Decoder c
+map2 = \@Decoder decoder1, @Decoder decoder2, transform -> 
     @Decoder \state1 -> 
         when decoder1 state1 is
             Good state2 a ->
@@ -45,8 +45,8 @@ map2 = \transform, @Decoder decoder1, @Decoder decoder2 ->
             Bad e ->
                 Bad e
 
-map3 : (a, b, c -> d), Decoder a, Decoder b, Decoder c -> Decoder d
-map3 = \transform, @Decoder decoder1, @Decoder decoder2, @Decoder decoder3 -> 
+map3 : Decoder a, Decoder b, Decoder c, (a, b, c -> d) -> Decoder d
+map3 = \@Decoder decoder1, @Decoder decoder2, @Decoder decoder3, transform -> 
     @Decoder \state1 -> 
         when decoder1 state1 is
             Good state2 a ->

From aff8266f0f6526bad8836628b528274f39bf6c55 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:40:46 +0100
Subject: [PATCH 20/26] move astar test

---
 cli/tests/cli_run.rs                                  | 4 ++--
 examples/benchmarks/{AStarTests.roc => TestAStar.roc} | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename examples/benchmarks/{AStarTests.roc => TestAStar.roc} (98%)

diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs
index 6ad19aed58..2f81acaec2 100644
--- a/cli/tests/cli_run.rs
+++ b/cli/tests/cli_run.rs
@@ -231,9 +231,9 @@ mod cli_run {
     #[serial(astar)]
     fn run_astar_optimized_1() {
         check_output_with_stdin(
-            &example_file("benchmarks", "AStarTests.roc"),
+            &example_file("benchmarks", "TestAStar.roc"),
             "1",
-            "astar-tests",
+            "test-astar",
             &[],
             "True\n",
             false,
diff --git a/examples/benchmarks/AStarTests.roc b/examples/benchmarks/TestAStar.roc
similarity index 98%
rename from examples/benchmarks/AStarTests.roc
rename to examples/benchmarks/TestAStar.roc
index 1cb5909c8f..0401566b6a 100644
--- a/examples/benchmarks/AStarTests.roc
+++ b/examples/benchmarks/TestAStar.roc
@@ -1,4 +1,4 @@
-app "astar-tests"
+app "test-astar"
     packages { base: "platform" }
     imports [base.Task, AStar]
     provides [ main ] to base

From 17a44aab024877acf1d2822c155da97f96257baf Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:40:56 +0100
Subject: [PATCH 21/26] fix whitespace

---
 examples/benchmarks/Base64.roc | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index 1f2ecad216..c2be2d31fa 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -31,7 +31,7 @@ decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string:
 loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str)
 loopHelp = \{ remaining, string } ->
     if remaining >= 3 then
-        Bytes.Decode.map3 
+        Bytes.Decode.map3
             Bytes.Decode.u8
             Bytes.Decode.u8
             Bytes.Decode.u8
@@ -53,7 +53,7 @@ loopHelp = \{ remaining, string } ->
         Bytes.Decode.succeed (Done string)
 
     else if remaining == 2 then
-        Bytes.Decode.map2 
+        Bytes.Decode.map2
             Bytes.Decode.u8
             Bytes.Decode.u8
             \x, y ->
@@ -66,9 +66,9 @@ loopHelp = \{ remaining, string } ->
 
     else
         # remaining = 1
-        Bytes.Decode.map 
+        Bytes.Decode.map
             Bytes.Decode.u8
-            \x -> 
+            \x ->
                 a : U32
                 a = Num.intCast x
                 Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))
@@ -90,33 +90,33 @@ bitsToCharsHelp = \bits, missing ->
     # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by
     # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd`
     # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left.
-        
+
     # any 6-bit number is a valid base64 digit, so this is actually safe
     p =
         Num.shiftRightZfBy 18 bits
             |> Num.intCast
-            |> unsafeToChar 
+            |> unsafeToChar
 
     q =
         Num.bitwiseAnd (Num.shiftRightZfBy 12 bits) lowest6BitsMask
             |> Num.intCast
-            |> unsafeToChar 
+            |> unsafeToChar
 
     r =
         Num.bitwiseAnd (Num.shiftRightZfBy 6 bits) lowest6BitsMask
             |> Num.intCast
-            |> unsafeToChar 
+            |> unsafeToChar
 
     s =
         Num.bitwiseAnd bits lowest6BitsMask
             |> Num.intCast
-            |> unsafeToChar 
+            |> unsafeToChar
 
     equals : U8
     equals = 61
 
     when missing is
-        0 -> 
+        0 ->
             [ p, q, r, s ]
         1 ->
             [ p, q, r, equals ]

From a6edc58323f1d11e6544abd070afe728c6568ab7 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 21:25:14 +0100
Subject: [PATCH 22/26] make fromUtf8 do RC

---
 cli/tests/cli_run.rs                   |  12 ++
 compiler/builtins/bitcode/src/main.zig |   2 +-
 compiler/builtins/bitcode/src/str.zig  | 158 +++++++++++++++++--------
 compiler/builtins/src/bitcode.rs       |   2 +-
 compiler/can/src/builtins.rs           |   8 +-
 compiler/gen/src/llvm/build_str.rs     | 122 ++++---------------
 examples/benchmarks/Base64.roc         |  19 +--
 examples/benchmarks/TestBase64.roc     |  17 +++
 8 files changed, 172 insertions(+), 168 deletions(-)
 create mode 100644 examples/benchmarks/TestBase64.roc

diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs
index 2f81acaec2..f3889ae374 100644
--- a/cli/tests/cli_run.rs
+++ b/cli/tests/cli_run.rs
@@ -240,6 +240,18 @@ mod cli_run {
         );
     }
 
+    #[test]
+    #[serial(base64)]
+    fn base64() {
+        check_output(
+            &example_file("benchmarks", "TestBase64.roc"),
+            "test-base64",
+            &[],
+            "SGVsbG8gV29ybGQ=",
+            true,
+        );
+    }
+
     #[test]
     #[serial(closure)]
     fn closure() {
diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig
index e124afe62c..e20d376b88 100644
--- a/compiler/builtins/bitcode/src/main.zig
+++ b/compiler/builtins/bitcode/src/main.zig
@@ -67,8 +67,8 @@ comptime {
     exportStrFn(str.strFromIntC, "from_int");
     exportStrFn(str.strFromFloatC, "from_float");
     exportStrFn(str.strEqual, "equal");
-    exportStrFn(str.validateUtf8Bytes, "validate_utf8_bytes");
     exportStrFn(str.strToBytesC, "to_bytes");
+    exportStrFn(str.fromUtf8C, "from_utf8");
 }
 
 // Export helpers - Must be run inside a comptime
diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig
index a752a37f0e..572eaa14e0 100644
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@@ -15,6 +15,7 @@ const InPlace = packed enum(u8) {
     Clone,
 };
 
+const SMALL_STR_MAX_LENGTH = small_string_size - 1;
 const small_string_size = 2 * @sizeOf(usize);
 const blank_small_string: [16]u8 = init_blank_small_string(small_string_size);
 
@@ -982,6 +983,71 @@ fn strToBytes(allocator: *Allocator, arg: RocStr) RocList {
     }
 }
 
+const FromUtf8Result = extern struct {
+    byte_index: usize,
+    string: RocStr,
+    is_ok: bool,
+    problem_code: Utf8ByteProblem,
+};
+
+pub fn fromUtf8C(arg: RocList, output: *FromUtf8Result) callconv(.C) void {
+    output.* = @call(.{ .modifier = always_inline }, fromUtf8, .{ std.heap.c_allocator, arg });
+}
+
+fn fromUtf8(allocator: *Allocator, arg: RocList) FromUtf8Result {
+    const bytes = @ptrCast([*]const u8, arg.bytes)[0..arg.length];
+
+    if (unicode.utf8ValidateSlice(bytes)) {
+        // the bytes are valid UTF-8, so the conversion will succeed. Now we need to take ownership of the input
+        if (arg.len() <= SMALL_STR_MAX_LENGTH) {
+            // turn the bytes into a small string
+            const string = RocStr.init(allocator, @ptrCast([*]u8, arg.bytes), arg.len());
+
+            // then decrement the input list
+            const data_bytes = arg.len();
+            utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes);
+
+            return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte };
+        } else {
+            const byte_list = arg.makeUnique(allocator, @alignOf(usize), @sizeOf(u8));
+
+            const string = RocStr{ .str_bytes = byte_list.bytes, .str_len = byte_list.length };
+
+            return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte };
+        }
+    } else {
+        const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length);
+
+        // TODO: decide what to do RC-wise here; the input list is not decremented on this path
+        // const data_bytes = arg.len();
+        // utils.decref(allocator, @alignOf(usize), arg.list_bytes, data_bytes);
+
+        return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, .problem_code = temp.problem };
+    }
+}
+
+fn errorToProblem(bytes: [*]u8, length: usize) struct { index: usize, problem: Utf8ByteProblem } {
+    var index: usize = 0;
+
+    while (index < length) {
+        const nextNumBytes = numberOfNextCodepointBytes(bytes, length, index) catch |err| {
+            switch (err) {
+                error.UnexpectedEof => {
+                    return .{ .index = index, .problem = Utf8ByteProblem.UnexpectedEndOfSequence };
+                },
+                error.Utf8InvalidStartByte => return .{ .index = index, .problem = Utf8ByteProblem.InvalidStartByte },
+                error.Utf8ExpectedContinuation => return .{ .index = index, .problem = Utf8ByteProblem.ExpectedContinuation },
+                error.Utf8OverlongEncoding => return .{ .index = index, .problem = Utf8ByteProblem.OverlongEncoding },
+                error.Utf8EncodesSurrogateHalf => return .{ .index = index, .problem = Utf8ByteProblem.EncodesSurrogateHalf },
+                error.Utf8CodepointTooLarge => return .{ .index = index, .problem = Utf8ByteProblem.CodepointTooLarge },
+            }
+        };
+        index += nextNumBytes;
+    }
+
+    unreachable;
+}
+
 pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool {
     const bytes: []u8 = ptr[0..len];
     return @call(.{ .modifier = always_inline }, unicode.utf8ValidateSlice, .{bytes});
@@ -1019,76 +1085,74 @@ pub const Utf8ByteProblem = packed enum(u8) {
     OverlongEncoding = 4,
     UnexpectedEndOfSequence = 5,
 };
-pub const ValidateUtf8BytesResult = extern struct {
-    is_ok: bool, byte_index: usize, problem_code: Utf8ByteProblem
-};
 
-const is_ok_utf8_byte_response =
-    ValidateUtf8BytesResult{ .is_ok = true, .byte_index = 0, .problem_code = Utf8ByteProblem.UnexpectedEndOfSequence };
-inline fn toErrUtf8ByteResponse(byte_index: usize, problem_code: Utf8ByteProblem) ValidateUtf8BytesResult {
-    return ValidateUtf8BytesResult{ .is_ok = false, .byte_index = byte_index, .problem_code = problem_code };
+fn validateUtf8Bytes(bytes: [*]u8, length: usize) FromUtf8Result {
+    return fromUtf8(std.testing.allocator, RocList{ .bytes = bytes, .length = length });
 }
 
-// Validate that an array of bytes is valid UTF-8, but if it fails catch & return the error & byte index
-pub fn validateUtf8Bytes(ptr: [*]u8, len: usize) callconv(.C) ValidateUtf8BytesResult {
-    var index: usize = 0;
-    while (index < len) {
-        const nextNumBytes = numberOfNextCodepointBytes(ptr, len, index) catch |err| {
-            return toErrUtf8ByteResponse(
-                index,
-                switch (err) {
-                    error.UnexpectedEof => Utf8ByteProblem.UnexpectedEndOfSequence,
-                    error.Utf8InvalidStartByte => Utf8ByteProblem.InvalidStartByte,
-                    error.Utf8ExpectedContinuation => Utf8ByteProblem.ExpectedContinuation,
-                    error.Utf8OverlongEncoding => Utf8ByteProblem.OverlongEncoding,
-                    error.Utf8EncodesSurrogateHalf => Utf8ByteProblem.EncodesSurrogateHalf,
-                    error.Utf8CodepointTooLarge => Utf8ByteProblem.CodepointTooLarge,
-                },
-            );
-        };
-        index += nextNumBytes;
-    }
-    return is_ok_utf8_byte_response;
+fn validateUtf8BytesX(str: RocList) FromUtf8Result {
+    return fromUtf8(std.testing.allocator, str);
 }
 
+fn expectOk(result: FromUtf8Result) void {
+    expectEqual(result.is_ok, true);
+}
+
+fn sliceHelp(bytes: [*]const u8, length: usize) RocList {
+    var list = RocList.allocate(testing.allocator, @alignOf(usize), length, @sizeOf(u8));
+    @memcpy(list.bytes orelse unreachable, bytes, length);
+    list.length = length;
+
+    return list;
+}
+
+fn toErrUtf8ByteResponse(index: usize, problem: Utf8ByteProblem) FromUtf8Result {
+    return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = index, .problem_code = problem };
+}
+
+// NOTE on memory: the validate function consumes one RC token of the input. Since
+// we freshly created it (in `sliceHelp`), it has only one RC token, so the input list will be deallocated.
+//
+// If we tested with big strings, we'd have to deallocate the output string, but never the input list
+
 test "validateUtf8Bytes: ascii" {
-    const str_len = 3;
-    var str: [str_len]u8 = "abc".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: unicode œ" {
-    const str_len = 2;
-    var str: [str_len]u8 = "œ".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "œ";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: unicode ∆" {
-    const str_len = 3;
-    var str: [str_len]u8 = "∆".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "∆";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: emoji" {
-    const str_len = 4;
-    var str: [str_len]u8 = "💖".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "💖";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: unicode ∆ in middle of array" {
-    const str_len = 9;
-    var str: [str_len]u8 = "œb∆c¬".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "œb∆c¬";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: invalid start byte" {
diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs
index b19117e106..125099af96 100644
--- a/compiler/builtins/src/bitcode.rs
+++ b/compiler/builtins/src/bitcode.rs
@@ -41,8 +41,8 @@ pub const STR_NUMBER_OF_BYTES: &str = "roc_builtins.str.number_of_bytes";
 pub const STR_FROM_INT: &str = "roc_builtins.str.from_int";
 pub const STR_FROM_FLOAT: &str = "roc_builtins.str.from_float";
 pub const STR_EQUAL: &str = "roc_builtins.str.equal";
-pub const STR_VALIDATE_UTF_BYTES: &str = "roc_builtins.str.validate_utf8_bytes";
 pub const STR_TO_BYTES: &str = "roc_builtins.str.to_bytes";
+pub const STR_FROM_UTF8: &str = "roc_builtins.str.from_utf8";
 
 pub const DICT_HASH: &str = "roc_builtins.dict.hash";
 pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index 7db64b296f..68cceaacf6 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -1598,7 +1598,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
                 Access {
                     record_var,
                     ext_var: var_store.fresh(),
-                    field: "isOk".into(),
+                    field: "c_isOk".into(),
                     field_var: var_store.fresh(),
                     loc_expr: Box::new(no_region(Var(Symbol::ARG_2))),
                 },
@@ -1610,7 +1610,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
                 vec![Access {
                     record_var,
                     ext_var: var_store.fresh(),
-                    field: "str".into(),
+                    field: "b_str".into(),
                     field_var: var_store.fresh(),
                     loc_expr: Box::new(no_region(Var(Symbol::ARG_2))),
                 }],
@@ -1627,14 +1627,14 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
                         Access {
                             record_var,
                             ext_var: var_store.fresh(),
-                            field: "problem".into(),
+                            field: "d_problem".into(),
                             field_var: var_store.fresh(),
                             loc_expr: Box::new(no_region(Var(Symbol::ARG_2))),
                         },
                         Access {
                             record_var,
                             ext_var: var_store.fresh(),
-                            field: "byteIndex".into(),
+                            field: "a_byteIndex".into(),
                             field_var: var_store.fresh(),
                             loc_expr: Box::new(no_region(Var(Symbol::ARG_2))),
                         },
diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs
index 301b726fb3..514d483c06 100644
--- a/compiler/gen/src/llvm/build_str.rs
+++ b/compiler/gen/src/llvm/build_str.rs
@@ -1,13 +1,11 @@
 use crate::llvm::bitcode::{call_bitcode_fn, call_void_bitcode_fn};
 use crate::llvm::build::{complex_bitcast, Env, InPlace, Scope};
-use crate::llvm::build_list::{
-    allocate_list, build_basic_phi2, empty_polymorphic_list, list_len, load_list_ptr, store_list,
-};
-use crate::llvm::convert::{collection, get_ptr_type};
+use crate::llvm::build_list::{allocate_list, store_list};
+use crate::llvm::convert::collection;
 use inkwell::builder::Builder;
-use inkwell::types::{BasicTypeEnum, StructType};
+use inkwell::types::BasicTypeEnum;
 use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
-use inkwell::{AddressSpace, IntPredicate};
+use inkwell::AddressSpace;
 use roc_builtins::bitcode;
 use roc_module::symbol::Symbol;
 use roc_mono::layout::{Builtin, Layout};
@@ -300,43 +298,28 @@ pub fn str_to_bytes<'a, 'ctx, 'env>(
 /// Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 }
 pub fn str_from_utf8<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
-    parent: FunctionValue<'ctx>,
+    _parent: FunctionValue<'ctx>,
     original_wrapper: StructValue<'ctx>,
 ) -> BasicValueEnum<'ctx> {
     let builder = env.builder;
     let ctx = env.context;
 
-    let list_len = list_len(builder, original_wrapper);
-    let ptr_type = get_ptr_type(&ctx.i8_type().into(), AddressSpace::Generic);
-    let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type);
-
-    let result_type = env
-        .module
-        .get_struct_type("str.ValidateUtf8BytesResult")
-        .unwrap();
+    let result_type = env.module.get_struct_type("str.FromUtf8Result").unwrap();
     let result_ptr = builder.build_alloca(result_type, "alloca_utf8_validate_bytes_result");
 
     call_void_bitcode_fn(
         env,
-        &[result_ptr.into(), list_ptr.into(), list_len.into()],
-        &bitcode::STR_VALIDATE_UTF_BYTES,
+        &[
+            complex_bitcast(
+                env.builder,
+                original_wrapper.into(),
+                env.context.i128_type().into(),
+                "to_i128",
+            ),
+            result_ptr.into(),
+        ],
+        &bitcode::STR_FROM_UTF8,
     );
-    let utf8_validate_bytes_result = builder
-        .build_load(result_ptr, "load_utf8_validate_bytes_result")
-        .into_struct_value();
-
-    let is_ok = builder
-        .build_extract_value(utf8_validate_bytes_result, 0, "extract_extract_is_ok")
-        .unwrap()
-        .into_int_value();
-    let byte_index = builder
-        .build_extract_value(utf8_validate_bytes_result, 1, "extract_byte_index")
-        .unwrap()
-        .into_int_value();
-    let problem_code = builder
-        .build_extract_value(utf8_validate_bytes_result, 2, "extract_problem_code")
-        .unwrap()
-        .into_int_value();
 
     let record_type = env.context.struct_type(
         &[
@@ -348,71 +331,16 @@ pub fn str_from_utf8<'a, 'ctx, 'env>(
         false,
     );
 
-    let comparison = builder.build_int_compare(
-        IntPredicate::EQ,
-        is_ok,
-        ctx.bool_type().const_int(1, false),
-        "compare_is_ok",
-    );
+    let result_ptr_cast = env
+        .builder
+        .build_bitcast(
+            result_ptr,
+            record_type.ptr_type(AddressSpace::Generic),
+            "to_unnamed",
+        )
+        .into_pointer_value();
 
-    build_basic_phi2(
-        env,
-        parent,
-        comparison,
-        || {
-            // We have a valid utf8 byte sequence
-            // TODO: Should we do something different here if we're doing this in place?
-            let zig_str =
-                call_bitcode_fn(env, &[list_ptr.into(), list_len.into()], &bitcode::STR_INIT)
-                    .into_struct_value();
-            build_struct(
-                builder,
-                record_type,
-                vec![
-                    (
-                        env.ptr_int().const_int(0, false).into(),
-                        "insert_zeroed_byte_index",
-                    ),
-                    (zig_str_to_struct(env, zig_str).into(), "insert_str"),
-                    (ctx.bool_type().const_int(1, false).into(), "insert_is_ok"),
-                    (
-                        ctx.i8_type().const_int(0, false).into(),
-                        "insert_zeroed_problem",
-                    ),
-                ],
-            )
-            .into()
-        },
-        || {
-            // We do not have a valid utf8 byte sequence
-            build_struct(
-                builder,
-                record_type,
-                vec![
-                    (byte_index.into(), "insert_byte_index"),
-                    (empty_polymorphic_list(env), "insert_zeroed_str"),
-                    (ctx.bool_type().const_int(0, false).into(), "insert_is_ok"),
-                    (problem_code.into(), "insert_problem"),
-                ],
-            )
-            .into()
-        },
-        BasicTypeEnum::StructType(record_type),
-    )
-}
-
-fn build_struct<'env, 'ctx>(
-    builder: &'env Builder<'ctx>,
-    struct_type: StructType<'ctx>,
-    values: Vec<(BasicValueEnum<'ctx>, &str)>,
-) -> StructValue<'ctx> {
-    let mut val = struct_type.get_undef().into();
-    for (index, (value, name)) in values.iter().enumerate() {
-        val = builder
-            .build_insert_value(val, *value, index as u32, name)
-            .unwrap();
-    }
-    val.into_struct_value()
+    builder.build_load(result_ptr_cast, "load_utf8_validate_bytes_result")
 }
 
 /// Str.fromInt : Int -> Str
diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index c2be2d31fa..c8d7c586f7 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -1,24 +1,7 @@
-app "base64"
-    packages { base: "platform" }
-    imports [base.Task, Bytes.Decode.{Decoder} ]
-    provides [ main ] to base
-
-IO a : Task.Task a []
+interface Base64 exposes [ fromBytes ] imports [ Bytes.Decode ]
 
 Decoder a : Bytes.Decode.Decoder a
 
-main : IO {}
-main =
-    # when fromBytes [ 0 ] is
-    when fromBytes (Str.toBytes "Hello World") is
-        Ok str ->
-            Task.putLine str
-
-        Err _ ->
-            Task.putLine "sadness"
-
-# ------
-
 
 fromBytes : List U8 -> Result Str Bytes.Decode.DecodeError
 fromBytes = \bytes ->
diff --git a/examples/benchmarks/TestBase64.roc b/examples/benchmarks/TestBase64.roc
new file mode 100644
index 0000000000..75adbca8be
--- /dev/null
+++ b/examples/benchmarks/TestBase64.roc
@@ -0,0 +1,17 @@
+app "test-base64"
+    packages { base: "platform" }
+    imports [base.Task, Base64 ]
+    provides [ main ] to base
+
+IO a : Task.Task a []
+
+main : IO {}
+main =
+    # when fromBytes [ 0 ] is
+    when Base64.fromBytes (Str.toBytes "Hello World") is
+        Ok str ->
+            Task.putLine str
+
+        Err _ ->
+            Task.putLine "sadness"
+

From 75ee81db883b1e76e276b596807af6668867ea0f Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 21:27:02 +0100
Subject: [PATCH 23/26] fix base64 test output

---
 cli/tests/cli_run.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs
index f3889ae374..251259b490 100644
--- a/cli/tests/cli_run.rs
+++ b/cli/tests/cli_run.rs
@@ -247,7 +247,7 @@ mod cli_run {
             &example_file("benchmarks", "TestBase64.roc"),
             "test-base64",
             &[],
-            "SGVsbG8gV29ybGQ=",
+            "SGVsbG8gV29ybGQ=\n",
             true,
         );
     }

From 7304154452271f86c835a681a30112f8fe0483e2 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 21:36:16 +0100
Subject: [PATCH 24/26] update comment

---
 compiler/mono/src/ir.rs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index 991249ee8e..2ad3995fe0 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -5720,11 +5720,13 @@ fn call_by_pointer<'a>(
     let is_specialized = procs.specialized.keys().any(|(s, _)| *s == symbol);
     if env.is_imported_symbol(symbol) || procs.partial_procs.contains_key(&symbol) || is_specialized
     {
-        // TODO we should be able to call by name in this wrapper for "normal" functions
-        // but closures, specifically top-level values that are closures (by unification)
-        // cause issues. The caller (which is here) doesn't know whether the called is a closure
-        // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name
-        // more in the future
+        // Anything that is not a thunk can be called by-name in the wrapper
+        // (the above condition guarantees we're dealing with a top-level symbol).
+        //
+        // But thunks cannot be called by-name, since they are not really functions to all parts
+        // of the system (notably RC insertion). So we still call those by-pointer.
+        // Luckily such values were top-level originally (in the user code), and can therefore
+        // not be closures.
         let is_thunk =
             procs.module_thunks.contains(&symbol) || procs.imported_module_thunks.contains(&symbol);
 

From 3537fa57d2d1463d737270281c2054db85c5db0a Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 21:46:58 +0100
Subject: [PATCH 25/26] decref the input list when the input is invalid utf8

---
 compiler/builtins/bitcode/src/str.zig | 6 +++---
 examples/benchmarks/TestBase64.roc    | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig
index 572eaa14e0..e18eca4306 100644
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@@ -1018,9 +1018,9 @@ fn fromUtf8(allocator: *Allocator, arg: RocList) FromUtf8Result {
     } else {
         const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length);
 
-        // TODO what should we do RC-wise here
-        // const data_bytes = arg.len();
-        // utils.decref(allocator, @alignOf(usize), arg.list_bytes, data_bytes);
+        // consume the input list
+        const data_bytes = arg.len();
+        utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes);
 
         return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, .problem_code = temp.problem };
     }
diff --git a/examples/benchmarks/TestBase64.roc b/examples/benchmarks/TestBase64.roc
index 75adbca8be..27c5617ebf 100644
--- a/examples/benchmarks/TestBase64.roc
+++ b/examples/benchmarks/TestBase64.roc
@@ -7,7 +7,6 @@ IO a : Task.Task a []
 
 main : IO {}
 main =
-    # when fromBytes [ 0 ] is
     when Base64.fromBytes (Str.toBytes "Hello World") is
         Ok str ->
             Task.putLine str

From 134f8a15e9e9baaa204990ff127352d4577bd5ba Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 22:26:18 +0100
Subject: [PATCH 26/26] fix zig tests

---
 compiler/builtins/bitcode/src/str.zig | 98 +++++++++++++--------------
 1 file changed, 48 insertions(+), 50 deletions(-)

diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig
index e18eca4306..62a88058e0 100644
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@@ -1155,102 +1155,100 @@ test "validateUtf8Bytes: unicode ∆ in middle of array" {
     expectOk(validateUtf8BytesX(list));
 }
 
+fn expectErr(list: RocList, index: usize, err: Utf8DecodeError, problem: Utf8ByteProblem) void {
+    const str_ptr = @ptrCast([*]u8, list.bytes);
+    const str_len = list.length;
+
+    expectError(err, numberOfNextCodepointBytes(str_ptr, str_len, index));
+    expectEqual(toErrUtf8ByteResponse(index, problem), validateUtf8Bytes(str_ptr, str_len));
+}
+
 test "validateUtf8Bytes: invalid start byte" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426
-    const str_len = 4;
-    var str: [str_len]u8 = "ab\x80c".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "ab\x80c";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8InvalidStartByte, numberOfNextCodepointBytes(str_ptr, str_len, 2));
-    expectEqual(toErrUtf8ByteResponse(2, Utf8ByteProblem.InvalidStartByte), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 2, error.Utf8InvalidStartByte, Utf8ByteProblem.InvalidStartByte);
 }
 
 test "validateUtf8Bytes: unexpected eof for 2 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426
-    const str_len = 4;
-    var str: [str_len]u8 = "abc\xc2".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xc2";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence);
 }
 
 test "validateUtf8Bytes: expected continuation for 2 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426
-    const str_len = 5;
-    var str: [str_len]u8 = "abc\xc2\x00".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xc2\x00";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation);
 }
 
 test "validateUtf8Bytes: unexpected eof for 3 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L430
-    const str_len = 5;
-    var str: [str_len]u8 = "abc\xe0\x00".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xe0\x00";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence);
 }
 
 test "validateUtf8Bytes: expected continuation for 3 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L430
-    const str_len = 6;
-    var str: [str_len]u8 = "abc\xe0\xa0\xc0".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xe0\xa0\xc0";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation);
 }
 
 test "validateUtf8Bytes: unexpected eof for 4 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L437
-    const str_len = 6;
-    var str: [str_len]u8 = "abc\xf0\x90\x00".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xf0\x90\x00";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence);
 }
 
 test "validateUtf8Bytes: expected continuation for 4 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L437
-    const str_len = 7;
-    var str: [str_len]u8 = "abc\xf0\x90\x80\x00".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xf0\x90\x80\x00";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation);
 }
 
 test "validateUtf8Bytes: overlong" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L451
-    const str_len = 7;
-    var str: [str_len]u8 = "abc\xf0\x80\x80\x80".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xf0\x80\x80\x80";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8OverlongEncoding, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.OverlongEncoding), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8OverlongEncoding, Utf8ByteProblem.OverlongEncoding);
 }
 
 test "validateUtf8Bytes: codepoint out too large" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L465
-    const str_len = 7;
-    var str: [str_len]u8 = "abc\xf4\x90\x80\x80".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xf4\x90\x80\x80";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8CodepointTooLarge, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.CodepointTooLarge), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8CodepointTooLarge, Utf8ByteProblem.CodepointTooLarge);
 }
 
 test "validateUtf8Bytes: surrogate halves" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L468
-    const str_len = 6;
-    var str: [str_len]u8 = "abc\xed\xa0\x80".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xed\xa0\x80";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8EncodesSurrogateHalf, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.EncodesSurrogateHalf), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8EncodesSurrogateHalf, Utf8ByteProblem.EncodesSurrogateHalf);
 }