From 89bf22598ecbe3fac6ba62944b76630501ac28bd Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 14:41:08 +0100
Subject: [PATCH 01/33] call by pointer wrappers need to call by pointer for
 closures

---
 compiler/mono/src/ir.rs | 65 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index 9e6f2ccd47..79628b27ac 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -5751,8 +5751,13 @@ fn call_by_pointer<'a>(
     let is_specialized = procs.specialized.keys().any(|(s, _)| *s == symbol);
     if env.is_imported_symbol(symbol) || procs.partial_procs.contains_key(&symbol) || is_specialized
     {
+        // TODO we should be able to call by name in this wrapper for "normal" functions
+        // but closures, specifically top-level values that are closures (by unification)
+        // cause issues. The caller (which is here) doesn't know whether the called is a closure
+        // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name
+        // more in the future
         match layout {
-            Layout::FunctionPointer(arg_layouts, ret_layout) => {
+            Layout::FunctionPointer(arg_layouts, ret_layout) if false => {
                 if arg_layouts.iter().any(|l| l.contains_refcounted()) {
                     let name = env.unique_symbol();
                     let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena);
@@ -5766,6 +5771,7 @@ fn call_by_pointer<'a>(
                     let args = args.into_bump_slice();
 
                     let call_symbol = env.unique_symbol();
+                    debug_assert_eq!(arg_layouts.len(), arg_symbols.len());
                     let call_type = CallType::ByName {
                         name: symbol,
                         full_layout: layout.clone(),
@@ -5804,6 +5810,63 @@ fn call_by_pointer<'a>(
                     Expr::FunctionPointer(symbol, layout)
                 }
             }
+            Layout::FunctionPointer(arg_layouts, ret_layout) => {
+                if arg_layouts.iter().any(|l| l.contains_refcounted()) {
+                    let name = env.unique_symbol();
+                    let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena);
+                    let mut arg_symbols = Vec::with_capacity_in(arg_layouts.len(), env.arena);
+
+                    for layout in arg_layouts {
+                        let symbol = env.unique_symbol();
+                        args.push((layout.clone(), symbol));
+                        arg_symbols.push(symbol);
+                    }
+                    let args = args.into_bump_slice();
+
+                    let call_symbol = env.unique_symbol();
+                    let fpointer_symbol = env.unique_symbol();
+                    debug_assert_eq!(arg_layouts.len(), arg_symbols.len());
+                    let call_type = CallType::ByPointer {
+                        name: fpointer_symbol,
+                        full_layout: layout.clone(),
+                        ret_layout: ret_layout.clone(),
+                        arg_layouts,
+                    };
+                    let call = Call {
+                        call_type,
+                        arguments: arg_symbols.into_bump_slice(),
+                    };
+                    let expr = Expr::Call(call);
+
+                    let mut body = Stmt::Ret(call_symbol);
+
+                    body = Stmt::Let(call_symbol, expr, ret_layout.clone(), env.arena.alloc(body));
+
+                    let expr = Expr::FunctionPointer(symbol, layout.clone());
+                    body = Stmt::Let(fpointer_symbol, expr, layout.clone(), env.arena.alloc(body));
+
+                    let closure_data_layout = None;
+                    let proc = Proc {
+                        name,
+                        args,
+                        body,
+                        closure_data_layout,
+                        ret_layout: ret_layout.clone(),
+                        is_self_recursive: SelfRecursive::NotSelfRecursive,
+                        must_own_arguments: true,
+                        host_exposed_layouts: HostExposedLayouts::NotHostExposed,
+                    };
+
+                    procs
+                        .specialized
+                        .insert((name, layout.clone()), InProgressProc::Done(proc));
+                    Expr::FunctionPointer(name, layout)
+                } else {
+                    // if none of the arguments is refcounted, then owning the arguments has no
+                    // meaning
+                    Expr::FunctionPointer(symbol, layout)
+                }
+            }
             _ => {
                 // e.g. Num.maxInt or other constants
                 Expr::FunctionPointer(symbol, layout)

From ea76578e0694e68284a084c882509cd1d41e13ce Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 15:15:16 +0100
Subject: [PATCH 02/33] add Num.bitwiseOr and Num.shiftLeftBy

---
 compiler/builtins/src/std.rs     | 18 ++++++++++++++++++
 compiler/can/src/builtins.rs     | 16 ++++++++++++++++
 compiler/gen/src/llvm/build.rs   | 26 +++++++++++++++++++++++++-
 compiler/module/src/low_level.rs |  2 ++
 compiler/module/src/symbol.rs    | 20 +++++++++++---------
 compiler/mono/src/borrow.rs      | 12 +++++++++++-
 6 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs
index 42eb034581..5156fadc0b 100644
--- a/compiler/builtins/src/std.rs
+++ b/compiler/builtins/src/std.rs
@@ -324,6 +324,24 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
+    // bitwiseOr : Int a, Int a -> Int a
+    add_type(
+        Symbol::NUM_BITWISE_OR,
+        top_level_function(
+            vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))],
+            Box::new(int_type(flex(TVAR1))),
+        ),
+    );
+
+    // shiftLeftBy : Nat, Int a -> Int a
+    add_type(
+        Symbol::NUM_SHIFT_LEFT,
+        top_level_function(
+            vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))],
+            Box::new(int_type(flex(TVAR1))),
+        ),
+    );
+
     // rem : Int a, Int a -> Result (Int a) [ DivByZero ]*
     add_type(
         Symbol::NUM_REM,
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index fdff0a4e2c..f3e1ff4a32 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -151,6 +151,8 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         NUM_MIN_INT => num_min_int,
         NUM_BITWISE_AND => num_bitwise_and,
         NUM_BITWISE_XOR => num_bitwise_xor,
+        NUM_BITWISE_OR => num_bitwise_or,
+        NUM_SHIFT_LEFT=> num_shift_left_by,
         RESULT_MAP => result_map,
         RESULT_MAP_ERR => result_map_err,
         RESULT_WITH_DEFAULT => result_with_default,
@@ -273,6 +275,10 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
         Symbol::NUM_ASIN => num_asin,
         Symbol::NUM_MAX_INT => num_max_int,
         Symbol::NUM_MIN_INT => num_min_int,
+        Symbol::NUM_BITWISE_AND => num_bitwise_and,
+        Symbol::NUM_BITWISE_XOR => num_bitwise_xor,
+        Symbol::NUM_BITWISE_OR => num_bitwise_or,
+        Symbol::NUM_SHIFT_LEFT=> num_shift_left_by,
         Symbol::RESULT_MAP => result_map,
         Symbol::RESULT_MAP_ERR => result_map_err,
         Symbol::RESULT_WITH_DEFAULT => result_with_default,
@@ -1299,6 +1305,16 @@ fn num_bitwise_xor(symbol: Symbol, var_store: &mut VarStore) -> Def {
     num_binop(symbol, var_store, LowLevel::NumBitwiseXor)
 }
 
+/// Num.bitwiseOr: Int, Int -> Int
+fn num_bitwise_or(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    num_binop(symbol, var_store, LowLevel::NumBitwiseOr)
+}
+
+/// Num.shiftLeftBy: Int a, Int a -> Int a
+fn num_shift_left_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_2(symbol, LowLevel::NumShiftLeftBy, var_store)
+}
+
 /// List.isEmpty : List * -> Bool
 fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let list_var = var_store.fresh();
diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index 5e65bc2cee..e77ae31500 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -3943,7 +3943,23 @@ fn run_low_level<'a, 'ctx, 'env>(
 
             build_num_binop(env, parent, lhs_arg, lhs_layout, rhs_arg, rhs_layout, op)
         }
-        NumBitwiseAnd | NumBitwiseXor => {
+        NumBitwiseAnd | NumBitwiseOr | NumBitwiseXor => {
+            debug_assert_eq!(args.len(), 2);
+
+            let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]);
+            let (rhs_arg, rhs_layout) = load_symbol_and_layout(scope, &args[1]);
+
+            build_int_binop(
+                env,
+                parent,
+                lhs_arg.into_int_value(),
+                lhs_layout,
+                rhs_arg.into_int_value(),
+                rhs_layout,
+                op,
+            )
+        }
+        NumShiftLeftBy => {
             debug_assert_eq!(args.len(), 2);
 
             let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]);
@@ -4585,6 +4601,14 @@ fn build_int_binop<'a, 'ctx, 'env>(
         NumPowInt => call_bitcode_fn(env, &[lhs.into(), rhs.into()], &bitcode::NUM_POW_INT),
         NumBitwiseAnd => bd.build_and(lhs, rhs, "int_bitwise_and").into(),
         NumBitwiseXor => bd.build_xor(lhs, rhs, "int_bitwise_xor").into(),
+        NumBitwiseOr => bd.build_or(lhs, rhs, "int_bitwise_or").into(),
+        NumShiftLeftBy => {
+            // NOTE arguments are flipped;
+            // we write `assert_eq!(0b0000_0001 << 0, 0b0000_0001);`
+            // as `Num.shiftLeftBy 0 0b0000_0001
+            bd.build_left_shift(rhs, lhs, "int_bitwise_or").into()
+        }
+
         _ => {
             unreachable!("Unrecognized int binary operation: {:?}", op);
         }
diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs
index e69fa0dd02..05a20c72c5 100644
--- a/compiler/module/src/low_level.rs
+++ b/compiler/module/src/low_level.rs
@@ -78,6 +78,8 @@ pub enum LowLevel {
     NumAsin,
     NumBitwiseAnd,
     NumBitwiseXor,
+    NumBitwiseOr,
+    NumShiftLeftBy,
     Eq,
     NotEq,
     And,
diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs
index 54700dd492..64717e405b 100644
--- a/compiler/module/src/symbol.rs
+++ b/compiler/module/src/symbol.rs
@@ -841,15 +841,17 @@ define_builtins! {
         80 NUM_BINARY32: "Binary32" imported
         81 NUM_BITWISE_AND: "bitwiseAnd"
         82 NUM_BITWISE_XOR: "bitwiseXor"
-        83 NUM_SUB_WRAP: "subWrap"
-        84 NUM_SUB_CHECKED: "subChecked"
-        85 NUM_MUL_WRAP: "mulWrap"
-        86 NUM_MUL_CHECKED: "mulChecked"
-        87 NUM_INT: "Int" imported
-        88 NUM_FLOAT: "Float" imported
-        89 NUM_AT_NATURAL: "@Natural"
-        90 NUM_NATURAL: "Natural" imported
-        91 NUM_NAT: "Nat" imported
+        83 NUM_BITWISE_OR: "bitwiseOr"
+        84 NUM_SHIFT_LEFT: "shiftLeftBy"
+        85 NUM_SUB_WRAP: "subWrap"
+        86 NUM_SUB_CHECKED: "subChecked"
+        87 NUM_MUL_WRAP: "mulWrap"
+        88 NUM_MUL_CHECKED: "mulChecked"
+        89 NUM_INT: "Int" imported
+        90 NUM_FLOAT: "Float" imported
+        91 NUM_AT_NATURAL: "@Natural"
+        92 NUM_NATURAL: "Natural" imported
+        93 NUM_NAT: "Nat" imported
     }
     2 BOOL: "Bool" => {
         0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias
diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs
index ba652c0c7b..2fa8893d4c 100644
--- a/compiler/mono/src/borrow.rs
+++ b/compiler/mono/src/borrow.rs
@@ -373,6 +373,14 @@ impl<'a> BorrowInfState<'a> {
                         self.own_var(z);
 
                         // if the function exects an owned argument (ps), the argument must be owned (args)
+                        debug_assert_eq!(
+                            arguments.len(),
+                            ps.len(),
+                            "{:?} has {} parameters, but was applied to {} arguments",
+                            name,
+                            ps.len(),
+                            arguments.len()
+                        );
                         self.own_args_using_params(arguments, ps);
                     }
                     None => {
@@ -658,7 +666,9 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         And | Or | NumAdd | NumAddWrap | NumAddChecked | NumSub | NumSubWrap | NumSubChecked
         | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare
         | NumDivUnchecked | NumRemUnchecked | NumPow | NumPowInt | NumBitwiseAnd
-        | NumBitwiseXor => arena.alloc_slice_copy(&[irrelevant, irrelevant]),
+        | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy => {
+            arena.alloc_slice_copy(&[irrelevant, irrelevant])
+        }
 
         NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumRound | NumCeiling | NumFloor
         | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin => {

From 128741e5856adebc368836c54b40a1eaf50979fd Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 16:01:01 +0100
Subject: [PATCH 03/33] add right shift operators

---
 compiler/builtins/src/std.rs     | 20 +++++++++++++++++++-
 compiler/can/src/builtins.rs     | 16 +++++++++++++++-
 compiler/gen/src/llvm/build.rs   | 14 ++++++++++++--
 compiler/module/src/low_level.rs |  2 ++
 compiler/module/src/symbol.rs    | 20 +++++++++++---------
 compiler/mono/src/borrow.rs      |  2 +-
 6 files changed, 60 insertions(+), 14 deletions(-)

diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs
index e7e7eea3c1..084edc1cd4 100644
--- a/compiler/builtins/src/std.rs
+++ b/compiler/builtins/src/std.rs
@@ -333,7 +333,7 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
-    // shiftLeftBy : Nat, Int a -> Int a
+    // shiftLeftBy : Int a, Int a -> Int a
     add_type(
         Symbol::NUM_SHIFT_LEFT,
         top_level_function(
@@ -342,6 +342,24 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
+    // shiftRightBy : Int a, Int a -> Int a
+    add_type(
+        Symbol::NUM_SHIFT_RIGHT,
+        top_level_function(
+            vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))],
+            Box::new(int_type(flex(TVAR1))),
+        ),
+    );
+
+    // shiftRightZfBy : Int a, Int a -> Int a
+    add_type(
+        Symbol::NUM_SHIFT_RIGHT_ZERO_FILL,
+        top_level_function(
+            vec![int_type(flex(TVAR1)), int_type(flex(TVAR1))],
+            Box::new(int_type(flex(TVAR1))),
+        ),
+    );
+
     // rem : Int a, Int a -> Result (Int a) [ DivByZero ]*
     add_type(
         Symbol::NUM_REM,
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index 6c8f7a9cd3..c8516d26ca 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -154,6 +154,8 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         NUM_BITWISE_XOR => num_bitwise_xor,
         NUM_BITWISE_OR => num_bitwise_or,
         NUM_SHIFT_LEFT=> num_shift_left_by,
+        NUM_SHIFT_RIGHT => num_shift_right_by,
+        NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by,
         RESULT_MAP => result_map,
         RESULT_MAP_ERR => result_map_err,
         RESULT_WITH_DEFAULT => result_with_default,
@@ -280,7 +282,9 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
         Symbol::NUM_BITWISE_AND => num_bitwise_and,
         Symbol::NUM_BITWISE_XOR => num_bitwise_xor,
         Symbol::NUM_BITWISE_OR => num_bitwise_or,
-        Symbol::NUM_SHIFT_LEFT=> num_shift_left_by,
+        Symbol::NUM_SHIFT_LEFT => num_shift_left_by,
+        Symbol::NUM_SHIFT_RIGHT => num_shift_right_by,
+        Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by,
         Symbol::RESULT_MAP => result_map,
         Symbol::RESULT_MAP_ERR => result_map_err,
         Symbol::RESULT_WITH_DEFAULT => result_with_default,
@@ -1317,6 +1321,16 @@ fn num_shift_left_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
     lowlevel_2(symbol, LowLevel::NumShiftLeftBy, var_store)
 }
 
+/// Num.shiftRightBy: Int a, Int a -> Int a
+fn num_shift_right_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_2(symbol, LowLevel::NumShiftRightBy, var_store)
+}
+
+/// Num.shiftRightZfBy: Int a, Int a -> Int a
+fn num_shift_right_zf_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_2(symbol, LowLevel::NumShiftRightZfBy, var_store)
+}
+
 /// List.isEmpty : List * -> Bool
 fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let list_var = var_store.fresh();
diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index 250c903014..ed149b77a4 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -3967,7 +3967,7 @@ fn run_low_level<'a, 'ctx, 'env>(
                 op,
             )
         }
-        NumShiftLeftBy => {
+        NumShiftLeftBy | NumShiftRightBy | NumShiftRightZfBy => {
             debug_assert_eq!(args.len(), 2);
 
             let (lhs_arg, lhs_layout) = load_symbol_and_layout(scope, &args[0]);
@@ -4614,7 +4614,17 @@ fn build_int_binop<'a, 'ctx, 'env>(
             // NOTE arguments are flipped;
             // we write `assert_eq!(0b0000_0001 << 0, 0b0000_0001);`
             // as `Num.shiftLeftBy 0 0b0000_0001
-            bd.build_left_shift(rhs, lhs, "int_bitwise_or").into()
+            bd.build_left_shift(rhs, lhs, "int_shift_left").into()
+        }
+        NumShiftRightBy => {
+            // NOTE arguments are flipped;
+            bd.build_right_shift(rhs, lhs, false, "int_shift_right")
+                .into()
+        }
+        NumShiftRightZfBy => {
+            // NOTE arguments are flipped;
+            bd.build_right_shift(rhs, lhs, true, "int_shift_right_zf")
+                .into()
         }
 
         _ => {
diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs
index 9740c80d80..7279a4ae76 100644
--- a/compiler/module/src/low_level.rs
+++ b/compiler/module/src/low_level.rs
@@ -81,6 +81,8 @@ pub enum LowLevel {
     NumBitwiseXor,
     NumBitwiseOr,
     NumShiftLeftBy,
+    NumShiftRightBy,
+    NumShiftRightZfBy,
     Eq,
     NotEq,
     And,
diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs
index 23fa1837f0..a7d19116dc 100644
--- a/compiler/module/src/symbol.rs
+++ b/compiler/module/src/symbol.rs
@@ -843,15 +843,17 @@ define_builtins! {
         82 NUM_BITWISE_XOR: "bitwiseXor"
         83 NUM_BITWISE_OR: "bitwiseOr"
         84 NUM_SHIFT_LEFT: "shiftLeftBy"
-        85 NUM_SUB_WRAP: "subWrap"
-        86 NUM_SUB_CHECKED: "subChecked"
-        87 NUM_MUL_WRAP: "mulWrap"
-        88 NUM_MUL_CHECKED: "mulChecked"
-        89 NUM_INT: "Int" imported
-        90 NUM_FLOAT: "Float" imported
-        91 NUM_AT_NATURAL: "@Natural"
-        92 NUM_NATURAL: "Natural" imported
-        93 NUM_NAT: "Nat" imported
+        85 NUM_SHIFT_RIGHT: "shiftRightBy"
+        86 NUM_SHIFT_RIGHT_ZERO_FILL: "shiftRightZfBy"
+        87 NUM_SUB_WRAP: "subWrap"
+        88 NUM_SUB_CHECKED: "subChecked"
+        89 NUM_MUL_WRAP: "mulWrap"
+        90 NUM_MUL_CHECKED: "mulChecked"
+        91 NUM_INT: "Int" imported
+        92 NUM_FLOAT: "Float" imported
+        93 NUM_AT_NATURAL: "@Natural"
+        94 NUM_NATURAL: "Natural" imported
+        95 NUM_NAT: "Nat" imported
     }
     2 BOOL: "Bool" => {
         0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias
diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs
index a06f9f7e37..294e2582c0 100644
--- a/compiler/mono/src/borrow.rs
+++ b/compiler/mono/src/borrow.rs
@@ -666,7 +666,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         And | Or | NumAdd | NumAddWrap | NumAddChecked | NumSub | NumSubWrap | NumSubChecked
         | NumMul | NumMulWrap | NumMulChecked | NumGt | NumGte | NumLt | NumLte | NumCompare
         | NumDivUnchecked | NumRemUnchecked | NumPow | NumPowInt | NumBitwiseAnd
-        | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy => {
+        | NumBitwiseXor | NumBitwiseOr | NumShiftLeftBy | NumShiftRightBy | NumShiftRightZfBy => {
             arena.alloc_slice_copy(&[irrelevant, irrelevant])
         }
 

From 43e71f2ee933e25f5a1f45243af8be6c8290724c Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 16:07:35 +0100
Subject: [PATCH 04/33] int cast

---
 compiler/builtins/src/std.rs     |  6 ++++++
 compiler/can/src/builtins.rs     |  7 +++++++
 compiler/gen/src/llvm/build.rs   | 10 ++++++++++
 compiler/module/src/low_level.rs |  1 +
 compiler/module/src/symbol.rs    |  2 ++
 compiler/mono/src/borrow.rs      |  2 +-
 6 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs
index 084edc1cd4..5a3b499dd7 100644
--- a/compiler/builtins/src/std.rs
+++ b/compiler/builtins/src/std.rs
@@ -360,6 +360,12 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
+    // intCast : Int a -> Int b
+    add_type(
+        Symbol::NUM_INT_CAST,
+        top_level_function(vec![int_type(flex(TVAR1))], Box::new(int_type(flex(TVAR2)))),
+    );
+
     // rem : Int a, Int a -> Result (Int a) [ DivByZero ]*
     add_type(
         Symbol::NUM_REM,
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index c8516d26ca..af38527fcc 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -156,6 +156,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         NUM_SHIFT_LEFT=> num_shift_left_by,
         NUM_SHIFT_RIGHT => num_shift_right_by,
         NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by,
+        NUM_INT_CAST=> num_int_cast,
         RESULT_MAP => result_map,
         RESULT_MAP_ERR => result_map_err,
         RESULT_WITH_DEFAULT => result_with_default,
@@ -285,6 +286,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
         Symbol::NUM_SHIFT_LEFT => num_shift_left_by,
         Symbol::NUM_SHIFT_RIGHT => num_shift_right_by,
         Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => num_shift_right_zf_by,
+        Symbol::NUM_INT_CAST=> num_int_cast,
         Symbol::RESULT_MAP => result_map,
         Symbol::RESULT_MAP_ERR => result_map_err,
         Symbol::RESULT_WITH_DEFAULT => result_with_default,
@@ -1331,6 +1333,11 @@ fn num_shift_right_zf_by(symbol: Symbol, var_store: &mut VarStore) -> Def {
     lowlevel_2(symbol, LowLevel::NumShiftRightZfBy, var_store)
 }
 
+/// Num.intCast: Int a -> Int b
+fn num_int_cast(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_1(symbol, LowLevel::NumIntCast, var_store)
+}
+
 /// List.isEmpty : List * -> Bool
 fn list_is_empty(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let list_var = var_store.fresh();
diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index ed149b77a4..9ba362841f 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -3983,6 +3983,16 @@ fn run_low_level<'a, 'ctx, 'env>(
                 op,
             )
         }
+        NumIntCast => {
+            debug_assert_eq!(args.len(), 1);
+
+            let arg = load_symbol(scope, &args[0]).into_int_value();
+
+            let to = basic_type_from_layout(env.arena, env.context, layout, env.ptr_bytes)
+                .into_int_type();
+
+            env.builder.build_int_cast(arg, to, "inc_cast").into()
+        }
         Eq => {
             debug_assert_eq!(args.len(), 2);
 
diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs
index 7279a4ae76..640b8c8bca 100644
--- a/compiler/module/src/low_level.rs
+++ b/compiler/module/src/low_level.rs
@@ -83,6 +83,7 @@ pub enum LowLevel {
     NumShiftLeftBy,
     NumShiftRightBy,
     NumShiftRightZfBy,
+    NumIntCast,
     Eq,
     NotEq,
     And,
diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs
index a7d19116dc..62f5a9d457 100644
--- a/compiler/module/src/symbol.rs
+++ b/compiler/module/src/symbol.rs
@@ -854,6 +854,8 @@ define_builtins! {
         93 NUM_AT_NATURAL: "@Natural"
         94 NUM_NATURAL: "Natural" imported
         95 NUM_NAT: "Nat" imported
+        96 NUM_INT_CAST: "intCast"
+
     }
     2 BOOL: "Bool" => {
         0 BOOL_BOOL: "Bool" imported // the Bool.Bool type alias
diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs
index 294e2582c0..c0d4f1e091 100644
--- a/compiler/mono/src/borrow.rs
+++ b/compiler/mono/src/borrow.rs
@@ -671,7 +671,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         }
 
         NumAbs | NumNeg | NumSin | NumCos | NumSqrtUnchecked | NumRound | NumCeiling | NumFloor
-        | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin => {
+        | NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin | NumIntCast => {
             arena.alloc_slice_copy(&[irrelevant])
         }
         StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]),

From bcbef5d3aac61fea000a9c9f8425fc630e638b6e Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 19:10:09 +0100
Subject: [PATCH 05/33] generalize add and sub intrinsics to all integer types

---
 compiler/gen/src/llvm/build.rs | 103 ++++++++++++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 3 deletions(-)

diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index 9ba362841f..31fc0ce6c3 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -296,8 +296,10 @@ fn add_intrinsics<'ctx>(ctx: &'ctx Context, module: &Module<'ctx>) {
     let void_type = ctx.void_type();
     let i1_type = ctx.bool_type();
     let f64_type = ctx.f64_type();
+    let i128_type = ctx.i128_type();
     let i64_type = ctx.i64_type();
     let i32_type = ctx.i32_type();
+    let i16_type = ctx.i16_type();
     let i8_type = ctx.i8_type();
     let i8_ptr_type = i8_type.ptr_type(AddressSpace::Generic);
 
@@ -377,18 +379,72 @@ fn add_intrinsics<'ctx>(ctx: &'ctx Context, module: &Module<'ctx>) {
         f64_type.fn_type(&[f64_type.into()], false),
     );
 
+    // add with overflow
+
+    add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I8, {
+        let fields = [i8_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i8_type.into(), i8_type.into()], false)
+    });
+
+    add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I16, {
+        let fields = [i16_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i16_type.into(), i16_type.into()], false)
+    });
+
+    add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I32, {
+        let fields = [i32_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i32_type.into(), i32_type.into()], false)
+    });
+
     add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I64, {
         let fields = [i64_type.into(), i1_type.into()];
         ctx.struct_type(&fields, false)
             .fn_type(&[i64_type.into(), i64_type.into()], false)
     });
 
+    add_intrinsic(module, LLVM_SADD_WITH_OVERFLOW_I128, {
+        let fields = [i128_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i128_type.into(), i128_type.into()], false)
+    });
+
+    // sub with overflow
+
+    add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I8, {
+        let fields = [i8_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i8_type.into(), i8_type.into()], false)
+    });
+
+    add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I16, {
+        let fields = [i16_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i16_type.into(), i16_type.into()], false)
+    });
+
+    add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I32, {
+        let fields = [i32_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i32_type.into(), i32_type.into()], false)
+    });
+
     add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I64, {
         let fields = [i64_type.into(), i1_type.into()];
         ctx.struct_type(&fields, false)
             .fn_type(&[i64_type.into(), i64_type.into()], false)
     });
 
+    add_intrinsic(module, LLVM_SSUB_WITH_OVERFLOW_I128, {
+        let fields = [i128_type.into(), i1_type.into()];
+        ctx.struct_type(&fields, false)
+            .fn_type(&[i128_type.into(), i128_type.into()], false)
+    });
+
+    // mul with overflow
+
     add_intrinsic(module, LLVM_SMUL_WITH_OVERFLOW_I64, {
         let fields = [i64_type.into(), i1_type.into()];
         ctx.struct_type(&fields, false)
@@ -406,8 +462,19 @@ static LLVM_COS_F64: &str = "llvm.cos.f64";
 static LLVM_POW_F64: &str = "llvm.pow.f64";
 static LLVM_CEILING_F64: &str = "llvm.ceil.f64";
 static LLVM_FLOOR_F64: &str = "llvm.floor.f64";
+
+pub static LLVM_SADD_WITH_OVERFLOW_I8: &str = "llvm.sadd.with.overflow.i8";
+pub static LLVM_SADD_WITH_OVERFLOW_I16: &str = "llvm.sadd.with.overflow.i16";
+pub static LLVM_SADD_WITH_OVERFLOW_I32: &str = "llvm.sadd.with.overflow.i32";
 pub static LLVM_SADD_WITH_OVERFLOW_I64: &str = "llvm.sadd.with.overflow.i64";
+pub static LLVM_SADD_WITH_OVERFLOW_I128: &str = "llvm.sadd.with.overflow.i128";
+
+pub static LLVM_SSUB_WITH_OVERFLOW_I8: &str = "llvm.ssub.with.overflow.i8";
+pub static LLVM_SSUB_WITH_OVERFLOW_I16: &str = "llvm.ssub.with.overflow.i16";
+pub static LLVM_SSUB_WITH_OVERFLOW_I32: &str = "llvm.ssub.with.overflow.i32";
 pub static LLVM_SSUB_WITH_OVERFLOW_I64: &str = "llvm.ssub.with.overflow.i64";
+pub static LLVM_SSUB_WITH_OVERFLOW_I128: &str = "llvm.ssub.with.overflow.i128";
+
 pub static LLVM_SMUL_WITH_OVERFLOW_I64: &str = "llvm.smul.with.overflow.i64";
 
 fn add_intrinsic<'ctx>(
@@ -4506,7 +4573,7 @@ fn build_int_binop<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
     parent: FunctionValue<'ctx>,
     lhs: IntValue<'ctx>,
-    _lhs_layout: &Layout<'a>,
+    lhs_layout: &Layout<'a>,
     rhs: IntValue<'ctx>,
     _rhs_layout: &Layout<'a>,
     op: LowLevel,
@@ -4519,8 +4586,23 @@ fn build_int_binop<'a, 'ctx, 'env>(
     match op {
         NumAdd => {
             let context = env.context;
+
+            let intrinsic = match lhs_layout {
+                Layout::Builtin(Builtin::Int8) => LLVM_SADD_WITH_OVERFLOW_I8,
+                Layout::Builtin(Builtin::Int16) => LLVM_SADD_WITH_OVERFLOW_I16,
+                Layout::Builtin(Builtin::Int32) => LLVM_SADD_WITH_OVERFLOW_I32,
+                Layout::Builtin(Builtin::Int64) => LLVM_SADD_WITH_OVERFLOW_I64,
+                Layout::Builtin(Builtin::Int128) => LLVM_SADD_WITH_OVERFLOW_I128,
+                Layout::Builtin(Builtin::Usize) => match env.ptr_bytes {
+                    4 => LLVM_SADD_WITH_OVERFLOW_I32,
+                    8 => LLVM_SADD_WITH_OVERFLOW_I64,
+                    other => panic!("invalid ptr_bytes {}", other),
+                },
+                _ => unreachable!(),
+            };
+
             let result = env
-                .call_intrinsic(LLVM_SADD_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()])
+                .call_intrinsic(intrinsic, &[lhs.into(), rhs.into()])
                 .into_struct_value();
 
             let add_result = bd.build_extract_value(result, 0, "add_result").unwrap();
@@ -4550,8 +4632,23 @@ fn build_int_binop<'a, 'ctx, 'env>(
         NumAddChecked => env.call_intrinsic(LLVM_SADD_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()]),
         NumSub => {
             let context = env.context;
+
+            let intrinsic = match lhs_layout {
+                Layout::Builtin(Builtin::Int8) => LLVM_SSUB_WITH_OVERFLOW_I8,
+                Layout::Builtin(Builtin::Int16) => LLVM_SSUB_WITH_OVERFLOW_I16,
+                Layout::Builtin(Builtin::Int32) => LLVM_SSUB_WITH_OVERFLOW_I32,
+                Layout::Builtin(Builtin::Int64) => LLVM_SSUB_WITH_OVERFLOW_I64,
+                Layout::Builtin(Builtin::Int128) => LLVM_SSUB_WITH_OVERFLOW_I128,
+                Layout::Builtin(Builtin::Usize) => match env.ptr_bytes {
+                    4 => LLVM_SSUB_WITH_OVERFLOW_I32,
+                    8 => LLVM_SSUB_WITH_OVERFLOW_I64,
+                    other => panic!("invalid ptr_bytes {}", other),
+                },
+                _ => unreachable!("invalid layout {:?}", lhs_layout),
+            };
+
             let result = env
-                .call_intrinsic(LLVM_SSUB_WITH_OVERFLOW_I64, &[lhs.into(), rhs.into()])
+                .call_intrinsic(intrinsic, &[lhs.into(), rhs.into()])
                 .into_struct_value();
 
             let sub_result = bd.build_extract_value(result, 0, "sub_result").unwrap();

From 9116e9e8c96cfdeadd7c9cd7b78096c75d5b5b10 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 19:52:32 +0100
Subject: [PATCH 06/33] add Str.toBytes

---
 compiler/builtins/bitcode/src/main.zig |  1 +
 compiler/builtins/bitcode/src/str.zig  | 21 +++++++++++++++++++++
 compiler/builtins/src/bitcode.rs       |  1 +
 compiler/builtins/src/std.rs           |  6 ++++++
 compiler/can/src/builtins.rs           |  7 +++++++
 compiler/gen/src/llvm/build.rs         | 14 ++++++++++++--
 compiler/gen/src/llvm/build_str.rs     | 22 ++++++++++++++++++++++
 compiler/module/src/low_level.rs       |  1 +
 compiler/module/src/symbol.rs          |  1 +
 compiler/mono/src/borrow.rs            |  1 +
 10 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig
index 7df2061ed9..e124afe62c 100644
--- a/compiler/builtins/bitcode/src/main.zig
+++ b/compiler/builtins/bitcode/src/main.zig
@@ -68,6 +68,7 @@ comptime {
     exportStrFn(str.strFromFloatC, "from_float");
     exportStrFn(str.strEqual, "equal");
     exportStrFn(str.validateUtf8Bytes, "validate_utf8_bytes");
+    exportStrFn(str.strToBytesC, "to_bytes");
 }
 
 // Export helpers - Must be run inside a comptime
diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig
index 32057d35d0..a752a37f0e 100644
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@@ -1,4 +1,5 @@
 const utils = @import("utils.zig");
+const RocList = @import("list.zig").RocList;
 const std = @import("std");
 const mem = std.mem;
 const always_inline = std.builtin.CallOptions.Modifier.always_inline;
@@ -961,6 +962,26 @@ test "RocStr.joinWith: result is big" {
     expect(roc_result.eq(result));
 }
 
+// Str.toBytes
+pub fn strToBytesC(arg: RocStr) callconv(.C) RocList {
+    return @call(.{ .modifier = always_inline }, strToBytes, .{ std.heap.c_allocator, arg });
+}
+
+fn strToBytes(allocator: *Allocator, arg: RocStr) RocList {
+    if (arg.isEmpty()) {
+        return RocList.empty();
+    } else if (arg.isSmallStr()) {
+        const length = arg.len();
+        const ptr = utils.allocateWithRefcount(allocator, @alignOf(usize), length);
+
+        @memcpy(ptr, arg.asU8ptr(), length);
+
+        return RocList{ .length = length, .bytes = ptr };
+    } else {
+        return RocList{ .length = arg.len(), .bytes = arg.str_bytes };
+    }
+}
+
 pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool {
     const bytes: []u8 = ptr[0..len];
     return @call(.{ .modifier = always_inline }, unicode.utf8ValidateSlice, .{bytes});
diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs
index 134bce68ad..b19117e106 100644
--- a/compiler/builtins/src/bitcode.rs
+++ b/compiler/builtins/src/bitcode.rs
@@ -42,6 +42,7 @@ pub const STR_FROM_INT: &str = "roc_builtins.str.from_int";
 pub const STR_FROM_FLOAT: &str = "roc_builtins.str.from_float";
 pub const STR_EQUAL: &str = "roc_builtins.str.equal";
 pub const STR_VALIDATE_UTF_BYTES: &str = "roc_builtins.str.validate_utf8_bytes";
+pub const STR_TO_BYTES: &str = "roc_builtins.str.to_bytes";
 
 pub const DICT_HASH: &str = "roc_builtins.dict.hash";
 pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";
diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs
index 5a3b499dd7..5de65ca3dd 100644
--- a/compiler/builtins/src/std.rs
+++ b/compiler/builtins/src/std.rs
@@ -623,6 +623,12 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
         ),
     );
 
+    // toBytes : Str -> List U8
+    add_type(
+        Symbol::STR_TO_BYTES,
+        top_level_function(vec![str_type()], Box::new(list_type(u8_type()))),
+    );
+
     // fromFloat : Float a -> Str
     add_type(
         Symbol::STR_FROM_FLOAT,
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index af38527fcc..7db64b296f 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -62,6 +62,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         STR_COUNT_GRAPHEMES => str_count_graphemes,
         STR_FROM_INT => str_from_int,
         STR_FROM_UTF8 => str_from_utf8,
+        STR_TO_BYTES => str_to_bytes,
         STR_FROM_FLOAT=> str_from_float,
         LIST_LEN => list_len,
         LIST_GET => list_get,
@@ -196,6 +197,7 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
         Symbol::STR_COUNT_GRAPHEMES => str_count_graphemes,
         Symbol::STR_FROM_INT => str_from_int,
         Symbol::STR_FROM_UTF8 => str_from_utf8,
+        Symbol::STR_TO_BYTES => str_to_bytes,
         Symbol::STR_FROM_FLOAT=> str_from_float,
         Symbol::LIST_LEN => list_len,
         Symbol::LIST_GET => list_get,
@@ -1655,6 +1657,11 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
     )
 }
 
+/// Str.toBytes : Str -> List U8
+fn str_to_bytes(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_1(symbol, LowLevel::StrToBytes, var_store)
+}
+
 /// Str.fromFloat : Float * -> Str
 fn str_from_float(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let float_var = var_store.fresh();
diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs
index 31fc0ce6c3..506dec1253 100644
--- a/compiler/gen/src/llvm/build.rs
+++ b/compiler/gen/src/llvm/build.rs
@@ -12,7 +12,7 @@ use crate::llvm::build_list::{
 };
 use crate::llvm::build_str::{
     str_concat, str_count_graphemes, str_ends_with, str_from_float, str_from_int, str_from_utf8,
-    str_join_with, str_number_of_bytes, str_split, str_starts_with, CHAR_LAYOUT,
+    str_join_with, str_number_of_bytes, str_split, str_starts_with, str_to_bytes, CHAR_LAYOUT,
 };
 use crate::llvm::compare::{generic_eq, generic_neq};
 use crate::llvm::convert::{
@@ -3611,13 +3611,23 @@ fn run_low_level<'a, 'ctx, 'env>(
             str_from_float(env, scope, args[0])
         }
         StrFromUtf8 => {
-            // Str.fromInt : Int -> Str
+            // Str.fromUtf8 : List U8 -> Result Str Utf8Problem
             debug_assert_eq!(args.len(), 1);
 
             let original_wrapper = load_symbol(scope, &args[0]).into_struct_value();
 
             str_from_utf8(env, parent, original_wrapper)
         }
+        StrToBytes => {
+            // Str.toBytes : Str -> List U8
+            debug_assert_eq!(args.len(), 1);
+
+            // only big strings can reuse their bytes as-is; the zig builtin
+            // copies small strings to the heap and maps the empty string to an empty list
+            let string = load_symbol(scope, &args[0]);
+
+            str_to_bytes(env, string.into_struct_value())
+        }
         StrSplit => {
             // Str.split : Str, Str -> List Str
             debug_assert_eq!(args.len(), 2);
diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs
index be27698163..301b726fb3 100644
--- a/compiler/gen/src/llvm/build_str.rs
+++ b/compiler/gen/src/llvm/build_str.rs
@@ -275,6 +275,28 @@ pub fn str_from_int<'a, 'ctx, 'env>(
     zig_str_to_struct(env, zig_result).into()
 }
 
+/// Str.toBytes : Str -> List U8
+pub fn str_to_bytes<'a, 'ctx, 'env>(
+    env: &Env<'a, 'ctx, 'env>,
+    original_wrapper: StructValue<'ctx>,
+) -> BasicValueEnum<'ctx> {
+    let string = complex_bitcast(
+        env.builder,
+        original_wrapper.into(),
+        env.context.i128_type().into(),
+        "to_bytes",
+    );
+
+    let zig_result = call_bitcode_fn(env, &[string], &bitcode::STR_TO_BYTES);
+
+    complex_bitcast(
+        env.builder,
+        zig_result,
+        collection(env.context, env.ptr_bytes).into(),
+        "to_bytes",
+    )
+}
+
 /// Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 }
 pub fn str_from_utf8<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
diff --git a/compiler/module/src/low_level.rs b/compiler/module/src/low_level.rs
index 640b8c8bca..07422cd4d0 100644
--- a/compiler/module/src/low_level.rs
+++ b/compiler/module/src/low_level.rs
@@ -12,6 +12,7 @@ pub enum LowLevel {
     StrCountGraphemes,
     StrFromInt,
     StrFromUtf8,
+    StrToBytes,
     StrFromFloat,
     ListLen,
     ListGetUnsafe,
diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs
index 62f5a9d457..2497e9cfd3 100644
--- a/compiler/module/src/symbol.rs
+++ b/compiler/module/src/symbol.rs
@@ -882,6 +882,7 @@ define_builtins! {
         12 STR_FROM_UTF8: "fromUtf8"
         13 STR_UT8_PROBLEM: "Utf8Problem" // the Utf8Problem type alias
         14 STR_UT8_BYTE_PROBLEM: "Utf8ByteProblem" // the Utf8ByteProblem type alias
+        15 STR_TO_BYTES: "toBytes"
     }
     4 LIST: "List" => {
         0 LIST_LIST: "List" imported // the List.List type alias
diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs
index c0d4f1e091..c87f0b4e05 100644
--- a/compiler/mono/src/borrow.rs
+++ b/compiler/mono/src/borrow.rs
@@ -676,6 +676,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         }
         StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]),
         StrFromUtf8 => arena.alloc_slice_copy(&[owned]),
+        StrToBytes => arena.alloc_slice_copy(&[owned]),
         StrFromInt | StrFromFloat => arena.alloc_slice_copy(&[irrelevant]),
         Hash => arena.alloc_slice_copy(&[borrowed, irrelevant]),
         DictSize => arena.alloc_slice_copy(&[borrowed]),

From e218279f42e473689222200bc1ca67649d757391 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 19:54:32 +0100
Subject: [PATCH 07/33] add example files

---
 examples/benchmarks/Base64.roc      | 164 ++++++++++++++++++++++++++++
 examples/benchmarks/BytesDecode.roc | 106 ++++++++++++++++++
 2 files changed, 270 insertions(+)
 create mode 100644 examples/benchmarks/Base64.roc
 create mode 100644 examples/benchmarks/BytesDecode.roc

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
new file mode 100644
index 0000000000..e9dc460cb3
--- /dev/null
+++ b/examples/benchmarks/Base64.roc
@@ -0,0 +1,164 @@
+app "base64"
+    packages { base: "platform" }
+    imports [base.Task, BytesDecode.{Decoder} ]
+    provides [ main ] to base
+
+IO a : Task.Task a []
+
+Decoder a : BytesDecode.Decoder a
+
+main : IO {}
+main =
+    # when fromBytes [ 0 ] is
+    when fromBytes (Str.toBytes "Hello World") is
+        Ok str ->
+            Task.putLine str
+
+        Err _ ->
+            Task.putLine "sadness"
+
+
+
+
+
+# ------
+
+
+fromBytes : List U8 -> Result Str BytesDecode.DecodeError
+fromBytes = \bytes ->
+    BytesDecode.decode  bytes (decodeBase64 (List.len bytes))
+
+
+decodeBase64 : Nat -> BytesDecode.Decoder Str
+decodeBase64 = \width -> BytesDecode.loop loopHelp { remaining: width, string:  "" }
+
+loopHelp : { remaining : Nat, string : Str } -> Decoder (BytesDecode.Step { remaining : Nat, string : Str } Str)
+loopHelp = \{ remaining, string } ->
+    if remaining >= 3 then
+        helper = \x, y, z ->
+            a : U32
+            a = Num.intCast x
+            b : U32
+            b = Num.intCast y
+            c : U32
+            c = Num.intCast z
+            combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c
+            Loop
+                {
+                    remaining: remaining - 3,
+                    string: Str.concat string (bitsToChars combined 0)
+                }
+
+        BytesDecode.map3 helper
+            BytesDecode.u8
+            BytesDecode.u8
+            BytesDecode.u8
+
+    else if remaining == 0 then
+        BytesDecode.succeed (Done string)
+
+    else if remaining == 2 then
+        helperX = \x, y ->
+            a : U32
+            a = Num.intCast x
+            b : U32
+            b = Num.intCast y
+            combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)
+            Done (Str.concat string (bitsToChars combined 1))
+
+        BytesDecode.map2 helperX
+            BytesDecode.u8
+            BytesDecode.u8
+    else
+        # remaining = 1
+            BytesDecode.u8
+                |> BytesDecode.map (\x -> 
+                    a : U32
+                    a = Num.intCast x
+                    Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)))
+
+
+bitsToChars : U32, Int * -> Str
+bitsToChars = \bits, missing ->
+    when Str.fromUtf8 (bitsToCharsHelp bits missing) is
+        Ok str -> str
+        Err _ -> ""
+
+# Mask that can be used to get the lowest 6 bits of a binary number
+lowest6BitsMask : Int *
+lowest6BitsMask = 63
+
+
+bitsToCharsHelp : U32, Int * -> List U8
+bitsToCharsHelp = \bits, missing ->
+    # Performance Notes
+    # `String.cons` proved to be the fastest way of combining characters into a string
+    # see also https://github.com/danfishgold/base64-bytes/pull/3#discussion_r342321940
+    # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by
+    # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd`
+    # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left.
+        
+    # any 6-bit number is a valid base64 digit, so this is actually safe
+    p =
+        Num.shiftRightZfBy 18 bits
+            |> Num.intCast
+            |> unsafeToChar 
+
+    q =
+        Num.bitwiseAnd (Num.shiftRightZfBy 12 bits) lowest6BitsMask
+            |> Num.intCast
+            |> unsafeToChar 
+
+    r =
+        Num.bitwiseAnd (Num.shiftRightZfBy 6 bits) lowest6BitsMask
+            |> Num.intCast
+            |> unsafeToChar 
+
+    s =
+        Num.bitwiseAnd bits lowest6BitsMask
+            |> Num.intCast
+            |> unsafeToChar 
+
+    equals : U8
+    equals = 61
+
+    when missing is
+        0 -> 
+            [ p, q, r, s ]
+        1 ->
+            [ p, q, r, equals ]
+        2 ->
+            [ p, q, equals , equals ]
+        _ ->
+            # unreachable
+            []
+
+# Base64 index to character/digit
+unsafeToChar : U8 -> U8
+unsafeToChar = \n ->
+    if n <= 25 then
+        # uppercase characters
+        65 + n
+
+    else if n <= 51 then
+        # lowercase characters
+        97 + (n - 26)
+
+    else if n <= 61 then
+        # digit characters
+        48 + (n - 52)
+
+    else
+        # special cases
+        when n is
+            62 ->
+                # '+'
+                43
+
+            63 ->
+                # '/'
+                47
+
+            _ ->
+                # anything else is invalid '\u{0000}'
+                0
diff --git a/examples/benchmarks/BytesDecode.roc b/examples/benchmarks/BytesDecode.roc
new file mode 100644
index 0000000000..03a341c4ff
--- /dev/null
+++ b/examples/benchmarks/BytesDecode.roc
@@ -0,0 +1,106 @@
+interface BytesDecode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports []
+
+State : { bytes: List U8, cursor : Nat }
+
+DecodeError : [ OutOfBytes ]
+
+
+Decoder a : [ @Decoder (State -> [Good State a, Bad DecodeError]) ]
+
+decode : List U8, Decoder a -> Result a DecodeError
+decode = \bytes, @Decoder decoder ->
+    when decoder { bytes, cursor: 0 } is
+        Good _ value ->
+            Ok value
+
+        Bad e ->
+            Err e
+
+succeed : a -> Decoder a
+succeed = \value -> @Decoder \state -> Good state value 
+
+map : Decoder a, (a -> b) -> Decoder b
+map = \@Decoder decoder, transform -> 
+    @Decoder \state -> 
+        when decoder state is
+            Good state1 value ->
+                Good state1 (transform value)
+
+            Bad e ->
+                Bad e
+
+
+map2 : (a,b -> c), Decoder a, Decoder b -> Decoder c
+map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> 
+    @Decoder \state1 -> 
+        when decoder1 state1 is
+            Good state2 a ->
+                when decoder2 state2 is
+                    Good state3 b ->
+                        Good state3 (transform a b)
+
+                    Bad e ->
+                        Bad e
+
+            Bad e ->
+                Bad e
+
+map3 : (a, b, c -> d), Decoder a, Decoder b, Decoder c -> Decoder d
+map3 = \transform, @Decoder decoder1, @Decoder decoder2, @Decoder decoder3 -> 
+    @Decoder \state1 -> 
+        when decoder1 state1 is
+            Good state2 a ->
+                when decoder2 state2 is
+                    Good state3 b ->
+                        when decoder3 state3 is
+                            Good state4 c ->
+                                Good state4 (transform a b c)
+
+                            Bad e ->
+                                Bad e
+
+                    Bad e ->
+                        Bad e
+
+            Bad e ->
+                Bad e
+
+after : Decoder a, (a -> Decoder b) -> Decoder b
+after = \@Decoder decoder, transform -> 
+    @Decoder \state -> 
+        when decoder state is
+            Good state1 value ->
+                (@Decoder decoder1) = transform value
+                decoder1 state1
+
+
+            Bad e ->
+                Bad e
+
+u8 : Decoder U8
+u8 = @Decoder \state ->
+    when List.get state.bytes state.cursor is
+        Ok b ->
+            Good { state & cursor: state.cursor + 1 } b
+
+        Err _ ->
+            Bad OutOfBytes
+
+Step state b : [ Loop state, Done b ]
+
+loop : (state -> Decoder (Step state a)), state -> Decoder a
+loop = \stepper, initial ->
+    @Decoder \state -> 
+        loopHelp stepper initial state
+
+loopHelp = \stepper, accum, state -> 
+    (@Decoder stepper1) = stepper accum
+    when stepper1 state is
+        Good newState (Done value) ->
+            Good newState value
+
+        Good newState (Loop newAccum) ->
+            loopHelp stepper newAccum newState
+
+        Bad e ->
+            Bad e

From c4ddeefed96e741b6208d38ad47d5ee78253e515 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 20:45:17 +0100
Subject: [PATCH 08/33] add test of integer type inference let polymorphism

---
 compiler/solve/tests/solve_expr.rs | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/compiler/solve/tests/solve_expr.rs b/compiler/solve/tests/solve_expr.rs
index 229ae772a7..ba5d0519c3 100644
--- a/compiler/solve/tests/solve_expr.rs
+++ b/compiler/solve/tests/solve_expr.rs
@@ -4315,4 +4315,26 @@ mod solve_expr {
             "Str",
         );
     }
+
+    #[test]
+    fn int_type_let_polymorphism() {
+        infer_eq_without_problem(
+            indoc!(
+                r#"
+                app "test" provides [ main ] to "./platform"
+
+                x = 4
+
+                f : U8 -> U32
+                f = \z -> Num.intCast z
+
+                y = f x
+
+                main =
+                    x
+                "#
+            ),
+            "Num *",
+        );
+    }
 }

From ad96d1ae24202c8cbe7ce170b051b9cd78c692e9 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 20:45:26 +0100
Subject: [PATCH 09/33] trim comment

---
 examples/benchmarks/Base64.roc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index e9dc460cb3..cfcc364334 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -91,9 +91,6 @@ lowest6BitsMask = 63
 
 bitsToCharsHelp : U32, Int * -> List U8
 bitsToCharsHelp = \bits, missing ->
-    # Performance Notes
-    # `String.cons` proved to be the fastest way of combining characters into a string
-    # see also https://github.com/danfishgold/base64-bytes/pull/3#discussion_r342321940
     # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by
     # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd`
     # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left.

From 86cf7cd983f92cb2a9bf8d9b443528bf8040f3c3 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 20:52:06 +0100
Subject: [PATCH 10/33] move file

---
 examples/benchmarks/Base64.roc                | 34 +++++++++----------
 .../{BytesDecode.roc => Bytes/Decode.roc}     |  2 +-
 2 files changed, 18 insertions(+), 18 deletions(-)
 rename examples/benchmarks/{BytesDecode.roc => Bytes/Decode.roc} (95%)

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index cfcc364334..92b30f1555 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -1,11 +1,11 @@
 app "base64"
     packages { base: "platform" }
-    imports [base.Task, BytesDecode.{Decoder} ]
+    imports [base.Task, Bytes.Decode.{Decoder} ]
     provides [ main ] to base
 
 IO a : Task.Task a []
 
-Decoder a : BytesDecode.Decoder a
+Decoder a : Bytes.Decode.Decoder a
 
 main : IO {}
 main =
@@ -24,15 +24,15 @@ main =
 # ------
 
 
-fromBytes : List U8 -> Result Str BytesDecode.DecodeError
+fromBytes : List U8 -> Result Str Bytes.Decode.DecodeError
 fromBytes = \bytes ->
-    BytesDecode.decode  bytes (decodeBase64 (List.len bytes))
+    Bytes.Decode.decode  bytes (decodeBase64 (List.len bytes))
 
 
-decodeBase64 : Nat -> BytesDecode.Decoder Str
-decodeBase64 = \width -> BytesDecode.loop loopHelp { remaining: width, string:  "" }
+decodeBase64 : Nat -> Bytes.Decode.Decoder Str
+decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string:  "" }
 
-loopHelp : { remaining : Nat, string : Str } -> Decoder (BytesDecode.Step { remaining : Nat, string : Str } Str)
+loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str)
 loopHelp = \{ remaining, string } ->
     if remaining >= 3 then
         helper = \x, y, z ->
@@ -49,13 +49,13 @@ loopHelp = \{ remaining, string } ->
                     string: Str.concat string (bitsToChars combined 0)
                 }
 
-        BytesDecode.map3 helper
-            BytesDecode.u8
-            BytesDecode.u8
-            BytesDecode.u8
+        Bytes.Decode.map3 helper
+            Bytes.Decode.u8
+            Bytes.Decode.u8
+            Bytes.Decode.u8
 
     else if remaining == 0 then
-        BytesDecode.succeed (Done string)
+        Bytes.Decode.succeed (Done string)
 
     else if remaining == 2 then
         helperX = \x, y ->
@@ -66,13 +66,13 @@ loopHelp = \{ remaining, string } ->
             combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)
             Done (Str.concat string (bitsToChars combined 1))
 
-        BytesDecode.map2 helperX
-            BytesDecode.u8
-            BytesDecode.u8
+        Bytes.Decode.map2 helperX
+            Bytes.Decode.u8
+            Bytes.Decode.u8
     else
         # remaining = 1
-            BytesDecode.u8
-                |> BytesDecode.map (\x -> 
+            Bytes.Decode.u8
+                |> Bytes.Decode.map (\x -> 
                     a : U32
                     a = Num.intCast x
                     Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)))
diff --git a/examples/benchmarks/BytesDecode.roc b/examples/benchmarks/Bytes/Decode.roc
similarity index 95%
rename from examples/benchmarks/BytesDecode.roc
rename to examples/benchmarks/Bytes/Decode.roc
index 03a341c4ff..db14d857a6 100644
--- a/examples/benchmarks/BytesDecode.roc
+++ b/examples/benchmarks/Bytes/Decode.roc
@@ -1,4 +1,4 @@
-interface BytesDecode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports []
+interface Bytes.Decode exposes [ Decoder, decode, map, map2, u8, loop, Step, succeed, DecodeError, after, map3 ] imports []
 
 State : { bytes: List U8, cursor : Nat }
 

From c4972f45baaa1dae44736079e644594a2081d4b8 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 21:12:47 +0100
Subject: [PATCH 11/33] bit shift tests

---
 compiler/gen/tests/gen_num.rs  | 25 +++++++++++++++++++++++++
 examples/benchmarks/Base64.roc |  4 ----
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs
index 262b8495b9..5fb442927c 100644
--- a/compiler/gen/tests/gen_num.rs
+++ b/compiler/gen/tests/gen_num.rs
@@ -1343,4 +1343,29 @@ mod gen_num {
             f64
         );
     }
+
+    #[test]
+    fn shift_left_by() {
+        assert_evals_to!("Num.shiftLeftBy 0 0b0000_0001", 0b0000_0001, i64);
+        assert_evals_to!("Num.shiftLeftBy 1 0b0000_0001", 0b0000_0010, i64);
+        assert_evals_to!("Num.shiftLeftBy 2 0b0000_0011", 0b0000_1100, i64);
+    }
+
+    #[test]
+    #[ignore]
+    fn shift_right_by() {
+        // Sign Extended Right Shift
+        assert_evals_to!("Num.shiftRightBy 0 0b0100_0000i8", 0b0001_0000, i8);
+        assert_evals_to!("Num.shiftRightBy 1 0b1110_0000u8", 0b1111_0000u8 as i8, i8);
+        assert_evals_to!("Num.shiftRightBy 2 0b1100_0000u8", 0b1111_0000u8 as i8, i8);
+    }
+
+    #[test]
+    #[ignore]
+    fn shift_right_zf_by() {
+        // Logical Right Shift
+        assert_evals_to!("Num.shiftRightBy 1 0b1100_0000u8", 0b0011_0000, i64);
+        assert_evals_to!("Num.shiftRightBy 2 0b0000_0010u8", 0b0000_0001, i64);
+        assert_evals_to!("Num.shiftRightBy 3 0b0000_1100u8", 0b0000_0011, i64);
+    }
 }
diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index 92b30f1555..1fed392288 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -17,10 +17,6 @@ main =
         Err _ ->
             Task.putLine "sadness"
 
-
-
-
-
 # ------
 
 

From 1746b8da6f5cb3662bf26941b2ffef162eebbe29 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 21:14:20 +0100
Subject: [PATCH 12/33] bitwise or test

---
 compiler/gen/tests/gen_num.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs
index 5fb442927c..f6976de9e0 100644
--- a/compiler/gen/tests/gen_num.rs
+++ b/compiler/gen/tests/gen_num.rs
@@ -750,6 +750,12 @@ mod gen_num {
         assert_evals_to!("Num.bitwiseXor 200 0", 200, i64);
     }
 
+    #[test]
+    fn bitwise_or() {
+        assert_evals_to!("Num.bitwiseOr 1 1", 1, i64);
+        assert_evals_to!("Num.bitwiseOr 1 2", 3, i64);
+    }
+
     #[test]
     fn lt_i64() {
         assert_evals_to!("1 < 2", true, bool);

From 63091392f55398924d7b688de9ca20af230f3db9 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sun, 21 Feb 2021 21:18:15 +0100
Subject: [PATCH 13/33] toBytes test

---
 compiler/gen/tests/gen_str.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/compiler/gen/tests/gen_str.rs b/compiler/gen/tests/gen_str.rs
index dafdbf4b60..53bfa1f53e 100644
--- a/compiler/gen/tests/gen_str.rs
+++ b/compiler/gen/tests/gen_str.rs
@@ -816,4 +816,17 @@ mod gen_str {
     fn str_from_float() {
         assert_evals_to!(r#"Str.fromFloat 3.14"#, RocStr::from("3.140000"), RocStr);
     }
+
+    #[test]
+    fn str_to_bytes() {
+        assert_evals_to!(r#"Str.toBytes "hello""#, &[104, 101, 108, 108, 111], &[u8]);
+        assert_evals_to!(
+            r#"Str.toBytes "this is a long string""#,
+            &[
+                116, 104, 105, 115, 32, 105, 115, 32, 97, 32, 108, 111, 110, 103, 32, 115, 116,
+                114, 105, 110, 103
+            ],
+            &[u8]
+        );
+    }
 }

From 30ecd378a07e63ffa52a0a5efe47e54f446a7057 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Tue, 23 Feb 2021 14:31:48 +0100
Subject: [PATCH 14/33] refactor parse AST to allow multiple if branches

---
 compiler/can/src/expr.rs     |  39 ++++++---
 compiler/can/src/operator.rs |  21 +++--
 compiler/fmt/src/expr.rs     | 164 +++++++++++++++++++----------------
 compiler/parse/src/ast.rs    |   2 +-
 compiler/parse/src/expr.rs   |   5 +-
 editor/src/lang/expr.rs      |  23 +++--
 6 files changed, 145 insertions(+), 109 deletions(-)

diff --git a/compiler/can/src/expr.rs b/compiler/can/src/expr.rs
index a2248b7391..d1718703e6 100644
--- a/compiler/can/src/expr.rs
+++ b/compiler/can/src/expr.rs
@@ -674,32 +674,43 @@ pub fn canonicalize_expr<'a>(
                 Output::default(),
             )
         }
-        ast::Expr::If(cond, then_branch, else_branch) => {
-            let (loc_cond, mut output) =
-                canonicalize_expr(env, var_store, scope, cond.region, &cond.value);
-            let (loc_then, then_output) = canonicalize_expr(
-                env,
-                var_store,
-                scope,
-                then_branch.region,
-                &then_branch.value,
-            );
+        ast::Expr::If(if_thens, final_else_branch) => {
+            let mut branches = Vec::with_capacity(1);
+            let mut output = Output::default();
+
+            for (condition, then_branch) in if_thens.iter() {
+                let (loc_cond, cond_output) =
+                    canonicalize_expr(env, var_store, scope, condition.region, &condition.value);
+
+                let (loc_then, then_output) = canonicalize_expr(
+                    env,
+                    var_store,
+                    scope,
+                    then_branch.region,
+                    &then_branch.value,
+                );
+
+                branches.push((loc_cond, loc_then));
+
+                output.references = output.references.union(cond_output.references);
+                output.references = output.references.union(then_output.references);
+            }
+
             let (loc_else, else_output) = canonicalize_expr(
                 env,
                 var_store,
                 scope,
-                else_branch.region,
-                &else_branch.value,
+                final_else_branch.region,
+                &final_else_branch.value,
             );
 
-            output.references = output.references.union(then_output.references);
             output.references = output.references.union(else_output.references);
 
             (
                 If {
                     cond_var: var_store.fresh(),
                     branch_var: var_store.fresh(),
-                    branches: vec![(loc_cond, loc_then)],
+                    branches,
                     final_else: Box::new(loc_else),
                 },
                 output,
diff --git a/compiler/can/src/operator.rs b/compiler/can/src/operator.rs
index 5f9d73bb70..048c2b9d83 100644
--- a/compiler/can/src/operator.rs
+++ b/compiler/can/src/operator.rs
@@ -290,16 +290,21 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located<Expr<'a>>) -> &'a
                 }),
             )
         }
-        If(condition, then_branch, else_branch)
-        | Nested(If(condition, then_branch, else_branch)) => {
-            // If does not get desugared yet so we can give more targetted error messages during
-            // type checking.
-            let desugared_cond = &*arena.alloc(desugar_expr(arena, &condition));
-            let desugared_then = &*arena.alloc(desugar_expr(arena, &then_branch));
-            let desugared_else = &*arena.alloc(desugar_expr(arena, &else_branch));
+        If(if_thens, final_else_branch) | Nested(If(if_thens, final_else_branch)) => {
+            // If does not get desugared into `when` so we can give more targeted error messages during type checking.
+            let desugared_final_else = &*arena.alloc(desugar_expr(arena, &final_else_branch));
+
+            let mut desugared_if_thens = Vec::with_capacity_in(if_thens.len(), arena);
+
+            for (condition, then_branch) in if_thens.iter() {
+                desugared_if_thens.push((
+                    desugar_expr(arena, condition).clone(),
+                    desugar_expr(arena, then_branch).clone(),
+                ));
+            }
 
             arena.alloc(Located {
-                value: If(desugared_cond, desugared_then, desugared_else),
+                value: If(desugared_if_thens.into_bump_slice(), desugared_final_else),
                 region: loc_expr.region,
             })
         }
diff --git a/compiler/fmt/src/expr.rs b/compiler/fmt/src/expr.rs
index 1be56ee514..80056cce7f 100644
--- a/compiler/fmt/src/expr.rs
+++ b/compiler/fmt/src/expr.rs
@@ -58,8 +58,11 @@ impl<'a> Formattable<'a> for Expr<'a> {
                 loc_expr.is_multiline() || args.iter().any(|loc_arg| loc_arg.is_multiline())
             }
 
-            If(loc_cond, loc_if_true, loc_if_false) => {
-                loc_cond.is_multiline() || loc_if_true.is_multiline() || loc_if_false.is_multiline()
+            If(branches, final_else) => {
+                final_else.is_multiline()
+                    || branches
+                        .iter()
+                        .any(|(c, t)| c.is_multiline() || t.is_multiline())
             }
 
             BinOp((loc_left, _, loc_right)) => {
@@ -257,8 +260,8 @@ impl<'a> Formattable<'a> for Expr<'a> {
                 // still print the return value.
                 ret.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
             }
-            If(loc_condition, loc_then, loc_else) => {
-                fmt_if(buf, loc_condition, loc_then, loc_else, indent);
+            If(branches, final_else) => {
+                fmt_if(buf, branches, final_else, self.is_multiline(), indent);
             }
             When(loc_condition, branches) => fmt_when(buf, loc_condition, branches, indent),
             List {
@@ -629,15 +632,15 @@ fn fmt_when<'a>(
 
 fn fmt_if<'a>(
     buf: &mut String<'a>,
-    loc_condition: &'a Located<Expr<'a>>,
-    loc_then: &'a Located<Expr<'a>>,
-    loc_else: &'a Located<Expr<'a>>,
+    branches: &'a [(Located<Expr<'a>>, Located<Expr<'a>>)],
+    final_else: &'a Located<Expr<'a>>,
+    is_multiline: bool,
     indent: u16,
 ) {
-    let is_multiline_then = loc_then.is_multiline();
-    let is_multiline_else = loc_else.is_multiline();
-    let is_multiline_condition = loc_condition.is_multiline();
-    let is_multiline = is_multiline_then || is_multiline_else || is_multiline_condition;
+    //    let is_multiline_then = loc_then.is_multiline();
+    //    let is_multiline_else = final_else.is_multiline();
+    //    let is_multiline_condition = loc_condition.is_multiline();
+    //    let is_multiline = is_multiline_then || is_multiline_else || is_multiline_condition;
 
     let return_indent = if is_multiline {
         indent + INDENT
@@ -645,80 +648,89 @@ fn fmt_if<'a>(
         indent
     };
 
-    buf.push_str("if");
+    for (loc_condition, loc_then) in branches.iter() {
+        let is_multiline_condition = loc_condition.is_multiline();
 
-    if is_multiline_condition {
-        match &loc_condition.value {
-            Expr::SpaceBefore(expr_below, spaces_above_expr) => {
-                fmt_comments_only(buf, spaces_above_expr.iter(), NewlineAt::Top, return_indent);
-                newline(buf, return_indent);
+        buf.push_str("if");
 
-                match &expr_below {
-                    Expr::SpaceAfter(expr_above, spaces_below_expr) => {
-                        expr_above.format(buf, return_indent);
-                        fmt_comments_only(
-                            buf,
-                            spaces_below_expr.iter(),
-                            NewlineAt::Top,
-                            return_indent,
-                        );
-                        newline(buf, indent);
-                    }
+        if is_multiline_condition {
+            match &loc_condition.value {
+                Expr::SpaceBefore(expr_below, spaces_above_expr) => {
+                    fmt_comments_only(buf, spaces_above_expr.iter(), NewlineAt::Top, return_indent);
+                    newline(buf, return_indent);
 
-                    _ => {
-                        expr_below.format(buf, return_indent);
+                    match &expr_below {
+                        Expr::SpaceAfter(expr_above, spaces_below_expr) => {
+                            expr_above.format(buf, return_indent);
+                            fmt_comments_only(
+                                buf,
+                                spaces_below_expr.iter(),
+                                NewlineAt::Top,
+                                return_indent,
+                            );
+                            newline(buf, indent);
+                        }
+
+                        _ => {
+                            expr_below.format(buf, return_indent);
+                        }
                     }
                 }
-            }
 
-            Expr::SpaceAfter(expr_above, spaces_below_expr) => {
-                newline(buf, return_indent);
-                expr_above.format(buf, return_indent);
-                fmt_comments_only(buf, spaces_below_expr.iter(), NewlineAt::Top, return_indent);
-                newline(buf, indent);
-            }
+                Expr::SpaceAfter(expr_above, spaces_below_expr) => {
+                    newline(buf, return_indent);
+                    expr_above.format(buf, return_indent);
+                    fmt_comments_only(buf, spaces_below_expr.iter(), NewlineAt::Top, return_indent);
+                    newline(buf, indent);
+                }
 
-            _ => {
-                newline(buf, return_indent);
-                loc_condition.format(buf, return_indent);
-                newline(buf, indent);
-            }
-        }
-    } else {
-        buf.push(' ');
-        loc_condition.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
-        buf.push(' ');
-    }
-
-    buf.push_str("then");
-
-    if is_multiline {
-        match &loc_then.value {
-            Expr::SpaceBefore(expr_below, spaces_below) => {
-                // we want exactly one newline, user-inserted extra newlines are ignored.
-                newline(buf, return_indent);
-                fmt_comments_only(buf, spaces_below.iter(), NewlineAt::Bottom, return_indent);
-
-                match &expr_below {
-                    Expr::SpaceAfter(expr_above, spaces_above) => {
-                        expr_above.format(buf, return_indent);
-
-                        fmt_comments_only(buf, spaces_above.iter(), NewlineAt::Top, return_indent);
-                        newline(buf, indent);
-                    }
-
-                    _ => {
-                        expr_below.format(buf, return_indent);
-                    }
+                _ => {
+                    newline(buf, return_indent);
+                    loc_condition.format(buf, return_indent);
+                    newline(buf, indent);
                 }
             }
-            _ => {
-                loc_condition.format(buf, return_indent);
-            }
+        } else {
+            buf.push(' ');
+            loc_condition.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
+            buf.push(' ');
+        }
+
+        buf.push_str("then");
+
+        if is_multiline {
+            match &loc_then.value {
+                Expr::SpaceBefore(expr_below, spaces_below) => {
+                    // we want exactly one newline, user-inserted extra newlines are ignored.
+                    newline(buf, return_indent);
+                    fmt_comments_only(buf, spaces_below.iter(), NewlineAt::Bottom, return_indent);
+
+                    match &expr_below {
+                        Expr::SpaceAfter(expr_above, spaces_above) => {
+                            expr_above.format(buf, return_indent);
+
+                            fmt_comments_only(
+                                buf,
+                                spaces_above.iter(),
+                                NewlineAt::Top,
+                                return_indent,
+                            );
+                            newline(buf, indent);
+                        }
+
+                        _ => {
+                            expr_below.format(buf, return_indent);
+                        }
+                    }
+                }
+                _ => {
+                    loc_condition.format(buf, return_indent);
+                }
+            }
+        } else {
+            buf.push_str(" ");
+            loc_then.format(buf, return_indent);
         }
-    } else {
-        buf.push_str(" ");
-        loc_then.format(buf, return_indent);
     }
 
     if is_multiline {
@@ -728,7 +740,7 @@ fn fmt_if<'a>(
         buf.push_str(" else ");
     }
 
-    loc_else.format(buf, return_indent);
+    final_else.format(buf, return_indent);
 }
 
 pub fn fmt_closure<'a>(
diff --git a/compiler/parse/src/ast.rs b/compiler/parse/src/ast.rs
index 70964e246e..f33ba8149b 100644
--- a/compiler/parse/src/ast.rs
+++ b/compiler/parse/src/ast.rs
@@ -127,7 +127,7 @@ pub enum Expr<'a> {
     UnaryOp(&'a Loc<Expr<'a>>, Loc<UnaryOp>),
 
     // Conditionals
-    If(&'a Loc<Expr<'a>>, &'a Loc<Expr<'a>>, &'a Loc<Expr<'a>>),
+    If(&'a [(Loc<Expr<'a>>, Loc<Expr<'a>>)], &'a Loc<Expr<'a>>),
     When(
         /// The condition
         &'a Loc<Expr<'a>>,
diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs
index 8d187f10fb..5f05d89b17 100644
--- a/compiler/parse/src/expr.rs
+++ b/compiler/parse/src/expr.rs
@@ -324,7 +324,7 @@ pub fn expr_to_pattern<'a>(
         | Expr::Closure(_, _)
         | Expr::BinOp(_)
         | Expr::Defs(_, _)
-        | Expr::If(_, _, _)
+        | Expr::If(_, _)
         | Expr::When(_, _)
         | Expr::MalformedClosure
         | Expr::PrecedenceConflict(_, _, _, _)
@@ -1264,8 +1264,7 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>
         ),
         |arena: &'a Bump, (condition, (then_branch, else_branch))| {
             Expr::If(
-                &*arena.alloc(condition),
-                &*arena.alloc(then_branch),
+                arena.alloc([(condition, then_branch)]),
                 &*arena.alloc(else_branch),
             )
         }
diff --git a/editor/src/lang/expr.rs b/editor/src/lang/expr.rs
index 83791facb1..3cc1a4f912 100644
--- a/editor/src/lang/expr.rs
+++ b/editor/src/lang/expr.rs
@@ -508,22 +508,31 @@ pub fn to_expr2<'a>(
             Output::default(),
         ),
 
-        If(cond, then_branch, else_branch) => {
-            let (cond, mut output) = to_expr2(env, scope, &cond.value, cond.region);
+        If(branches, final_else) => {
+            let mut new_branches = Vec::with_capacity(branches.len());
+            let mut output = Output::default();
 
-            let (then_expr, then_output) =
-                to_expr2(env, scope, &then_branch.value, then_branch.region);
+            for (condition, then_branch) in branches.iter() {
+                let (cond, cond_output) = to_expr2(env, scope, &condition.value, condition.region);
+
+                let (then_expr, then_output) =
+                    to_expr2(env, scope, &then_branch.value, then_branch.region);
+
+                output.references.union_mut(cond_output.references);
+                output.references.union_mut(then_output.references);
+
+                new_branches.push((cond, then_expr));
+            }
 
             let (else_expr, else_output) =
-                to_expr2(env, scope, &else_branch.value, else_branch.region);
+                to_expr2(env, scope, &final_else.value, final_else.region);
 
-            output.references.union_mut(then_output.references);
             output.references.union_mut(else_output.references);
 
             let expr = Expr2::If {
                 cond_var: env.var_store.fresh(),
                 expr_var: env.var_store.fresh(),
-                branches: PoolVec::new(vec![(cond, then_expr)].into_iter(), env.pool),
+                branches: PoolVec::new(new_branches.into_iter(), env.pool),
                 final_else: env.pool.add(else_expr),
             };
 

From 5d8944fc6a4a9ca6910219bf66d6e46369fe334b Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Tue, 23 Feb 2021 15:05:25 +0100
Subject: [PATCH 15/33] use new parser for If

---
 compiler/parse/src/expr.rs   | 90 ++++++++++++++++++++++--------------
 compiler/parse/src/parser.rs | 20 ++++++++
 2 files changed, 76 insertions(+), 34 deletions(-)

diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs
index 5f05d89b17..7fe80a1b02 100644
--- a/compiler/parse/src/expr.rs
+++ b/compiler/parse/src/expr.rs
@@ -11,7 +11,7 @@ use crate::number_literal::number_literal;
 use crate::parser::{
     self, allocated, and_then_with_indent_level, ascii_char, ascii_string, attempt, backtrackable,
     fail, map, newline_char, not, not_followed_by, optional, sep_by1, specialize, specialize_ref,
-    then, unexpected, unexpected_eof, word1, word2, EExpr, Either, ParseResult, Parser, State,
+    then, unexpected, unexpected_eof, word1, word2, EExpr, Either, If, ParseResult, Parser, State,
     SyntaxError, When,
 };
 use crate::pattern::loc_closure_param;
@@ -1234,40 +1234,62 @@ mod when {
     }
 }
 
-pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> {
-    map_with_arena!(
-        and!(
-            skip_first!(
-                parser::keyword(keyword::IF, min_indent),
-                space1_around(
-                    loc!(move |arena, state| parse_expr(min_indent, arena, state)),
-                    min_indent,
-                )
+pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> {
+    move |arena: &'a Bump, state| {
+        let (_, _, state) = parser::keyword_e(keyword::IF, If::If).parse(arena, state)?;
+
+        let mut branches = Vec::with_capacity_in(1, arena);
+
+        let (_, cond, state) = space0_around_e(
+            specialize_ref(
+                If::Syntax,
+                loc!(move |arena, state| parse_expr(min_indent, arena, state)),
             ),
-            and!(
-                skip_first!(
-                    parser::keyword(keyword::THEN, min_indent),
-                    space1_around(
-                        loc!(move |arena, state| parse_expr(min_indent, arena, state)),
-                        min_indent,
-                    )
-                ),
-                skip_first!(
-                    parser::keyword(keyword::ELSE, min_indent),
-                    // NOTE changed this from space1_around to space1_before
-                    space1_before(
-                        loc!(move |arena, state| parse_expr(min_indent, arena, state)),
-                        min_indent,
-                    )
-                )
-            )
-        ),
-        |arena: &'a Bump, (condition, (then_branch, else_branch))| {
-            Expr::If(
-                arena.alloc([(condition, then_branch)]),
-                &*arena.alloc(else_branch),
-            )
-        }
+            min_indent,
+            If::Space,
+            If::IndentCondition,
+        )
+        .parse(arena, state)?;
+
+        let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then).parse(arena, state)?;
+
+        let (_, then_branch, state) = space0_around_e(
+            specialize_ref(
+                If::Syntax,
+                loc!(move |arena, state| parse_expr(min_indent, arena, state)),
+            ),
+            min_indent,
+            If::Space,
+            If::IndentThen,
+        )
+        .parse(arena, state)?;
+
+        let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else).parse(arena, state)?;
+
+        branches.push((cond, then_branch));
+
+        let (_, else_branch, state) = space0_before_e(
+            specialize_ref(
+                If::Syntax,
+                loc!(move |arena, state| parse_expr(min_indent, arena, state)),
+            ),
+            min_indent,
+            If::Space,
+            If::IndentElse,
+        )
+        .parse(arena, state)?;
+
+        // parse the final else
+        let expr = Expr::If(branches.into_bump_slice(), arena.alloc(else_branch));
+
+        Ok((MadeProgress, expr, state))
+    }
+}
+
+pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> {
+    specialize(
+        |e, r, c| SyntaxError::Expr(EExpr::If(e, r, c)),
+        if_expr_help(min_indent),
     )
 }
 
diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs
index 71d2259bc1..a9da276780 100644
--- a/compiler/parse/src/parser.rs
+++ b/compiler/parse/src/parser.rs
@@ -378,6 +378,7 @@ pub enum EExpr<'a> {
     Space(BadInputError, Row, Col),
 
     When(When<'a>, Row, Col),
+    If(If<'a>, Row, Col),
 
     // EInParens(PInParens<'a>, Row, Col),
     IndentStart(Row, Col),
@@ -408,6 +409,25 @@ pub enum When<'a> {
     PatternAlignment(u16, Row, Col),
 }
 
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum If<'a> {
+    Space(BadInputError, Row, Col),
+    If(Row, Col),
+    Then(Row, Col),
+    Else(Row, Col),
+    // TODO make EExpr
+    Condition(&'a EExpr<'a>, Row, Col),
+    ThenBranch(&'a EExpr<'a>, Row, Col),
+    ElseBranch(&'a EExpr<'a>, Row, Col),
+    Syntax(&'a SyntaxError<'a>, Row, Col),
+
+    IndentCondition(Row, Col),
+    IndentThen(Row, Col),
+    IndentElse(Row, Col),
+
+    PatternAlignment(u16, Row, Col),
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum EPattern<'a> {
     Record(PRecord<'a>, Row, Col),

From 3907680536dd608505395a3b82a3d842932cf3ad Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Tue, 23 Feb 2021 15:21:19 +0100
Subject: [PATCH 16/33] parse multiple if-then-else pairs into one AST node

---
 compiler/parse/src/expr.rs                 | 82 ++++++++++++++--------
 compiler/parse/src/parser.rs               |  1 +
 compiler/reporting/tests/test_reporting.rs | 59 ++++++++--------
 3 files changed, 85 insertions(+), 57 deletions(-)

diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs
index 7fe80a1b02..84964e164b 100644
--- a/compiler/parse/src/expr.rs
+++ b/compiler/parse/src/expr.rs
@@ -3,7 +3,7 @@ use crate::ast::{
 };
 use crate::blankspace::{
     line_comment, space0, space0_after, space0_after_e, space0_around, space0_around_e,
-    space0_before, space0_before_e, space0_e, space1, space1_around, space1_before, spaces_exactly,
+    space0_before, space0_before_e, space0_e, space1, space1_before, spaces_exactly,
 };
 use crate::ident::{global_tag_or_ident, ident, lowercase_ident, Ident};
 use crate::keyword;
@@ -1240,33 +1240,59 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> {
 
         let mut branches = Vec::with_capacity_in(1, arena);
 
-        let (_, cond, state) = space0_around_e(
-            specialize_ref(
-                If::Syntax,
-                loc!(move |arena, state| parse_expr(min_indent, arena, state)),
-            ),
-            min_indent,
-            If::Space,
-            If::IndentCondition,
-        )
-        .parse(arena, state)?;
+        let mut loop_state = state;
 
-        let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then).parse(arena, state)?;
+        let state_final_else = loop {
+            let state = loop_state;
+            let (_, cond, state) = space0_around_e(
+                specialize_ref(
+                    If::Syntax,
+                    loc!(move |arena, state| parse_expr(min_indent, arena, state)),
+                ),
+                min_indent,
+                If::Space,
+                If::IndentCondition,
+            )
+            .parse(arena, state)
+            .map_err(|(_, f, s)| (MadeProgress, f, s))?;
 
-        let (_, then_branch, state) = space0_around_e(
-            specialize_ref(
-                If::Syntax,
-                loc!(move |arena, state| parse_expr(min_indent, arena, state)),
-            ),
-            min_indent,
-            If::Space,
-            If::IndentThen,
-        )
-        .parse(arena, state)?;
+            let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then)
+                .parse(arena, state)
+                .map_err(|(_, f, s)| (MadeProgress, f, s))?;
 
-        let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else).parse(arena, state)?;
+            let (_, then_branch, state) = space0_around_e(
+                specialize_ref(
+                    If::Syntax,
+                    loc!(move |arena, state| parse_expr(min_indent, arena, state)),
+                ),
+                min_indent,
+                If::Space,
+                If::IndentThen,
+            )
+            .parse(arena, state)
+            .map_err(|(_, f, s)| (MadeProgress, f, s))?;
 
-        branches.push((cond, then_branch));
+            let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else)
+                .parse(arena, state)
+                .map_err(|(_, f, s)| (MadeProgress, f, s))?;
+
+            branches.push((cond, then_branch));
+
+            // try to parse another `if`
+            // NOTE this drops spaces between the `else` and the `if`
+            let optional_if = and!(
+                backtrackable(space0_e(min_indent, If::Space, If::IndentIf)),
+                parser::keyword_e(keyword::IF, If::If)
+            );
+
+            match optional_if.parse(arena, state) {
+                Err((_, _, state)) => break state,
+                Ok((_, _, state)) => {
+                    loop_state = state;
+                    continue;
+                }
+            }
+        };
 
         let (_, else_branch, state) = space0_before_e(
             specialize_ref(
@@ -1277,9 +1303,9 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> {
             If::Space,
             If::IndentElse,
         )
-        .parse(arena, state)?;
+        .parse(arena, state_final_else)
+        .map_err(|(_, f, s)| (MadeProgress, f, s))?;
 
-        // parse the final else
         let expr = Expr::If(branches.into_bump_slice(), arena.alloc(else_branch));
 
         Ok((MadeProgress, expr, state))
@@ -1287,10 +1313,10 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> {
 }
 
 pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> {
-    specialize(
+    debug!(specialize(
         |e, r, c| SyntaxError::Expr(EExpr::If(e, r, c)),
         if_expr_help(min_indent),
-    )
+    ))
 }
 
 /// This is a helper function for parsing function args.
diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs
index a9da276780..9f6eba16bd 100644
--- a/compiler/parse/src/parser.rs
+++ b/compiler/parse/src/parser.rs
@@ -422,6 +422,7 @@ pub enum If<'a> {
     Syntax(&'a SyntaxError<'a>, Row, Col),
 
     IndentCondition(Row, Col),
+    IndentIf(Row, Col),
     IndentThen(Row, Col),
     IndentElse(Row, Col),
 
diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs
index 435f092b1d..66be20f5ca 100644
--- a/compiler/reporting/tests/test_reporting.rs
+++ b/compiler/reporting/tests/test_reporting.rs
@@ -801,35 +801,36 @@ mod test_reporting {
         )
     }
 
-    // #[test]
-    // fn if_3_branch_mismatch() {
-    //     report_problem_as(
-    //         indoc!(
-    //             r#"
-    //             if True then 2 else if False then 2 else "foo"
-    //             "#
-    //         ),
-    //         indoc!(
-    //             r#"
-    //  ── TYPE MISMATCH ───────────────────────────────────────────────────────────────
-
-    //             The 2nd branch of this `if` does not match all the previous branches:
-
-    //             1│ if True then 2 else "foo"
-    //                                     ^^^^^
-
-    //             The 2nd branch is a string of type
-
-    //                 Str
-
-    //             But all the previous branches have the type
-
-    //                 Num a
-
-    //             "#
-    //         ),
-    //     )
-    // }
+    #[test]
+    fn if_3_branch_mismatch() {
+        report_problem_as(
+            indoc!(
+                r#"
+                 if True then 2 else if False then 2 else "foo"
+                 "#
+            ),
+            indoc!(
+                r#"
+                ── TYPE MISMATCH ───────────────────────────────────────────────────────────────
+                
+                The 3rd branch of this `if` does not match all the previous branches:
+                
+                1│  if True then 2 else if False then 2 else "foo"
+                                                             ^^^^^
+                
+                The 3rd branch is a string of type:
+                
+                    Str
+                
+                But all the previous branches have type:
+                
+                    Num a
+                
+                I need all branches in an `if` to have the same type!
+                "#
+            ),
+        )
+    }
 
     #[test]
     fn when_branch_mismatch() {

From 6eab8abe9e4b75e2fd5289ca64bbdc4546ede774 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Tue, 23 Feb 2021 18:34:08 +0100
Subject: [PATCH 17/33] improve message for outdented then

---
 compiler/parse/src/blankspace.rs           |  12 +-
 compiler/parse/src/expr.rs                 |  88 +++++++------
 compiler/parse/src/parser.rs               |  11 +-
 compiler/parse/src/pattern.rs              |   5 +-
 compiler/parse/src/type_annotation.rs      |  10 +-
 compiler/reporting/src/error/parse.rs      | 141 ++++++++++++++++++++-
 compiler/reporting/tests/test_reporting.rs |  60 ++++++++-
 7 files changed, 270 insertions(+), 57 deletions(-)

diff --git a/compiler/parse/src/blankspace.rs b/compiler/parse/src/blankspace.rs
index 10b256b9b8..f2234ebed0 100644
--- a/compiler/parse/src/blankspace.rs
+++ b/compiler/parse/src/blankspace.rs
@@ -60,11 +60,12 @@ where
     )
 }
 
-pub fn space0_around_e<'a, P, S, E>(
+pub fn space0_around_ee<'a, P, S, E>(
     parser: P,
     min_indent: u16,
     space_problem: fn(BadInputError, Row, Col) -> E,
-    indent_problem: fn(Row, Col) -> E,
+    indent_before_problem: fn(Row, Col) -> E,
+    indent_after_problem: fn(Row, Col) -> E,
 ) -> impl Parser<'a, Located<S>, E>
 where
     S: Spaceable<'a>,
@@ -75,8 +76,11 @@ where
 {
     parser::map_with_arena(
         and(
-            space0_e(min_indent, space_problem, indent_problem),
-            and(parser, space0_e(min_indent, space_problem, indent_problem)),
+            space0_e(min_indent, space_problem, indent_before_problem),
+            and(
+                parser,
+                space0_e(min_indent, space_problem, indent_after_problem),
+            ),
         ),
         move |arena: &'a Bump,
               tuples: (
diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs
index 84964e164b..1b01a08edb 100644
--- a/compiler/parse/src/expr.rs
+++ b/compiler/parse/src/expr.rs
@@ -2,7 +2,7 @@ use crate::ast::{
     AssignedField, Attempting, CommentOrNewline, Def, Expr, Pattern, Spaceable, TypeAnnotation,
 };
 use crate::blankspace::{
-    line_comment, space0, space0_after, space0_after_e, space0_around, space0_around_e,
+    line_comment, space0, space0_after, space0_after_e, space0_around, space0_around_ee,
     space0_before, space0_before_e, space0_e, space1, space1_before, spaces_exactly,
 };
 use crate::ident::{global_tag_or_ident, ident, lowercase_ident, Ident};
@@ -1029,14 +1029,15 @@ mod when {
             and!(
                 when_with_indent(),
                 skip_second!(
-                    space0_around_e(
+                    space0_around_ee(
                         loc!(specialize_ref(
                             When::Syntax,
                             move |arena, state| parse_expr(min_indent, arena, state)
                         )),
                         min_indent,
                         When::Space,
-                        When::IndentCondition
+                        When::IndentCondition,
+                        When::IndentIs,
                     ),
                     parser::keyword_e(keyword::IS, When::Is)
                 )
@@ -1182,13 +1183,14 @@ mod when {
                     skip_first!(
                         parser::keyword_e(keyword::IF, When::IfToken),
                         // TODO we should require space before the expression but not after
-                        space0_around_e(
+                        space0_around_ee(
                             loc!(specialize_ref(When::IfGuard, move |arena, state| {
                                 parse_expr(min_indent, arena, state)
                             })),
                             min_indent,
                             When::Space,
                             When::IndentIfGuard,
+                            When::IndentArrow,
                         )
                     ),
                     Some
@@ -1234,6 +1236,49 @@ mod when {
     }
 }
 
+fn if_branch<'a>(
+    min_indent: u16,
+) -> impl Parser<'a, (Located<Expr<'a>>, Located<Expr<'a>>), If<'a>> {
+    move |arena, state| {
+        // NOTE: only parse spaces before the expression
+        let (_, cond, state) = space0_around_ee(
+            specialize_ref(
+                If::Syntax,
+                loc!(move |arena, state| parse_expr(min_indent, arena, state)),
+            ),
+            min_indent,
+            If::Space,
+            If::IndentCondition,
+            If::IndentThenToken,
+        )
+        .parse(arena, state)
+        .map_err(|(_, f, s)| (MadeProgress, f, s))?;
+
+        let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then)
+            .parse(arena, state)
+            .map_err(|(_, f, s)| (MadeProgress, f, s))?;
+
+        let (_, then_branch, state) = space0_around_ee(
+            specialize_ref(
+                If::Syntax,
+                loc!(move |arena, state| parse_expr(min_indent, arena, state)),
+            ),
+            min_indent,
+            If::Space,
+            If::IndentThenBranch,
+            If::IndentElseToken,
+        )
+        .parse(arena, state)
+        .map_err(|(_, f, s)| (MadeProgress, f, s))?;
+
+        let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else)
+            .parse(arena, state)
+            .map_err(|(_, f, s)| (MadeProgress, f, s))?;
+
+        Ok((MadeProgress, (cond, then_branch), state))
+    }
+}
+
 pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> {
     move |arena: &'a Bump, state| {
         let (_, _, state) = parser::keyword_e(keyword::IF, If::If).parse(arena, state)?;
@@ -1243,38 +1288,7 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> {
         let mut loop_state = state;
 
         let state_final_else = loop {
-            let state = loop_state;
-            let (_, cond, state) = space0_around_e(
-                specialize_ref(
-                    If::Syntax,
-                    loc!(move |arena, state| parse_expr(min_indent, arena, state)),
-                ),
-                min_indent,
-                If::Space,
-                If::IndentCondition,
-            )
-            .parse(arena, state)
-            .map_err(|(_, f, s)| (MadeProgress, f, s))?;
-
-            let (_, _, state) = parser::keyword_e(keyword::THEN, If::Then)
-                .parse(arena, state)
-                .map_err(|(_, f, s)| (MadeProgress, f, s))?;
-
-            let (_, then_branch, state) = space0_around_e(
-                specialize_ref(
-                    If::Syntax,
-                    loc!(move |arena, state| parse_expr(min_indent, arena, state)),
-                ),
-                min_indent,
-                If::Space,
-                If::IndentThen,
-            )
-            .parse(arena, state)
-            .map_err(|(_, f, s)| (MadeProgress, f, s))?;
-
-            let (_, _, state) = parser::keyword_e(keyword::ELSE, If::Else)
-                .parse(arena, state)
-                .map_err(|(_, f, s)| (MadeProgress, f, s))?;
+            let (_, (cond, then_branch), state) = if_branch(min_indent).parse(arena, loop_state)?;
 
             branches.push((cond, then_branch));
 
@@ -1301,7 +1315,7 @@ pub fn if_expr_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, If<'a>> {
             ),
             min_indent,
             If::Space,
-            If::IndentElse,
+            If::IndentElseBranch,
         )
         .parse(arena, state_final_else)
         .map_err(|(_, f, s)| (MadeProgress, f, s))?;
diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs
index 9f6eba16bd..b1d3d3545a 100644
--- a/compiler/parse/src/parser.rs
+++ b/compiler/parse/src/parser.rs
@@ -423,10 +423,10 @@ pub enum If<'a> {
 
     IndentCondition(Row, Col),
     IndentIf(Row, Col),
-    IndentThen(Row, Col),
-    IndentElse(Row, Col),
-
-    PatternAlignment(u16, Row, Col),
+    IndentThenToken(Row, Col),
+    IndentElseToken(Row, Col),
+    IndentThenBranch(Row, Col),
+    IndentElseBranch(Row, Col),
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -1452,10 +1452,11 @@ macro_rules! collection_trailing_sep_e {
                                     and!(
                                         $crate::parser::trailing_sep_by0(
                                             $delimiter,
-                                            $crate::blankspace::space0_around_e(
+                                            $crate::blankspace::space0_around_ee(
                                                 $elem,
                                                 $min_indent,
                                                 $space_problem,
+                                                $indent_problem,
                                                 $indent_problem
                                             )
                                         ),
diff --git a/compiler/parse/src/pattern.rs b/compiler/parse/src/pattern.rs
index b9b3f576e6..02daa8c5a9 100644
--- a/compiler/parse/src/pattern.rs
+++ b/compiler/parse/src/pattern.rs
@@ -1,5 +1,5 @@
 use crate::ast::Pattern;
-use crate::blankspace::{space0_around_e, space0_before_e, space0_e};
+use crate::blankspace::{space0_around_ee, space0_before_e, space0_e};
 use crate::ident::{ident, lowercase_ident, Ident};
 use crate::number_literal::number_literal;
 use crate::parser::Progress::{self, *};
@@ -133,11 +133,12 @@ fn loc_pattern_in_parens_help<'a>(
 ) -> impl Parser<'a, Located<Pattern<'a>>, PInParens<'a>> {
     between!(
         word1(b'(', PInParens::Open),
-        space0_around_e(
+        space0_around_ee(
             move |arena, state| specialize_ref(PInParens::Syntax, loc_pattern(min_indent))
                 .parse(arena, state),
             min_indent,
             PInParens::Space,
+            PInParens::IndentOpen,
             PInParens::IndentEnd,
         ),
         word1(b')', PInParens::End)
diff --git a/compiler/parse/src/type_annotation.rs b/compiler/parse/src/type_annotation.rs
index 974ec7f94b..88181f0908 100644
--- a/compiler/parse/src/type_annotation.rs
+++ b/compiler/parse/src/type_annotation.rs
@@ -1,5 +1,5 @@
 use crate::ast::{AssignedField, Tag, TypeAnnotation};
-use crate::blankspace::{space0_around_e, space0_before_e, space0_e};
+use crate::blankspace::{space0_around_ee, space0_before_e, space0_e};
 use crate::ident::join_module_parts;
 use crate::keyword;
 use crate::parser::{
@@ -146,11 +146,12 @@ fn loc_type_in_parens<'a>(
 ) -> impl Parser<'a, Located<TypeAnnotation<'a>>, TInParens<'a>> {
     between!(
         word1(b'(', TInParens::Open),
-        space0_around_e(
+        space0_around_ee(
             move |arena, state| specialize_ref(TInParens::Type, expression(min_indent))
                 .parse(arena, state),
             min_indent,
             TInParens::Space,
+            TInParens::IndentOpen,
             TInParens::IndentEnd,
         ),
         word1(b')', TInParens::End)
@@ -436,11 +437,12 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
         let (p2, rest, state) = zero_or_more!(skip_first!(
             word1(b',', Type::TFunctionArgument),
             one_of![
-                space0_around_e(
+                space0_around_ee(
                     term(min_indent),
                     min_indent,
                     Type::TSpace,
-                    Type::TIndentStart
+                    Type::TIndentStart,
+                    Type::TIndentEnd
                 ),
                 |_, state: State<'a>| Err((
                     NoProgress,
diff --git a/compiler/reporting/src/error/parse.rs b/compiler/reporting/src/error/parse.rs
index 3431c87379..33f09653c1 100644
--- a/compiler/reporting/src/error/parse.rs
+++ b/compiler/reporting/src/error/parse.rs
@@ -158,7 +158,9 @@ enum Context {
 enum Node {
     WhenCondition,
     WhenBranch,
-    // WhenIfGuard,
+    IfCondition,
+    IfThenBranch,
+    IfElseBranch,
 }
 
 fn to_expr_report<'a>(
@@ -173,10 +175,130 @@ fn to_expr_report<'a>(
 
     match parse_problem {
         EExpr::When(when, row, col) => to_when_report(alloc, filename, context, &when, *row, *col),
+        EExpr::If(when, row, col) => to_if_report(alloc, filename, context, &when, *row, *col),
         _ => todo!("unhandled parse error: {:?}", parse_problem),
     }
 }
 
+fn to_if_report<'a>(
+    alloc: &'a RocDocAllocator<'a>,
+    filename: PathBuf,
+    context: Context,
+    parse_problem: &roc_parse::parser::If<'a>,
+    start_row: Row,
+    start_col: Col,
+) -> Report<'a> {
+    use roc_parse::parser::If;
+
+    match *parse_problem {
+        If::Syntax(syntax, row, col) => to_syntax_report(alloc, filename, syntax, row, col),
+        If::Space(error, row, col) => to_space_report(alloc, filename, &error, row, col),
+
+        If::Condition(expr, row, col) => to_expr_report(
+            alloc,
+            filename,
+            Context::InNode(Node::IfCondition, start_row, start_col, Box::new(context)),
+            expr,
+            row,
+            col,
+        ),
+
+        If::ThenBranch(expr, row, col) => to_expr_report(
+            alloc,
+            filename,
+            Context::InNode(Node::IfThenBranch, start_row, start_col, Box::new(context)),
+            expr,
+            row,
+            col,
+        ),
+
+        If::ElseBranch(expr, row, col) => to_expr_report(
+            alloc,
+            filename,
+            Context::InNode(Node::IfElseBranch, start_row, start_col, Box::new(context)),
+            expr,
+            row,
+            col,
+        ),
+
+        If::If(_row, _col) => unreachable!("another branch would be taken"),
+        If::IndentIf(_row, _col) => unreachable!("another branch would be taken"),
+
+        If::Then(row, col) | If::IndentThenBranch(row, col) | If::IndentThenToken(row, col) => {
+            to_unfinished_if_report(
+                alloc,
+                filename,
+                row,
+                col,
+                start_row,
+                start_col,
+                alloc.concat(vec![
+                    alloc.reflow(r"I was expecting to see the "),
+                    alloc.keyword("then"),
+                    alloc.reflow(r" keyword next."),
+                ]),
+            )
+        }
+
+        If::Else(row, col) | If::IndentElseBranch(row, col) | If::IndentElseToken(row, col) => {
+            to_unfinished_if_report(
+                alloc,
+                filename,
+                row,
+                col,
+                start_row,
+                start_col,
+                alloc.concat(vec![
+                    alloc.reflow(r"I was expecting to see the "),
+                    alloc.keyword("else"),
+                    alloc.reflow(r" keyword next."),
+                ]),
+            )
+        }
+
+        If::IndentCondition(row, col) => to_unfinished_if_report(
+            alloc,
+            filename,
+            row,
+            col,
+            start_row,
+            start_col,
+            alloc.concat(vec![
+                alloc.reflow(r"I was expecting to see an expression next")
+            ]),
+        ),
+    }
+}
+
+fn to_unfinished_if_report<'a>(
+    alloc: &'a RocDocAllocator<'a>,
+    filename: PathBuf,
+    row: Row,
+    col: Col,
+    start_row: Row,
+    start_col: Col,
+    message: RocDocBuilder<'a>,
+) -> Report<'a> {
+    let surroundings = Region::from_rows_cols(start_row, start_col, row, col);
+    let region = Region::from_row_col(row, col);
+
+    let doc = alloc.stack(vec![
+        alloc.concat(vec![
+            alloc.reflow(r"I was partway through parsing an "),
+            alloc.keyword("if"),
+            alloc.reflow(r" expression, but I got stuck here:"),
+        ]),
+        alloc.region_with_subregion(surroundings, region),
+        message,
+    ]);
+
+    Report {
+        filename,
+        doc,
+        title: "UNFINISHED IF".to_string(),
+    }
+}
+
 fn to_when_report<'a>(
     alloc: &'a RocDocAllocator<'a>,
     filename: PathBuf,
@@ -792,6 +914,23 @@ fn to_type_report<'a>(
             }
         }
 
+        Type::TIndentEnd(row, col) => {
+            let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col);
+            let region = Region::from_row_col(*row, *col);
+
+            let doc = alloc.stack(vec![
+                alloc.reflow(r"I am partway through parsing a type, but I got stuck here:"),
+                alloc.region_with_subregion(surroundings, region),
+                alloc.note("I may be confused by indentation"),
+            ]);
+
+            Report {
+                filename,
+                doc,
+                title: "UNFINISHED TYPE".to_string(),
+            }
+        }
+
         Type::TAsIndentStart(row, col) => {
             let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col);
             let region = Region::from_row_col(*row, *col);
diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs
index 66be20f5ca..aa54cccaae 100644
--- a/compiler/reporting/tests/test_reporting.rs
+++ b/compiler/reporting/tests/test_reporting.rs
@@ -4636,12 +4636,12 @@ mod test_reporting {
             indoc!(
                 r#"
                 ── UNFINISHED TYPE ─────────────────────────────────────────────────────────────
-
-                I just started parsing a type, but I got stuck here:
-
+                
+                I am partway through parsing a type, but I got stuck here:
+                
                 1│  f : I64, I64
                                 ^
-
+                
                 Note: I may be confused by indentation
             "#
             ),
@@ -4950,4 +4950,56 @@ mod test_reporting {
             ),
         )
     }
+
+    #[test]
+    fn if_outdented_then() {
+        // TODO I think we can do better here
+        report_problem_as(
+            indoc!(
+                r#"
+                x =
+                    if 5 == 5 
+                then 2 else 3
+
+                x
+                "#
+            ),
+            indoc!(
+                r#"
+                ── UNFINISHED IF ───────────────────────────────────────────────────────────────
+                
+                I was partway through parsing an `if` expression, but I got stuck here:
+                
+                2│      if 5 == 5 
+                                 ^
+                
+                I was expecting to see the `then` keyword next.
+            "#
+            ),
+        )
+    }
+
+    #[test]
+    fn if_missing_else() {
+        // this should get better with time
+        report_problem_as(
+            indoc!(
+                r#"
+                if 5 == 5 then 2
+                "#
+            ),
+            indoc!(
+                r#"
+                ── UNFINISHED IF ───────────────────────────────────────────────────────────────
+                
+                I was partway through parsing an `if` expression, but I got stuck here:
+                
+                1│  if 5 == 5 then 2
+                                    ^
+                
+                I was expecting to see the `else` keyword next.
+            "#
+            ),
+        )
+    }
 }

From f3234e002ab8ee6c74be55c148dda5aca711ef28 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Tue, 23 Feb 2021 20:05:58 +0100
Subject: [PATCH 18/33] change list over

---
 compiler/parse/src/expr.rs   | 65 +++++++++++++++++++-----------------
 compiler/parse/src/parser.rs | 14 ++++++++
 2 files changed, 49 insertions(+), 30 deletions(-)

diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs
index 1b01a08edb..aebe5044b7 100644
--- a/compiler/parse/src/expr.rs
+++ b/compiler/parse/src/expr.rs
@@ -11,8 +11,8 @@ use crate::number_literal::number_literal;
 use crate::parser::{
     self, allocated, and_then_with_indent_level, ascii_char, ascii_string, attempt, backtrackable,
     fail, map, newline_char, not, not_followed_by, optional, sep_by1, specialize, specialize_ref,
-    then, unexpected, unexpected_eof, word1, word2, EExpr, Either, If, ParseResult, Parser, State,
-    SyntaxError, When,
+    then, unexpected, unexpected_eof, word1, word2, BadInputError, EExpr, Either, If, List,
+    ParseResult, Parser, State, SyntaxError, When,
 };
 use crate::pattern::loc_closure_param;
 use crate::type_annotation;
@@ -1693,37 +1693,42 @@ fn binop<'a>() -> impl Parser<'a, BinOp, SyntaxError<'a>> {
         map!(ascii_char(b'%'), |_| BinOp::Percent)
     )
 }
-
-pub fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> {
-    let elems = collection_trailing_sep!(
-        ascii_char(b'['),
-        loc!(expr(min_indent)),
-        ascii_char(b','),
-        ascii_char(b']'),
-        min_indent
-    );
-
-    parser::attempt(
-        Attempting::List,
-        map_with_arena!(elems, |arena,
-                                (parsed_elems, final_comments): (
-            Vec<'a, Located<Expr<'a>>>,
-            &'a [CommentOrNewline<'a>]
-        )| {
-            let mut allocated = Vec::with_capacity_in(parsed_elems.len(), arena);
-
-            for parsed_elem in parsed_elems {
-                allocated.push(&*arena.alloc(parsed_elem));
-            }
-
-            Expr::List {
-                items: allocated.into_bump_slice(),
-                final_comments,
-            }
-        }),
+fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> {
+    specialize(
+        |e, r, c| SyntaxError::Expr(EExpr::List(e, r, c)),
+        list_literal_help(min_indent),
     )
 }
 
+fn list_literal_help<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, List<'a>> {
+    move |arena, state| {
+        let (_, (parsed_elems, final_comments), state) = collection_trailing_sep_e!(
+            word1(b'[', List::Open),
+            specialize_ref(List::Syntax, loc!(expr(min_indent))),
+            word1(b',', List::End),
+            word1(b']', List::End),
+            min_indent,
+            List::Open,
+            List::Space,
+            List::IndentEnd
+        )
+        .parse(arena, state)?;
+
+        let mut allocated = Vec::with_capacity_in(parsed_elems.len(), arena);
+
+        for parsed_elem in parsed_elems {
+            allocated.push(&*arena.alloc(parsed_elem));
+        }
+
+        let expr = Expr::List {
+            items: allocated.into_bump_slice(),
+            final_comments,
+        };
+
+        Ok((MadeProgress, expr, state))
+    }
+}
+
 // Parser<'a, Vec<'a, Located<AssignedField<'a, S>>>>
 fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> {
     then(
diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs
index b1d3d3545a..1b2ea71cce 100644
--- a/compiler/parse/src/parser.rs
+++ b/compiler/parse/src/parser.rs
@@ -380,11 +380,25 @@ pub enum EExpr<'a> {
     When(When<'a>, Row, Col),
     If(If<'a>, Row, Col),
 
+    List(List<'a>, Row, Col),
+
     // EInParens(PInParens<'a>, Row, Col),
     IndentStart(Row, Col),
     IndentEnd(Row, Col),
 }
 
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum List<'a> {
+    Open(Row, Col),
+    End(Row, Col),
+    Space(BadInputError, Row, Col),
+
+    Syntax(&'a SyntaxError<'a>, Row, Col),
+
+    IndentStart(Row, Col),
+    IndentEnd(Row, Col),
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum When<'a> {
     Space(BadInputError, Row, Col),

From 80b64b42ff348ec22d9aec5950a65a76dce5e4aa Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Tue, 23 Feb 2021 23:57:17 +0100
Subject: [PATCH 19/33] tests and list error messages

---
 compiler/parse/src/parser.rs               |   3 +-
 compiler/reporting/src/error/parse.rs      | 114 ++++++++++++++++++++-
 compiler/reporting/tests/test_reporting.rs |  82 +++++++++++++++
 3 files changed, 197 insertions(+), 2 deletions(-)

diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs
index 1b2ea71cce..ab980a5b38 100644
--- a/compiler/parse/src/parser.rs
+++ b/compiler/parse/src/parser.rs
@@ -394,8 +394,9 @@ pub enum List<'a> {
     Space(BadInputError, Row, Col),
 
     Syntax(&'a SyntaxError<'a>, Row, Col),
+    Expr(&'a EExpr<'a>, Row, Col),
 
-    IndentStart(Row, Col),
+    IndentOpen(Row, Col),
     IndentEnd(Row, Col),
 }
 
diff --git a/compiler/reporting/src/error/parse.rs b/compiler/reporting/src/error/parse.rs
index 33f09653c1..37c155e2cd 100644
--- a/compiler/reporting/src/error/parse.rs
+++ b/compiler/reporting/src/error/parse.rs
@@ -161,6 +161,7 @@ enum Node {
     IfCondition,
     IfThenBranch,
     IfElseBranch,
+    ListElement,
 }
 
 fn to_expr_report<'a>(
@@ -175,11 +176,121 @@ fn to_expr_report<'a>(
 
     match parse_problem {
         EExpr::When(when, row, col) => to_when_report(alloc, filename, context, &when, *row, *col),
-        EExpr::If(when, row, col) => to_if_report(alloc, filename, context, &when, *row, *col),
+        EExpr::If(if_, row, col) => to_if_report(alloc, filename, context, &if_, *row, *col),
+        EExpr::List(list, row, col) => to_list_report(alloc, filename, context, &list, *row, *col),
         _ => todo!("unhandled parse error: {:?}", parse_problem),
     }
 }
 
+fn to_list_report<'a>(
+    alloc: &'a RocDocAllocator<'a>,
+    filename: PathBuf,
+    context: Context,
+    parse_problem: &roc_parse::parser::List<'a>,
+    start_row: Row,
+    start_col: Col,
+) -> Report<'a> {
+    use roc_parse::parser::List;
+
+    match *parse_problem {
+        List::Syntax(syntax, row, col) => to_syntax_report(alloc, filename, syntax, row, col),
+        List::Space(error, row, col) => to_space_report(alloc, filename, &error, row, col),
+
+        List::Expr(expr, row, col) => to_expr_report(
+            alloc,
+            filename,
+            Context::InNode(Node::ListElement, start_row, start_col, Box::new(context)),
+            expr,
+            row,
+            col,
+        ),
+
+        List::Open(row, col) | List::End(row, col) => {
+            match what_is_next(alloc.src_lines, row, col) {
+                Next::Other(Some(',')) => {
+                    let surroundings = Region::from_rows_cols(start_row, start_col, row, col);
+                    let region = Region::from_row_col(row, col);
+
+                    let doc = alloc.stack(vec![
+                        alloc.reflow(
+                            r"I am partway through started parsing a list, but I got stuck here:",
+                        ),
+                        alloc.region_with_subregion(surroundings, region),
+                        alloc.concat(vec![
+                            alloc
+                                .reflow(r"I was expecting to see a list entry before this comma, "),
+                            alloc.reflow(r"so try adding a list entry"),
+                            alloc.reflow(r" and see if that helps?"),
+                        ]),
+                    ]);
+                    Report {
+                        filename,
+                        doc,
+                        title: "UNFINISHED LIST".to_string(),
+                    }
+                }
+                _ => {
+                    let surroundings = Region::from_rows_cols(start_row, start_col, row, col);
+                    let region = Region::from_row_col(row, col);
+
+                    let doc = alloc.stack(vec![
+                        alloc.reflow(
+                            r"I am partway through started parsing a list, but I got stuck here:",
+                        ),
+                        alloc.region_with_subregion(surroundings, region),
+                        alloc.concat(vec![
+                            alloc.reflow(
+                                r"I was expecting to see a closing square bracket before this, ",
+                            ),
+                            alloc.reflow(r"so try adding a "),
+                            alloc.parser_suggestion("]"),
+                            alloc.reflow(r" and see if that helps?"),
+                        ]),
+                        alloc.concat(vec![
+                            alloc.note("When "),
+                            alloc.reflow(r"I get stuck like this, "),
+                            alloc.reflow(r"it usually means that there is a missing parenthesis "),
+                            alloc.reflow(r"or bracket somewhere earlier. "),
+                            alloc.reflow(r"It could also be a stray keyword or operator."),
+                        ]),
+                    ]);
+
+                    Report {
+                        filename,
+                        doc,
+                        title: "UNFINISHED LIST".to_string(),
+                    }
+                }
+            }
+        }
+
+        List::IndentOpen(row, col) | List::IndentEnd(row, col) => {
+            let surroundings = Region::from_rows_cols(start_row, start_col, row, col);
+            let region = Region::from_row_col(row, col);
+
+            let doc = alloc.stack(vec![
+                alloc.reflow(r"I cannot find the end of this list:"),
+                alloc.region_with_subregion(surroundings, region),
+                alloc.concat(vec![
+                    alloc.reflow(r"You could change it to something like "),
+                    alloc.parser_suggestion("[ 1, 2, 3 ]"),
+                    alloc.reflow(" or even just "),
+                    alloc.parser_suggestion("[]"),
+                    alloc.reflow(". Anything where there is an open and a close square bracket, "),
+                    alloc.reflow("and where the elements of the list are separated by commas."),
+                ]),
+                note_for_tag_union_type_indent(alloc),
+            ]);
+
+            Report {
+                filename,
+                doc,
+                title: "UNFINISHED LIST".to_string(),
+            }
+        }
+    }
+}
+
 fn to_if_report<'a>(
     alloc: &'a RocDocAllocator<'a>,
     filename: PathBuf,
@@ -1745,6 +1856,7 @@ fn to_space_report<'a>(
     }
 }
 
+#[derive(Debug)]
 enum Next<'a> {
     Keyword(&'a str),
     // Operator(&'a str),
diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs
index aa54cccaae..7ab6eefa14 100644
--- a/compiler/reporting/tests/test_reporting.rs
+++ b/compiler/reporting/tests/test_reporting.rs
@@ -5002,4 +5002,86 @@ mod test_reporting {
             ),
         )
     }
+
+    #[test]
+    fn list_double_comma() {
+        report_problem_as(
+            indoc!(
+                r#"
+                [ 1, 2, , 3 ]
+                "#
+            ),
+            indoc!(
+                r#"
+                ── UNFINISHED LIST ─────────────────────────────────────────────────────────────
+                
+                I am partway through started parsing a list, but I got stuck here:
+                
+                1│  [ 1, 2, , 3 ]
+                            ^
+                
+                I was expecting to see a list entry before this comma, so try adding a
+                list entry and see if that helps?
+            "#
+            ),
+        )
+    }
+
+    #[test]
+    fn list_without_end() {
+        report_problem_as(
+            indoc!(
+                r#"
+                [ 1, 2, 
+                "#
+            ),
+            indoc!(
+                r#"
+                ── UNFINISHED LIST ─────────────────────────────────────────────────────────────
+                
+                I am partway through started parsing a list, but I got stuck here:
+                
+                1│  [ 1, 2, 
+                           ^
+                
+                I was expecting to see a closing square bracket before this, so try
+                adding a ] and see if that helps?
+                
+                Note: When I get stuck like this, it usually means that there is a
+                missing parenthesis or bracket somewhere earlier. It could also be a
+                stray keyword or operator.
+            "#
+            ),
+        )
+    }
+
+    #[test]
+    fn list_bad_indent() {
+        report_problem_as(
+            indoc!(
+                r#"
+                x = [ 1, 2, 
+                ]
+
+                x
+                "#
+            ),
+            indoc!(
+                r#"
+                ── UNFINISHED LIST ─────────────────────────────────────────────────────────────
+                
+                I cannot find the end of this list:
+                
+                1│  x = [ 1, 2, 
+                               ^
+                
+                You could change it to something like [ 1, 2, 3 ] or even just [].
+                Anything where there is an open and a close square bracket, and where
+                the elements of the list are separated by commas.
+                
+                Note: I may be confused by indentation
+            "#
+            ),
+        )
+    }
 }

From 1c98bca071b71bc7db4985ccd1974f526dbb3ee3 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 00:56:27 +0100
Subject: [PATCH 20/33] astar test does not use stdin

---
 cli/tests/cli_run.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs
index 6ad19aed58..49388b27b0 100644
--- a/cli/tests/cli_run.rs
+++ b/cli/tests/cli_run.rs
@@ -230,9 +230,8 @@ mod cli_run {
     #[test]
     #[serial(astar)]
     fn run_astar_optimized_1() {
-        check_output_with_stdin(
+        check_output(
             &example_file("benchmarks", "AStarTests.roc"),
-            "1",
             "astar-tests",
             &[],
             "True\n",

From c24d51e69d39326343827d5b94f826fc6a2d8868 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 15:14:52 +0100
Subject: [PATCH 21/33] remove old function

---
 compiler/mono/src/ir.rs | 33 ---------------------------------
 1 file changed, 33 deletions(-)

diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index 79628b27ac..9d4d41efd3 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -302,39 +302,6 @@ pub enum InProgressProc<'a> {
 }
 
 impl<'a> Procs<'a> {
-    /// Absorb the contents of another Procs into this one.
-    pub fn absorb(&mut self, mut other: Procs<'a>) {
-        debug_assert!(self.pending_specializations.is_some());
-        debug_assert!(other.pending_specializations.is_some());
-
-        match self.pending_specializations {
-            Some(ref mut pending_specializations) => {
-                for (k, v) in other.pending_specializations.unwrap().drain() {
-                    pending_specializations.insert(k, v);
-                }
-            }
-            None => {
-                unreachable!();
-            }
-        }
-
-        for (k, v) in other.partial_procs.drain() {
-            self.partial_procs.insert(k, v);
-        }
-
-        for (k, v) in other.specialized.drain() {
-            self.specialized.insert(k, v);
-        }
-
-        for (k, v) in other.runtime_errors.drain() {
-            self.runtime_errors.insert(k, v);
-        }
-
-        for symbol in other.module_thunks.drain() {
-            self.module_thunks.insert(symbol);
-        }
-    }
-
     pub fn get_specialized_procs_without_rc(
         self,
         arena: &'a Bump,

From 092db87474a1ed13457d3aa647e07a66934b141e Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:05:44 +0100
Subject: [PATCH 22/33] add import dependencies to module cache

---
 compiler/load/src/file.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs
index 44fe5bfb0b..058b6f6821 100644
--- a/compiler/load/src/file.rs
+++ b/compiler/load/src/file.rs
@@ -358,6 +358,7 @@ struct ModuleCache<'a> {
     external_specializations_requested: MutMap<ModuleId, ExternalSpecializations>,
 
     /// Various information
+    imports: MutMap<ModuleId, MutSet<ModuleId>>,
     documentation: MutMap<ModuleId, ModuleDocumentation>,
     can_problems: MutMap<ModuleId, Vec<roc_problem::can::Problem>>,
     type_problems: MutMap<ModuleId, Vec<solve::TypeError>>,
@@ -1641,6 +1642,18 @@ fn update<'a>(
                 .exposed_symbols_by_module
                 .insert(home, exposed_symbols);
 
+            state
+                .module_cache
+                .imports
+                .entry(header.module_id)
+                .or_default()
+                .extend(
+                    header
+                        .package_qualified_imported_modules
+                        .iter()
+                        .map(|x| *x.as_inner()),
+                );
+
             work.extend(state.dependencies.add_module(
                 header.module_id,
                 &header.package_qualified_imported_modules,

From 64955f23ff0b29bb1fa7ec9393a09d49f516597c Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:07:39 +0100
Subject: [PATCH 23/33] store module thunks

---
 compiler/load/src/file.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs
index 058b6f6821..cabeef9470 100644
--- a/compiler/load/src/file.rs
+++ b/compiler/load/src/file.rs
@@ -359,6 +359,7 @@ struct ModuleCache<'a> {
 
     /// Various information
     imports: MutMap<ModuleId, MutSet<ModuleId>>,
+    top_level_thunks: MutMap<ModuleId, MutSet<Symbol>>,
     documentation: MutMap<ModuleId, ModuleDocumentation>,
     can_problems: MutMap<ModuleId, Vec<roc_problem::can::Problem>>,
     type_problems: MutMap<ModuleId, Vec<solve::TypeError>>,
@@ -1917,6 +1918,13 @@ fn update<'a>(
                 }
             }
 
+            state
+                .module_cache
+                .top_level_thunks
+                .entry(module_id)
+                .or_default()
+                .extend(procs.module_thunks.iter().copied());
+
             let found_specializations_module = FoundSpecializationsModule {
                 layout_cache,
                 module_id,

From a361148380d64a54b5e42f64c88b484f2d718e3a Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:09:47 +0100
Subject: [PATCH 24/33] add imported_module_thunks

---
 compiler/mono/src/ir.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index 9d4d41efd3..aaf86934ab 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -273,6 +273,7 @@ impl ExternalSpecializations {
 #[derive(Clone, Debug)]
 pub struct Procs<'a> {
     pub partial_procs: MutMap<Symbol, PartialProc<'a>>,
+    pub imported_module_thunks: MutSet<Symbol>,
     pub module_thunks: MutSet<Symbol>,
     pub pending_specializations: Option<MutMap<Symbol, MutMap<Layout<'a>, PendingSpecialization>>>,
     pub specialized: MutMap<(Symbol, Layout<'a>), InProgressProc<'a>>,
@@ -285,6 +286,7 @@ impl<'a> Default for Procs<'a> {
     fn default() -> Self {
         Self {
             partial_procs: MutMap::default(),
+            imported_module_thunks: MutSet::default(),
             module_thunks: MutSet::default(),
             pending_specializations: Some(MutMap::default()),
             specialized: MutMap::default(),

From 6bd10ddc050a8ef2d3568401c2b492beeea4b5ca Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:23:35 +0100
Subject: [PATCH 25/33] use imported module thunks for pointer calling

---
 compiler/load/src/file.rs | 20 ++++++++++++++++++++
 compiler/mono/src/ir.rs   |  5 ++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs
index cabeef9470..283be55b16 100644
--- a/compiler/load/src/file.rs
+++ b/compiler/load/src/file.rs
@@ -546,11 +546,24 @@ fn start_phase<'a>(module_id: ModuleId, phase: Phase, state: &mut State<'a>) ->
                     ident_ids,
                 } = typechecked;
 
+                let mut imported_module_thunks = MutSet::default();
+
+                if let Some(imports) = state.module_cache.imports.get(&module_id) {
+                    for imported in imports.iter() {
+                        imported_module_thunks.extend(
+                            state.module_cache.top_level_thunks[imported]
+                                .iter()
+                                .copied(),
+                        );
+                    }
+                }
+
                 BuildTask::BuildPendingSpecializations {
                     layout_cache,
                     module_id,
                     module_timing,
                     solved_subs,
+                    imported_module_thunks,
                     decls,
                     ident_ids,
                     exposed_to_host: state.exposed_to_host.clone(),
@@ -950,6 +963,7 @@ enum BuildTask<'a> {
         module_timing: ModuleTiming,
         layout_cache: LayoutCache<'a>,
         solved_subs: Solved<Subs>,
+        imported_module_thunks: MutSet<Symbol>,
         module_id: ModuleId,
         ident_ids: IdentIds,
         decls: Vec<Declaration>,
@@ -3666,6 +3680,7 @@ fn make_specializations<'a>(
 fn build_pending_specializations<'a>(
     arena: &'a Bump,
     solved_subs: Solved<Subs>,
+    imported_module_thunks: MutSet<Symbol>,
     home: ModuleId,
     mut ident_ids: IdentIds,
     decls: Vec<Declaration>,
@@ -3678,6 +3693,9 @@ fn build_pending_specializations<'a>(
     let find_specializations_start = SystemTime::now();
     let mut procs = Procs::default();
 
+    debug_assert!(procs.imported_module_thunks.is_empty());
+    procs.imported_module_thunks = imported_module_thunks;
+
     let mut mono_problems = std::vec::Vec::new();
     let mut subs = solved_subs.into_inner();
     let mut mono_env = roc_mono::ir::Env {
@@ -3959,10 +3977,12 @@ where
             module_timing,
             layout_cache,
             solved_subs,
+            imported_module_thunks,
             exposed_to_host,
         } => Ok(build_pending_specializations(
             arena,
             solved_subs,
+            imported_module_thunks,
             module_id,
             ident_ids,
             decls,
diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index aaf86934ab..991249ee8e 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -5725,8 +5725,11 @@ fn call_by_pointer<'a>(
         // cause issues. The caller (which is here) doesn't know whether the called is a closure
         // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name
         // more in the future
+        let is_thunk =
+            procs.module_thunks.contains(&symbol) || procs.imported_module_thunks.contains(&symbol);
+
         match layout {
-            Layout::FunctionPointer(arg_layouts, ret_layout) if false => {
+            Layout::FunctionPointer(arg_layouts, ret_layout) if !is_thunk => {
                 if arg_layouts.iter().any(|l| l.contains_refcounted()) {
                     let name = env.unique_symbol();
                     let mut args = Vec::with_capacity_in(arg_layouts.len(), env.arena);

From 1c1c53ba950f3a8ecc9dfd1fc44bc4826369f717 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:38:10 +0100
Subject: [PATCH 26/33] flip map argument order

---
 examples/benchmarks/Base64.roc       | 58 ++++++++++++++--------------
 examples/benchmarks/Bytes/Decode.roc |  8 ++--
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index 1fed392288..1f2ecad216 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -31,47 +31,47 @@ decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string:
 loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str)
 loopHelp = \{ remaining, string } ->
     if remaining >= 3 then
-        helper = \x, y, z ->
-            a : U32
-            a = Num.intCast x
-            b : U32
-            b = Num.intCast y
-            c : U32
-            c = Num.intCast z
-            combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c
-            Loop
-                {
-                    remaining: remaining - 3,
-                    string: Str.concat string (bitsToChars combined 0)
-                }
-
-        Bytes.Decode.map3 helper
+        Bytes.Decode.map3 
             Bytes.Decode.u8
             Bytes.Decode.u8
             Bytes.Decode.u8
+            \x, y, z ->
+                a : U32
+                a = Num.intCast x
+                b : U32
+                b = Num.intCast y
+                c : U32
+                c = Num.intCast z
+                combined = Num.bitwiseOr (Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)) c
+                Loop
+                    {
+                        remaining: remaining - 3,
+                        string: Str.concat string (bitsToChars combined 0)
+                    }
 
     else if remaining == 0 then
         Bytes.Decode.succeed (Done string)
 
     else if remaining == 2 then
-        helperX = \x, y ->
-            a : U32
-            a = Num.intCast x
-            b : U32
-            b = Num.intCast y
-            combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)
-            Done (Str.concat string (bitsToChars combined 1))
+        Bytes.Decode.map2 
+            Bytes.Decode.u8
+            Bytes.Decode.u8
+            \x, y ->
+                a : U32
+                a = Num.intCast x
+                b : U32
+                b = Num.intCast y
+                combined = Num.bitwiseOr (Num.shiftLeftBy 16 a) (Num.shiftLeftBy 8 b)
+                Done (Str.concat string (bitsToChars combined 1))
 
-        Bytes.Decode.map2 helperX
-            Bytes.Decode.u8
-            Bytes.Decode.u8
     else
         # remaining = 1
+        Bytes.Decode.map 
             Bytes.Decode.u8
-                |> Bytes.Decode.map (\x -> 
-                    a : U32
-                    a = Num.intCast x
-                    Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2)))
+            \x -> 
+                a : U32
+                a = Num.intCast x
+                Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))
 
 
 bitsToChars : U32, Int * -> Str
diff --git a/examples/benchmarks/Bytes/Decode.roc b/examples/benchmarks/Bytes/Decode.roc
index db14d857a6..f1da59e389 100644
--- a/examples/benchmarks/Bytes/Decode.roc
+++ b/examples/benchmarks/Bytes/Decode.roc
@@ -30,8 +30,8 @@ map = \@Decoder decoder, transform ->
                 Bad e
 
 
-map2 : (a,b -> c), Decoder a, Decoder b -> Decoder c
-map2 = \transform, @Decoder decoder1, @Decoder decoder2 -> 
+map2 : Decoder a, Decoder b, (a, b -> c) -> Decoder c
+map2 = \@Decoder decoder1, @Decoder decoder2, transform -> 
     @Decoder \state1 -> 
         when decoder1 state1 is
             Good state2 a ->
@@ -45,8 +45,8 @@ map2 = \transform, @Decoder decoder1, @Decoder decoder2 ->
             Bad e ->
                 Bad e
 
-map3 : (a, b, c -> d), Decoder a, Decoder b, Decoder c -> Decoder d
-map3 = \transform, @Decoder decoder1, @Decoder decoder2, @Decoder decoder3 -> 
+map3 : Decoder a, Decoder b, Decoder c, (a, b, c -> d) -> Decoder d
+map3 = \@Decoder decoder1, @Decoder decoder2, @Decoder decoder3, transform -> 
     @Decoder \state1 -> 
         when decoder1 state1 is
             Good state2 a ->

From aff8266f0f6526bad8836628b528274f39bf6c55 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:40:46 +0100
Subject: [PATCH 27/33] move astar test

---
 cli/tests/cli_run.rs                                  | 4 ++--
 examples/benchmarks/{AStarTests.roc => TestAStar.roc} | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename examples/benchmarks/{AStarTests.roc => TestAStar.roc} (98%)

diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs
index 6ad19aed58..2f81acaec2 100644
--- a/cli/tests/cli_run.rs
+++ b/cli/tests/cli_run.rs
@@ -231,9 +231,9 @@ mod cli_run {
     #[serial(astar)]
     fn run_astar_optimized_1() {
         check_output_with_stdin(
-            &example_file("benchmarks", "AStarTests.roc"),
+            &example_file("benchmarks", "TestAStar.roc"),
             "1",
-            "astar-tests",
+            "test-astar",
             &[],
             "True\n",
             false,
diff --git a/examples/benchmarks/AStarTests.roc b/examples/benchmarks/TestAStar.roc
similarity index 98%
rename from examples/benchmarks/AStarTests.roc
rename to examples/benchmarks/TestAStar.roc
index 1cb5909c8f..0401566b6a 100644
--- a/examples/benchmarks/AStarTests.roc
+++ b/examples/benchmarks/TestAStar.roc
@@ -1,4 +1,4 @@
-app "astar-tests"
+app "test-astar"
     packages { base: "platform" }
     imports [base.Task, AStar]
     provides [ main ] to base

From 17a44aab024877acf1d2822c155da97f96257baf Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 16:40:56 +0100
Subject: [PATCH 28/33] fix whitespace

---
 examples/benchmarks/Base64.roc | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index 1f2ecad216..c2be2d31fa 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -31,7 +31,7 @@ decodeBase64 = \width -> Bytes.Decode.loop loopHelp { remaining: width, string:
 loopHelp : { remaining : Nat, string : Str } -> Decoder (Bytes.Decode.Step { remaining : Nat, string : Str } Str)
 loopHelp = \{ remaining, string } ->
     if remaining >= 3 then
-        Bytes.Decode.map3 
+        Bytes.Decode.map3
             Bytes.Decode.u8
             Bytes.Decode.u8
             Bytes.Decode.u8
@@ -53,7 +53,7 @@ loopHelp = \{ remaining, string } ->
         Bytes.Decode.succeed (Done string)
 
     else if remaining == 2 then
-        Bytes.Decode.map2 
+        Bytes.Decode.map2
             Bytes.Decode.u8
             Bytes.Decode.u8
             \x, y ->
@@ -66,9 +66,9 @@ loopHelp = \{ remaining, string } ->
 
     else
         # remaining = 1
-        Bytes.Decode.map 
+        Bytes.Decode.map
             Bytes.Decode.u8
-            \x -> 
+            \x ->
                 a : U32
                 a = Num.intCast x
                 Done (Str.concat string (bitsToChars (Num.shiftLeftBy 16 a) 2))
@@ -90,33 +90,33 @@ bitsToCharsHelp = \bits, missing ->
     # The input is 24 bits, which we have to partition into 4 6-bit segments. We achieve this by
     # shifting to the right by (a multiple of) 6 to remove unwanted bits on the right, then `Num.bitwiseAnd`
     # with `0b111111` (which is 2^6 - 1 or 63) (so, 6 1s) to remove unwanted bits on the left.
-        
+
     # any 6-bit number is a valid base64 digit, so this is actually safe
     p =
         Num.shiftRightZfBy 18 bits
             |> Num.intCast
-            |> unsafeToChar 
+            |> unsafeToChar
 
     q =
         Num.bitwiseAnd (Num.shiftRightZfBy 12 bits) lowest6BitsMask
             |> Num.intCast
-            |> unsafeToChar 
+            |> unsafeToChar
 
     r =
         Num.bitwiseAnd (Num.shiftRightZfBy 6 bits) lowest6BitsMask
             |> Num.intCast
-            |> unsafeToChar 
+            |> unsafeToChar
 
     s =
         Num.bitwiseAnd bits lowest6BitsMask
             |> Num.intCast
-            |> unsafeToChar 
+            |> unsafeToChar
 
     equals : U8
     equals = 61
 
     when missing is
-        0 -> 
+        0 ->
             [ p, q, r, s ]
         1 ->
             [ p, q, r, equals ]

From a6edc58323f1d11e6544abd070afe728c6568ab7 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 21:25:14 +0100
Subject: [PATCH 29/33] make fromUtf8 do RC

---
 cli/tests/cli_run.rs                   |  12 ++
 compiler/builtins/bitcode/src/main.zig |   2 +-
 compiler/builtins/bitcode/src/str.zig  | 158 +++++++++++++++++--------
 compiler/builtins/src/bitcode.rs       |   2 +-
 compiler/can/src/builtins.rs           |   8 +-
 compiler/gen/src/llvm/build_str.rs     | 122 ++++---------------
 examples/benchmarks/Base64.roc         |  19 +--
 examples/benchmarks/TestBase64.roc     |  17 +++
 8 files changed, 172 insertions(+), 168 deletions(-)
 create mode 100644 examples/benchmarks/TestBase64.roc

diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs
index 2f81acaec2..f3889ae374 100644
--- a/cli/tests/cli_run.rs
+++ b/cli/tests/cli_run.rs
@@ -240,6 +240,18 @@ mod cli_run {
         );
     }
 
+    #[test]
+    #[serial(base64)]
+    fn base64() {
+        check_output(
+            &example_file("benchmarks", "TestBase64.roc"),
+            "test-base64",
+            &[],
+            "SGVsbG8gV29ybGQ=",
+            true,
+        );
+    }
+
     #[test]
     #[serial(closure)]
     fn closure() {
diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig
index e124afe62c..e20d376b88 100644
--- a/compiler/builtins/bitcode/src/main.zig
+++ b/compiler/builtins/bitcode/src/main.zig
@@ -67,8 +67,8 @@ comptime {
     exportStrFn(str.strFromIntC, "from_int");
     exportStrFn(str.strFromFloatC, "from_float");
     exportStrFn(str.strEqual, "equal");
-    exportStrFn(str.validateUtf8Bytes, "validate_utf8_bytes");
     exportStrFn(str.strToBytesC, "to_bytes");
+    exportStrFn(str.fromUtf8C, "from_utf8");
 }
 
 // Export helpers - Must be run inside a comptime
diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig
index a752a37f0e..572eaa14e0 100644
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@@ -15,6 +15,7 @@ const InPlace = packed enum(u8) {
     Clone,
 };
 
+const SMALL_STR_MAX_LENGTH = small_string_size - 1;
 const small_string_size = 2 * @sizeOf(usize);
 const blank_small_string: [16]u8 = init_blank_small_string(small_string_size);
 
@@ -982,6 +983,71 @@ fn strToBytes(allocator: *Allocator, arg: RocStr) RocList {
     }
 }
 
+const FromUtf8Result = extern struct {
+    byte_index: usize,
+    string: RocStr,
+    is_ok: bool,
+    problem_code: Utf8ByteProblem,
+};
+
+pub fn fromUtf8C(arg: RocList, output: *FromUtf8Result) callconv(.C) void {
+    output.* = @call(.{ .modifier = always_inline }, fromUtf8, .{ std.heap.c_allocator, arg });
+}
+
+fn fromUtf8(allocator: *Allocator, arg: RocList) FromUtf8Result {
+    const bytes = @ptrCast([*]const u8, arg.bytes)[0..arg.length];
+
+    if (unicode.utf8ValidateSlice(bytes)) {
+        // the bytes are valid UTF-8. Now we need to take ownership of the input
+        if (arg.len() <= SMALL_STR_MAX_LENGTH) {
+            // turn the bytes into a small string
+            const string = RocStr.init(allocator, @ptrCast([*]u8, arg.bytes), arg.len());
+
+            // then decrement the input list
+            const data_bytes = arg.len();
+            utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes);
+
+            return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte };
+        } else {
+            const byte_list = arg.makeUnique(allocator, @alignOf(usize), @sizeOf(u8));
+
+            const string = RocStr{ .str_bytes = byte_list.bytes, .str_len = byte_list.length };
+
+            return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte };
+        }
+    } else {
+        const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length);
+
+        // TODO what should we do RC-wise here? The input list is not decremented on this path.
+        // const data_bytes = arg.len();
+        // utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes);
+
+        return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, .problem_code = temp.problem };
+    }
+}
+
+fn errorToProblem(bytes: [*]u8, length: usize) struct { index: usize, problem: Utf8ByteProblem } {
+    var index: usize = 0;
+
+    while (index < length) {
+        const nextNumBytes = numberOfNextCodepointBytes(bytes, length, index) catch |err| {
+            switch (err) {
+                error.UnexpectedEof => {
+                    return .{ .index = index, .problem = Utf8ByteProblem.UnexpectedEndOfSequence };
+                },
+                error.Utf8InvalidStartByte => return .{ .index = index, .problem = Utf8ByteProblem.InvalidStartByte },
+                error.Utf8ExpectedContinuation => return .{ .index = index, .problem = Utf8ByteProblem.ExpectedContinuation },
+                error.Utf8OverlongEncoding => return .{ .index = index, .problem = Utf8ByteProblem.OverlongEncoding },
+                error.Utf8EncodesSurrogateHalf => return .{ .index = index, .problem = Utf8ByteProblem.EncodesSurrogateHalf },
+                error.Utf8CodepointTooLarge => return .{ .index = index, .problem = Utf8ByteProblem.CodepointTooLarge },
+            }
+        };
+        index += nextNumBytes;
+    }
+
+    unreachable;
+}
+
 pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool {
     const bytes: []u8 = ptr[0..len];
     return @call(.{ .modifier = always_inline }, unicode.utf8ValidateSlice, .{bytes});
@@ -1019,76 +1085,74 @@ pub const Utf8ByteProblem = packed enum(u8) {
     OverlongEncoding = 4,
     UnexpectedEndOfSequence = 5,
 };
-pub const ValidateUtf8BytesResult = extern struct {
-    is_ok: bool, byte_index: usize, problem_code: Utf8ByteProblem
-};
 
-const is_ok_utf8_byte_response =
-    ValidateUtf8BytesResult{ .is_ok = true, .byte_index = 0, .problem_code = Utf8ByteProblem.UnexpectedEndOfSequence };
-inline fn toErrUtf8ByteResponse(byte_index: usize, problem_code: Utf8ByteProblem) ValidateUtf8BytesResult {
-    return ValidateUtf8BytesResult{ .is_ok = false, .byte_index = byte_index, .problem_code = problem_code };
+fn validateUtf8Bytes(bytes: [*]u8, length: usize) FromUtf8Result {
+    return fromUtf8(std.testing.allocator, RocList{ .bytes = bytes, .length = length });
 }
 
-// Validate that an array of bytes is valid UTF-8, but if it fails catch & return the error & byte index
-pub fn validateUtf8Bytes(ptr: [*]u8, len: usize) callconv(.C) ValidateUtf8BytesResult {
-    var index: usize = 0;
-    while (index < len) {
-        const nextNumBytes = numberOfNextCodepointBytes(ptr, len, index) catch |err| {
-            return toErrUtf8ByteResponse(
-                index,
-                switch (err) {
-                    error.UnexpectedEof => Utf8ByteProblem.UnexpectedEndOfSequence,
-                    error.Utf8InvalidStartByte => Utf8ByteProblem.InvalidStartByte,
-                    error.Utf8ExpectedContinuation => Utf8ByteProblem.ExpectedContinuation,
-                    error.Utf8OverlongEncoding => Utf8ByteProblem.OverlongEncoding,
-                    error.Utf8EncodesSurrogateHalf => Utf8ByteProblem.EncodesSurrogateHalf,
-                    error.Utf8CodepointTooLarge => Utf8ByteProblem.CodepointTooLarge,
-                },
-            );
-        };
-        index += nextNumBytes;
-    }
-    return is_ok_utf8_byte_response;
+fn validateUtf8BytesX(str: RocList) FromUtf8Result {
+    return fromUtf8(std.testing.allocator, str);
 }
 
+fn expectOk(result: FromUtf8Result) void {
+    expectEqual(result.is_ok, true);
+}
+
+fn sliceHelp(bytes: [*]const u8, length: usize) RocList {
+    var list = RocList.allocate(testing.allocator, @alignOf(usize), length, @sizeOf(u8));
+    @memcpy(list.bytes orelse unreachable, bytes, length);
+    list.length = length;
+
+    return list;
+}
+
+fn toErrUtf8ByteResponse(index: usize, problem: Utf8ByteProblem) FromUtf8Result {
+    return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = index, .problem_code = problem };
+}
+
+// NOTE on memory: the validate function consumes an RC token of the input. Since
+// we freshly created it (in `sliceHelp`), it has only one RC token, and the input list will be deallocated.
+//
+// If we tested with big strings, we'd have to deallocate the output string, but never the input list
+
 test "validateUtf8Bytes: ascii" {
-    const str_len = 3;
-    var str: [str_len]u8 = "abc".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: unicode œ" {
-    const str_len = 2;
-    var str: [str_len]u8 = "œ".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "œ";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: unicode ∆" {
-    const str_len = 3;
-    var str: [str_len]u8 = "∆".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "∆";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: emoji" {
-    const str_len = 4;
-    var str: [str_len]u8 = "💖".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "💖";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: unicode ∆ in middle of array" {
-    const str_len = 9;
-    var str: [str_len]u8 = "œb∆c¬".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "œb∆c¬";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectEqual(is_ok_utf8_byte_response, validateUtf8Bytes(str_ptr, str_len));
+    expectOk(validateUtf8BytesX(list));
 }
 
 test "validateUtf8Bytes: invalid start byte" {
diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs
index b19117e106..125099af96 100644
--- a/compiler/builtins/src/bitcode.rs
+++ b/compiler/builtins/src/bitcode.rs
@@ -41,8 +41,8 @@ pub const STR_NUMBER_OF_BYTES: &str = "roc_builtins.str.number_of_bytes";
 pub const STR_FROM_INT: &str = "roc_builtins.str.from_int";
 pub const STR_FROM_FLOAT: &str = "roc_builtins.str.from_float";
 pub const STR_EQUAL: &str = "roc_builtins.str.equal";
-pub const STR_VALIDATE_UTF_BYTES: &str = "roc_builtins.str.validate_utf8_bytes";
 pub const STR_TO_BYTES: &str = "roc_builtins.str.to_bytes";
+pub const STR_FROM_UTF8: &str = "roc_builtins.str.from_utf8";
 
 pub const DICT_HASH: &str = "roc_builtins.dict.hash";
 pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";
diff --git a/compiler/can/src/builtins.rs b/compiler/can/src/builtins.rs
index 7db64b296f..68cceaacf6 100644
--- a/compiler/can/src/builtins.rs
+++ b/compiler/can/src/builtins.rs
@@ -1598,7 +1598,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
                 Access {
                     record_var,
                     ext_var: var_store.fresh(),
-                    field: "isOk".into(),
+                    field: "c_isOk".into(),
                     field_var: var_store.fresh(),
                     loc_expr: Box::new(no_region(Var(Symbol::ARG_2))),
                 },
@@ -1610,7 +1610,7 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
                 vec![Access {
                     record_var,
                     ext_var: var_store.fresh(),
-                    field: "str".into(),
+                    field: "b_str".into(),
                     field_var: var_store.fresh(),
                     loc_expr: Box::new(no_region(Var(Symbol::ARG_2))),
                 }],
@@ -1627,14 +1627,14 @@ fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
                         Access {
                             record_var,
                             ext_var: var_store.fresh(),
-                            field: "problem".into(),
+                            field: "d_problem".into(),
                             field_var: var_store.fresh(),
                             loc_expr: Box::new(no_region(Var(Symbol::ARG_2))),
                         },
                         Access {
                             record_var,
                             ext_var: var_store.fresh(),
-                            field: "byteIndex".into(),
+                            field: "a_byteIndex".into(),
                             field_var: var_store.fresh(),
                             loc_expr: Box::new(no_region(Var(Symbol::ARG_2))),
                         },
diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs
index 301b726fb3..514d483c06 100644
--- a/compiler/gen/src/llvm/build_str.rs
+++ b/compiler/gen/src/llvm/build_str.rs
@@ -1,13 +1,11 @@
 use crate::llvm::bitcode::{call_bitcode_fn, call_void_bitcode_fn};
 use crate::llvm::build::{complex_bitcast, Env, InPlace, Scope};
-use crate::llvm::build_list::{
-    allocate_list, build_basic_phi2, empty_polymorphic_list, list_len, load_list_ptr, store_list,
-};
-use crate::llvm::convert::{collection, get_ptr_type};
+use crate::llvm::build_list::{allocate_list, store_list};
+use crate::llvm::convert::collection;
 use inkwell::builder::Builder;
-use inkwell::types::{BasicTypeEnum, StructType};
+use inkwell::types::BasicTypeEnum;
 use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
-use inkwell::{AddressSpace, IntPredicate};
+use inkwell::AddressSpace;
 use roc_builtins::bitcode;
 use roc_module::symbol::Symbol;
 use roc_mono::layout::{Builtin, Layout};
@@ -300,43 +298,28 @@ pub fn str_to_bytes<'a, 'ctx, 'env>(
 /// Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 }
 pub fn str_from_utf8<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
-    parent: FunctionValue<'ctx>,
+    _parent: FunctionValue<'ctx>,
     original_wrapper: StructValue<'ctx>,
 ) -> BasicValueEnum<'ctx> {
     let builder = env.builder;
     let ctx = env.context;
 
-    let list_len = list_len(builder, original_wrapper);
-    let ptr_type = get_ptr_type(&ctx.i8_type().into(), AddressSpace::Generic);
-    let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type);
-
-    let result_type = env
-        .module
-        .get_struct_type("str.ValidateUtf8BytesResult")
-        .unwrap();
+    let result_type = env.module.get_struct_type("str.FromUtf8Result").unwrap();
     let result_ptr = builder.build_alloca(result_type, "alloca_utf8_validate_bytes_result");
 
     call_void_bitcode_fn(
         env,
-        &[result_ptr.into(), list_ptr.into(), list_len.into()],
-        &bitcode::STR_VALIDATE_UTF_BYTES,
+        &[
+            complex_bitcast(
+                env.builder,
+                original_wrapper.into(),
+                env.context.i128_type().into(),
+                "to_i128",
+            ),
+            result_ptr.into(),
+        ],
+        &bitcode::STR_FROM_UTF8,
     );
-    let utf8_validate_bytes_result = builder
-        .build_load(result_ptr, "load_utf8_validate_bytes_result")
-        .into_struct_value();
-
-    let is_ok = builder
-        .build_extract_value(utf8_validate_bytes_result, 0, "extract_extract_is_ok")
-        .unwrap()
-        .into_int_value();
-    let byte_index = builder
-        .build_extract_value(utf8_validate_bytes_result, 1, "extract_byte_index")
-        .unwrap()
-        .into_int_value();
-    let problem_code = builder
-        .build_extract_value(utf8_validate_bytes_result, 2, "extract_problem_code")
-        .unwrap()
-        .into_int_value();
 
     let record_type = env.context.struct_type(
         &[
@@ -348,71 +331,16 @@ pub fn str_from_utf8<'a, 'ctx, 'env>(
         false,
     );
 
-    let comparison = builder.build_int_compare(
-        IntPredicate::EQ,
-        is_ok,
-        ctx.bool_type().const_int(1, false),
-        "compare_is_ok",
-    );
+    let result_ptr_cast = env
+        .builder
+        .build_bitcast(
+            result_ptr,
+            record_type.ptr_type(AddressSpace::Generic),
+            "to_unnamed",
+        )
+        .into_pointer_value();
 
-    build_basic_phi2(
-        env,
-        parent,
-        comparison,
-        || {
-            // We have a valid utf8 byte sequence
-            // TODO: Should we do something different here if we're doing this in place?
-            let zig_str =
-                call_bitcode_fn(env, &[list_ptr.into(), list_len.into()], &bitcode::STR_INIT)
-                    .into_struct_value();
-            build_struct(
-                builder,
-                record_type,
-                vec![
-                    (
-                        env.ptr_int().const_int(0, false).into(),
-                        "insert_zeroed_byte_index",
-                    ),
-                    (zig_str_to_struct(env, zig_str).into(), "insert_str"),
-                    (ctx.bool_type().const_int(1, false).into(), "insert_is_ok"),
-                    (
-                        ctx.i8_type().const_int(0, false).into(),
-                        "insert_zeroed_problem",
-                    ),
-                ],
-            )
-            .into()
-        },
-        || {
-            // We do not have a valid utf8 byte sequence
-            build_struct(
-                builder,
-                record_type,
-                vec![
-                    (byte_index.into(), "insert_byte_index"),
-                    (empty_polymorphic_list(env), "insert_zeroed_str"),
-                    (ctx.bool_type().const_int(0, false).into(), "insert_is_ok"),
-                    (problem_code.into(), "insert_problem"),
-                ],
-            )
-            .into()
-        },
-        BasicTypeEnum::StructType(record_type),
-    )
-}
-
-fn build_struct<'env, 'ctx>(
-    builder: &'env Builder<'ctx>,
-    struct_type: StructType<'ctx>,
-    values: Vec<(BasicValueEnum<'ctx>, &str)>,
-) -> StructValue<'ctx> {
-    let mut val = struct_type.get_undef().into();
-    for (index, (value, name)) in values.iter().enumerate() {
-        val = builder
-            .build_insert_value(val, *value, index as u32, name)
-            .unwrap();
-    }
-    val.into_struct_value()
+    builder.build_load(result_ptr_cast, "load_utf8_validate_bytes_result")
 }
 
 /// Str.fromInt : Int -> Str
diff --git a/examples/benchmarks/Base64.roc b/examples/benchmarks/Base64.roc
index c2be2d31fa..c8d7c586f7 100644
--- a/examples/benchmarks/Base64.roc
+++ b/examples/benchmarks/Base64.roc
@@ -1,24 +1,7 @@
-app "base64"
-    packages { base: "platform" }
-    imports [base.Task, Bytes.Decode.{Decoder} ]
-    provides [ main ] to base
-
-IO a : Task.Task a []
+interface Base64 exposes [ fromBytes ] imports [ Bytes.Decode ]
 
 Decoder a : Bytes.Decode.Decoder a
 
-main : IO {}
-main =
-    # when fromBytes [ 0 ] is
-    when fromBytes (Str.toBytes "Hello World") is
-        Ok str ->
-            Task.putLine str
-
-        Err _ ->
-            Task.putLine "sadness"
-
-# ------
-
 
 fromBytes : List U8 -> Result Str Bytes.Decode.DecodeError
 fromBytes = \bytes ->
diff --git a/examples/benchmarks/TestBase64.roc b/examples/benchmarks/TestBase64.roc
new file mode 100644
index 0000000000..75adbca8be
--- /dev/null
+++ b/examples/benchmarks/TestBase64.roc
@@ -0,0 +1,17 @@
+app "test-base64"
+    packages { base: "platform" }
+    imports [base.Task, Base64 ]
+    provides [ main ] to base
+
+IO a : Task.Task a []
+
+main : IO {}
+main =
+    # when fromBytes [ 0 ] is
+    when Base64.fromBytes (Str.toBytes "Hello World") is
+        Ok str ->
+            Task.putLine str
+
+        Err _ ->
+            Task.putLine "sadness"
+

From 75ee81db883b1e76e276b596807af6668867ea0f Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 21:27:02 +0100
Subject: [PATCH 30/33] fix base64 test output

---
 cli/tests/cli_run.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs
index f3889ae374..251259b490 100644
--- a/cli/tests/cli_run.rs
+++ b/cli/tests/cli_run.rs
@@ -247,7 +247,7 @@ mod cli_run {
             &example_file("benchmarks", "TestBase64.roc"),
             "test-base64",
             &[],
-            "SGVsbG8gV29ybGQ=",
+            "SGVsbG8gV29ybGQ=\n",
             true,
         );
     }

From 7304154452271f86c835a681a30112f8fe0483e2 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 21:36:16 +0100
Subject: [PATCH 31/33] update comment

---
 compiler/mono/src/ir.rs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs
index 991249ee8e..2ad3995fe0 100644
--- a/compiler/mono/src/ir.rs
+++ b/compiler/mono/src/ir.rs
@@ -5720,11 +5720,13 @@ fn call_by_pointer<'a>(
     let is_specialized = procs.specialized.keys().any(|(s, _)| *s == symbol);
     if env.is_imported_symbol(symbol) || procs.partial_procs.contains_key(&symbol) || is_specialized
     {
-        // TODO we should be able to call by name in this wrapper for "normal" functions
-        // but closures, specifically top-level values that are closures (by unification)
-        // cause issues. The caller (which is here) doesn't know whether the called is a closure
-        // so we're safe rather than sorry for now. Hopefully we can figure out how to call by name
-        // more in the future
+        // anything that is not a thunk can be called by-name in the wrapper
+        // (the above condition guarantees we're dealing with a top-level symbol)
+        //
+        // But thunks cannot be called by-name, since they are not really functions to all parts
+        // of the system (notably RC insertion). So we still call those by-pointer.
+        // Luckily such values were top-level originally (in the user code), and can therefore
+        // not be closures
         let is_thunk =
             procs.module_thunks.contains(&symbol) || procs.imported_module_thunks.contains(&symbol);
 

From 3537fa57d2d1463d737270281c2054db85c5db0a Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 21:46:58 +0100
Subject: [PATCH 32/33] decrement when the input is invalid utf8

---
 compiler/builtins/bitcode/src/str.zig | 6 +++---
 examples/benchmarks/TestBase64.roc    | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig
index 572eaa14e0..e18eca4306 100644
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@@ -1018,9 +1018,9 @@ fn fromUtf8(allocator: *Allocator, arg: RocList) FromUtf8Result {
     } else {
         const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length);
 
-        // TODO what should we do RC-wise here
-        // const data_bytes = arg.len();
-        // utils.decref(allocator, @alignOf(usize), arg.list_bytes, data_bytes);
+        // consume the input list
+        const data_bytes = arg.len();
+        utils.decref(allocator, @alignOf(usize), arg.bytes, data_bytes);
 
         return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, .problem_code = temp.problem };
     }
diff --git a/examples/benchmarks/TestBase64.roc b/examples/benchmarks/TestBase64.roc
index 75adbca8be..27c5617ebf 100644
--- a/examples/benchmarks/TestBase64.roc
+++ b/examples/benchmarks/TestBase64.roc
@@ -7,7 +7,6 @@ IO a : Task.Task a []
 
 main : IO {}
 main =
-    # when fromBytes [ 0 ] is
     when Base64.fromBytes (Str.toBytes "Hello World") is
         Ok str ->
             Task.putLine str

From 134f8a15e9e9baaa204990ff127352d4577bd5ba Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Wed, 24 Feb 2021 22:26:18 +0100
Subject: [PATCH 33/33] fix zig tests

---
 compiler/builtins/bitcode/src/str.zig | 98 +++++++++++++--------------
 1 file changed, 48 insertions(+), 50 deletions(-)

diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig
index e18eca4306..62a88058e0 100644
--- a/compiler/builtins/bitcode/src/str.zig
+++ b/compiler/builtins/bitcode/src/str.zig
@@ -1155,102 +1155,100 @@ test "validateUtf8Bytes: unicode ∆ in middle of array" {
     expectOk(validateUtf8BytesX(list));
 }
 
+fn expectErr(list: RocList, index: usize, err: Utf8DecodeError, problem: Utf8ByteProblem) void {
+    const str_ptr = @ptrCast([*]u8, list.bytes);
+    const str_len = list.length;
+
+    expectError(err, numberOfNextCodepointBytes(str_ptr, str_len, index));
+    expectEqual(toErrUtf8ByteResponse(index, problem), validateUtf8Bytes(str_ptr, str_len));
+}
+
 test "validateUtf8Bytes: invalid start byte" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426
-    const str_len = 4;
-    var str: [str_len]u8 = "ab\x80c".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "ab\x80c";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8InvalidStartByte, numberOfNextCodepointBytes(str_ptr, str_len, 2));
-    expectEqual(toErrUtf8ByteResponse(2, Utf8ByteProblem.InvalidStartByte), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 2, error.Utf8InvalidStartByte, Utf8ByteProblem.InvalidStartByte);
 }
 
 test "validateUtf8Bytes: unexpected eof for 2 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426
-    const str_len = 4;
-    var str: [str_len]u8 = "abc\xc2".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xc2";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence);
 }
 
 test "validateUtf8Bytes: expected continuation for 2 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L426
-    const str_len = 5;
-    var str: [str_len]u8 = "abc\xc2\x00".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xc2\x00";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation);
 }
 
 test "validateUtf8Bytes: unexpected eof for 3 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L430
-    const str_len = 5;
-    var str: [str_len]u8 = "abc\xe0\x00".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xe0\x00";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence);
 }
 
 test "validateUtf8Bytes: expected continuation for 3 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L430
-    const str_len = 6;
-    var str: [str_len]u8 = "abc\xe0\xa0\xc0".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xe0\xa0\xc0";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation);
 }
 
 test "validateUtf8Bytes: unexpected eof for 4 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L437
-    const str_len = 6;
-    var str: [str_len]u8 = "abc\xf0\x90\x00".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xf0\x90\x00";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.UnexpectedEof, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.UnexpectedEndOfSequence), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.UnexpectedEof, Utf8ByteProblem.UnexpectedEndOfSequence);
 }
 
 test "validateUtf8Bytes: expected continuation for 4 byte sequence" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L437
-    const str_len = 7;
-    var str: [str_len]u8 = "abc\xf0\x90\x80\x00".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xf0\x90\x80\x00";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8ExpectedContinuation, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.ExpectedContinuation), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8ExpectedContinuation, Utf8ByteProblem.ExpectedContinuation);
 }
 
 test "validateUtf8Bytes: overlong" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L451
-    const str_len = 7;
-    var str: [str_len]u8 = "abc\xf0\x80\x80\x80".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xf0\x80\x80\x80";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8OverlongEncoding, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.OverlongEncoding), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8OverlongEncoding, Utf8ByteProblem.OverlongEncoding);
 }
 
 test "validateUtf8Bytes: codepoint out too large" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L465
-    const str_len = 7;
-    var str: [str_len]u8 = "abc\xf4\x90\x80\x80".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xf4\x90\x80\x80";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8CodepointTooLarge, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.CodepointTooLarge), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8CodepointTooLarge, Utf8ByteProblem.CodepointTooLarge);
 }
 
 test "validateUtf8Bytes: surrogate halves" {
     // https://github.com/ziglang/zig/blob/0.7.x/lib/std/unicode.zig#L468
-    const str_len = 6;
-    var str: [str_len]u8 = "abc\xed\xa0\x80".*;
-    const str_ptr: [*]u8 = &str;
+    const raw = "abc\xed\xa0\x80";
+    const ptr: [*]const u8 = @ptrCast([*]const u8, raw);
+    const list = sliceHelp(ptr, raw.len);
 
-    expectError(error.Utf8EncodesSurrogateHalf, numberOfNextCodepointBytes(str_ptr, str_len, 3));
-    expectEqual(toErrUtf8ByteResponse(3, Utf8ByteProblem.EncodesSurrogateHalf), validateUtf8Bytes(str_ptr, str_len));
+    expectErr(list, 3, error.Utf8EncodesSurrogateHalf, Utf8ByteProblem.EncodesSurrogateHalf);
 }