Merge pull request #4648 from roc-lang/i4636

2025-11-25 21:37:48 +00:00 · 2022-12-01 18:36:43 -06:00 · 2022-12-01 18:36:43 -06:00 · 0d80e741c2
commit 0d80e741c2
parent 9b4552608f 42e8e409d4
11 changed files with 230 additions and 59 deletions
--- a/crates/compiler/mono/src/ir.rs
+++ b/crates/compiler/mono/src/ir.rs
@ -2479,14 +2479,70 @@ fn from_can_let<'a>(

                lower_rest!(variable, cont.value)
            }
-            Var(original, _) | AbilityMember(original, _, _) => {
+            Var(original, _) | AbilityMember(original, _, _)
+                if procs.get_partial_proc(original).is_none() =>
+            {
                // a variable is aliased, e.g.
                //
                //  foo = bar
                //
-                // or
+                // We need to generate an IR that is free of local lvalue aliasing, as this aids in
+                // refcounting. As such, variable aliasing usually involves renaming the LHS in the
+                // rest of the program with the RHS (i.e. [foo->bar]); see `handle_variable_aliasing`
+                // below for the exact algorithm.
                //
-                //  foo = RBTRee.empty
+                // However, do not attempt to eliminate aliasing to procedures
+                // (either a function pointer or a thunk) here. Doing so is not necessary - if we
+                // have `var = f` where `f` is either a proc or thunk, in either case, `f` will be
+                // resolved to an rvalue, not an lvalue:
+                //
+                // - If `f` is a proc, we assign to `var` its closure data (even if the lambda set
+                //   of `f` is unary with no captures, we leave behind the empty closure data)
+                //
+                // - If `f` is a thunk, we force the thunk and assign to `var` its value.
+                //
+                // With this in mind, when `f` is a thunk or proper function, we are free to follow
+                // the usual (non-lvalue-aliasing) branch of assignment, and end up with correct
+                // code.
+                //
+                // ===
+                //
+                // Recording that an lvalue references a procedure or a thunk may open up
+                // opportunities for optimization - and indeed, recording this information may
+                // sometimes eliminate such unused lvalues, or inline closure data. However, in
+                // general, making sure this kind of aliasing works correctly is very difficult. As
+                // illustration, consider
+                //
+                //     getNum1 = \{} -> 1u64
+                //     getNum2 = \{} -> 2u64
+                //
+                //     dispatch = \fun -> fun {}
+                //
+                //     main =
+                //         myFun1 = getNum1
+                //         myFun2 = getNum2
+                //         dispatch (if Bool.true then myFun1 else myFun2)
+                //
+                // Suppose we leave nothing behind for the assignments `myFun* = getNum*`, and
+                // instead simply associate that they reference procs. In the if-then-else
+                // expression, we then need to construct the closure data for both getNum1 and
+                // getNum2 - but we do not know what lambdas they represent, as we only have access
+                // to the symbols `myFun1` and `myFun2`.
+                //
+                // While associations of `myFun1 -> getNum1` could be propogated, the story gets
+                // more complicated when the referenced proc itself resolves a lambda set with
+                // indirection; for example consider the amendment
+                //
+                //     getNum1 = @Dispatcher \{} -> 1u64
+                //     getNum2 = @Dispatcher \{} -> 2u64
+                //
+                // Now, even the association of `myFun1 -> getNum1` is not enough, as the lambda
+                // set of (if Bool.true then myFun1 else myFun2) would not be { getNum1, getNum2 }
+                // - it would be the binary lambda set of the anonymous closures created under the
+                // `@Dispatcher` wrappers.
+                //
+                // Trying to keep all this information in line has been error prone, and is not
+                // attempted.

                // TODO: right now we need help out rustc with the closure types;
                // it isn't able to infer the right lifetime bounds. See if we
@ -2531,18 +2587,69 @@ fn from_can_let<'a>(
                //              in
                //                  answer

-                let new_def = roc_can::def::Def {
-                    loc_pattern: def.loc_pattern,
-                    loc_expr: *nested_cont,
-                    pattern_vars: def.pattern_vars,
-                    annotation: def.annotation,
-                    expr_var: def.expr_var,
+                use roc_can::{def::Def, expr::Expr, pattern::Pattern};
+
+                let new_outer = match &nested_cont.value {
+                    &Expr::Closure(ClosureData {
+                        name: anon_name, ..
+                    }) => {
+                        // A wrinkle:
+                        //
+                        //   let f =
+                        //      let n = 1 in
+                        //      \{} -[#lam]-> n
+                        //
+                        // must become
+                        //
+                        //   let n = 1 in
+                        //   let #lam = \{} -[#lam]-> n in
+                        //   let f = #lam
+
+                        debug_assert_ne!(*symbol, anon_name);
+
+                        // #lam = \...
+                        let def_anon_closure = Box::new(Def {
+                            loc_pattern: Loc::at_zero(Pattern::Identifier(anon_name)),
+                            loc_expr: *nested_cont,
+                            expr_var: def.expr_var,
+                            pattern_vars: std::iter::once((anon_name, def.expr_var)).collect(),
+                            annotation: None,
+                        });
+
+                        // f = #lam
+                        let new_def = Box::new(Def {
+                            loc_pattern: def.loc_pattern,
+                            loc_expr: Loc::at_zero(Expr::Var(anon_name, def.expr_var)),
+                            expr_var: def.expr_var,
+                            pattern_vars: def.pattern_vars,
+                            annotation: def.annotation,
+                        });
+
+                        let new_inner = LetNonRec(new_def, cont);
+
+                        LetNonRec(
+                            nested_def,
+                            Box::new(Loc::at_zero(LetNonRec(
+                                def_anon_closure,
+                                Box::new(Loc::at_zero(new_inner)),
+                            ))),
+                        )
+                    }
+                    _ => {
+                        let new_def = Def {
+                            loc_pattern: def.loc_pattern,
+                            loc_expr: *nested_cont,
+                            pattern_vars: def.pattern_vars,
+                            annotation: def.annotation,
+                            expr_var: def.expr_var,
+                        };
+
+                        let new_inner = LetNonRec(Box::new(new_def), cont);
+
+                        LetNonRec(nested_def, Box::new(Loc::at_zero(new_inner)))
+                    }
                };

-                let new_inner = LetNonRec(Box::new(new_def), cont);
-
-                let new_outer = LetNonRec(nested_def, Box::new(Loc::at_zero(new_inner)));
-
                lower_rest!(variable, new_outer)
            }
            LetRec(nested_defs, nested_cont, cycle_mark) => {
@ -4053,11 +4160,6 @@ fn specialize_naked_symbol<'a>(
        }
    }

-    let result = match hole {
-        Stmt::Jump(id, _) => Stmt::Jump(*id, env.arena.alloc([symbol])),
-        _ => Stmt::Ret(symbol),
-    };
-
    // if the symbol is a function symbol, ensure it is properly specialized!
    let original = symbol;

@ -4069,8 +4171,8 @@ fn specialize_naked_symbol<'a>(
        procs,
        layout_cache,
        opt_fn_var,
-        symbol,
-        result,
+        assigned,
+        hole,
        original,
    )
 }
@ -4413,7 +4515,7 @@ pub fn with_hole<'a>(
                            layout_cache,
                            Some(variable),
                            symbol,
-                            stmt,
+                            env.arena.alloc(stmt),
                            symbol,
                        );
                    }
@ -5075,7 +5177,7 @@ pub fn with_hole<'a>(
                                    layout_cache,
                                    Some(record_var),
                                    specialized_structure_sym,
-                                    stmt,
+                                    env.arena.alloc(stmt),
                                    structure,
                                );
                            }
@ -8019,15 +8121,11 @@ where
        }
    }

-    // 2. Handle references to a known proc - again, we may be either aliasing the proc, or another
-    //    alias to a proc.
-    if procs.partial_procs.contains_key(right) {
-        // This is an alias to a function defined in this module.
-        // Attach the alias, then build the rest of the module, so that we reference and specialize
-        // the correct proc.
-        procs.partial_procs.insert_alias(left, right);
-        return build_rest(env, procs, layout_cache);
-    }
+    // We should never reference a partial proc - instead, we want to generate closure data and
+    // leave it there, even if the lambda set is unary. That way, we avoid having to try to resolve
+    // lambda set of the proc based on the symbol name, which can cause many problems!
+    // See my git blame for details.
+    debug_assert!(!procs.partial_procs.contains_key(right));

    // Otherwise we're dealing with an alias whose usages will tell us what specializations we
    // need. So let's figure those out first.
@ -8154,8 +8252,8 @@ fn specialize_symbol<'a>(
    procs: &mut Procs<'a>,
    layout_cache: &mut LayoutCache<'a>,
    arg_var: Option<Variable>,
-    symbol: Symbol,
-    result: Stmt<'a>,
+    assign_to: Symbol,
+    result: &'a Stmt<'a>,
    original: Symbol,
 ) -> Stmt<'a> {
    match procs.get_partial_proc(original) {
@ -8194,7 +8292,7 @@ fn specialize_symbol<'a>(
                            layout_cache,
                        );

-                        force_thunk(env, original, layout, symbol, env.arena.alloc(result))
+                        force_thunk(env, original, layout, assign_to, env.arena.alloc(result))
                    } else {
                        // Imported symbol, so it must have no captures niche (since
                        // top-levels can't capture)
@ -8212,7 +8310,7 @@ fn specialize_symbol<'a>(
                            layout_cache,
                        );

-                        let_empty_struct(symbol, env.arena.alloc(result))
+                        let_empty_struct(assign_to, env.arena.alloc(result))
                    }
                }

@ -8224,6 +8322,13 @@ fn specialize_symbol<'a>(
                        original,
                        (env.home, &arg_var),
                    );
+
+                    // Replaces references of `assign_to` in the rest of the block with `original`,
+                    // since we don't actually need to specialize the original symbol to a value.
+                    //
+                    // This usually means we are using a symbol received from a joinpoint.
+                    let mut result = result.clone();
+                    substitute_in_exprs(env.arena, &mut result, assign_to, original);
                    result
                }
            }
@ -8282,7 +8387,7 @@ fn specialize_symbol<'a>(
                            layout_cache,
                        );

-                        let closure_data = symbol;
+                        let closure_data = assign_to;

                        construct_closure_data(
                            env,
@ -8311,7 +8416,7 @@ fn specialize_symbol<'a>(
                            layout_cache,
                        );

-                        force_thunk(env, original, layout, symbol, env.arena.alloc(result))
+                        force_thunk(env, original, layout, assign_to, env.arena.alloc(result))
                    } else {
                        // even though this function may not itself capture,
                        // unification may still cause it to have an extra argument
@ -8343,7 +8448,7 @@ fn specialize_symbol<'a>(
                            lambda_set,
                            lambda_name,
                            &[],
-                            symbol,
+                            assign_to,
                            env.arena.alloc(result),
                        )
                    }
@ -8360,7 +8465,13 @@ fn specialize_symbol<'a>(
                        layout_cache,
                    );

-                    force_thunk(env, original, ret_layout, symbol, env.arena.alloc(result))
+                    force_thunk(
+                        env,
+                        original,
+                        ret_layout,
+                        assign_to,
+                        env.arena.alloc(result),
+                    )
                }
            }
        }
@ -8386,7 +8497,7 @@ fn assign_to_symbol<'a>(
                layout_cache,
                Some(arg_var),
                symbol,
-                result,
+                env.arena.alloc(result),
                original,
            )
        }
--- a/crates/compiler/mono/src/layout.rs
+++ b/crates/compiler/mono/src/layout.rs
@ -1387,7 +1387,12 @@ impl<'a> LambdaSet<'a> {
    where
        I: Interner<'a, Layout<'a>>,
    {
-        debug_assert!(self.contains(function_symbol), "function symbol not in set");
+        debug_assert!(
+            self.contains(function_symbol),
+            "function symbol {:?} not in set {:?}",
+            function_symbol,
+            self
+        );

        let comparator = |other_name: Symbol, other_captures_layouts: &[Layout]| {
            other_name == function_symbol
--- a/crates/compiler/test_mono/generated/anonymous_closure_lifted_to_named_issue_2403.txt
+++ b/crates/compiler/test_mono/generated/anonymous_closure_lifted_to_named_issue_2403.txt
@ -0,0 +1,8 @@
+procedure Test.4 (Test.5, Test.3):
+    ret Test.3;
+
+procedure Test.0 ():
+    let Test.3 : I64 = 1i64;
+    let Test.7 : {} = Struct {};
+    let Test.2 : I64 = CallByName Test.4 Test.7 Test.3;
+    ret Test.2;
--- a/crates/compiler/test_mono/generated/function_pointer_lambda_set.txt
+++ b/crates/compiler/test_mono/generated/function_pointer_lambda_set.txt
@ -0,0 +1,13 @@
+procedure Test.1 (Test.5):
+    let Test.9 : U64 = 1i64;
+    ret Test.9;
+
+procedure Test.2 (Test.3):
+    let Test.8 : {} = Struct {};
+    let Test.7 : U64 = CallByName Test.1 Test.8;
+    ret Test.7;
+
+procedure Test.0 ():
+    let Test.4 : {} = Struct {};
+    let Test.6 : U64 = CallByName Test.2 Test.4;
+    ret Test.6;
--- a/crates/compiler/test_mono/generated/issue_2725_alias_polymorphic_lambda.txt
+++ b/crates/compiler/test_mono/generated/issue_2725_alias_polymorphic_lambda.txt
@ -1,7 +1,8 @@
-procedure Test.2 (Test.3):
+procedure Test.1 (Test.3):
    ret Test.3;

 procedure Test.0 ():
+    let Test.2 : {} = Struct {};
    let Test.6 : I64 = 42i64;
-    let Test.5 : I64 = CallByName Test.2 Test.6;
+    let Test.5 : I64 = CallByName Test.1 Test.6;
    ret Test.5;
--- a/crates/compiler/test_mono/generated/lambda_capture_niches_have_captured_function_in_closure.txt
+++ b/crates/compiler/test_mono/generated/lambda_capture_niches_have_captured_function_in_closure.txt
@ -14,12 +14,12 @@ procedure Test.16 (Test.54):
    ret Test.56;

 procedure Test.2 (Test.7, Test.8):
-    let Test.9 : [C {} {}, C {} {}] = TagId(0) Test.7 Test.8;
-    ret Test.9;
+    let Test.30 : [C {} {}, C {} {}] = TagId(0) Test.7 Test.8;
+    ret Test.30;

 procedure Test.2 (Test.7, Test.8):
-    let Test.9 : [C {} {}, C {} {}] = TagId(1) Test.7 Test.8;
-    ret Test.9;
+    let Test.44 : [C {} {}, C {} {}] = TagId(1) Test.7 Test.8;
+    ret Test.44;

 procedure Test.3 (Test.17):
    let Test.36 : {} = Struct {};
--- a/crates/compiler/test_mono/generated/lambda_capture_niches_with_non_capturing_function.txt
+++ b/crates/compiler/test_mono/generated/lambda_capture_niches_with_non_capturing_function.txt
@ -49,8 +49,8 @@ procedure Test.0 ():
            jump Test.16 Test.17;
    
        case 1:
-            let Test.2 : [C , C U64, C {}] = TagId(0) ;
-            jump Test.16 Test.2;
+            let Test.23 : [C , C U64, C {}] = TagId(0) ;
+            jump Test.16 Test.23;
    
        default:
            let Test.26 : U64 = 1i64;
--- a/crates/compiler/test_mono/generated/specialize_closures.txt
+++ b/crates/compiler/test_mono/generated/specialize_closures.txt
@ -50,8 +50,8 @@ procedure Test.0 ():
    in
    let Test.23 : Int1 = CallByName Bool.2;
    if Test.23 then
-        let Test.7 : [C I64, C I64 Int1] = TagId(0) Test.4;
-        jump Test.20 Test.7;
+        let Test.19 : [C I64, C I64 Int1] = TagId(0) Test.4;
+        jump Test.20 Test.19;
    else
-        let Test.8 : [C I64, C I64 Int1] = TagId(1) Test.5 Test.6;
-        jump Test.20 Test.8;
+        let Test.19 : [C I64, C I64 Int1] = TagId(1) Test.5 Test.6;
+        jump Test.20 Test.19;
--- a/crates/compiler/test_mono/generated/specialize_lowlevel.txt
+++ b/crates/compiler/test_mono/generated/specialize_lowlevel.txt
@ -41,8 +41,8 @@ procedure Test.0 ():
    in
    let Test.20 : Int1 = CallByName Bool.2;
    if Test.20 then
-        let Test.6 : [C I64, C I64] = TagId(0) Test.4;
-        jump Test.18 Test.6;
+        let Test.17 : [C I64, C I64] = TagId(0) Test.4;
+        jump Test.18 Test.17;
    else
-        let Test.7 : [C I64, C I64] = TagId(1) Test.5;
-        jump Test.18 Test.7;
+        let Test.17 : [C I64, C I64] = TagId(1) Test.5;
+        jump Test.18 Test.17;
--- a/crates/compiler/test_mono/generated/unreachable_branch_is_eliminated_but_produces_lambda_specializations.txt
+++ b/crates/compiler/test_mono/generated/unreachable_branch_is_eliminated_but_produces_lambda_specializations.txt
@ -1,6 +1,6 @@
 procedure Test.1 (Test.2):
-    let Test.3 : Int1 = false;
-    ret Test.3;
+    let Test.14 : Int1 = false;
+    ret Test.14;

 procedure Test.3 (Test.13):
    let Test.15 : Str = "t1";
--- a/crates/compiler/test_mono/src/tests.rs
+++ b/crates/compiler/test_mono/src/tests.rs
@ -2049,3 +2049,36 @@ fn crash() {
        "#
    )
 }
+
+#[mono_test]
+fn function_pointer_lambda_set() {
+    indoc!(
+        r#"
+        app "test" provides [main] to "./platform"
+
+        number = \{} -> 1u64
+
+        parse = \parser -> parser {}
+
+        main =
+            parser = number
+            parse parser
+        "#
+    )
+}
+
+#[mono_test]
+fn anonymous_closure_lifted_to_named_issue_2403() {
+    indoc!(
+        r#"
+        app "test" provides [main] to "./platform"
+
+        main =
+            f =
+                n = 1
+                \{} -> n
+            g = f {}
+            g
+        "#
+    )
+}