Merge pull request #4648 from roc-lang/i4636

This commit is contained in:
Ayaz 2022-12-01 18:36:43 -06:00 committed by GitHub
commit 0d80e741c2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 230 additions and 59 deletions

View file

@ -2479,14 +2479,70 @@ fn from_can_let<'a>(
lower_rest!(variable, cont.value)
}
Var(original, _) | AbilityMember(original, _, _) => {
Var(original, _) | AbilityMember(original, _, _)
if procs.get_partial_proc(original).is_none() =>
{
// a variable is aliased, e.g.
//
// foo = bar
//
// or
// We need to generate an IR that is free of local lvalue aliasing, as this aids in
// refcounting. As such, variable aliasing usually involves renaming the LHS in the
// rest of the program with the RHS (i.e. [foo->bar]); see `handle_variable_aliasing`
// below for the exact algorithm.
//
// foo = RBTRee.empty
// However, do not attempt to eliminate aliasing to procedures
// (either a function pointer or a thunk) here. Doing so is not necessary - if we
// have `var = f` where `f` is either a proc or thunk, in either case, `f` will be
// resolved to an rvalue, not an lvalue:
//
// - If `f` is a proc, we assign to `var` its closure data (even if the lambda set
// of `f` is unary with no captures, we leave behind the empty closure data)
//
// - If `f` is a thunk, we force the thunk and assign to `var` its value.
//
// With this in mind, when `f` is a thunk or proper function, we are free to follow
// the usual (non-lvalue-aliasing) branch of assignment, and end up with correct
// code.
//
// ===
//
// Recording that an lvalue references a procedure or a thunk may open up
// opportunities for optimization - and indeed, recording this information may
// sometimes eliminate such unused lvalues, or inline closure data. However, in
// general, making sure this kind of aliasing works correctly is very difficult. As
// illustration, consider
//
// getNum1 = \{} -> 1u64
// getNum2 = \{} -> 2u64
//
// dispatch = \fun -> fun {}
//
// main =
// myFun1 = getNum1
// myFun2 = getNum2
// dispatch (if Bool.true then myFun1 else myFun2)
//
// Suppose we leave nothing behind for the assignments `myFun* = getNum*`, and
// instead simply associate that they reference procs. In the if-then-else
// expression, we then need to construct the closure data for both getNum1 and
// getNum2 - but we do not know what lambdas they represent, as we only have access
// to the symbols `myFun1` and `myFun2`.
//
// While associations of `myFun1 -> getNum1` could be propogated, the story gets
// more complicated when the referenced proc itself resolves a lambda set with
// indirection; for example consider the amendment
//
// getNum1 = @Dispatcher \{} -> 1u64
// getNum2 = @Dispatcher \{} -> 2u64
//
// Now, even the association of `myFun1 -> getNum1` is not enough, as the lambda
// set of (if Bool.true then myFun1 else myFun2) would not be { getNum1, getNum2 }
// - it would be the binary lambda set of the anonymous closures created under the
// `@Dispatcher` wrappers.
//
// Trying to keep all this information in line has been error prone, and is not
// attempted.
// TODO: right now we need help out rustc with the closure types;
// it isn't able to infer the right lifetime bounds. See if we
@ -2531,18 +2587,69 @@ fn from_can_let<'a>(
// in
// answer
let new_def = roc_can::def::Def {
loc_pattern: def.loc_pattern,
loc_expr: *nested_cont,
pattern_vars: def.pattern_vars,
annotation: def.annotation,
expr_var: def.expr_var,
use roc_can::{def::Def, expr::Expr, pattern::Pattern};
let new_outer = match &nested_cont.value {
&Expr::Closure(ClosureData {
name: anon_name, ..
}) => {
// A wrinkle:
//
// let f =
// let n = 1 in
// \{} -[#lam]-> n
//
// must become
//
// let n = 1 in
// let #lam = \{} -[#lam]-> n in
// let f = #lam
debug_assert_ne!(*symbol, anon_name);
// #lam = \...
let def_anon_closure = Box::new(Def {
loc_pattern: Loc::at_zero(Pattern::Identifier(anon_name)),
loc_expr: *nested_cont,
expr_var: def.expr_var,
pattern_vars: std::iter::once((anon_name, def.expr_var)).collect(),
annotation: None,
});
// f = #lam
let new_def = Box::new(Def {
loc_pattern: def.loc_pattern,
loc_expr: Loc::at_zero(Expr::Var(anon_name, def.expr_var)),
expr_var: def.expr_var,
pattern_vars: def.pattern_vars,
annotation: def.annotation,
});
let new_inner = LetNonRec(new_def, cont);
LetNonRec(
nested_def,
Box::new(Loc::at_zero(LetNonRec(
def_anon_closure,
Box::new(Loc::at_zero(new_inner)),
))),
)
}
_ => {
let new_def = Def {
loc_pattern: def.loc_pattern,
loc_expr: *nested_cont,
pattern_vars: def.pattern_vars,
annotation: def.annotation,
expr_var: def.expr_var,
};
let new_inner = LetNonRec(Box::new(new_def), cont);
LetNonRec(nested_def, Box::new(Loc::at_zero(new_inner)))
}
};
let new_inner = LetNonRec(Box::new(new_def), cont);
let new_outer = LetNonRec(nested_def, Box::new(Loc::at_zero(new_inner)));
lower_rest!(variable, new_outer)
}
LetRec(nested_defs, nested_cont, cycle_mark) => {
@ -4053,11 +4160,6 @@ fn specialize_naked_symbol<'a>(
}
}
let result = match hole {
Stmt::Jump(id, _) => Stmt::Jump(*id, env.arena.alloc([symbol])),
_ => Stmt::Ret(symbol),
};
// if the symbol is a function symbol, ensure it is properly specialized!
let original = symbol;
@ -4069,8 +4171,8 @@ fn specialize_naked_symbol<'a>(
procs,
layout_cache,
opt_fn_var,
symbol,
result,
assigned,
hole,
original,
)
}
@ -4413,7 +4515,7 @@ pub fn with_hole<'a>(
layout_cache,
Some(variable),
symbol,
stmt,
env.arena.alloc(stmt),
symbol,
);
}
@ -5075,7 +5177,7 @@ pub fn with_hole<'a>(
layout_cache,
Some(record_var),
specialized_structure_sym,
stmt,
env.arena.alloc(stmt),
structure,
);
}
@ -8019,15 +8121,11 @@ where
}
}
// 2. Handle references to a known proc - again, we may be either aliasing the proc, or another
// alias to a proc.
if procs.partial_procs.contains_key(right) {
// This is an alias to a function defined in this module.
// Attach the alias, then build the rest of the module, so that we reference and specialize
// the correct proc.
procs.partial_procs.insert_alias(left, right);
return build_rest(env, procs, layout_cache);
}
// We should never reference a partial proc - instead, we want to generate closure data and
// leave it there, even if the lambda set is unary. That way, we avoid having to try to resolve
// lambda set of the proc based on the symbol name, which can cause many problems!
// See my git blame for details.
debug_assert!(!procs.partial_procs.contains_key(right));
// Otherwise we're dealing with an alias whose usages will tell us what specializations we
// need. So let's figure those out first.
@ -8154,8 +8252,8 @@ fn specialize_symbol<'a>(
procs: &mut Procs<'a>,
layout_cache: &mut LayoutCache<'a>,
arg_var: Option<Variable>,
symbol: Symbol,
result: Stmt<'a>,
assign_to: Symbol,
result: &'a Stmt<'a>,
original: Symbol,
) -> Stmt<'a> {
match procs.get_partial_proc(original) {
@ -8194,7 +8292,7 @@ fn specialize_symbol<'a>(
layout_cache,
);
force_thunk(env, original, layout, symbol, env.arena.alloc(result))
force_thunk(env, original, layout, assign_to, env.arena.alloc(result))
} else {
// Imported symbol, so it must have no captures niche (since
// top-levels can't capture)
@ -8212,7 +8310,7 @@ fn specialize_symbol<'a>(
layout_cache,
);
let_empty_struct(symbol, env.arena.alloc(result))
let_empty_struct(assign_to, env.arena.alloc(result))
}
}
@ -8224,6 +8322,13 @@ fn specialize_symbol<'a>(
original,
(env.home, &arg_var),
);
// Replaces references of `assign_to` in the rest of the block with `original`,
// since we don't actually need to specialize the original symbol to a value.
//
// This usually means we are using a symbol received from a joinpoint.
let mut result = result.clone();
substitute_in_exprs(env.arena, &mut result, assign_to, original);
result
}
}
@ -8282,7 +8387,7 @@ fn specialize_symbol<'a>(
layout_cache,
);
let closure_data = symbol;
let closure_data = assign_to;
construct_closure_data(
env,
@ -8311,7 +8416,7 @@ fn specialize_symbol<'a>(
layout_cache,
);
force_thunk(env, original, layout, symbol, env.arena.alloc(result))
force_thunk(env, original, layout, assign_to, env.arena.alloc(result))
} else {
// even though this function may not itself capture,
// unification may still cause it to have an extra argument
@ -8343,7 +8448,7 @@ fn specialize_symbol<'a>(
lambda_set,
lambda_name,
&[],
symbol,
assign_to,
env.arena.alloc(result),
)
}
@ -8360,7 +8465,13 @@ fn specialize_symbol<'a>(
layout_cache,
);
force_thunk(env, original, ret_layout, symbol, env.arena.alloc(result))
force_thunk(
env,
original,
ret_layout,
assign_to,
env.arena.alloc(result),
)
}
}
}
@ -8386,7 +8497,7 @@ fn assign_to_symbol<'a>(
layout_cache,
Some(arg_var),
symbol,
result,
env.arena.alloc(result),
original,
)
}

View file

@ -1387,7 +1387,12 @@ impl<'a> LambdaSet<'a> {
where
I: Interner<'a, Layout<'a>>,
{
debug_assert!(self.contains(function_symbol), "function symbol not in set");
debug_assert!(
self.contains(function_symbol),
"function symbol {:?} not in set {:?}",
function_symbol,
self
);
let comparator = |other_name: Symbol, other_captures_layouts: &[Layout]| {
other_name == function_symbol

View file

@ -0,0 +1,8 @@
procedure Test.4 (Test.5, Test.3):
ret Test.3;
procedure Test.0 ():
let Test.3 : I64 = 1i64;
let Test.7 : {} = Struct {};
let Test.2 : I64 = CallByName Test.4 Test.7 Test.3;
ret Test.2;

View file

@ -0,0 +1,13 @@
procedure Test.1 (Test.5):
let Test.9 : U64 = 1i64;
ret Test.9;
procedure Test.2 (Test.3):
let Test.8 : {} = Struct {};
let Test.7 : U64 = CallByName Test.1 Test.8;
ret Test.7;
procedure Test.0 ():
let Test.4 : {} = Struct {};
let Test.6 : U64 = CallByName Test.2 Test.4;
ret Test.6;

View file

@ -1,7 +1,8 @@
procedure Test.2 (Test.3):
procedure Test.1 (Test.3):
ret Test.3;
procedure Test.0 ():
let Test.2 : {} = Struct {};
let Test.6 : I64 = 42i64;
let Test.5 : I64 = CallByName Test.2 Test.6;
let Test.5 : I64 = CallByName Test.1 Test.6;
ret Test.5;

View file

@ -14,12 +14,12 @@ procedure Test.16 (Test.54):
ret Test.56;
procedure Test.2 (Test.7, Test.8):
let Test.9 : [C {} {}, C {} {}] = TagId(0) Test.7 Test.8;
ret Test.9;
let Test.30 : [C {} {}, C {} {}] = TagId(0) Test.7 Test.8;
ret Test.30;
procedure Test.2 (Test.7, Test.8):
let Test.9 : [C {} {}, C {} {}] = TagId(1) Test.7 Test.8;
ret Test.9;
let Test.44 : [C {} {}, C {} {}] = TagId(1) Test.7 Test.8;
ret Test.44;
procedure Test.3 (Test.17):
let Test.36 : {} = Struct {};

View file

@ -49,8 +49,8 @@ procedure Test.0 ():
jump Test.16 Test.17;
case 1:
let Test.2 : [C , C U64, C {}] = TagId(0) ;
jump Test.16 Test.2;
let Test.23 : [C , C U64, C {}] = TagId(0) ;
jump Test.16 Test.23;
default:
let Test.26 : U64 = 1i64;

View file

@ -50,8 +50,8 @@ procedure Test.0 ():
in
let Test.23 : Int1 = CallByName Bool.2;
if Test.23 then
let Test.7 : [C I64, C I64 Int1] = TagId(0) Test.4;
jump Test.20 Test.7;
let Test.19 : [C I64, C I64 Int1] = TagId(0) Test.4;
jump Test.20 Test.19;
else
let Test.8 : [C I64, C I64 Int1] = TagId(1) Test.5 Test.6;
jump Test.20 Test.8;
let Test.19 : [C I64, C I64 Int1] = TagId(1) Test.5 Test.6;
jump Test.20 Test.19;

View file

@ -41,8 +41,8 @@ procedure Test.0 ():
in
let Test.20 : Int1 = CallByName Bool.2;
if Test.20 then
let Test.6 : [C I64, C I64] = TagId(0) Test.4;
jump Test.18 Test.6;
let Test.17 : [C I64, C I64] = TagId(0) Test.4;
jump Test.18 Test.17;
else
let Test.7 : [C I64, C I64] = TagId(1) Test.5;
jump Test.18 Test.7;
let Test.17 : [C I64, C I64] = TagId(1) Test.5;
jump Test.18 Test.17;

View file

@ -1,6 +1,6 @@
procedure Test.1 (Test.2):
let Test.3 : Int1 = false;
ret Test.3;
let Test.14 : Int1 = false;
ret Test.14;
procedure Test.3 (Test.13):
let Test.15 : Str = "t1";

View file

@ -2049,3 +2049,36 @@ fn crash() {
"#
)
}
#[mono_test]
fn function_pointer_lambda_set() {
indoc!(
r#"
app "test" provides [main] to "./platform"
number = \{} -> 1u64
parse = \parser -> parser {}
main =
parser = number
parse parser
"#
)
}
#[mono_test]
fn anonymous_closure_lifted_to_named_issue_2403() {
indoc!(
r#"
app "test" provides [main] to "./platform"
main =
f =
n = 1
\{} -> n
g = f {}
g
"#
)
}