From a556cccb1eac533f12fd09946802bd323ef9a150 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 23 Apr 2022 23:52:49 +0200 Subject: [PATCH 01/26] populate the matrix --- compiler/can/src/def.rs | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index e4cc226eb8..3cf6ca6e63 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1907,24 +1907,29 @@ fn correct_mutual_recursive_type_alias<'a>( ) -> ImMap { let symbols_introduced: Vec = original_aliases.keys().copied().collect(); - let all_successors_with_self = |symbol: &Symbol| -> Vec { - match original_aliases.get(symbol) { - Some(alias) => { - let mut loc_succ = alias.typ.symbols(); - // remove anything that is not defined in the current block - loc_succ.retain(|key| symbols_introduced.contains(key)); + let mut matrix = ReferenceMatrix::new(original_aliases.len()); - loc_succ + for (index, (_, alias)) in original_aliases.iter().enumerate() { + for referenced in alias.typ.symbols() { + match symbols_introduced.iter().position(|k| referenced == *k) { + None => { /* ignore */ } + Some(ref_id) => matrix.set_row_col(index, ref_id, true), } + } + } + + let all_successors_with_self = |symbol: &Symbol| -> Vec { + match symbols_introduced.iter().position(|k| symbol == k) { + Some(index) => matrix + .references_for(index) + .map(|r| symbols_introduced[r]) + .collect(), None => vec![], } }; - // TODO investigate should this be in a loop? - let defined_symbols: Vec = original_aliases.keys().copied().collect(); - let cycles = - ven_graph::strongly_connected_components(&defined_symbols, all_successors_with_self); + ven_graph::strongly_connected_components(&symbols_introduced, all_successors_with_self); let mut solved_aliases = ImMap::default(); for cycle in cycles { From f7010336bb0c206642a77d8ef05fabab2771345b Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 00:16:26 +0200 Subject: [PATCH 02/26] use vecs more --- compiler/can/src/def.rs | 55 +++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 3cf6ca6e63..bcdde10c17 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1902,15 +1902,25 @@ fn to_pending_value_def<'a>( /// Make aliases recursive fn correct_mutual_recursive_type_alias<'a>( env: &mut Env<'a>, - mut original_aliases: SendMap, + original_aliases: SendMap, var_store: &mut VarStore, ) -> ImMap { - let symbols_introduced: Vec = original_aliases.keys().copied().collect(); + let capacity = original_aliases.len(); + let mut matrix = ReferenceMatrix::new(capacity); - let mut matrix = ReferenceMatrix::new(original_aliases.len()); + let (symbols_introduced, mut aliases): (Vec<_>, Vec<_>) = original_aliases + .into_iter() + .map(|(k, v)| (k, Some(v))) + .unzip(); - for (index, (_, alias)) in original_aliases.iter().enumerate() { - for referenced in alias.typ.symbols() { + for (index, alias) in aliases.iter().enumerate() { + let it = alias + .as_ref() + .map(|a| a.typ.symbols().into_iter()) + .into_iter() + .flatten(); + + for referenced in it { match symbols_introduced.iter().position(|k| referenced == *k) { None => { /* ignore */ } Some(ref_id) => matrix.set_row_col(index, ref_id, true), @@ -1918,26 +1928,23 @@ fn correct_mutual_recursive_type_alias<'a>( } } - let all_successors_with_self = |symbol: &Symbol| -> Vec { - match symbols_introduced.iter().position(|k| symbol == k) { - Some(index) => matrix - .references_for(index) - .map(|r| symbols_introduced[r]) - .collect(), - None => vec![], - } - }; - - let cycles = - ven_graph::strongly_connected_components(&symbols_introduced, all_successors_with_self); let mut solved_aliases = ImMap::default(); + let group: Vec<_> = (0u32..capacity as u32).collect(); + + let cycles = matrix.strongly_connected_components(&group); + for cycle in cycles { debug_assert!(!cycle.is_empty()); let mut pending_aliases: ImMap<_, _> = cycle .iter() - .map(|&sym| (sym, original_aliases.remove(&sym).unwrap())) + .map(|index| { + ( + symbols_introduced[*index as usize], + aliases[*index as usize].take().unwrap(), + ) + }) .collect(); // Make sure we report only one error for the cycle, not an error for every @@ -1952,7 +1959,9 @@ fn correct_mutual_recursive_type_alias<'a>( // NB: ImMap::clone is O(1): https://docs.rs/im/latest/src/im/hash/map.rs.html#1527-1544 let mut to_instantiate = solved_aliases.clone().union(pending_aliases.clone()); - for &rec in cycle.iter() { + for index in cycle.iter() { + let rec = symbols_introduced[*index as usize]; + let alias = pending_aliases.get_mut(&rec).unwrap(); // Don't try to instantiate the alias itself in its definition. let original_alias_def = to_instantiate.remove(&rec).unwrap(); @@ -1992,14 +2001,18 @@ fn correct_mutual_recursive_type_alias<'a>( // The cycle we just instantiated and marked recursive may still be an illegal cycle, if // all the types in the cycle are narrow newtypes. We can't figure this out until now, // because we need all the types to be deeply instantiated. - let all_are_narrow = cycle.iter().all(|sym| { + let all_are_narrow = cycle.iter().all(|index| { + let sym = &symbols_introduced[*index as usize]; let typ = &pending_aliases.get(sym).unwrap().typ; matches!(typ, Type::RecursiveTagUnion(..)) && typ.is_narrow() }); if all_are_narrow { // This cycle is illegal! - let mut rest = cycle; + let mut rest: Vec = cycle + .into_iter() + .map(|i| symbols_introduced[i as usize]) + .collect(); let alias_name = rest.pop().unwrap(); let alias = pending_aliases.get_mut(&alias_name).unwrap(); From b0ceaf037242e981aa38e7128067d8cbdb846b99 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 01:04:59 +0200 Subject: [PATCH 03/26] instantiate aliases with a function --- compiler/can/src/def.rs | 11 +++++------ compiler/types/src/types.rs | 26 ++++++++++++++++---------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index bcdde10c17..04d5a2ccaa 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1966,13 +1966,12 @@ fn correct_mutual_recursive_type_alias<'a>( // Don't try to instantiate the alias itself in its definition. let original_alias_def = to_instantiate.remove(&rec).unwrap(); + let helper = |s| to_instantiate.get(&s); + let mut new_lambda_sets = ImSet::default(); - alias.typ.instantiate_aliases( - alias.region, - &to_instantiate, - var_store, - &mut new_lambda_sets, - ); + alias + .typ + .instantiate_aliases(alias.region, &helper, var_store, &mut new_lambda_sets); for lambda_set_var in new_lambda_sets { alias diff --git a/compiler/types/src/types.rs b/compiler/types/src/types.rs index 0f3536cd54..0e51c2dda4 100644 --- a/compiler/types/src/types.rs +++ b/compiler/types/src/types.rs @@ -120,13 +120,15 @@ impl RecordField { } } - pub fn instantiate_aliases( + pub fn instantiate_aliases<'a, F>( &mut self, region: Region, - aliases: &ImMap, + aliases: &'a F, var_store: &mut VarStore, introduced: &mut ImSet, - ) { + ) where + F: Fn(Symbol) -> Option<&'a Alias>, + { use RecordField::*; match self { @@ -168,13 +170,15 @@ impl LambdaSet { &mut self.0 } - fn instantiate_aliases( + fn instantiate_aliases<'a, F>( &mut self, region: Region, - aliases: &ImMap, + aliases: &'a F, var_store: &mut VarStore, introduced: &mut ImSet, - ) { + ) where + F: Fn(Symbol) -> Option<&'a Alias>, + { self.0 .instantiate_aliases(region, aliases, var_store, introduced) } @@ -1064,13 +1068,15 @@ impl Type { result } - pub fn instantiate_aliases( + pub fn instantiate_aliases<'a, F>( &mut self, region: Region, - aliases: &ImMap, + aliases: &'a F, var_store: &mut VarStore, new_lambda_set_variables: &mut ImSet, - ) { + ) where + F: Fn(Symbol) -> Option<&'a Alias>, + { use Type::*; match self { @@ -1138,7 +1144,7 @@ impl Type { ); } Apply(symbol, args, _) => { - if let Some(alias) = aliases.get(symbol) { + if let Some(alias) = aliases(*symbol) { // TODO switch to this, but we still need to check for recursion with the // `else` branch if false { From 452447232f5b4e52cd56177d0b51b64f1f55617f Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 02:18:43 +0200 Subject: [PATCH 04/26] bitvec all the things --- compiler/can/src/def.rs | 100 ++++++++++++++++----------- compiler/can/src/reference_matrix.rs | 5 ++ reporting/tests/test_reporting.rs | 50 +++++++------- 3 files changed, 88 insertions(+), 67 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 04d5a2ccaa..ed91f77c11 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -467,6 +467,7 @@ pub(crate) fn canonicalize_defs<'a>( // Now that we know the alias dependency graph, we can try to insert recursion variables // where aliases are recursive tag unions, or detect illegal recursions. let mut aliases = correct_mutual_recursive_type_alias(env, aliases, var_store); + for (symbol, alias) in aliases.iter() { scope.add_alias( *symbol, @@ -1908,19 +1909,10 @@ fn correct_mutual_recursive_type_alias<'a>( let capacity = original_aliases.len(); let mut matrix = ReferenceMatrix::new(capacity); - let (symbols_introduced, mut aliases): (Vec<_>, Vec<_>) = original_aliases - .into_iter() - .map(|(k, v)| (k, Some(v))) - .unzip(); + let (symbols_introduced, mut aliases): (Vec<_>, Vec<_>) = original_aliases.into_iter().unzip(); for (index, alias) in aliases.iter().enumerate() { - let it = alias - .as_ref() - .map(|a| a.typ.symbols().into_iter()) - .into_iter() - .flatten(); - - for referenced in it { + for referenced in alias.typ.symbols() { match symbols_introduced.iter().position(|k| referenced == *k) { None => { /* ignore */ } Some(ref_id) => matrix.set_row_col(index, ref_id, true), @@ -1928,7 +1920,8 @@ fn correct_mutual_recursive_type_alias<'a>( } } - let mut solved_aliases = ImMap::default(); + let mut solved_aliases_bitvec = bitvec::vec::BitVec::::repeat(false, capacity); + let mut pending_aliases_bitvec = bitvec::vec::BitVec::repeat(false, capacity); let group: Vec<_> = (0u32..capacity as u32).collect(); @@ -1937,15 +1930,12 @@ fn correct_mutual_recursive_type_alias<'a>( for cycle in cycles { debug_assert!(!cycle.is_empty()); - let mut pending_aliases: ImMap<_, _> = cycle - .iter() - .map(|index| { - ( - symbols_introduced[*index as usize], - aliases[*index as usize].take().unwrap(), - ) - }) - .collect(); + // zero out all the bits + pending_aliases_bitvec.set_elements(0); + + for index in cycle.iter() { + pending_aliases_bitvec.set(*index as usize, true); + } // Make sure we report only one error for the cycle, not an error for every // alias in the cycle. @@ -1955,23 +1945,43 @@ fn correct_mutual_recursive_type_alias<'a>( // depends on. // We only need to worry about symbols in this SCC or any prior one, since the SCCs // were sorted topologically, and we've already instantiated aliases coming from other - // modules. - // NB: ImMap::clone is O(1): https://docs.rs/im/latest/src/im/hash/map.rs.html#1527-1544 - let mut to_instantiate = solved_aliases.clone().union(pending_aliases.clone()); + let mut to_instantiate_bitvec = solved_aliases_bitvec | &pending_aliases_bitvec; for index in cycle.iter() { - let rec = symbols_introduced[*index as usize]; + let index = *index as usize; - let alias = pending_aliases.get_mut(&rec).unwrap(); // Don't try to instantiate the alias itself in its definition. - let original_alias_def = to_instantiate.remove(&rec).unwrap(); + to_instantiate_bitvec.set(index, false); - let helper = |s| to_instantiate.get(&s); + // now we do something sneaky. In `can_instantiate_symbol` we want to be able to + // take a reference to an `Alias` in the `aliases` vec. That would not work if + // we also had a mutable reference to an alias in that vec. So we swap out the + // type. + let alias_region = aliases[index].region; + let mut alias_type = Type::EmptyRec; + + std::mem::swap(&mut alias_type, &mut aliases[index].typ); + + let can_instantiate_symbol = |s| match symbols_introduced.iter().position(|i| *i == s) { + Some(s_index) if to_instantiate_bitvec[s_index] => aliases.get(s_index), + _ => None, + }; let mut new_lambda_sets = ImSet::default(); - alias - .typ - .instantiate_aliases(alias.region, &helper, var_store, &mut new_lambda_sets); + alias_type.instantiate_aliases( + alias_region, + &can_instantiate_symbol, + var_store, + &mut new_lambda_sets, + ); + + // swap the type back + std::mem::swap(&mut alias_type, &mut aliases[index].typ); + + // We can instantiate this alias in future iterations + to_instantiate_bitvec.set(index, true); + + let alias = &mut aliases[index]; for lambda_set_var in new_lambda_sets { alias @@ -1979,10 +1989,9 @@ fn correct_mutual_recursive_type_alias<'a>( .push(LambdaSet(Type::Variable(lambda_set_var))); } - to_instantiate.insert(rec, original_alias_def); - // Now mark the alias recursive, if it needs to be. - let is_self_recursive = alias.typ.contains_symbol(rec); + let rec = symbols_introduced[index]; + let is_self_recursive = cycle.len() == 1 && matrix.get_row_col(index, index); let is_mutually_recursive = cycle.len() > 1; if is_self_recursive || is_mutually_recursive { @@ -2001,20 +2010,24 @@ fn correct_mutual_recursive_type_alias<'a>( // all the types in the cycle are narrow newtypes. We can't figure this out until now, // because we need all the types to be deeply instantiated. let all_are_narrow = cycle.iter().all(|index| { - let sym = &symbols_introduced[*index as usize]; - let typ = &pending_aliases.get(sym).unwrap().typ; + let index = *index as usize; + let typ = &aliases[index].typ; matches!(typ, Type::RecursiveTagUnion(..)) && typ.is_narrow() }); if all_are_narrow { // This cycle is illegal! - let mut rest: Vec = cycle + + let mut cycle = cycle; + let first_index = cycle.pop().unwrap() as usize; + + let rest: Vec = cycle .into_iter() .map(|i| symbols_introduced[i as usize]) .collect(); - let alias_name = rest.pop().unwrap(); - let alias = pending_aliases.get_mut(&alias_name).unwrap(); + let alias_name = symbols_introduced[first_index]; + let alias = aliases.get_mut(first_index).unwrap(); mark_cyclic_alias( env, @@ -2026,11 +2039,14 @@ fn correct_mutual_recursive_type_alias<'a>( ) } - // Now, promote all resolved aliases in this cycle as solved. - solved_aliases.extend(pending_aliases); + // We've instantiated all we could, so all instantiatable aliases are solved now + solved_aliases_bitvec = to_instantiate_bitvec; } - solved_aliases + symbols_introduced + .into_iter() + .zip(aliases.into_iter()) + .collect() } fn make_tag_union_of_alias_recursive<'a>( diff --git a/compiler/can/src/reference_matrix.rs b/compiler/can/src/reference_matrix.rs index ab858d08c3..eef869c3b5 100644 --- a/compiler/can/src/reference_matrix.rs +++ b/compiler/can/src/reference_matrix.rs @@ -40,6 +40,11 @@ impl ReferenceMatrix { self.bitvec[index] } + #[inline(always)] + pub fn get_row_col(&self, row: usize, col: usize) -> bool { + self.bitvec[row * self.length + col] + } + pub fn is_recursive(&self, index: usize) -> bool { let mut scheduled = self.row_slice(index).to_bitvec(); let mut visited = self.row_slice(index).to_bitvec(); diff --git a/reporting/tests/test_reporting.rs b/reporting/tests/test_reporting.rs index 6d0a930545..b1929d9919 100644 --- a/reporting/tests/test_reporting.rs +++ b/reporting/tests/test_reporting.rs @@ -3264,15 +3264,15 @@ mod test_reporting { ── DUPLICATE FIELD NAME ────────────────────────────────── /code/proj/Main.roc ─ This record type defines the `.foo` field twice! - + 1│ a : { foo : Num.I64, bar : {}, foo : Str } ^^^^^^^^^^^^^ ^^^^^^^^^ - + In the rest of the program, I will only use the latter definition: - + 1│ a : { foo : Num.I64, bar : {}, foo : Str } ^^^^^^^^^ - + For clarity, remove the previous `.foo` definitions from this record type. "# @@ -3296,15 +3296,15 @@ mod test_reporting { ── DUPLICATE TAG NAME ──────────────────────────────────── /code/proj/Main.roc ─ This tag union type defines the `Foo` tag twice! - + 1│ a : [ Foo Num.I64, Bar {}, Foo Str ] ^^^^^^^^^^^ ^^^^^^^ - + In the rest of the program, I will only use the latter definition: - + 1│ a : [ Foo Num.I64, Bar {}, Foo Str ] ^^^^^^^ - + For clarity, remove the previous `Foo` definitions from this tag union type. "# @@ -3433,10 +3433,10 @@ mod test_reporting { ── TOO MANY TYPE ARGUMENTS ─────────────────────────────── /code/proj/Main.roc ─ The `Num` alias expects 1 type argument, but it got 2 instead: - + 1│ a : Num.Num Num.I64 Num.F64 ^^^^^^^^^^^^^^^^^^^^^^^ - + Are there missing parentheses? "# ), @@ -3459,10 +3459,10 @@ mod test_reporting { ── TOO MANY TYPE ARGUMENTS ─────────────────────────────── /code/proj/Main.roc ─ The `Num` alias expects 1 type argument, but it got 2 instead: - + 1│ f : Str -> Num.Num Num.I64 Num.F64 ^^^^^^^^^^^^^^^^^^^^^^^ - + Are there missing parentheses? "# ), @@ -3646,8 +3646,8 @@ mod test_reporting { This `ACons` global tag application has the type: [ ACons (Num (Integer Signed64)) [ - BCons (Num (Integer Signed64)) [ ACons Str [ BCons I64 a, BNil ], - ANil ], BNil ], ANil ] + BCons (Num (Integer Signed64)) [ ACons Str [ + BCons I64 (AList I64 I64), BNil ] as a, ANil ], BNil ], ANil ] But the type annotation on `x` says it should be: @@ -7092,20 +7092,20 @@ I need all branches in an `if` to have the same type! indoc!( r#" ── TYPE MISMATCH ───────────────────────────────────────── /code/proj/Main.roc ─ - + Something is off with the body of the `inner` definition: - + 3│ inner : * -> * 4│ inner = \y -> y ^ - + The type annotation on `inner` says this `y` value should have the type: - + * - + However, the type of this `y` value is connected to another type in a way that isn't reflected in this annotation. - + Tip: Any connection between types must use a named type variable, not a `*`! Maybe the annotation on `inner` should have a named type variable in place of the `*`? @@ -8867,21 +8867,21 @@ I need all branches in an `if` to have the same type! ^^^^^^^^^^^ Did you mean one of these? - + Type True Box Ok - + ── UNRECOGNIZED NAME ───────────────────────────────────── /code/proj/Main.roc ─ I cannot find a `UnknownType` value - + 3│ insertHelper : UnknownType, Type -> Type ^^^^^^^^^^^ - + Did you mean one of these? - + Type True insertHelper From e5afb156eb6a18380a3beb84e3ca4f2c6f6e3ea3 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 13:11:55 +0200 Subject: [PATCH 05/26] fix comment --- compiler/can/src/def.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index ed91f77c11..e650bbf3a0 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1943,8 +1943,13 @@ fn correct_mutual_recursive_type_alias<'a>( // We need to instantiate the alias with any symbols in the currrent module it // depends on. - // We only need to worry about symbols in this SCC or any prior one, since the SCCs - // were sorted topologically, and we've already instantiated aliases coming from other + // + // the `strongly_connected_components` returns SCCs in a topologically sorted order: + // SCC_0 has those aliases that don't rely on any other, SCC_1 has only those that rely on SCC_1, etc. + // + // Hence, we only need to worry about symbols in the current SCC or any prior one. + // It cannot be using any of the others, and we've already instantiated aliases coming from other + // modules. let mut to_instantiate_bitvec = solved_aliases_bitvec | &pending_aliases_bitvec; for index in cycle.iter() { From 912d9db29376654ad7ddf9c9cc05763233390e4f Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 13:23:17 +0200 Subject: [PATCH 06/26] use one fewer bitvec --- compiler/can/src/def.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index e650bbf3a0..849109035d 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1921,7 +1921,6 @@ fn correct_mutual_recursive_type_alias<'a>( } let mut solved_aliases_bitvec = bitvec::vec::BitVec::::repeat(false, capacity); - let mut pending_aliases_bitvec = bitvec::vec::BitVec::repeat(false, capacity); let group: Vec<_> = (0u32..capacity as u32).collect(); @@ -1930,17 +1929,6 @@ fn correct_mutual_recursive_type_alias<'a>( for cycle in cycles { debug_assert!(!cycle.is_empty()); - // zero out all the bits - pending_aliases_bitvec.set_elements(0); - - for index in cycle.iter() { - pending_aliases_bitvec.set(*index as usize, true); - } - - // Make sure we report only one error for the cycle, not an error for every - // alias in the cycle. - let mut can_still_report_error = true; - // We need to instantiate the alias with any symbols in the currrent module it // depends on. // @@ -1948,9 +1936,16 @@ fn correct_mutual_recursive_type_alias<'a>( // SCC_0 has those aliases that don't rely on any other, SCC_1 has only those that rely on SCC_1, etc. // // Hence, we only need to worry about symbols in the current SCC or any prior one. - // It cannot be using any of the others, and we've already instantiated aliases coming from other - // modules. - let mut to_instantiate_bitvec = solved_aliases_bitvec | &pending_aliases_bitvec; + // It cannot be using any of the others, and we've already instantiated aliases coming from other modules. + let mut to_instantiate_bitvec = solved_aliases_bitvec; + + for index in cycle.iter() { + to_instantiate_bitvec.set(*index as usize, true); + } + + // Make sure we report only one error for the cycle, not an error for every + // alias in the cycle. + let mut can_still_report_error = true; for index in cycle.iter() { let index = *index as usize; From dbae80507162195dcf1ac711571900ece31750c1 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 13:43:04 +0200 Subject: [PATCH 07/26] minor cleanup to recursive alias creation --- compiler/can/src/def.rs | 49 +++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 849109035d..21e1f79162 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -2055,7 +2055,7 @@ fn make_tag_union_of_alias_recursive<'a>( alias: &mut Alias, others: Vec, var_store: &mut VarStore, - can_report_error: &mut bool, + can_report_cyclic_error: &mut bool, ) -> Result<(), ()> { let alias_args = alias .type_variables @@ -2070,7 +2070,7 @@ fn make_tag_union_of_alias_recursive<'a>( others, &mut alias.typ, var_store, - can_report_error, + can_report_cyclic_error, ); match made_recursive { @@ -2119,22 +2119,25 @@ fn make_tag_union_recursive_help<'a>( others: Vec, typ: &mut Type, var_store: &mut VarStore, - can_report_error: &mut bool, + can_report_cyclic_error: &mut bool, ) -> MakeTagUnionRecursive { use MakeTagUnionRecursive::*; - let Loc { - value: (symbol, args), - region: alias_region, - } = recursive_alias; - let vars = args.iter().map(|(_, t)| t.clone()).collect::>(); + let (symbol, args) = recursive_alias.value; + let alias_region = recursive_alias.region; + match typ { Type::TagUnion(tags, ext) => { let recursion_variable = var_store.fresh(); + let type_arguments = args.iter().map(|(_, t)| t.clone()).collect::>(); + let mut pending_typ = Type::RecursiveTagUnion(recursion_variable, tags.to_vec(), ext.clone()); - let substitution_result = - pending_typ.substitute_alias(symbol, &vars, &Type::Variable(recursion_variable)); + let substitution_result = pending_typ.substitute_alias( + symbol, + &type_arguments, + &Type::Variable(recursion_variable), + ); match substitution_result { Ok(()) => { // We can substitute the alias presence for the variable exactly. @@ -2160,18 +2163,22 @@ fn make_tag_union_recursive_help<'a>( actual, type_arguments, .. - } => make_tag_union_recursive_help( - env, - Loc::at_zero((symbol, type_arguments)), - region, - others, - actual, - var_store, - can_report_error, - ), + } => { + // try to make `actual` recursive + make_tag_union_recursive_help( + env, + Loc::at_zero((symbol, type_arguments)), + region, + others, + actual, + var_store, + can_report_cyclic_error, + ) + } _ => { - mark_cyclic_alias(env, typ, symbol, region, others, *can_report_error); - *can_report_error = false; + // take care to report a cyclic alias only once (not once for each alias in the cycle) + mark_cyclic_alias(env, typ, symbol, region, others, *can_report_cyclic_error); + *can_report_cyclic_error = false; Cyclic } From cf89dd7f8417896a783e23ff52083334451a746a Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 13:50:47 +0200 Subject: [PATCH 08/26] finetuning some comments --- compiler/can/src/def.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 21e1f79162..bd9312985a 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1953,10 +1953,12 @@ fn correct_mutual_recursive_type_alias<'a>( // Don't try to instantiate the alias itself in its definition. to_instantiate_bitvec.set(index, false); - // now we do something sneaky. In `can_instantiate_symbol` we want to be able to - // take a reference to an `Alias` in the `aliases` vec. That would not work if - // we also had a mutable reference to an alias in that vec. So we swap out the - // type. + // we run into a problem here where we want to modify an element in the aliases array, + // but also reference the other elements. The borrow checker, correctly, says no to that. + // + // So we get creative: we swap out the element we want to modify with a dummy. We can + // then freely modify the type we moved out, and the `to_instantiate_bitvec` mask + // prevents our dummy from being used. let alias_region = aliases[index].region; let mut alias_type = Type::EmptyRec; @@ -1981,13 +1983,12 @@ fn correct_mutual_recursive_type_alias<'a>( // We can instantiate this alias in future iterations to_instantiate_bitvec.set(index, true); - let alias = &mut aliases[index]; - - for lambda_set_var in new_lambda_sets { - alias - .lambda_set_variables - .push(LambdaSet(Type::Variable(lambda_set_var))); - } + // add any lambda sets that the instantiation created to the current alias + aliases[index].lambda_set_variables.extend( + new_lambda_sets + .iter() + .map(|var| LambdaSet(Type::Variable(*var))), + ); // Now mark the alias recursive, if it needs to be. let rec = symbols_introduced[index]; @@ -1998,7 +1999,7 @@ fn correct_mutual_recursive_type_alias<'a>( let _made_recursive = make_tag_union_of_alias_recursive( env, rec, - alias, + &mut aliases[index], vec![], var_store, &mut can_still_report_error, From d22a4711d42be01f587a717d728f3f62a721faee Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 15:36:00 +0200 Subject: [PATCH 09/26] use VecMap to store aliases --- compiler/can/src/def.rs | 34 ++++++++++++----------------- compiler/collections/src/vec_map.rs | 12 ++++++++++ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index bd9312985a..6db394514f 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -11,7 +11,8 @@ use crate::reference_matrix::ReferenceMatrix; use crate::reference_matrix::TopologicalSort; use crate::scope::create_alias; use crate::scope::Scope; -use roc_collections::{ImEntry, ImMap, ImSet, MutMap, MutSet, SendMap}; +use roc_collections::VecMap; +use roc_collections::{ImSet, MutMap, MutSet, SendMap}; use roc_module::ident::Lowercase; use roc_module::symbol::IdentId; use roc_module::symbol::ModuleId; @@ -53,7 +54,7 @@ pub(crate) struct CanDefs { defs: Vec>, def_ordering: DefOrdering, - aliases: SendMap, + aliases: VecMap, } /// A Def that has had patterns and type annnotations canonicalized, @@ -351,7 +352,7 @@ pub(crate) fn canonicalize_defs<'a>( } let sorted = sort_type_defs_before_introduction(referenced_type_symbols); - let mut aliases = SendMap::default(); + let mut aliases = VecMap::default(); let mut abilities = MutMap::default(); for type_name in sorted { @@ -571,7 +572,7 @@ pub(crate) fn canonicalize_defs<'a>( defs, def_ordering, // The result needs a thread-safe `SendMap` - aliases: aliases.into_iter().collect(), + aliases, }, scope, output, @@ -1179,16 +1180,11 @@ fn single_can_def( fn add_annotation_aliases( type_annotation: &crate::annotation::Annotation, - aliases: &mut ImMap, + aliases: &mut VecMap, ) { for (name, alias) in type_annotation.aliases.iter() { - match aliases.entry(*name) { - ImEntry::Occupied(_) => { - // do nothing - } - ImEntry::Vacant(vacant) => { - vacant.insert(alias.clone()); - } + if !aliases.contains(name) { + aliases.insert(*name, alias.clone()); } } } @@ -1222,7 +1218,7 @@ fn canonicalize_pending_value_def<'a>( mut output: Output, scope: &mut Scope, var_store: &mut VarStore, - aliases: &mut ImMap, + aliases: &mut VecMap, abilities_in_scope: &[Symbol], ) -> TempOutput { use PendingValueDef::*; @@ -1903,13 +1899,13 @@ fn to_pending_value_def<'a>( /// Make aliases recursive fn correct_mutual_recursive_type_alias<'a>( env: &mut Env<'a>, - original_aliases: SendMap, + original_aliases: VecMap, var_store: &mut VarStore, -) -> ImMap { +) -> VecMap { let capacity = original_aliases.len(); let mut matrix = ReferenceMatrix::new(capacity); - let (symbols_introduced, mut aliases): (Vec<_>, Vec<_>) = original_aliases.into_iter().unzip(); + let (symbols_introduced, mut aliases) = original_aliases.unzip(); for (index, alias) in aliases.iter().enumerate() { for referenced in alias.typ.symbols() { @@ -2044,10 +2040,8 @@ fn correct_mutual_recursive_type_alias<'a>( solved_aliases_bitvec = to_instantiate_bitvec; } - symbols_introduced - .into_iter() - .zip(aliases.into_iter()) - .collect() + // Safety: both vectors are equal lenght and there are no duplicates + unsafe { VecMap::zip(symbols_introduced, aliases) } } fn make_tag_union_of_alias_recursive<'a>( diff --git a/compiler/collections/src/vec_map.rs b/compiler/collections/src/vec_map.rs index 57ab5c31b8..e2da82eb70 100644 --- a/compiler/collections/src/vec_map.rs +++ b/compiler/collections/src/vec_map.rs @@ -76,6 +76,18 @@ impl VecMap { pub fn values(&self) -> impl Iterator { self.values.iter() } + + pub fn unzip(self) -> (Vec, Vec) { + (self.keys, self.values) + } + + /// # Safety + /// + /// keys and values must have the same length, and there must not + /// be any duplicates in the keys vector + pub unsafe fn zip(keys: Vec, values: Vec) -> Self { + Self { keys, values } + } } impl Extend<(K, V)> for VecMap { From 0f4a5bdd74121a1c92b1607025a3babc1b1501d1 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 15:46:17 +0200 Subject: [PATCH 10/26] rename --- compiler/can/src/def.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 6db394514f..97510b8b45 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1203,7 +1203,7 @@ enum DefReferences { AnnotationWithoutBody, } -struct TempOutput { +struct DefOutput { output: Output, def: Def, references: DefReferences, @@ -1220,7 +1220,7 @@ fn canonicalize_pending_value_def<'a>( var_store: &mut VarStore, aliases: &mut VecMap, abilities_in_scope: &[Symbol], -) -> TempOutput { +) -> DefOutput { use PendingValueDef::*; // Make types for the body expr, even if we won't end up having a body. @@ -1321,7 +1321,7 @@ fn canonicalize_pending_value_def<'a>( vars_by_symbol.clone(), ); - TempOutput { + DefOutput { output, references: DefReferences::AnnotationWithoutBody, def, @@ -1434,7 +1434,7 @@ fn canonicalize_pending_value_def<'a>( output.union(can_output); - TempOutput { + DefOutput { output, references: DefReferences::Function(closure_references), def, @@ -1453,7 +1453,7 @@ fn canonicalize_pending_value_def<'a>( output.union(can_output); - TempOutput { + DefOutput { output, references: DefReferences::Value(refs), def, @@ -1548,7 +1548,7 @@ fn canonicalize_pending_value_def<'a>( output.union(can_output); - TempOutput { + DefOutput { output, references: DefReferences::Function(closure_references), def, @@ -1567,7 +1567,7 @@ fn canonicalize_pending_value_def<'a>( output.union(can_output); - TempOutput { + DefOutput { output, references: DefReferences::Value(refs), def, From a7e781ece2348586054050e00062bbd2012d41b3 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 16:19:31 +0200 Subject: [PATCH 11/26] simplify earlier topological sort of abilities/aliases --- compiler/can/src/def.rs | 76 +++++++++-------------------- compiler/collections/src/vec_map.rs | 4 ++ 2 files changed, 27 insertions(+), 53 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 97510b8b45..5d941be248 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -170,66 +170,35 @@ impl Declaration { /// Returns a topologically sorted sequence of alias/opaque names fn sort_type_defs_before_introduction( - mut referenced_symbols: MutMap>, + referenced_symbols: VecMap>, ) -> Vec { - let defined_symbols: Vec = referenced_symbols.keys().copied().collect(); + let capacity = referenced_symbols.len(); + let mut matrix = ReferenceMatrix::new(capacity); - // find the strongly connected components and their relations - let sccs = { - // only retain symbols from the current set of defined symbols; the rest come from other modules - for v in referenced_symbols.iter_mut() { - v.1.retain(|x| defined_symbols.iter().any(|s| s == x)); - } + let (symbols, referenced) = referenced_symbols.unzip(); - let all_successors_with_self = |symbol: &Symbol| referenced_symbols[symbol].iter().copied(); - - ven_graph::strongly_connected_components(&defined_symbols, all_successors_with_self) - }; - - // then sort the strongly connected components - let groups: Vec<_> = (0..sccs.len()).collect(); - let mut group_symbols: Vec> = vec![Vec::new(); groups.len()]; - - let mut symbol_to_group_index = MutMap::default(); - let mut group_to_groups = vec![Vec::new(); groups.len()]; - - for (index, group) in sccs.iter().enumerate() { - for s in group { - symbol_to_group_index.insert(*s, index); - } - } - - for (index, group) in sccs.iter().enumerate() { - for s in group { - let reachable = &referenced_symbols[s]; - for r in reachable { - let new_index = symbol_to_group_index[r]; - - if new_index != index { - group_to_groups[index].push(new_index); - } + for (index, references) in referenced.iter().enumerate() { + for referenced in references { + match symbols.iter().position(|k| k == referenced) { + None => { /* not defined in this scope */ } + Some(ref_index) => matrix.set_row_col(index, ref_index, true), } } } - for v in group_symbols.iter_mut() { - v.sort(); - v.dedup(); + // find the strongly connected components and their relations + let nodes: Vec<_> = (0..capacity as u32).collect(); + let sccs = matrix.strongly_connected_components(&nodes); + + let mut output = Vec::with_capacity(capacity); + + for group in sccs { + for index in group { + output.push(symbols[index as usize]) + } } - let all_successors_with_self = |group: &usize| group_to_groups[*group].iter().copied(); - - // split into self-recursive and mutually recursive - match topological_sort(&groups, all_successors_with_self) { - Ok(result) => result - .iter() - .rev() - .flat_map(|group_index| sccs[*group_index].iter()) - .copied() - .collect(), - - Err(_loop_detected) => unreachable!("the groups cannot recurse"), - } + output } #[inline(always)] @@ -299,7 +268,7 @@ pub(crate) fn canonicalize_defs<'a>( let mut type_defs = MutMap::default(); let mut abilities_in_scope = Vec::new(); - let mut referenced_type_symbols = MutMap::default(); + let mut referenced_type_symbols = VecMap::default(); // Determine which idents we introduced in the course of this process. let mut symbols_introduced = MutMap::default(); @@ -454,7 +423,8 @@ pub(crate) fn canonicalize_defs<'a>( can_ann.typ.clone(), kind, ); - aliases.insert(symbol, alias.clone()); + + aliases.insert(symbol, alias); } TypeDef::Ability(name, members) => { diff --git a/compiler/collections/src/vec_map.rs b/compiler/collections/src/vec_map.rs index e2da82eb70..60b1608737 100644 --- a/compiler/collections/src/vec_map.rs +++ b/compiler/collections/src/vec_map.rs @@ -77,6 +77,10 @@ impl VecMap { self.values.iter() } + pub fn keys(&self) -> impl Iterator { + self.keys.iter() + } + pub fn unzip(self) -> (Vec, Vec) { (self.keys, self.values) } From b8db6aae8dccd3eb637d2790a84c216fb4216ff3 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 16:51:51 +0200 Subject: [PATCH 12/26] use bitvec to return SCCs --- compiler/can/src/def.rs | 1 - compiler/can/src/reference_matrix.rs | 56 +++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 5d941be248..14e21aeab8 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -30,7 +30,6 @@ use roc_types::types::AliasKind; use roc_types::types::LambdaSet; use roc_types::types::{Alias, Type}; use std::fmt::Debug; -use ven_graph::topological_sort; #[derive(Clone, Debug)] pub struct Def { diff --git a/compiler/can/src/reference_matrix.rs b/compiler/can/src/reference_matrix.rs index eef869c3b5..32427f2d65 100644 --- a/compiler/can/src/reference_matrix.rs +++ b/compiler/can/src/reference_matrix.rs @@ -1,7 +1,8 @@ // see if we get better performance with different integer types -pub(crate) type Element = usize; -pub(crate) type BitVec = bitvec::vec::BitVec; -pub(crate) type BitSlice = bitvec::prelude::BitSlice; +type Order = bitvec::order::Lsb0; +type Element = usize; +type BitVec = bitvec::vec::BitVec; +type BitSlice = bitvec::prelude::BitSlice; /// A square boolean matrix used to store relations /// @@ -172,7 +173,12 @@ impl ReferenceMatrix { continue 'outer; } - break params.scc; + let mut result = Vec::new(); + for group in params.scc.components() { + result.push(group.map(|v| v as u32).collect()); + } + + break result; } } } @@ -202,7 +208,7 @@ struct Params { c: usize, p: Vec, s: Vec, - scc: Vec>, + scc: Sccs, scca: Vec, } @@ -219,7 +225,10 @@ impl Params { c: 0, s: Vec::new(), p: Vec::new(), - scc: Vec::new(), + scc: Sccs { + matrix: ReferenceMatrix::new(length), + components: 0, + }, scca: Vec::new(), } } @@ -260,15 +269,44 @@ fn recurse_onto(length: usize, bitvec: &BitVec, v: usize, params: &mut Params) { if params.p.last() == Some(&(v as u32)) { params.p.pop(); - let mut component = Vec::new(); while let Some(node) = params.s.pop() { - component.push(node); + params + .scc + .matrix + .set_row_col(params.scc.components, node as usize, true); params.scca.push(node); params.preorders[node as usize] = Preorder::Removed; if node as usize == v { break; } } - params.scc.push(component); + + params.scc.components += 1; + } +} + +#[derive(Debug)] +pub(crate) struct Sccs { + components: usize, + matrix: ReferenceMatrix, +} + +impl Sccs { + fn components(&self) -> impl Iterator + '_> + '_ { + // work around a panic when requesting a chunk size of 0 + let length = if self.matrix.length == 0 { + // the `.take(self.components)` ensures the resulting iterator will be empty + assert!(self.components == 0); + + 1 + } else { + self.matrix.length + }; + + self.matrix + .bitvec + .chunks(length) + .take(self.components) + .map(|row| row.iter_ones()) } } From 0569d2338557c7d6154da51427c2d7b0592572c6 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 17:10:28 +0200 Subject: [PATCH 13/26] add rows helper for Sccs --- compiler/can/src/reference_matrix.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/compiler/can/src/reference_matrix.rs b/compiler/can/src/reference_matrix.rs index 32427f2d65..c422d14038 100644 --- a/compiler/can/src/reference_matrix.rs +++ b/compiler/can/src/reference_matrix.rs @@ -292,7 +292,7 @@ pub(crate) struct Sccs { } impl Sccs { - fn components(&self) -> impl Iterator + '_> + '_ { + pub fn components(&self) -> impl Iterator + '_> + '_ { // work around a panic when requesting a chunk size of 0 let length = if self.matrix.length == 0 { // the `.take(self.components)` ensures the resulting iterator will be empty @@ -309,4 +309,18 @@ impl Sccs { .take(self.components) .map(|row| row.iter_ones()) } + + pub fn rows(&self) -> std::iter::Take> { + // work around a panic when requesting a chunk size of 0 + let length = if self.matrix.length == 0 { + // the `.take(self.components)` ensures the resulting iterator will be empty + assert!(self.components == 0); + + 1 + } else { + self.matrix.length + }; + + self.matrix.bitvec.chunks(length).take(self.components) + } } From 62a80db379e6cdd029c633ee9031199096f66e20 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 17:21:52 +0200 Subject: [PATCH 14/26] move `correct_mutual_recursive_type_alias` over --- compiler/can/src/def.rs | 33 +++++++++------------------- compiler/can/src/reference_matrix.rs | 19 ++++++++++++++++ 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 14e21aeab8..e0e872cf1b 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1889,10 +1889,8 @@ fn correct_mutual_recursive_type_alias<'a>( let group: Vec<_> = (0u32..capacity as u32).collect(); - let cycles = matrix.strongly_connected_components(&group); - - for cycle in cycles { - debug_assert!(!cycle.is_empty()); + for cycle in matrix.strongly_connected_components_prim(&group).rows() { + debug_assert!(cycle.count_ones() > 0); // We need to instantiate the alias with any symbols in the currrent module it // depends on. @@ -1902,19 +1900,13 @@ fn correct_mutual_recursive_type_alias<'a>( // // Hence, we only need to worry about symbols in the current SCC or any prior one. // It cannot be using any of the others, and we've already instantiated aliases coming from other modules. - let mut to_instantiate_bitvec = solved_aliases_bitvec; - - for index in cycle.iter() { - to_instantiate_bitvec.set(*index as usize, true); - } + let mut to_instantiate_bitvec = solved_aliases_bitvec | cycle; // Make sure we report only one error for the cycle, not an error for every // alias in the cycle. let mut can_still_report_error = true; - for index in cycle.iter() { - let index = *index as usize; - + for index in cycle.iter_ones() { // Don't try to instantiate the alias itself in its definition. to_instantiate_bitvec.set(index, false); @@ -1957,8 +1949,8 @@ fn correct_mutual_recursive_type_alias<'a>( // Now mark the alias recursive, if it needs to be. let rec = symbols_introduced[index]; - let is_self_recursive = cycle.len() == 1 && matrix.get_row_col(index, index); - let is_mutually_recursive = cycle.len() > 1; + let is_self_recursive = cycle.count_ones() == 1 && matrix.get_row_col(index, index); + let is_mutually_recursive = cycle.count_ones() > 1; if is_self_recursive || is_mutually_recursive { let _made_recursive = make_tag_union_of_alias_recursive( @@ -1975,22 +1967,17 @@ fn correct_mutual_recursive_type_alias<'a>( // The cycle we just instantiated and marked recursive may still be an illegal cycle, if // all the types in the cycle are narrow newtypes. We can't figure this out until now, // because we need all the types to be deeply instantiated. - let all_are_narrow = cycle.iter().all(|index| { - let index = *index as usize; + let all_are_narrow = cycle.iter_ones().all(|index| { let typ = &aliases[index].typ; matches!(typ, Type::RecursiveTagUnion(..)) && typ.is_narrow() }); if all_are_narrow { // This cycle is illegal! + let mut indices = cycle.iter_ones(); + let first_index = indices.next().unwrap(); - let mut cycle = cycle; - let first_index = cycle.pop().unwrap() as usize; - - let rest: Vec = cycle - .into_iter() - .map(|i| symbols_introduced[i as usize]) - .collect(); + let rest: Vec = indices.map(|i| symbols_introduced[i]).collect(); let alias_name = symbols_introduced[first_index]; let alias = aliases.get_mut(first_index).unwrap(); diff --git a/compiler/can/src/reference_matrix.rs b/compiler/can/src/reference_matrix.rs index c422d14038..a60666e121 100644 --- a/compiler/can/src/reference_matrix.rs +++ b/compiler/can/src/reference_matrix.rs @@ -181,6 +181,25 @@ impl ReferenceMatrix { break result; } } + + /// Get the strongly-connected components of the set of input nodes. + pub fn strongly_connected_components_prim(&self, nodes: &[u32]) -> Sccs { + let mut params = Params::new(self.length, nodes); + + 'outer: loop { + for (node, value) in params.preorders.iter().enumerate() { + if let Preorder::Removed = value { + continue; + } + + recurse_onto(self.length, &self.bitvec, node, &mut params); + + continue 'outer; + } + + break params.scc; + } + } } pub(crate) enum TopologicalSort { From 3d9cda57cc7759f54261c975e6b3ef17e5063904 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 17:28:46 +0200 Subject: [PATCH 15/26] moe the others over --- compiler/can/src/def.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index e0e872cf1b..ba68ca6238 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -187,13 +187,12 @@ fn sort_type_defs_before_introduction( // find the strongly connected components and their relations let nodes: Vec<_> = (0..capacity as u32).collect(); - let sccs = matrix.strongly_connected_components(&nodes); let mut output = Vec::with_capacity(capacity); - for group in sccs { - for index in group { - output.push(symbols[index as usize]) + for group in matrix.strongly_connected_components_prim(&nodes).rows() { + for index in group.iter_ones() { + output.push(symbols[index]) } } @@ -855,9 +854,9 @@ pub(crate) fn sort_can_defs( let sccs = def_ordering .references - .strongly_connected_components(&nodes_in_cycle); + .strongly_connected_components_prim(&nodes_in_cycle); - for cycle in sccs { + for cycle in sccs.rows() { // check whether the cycle is faulty, which is when it has // a direct successor in the current cycle. This catches things like: // @@ -867,11 +866,11 @@ pub(crate) fn sort_can_defs( // // p = q // q = p - let is_invalid_cycle = match cycle.get(0) { + let is_invalid_cycle = match cycle.iter_ones().next() { Some(def_id) => def_ordering .direct_references - .references_for(*def_id as usize) - .any(|key| cycle.contains(&(key as u32))), + .references_for(def_id) + .any(|key| cycle[key]), None => false, }; @@ -879,11 +878,11 @@ pub(crate) fn sort_can_defs( // We want to show the entire cycle in the error message, so expand it out. let mut entries = Vec::new(); - for def_id in &cycle { - let symbol = def_ordering.get_symbol(*def_id).unwrap(); - let def = &defs[*def_id as usize]; + for def_id in cycle.iter_ones() { + let symbol = def_ordering.get_symbol(def_id as u32).unwrap(); + let def = &defs[def_id]; - let expr_region = defs[*def_id as usize].as_ref().unwrap().loc_expr.region; + let expr_region = defs[def_id].as_ref().unwrap().loc_expr.region; let entry = CycleEntry { symbol, @@ -909,6 +908,7 @@ pub(crate) fn sort_can_defs( // // if it's not an invalid cycle, this is slightly inefficient, // because we know this becomes exactly one DeclareRec already + let cycle = cycle.iter_ones().map(|v| v as u32).collect(); groups.push(cycle); } From da9899e7bbaa9d3eb7afc7209d99d1fb54b31c0e Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 17:36:40 +0200 Subject: [PATCH 16/26] use a bitvec-based approach to return the SCCs --- compiler/can/src/def.rs | 26 +++++++------- compiler/can/src/reference_matrix.rs | 53 +++++----------------------- 2 files changed, 22 insertions(+), 57 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index ba68ca6238..fd8dbdd92b 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -190,7 +190,7 @@ fn sort_type_defs_before_introduction( let mut output = Vec::with_capacity(capacity); - for group in matrix.strongly_connected_components_prim(&nodes).rows() { + for group in matrix.strongly_connected_components(&nodes).groups() { for index in group.iter_ones() { output.push(symbols[index]) } @@ -854,9 +854,9 @@ pub(crate) fn sort_can_defs( let sccs = def_ordering .references - .strongly_connected_components_prim(&nodes_in_cycle); + .strongly_connected_components(&nodes_in_cycle); - for cycle in sccs.rows() { + for cycle in sccs.groups() { // check whether the cycle is faulty, which is when it has // a direct successor in the current cycle. This catches things like: // @@ -995,16 +995,16 @@ fn group_to_declaration( let sccs = def_ordering.references.strongly_connected_components(group); - for cycle in sccs { - if cycle.len() == 1 { - let def_id = cycle[0]; + for cycle in sccs.groups() { + if cycle.count_ones() == 1 { + let def_id = cycle.iter_ones().next().unwrap(); - match defs[def_id as usize].take() { + match defs[def_id].take() { Some(mut new_def) => { // there is only one definition in this cycle, so we only have // to check whether it recurses with itself; there is nobody else // to recurse with, or they would also be in this cycle. - let is_self_recursive = def_ordering.is_self_recursive(def_id); + let is_self_recursive = def_ordering.is_self_recursive(def_id as u32); if let Closure(ClosureData { recursive: recursive @ Recursive::NotRecursive, @@ -1028,7 +1028,7 @@ fn group_to_declaration( } None => { // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(def_id); + let symbol = def_ordering.get_symbol(def_id as u32); roc_error_macros::internal_error!("def not available {:?}", symbol) } @@ -1037,7 +1037,7 @@ fn group_to_declaration( let mut can_defs = Vec::new(); // Topological sort gives us the reverse of the sorting we want! - for def_id in cycle.into_iter().rev() { + for def_id in cycle.iter_ones().rev() { match defs[def_id as usize].take() { Some(mut new_def) => { // Determine recursivity of closures that are not tail-recursive @@ -1046,7 +1046,7 @@ fn group_to_declaration( .. }) = &mut new_def.loc_expr.value { - if def_ordering.references.is_recursive(def_id as usize) { + if def_ordering.references.is_recursive(def_id) { *recursive = Recursive::Recursive } } @@ -1059,7 +1059,7 @@ fn group_to_declaration( } None => { // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(def_id); + let symbol = def_ordering.get_symbol(def_id as u32); roc_error_macros::internal_error!("def not available {:?}", symbol) } @@ -1889,7 +1889,7 @@ fn correct_mutual_recursive_type_alias<'a>( let group: Vec<_> = (0u32..capacity as u32).collect(); - for cycle in matrix.strongly_connected_components_prim(&group).rows() { + for cycle in matrix.strongly_connected_components(&group).groups() { debug_assert!(cycle.count_ones() > 0); // We need to instantiate the alias with any symbols in the currrent module it diff --git a/compiler/can/src/reference_matrix.rs b/compiler/can/src/reference_matrix.rs index a60666e121..234340d28b 100644 --- a/compiler/can/src/reference_matrix.rs +++ b/compiler/can/src/reference_matrix.rs @@ -159,31 +159,7 @@ impl ReferenceMatrix { } /// Get the strongly-connected components of the set of input nodes. - pub fn strongly_connected_components(&self, nodes: &[u32]) -> Vec> { - let mut params = Params::new(self.length, nodes); - - 'outer: loop { - for (node, value) in params.preorders.iter().enumerate() { - if let Preorder::Removed = value { - continue; - } - - recurse_onto(self.length, &self.bitvec, node, &mut params); - - continue 'outer; - } - - let mut result = Vec::new(); - for group in params.scc.components() { - result.push(group.map(|v| v as u32).collect()); - } - - break result; - } - } - - /// Get the strongly-connected components of the set of input nodes. - pub fn strongly_connected_components_prim(&self, nodes: &[u32]) -> Sccs { + pub fn strongly_connected_components(&self, nodes: &[u32]) -> Sccs { let mut params = Params::new(self.length, nodes); 'outer: loop { @@ -311,25 +287,14 @@ pub(crate) struct Sccs { } impl Sccs { - pub fn components(&self) -> impl Iterator + '_> + '_ { - // work around a panic when requesting a chunk size of 0 - let length = if self.matrix.length == 0 { - // the `.take(self.components)` ensures the resulting iterator will be empty - assert!(self.components == 0); - - 1 - } else { - self.matrix.length - }; - - self.matrix - .bitvec - .chunks(length) - .take(self.components) - .map(|row| row.iter_ones()) - } - - pub fn rows(&self) -> std::iter::Take> { + /// Iterate over the individual components. Each component is represented as a bit vector where + /// a one indicates that the node is part of the group and a zero that it is not. + /// + /// A good way to get the actual nodes is the `.iter_ones()` method. + /// + /// It is guaranteed that a group is non-empty, and that flattening the groups gives a valid + /// topological ordering. + pub fn groups(&self) -> std::iter::Take> { // work around a panic when requesting a chunk size of 0 let length = if self.matrix.length == 0 { // the `.take(self.components)` ensures the resulting iterator will be empty From 6b0c4b5df0bf291c512d38e54a37456506d30901 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 17:41:01 +0200 Subject: [PATCH 17/26] remove some usize -> u32 casts --- compiler/can/src/def.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index fd8dbdd92b..08f07aac81 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -773,9 +773,9 @@ impl DefOrdering { None } - fn get_symbol(&self, id: u32) -> Option { + fn get_symbol(&self, id: usize) -> Option { for (ident_id, def_id) in self.symbol_to_id.iter() { - if id == *def_id { + if id as u32 == *def_id { return Some(Symbol::new(self.home, *ident_id)); } } @@ -783,13 +783,14 @@ impl DefOrdering { None } - fn is_self_recursive(&self, id: u32) -> bool { - debug_assert!(id < self.length); + fn is_self_recursive(&self, id: usize) -> bool { + let length = self.length as usize; + debug_assert!(id < length); // id'th row, id'th column - let index = (id * self.length) + id; + let index = (id * length) + id; - self.references.get(index as usize) + self.references.get(index) } #[inline(always)] @@ -879,7 +880,7 @@ pub(crate) fn sort_can_defs( let mut entries = Vec::new(); for def_id in cycle.iter_ones() { - let symbol = def_ordering.get_symbol(def_id as u32).unwrap(); + let symbol = def_ordering.get_symbol(def_id).unwrap(); let def = &defs[def_id]; let expr_region = defs[def_id].as_ref().unwrap().loc_expr.region; @@ -1004,7 +1005,7 @@ fn group_to_declaration( // there is only one definition in this cycle, so we only have // to check whether it recurses with itself; there is nobody else // to recurse with, or they would also be in this cycle. - let is_self_recursive = def_ordering.is_self_recursive(def_id as u32); + let is_self_recursive = def_ordering.is_self_recursive(def_id); if let Closure(ClosureData { recursive: recursive @ Recursive::NotRecursive, @@ -1028,7 +1029,7 @@ fn group_to_declaration( } None => { // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(def_id as u32); + let symbol = def_ordering.get_symbol(def_id); roc_error_macros::internal_error!("def not available {:?}", symbol) } @@ -1059,7 +1060,7 @@ fn group_to_declaration( } None => { // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(def_id as u32); + let symbol = def_ordering.get_symbol(def_id); roc_error_macros::internal_error!("def not available {:?}", symbol) } From fe76858e2dbf06cedd015c506e4f9e258b51ee5c Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 17:43:34 +0200 Subject: [PATCH 18/26] an error message that was changed is now what it was again --- reporting/tests/test_reporting.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reporting/tests/test_reporting.rs b/reporting/tests/test_reporting.rs index b1929d9919..53eac2a4b1 100644 --- a/reporting/tests/test_reporting.rs +++ b/reporting/tests/test_reporting.rs @@ -3646,8 +3646,8 @@ mod test_reporting { This `ACons` global tag application has the type: [ ACons (Num (Integer Signed64)) [ - BCons (Num (Integer Signed64)) [ ACons Str [ - BCons I64 (AList I64 I64), BNil ] as a, ANil ], BNil ], ANil ] + BCons (Num (Integer Signed64)) [ ACons Str [ BCons I64 a, BNil ], + ANil ], BNil ], ANil ] But the type annotation on `x` says it should be: From c5f433ab9498f5b54f586d7af742652692de770d Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 18:39:29 +0200 Subject: [PATCH 19/26] do our sorting using just SSCs --- compiler/can/src/def.rs | 268 +++++++++++++++++++--------------------- 1 file changed, 126 insertions(+), 142 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 08f07aac81..37473ad554 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -8,11 +8,10 @@ use crate::expr::{canonicalize_expr, Output, Recursive}; use crate::pattern::{bindings_from_patterns, canonicalize_def_header_pattern, Pattern}; use crate::procedure::References; use crate::reference_matrix::ReferenceMatrix; -use crate::reference_matrix::TopologicalSort; use crate::scope::create_alias; use crate::scope::Scope; use roc_collections::VecMap; -use roc_collections::{ImSet, MutMap, MutSet, SendMap}; +use roc_collections::{ImSet, MutMap, SendMap}; use roc_module::ident::Lowercase; use roc_module::symbol::IdentId; use roc_module::symbol::ModuleId; @@ -822,159 +821,144 @@ pub(crate) fn sort_can_defs( output.aliases.insert(symbol, alias); } - // TODO also do the same `addDirects` check elm/compiler does, so we can - // report an error if a recursive definition can't possibly terminate! - match def_ordering.references.topological_sort_into_groups() { - TopologicalSort::Groups { groups } => { - let mut declarations = Vec::new(); + let nodes: Vec<_> = (0..defs.len() as u32).collect(); + let sccs = def_ordering + .references + .strongly_connected_components(&nodes); - // groups are in reversed order - for group in groups.into_iter().rev() { - group_to_declaration(&def_ordering, &group, &mut defs, &mut declarations); - } + let mut declarations = Vec::new(); + let mut problems = Vec::new(); - (Ok(declarations), output) - } - TopologicalSort::HasCycles { - mut groups, - nodes_in_cycle, - } => { - let mut declarations = Vec::new(); - let mut problems = Vec::new(); + for group in sccs.groups() { + if group.count_ones() == 1 { + // a group with a single Def, nice and simple + let index = group.iter_ones().next().unwrap(); - // nodes_in_cycle are symbols that form a syntactic cycle. That isn't always a problem, - // and in general it's impossible to decide whether it is. So we use a crude heuristic: - // - // Definitions where the cycle occurs behind a lambda are OK - // - // boom = \_ -> boom {} - // - // But otherwise we report an error, e.g. - // - // foo = if b then foo else bar + let def = match defs[index].take() { + Some(def) => def, + None => { + // NOTE: a `_ = someDef` can mean we don't have a symbol here + let symbol = def_ordering.get_symbol(index); - let sccs = def_ordering - .references - .strongly_connected_components(&nodes_in_cycle); - - for cycle in sccs.groups() { - // check whether the cycle is faulty, which is when it has - // a direct successor in the current cycle. This catches things like: - // - // x = x - // - // or - // - // p = q - // q = p - let is_invalid_cycle = match cycle.iter_ones().next() { - Some(def_id) => def_ordering - .direct_references - .references_for(def_id) - .any(|key| cycle[key]), - None => false, - }; - - if is_invalid_cycle { - // We want to show the entire cycle in the error message, so expand it out. - let mut entries = Vec::new(); - - for def_id in cycle.iter_ones() { - let symbol = def_ordering.get_symbol(def_id).unwrap(); - let def = &defs[def_id]; - - let expr_region = defs[def_id].as_ref().unwrap().loc_expr.region; - - let entry = CycleEntry { - symbol, - symbol_region: def.as_ref().unwrap().loc_pattern.region, - expr_region, - }; - - entries.push(entry); - } - - // Sort them by line number to make the report more helpful. - entries.sort_by_key(|entry| entry.symbol_region); - - problems.push(Problem::RuntimeError(RuntimeError::CircularDef( - entries.clone(), - ))); - - declarations.push(Declaration::InvalidCycle(entries)); + roc_error_macros::internal_error!("def not available {:?}", symbol) } - - // if it's an invalid cycle, other groups may depend on the - // symbols defined here, so also push this cycle onto the groups - // - // if it's not an invalid cycle, this is slightly inefficient, - // because we know this becomes exactly one DeclareRec already - let cycle = cycle.iter_ones().map(|v| v as u32).collect(); - groups.push(cycle); - } - - // now we have a collection of groups whose dependencies are not cyclic. - // They are however not yet topologically sorted. Here we have to get a bit - // creative to get all the definitions in the correct sorted order. - - let mut group_ids = Vec::with_capacity(groups.len()); - let mut symbol_to_group_index = MutMap::default(); - for (i, group) in groups.iter().enumerate() { - for symbol in group { - symbol_to_group_index.insert(*symbol, i); - } - - group_ids.push(i); - } - - let successors_of_group = |group_id: &usize| { - let mut result = MutSet::default(); - - // for each symbol in this group - for symbol in &groups[*group_id] { - // find its successors - for succ in def_ordering.successors_without_self(*symbol) { - // and add its group to the result - match symbol_to_group_index.get(&succ) { - Some(index) => { - result.insert(*index); - } - None => unreachable!("no index for symbol {:?}", succ), - } - } - } - - // don't introduce any cycles to self - result.remove(group_id); - - result }; - match ven_graph::topological_sort_into_groups(&group_ids, successors_of_group) { - Ok(sorted_group_ids) => { - for sorted_group in sorted_group_ids.iter().rev() { - for group_id in sorted_group.iter().rev() { - let group = &groups[*group_id]; + let declaration = if def_ordering.direct_references.get_row_col(index, index) { + // This value references itself in an invalid way, e.g.: + // + // x = x - group_to_declaration( - &def_ordering, - group, - &mut defs, - &mut declarations, - ); - } - } + let symbol = def_ordering.get_symbol(index).unwrap(); + + let entry = CycleEntry { + symbol, + symbol_region: def.loc_pattern.region, + expr_region: def.loc_expr.region, + }; + + let entries = vec![entry]; + + problems.push(Problem::RuntimeError(RuntimeError::CircularDef( + entries.clone(), + ))); + + Declaration::InvalidCycle(entries) + } else if def_ordering.references.get_row_col(index, index) { + // this function calls itself, and must be typechecked as a recursive def + + let mut def = def; + + if let Closure(ClosureData { + recursive: recursive @ Recursive::NotRecursive, + .. + }) = &mut def.loc_expr.value + { + *recursive = Recursive::Recursive } - Err(_) => unreachable!("there should be no cycles now!"), - } - for problem in problems { - env.problem(problem); - } + Declaration::DeclareRec(vec![def]) + } else { + Declaration::Declare(def) + }; - (Ok(declarations), output) + declarations.push(declaration); + } else { + let nodes: Vec<_> = group.iter_ones().map(|v| v as u32).collect(); + let direct_sccs = def_ordering + .direct_references + .strongly_connected_components(&nodes); + + let declaration = if direct_sccs.groups().count() == 1 { + // all defs are part of the same direct cycle. That's invalid + let mut entries = Vec::with_capacity(group.count_ones()); + + for index in group.iter_ones() { + let def = match defs[index].take() { + Some(def) => def, + None => { + // NOTE: a `_ = someDef` can mean we don't have a symbol here + let symbol = def_ordering.get_symbol(index); + + roc_error_macros::internal_error!("def not available {:?}", symbol) + } + }; + + let symbol = def_ordering.get_symbol(index).unwrap(); + + let entry = CycleEntry { + symbol, + symbol_region: def.loc_pattern.region, + expr_region: def.loc_expr.region, + }; + + entries.push(entry) + } + + problems.push(Problem::RuntimeError(RuntimeError::CircularDef( + entries.clone(), + ))); + + Declaration::InvalidCycle(entries) + } else { + let mut rec_defs = Vec::with_capacity(group.count_ones()); + + for index in group.iter_ones() { + let def = match defs[index].take() { + Some(def) => def, + None => { + // NOTE: a `_ = someDef` can mean we don't have a symbol here + let symbol = def_ordering.get_symbol(index); + + roc_error_macros::internal_error!("def not available {:?}", symbol) + } + }; + + let mut def = def; + + if let Closure(ClosureData { + recursive: recursive @ Recursive::NotRecursive, + .. + }) = &mut def.loc_expr.value + { + *recursive = Recursive::Recursive + } + + rec_defs.push(def); + } + + Declaration::DeclareRec(rec_defs) + }; + + declarations.push(declaration); } } + + for problem in problems { + env.problem(problem); + } + + (Ok(declarations), output) } fn group_to_declaration( From 0aa7cb84790f157952781f4715a123a9ab1f062f Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 18:46:16 +0200 Subject: [PATCH 20/26] cleanup --- compiler/can/src/def.rs | 120 --------------------------- compiler/can/src/reference_matrix.rs | 32 +------ 2 files changed, 2 insertions(+), 150 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 37473ad554..3f1e3eec28 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -703,8 +703,6 @@ struct DefOrdering { // references without looking into closure bodies. // Used to spot definitely-wrong recursion direct_references: ReferenceMatrix, - - length: u32, } impl DefOrdering { @@ -721,7 +719,6 @@ impl DefOrdering { symbol_to_id, references: ReferenceMatrix::new(capacity), direct_references: ReferenceMatrix::new(capacity), - length: capacity as u32, } } @@ -781,28 +778,6 @@ impl DefOrdering { None } - - fn is_self_recursive(&self, id: usize) -> bool { - let length = self.length as usize; - debug_assert!(id < length); - - // id'th row, id'th column - let index = (id * length) + id; - - self.references.get(index) - } - - #[inline(always)] - fn successors(&self, id: u32) -> impl Iterator + '_ { - self.references - .references_for(id as usize) - .map(|x| x as u32) - } - - #[inline(always)] - fn successors_without_self(&self, id: u32) -> impl Iterator + '_ { - self.successors(id).filter(move |x| *x != id) - } } #[inline(always)] @@ -961,101 +936,6 @@ pub(crate) fn sort_can_defs( (Ok(declarations), output) } -fn group_to_declaration( - def_ordering: &DefOrdering, - group: &[u32], - defs: &mut [Option], - declarations: &mut Vec, -) { - use Declaration::*; - - // Patterns like - // - // { x, y } = someDef - // - // Can bind multiple symbols. When not incorrectly recursive (which is guaranteed in this function), - // normally `someDef` would be inserted twice. We use the region of the pattern as a unique key - // for a definition, so every definition is only inserted (thus typechecked and emitted) once - let mut seen_pattern_regions: Vec = Vec::with_capacity(2); - - let sccs = def_ordering.references.strongly_connected_components(group); - - for cycle in sccs.groups() { - if cycle.count_ones() == 1 { - let def_id = cycle.iter_ones().next().unwrap(); - - match defs[def_id].take() { - Some(mut new_def) => { - // there is only one definition in this cycle, so we only have - // to check whether it recurses with itself; there is nobody else - // to recurse with, or they would also be in this cycle. - let is_self_recursive = def_ordering.is_self_recursive(def_id); - - if let Closure(ClosureData { - recursive: recursive @ Recursive::NotRecursive, - .. - }) = &mut new_def.loc_expr.value - { - if is_self_recursive { - *recursive = Recursive::Recursive - } - } - - if !seen_pattern_regions.contains(&new_def.loc_pattern.region) { - seen_pattern_regions.push(new_def.loc_pattern.region); - - if is_self_recursive { - declarations.push(DeclareRec(vec![new_def])); - } else { - declarations.push(Declare(new_def)); - } - } - } - None => { - // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(def_id); - - roc_error_macros::internal_error!("def not available {:?}", symbol) - } - } - } else { - let mut can_defs = Vec::new(); - - // Topological sort gives us the reverse of the sorting we want! - for def_id in cycle.iter_ones().rev() { - match defs[def_id as usize].take() { - Some(mut new_def) => { - // Determine recursivity of closures that are not tail-recursive - if let Closure(ClosureData { - recursive: recursive @ Recursive::NotRecursive, - .. - }) = &mut new_def.loc_expr.value - { - if def_ordering.references.is_recursive(def_id) { - *recursive = Recursive::Recursive - } - } - - if !seen_pattern_regions.contains(&new_def.loc_pattern.region) { - seen_pattern_regions.push(new_def.loc_pattern.region); - - can_defs.push(new_def); - } - } - None => { - // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(def_id); - - roc_error_macros::internal_error!("def not available {:?}", symbol) - } - } - } - - declarations.push(DeclareRec(can_defs)); - } - } -} - fn pattern_to_vars_by_symbol( vars_by_symbol: &mut SendMap, pattern: &Pattern, diff --git a/compiler/can/src/reference_matrix.rs b/compiler/can/src/reference_matrix.rs index 234340d28b..13ba3caea2 100644 --- a/compiler/can/src/reference_matrix.rs +++ b/compiler/can/src/reference_matrix.rs @@ -36,40 +36,10 @@ impl ReferenceMatrix { self.bitvec.set(row * self.length + col, value) } - #[inline(always)] - pub fn get(&self, index: usize) -> bool { - self.bitvec[index] - } - #[inline(always)] pub fn get_row_col(&self, row: usize, col: usize) -> bool { self.bitvec[row * self.length + col] } - - pub fn is_recursive(&self, index: usize) -> bool { - let mut scheduled = self.row_slice(index).to_bitvec(); - let mut visited = self.row_slice(index).to_bitvec(); - - // yes this is a bit inefficient because rows are visited repeatedly. - while scheduled.any() { - for one in scheduled.iter_ones() { - if one == index { - return true; - } - - visited |= self.row_slice(one) - } - - // i.e. visited did not change - if visited.count_ones() == scheduled.count_ones() { - break; - } - - scheduled |= &visited; - } - - false - } } // Topological sort and strongly-connected components @@ -81,6 +51,7 @@ impl ReferenceMatrix { // // Thank you, Samuel! impl ReferenceMatrix { + #[allow(dead_code)] pub fn topological_sort_into_groups(&self) -> TopologicalSort { if self.length == 0 { return TopologicalSort::Groups { groups: Vec::new() }; @@ -178,6 +149,7 @@ impl ReferenceMatrix { } } +#[allow(dead_code)] pub(crate) enum TopologicalSort { /// There were no cycles, all nodes have been partitioned into groups Groups { groups: Vec> }, From 054907a4cb3739c15027f2ee2e69bfb2c8ce167e Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 19:06:08 +0200 Subject: [PATCH 21/26] clean up sorting code --- compiler/can/src/def.rs | 148 +++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 85 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 3f1e3eec28..f38963eeb3 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -796,131 +796,93 @@ pub(crate) fn sort_can_defs( output.aliases.insert(symbol, alias); } + macro_rules! take_def { + ($index:expr) => { + match defs[$index].take() { + Some(def) => def, + None => { + // NOTE: a `_ = someDef` can mean we don't have a symbol here + let symbol = def_ordering.get_symbol($index); + + roc_error_macros::internal_error!("def not available {:?}", symbol) + } + } + }; + } + let nodes: Vec<_> = (0..defs.len() as u32).collect(); + + // We first perform SCC based on any reference, both variable usage and calls + // considering both value definitions and function bodies. This will spot any + // recursive relations between any 2 definitions. let sccs = def_ordering .references .strongly_connected_components(&nodes); let mut declarations = Vec::new(); - let mut problems = Vec::new(); for group in sccs.groups() { if group.count_ones() == 1 { // a group with a single Def, nice and simple let index = group.iter_ones().next().unwrap(); - let def = match defs[index].take() { - Some(def) => def, - None => { - // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(index); - - roc_error_macros::internal_error!("def not available {:?}", symbol) - } - }; + let def = take_def!(index); let declaration = if def_ordering.direct_references.get_row_col(index, index) { - // This value references itself in an invalid way, e.g.: - // - // x = x - + // a definition like `x = x + 1`, which is invalid in roc let symbol = def_ordering.get_symbol(index).unwrap(); - let entry = CycleEntry { - symbol, - symbol_region: def.loc_pattern.region, - expr_region: def.loc_expr.region, - }; + let entries = vec![make_cycle_entry(symbol, &def)]; - let entries = vec![entry]; - - problems.push(Problem::RuntimeError(RuntimeError::CircularDef( - entries.clone(), - ))); + let problem = Problem::RuntimeError(RuntimeError::CircularDef(entries.clone())); + env.problem(problem); Declaration::InvalidCycle(entries) } else if def_ordering.references.get_row_col(index, index) { // this function calls itself, and must be typechecked as a recursive def - - let mut def = def; - - if let Closure(ClosureData { - recursive: recursive @ Recursive::NotRecursive, - .. - }) = &mut def.loc_expr.value - { - *recursive = Recursive::Recursive - } - - Declaration::DeclareRec(vec![def]) + Declaration::DeclareRec(vec![mark_def_recursive(def)]) } else { Declaration::Declare(def) }; declarations.push(declaration); } else { + // There is something recursive going on between the Defs of this group. + // Now we use the direct_references to see if it is clearly invalid recursion, e.g. + // + // x = y + // y = x + // + // We allow indirect recursion (behind a lambda), e.g. + // + // boom = \{} -> boom {} + // + // In general we cannot spot faulty recursion (halting problem) so this is our best attempt let nodes: Vec<_> = group.iter_ones().map(|v| v as u32).collect(); let direct_sccs = def_ordering .direct_references .strongly_connected_components(&nodes); let declaration = if direct_sccs.groups().count() == 1 { - // all defs are part of the same direct cycle. That's invalid + // all defs are part of the same direct cycle, that is invalid! let mut entries = Vec::with_capacity(group.count_ones()); for index in group.iter_ones() { - let def = match defs[index].take() { - Some(def) => def, - None => { - // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(index); - - roc_error_macros::internal_error!("def not available {:?}", symbol) - } - }; - + let def = take_def!(index); let symbol = def_ordering.get_symbol(index).unwrap(); - let entry = CycleEntry { - symbol, - symbol_region: def.loc_pattern.region, - expr_region: def.loc_expr.region, - }; - - entries.push(entry) + entries.push(make_cycle_entry(symbol, &def)) } - problems.push(Problem::RuntimeError(RuntimeError::CircularDef( - entries.clone(), - ))); + let problem = Problem::RuntimeError(RuntimeError::CircularDef(entries.clone())); + env.problem(problem); Declaration::InvalidCycle(entries) } else { - let mut rec_defs = Vec::with_capacity(group.count_ones()); - - for index in group.iter_ones() { - let def = match defs[index].take() { - Some(def) => def, - None => { - // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(index); - - roc_error_macros::internal_error!("def not available {:?}", symbol) - } - }; - - let mut def = def; - - if let Closure(ClosureData { - recursive: recursive @ Recursive::NotRecursive, - .. - }) = &mut def.loc_expr.value - { - *recursive = Recursive::Recursive - } - - rec_defs.push(def); - } + let rec_defs = group + .iter_ones() + .map(|index| mark_def_recursive(take_def!(index))) + .collect(); Declaration::DeclareRec(rec_defs) }; @@ -929,11 +891,27 @@ pub(crate) fn sort_can_defs( } } - for problem in problems { - env.problem(problem); + (Ok(declarations), output) +} + +fn mark_def_recursive(mut def: Def) -> Def { + if let Closure(ClosureData { + recursive: recursive @ Recursive::NotRecursive, + .. + }) = &mut def.loc_expr.value + { + *recursive = Recursive::Recursive } - (Ok(declarations), output) + def +} + +fn make_cycle_entry(symbol: Symbol, def: &Def) -> CycleEntry { + CycleEntry { + symbol, + symbol_region: def.loc_pattern.region, + expr_region: def.loc_expr.region, + } } fn pattern_to_vars_by_symbol( From 01dfda29b05eef7603eb15d65900c983b86980c3 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 19:47:55 +0200 Subject: [PATCH 22/26] fix test by swapping definition order --- compiler/solve/tests/solve_expr.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler/solve/tests/solve_expr.rs b/compiler/solve/tests/solve_expr.rs index da03df77e2..18f964cf0c 100644 --- a/compiler/solve/tests/solve_expr.rs +++ b/compiler/solve/tests/solve_expr.rs @@ -5297,11 +5297,12 @@ mod solve_expr { #[test] fn issue_2458() { + // TODO: the order of the alias definitions matters infer_eq_without_problem( indoc!( r#" - Foo a : [ Blah (Result (Bar a) { val: a }) ] Bar a : Foo a + Foo a : [ Blah (Result (Bar a) { val: a }) ] v : Bar U8 v = Blah (Ok (Blah (Err { val: 1 }))) From c9426e2ed2aa5d9069c96ed2551c0af00a9df0f1 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 20:07:38 +0200 Subject: [PATCH 23/26] fix alias instantiation problem --- compiler/can/src/def.rs | 14 ++++++++++++-- compiler/solve/tests/solve_expr.rs | 19 ++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index f38963eeb3..ebac774de2 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1717,7 +1717,15 @@ fn correct_mutual_recursive_type_alias<'a>( let capacity = original_aliases.len(); let mut matrix = ReferenceMatrix::new(capacity); - let (symbols_introduced, mut aliases) = original_aliases.unzip(); + // It is important that we instantiate with the aliases as they occur in the source. + // We must not instantiate A into B, then use the updated B to instantiate into C + // + // That is because B could be used different places. We don't want one place + // to mess with another. + let (symbols_introduced, uninstantiated_aliases) = original_aliases.unzip(); + + // the aliases that we will instantiate into + let mut aliases = uninstantiated_aliases.clone(); for (index, alias) in aliases.iter().enumerate() { for referenced in alias.typ.symbols() { @@ -1765,7 +1773,9 @@ fn correct_mutual_recursive_type_alias<'a>( std::mem::swap(&mut alias_type, &mut aliases[index].typ); let can_instantiate_symbol = |s| match symbols_introduced.iter().position(|i| *i == s) { - Some(s_index) if to_instantiate_bitvec[s_index] => aliases.get(s_index), + Some(s_index) if to_instantiate_bitvec[s_index] => { + uninstantiated_aliases.get(s_index) + } _ => None, }; diff --git a/compiler/solve/tests/solve_expr.rs b/compiler/solve/tests/solve_expr.rs index 18f964cf0c..c04a1cf2a8 100644 --- a/compiler/solve/tests/solve_expr.rs +++ b/compiler/solve/tests/solve_expr.rs @@ -5297,7 +5297,24 @@ mod solve_expr { #[test] fn issue_2458() { - // TODO: the order of the alias definitions matters + infer_eq_without_problem( + indoc!( + r#" + Foo a : [ Blah (Result (Bar a) { val: a }) ] + Bar a : Foo a + + v : Bar U8 + v = Blah (Ok (Blah (Err { val: 1 }))) + + v + "# + ), + "Bar U8", + ) + } + + #[test] + fn issue_2458_swapped_order() { infer_eq_without_problem( indoc!( r#" From cde4bd3ce0f9b9a1155de92025910296c979f093 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 20:09:28 +0200 Subject: [PATCH 24/26] fix spelling --- compiler/can/src/def.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index ebac774de2..29335b7be3 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1849,7 +1849,7 @@ fn correct_mutual_recursive_type_alias<'a>( solved_aliases_bitvec = to_instantiate_bitvec; } - // Safety: both vectors are equal lenght and there are no duplicates + // Safety: both vectors are equal length and there are no duplicates unsafe { VecMap::zip(symbols_introduced, aliases) } } From 3ec3976781e5482b45ef1888b9d4828a343eb087 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 24 Apr 2022 22:08:11 +0200 Subject: [PATCH 25/26] fix issue with wrong alias being used for instantiation --- compiler/can/src/def.rs | 60 ++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 29335b7be3..57ce47dd72 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1717,15 +1717,7 @@ fn correct_mutual_recursive_type_alias<'a>( let capacity = original_aliases.len(); let mut matrix = ReferenceMatrix::new(capacity); - // It is important that we instantiate with the aliases as they occur in the source. - // We must not instantiate A into B, then use the updated B to instantiate into C - // - // That is because B could be used different places. We don't want one place - // to mess with another. - let (symbols_introduced, uninstantiated_aliases) = original_aliases.unzip(); - - // the aliases that we will instantiate into - let mut aliases = uninstantiated_aliases.clone(); + let (symbols_introduced, mut aliases) = original_aliases.unzip(); for (index, alias) in aliases.iter().enumerate() { for referenced in alias.typ.symbols() { @@ -1739,8 +1731,17 @@ fn correct_mutual_recursive_type_alias<'a>( let mut solved_aliases_bitvec = bitvec::vec::BitVec::::repeat(false, capacity); let group: Vec<_> = (0u32..capacity as u32).collect(); + let sccs = matrix.strongly_connected_components(&group); - for cycle in matrix.strongly_connected_components(&group).groups() { + // sometimes we need to temporarily move some aliases somewhere + let scratchpad_capacity = sccs + .groups() + .map(|r| r.count_ones()) + .max() + .unwrap_or_default(); + let mut scratchpad = Vec::with_capacity(scratchpad_capacity); + + for cycle in sccs.groups() { debug_assert!(cycle.count_ones() > 0); // We need to instantiate the alias with any symbols in the currrent module it @@ -1761,21 +1762,34 @@ fn correct_mutual_recursive_type_alias<'a>( // Don't try to instantiate the alias itself in its definition. to_instantiate_bitvec.set(index, false); + // Within a recursive group, we must instantiate all aliases like how they came to the + // loop. So we cannot instantiate A into B, then the updated B into C in the same + // iteration. Hence, if we have more than one alias we sadly must clone and store + // the alias off to the side somewhere. After processing the group, we put the + // updated alias into the main `aliases` vector again, because for future groups + // we do need to use the instantiated version. + let alias = if cycle.count_ones() > 1 { + scratchpad.push((index, aliases[index].clone())); + + &mut scratchpad.last_mut().unwrap().1 + } else { + &mut aliases[index] + }; + // we run into a problem here where we want to modify an element in the aliases array, // but also reference the other elements. The borrow checker, correctly, says no to that. // // So we get creative: we swap out the element we want to modify with a dummy. We can // then freely modify the type we moved out, and the `to_instantiate_bitvec` mask // prevents our dummy from being used. - let alias_region = aliases[index].region; + + let alias_region = alias.region; let mut alias_type = Type::EmptyRec; - std::mem::swap(&mut alias_type, &mut aliases[index].typ); + std::mem::swap(&mut alias_type, &mut alias.typ); let can_instantiate_symbol = |s| match symbols_introduced.iter().position(|i| *i == s) { - Some(s_index) if to_instantiate_bitvec[s_index] => { - uninstantiated_aliases.get(s_index) - } + Some(s_index) if to_instantiate_bitvec[s_index] => aliases.get(s_index), _ => None, }; @@ -1787,14 +1801,20 @@ fn correct_mutual_recursive_type_alias<'a>( &mut new_lambda_sets, ); + let alias = if cycle.count_ones() > 1 { + &mut scratchpad.last_mut().unwrap().1 + } else { + &mut aliases[index] + }; + // swap the type back - std::mem::swap(&mut alias_type, &mut aliases[index].typ); + std::mem::swap(&mut alias_type, &mut alias.typ); // We can instantiate this alias in future iterations to_instantiate_bitvec.set(index, true); // add any lambda sets that the instantiation created to the current alias - aliases[index].lambda_set_variables.extend( + alias.lambda_set_variables.extend( new_lambda_sets .iter() .map(|var| LambdaSet(Type::Variable(*var))), @@ -1809,7 +1829,7 @@ fn correct_mutual_recursive_type_alias<'a>( let _made_recursive = make_tag_union_of_alias_recursive( env, rec, - &mut aliases[index], + alias, vec![], var_store, &mut can_still_report_error, @@ -1817,6 +1837,10 @@ fn correct_mutual_recursive_type_alias<'a>( } } + for (index, alias) in scratchpad.drain(..) { + aliases[index] = alias; + } + // The cycle we just instantiated and marked recursive may still be an illegal cycle, if // all the types in the cycle are narrow newtypes. We can't figure this out until now, // because we need all the types to be deeply instantiated. From f7c110dbc1797f085863cd7e98e26c8bb23dfecd Mon Sep 17 00:00:00 2001 From: Folkert Date: Mon, 25 Apr 2022 16:34:47 +0200 Subject: [PATCH 26/26] comments + renames --- compiler/can/src/def.rs | 69 +++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index 57ce47dd72..d3773a4108 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -1728,12 +1728,14 @@ fn correct_mutual_recursive_type_alias<'a>( } } - let mut solved_aliases_bitvec = bitvec::vec::BitVec::::repeat(false, capacity); + let mut solved_aliases = bitvec::vec::BitVec::::repeat(false, capacity); let group: Vec<_> = (0u32..capacity as u32).collect(); let sccs = matrix.strongly_connected_components(&group); - // sometimes we need to temporarily move some aliases somewhere + // scratchpad to store aliases that are modified in the current iteration. + // Only used when there is are more than one alias in a group. See below why + // this is needed. let scratchpad_capacity = sccs .groups() .map(|r| r.count_ones()) @@ -1752,36 +1754,62 @@ fn correct_mutual_recursive_type_alias<'a>( // // Hence, we only need to worry about symbols in the current SCC or any prior one. // It cannot be using any of the others, and we've already instantiated aliases coming from other modules. - let mut to_instantiate_bitvec = solved_aliases_bitvec | cycle; + let mut to_instantiate = solved_aliases | cycle; // Make sure we report only one error for the cycle, not an error for every // alias in the cycle. let mut can_still_report_error = true; for index in cycle.iter_ones() { - // Don't try to instantiate the alias itself in its definition. - to_instantiate_bitvec.set(index, false); + // Don't try to instantiate the alias itself in its own definition. + to_instantiate.set(index, false); // Within a recursive group, we must instantiate all aliases like how they came to the - // loop. So we cannot instantiate A into B, then the updated B into C in the same - // iteration. Hence, if we have more than one alias we sadly must clone and store - // the alias off to the side somewhere. After processing the group, we put the - // updated alias into the main `aliases` vector again, because for future groups - // we do need to use the instantiated version. - let alias = if cycle.count_ones() > 1 { + // loop. e.g. given + // + // A : [ ConsA B, NilA ] + // B : [ ConsB A, NilB ] + // + // Our goal is + // + // A : [ ConsA [ ConsB A, NilB ], NilA ] + // B : [ ConsB [ ConsA B, NilA ], NilB ] + // + // But if we would first instantiate B into A, then use the updated A to instantiate B, + // we get + // + // A : [ ConsA [ ConsB A, NilB ], NilA ] + // B : [ ConsB [ ConsA [ ConsB A, NilB ], NilA ], NilB ] + // + // Which is incorrect. We do need the instantiated version however. + // e.g. if in a next group we have: + // + // C : A + // + // Then we must use the instantiated version + // + // C : [ ConsA [ ConsB A, NilB ], NilA ] + // + // So, we cannot replace the original version of A with its instantiated version + // while we process A's group. We have to store the instantiated version until the + // current group is done, then move it to the `aliases` array. That is what the scratchpad is for. + let alias = if cycle.count_ones() == 1 { + // an optimization: we can modify the alias in the `aliases` list directly + // because it is the only alias in the group. + &mut aliases[index] + } else { scratchpad.push((index, aliases[index].clone())); &mut scratchpad.last_mut().unwrap().1 - } else { - &mut aliases[index] }; - // we run into a problem here where we want to modify an element in the aliases array, - // but also reference the other elements. The borrow checker, correctly, says no to that. + // Now, `alias` is possibly a mutable borrow from the `aliases` vector. But we also want + // to immutably borrow other elements from that vector to instantiate them into `alias`. + // The borrow checker disallows that. // // So we get creative: we swap out the element we want to modify with a dummy. We can - // then freely modify the type we moved out, and the `to_instantiate_bitvec` mask - // prevents our dummy from being used. + // then freely modify the type we moved out, and the `to_instantiate` mask + // makes sure that our dummy is not used. let alias_region = alias.region; let mut alias_type = Type::EmptyRec; @@ -1789,7 +1817,7 @@ fn correct_mutual_recursive_type_alias<'a>( std::mem::swap(&mut alias_type, &mut alias.typ); let can_instantiate_symbol = |s| match symbols_introduced.iter().position(|i| *i == s) { - Some(s_index) if to_instantiate_bitvec[s_index] => aliases.get(s_index), + Some(s_index) if to_instantiate[s_index] => aliases.get(s_index), _ => None, }; @@ -1811,7 +1839,7 @@ fn correct_mutual_recursive_type_alias<'a>( std::mem::swap(&mut alias_type, &mut alias.typ); // We can instantiate this alias in future iterations - to_instantiate_bitvec.set(index, true); + to_instantiate.set(index, true); // add any lambda sets that the instantiation created to the current alias alias.lambda_set_variables.extend( @@ -1837,6 +1865,7 @@ fn correct_mutual_recursive_type_alias<'a>( } } + // the current group has instantiated. Now we can move the updated aliases to the `aliases` vector for (index, alias) in scratchpad.drain(..) { aliases[index] = alias; } @@ -1870,7 +1899,7 @@ fn correct_mutual_recursive_type_alias<'a>( } // We've instantiated all we could, so all instantiatable aliases are solved now - solved_aliases_bitvec = to_instantiate_bitvec; + solved_aliases = to_instantiate; } // Safety: both vectors are equal length and there are no duplicates