diff --git a/compiler/can/src/def.rs b/compiler/can/src/def.rs index e4cc226eb8..d3773a4108 100644 --- a/compiler/can/src/def.rs +++ b/compiler/can/src/def.rs @@ -8,10 +8,10 @@ use crate::expr::{canonicalize_expr, Output, Recursive}; use crate::pattern::{bindings_from_patterns, canonicalize_def_header_pattern, Pattern}; use crate::procedure::References; use crate::reference_matrix::ReferenceMatrix; -use crate::reference_matrix::TopologicalSort; use crate::scope::create_alias; use crate::scope::Scope; -use roc_collections::{ImEntry, ImMap, ImSet, MutMap, MutSet, SendMap}; +use roc_collections::VecMap; +use roc_collections::{ImSet, MutMap, SendMap}; use roc_module::ident::Lowercase; use roc_module::symbol::IdentId; use roc_module::symbol::ModuleId; @@ -29,7 +29,6 @@ use roc_types::types::AliasKind; use roc_types::types::LambdaSet; use roc_types::types::{Alias, Type}; use std::fmt::Debug; -use ven_graph::topological_sort; #[derive(Clone, Debug)] pub struct Def { @@ -53,7 +52,7 @@ pub(crate) struct CanDefs { defs: Vec>, def_ordering: DefOrdering, - aliases: SendMap, + aliases: VecMap, } /// A Def that has had patterns and type annnotations canonicalized, @@ -169,66 +168,34 @@ impl Declaration { /// Returns a topologically sorted sequence of alias/opaque names fn sort_type_defs_before_introduction( - mut referenced_symbols: MutMap>, + referenced_symbols: VecMap>, ) -> Vec { - let defined_symbols: Vec = referenced_symbols.keys().copied().collect(); + let capacity = referenced_symbols.len(); + let mut matrix = ReferenceMatrix::new(capacity); - // find the strongly connected components and their relations - let sccs = { - // only retain symbols from the current set of defined symbols; the rest come from other modules - for v in referenced_symbols.iter_mut() { - v.1.retain(|x| defined_symbols.iter().any(|s| s == x)); - } + let (symbols, referenced) = referenced_symbols.unzip(); - let all_successors_with_self = |symbol: &Symbol| referenced_symbols[symbol].iter().copied(); - - ven_graph::strongly_connected_components(&defined_symbols, all_successors_with_self) - }; - - // then sort the strongly connected components - let groups: Vec<_> = (0..sccs.len()).collect(); - let mut group_symbols: Vec> = vec![Vec::new(); groups.len()]; - - let mut symbol_to_group_index = MutMap::default(); - let mut group_to_groups = vec![Vec::new(); groups.len()]; - - for (index, group) in sccs.iter().enumerate() { - for s in group { - symbol_to_group_index.insert(*s, index); - } - } - - for (index, group) in sccs.iter().enumerate() { - for s in group { - let reachable = &referenced_symbols[s]; - for r in reachable { - let new_index = symbol_to_group_index[r]; - - if new_index != index { - group_to_groups[index].push(new_index); - } + for (index, references) in referenced.iter().enumerate() { + for referenced in references { + match symbols.iter().position(|k| k == referenced) { + None => { /* not defined in this scope */ } + Some(ref_index) => matrix.set_row_col(index, ref_index, true), } } } - for v in group_symbols.iter_mut() { - v.sort(); - v.dedup(); + // find the strongly connected components and their relations + let nodes: Vec<_> = (0..capacity as u32).collect(); + + let mut output = Vec::with_capacity(capacity); + + for group in matrix.strongly_connected_components(&nodes).groups() { + for index in group.iter_ones() { + output.push(symbols[index]) + } } - let all_successors_with_self = |group: &usize| group_to_groups[*group].iter().copied(); - - // split into self-recursive and mutually recursive - match topological_sort(&groups, all_successors_with_self) { - Ok(result) => result - .iter() - .rev() - .flat_map(|group_index| sccs[*group_index].iter()) - .copied() - .collect(), - - Err(_loop_detected) => unreachable!("the groups cannot recurse"), - } + output } #[inline(always)] @@ -298,7 +265,7 @@ pub(crate) fn canonicalize_defs<'a>( let mut type_defs = MutMap::default(); let mut abilities_in_scope = Vec::new(); - let mut referenced_type_symbols = MutMap::default(); + let mut referenced_type_symbols = VecMap::default(); // Determine which idents we introduced in the course of this process. let mut symbols_introduced = MutMap::default(); @@ -351,7 +318,7 @@ pub(crate) fn canonicalize_defs<'a>( } let sorted = sort_type_defs_before_introduction(referenced_type_symbols); - let mut aliases = SendMap::default(); + let mut aliases = VecMap::default(); let mut abilities = MutMap::default(); for type_name in sorted { @@ -453,7 +420,8 @@ pub(crate) fn canonicalize_defs<'a>( can_ann.typ.clone(), kind, ); - aliases.insert(symbol, alias.clone()); + + aliases.insert(symbol, alias); } TypeDef::Ability(name, members) => { @@ -467,6 +435,7 @@ pub(crate) fn canonicalize_defs<'a>( // Now that we know the alias dependency graph, we can try to insert recursion variables // where aliases are recursive tag unions, or detect illegal recursions. let mut aliases = correct_mutual_recursive_type_alias(env, aliases, var_store); + for (symbol, alias) in aliases.iter() { scope.add_alias( *symbol, @@ -570,7 +539,7 @@ pub(crate) fn canonicalize_defs<'a>( defs, def_ordering, // The result needs a thread-safe `SendMap` - aliases: aliases.into_iter().collect(), + aliases, }, scope, output, @@ -734,8 +703,6 @@ struct DefOrdering { // references without looking into closure bodies. // Used to spot definitely-wrong recursion direct_references: ReferenceMatrix, - - length: u32, } impl DefOrdering { @@ -752,7 +719,6 @@ impl DefOrdering { symbol_to_id, references: ReferenceMatrix::new(capacity), direct_references: ReferenceMatrix::new(capacity), - length: capacity as u32, } } @@ -803,36 +769,15 @@ impl DefOrdering { None } - fn get_symbol(&self, id: u32) -> Option { + fn get_symbol(&self, id: usize) -> Option { for (ident_id, def_id) in self.symbol_to_id.iter() { - if id == *def_id { + if id as u32 == *def_id { return Some(Symbol::new(self.home, *ident_id)); } } None } - - fn is_self_recursive(&self, id: u32) -> bool { - debug_assert!(id < self.length); - - // id'th row, id'th column - let index = (id * self.length) + id; - - self.references.get(index as usize) - } - - #[inline(always)] - fn successors(&self, id: u32) -> impl Iterator + '_ { - self.references - .references_for(id as usize) - .map(|x| x as u32) - } - - #[inline(always)] - fn successors_without_self(&self, id: u32) -> impl Iterator + '_ { - self.successors(id).filter(move |x| *x != id) - } } #[inline(always)] @@ -851,253 +796,122 @@ pub(crate) fn sort_can_defs( output.aliases.insert(symbol, alias); } - // TODO also do the same `addDirects` check elm/compiler does, so we can - // report an error if a recursive definition can't possibly terminate! - match def_ordering.references.topological_sort_into_groups() { - TopologicalSort::Groups { groups } => { - let mut declarations = Vec::new(); - - // groups are in reversed order - for group in groups.into_iter().rev() { - group_to_declaration(&def_ordering, &group, &mut defs, &mut declarations); - } - - (Ok(declarations), output) - } - TopologicalSort::HasCycles { - mut groups, - nodes_in_cycle, - } => { - let mut declarations = Vec::new(); - let mut problems = Vec::new(); - - // nodes_in_cycle are symbols that form a syntactic cycle. That isn't always a problem, - // and in general it's impossible to decide whether it is. So we use a crude heuristic: - // - // Definitions where the cycle occurs behind a lambda are OK - // - // boom = \_ -> boom {} - // - // But otherwise we report an error, e.g. - // - // foo = if b then foo else bar - - let sccs = def_ordering - .references - .strongly_connected_components(&nodes_in_cycle); - - for cycle in sccs { - // check whether the cycle is faulty, which is when it has - // a direct successor in the current cycle. This catches things like: - // - // x = x - // - // or - // - // p = q - // q = p - let is_invalid_cycle = match cycle.get(0) { - Some(def_id) => def_ordering - .direct_references - .references_for(*def_id as usize) - .any(|key| cycle.contains(&(key as u32))), - None => false, - }; - - if is_invalid_cycle { - // We want to show the entire cycle in the error message, so expand it out. - let mut entries = Vec::new(); - - for def_id in &cycle { - let symbol = def_ordering.get_symbol(*def_id).unwrap(); - let def = &defs[*def_id as usize]; - - let expr_region = defs[*def_id as usize].as_ref().unwrap().loc_expr.region; - - let entry = CycleEntry { - symbol, - symbol_region: def.as_ref().unwrap().loc_pattern.region, - expr_region, - }; - - entries.push(entry); - } - - // Sort them by line number to make the report more helpful. - entries.sort_by_key(|entry| entry.symbol_region); - - problems.push(Problem::RuntimeError(RuntimeError::CircularDef( - entries.clone(), - ))); - - declarations.push(Declaration::InvalidCycle(entries)); - } - - // if it's an invalid cycle, other groups may depend on the - // symbols defined here, so also push this cycle onto the groups - // - // if it's not an invalid cycle, this is slightly inefficient, - // because we know this becomes exactly one DeclareRec already - groups.push(cycle); - } - - // now we have a collection of groups whose dependencies are not cyclic. - // They are however not yet topologically sorted. Here we have to get a bit - // creative to get all the definitions in the correct sorted order. - - let mut group_ids = Vec::with_capacity(groups.len()); - let mut symbol_to_group_index = MutMap::default(); - for (i, group) in groups.iter().enumerate() { - for symbol in group { - symbol_to_group_index.insert(*symbol, i); - } - - group_ids.push(i); - } - - let successors_of_group = |group_id: &usize| { - let mut result = MutSet::default(); - - // for each symbol in this group - for symbol in &groups[*group_id] { - // find its successors - for succ in def_ordering.successors_without_self(*symbol) { - // and add its group to the result - match symbol_to_group_index.get(&succ) { - Some(index) => { - result.insert(*index); - } - None => unreachable!("no index for symbol {:?}", succ), - } - } - } - - // don't introduce any cycles to self - result.remove(group_id); - - result - }; - - match ven_graph::topological_sort_into_groups(&group_ids, successors_of_group) { - Ok(sorted_group_ids) => { - for sorted_group in sorted_group_ids.iter().rev() { - for group_id in sorted_group.iter().rev() { - let group = &groups[*group_id]; - - group_to_declaration( - &def_ordering, - group, - &mut defs, - &mut declarations, - ); - } - } - } - Err(_) => unreachable!("there should be no cycles now!"), - } - - for problem in problems { - env.problem(problem); - } - - (Ok(declarations), output) - } - } -} - -fn group_to_declaration( - def_ordering: &DefOrdering, - group: &[u32], - defs: &mut [Option], - declarations: &mut Vec, -) { - use Declaration::*; - - // Patterns like - // - // { x, y } = someDef - // - // Can bind multiple symbols. When not incorrectly recursive (which is guaranteed in this function), - // normally `someDef` would be inserted twice. We use the region of the pattern as a unique key - // for a definition, so every definition is only inserted (thus typechecked and emitted) once - let mut seen_pattern_regions: Vec = Vec::with_capacity(2); - - let sccs = def_ordering.references.strongly_connected_components(group); - - for cycle in sccs { - if cycle.len() == 1 { - let def_id = cycle[0]; - - match defs[def_id as usize].take() { - Some(mut new_def) => { - // there is only one definition in this cycle, so we only have - // to check whether it recurses with itself; there is nobody else - // to recurse with, or they would also be in this cycle. - let is_self_recursive = def_ordering.is_self_recursive(def_id); - - if let Closure(ClosureData { - recursive: recursive @ Recursive::NotRecursive, - .. - }) = &mut new_def.loc_expr.value - { - if is_self_recursive { - *recursive = Recursive::Recursive - } - } - - if !seen_pattern_regions.contains(&new_def.loc_pattern.region) { - seen_pattern_regions.push(new_def.loc_pattern.region); - - if is_self_recursive { - declarations.push(DeclareRec(vec![new_def])); - } else { - declarations.push(Declare(new_def)); - } - } - } + macro_rules! take_def { + ($index:expr) => { + match defs[$index].take() { + Some(def) => def, None => { // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(def_id); + let symbol = def_ordering.get_symbol($index); roc_error_macros::internal_error!("def not available {:?}", symbol) } } + }; + } + + let nodes: Vec<_> = (0..defs.len() as u32).collect(); + + // We first perform SCC based on any reference, both variable usage and calls + // considering both value definitions and function bodies. This will spot any + // recursive relations between any 2 definitions. + let sccs = def_ordering + .references + .strongly_connected_components(&nodes); + + let mut declarations = Vec::new(); + + for group in sccs.groups() { + if group.count_ones() == 1 { + // a group with a single Def, nice and simple + let index = group.iter_ones().next().unwrap(); + + let def = take_def!(index); + + let declaration = if def_ordering.direct_references.get_row_col(index, index) { + // a definition like `x = x + 1`, which is invalid in roc + let symbol = def_ordering.get_symbol(index).unwrap(); + + let entries = vec![make_cycle_entry(symbol, &def)]; + + let problem = Problem::RuntimeError(RuntimeError::CircularDef(entries.clone())); + env.problem(problem); + + Declaration::InvalidCycle(entries) + } else if def_ordering.references.get_row_col(index, index) { + // this function calls itself, and must be typechecked as a recursive def + Declaration::DeclareRec(vec![mark_def_recursive(def)]) + } else { + Declaration::Declare(def) + }; + + declarations.push(declaration); } else { - let mut can_defs = Vec::new(); + // There is something recursive going on between the Defs of this group. + // Now we use the direct_references to see if it is clearly invalid recursion, e.g. + // + // x = y + // y = x + // + // We allow indirect recursion (behind a lambda), e.g. + // + // boom = \{} -> boom {} + // + // In general we cannot spot faulty recursion (halting problem) so this is our best attempt + let nodes: Vec<_> = group.iter_ones().map(|v| v as u32).collect(); + let direct_sccs = def_ordering + .direct_references + .strongly_connected_components(&nodes); - // Topological sort gives us the reverse of the sorting we want! - for def_id in cycle.into_iter().rev() { - match defs[def_id as usize].take() { - Some(mut new_def) => { - // Determine recursivity of closures that are not tail-recursive - if let Closure(ClosureData { - recursive: recursive @ Recursive::NotRecursive, - .. - }) = &mut new_def.loc_expr.value - { - if def_ordering.references.is_recursive(def_id as usize) { - *recursive = Recursive::Recursive - } - } + let declaration = if direct_sccs.groups().count() == 1 { + // all defs are part of the same direct cycle, that is invalid! + let mut entries = Vec::with_capacity(group.count_ones()); - if !seen_pattern_regions.contains(&new_def.loc_pattern.region) { - seen_pattern_regions.push(new_def.loc_pattern.region); + for index in group.iter_ones() { + let def = take_def!(index); + let symbol = def_ordering.get_symbol(index).unwrap(); - can_defs.push(new_def); - } - } - None => { - // NOTE: a `_ = someDef` can mean we don't have a symbol here - let symbol = def_ordering.get_symbol(def_id); - - roc_error_macros::internal_error!("def not available {:?}", symbol) - } + entries.push(make_cycle_entry(symbol, &def)) } - } - declarations.push(DeclareRec(can_defs)); + let problem = Problem::RuntimeError(RuntimeError::CircularDef(entries.clone())); + env.problem(problem); + + Declaration::InvalidCycle(entries) + } else { + let rec_defs = group + .iter_ones() + .map(|index| mark_def_recursive(take_def!(index))) + .collect(); + + Declaration::DeclareRec(rec_defs) + }; + + declarations.push(declaration); } } + + (Ok(declarations), output) +} + +fn mark_def_recursive(mut def: Def) -> Def { + if let Closure(ClosureData { + recursive: recursive @ Recursive::NotRecursive, + .. + }) = &mut def.loc_expr.value + { + *recursive = Recursive::Recursive + } + + def +} + +fn make_cycle_entry(symbol: Symbol, def: &Def) -> CycleEntry { + CycleEntry { + symbol, + symbol_region: def.loc_pattern.region, + expr_region: def.loc_expr.region, + } } fn pattern_to_vars_by_symbol( @@ -1178,16 +992,11 @@ fn single_can_def( fn add_annotation_aliases( type_annotation: &crate::annotation::Annotation, - aliases: &mut ImMap, + aliases: &mut VecMap, ) { for (name, alias) in type_annotation.aliases.iter() { - match aliases.entry(*name) { - ImEntry::Occupied(_) => { - // do nothing - } - ImEntry::Vacant(vacant) => { - vacant.insert(alias.clone()); - } + if !aliases.contains(name) { + aliases.insert(*name, alias.clone()); } } } @@ -1206,7 +1015,7 @@ enum DefReferences { AnnotationWithoutBody, } -struct TempOutput { +struct DefOutput { output: Output, def: Def, references: DefReferences, @@ -1221,9 +1030,9 @@ fn canonicalize_pending_value_def<'a>( mut output: Output, scope: &mut Scope, var_store: &mut VarStore, - aliases: &mut ImMap, + aliases: &mut VecMap, abilities_in_scope: &[Symbol], -) -> TempOutput { +) -> DefOutput { use PendingValueDef::*; // Make types for the body expr, even if we won't end up having a body. @@ -1324,7 +1133,7 @@ fn canonicalize_pending_value_def<'a>( vars_by_symbol.clone(), ); - TempOutput { + DefOutput { output, references: DefReferences::AnnotationWithoutBody, def, @@ -1437,7 +1246,7 @@ fn canonicalize_pending_value_def<'a>( output.union(can_output); - TempOutput { + DefOutput { output, references: DefReferences::Function(closure_references), def, @@ -1456,7 +1265,7 @@ fn canonicalize_pending_value_def<'a>( output.union(can_output); - TempOutput { + DefOutput { output, references: DefReferences::Value(refs), def, @@ -1551,7 +1360,7 @@ fn canonicalize_pending_value_def<'a>( output.union(can_output); - TempOutput { + DefOutput { output, references: DefReferences::Function(closure_references), def, @@ -1570,7 +1379,7 @@ fn canonicalize_pending_value_def<'a>( output.union(can_output); - TempOutput { + DefOutput { output, references: DefReferences::Value(refs), def, @@ -1902,75 +1711,147 @@ fn to_pending_value_def<'a>( /// Make aliases recursive fn correct_mutual_recursive_type_alias<'a>( env: &mut Env<'a>, - mut original_aliases: SendMap, + original_aliases: VecMap, var_store: &mut VarStore, -) -> ImMap { - let symbols_introduced: Vec = original_aliases.keys().copied().collect(); +) -> VecMap { + let capacity = original_aliases.len(); + let mut matrix = ReferenceMatrix::new(capacity); - let all_successors_with_self = |symbol: &Symbol| -> Vec { - match original_aliases.get(symbol) { - Some(alias) => { - let mut loc_succ = alias.typ.symbols(); - // remove anything that is not defined in the current block - loc_succ.retain(|key| symbols_introduced.contains(key)); + let (symbols_introduced, mut aliases) = original_aliases.unzip(); - loc_succ + for (index, alias) in aliases.iter().enumerate() { + for referenced in alias.typ.symbols() { + match symbols_introduced.iter().position(|k| referenced == *k) { + None => { /* ignore */ } + Some(ref_id) => matrix.set_row_col(index, ref_id, true), } - None => vec![], } - }; + } - // TODO investigate should this be in a loop? - let defined_symbols: Vec = original_aliases.keys().copied().collect(); + let mut solved_aliases = bitvec::vec::BitVec::::repeat(false, capacity); - let cycles = - ven_graph::strongly_connected_components(&defined_symbols, all_successors_with_self); - let mut solved_aliases = ImMap::default(); + let group: Vec<_> = (0u32..capacity as u32).collect(); + let sccs = matrix.strongly_connected_components(&group); - for cycle in cycles { - debug_assert!(!cycle.is_empty()); + // scratchpad to store aliases that are modified in the current iteration. + // Only used when there is are more than one alias in a group. See below why + // this is needed. + let scratchpad_capacity = sccs + .groups() + .map(|r| r.count_ones()) + .max() + .unwrap_or_default(); + let mut scratchpad = Vec::with_capacity(scratchpad_capacity); - let mut pending_aliases: ImMap<_, _> = cycle - .iter() - .map(|&sym| (sym, original_aliases.remove(&sym).unwrap())) - .collect(); + for cycle in sccs.groups() { + debug_assert!(cycle.count_ones() > 0); + + // We need to instantiate the alias with any symbols in the currrent module it + // depends on. + // + // the `strongly_connected_components` returns SCCs in a topologically sorted order: + // SCC_0 has those aliases that don't rely on any other, SCC_1 has only those that rely on SCC_1, etc. + // + // Hence, we only need to worry about symbols in the current SCC or any prior one. + // It cannot be using any of the others, and we've already instantiated aliases coming from other modules. + let mut to_instantiate = solved_aliases | cycle; // Make sure we report only one error for the cycle, not an error for every // alias in the cycle. let mut can_still_report_error = true; - // We need to instantiate the alias with any symbols in the currrent module it - // depends on. - // We only need to worry about symbols in this SCC or any prior one, since the SCCs - // were sorted topologically, and we've already instantiated aliases coming from other - // modules. - // NB: ImMap::clone is O(1): https://docs.rs/im/latest/src/im/hash/map.rs.html#1527-1544 - let mut to_instantiate = solved_aliases.clone().union(pending_aliases.clone()); + for index in cycle.iter_ones() { + // Don't try to instantiate the alias itself in its own definition. + to_instantiate.set(index, false); - for &rec in cycle.iter() { - let alias = pending_aliases.get_mut(&rec).unwrap(); - // Don't try to instantiate the alias itself in its definition. - let original_alias_def = to_instantiate.remove(&rec).unwrap(); + // Within a recursive group, we must instantiate all aliases like how they came to the + // loop. e.g. given + // + // A : [ ConsA B, NilA ] + // B : [ ConsB A, NilB ] + // + // Our goal is + // + // A : [ ConsA [ ConsB A, NilB ], NilA ] + // B : [ ConsB [ ConsA B, NilA ], NilB ] + // + // But if we would first instantiate B into A, then use the updated A to instantiate B, + // we get + // + // A : [ ConsA [ ConsB A, NilB ], NilA ] + // B : [ ConsB [ ConsA [ ConsB A, NilB ], NilA ], NilB ] + // + // Which is incorrect. We do need the instantiated version however. + // e.g. if in a next group we have: + // + // C : A + // + // Then we must use the instantiated version + // + // C : [ ConsA [ ConsB A, NilB ], NilA ] + // + // So, we cannot replace the original version of A with its instantiated version + // while we process A's group. We have to store the instantiated version until the + // current group is done, then move it to the `aliases` array. That is what the scratchpad is for. + let alias = if cycle.count_ones() == 1 { + // an optimization: we can modify the alias in the `aliases` list directly + // because it is the only alias in the group. + &mut aliases[index] + } else { + scratchpad.push((index, aliases[index].clone())); + + &mut scratchpad.last_mut().unwrap().1 + }; + + // Now, `alias` is possibly a mutable borrow from the `aliases` vector. But we also want + // to immutably borrow other elements from that vector to instantiate them into `alias`. + // The borrow checker disallows that. + // + // So we get creative: we swap out the element we want to modify with a dummy. We can + // then freely modify the type we moved out, and the `to_instantiate` mask + // makes sure that our dummy is not used. + + let alias_region = alias.region; + let mut alias_type = Type::EmptyRec; + + std::mem::swap(&mut alias_type, &mut alias.typ); + + let can_instantiate_symbol = |s| match symbols_introduced.iter().position(|i| *i == s) { + Some(s_index) if to_instantiate[s_index] => aliases.get(s_index), + _ => None, + }; let mut new_lambda_sets = ImSet::default(); - alias.typ.instantiate_aliases( - alias.region, - &to_instantiate, + alias_type.instantiate_aliases( + alias_region, + &can_instantiate_symbol, var_store, &mut new_lambda_sets, ); - for lambda_set_var in new_lambda_sets { - alias - .lambda_set_variables - .push(LambdaSet(Type::Variable(lambda_set_var))); - } + let alias = if cycle.count_ones() > 1 { + &mut scratchpad.last_mut().unwrap().1 + } else { + &mut aliases[index] + }; - to_instantiate.insert(rec, original_alias_def); + // swap the type back + std::mem::swap(&mut alias_type, &mut alias.typ); + + // We can instantiate this alias in future iterations + to_instantiate.set(index, true); + + // add any lambda sets that the instantiation created to the current alias + alias.lambda_set_variables.extend( + new_lambda_sets + .iter() + .map(|var| LambdaSet(Type::Variable(*var))), + ); // Now mark the alias recursive, if it needs to be. - let is_self_recursive = alias.typ.contains_symbol(rec); - let is_mutually_recursive = cycle.len() > 1; + let rec = symbols_introduced[index]; + let is_self_recursive = cycle.count_ones() == 1 && matrix.get_row_col(index, index); + let is_mutually_recursive = cycle.count_ones() > 1; if is_self_recursive || is_mutually_recursive { let _made_recursive = make_tag_union_of_alias_recursive( @@ -1984,20 +1865,28 @@ fn correct_mutual_recursive_type_alias<'a>( } } + // the current group has instantiated. Now we can move the updated aliases to the `aliases` vector + for (index, alias) in scratchpad.drain(..) { + aliases[index] = alias; + } + // The cycle we just instantiated and marked recursive may still be an illegal cycle, if // all the types in the cycle are narrow newtypes. We can't figure this out until now, // because we need all the types to be deeply instantiated. - let all_are_narrow = cycle.iter().all(|sym| { - let typ = &pending_aliases.get(sym).unwrap().typ; + let all_are_narrow = cycle.iter_ones().all(|index| { + let typ = &aliases[index].typ; matches!(typ, Type::RecursiveTagUnion(..)) && typ.is_narrow() }); if all_are_narrow { // This cycle is illegal! - let mut rest = cycle; - let alias_name = rest.pop().unwrap(); + let mut indices = cycle.iter_ones(); + let first_index = indices.next().unwrap(); - let alias = pending_aliases.get_mut(&alias_name).unwrap(); + let rest: Vec = indices.map(|i| symbols_introduced[i]).collect(); + + let alias_name = symbols_introduced[first_index]; + let alias = aliases.get_mut(first_index).unwrap(); mark_cyclic_alias( env, @@ -2009,11 +1898,12 @@ fn correct_mutual_recursive_type_alias<'a>( ) } - // Now, promote all resolved aliases in this cycle as solved. - solved_aliases.extend(pending_aliases); + // We've instantiated all we could, so all instantiatable aliases are solved now + solved_aliases = to_instantiate; } - solved_aliases + // Safety: both vectors are equal length and there are no duplicates + unsafe { VecMap::zip(symbols_introduced, aliases) } } fn make_tag_union_of_alias_recursive<'a>( @@ -2022,7 +1912,7 @@ fn make_tag_union_of_alias_recursive<'a>( alias: &mut Alias, others: Vec, var_store: &mut VarStore, - can_report_error: &mut bool, + can_report_cyclic_error: &mut bool, ) -> Result<(), ()> { let alias_args = alias .type_variables @@ -2037,7 +1927,7 @@ fn make_tag_union_of_alias_recursive<'a>( others, &mut alias.typ, var_store, - can_report_error, + can_report_cyclic_error, ); match made_recursive { @@ -2086,22 +1976,25 @@ fn make_tag_union_recursive_help<'a>( others: Vec, typ: &mut Type, var_store: &mut VarStore, - can_report_error: &mut bool, + can_report_cyclic_error: &mut bool, ) -> MakeTagUnionRecursive { use MakeTagUnionRecursive::*; - let Loc { - value: (symbol, args), - region: alias_region, - } = recursive_alias; - let vars = args.iter().map(|(_, t)| t.clone()).collect::>(); + let (symbol, args) = recursive_alias.value; + let alias_region = recursive_alias.region; + match typ { Type::TagUnion(tags, ext) => { let recursion_variable = var_store.fresh(); + let type_arguments = args.iter().map(|(_, t)| t.clone()).collect::>(); + let mut pending_typ = Type::RecursiveTagUnion(recursion_variable, tags.to_vec(), ext.clone()); - let substitution_result = - pending_typ.substitute_alias(symbol, &vars, &Type::Variable(recursion_variable)); + let substitution_result = pending_typ.substitute_alias( + symbol, + &type_arguments, + &Type::Variable(recursion_variable), + ); match substitution_result { Ok(()) => { // We can substitute the alias presence for the variable exactly. @@ -2127,18 +2020,22 @@ fn make_tag_union_recursive_help<'a>( actual, type_arguments, .. - } => make_tag_union_recursive_help( - env, - Loc::at_zero((symbol, type_arguments)), - region, - others, - actual, - var_store, - can_report_error, - ), + } => { + // try to make `actual` recursive + make_tag_union_recursive_help( + env, + Loc::at_zero((symbol, type_arguments)), + region, + others, + actual, + var_store, + can_report_cyclic_error, + ) + } _ => { - mark_cyclic_alias(env, typ, symbol, region, others, *can_report_error); - *can_report_error = false; + // take care to report a cyclic alias only once (not once for each alias in the cycle) + mark_cyclic_alias(env, typ, symbol, region, others, *can_report_cyclic_error); + *can_report_cyclic_error = false; Cyclic } diff --git a/compiler/can/src/reference_matrix.rs b/compiler/can/src/reference_matrix.rs index ab858d08c3..13ba3caea2 100644 --- a/compiler/can/src/reference_matrix.rs +++ b/compiler/can/src/reference_matrix.rs @@ -1,7 +1,8 @@ // see if we get better performance with different integer types -pub(crate) type Element = usize; -pub(crate) type BitVec = bitvec::vec::BitVec; -pub(crate) type BitSlice = bitvec::prelude::BitSlice; +type Order = bitvec::order::Lsb0; +type Element = usize; +type BitVec = bitvec::vec::BitVec; +type BitSlice = bitvec::prelude::BitSlice; /// A square boolean matrix used to store relations /// @@ -36,33 +37,8 @@ impl ReferenceMatrix { } #[inline(always)] - pub fn get(&self, index: usize) -> bool { - self.bitvec[index] - } - - pub fn is_recursive(&self, index: usize) -> bool { - let mut scheduled = self.row_slice(index).to_bitvec(); - let mut visited = self.row_slice(index).to_bitvec(); - - // yes this is a bit inefficient because rows are visited repeatedly. - while scheduled.any() { - for one in scheduled.iter_ones() { - if one == index { - return true; - } - - visited |= self.row_slice(one) - } - - // i.e. visited did not change - if visited.count_ones() == scheduled.count_ones() { - break; - } - - scheduled |= &visited; - } - - false + pub fn get_row_col(&self, row: usize, col: usize) -> bool { + self.bitvec[row * self.length + col] } } @@ -75,6 +51,7 @@ impl ReferenceMatrix { // // Thank you, Samuel! impl ReferenceMatrix { + #[allow(dead_code)] pub fn topological_sort_into_groups(&self) -> TopologicalSort { if self.length == 0 { return TopologicalSort::Groups { groups: Vec::new() }; @@ -153,7 +130,7 @@ impl ReferenceMatrix { } /// Get the strongly-connected components of the set of input nodes. - pub fn strongly_connected_components(&self, nodes: &[u32]) -> Vec> { + pub fn strongly_connected_components(&self, nodes: &[u32]) -> Sccs { let mut params = Params::new(self.length, nodes); 'outer: loop { @@ -172,6 +149,7 @@ impl ReferenceMatrix { } } +#[allow(dead_code)] pub(crate) enum TopologicalSort { /// There were no cycles, all nodes have been partitioned into groups Groups { groups: Vec> }, @@ -197,7 +175,7 @@ struct Params { c: usize, p: Vec, s: Vec, - scc: Vec>, + scc: Sccs, scca: Vec, } @@ -214,7 +192,10 @@ impl Params { c: 0, s: Vec::new(), p: Vec::new(), - scc: Vec::new(), + scc: Sccs { + matrix: ReferenceMatrix::new(length), + components: 0, + }, scca: Vec::new(), } } @@ -255,15 +236,47 @@ fn recurse_onto(length: usize, bitvec: &BitVec, v: usize, params: &mut Params) { if params.p.last() == Some(&(v as u32)) { params.p.pop(); - let mut component = Vec::new(); while let Some(node) = params.s.pop() { - component.push(node); + params + .scc + .matrix + .set_row_col(params.scc.components, node as usize, true); params.scca.push(node); params.preorders[node as usize] = Preorder::Removed; if node as usize == v { break; } } - params.scc.push(component); + + params.scc.components += 1; + } +} + +#[derive(Debug)] +pub(crate) struct Sccs { + components: usize, + matrix: ReferenceMatrix, +} + +impl Sccs { + /// Iterate over the individual components. Each component is represented as a bit vector where + /// a one indicates that the node is part of the group and a zero that it is not. + /// + /// A good way to get the actual nodes is the `.iter_ones()` method. + /// + /// It is guaranteed that a group is non-empty, and that flattening the groups gives a valid + /// topological ordering. + pub fn groups(&self) -> std::iter::Take> { + // work around a panic when requesting a chunk size of 0 + let length = if self.matrix.length == 0 { + // the `.take(self.components)` ensures the resulting iterator will be empty + assert!(self.components == 0); + + 1 + } else { + self.matrix.length + }; + + self.matrix.bitvec.chunks(length).take(self.components) } } diff --git a/compiler/collections/src/vec_map.rs b/compiler/collections/src/vec_map.rs index 57ab5c31b8..60b1608737 100644 --- a/compiler/collections/src/vec_map.rs +++ b/compiler/collections/src/vec_map.rs @@ -76,6 +76,22 @@ impl VecMap { pub fn values(&self) -> impl Iterator { self.values.iter() } + + pub fn keys(&self) -> impl Iterator { + self.keys.iter() + } + + pub fn unzip(self) -> (Vec, Vec) { + (self.keys, self.values) + } + + /// # Safety + /// + /// keys and values must have the same length, and there must not + /// be any duplicates in the keys vector + pub unsafe fn zip(keys: Vec, values: Vec) -> Self { + Self { keys, values } + } } impl Extend<(K, V)> for VecMap { diff --git a/compiler/solve/tests/solve_expr.rs b/compiler/solve/tests/solve_expr.rs index 2411b13df5..af4418a7f1 100644 --- a/compiler/solve/tests/solve_expr.rs +++ b/compiler/solve/tests/solve_expr.rs @@ -5437,6 +5437,24 @@ mod solve_expr { ) } + #[test] + fn issue_2458_swapped_order() { + infer_eq_without_problem( + indoc!( + r#" + Bar a : Foo a + Foo a : [ Blah (Result (Bar a) { val: a }) ] + + v : Bar U8 + v = Blah (Ok (Blah (Err { val: 1 }))) + + v + "# + ), + "Bar U8", + ) + } + // https://github.com/rtfeldman/roc/issues/2379 #[test] fn copy_vars_referencing_copied_vars() { diff --git a/compiler/types/src/types.rs b/compiler/types/src/types.rs index 53b78b5d59..ece3c83a1b 100644 --- a/compiler/types/src/types.rs +++ b/compiler/types/src/types.rs @@ -120,13 +120,15 @@ impl RecordField { } } - pub fn instantiate_aliases( + pub fn instantiate_aliases<'a, F>( &mut self, region: Region, - aliases: &ImMap, + aliases: &'a F, var_store: &mut VarStore, introduced: &mut ImSet, - ) { + ) where + F: Fn(Symbol) -> Option<&'a Alias>, + { use RecordField::*; match self { @@ -168,13 +170,15 @@ impl LambdaSet { &mut self.0 } - fn instantiate_aliases( + fn instantiate_aliases<'a, F>( &mut self, region: Region, - aliases: &ImMap, + aliases: &'a F, var_store: &mut VarStore, introduced: &mut ImSet, - ) { + ) where + F: Fn(Symbol) -> Option<&'a Alias>, + { self.0 .instantiate_aliases(region, aliases, var_store, introduced) } @@ -1064,13 +1068,15 @@ impl Type { result } - pub fn instantiate_aliases( + pub fn instantiate_aliases<'a, F>( &mut self, region: Region, - aliases: &ImMap, + aliases: &'a F, var_store: &mut VarStore, new_lambda_set_variables: &mut ImSet, - ) { + ) where + F: Fn(Symbol) -> Option<&'a Alias>, + { use Type::*; match self { @@ -1138,7 +1144,7 @@ impl Type { ); } Apply(symbol, args, _) => { - if let Some(alias) = aliases.get(symbol) { + if let Some(alias) = aliases(*symbol) { // TODO switch to this, but we still need to check for recursion with the // `else` branch if false { diff --git a/reporting/tests/test_reporting.rs b/reporting/tests/test_reporting.rs index 3c3f55ffdf..4a091caa06 100644 --- a/reporting/tests/test_reporting.rs +++ b/reporting/tests/test_reporting.rs @@ -3292,15 +3292,15 @@ mod test_reporting { ── DUPLICATE FIELD NAME ────────────────────────────────── /code/proj/Main.roc ─ This record type defines the `.foo` field twice! - + 1│ a : { foo : Num.I64, bar : {}, foo : Str } ^^^^^^^^^^^^^ ^^^^^^^^^ - + In the rest of the program, I will only use the latter definition: - + 1│ a : { foo : Num.I64, bar : {}, foo : Str } ^^^^^^^^^ - + For clarity, remove the previous `.foo` definitions from this record type. "# @@ -3324,15 +3324,15 @@ mod test_reporting { ── DUPLICATE TAG NAME ──────────────────────────────────── /code/proj/Main.roc ─ This tag union type defines the `Foo` tag twice! - + 1│ a : [ Foo Num.I64, Bar {}, Foo Str ] ^^^^^^^^^^^ ^^^^^^^ - + In the rest of the program, I will only use the latter definition: - + 1│ a : [ Foo Num.I64, Bar {}, Foo Str ] ^^^^^^^ - + For clarity, remove the previous `Foo` definitions from this tag union type. "# @@ -3461,10 +3461,10 @@ mod test_reporting { ── TOO MANY TYPE ARGUMENTS ─────────────────────────────── /code/proj/Main.roc ─ The `Num` alias expects 1 type argument, but it got 2 instead: - + 1│ a : Num.Num Num.I64 Num.F64 ^^^^^^^^^^^^^^^^^^^^^^^ - + Are there missing parentheses? "# ), @@ -3487,10 +3487,10 @@ mod test_reporting { ── TOO MANY TYPE ARGUMENTS ─────────────────────────────── /code/proj/Main.roc ─ The `Num` alias expects 1 type argument, but it got 2 instead: - + 1│ f : Str -> Num.Num Num.I64 Num.F64 ^^^^^^^^^^^^^^^^^^^^^^^ - + Are there missing parentheses? "# ), @@ -7128,20 +7128,20 @@ I need all branches in an `if` to have the same type! indoc!( r#" ── TYPE MISMATCH ───────────────────────────────────────── /code/proj/Main.roc ─ - + Something is off with the body of the `inner` definition: - + 3│ inner : * -> * 4│ inner = \y -> y ^ - + The type annotation on `inner` says this `y` value should have the type: - + * - + However, the type of this `y` value is connected to another type in a way that isn't reflected in this annotation. - + Tip: Any connection between types must use a named type variable, not a `*`! Maybe the annotation on `inner` should have a named type variable in place of the `*`? @@ -8912,21 +8912,21 @@ I need all branches in an `if` to have the same type! ^^^^^^^^^^^ Did you mean one of these? - + Type True Box Ok - + ── UNRECOGNIZED NAME ───────────────────────────────────── /code/proj/Main.roc ─ I cannot find a `UnknownType` value - + 3│ insertHelper : UnknownType, Type -> Type ^^^^^^^^^^^ - + Did you mean one of these? - + Type True insertHelper