use crate::annotation::canonicalize_annotation; use crate::annotation::IntroducedVariables; use crate::env::Env; use crate::expr::Expr::{self, *}; use crate::expr::{ canonicalize_expr, local_successors, references_from_call, references_from_local, Output, Recursive, }; use crate::pattern::{bindings_from_patterns, canonicalize_pattern, Pattern}; use crate::procedure::References; use crate::scope::Scope; use roc_collections::all::{default_hasher, ImMap, ImSet, MutMap, MutSet, SendMap}; use roc_module::ident::Lowercase; use roc_module::symbol::Symbol; use roc_parse::ast; use roc_parse::pattern::PatternType; use roc_problem::can::{Problem, RuntimeError}; use roc_region::all::{Located, Region}; use roc_types::subs::{VarStore, Variable}; use roc_types::types::{Alias, Type}; use std::collections::HashMap; use std::fmt::Debug; use ven_graph::{strongly_connected_components, topological_sort_into_groups}; #[derive(Clone, Debug, PartialEq)] pub struct Def { pub loc_pattern: Located, pub loc_expr: Located, pub expr_var: Variable, pub pattern_vars: SendMap, pub annotation: Option, } #[derive(Clone, Debug, PartialEq)] pub struct Annotation { pub signature: Type, pub introduced_variables: IntroducedVariables, pub aliases: SendMap, pub region: Region, } #[derive(Debug)] pub struct CanDefs { pub refs_by_symbol: MutMap, pub can_defs_by_symbol: MutMap, pub aliases: SendMap, } /// A Def that has had patterns and type annnotations canonicalized, /// but no Expr canonicalization has happened yet. Also, it has had spaces /// and nesting resolved, and knows whether annotations are standalone or not. #[derive(Debug, Clone, PartialEq)] enum PendingDef<'a> { /// A standalone annotation with no body AnnotationOnly( &'a Located>, Located, &'a Located>, ), /// A body with no type annotation Body( &'a Located>, Located, &'a Located>, ), /// A body with a type annotation TypedBody( &'a Located>, Located, &'a Located>, &'a Located>, ), /// A type alias, e.g. 
`Ints : List Int` Alias { name: Located, vars: Vec>, ann: &'a Located>, }, /// An invalid alias, that is ignored in the rest of the pipeline /// e.g. a shadowed alias, or a definition like `MyAlias 1 : Int` /// with an incorrect pattern InvalidAlias, } #[derive(Clone, Debug, PartialEq)] #[allow(clippy::large_enum_variant)] pub enum Declaration { Declare(Def), DeclareRec(Vec), Builtin(Def), InvalidCycle(Vec, Vec<(Region /* pattern */, Region /* expr */)>), } impl Declaration { pub fn def_count(&self) -> usize { use Declaration::*; match self { Declare(_) => 1, DeclareRec(defs) => defs.len(), InvalidCycle(_, _) => 0, Builtin(_) => 0, } } } #[inline(always)] pub fn canonicalize_defs<'a>( env: &mut Env<'a>, mut output: Output, var_store: &mut VarStore, original_scope: &Scope, loc_defs: &'a [&'a Located>], pattern_type: PatternType, ) -> (CanDefs, Scope, Output, MutMap) { // Canonicalizing defs while detecting shadowing involves a multi-step process: // // 1. Go through each of the patterns. // 2. For each identifier pattern, get the scope.symbol() for the ident. (That symbol will use the home module for its module.) // 3. If that symbol is already in scope, then we're about to shadow it. Error! // 4. Otherwise, add it to the scope immediately, so we can detect shadowing within the same // pattern (e.g. (Foo a a) = ...) // 5. Add this canonicalized pattern and its corresponding ast::Expr to pending_exprs. // 5. Once every pattern has been processed and added to scope, go back and canonicalize the exprs from // pending_exprs, this time building up a canonical def for each one. // // This way, whenever any expr is doing lookups, it knows everything that's in scope - // even defs that appear after it in the source. // // This naturally handles recursion too, because a given expr which refers // to itself won't be processed until after its def has been added to scope. 
use roc_parse::ast::Def::*; // Record both the original and final idents from the scope, // so we can diff them while detecting unused defs. let mut scope = original_scope.clone(); let num_defs = loc_defs.len(); let mut refs_by_symbol = MutMap::default(); let mut can_defs_by_symbol = HashMap::with_capacity_and_hasher(num_defs, default_hasher()); let mut pending = Vec::with_capacity(num_defs); // TODO bump allocate this! let mut iter = loc_defs.iter().peekable(); // Canonicalize all the patterns, record shadowing problems, and store // the ast::Expr values in pending_exprs for further canonicalization // once we've finished assembling the entire scope. while let Some(loc_def) = iter.next() { // Any time we have an Annotation followed immediately by a Body, // check to see if their patterns are equivalent. If they are, // turn it into a TypedBody. Otherwise, give an error. let (new_output, pending_def) = match &loc_def.value { Annotation(pattern, annotation) | Nested(Annotation(pattern, annotation)) => { match iter.peek() { Some(Located { value: Body(body_pattern, body_expr), region: body_region, }) => { if pattern.value.equivalent(&body_pattern.value) { iter.next(); pending_typed_body( env, body_pattern, annotation, body_expr, var_store, &mut scope, pattern_type, ) } else if loc_def.region.lines_between(body_region) > 1 { // there is a line of whitespace between the annotation and the body // treat annotation and body separately to_pending_def(env, var_store, &loc_def.value, &mut scope, pattern_type) } else { // the pattern of the annotation does not match the pattern of the body directly below it env.problems.push(Problem::SignatureDefMismatch { annotation_pattern: pattern.region, def_pattern: body_pattern.region, }); // both the annotation and definition are skipped! 
iter.next(); continue; } } _ => to_pending_def(env, var_store, &loc_def.value, &mut scope, pattern_type), } } _ => to_pending_def(env, var_store, &loc_def.value, &mut scope, pattern_type), }; output.union(new_output); // store the top-level defs, used to ensure that closures won't capture them if let PatternType::TopLevelDef = pattern_type { match &pending_def { PendingDef::AnnotationOnly(_, loc_can_pattern, _) | PendingDef::Body(_, loc_can_pattern, _) | PendingDef::TypedBody(_, loc_can_pattern, _, _) => env.top_level_symbols.extend( bindings_from_patterns(std::iter::once(loc_can_pattern)) .iter() .map(|t| t.0), ), PendingDef::Alias { .. } | PendingDef::InvalidAlias => {} } } // Record the ast::Expr for later. We'll do another pass through these // once we have the entire scope assembled. If we were to canonicalize // the exprs right now, they wouldn't have symbols in scope from defs // that get would have gotten added later in the defs list! pending.push(pending_def); } if cfg!(debug_assertions) { env.home.register_debug_idents(&env.ident_ids); } let mut aliases = SendMap::default(); let mut value_defs = Vec::new(); for pending_def in pending.into_iter() { match pending_def { PendingDef::Alias { name, vars, ann } => { let symbol = name.value; let mut can_ann = canonicalize_annotation(env, &mut scope, &ann.value, ann.region, var_store); // all referenced symbols in an alias must be symbols output .references .referenced_aliases .extend(can_ann.aliases.keys().copied()); // if an alias definition uses an alias, the used alias is referenced output .references .lookups .extend(can_ann.aliases.keys().copied()); let mut can_vars: Vec> = Vec::with_capacity(vars.len()); let mut is_phantom = false; for loc_lowercase in vars { if let Some(var) = can_ann .introduced_variables .var_by_name(&loc_lowercase.value) { // This is a valid lowercase rigid var for the alias. 
can_vars.push(Located { value: (loc_lowercase.value.clone(), *var), region: loc_lowercase.region, }); } else { is_phantom = true; env.problems.push(Problem::PhantomTypeArgument { alias: symbol, variable_region: loc_lowercase.region, variable_name: loc_lowercase.value.clone(), }); } } if is_phantom { // Bail out continue; } if can_ann.typ.contains_symbol(symbol) { make_tag_union_recursive( env, symbol, name.region, vec![], &mut can_ann.typ, var_store, &mut false, ); } scope.add_alias(symbol, ann.region, can_vars.clone(), can_ann.typ.clone()); let alias = scope.lookup_alias(symbol).expect("alias is added to scope"); aliases.insert(symbol, alias.clone()); } other => value_defs.push(other), } } correct_mutual_recursive_type_alias(env, &mut aliases, var_store); // Now that we have the scope completely assembled, and shadowing resolved, // we're ready to canonicalize any body exprs. for pending_def in value_defs.into_iter() { output = canonicalize_pending_def( env, pending_def, output, &mut scope, &mut can_defs_by_symbol, var_store, &mut refs_by_symbol, &mut aliases, ); // TODO we should do something with these references; they include // things like type annotations. } // Determine which idents we introduced in the course of this process. let mut symbols_introduced = MutMap::default(); for (symbol, region) in scope.symbols() { if !original_scope.contains_symbol(*symbol) { symbols_introduced.insert(*symbol, *region); } } // This returns both the defs info as well as the new scope. // // We have to return the new scope because we added defs to it // (and those lookups shouldn't fail later, e.g. when canonicalizing // the return expr), but we didn't want to mutate the original scope // directly because we wanted to keep a clone of it around to diff // when looking for unused idents. // // We have to return the scope separately from the defs, because the // defs need to get moved later. 
( CanDefs { refs_by_symbol, can_defs_by_symbol, aliases, }, scope, output, symbols_introduced, ) } #[inline(always)] pub fn sort_can_defs( env: &mut Env<'_>, defs: CanDefs, mut output: Output, ) -> (Result, RuntimeError>, Output) { let CanDefs { refs_by_symbol, can_defs_by_symbol, aliases, } = defs; for (symbol, alias) in aliases.into_iter() { output.aliases.insert(symbol, alias); } // Determine the full set of references by traversing the graph. let mut visited_symbols = MutSet::default(); let returned_lookups = ImSet::clone(&output.references.lookups); // Start with the return expression's referenced locals. They're the only ones that count! // // If I have two defs which reference each other, but neither of them is referenced // in the return expression, I don't want either of them (or their references) to end up // in the final output.references. They were unused, and so were their references! // // The reason we need a graph here is so we don't overlook transitive dependencies. // For example, if I have `a = b + 1` and the def returns `a + 1`, then the // def as a whole references both `a` *and* `b`, even though it doesn't // directly mention `b` - because `a` depends on `b`. If we didn't traverse a graph here, // we'd erroneously give a warning that `b` was unused since it wasn't directly referenced. for symbol in returned_lookups.into_iter() { // We only care about local symbols in this analysis. if symbol.module_id() == env.home { // Traverse the graph and look up *all* the references for this local symbol. let refs = references_from_local(symbol, &mut visited_symbols, &refs_by_symbol, &env.closures); output.references = output.references.union(refs); } } for symbol in ImSet::clone(&output.references.calls).into_iter() { // Traverse the graph and look up *all* the references for this call. // Reuse the same visited_symbols as before; if we already visited it, // we won't learn anything new from visiting it again! 
let refs = references_from_call(symbol, &mut visited_symbols, &refs_by_symbol, &env.closures); output.references = output.references.union(refs); } let mut defined_symbols: Vec = Vec::new(); let mut defined_symbols_set: ImSet = ImSet::default(); for symbol in can_defs_by_symbol.keys().into_iter() { defined_symbols.push(*symbol); defined_symbols_set.insert(*symbol); } // Use topological sort to reorder the defs based on their dependencies to one another. // This way, during code gen, no def will refer to a value that hasn't been initialized yet. // As a bonus, the topological sort also reveals any cycles between the defs, allowing // us to give a CircularAssignment error for invalid (mutual) recursion, and a `DeclareRec` for mutually // recursive definitions. // All successors that occur in the body of a symbol. let all_successors_without_self = |symbol: &Symbol| -> ImSet { // This may not be in refs_by_symbol. For example, the `f` in `f x` here: // // f = \z -> z // // (\x -> // a = f x // x // ) // // It's not part of the current defs (the one with `a = f x`); rather, // it's in the enclosing scope. It's still referenced though, so successors // will receive it as an argument! match refs_by_symbol.get(symbol) { Some((_, references)) => { // We can only sort the symbols at the current level. That is safe because // symbols defined at higher levels cannot refer to symbols at lower levels. // Therefore they can never form a cycle! // // In the above example, `f` cannot reference `a`, and in the closure // a call to `f` cannot cycle back to `a`. let mut loc_succ = local_successors(&references, &env.closures); // if the current symbol is a closure, peek into its body if let Some(References { lookups, .. }) = env.closures.get(symbol) { let home = env.home; for lookup in lookups { if lookup != symbol && lookup.module_id() == home { // DO NOT register a self-call behind a lambda! 
// // We allow `boom = \_ -> boom {}`, but not `x = x` loc_succ.insert(*lookup); } } } // remove anything that is not defined in the current block loc_succ.retain(|key| defined_symbols_set.contains(key)); loc_succ } None => ImSet::default(), } }; // All successors that occur in the body of a symbol, including the symbol itself // This is required to determine whether a symbol is recursive. Recursive symbols // (that are not faulty) always need a DeclareRec, even if there is just one symbol in the // group let mut all_successors_with_self = |symbol: &Symbol| -> ImSet { // This may not be in refs_by_symbol. For example, the `f` in `f x` here: // // f = \z -> z // // (\x -> // a = f x // x // ) // // It's not part of the current defs (the one with `a = f x`); rather, // it's in the enclosing scope. It's still referenced though, so successors // will receive it as an argument! match refs_by_symbol.get(symbol) { Some((_, references)) => { // We can only sort the symbols at the current level. That is safe because // symbols defined at higher levels cannot refer to symbols at lower levels. // Therefore they can never form a cycle! // // In the above example, `f` cannot reference `a`, and in the closure // a call to `f` cannot cycle back to `a`. let mut loc_succ = local_successors(&references, &env.closures); // if the current symbol is a closure, peek into its body if let Some(References { lookups, .. }) = env.closures.get(symbol) { for lookup in lookups { loc_succ.insert(*lookup); } } // remove anything that is not defined in the current block loc_succ.retain(|key| defined_symbols_set.contains(key)); loc_succ } None => ImSet::default(), } }; // If a symbol is a direct successor of itself, there is an invalid cycle. // The difference with the function above is that this one does not look behind lambdas, // but does consider direct self-recursion. 
let direct_successors = |symbol: &Symbol| -> ImSet { match refs_by_symbol.get(symbol) { Some((_, references)) => { let mut loc_succ = local_successors(&references, &env.closures); // NOTE: if the symbol is a closure we DONT look into its body // remove anything that is not defined in the current block loc_succ.retain(|key| defined_symbols_set.contains(key)); // NOTE: direct recursion does matter here: `x = x` is invalid recursion! loc_succ } None => ImSet::default(), } }; // TODO also do the same `addDirects` check elm/compiler does, so we can // report an error if a recursive definition can't possibly terminate! match topological_sort_into_groups(defined_symbols.as_slice(), all_successors_without_self) { Ok(groups) => { let mut declarations = Vec::new(); // groups are in reversed order for group in groups.into_iter().rev() { group_to_declaration( group, &env.closures, &mut all_successors_with_self, &can_defs_by_symbol, &mut declarations, ); } (Ok(declarations), output) } Err((groups, nodes_in_cycle)) => { let mut declarations = Vec::new(); let mut problems = Vec::new(); // groups are in reversed order for group in groups.into_iter().rev() { group_to_declaration( group, &env.closures, &mut all_successors_with_self, &can_defs_by_symbol, &mut declarations, ); } // nodes_in_cycle are symbols that form a syntactic cycle. That isn't always a problem, // and in general it's impossible to decide whether it is. So we use a crude heuristic: // // Definitions where the cycle occurs behind a lambda are OK // // boom = \_ -> boom {} // // But otherwise we report an error, e.g. // // foo = if b then foo else bar for cycle in strongly_connected_components(&nodes_in_cycle, all_successors_without_self) { // check whether the cycle is faulty, which is when it has // a direct successor in the current cycle. 
This catches things like: // // x = x // // or // // p = q // q = p let is_invalid_cycle = match cycle.get(0) { Some(symbol) => { let mut succs = direct_successors(symbol); succs.retain(|key| cycle.contains(key)); !succs.is_empty() } None => false, }; if is_invalid_cycle { // We want to show the entire cycle in the error message, so expand it out. let mut loc_symbols = Vec::new(); for symbol in cycle { match refs_by_symbol.get(&symbol) { None => unreachable!( r#"Symbol `{:?}` not found in refs_by_symbol! refs_by_symbol was: {:?}"#, symbol, refs_by_symbol ), Some((region, _)) => { loc_symbols.push(Located::at(*region, symbol)); } } } let mut regions = Vec::with_capacity(can_defs_by_symbol.len()); for def in can_defs_by_symbol.values() { regions.push((def.loc_pattern.region, def.loc_expr.region)); } // Sort them by line number to make the report more helpful. loc_symbols.sort(); regions.sort(); let symbols_in_cycle: Vec = loc_symbols.into_iter().map(|s| s.value).collect(); problems.push(Problem::RuntimeError(RuntimeError::CircularDef( symbols_in_cycle.clone(), regions.clone(), ))); declarations.push(Declaration::InvalidCycle(symbols_in_cycle, regions)); } else { // slightly inefficient, because we know this becomes exactly one DeclareRec already group_to_declaration( cycle, &env.closures, &mut all_successors_with_self, &can_defs_by_symbol, &mut declarations, ); } } for problem in problems { env.problem(problem); } (Ok(declarations), output) } } } fn group_to_declaration( group: Vec, closures: &MutMap, successors: &mut dyn FnMut(&Symbol) -> ImSet, can_defs_by_symbol: &MutMap, declarations: &mut Vec, ) { use Declaration::*; // We want only successors in the current group, otherwise definitions get duplicated let filtered_successors = |symbol: &Symbol| -> ImSet { let mut result = successors(symbol); result.retain(|key| group.contains(key)); result }; // Patterns like // // { x, y } = someDef // // Can bind multiple symbols. 
When not incorrectly recursive (which is guaranteed in this function), // normally `someDef` would be inserted twice. We use the region of the pattern as a unique key // for a definition, so every definition is only inserted (thus typechecked and emitted) once let mut seen_pattern_regions: ImSet = ImSet::default(); for cycle in strongly_connected_components(&group, filtered_successors) { if cycle.len() == 1 { let symbol = &cycle[0]; if let Some(can_def) = can_defs_by_symbol.get(&symbol) { let mut new_def = can_def.clone(); // Determine recursivity of closures that are not tail-recursive if let Closure { recursive: recursive @ Recursive::NotRecursive, .. } = &mut new_def.loc_expr.value { *recursive = closure_recursivity(*symbol, closures); } let is_recursive = successors(&symbol).contains(&symbol); if !seen_pattern_regions.contains(&new_def.loc_pattern.region) { if is_recursive { declarations.push(DeclareRec(vec![new_def.clone()])); } else { declarations.push(Declare(new_def.clone())); } seen_pattern_regions.insert(new_def.loc_pattern.region); } } } else { let mut can_defs = Vec::new(); // Topological sort gives us the reverse of the sorting we want! for symbol in cycle.into_iter().rev() { if let Some(can_def) = can_defs_by_symbol.get(&symbol) { let mut new_def = can_def.clone(); // Determine recursivity of closures that are not tail-recursive if let Closure { recursive: recursive @ Recursive::NotRecursive, .. } = &mut new_def.loc_expr.value { *recursive = closure_recursivity(symbol, closures); } if !seen_pattern_regions.contains(&new_def.loc_pattern.region) { can_defs.push(new_def.clone()); } seen_pattern_regions.insert(new_def.loc_pattern.region); } } declarations.push(DeclareRec(can_defs)); } } } fn pattern_to_vars_by_symbol( vars_by_symbol: &mut SendMap, pattern: &Pattern, expr_var: Variable, ) { use Pattern::*; match pattern { Identifier(symbol) => { vars_by_symbol.insert(*symbol, expr_var); } AppliedTag { arguments, .. 
} => { for (var, nested) in arguments { pattern_to_vars_by_symbol(vars_by_symbol, &nested.value, *var); } } RecordDestructure { destructs, .. } => { for destruct in destructs { vars_by_symbol.insert(destruct.value.symbol, destruct.value.var); } } NumLiteral(_, _) | IntLiteral(_) | FloatLiteral(_) | StrLiteral(_) | Underscore | MalformedPattern(_, _) | UnsupportedPattern(_) => {} Shadowed(_, _) => {} } } // TODO trim down these arguments! #[allow(clippy::too_many_arguments)] #[allow(clippy::cognitive_complexity)] fn canonicalize_pending_def<'a>( env: &mut Env<'a>, pending_def: PendingDef<'a>, mut output: Output, scope: &mut Scope, can_defs_by_symbol: &mut MutMap, var_store: &mut VarStore, refs_by_symbol: &mut MutMap, aliases: &mut SendMap, ) -> Output { use PendingDef::*; // Make types for the body expr, even if we won't end up having a body. let expr_var = var_store.fresh(); let mut vars_by_symbol = SendMap::default(); match pending_def { AnnotationOnly(_, loc_can_pattern, loc_ann) => { // annotation sans body cannot introduce new rigids that are visible in other annotations // but the rigids can show up in type error messages, so still register them let ann = canonicalize_annotation(env, scope, &loc_ann.value, loc_ann.region, var_store); // Record all the annotation's references in output.references.lookups for symbol in ann.references { output.references.lookups.insert(symbol); output.references.referenced_aliases.insert(symbol); } aliases.extend(ann.aliases.iter().cloned()); output.introduced_variables.union(&ann.introduced_variables); pattern_to_vars_by_symbol(&mut vars_by_symbol, &loc_can_pattern.value, expr_var); let typ = ann.typ; let arity = typ.arity(); // Fabricate a body for this annotation, that will error at runtime let value = Expr::RuntimeError(RuntimeError::NoImplementation); let is_closure = arity > 0; let loc_can_expr = if !is_closure { Located { value, region: loc_ann.region, } } else { let symbol = env.gen_unique_symbol(); // generate a fake 
pattern for each argument. this makes signatures // that are functions only crash when they are applied. let mut underscores = Vec::with_capacity(arity); for _ in 0..arity { let underscore: Located = Located { value: Pattern::Underscore, region: Region::zero(), }; underscores.push((var_store.fresh(), underscore)); } let body_expr = Located { value, region: loc_ann.region, }; Located { value: Closure { function_type: var_store.fresh(), closure_type: var_store.fresh(), closure_ext_var: var_store.fresh(), return_type: var_store.fresh(), name: symbol, captured_symbols: Vec::new(), recursive: Recursive::NotRecursive, arguments: underscores, loc_body: Box::new(body_expr), }, region: loc_ann.region, } }; for (_, (symbol, _)) in scope.idents() { if !vars_by_symbol.contains_key(&symbol) { continue; } // We could potentially avoid some clones here by using Rc strategically, // but the total amount of cloning going on here should typically be minimal. can_defs_by_symbol.insert( *symbol, Def { expr_var, // TODO try to remove this .clone()! loc_pattern: loc_can_pattern.clone(), loc_expr: Located { region: loc_can_expr.region, // TODO try to remove this .clone()! 
value: loc_can_expr.value.clone(), }, pattern_vars: im::HashMap::clone(&vars_by_symbol), annotation: Some(Annotation { signature: typ.clone(), introduced_variables: output.introduced_variables.clone(), aliases: ann.aliases.clone(), region: loc_ann.region, }), }, ); } } Alias { name, ann, vars } => { let symbol = name.value; let can_ann = canonicalize_annotation(env, scope, &ann.value, ann.region, var_store); // Record all the annotation's references in output.references.lookups for symbol in can_ann.references { output.references.lookups.insert(symbol); output.references.referenced_aliases.insert(symbol); } let mut can_vars: Vec> = Vec::with_capacity(vars.len()); for loc_lowercase in vars { if let Some(var) = can_ann .introduced_variables .var_by_name(&loc_lowercase.value) { // This is a valid lowercase rigid var for the alias. can_vars.push(Located { value: (loc_lowercase.value.clone(), *var), region: loc_lowercase.region, }); } else { env.problems.push(Problem::PhantomTypeArgument { alias: symbol, variable_region: loc_lowercase.region, variable_name: loc_lowercase.value.clone(), }); } } scope.add_alias(symbol, name.region, can_vars.clone(), can_ann.typ.clone()); if can_ann.typ.contains_symbol(symbol) { // the alias is recursive. 
If it's a tag union, we attempt to fix this if let Type::TagUnion(tags, ext) = can_ann.typ { // re-canonicalize the alias with the alias already in scope let rec_var = var_store.fresh(); let mut rec_type_union = Type::RecursiveTagUnion(rec_var, tags, ext); rec_type_union.substitute_alias(symbol, &Type::Variable(rec_var)); scope.add_alias(symbol, name.region, can_vars, rec_type_union); } else { env.problems .push(Problem::CyclicAlias(symbol, name.region, vec![])); return output; } } let alias = scope.lookup_alias(symbol).expect("alias was not added"); aliases.insert(symbol, alias.clone()); output .introduced_variables .union(&can_ann.introduced_variables); } InvalidAlias => { // invalid aliases (shadowed, incorrect patterns) get ignored } TypedBody(loc_pattern, loc_can_pattern, loc_ann, loc_expr) => { let ann = canonicalize_annotation(env, scope, &loc_ann.value, loc_ann.region, var_store); // Record all the annotation's references in output.references.lookups for symbol in ann.references { output.references.lookups.insert(symbol); output.references.referenced_aliases.insert(symbol); } let typ = ann.typ; for (symbol, alias) in ann.aliases.clone() { aliases.insert(symbol, alias); } output.introduced_variables.union(&ann.introduced_variables); // bookkeeping for tail-call detection. If we're assigning to an // identifier (e.g. `f = \x -> ...`), then this symbol can be tail-called. 
let outer_identifier = env.tailcallable_symbol; if let Pattern::Identifier(ref defined_symbol) = &loc_can_pattern.value { env.tailcallable_symbol = Some(*defined_symbol); }; // regiser the name of this closure, to make sure the closure won't capture it's own name if let (Pattern::Identifier(ref defined_symbol), &ast::Expr::Closure(_, _)) = (&loc_can_pattern.value, &loc_expr.value) { env.closure_name_symbol = Some(*defined_symbol); }; pattern_to_vars_by_symbol(&mut vars_by_symbol, &loc_can_pattern.value, expr_var); let (mut loc_can_expr, can_output) = canonicalize_expr(env, var_store, scope, loc_expr.region, &loc_expr.value); output.references = output.references.union(can_output.references.clone()); // reset the tailcallable_symbol env.tailcallable_symbol = outer_identifier; // see below: a closure needs a fresh References! let mut is_closure = false; // First, make sure we are actually assigning an identifier instead of (for example) a tag. // // If we're assigning (UserId userId) = ... then this is certainly not a closure declaration, // which also implies it's not a self tail call! // // Only defs of the form (foo = ...) can be closure declarations or self tail calls. if let ( &ast::Pattern::Identifier(ref _name), &Pattern::Identifier(ref defined_symbol), &Closure { function_type, closure_type, closure_ext_var, return_type, name: ref symbol, ref arguments, loc_body: ref body, ref captured_symbols, .. }, ) = ( &loc_pattern.value, &loc_can_pattern.value, &loc_can_expr.value, ) { is_closure = true; // Since everywhere in the code it'll be referred to by its defined name, // remove its generated name from the closure map. (We'll re-insert it later.) let references = env.closures.remove(&symbol).unwrap_or_else(|| { panic!( "Tried to remove symbol {:?} from procedures, but it was not found: {:?}", symbol, env.closures ) }); // Re-insert the closure into the map, under its defined name. // closures don't have a name, and therefore pick a fresh symbol. 
But in this // case, the closure has a proper name (e.g. `foo` in `foo = \x y -> ...` // and we want to reference it by that name. env.closures.insert(*defined_symbol, references); // The closure is self tail recursive iff it tail calls itself (by defined name). let is_recursive = match can_output.tail_call { Some(ref symbol) if symbol == defined_symbol => Recursive::TailRecursive, _ => Recursive::NotRecursive, }; // Recursion doesn't count as referencing. (If it did, all recursive functions // would result in circular def errors!) refs_by_symbol .entry(*defined_symbol) .and_modify(|(_, refs)| { refs.lookups = refs.lookups.without(defined_symbol); }); // renamed_closure_def = Some(&defined_symbol); loc_can_expr.value = Closure { function_type, closure_type, closure_ext_var, return_type, name: *defined_symbol, captured_symbols: captured_symbols.clone(), recursive: is_recursive, arguments: arguments.clone(), loc_body: body.clone(), }; } // Store the referenced locals in the refs_by_symbol map, so we can later figure out // which defined names reference each other. for (_, (symbol, region)) in scope.idents() { if !vars_by_symbol.contains_key(&symbol) { continue; } let refs = // Functions' references don't count in defs. // See 3d5a2560057d7f25813112dfa5309956c0f9e6a9 and its // parent commit for the bug this fixed! if is_closure { References::new() } else { can_output.references.clone() }; refs_by_symbol.insert(*symbol, (*region, refs)); can_defs_by_symbol.insert( *symbol, Def { expr_var, // TODO try to remove this .clone()! loc_pattern: loc_can_pattern.clone(), loc_expr: Located { region: loc_can_expr.region, // TODO try to remove this .clone()! 
value: loc_can_expr.value.clone(), }, pattern_vars: im::HashMap::clone(&vars_by_symbol), annotation: Some(Annotation { signature: typ.clone(), introduced_variables: output.introduced_variables.clone(), aliases: ann.aliases.clone(), region: loc_ann.region, }), }, ); } } // If we have a pattern, then the def has a body (that is, it's not a // standalone annotation), so we need to canonicalize the pattern and expr. Body(loc_pattern, loc_can_pattern, loc_expr) => { // bookkeeping for tail-call detection. If we're assigning to an // identifier (e.g. `f = \x -> ...`), then this symbol can be tail-called. let outer_identifier = env.tailcallable_symbol; if let ( &ast::Pattern::Identifier(ref _name), &Pattern::Identifier(ref defined_symbol), ) = (&loc_pattern.value, &loc_can_pattern.value) { env.tailcallable_symbol = Some(*defined_symbol); // TODO isn't types_by_symbol enough? Do we need vars_by_symbol too? vars_by_symbol.insert(*defined_symbol, expr_var); }; // regiser the name of this closure, to make sure the closure won't capture it's own name if let (Pattern::Identifier(ref defined_symbol), &ast::Expr::Closure(_, _)) = (&loc_can_pattern.value, &loc_expr.value) { env.closure_name_symbol = Some(*defined_symbol); }; let (mut loc_can_expr, can_output) = canonicalize_expr(env, var_store, scope, loc_expr.region, &loc_expr.value); // reset the tailcallable_symbol env.tailcallable_symbol = outer_identifier; // see below: a closure needs a fresh References! let mut is_closure = false; // First, make sure we are actually assigning an identifier instead of (for example) a tag. // // If we're assigning (UserId userId) = ... then this is certainly not a closure declaration, // which also implies it's not a self tail call! // // Only defs of the form (foo = ...) can be closure declarations or self tail calls. 
if let ( &ast::Pattern::Identifier(ref _name), &Pattern::Identifier(ref defined_symbol), &Closure { function_type, closure_type, closure_ext_var, return_type, name: ref symbol, ref arguments, loc_body: ref body, ref captured_symbols, .. }, ) = ( &loc_pattern.value, &loc_can_pattern.value, &loc_can_expr.value, ) { is_closure = true; // Since everywhere in the code it'll be referred to by its defined name, // remove its generated name from the closure map. (We'll re-insert it later.) let references = env.closures.remove(&symbol).unwrap_or_else(|| { panic!( "Tried to remove symbol {:?} from procedures, but it was not found: {:?}", symbol, env.closures ) }); // Re-insert the closure into the map, under its defined name. // closures don't have a name, and therefore pick a fresh symbol. But in this // case, the closure has a proper name (e.g. `foo` in `foo = \x y -> ...` // and we want to reference it by that name. env.closures.insert(*defined_symbol, references); // The closure is self tail recursive iff it tail calls itself (by defined name). let is_recursive = match can_output.tail_call { Some(ref symbol) if symbol == defined_symbol => Recursive::TailRecursive, _ => Recursive::NotRecursive, }; // Recursion doesn't count as referencing. (If it did, all recursive functions // would result in circular def errors!) refs_by_symbol .entry(*defined_symbol) .and_modify(|(_, refs)| { refs.lookups = refs.lookups.without(defined_symbol); }); loc_can_expr.value = Closure { function_type, closure_type, closure_ext_var, return_type, name: *defined_symbol, captured_symbols: captured_symbols.clone(), recursive: is_recursive, arguments: arguments.clone(), loc_body: body.clone(), }; } // Store the referenced locals in the refs_by_symbol map, so we can later figure out // which defined names reference each other. for (symbol, region) in bindings_from_patterns(std::iter::once(&loc_can_pattern)) { let refs = // Functions' references don't count in defs. 
// See 3d5a2560057d7f25813112dfa5309956c0f9e6a9 and its
                    // parent commit for the bug this fixed!
                    if is_closure {
                        References::new()
                    } else {
                        can_output.references.clone()
                    };

                refs_by_symbol.insert(symbol, (region, refs));

                can_defs_by_symbol.insert(
                    symbol,
                    Def {
                        expr_var,
                        // TODO try to remove this .clone()!
                        loc_pattern: loc_can_pattern.clone(),
                        loc_expr: Located {
                            // TODO try to remove this .clone()!
                            region: loc_can_expr.region,
                            value: loc_can_expr.value.clone(),
                        },
                        pattern_vars: im::HashMap::clone(&vars_by_symbol),
                        annotation: None,
                    },
                );
            }

            output.union(can_output);
        }
    };

    output
}

/// Canonicalizes a group of defs together with the expression that follows them.
///
/// The defs in `loc_defs` are canonicalized first (via `canonicalize_defs`), then
/// `loc_ret` is canonicalized in the scope those defs produced. Every symbol the
/// defs introduced that is never looked up is reported as `Problem::UnusedDef`.
///
/// Returns the resulting expression and the combined `Output`. When sorting the
/// defs succeeds, the return expression is wrapped in one let-expression per
/// declaration; when it fails, the expression is `RuntimeError(err)`.
#[inline(always)]
pub fn can_defs_with_return<'a>(
    env: &mut Env<'a>,
    var_store: &mut VarStore,
    scope: Scope,
    loc_defs: &'a [&'a Located<ast::Def<'a>>],
    loc_ret: &'a Located<ast::Expr<'a>>,
) -> (Expr, Output) {
    let (unsorted, mut scope, defs_output, symbols_introduced) = canonicalize_defs(
        env,
        Output::default(),
        var_store,
        &scope,
        loc_defs,
        PatternType::DefExpr,
    );

    // The def as a whole is a tail call iff its return expression is a tail call.
    // Use its output as a starting point because its tail_call already has the right answer!
    let (ret_expr, mut output) =
        canonicalize_expr(env, var_store, &mut scope, loc_ret.region, &loc_ret.value);

    output
        .introduced_variables
        .union(&defs_output.introduced_variables);
    output.references = output.references.union(defs_output.references);

    // Now that we've collected all the references, check to see if any of the new idents
    // we defined went unused by the return expression. If any were unused, report it.
    for (symbol, region) in symbols_introduced {
        if !output.references.has_lookup(symbol) {
            env.problem(Problem::UnusedDef(symbol, region));
        }
    }

    let (can_defs, output) = sort_can_defs(env, unsorted, output);

    match can_defs {
        Ok(decls) => {
            let mut loc_expr: Located<Expr> = ret_expr;

            // Wrap from the last declaration backwards, so that the first
            // declaration ends up as the outermost let-expression.
            for declaration in decls.into_iter().rev() {
                loc_expr = Located {
                    region: Region::zero(),
                    value: decl_to_let(var_store, declaration, loc_expr),
                };
            }

            (loc_expr.value, output)
        }
        Err(err) => (RuntimeError(err), output),
    }
}

/// Wraps `loc_ret` in the let-expression that corresponds to `decl`.
///
/// * `Declare` becomes `Expr::LetNonRec`, `DeclareRec` becomes `Expr::LetRec`;
///   each gets a fresh variable from `var_store`.
/// * `InvalidCycle` becomes a `CircularDef` runtime error; `loc_ret` is dropped.
///
/// # Panics
///
/// Panics when given `Declaration::Builtin`.
fn decl_to_let(var_store: &mut VarStore, decl: Declaration, loc_ret: Located<Expr>) -> Expr {
    match decl {
        Declaration::Declare(def) => {
            Expr::LetNonRec(Box::new(def), Box::new(loc_ret), var_store.fresh())
        }
        Declaration::DeclareRec(defs) => Expr::LetRec(defs, Box::new(loc_ret), var_store.fresh()),
        Declaration::InvalidCycle(symbols, regions) => {
            Expr::RuntimeError(RuntimeError::CircularDef(symbols, regions))
        }
        Declaration::Builtin(_) => {
            // Builtins should only be added to top-level decls, not to let-exprs!
            unreachable!("Builtin declarations must never be turned into let-expressions")
        }
    }
}

/// Determines whether the closure named `symbol` can reach itself through calls.
///
/// Starting from the `calls` recorded for `symbol` in `closures`, follows the
/// `calls` of every reachable closure. Returns `Recursive::Recursive` as soon as
/// `symbol` itself is encountered, and `Recursive::NotRecursive` otherwise
/// (including when `symbol` has no entry in `closures`).
fn closure_recursivity(symbol: Symbol, closures: &MutMap<Symbol, References>) -> Recursive {
    let mut visited = MutSet::default();
    let mut stack: Vec<Symbol> = Vec::new();

    if let Some(references) = closures.get(&symbol) {
        stack.extend(references.calls.iter().copied());

        // while there are symbols left to visit
        while let Some(nested_symbol) = stack.pop() {
            if nested_symbol == symbol {
                return Recursive::Recursive;
            }

            // `insert` returns true only the first time a symbol is seen, so each
            // closure's calls are expanded at most once.
            if visited.insert(nested_symbol) {
                // if it calls any functions, add them to the stack
                if let Some(nested_references) = closures.get(&nested_symbol) {
                    stack.extend(nested_references.calls.iter().copied());
                }
            }
        }
    }

    Recursive::NotRecursive
}

/// Turns a parsed `ast::Def` into a `PendingDef`.
///
/// * `Annotation` and `Body` canonicalize their pattern (which checks for
///   shadowing and adds identifiers to `scope`) and return the pattern's `Output`.
/// * `Alias` introduces the alias name into `scope` and collects its type
///   variables. A variable that is not a lowercase identifier is reported as
///   `Problem::InvalidAliasRigid`; a name that cannot be introduced is reported as
///   `Problem::ShadowingInAnnotation`. Both cases yield `PendingDef::InvalidAlias`.
/// * `SpaceBefore`, `SpaceAfter` and `Nested` recurse into the wrapped def.
///
/// # Panics
///
/// Hits `todo!` for `NotYetImplemented` defs.
fn to_pending_def<'a>(
    env: &mut Env<'a>,
    var_store: &mut VarStore,
    def: &'a ast::Def<'a>,
    scope: &mut Scope,
    pattern_type: PatternType,
) -> (Output, PendingDef<'a>) {
    use roc_parse::ast::Def::*;

    match def {
        Annotation(loc_pattern, loc_ann) => {
            // This takes care of checking for shadowing and adding idents to scope.
            let (output, loc_can_pattern) = canonicalize_pattern(
                env,
                var_store,
                scope,
                pattern_type,
                &loc_pattern.value,
                loc_pattern.region,
            );

            (
                output,
                PendingDef::AnnotationOnly(loc_pattern, loc_can_pattern, loc_ann),
            )
        }
        Body(loc_pattern, loc_expr) => {
            // This takes care of checking for shadowing and adding idents to scope.
            let (output, loc_can_pattern) = canonicalize_pattern(
                env,
                var_store,
                scope,
                pattern_type,
                &loc_pattern.value,
                loc_pattern.region,
            );

            (
                output,
                PendingDef::Body(loc_pattern, loc_can_pattern, loc_expr),
            )
        }
        Alias { name, vars, ann } => {
            let region = Region::span_across(&name.region, &ann.region);

            match scope.introduce(
                name.value.into(),
                &env.exposed_ident_ids,
                &mut env.ident_ids,
                region,
            ) {
                Ok(symbol) => {
                    let mut can_rigids: Vec<Located<Lowercase>> = Vec::with_capacity(vars.len());

                    for loc_var in vars.iter() {
                        match loc_var.value {
                            // `starts_with` is false for an empty name, so such a
                            // name is reported below instead of causing a panic.
                            ast::Pattern::Identifier(name)
                                if name.starts_with(char::is_lowercase) =>
                            {
                                let lowercase = Lowercase::from(name);
                                can_rigids.push(Located {
                                    value: lowercase,
                                    region: loc_var.region,
                                });
                            }
                            _ => {
                                // any other pattern in this position is a syntax error.
                                env.problems.push(Problem::InvalidAliasRigid {
                                    alias_name: symbol,
                                    region: loc_var.region,
                                });

                                return (Output::default(), PendingDef::InvalidAlias);
                            }
                        }
                    }

                    (
                        Output::default(),
                        PendingDef::Alias {
                            name: Located {
                                region: name.region,
                                value: symbol,
                            },
                            vars: can_rigids,
                            ann,
                        },
                    )
                }

                Err((original_region, loc_shadowed_symbol)) => {
                    env.problem(Problem::ShadowingInAnnotation {
                        original_region,
                        shadow: loc_shadowed_symbol,
                    });

                    (Output::default(), PendingDef::InvalidAlias)
                }
            }
        }

        SpaceBefore(sub_def, _) | SpaceAfter(sub_def, _) | Nested(sub_def) => {
            to_pending_def(env, var_store, sub_def, scope, pattern_type)
        }

        NotYetImplemented(s) => todo!("{}", s),
    }
}

/// Builds a `PendingDef::TypedBody` from a pattern, its annotation and its body.
///
/// Only the pattern is canonicalized here; the returned `Output` is the one
/// produced by `canonicalize_pattern`.
fn pending_typed_body<'a>(
    env: &mut Env<'a>,
    loc_pattern: &'a Located<ast::Pattern<'a>>,
    loc_ann: &'a Located<ast::TypeAnnotation<'a>>,
    loc_expr: &'a Located<ast::Expr<'a>>,
    var_store: &mut VarStore,
    scope: &mut Scope,
    pattern_type: PatternType,
) -> (Output, PendingDef<'a>) {
    // This takes care of checking for shadowing and adding idents to scope.
    let (output, loc_can_pattern) = canonicalize_pattern(
        env,
        var_store,
        scope,
        pattern_type,
        &loc_pattern.value,
        loc_pattern.region,
    );

    (
        output,
        PendingDef::TypedBody(loc_pattern, loc_can_pattern, loc_ann, loc_expr),
    )
}

/// Make aliases recursive
///
/// Looks for cycles among the aliases in `aliases` (considering only references
/// to other aliases in the same map). For every alias in a cycle, the other
/// members of the cycle are instantiated into its type, and the result is passed
/// to `make_tag_union_recursive`. At most one error is reported per cycle.
fn correct_mutual_recursive_type_alias<'a>(
    env: &mut Env<'a>,
    aliases: &mut SendMap<Symbol, Alias>,
    var_store: &mut VarStore,
) {
    let mut symbols_introduced = ImSet::default();

    for (key, _) in aliases.iter() {
        symbols_introduced.insert(*key);
    }

    let all_successors_with_self = |symbol: &Symbol| -> ImSet<Symbol> {
        match aliases.get(symbol) {
            Some(alias) => {
                let mut loc_succ = alias.typ.symbols();
                // remove anything that is not defined in the current block
                loc_succ.retain(|key| symbols_introduced.contains(key));

                loc_succ
            }
            None => ImSet::default(),
        }
    };

    let all_successors_without_self = |symbol: &Symbol| -> ImSet<Symbol> {
        match aliases.get(symbol) {
            Some(alias) => {
                let mut loc_succ = alias.typ.symbols();
                // remove anything that is not defined in the current block
                loc_succ.retain(|key| symbols_introduced.contains(key));
                loc_succ.remove(symbol);

                loc_succ
            }
            None => ImSet::default(),
        }
    };

    // Snapshot of the aliases before any of them is rewritten below.
    let originals = aliases.clone();

    // TODO investigate should this be in a loop?
    let defined_symbols: Vec<Symbol> = aliases.keys().copied().collect();

    // split into self-recursive and mutually recursive
    match topological_sort_into_groups(&defined_symbols, all_successors_with_self) {
        Ok(_) => {
            // no mutual recursion in any alias
        }
        Err((_, mutually_recursive_symbols)) => {
            for cycle in strongly_connected_components(
                &mutually_recursive_symbols,
                all_successors_without_self,
            ) {
                // make sure we report only one error for the cycle, not an error for every
                // alias in the cycle.
                let mut can_still_report_error = true;

                // TODO use itertools to be more efficient here
                for rec in &cycle {
                    let mut to_instantiate = ImMap::default();
                    // `cycle` has at least one element here, so this cannot underflow.
                    let mut others = Vec::with_capacity(cycle.len() - 1);

                    for other in &cycle {
                        if rec != other {
                            others.push(*other);
                            if let Some(alias) = originals.get(other) {
                                to_instantiate.insert(*other, alias.clone());
                            }
                        }
                    }

                    if let Some(alias) = aliases.get_mut(rec) {
                        alias.typ.instantiate_aliases(
                            alias.region,
                            &to_instantiate,
                            var_store,
                            &mut ImSet::default(),
                        );
                        make_tag_union_recursive(
                            env,
                            *rec,
                            alias.region,
                            others,
                            &mut alias.typ,
                            var_store,
                            &mut can_still_report_error,
                        );
                    }
                }
            }
        }
    }
}

/// Rewrites `typ` in place so that the alias `symbol` becomes a recursive tag union.
///
/// * `Type::TagUnion` is replaced by a `Type::RecursiveTagUnion` with a fresh
///   recursion variable, and `symbol` is substituted by that variable.
/// * `Type::RecursiveTagUnion` is left untouched.
/// * `Type::Alias` recurses into the aliased type.
/// * Any other type is replaced by `Type::Erroneous` carrying a `CyclicAlias`
///   problem. `Problem::CyclicAlias` is pushed onto `env.problems` only while
///   `*can_report_error` is true; the flag is then cleared.
fn make_tag_union_recursive<'a>(
    env: &mut Env<'a>,
    symbol: Symbol,
    region: Region,
    others: Vec<Symbol>,
    typ: &mut Type,
    var_store: &mut VarStore,
    can_report_error: &mut bool,
) {
    match typ {
        Type::TagUnion(tags, ext) => {
            let rec_var = var_store.fresh();
            *typ = Type::RecursiveTagUnion(rec_var, tags.to_vec(), ext.clone());
            typ.substitute_alias(symbol, &Type::Variable(rec_var));
        }
        Type::RecursiveTagUnion(_, _, _) => {}
        Type::Alias(_, _, actual) => make_tag_union_recursive(
            env,
            symbol,
            region,
            others,
            actual,
            var_store,
            can_report_error,
        ),
        _ => {
            let problem = roc_types::types::Problem::CyclicAlias(symbol, region, others.clone());
            *typ = Type::Erroneous(problem);

            // ensure cyclic error is only reported for one element of the cycle
            if *can_report_error {
                *can_report_error = false;

                let problem = Problem::CyclicAlias(symbol, region, others);
                env.problems.push(problem);
            }
        }
    }
}