diff --git a/src/canonicalize.rs b/src/canonicalize.rs index 7ae04e3c3d..5d249e09df 100644 --- a/src/canonicalize.rs +++ b/src/canonicalize.rs @@ -4,7 +4,7 @@ use operator::Operator::Pizza; use operator::Associativity::*; use collections::{ImSet, ImMap, MutMap}; use std::cmp::Ordering; -use expr::{Ident, VariantName, Path}; +use expr::{Ident, VariantName}; use expr; use self::PatternType::*; @@ -20,20 +20,18 @@ pub enum Expr { Char(char), // Lookups - Var(Resolved), - CallByName(Resolved, Vec>), - InterpolatedStr(Vec<(String, Resolved)>, String), + Var(Symbol), + FunctionPointer(Symbol), + CallByName(Symbol, Vec>), + InterpolatedStr(Vec<(String, Expr)>, String), // Pattern Matching - Case(Box>, Vec<(Located, Located)>), - Assign(Vec<(Located, Located)>, Box>), - AnonymousClosure(Option>, Vec>, Box>), - NamedClosure(Symbol, Option>, Vec>, Box>), - TailRecursiveClosure(Symbol, Option>, Vec>, Box>), + Case(Box>, Vec<(Pattern, Located)>), + Assign(Vec<(Pattern, Located)>, Box>), // Application Apply(Box>, Vec>), - ApplyVariant(Resolved, Option>>), + ApplyVariant(Symbol, Option>>), // Product Types EmptyRecord, @@ -43,7 +41,10 @@ pub enum Expr { Operator(Box>, Located, Box>), // Runtime Errors - InvalidPrecedence(PrecedenceProblem, Box>) + InvalidPrecedence(PrecedenceProblem, Box>), + UnrecognizedFunctionName(Located), + UnrecognizedConstant(Located), + UnrecognizedVariant(Located), } /// Problems that can occur in the course of canonicalization. @@ -54,6 +55,7 @@ pub enum Problem { UnrecognizedConstant(Located), UnrecognizedVariant(Located), UnusedAssignment(Located), + UnusedArgument(Located), PrecedenceProblem(PrecedenceProblem), // Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments! UnsupportedPattern(PatternType, Located) @@ -63,124 +65,164 @@ pub enum Problem { /// codegen can generate a runtime error if this pattern is reached. 
#[derive(Clone, Debug, PartialEq)] pub enum Pattern { - Identifier(LocalSymbol), - Variant(Resolved, Option>>), + Identifier(Symbol), + Variant(Symbol, Option>), Integer(i64), Fraction(i64, i64), ExactString(String), EmptyRecordLiteral, Underscore, + + // Runtime Exceptions Shadowed(Located), + UnrecognizedVariant(Located), // Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments! UnsupportedPattern(Located) } -/// An ident or variant name, possibly unrecognized, possibly referring to either a toplevel or local symbol. +/// A globally unique identifier, used for both vars and variants. +/// It will be used directly in code gen. #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum Symbol { - /// An ident or variant name referencing a toplevel declaration. - Global(GlobalSymbol), +pub struct Symbol(String); - /// An ident referencing a local assignment, not a toplevel declaration. - Local(LocalSymbol), +impl Symbol { + pub fn new(prefix: &str, name: &str) -> Symbol { + Symbol(format!("{}{}", prefix, name)) + } + + pub fn from_variant(variant_name: &VariantName, home: &str) -> Symbol { + match &variant_name { + &VariantName::Unqualified(ref name) => + Symbol::new(home, name), + + &VariantName::Qualified(ref path, ref name) => + Symbol::new(path, name), + } + } } -/// An ident or variant name in the globlal scope; that is, something defined in the toplevel of some module. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct GlobalSymbol(String); +impl Into for Symbol { + fn into(self) -> String { + let Symbol(string) = self; -/// An ident referencing a local assignment - *not* something defined in the toplevel. 
-#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct LocalSymbol(String); - -impl Into for LocalSymbol { - fn into(self) -> Ident { - let LocalSymbol(name) = self; - - Ident::Unqualified(name) + string } } #[derive(Clone, Debug, PartialEq)] -pub enum Resolved { - /// This is the unique symbol we'll use in codegen. - Recognized(T), - - /// These will codegen to runtime errors. - UnrecognizedFunctionName(Located), - UnrecognizedConstant(Located), - UnrecognizedVariant(Located), +struct Scope { + pub idents: ImMap, + symbol_prefix: String, + next_unique_id: u64, } -impl GlobalSymbol { - pub fn new(path: Path, name: String) -> GlobalSymbol { - GlobalSymbol(format!("{}.{}", path.into_string(), name)) +impl Scope { + pub fn new(symbol_prefix: String, declared_idents: ImMap) -> Scope { + Scope { + symbol_prefix, + + // This is used to generate unique names for anonymous closures. + // It always begins at 0. + next_unique_id: 0, + + idents: declared_idents + } } - pub fn recognized(path: Path, name: String) -> Resolved { - Resolved::Recognized(GlobalSymbol::new(path, name)) + pub fn symbol(&self, name: &str) -> Symbol { + Symbol::new(&self.symbol_prefix, name) + } + + pub fn gen_unique_symbol(&mut self) -> Symbol { + self.next_unique_id = self.next_unique_id + 1; + + Symbol::new(&self.symbol_prefix, &self.next_unique_id.to_string()) } } -impl LocalSymbol { - pub fn new(name: String) -> LocalSymbol { - LocalSymbol(name) - } - - pub fn recognized(name: String) -> Resolved { - Resolved::Recognized(LocalSymbol::new(name)) - } +struct Procedure { + name: Option, + closes_over: ImSet, + is_self_tail_recursive: bool, + definition: Region, + args: Vec, + body: Expr } -impl Symbol { - pub fn resolved_global(path: Path, name: String) -> Resolved { - Resolved::Recognized(Symbol::Global(GlobalSymbol::new(path, name))) - } - - pub fn resolved_local(name: String) -> Resolved { - Resolved::Recognized(Symbol::Local(LocalSymbol::new(name))) - } - - pub fn global(path: Path, name: 
String) -> Symbol { - Symbol::Global(GlobalSymbol::new(path, name)) - } - - pub fn local(name: String) -> Symbol { - Symbol::Local(LocalSymbol::new(name)) +impl Procedure { + pub fn new(definition: Region, closes_over: ImSet, args: Vec, body: Expr) -> Procedure { + Procedure { + name: None, + closes_over, + is_self_tail_recursive: false, + definition, + args, + body + } } } /// The canonicalization environment for a particular module. struct Env { /// The module's path. Unqualified references to identifiers and variant names are assumed - /// to be relative to this Path, and will be turned into Qualified references accordingly. - home: Path, + /// to be relative to this path. + home: String, /// Problems we've encountered along the way, which will be reported to the user at the end. problems: Vec, /// Variants either declared in this module, or imported. - declared_variants: ImMap<(Path, String), Located>, + variants: ImMap>, + + /// Former closures converted to top-level procedures. + procedures: MutMap, } impl Env { - pub fn new(home: Path, declared_variants: ImMap<(Path, String), Located>) -> Env { - Env { home, declared_variants, problems: Vec::new() } + pub fn new(home: String, declared_variants: ImMap>) -> Env { + Env { + home, + variants: declared_variants, + problems: Vec::new(), + procedures: MutMap::default(), + } } pub fn problem(&mut self, problem: Problem) -> () { self.problems.push(problem) } + + pub fn register_closure( + &mut self, + symbol: Symbol, + closes_over: ImSet, + args: Vec, + body: Expr, + definition: Region + ) -> () { + // We can't if the closure is self tail recursive yet, because it doesn't know its final name yet. + // (Assign sets that.) Assume this is false, and let Assign change it to true after it sets final name. + let is_self_tail_recursive = false; + let name = None; // The Assign logic is also responsible for setting names after the fact. 
+ let procedure = Procedure {closes_over, args, name, body, is_self_tail_recursive, definition}; + + self.procedures.insert(symbol, procedure); + } } pub fn canonicalize_declaration( - home: Path, + home: String, + name: &str, loc_expr: Located, - declared_idents: &ImMap>, - declared_variants: &ImMap<(Path, String), Located>, + declared_idents: &ImMap, + declared_variants: &ImMap>, ) -> (Located, Output, Vec) { + // If we're canonicalizing the declaration `foo = ...` inside the `Main` module, + // scope_prefix will be "Main$foo$" and its first closure will be named "Main$foo$0" + let scope_prefix = format!("{}${}$", home, name); + let mut scope = Scope::new(scope_prefix, declared_idents.clone()); let mut env = Env::new(home, declared_variants.clone()); - let (mut new_loc_expr, output) = canonicalize(&mut env, loc_expr, declared_idents); + let (mut new_loc_expr, output) = canonicalize(&mut env, &mut scope, loc_expr); // Apply operator precedence and associativity rules once, after canonicalization is // otherwise complete. 
If we did this *during* canonicalization, then each time we @@ -194,32 +236,56 @@ pub fn canonicalize_declaration( #[derive(Clone, Debug, PartialEq)] pub struct Output { - pub referenced_idents: ImSet, - pub applied_variants: ImSet<(Path, String)>, + pub references: References, pub tail_call: Option, } +#[derive(Clone, Debug, PartialEq)] +pub struct References { + pub locals: ImSet, + pub globals: ImSet, + pub variants: ImSet, +} + +impl References { + pub fn new() -> References { + References { + locals: ImSet::default(), + globals: ImSet::default(), + variants: ImSet::default(), + } + } + + pub fn union(mut self, other: References) -> Self { + self.locals = self.locals.union(other.locals); + self.globals = self.globals.union(other.globals); + self.variants = self.variants.union(other.variants); + + self + } + + pub fn has_local(&self, symbol: &Symbol) -> bool { + self.locals.contains(symbol) + } + + pub fn has_variant(&self, symbol: &Symbol) -> bool { + self.variants.contains(symbol) + } +} + impl Output { pub fn new() -> Output { Output { - referenced_idents: ImSet::default(), - applied_variants: ImSet::default(), + references: References::new(), tail_call: None, } } - - pub fn union_usages(mut self, other: Output) -> Self { - self.referenced_idents = self.referenced_idents.union(other.referenced_idents); - self.applied_variants = self.applied_variants.union(other.applied_variants); - - self - } } fn canonicalize( env: &mut Env, + scope: &mut Scope, loc_expr: Located, - idents_in_scope: &ImMap>, ) -> (Located, Output) { use self::Expr::*; @@ -235,9 +301,9 @@ fn canonicalize( expr::Expr::If(loc_cond, loc_true, loc_false) => { // Canonicalize the nested expressions - let (cond_expr, cond_out) = canonicalize(env, *loc_cond, idents_in_scope); - let (true_expr, true_out) = canonicalize(env, *loc_true, idents_in_scope); - let (false_expr, false_out) = canonicalize(env, *loc_false, idents_in_scope); + let (cond_expr, cond_out) = canonicalize(env, scope, *loc_cond); + 
let (true_expr, true_out) = canonicalize(env, scope, *loc_true); + let (false_expr, false_out) = canonicalize(env, scope, *loc_false); // Incorporate all three expressions into a combined Output value. let expr = If(Box::new(cond_expr), Box::new(true_expr), Box::new(false_expr)); @@ -250,32 +316,30 @@ fn canonicalize( output.tail_call = true_out.tail_call; } - output.referenced_idents = output.referenced_idents.union(true_out.referenced_idents); - output.referenced_idents = output.referenced_idents.union(false_out.referenced_idents); - output.applied_variants = output.applied_variants.union(true_out.applied_variants); - output.applied_variants = output.applied_variants.union(false_out.applied_variants); + // To evaluate the whole if-expression, we depend on all the values that both branches depend on. + output.references = output.references.union(true_out.references); + output.references = output.references.union(false_out.references); (expr, output) }, expr::Expr::Apply(loc_fn, loc_args) => { // Canonicalize the function expression and its arguments - let (fn_expr, fn_out) = canonicalize(env, *loc_fn, idents_in_scope); + let (fn_expr, mut output) = canonicalize(env, scope, *loc_fn); let mut args = Vec::new(); let mut outputs = Vec::new(); for loc_arg in loc_args { - let (arg_expr, arg_out) = canonicalize(env, loc_arg, idents_in_scope); + let (arg_expr, arg_out) = canonicalize(env, scope, loc_arg); args.push(arg_expr); outputs.push(arg_out); } let expr = Apply(Box::new(fn_expr), args); - let mut output = fn_out; for arg_out in outputs { - output = output.union_usages(arg_out); + output.references = output.references.union(arg_out.references); } // We're not tail-calling a symbol (by name), we're tail-calling a function value. 
@@ -286,19 +350,19 @@ fn canonicalize( expr::Expr::Operator(loc_left, op, loc_right) => { // Canonicalize the nested expressions - let (left_expr, left_out) = canonicalize(&mut *env, *loc_left, idents_in_scope); - let (right_expr, right_out) = canonicalize(&mut *env, *loc_right, idents_in_scope); + let (left_expr, left_out) = canonicalize(env, scope, *loc_left); + let (right_expr, mut output) = canonicalize(env, scope, *loc_right); // Incorporate both expressions into a combined Output value. - let mut output = left_out.union_usages(right_out); + output.references = output.references.union(left_out.references); // The pizza operator is the only one that can be a tail call, // because it's the only one that can call a function by name. output.tail_call = match op.value { Pizza => { match &right_expr.value { - Var(Resolved::Recognized(sym)) => Some(sym.clone()), - CallByName(Resolved::Recognized(sym), _) => Some(sym.clone()), + &Var(ref sym) => Some(sym.clone()), + &CallByName(ref sym, _) => Some(sym.clone()), _ => None } }, @@ -311,372 +375,359 @@ fn canonicalize( }, expr::Expr::Var(ident) => { - // check if present in idents_in_scope... - // if not, NAMING PROBLEM - // if so, include it in referenced_idents - let mut referenced_idents = ImSet::default(); - let symbol = - resolve_ident(&env, ident, &mut referenced_idents, idents_in_scope) - .unwrap_or_else(|ident| { + let mut output = Output::new(); + let can_expr = + match resolve_ident(&env, &scope, ident, &mut output.references) { + Ok(symbol) => Var(symbol), + Err(ident) => { let loc_ident = Located {region, value: ident}; env.problem(Problem::UnrecognizedConstant(loc_ident.clone())); - Resolved::UnrecognizedConstant(loc_ident) - }); + UnrecognizedConstant(loc_ident) + } + }; - ( - Var(symbol), - Output {referenced_idents, applied_variants: ImSet::default(), tail_call: None} - ) + (can_expr, output) }, expr::Expr::CallByName(ident, args) => { - // check if function name is present in idents_in_scope... 
- // if not, NAMING PROBLEM - // if so, include it in referenced_idents - let mut referenced_idents = ImSet::default(); - let mut applied_variants = ImSet::default(); + // Canonicalize the arguments and union their references into our output. + // We'll do this even if the function name isn't recognized, since we still + // want to report canonicalization problems with the function's arguments, + // and their references still matter for purposes of detecting unused things. + let mut output = Output::new(); + let mut can_args = Vec::with_capacity(args.len()); - let symbol = - resolve_ident(&env, ident, &mut referenced_idents, idents_in_scope) - .unwrap_or_else(|ident| { + for arg in args { + let (loc_expr, arg_output) = canonicalize(env, scope, arg); + + output.references = output.references.union(arg_output.references); + + can_args.push(loc_expr); + } + + let can_expr = + match resolve_ident(&env, &scope, ident, &mut output.references) { + Ok(symbol) => { + // CallByName expressions are considered tail calls, + // so that their parents in the expression tree will + // correctly inherit tail-call-ness from them. + output.tail_call = Some(symbol.clone()); + + CallByName(symbol, can_args) + } + Err(ident) => { let loc_ident = Located {region, value: ident}; env.problem(Problem::UnrecognizedFunctionName(loc_ident.clone())); - Resolved::UnrecognizedFunctionName(loc_ident) - }); + UnrecognizedFunctionName(loc_ident) + } + }; - let mut new_args = Vec::with_capacity(args.len()); - - for arg in args { - let (loc_expr, output) = canonicalize(&mut *env, arg, &idents_in_scope); - - referenced_idents = referenced_idents.clone().union(output.referenced_idents); - applied_variants = applied_variants.clone().union(output.applied_variants); - - new_args.push(loc_expr); - } - - // If we recognized the symbol, this is a tail call! 
- let tail_call = match &symbol { - &Resolved::Recognized(ref sym) => Some(sym.clone()), - _ => None - }; - - let output = Output {referenced_idents, applied_variants, tail_call}; - - (CallByName(symbol, new_args), output) + (can_expr, output) }, expr::Expr::InterpolatedStr(pairs, suffix) => { - let mut referenced_idents = ImSet::default(); - let applied_variants = ImSet::default(); - - let new_pairs: Vec<(String, Resolved)> = pairs.into_iter().map(|(string, ident)| { - // check if present in idents_in_scope... - // if not, NAMING PROBLEM - // if so, include it in referenced_idents - let ident_region = ident.region; - let symbol = - resolve_ident(&env, ident.value, &mut referenced_idents, idents_in_scope) - .unwrap_or_else(|value| { - let loc_ident = Located {region: ident_region, value}; + let mut output = Output::new(); + let can_pairs: Vec<(String, Expr)> = pairs.into_iter().map(|(string, loc_ident)| { + // From a language design perspective, we only permit idents in interpolation. + // However, in a canonical Expr we store it as a full Expr, not a Symbol. + // This is so that we can resolve it to either Var or Unrecognized; if we + // stored it as a Symbol, we couldn't record runtime errors here. + let can_expr = + match resolve_ident(&env, &scope, loc_ident.value, &mut output.references) { + Ok(symbol) => Var(symbol), + Err(ident) => { + let loc_ident = Located {region: loc_ident.region, value: ident}; env.problem(Problem::UnrecognizedConstant(loc_ident.clone())); - Resolved::UnrecognizedConstant(loc_ident) - }); + UnrecognizedConstant(loc_ident) + } + }; - (string, symbol) + (string, can_expr) }).collect(); - let output = Output {referenced_idents, applied_variants, tail_call: None}; - - (InterpolatedStr(new_pairs, suffix), output) + (InterpolatedStr(can_pairs, suffix), output) } expr::Expr::ApplyVariant(variant_name, opt_args) => { - // check if present in declared_variants... 
- // if not, NAMING ERROR - // if so, return applied_variants which includes it - let mut referenced_idents = ImSet::default(); - let mut applied_variants = ImSet::default(); + // Canonicalize the arguments and union their references into our output. + // We'll do this even if the variant name isn't recognized, since we still + // want to report canonicalization problems with the variant's arguments, + // and their references still matter for purposes of detecting unused things. + let mut output = Output::new(); - let symbol = - resolve_variant_name(&env, variant_name, &mut applied_variants) - .unwrap_or_else(|value| { - let loc_variant = Located {region, value}; + let opt_can_args = match opt_args { + Some(args) => { + let mut can_args = Vec::with_capacity(args.len()); + + for arg in args { + let (loc_expr, arg_output) = canonicalize(env, scope, arg); + + output.references = output.references.union(arg_output.references); + + can_args.push(loc_expr); + } + + Some(can_args) + } + None => None + }; + + let can_expr = + match resolve_variant_name(&env, &scope, variant_name, &mut output.references) { + Ok(symbol) => ApplyVariant(symbol, opt_can_args), + Err(variant_name) => { + let loc_variant = Located {region, value: variant_name}; env.problem(Problem::UnrecognizedVariant(loc_variant.clone())); - Resolved::UnrecognizedVariant(loc_variant) - }); + UnrecognizedVariant(loc_variant) + } + }; - let new_opt_args = opt_args.map(|args| { - let mut new_args = Vec::with_capacity(args.len()); - - for arg in args { - let (loc_expr, output) = canonicalize(&mut *env, arg, &idents_in_scope); - - referenced_idents = referenced_idents.clone().union(output.referenced_idents); - applied_variants = applied_variants.clone().union(output.applied_variants); - - new_args.push(loc_expr); - } - - new_args - }); - - let output = Output {referenced_idents, applied_variants, tail_call: None}; - - (ApplyVariant(symbol, new_opt_args), output) + (can_expr, output) } expr::Expr::Assign(assignments, 
box_loc_returned) => { - use self::Pattern::*; + // The body expression gets a new scope for canonicalization. + // Shadow `scope` to make sure we don't accidentally use the original one for the + // rest of this block. + let mut scope = scope.clone(); - let mut applied_variants = ImSet::default(); - let mut new_idents_in_scope = ImMap::default(); + // Add the assigned identifiers to scope. If there's a collision, it means there + // was shadowing, which will be handled later. + let assigned_idents: ImMap = + idents_from_patterns(assignments.clone().into_iter().map(|(loc_pattern, _)| loc_pattern), &scope); - // Record all the new idents we're adding to scope - for (loc_pattern, _) in assignments.iter() { - add_idents_to_scope(loc_pattern.clone(), &mut new_idents_in_scope); - } + scope.idents = scope.idents.union(assigned_idents.clone()); - // Add the new_idents_in_scope to idents_in_scope. If there's a collision, - // it means there was shadowing, so keep the original mapping from ident_in_scope. - // Shadowing means the mapping from new_idents_in_scope will be removed later. - let mut combined_idents_in_scope = idents_in_scope.clone().union(new_idents_in_scope.clone()); - let mut referenced_idents_by_assigned_name: MutMap)> = MutMap::default(); + let mut refs_by_assignment: MutMap = MutMap::default(); - let can_assignments = assignments.into_iter().map(|(loc_pattern, expr)| { + let can_assignments: Vec<(Pattern, Located)> = assignments.into_iter().map(|(loc_pattern, expr)| { // Each assignment gets to have all the idents in scope that are assigned in this // block. Order of assignments doesn't matter, thanks to referential transparency! - let (loc_can_expr, can_output) = canonicalize(env, expr, &combined_idents_in_scope); - - applied_variants = applied_variants.clone().union(can_output.applied_variants); + let (loc_can_expr, can_output) = canonicalize(env, &mut scope, expr); // Exclude the current ident from shadowable_idents; you can't shadow yourself! 
// (However, still include it in scope, because you *can* recursively refer to yourself.) - let mut shadowable_idents = combined_idents_in_scope.clone(); + let mut shadowable_idents = scope.idents.clone(); remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); - let can_pattern = canonicalize_pattern(env, &Assignment, loc_pattern.clone(), &mut combined_idents_in_scope, &mut shadowable_idents); + let can_pattern = canonicalize_pattern(env, &mut scope, &Assignment, &loc_pattern, &mut shadowable_idents); - // Store the referenced idents in a map, so we can later figure out which - // assigned names reference each other. - for name in local_idents_in_pattern(&can_pattern.value) { - referenced_idents_by_assigned_name.insert(name, (loc_pattern.region, can_output.referenced_idents.clone())); + // Store the referenced locals in the refs_by_assignment map, so we can later figure out + // which assigned names reference each other. + for (symbol, region) in idents_from_patterns(std::iter::once(loc_pattern.clone()), &scope).values() { + let refs = can_output.references.clone(); + + refs_by_assignment.insert(symbol.clone(), (*region, refs)); } - // Give closures names (and tail-recursive status) where appropriate - let region = loc_can_expr.region; - let can_expr = match &can_pattern.value { + // Give closures names (and tail-recursive status) where appropriate. + let can_expr = match (&loc_pattern.value, &can_pattern) { // First, make sure we are actually assigning an identifier instead of (for example) a variant. - // If we're assigning (UserId userId) = ... then this is by definition not a self tail call! - // (We could theoretically support certain scenarios like that, but it doesn't seem worthwhile; - // all we'd be saving anyone is the step of refactoring the closure out to have its own name.) - // By our definition, only assignments of the form (foo = ...) can be self tail calls. 
- &Identifier(ref local_symbol_ref) => { + // + // If we're assigning (UserId userId) = ... then this is certainly not a closure declaration, + // which also implies it's not a self tail call! + // + // Only assignments of the form (foo = ...) can be closure declarations or self tail calls. + (&expr::Pattern::Identifier(ref name), &Pattern::Identifier(ref assigned_symbol)) => { match loc_can_expr.value { - AnonymousClosure(closed_over, args, body) => { - let closure_symbol = Symbol::Local(local_symbol_ref.clone()); - let is_self_tail_recursive = match can_output.tail_call { - None => false, - Some(symbol) => symbol == closure_symbol + FunctionPointer(anonymous_closure_symbol) => { + // Since everywhere in the code it'll be referred to by its assigned name, + // remove its generated name from the procedure map. (We'll re-insert it later.) + let mut procedure = env.procedures.remove(&anonymous_closure_symbol).unwrap(); + + // The original ident name will be used for debugging and stack traces. + procedure.name = Some(name.clone()); + + // The closure is self tail recursive iff it tail calls itself (by assigned name). + procedure.is_self_tail_recursive = match &can_output.tail_call { + &None => false, + &Some(ref symbol) => symbol == assigned_symbol }; - if is_self_tail_recursive { - TailRecursiveClosure(closure_symbol, closed_over, args, body) - } else { - NamedClosure(closure_symbol, closed_over, args, body) - } + // Re-insert the procedure into the map, under its assigned name. This way, + // when code elsewhere calls it by assigned name, it'll resolve properly. + env.procedures.insert(assigned_symbol.clone(), procedure); + + // Return a pointer to the assigned symbol, since the auto-generated one no + // longer references any entry in the procedure map! 
+ FunctionPointer(assigned_symbol.clone()) }, non_closure => non_closure } }, - &Variant(_, _) | &EmptyRecordLiteral | &Shadowed(_) | &UnsupportedPattern(_) - | &Integer(_) | &Fraction(_, _) | &ExactString(_) | &Underscore => loc_can_expr.value, + _ => loc_can_expr.value }; - (can_pattern, Located {region, value: can_expr}) + (can_pattern, Located {region: loc_can_expr.region, value: can_expr}) }).collect(); // The assignment as a whole is a tail call iff its return expression is a tail call. - // We use its output as a starting point because its tail_call already has the right answer! - let (ret_expr, mut output) = canonicalize(env, *box_loc_returned, &combined_idents_in_scope); + // Use its output as a starting point because its tail_call already has the right answer! + let (ret_expr, mut output) = canonicalize(env, &mut scope, *box_loc_returned); - output.applied_variants = output.applied_variants.clone().union(applied_variants); + // Determine the full set of references by traversing the graph. + let mut visited_symbols = ImSet::default(); - // Determine the full set of referenced_idents by traversing the graph - let mut referenced_idents = output.referenced_idents.clone(); - let mut visited = ImSet::default(); - - // Start with the return expression's referenced_idents. They are the only ones that count! - // For example, if I have two assignments which reference each other, but neither of them - // is referenced in the return expression, I don't want either of them to end up in the - // final referenced_idents. + // Start with the return expression's referenced locals. They are the only ones that count! + // + // If I have two assignments which reference each other, but neither of them + // is referenced in the return expression, I don't want either of them (or their references) + // to end up in the final output.references. They were unused, and so were their references! // // The reason we need a graph here is so we don't overlook transitive dependencies. 
// For example, if I have `a = b + 1` and the assignment returns `a + 1`, then the // assignment as a whole references both `a` *and* `b`, even though it doesn't // directly mention `b` - because `a` depends on `b`. If we didn't traverse a graph here, // we'd erroneously give a warning that `b` was unused since it wasn't directly referenced. - for ident in output.referenced_idents.clone() { - match ident { - // Only consider local referenced idents; the global ones don't need the graph. - unqualified @ Ident::Unqualified(_) => { - // Traverse the graph and look up *all* the references for this name - let refs = get_all_referenced(unqualified, &mut visited, &referenced_idents_by_assigned_name); + for symbol in output.references.locals.clone().into_iter() { + // Traverse the graph and look up *all* the references for this local symbol. + let refs = get_all_referenced(symbol, &mut visited_symbols, &refs_by_assignment); - referenced_idents = referenced_idents.union(refs); - } - Ident::Qualified(_, _) => () - }; + output.references = output.references.union(refs); } // Now that we've collected all the references, check to see if any of the new idents - // we defined were unused. If any were, report it. - for ident in new_idents_in_scope.keys() { - if !ident.is_qualified() && !referenced_idents.contains(&ident) { - match ident { - unqualified @ Ident::Unqualified(_) => { - match referenced_idents_by_assigned_name.get(&unqualified) { - Some((region, _)) => { - let loc_ident = Located {region: region.clone(), value: unqualified.clone()}; + // we defined went unused by the return expression. If any were unused, report it. 
+ for (ident, (symbol, region)) in assigned_idents { + if !output.references.has_local(&symbol) { + let loc_ident = Located {region: region.clone(), value: ident.clone()}; - env.problem(Problem::UnusedAssignment(loc_ident)); - }, - None => unreachable!() - }; - }, - Ident::Qualified(_, _) => () - } + env.problem(Problem::UnusedAssignment(loc_ident)); } } - // TODO somewhere in here, build 2 graphs of all the idents - // 1. eval_deps - topological sort this and look for cycles (add a cycle test and an eval order test!) - // 2. references - do a dfs search on each referenced local value in ret, to see which - output.referenced_idents = referenced_idents; - (Assign(can_assignments, Box::new(ret_expr)), output) }, expr::Expr::Closure(loc_arg_patterns, box_loc_body_expr) => { - let mut new_idents_in_scope = ImMap::default(); + // The globally unique symbol that will refer to this closure once it gets converted + // into a top-level procedure for code gen. + // + // The symbol includes the module name, the top-level declaration name, and the + // index (0-based) of the closure within that declaration. + // + // Example: "MyModule$main$3" if this is the 4th closure in MyModule.main. + let symbol = scope.gen_unique_symbol(); - // Record all the new idents we're adding to scope - for loc_pattern in loc_arg_patterns.iter() { - add_idents_to_scope(loc_pattern.clone(), &mut new_idents_in_scope); - } + // The body expression gets a new scope for canonicalization. + // Shadow `scope` to make sure we don't accidentally use the original one for the + // rest of this block. + let mut scope = scope.clone(); - // Add the new_idents_in_scope to idents_in_scope. If there's a collision, - // it means there was shadowing, so keep the original mapping from ident_in_scope. - // Shadowing means the mapping from new_idents_in_scope will be removed later. 
- let mut combined_idents_in_scope = idents_in_scope.clone().union(new_idents_in_scope.clone()); + // Add the arguments' idents to scope.idents. If there's a collision, + // it means there was shadowing, which will be handled later. + let arg_idents: ImMap = + idents_from_patterns(loc_arg_patterns.clone().into_iter(), &scope); - let can_args = loc_arg_patterns.into_iter().map(|loc_pattern| { + scope.idents = scope.idents.union(arg_idents.clone()); + + let can_args: Vec = loc_arg_patterns.into_iter().map(|loc_pattern| { // Exclude the current ident from shadowable_idents; you can't shadow yourself! // (However, still include it in scope, because you *can* recursively refer to yourself.) - let mut shadowable_idents = combined_idents_in_scope.clone(); + let mut shadowable_idents = scope.idents.clone(); remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); - canonicalize_pattern(env, &FunctionArg, loc_pattern, &mut combined_idents_in_scope, &mut shadowable_idents) + canonicalize_pattern(env, &mut scope, &FunctionArg, &loc_pattern, &mut shadowable_idents) }).collect(); + let (loc_body_expr, output) = canonicalize(env, &mut scope, *box_loc_body_expr); - let (body_expr, output) = canonicalize(env, *box_loc_body_expr, &combined_idents_in_scope); + // We only ever need to close over locals. Globals are always available! + let mut closes_over: ImSet = output.references.locals.clone(); - // Now that we've collected all the references, check to see if any of the new idents - // we defined were unused. If any were, report it. - for (ident, loc_expr) in new_idents_in_scope { - if !output.referenced_idents.contains(&ident) { - env.problem(Problem::UnusedAssignment(loc_expr)); + // Now that we've collected all the references, check to see if any of the args we defined + // went unreferenced. If any did, report them as unused arguments. 
+ for (ident, (arg_symbol, region)) in arg_idents { + if !output.references.has_local(&arg_symbol) { + // The body never referenced this argument we declared. It's an unused argument! + env.problem(Problem::UnusedArgument(Located {region, value: ident})); } + + // If it's an argument, we shouldn't close over it. + // (We need to explicitly remove these because we start by + // closing over *all* referenced locals, including args.) + closes_over.remove(&arg_symbol); } - // While we still have the info, determine what locals we will need to close over in code gen. - let closed_over_locals = if output.referenced_idents.is_empty() { - None - } else { - let locals = output.referenced_idents.iter().filter_map(|ident| { - // Only close over locally assigned idents; globals are always available. - if !ident.is_qualified() - // If it's not in scope, it'll be a NAMING ERROR at runtime, and - // attempting to close over it will fail. Leave it out! - && combined_idents_in_scope.contains_key(&ident) - { - Some(LocalSymbol::new(ident.clone().name())) - } else { - None - } - }).collect::>(); + // We've finished analyzing the closure. Register it as a top-level procedure in the Env! + env.register_closure(symbol.clone(), closes_over, can_args, loc_body_expr.value, region); - if locals.is_empty() { - None - } else { - Some(locals) - } - }; - - (AnonymousClosure(closed_over_locals, can_args, Box::new(body_expr)), output) + // Always return a function pointer, in case that's how the closure is being used (e.g. with Apply). + // It's possible that Assign will rewrite this. In that case, Assign will need to know the symbol we + // used here, to look up the closure's info before renaming it. This pointer gives Assign that symbol. 
+ (FunctionPointer(symbol), output) }, expr::Expr::Case(loc_cond, branches) => { - // Canonicalize the nested expressions - let (can_cond, mut output) = canonicalize(env, *loc_cond, idents_in_scope); + // Canonicalize the conditional + let (can_cond, mut output) = canonicalize(env, scope, *loc_cond); + let mut can_branches = Vec::with_capacity(branches.len()); let mut recorded_tail_call = false; - // Clear the initial tail_call, since it depends only on the branches. - // The branches should overwrite this, so it will only come up if - // there are no branches, but that is possible! A case with no branches - // is a runtime error, but code gen thinking this is actually tail recursive - // could have more serious consequences than a runtime error. - output.tail_call = None; + for (loc_pattern, loc_expr) in branches { + // Each case branch gets a new scope for canonicalization. + // Shadow `scope` to make sure we don't accidentally use the original one for the + // rest of this block. + let mut scope = scope.clone(); - let can_branches = branches.into_iter().map(|(loc_pattern, loc_expr)| { // Exclude the current ident from shadowable_idents; you can't shadow yourself! // (However, still include it in scope, because you *can* recursively refer to yourself.) - let mut shadowable_idents = idents_in_scope.clone(); + let mut shadowable_idents = scope.idents.clone(); remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); - let can_pattern = canonicalize_pattern(env, &CaseBranch, loc_pattern.clone(), &mut idents_in_scope.clone(), &mut idents_in_scope.clone()); + let can_pattern = canonicalize_pattern(env, &mut scope, &CaseBranch, &loc_pattern, &mut shadowable_idents); // Patterns introduce new idents to the scope! - let mut new_idents_in_scope = ImMap::default(); + // Add the assigned identifiers to scope. If there's a collision, it means there + // was shadowing, which will be handled later. 
+ let assigned_idents: ImMap = + idents_from_patterns(std::iter::once(loc_pattern), &scope); - add_idents_to_scope(loc_pattern, &mut new_idents_in_scope); + scope.idents = scope.idents.union(assigned_idents.clone()); - let branch_idents_in_scope = idents_in_scope.clone().union(new_idents_in_scope.clone()); - let (can_expr, branch_out) = canonicalize(env, loc_expr, &branch_idents_in_scope); + let (can_expr, branch_output) = canonicalize(env, &mut scope, loc_expr); - output.applied_variants = output.applied_variants.clone().union(branch_out.applied_variants); - output.referenced_idents = output.referenced_idents.clone().union(branch_out.referenced_idents); + output.references = output.references.union(branch_output.references); // If all branches are tail calling the same symbol, then so is the conditional as a whole. if !recorded_tail_call { // If we haven't recorded output.tail_call yet, record it. - output.tail_call = branch_out.tail_call; + output.tail_call = branch_output.tail_call; recorded_tail_call = true; - } else if branch_out.tail_call != output.tail_call { - // If we recorded output.tail_call, but what we recorded is - // different from what we just saw, then game over. This isn't - // a potential self tail call! + } else if branch_output.tail_call != output.tail_call { + // If we recorded output.tail_call, but what we recorded differs from what we just saw, + // then game over. This can't possibly be a self tail call! output.tail_call = None; } // Now that we've collected all the references for this branch, check to see if // any of the new idents it defined were unused. If any were, report it. 
- for (ident, loc_expr) in new_idents_in_scope { - if !output.referenced_idents.contains(&ident) { - env.problem(Problem::UnusedAssignment(loc_expr)); + for (ident, (symbol, region)) in assigned_idents { + if !output.references.has_local(&symbol) { + let loc_ident = Located {region: region.clone(), value: ident.clone()}; + + env.problem(Problem::UnusedAssignment(loc_ident)); } } - (can_pattern, can_expr) - }).collect(); + can_branches.push((can_pattern, can_expr)); + } + + // One of the branches should have flipped this, so this should only happen + // in the situation where the case had no branches. That can come up, though! + // A case with no branches is a runtime error, but it will mess things up + // if code gen mistakenly thinks this is a tail call just because its condition + // happened to be one. (The condition gave us our initial output value.) + if !recorded_tail_call { + output.tail_call = None; + } // Incorporate all three expressions into a combined Output value. let expr = Case(Box::new(can_cond), can_branches); @@ -696,93 +747,71 @@ fn canonicalize( } fn get_all_referenced( - name: Ident, - visited: &mut ImSet, - referenced_idents_by_assigned_name: &MutMap)> -) -> ImSet { - match referenced_idents_by_assigned_name.get(&name) { - Some((_, idents)) => { - let mut output = ImSet::default(); + assigned_symbol: Symbol, + visited: &mut ImSet, + refs_by_assignment: &MutMap +) -> References { + match refs_by_assignment.get(&assigned_symbol) { + Some((_, refs)) => { + let mut answer = References::new(); - visited.insert(name); + visited.insert(assigned_symbol); - for ident in idents { - match ident { - unqualified @ Ident::Unqualified(_) => { - output.insert(unqualified.clone()); + for local in refs.locals.clone() { + if !visited.contains(&local) { + let other_refs = get_all_referenced(local.clone(), visited, refs_by_assignment); - if !visited.contains(&unqualified) { - let other_refs = get_all_referenced(unqualified.clone(), visited, 
referenced_idents_by_assigned_name); + answer = answer.union(other_refs); + } - output = output.union(other_refs); - } - } - Ident::Qualified(_, _) => () - }; + answer.locals.insert(local); } - output + answer }, - None => ImSet::default() + None => References::new() } } -fn local_idents_in_pattern(pattern: &Pattern) -> ImSet { - use self::Pattern::*; +fn idents_from_patterns(loc_patterns: I, scope: &Scope) -> ImMap +where I: Iterator> +{ + let mut answer = ImMap::default(); - let mut output = ImSet::default(); + for loc_pattern in loc_patterns { + add_idents_from_pattern(loc_pattern, scope, &mut answer); + } - // Insert any identifiers we find into the referenced_idents_by_name map. - match pattern { - &Identifier(ref local_symbol_ref) => { - output.insert(local_symbol_ref.clone().into()); - }, - &Variant(_, ref opt_args) => { - match opt_args { - &Some(ref loc_args) => { - for loc_arg in loc_args { - output = output.union(local_idents_in_pattern(&loc_arg.value)); - } - }, - &None => () - } - }, - &Shadowed(_) | &Integer(_) | &Fraction(_, _) | &UnsupportedPattern(_) - | &ExactString(_) | &EmptyRecordLiteral | &Underscore => () - }; - - output + answer } - -fn add_idents_to_scope( - pattern: Located, - idents_in_scope: &mut ImMap> +/// helper function for idents_from_patterns +fn add_idents_from_pattern( + loc_pattern: Located, + scope: &Scope, + answer: &mut ImMap ) { use expr::Pattern::*; - match pattern.value { + match loc_pattern.value { Identifier(name) => { - let loc_ident = Located { - region: pattern.region.clone(), - value: Ident::Unqualified(name.clone()) - }; + let symbol = scope.symbol(&name); - idents_in_scope.insert(Ident::Unqualified(name), loc_ident); + answer.insert(Ident::Unqualified(name), (symbol, loc_pattern.region)); }, Variant(_, Some(loc_args)) => { for loc_arg in loc_args { - add_idents_to_scope(loc_arg, idents_in_scope); + add_idents_from_pattern(loc_arg, scope, answer); } }, Variant(_, None) | Integer(_) | Fraction(_, _) | ExactString(_) 
- | EmptyRecordLiteral | Underscore => {} + | EmptyRecordLiteral | Underscore => () } } fn remove_idents( pattern: expr::Pattern, - idents: &mut ImMap> + idents: &mut ImMap ) { use expr::Pattern::*; @@ -802,28 +831,41 @@ fn remove_idents( #[inline(always)] // This is shared code between Var() and CallByName(); it was inlined when handwritten fn resolve_ident( env: &Env, + scope: &Scope, ident: Ident, - referenced_idents: &mut ImSet, - idents_in_scope: &ImMap>, -) -> Result, Ident> { - if idents_in_scope.contains_key(&ident) { - referenced_idents.insert(ident.clone()); + references: &mut References +) -> Result { + if scope.idents.contains_key(&ident) { + let recognized = match ident { + Ident::Unqualified(name) => { + let symbol = scope.symbol(&name); - match ident { - Ident::Unqualified(name) => Ok(Symbol::resolved_local(name)), - Ident::Qualified(path, name) => Ok(Symbol::resolved_global(path, name)) - } + references.locals.insert(symbol.clone()); + + symbol + } + Ident::Qualified(path, name) => { + let symbol = Symbol::new(&path, &name); + + references.globals.insert(symbol.clone()); + + symbol + } + }; + + Ok(recognized) } else { match ident { Ident::Unqualified(name) => { // Try again, this time using the current module as the path. - let path = env.home.clone(); - let qualified = Ident::Qualified(path.clone(), name.clone()); + let qualified = Ident::Qualified(env.home.clone(), name.clone()); - if idents_in_scope.contains_key(&qualified) { - referenced_idents.insert(qualified.clone()); + if scope.idents.contains_key(&qualified) { + let symbol = Symbol::new(&env.home, &name); - Ok(Symbol::resolved_global(path, name)) + references.globals.insert(symbol.clone()); + + Ok(symbol) } else { // We couldn't find the unqualified ident in scope. NAMING PROBLEM! 
Err(Ident::Unqualified(name)) @@ -843,21 +885,19 @@ fn resolve_ident( #[inline(always)] fn resolve_variant_name( env: &Env, + scope: &Scope, variant_name: VariantName, - applied_variants: &mut ImSet<(Path, String)>, -) -> Result, VariantName> { - let qualified = match variant_name { - VariantName::Unqualified(name) => (env.home.clone(), name), - VariantName::Qualified(path, name) => (path, name) - }; + references: &mut References +) -> Result { + let symbol = Symbol::from_variant(&variant_name, &env.home); - if env.declared_variants.contains_key(&qualified) { - applied_variants.insert(qualified.clone()); + if env.variants.contains_key(&symbol) { + references.variants.insert(symbol.clone()); - Ok(GlobalSymbol::recognized(qualified.0, qualified.1)) + Ok(symbol) } else { // We couldn't find the qualified variant name in scope. NAMING PROBLEM! - Err(VariantName::Qualified(qualified.0, qualified.1)) + Err(variant_name) } } @@ -874,19 +914,19 @@ pub enum PatternType { fn canonicalize_pattern( env: &mut Env, + scope: &mut Scope, pattern_type: &PatternType, - loc_pattern: Located, - idents_in_scope: &mut ImMap>, - shadowable_idents: &mut ImMap>, -) -> Located { + loc_pattern: &Located, + shadowable_idents: &mut ImMap, +) -> Pattern { use expr::Pattern::*; let region = loc_pattern.region; - let new_pattern = match &loc_pattern.value { + match &loc_pattern.value { &Identifier(ref name) => { let unqualified_ident = Ident::Unqualified(name.clone()); - // We use shadowable_idents for this, and not idents_in_scope, because for assignments + // We use shadowable_idents for this, and not scope, because for assignments // they are different. When canonicalizing a particular assignment, that new // ident is in scope (for recursion) but not shadowable. // @@ -894,44 +934,50 @@ fn canonicalize_pattern( // so that it can refer to itself without getting a naming problem, but it should not // be in the collection of shadowable idents because you can't shadow yourself! 
match shadowable_idents.get(&unqualified_ident) { - Some(shadowed_ident) => { + Some((_, region)) => { + let loc_shadowed_ident = Located {region: region.clone(), value: unqualified_ident}; + // This is already in scope, meaning it's about to be shadowed. // Shadowing is not allowed! - env.problem(Problem::Shadowing(shadowed_ident.clone())); + env.problem(Problem::Shadowing(loc_shadowed_ident.clone())); // Change this Pattern to a Shadowed variant, so that // codegen knows to generate a runtime exception here. - Pattern::Shadowed(shadowed_ident.clone()) + Pattern::Shadowed(loc_shadowed_ident) }, None => { + // Make sure we aren't shadowing something in the home module's scope. let qualified_ident = Ident::Qualified(env.home.clone(), unqualified_ident.name()); - match idents_in_scope.get(&qualified_ident) { - Some(shadowed_ident) => { + match scope.idents.get(&qualified_ident) { + Some((_, region)) => { + let loc_shadowed_ident = Located {region: region.clone(), value: qualified_ident}; + // This is already in scope, meaning it's about to be shadowed. // Shadowing is not allowed! - env.problem(Problem::Shadowing(shadowed_ident.clone())); + env.problem(Problem::Shadowing(loc_shadowed_ident.clone())); // Change this Pattern to a Shadowed variant, so that // codegen knows to generate a runtime exception here. - Pattern::Shadowed(shadowed_ident.clone()) + Pattern::Shadowed(loc_shadowed_ident) }, None => { let new_ident = qualified_ident.clone(); let new_name = qualified_ident.name(); + let symbol = scope.symbol(&new_name); // This is a fresh identifier that wasn't already in scope. // Add it to scope! - let located = Located {region, value: expr::Ident::Unqualified(new_name.clone())}; + let symbol_and_region = (symbol.clone(), region); - // Add this to both idents_in_scope *and* shadowable_idents. + // Add this to both scope.idents *and* shadowable_idents. 
// The latter is relevant when recursively canonicalizing Variant patterns, // which can bring multiple new idents into scope. For example, it's important // that we catch (Blah foo foo) as being an example of shadowing. - idents_in_scope.insert(new_ident.clone(), located.clone()); - shadowable_idents.insert(new_ident, located); + scope.idents.insert(new_ident.clone(), symbol_and_region.clone()); + shadowable_idents.insert(new_ident, symbol_and_region); - Pattern::Identifier(LocalSymbol::new(new_name)) + Pattern::Identifier(symbol) } } } @@ -939,17 +985,14 @@ fn canonicalize_pattern( }, &Variant(ref loc_name, ref opt_args) => { - // Canonicalize the variant's name. - let can_name = canonicalize_variant_name(env, loc_name.clone()); - // Canonicalize the variant's arguments, if it has any. - let opt_can_args: Option>> = match opt_args { + let opt_can_args: Option> = match opt_args { None => None, Some(loc_args) => { - let mut can_args:Vec> = Vec::new(); + let mut can_args:Vec = Vec::new(); for loc_arg in loc_args { - let can_arg = canonicalize_pattern(env, pattern_type, loc_arg.clone(), idents_in_scope, shadowable_idents); + let can_arg = canonicalize_pattern(env, scope, pattern_type, &loc_arg, shadowable_idents); can_args.push(can_arg); } @@ -958,7 +1001,18 @@ fn canonicalize_pattern( } }; - Pattern::Variant(can_name, opt_can_args) + // Canonicalize the variant's name. + let symbol = Symbol::from_variant(&loc_name.value, &env.home); + + if env.variants.contains_key(&symbol) { + // No problems; the qualified variant name was in scope! + Pattern::Variant(symbol, opt_can_args) + } else { + // We couldn't find the variant name in scope. NAMING PROBLEM! 
+ env.problem(Problem::UnrecognizedVariant(loc_name.clone())); + + Pattern::UnrecognizedVariant(loc_name.clone()) + } }, &Integer(ref num) => { @@ -990,9 +1044,7 @@ fn canonicalize_pattern( }, &EmptyRecordLiteral => Pattern::EmptyRecordLiteral, - }; - - Located {region, value: new_pattern} + } } /// When we detect an unsupported pattern type (e.g. 5 = 1 + 2 is unsupported because you can't @@ -1005,26 +1057,6 @@ fn unsupported_pattern(env: &mut Env, pattern_type: PatternType, region: &Region Pattern::UnsupportedPattern(loc_problem_pattern) } -fn canonicalize_variant_name( - env: &mut Env, - loc_name: Located -) -> Resolved { - let qualified_name = match &loc_name.value { - &VariantName::Unqualified(ref name) => ( env.home.clone(), name.clone() ), - &VariantName::Qualified(ref path, ref name) => ( path.clone(), name.clone() ) - }; - - if env.declared_variants.contains_key(&qualified_name) { - // No problems; the qualified variant name was in scope! - Symbol::resolved_global(qualified_name.0, qualified_name.1) - } else { - // We couldn't find the variant name in scope. NAMING PROBLEM! - env.problem(Problem::UnrecognizedVariant(loc_name.clone())); - - Resolved::UnrecognizedVariant(loc_name) - } -} - // OPERATOR PRECEDENCE // Precedence logic adapted from Gluon by Markus Westerlind, MIT licensed diff --git a/src/expr.rs b/src/expr.rs index aa86d8ec81..f4d49cf879 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -40,7 +40,7 @@ pub enum Expr { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum VariantName { Unqualified(String), - Qualified(Path, String), + Qualified(String, String), } /// An identifier, possibly fully-qualified with a module name @@ -49,7 +49,7 @@ pub enum VariantName { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Ident { Unqualified(String), - Qualified(Path, String), + Qualified(String, String), } impl Ident { @@ -68,34 +68,6 @@ impl Ident { } } -/// A path to a module, which may include the package it came from. 
-#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct Path(String); - -impl Path { - pub fn new(string: String) -> Path { - Path(string) - } - - pub fn into_string(self) -> String { - let Path(str) = self; - - str - } -} - -impl Into for Path { - fn into(self) -> String { - self.into_string() - } -} - -impl From for Path { - fn from(str: String) -> Self { - Path(str) - } -} - impl fmt::Display for Ident { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -103,7 +75,7 @@ impl fmt::Display for Ident { write!(f, "{}", name) }, Ident::Qualified(path, name) => { - write!(f, "{}.{}", path.clone().into_string(), name) + write!(f, "{}.{}", path, name) } } } @@ -116,7 +88,7 @@ impl fmt::Display for VariantName { write!(f, "{}", name) }, VariantName::Qualified(path, name) => { - write!(f, "{}.{}", path.clone().into_string(), name) + write!(f, "{}.{}", path, name) } } } diff --git a/src/optimize.rs b/src/optimize.rs new file mode 100644 index 0000000000..d9a707ce09 --- /dev/null +++ b/src/optimize.rs @@ -0,0 +1,36 @@ +// PHILOSOPHY +// +// Focus on optimizations which are only safe in the absence of side effects, and leave the rest to LLVM. +// +// This focus may lead to some optimizations becoming transitively in scope. For example, some deforestation +// examples in the MSR paper benefit from multiple rounds of interleaved deforestation, beta-reduction, and inlining. +// To get those benefits, we'd have to do some inlining and beta-reduction that we could otherwise leave to LLVM's +// inlining and constant propagation/folding. +// +// Even if we're doing those things, it may still make sense to have LLVM do a pass for them as well, since +// early LLVM optimization passes may unlock later opportunities for inlining and constant propagation/folding. +// +// INLINING +// +// If a function is called exactly once (it's a helper function), presumably we always want to inline those. +// If a function is "small enough" it's probably worth inlining too. 
+// +// FUSION +// +// https://www.microsoft.com/en-us/research/wp-content/uploads/2016/07/deforestation-short-cut.pdf +// +// Basic approach: +// +// Do list stuff using `build` passing Cons Nil (like a cons list) and then do foldr/build substitution/reduction. +// Afterwards, we can do a separate pass to flatten nested Cons structures into properly initialized RRBTs. +// This way we get both deforestation and efficient RRBT construction. Should work for the other collection types too. +// +// It looks like we need to do some amount of inlining and beta reductions on the Roc side, rather than +// leaving all of those to LLVM. +// +// Advanced approach: +// +// Express operations like map and filter in terms of toStream and fromStream, to unlock more deforestation. +// More info here: +// +// https://wiki.haskell.org/GHC_optimisations#Fusion diff --git a/tests/test_canonicalize.rs b/tests/test_canonicalize.rs index 5d791b0e10..bc381a7039 100644 --- a/tests/test_canonicalize.rs +++ b/tests/test_canonicalize.rs @@ -9,28 +9,27 @@ mod helpers; #[cfg(test)] mod test_canonicalize { use roc::canonicalize; - use roc::canonicalize::{Expr, Output, Problem, Resolved, LocalSymbol, Symbol}; + use roc::canonicalize::{Expr, Output, Problem, Symbol, References}; use roc::canonicalize::Expr::*; - use roc::canonicalize::Pattern::*; - use roc::expr::{Path, Ident}; - use roc::operator::Operator::*; + use roc::expr::{Ident}; use roc::expr; - use roc::region::Located; + use roc::region::{Located, Region}; use roc::parse; use roc::collections::{ImMap, ImSet}; use roc::parse_state::{IndentablePosition}; use combine::{Parser, eof}; use combine::stream::state::{State}; - use helpers::{loc, loc_box, zero_loc_expr}; + use helpers::{loc, zero_loc_expr}; fn can_expr(expr_str: &str) -> (Expr, Output, Vec) { - can_expr_with(expr_str, &ImMap::default(), &ImMap::default()) + can_expr_with("testDecl", expr_str, &ImMap::default(), &ImMap::default()) } fn can_expr_with( + name: &str, expr_str: 
&str, - declared_idents: &ImMap>, - declared_variants: &ImMap<(Path, String), Located>, + declared_idents: &ImMap, + declared_variants: &ImMap>, ) -> (Expr, Output, Vec) { let parse_state: State<&str, IndentablePosition> = State::with_positioner(expr_str, IndentablePosition::default()); let expr = match parse::expr().skip(eof()).easy_parse(parse_state) { @@ -47,23 +46,15 @@ mod test_canonicalize { } }; - let home = Path::new("TestModule".to_string()); + let home = "TestModule".to_string(); let (loc_expr, output, problems) = - canonicalize::canonicalize_declaration(home, loc(zero_loc_expr(expr)), declared_idents, declared_variants); + canonicalize::canonicalize_declaration(home, name, loc(zero_loc_expr(expr)), declared_idents, declared_variants); (loc_expr.value, output, problems) } - fn recognized_local_sym(string: &str) -> Resolved { - Resolved::Recognized(local_sym(string)) - } - - fn local_sym(string: &str) -> Symbol { - Symbol::Local(local(string)) - } - - fn local(string: &str) -> LocalSymbol { - LocalSymbol::new(string.to_string()) + fn sym(name: &str) -> Symbol { + Symbol::new("TestModule$testDecl$", name) } fn unqualified(string :&str) -> Ident { @@ -74,32 +65,29 @@ mod test_canonicalize { Problem::UnusedAssignment(loc(unqualified(string))) } - fn check_output( - output: Output, - applied_variants: Vec<(Path, &str)>, - referenced_idents: Vec<(Option, &str)>, - tail_call: Option - ) { - assert_eq!( - output, - Output { - referenced_idents: - ImSet::from( - referenced_idents.into_iter().map(|(opt_path, str_ref)| - match opt_path { - Some(path) => Ident::Qualified(path, str_ref.to_string()), - None => Ident::Unqualified(str_ref.to_string()) - } - ).collect::>() - ), - applied_variants: - ImSet::from( - applied_variants.into_iter().map(|(path, str_ref)| - (path, str_ref.to_string()), - ).collect::>()), - tail_call + struct Out<'a> { + locals: Vec<&'a str>, + globals: Vec<&'a str>, + variants: Vec<&'a str>, + tail_call: Option<&'a str> + } + + impl<'a> Into for 
Out<'a> { + fn into(self) -> Output { + fn vec_to_set<'b>(vec: Vec<&'b str>) -> ImSet { + ImSet::from(vec.into_iter().map(sym).collect::>()) } - ); + + let references = References { + locals: vec_to_set(self.locals), + globals: vec_to_set(self.globals), + variants: vec_to_set(self.variants) + }; + + let tail_call = self.tail_call.map(sym); + + Output {references, tail_call} + } } #[test] @@ -113,15 +101,20 @@ mod test_canonicalize { ]); assert_eq!(expr, - Var(Resolved::UnrecognizedConstant(loc(Ident::Unqualified("x".to_string())))) + UnrecognizedConstant(loc(Ident::Unqualified("x".to_string()))) ); - check_output(output, vec![], vec![], None); + assert_eq!(output, Out { + locals: vec![], + globals: vec![], + variants: vec![], + tail_call: None + }.into()); } #[test] fn complex_unrecognized_constant() { - let (expr, output, problems) = can_expr(indoc!(r#" + let (_, output, problems) = can_expr(indoc!(r#" a = 5 b = 6 @@ -132,25 +125,12 @@ mod test_canonicalize { Problem::UnrecognizedConstant(loc(Ident::Unqualified("z".to_string()))) ]); - assert_eq!(expr, - Assign( - vec![ - (loc(Identifier(local("a"))), loc(Int(5))), - (loc(Identifier(local("b"))), loc(Int(6))), - ], - loc_box(Operator( - loc_box(Var(recognized_local_sym("a"))), - loc(Plus), - loc_box(Operator( - loc_box(Var(recognized_local_sym("b"))), - loc(Star), - loc_box(Var(Resolved::UnrecognizedConstant(loc(Ident::Unqualified("z".to_string()))))) - )), - )) - ) - ); - - check_output(output, vec![], vec![(None, "a"), (None, "b")], None); + assert_eq!(output, Out { + locals: vec!["a", "b"], + globals: vec![], + variants: vec![], + tail_call: None + }.into()); } #[test] @@ -166,7 +146,12 @@ mod test_canonicalize { assert_eq!(problems, vec![unused("b"), unused("a")]); - check_output(output, vec![], vec![(None, "c")], None); + assert_eq!(output, Out { + locals: vec!["c"], + globals: vec![], + variants: vec![], + tail_call: None + }.into()); } @@ -184,11 +169,12 @@ mod test_canonicalize { assert_eq!(problems, 
vec![]); - check_output(output, - vec![], - vec![(None, "num"), (None, "fibonacci")], - Some(local_sym("fibonacci")) - ); + assert_eq!(output, Out { + locals: vec!["num", "fibonacci"], + globals: vec![], + variants: vec![], + tail_call: Some("fibonacci") + }.into()); } // UNSUPPORTED PATTERNS