Attempt to sort all the symbols.

This commit is contained in:
Richard Feldman 2019-07-31 22:48:02 -04:00
parent 83cbc1d927
commit e3e92b56fb
6 changed files with 240 additions and 79 deletions

View file

@ -2,7 +2,7 @@ use region::{Located, Region};
use operator::Operator; use operator::Operator;
use operator::Operator::Pizza; use operator::Operator::Pizza;
use operator::Associativity::*; use operator::Associativity::*;
use collections::{ImSet, ImMap, MutMap}; use collections::{ImSortedSet, ImSortedMap, MutMap, MutSortedMap, MutSet};
use std::cmp::Ordering; use std::cmp::Ordering;
use expr::{Ident, VariantName}; use expr::{Ident, VariantName};
use expr; use expr;
@ -85,7 +85,7 @@ pub enum Pattern {
/// A globally unique identifier, used for both vars and variants. /// A globally unique identifier, used for both vars and variants.
/// It will be used directly in code gen. /// It will be used directly in code gen.
#[derive(Clone, Debug, PartialEq, Eq, Hash)] #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Symbol(String); pub struct Symbol(String);
impl Symbol { impl Symbol {
@ -114,13 +114,13 @@ impl Into<String> for Symbol {
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
struct Scope { struct Scope {
pub idents: ImMap<Ident, (Symbol, Region)>, pub idents: ImSortedMap<Ident, (Symbol, Region)>,
symbol_prefix: String, symbol_prefix: String,
next_unique_id: u64, next_unique_id: u64,
} }
impl Scope { impl Scope {
pub fn new(symbol_prefix: String, declared_idents: ImMap<Ident, (Symbol, Region)>) -> Scope { pub fn new(symbol_prefix: String, declared_idents: ImSortedMap<Ident, (Symbol, Region)>) -> Scope {
Scope { Scope {
symbol_prefix, symbol_prefix,
@ -146,22 +146,22 @@ impl Scope {
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Procedure { pub struct Procedure {
pub name: Option<String>, pub name: Option<String>,
pub closes_over: ImSet<Symbol>,
pub is_self_tail_recursive: bool, pub is_self_tail_recursive: bool,
pub definition: Region, pub definition: Region,
pub args: Vec<Pattern>, pub args: Vec<Pattern>,
pub body: Expr pub body: Expr,
pub references: References,
} }
impl Procedure { impl Procedure {
pub fn new(definition: Region, closes_over: ImSet<Symbol>, args: Vec<Pattern>, body: Expr) -> Procedure { pub fn new(definition: Region, args: Vec<Pattern>, body: Expr, references: References) -> Procedure {
Procedure { Procedure {
name: None, name: None,
closes_over,
is_self_tail_recursive: false, is_self_tail_recursive: false,
definition, definition,
args, args,
body body,
references,
} }
} }
} }
@ -176,19 +176,19 @@ struct Env {
problems: Vec<Problem>, problems: Vec<Problem>,
/// Variants either declared in this module, or imported. /// Variants either declared in this module, or imported.
variants: ImMap<Symbol, Located<expr::VariantName>>, variants: ImSortedMap<Symbol, Located<expr::VariantName>>,
/// Former closures converted to top-level procedures. /// Former closures converted to top-level procedures.
procedures: MutMap<Symbol, Procedure>, procedures: MutSortedMap<Symbol, Procedure>,
} }
impl Env { impl Env {
pub fn new(home: String, declared_variants: ImMap<Symbol, Located<expr::VariantName>>) -> Env { pub fn new(home: String, declared_variants: ImSortedMap<Symbol, Located<expr::VariantName>>) -> Env {
Env { Env {
home, home,
variants: declared_variants, variants: declared_variants,
problems: Vec::new(), problems: Vec::new(),
procedures: MutMap::default(), procedures: MutSortedMap::default(),
} }
} }
@ -199,16 +199,16 @@ impl Env {
pub fn register_closure( pub fn register_closure(
&mut self, &mut self,
symbol: Symbol, symbol: Symbol,
closes_over: ImSet<Symbol>,
args: Vec<Pattern>, args: Vec<Pattern>,
body: Expr, body: Expr,
definition: Region definition: Region,
references: References
) -> () { ) -> () {
// We can't if the closure is self tail recursive yet, because it doesn't know its final name yet. // We can't if the closure is self tail recursive yet, because it doesn't know its final name yet.
// (Assign sets that.) Assume this is false, and let Assign change it to true after it sets final name. // (Assign sets that.) Assume this is false, and let Assign change it to true after it sets final name.
let is_self_tail_recursive = false; let is_self_tail_recursive = false;
let name = None; // The Assign logic is also responsible for setting names after the fact. let name = None; // The Assign logic is also responsible for setting names after the fact.
let procedure = Procedure {closes_over, args, name, body, is_self_tail_recursive, definition}; let procedure = Procedure {args, name, body, is_self_tail_recursive, definition, references};
self.procedures.insert(symbol, procedure); self.procedures.insert(symbol, procedure);
} }
@ -218,9 +218,9 @@ pub fn canonicalize_declaration(
home: String, home: String,
name: &str, name: &str,
loc_expr: Located<expr::Expr>, loc_expr: Located<expr::Expr>,
declared_idents: &ImMap<Ident, (Symbol, Region)>, declared_idents: &ImSortedMap<Ident, (Symbol, Region)>,
declared_variants: &ImMap<Symbol, Located<expr::VariantName>>, declared_variants: &ImSortedMap<Symbol, Located<expr::VariantName>>,
) -> (Located<Expr>, Output, Vec<Problem>, MutMap<Symbol, Procedure>) { ) -> (Located<Expr>, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
// If we're canonicalizing the declaration `foo = ...` inside the `Main` module, // If we're canonicalizing the declaration `foo = ...` inside the `Main` module,
// scope_prefix will be "Main$foo$" and its first closure will be named "Main$foo$0" // scope_prefix will be "Main$foo$" and its first closure will be named "Main$foo$0"
let scope_prefix = format!("{}${}$", home, name); let scope_prefix = format!("{}${}$", home, name);
@ -244,19 +244,24 @@ pub struct Output {
pub tail_call: Option<Symbol>, pub tail_call: Option<Symbol>,
} }
/// These are all ordered sets because they end up getting traversed in a graph search
/// to determine how assignments should be ordered. We want builds to be reproducible,
/// so it's important that building the same code gives the same order every time!
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct References { pub struct References {
pub locals: ImSet<Symbol>, pub locals: ImSortedSet<Symbol>,
pub globals: ImSet<Symbol>, pub globals: ImSortedSet<Symbol>,
pub variants: ImSet<Symbol>, pub variants: ImSortedSet<Symbol>,
pub calls: ImSortedSet<Symbol>,
} }
impl References { impl References {
pub fn new() -> References { pub fn new() -> References {
References { References {
locals: ImSet::default(), locals: ImSortedSet::default(),
globals: ImSet::default(), globals: ImSortedSet::default(),
variants: ImSet::default(), variants: ImSortedSet::default(),
calls: ImSortedSet::default(),
} }
} }
@ -264,6 +269,7 @@ impl References {
self.locals = self.locals.union(other.locals); self.locals = self.locals.union(other.locals);
self.globals = self.globals.union(other.globals); self.globals = self.globals.union(other.globals);
self.variants = self.variants.union(other.variants); self.variants = self.variants.union(other.variants);
self.calls = self.calls.union(other.calls);
self self
} }
@ -414,6 +420,9 @@ fn canonicalize(
let can_expr = let can_expr =
match resolve_ident(&env, &scope, ident, &mut output.references) { match resolve_ident(&env, &scope, ident, &mut output.references) {
Ok(symbol) => { Ok(symbol) => {
// Record that we did, in fact, call this symbol.
output.references.calls.insert(symbol.clone());
// CallByName expressions are considered tail calls, // CallByName expressions are considered tail calls,
// so that their parents in the expression tree will // so that their parents in the expression tree will
// correctly inherit tail-call-ness from them. // correctly inherit tail-call-ness from them.
@ -505,13 +514,13 @@ fn canonicalize(
// Add the assigned identifiers to scope. If there's a collision, it means there // Add the assigned identifiers to scope. If there's a collision, it means there
// was shadowing, which will be handled later. // was shadowing, which will be handled later.
let assigned_idents: ImMap<Ident, (Symbol, Region)> = let assigned_idents: ImSortedMap<Ident, (Symbol, Region)> =
idents_from_patterns(assignments.clone().into_iter().map(|(loc_pattern, _)| loc_pattern), &scope); idents_from_patterns(assignments.clone().into_iter().map(|(loc_pattern, _)| loc_pattern), &scope);
scope.idents = scope.idents.union(assigned_idents.clone()); scope.idents = scope.idents.union(assigned_idents.clone());
let mut refs_by_assignment: MutMap<Symbol, (Located<Ident>, References)> = MutMap::default(); let mut refs_by_assignment: MutMap<Symbol, (Located<Ident>, References)> = MutMap::default();
let mut can_assignments_by_symbol: MutMap<Symbol, (Pattern, Located<Expr>)> = MutMap::default(); let mut can_assignments_by_symbol: MutSortedMap<Symbol, (Pattern, Located<Expr>)> = MutSortedMap::default();
for (loc_pattern, expr) in assignments { for (loc_pattern, expr) in assignments {
// Each assignment gets to have all the idents in scope that are assigned in this // Each assignment gets to have all the idents in scope that are assigned in this
@ -564,6 +573,14 @@ fn canonicalize(
// when code elsewhere calls it by assigned name, it'll resolve properly. // when code elsewhere calls it by assigned name, it'll resolve properly.
env.procedures.insert(assigned_symbol.clone(), procedure); env.procedures.insert(assigned_symbol.clone(), procedure);
// Recursion doesn't count as referencing. (If it did, all recursive functions
// would result in circular assignment errors!)
refs_by_assignment
.entry(assigned_symbol.clone())
.and_modify(|(_, refs)| {
refs.locals = refs.locals.without(assigned_symbol);
});
// Return a pointer to the assigned symbol, since the auto-generated one no // Return a pointer to the assigned symbol, since the auto-generated one no
// longer references any entry in the procedure map! // longer references any entry in the procedure map!
FunctionPointer(assigned_symbol.clone()) FunctionPointer(assigned_symbol.clone())
@ -587,7 +604,7 @@ fn canonicalize(
let (ret_expr, mut output) = canonicalize(env, &mut scope, *box_loc_returned); let (ret_expr, mut output) = canonicalize(env, &mut scope, *box_loc_returned);
// Determine the full set of references by traversing the graph. // Determine the full set of references by traversing the graph.
let mut visited_symbols = ImSet::default(); let mut visited_symbols = MutSet::default();
// Start with the return expression's referenced locals. They are the only ones that count! // Start with the return expression's referenced locals. They are the only ones that count!
// //
@ -602,7 +619,16 @@ fn canonicalize(
// we'd erroneously give a warning that `b` was unused since it wasn't directly referenced. // we'd erroneously give a warning that `b` was unused since it wasn't directly referenced.
for symbol in output.references.locals.clone().into_iter() { for symbol in output.references.locals.clone().into_iter() {
// Traverse the graph and look up *all* the references for this local symbol. // Traverse the graph and look up *all* the references for this local symbol.
let refs = get_all_referenced(symbol, &mut visited_symbols, &refs_by_assignment); let refs = references_from_local(symbol, &mut visited_symbols, &refs_by_assignment, &env.procedures);
output.references = output.references.union(refs);
}
for symbol in output.references.calls.clone().into_iter() {
// Traverse the graph and look up *all* the references for this call.
// Reuse the same visited_symbols as before; if we already visited it, we
// won't learn anything new from visiting it again!
let refs = references_from_call(symbol, &mut visited_symbols, &refs_by_assignment, &env.procedures);
output.references = output.references.union(refs); output.references = output.references.union(refs);
} }
@ -621,7 +647,7 @@ fn canonicalize(
// This way, during code gen, no assignment will refer to a value that hasn't been initialized yet. // This way, during code gen, no assignment will refer to a value that hasn't been initialized yet.
// As a bonus, the topological sort also reveals any cycles between the assignments, allowing // As a bonus, the topological sort also reveals any cycles between the assignments, allowing
// us to give a CircularAssignment error. // us to give a CircularAssignment error.
let successors = |symbol: &Symbol| -> ImSet<Symbol> { let successors = |symbol: &Symbol| -> ImSortedSet<Symbol> {
let (_, references) = refs_by_assignment.get(symbol).unwrap(); let (_, references) = refs_by_assignment.get(symbol).unwrap();
references.locals.clone() references.locals.clone()
@ -635,6 +661,7 @@ fn canonicalize(
let can_assignments = let can_assignments =
sorted_symbols sorted_symbols
.into_iter() .into_iter()
.rev() // Topological sort gives us the reverse of the sorting we want!
.map(|symbol| can_assignments_by_symbol.get(&symbol).unwrap().clone()) .map(|symbol| can_assignments_by_symbol.get(&symbol).unwrap().clone())
.collect(); .collect();
@ -675,7 +702,7 @@ fn canonicalize(
// Add the arguments' idents to scope.idents. If there's a collision, // Add the arguments' idents to scope.idents. If there's a collision,
// it means there was shadowing, which will be handled later. // it means there was shadowing, which will be handled later.
let arg_idents: ImMap<Ident, (Symbol, Region)> = let arg_idents: ImSortedMap<Ident, (Symbol, Region)> =
idents_from_patterns(loc_arg_patterns.clone().into_iter(), &scope); idents_from_patterns(loc_arg_patterns.clone().into_iter(), &scope);
scope.idents = scope.idents.union(arg_idents.clone()); scope.idents = scope.idents.union(arg_idents.clone());
@ -704,12 +731,14 @@ fn canonicalize(
output.references.locals.remove(&arg_symbol); output.references.locals.remove(&arg_symbol);
} }
// We only ever need to close over locals. Globals are always available! // We've finished analyzing the closure. Its references.locals are now the values it closes over,
// Note: This must happen *after* removing args from locals. Never close over arguments! // since we removed the only locals it shouldn't close over (its arguments).
let closes_over: ImSet<Symbol> = output.references.locals.clone(); // Register it as a top-level procedure in the Env!
env.register_closure(symbol.clone(), can_args, loc_body_expr.value, region, output.references);
// We've finished analyzing the closure. Register it as a top-level procedure in the Env! // Having now registered the closure's references, the function pointer that remains has
env.register_closure(symbol.clone(), closes_over, can_args, loc_body_expr.value, region); // no references. The references we registered will be used only if this symbol gets called!
output.references = References::new();
// Always return a function pointer, in case that's how the closure is being used (e.g. with Apply). // Always return a function pointer, in case that's how the closure is being used (e.g. with Apply).
// It's possible that Assign will rewrite this. In that case, Assign will need to know the symbol we // It's possible that Assign will rewrite this. In that case, Assign will need to know the symbol we
@ -739,7 +768,7 @@ fn canonicalize(
// Patterns introduce new idents to the scope! // Patterns introduce new idents to the scope!
// Add the assigned identifiers to scope. If there's a collision, it means there // Add the assigned identifiers to scope. If there's a collision, it means there
// was shadowing, which will be handled later. // was shadowing, which will be handled later.
let assigned_idents: ImMap<Ident, (Symbol, Region)> = let assigned_idents: ImSortedMap<Ident, (Symbol, Region)> =
idents_from_patterns(std::iter::once(loc_pattern), &scope); idents_from_patterns(std::iter::once(loc_pattern), &scope);
scope.idents = scope.idents.union(assigned_idents.clone()); scope.idents = scope.idents.union(assigned_idents.clone());
@ -798,10 +827,11 @@ fn canonicalize(
(Located {region, value: expr}, output) (Located {region, value: expr}, output)
} }
fn get_all_referenced<T>( fn references_from_local<T>(
assigned_symbol: Symbol, assigned_symbol: Symbol,
visited: &mut ImSet<Symbol>, visited: &mut MutSet<Symbol>,
refs_by_assignment: &MutMap<Symbol, (T, References)> refs_by_assignment: &MutMap<Symbol, (T, References)>,
procedures: &MutSortedMap<Symbol, Procedure>,
) -> References { ) -> References {
match refs_by_assignment.get(&assigned_symbol) { match refs_by_assignment.get(&assigned_symbol) {
Some((_, refs)) => { Some((_, refs)) => {
@ -809,14 +839,24 @@ fn get_all_referenced<T>(
visited.insert(assigned_symbol); visited.insert(assigned_symbol);
for local in refs.locals.clone() { for local in refs.locals.iter() {
if !visited.contains(&local) { if !visited.contains(&local) {
let other_refs = get_all_referenced(local.clone(), visited, refs_by_assignment); let other_refs = references_from_local(local.clone(), visited, refs_by_assignment, procedures);
answer = answer.union(other_refs); answer = answer.union(other_refs);
} }
answer.locals.insert(local); answer.locals.insert(local.clone());
}
for call in refs.calls.iter() {
if !visited.contains(&call) {
let other_refs = references_from_call(call.clone(), visited, refs_by_assignment, procedures);
answer = answer.union(other_refs);
}
answer.calls.insert(call.clone());
} }
answer answer
@ -825,10 +865,47 @@ fn get_all_referenced<T>(
} }
} }
fn idents_from_patterns<I>(loc_patterns: I, scope: &Scope) -> ImMap<Ident, (Symbol, Region)> fn references_from_call<T>(
call_symbol: Symbol,
visited: &mut MutSet<Symbol>,
refs_by_assignment: &MutMap<Symbol, (T, References)>,
procedures: &MutSortedMap<Symbol, Procedure>,
) -> References {
// This should be safe to unwrap. All unrecognized call symbols should have been recorded as
// such, and should never have made it into output.references.calls!
let procedure = procedures.get(&call_symbol).unwrap();
let mut answer = procedure.references.clone();
visited.insert(call_symbol);
for closed_over_local in procedure.references.locals.iter() {
if !visited.contains(&closed_over_local) {
let other_refs = references_from_local(closed_over_local.clone(), visited, refs_by_assignment, procedures);
answer = answer.union(other_refs);
}
answer.locals.insert(closed_over_local.clone());
}
for call in procedure.references.calls.iter() {
if !visited.contains(&call) {
let other_refs = references_from_call(call.clone(), visited, refs_by_assignment, procedures);
answer = answer.union(other_refs);
}
answer.calls.insert(call.clone());
}
answer
}
fn idents_from_patterns<I>(loc_patterns: I, scope: &Scope) -> ImSortedMap<Ident, (Symbol, Region)>
where I: Iterator<Item = Located<expr::Pattern>> where I: Iterator<Item = Located<expr::Pattern>>
{ {
let mut answer = ImMap::default(); let mut answer = ImSortedMap::default();
for loc_pattern in loc_patterns { for loc_pattern in loc_patterns {
add_idents_from_pattern(loc_pattern, scope, &mut answer); add_idents_from_pattern(loc_pattern, scope, &mut answer);
@ -841,7 +918,7 @@ where I: Iterator<Item = Located<expr::Pattern>>
fn add_idents_from_pattern( fn add_idents_from_pattern(
loc_pattern: Located<expr::Pattern>, loc_pattern: Located<expr::Pattern>,
scope: &Scope, scope: &Scope,
answer: &mut ImMap<Ident, (Symbol, Region)> answer: &mut ImSortedMap<Ident, (Symbol, Region)>
) { ) {
use expr::Pattern::*; use expr::Pattern::*;
@ -863,7 +940,7 @@ fn add_idents_from_pattern(
fn remove_idents( fn remove_idents(
pattern: expr::Pattern, pattern: expr::Pattern,
idents: &mut ImMap<Ident, (Symbol, Region)> idents: &mut ImSortedMap<Ident, (Symbol, Region)>
) { ) {
use expr::Pattern::*; use expr::Pattern::*;
@ -968,7 +1045,7 @@ fn canonicalize_pattern(
scope: &mut Scope, scope: &mut Scope,
pattern_type: &PatternType, pattern_type: &PatternType,
loc_pattern: &Located<expr::Pattern>, loc_pattern: &Located<expr::Pattern>,
shadowable_idents: &mut ImMap<Ident, (Symbol, Region)>, shadowable_idents: &mut ImSortedMap<Ident, (Symbol, Region)>,
) -> Pattern { ) -> Pattern {
use expr::Pattern::*; use expr::Pattern::*;

View file

@ -19,3 +19,18 @@ pub type ImMap<K, V> =
pub type ImSet<K> = pub type ImSet<K> =
im_rc::hashset::HashSet<K, BuildHasherDefault<FxHasher>>; im_rc::hashset::HashSet<K, BuildHasherDefault<FxHasher>>;
// OrdMap equivalents, for naming symmetry.
// Someday we may switch these implementations out.
//
// These aliases mirror the naming of the hash-based aliases declared earlier
// in this file (e.g. `ImMap`, `ImSet`), but are backed by ordered
// collections, which require `K: Ord` and iterate in key order.
/// Mutable sorted map; alias for the standard library's `BTreeMap`.
pub type MutSortedMap<K, V> =
std::collections::BTreeMap<K, V>;
/// Mutable sorted set; alias for the standard library's `BTreeSet`.
pub type MutSortedSet<K> =
std::collections::BTreeSet<K>;
/// Sorted map from the `im_rc` crate; alias for `im_rc::ordmap::OrdMap`.
pub type ImSortedMap<K, V> =
im_rc::ordmap::OrdMap<K, V>;
/// Sorted set from the `im_rc` crate; alias for `im_rc::ordset::OrdSet`.
pub type ImSortedSet<K> =
im_rc::ordset::OrdSet<K>;

View file

@ -46,7 +46,7 @@ pub enum VariantName {
/// An identifier, possibly fully-qualified with a module name /// An identifier, possibly fully-qualified with a module name
/// e.g. (Http.Request from http) /// e.g. (Http.Request from http)
/// Parameterized on a phantom marker for whether it has been canonicalized /// Parameterized on a phantom marker for whether it has been canonicalized
#[derive(Clone, Debug, PartialEq, Eq, Hash)] #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Ident { pub enum Ident {
Unqualified(String), Unqualified(String),
Qualified(String, String), Qualified(String, String),

View file

@ -1,6 +1,6 @@
use std::fmt; use std::fmt;
#[derive(Copy, Clone, Debug, Eq, PartialEq)] #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
pub struct Region { pub struct Region {
pub start_line: u32, pub start_line: u32,
pub start_col: u32, pub start_col: u32,
@ -9,7 +9,7 @@ pub struct Region {
pub end_col: u32, pub end_col: u32,
} }
#[derive(Copy, Clone, Eq, PartialEq)] #[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
pub struct Located<T> { pub struct Located<T> {
pub region: Region, pub region: Region,
pub value: T, pub value: T,

View file

@ -1,7 +1,6 @@
use roc::expr::{Expr, Pattern}; use roc::expr::{Expr, Pattern};
use roc::region::{Located, Region}; use roc::region::{Located, Region};
use std::hash::Hash; use roc::collections::{MutSortedMap};
use roc::collections::{MutMap};
pub fn loc_box<T>(val: T) -> Box<Located<T>> { pub fn loc_box<T>(val: T) -> Box<Located<T>> {
Box::new(loc(val)) Box::new(loc(val))
@ -74,11 +73,11 @@ pub fn zero_loc_pattern(loc_pattern: Located<Pattern>) -> Located<Pattern> {
} }
#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_canonicalize.rs #[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_canonicalize.rs
pub fn mut_map_from_pairs<K, V, I>(pairs: I) -> MutMap<K, V> pub fn mut_sorted_map_from_pairs<K, V, I>(pairs: I) -> MutSortedMap<K, V>
where I: IntoIterator<Item=(K, V)>, where I: IntoIterator<Item=(K, V)>,
K: Hash + Eq K: Ord
{ {
let mut answer = MutMap::default(); let mut answer = MutSortedMap::default();
for (key, value) in pairs { for (key, value) in pairs {
answer.insert(key, value); answer.insert(key, value);

View file

@ -11,27 +11,28 @@ mod test_canonicalize {
use roc::canonicalize; use roc::canonicalize;
use roc::canonicalize::{Expr, Output, Problem, Symbol, References, Procedure, Pattern}; use roc::canonicalize::{Expr, Output, Problem, Symbol, References, Procedure, Pattern};
use roc::canonicalize::Expr::*; use roc::canonicalize::Expr::*;
use roc::canonicalize::Pattern::*;
use roc::expr::{Ident}; use roc::expr::{Ident};
use roc::expr; use roc::expr;
use roc::operator::Operator; use roc::operator::Operator;
use roc::region::{Located, Region}; use roc::region::{Located, Region};
use roc::parse; use roc::parse;
use roc::collections::{ImMap, ImSet, MutMap}; use roc::collections::{ImSortedMap, ImSortedSet, MutSortedMap};
use roc::parse_state::{IndentablePosition}; use roc::parse_state::{IndentablePosition};
use combine::{Parser, eof}; use combine::{Parser, eof};
use combine::stream::state::{State}; use combine::stream::state::{State};
use helpers::{loc, loc_box, empty_region, zero_loc_expr, mut_map_from_pairs}; use helpers::{loc, loc_box, empty_region, zero_loc_expr, mut_sorted_map_from_pairs};
fn can_expr(expr_str: &str) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) { fn can_expr(expr_str: &str) -> (Expr, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
can_expr_with("testDecl", expr_str, &ImMap::default(), &ImMap::default()) can_expr_with("testDecl", expr_str, &ImSortedMap::default(), &ImSortedMap::default())
} }
fn can_expr_with( fn can_expr_with(
name: &str, name: &str,
expr_str: &str, expr_str: &str,
declared_idents: &ImMap<Ident, (Symbol, Region)>, declared_idents: &ImSortedMap<Ident, (Symbol, Region)>,
declared_variants: &ImMap<Symbol, Located<expr::VariantName>>, declared_variants: &ImSortedMap<Symbol, Located<expr::VariantName>>,
) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) { ) -> (Expr, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
let parse_state: State<&str, IndentablePosition> = State::with_positioner(expr_str, IndentablePosition::default()); let parse_state: State<&str, IndentablePosition> = State::with_positioner(expr_str, IndentablePosition::default());
let expr = match parse::expr().skip(eof()).easy_parse(parse_state) { let expr = match parse::expr().skip(eof()).easy_parse(parse_state) {
Ok((expr, state)) => { Ok((expr, state)) => {
@ -70,19 +71,17 @@ mod test_canonicalize {
locals: Vec<&'a str>, locals: Vec<&'a str>,
globals: Vec<&'a str>, globals: Vec<&'a str>,
variants: Vec<&'a str>, variants: Vec<&'a str>,
calls: Vec<&'a str>,
tail_call: Option<&'a str> tail_call: Option<&'a str>
} }
impl<'a> Into<Output> for Out<'a> { impl<'a> Into<Output> for Out<'a> {
fn into(self) -> Output { fn into(self) -> Output {
fn vec_to_set<'b>(vec: Vec<&'b str>) -> ImSet<Symbol> {
ImSet::from(vec.into_iter().map(sym).collect::<Vec<_>>())
}
let references = References { let references = References {
locals: vec_to_set(self.locals), locals: vec_to_set(self.locals),
globals: vec_to_set(self.globals), globals: vec_to_set(self.globals),
variants: vec_to_set(self.variants) variants: vec_to_set(self.variants),
calls: vec_to_set(self.calls),
}; };
let tail_call = self.tail_call.map(sym); let tail_call = self.tail_call.map(sym);
@ -91,6 +90,10 @@ mod test_canonicalize {
} }
} }
/// Builds a sorted set of `Symbol`s from a list of names, converting each
/// name with `sym`. Being a set, repeated names end up as a single entry.
fn vec_to_set<'a>(vec: Vec<&'a str>) -> ImSortedSet<Symbol> {
    // Collect straight into the set; there is no need to materialize an
    // intermediate Vec just to convert it afterwards.
    vec.into_iter().map(sym).collect()
}
// BASIC CANONICALIZATION // BASIC CANONICALIZATION
#[test] #[test]
@ -109,14 +112,14 @@ mod test_canonicalize {
locals: vec!["func"], locals: vec!["func"],
globals: vec![], globals: vec![],
variants: vec![], variants: vec![],
calls: vec!["func"],
tail_call: None tail_call: None
}.into()); }.into());
assert_eq!(procedures, assert_eq!(procedures,
mut_map_from_pairs(vec![(sym("func"), mut_sorted_map_from_pairs(vec![(sym("func"),
Procedure { Procedure {
name: Some("func".to_string()), name: Some("func".to_string()),
closes_over: ImSet::default(),
is_self_tail_recursive: false, is_self_tail_recursive: false,
definition: empty_region(), definition: empty_region(),
args: vec![Pattern::Identifier(sym("arg"))], args: vec![Pattern::Identifier(sym("arg"))],
@ -124,7 +127,13 @@ mod test_canonicalize {
loc_box(Expr::Var(sym("arg"))), loc_box(Expr::Var(sym("arg"))),
loc(Operator::Plus), loc(Operator::Plus),
loc_box(Expr::Int(1)) loc_box(Expr::Int(1))
) ),
references: References {
locals: vec_to_set(vec![]),
globals: vec_to_set(vec![]),
variants: vec_to_set(vec![]),
calls: vec_to_set(vec![]),
}
})]) })])
); );
} }
@ -149,6 +158,7 @@ mod test_canonicalize {
locals: vec!["func", "local"], locals: vec!["func", "local"],
globals: vec![], globals: vec![],
variants: vec![], variants: vec![],
calls: vec!["func"],
tail_call: None tail_call: None
}.into()); }.into());
} }
@ -173,11 +183,11 @@ mod test_canonicalize {
locals: vec!["local"], locals: vec!["local"],
globals: vec![], globals: vec![],
variants: vec![], variants: vec![],
calls: vec![],
tail_call: None tail_call: None
}.into()); }.into());
} }
// UNRECOGNIZED // UNRECOGNIZED
#[test] #[test]
@ -198,6 +208,7 @@ mod test_canonicalize {
locals: vec![], locals: vec![],
globals: vec![], globals: vec![],
variants: vec![], variants: vec![],
calls: vec![],
tail_call: None tail_call: None
}.into()); }.into());
} }
@ -219,6 +230,7 @@ mod test_canonicalize {
locals: vec!["a", "b"], locals: vec!["a", "b"],
globals: vec![], globals: vec![],
variants: vec![], variants: vec![],
calls: vec![],
tail_call: None tail_call: None
}.into()); }.into());
} }
@ -226,11 +238,12 @@ mod test_canonicalize {
// UNUSED // UNUSED
#[test] #[test]
fn mutual_unused_closed_over_vars() { fn mutual_unused_circular_vars() {
// This should report that both a and b are unused, since the return expr never references them. // This should report that both a and b are unused, since the return expr never references them.
// It should not report them as circular, since we haven't solved the halting problem here.
let (_, output, problems, _) = can_expr(indoc!(r#" let (_, output, problems, _) = can_expr(indoc!(r#"
a = \_ -> b 7 a = \arg -> if arg > 0 then b 7 else 0
b = \_ -> a 6 b = \arg -> if arg > 0 then a (arg - 1) else 0
c = 5 c = 5
c c
@ -242,10 +255,9 @@ mod test_canonicalize {
locals: vec!["c"], locals: vec!["c"],
globals: vec![], globals: vec![],
variants: vec![], variants: vec![],
calls: vec![],
tail_call: None tail_call: None
}.into()); }.into());
panic!("TODO this shuoldn't report circular assignment problems; we haven't solved the halting problem here!");
} }
#[test] #[test]
@ -266,6 +278,7 @@ mod test_canonicalize {
locals: vec!["fibonacci"], locals: vec!["fibonacci"],
globals: vec![], globals: vec![],
variants: vec![], variants: vec![],
calls: vec!["fibonacci"],
tail_call: Some("fibonacci") tail_call: Some("fibonacci")
}.into()); }.into());
} }
@ -289,6 +302,7 @@ mod test_canonicalize {
locals: vec!["func", "x", "y", "z"], locals: vec!["func", "x", "y", "z"],
globals: vec![], globals: vec![],
variants: vec![], variants: vec![],
calls: vec!["func"],
tail_call: None tail_call: None
}.into()); }.into());
@ -304,7 +318,59 @@ mod test_canonicalize {
"#)).0); "#)).0);
} }
// TODO test reordering where closed-over values are part of the dependency chain #[test]
// Checks that assignments get reordered correctly when values closed over by
// functions are part of the dependency chain: `z` calls `func1`, which calls
// `func2` and reads `y`, while `func2` reads `x`.
fn reorder_closed_over_assignments() {
let (expr, output, problems, _) = can_expr(indoc!(r#"
z = func1 x
x = 9
y = func2 3
func1 = \arg -> func2 arg + y
func2 = \arg -> arg + x
z
"#));
// Canonicalization should succeed without reporting any problems.
assert_eq!(problems, vec![]);
// All five assigned names are expected to show up as referenced locals,
// and both functions are expected to be recorded as calls.
assert_eq!(output, Out {
locals: vec!["func1", "func2", "x", "y", "z"],
globals: vec![],
variants: vec![],
calls: vec!["func1", "func2"],
tail_call: None
}.into());
// This should get reordered to the following, so that in code gen
// everything will have been set before it gets read.
// (The order of the function definitions doesn't matter.)
assert_assignment_order(expr,
vec!["func1", "x", "z", "func2", "y"],
);
}
/// Test helper: asserts that `expr` is an `Assign` whose assignments bind
/// plain identifiers in exactly the order named by `expected_strings`.
///
/// Panics if `expr` is not an `Assign`, or if any of its assignment patterns
/// is something other than an `Identifier`.
fn assert_assignment_order(expr: Expr, expected_strings: Vec<&str>) {
    // Guard: anything other than an Assign is a misuse of this helper.
    let assignments = match expr {
        Assign(assignments, _) => assignments,
        _ => {
            panic!("Called assert_assignment_order passing a non-Assign expr!");
        }
    };

    let expected_symbols: Vec<Symbol> = expected_strings.into_iter().map(sym).collect();

    // Pull the bound symbol out of each assignment, preserving their order.
    let mut actual_symbols: Vec<Symbol> = Vec::new();
    for (pattern, _) in assignments {
        match pattern {
            Identifier(symbol) => actual_symbols.push(symbol),
            _ => {
                panic!("Called assert_assignment_order passing an Assign expr with non-Identifier patterns!");
            }
        }
    }

    assert_eq!(actual_symbols, expected_symbols);
}
// CIRCULAR ASSIGNMENT // CIRCULAR ASSIGNMENT
@ -329,6 +395,10 @@ mod test_canonicalize {
panic!("TODO strongly_connected_component doesn't sort these, but we want them sorted!"); panic!("TODO strongly_connected_component doesn't sort these, but we want them sorted!");
} }
// TODO verify that Apply handles output.references.calls correctly
// UNSUPPORTED PATTERNS // UNSUPPORTED PATTERNS
// TODO verify that in closures and assignments, you can't assign to int/string/underscore/etc // TODO verify that in closures and assignments, you can't assign to int/string/underscore/etc