Overhaul canonicalization to use global symbols.

This commit is contained in:
Richard Feldman 2019-07-30 21:41:38 -04:00
parent 1b5df3f1c3
commit 74ad51aae9
4 changed files with 608 additions and 582 deletions

File diff suppressed because it is too large Load diff

View file

@ -40,7 +40,7 @@ pub enum Expr {
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum VariantName {
Unqualified(String),
Qualified(Path, String),
Qualified(String, String),
}
/// An identifier, possibly fully-qualified with a module name
@ -49,7 +49,7 @@ pub enum VariantName {
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Ident {
Unqualified(String),
Qualified(Path, String),
Qualified(String, String),
}
impl Ident {
@ -68,34 +68,6 @@ impl Ident {
}
}
/// A path to a module, which may include the package it came from.
///
/// This is a thin newtype around the path's `String` representation; the
/// wrapped string is stored exactly as given.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Path(String);

impl Path {
    /// Wraps `string` as a `Path` without modifying it.
    pub fn new(string: String) -> Path {
        Path(string)
    }

    /// Consumes the `Path` and returns the `String` it wraps.
    pub fn into_string(self) -> String {
        self.0
    }
}

// Implemented as `From<Path> for String` rather than a hand-written
// `Into<String> for Path`: the standard blanket impl still provides
// `path.into()`, and callers additionally get `String::from(path)`.
impl From<Path> for String {
    fn from(path: Path) -> Self {
        path.into_string()
    }
}

impl From<String> for Path {
    fn from(string: String) -> Self {
        Path(string)
    }
}
impl fmt::Display for Ident {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
@ -103,7 +75,7 @@ impl fmt::Display for Ident {
write!(f, "{}", name)
},
Ident::Qualified(path, name) => {
write!(f, "{}.{}", path.clone().into_string(), name)
write!(f, "{}.{}", path, name)
}
}
}
@ -116,7 +88,7 @@ impl fmt::Display for VariantName {
write!(f, "{}", name)
},
VariantName::Qualified(path, name) => {
write!(f, "{}.{}", path.clone().into_string(), name)
write!(f, "{}.{}", path, name)
}
}
}

36
src/optimize.rs Normal file
View file

@ -0,0 +1,36 @@
// PHILOSOPHY
//
// Focus on optimizations which are only safe in the absence of side effects, and leave the rest to LLVM.
//
// This focus may lead to some optimizations becoming transitively in scope. For example, some deforestation
// examples in the MSR paper benefit from multiple rounds of interleaved deforestation, beta-reduction, and inlining.
// To get those benefits, we'd have to do some inlining and beta-reduction that we could otherwise leave to LLVM's
// inlining and constant propagation/folding.
//
// Even if we're doing those things, it may still make sense to have LLVM do a pass for them as well, since
// early LLVM optimization passes may unlock later opportunities for inlining and constant propagation/folding.
//
// INLINING
//
// If a function is called exactly once (it's a helper function), presumably we always want to inline it.
// If a function is "small enough", it's probably worth inlining too.
//
// FUSION
//
// https://www.microsoft.com/en-us/research/wp-content/uploads/2016/07/deforestation-short-cut.pdf
//
// Basic approach:
//
// Express list operations using `build`, passing Cons and Nil (like a cons list), and then do foldr/build substitution/reduction.
// Afterwards, we can do a separate pass to flatten nested Cons structures into properly initialized RRBTs.
// This way we get both deforestation and efficient RRBT construction. Should work for the other collection types too.
//
// It looks like we need to do some amount of inlining and beta reductions on the Roc side, rather than
// leaving all of those to LLVM.
//
// Advanced approach:
//
// Express operations like map and filter in terms of toStream and fromStream, to unlock more deforestation.
// More info here:
//
// https://wiki.haskell.org/GHC_optimisations#Fusion

View file

@ -9,28 +9,27 @@ mod helpers;
#[cfg(test)]
mod test_canonicalize {
use roc::canonicalize;
use roc::canonicalize::{Expr, Output, Problem, Resolved, LocalSymbol, Symbol};
use roc::canonicalize::{Expr, Output, Problem, Symbol, References};
use roc::canonicalize::Expr::*;
use roc::canonicalize::Pattern::*;
use roc::expr::{Path, Ident};
use roc::operator::Operator::*;
use roc::expr::{Ident};
use roc::expr;
use roc::region::Located;
use roc::region::{Located, Region};
use roc::parse;
use roc::collections::{ImMap, ImSet};
use roc::parse_state::{IndentablePosition};
use combine::{Parser, eof};
use combine::stream::state::{State};
use helpers::{loc, loc_box, zero_loc_expr};
use helpers::{loc, zero_loc_expr};
fn can_expr(expr_str: &str) -> (Expr, Output, Vec<Problem>) {
can_expr_with(expr_str, &ImMap::default(), &ImMap::default())
can_expr_with("testDecl", expr_str, &ImMap::default(), &ImMap::default())
}
fn can_expr_with(
name: &str,
expr_str: &str,
declared_idents: &ImMap<Ident, Located<expr::Ident>>,
declared_variants: &ImMap<(Path, String), Located<expr::VariantName>>,
declared_idents: &ImMap<Ident, (Symbol, Region)>,
declared_variants: &ImMap<Symbol, Located<expr::VariantName>>,
) -> (Expr, Output, Vec<Problem>) {
let parse_state: State<&str, IndentablePosition> = State::with_positioner(expr_str, IndentablePosition::default());
let expr = match parse::expr().skip(eof()).easy_parse(parse_state) {
@ -47,23 +46,15 @@ mod test_canonicalize {
}
};
let home = Path::new("TestModule".to_string());
let home = "TestModule".to_string();
let (loc_expr, output, problems) =
canonicalize::canonicalize_declaration(home, loc(zero_loc_expr(expr)), declared_idents, declared_variants);
canonicalize::canonicalize_declaration(home, name, loc(zero_loc_expr(expr)), declared_idents, declared_variants);
(loc_expr.value, output, problems)
}
fn recognized_local_sym(string: &str) -> Resolved<Symbol> {
Resolved::Recognized(local_sym(string))
}
fn local_sym(string: &str) -> Symbol {
Symbol::Local(local(string))
}
fn local(string: &str) -> LocalSymbol {
LocalSymbol::new(string.to_string())
fn sym(name: &str) -> Symbol {
Symbol::new("TestModule$testDecl$", name)
}
fn unqualified(string :&str) -> Ident {
@ -74,32 +65,29 @@ mod test_canonicalize {
Problem::UnusedAssignment(loc(unqualified(string)))
}
fn check_output(
output: Output,
applied_variants: Vec<(Path, &str)>,
referenced_idents: Vec<(Option<Path>, &str)>,
tail_call: Option<Symbol>
) {
assert_eq!(
output,
Output {
referenced_idents:
ImSet::from(
referenced_idents.into_iter().map(|(opt_path, str_ref)|
match opt_path {
Some(path) => Ident::Qualified(path, str_ref.to_string()),
None => Ident::Unqualified(str_ref.to_string())
}
).collect::<Vec<_>>()
),
applied_variants:
ImSet::from(
applied_variants.into_iter().map(|(path, str_ref)|
(path, str_ref.to_string()),
).collect::<Vec<_>>()),
tail_call
struct Out<'a> {
locals: Vec<&'a str>,
globals: Vec<&'a str>,
variants: Vec<&'a str>,
tail_call: Option<&'a str>
}
impl<'a> Into<Output> for Out<'a> {
fn into(self) -> Output {
fn vec_to_set<'b>(vec: Vec<&'b str>) -> ImSet<Symbol> {
ImSet::from(vec.into_iter().map(sym).collect::<Vec<_>>())
}
);
let references = References {
locals: vec_to_set(self.locals),
globals: vec_to_set(self.globals),
variants: vec_to_set(self.variants)
};
let tail_call = self.tail_call.map(sym);
Output {references, tail_call}
}
}
#[test]
@ -113,15 +101,20 @@ mod test_canonicalize {
]);
assert_eq!(expr,
Var(Resolved::UnrecognizedConstant(loc(Ident::Unqualified("x".to_string()))))
UnrecognizedConstant(loc(Ident::Unqualified("x".to_string())))
);
check_output(output, vec![], vec![], None);
assert_eq!(output, Out {
locals: vec![],
globals: vec![],
variants: vec![],
tail_call: None
}.into());
}
#[test]
fn complex_unrecognized_constant() {
let (expr, output, problems) = can_expr(indoc!(r#"
let (_, output, problems) = can_expr(indoc!(r#"
a = 5
b = 6
@ -132,25 +125,12 @@ mod test_canonicalize {
Problem::UnrecognizedConstant(loc(Ident::Unqualified("z".to_string())))
]);
assert_eq!(expr,
Assign(
vec![
(loc(Identifier(local("a"))), loc(Int(5))),
(loc(Identifier(local("b"))), loc(Int(6))),
],
loc_box(Operator(
loc_box(Var(recognized_local_sym("a"))),
loc(Plus),
loc_box(Operator(
loc_box(Var(recognized_local_sym("b"))),
loc(Star),
loc_box(Var(Resolved::UnrecognizedConstant(loc(Ident::Unqualified("z".to_string())))))
)),
))
)
);
check_output(output, vec![], vec![(None, "a"), (None, "b")], None);
assert_eq!(output, Out {
locals: vec!["a", "b"],
globals: vec![],
variants: vec![],
tail_call: None
}.into());
}
#[test]
@ -166,7 +146,12 @@ mod test_canonicalize {
assert_eq!(problems, vec![unused("b"), unused("a")]);
check_output(output, vec![], vec![(None, "c")], None);
assert_eq!(output, Out {
locals: vec!["c"],
globals: vec![],
variants: vec![],
tail_call: None
}.into());
}
@ -184,11 +169,12 @@ mod test_canonicalize {
assert_eq!(problems, vec![]);
check_output(output,
vec![],
vec![(None, "num"), (None, "fibonacci")],
Some(local_sym("fibonacci"))
);
assert_eq!(output, Out {
locals: vec!["num", "fibonacci"],
globals: vec![],
variants: vec![],
tail_call: Some("fibonacci")
}.into());
}
// UNSUPPORTED PATTERNS