diff --git a/Cargo.lock b/Cargo.lock index 992cb7ac20..5463109bc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16,11 +16,6 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "ascii" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "autocfg" version = "0.1.4" @@ -54,18 +49,6 @@ dependencies = [ "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "combine" -version = "3.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "ascii 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", - "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "difference" version = "2.0.0" @@ -76,11 +59,6 @@ name = "dogged" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "either" -version = "1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "env_logger" version = "0.6.2" @@ -440,7 +418,6 @@ name = "roc" version = "0.1.0" dependencies = [ "bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)", - "combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "dogged 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "fraction 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -517,19 +494,6 @@ name = "unindent" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "unreachable" -version = "1.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "void" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "winapi" version = "0.3.6" @@ -552,17 +516,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [metadata] "checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -"checksum ascii 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "91e320562a8fa3286a481b7189f89578ace6b20df99e123c87f2f509c957c5d6" "checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf" "checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" "checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708" "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -"checksum combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "da3da6baa321ec19e1cc41d31bf599f00c783d0517095cdaf0332e3fe8d20680" "checksum difference 2.0.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum dogged 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2638df109789fe360f0d9998c5438dd19a36678aaf845e46f285b688b1a1657a" -"checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b" "checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" "checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33" "checksum fraction 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1055159ac82fb210c813303f716b6c8db57ace9d5ec2dbbc2e1d7a864c1dd74e" @@ -613,8 +574,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unindent 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "834b4441326c660336850c5c0926cc20548e848967a5f57bc20c2b741c8d41f4" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 39a3879967..499bc7ce15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,6 @@ authors = ["Richard Feldman "] dogged = { version = "0.2.0", optional = true } log = "0.4.8" petgraph = { version = "0.4.5", optional = true } -combine = "3.8.1" im-rc = "13.0.0" fraction = "0.6.2" num = "0.2.0" diff --git a/src/can/env.rs b/src/can/env.rs new file mode 100644 index 0000000000..4dcd7c634f --- /dev/null +++ b/src/can/env.rs @@ -0,0 +1,66 @@ +use can::expr::Expr; +use can::pattern::Pattern; +use can::problem::Problem; +use can::procedure::{Procedure, References}; +use can::symbol::Symbol; +use collections::{ImMap, MutMap}; +use parse::ast; +use region::{Located, Region}; + +/// The canonicalization environment for a particular module. +pub struct Env { + /// The module's path. Unqualified references to identifiers and variant names are assumed + /// to be relative to this path. + pub home: String, + + /// Problems we've encountered along the way, which will be reported to the user at the end. + pub problems: Vec, + + /// Variants either declared in this module, or imported. + pub variants: ImMap>>, + + /// Former closures converted to top-level procedures. 
+ pub procedures: MutMap, +} + +impl Env { + pub fn new( + home: String, + declared_variants: ImMap>>, + ) -> Env { + Env { + home, + variants: declared_variants, + problems: Vec::new(), + procedures: MutMap::default(), + } + } + + pub fn problem(&mut self, problem: Problem) -> () { + self.problems.push(problem) + } + + pub fn register_closure( + &mut self, + symbol: Symbol, + args: Vec>, + body: Located, + definition: Region, + references: References, + ) -> () { + // We can't tell if the closure is self tail recursive yet, because it doesn't know its final name yet. + // (Assign sets that.) Assume this is false, and let Assign change it to true after it sets final name. + let is_self_tail_recursive = false; + let name = None; // The Assign logic is also responsible for setting names after the fact. + let procedure = Procedure { + args, + name, + body, + is_self_tail_recursive, + definition, + references, + }; + + self.procedures.insert(symbol, procedure); + } +} diff --git a/src/can/expr.rs b/src/can/expr.rs new file mode 100644 index 0000000000..15607796fa --- /dev/null +++ b/src/can/expr.rs @@ -0,0 +1,46 @@ +use can::pattern::Pattern; +use can::problem::RuntimeError; +use can::symbol::Symbol; +use operator::Operator; +use region::Located; +use std::i64; + +#[derive(Clone, Debug, PartialEq)] +pub enum Expr { + // Literals + Int(i64), + Float(f64), + EmptyStr, + Str(Box), + Char(char), // OBSOLETE + List(Vec>), + EmptyList, + + // Lookups + Var(Symbol), + /// Works the same as Var, but has an important marking purpose. + /// See 13623e3f5f65ea2d703cf155f16650c1e8246502 for the bug this fixed. + FunctionPointer(Symbol), + /// We have a separate variant for this so that we can report errors + /// (including type errors later) in the context of the sugar rather than + /// confusingly talking about the desugared version the user can't see.
+ InterpolatedStr(Vec<(Box, Located)>, Box), + + // Pattern Matching + Case(Box>, Vec<(Located, Located)>), + Assign(Vec<(Located, Located)>, Box>), + + // Application + Call(Box>, Vec>), + ApplyVariant(Symbol, Option>>), + + // Product Types + EmptyRecord, + + // Sugar + If(Box>, Box>, Box>), + Operator(Box>, Located, Box>), + + // Compiles, but will crash if reached + RuntimeError(RuntimeError), +} diff --git a/src/can/mod.rs b/src/can/mod.rs new file mode 100644 index 0000000000..758e157398 --- /dev/null +++ b/src/can/mod.rs @@ -0,0 +1,1334 @@ +use self::env::Env; +use self::expr::Expr; +// use self::pattern::Pattern; +// use self::pattern::PatternType::*; +use self::problem::Problem; +use self::problem::RuntimeError::*; +use self::procedure::{Procedure, References}; +use self::scope::Scope; +use self::symbol::Symbol; +use collections::{ImMap, ImSet, MutMap, MutSet}; +use ident::Ident; +// use graph::{strongly_connected_component, topological_sort}; +use operator::Operator; +// use operator::Operator::Pizza; +use parse::ast; +use region::{Located, Region}; +use std::i64; + +pub mod env; +pub mod expr; +pub mod pattern; +pub mod problem; +pub mod procedure; +pub mod scope; +pub mod string; +pub mod symbol; + +pub fn canonicalize_declaration<'a>( + home: String, + name: &str, + loc_expr: Located>, + declared_idents: &ImMap, + declared_variants: &ImMap>>, +) -> ( + Located, + Output, + Vec, + MutMap, +) { + // If we're canonicalizing the declaration `foo = ...` inside the `Main` module, + // scope_prefix will be "Main$foo$" and its first closure will be named "Main$foo$0" + let scope_prefix = format!("{}${}$", home, name); + let mut scope = Scope::new(scope_prefix, declared_idents.clone()); + let mut env = Env::new(home, declared_variants.clone()); + let (mut new_loc_expr, output) = canonicalize(&mut env, &mut scope, loc_expr); + + // Apply operator precedence and associativity rules once, after canonicalization is + // otherwise complete. 
If we did this *during* canonicalization, then each time we + // visited an Operator node we'd recursively try to apply this to each of its nested + // operators, and then again on *their* nested operators, ultimately applying the + // rules multiple times unnecessarily. + new_loc_expr = apply_precedence_and_associativity(&mut env, new_loc_expr); + + (new_loc_expr, output, env.problems, env.procedures) +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Output { + pub references: References, + pub tail_call: Option, +} + +impl Output { + pub fn new() -> Output { + Output { + references: References::new(), + tail_call: None, + } + } +} + +fn canonicalize<'a>( + env: &mut Env, + scope: &mut Scope, + loc_expr: Located>, +) -> (Located, Output) { + use self::Expr::*; + + let (expr, output) = match loc_expr.value { + ast::Expr::Int(string) => (int_from_parsed(string, &mut env.problems), Output::new()), + ast::Expr::Float(string) => (float_from_parsed(string, &mut env.problems), Output::new()), + ast::Expr::EmptyRecord => (EmptyRecord, Output::new()), + ast::Expr::Str(string) => (Str(string.into()), Output::new()), + ast::Expr::EmptyStr => (EmptyStr, Output::new()), + ast::Expr::EmptyList => (EmptyList, Output::new()), + ast::Expr::List(elems) => { + let mut output = Output::new(); + let mut can_elems = Vec::with_capacity(elems.len()); + + for loc_elem in elems.into_iter() { + let (can_expr, elem_out) = canonicalize(env, scope, loc_elem); + + output.references = output.references.union(elem_out.references); + + can_elems.push(can_expr); + } + + // A list literal is never a tail call!
+ output.tail_call = None; + + (List(can_elems), output) + } + + //ast::Expr::If(loc_cond, loc_true, loc_false) => { + // // Canonicalize the nested expressions + // let (cond_expr, cond_out) = canonicalize(env, scope, *loc_cond); + // let (true_expr, true_out) = canonicalize(env, scope, *loc_true); + // let (false_expr, false_out) = canonicalize(env, scope, *loc_false); + + // // Incorporate all three expressions into a combined Output value. + // let expr = If( + // Box::new(cond_expr), + // Box::new(true_expr), + // Box::new(false_expr), + // ); + // let mut output = cond_out; + + // // If both branches are tail calling the same symbol, then so is the conditional as a whole. + // // Also, if both branches are not tail calls (tail_call == None), then so is the conditional. + // // If the branches are different, we leave the default of None as-is. + // if true_out.tail_call == false_out.tail_call { + // output.tail_call = true_out.tail_call; + // } + + // // To evaluate the whole if-expression, we depend on all the values that both branches depend on. 
+ // output.references = output.references.union(true_out.references); + // output.references = output.references.union(false_out.references); + + // (expr, output) + //} + + //ast::Expr::Apply(loc_fn, loc_args) => { + // // Canonicalize the function expression and its arguments + // let (fn_expr, mut output) = canonicalize(env, scope, *loc_fn); + // let mut args = Vec::new(); + // let mut outputs = Vec::new(); + + // for loc_arg in loc_args { + // let (arg_expr, arg_out) = canonicalize(env, scope, loc_arg); + + // args.push(arg_expr); + // outputs.push(arg_out); + // } + + // match &fn_expr.value { + // &Var(ref sym) => { + // output.references.calls.insert(sym.clone()); + // } + // _ => (), + // }; + + // let expr = Call(Box::new(fn_expr), args); + + // for arg_out in outputs { + // output.references = output.references.union(arg_out.references); + // } + + // // We're not tail-calling a symbol (by name), we're tail-calling a function value. + // output.tail_call = None; + + // (expr, output) + //} + + //expr::Expr::Operator(loc_left, op, loc_right) => { + // // Canonicalize the nested expressions + // let (left_expr, left_out) = canonicalize(env, scope, *loc_left); + // let (right_expr, mut output) = canonicalize(env, scope, *loc_right); + + // // Incorporate both expressions into a combined Output value. + // output.references = output.references.union(left_out.references); + + // // The pizza operator is the only one that can be a tail call, + // // because it's the only one that can call a function by name. 
+ // output.tail_call = match op.value { + // Pizza => match &right_expr.value { + // &Var(ref sym) => Some(sym.clone()), + // &Call(ref loc_boxed_expr, _) => match (*loc_boxed_expr.clone()).value { + // Var(sym) => Some(sym), + // _ => None, + // }, + // _ => None, + // }, + // _ => None, + // }; + + // let expr = Operator(Box::new(left_expr), op, Box::new(right_expr)); + + // (expr, output) + //} + + //expr::Expr::Var(ident) => { + // let mut output = Output::new(); + // let can_expr = match resolve_ident(&env, &scope, ident, &mut output.references) { + // Ok(symbol) => Var(symbol), + // Err(ident) => { + // let loc_ident = Located { + // region: loc_expr.region.clone(), + // value: ident, + // }; + + // env.problem(Problem::UnrecognizedConstant(loc_ident.clone())); + + // RuntimeError(UnrecognizedConstant(loc_ident)) + // } + // }; + + // (can_expr, output) + //} + + //expr::Expr::InterpolatedStr(pairs, suffix) => { + // let mut output = Output::new(); + // let can_pairs: Vec<(String, Located)> = pairs + // .into_iter() + // .map(|(string, loc_ident)| { + // // From a language design perspective, we only permit idents in interpolation. + // // However, in a canonical Expr we store it as a full Expr, not a Symbol. + // // This is so that we can resolve it to either Var or Unrecognized; if we + // // stored it as a Symbol, we couldn't record runtime errors here. 
+ // let can_expr = match resolve_ident( + // &env, + // &scope, + // loc_ident.value, + // &mut output.references, + // ) { + // Ok(symbol) => Var(symbol), + // Err(ident) => { + // let loc_ident = Located { + // region: loc_ident.region.clone(), + // value: ident, + // }; + + // env.problem(Problem::UnrecognizedConstant(loc_ident.clone())); + + // RuntimeError(UnrecognizedConstant(loc_ident)) + // } + // }; + + // ( + // string, + // Located { + // region: loc_ident.region, + // value: can_expr, + // }, + // ) + // }) + // .collect(); + + // (InterpolatedStr(can_pairs, suffix), output) + //} + + //expr::Expr::ApplyVariant(variant_name, opt_args) => { + // // Canonicalize the arguments and union their references into our output. + // // We'll do this even if the variant name isn't recognized, since we still + // // want to report canonicalization problems with the variant's arguments, + // // and their references still matter for purposes of detecting unused things. + // let mut output = Output::new(); + + // let opt_can_args = match opt_args { + // Some(args) => { + // let mut can_args = Vec::with_capacity(args.len()); + + // for arg in args { + // let (loc_expr, arg_output) = canonicalize(env, scope, arg); + + // output.references = output.references.union(arg_output.references); + + // can_args.push(loc_expr); + // } + + // Some(can_args) + // } + // None => None, + // }; + + // let can_expr = match resolve_variant_name(&env, variant_name, &mut output.references) { + // Ok(symbol) => ApplyVariant(symbol, opt_can_args), + // Err(variant_name) => { + // let loc_variant = Located { + // region: loc_expr.region.clone(), + // value: variant_name, + // }; + + // env.problem(Problem::UnrecognizedVariant(loc_variant.clone())); + + // RuntimeError(UnrecognizedVariant(loc_variant)) + // } + // }; + + // (can_expr, output) + //} + + //expr::Expr::Assign(assignments, box_loc_returned) => { + // // The body expression gets a new scope for canonicalization. 
+ // // Shadow `scope` to make sure we don't accidentally use the original one for the + // // rest of this block. + // let mut scope = scope.clone(); + + // // Add the assigned identifiers to scope. If there's a collision, it means there + // // was shadowing, which will be handled later. + // let assigned_idents: Vec<(Ident, (Symbol, Region))> = idents_from_patterns( + // assignments + // .clone() + // .iter() + // .map(|(loc_pattern, _)| loc_pattern), + // &scope, + // ); + + // scope.idents = union_pairs(scope.idents, assigned_idents.iter()); + + // let mut refs_by_assignment: MutMap, References)> = + // MutMap::default(); + // let mut can_assignments_by_symbol: MutMap, Located)> = + // MutMap::default(); + + // for (loc_pattern, expr) in assignments { + // // Each assignment gets to have all the idents in scope that are assigned in this + // // block. Order of assignments doesn't matter, thanks to referential transparency! + // let (loc_can_expr, can_output) = canonicalize(env, &mut scope, expr); + + // // Exclude the current ident from shadowable_idents; you can't shadow yourself! + // // (However, still include it in scope, because you *can* recursively refer to yourself.) + // let mut shadowable_idents = scope.idents.clone(); + // remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); + + // let loc_can_pattern = canonicalize_pattern( + // env, + // &mut scope, + // &Assignment, + // &loc_pattern, + // &mut shadowable_idents, + // ); + // let mut renamed_closure_assignment: Option<&Symbol> = None; + + // // Give closures names (and tail-recursive status) where appropriate. + // let can_expr = match ( + // &loc_pattern.value, + // &loc_can_pattern.value, + // &loc_can_expr.value, + // ) { + // // First, make sure we are actually assigning an identifier instead of (for example) a variant. + // // + // // If we're assigning (UserId userId) = ... then this is certainly not a closure declaration, + // // which also implies it's not a self tail call! 
+ // // + // // Only assignments of the form (foo = ...) can be closure declarations or self tail calls. + // ( + // &expr::Pattern::Identifier(ref name), + // &Pattern::Identifier(ref assigned_symbol), + // &FunctionPointer(ref symbol), + // ) => { + // // Since everywhere in the code it'll be referred to by its assigned name, + // // remove its generated name from the procedure map. (We'll re-insert it later.) + // let mut procedure = env.procedures.remove(&symbol).unwrap(); + + // // The original ident name will be used for debugging and stack traces. + // procedure.name = Some(name.clone()); + + // // The closure is self tail recursive iff it tail calls itself (by assigned name). + // procedure.is_self_tail_recursive = match &can_output.tail_call { + // &None => false, + // &Some(ref symbol) => symbol == assigned_symbol, + // }; + + // // Re-insert the procedure into the map, under its assigned name. This way, + // // when code elsewhere calls it by assigned name, it'll resolve properly. + // env.procedures.insert(assigned_symbol.clone(), procedure); + + // // Recursion doesn't count as referencing. (If it did, all recursive functions + // // would result in circular assignment errors!) + // refs_by_assignment + // .entry(assigned_symbol.clone()) + // .and_modify(|(_, refs)| { + // refs.locals = refs.locals.without(assigned_symbol); + // }); + + // renamed_closure_assignment = Some(&assigned_symbol); + + // // Return a reference to the assigned symbol, since the auto-generated one no + // // longer references any entry in the procedure map! + // Var(assigned_symbol.clone()) + // } + // _ => loc_can_expr.value, + // }; + + // let mut assigned_symbols = Vec::new(); + + // // Store the referenced locals in the refs_by_assignment map, so we can later figure out + // // which assigned names reference each other. 
+ // for (ident, (symbol, region)) in + // idents_from_patterns(std::iter::once(&loc_pattern), &scope) + // { + // let refs = + // // Functions' references don't count in assignments. + // // See 3d5a2560057d7f25813112dfa5309956c0f9e6a9 and its + // // parent commit for the bug this fixed! + // if renamed_closure_assignment == Some(&symbol) { + // References::new() + // } else { + // can_output.references.clone() + // }; + + // refs_by_assignment.insert( + // symbol.clone(), + // ( + // Located { + // value: ident, + // region, + // }, + // refs, + // ), + // ); + + // assigned_symbols.push(symbol.clone()); + // } + + // for symbol in assigned_symbols { + // can_assignments_by_symbol.insert( + // symbol, + // ( + // loc_can_pattern.clone(), + // Located { + // region: loc_can_expr.region.clone(), + // value: can_expr.clone(), + // }, + // ), + // ); + // } + // } + + // // The assignment as a whole is a tail call iff its return expression is a tail call. + // // Use its output as a starting point because its tail_call already has the right answer! + // let (ret_expr, mut output) = canonicalize(env, &mut scope, *box_loc_returned); + + // // Determine the full set of references by traversing the graph. + // let mut visited_symbols = MutSet::default(); + + // // Start with the return expression's referenced locals. They are the only ones that count! + // // + // // If I have two assignments which reference each other, but neither of them + // // is referenced in the return expression, I don't want either of them (or their references) + // // to end up in the final output.references. They were unused, and so were their references! + // // + // // The reason we need a graph here is so we don't overlook transitive dependencies. + // // For example, if I have `a = b + 1` and the assignment returns `a + 1`, then the + // // assignment as a whole references both `a` *and* `b`, even though it doesn't + // // directly mention `b` - because `a` depends on `b`. 
If we didn't traverse a graph here, + // // we'd erroneously give a warning that `b` was unused since it wasn't directly referenced. + // for symbol in output.references.locals.clone().into_iter() { + // // Traverse the graph and look up *all* the references for this local symbol. + // let refs = references_from_local( + // symbol, + // &mut visited_symbols, + // &refs_by_assignment, + // &env.procedures, + // ); + + // output.references = output.references.union(refs); + // } + + // for symbol in output.references.calls.clone().into_iter() { + // // Traverse the graph and look up *all* the references for this call. + // // Reuse the same visited_symbols as before; if we already visited it, we + // // won't learn anything new from visiting it again! + // let refs = references_from_call( + // symbol, + // &mut visited_symbols, + // &refs_by_assignment, + // &env.procedures, + // ); + + // output.references = output.references.union(refs); + // } + + // // Now that we've collected all the references, check to see if any of the new idents + // // we defined went unused by the return expression. If any were unused, report it. + // for (ident, (symbol, region)) in assigned_idents.clone() { + // if !output.references.has_local(&symbol) { + // let loc_ident = Located { + // region: region.clone(), + // value: ident.clone(), + // }; + + // env.problem(Problem::UnusedAssignment(loc_ident)); + // } + // } + + // // Use topological sort to reorder the assignments based on their dependencies to one another. + // // This way, during code gen, no assignment will refer to a value that hasn't been initialized yet. + // // As a bonus, the topological sort also reveals any cycles between the assignments, allowing + // // us to give a CircularAssignment error. 
+ // let successors = |symbol: &Symbol| -> ImSet { + // let (_, references) = refs_by_assignment.get(symbol).unwrap(); + + // local_successors(&references, &env.procedures) + // }; + + // let assigned_symbols: Vec = can_assignments_by_symbol + // .keys() + // .into_iter() + // .map(Symbol::clone) + // .collect(); + + // match topological_sort(assigned_symbols.as_slice(), successors) { + // Ok(sorted_symbols) => { + // let can_assignments = sorted_symbols + // .into_iter() + // .rev() // Topological sort gives us the reverse of the sorting we want! + // .map(|symbol| can_assignments_by_symbol.get(&symbol).unwrap().clone()) + // .collect(); + + // (Assign(can_assignments, Box::new(ret_expr)), output) + // } + // Err(node_in_cycle) => { + // // We have one node we know is in the cycle. + // // We want to show the entire cycle in the error message, so expand it out. + // let mut loc_idents_in_cycle: Vec> = + // strongly_connected_component(&node_in_cycle, successors) + // .into_iter() + // .rev() // Strongly connected component gives us the reverse of the sorting we want! + // .map(|symbol| refs_by_assignment.get(&symbol).unwrap().0.clone()) + // .collect(); + + // loc_idents_in_cycle = sort_cyclic_idents( + // loc_idents_in_cycle, + // &mut assigned_idents.iter().map(|(ident, _)| ident), + // ); + + // env.problem(Problem::CircularAssignment(loc_idents_in_cycle.clone())); + + // let can_assignments = can_assignments_by_symbol + // .values() + // .map(|tuple| tuple.clone()) + // .collect(); + + // ( + // RuntimeError(CircularAssignment( + // loc_idents_in_cycle, + // can_assignments, + // Box::new(ret_expr), + // )), + // output, + // ) + // } + // } + //} + + //expr::Expr::Closure(loc_arg_patterns, box_loc_body_expr) => { + // // The globally unique symbol that will refer to this closure once it gets converted + // // into a top-level procedure for code gen. 
+ // // + // // The symbol includes the module name, the top-level declaration name, and the + // // index (0-based) of the closure within that declaration. + // // + // // Example: "MyModule$main$3" if this is the 4th closure in MyModule.main. + // let symbol = scope.gen_unique_symbol(); + + // // The body expression gets a new scope for canonicalization. + // // Shadow `scope` to make sure we don't accidentally use the original one for the + // // rest of this block. + // let mut scope = scope.clone(); + + // let arg_idents: Vec<(Ident, (Symbol, Region))> = + // idents_from_patterns(loc_arg_patterns.iter(), &scope); + + // // Add the arguments' idents to scope.idents. If there's a collision, + // // it means there was shadowing, which will be handled later. + // scope.idents = union_pairs(scope.idents, arg_idents.iter()); + + // let can_args: Vec> = loc_arg_patterns + // .into_iter() + // .map(|loc_pattern| { + // // Exclude the current ident from shadowable_idents; you can't shadow yourself! + // // (However, still include it in scope, because you *can* recursively refer to yourself.) + // let mut shadowable_idents = scope.idents.clone(); + // remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); + + // canonicalize_pattern( + // env, + // &mut scope, + // &FunctionArg, + // &loc_pattern, + // &mut shadowable_idents, + // ) + // }) + // .collect(); + // let (loc_body_expr, mut output) = canonicalize(env, &mut scope, *box_loc_body_expr); + + // // Now that we've collected all the references, check to see if any of the args we defined + // // went unreferenced. If any did, report them as unused arguments. + // for (ident, (arg_symbol, region)) in arg_idents { + // if !output.references.has_local(&arg_symbol) { + // // The body never referenced this argument we declared. It's an unused argument! 
+ // env.problem(Problem::UnusedArgument(Located { + // region, + // value: ident, + // })); + // } + + // // We shouldn't ultimately count arguments as referenced locals. Otherwise, + // // we end up with weird conclusions like the expression (\x -> x + 1) + // // references the (nonexistent) local variable x! + // output.references.locals.remove(&arg_symbol); + // } + + // // We've finished analyzing the closure. Its references.locals are now the values it closes over, + // // since we removed the only locals it shouldn't close over (its arguments). + // // Register it as a top-level procedure in the Env! + // env.register_closure( + // symbol.clone(), + // can_args, + // loc_body_expr, + // loc_expr.region.clone(), + // output.references.clone(), + // ); + + // // Always return a function pointer, in case that's how the closure is being used (e.g. with Apply). + // (FunctionPointer(symbol), output) + //} + + //expr::Expr::Case(loc_cond, branches) => { + // // Canonicalize the conditional + // let (can_cond, mut output) = canonicalize(env, scope, *loc_cond); + // let mut can_branches = Vec::with_capacity(branches.len()); + // let mut recorded_tail_call = false; + + // for (loc_pattern, loc_expr) in branches { + // // Each case branch gets a new scope for canonicalization. + // // Shadow `scope` to make sure we don't accidentally use the original one for the + // // rest of this block. + // let mut scope = scope.clone(); + + // // Exclude the current ident from shadowable_idents; you can't shadow yourself! + // // (However, still include it in scope, because you *can* recursively refer to yourself.) + // let mut shadowable_idents = scope.idents.clone(); + // remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); + + // let loc_can_pattern = canonicalize_pattern( + // env, + // &mut scope, + // &CaseBranch, + // &loc_pattern, + // &mut shadowable_idents, + // ); + + // // Patterns introduce new idents to the scope!
+ // // Add the assigned identifiers to scope. If there's a collision, it means there + // // was shadowing, which will be handled later. + // let assigned_idents: Vec<(Ident, (Symbol, Region))> = + // idents_from_patterns(std::iter::once(&loc_pattern), &scope); + + // scope.idents = union_pairs(scope.idents, assigned_idents.iter()); + + // let (can_expr, branch_output) = canonicalize(env, &mut scope, loc_expr); + + // output.references = output.references.union(branch_output.references); + + // // If all branches are tail calling the same symbol, then so is the conditional as a whole. + // if !recorded_tail_call { + // // If we haven't recorded output.tail_call yet, record it. + // output.tail_call = branch_output.tail_call; + // recorded_tail_call = true; + // } else if branch_output.tail_call != output.tail_call { + // // If we recorded output.tail_call, but what we recorded differs from what we just saw, + // // then game over. This can't possibly be a self tail call! + // output.tail_call = None; + // } + + // // Now that we've collected all the references for this branch, check to see if + // // any of the new idents it defined were unused. If any were, report it. + // for (ident, (symbol, region)) in assigned_idents { + // if !output.references.has_local(&symbol) { + // let loc_ident = Located { + // region: region.clone(), + // value: ident.clone(), + // }; + + // env.problem(Problem::UnusedAssignment(loc_ident)); + // } + // } + + // can_branches.push((loc_can_pattern, can_expr)); + // } + + // // One of the branches should have flipped this, so this should only happen + // // in the situation where the case had no branches. That can come up, though! + // // A case with no branches is a runtime error, but it will mess things up + // // if code gen mistakenly thinks this is a tail call just because its condition + // // happend to be one. (The condition gave us our initial output value.) 
+ // if !recorded_tail_call { + // output.tail_call = None; + // } + + // // Incorporate all three expressions into a combined Output value. + // let expr = Case(Box::new(can_cond), can_branches); + + // (expr, output) + //} + ast::Expr::HexInt(string) => (hex_from_parsed(string, &mut env.problems), Output::new()), + ast::Expr::BinaryInt(string) => (bin_from_parsed(string, &mut env.problems), Output::new()), + ast::Expr::OctalInt(string) => (oct_from_parsed(string, &mut env.problems), Output::new()), + _ => { + panic!( + "TODO restore the rest of canonicalize()'s branches {:?}", + local_successors(&References::new(), &MutMap::default()) + ); + } + }; + + // At the end, diff used_idents and assigned_idents to see which were unused. + // Add warnings for those! + + // In a later phase, unused top level declarations won't get monomorphized or code-genned. + // We aren't going to bother with DCE at the level of local assignments. It's going to be + // a rounding error anyway (especially given that they'll be surfaced as warnings), LLVM will + // DCE them in optimized builds, and it's not worth the bookkeeping for dev builds. + ( + Located { + region: loc_expr.region.clone(), + value: expr, + }, + output, + ) +} + +fn _union_pairs<'a, K, V, I>(mut map: ImMap, pairs: I) -> ImMap +where + I: Iterator, + K: std::hash::Hash + Eq + Clone, + K: 'a, + V: Clone, + V: 'a, +{ + for (ref k, ref v) in pairs { + map.insert(k.clone(), v.clone()); + } + + map +} + +fn local_successors( + references: &References, + procedures: &MutMap, +) -> ImSet { + let mut answer = references.locals.clone(); + + for call_symbol in references.calls.iter() { + answer = answer.union(call_successors(call_symbol, procedures)); + } + + answer +} + +fn call_successors(call_symbol: &Symbol, procedures: &MutMap) -> ImSet { + // TODO (this comment should be moved to a GH issue) this may cause an infinite loop if 2 procedures reference each other; may need to track visited procedures! 
+ match procedures.get(call_symbol) { + Some(procedure) => { + let mut answer = local_successors(&procedure.references, procedures); + + answer.insert(call_symbol.clone()); + + answer + } + None => ImSet::default(), + } +} + +fn _references_from_local( + assigned_symbol: Symbol, + visited: &mut MutSet, + refs_by_assignment: &MutMap, + procedures: &MutMap, +) -> References { + match refs_by_assignment.get(&assigned_symbol) { + Some((_, refs)) => { + let mut answer = References::new(); + + visited.insert(assigned_symbol); + + for local in refs.locals.iter() { + if !visited.contains(&local) { + let other_refs = _references_from_local( + local.clone(), + visited, + refs_by_assignment, + procedures, + ); + + answer = answer.union(other_refs); + } + + answer.locals.insert(local.clone()); + } + + for call in refs.calls.iter() { + if !visited.contains(&call) { + let other_refs = _references_from_call( + call.clone(), + visited, + refs_by_assignment, + procedures, + ); + + answer = answer.union(other_refs); + } + + answer.calls.insert(call.clone()); + } + + answer + } + None => { + // This should never happen! If the local was not recognized, it should not have been + // added to the local references. + unreachable!(); + } + } +} + +/// When we get a list of cyclic idents, the first node listed is a matter of chance. +/// This reorders the list such that the first node listed is always alphabetically the lowest, +/// while preserving the overall order of the cycle. +/// +/// Example: the cycle (c ---> a ---> b) becomes (a ---> b ---> c) +pub fn sort_cyclic_idents<'a, I>( + loc_idents: Vec>, + ordered_idents: &mut I, +) -> Vec> +where + I: Iterator, +{ + // Find the first ident in ordered_idents that also appears in loc_idents. 
+ let first_ident = ordered_idents + .find(|ident| { + loc_idents + .iter() + .any(|loc_ident| &&loc_ident.value == ident) + }) + .unwrap(); + + let mut answer = Vec::with_capacity(loc_idents.len()); + let mut end = Vec::with_capacity(loc_idents.len()); + let mut encountered_first_ident = false; + + for loc_ident in loc_idents { + if encountered_first_ident { + answer.push(loc_ident); + } else if &loc_ident.value == first_ident { + encountered_first_ident = true; + + answer.push(loc_ident); + } else { + end.push(loc_ident); + } + } + + // Add the contents of `end` to the end of the answer. + answer.extend_from_slice(end.as_slice()); + + answer +} + +fn _references_from_call( + call_symbol: Symbol, + visited: &mut MutSet, + refs_by_assignment: &MutMap, + procedures: &MutMap, +) -> References { + match procedures.get(&call_symbol) { + Some(procedure) => { + let mut answer = procedure.references.clone(); + + visited.insert(call_symbol); + + for closed_over_local in procedure.references.locals.iter() { + if !visited.contains(&closed_over_local) { + let other_refs = _references_from_local( + closed_over_local.clone(), + visited, + refs_by_assignment, + procedures, + ); + + answer = answer.union(other_refs); + } + + answer.locals.insert(closed_over_local.clone()); + } + + for call in procedure.references.calls.iter() { + if !visited.contains(&call) { + let other_refs = _references_from_call( + call.clone(), + visited, + refs_by_assignment, + procedures, + ); + + answer = answer.union(other_refs); + } + + answer.calls.insert(call.clone()); + } + + answer + } + None => { + // If the call symbol was not in the procedures map, that means we're calling a non-function and + // will get a type mismatch later. For now, assume no references as a result of the "call." 
+ References::new() + } + } +} + +//fn idents_from_patterns<'a, I>(loc_patterns: I, scope: &Scope) -> Vec<(Ident, (Symbol, Region))> +//where +// I: Iterator>>, +//{ +// let mut answer = Vec::new(); + +// for loc_pattern in loc_patterns { +// add_idents_from_pattern(loc_pattern, scope, &mut answer); +// } + +// answer +//} + +///// helper function for idents_from_patterns +//fn add_idents_from_pattern<'a>( +// loc_pattern: &Located>, +// scope: &Scope, +// answer: &mut Vec<(Ident, (Symbol, Region))>, +//) { +// use parse::ast::Pattern::*; + +// match &loc_pattern.value { +// &Identifier(ref name) => { +// let symbol = scope.symbol(&name); + +// answer.push(( +// Ident::Unqualified(name.clone()), +// (symbol, loc_pattern.region.clone()), +// )); +// } +// &Variant(_, ref opt_loc_args) => match opt_loc_args { +// &None => (), +// &Some(ref loc_args) => { +// for loc_arg in loc_args.iter() { +// add_idents_from_pattern(loc_arg, scope, answer); +// } +// } +// }, +// &IntLiteral(_) | &FloatLiteral(_) | &ExactString(_) | &EmptyRecordLiteral | &Underscore => { +// () +// } +// } +//} + +//fn remove_idents(pattern: expr::Pattern, idents: &mut ImMap) { +// use expr::Pattern::*; + +// match pattern { +// Identifier(name) => { +// idents.remove(&(Ident::Unqualified(name))); +// } +// Variant(_, Some(loc_args)) => { +// for loc_arg in loc_args { +// remove_idents(loc_arg.value, idents); +// } +// } +// Variant(_, None) +// | IntLiteral(_) +// | FloatLiteral(_) +// | ExactString(_) +// | EmptyRecordLiteral +// | Underscore => {} +// } +//} + +///// If it could not be found, return it unchanged as an Err. 
+//#[inline(always)] // This is shared code between Var and InterpolatedStr; it was inlined when handwritten +//fn resolve_ident( +// env: &Env, +// scope: &Scope, +// ident: Ident, +// references: &mut References, +//) -> Result { +// if scope.idents.contains_key(&ident) { +// let recognized = match ident { +// Ident::Unqualified(name) => { +// let symbol = scope.symbol(&name); + +// references.locals.insert(symbol.clone()); + +// symbol +// } +// Ident::Qualified(path, name) => { +// let symbol = Symbol::new(&path, &name); + +// references.globals.insert(symbol.clone()); + +// symbol +// } +// }; + +// Ok(recognized) +// } else { +// match ident { +// Ident::Unqualified(name) => { +// // Try again, this time using the current module as the path. +// let qualified = Ident::Qualified(env.home.clone(), name.clone()); + +// if scope.idents.contains_key(&qualified) { +// let symbol = Symbol::new(&env.home, &name); + +// references.globals.insert(symbol.clone()); + +// Ok(symbol) +// } else { +// // We couldn't find the unqualified ident in scope. NAMING PROBLEM! +// Err(Ident::Unqualified(name)) +// } +// } +// qualified @ Ident::Qualified(_, _) => { +// // We couldn't find the qualified ident in scope. NAMING PROBLEM! +// Err(qualified) +// } +// } +// } +//} + +///// Translate a VariantName into a resolved symbol if it's found in env.declared_variants. +///// If it could not be found, return it unchanged as an Err. +//#[inline(always)] +//fn resolve_variant_name( +// env: &Env, +// variant_name: VariantName, +// references: &mut References, +//) -> Result { +// let symbol = Symbol::from_variant(&variant_name, &env.home); + +// if env.variants.contains_key(&symbol) { +// references.variants.insert(symbol.clone()); + +// Ok(symbol) +// } else { +// // We couldn't find the qualified variant name in scope. NAMING PROBLEM! 
+// Err(variant_name) +// } +//} + +// OPERATOR PRECEDENCE + +// Precedence logic adapted from Gluon by Markus Westerlind, MIT licensed +// https://github.com/gluon-lang/gluon +// Thank you, Markus! +fn new_op_expr( + left: Box>, + op: Located, + right: Box>, +) -> Located { + let new_region = Region { + start_line: left.region.start_line, + start_col: left.region.start_col, + + end_line: right.region.end_line, + end_col: right.region.end_col, + }; + let new_expr = Expr::Operator(left, op, right); + + Located { + value: new_expr, + region: new_region, + } +} + +/// Reorder the expression tree based on operator precedence and associativity rules. +/// In many languages, this can fail due to (for example) <| and |> having the same +/// precedence but different associativity. Languages which support custom operators with +/// user-defined precedence and associativity (e.g. Haskell) can have many such errors. +/// +/// By design, Roc neither allows custom operators nor has any built-in operators with +/// the same precedence and different associativity, so this operation always succeeds +/// and can never produce any user-facing errors. +fn apply_precedence_and_associativity(env: &mut Env, expr: Located) -> Located { + use can::problem::PrecedenceProblem::*; + use operator::Associativity::*; + use std::cmp::Ordering; + + // NOTE: A potentially nice performance optimization here would be to use + // arena bump allocation for Infixes, arg_stack, and op_stack. As long as we + // allocate each element inside arg_stack outside the arena, this should end + // up being a decent bit more efficient. 
+ let mut infixes = Infixes::new(expr); + let mut arg_stack: Vec>> = Vec::new(); + let mut op_stack: Vec> = Vec::new(); + + while let Some(token) = infixes.next() { + match token { + InfixToken::Arg(next_expr) => arg_stack.push(next_expr), + InfixToken::Op(next_op) => { + match op_stack.pop() { + Some(stack_op) => { + match next_op.value.cmp(&stack_op.value) { + Ordering::Less => { + // Inline + let right = arg_stack.pop().unwrap(); + let left = arg_stack.pop().unwrap(); + + infixes.next_op = Some(next_op); + arg_stack.push(Box::new(new_op_expr(left, stack_op, right))); + } + + Ordering::Greater => { + // Swap + op_stack.push(stack_op); + op_stack.push(next_op); + } + + Ordering::Equal => { + match ( + next_op.value.associativity(), + stack_op.value.associativity(), + ) { + (LeftAssociative, LeftAssociative) => { + // Inline + let right = arg_stack.pop().unwrap(); + let left = arg_stack.pop().unwrap(); + + infixes.next_op = Some(next_op); + arg_stack + .push(Box::new(new_op_expr(left, stack_op, right))); + } + + (RightAssociative, RightAssociative) => { + // Swap + op_stack.push(stack_op); + op_stack.push(next_op); + } + + (NonAssociative, NonAssociative) => { + // Both operators were non-associative, e.g. (True == False == False). + // We should tell the author to disambiguate by grouping them with parens. + let problem = BothNonAssociative(next_op.clone(), stack_op); + + env.problem(Problem::PrecedenceProblem(problem.clone())); + + let right = arg_stack.pop().unwrap(); + let left = arg_stack.pop().unwrap(); + let broken_expr = new_op_expr(left, next_op, right); + let region = broken_expr.region.clone(); + let value = Expr::RuntimeError(InvalidPrecedence( + problem, + Box::new(broken_expr), + )); + + return Located { region, value }; + } + + _ => { + // The operators had the same precedence but different associativity. + // + // In many languages, this case can happen due to (for example) <| and |> having the same + // precedence but different associativity. 
Languages which support custom operators with + // (e.g. Haskell) can potentially have arbitrarily many of these cases. + // + // By design, Roc neither allows custom operators nor has any built-in operators with + // the same precedence and different associativity, so this should never happen! + panic!("Operators had the same associativity, but different precedence. This should never happen!"); + } + } + } + } + } + None => op_stack.push(next_op), + }; + } + } + } + + for op in op_stack.into_iter().rev() { + let right = arg_stack.pop().unwrap(); + let left = arg_stack.pop().unwrap(); + + arg_stack.push(Box::new(new_op_expr(left, op, right))); + } + + assert_eq!(arg_stack.len(), 1); + + *arg_stack.pop().unwrap() +} + +#[derive(Debug, Clone, PartialEq)] +enum InfixToken { + Arg(Box>), + Op(Located), +} + +/// An iterator that takes an expression that has had its operators grouped +/// with _right associativity_, and yeilds a sequence of `InfixToken`s. This +/// is useful for reparsing the operators with their correct associativies +/// and precedences. 
+/// +/// For example, the expression: +/// +/// ```text +/// (1 + (2 ^ (4 * (6 - 8)))) +/// ``` +/// +/// Will result in the following iterations: +/// +/// ```text +/// Arg: 1 +/// Op: + +/// Arg: 2 +/// Op: ^ +/// Arg: 4 +/// Op: * +/// Arg: 6 +/// Op: - +/// Arg: 8 +/// ``` +struct Infixes { + /// The next part of the expression that we need to flatten + remaining_expr: Option>>, + /// Cached operator from a previous iteration + next_op: Option>, +} + +impl Infixes { + fn new(expr: Located) -> Infixes { + Infixes { + remaining_expr: Some(Box::new(expr)), + next_op: None, + } + } +} + +impl Iterator for Infixes { + type Item = InfixToken; + + fn next(&mut self) -> Option { + match self.next_op.take() { + Some(op) => Some(InfixToken::Op(op)), + None => self.remaining_expr.take().map(|boxed_expr| { + let expr = *boxed_expr; + + match expr.value { + Expr::Operator(left, op, right) => { + self.remaining_expr = Some(right); + self.next_op = Some(op); + + InfixToken::Arg(left) + } + _ => InfixToken::Arg(Box::new(expr)), + } + }), + } + } +} + +#[inline(always)] +fn float_from_parsed<'a>(raw: &str, problems: &mut Vec) -> Expr { + // Ignore underscores. + match raw.replace("_", "").parse::() { + Ok(float) if float.is_finite() => Expr::Float(float), + _ => { + let runtime_error = FloatOutsideRange(raw.into()); + + problems.push(Problem::RuntimeError(runtime_error.clone())); + + Expr::RuntimeError(runtime_error) + } + } +} + +#[inline(always)] +fn int_from_parsed<'a>(raw: &str, problems: &mut Vec) -> Expr { + // Ignore underscores. + match raw.replace("_", "").parse::() { + Ok(int) => Expr::Int(int), + Err(_) => { + let runtime_error = IntOutsideRange(raw.into()); + + problems.push(Problem::RuntimeError(runtime_error.clone())); + + Expr::RuntimeError(runtime_error) + } + } +} + +#[inline(always)] +fn hex_from_parsed<'a>(raw: &str, problems: &mut Vec) -> Expr { + // Ignore underscores. 
+ match i64::from_str_radix(raw.replace("_", "").as_str(), 16) { + Ok(int) => Expr::Int(int), + Err(parse_err) => { + let runtime_error = InvalidHex(parse_err, raw.into()); + + problems.push(Problem::RuntimeError(runtime_error.clone())); + + Expr::RuntimeError(runtime_error) + } + } +} + +#[inline(always)] +fn oct_from_parsed<'a>(raw: &str, problems: &mut Vec) -> Expr { + // Ignore underscores. + match i64::from_str_radix(raw.replace("_", "").as_str(), 8) { + Ok(int) => Expr::Int(int), + Err(parse_err) => { + let runtime_error = InvalidOctal(parse_err, raw.into()); + + problems.push(Problem::RuntimeError(runtime_error.clone())); + + Expr::RuntimeError(runtime_error) + } + } +} + +#[inline(always)] +fn bin_from_parsed<'a>(raw: &str, problems: &mut Vec) -> Expr { + // Ignore underscores. + match i64::from_str_radix(raw.replace("_", "").as_str(), 2) { + Ok(int) => Expr::Int(int), + Err(parse_err) => { + let runtime_error = InvalidBinary(parse_err, raw.into()); + + problems.push(Problem::RuntimeError(runtime_error.clone())); + + Expr::RuntimeError(runtime_error) + } + } +} diff --git a/src/can/pattern.rs b/src/can/pattern.rs new file mode 100644 index 0000000000..c0b72b869e --- /dev/null +++ b/src/can/pattern.rs @@ -0,0 +1,203 @@ +use can::env::Env; +use can::problem::Problem; +use can::scope::Scope; +use can::symbol::Symbol; +use collections::ImMap; +use ident::{Ident, VariantName}; +use parse::ast; +use region::{Located, Region}; + +/// A pattern, including possible problems (e.g. shadowing) so that +/// codegen can generate a runtime error if this pattern is reached. 
+#[derive(Clone, Debug, PartialEq)] +pub enum Pattern { + Identifier(Symbol), + Variant(Symbol), + AppliedVariant(Symbol, Vec>), + IntLiteral(i64), + FloatLiteral(f64), + ExactString(String), + EmptyRecordLiteral, + Underscore, + + // Runtime Exceptions + Shadowed(Located), + UnrecognizedVariant(Located), + // Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments! + UnsupportedPattern(Region), +} + +/// Different patterns are supported in different circumstances. +/// For example, case branches can pattern match on number literals, but +/// assignments and function args can't. Underscore is supported in function +/// arg patterns and in case branch patterns, but not in assignments. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum PatternType { + Assignment, + FunctionArg, + CaseBranch, +} + +pub fn canonicalize_pattern( + _env: &mut Env, + _scope: &mut Scope, + _pattern_type: &PatternType, + _loc_pattern: &Located, + _shadowable_idents: &mut ImMap, +) -> Located { + panic!("TODO restore can_pattern"); + //use can::ast::Pattern::*; + + //let region = loc_pattern.region.clone(); + //let pattern = match &loc_pattern.value { + // &Identifier(ref name) => { + // let unqualified_ident = Ident::Unqualified(name.to_string()); + + // // We use shadowable_idents for this, and not scope, because for assignments + // // they are different. When canonicalizing a particular assignment, that new + // // ident is in scope (for recursion) but not shadowable. + // // + // // For example, when canonicalizing (fibonacci = ...), `fibonacci` should be in scope + // // so that it can refer to itself without getting a naming problem, but it should not + // // be in the collection of shadowable idents because you can't shadow yourself! 
+ // match shadowable_idents.get(&unqualified_ident) { + // Some((_, region)) => { + // let loc_shadowed_ident = Located { + // region: region.clone(), + // value: unqualified_ident, + // }; + + // // This is already in scope, meaning it's about to be shadowed. + // // Shadowing is not allowed! + // env.problem(Problem::Shadowing(loc_shadowed_ident.clone())); + + // // Change this Pattern to a Shadowed variant, so that + // // codegen knows to generate a runtime exception here. + // Pattern::Shadowed(loc_shadowed_ident) + // } + // None => { + // // Make sure we aren't shadowing something in the home module's scope. + // let qualified_ident = + // Ident::Qualified(env.home.clone(), unqualified_ident.name()); + + // match scope.idents.get(&qualified_ident) { + // Some((_, region)) => { + // let loc_shadowed_ident = Located { + // region: region.clone(), + // value: qualified_ident, + // }; + + // // This is already in scope, meaning it's about to be shadowed. + // // Shadowing is not allowed! + // env.problem(Problem::Shadowing(loc_shadowed_ident.clone())); + + // // Change this Pattern to a Shadowed variant, so that + // // codegen knows to generate a runtime exception here. + // Pattern::Shadowed(loc_shadowed_ident) + // } + // None => { + // let new_ident = qualified_ident.clone(); + // let new_name = qualified_ident.name(); + // let symbol = scope.symbol(&new_name); + + // // This is a fresh identifier that wasn't already in scope. + // // Add it to scope! + // let symbol_and_region = (symbol.clone(), region.clone()); + + // // Add this to both scope.idents *and* shadowable_idents. + // // The latter is relevant when recursively canonicalizing Variant patterns, + // // which can bring multiple new idents into scope. For example, it's important + // // that we catch (Blah foo foo) as being an example of shadowing. 
+ // scope + // .idents + // .insert(new_ident.clone(), symbol_and_region.clone()); + // shadowable_idents.insert(new_ident, symbol_and_region); + + // Pattern::Identifier(symbol) + // } + // } + // } + // } + // } + + // &AppliedVariant((ref loc_name, ref loc_args)) => { + // // Canonicalize the variant's arguments. + // let mut can_args: Vec> = Vec::new(); + + // for loc_arg in loc_args { + // let loc_can_arg = + // canonicalize_pattern(env, scope, pattern_type, &loc_arg, shadowable_idents); + + // can_args.push(loc_can_arg); + // } + + // // Canonicalize the variant's name. + // let symbol = Symbol::from_variant(&loc_name.value, &env.home); + + // if env.variants.contains_key(&symbol) { + // // No problems; the qualified variant name was in scope! + // Pattern::AppliedVariant(symbol, can_args) + // } else { + // // We couldn't find the variant name in scope. NAMING PROBLEM! + // env.problem(Problem::UnrecognizedVariant(loc_name.clone())); + + // Pattern::UnrecognizedVariant(loc_name.clone()) + // } + // } + + // &Variant(ref loc_name) => { + // // Canonicalize the variant's name. + // let symbol = Symbol::from_variant(&loc_name.value, &env.home); + + // if env.variants.contains_key(&symbol) { + // // No problems; the qualified variant name was in scope! Pattern::Variant(symbol) + // } else { + // // We couldn't find the variant name in scope. NAMING PROBLEM! 
+ // env.problem(Problem::UnrecognizedVariant(loc_name.clone())); + + // Pattern::UnrecognizedVariant(loc_name.clone()) + // } + // } + + // &IntLiteral(ref num) => match pattern_type { + // CaseBranch => Pattern::IntLiteral(*num), + // ptype @ Assignment | ptype @ FunctionArg => { + // unsupported_pattern(env, *ptype, region) + // } + // }, + + // &FloatLiteral(ref num) => match pattern_type { + // CaseBranch => Pattern::FloatLiteral(*num), + // ptype @ Assignment | ptype @ FunctionArg => { + // unsupported_pattern(env, *ptype, region) + // } + // }, + + // &ExactString(ref string) => match pattern_type { + // CaseBranch => Pattern::ExactString(string.clone()), + // ptype @ Assignment | ptype @ FunctionArg => { + // unsupported_pattern(env, *ptype, region) + // } + // }, + + // &Underscore => match pattern_type { + // CaseBranch | FunctionArg => Pattern::Underscore, + // Assignment => unsupported_pattern(env, Assignment, region), + // }, + + // &EmptyRecordLiteral => Pattern::EmptyRecordLiteral, + //}; + + //Located { + // region, + // value: pattern, + //} +} + +/// When we detect an unsupported pattern type (e.g. 5 = 1 + 2 is unsupported because you can't +/// assign to Int patterns), report it to Env and return an UnsupportedPattern runtime error pattern. +fn _unsupported_pattern<'a>(env: &mut Env, pattern_type: PatternType, region: Region) -> Pattern { + env.problem(Problem::UnsupportedPattern(pattern_type, region.clone())); + + Pattern::UnsupportedPattern(region) +} diff --git a/src/can/problem.rs b/src/can/problem.rs new file mode 100644 index 0000000000..3bc2eca4f9 --- /dev/null +++ b/src/can/problem.rs @@ -0,0 +1,44 @@ +use can::expr::Expr; +use can::pattern::{Pattern, PatternType}; +use ident::{Ident, VariantName}; +use operator::Operator; +use region::{Located, Region}; + +/// Problems that can occur in the course of canonicalization. 
+#[derive(Clone, Debug, PartialEq)] +pub enum Problem { + Shadowing(Located), + UnrecognizedFunctionName(Located), + UnrecognizedConstant(Located), + UnrecognizedVariant(Located), + UnusedAssignment(Located), + UnusedArgument(Located), + PrecedenceProblem(PrecedenceProblem), + // Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments! + UnsupportedPattern(PatternType, Region), + CircularAssignment(Vec>), + RuntimeError(RuntimeError), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum PrecedenceProblem { + BothNonAssociative(Located, Located), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum RuntimeError { + InvalidPrecedence(PrecedenceProblem, Box>), + UnrecognizedFunctionName(Located), + UnrecognizedConstant(Located), + UnrecognizedVariant(Located), + FloatOutsideRange(Box), + IntOutsideRange(Box), + InvalidHex(std::num::ParseIntError, Box), + InvalidOctal(std::num::ParseIntError, Box), + InvalidBinary(std::num::ParseIntError, Box), + CircularAssignment( + Vec>, + Vec<(Located, Located)>, + Box>, + ), +} diff --git a/src/can/procedure.rs b/src/can/procedure.rs new file mode 100644 index 0000000000..abf61ef7aa --- /dev/null +++ b/src/can/procedure.rs @@ -0,0 +1,72 @@ +use can::expr::Expr; +use can::pattern::Pattern; +use can::symbol::Symbol; +use collections::ImSet; +use region::{Located, Region}; + +#[derive(Clone, Debug, PartialEq)] +pub struct Procedure { + pub name: Option, + pub is_self_tail_recursive: bool, + pub definition: Region, + pub args: Vec>, + pub body: Located, + pub references: References, +} + +impl Procedure { + pub fn new( + definition: Region, + args: Vec>, + body: Located, + references: References, + ) -> Procedure { + Procedure { + name: None, + is_self_tail_recursive: false, + definition, + args, + body, + references, + } + } +} + +/// These are all ordered sets because they end up getting traversed in a graph search +/// to determine how assignments shuold be ordered. 
We want builds to be reproducible, +/// so it's important that building the same code gives the same order every time! +#[derive(Clone, Debug, PartialEq)] +pub struct References { + pub locals: ImSet, + pub globals: ImSet, + pub variants: ImSet, + pub calls: ImSet, +} + +impl References { + pub fn new() -> References { + References { + locals: ImSet::default(), + globals: ImSet::default(), + variants: ImSet::default(), + calls: ImSet::default(), + } + } + + pub fn union(mut self, other: References) -> Self { + self.locals = self.locals.union(other.locals); + self.globals = self.globals.union(other.globals); + self.variants = self.variants.union(other.variants); + self.calls = self.calls.union(other.calls); + + self + } + + pub fn has_local(&self, symbol: &Symbol) -> bool { + self.locals.contains(symbol) + } + + pub fn has_variant(&self, symbol: &Symbol) -> bool { + self.variants.contains(symbol) + } +} diff --git a/src/can/scope.rs b/src/can/scope.rs new file mode 100644 index 0000000000..7ee6aefe99 --- /dev/null +++ b/src/can/scope.rs @@ -0,0 +1,35 @@ +use can::symbol::Symbol; +use collections::ImMap; +use ident::Ident; +use region::Region; + +#[derive(Clone, Debug, PartialEq)] +pub struct Scope { + pub idents: ImMap, + symbol_prefix: String, + next_unique_id: u64, +} + +impl Scope { + pub fn new(symbol_prefix: String, declared_idents: ImMap) -> Scope { + Scope { + symbol_prefix, + + // This is used to generate unique names for anonymous closures. + // It always begins at 0. 
+ next_unique_id: 0, + + idents: declared_idents, + } + } + + pub fn symbol(&self, name: &str) -> Symbol { + Symbol::new(&self.symbol_prefix, name) + } + + pub fn gen_unique_symbol(&mut self) -> Symbol { + self.next_unique_id = self.next_unique_id + 1; + + Symbol::new(&self.symbol_prefix, &self.next_unique_id.to_string()) + } +} diff --git a/src/can/string.rs b/src/can/string.rs new file mode 100644 index 0000000000..d73f9deea5 --- /dev/null +++ b/src/can/string.rs @@ -0,0 +1,445 @@ +// use bumpalo::collections::string::String; +// use bumpalo::collections::vec::Vec; +use bumpalo::Bump; +use parse::ast::Expr; +// use parse::ast::{Attempting, Expr}; +// use parse::ident; +// use parse::parser::{unexpected, unexpected_eof, Fail, Parser, State}; +// use parse::problems::{Problem, Problems}; +// use region::{Loc, Region}; +use region::Region; +// use std::char; +// use std::iter::Peekable; + +pub fn canonical_string_literal<'a>(_arena: &Bump, _raw: &'a str, _region: Region) -> Expr<'a> { + panic!("TODO restore canonicalization"); +} +// let mut problems = std::vec::Vec::new(); + +// // Stores the accumulated string characters +// let mut buf = String::new_in(arena); + +// // This caches the total string length of interpolated_pairs. Every +// // time we add a new pair to interpolated_pairs, we increment this +// // by the sum of whatever we parsed in order to obtain that pair. +// let mut buf_col_offset: usize = 0; + +// // Stores interpolated identifiers, if any. +// let mut interpolated_pairs = Vec::new_in(arena); + +// let mut chars = raw.chars(); + +// while let Some(ch) = chars.next() { +// match ch { +// // If it's a backslash, escape things. +// '\\' => match chars.next() { +// Some(next_ch) => { +// if let Some(ident) = handle_escaped_char( +// arena, +// &state, +// next_ch, +// &mut chars, +// &mut buf, +// &mut problems, +// )? 
{ +// let expr = Expr::Var(ident); + +// // +2 for `\(` and then another +1 for `)` at the end +// let parsed_length = buf.len() + 2 + ident.len() + 1; + +// // Casting should always succeed in this section, because +// // if this string literal overflowed our maximum +// // line length, that would have already happened back +// // in the parsing step, and we never would have reached +// // this code. Still, debug_assert that they won't! +// debug_assert!(buf_col_offset <= std::u16::MAX as usize); +// debug_assert!(ident.len() <= std::u16::MAX as usize); +// debug_assert!((parsed_length - ident.len() - 1) <= std::u16::MAX as usize); + +// let start_line = state.line; + +// // Subtract ident length and another 1 for the `)` +// let start_col = state.column +// + buf_col_offset as u16 +// + (parsed_length - ident.len() - 1) as u16; +// let ident_region = Region { +// start_line, +// start_col, +// end_line: start_line, +// end_col: start_col + ident.len() as u16 - 1, +// }; +// let loc_expr = Loc { +// region: ident_region, +// value: expr, +// }; + +// // Push the accumulated string into the pairs list, +// // along with the ident that came after it. +// interpolated_pairs.push((buf.into_bump_str(), loc_expr)); + +// // Reset the buffer so we start working on a new string. +// buf = String::new_in(arena); + +// // Advance the cached offset of how many chars we've parsed, +// // so the next time we see an interpolated ident, we can +// // correctly calculate its region. +// buf_col_offset += parsed_length; +// } +// } +// None => { +// problems.push(loc_char(Problem::TrailingBackslash, &state, buf.len())); +// } +// }, +// '\t' => { +// // Tabs are syntax errors. +// problems.push(loc_char(Problem::Tab, &state, buf.len())); +// } +// '\r' => { +// // Carriage returns aren't allowed in string literals. 
+// problems.push(loc_char(Problem::CarriageReturn, &state, buf.len())); +// } +// normal_char => buf.push(normal_char), +// } +// } + +// // We ran out of characters; this is the end of the string! +// if problems.is_empty() { +// let final_str = buf.into_bump_str(); + +// if interpolated_pairs.is_empty() { +// Expr::Str(final_str) +// } else { +// let tuple_ref = arena.alloc((interpolated_pairs.into_bump_slice(), final_str)); + +// Expr::InterpolatedStr(tuple_ref) +// } +// } else { +// Expr::MalformedStr(problems.into_boxed_slice()) +// } +// } + +// fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc { +// let start_line = state.line; +// let start_col = state.column + buf_len as u16; +// let end_line = start_line; +// // All invalid chars should have a length of 1 +// let end_col = state.column + 1; + +// let region = Region { +// start_line, +// start_col, +// end_line, +// end_col, +// }; + +// Loc { region, value } +// } + +// fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc { +// let start_line = state.line; +// let start_col = state.column + buf_len as u16; +// let end_line = start_line; +// // escapes should all be 2 chars long +// let end_col = state.column + 1; + +// let region = Region { +// start_line, +// start_col, +// end_line, +// end_col, +// }; + +// Loc { region, value } +// } + +// fn loc_escaped_unicode<'a, V>( +// value: V, +// state: &State<'a>, +// buf_len: usize, +// hex_str_len: usize, +// ) -> Loc { +// let start_line = state.line; +// // +1 due to the `"` which precedes buf. +// let start_col = state.column + buf_len as u16 + 1; +// let end_line = start_line; +// // +3 due to the `\u{` and another + 1 due to the `}` +// // -1 to prevent overshooting because end col is inclusive. 
+// let end_col = start_col + 3 + hex_str_len as u16 + 1 - 1; + +// let region = Region { +// start_line, +// start_col, +// end_line, +// end_col, +// }; + +// Loc { region, value } +// } + +// #[inline(always)] +// fn handle_escaped_char<'a, I>( +// arena: &'a Bump, +// state: &State<'a>, +// ch: char, +// chars: &mut Peekable, +// buf: &mut String<'a>, +// problems: &mut Problems, +// ) -> Result, (Fail, State<'a>)> +// where +// I: Iterator, +// { +// match ch { +// '\\' => buf.push('\\'), +// '"' => buf.push('"'), +// 't' => buf.push('\t'), +// 'n' => buf.push('\n'), +// 'r' => buf.push('\r'), +// '0' => buf.push('\0'), // We explicitly support null characters, as we +// // can't be sure we won't receive them from Rust. +// 'u' => handle_escaped_unicode(arena, &state, chars, buf, problems)?, +// '(' => { +// let ident = parse_interpolated_ident(arena, state, chars)?; + +// return Ok(Some(ident)); +// } +// '\t' => { +// // Report and continue. +// // Tabs are syntax errors, but maybe the rest of the string is fine! +// problems.push(loc_escaped_char(Problem::Tab, &state, buf.len())); +// } +// '\r' => { +// // Report and continue. +// // Carriage returns aren't allowed in string literals, +// // but maybe the rest of the string is fine! +// problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len())); +// } +// '\n' => { +// // Report and bail out. +// // We can't safely assume where the string was supposed to end. +// problems.push(loc_escaped_char( +// Problem::NewlineInLiteral, +// &state, +// buf.len(), +// )); + +// return Err(unexpected_eof( +// buf.len(), +// Attempting::UnicodeEscape, +// state.clone(), +// )); +// } +// _ => { +// // Report and continue. +// // An unsupported escaped char (e.g. \q) shouldn't halt parsing. 
+// problems.push(loc_escaped_char( +// Problem::UnsupportedEscapedChar, +// &state, +// buf.len(), +// )); +// } +// } + +// Ok(None) +// } + +// #[inline(always)] +// fn handle_escaped_unicode<'a, I>( +// arena: &'a Bump, +// state: &State<'a>, +// chars: &mut Peekable, +// buf: &mut String<'a>, +// problems: &mut Problems, +// ) -> Result<(), (Fail, State<'a>)> +// where +// I: Iterator, +// { +// // \u{00A0} is how you specify a Unicode code point, +// // so we should always see a '{' next. +// if chars.next() != Some('{') { +// let start_line = state.line; +// // +1 due to the `"` which precedes buf +// let start_col = state.column + 1 + buf.len() as u16; +// let end_line = start_line; + +// // All we parsed was `\u`, so end on the column after `\`'s column. +// let end_col = start_col + 1; + +// let region = Region { +// start_line, +// start_col, +// end_line, +// end_col, +// }; + +// problems.push(Loc { +// region, +// value: Problem::NoUnicodeDigits, +// }); + +// // The rest of the string literal might be fine. Keep parsing! +// return Ok(()); +// } + +// // Record the point in the string literal where we started parsing `\u` +// let start_of_unicode = buf.len(); + +// // Stores the accumulated unicode digits +// let mut hex_str = String::new_in(arena); + +// while let Some(hex_char) = chars.next() { +// match hex_char { +// '}' => { +// // Done! Validate and add it to the buffer. +// match u32::from_str_radix(&hex_str, 16) { +// Ok(code_pt) => { +// if code_pt > 0x10FFFF { +// let start_line = state.line; +// // +1 due to the `"` which precedes buf +// // +3 due to the `\u{` which precedes the hex digits +// let start_col = state.column + 1 + buf.len() as u16 + 3; +// let end_line = start_line; + +// // We want to underline only the number. That's the error! +// // -1 because we want to end on the last digit, not +// // overshoot it. 
+// let end_col = start_col + hex_str.len() as u16 - 1; + +// let region = Region { +// start_line, +// start_col, +// end_line, +// end_col, +// }; + +// problems.push(Loc { +// region, +// value: Problem::UnicodeCodePointTooLarge, +// }); +// } else { +// // If it all checked out, add it to +// // the main buffer. +// match char::from_u32(code_pt) { +// Some(ch) => buf.push(ch), +// None => { +// problems.push(loc_escaped_unicode( +// Problem::InvalidUnicodeCodePoint, +// &state, +// start_of_unicode, +// hex_str.len(), +// )); +// } +// } +// } +// } +// Err(_) => { +// let problem = if hex_str.is_empty() { +// Problem::NoUnicodeDigits +// } else { +// Problem::NonHexCharsInUnicodeCodePoint +// }; + +// problems.push(loc_escaped_unicode( +// problem, +// &state, +// start_of_unicode, +// hex_str.len(), +// )); +// } +// } + +// // We are now done processing the unicode portion of the string, +// // so exit the loop without further advancing the iterator. +// return Ok(()); +// } +// '\t' => { +// // Report and continue. +// // Tabs are syntax errors, but maybe the rest of the string is fine! +// problems.push(loc_escaped_unicode( +// Problem::Tab, +// &state, +// start_of_unicode, +// hex_str.len(), +// )); +// } +// '\r' => { +// // Report and continue. +// // Carriage returns aren't allowed in string literals, +// // but maybe the rest of the string is fine! +// problems.push(loc_escaped_unicode( +// Problem::CarriageReturn, +// &state, +// start_of_unicode, +// hex_str.len(), +// )); +// } +// '\n' => { +// // Report and bail out. +// // We can't safely assume where the string was supposed to end. 
+// problems.push(loc_escaped_unicode( +// Problem::NewlineInLiteral, +// &state, +// start_of_unicode, +// hex_str.len(), +// )); + +// return Err(unexpected_eof( +// buf.len(), +// Attempting::UnicodeEscape, +// state.clone(), +// )); +// } +// normal_char => hex_str.push(normal_char), +// } + +// // If we're about to hit the end of the string, and we didn't already +// // complete parsing a valid unicode escape sequence, this is a malformed +// // escape sequence - it wasn't terminated! +// if chars.peek() == Some(&'"') { +// // Record a problem and exit the loop early, so the string literal +// // parsing logic can consume the quote and do its job as normal. +// let start_line = state.line; +// // +1 due to the `"` which precedes buf. +// let start_col = state.column + buf.len() as u16 + 1; +// let end_line = start_line; +// // +3 due to the `\u{` +// // -1 to prevent overshooting because end col is inclusive. +// let end_col = start_col + 3 + hex_str.len() as u16 - 1; + +// let region = Region { +// start_line, +// start_col, +// end_line, +// end_col, +// }; + +// problems.push(Loc { +// region, +// value: Problem::MalformedEscapedUnicode, +// }); + +// return Ok(()); +// } +// } + +// Ok(()) +// } + +// #[inline(always)] +// fn parse_interpolated_ident<'a, I>( +// arena: &'a Bump, +// state: &State<'a>, +// chars: &mut Peekable, +// ) -> Result<&'a str, (Fail, State<'a>)> +// where +// I: Iterator, +// { +// // This will return Err on invalid identifiers like "if" +// let ((string, next_char), state) = ident::parse_into(arena, chars, state.clone())?; + +// // Make sure we got a closing ) to end the interpolation. 
+// match next_char { +// Some(')') => Ok(string), +// Some(ch) => Err(unexpected(ch, 0, state, Attempting::InterpolatedString)), +// None => Err(unexpected_eof(0, Attempting::InterpolatedString, state)), +// } +// } diff --git a/src/can/symbol.rs b/src/can/symbol.rs new file mode 100644 index 0000000000..52fc8f1c0b --- /dev/null +++ b/src/can/symbol.rs @@ -0,0 +1,28 @@ +use ident::VariantName; + +/// A globally unique identifier, used for both vars and variants. +/// It will be used directly in code gen. +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Symbol(String); + +impl Symbol { + pub fn new(prefix: &str, name: &str) -> Symbol { + Symbol(format!("{}{}", prefix, name)) + } + + pub fn from_variant(variant_name: &VariantName, home: &str) -> Symbol { + match &variant_name { + &VariantName::Unqualified(ref name) => Symbol::new(home, name), + + &VariantName::Qualified(ref path, ref name) => Symbol::new(path, name), + } + } +} + +impl Into for Symbol { + fn into(self) -> String { + let Symbol(string) = self; + + string + } +} diff --git a/src/canonicalize.rs b/src/canonicalize.rs deleted file mode 100644 index faf48993ef..0000000000 --- a/src/canonicalize.rs +++ /dev/null @@ -1,1710 +0,0 @@ -use self::PatternType::*; -use collections::{ImMap, ImSet, MutMap, MutSet}; -use expr; -use expr::{Ident, VariantName}; -use graph::{strongly_connected_component, topological_sort}; -use operator::Associativity::*; -use operator::Operator; -use operator::Operator::Pizza; -use region::{Located, Region}; -use std::cmp::Ordering; - -// #[derive(Clone, Debug, PartialEq)] -// pub enum CanExpr { -// // Literals -// Int(i64), -// Float(f64), -// EmptyStr, -// Str(Box), -// Char(char), -// List(Vec>), -// EmptyList, -// EmptyRecord, -// } - -// fn _canonicalize<'a>(raw: &'a str, expr: Expr<'a>) -> CanExpr { -// use self::CanExpr::*; - -// match expr { -// Expr::Int(num) => Int(num), -// Expr::Float(num) => Float(num), -// Expr::EmptyRecord => EmptyRecord, -// 
Expr::ShortStr(bytes) => { -// let boxed: Box = unsafe { -// // This is safe because these bytes were read directly out -// // of a utf-8 string, along appropriate code point boundaries. -// std::str::from_utf8_unchecked(&bytes) -// }.into(); - -// Str(boxed) -// }, -// Expr::MedStr(offset, len) => { -// let boxed: Box = raw[offset..(offset + len as usize)].into(); - -// Str(boxed) -// } -// Expr::LongStr(boxed_str) => Str((*boxed_str).into()), -// Expr::EmptyStr => EmptyStr, -// Expr::EmptyList => EmptyList, -// _ => panic!("disco") -// } -// } - -#[derive(Clone, Debug, PartialEq)] -pub enum Expr { - // Literals - Int(i64), - Float(f64), - EmptyStr, - Str(String), - Char(char), - List(Vec>), - EmptyList, - - // Lookups - Var(Symbol), - /// Works the same as Var, but has an important marking purpose. - /// See 13623e3f5f65ea2d703cf155f16650c1e8246502 for the bug this fixed. - FunctionPointer(Symbol), - InterpolatedStr(Vec<(String, Located)>, String), - - // Pattern Matching - Case(Box>, Vec<(Located, Located)>), - Assign(Vec<(Located, Located)>, Box>), - - // Application - Call(Box>, Vec>), - ApplyVariant(Symbol, Option>>), - - // Product Types - EmptyRecord, - - // Sugar - If(Box>, Box>, Box>), - Operator(Box>, Located, Box>), - - // Runtime Errors - InvalidPrecedence(PrecedenceProblem, Box>), - UnrecognizedFunctionName(Located), - UnrecognizedConstant(Located), - UnrecognizedVariant(Located), - CircularAssignment( - Vec>, - Vec<(Located, Located)>, - Box>, - ), -} - -/// Problems that can occur in the course of canonicalization. -#[derive(Clone, Debug, PartialEq)] -pub enum Problem { - Shadowing(Located), - UnrecognizedFunctionName(Located), - UnrecognizedConstant(Located), - UnrecognizedVariant(Located), - UnusedAssignment(Located), - UnusedArgument(Located), - PrecedenceProblem(PrecedenceProblem), - // Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments! 
- UnsupportedPattern(PatternType, Located), - CircularAssignment(Vec>), -} - -/// A pattern, including possible problems (e.g. shadowing) so that -/// codegen can generate a runtime error if this pattern is reached. -#[derive(Clone, Debug, PartialEq)] -pub enum Pattern { - Identifier(Symbol), - Variant(Symbol, Option>>), - IntLiteral(i64), - FloatLiteral(f64), - ExactString(String), - EmptyRecordLiteral, - Underscore, - - // Runtime Exceptions - Shadowed(Located), - UnrecognizedVariant(Located), - // Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments! - UnsupportedPattern(Located), -} - -/// A globally unique identifier, used for both vars and variants. -/// It will be used directly in code gen. -#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct Symbol(String); - -impl Symbol { - pub fn new(prefix: &str, name: &str) -> Symbol { - Symbol(format!("{}{}", prefix, name)) - } - - pub fn from_variant(variant_name: &VariantName, home: &str) -> Symbol { - match &variant_name { - &VariantName::Unqualified(ref name) => Symbol::new(home, name), - - &VariantName::Qualified(ref path, ref name) => Symbol::new(path, name), - } - } -} - -impl Into for Symbol { - fn into(self) -> String { - let Symbol(string) = self; - - string - } -} - -#[derive(Clone, Debug, PartialEq)] -struct Scope { - pub idents: ImMap, - symbol_prefix: String, - next_unique_id: u64, -} - -impl Scope { - pub fn new(symbol_prefix: String, declared_idents: ImMap) -> Scope { - Scope { - symbol_prefix, - - // This is used to generate unique names for anonymous closures. - // It always begins at 0. 
- next_unique_id: 0, - - idents: declared_idents, - } - } - - pub fn symbol(&self, name: &str) -> Symbol { - Symbol::new(&self.symbol_prefix, name) - } - - pub fn gen_unique_symbol(&mut self) -> Symbol { - self.next_unique_id = self.next_unique_id + 1; - - Symbol::new(&self.symbol_prefix, &self.next_unique_id.to_string()) - } -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Procedure { - pub name: Option, - pub is_self_tail_recursive: bool, - pub definition: Region, - pub args: Vec>, - pub body: Located, - pub references: References, -} - -impl Procedure { - pub fn new( - definition: Region, - args: Vec>, - body: Located, - references: References, - ) -> Procedure { - Procedure { - name: None, - is_self_tail_recursive: false, - definition, - args, - body, - references, - } - } -} - -/// The canonicalization environment for a particular module. -struct Env { - /// The module's path. Unqualified references to identifiers and variant names are assumed - /// to be relative to this path. - home: String, - - /// Problems we've encountered along the way, which will be reported to the user at the end. - problems: Vec, - - /// Variants either declared in this module, or imported. - variants: ImMap>, - - /// Former closures converted to top-level procedures. - procedures: MutMap, -} - -impl Env { - pub fn new(home: String, declared_variants: ImMap>) -> Env { - Env { - home, - variants: declared_variants, - problems: Vec::new(), - procedures: MutMap::default(), - } - } - - pub fn problem(&mut self, problem: Problem) -> () { - self.problems.push(problem) - } - - pub fn register_closure( - &mut self, - symbol: Symbol, - args: Vec>, - body: Located, - definition: Region, - references: References, - ) -> () { - // We can't if the closure is self tail recursive yet, because it doesn't know its final name yet. - // (Assign sets that.) Assume this is false, and let Assign change it to true after it sets final name. 
- let is_self_tail_recursive = false; - let name = None; // The Assign logic is also responsible for setting names after the fact. - let procedure = Procedure { - args, - name, - body, - is_self_tail_recursive, - definition, - references, - }; - - self.procedures.insert(symbol, procedure); - } -} - -pub fn canonicalize_declaration( - home: String, - name: &str, - loc_expr: Located, - declared_idents: &ImMap, - declared_variants: &ImMap>, -) -> ( - Located, - Output, - Vec, - MutMap, -) { - // If we're canonicalizing the declaration `foo = ...` inside the `Main` module, - // scope_prefix will be "Main$foo$" and its first closure will be named "Main$foo$0" - let scope_prefix = format!("{}${}$", home, name); - let mut scope = Scope::new(scope_prefix, declared_idents.clone()); - let mut env = Env::new(home, declared_variants.clone()); - let (mut new_loc_expr, output) = canonicalize(&mut env, &mut scope, loc_expr); - - // Apply operator precedence and associativity rules once, after canonicalization is - // otherwise complete. If we did this *during* canonicalization, then each time we - // visited an Operator node we'd recursively try to apply this to each of its nested - // operators, and thena again on *their* nested operators, ultimately applying the - // rules multiple times unnecessarily. - new_loc_expr = apply_precedence_and_associativity(&mut env, new_loc_expr); - - (new_loc_expr, output, env.problems, env.procedures) -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Output { - pub references: References, - pub tail_call: Option, -} - -/// These are all ordered sets because they end up getting traversed in a graph search -/// to determine how assignments shuold be ordered. We want builds to be reproducible, -/// so it's important that building the same code gives the same order every time! 
-#[derive(Clone, Debug, PartialEq)] -pub struct References { - pub locals: ImSet, - pub globals: ImSet, - pub variants: ImSet, - pub calls: ImSet, -} - -impl References { - pub fn new() -> References { - References { - locals: ImSet::default(), - globals: ImSet::default(), - variants: ImSet::default(), - calls: ImSet::default(), - } - } - - pub fn union(mut self, other: References) -> Self { - self.locals = self.locals.union(other.locals); - self.globals = self.globals.union(other.globals); - self.variants = self.variants.union(other.variants); - self.calls = self.calls.union(other.calls); - - self - } - - pub fn has_local(&self, symbol: &Symbol) -> bool { - self.locals.contains(symbol) - } - - pub fn has_variant(&self, symbol: &Symbol) -> bool { - self.variants.contains(symbol) - } -} - -impl Output { - pub fn new() -> Output { - Output { - references: References::new(), - tail_call: None, - } - } -} - -fn canonicalize( - env: &mut Env, - scope: &mut Scope, - loc_expr: Located, -) -> (Located, Output) { - use self::Expr::*; - - let (expr, output) = match loc_expr.value { - expr::Expr::Int(num) => (Int(num), Output::new()), - expr::Expr::Float(num) => (Float(num), Output::new()), - expr::Expr::EmptyRecord => (EmptyRecord, Output::new()), - expr::Expr::Str(string) => (Str(string), Output::new()), - expr::Expr::Char(ch) => (Char(ch), Output::new()), - expr::Expr::EmptyStr => (EmptyStr, Output::new()), - expr::Expr::EmptyList => (EmptyList, Output::new()), - expr::Expr::List(elems) => { - let mut output = Output::new(); - let mut can_elems = Vec::with_capacity(elems.len()); - - for loc_elem in elems { - let (can_expr, elem_out) = canonicalize(env, scope, loc_elem); - - output.references = output.references.union(elem_out.references); - - can_elems.push(can_expr); - } - - // A list literal is never a tail call! 
- output.tail_call = None; - - (List(can_elems), output) - } - - expr::Expr::If(loc_cond, loc_true, loc_false) => { - // Canonicalize the nested expressions - let (cond_expr, cond_out) = canonicalize(env, scope, *loc_cond); - let (true_expr, true_out) = canonicalize(env, scope, *loc_true); - let (false_expr, false_out) = canonicalize(env, scope, *loc_false); - - // Incorporate all three expressions into a combined Output value. - let expr = If( - Box::new(cond_expr), - Box::new(true_expr), - Box::new(false_expr), - ); - let mut output = cond_out; - - // If both branches are tail calling the same symbol, then so is the conditional as a whole. - // Also, if both branches are not tail calls (tail_call == None), then so is the conditional. - // If the branches are different, we leave the default of None as-is. - if true_out.tail_call == false_out.tail_call { - output.tail_call = true_out.tail_call; - } - - // To evaluate the whole if-expression, we depend on all the values that both branches depend on. - output.references = output.references.union(true_out.references); - output.references = output.references.union(false_out.references); - - (expr, output) - } - - expr::Expr::Apply(loc_fn, loc_args) => { - // Canonicalize the function expression and its arguments - let (fn_expr, mut output) = canonicalize(env, scope, *loc_fn); - let mut args = Vec::new(); - let mut outputs = Vec::new(); - - for loc_arg in loc_args { - let (arg_expr, arg_out) = canonicalize(env, scope, loc_arg); - - args.push(arg_expr); - outputs.push(arg_out); - } - - match &fn_expr.value { - &Var(ref sym) => { - output.references.calls.insert(sym.clone()); - } - _ => (), - }; - - let expr = Call(Box::new(fn_expr), args); - - for arg_out in outputs { - output.references = output.references.union(arg_out.references); - } - - // We're not tail-calling a symbol (by name), we're tail-calling a function value. 
- output.tail_call = None; - - (expr, output) - } - - expr::Expr::Operator(loc_left, op, loc_right) => { - // Canonicalize the nested expressions - let (left_expr, left_out) = canonicalize(env, scope, *loc_left); - let (right_expr, mut output) = canonicalize(env, scope, *loc_right); - - // Incorporate both expressions into a combined Output value. - output.references = output.references.union(left_out.references); - - // The pizza operator is the only one that can be a tail call, - // because it's the only one that can call a function by name. - output.tail_call = match op.value { - Pizza => match &right_expr.value { - &Var(ref sym) => Some(sym.clone()), - &Call(ref loc_boxed_expr, _) => match (*loc_boxed_expr.clone()).value { - Var(sym) => Some(sym), - _ => None, - }, - _ => None, - }, - _ => None, - }; - - let expr = Operator(Box::new(left_expr), op, Box::new(right_expr)); - - (expr, output) - } - - expr::Expr::Var(ident) => { - let mut output = Output::new(); - let can_expr = match resolve_ident(&env, &scope, ident, &mut output.references) { - Ok(symbol) => Var(symbol), - Err(ident) => { - let loc_ident = Located { - region: loc_expr.region.clone(), - value: ident, - }; - - env.problem(Problem::UnrecognizedConstant(loc_ident.clone())); - - UnrecognizedConstant(loc_ident) - } - }; - - (can_expr, output) - } - - expr::Expr::InterpolatedStr(pairs, suffix) => { - let mut output = Output::new(); - let can_pairs: Vec<(String, Located)> = pairs - .into_iter() - .map(|(string, loc_ident)| { - // From a language design perspective, we only permit idents in interpolation. - // However, in a canonical Expr we store it as a full Expr, not a Symbol. - // This is so that we can resolve it to either Var or Unrecognized; if we - // stored it as a Symbol, we couldn't record runtime errors here. 
- let can_expr = match resolve_ident( - &env, - &scope, - loc_ident.value, - &mut output.references, - ) { - Ok(symbol) => Var(symbol), - Err(ident) => { - let loc_ident = Located { - region: loc_ident.region.clone(), - value: ident, - }; - - env.problem(Problem::UnrecognizedConstant(loc_ident.clone())); - - UnrecognizedConstant(loc_ident) - } - }; - - ( - string, - Located { - region: loc_ident.region, - value: can_expr, - }, - ) - }) - .collect(); - - (InterpolatedStr(can_pairs, suffix), output) - } - - expr::Expr::ApplyVariant(variant_name, opt_args) => { - // Canonicalize the arguments and union their references into our output. - // We'll do this even if the variant name isn't recognized, since we still - // want to report canonicalization problems with the variant's arguments, - // and their references still matter for purposes of detecting unused things. - let mut output = Output::new(); - - let opt_can_args = match opt_args { - Some(args) => { - let mut can_args = Vec::with_capacity(args.len()); - - for arg in args { - let (loc_expr, arg_output) = canonicalize(env, scope, arg); - - output.references = output.references.union(arg_output.references); - - can_args.push(loc_expr); - } - - Some(can_args) - } - None => None, - }; - - let can_expr = match resolve_variant_name(&env, variant_name, &mut output.references) { - Ok(symbol) => ApplyVariant(symbol, opt_can_args), - Err(variant_name) => { - let loc_variant = Located { - region: loc_expr.region.clone(), - value: variant_name, - }; - - env.problem(Problem::UnrecognizedVariant(loc_variant.clone())); - - UnrecognizedVariant(loc_variant) - } - }; - - (can_expr, output) - } - - expr::Expr::Assign(assignments, box_loc_returned) => { - // The body expression gets a new scope for canonicalization. - // Shadow `scope` to make sure we don't accidentally use the original one for the - // rest of this block. - let mut scope = scope.clone(); - - // Add the assigned identifiers to scope. 
If there's a collision, it means there - // was shadowing, which will be handled later. - let assigned_idents: Vec<(Ident, (Symbol, Region))> = idents_from_patterns( - assignments - .clone() - .iter() - .map(|(loc_pattern, _)| loc_pattern), - &scope, - ); - - scope.idents = union_pairs(scope.idents, assigned_idents.iter()); - - let mut refs_by_assignment: MutMap, References)> = - MutMap::default(); - let mut can_assignments_by_symbol: MutMap, Located)> = - MutMap::default(); - - for (loc_pattern, expr) in assignments { - // Each assignment gets to have all the idents in scope that are assigned in this - // block. Order of assignments doesn't matter, thanks to referential transparency! - let (loc_can_expr, can_output) = canonicalize(env, &mut scope, expr); - - // Exclude the current ident from shadowable_idents; you can't shadow yourself! - // (However, still include it in scope, because you *can* recursively refer to yourself.) - let mut shadowable_idents = scope.idents.clone(); - remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); - - let loc_can_pattern = canonicalize_pattern( - env, - &mut scope, - &Assignment, - &loc_pattern, - &mut shadowable_idents, - ); - let mut renamed_closure_assignment: Option<&Symbol> = None; - - // Give closures names (and tail-recursive status) where appropriate. - let can_expr = match ( - &loc_pattern.value, - &loc_can_pattern.value, - &loc_can_expr.value, - ) { - // First, make sure we are actually assigning an identifier instead of (for example) a variant. - // - // If we're assigning (UserId userId) = ... then this is certainly not a closure declaration, - // which also implies it's not a self tail call! - // - // Only assignments of the form (foo = ...) can be closure declarations or self tail calls. 
- ( - &expr::Pattern::Identifier(ref name), - &Pattern::Identifier(ref assigned_symbol), - &FunctionPointer(ref symbol), - ) => { - // Since everywhere in the code it'll be referred to by its assigned name, - // remove its generated name from the procedure map. (We'll re-insert it later.) - let mut procedure = env.procedures.remove(&symbol).unwrap(); - - // The original ident name will be used for debugging and stack traces. - procedure.name = Some(name.clone()); - - // The closure is self tail recursive iff it tail calls itself (by assigned name). - procedure.is_self_tail_recursive = match &can_output.tail_call { - &None => false, - &Some(ref symbol) => symbol == assigned_symbol, - }; - - // Re-insert the procedure into the map, under its assigned name. This way, - // when code elsewhere calls it by assigned name, it'll resolve properly. - env.procedures.insert(assigned_symbol.clone(), procedure); - - // Recursion doesn't count as referencing. (If it did, all recursive functions - // would result in circular assignment errors!) - refs_by_assignment - .entry(assigned_symbol.clone()) - .and_modify(|(_, refs)| { - refs.locals = refs.locals.without(assigned_symbol); - }); - - renamed_closure_assignment = Some(&assigned_symbol); - - // Return a reference to the assigned symbol, since the auto-generated one no - // longer references any entry in the procedure map! - Var(assigned_symbol.clone()) - } - _ => loc_can_expr.value, - }; - - let mut assigned_symbols = Vec::new(); - - // Store the referenced locals in the refs_by_assignment map, so we can later figure out - // which assigned names reference each other. - for (ident, (symbol, region)) in - idents_from_patterns(std::iter::once(&loc_pattern), &scope) - { - let refs = - // Functions' references don't count in assignments. - // See 3d5a2560057d7f25813112dfa5309956c0f9e6a9 and its - // parent commit for the bug this fixed! 
- if renamed_closure_assignment == Some(&symbol) { - References::new() - } else { - can_output.references.clone() - }; - - refs_by_assignment.insert( - symbol.clone(), - ( - Located { - value: ident, - region, - }, - refs, - ), - ); - - assigned_symbols.push(symbol.clone()); - } - - for symbol in assigned_symbols { - can_assignments_by_symbol.insert( - symbol, - ( - loc_can_pattern.clone(), - Located { - region: loc_can_expr.region.clone(), - value: can_expr.clone(), - }, - ), - ); - } - } - - // The assignment as a whole is a tail call iff its return expression is a tail call. - // Use its output as a starting point because its tail_call already has the right answer! - let (ret_expr, mut output) = canonicalize(env, &mut scope, *box_loc_returned); - - // Determine the full set of references by traversing the graph. - let mut visited_symbols = MutSet::default(); - - // Start with the return expression's referenced locals. They are the only ones that count! - // - // If I have two assignments which reference each other, but neither of them - // is referenced in the return expression, I don't want either of them (or their references) - // to end up in the final output.references. They were unused, and so were their references! - // - // The reason we need a graph here is so we don't overlook transitive dependencies. - // For example, if I have `a = b + 1` and the assignment returns `a + 1`, then the - // assignment as a whole references both `a` *and* `b`, even though it doesn't - // directly mention `b` - because `a` depends on `b`. If we didn't traverse a graph here, - // we'd erroneously give a warning that `b` was unused since it wasn't directly referenced. - for symbol in output.references.locals.clone().into_iter() { - // Traverse the graph and look up *all* the references for this local symbol. 
- let refs = references_from_local( - symbol, - &mut visited_symbols, - &refs_by_assignment, - &env.procedures, - ); - - output.references = output.references.union(refs); - } - - for symbol in output.references.calls.clone().into_iter() { - // Traverse the graph and look up *all* the references for this call. - // Reuse the same visited_symbols as before; if we already visited it, we - // won't learn anything new from visiting it again! - let refs = references_from_call( - symbol, - &mut visited_symbols, - &refs_by_assignment, - &env.procedures, - ); - - output.references = output.references.union(refs); - } - - // Now that we've collected all the references, check to see if any of the new idents - // we defined went unused by the return expression. If any were unused, report it. - for (ident, (symbol, region)) in assigned_idents.clone() { - if !output.references.has_local(&symbol) { - let loc_ident = Located { - region: region.clone(), - value: ident.clone(), - }; - - env.problem(Problem::UnusedAssignment(loc_ident)); - } - } - - // Use topological sort to reorder the assignments based on their dependencies to one another. - // This way, during code gen, no assignment will refer to a value that hasn't been initialized yet. - // As a bonus, the topological sort also reveals any cycles between the assignments, allowing - // us to give a CircularAssignment error. - let successors = |symbol: &Symbol| -> ImSet { - let (_, references) = refs_by_assignment.get(symbol).unwrap(); - - local_successors(&references, &env.procedures) - }; - - let assigned_symbols: Vec = can_assignments_by_symbol - .keys() - .into_iter() - .map(Symbol::clone) - .collect(); - - match topological_sort(assigned_symbols.as_slice(), successors) { - Ok(sorted_symbols) => { - let can_assignments = sorted_symbols - .into_iter() - .rev() // Topological sort gives us the reverse of the sorting we want! 
- .map(|symbol| can_assignments_by_symbol.get(&symbol).unwrap().clone()) - .collect(); - - (Assign(can_assignments, Box::new(ret_expr)), output) - } - Err(node_in_cycle) => { - // We have one node we know is in the cycle. - // We want to show the entire cycle in the error message, so expand it out. - let mut loc_idents_in_cycle: Vec> = - strongly_connected_component(&node_in_cycle, successors) - .into_iter() - .rev() // Strongly connected component gives us the reverse of the sorting we want! - .map(|symbol| refs_by_assignment.get(&symbol).unwrap().0.clone()) - .collect(); - - loc_idents_in_cycle = sort_cyclic_idents( - loc_idents_in_cycle, - &mut assigned_idents.iter().map(|(ident, _)| ident), - ); - - env.problem(Problem::CircularAssignment(loc_idents_in_cycle.clone())); - - let can_assignments = can_assignments_by_symbol - .values() - .map(|tuple| tuple.clone()) - .collect(); - - ( - CircularAssignment( - loc_idents_in_cycle, - can_assignments, - Box::new(ret_expr), - ), - output, - ) - } - } - } - - expr::Expr::Closure(loc_arg_patterns, box_loc_body_expr) => { - // The globally unique symbol that will refer to this closure once it gets converted - // into a top-level procedure for code gen. - // - // The symbol includes the module name, the top-level declaration name, and the - // index (0-based) of the closure within that declaration. - // - // Example: "MyModule$main$3" if this is the 4th closure in MyModule.main. - let symbol = scope.gen_unique_symbol(); - - // The body expression gets a new scope for canonicalization. - // Shadow `scope` to make sure we don't accidentally use the original one for the - // rest of this block. - let mut scope = scope.clone(); - - let arg_idents: Vec<(Ident, (Symbol, Region))> = - idents_from_patterns(loc_arg_patterns.iter(), &scope); - - // Add the arguments' idents to scope.idents. If there's a collision, - // it means there was shadowing, which will be handled later. 
- scope.idents = union_pairs(scope.idents, arg_idents.iter()); - - let can_args: Vec> = loc_arg_patterns - .into_iter() - .map(|loc_pattern| { - // Exclude the current ident from shadowable_idents; you can't shadow yourself! - // (However, still include it in scope, because you *can* recursively refer to yourself.) - let mut shadowable_idents = scope.idents.clone(); - remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); - - canonicalize_pattern( - env, - &mut scope, - &FunctionArg, - &loc_pattern, - &mut shadowable_idents, - ) - }) - .collect(); - let (loc_body_expr, mut output) = canonicalize(env, &mut scope, *box_loc_body_expr); - - // Now that we've collected all the references, check to see if any of the args we defined - // went unreferenced. If any did, report them as unused arguments. - for (ident, (arg_symbol, region)) in arg_idents { - if !output.references.has_local(&arg_symbol) { - // The body never referenced this argument we declared. It's an unused argument! - env.problem(Problem::UnusedArgument(Located { - region, - value: ident, - })); - } - - // We shouldn't ultimately count arguments as referenced locals. Otherwise, - // we end up with weird conclusions like the expression (\x -> x + 1) - // references the (nonexistant) local variable x! - output.references.locals.remove(&arg_symbol); - } - - // We've finished analyzing the closure. Its references.locals are now the values it closes over, - // since we removed the only locals it shouldn't close over (its arguments). - // Register it as a top-level procedure in the Env! - env.register_closure( - symbol.clone(), - can_args, - loc_body_expr, - loc_expr.region.clone(), - output.references.clone(), - ); - - // Always return a function pointer, in case that's how the closure is being used (e.g. with Apply). 
- (FunctionPointer(symbol), output) - } - - expr::Expr::Case(loc_cond, branches) => { - // Canonicalize the conditional - let (can_cond, mut output) = canonicalize(env, scope, *loc_cond); - let mut can_branches = Vec::with_capacity(branches.len()); - let mut recorded_tail_call = false; - - for (loc_pattern, loc_expr) in branches { - // Each case branch gets a new scope for canonicalization. - // Shadow `scope` to make sure we don't accidentally use the original one for the - // rest of this block. - let mut scope = scope.clone(); - - // Exclude the current ident from shadowable_idents; you can't shadow yourself! - // (However, still include it in scope, because you *can* recursively refer to yourself.) - let mut shadowable_idents = scope.idents.clone(); - remove_idents(loc_pattern.value.clone(), &mut shadowable_idents); - - let loc_can_pattern = canonicalize_pattern( - env, - &mut scope, - &CaseBranch, - &loc_pattern, - &mut shadowable_idents, - ); - - // Patterns introduce new idents to the scope! - // Add the assigned identifiers to scope. If there's a collision, it means there - // was shadowing, which will be handled later. - let assigned_idents: Vec<(Ident, (Symbol, Region))> = - idents_from_patterns(std::iter::once(&loc_pattern), &scope); - - scope.idents = union_pairs(scope.idents, assigned_idents.iter()); - - let (can_expr, branch_output) = canonicalize(env, &mut scope, loc_expr); - - output.references = output.references.union(branch_output.references); - - // If all branches are tail calling the same symbol, then so is the conditional as a whole. - if !recorded_tail_call { - // If we haven't recorded output.tail_call yet, record it. - output.tail_call = branch_output.tail_call; - recorded_tail_call = true; - } else if branch_output.tail_call != output.tail_call { - // If we recorded output.tail_call, but what we recorded differs from what we just saw, - // then game over. This can't possibly be a self tail call! 
- output.tail_call = None; - } - - // Now that we've collected all the references for this branch, check to see if - // any of the new idents it defined were unused. If any were, report it. - for (ident, (symbol, region)) in assigned_idents { - if !output.references.has_local(&symbol) { - let loc_ident = Located { - region: region.clone(), - value: ident.clone(), - }; - - env.problem(Problem::UnusedAssignment(loc_ident)); - } - } - - can_branches.push((loc_can_pattern, can_expr)); - } - - // One of the branches should have flipped this, so this should only happen - // in the situation where the case had no branches. That can come up, though! - // A case with no branches is a runtime error, but it will mess things up - // if code gen mistakenly thinks this is a tail call just because its condition - // happend to be one. (The condition gave us our initial output value.) - if !recorded_tail_call { - output.tail_call = None; - } - - // Incorporate all three expressions into a combined Output value. - let expr = Case(Box::new(can_cond), can_branches); - - (expr, output) - } - }; - - // At the end, diff used_idents and assigned_idents to see which were unused. - // Add warnings for those! - - // In a later phase, unused top level declarations won't get monomorphized or code-genned. - // We aren't going to bother with DCE at the level of local assignments. It's going to be - // a rounding error anyway (especially given that they'll be surfaced as warnings), LLVM will - // DCE them in optimized builds, and it's not worth the bookkeeping for dev builds. 
- ( - Located { - region: loc_expr.region.clone(), - value: expr, - }, - output, - ) -} - -fn union_pairs<'a, K, V, I>(mut map: ImMap, pairs: I) -> ImMap -where - I: Iterator, - K: std::hash::Hash + Eq + Clone, - K: 'a, - V: Clone, - V: 'a, -{ - for (ref k, ref v) in pairs { - map.insert(k.clone(), v.clone()); - } - - map -} - -fn local_successors( - references: &References, - procedures: &MutMap, -) -> ImSet { - let mut answer = references.locals.clone(); - - for call_symbol in references.calls.iter() { - answer = answer.union(call_successors(call_symbol, procedures)); - } - - answer -} - -fn call_successors(call_symbol: &Symbol, procedures: &MutMap) -> ImSet { - // TODO (this comment should be moved to a GH issue) this may cause an infinite loop if 2 procedures reference each other; may need to track visited procedures! - match procedures.get(call_symbol) { - Some(procedure) => { - let mut answer = local_successors(&procedure.references, procedures); - - answer.insert(call_symbol.clone()); - - answer - } - None => ImSet::default(), - } -} - -fn references_from_local( - assigned_symbol: Symbol, - visited: &mut MutSet, - refs_by_assignment: &MutMap, - procedures: &MutMap, -) -> References { - match refs_by_assignment.get(&assigned_symbol) { - Some((_, refs)) => { - let mut answer = References::new(); - - visited.insert(assigned_symbol); - - for local in refs.locals.iter() { - if !visited.contains(&local) { - let other_refs = references_from_local( - local.clone(), - visited, - refs_by_assignment, - procedures, - ); - - answer = answer.union(other_refs); - } - - answer.locals.insert(local.clone()); - } - - for call in refs.calls.iter() { - if !visited.contains(&call) { - let other_refs = - references_from_call(call.clone(), visited, refs_by_assignment, procedures); - - answer = answer.union(other_refs); - } - - answer.calls.insert(call.clone()); - } - - answer - } - None => { - // This should never happen! 
If the local was not recognized, it should not have been - // added to the local references. - unreachable!(); - } - } -} - -/// When we get a list of cyclic idents, the first node listed is a matter of chance. -/// This reorders the list such that the first node listed is always alphabetically the lowest, -/// while preserving the overall order of the cycle. -/// -/// Example: the cycle (c ---> a ---> b) becomes (a ---> b ---> c) -pub fn sort_cyclic_idents<'a, I>( - loc_idents: Vec>, - ordered_idents: &mut I, -) -> Vec> -where - I: Iterator, -{ - // Find the first ident in ordered_idents that also appears in loc_idents. - let first_ident = ordered_idents - .find(|ident| { - loc_idents - .iter() - .any(|loc_ident| &&loc_ident.value == ident) - }) - .unwrap(); - - let mut answer = Vec::with_capacity(loc_idents.len()); - let mut end = Vec::with_capacity(loc_idents.len()); - let mut encountered_first_ident = false; - - for loc_ident in loc_idents { - if encountered_first_ident { - answer.push(loc_ident); - } else if &loc_ident.value == first_ident { - encountered_first_ident = true; - - answer.push(loc_ident); - } else { - end.push(loc_ident); - } - } - - // Add the contents of `end` to the end of the answer. 
- answer.extend_from_slice(end.as_slice()); - - answer -} - -fn references_from_call( - call_symbol: Symbol, - visited: &mut MutSet, - refs_by_assignment: &MutMap, - procedures: &MutMap, -) -> References { - match procedures.get(&call_symbol) { - Some(procedure) => { - let mut answer = procedure.references.clone(); - - visited.insert(call_symbol); - - for closed_over_local in procedure.references.locals.iter() { - if !visited.contains(&closed_over_local) { - let other_refs = references_from_local( - closed_over_local.clone(), - visited, - refs_by_assignment, - procedures, - ); - - answer = answer.union(other_refs); - } - - answer.locals.insert(closed_over_local.clone()); - } - - for call in procedure.references.calls.iter() { - if !visited.contains(&call) { - let other_refs = - references_from_call(call.clone(), visited, refs_by_assignment, procedures); - - answer = answer.union(other_refs); - } - - answer.calls.insert(call.clone()); - } - - answer - } - None => { - // If the call symbol was not in the procedures map, that means we're calling a non-function and - // will get a type mismatch later. For now, assume no references as a result of the "call." 
- References::new() - } - } -} - -fn idents_from_patterns<'a, I>(loc_patterns: I, scope: &Scope) -> Vec<(Ident, (Symbol, Region))> -where - I: Iterator>, -{ - let mut answer = Vec::new(); - - for loc_pattern in loc_patterns { - add_idents_from_pattern(loc_pattern, scope, &mut answer); - } - - answer -} - -/// helper function for idents_from_patterns -fn add_idents_from_pattern( - loc_pattern: &Located, - scope: &Scope, - answer: &mut Vec<(Ident, (Symbol, Region))>, -) { - use expr::Pattern::*; - - match &loc_pattern.value { - &Identifier(ref name) => { - let symbol = scope.symbol(&name); - - answer.push(( - Ident::Unqualified(name.clone()), - (symbol, loc_pattern.region.clone()), - )); - } - &Variant(_, ref opt_loc_args) => match opt_loc_args { - &None => (), - &Some(ref loc_args) => { - for loc_arg in loc_args.iter() { - add_idents_from_pattern(loc_arg, scope, answer); - } - } - }, - &IntLiteral(_) | &FloatLiteral(_) | &ExactString(_) | &EmptyRecordLiteral | &Underscore => { - () - } - } -} - -fn remove_idents(pattern: expr::Pattern, idents: &mut ImMap) { - use expr::Pattern::*; - - match pattern { - Identifier(name) => { - idents.remove(&(Ident::Unqualified(name))); - } - Variant(_, Some(loc_args)) => { - for loc_arg in loc_args { - remove_idents(loc_arg.value, idents); - } - } - Variant(_, None) - | IntLiteral(_) - | FloatLiteral(_) - | ExactString(_) - | EmptyRecordLiteral - | Underscore => {} - } -} - -/// If it could not be found, return it unchanged as an Err. 
-#[inline(always)] // This is shared code between Var and InterpolatedStr; it was inlined when handwritten -fn resolve_ident( - env: &Env, - scope: &Scope, - ident: Ident, - references: &mut References, -) -> Result { - if scope.idents.contains_key(&ident) { - let recognized = match ident { - Ident::Unqualified(name) => { - let symbol = scope.symbol(&name); - - references.locals.insert(symbol.clone()); - - symbol - } - Ident::Qualified(path, name) => { - let symbol = Symbol::new(&path, &name); - - references.globals.insert(symbol.clone()); - - symbol - } - }; - - Ok(recognized) - } else { - match ident { - Ident::Unqualified(name) => { - // Try again, this time using the current module as the path. - let qualified = Ident::Qualified(env.home.clone(), name.clone()); - - if scope.idents.contains_key(&qualified) { - let symbol = Symbol::new(&env.home, &name); - - references.globals.insert(symbol.clone()); - - Ok(symbol) - } else { - // We couldn't find the unqualified ident in scope. NAMING PROBLEM! - Err(Ident::Unqualified(name)) - } - } - qualified @ Ident::Qualified(_, _) => { - // We couldn't find the qualified ident in scope. NAMING PROBLEM! - Err(qualified) - } - } - } -} - -/// Translate a VariantName into a resolved symbol if it's found in env.declared_variants. -/// If it could not be found, return it unchanged as an Err. -#[inline(always)] -fn resolve_variant_name( - env: &Env, - variant_name: VariantName, - references: &mut References, -) -> Result { - let symbol = Symbol::from_variant(&variant_name, &env.home); - - if env.variants.contains_key(&symbol) { - references.variants.insert(symbol.clone()); - - Ok(symbol) - } else { - // We couldn't find the qualified variant name in scope. NAMING PROBLEM! - Err(variant_name) - } -} - -/// Different patterns are supported in different circumstances. -/// For example, case branches can pattern match on number literals, but -/// assignments and function args can't. 
Underscore is supported in function -/// arg patterns and in case branch patterns, but not in assignments. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum PatternType { - Assignment, - FunctionArg, - CaseBranch, -} - -fn canonicalize_pattern( - env: &mut Env, - scope: &mut Scope, - pattern_type: &PatternType, - loc_pattern: &Located, - shadowable_idents: &mut ImMap, -) -> Located { - use expr::Pattern::*; - - let region = loc_pattern.region.clone(); - let pattern = match &loc_pattern.value { - &Identifier(ref name) => { - let unqualified_ident = Ident::Unqualified(name.clone()); - - // We use shadowable_idents for this, and not scope, because for assignments - // they are different. When canonicalizing a particular assignment, that new - // ident is in scope (for recursion) but not shadowable. - // - // For example, when canonicalizing (fibonacci = ...), `fibonacci` should be in scope - // so that it can refer to itself without getting a naming problem, but it should not - // be in the collection of shadowable idents because you can't shadow yourself! - match shadowable_idents.get(&unqualified_ident) { - Some((_, region)) => { - let loc_shadowed_ident = Located { - region: region.clone(), - value: unqualified_ident, - }; - - // This is already in scope, meaning it's about to be shadowed. - // Shadowing is not allowed! - env.problem(Problem::Shadowing(loc_shadowed_ident.clone())); - - // Change this Pattern to a Shadowed variant, so that - // codegen knows to generate a runtime exception here. - Pattern::Shadowed(loc_shadowed_ident) - } - None => { - // Make sure we aren't shadowing something in the home module's scope. - let qualified_ident = - Ident::Qualified(env.home.clone(), unqualified_ident.name()); - - match scope.idents.get(&qualified_ident) { - Some((_, region)) => { - let loc_shadowed_ident = Located { - region: region.clone(), - value: qualified_ident, - }; - - // This is already in scope, meaning it's about to be shadowed. 
- // Shadowing is not allowed! - env.problem(Problem::Shadowing(loc_shadowed_ident.clone())); - - // Change this Pattern to a Shadowed variant, so that - // codegen knows to generate a runtime exception here. - Pattern::Shadowed(loc_shadowed_ident) - } - None => { - let new_ident = qualified_ident.clone(); - let new_name = qualified_ident.name(); - let symbol = scope.symbol(&new_name); - - // This is a fresh identifier that wasn't already in scope. - // Add it to scope! - let symbol_and_region = (symbol.clone(), region.clone()); - - // Add this to both scope.idents *and* shadowable_idents. - // The latter is relevant when recursively canonicalizing Variant patterns, - // which can bring multiple new idents into scope. For example, it's important - // that we catch (Blah foo foo) as being an example of shadowing. - scope - .idents - .insert(new_ident.clone(), symbol_and_region.clone()); - shadowable_idents.insert(new_ident, symbol_and_region); - - Pattern::Identifier(symbol) - } - } - } - } - } - - &Variant(ref loc_name, ref opt_args) => { - // Canonicalize the variant's arguments, if it has any. - let opt_can_args: Option>> = match opt_args { - None => None, - Some(loc_args) => { - let mut can_args: Vec> = Vec::new(); - - for loc_arg in loc_args { - let loc_can_arg = canonicalize_pattern( - env, - scope, - pattern_type, - &loc_arg, - shadowable_idents, - ); - - can_args.push(loc_can_arg); - } - - Some(can_args) - } - }; - - // Canonicalize the variant's name. - let symbol = Symbol::from_variant(&loc_name.value, &env.home); - - if env.variants.contains_key(&symbol) { - // No problems; the qualified variant name was in scope! - Pattern::Variant(symbol, opt_can_args) - } else { - // We couldn't find the variant name in scope. NAMING PROBLEM! 
- env.problem(Problem::UnrecognizedVariant(loc_name.clone())); - - Pattern::UnrecognizedVariant(loc_name.clone()) - } - } - - &IntLiteral(ref num) => match pattern_type { - CaseBranch => Pattern::IntLiteral(*num), - ptype @ Assignment | ptype @ FunctionArg => { - unsupported_pattern(env, *ptype, ®ion, &loc_pattern.value) - } - }, - - &FloatLiteral(ref num) => match pattern_type { - CaseBranch => Pattern::FloatLiteral(*num), - ptype @ Assignment | ptype @ FunctionArg => { - unsupported_pattern(env, *ptype, ®ion, &loc_pattern.value) - } - }, - - &ExactString(ref string) => match pattern_type { - CaseBranch => Pattern::ExactString(string.clone()), - ptype @ Assignment | ptype @ FunctionArg => { - unsupported_pattern(env, *ptype, ®ion, &loc_pattern.value) - } - }, - - &Underscore => match pattern_type { - CaseBranch | FunctionArg => Pattern::Underscore, - Assignment => unsupported_pattern(env, Assignment, ®ion, &loc_pattern.value), - }, - - &EmptyRecordLiteral => Pattern::EmptyRecordLiteral, - }; - - Located { - region, - value: pattern, - } -} - -/// When we detect an unsupported pattern type (e.g. 5 = 1 + 2 is unsupported because you can't -/// assign to Int patterns), report it to Env and return an UnsupportedPattern runtime error pattern. -fn unsupported_pattern( - env: &mut Env, - pattern_type: PatternType, - region: &Region, - pattern: &expr::Pattern, -) -> Pattern { - let loc_problem_pattern = Located { - region: region.clone(), - value: pattern.clone(), - }; - - env.problem(Problem::UnsupportedPattern( - pattern_type, - loc_problem_pattern.clone(), - )); - - Pattern::UnsupportedPattern(loc_problem_pattern) -} - -// OPERATOR PRECEDENCE - -// Precedence logic adapted from Gluon by Markus Westerlind, MIT licensed -// https://github.com/gluon-lang/gluon -// Thank you, Markus! 
-#[derive(Clone, Debug, PartialEq)] -pub enum PrecedenceProblem { - BothNonAssociative(Located, Located), -} - -fn new_op_expr( - left: Box>, - op: Located, - right: Box>, -) -> Located { - let new_region = Region { - start_line: left.region.start_line, - start_col: left.region.start_col, - - end_line: right.region.end_line, - end_col: right.region.end_col, - }; - let new_expr = Expr::Operator(left, op, right); - - Located::new(new_expr, new_region) -} - -/// Reorder the expression tree based on operator precedence and associativity rules. -/// In many languages, this can fail due to (for example) <| and |> having the same -/// precedence but different associativity. Languages which support custom operators with -/// user-defined precedence and associativity (e.g. Haskell) can have many such errors. -/// -/// By design, Roc neither allows custom operators nor has any built-in operators with -/// the same precedence and different associativity, so this operation always succeeds -/// and can never produce any user-facing errors. -fn apply_precedence_and_associativity(env: &mut Env, expr: Located) -> Located { - use self::PrecedenceProblem::*; - - // NOTE: A potentially nice performance optimization here would be to use - // arena bump allocation for Infixes, arg_stack, and op_stack. As long as we - // allocate each element inside arg_stack outside the arena, this should end - // up being a decent bit more efficient. 
- let mut infixes = Infixes::new(expr); - let mut arg_stack: Vec>> = Vec::new(); - let mut op_stack: Vec> = Vec::new(); - - while let Some(token) = infixes.next() { - match token { - InfixToken::Arg(next_expr) => arg_stack.push(next_expr), - InfixToken::Op(next_op) => { - match op_stack.pop() { - Some(stack_op) => { - match next_op.value.cmp(&stack_op.value) { - Ordering::Less => { - // Inline - let right = arg_stack.pop().unwrap(); - let left = arg_stack.pop().unwrap(); - - infixes.next_op = Some(next_op); - arg_stack.push(Box::new(new_op_expr(left, stack_op, right))); - } - - Ordering::Greater => { - // Swap - op_stack.push(stack_op); - op_stack.push(next_op); - } - - Ordering::Equal => { - match ( - next_op.value.associativity(), - stack_op.value.associativity(), - ) { - (LeftAssociative, LeftAssociative) => { - // Inline - let right = arg_stack.pop().unwrap(); - let left = arg_stack.pop().unwrap(); - - infixes.next_op = Some(next_op); - arg_stack - .push(Box::new(new_op_expr(left, stack_op, right))); - } - - (RightAssociative, RightAssociative) => { - // Swap - op_stack.push(stack_op); - op_stack.push(next_op); - } - - (NonAssociative, NonAssociative) => { - // Both operators were non-associative, e.g. (True == False == False). - // We should tell the author to disambiguate by grouping them with parens. - let problem = BothNonAssociative(next_op.clone(), stack_op); - - env.problem(Problem::PrecedenceProblem(problem.clone())); - - let right = arg_stack.pop().unwrap(); - let left = arg_stack.pop().unwrap(); - let broken_expr = new_op_expr(left, next_op, right); - let region = broken_expr.region.clone(); - let value = - Expr::InvalidPrecedence(problem, Box::new(broken_expr)); - - return Located { region, value }; - } - - _ => { - // The operators had the same precedence but different associativity. - // - // In many languages, this case can happen due to (for example) <| and |> having the same - // precedence but different associativity. 
Languages which support custom operators with - // (e.g. Haskell) can potentially have arbitrarily many of these cases. - // - // By design, Roc neither allows custom operators nor has any built-in operators with - // the same precedence and different associativity, so this should never happen! - panic!("Operators had the same associativity, but different precedence. This should never happen!"); - } - } - } - } - } - None => op_stack.push(next_op), - }; - } - } - } - - for op in op_stack.into_iter().rev() { - let right = arg_stack.pop().unwrap(); - let left = arg_stack.pop().unwrap(); - - arg_stack.push(Box::new(new_op_expr(left, op, right))); - } - - assert_eq!(arg_stack.len(), 1); - - *arg_stack.pop().unwrap() -} - -#[derive(Debug, Clone, PartialEq)] -enum InfixToken { - Arg(Box>), - Op(Located), -} - -/// An iterator that takes an expression that has had its operators grouped -/// with _right associativity_, and yeilds a sequence of `InfixToken`s. This -/// is useful for reparsing the operators with their correct associativies -/// and precedences. 
-/// -/// For example, the expression: -/// -/// ```text -/// (1 + (2 ^ (4 * (6 - 8)))) -/// ``` -/// -/// Will result in the following iterations: -/// -/// ```text -/// Arg: 1 -/// Op: + -/// Arg: 2 -/// Op: ^ -/// Arg: 4 -/// Op: * -/// Arg: 6 -/// Op: - -/// Arg: 8 -/// ``` -struct Infixes { - /// The next part of the expression that we need to flatten - remaining_expr: Option>>, - /// Cached operator from a previous iteration - next_op: Option>, -} - -impl Infixes { - fn new(expr: Located) -> Infixes { - Infixes { - remaining_expr: Some(Box::new(expr)), - next_op: None, - } - } -} - -impl Iterator for Infixes { - type Item = InfixToken; - - fn next(&mut self) -> Option { - match self.next_op.take() { - Some(op) => Some(InfixToken::Op(op)), - None => self.remaining_expr.take().map(|boxed_expr| { - let expr = *boxed_expr; - - match expr.value { - Expr::Operator(left, op, right) => { - self.remaining_expr = Some(right); - self.next_op = Some(op); - - InfixToken::Arg(left) - } - _ => InfixToken::Arg(Box::new(expr)), - } - }), - } - } -} diff --git a/src/constrain.rs b/src/constrain.rs index a731cf0877..7ff6e06ef5 100644 --- a/src/constrain.rs +++ b/src/constrain.rs @@ -1,5 +1,7 @@ -use canonicalize::Expr::{self, *}; -use canonicalize::{Pattern, Procedure, Symbol}; +use can::expr::Expr::{self, *}; +use can::pattern::Pattern; +use can::procedure::Procedure; +use can::symbol::Symbol; use collections::ImMap; use operator::{ArgSide, Operator}; use region::{Located, Region}; @@ -93,13 +95,14 @@ fn constrain_op( ) -> Constraint { let op = loc_op.value; let op_types = Type::for_operator(op); - let fn_var = subs.mk_flex_var(); + // TODO use fn_var + let _fn_var = subs.mk_flex_var(); let ret_var = subs.mk_flex_var(); let ret_type = Variable(ret_var); let ret_reason = Reason::OperatorRet(op); let expected_ret_type = ForReason(ret_reason, op_types.ret, region.clone()); - let (l_var, l_con) = constrain_op_arg( + let (_l_var, l_con) = constrain_op_arg( ArgSide::Left, 
bound_vars, subs, @@ -107,7 +110,7 @@ fn constrain_op( op_types.left, l_loc_expr, ); - let (r_var, r_con) = constrain_op_arg( + let (_r_var, r_con) = constrain_op_arg( ArgSide::Right, bound_vars, subs, @@ -116,8 +119,8 @@ fn constrain_op( r_loc_expr, ); - let vars = vec![fn_var, ret_var, l_var, r_var]; // TODO occurs check! + // let vars = vec![fn_var, ret_var, l_var, r_var]; // return $ exists (funcVar:resultVar:argVars) $ CAnd ... And(vec![ @@ -226,7 +229,7 @@ pub fn constrain_defs( subs: &mut Subs, ret_con: Constraint, ) -> Constraint { - let mut rigid_info = Info::with_capacity(assignments.len()); + let rigid_info = Info::with_capacity(assignments.len()); let mut flex_info = Info::with_capacity(assignments.len()); for (loc_pattern, loc_expr) in assignments { @@ -320,7 +323,7 @@ fn string() -> Type { builtin_type("String", "String", Vec::new()) } -fn num(var: Variable) -> Type { +fn _num(var: Variable) -> Type { builtin_type("Num", "Num", vec![Type::Variable(var)]) } @@ -514,9 +517,9 @@ pub fn constrain_procedure( } struct Args { - vars: Vec, - typ: Type, - ret_type: Type, + pub vars: Vec, + pub typ: Type, + pub ret_type: Type, } fn constrain_args(args: I, subs: &mut Subs, state: &mut PatternState) -> Args @@ -571,7 +574,7 @@ struct PatternState { impl PatternState { pub fn add_pattern(&mut self, loc_pattern: Located, expected: Expected) { - use canonicalize::Pattern::*; + use can::pattern::Pattern::*; let region = loc_pattern.region; diff --git a/src/deprecated/mod.rs b/src/deprecated/mod.rs deleted file mode 100644 index 2e827b4296..0000000000 --- a/src/deprecated/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod parse; -pub mod parse_state; diff --git a/src/deprecated/parse.rs b/src/deprecated/parse.rs deleted file mode 100644 index 6dd784af03..0000000000 --- a/src/deprecated/parse.rs +++ /dev/null @@ -1,1070 +0,0 @@ -use expr::{Expr, Ident, Pattern, VariantName}; -use operator::Operator; -use region::{Located, Region}; - -use 
deprecated::parse_state::IndentablePosition; -use std::char; - -use combine::error::{Consumed, ParseError}; -use combine::parser::char::{alpha_num, char, digit, hex_digit, spaces, string, HexDigit}; -use combine::parser::combinator::{look_ahead, not_followed_by}; -use combine::parser::item::{any, position, satisfy, satisfy_map, value}; -use combine::parser::repeat::{ - count_min_max, many, sep_by, sep_by1, skip_many, skip_many1, skip_until, -}; -use combine::stream::state::State; -use combine::stream::{Positioned, Stream}; -use combine::{ - attempt, between, choice, eof, many1, optional, parser, unexpected, unexpected_any, Parser, -}; - -pub const ERR_EMPTY_CHAR: &'static str = "EMPTY_CHAR"; - -pub fn parse_string( - string: &str, -) -> Result, combine::easy::Errors> { - let parse_state = State::with_positioner(string, IndentablePosition::default()); - - located(expr()) - .skip(eof()) - .easy_parse(parse_state) - .map(|(expr, _)| expr) -} - -pub fn expr() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - spaces().with(expr_body(0)).skip(whitespace_or_eof()) -} - -fn located(parser: P) -> impl Parser> -where - I: Stream, - I::Error: ParseError, - I: Positioned, - P: Parser, -{ - attempt(position().and(parser)) - .and(position()) - .map(|((start, val), end)| { - Located::new( - val, - Region { - start_line: start.line, - start_col: start.column, - - end_line: end.line, - end_col: end.column, - }, - ) - }) -} - -fn indentation() -> impl Parser -where - I: Stream, - I::Error: ParseError, - I: Positioned, -{ - position().map(|pos: IndentablePosition| pos.indent_col) -} - -fn whitespace_or_eof() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - choice((whitespace1(), eof().with(value(())))) -} - -fn skipped_whitespace_char() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - choice(( - char(' ').with(value(())), - char('\n').with(value(())), - block_comment(), - inline_comment(), - )) -} - -fn whitespace() -> impl Parser -where - 
I: Stream, - I::Error: ParseError, -{ - skip_many(skipped_whitespace_char()) -} - -fn whitespace1() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - skip_many1(skipped_whitespace_char()) -} - -fn block_comment() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - // This uses skip_until to make sure we don't bother saving anything - // until we hit the closing ###, and then uses skip(string("###")) - // to actually consume the closing ###. - attempt( - // 4+ consecutive '#' characters is *not* considered a - // block comment. It's for "drawing horizontal lines" like so: - // ########################################################### - string("###").skip(satisfy(|c| c != '#')), - ) - .with(skip_until(attempt(string("###")))) - .skip(string("###")) -} - -fn inline_comment() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - char('#') - .skip(skip_many(satisfy(|c| c != '\n'))) - .with(value(())) -} - -fn indented_whitespaces(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - skip_many(skipped_indented_whitespace_char(min_indent)) -} - -fn indented_whitespaces1(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - skip_many1(skipped_indented_whitespace_char(min_indent)) -} - -fn skipped_indented_whitespace_char(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - choice(( - char(' ').with(value(())), - block_comment(), - inline_comment(), - // If we hit a newline, it must be followed by: - // - // - Any number of blank lines (which may contain only spaces) - // - At least min_indent spaces, or else eof() - char('\n') - .skip(skip_many(char('\n').skip(optional(attempt( - skip_many(char(' ')).skip(look_ahead(char('\n'))), - ))))) - .skip(choice(( - many::, _>(char(' ')).then(move |chars| { - if chars.len() < min_indent as usize { - unexpected("outdent").left() - } else { - value(()).right() - } - }), - eof().with(value(())), - ))) - 
.with(value(())), - )) -} - -/// This is separate from expr_body for the sake of function application, -/// so it can stop parsing when it reaches an operator (since they have -/// higher precedence.) -fn function_arg_expr(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - function_arg_expr_(min_indent) -} - -parser! { - #[inline(always)] - fn function_arg_expr_[I](min_indent_ref: u16)(I) -> Expr - where [ I: Stream ] - { - // TODO figure out why min_indent_ref has the type &mut u16 - let min_indent = *min_indent_ref; - - // Rules for expressions that can go in function arguments: - // - // 1. Don't parse operators, because they have a higher - // precedence than function application. - // 2. Don't parse assignments unless they're wrapped in parens. - // 3. Don't parse variants; those will be handled separately by - // the function arg parser (it only accepts non-applied variants) - // 4. Parse variables but not functions. - choice(( - closure(min_indent), - apply_with_parens(min_indent), - list(min_indent), - string("{}").with(value(Expr::EmptyRecord)), - string_literal(), - int_or_frac_literal(), - negative_int_or_frac_literal(), - char_literal(), - if_expr(min_indent), - case_expr(min_indent), - )) - } -} - -fn expr_body(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - expr_body_(min_indent) -} - -// This macro allows recursive parsers -parser! { - #[inline(always)] - fn expr_body_[I](min_indent_ref: u16)(I) -> Expr - where [ I: Stream ] - { - // TODO figure out why min_indent_ref has the type &mut u16 - let min_indent = *min_indent_ref; - - located(choice(( - function_arg_expr(min_indent), - assignment(min_indent), - apply_variant(min_indent), - func_or_var(min_indent), - ))) - .and( - // Optionally follow the expression with an operator, - // - // e.g. 
In the expression (1 + 2), the subexpression 1 is - // followed by the operator + and another subexpression, 2 - optional( - attempt( - indented_whitespaces(min_indent) - .with(located(operator())) - .skip(indented_whitespaces(min_indent)) - .and(located(expr_body(min_indent))) - ) - ) - ).map(|(expr1, opt_op)| { - match opt_op { - None => expr1.value, - Some((op, expr2)) => { - Expr::Operator(Box::new(expr1), op, Box::new(expr2)) - }, - } - }) - } -} - -pub fn if_expr(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - attempt(string("if").skip(indented_whitespaces1(min_indent))) - .with(located(expr_body(min_indent))) - .skip(indented_whitespaces1(min_indent)) - .skip(string("then")) - .skip(indented_whitespaces1(min_indent)) - .and(located(expr_body(min_indent))) - .skip(indented_whitespaces1(min_indent)) - .skip(string("else")) - .skip(indented_whitespaces1(min_indent)) - .and(located(expr_body(min_indent))) - .map(|((conditional, then_branch), else_branch)| { - Expr::If( - Box::new(conditional), - Box::new(then_branch), - Box::new(else_branch), - ) - }) -} - -pub fn case_expr(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - attempt(string("case").skip(indented_whitespaces1(min_indent))) - .with(located(expr_body(min_indent))) - .and(many::, _>( - attempt( - skip_many(indented_whitespaces1(min_indent)) - .with(string("when").skip(indented_whitespaces1(min_indent))), - ) - .with(located(pattern(min_indent))) - .skip(indented_whitespaces1(min_indent)) - .skip(string("then")) - .skip(indented_whitespaces1(min_indent)) - .and(located(expr_body(min_indent))), - )) - .map(|(conditional, branches)| { - if branches.is_empty() { - // TODO handle this more gracefully - panic!("encountered case-expression with no branches!") - } else { - Expr::Case(Box::new(conditional), branches) - } - }) -} - -pub fn list(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - between( - char('['), - 
char(']'), - sep_by( - indented_whitespaces(min_indent) - .with(located(expr_body(min_indent))) - .skip(indented_whitespaces(min_indent)), - char(','), - ), - ) - .map(|loc_elems: Vec>| { - if loc_elems.is_empty() { - Expr::EmptyList - } else { - Expr::List(loc_elems) - } - }) -} - -pub fn apply_with_parens(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - between( - char('('), - char(')'), - indented_whitespaces(min_indent) - .with(located(expr_body(min_indent))) - .skip(indented_whitespaces(min_indent)), - ) - .and( - // Parenthetical expressions can optionally be followed by - // whitespace and one or more whitespace-separated expressions, - // meaning this is function application! - optional(attempt(apply_args(min_indent))), - ) - .map( - |(located_expr, opt_args): (Located, Option>>)| match opt_args { - // If there was nothing after the parens, that's okay; this is still parens, but not application. - None => located_expr.value, - Some(args) => Expr::Apply(Box::new(located_expr), args), - }, - ) -} - -#[inline(always)] -fn function_arg(min_indent: u16) -> impl Parser> -where - I: Stream, - I::Error: ParseError, -{ - located(choice(( - // Don't use apply_with_parens here, because it will think anything following - // this parenthetical expr is an argument *to be passed to the parenthetical expr*. - between( - char('('), - char(')'), - indented_whitespaces(min_indent) - .with(expr_body(min_indent)) - .skip(indented_whitespaces(min_indent)), - ), - // Don't parse operators, because they have a higher - // precedence than function application. If we see one, - // we're done! 
- function_arg_expr(min_indent), - // Variants can't be applied in function args without parens; - // (foo Bar baz) will pass 2 arguments to foo, rather than parsing like (foo (Bar baz)) - attempt(variant_name()) - .map(|name| Expr::ApplyVariant(VariantName::Unqualified(name), None)), - // Functions can't be called by name in function args without parens; - // (foo bar baz) will pass 2 arguments to foo, rather than parsing like (foo (bar baz)) - attempt(ident()).map(|name| Expr::Var(Ident::Unqualified(name))), - ))) -} - -pub fn apply_args(min_indent: u16) -> impl Parser>> -where - I: Stream, - I::Error: ParseError, -{ - // Function application always begins with whitespace. - attempt( - indented_whitespaces1(min_indent) - .skip( - // If there's a reserved keyword next, this isn't function application after all! - not_followed_by(choice((string("then"), string("else"), string("when")))), - ) - .with( - // Arguments are whitespace-separated. - sep_by1( - function_arg(min_indent), - // Only consume these spaces if there's another argument after them. - // Otherwise we consume too much and mess up indentation checking! - attempt(indented_whitespaces1(min_indent).skip( - // Any of these indicates we've hit the end of the argument list. 
- not_followed_by(choice(( - string(","), - string(")"), - string("]"), - string("}"), - operator().with(value("")), - string("then"), - string("else"), - string("when"), - eof().with(value("")), - ))), - )), - ), - ), - ) -} - -pub fn operator() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - choice(( - string("==").map(|_| Operator::Equals), - string("&&").map(|_| Operator::And), - // either < or <= - char('<').with(optional(char('=')).map(|opt_eq| { - if opt_eq.is_none() { - Operator::LessThan - } else { - Operator::LessThanOrEq - } - })), - // either > or >= - char('>').with(optional(char('=')).map(|opt_eq| { - if opt_eq.is_none() { - Operator::GreaterThan - } else { - Operator::GreaterThanOrEq - } - })), - // either || or |> - char('|').with( - char('>') - .map(|_| Operator::Pizza) - .or(char('|').map(|_| Operator::Or)), - ), - // either / or // - char('/').with(optional(char('/')).map(|opt_slash| { - if opt_slash.is_none() { - Operator::Slash - } else { - Operator::DoubleSlash - } - })), - string("~/").map(|_| Operator::TildeSlash), - char('+').map(|_| Operator::Plus), - char('-').map(|_| Operator::Minus), - char('*').map(|_| Operator::Star), - char('/').map(|_| Operator::Slash), - char('^').map(|_| Operator::Caret), - char('%').map(|_| Operator::Percent), - char('<').map(|_| Operator::LessThan), - char('>').map(|_| Operator::GreaterThan), - )) -} - -pub fn nested_assignment( - min_indent: u16, -) -> impl Parser, Located)> -where - I: Stream, - I::Error: ParseError, -{ - attempt( - located(pattern(min_indent)).and(indentation()) - .skip(whitespace()) - .and( - char('=').with(indentation()) - // If the "=" after the identifier turns out to be - // either "==" or "=>" then this is not a declaration! 
- .skip(not_followed_by(choice((char('='), char('>'))))) - ) - .skip(whitespace()) - .then(move |((var_pattern, original_indent), equals_sign_indent)| { - if original_indent < min_indent { - unexpected_any("this assignment is outdented too far").left() - } else if equals_sign_indent < original_indent /* `<` because '=' should be same indent or greater */ { - unexpected_any("the = in this assignment seems outdented").left() - } else { - located(expr_body(original_indent + 1 /* declaration body must be indented relative to original decl */)) - .skip(whitespace1()) - .and(indentation()) - .then(move |(var_expr, in_expr_indent)| { - if in_expr_indent != original_indent { - unexpected_any("the return expression was indented differently from the original assignment").left() - } else { - value((var_pattern.to_owned(), var_expr)).right() - } - }).right() - } - }) - ) -} - -pub fn assignment(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - attempt( - located(pattern(min_indent)).and(indentation()) - .skip(whitespace()) - .and( - char('=').with(indentation()) - // If the "=" after the identifier turns out to be - // either "==" or "=>" then this is not a declaration! 
- .skip(not_followed_by(choice((char('='), char('>'))))) - ) - ) - .skip(whitespace()) - .then(move |((first_assignment_pattern, original_indent), equals_sign_indent)| { - if original_indent < min_indent { - unexpected_any("this assignment is outdented too far").left() - } else if equals_sign_indent < original_indent /* `<` because '=' should be same indent or greater */ { - unexpected_any("the = in this assignment seems outdented").left() - } else { - located(expr_body(original_indent + 1 /* declaration body must be indented relative to original decl */)) - .skip(whitespace1()) - // Parse any additional assignments that appear right after this one - .and(many::, _>(nested_assignment(original_indent))) - .and(located(expr_body(original_indent)).and(indentation())) - .then(move |((first_assignment_expr, mut assignments), (in_expr, in_expr_indent))| { - if in_expr_indent != original_indent { - unexpected_any("the return expression was indented differently from the original assignment").left() - } else { - assignments.insert(0, (first_assignment_pattern.clone(), first_assignment_expr)); - - value(Expr::Assign(assignments, Box::new(in_expr))).right() - } - }).right() - } - }) -} - -pub fn func_or_var(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - located(ident()).and(optional(apply_args(min_indent))).map( - |(loc_name, opt_args): (Located, Option>>)| { - // Use optional(sep_by1()) over sep_by() to avoid - // allocating a Vec in the common case where this is a var - match opt_args { - None => Expr::Var(Ident::Unqualified(loc_name.value)), - Some(args) => Expr::Apply( - Box::new(Located { - region: loc_name.region, - value: Expr::Var(Ident::Unqualified(loc_name.value)), - }), - args, - ), - } - }, - ) -} - -/// e.g. 
\x y => expr -pub fn closure(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - char('\\') - .skip(indented_whitespaces(min_indent)) - .with(sep_by1( - located(pattern(min_indent)), - attempt(many1::, _>( - skipped_indented_whitespace_char(min_indent).skip(not_followed_by(string("->"))), - )), - )) - .skip(indented_whitespaces(min_indent)) - .skip(string("->")) - .skip(indented_whitespaces1(min_indent)) - .and(located(expr_body(min_indent))) - .map(|(patterns, closure_body)| Expr::Closure(patterns, Box::new(closure_body))) -} - -parser! { - #[inline(always)] - fn pattern[I](min_indent_ref: u16)(I) -> Pattern - where [ I: Stream ] - { - let min_indent = *min_indent_ref; - - choice(( - char('_').map(|_| Pattern::Underscore), - string("{}").map(|_| Pattern::EmptyRecordLiteral), - match_variant(min_indent), - int_or_frac_pattern(), // This goes before ident() so number literals aren't mistaken for malformed idents. - ident().map(Pattern::Identifier), - )) - } -} - -pub fn apply_variant(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - attempt(variant_name()) - .and(optional(attempt(apply_args(min_indent)))) - .map(|(name, opt_args): (String, Option>>)| { - Expr::ApplyVariant(VariantName::Unqualified(name), opt_args) - }) -} - -pub fn match_variant(min_indent: u16) -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - attempt(located(variant_name())) - .and(optional(attempt(indented_whitespaces(min_indent).with( - sep_by1( - located(pattern(min_indent)), - attempt(indented_whitespaces1(min_indent).skip(not_followed_by(string("then")))), - ), - )))) - .map( - |(loc_name, opt_args): (Located, Option>>)| - // Use optional(sep_by1()) over sep_by() to avoid - // allocating a Vec in case the variant is empty - Pattern::Variant( - Located { region: loc_name.region, value: VariantName::Unqualified(loc_name.value)}, - opt_args - ), - ) -} - -pub fn variant_name() -> impl Parser -where - I: Stream, - I::Error: 
ParseError, -{ - // Variants must begin with an uppercase letter, but can have any - // combination of letters or numbers afterwards. - // No underscores, dashes, or apostrophes. - look_ahead(satisfy(|ch: char| ch.is_uppercase())) - .with(many1::, _>(alpha_num())) - .map(|chars| chars.into_iter().collect()) -} - -pub fn ident() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - // Identifiers must begin with a lowercase letter, but can have any - // combination of letters or numbers afterwards. - // No underscores, dashes, or apostrophes. - many1::, _>(alpha_num()).then(|chars: Vec| { - let valid_start_char = chars[0].is_lowercase(); - - if valid_start_char { - let ident_str: String = chars.into_iter().collect(); - - match ident_str.as_str() { - "if" => unexpected_any("Reserved keyword `if`").left(), - "then" => unexpected_any("Reserved keyword `then`").left(), - "else" => unexpected_any("Reserved keyword `else`").left(), - "case" => unexpected_any("Reserved keyword `case`").left(), - "when" => unexpected_any("Reserved keyword `when`").left(), - _ => value(ident_str).right(), - } - } else { - unexpected_any("First character in an identifier that was not a lowercase letter") - .left() - } - }) -} - -pub fn string_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - between( - char('"'), - char('"'), - many::)>, _>(choice(( - // Handle the edge cases where the interpolation happens - // to be at the very beginning of the string literal, - // or immediately following the previous interpolation. - attempt(string("\\(")) - .with(value("".to_string())) - .and(located(ident()).skip(char(')'))), - // Parse a bunch of non-interpolated characters until we hit \( - many1::, _>(string_body()) - .map(|chars: Vec| chars.into_iter().collect::()) - .and(choice(( - attempt(string("\\(").with(located(ident()).skip(char(')')))), - // If we never encountered \( then we hit the end of - // the string literal. 
Use empty Ident here because - // we're going to pop this Ident off the array anyhow. - located(value("".to_string())), - ))), - ))) - .map(|mut pairs| { - match pairs.pop() { - None => Expr::EmptyStr, - Some((trailing_str, located_name)) => { - let mut ident_pairs = pairs - .into_iter() - .map(|(string, located_name)| { - ( - string, - located_name.map(|name| Ident::Unqualified(name.clone())), - ) - }) - .collect::)>>(); - - if located_name.value.is_empty() { - if ident_pairs.is_empty() { - // We didn't find any interpolation at all. This is a string literal! - Expr::Str(trailing_str.to_string()) - } else { - Expr::InterpolatedStr(ident_pairs, trailing_str.to_string()) - } - } else { - // This is an interpolated string where the interpolation - // happened to occur at the very end of the literal. - - // Put the tuple back. - ident_pairs.push(( - trailing_str, - located_name.map(|name| Ident::Unqualified(name.clone())), - )); - - Expr::InterpolatedStr(ident_pairs, "".to_string()) - } - } - } - }), - ) -} - -pub fn char_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - between(char('\''), char('\''), char_body().expected(ERR_EMPTY_CHAR)).map(|ch| Expr::Char(ch)) -} - -fn unicode_code_pt() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - // You can put up to 6 hex digits inside \u{...} - // e.g. \u{00A0} or \u{101010} - // They must be no more than 10FFFF - let hex_code_pt = - count_min_max::, HexDigit>(1, 6, hex_digit()) - .then(|hex_digits| { - let hex_str:String = hex_digits.into_iter().collect(); - - match u32::from_str_radix(&hex_str, 16) { - Ok(code_pt) => { - if code_pt > 0x10FFFF { - unexpected_any("Invalid Unicode code point. 
It must be no more than \\u{10FFFF}.").right() - } else { - match char::from_u32(code_pt) { - Some(ch) => value(ch).left(), - None => unexpected_any("Invalid Unicode code point.").right() - } - } - }, - Err(_) => { - unexpected_any("Invalid hex code - Unicode code points must be specified using hexadecimal characters (the numbers 0-9 and letters A-F)").right() - } - } - }); - - char('u').with(between(char('{'), char('}'), hex_code_pt)) -} - -fn string_body() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - parser(|input: &mut I| { - let (parsed_char, consumed) = try!(any().parse_lazy(input).into()); - let mut escaped = satisfy_map(|escaped_char| { - // NOTE! When modifying this, revisit char_body too! - // Their implementations are similar but not the same. - match escaped_char { - '"' => Some('"'), - '\\' => Some('\\'), - 't' => Some('\t'), - 'n' => Some('\n'), - 'r' => Some('\r'), - _ => None, - } - }); - - match parsed_char { - '\\' => { - if look_ahead(char('(')).parse_stream(input).is_ok() { - // If we hit a \( then we're doing string interpolation. - // Bail out after consuming the backslash! - Err(Consumed::Empty(I::Error::empty(input.position()).into())) - } else { - consumed.combine(|_| { - // Try to parse basic backslash-escaped literals - // e.g. \t, \n, \r - escaped.parse_stream(input).or_else(|_| - // If we didn't find any of those, try \u{...} - unicode_code_pt().parse_stream(input)) - }) - } - } - '"' => { - // Never consume a double quote unless it was preceded by a - // backslash. This means we're at the end of the string literal! - Err(Consumed::Empty(I::Error::empty(input.position()).into())) - } - _ => Ok((parsed_char, consumed)), - } - }) -} - -fn char_body() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - parser(|input: &mut I| { - let (parsed_char, consumed) = try!(any().parse_lazy(input).into()); - let mut escaped = satisfy_map(|escaped_char| { - // NOTE! When modifying this, revisit string_body too! 
- // Their implementations are similar but not the same. - match escaped_char { - '\'' => Some('\''), - '\\' => Some('\\'), - 't' => Some('\t'), - 'n' => Some('\n'), - 'r' => Some('\r'), - _ => None, - } - }); - - match parsed_char { - '\\' => { - consumed.combine(|_| { - // Try to parse basic backslash-escaped literals - // e.g. \t, \n, \r - escaped.parse_stream(input).or_else(|_| - // If we didn't find any of those, try \u{...} - unicode_code_pt().parse_stream(input)) - }) - } - '\'' => { - // We should never consume a single quote unless - // it's preceded by a backslash - Err(Consumed::Empty(I::Error::empty(input.position()).into())) - } - _ => Ok((parsed_char, consumed)), - } - }) -} - -pub fn digits_after_decimal() -> impl Parser> -where - I: Stream, - I::Error: ParseError, -{ - // We expect these to be digits, but read any alphanumeric characters - // because it could turn out they're malformed identifiers which - // happen to begin with a number. We'll check for that at the end. - many1::, _>(alpha_num()) -} - -pub fn digits_before_decimal() -> impl Parser> -where - I: Stream, - I::Error: ParseError, -{ - // Digits before the decimal point can be underscore-separated - // e.g. one million can be written as 1_000_000 - many1::, _>(alpha_num().skip(optional(attempt(char('_').skip( - // Don't mistake keywords like `then` and `else` for - // space-separated digits! - not_followed_by(choice((string("then"), string("else"), string("when")))), - ))))) -} - -pub fn negative_int_or_frac_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - // Do this lookahead to decide if we should parse this as a number. - // This matters because once we commit to parsing it as a number, - // we may discover non-digit chars, indicating this is actually an - // invalid identifier. (e.g. "523foo" looks like a number, but turns - // out to be an invalid identifier on closer inspection.) 
- look_ahead(char('-').with(digit())) - .skip(any()) // skip over the minus sign we already know is there - .with(digits_before_decimal()) - .and(optional(char('.').with(digits_after_decimal()))) - .then(|(int_digits, decimals): (Vec, Option>)| { - // TODO check length of digits and make sure not to overflow - let int_str: String = int_digits.into_iter().collect(); - - match ( int_str.parse::(), decimals ) { - (Ok(int_val), None) => { - value(Expr::Int(-int_val as i64)).right() - }, - (Ok(_), Some(nums)) => { - let decimal_str: String = nums.into_iter().collect(); - - match format!("{}.{}", int_str, decimal_str).parse::() { - Ok(float) => { - value(Expr::Float(-float)).right() - }, - Err(_) => { - unexpected_any("looked like a negative Float literal but was actually malformed identifier").left() - } - } - }, - (Err(_), _) => - unexpected_any("looked like a negative number literal but was actually malformed identifier").left() - } - }) -} - -pub fn int_or_frac_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - // Confirm that it starts with a digit; otherwise, it's potentially an identifier! 
- look_ahead(digit()) - .with(digits_before_decimal()) - .and(optional(char('.').with(digits_after_decimal()))) - .then(|(int_digits, decimals): (Vec, Option>)| { - // TODO check length of digits and make sure not to overflow - let int_str: String = int_digits.into_iter().collect(); - - match (int_str.parse::(), decimals) { - (Ok(int_val), None) => value(Expr::Int(int_val as i64)).right(), - (Ok(int_val), Some(nums)) => { - let decimal_str: String = nums.into_iter().collect(); - - match format!("{}.{}", int_str, decimal_str).parse::() { - Ok(float) => value(Expr::Float(float)).right(), - Err(_) => unexpected_any( - "non-digit characters after decimal point in a number literal", - ) - .left(), - } - } - (Err(_), _) => unexpected_any( - "looked like a number literal but was actually malformed identifier", - ) - .left(), - } - }) -} - -/// TODO find a way to remove the code duplication between this and int_or_frac_literal -/// without sacrificing performance. I attempted to do this in 0062e83d03d389f0f07e33e1e7929e77825d774f -/// but couldn't figure out how to address the resulting compiler error, which was: -/// "cannot move out of captured outer variable in an `FnMut` closure" -pub fn int_or_frac_pattern() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - // We expect these to be digits, but read any alphanumeric characters - // because it could turn out they're malformed identifiers which - // happen to begin with a number. We'll check for that at the end. - let digits_after_decimal = many1::, _>(alpha_num()); - - // Digits before the decimal point can be underscore-separated - // e.g. one million can be written as 1_000_000 - let digits_before_decimal = - many1::, _>(alpha_num().skip(optional(attempt(char('_').skip( - // Don't mistake keywords like `then` and `else` for - // space-separated digits! 
- not_followed_by(choice((string("then"), string("else"), string("when")))), - ))))); - - optional(attempt(char('-'))) - // Do this lookahead to decide if we should parse this as a number. - // This matters because once we commit to parsing it as a number, - // we may discover non-digit chars, indicating this is actually an - // invalid identifier. (e.g. "523foo" looks like a number, but turns - // out to be an invalid identifier on closer inspection.) - .and(look_ahead(digit())) - .and(digits_before_decimal) - .and(optional(char('.').with(digits_after_decimal))) - .then( - |(((opt_minus, _), int_digits), decimals): ( - ((Option, _), Vec), - Option>, - )| { - let is_positive = opt_minus.is_none(); - - // TODO check length of digits and make sure not to overflow - let int_str: String = int_digits.into_iter().collect(); - - match (int_str.parse::(), decimals) { - (Ok(int_val), None) => { - if is_positive { - value(Pattern::IntLiteral(int_val as i64)).right() - } else { - value(Pattern::IntLiteral(-int_val as i64)).right() - } - } - (Ok(int_val), Some(nums)) => { - let decimal_str: String = nums.into_iter().collect(); - - match format!("{}.{}", int_str, decimal_str).parse::() { - Ok(float) => value(Pattern::FloatLiteral(float)).right(), - Err(_) => unexpected_any( - "non-digit characters after decimal point in a number literal", - ) - .left(), - } - } - (Err(_), _) => { - unexpected_any("looked like a number but was actually malformed identifier") - .left() - } - } - }, - ) -} diff --git a/src/deprecated/parse_state.rs b/src/deprecated/parse_state.rs deleted file mode 100644 index 970b9b36fb..0000000000 --- a/src/deprecated/parse_state.rs +++ /dev/null @@ -1,100 +0,0 @@ -use combine::lib::fmt; - -use combine::stream::state::{Positioner, RangePositioner}; -use combine::stream::Resetable; - -// Plan: -// -// 1. Let space parsers check indentation. They should expect indentation to only ever increase (right?) when -// doing a many_whitespaces or many1_whitespaces. 
Multline strings can have separate whitespace parsers. -// 2. For any expression that has subexpressions (e.g. ifs, parens, operators) record their indentation levels -// by doing .and(position()) followed by .and_then() which says "I can have a declaration inside me as -// long as the entire decl is indented more than me." -// 3. Make an alternative to RangeStreamOnce where uncons_while barfs on \t (or maybe just do this in whitespaces?) - -/// Struct which represents a position in a source file. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] -pub struct IndentablePosition { - /// Current line of the input - pub line: u32, - /// Current column of the input - pub column: u16, - - /// Current indentation level, in columns (so no indent is col 1 - this saves an arithmetic operation.) - pub indent_col: u16, - - // true at the beginning of each line, then false after encountering the first nonspace char. - pub is_indenting: bool, -} - -clone_resetable! { () IndentablePosition } - -impl Default for IndentablePosition { - fn default() -> Self { - IndentablePosition { - line: 1, - column: 1, - indent_col: 1, - is_indenting: true, - } - } -} - -impl fmt::Display for IndentablePosition { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "line: {}, column: {}, indent_col: {}, is_indenting: {}", - self.line, self.column, self.indent_col, self.is_indenting - ) - } -} - -impl IndentablePosition { - pub fn new() -> Self { - IndentablePosition::default() - } -} - -impl Positioner for IndentablePosition { - type Position = IndentablePosition; - - #[inline(always)] - fn position(&self) -> IndentablePosition { - self.clone() - } - - #[inline] - fn update(&mut self, item: &char) { - match *item { - '\n' => { - self.column = 1; - self.line += 1; - self.indent_col = 1; - self.is_indenting = true; - } - ' ' => { - self.column += 1; - } - _ => { - if self.is_indenting { - // As soon as we hit a nonspace char, we're done indenting. 
- // It doesn't count as an indent until we hit a nonspace character though! - // Until that point it's still a blank line, not an indented one. - self.indent_col = self.column; - self.is_indenting = false; - } - - self.column += 1; - } - } - } -} - -impl<'a> RangePositioner for IndentablePosition { - fn update_range(&mut self, range: &&'a str) { - for c in range.chars() { - self.update(&c); - } - } -} diff --git a/src/eval.rs b/src/eval.rs deleted file mode 100644 index 4b77e881f4..0000000000 --- a/src/eval.rs +++ /dev/null @@ -1,721 +0,0 @@ -use expr::{Expr, Ident, Pattern, VariantName}; -use expr::Pattern::*; -use expr; -use operator::Operator::*; -use operator::Operator; -use std::rc::Rc; -use std::fmt; -use collections::ImMap; -use self::Problem::*; -use fraction::Fraction; -use region::{Located, Region}; - -pub fn eval(expr: Located) -> Evaluated { - scoped_eval(prepare_for_eval(expr), &ImMap::default()) -} - -fn prepare_for_eval(expr: Located) -> Located { - expr::apply_precedence_and_associativity(expr).unwrap().map(&apply_pizza) -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Evaluated { - // Literals - Int(i64), - Frac(Fraction), - Approx(f64), - EmptyStr, - Str(String), - InterpolatedStr(Vec<(String, Ident)>, String), - Char(char), - Closure(Vec, Box>, Scope), - - // Sum Types - ApplyVariant(VariantName, Option>), - - // Product Types - EmptyRecord, - - // Errors - EvalError(Region, Problem) -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Problem { - UnrecognizedVarName(String), - TypeMismatch(String), - ReassignedVarName(String), - WrongArity(u32 /* Expected */, u32 /* Provided */), - NotEqual, // Used when (for example) a string literal pattern match fails - NoBranchesMatched, -} - -type Scope = ImMap>; - -pub fn scoped_eval(expr: Located, vars: &Scope) -> Evaluated { - use self::Evaluated::*; - - let region = expr.region; - - match expr.value { - Expr::Int(num) => Int(num), - Expr::EmptyStr => EmptyStr, - Expr::Str(string) => Str(string), - 
Expr::Frac(numerator, denominator) => Frac(fraction_from_i64s(numerator, denominator)), - Expr::Approx(num) => Approx(num), - Expr::Char(ch) => Char(ch), - Expr::Closure(args, body) => Closure(args.into_iter().map(|e| e.value).collect(), body, vars.clone()), - Expr::EmptyRecord => EmptyRecord, - - // Resolve variable names - Expr::Var(ident) => { - let ident_name = ident.name(); - - match vars.get(&ident_name) { - Some(resolved) => (**resolved).clone(), - None => EvalError(region, UnrecognizedVarName(ident_name)) - } - } - - Expr::InterpolatedStr(pairs, trailing_str) => { - let mut output = String::new(); - - for (string, loc_ident) in pairs.into_iter() { - let ident_name = loc_ident.value.name(); - - match vars.get(&ident_name) { - Some(resolved) => { - match **resolved { - Str(ref var_string) => { - output.push_str(string.as_str()); - output.push_str(var_string.as_str()); - }, - _ => { - return EvalError(region, TypeMismatch(ident_name)); - } - } - }, - None => { return EvalError(region, UnrecognizedVarName(ident_name)); } - } - } - - output.push_str(trailing_str.as_str()); - - Str(output) - }, - - Expr::Assign(located_pattern, assigned_expr, returned_expr) => { - eval_assign(located_pattern, *assigned_expr, *returned_expr, vars) - } - - Expr::CallByName(ident, args) => { - let ident_name = ident.name(); - - let func_expr = match vars.get(&ident_name) { - Some(resolved) => (**resolved).clone(), - None => EvalError(region, UnrecognizedVarName(ident_name)) - }; - - eval_apply(region, func_expr, args, vars) - }, - - Expr::ApplyVariant(name, None) => ApplyVariant(name, None), - - Expr::ApplyVariant(name, Some(exprs)) => { - ApplyVariant( - name, - Some(exprs.into_iter().map(|arg| scoped_eval(arg, vars)).collect()) - ) - } - - Expr::Apply(func_expr, args) => { - eval_apply(region, scoped_eval(*func_expr, vars), args, vars) - }, - - Expr::Case(condition, branches) => { - eval_case(region, scoped_eval(*condition, vars), branches, vars) - }, - - Expr::Operator(left_arg, 
op, right_arg) => { - eval_operator( - region, - &scoped_eval(*left_arg, vars), - op.value, - &scoped_eval(*right_arg, vars) - ) - }, - - Expr::If(condition, if_true, if_false) => { - match scoped_eval(*condition, vars) { - ApplyVariant(variant_name, None) => { - match variant_name.name().as_str() { - "True" => scoped_eval(*if_true, vars), - "False" => scoped_eval(*if_false, vars), - _ => EvalError(region, TypeMismatch("non-Bool used in `if` condition".to_string())) - } - }, - _ => EvalError(region, TypeMismatch("non-Bool used in `if` condition".to_string())) - } - } - } -} - -fn eval_assign(pattern: Located, assigned_expr: Located, returned_expr: Located, vars: &Scope) -> Evaluated { - use self::Evaluated::*; - - let pattern_region = pattern.region; - - match pattern.value { - Identifier(ident) => { - let ident_name = ident.name(); - - if vars.contains_key(&ident_name) { - EvalError(pattern_region, ReassignedVarName(ident_name)) - } else { - // Create a new scope containing the new declaration. - let mut new_vars = vars.clone(); - let evaluated_defn = scoped_eval(assigned_expr, vars); - - new_vars.insert(ident_name, Rc::new(evaluated_defn)); - - // Evaluate in_expr with that new scope's variables. 
- scoped_eval(returned_expr, &new_vars) - } - }, - - Integer(_) => { - panic!("You cannot assign integers to other values!"); - }, - - Fraction(_, _) => { - panic!("You cannot assign fractions to other values!"); - }, - - Variant(_name, _patterns) => { - panic!("Pattern matching on variants is not yet supported!"); - }, - - Underscore => { - panic!("Cannot assign to the _ pattern!"); - }, - - Pattern::EmptyRecordLiteral => { - panic!("Cannot assign to the {} pattern!"); - }, - } -} - -#[inline(always)] -pub fn call(region: Region, evaluated: Evaluated, args: Vec>) -> Evaluated { - eval_apply(region, evaluated, args, &HashMap::new()) -} - -#[inline(always)] -fn eval_apply(region: Region, evaluated: Evaluated, args: Vec>, vars: &Scope) -> Evaluated { - use self::Evaluated::*; - - match evaluated { - Closure(arg_patterns, body, closure_vars) => { - let combined_vars = vars.clone().union(closure_vars); - let evaluated_args = - args.into_iter() - .map(|arg| scoped_eval(arg, &combined_vars)) - .collect(); - - match eval_closure(evaluated_args, arg_patterns, &combined_vars) { - Ok(new_vars) => scoped_eval(*body, &new_vars), - Err(prob) => EvalError(region, prob) - } - }, - val => { - EvalError(region, TypeMismatch(format!("Tried to call a non-function: {}", val))) - } - } -} - -#[inline(always)] -fn eval_closure(args: Vec, arg_patterns: Vec, vars: &Scope) - -> Result -{ - if arg_patterns.len() == args.len() { - // Create a new scope for the function to use. 
- let mut new_vars = vars.clone(); - - for ( arg, pattern ) in args.into_iter().zip(arg_patterns) { - pattern_match(&arg, &pattern, &mut new_vars)?; - } - - Ok(new_vars) - } else { - Err(WrongArity(arg_patterns.len() as u32, args.len() as u32)) - } -} - -fn bool_variant(is_true: bool) -> Evaluated { - if is_true { - Evaluated::ApplyVariant(VariantName::Unqualified("True".to_string()), None) - } else { - Evaluated::ApplyVariant(VariantName::Unqualified("False".to_string()), None) - } -} - -fn eq(region: Region, evaluated1: &Evaluated, evaluated2: &Evaluated) -> Evaluated { - use self::Evaluated::*; - - match (evaluated1, evaluated2) { - // All functions are defined as equal - (Closure(_, _, _), Closure(_, _, _)) => bool_variant(true), - - (ApplyVariant(left, None), ApplyVariant(right, None)) => { - bool_variant(left == right) - }, - - (ApplyVariant(left, Some(left_args)), ApplyVariant(right, Some(right_args))) => { - bool_variant(left == right && left_args.len() == right_args.len()) - }, - - (ApplyVariant(_, None), ApplyVariant(_, Some(_))) => { - bool_variant(false) - }, - - (ApplyVariant(_, Some(_)), ApplyVariant(_, None)) => { - bool_variant(false) - }, - - (Int(left), Int(right)) => bool_variant(left == right), - (Str(left), Str(right)) => bool_variant(left == right), - (Char(left), Char(right)) => bool_variant(left == right), - (Frac(left), Frac(right)) => bool_variant(left == right), - - (_, _) => EvalError(region, TypeMismatch("tried to use == on two values with incompatible types".to_string())), - } -} - -fn bool_from_variant_name(name: &VariantName) -> Option { - match name.clone().name().as_str() { - "True" => Some(true), - "False" => Some(false), - _ => None - } -} - -#[inline(always)] -fn eval_operator(region: Region, left_expr: &Evaluated, op: Operator, right_expr: &Evaluated) -> Evaluated { - use self::Evaluated::*; - - // TODO in the future, replace these with named function calls to stdlib - match (left_expr, op, right_expr) { - // Equals - (_, 
Equals, _) => eq(region, left_expr, right_expr), - - // And - (ApplyVariant(left_name, None), And, ApplyVariant(right_name, None)) => { - match (bool_from_variant_name(left_name), bool_from_variant_name(right_name)) { - (Some(left_bool), Some(right_bool)) => bool_variant(left_bool && right_bool), - _ => EvalError(region, TypeMismatch("tried to use && on non-bools".to_string())), - } - } - (_, And, _) => EvalError(region, TypeMismatch("tried to use && on non-bools".to_string())), - - // Or - (ApplyVariant(left_name, None), Or, ApplyVariant(right_name, None)) => { - match (bool_from_variant_name(left_name), bool_from_variant_name(right_name)) { - (Some(left_bool), Some(right_bool)) => bool_variant(left_bool || right_bool), - _ => EvalError(region, TypeMismatch("tried to use && on non-bools".to_string())), - } - } - (_, Or, _) => EvalError(region, TypeMismatch("tried to use && on non-bools".to_string())), - - // LessThan - (Int(left_num), LessThan, Int(right_num)) => bool_variant(left_num < right_num), - (Frac(left_num), LessThan, Frac(right_num)) => bool_variant(left_num < right_num), - (Int(_), LessThan, Frac(_)) => EvalError(region, TypeMismatch("tried check Frac < Int. Explicitly convert them to the same type first!".to_string())), - (Frac(_), LessThan, Int(_)) => EvalError(region,TypeMismatch("tried check Int < Frac. Explicitly convert them to the same type first!".to_string())), - (_, LessThan, _) => EvalError(region, TypeMismatch("tried to check if one non-number < another non-number".to_string())), - - // LessThanOrEq - (Int(left_num), LessThanOrEq, Int(right_num)) => bool_variant(left_num <= right_num), - (Frac(left_num), LessThanOrEq, Frac(right_num)) => bool_variant(left_num <= right_num), - (Int(_), LessThanOrEq, Frac(_)) => EvalError(region, TypeMismatch("tried check Frac <= Int. Explicitly convert them to the same type first!".to_string())), - (Frac(_), LessThanOrEq, Int(_)) => EvalError(region, TypeMismatch("tried check Int <= Frac. 
Explicitly convert them to the same type first!".to_string())), - (_, LessThanOrEq, _) => EvalError(region, TypeMismatch("tried to check if one non-number <= another non-number".to_string())), - - // GreaterThan - (Int(left_num), GreaterThan, Int(right_num)) => bool_variant(left_num > right_num), - (Frac(left_num), GreaterThan, Frac(right_num)) => bool_variant(left_num > right_num), - (Int(_), GreaterThan, Frac(_)) => EvalError(region, TypeMismatch("tried check Frac > Int. Explicitly convert them to the same type first!".to_string())), - (Frac(_), GreaterThan, Int(_)) => EvalError(region, TypeMismatch("tried check Int > Frac. Explicitly convert them to the same type first!".to_string())), - (_, GreaterThan, _) => EvalError(region, TypeMismatch("tried to check if one non-number > another non-number".to_string())), - - // GreaterThanOrEq - (Int(left_num), GreaterThanOrEq, Int(right_num)) => bool_variant(left_num >= right_num), - (Frac(left_num), GreaterThanOrEq, Frac(right_num)) => bool_variant(left_num >= right_num), - (Int(_), GreaterThanOrEq, Frac(_)) => EvalError(region, TypeMismatch("tried check Frac >= Int. Explicitly convert them to the same type first!".to_string())), - (Frac(_), GreaterThanOrEq, Int(_)) => EvalError(region, TypeMismatch("tried check Int >= Frac. Explicitly convert them to the same type first!".to_string())), - (_, GreaterThanOrEq, _) => EvalError(region, TypeMismatch("tried to check if one non-number >= another non-number".to_string())), - - // Plus - (Int(left_num), Plus, Int(right_num)) => Int(left_num.checked_add(*right_num).unwrap_or_else(|| panic!("Integer overflow on +"))), - (Frac(left_num), Plus, Frac(right_num)) => Frac(left_num + right_num), - - (Int(_), Plus, Frac(_)) => EvalError(region, TypeMismatch("tried to add Frac to Int. Explicitly convert them to the same type first!".to_string())), - - (Frac(_), Plus, Int(_)) => EvalError(region, TypeMismatch("tried to add Int to Frac. 
Explicitly convert them to the same type first!".to_string())), - - (_, Plus, _) => EvalError(region, TypeMismatch("tried to add non-numbers".to_string())), - - // Star - (Int(left_num), Star, Int(right_num)) => Int(left_num.checked_mul(*right_num).unwrap_or_else(|| panic!("Integer overflow on *"))), - (Frac(left_num), Star, Frac(right_num)) => Frac(left_num * right_num), - - (Int(_), Star, Frac(_)) => EvalError(region, TypeMismatch("tried to multiply Int by Frac. Explicitly convert them to the same type first!".to_string())), - - (Frac(_), Star, Int(_)) => EvalError(region, TypeMismatch("tried to multiply Frac by Int. Explicitly convert them to the same type first!".to_string())), - - (_, Star, _) => EvalError(region, TypeMismatch("tried to multiply non-numbers".to_string())), - - // Minus - (Int(left_num), Minus, Int(right_num)) => Int(left_num.checked_sub(*right_num).unwrap_or_else(|| panic!("Integer underflow on -"))), - (Frac(left_num), Minus, Frac(right_num)) => Frac(left_num - right_num), - - (Int(_), Minus, Frac(_)) => EvalError(region, TypeMismatch("tried to subtract Frac from Int. Explicitly convert them to the same type first!".to_string())), - - (Frac(_), Minus, Int(_)) => EvalError(region, TypeMismatch("tried to subtract Int from Frac. 
Explicitly convert them to the same type first!".to_string())), - - (_, Minus, _) => EvalError(region, TypeMismatch("tried to subtract non-numbers".to_string())), - - // Caret - (Int(left_num), Caret, Int(right_num)) => Int(left_num.checked_pow(*right_num as u32 /* TODO panic if this cast fails */).unwrap_or_else(|| panic!("Integer underflow on ^"))), - (Frac(_), Caret, Frac(_)) => EvalError(region, TypeMismatch("tried to use ^ with a Frac, which is not yet supported on either side of the ^ operator.".to_string())), - - (_, Caret, _) => EvalError(region, TypeMismatch("tried to use ^ on non-numbers".to_string())), - - // Slash - (Frac(left_num), Slash, Frac(right_num)) => { - let answer = left_num / right_num; - - if answer.is_finite() { - ok_variant(Frac(answer)) - } else { - err_variant(ApplyVariant(VariantName::Unqualified("DivisionByZero".to_string()), None)) - } - }, - - (Int(_), Slash, Int(_)) => EvalError(region, TypeMismatch("tried to divide two Int values. Explicitly convert them to Frac values, or use Int division (the // operator).".to_string())), - (Approx(_), Slash, Approx(_)) => EvalError(region, TypeMismatch("tried to divide two Approx values. Explicitly convert them to Frac values, or use Approx division (the ~/ operator).".to_string())), - (Int(_), Slash, Frac(_)) => EvalError(region, TypeMismatch("tried to divide Int by Frac. Explicitly convert them to the same type first!".to_string())), - (Frac(_), Slash, Int(_)) => EvalError(region, TypeMismatch("tried to divide Frac by Int. Explicitly convert them to the same type first!".to_string())), - - (_, Slash, _) => EvalError(region, TypeMismatch("tried to divide non-numbers".to_string())), - - // DoubleSlash - (Int(left_num), DoubleSlash, Int(right_num)) => Int(left_num / right_num), - - (Approx(_), DoubleSlash, Approx(_)) => EvalError(region, TypeMismatch("tried to do integer division on two Approx values. 
Explicitly convert them to Int values, or use Approx division (the ~/ operator).".to_string())), - (Frac(_), DoubleSlash, Frac(_)) => EvalError(region, TypeMismatch("tried to do integer division on two Frac values. Explicitly conver them to Int values, or use Frac division (the / operator).".to_string())), - (Int(_), DoubleSlash, Frac(_)) => EvalError(region,TypeMismatch("tried to integer-divide Int by Frac".to_string())), - (Frac(_), DoubleSlash, Int(_)) => EvalError(region, TypeMismatch("tried to integer-divide Frac by Int".to_string())), - - (_, DoubleSlash, _) => EvalError(region, TypeMismatch("tried to do integer division on two non-numbers".to_string())), - - // TildeSlash - (Approx(left_num), TildeSlash, Approx(right_num)) => { - let answer = left_num / right_num; - - if answer.is_finite() { - ok_variant(Approx(answer)) - } else { - err_variant(ApplyVariant(VariantName::Unqualified("DivisionByZero".to_string()), None)) - } - }, - - (Int(_), TildeSlash, Int(_)) => EvalError(region, TypeMismatch("tried to do Approx division on two Int values. Explicitly convert them to Approx values, or use Int division (the // operator).".to_string())), - (Frac(_), TildeSlash, Frac(_)) => EvalError(region, TypeMismatch("tried to do Approx division on two Frac values. Explicitly conver them to Approx values, or use Frac division (the / operator).".to_string())), - (Int(_), TildeSlash, Approx(_)) => EvalError(region, TypeMismatch("tried to do Int ~/ Approx. Explicitly convert both to Approx first!".to_string())), - (Frac(_), TildeSlash, Approx(_)) => EvalError(region, TypeMismatch("tried to do Frac ~/ Approx. Explicitly convert both to Approx first!".to_string())), - - (Approx(_), TildeSlash, Int(_)) => EvalError(region, TypeMismatch("tried to divide Approx ~/ Int. Explicitly convert both to Approx first!".to_string())), - (Approx(_), TildeSlash, Frac(_)) => EvalError(region, TypeMismatch("tried to divide Approx ~/ Frac. 
Explicitly convert both to Approx first!".to_string())), - - (_, TildeSlash, _) => EvalError(region, TypeMismatch("tried to divide non-numbers".to_string())), - - // Percent - (Int(left_num), Percent, Int(right_num)) => Int(left_num % right_num), - (Frac(left_num), Percent, Frac(right_num)) => { - let answer = left_num % right_num; - - if answer.is_finite() { - ok_variant(Frac(answer)) - } else { - err_variant(ApplyVariant(VariantName::Unqualified("DivisionByZero".to_string()), None)) - } - }, - - (Int(_), Percent, Frac(_)) => EvalError(region, TypeMismatch("tried to do Int % Frac. Explicitly convert them to the same type first!".to_string())), - - (Frac(_), Percent, Int(_)) => EvalError(region, TypeMismatch("tried to do Frac % Int. Explicitly convert them to the same type first!".to_string())), - - (_, Percent, _) => EvalError(region, TypeMismatch("tried to use % on non-numbers".to_string())), - - // Pizza - (_, Pizza, _) => { panic!("There was a |> operator that hadn't been removed prior to eval time. This should never happen!"); } - } -} - -#[inline(always)] -fn eval_case(region: Region, evaluated: Evaluated, branches: Vec<(Located, Located)>, vars: &Scope) -> Evaluated { - use self::Evaluated::*; - - for (pattern, definition) in branches { - let mut branch_vars = vars.clone(); - - if pattern_match(&evaluated, &pattern.value, &mut branch_vars).is_ok() { - return scoped_eval(definition, &branch_vars); - } - } - - EvalError(region, NoBranchesMatched) -} - -fn pattern_match(evaluated: &Evaluated, pattern: &Pattern, vars: &mut Scope) -> Result<(), Problem> { - use self::Evaluated::*; - - match pattern { - Identifier(name) => { - vars.insert(name.clone().name(), Rc::new(evaluated.clone())); - - Ok(()) - }, - Underscore => { - // Underscore matches anything, and records no new vars. 
- Ok(()) - }, - EmptyRecordLiteral => { - match evaluated { - EmptyRecord => Ok(()), - expr => Err(TypeMismatch( - format!("Wanted a `{}`, but was given `{}`.", "{}", expr) - )) - } - }, - - Integer(pattern_num) => { - match evaluated { - Int(evaluated_num) => { - if *pattern_num == *evaluated_num { - Ok(()) - } else { - Err(Problem::NotEqual) - } - }, - - expr => Err(TypeMismatch( - format!("Wanted a `{}`, but was given `{}`.", "{}", expr) - )) - } - }, - - Fraction(numerator, denominator) => { - match evaluated { - Frac(actual_frac) => { - let expected_frac = fraction_from_i64s(*numerator, *denominator); - - if expected_frac == *actual_frac { - Ok(()) - } else { - Err(Problem::NotEqual) - } - }, - - expr => Err(TypeMismatch( - format!("Wanted a `{}`, but was given `{}`.", "{}", expr) - )) - } - } - - Variant(pattern_variant_name, opt_pattern_contents) => { - match evaluated { - ApplyVariant(applied_variant_name, opt_applied_contents) => { - if *pattern_variant_name != *applied_variant_name { - return Err(TypeMismatch( - format!("Wanted a `{}` variant, but was given a `{}` variant.", - pattern_variant_name, - applied_variant_name - ) - ) - ); - } - - match (opt_pattern_contents, opt_applied_contents) { - ( Some(ref pattern_contents), Some(applied_contents) ) => { - if pattern_contents.len() == applied_contents.len() { - // Recursively pattern match - for ( pattern_val, applied_val ) in pattern_contents.into_iter().zip(applied_contents) { - pattern_match(applied_val, &pattern_val.value, vars)?; - } - - Ok(()) - } else { - Err(WrongArity( - pattern_contents.len() as u32, - applied_contents.len() as u32 - ) - ) - } - }, - ( None, None ) => { - // It's the variant we expected, but it has no values in it, - // so we don't insert anything into vars. - Ok(()) - }, - ( None, Some(contents) ) => { - // It's the variant we expected, but the arity is wrong. 
- Err(WrongArity(contents.len() as u32, 0)) - }, - ( Some(patterns), None ) => { - // It's the variant we expected, but the arity is wrong. - Err(WrongArity(0, patterns.len() as u32)) - }, - } - }, - _ => { - Err(TypeMismatch(format!("Wanted to destructure a `{}` variant, but was given a non-variant.", pattern_variant_name))) - } - } - } - } -} - -impl fmt::Display for Evaluated { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use self::Evaluated::*; - - match self { - // PRIMITIVES - Int(num) => write!(f, "{}", *num), - Frac(fraction) => { - let numerator = *fraction.numer().unwrap(); - let denominator = *fraction.denom().unwrap(); - - if denominator == 10 { - write!(f, "{}", (numerator as f64 / 10.0)) - } else { - write!(f, "{}/{}", numerator, denominator) - } - }, - Approx(num) => write!(f, "~{}", *num), - Str(string) => { - let escaped_str = - (*string) - .replace("\\", "\\\\") - .replace("\"", "\\\"") - .replace("\t", "\\t") - .replace("\n", "\\n") - .replace("\r", "\\r"); - - write!(f, "\"{}\"", escaped_str) - }, - Char(ch) => write!(f, "'{}'", *ch), - Closure(args, _, _) => write!(f, "<{}-argument function>", args.len()), - ApplyVariant(name, opt_exprs) => { - match opt_exprs { - None => write!(f, "{}", name.clone().name()), - Some(exprs) => { - let contents = - exprs.into_iter() - .map(|expr| format!(" {}", expr)) - .collect::>() - .join(","); - - write!(f, "{}{}", name.clone().name(), contents) - } - } - }, - - // ERRORS - EvalError(region, problem) => write!(f, "ERROR: {} at {}", format!("{}", problem), format!("line {}, column {}", region.start_line, region.start_col)), - - // UNFORMATTED - _ => write!(f, "") - } - } -} - -impl fmt::Display for Problem { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Problem::UnrecognizedVarName(name) => write!(f, "Unrecognized var name `{}`", name), - Problem::NoBranchesMatched => write!(f, "No branches matched in this case-expression"), - Problem::TypeMismatch(info) => write!(f, 
"Type Mismatch - {}", info), - Problem::ReassignedVarName(name) => write!(f, "Reassigned constant - {}", name), - Problem::NotEqual => write!(f, "Pattern match on literal value failed; the branch wasn't equal."), - Problem::WrongArity(expected_arity, provided_arity) => { - if provided_arity > expected_arity { - write!(f, "Too many arguments! Needed {} arguments, but got {}", expected_arity, provided_arity) - } else { - write!(f, "Missing arguments! Needed {} arguments, but got {}", expected_arity, provided_arity) - } - } - } - } -} - -fn ok_variant(contents: Evaluated) -> Evaluated{ - Evaluated::ApplyVariant(VariantName::Unqualified("Ok".to_string()), Some(vec![contents])) -} - -fn err_variant(contents: Evaluated) -> Evaluated { - Evaluated::ApplyVariant(VariantName::Unqualified("Err".to_string()), Some(vec![contents])) -} - -fn fraction_from_i64s(numerator: i64, denominator: i64) -> Fraction { - if numerator.is_negative() { - Fraction::new_neg(numerator as u64, denominator as u64) - } else { - Fraction::new(numerator as u64, denominator as u64) - } -} - -fn apply_pizza(expr: &Expr) -> Expr { - use expr::Expr::*; - - expr.walk(&|sub_expr| { - // TODO can we avoid cloning here somehow, without resorting to a macro? 
- match sub_expr.clone() { - Operator(boxed_loc_left, loc_op, boxed_loc_right) => { - let loc_left = *boxed_loc_left; - let loc_right = *boxed_loc_right; - let left_region = loc_left.region; - let right_region = loc_left.region; - let op_region = loc_op.region; - - match ( loc_left.value, loc_op.value, loc_right.value ) { - (left_arg, Pizza, Expr::Var(name)) => { - Expr::CallByName( - name, - vec![Located { region: left_region, value: left_arg }] - ) - }, - (left_arg, Pizza, Expr::CallByName(name, mut args)) => { - args.push(Located { region: left_region, value: left_arg }); - - CallByName(name, args) - }, - (left_arg, Pizza, Expr::Apply(applied_expr, mut args)) => { - args.push(Located { region: left_region, value: left_arg }); - - Apply(applied_expr, args) - }, - (left, op, right) => { - Operator( - Box::new(Located { region: left_region, value: left }), - Located { region: op_region, value: op }, - Box::new(Located { region: right_region, value: right }), - ) - } - } - }, - other => other - } - }) -} diff --git a/src/expr.rs b/src/expr.rs index c8e2c9878c..0c4299aea7 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -34,58 +34,6 @@ pub enum Expr { Operator(Box>, Located, Box>), } -/// A variant name, possibly fully-qualified with a module name -/// e.g. (Result.Ok) -/// Parameterized on a phantom marker for whether it has been canonicalized -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum VariantName { - Unqualified(String), - Qualified(String, String), -} - -/// An identifier, possibly fully-qualified with a module name -/// e.g. 
(Http.Request from http) -/// Parameterized on a phantom marker for whether it has been canonicalized -#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum Ident { - Unqualified(String), - Qualified(String, String), -} - -impl Ident { - pub fn is_qualified(&self) -> bool { - match &self { - &Ident::Unqualified(_) => false, - &Ident::Qualified(_, _) => true, - } - } - - pub fn name(self) -> String { - match self { - Ident::Unqualified(name) => name, - Ident::Qualified(_, name) => name, - } - } -} - -impl fmt::Display for Ident { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Ident::Unqualified(name) => write!(f, "{}", name), - Ident::Qualified(path, name) => write!(f, "{}.{}", path, name), - } - } -} - -impl fmt::Display for VariantName { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - VariantName::Unqualified(name) => write!(f, "{}", name), - VariantName::Qualified(path, name) => write!(f, "{}.{}", path, name), - } - } -} - #[derive(Clone, Debug, PartialEq)] pub enum Pattern { Identifier(String), diff --git a/src/ident.rs b/src/ident.rs new file mode 100644 index 0000000000..c5d53ab270 --- /dev/null +++ b/src/ident.rs @@ -0,0 +1,53 @@ +use std::fmt::{self, Display, Formatter}; + +/// A variant name, possibly fully-qualified with a module name +/// e.g. (Result.Ok) +/// Parameterized on a phantom marker for whether it has been canonicalized +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum VariantName { + Unqualified(String), + Qualified(String, String), +} + +/// An identifier, possibly fully-qualified with a module name +/// e.g. 
(Http.Request from http) +/// Parameterized on a phantom marker for whether it has been canonicalized +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum Ident { + Unqualified(String), + Qualified(String, String), +} + +impl Ident { + pub fn is_qualified(&self) -> bool { + match &self { + &Ident::Unqualified(_) => false, + &Ident::Qualified(_, _) => true, + } + } + + pub fn name(self) -> String { + match self { + Ident::Unqualified(name) => name, + Ident::Qualified(_, name) => name, + } + } +} + +impl Display for Ident { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Ident::Unqualified(name) => write!(f, "{}", name), + Ident::Qualified(path, name) => write!(f, "{}.{}", path, name), + } + } +} + +impl Display for VariantName { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + VariantName::Unqualified(name) => write!(f, "{}", name), + VariantName::Qualified(path, name) => write!(f, "{}.{}", path, name), + } + } +} diff --git a/src/infer.rs b/src/infer.rs index 56d3c2c15f..df77541f11 100644 --- a/src/infer.rs +++ b/src/infer.rs @@ -1,4 +1,6 @@ -use canonicalize::{Expr, Procedure, Symbol}; +use can::expr::Expr; +use can::procedure::Procedure; +use can::symbol::Symbol; use collections::{ImMap, MutMap}; use constrain::{constrain, constrain_procedure}; use region::Located; diff --git a/src/lib.rs b/src/lib.rs index c8ff38ecb8..0b2c7dc028 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,14 +1,12 @@ -// pub mod eval; -pub mod canonicalize; +pub mod can; pub mod collections; -pub mod deprecated; -pub mod expr; pub mod graph; +pub mod ident; pub mod operator; -pub mod parse; // DEPRECATED +pub mod parse; pub mod region; -// pub mod string; +pub mod string; pub mod constrain; pub mod ena; @@ -25,7 +23,5 @@ extern crate fxhash; extern crate im_rc; extern crate num; -#[macro_use] -extern crate combine; // OBSOLETE #[macro_use] extern crate log; diff --git a/src/module.rs b/src/module.rs deleted file mode 100644 index 
d4312ceea0..0000000000 --- a/src/module.rs +++ /dev/null @@ -1,15 +0,0 @@ -use expr::{Pattern, Ident}; - -pub struct Module { - name: Ident, - exposes: Vec, - uses: Vec, - decls: Vec, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Decl { - Assign(Pattern, Box, Box), - // TODO Alias - // TODO SumType -} diff --git a/src/parse/ast.rs b/src/parse/ast.rs index daa9f660c0..d7c282123f 100644 --- a/src/parse/ast.rs +++ b/src/parse/ast.rs @@ -1,9 +1,8 @@ +use bumpalo::collections::vec::Vec; use operator::Operator; -use parse::problems::Problem; use region::Loc; use std::fmt::{self, Display, Formatter}; -pub type Ident = str; pub type VariantName = str; /// A parsed expression. This uses lifetimes extensively for two reasons: @@ -23,50 +22,45 @@ pub type VariantName = str; #[derive(Clone, Debug, PartialEq)] pub enum Expr<'a> { // Number Literals - Int(i64), - Float(f64), + Float(&'a str), + Int(&'a str), + HexInt(&'a str), + OctalInt(&'a str), + BinaryInt(&'a str), // String Literals EmptyStr, Str(&'a str), - /// basically InterpolatedStr(Vec<(String, Loc)>, String) - InterpolatedStr(&'a (&'a [(&'a str, Loc>)], &'a str)), + BlockStr(&'a [&'a str]), // List literals EmptyList, - List(&'a [Loc>]), + List(Vec<'a, Loc>>), + // // Lookups + // Var(&'a str), - // Lookups - Var(&'a Ident), + // // Pattern Matching + // Case(&'a (Loc>, [(Loc>, Loc>)])), + // Closure(&'a (&'a [Loc>], Loc>)), + // /// basically Assign(Vec<(Loc, Loc)>, Loc) + // Assign(&'a (&'a [(Loc>, Loc>)], Loc>)), - // Pattern Matching - Case(&'a (Loc>, [(Loc>, Loc>)])), - Closure(&'a (&'a [Loc>], Loc>)), - /// basically Assign(Vec<(Loc, Loc)>, Loc) - Assign(&'a (&'a [(Loc>, Loc>)], Loc>)), - - // Application - Call(&'a (Loc>, [Loc>])), - ApplyVariant(&'a (&'a VariantName, [Loc>])), - Variant(&'a VariantName), + // // Application + // Call(&'a (Loc>, [Loc>])), + // ApplyVariant(&'a (&'a VariantName, [Loc>])), + // Variant(&'a VariantName), // Product Types EmptyRecord, - - // Sugar - If(&'a (Loc>, Loc>, 
Loc>)), + // // Sugar + // If(&'a (Loc>, Loc>, Loc>)), Operator(&'a (Loc>, Loc, Loc>)), - - // Runtime errors - MalformedStr(Box<[Loc]>), - MalformedInt(Problem), - MalformedFloat(Problem), } #[derive(Clone, Debug, PartialEq)] pub enum Pattern<'a> { // Identifier - Identifier(&'a Ident), + Identifier(&'a str), // Variant Variant(&'a VariantName), @@ -82,33 +76,35 @@ pub enum Pattern<'a> { #[test] fn expr_size() { - // The size of the Expr data structure should be exactly 3 machine words. + // The size of the Expr data structure should be exactly 5 machine words. // This test helps avoid regressions wich accidentally increase its size! - // - // Worth noting that going up to 4 machine words is probably not a big deal; - // an 8-byte cache line will only fit 2 of these regardless. assert_eq!( std::mem::size_of::(), // TODO [move this comment to an issue] We should be able to get this // down to 2, which would mean we could fit 4 of these nodes in a single - // 64-byte cache line instead of only being able to fit 2. + // 64-byte cache line instead of only being able to fit 1. // // Doing this would require, among other things: // 1. Making a str replacement where the length is stored as u32 instead of usize, // to leave room for the tagged union's u8 tag. // (Alternatively could store it as (&'a &'a str), but ew.) - // 2. Figuring out why &'a (Foo, Bar) by default takes up 24 bytes in Rust. + // 2. Similarly, making a slice replacement like that str replacement, and + // also where it doesn't share the bytes with anything else - so its + // elements can be consumed without having to clone them (unlike a slice). + // That's the only reason we're using Vec right now instead of slices - + // if we used slices, we'd have to clone their elements during canonicalization + // just to iterate over them and canonicalize them normally. + // 3. Figuring out why (&'a (Foo, Bar)) by default takes up 24 bytes in Rust. 
// I assume it's because the struct is being stored inline instead of // as a pointer, but in this case we actually do want the pointer! // We want to have the lifetime and we want to avoid using the unsafe keyword, // but we also want this to only store 1 pointer in the AST node. // Hopefully there's a way! // - // It's also possible that going up to 4 machine words might yield even - // better performance, due to more data structures being inlinable, - // and therefore having fewer pointers to chase. This seems worth - // investigating as well. - std::mem::size_of::() * 3 + // It's also possible that 4 machine words might yield better performance + // than 2, due to more data structures being inlinable, and therefore + // having fewer pointers to chase. This seems worth investigating as well. + std::mem::size_of::() * 5 ); } @@ -151,6 +147,7 @@ pub enum Attempting { List, Keyword, StringLiteral, + RecordLiteral, InterpolatedString, NumberLiteral, UnicodeEscape, @@ -165,7 +162,15 @@ impl<'a> Display for Expr<'a> { match self { EmptyStr => write!(f, "\"\""), - _ => panic!("TODO"), + Str(string) => write!(f, "\"{}\"", string), + BlockStr(lines) => write!(f, "\"\"\"{}\"\"\"", lines.join("\n")), + Int(string) => string.fmt(f), + Float(string) => string.fmt(f), + HexInt(string) => write!(f, "0x{}", string), + BinaryInt(string) => write!(f, "0b{}", string), + OctalInt(string) => write!(f, "0o{}", string), + EmptyRecord => write!(f, "{}", "{}"), + other => panic!("TODO implement Display for AST variant {:?}", other), } } } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 985d6e2c4c..330d9a05ee 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,18 +1,76 @@ pub mod ast; pub mod ident; +pub mod module; pub mod number_literal; pub mod parser; pub mod problems; pub mod string_literal; +use bumpalo::Bump; +use operator::Operator; use parse::ast::{Attempting, Expr}; use parse::number_literal::number_literal; -use parse::parser::{attempt, one_of2, Parser}; +use 
parse::parser::{ + and, attempt, lazy, loc, map, map_with_arena, one_of3, optional, string, unexpected, + unexpected_eof, val, Parser, State, +}; use parse::string_literal::string_literal; pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> { - attempt( - Attempting::Expression, - one_of2(number_literal(), string_literal()), + map_with_arena( + and( + attempt( + Attempting::Expression, + loc(one_of3( + record_literal(), + number_literal(), + string_literal(), + )), + ), + optional(and(loc(operator()), loc(val(Expr::Str("blah"))))), + ), + |arena, (loc_expr1, opt_operator)| match opt_operator { + Some((loc_op, loc_expr2)) => { + let tuple = arena.alloc((loc_expr1, loc_op, loc_expr2)); + + Expr::Operator(tuple) + } + None => loc_expr1.value, + }, ) } + +pub fn operator<'a>() -> impl Parser<'a, Operator> { + val(Operator::Plus) + // one_of3( + // map(string("+"), |_| Operator::Plus), + // map(string("-"), |_| Operator::Minus), + // map(string("*"), |_| Operator::Star), + // ) +} + +pub fn record_literal<'a>() -> impl Parser<'a, Expr<'a>> { + move |_arena: &'a Bump, state: State<'a>| { + let mut chars = state.input.chars(); + + match chars.next() { + Some('{') => (), + Some(other_char) => { + return Err(unexpected(other_char, 0, state, Attempting::RecordLiteral)); + } + None => { + return Err(unexpected_eof(0, Attempting::RecordLiteral, state)); + } + } + + match chars.next() { + Some('}') => { + let next_state = state.advance_without_indenting(2)?; + + Ok((Expr::EmptyRecord, next_state)) + } + Some(other_char) => Err(unexpected(other_char, 0, state, Attempting::RecordLiteral)), + None => Err(unexpected_eof(0, Attempting::RecordLiteral, state)), + } + } +} diff --git a/src/parse/module.rs b/src/parse/module.rs new file mode 100644 index 0000000000..d0786782fe --- /dev/null +++ b/src/parse/module.rs @@ -0,0 +1,16 @@ +use ident::Ident; +use parse::ast::{Expr, Pattern}; + +pub struct Module<'a> { + pub name: Ident, + pub exposes: Vec, + pub uses: Vec, + pub decls: Vec>, +} + 
+#[derive(Clone, Debug, PartialEq)] +pub enum Decl<'a> { + Def(Pattern<'a>, Expr<'a>, Expr<'a>), + // TODO Alias + // TODO SumType +} diff --git a/src/parse/number_literal.rs b/src/parse/number_literal.rs index bfefdef0c0..a3baa6fef6 100644 --- a/src/parse/number_literal.rs +++ b/src/parse/number_literal.rs @@ -1,19 +1,16 @@ -use bumpalo::collections::string::String; -use bumpalo::Bump; use parse::ast::{Attempting, Expr}; use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State}; -use parse::problems::Problem; use std::char; pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> { - move |arena: &'a Bump, state: State<'a>| { + move |_arena, state: State<'a>| { let mut chars = state.input.chars(); match chars.next() { Some(first_ch) => { // Number literals must start with either an '-' or a digit. if first_ch == '-' || first_ch.is_ascii_digit() { - parse_number_literal(first_ch, &mut chars, arena, state) + parse_number_literal(first_ch, &mut chars, state) } else { Err(unexpected( first_ch, @@ -32,61 +29,69 @@ pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> { fn parse_number_literal<'a, I>( first_ch: char, chars: &mut I, - arena: &'a Bump, state: State<'a>, ) -> ParseResult<'a, Expr<'a>> where I: Iterator, { - let mut before_decimal = String::with_capacity_in(1, arena); - let mut after_decimal = String::new_in(arena); - let mut has_decimal_point = false; - let mut chars_skipped = 0; + use self::LiteralType::*; - // Put the first character into the buffer, even if all we've parsed so - // far is a minus sign. - // - // We have to let i64::parse handle the minus sign (if it's there), because - // otherwise if we ask it to parse i64::MIN.to_string() as a positive i64, - // it errors because that positive number doesn't fit in an i64! - before_decimal.push(first_ch); + let mut typ = Int; + + // We already parsed 1 character (which may have been a minus sign). 
+ let mut chars_parsed = 1; while let Some(next_ch) = chars.next() { - match next_ch { - digit if next_ch.is_ascii_digit() => { - if has_decimal_point { - after_decimal.push(digit); - } else { - before_decimal.push(digit); - } - } - '_' => { - // Underscores are allowed, and disregarded. - chars_skipped += 1; - } - '.' => { - if has_decimal_point { - // You only get one decimal point! - let len = before_decimal.len() + after_decimal.len() + chars_skipped; + chars_parsed += 1; - return Err(unexpected('.', len, state, Attempting::NumberLiteral)); - } else { - chars_skipped += 1; - has_decimal_point = true; - } - } - invalid_char => { - if before_decimal.is_empty() { - // No digits! We likely parsed a minus sign that's actually an operator. - let len = before_decimal.len() + after_decimal.len() + chars_skipped; - return Err(unexpected( - invalid_char, - len, - state, - Attempting::NumberLiteral, - )); - } + let err_unexpected = || { + Err(unexpected( + next_ch, + chars_parsed, + state.clone(), + Attempting::NumberLiteral, + )) + }; + // Returns true iff so far we have parsed the given char and no other chars. + let so_far_parsed = |ch| chars_parsed == 2 && first_ch == ch; + + // We don't support negative escaped ints (e.g. 0x01 is supported but -0x01 is not). + // If you want that, do something like (negate 0x01). + // + // I'm open to changing this policy (that is, allowing support for + // negative escaped ints), but it'll complicate parsing logic and seems + // nonessential, so I'm leaving it out for now. + if next_ch == '.' { + if typ == Float { + // You only get one decimal point! 
+ return err_unexpected(); + } else { + typ = Float; + } + } else if next_ch == 'x' { + if so_far_parsed('0') { + typ = Hex; + } else { + return err_unexpected(); + } + } else if next_ch == 'b' { + if so_far_parsed('0') { + typ = Binary; + } else { + return err_unexpected(); + } + } else if next_ch == 'o' { + if so_far_parsed('0') { + typ = Octal; + } else { + return err_unexpected(); + } + } else if !next_ch.is_ascii_digit() && next_ch != '_' { + if so_far_parsed('-') { + // No digits! We likely parsed a minus sign that's actually an operator. + return err_unexpected(); + } else { // We hit an invalid number literal character; we're done! break; } @@ -96,41 +101,25 @@ where // At this point we have a number, and will definitely succeed. // If the number is malformed (outside the supported range), // we'll succeed with an appropriate Expr which records that. - let expr = if has_decimal_point { - let mut f64_buf = String::with_capacity_in( - before_decimal.len() - // +1 for the decimal point itself - + 1 - + after_decimal.len(), - arena, - ); - - f64_buf.push_str(&before_decimal); - f64_buf.push('.'); - f64_buf.push_str(&after_decimal); - - // TODO [convert this comment to an issue] - we can get better - // performance here by inlining string.parse() for the f64 case, - // since we've already done the work of validating that each char - // is a digit, plus we also already separately parsed the minus - // sign and dot. - match f64_buf.parse::() { - Ok(float) if float.is_finite() => Expr::Float(float), - _ => Expr::MalformedFloat(Problem::OutsideSupportedRange), - } - } else { - // TODO [convert this comment to an issue] - we can get better - // performance here by inlining string.parse() for the i64 case, - // since we've already done the work of validating that each char - // is a digit. 
- match before_decimal.parse::() { - Ok(int_val) => Expr::Int(int_val), - Err(_) => Expr::MalformedInt(Problem::OutsideSupportedRange), - } + let expr = match typ { + Int => Expr::Int(&state.input[0..chars_parsed]), + Float => Expr::Float(&state.input[0..chars_parsed]), + // For these we trim off the 0x/0o/0b part + Hex => Expr::HexInt(&state.input[2..chars_parsed - 1]), + Binary => Expr::BinaryInt(&state.input[2..chars_parsed - 1]), + Octal => Expr::OctalInt(&state.input[2..chars_parsed - 1]), }; - let total_chars_parsed = before_decimal.len() + chars_skipped; - let state = state.advance_without_indenting(total_chars_parsed)?; + let next_state = state.advance_without_indenting(chars_parsed)?; - Ok((expr, state)) + Ok((expr, next_state)) +} + +#[derive(Debug, PartialEq, Eq)] +enum LiteralType { + Int, + Float, + Hex, + Octal, + Binary, } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 1b0354377b..d6a396eca8 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; use parse::ast::Attempting; -use region::Region; +use region::{Located, Region}; use std::char; // Strategy: @@ -190,6 +190,21 @@ pub trait Parser<'a, Output> { fn parse(&self, &'a Bump, State<'a>) -> ParseResult<'a, Output>; } +pub struct BoxedParser<'a, Output> { + parser: &'a (dyn Parser<'a, Output> + 'a), +} + +impl<'a, Output> BoxedParser<'a, Output> { + fn new

(arena: &'a Bump, parser: P) -> Self + where + P: Parser<'a, Output> + 'a, + { + BoxedParser { + parser: arena.alloc(parser), + } + } +} + impl<'a, F, Output> Parser<'a, Output> for F where F: Fn(&'a Bump, State<'a>) -> ParseResult<'a, Output>, @@ -199,6 +214,22 @@ where } } +pub fn val<'a, Val>(value: Val) -> impl Parser<'a, Val> +where + Val: Clone, +{ + move |_, state| Ok((value.clone(), state)) +} + +/// Needed for recursive parsers +pub fn lazy<'a, F, P, Val>(get_parser: F) -> impl Parser<'a, Val> +where + F: Fn() -> P, + P: Parser<'a, Val>, +{ + move |arena, state| get_parser().parse(arena, state) +} + pub fn map<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After> where P: Parser<'a, Before>, @@ -211,6 +242,18 @@ where } } +pub fn map_with_arena<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After> +where + P: Parser<'a, Before>, + F: Fn(&'a Bump, Before) -> After, +{ + move |arena, state| { + parser + .parse(arena, state) + .map(|(output, next_state)| (transform(arena, output), next_state)) + } +} + pub fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, Val> where P: Parser<'a, Val>, @@ -226,6 +269,32 @@ where } } +pub fn loc<'a, P, Val>(parser: P) -> impl Parser<'a, Located> +where + P: Parser<'a, Val>, +{ + move |arena, state: State<'a>| { + let start_col = state.column; + let start_line = state.line; + + match parser.parse(arena, state) { + Ok((value, state)) => { + let end_col = state.column; + let end_line = state.line; + let region = Region { + start_col, + start_line, + end_col, + end_line, + }; + + Ok((Located { region, value }, state)) + } + Err((fail, state)) => Err((fail, state)), + } + } +} + pub fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<'a, A>> where P: Parser<'a, A>, @@ -317,6 +386,7 @@ pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> { let input = state.input; let len = string.len(); + // TODO do this comparison in one SIMD instruction (on 
supported systems) match input.get(0..len) { Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len)?)), _ => Err(unexpected_eof(len, Attempting::Keyword, state)), @@ -378,6 +448,46 @@ where // satisfies(any, |ch| ch.is_whitespace()) // } +pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)> +where + P1: Parser<'a, A>, + P2: Parser<'a, B>, +{ + move |arena: &'a Bump, state: State<'a>| { + let original_attempting = state.attempting; + + match p1.parse(arena, state) { + Ok((out1, state)) => match p2.parse(arena, state) { + Ok((out2, state)) => Ok(((out1, out2), state)), + Err((fail, state)) => Err(( + Fail { + attempting: original_attempting, + ..fail + }, + state, + )), + }, + Err((fail, state)) => Err(( + Fail { + attempting: original_attempting, + ..fail + }, + state, + )), + } + } +} + +pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option> +where + P: Parser<'a, T>, +{ + move |arena: &'a Bump, state: State<'a>| match parser.parse(arena, state) { + Ok((out1, state)) => Ok((Some(out1), state)), + Err((_, state)) => Ok((None, state)), + } +} + pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A> where P1: Parser<'a, A>, diff --git a/src/parse/string_literal.rs b/src/parse/string_literal.rs index 0c0cff29be..2e8bbe896a 100644 --- a/src/parse/string_literal.rs +++ b/src/parse/string_literal.rs @@ -1,18 +1,11 @@ -use bumpalo::collections::string::String; -use bumpalo::collections::vec::Vec; use bumpalo::Bump; use parse::ast::{Attempting, Expr}; -use parse::ident; -use parse::parser::{unexpected, unexpected_eof, Fail, Parser, State}; -use parse::problems::{Problem, Problems}; -use region::{Loc, Region}; +use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State}; use std::char; -use std::iter::Peekable; pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { move |arena: &'a Bump, state: State<'a>| { - let mut problems = std::vec::Vec::new(); - let mut chars = 
state.input.chars().peekable(); + let mut chars = state.input.chars(); // String literals must start with a quote. // If this doesn't, it must not be a string literal! @@ -26,464 +19,75 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { } } - // If we have precisely an empty string here, don't bother allocating - // a buffer; instead, return EmptyStr immediately. - if chars.peek() == Some(&'"') { - return Ok(( - Expr::EmptyStr, - // 2 because `""` has length 2 - state.advance_without_indenting(2)?, - )); - } - - // Stores the accumulated string characters - let mut buf = String::new_in(arena); - - // This caches the total string length of interpolated_pairs. Every - // time we add a new pair to interpolated_pairs, we increment this - // by the sum of whatever we parsed in order to obtain that pair. - let mut buf_col_offset: usize = 0; - - // Stores interpolated identifiers, if any. - let mut interpolated_pairs = Vec::new_in(arena); + // At the parsing stage we keep the entire raw string, because the formatter + // needs the raw string. (For example, so it can "remember" whether you + // wrote \u{...} or the actual unicode character itself.) + // + // Later, in canonicalization, we'll do things like resolving + // unicode escapes and string interpolation. + // + // Since we're keeping the entire raw string, all we need to track is + // how many characters we've parsed. So far, that's 1 (the opening `"`). + let mut parsed_chars = 1; + let mut prev_ch = '"'; while let Some(ch) = chars.next() { - match ch { - // If it's a backslash, escape things. - '\\' => match chars.next() { - Some(next_ch) => { - if let Some(ident) = handle_escaped_char( - arena, - &state, - next_ch, - &mut chars, - &mut buf, - &mut problems, - )? 
{ - let expr = Expr::Var(ident); + parsed_chars += 1; - // +2 for `\(` and then another +1 for `)` at the end - let parsed_length = buf.len() + 2 + ident.len() + 1; - - // It's okay if casting fails in this section, because - // we're going to check for line length overflow at the - // end anyway. That will render this region useless, - // but the user wasn't going to see this region - // anyway if the line length overflowed. - let start_line = state.line; - - // Subtract ident length and another 1 for the `)` - let start_col = state.column - + buf_col_offset as u16 - + (parsed_length - ident.len() - 1) as u16; - let ident_region = Region { - start_line, - start_col, - end_line: start_line, - end_col: start_col + ident.len() as u16 - 1, - }; - let loc_expr = Loc { - region: ident_region, - value: expr, - }; - - // Push the accumulated string into the pairs list, - // along with the ident that came after it. - interpolated_pairs.push((buf.into_bump_str(), loc_expr)); - - // Reset the buffer so we start working on a new string. - buf = String::new_in(arena); - - // Advance the cached offset of how many chars we've parsed, - // so the next time we see an interpolated ident, we can - // correctly calculate its region. - buf_col_offset += parsed_length; - } - } - None => { - // We ran out of characters before finding a closed quote; - // let the loop finish normally, so we end up returning - // the error that the string was not terminated. - // - // (There's the separate problem of a trailing backslash, - // but often that will get fixed in the course of - // addressing the missing closed quote.) - () - } - }, - '"' => { - // We found a closed quote; this is the end of the string! 
- let len_with_quotes = buf.len() + 2; - let expr = if problems.is_empty() { - let final_str = buf.into_bump_str(); - - if interpolated_pairs.is_empty() { - Expr::Str(final_str) - } else { - let tuple_ref = - arena.alloc((interpolated_pairs.into_bump_slice(), final_str)); - - Expr::InterpolatedStr(tuple_ref) - } + // Potentially end the string (unless this is an escaped `"`!) + if ch == '"' && prev_ch != '\\' { + let expr = if parsed_chars == 2 { + if let Some('"') = chars.next() { + // If the first three chars were all `"`, then this + // literal begins with `"""` and is a block string. + return parse_block_string(arena, state, &mut chars); } else { - Expr::MalformedStr(problems.into_boxed_slice()) - }; + Expr::EmptyStr + } + } else { + // Start at 1 so we omit the opening `"`. + // Subtract 1 from parsed_chars so we omit the closing `"`. + Expr::Str(&state.input[1..(parsed_chars - 1)]) + }; - let next_state = state.advance_without_indenting(len_with_quotes)?; + let next_state = state.advance_without_indenting(parsed_chars)?; - return Ok((expr, next_state)); - } - '\t' => { - // Report the problem and continue. Tabs are syntax errors, - // but maybe the rest of the string is fine! - problems.push(loc_char(Problem::Tab, &state, buf.len())); - } - '\r' => { - // Carriage returns aren't allowed in string literals, - // but maybe the rest of the string is fine! - problems.push(loc_char(Problem::CarriageReturn, &state, buf.len())); - } - '\n' => { - // We hit a newline before a close quote. - // We can't safely assume where the string was supposed - // to end, so this is an unrecoverable error. - return Err(unexpected('\n', 0, state, Attempting::StringLiteral)); - } - normal_char => buf.push(normal_char), + return Ok((expr, next_state)); + } else if ch == '\n' { + // This is a single-line string, which cannot have newlines! + // Treat this as an unclosed string literal, and consume + // all remaining chars. 
This will mask all other errors, but + // it should make it easiest to debug; the file will be a giant + // error starting from where the open quote appeared. + return Err(unexpected( + '\n', + state.input.len() - 1, + state, + Attempting::StringLiteral, + )); + } else { + prev_ch = ch; } } // We ran out of characters before finding a closed quote Err(unexpected_eof( - buf.len(), + parsed_chars, Attempting::StringLiteral, state.clone(), )) } } -fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc { - let start_line = state.line; - let start_col = state.column + buf_len as u16; - let end_line = start_line; - // All invalid chars should have a length of 1 - let end_col = state.column + 1; - - let region = Region { - start_line, - start_col, - end_line, - end_col, - }; - - Loc { region, value } -} - -fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc { - let start_line = state.line; - let start_col = state.column + buf_len as u16; - let end_line = start_line; - // escapes should all be 2 chars long - let end_col = state.column + 1; - - let region = Region { - start_line, - start_col, - end_line, - end_col, - }; - - Loc { region, value } -} - -fn loc_escaped_unicode<'a, V>( - value: V, - state: &State<'a>, - buf_len: usize, - hex_str_len: usize, -) -> Loc { - let start_line = state.line; - // +1 due to the `"` which precedes buf. - let start_col = state.column + buf_len as u16 + 1; - let end_line = start_line; - // +3 due to the `\u{` and another + 1 due to the `}` - // -1 to prevent overshooting because end col is inclusive. 
- let end_col = start_col + 3 + hex_str_len as u16 + 1 - 1; - - let region = Region { - start_line, - start_col, - end_line, - end_col, - }; - - Loc { region, value } -} - -#[inline(always)] -fn handle_escaped_char<'a, I>( - arena: &'a Bump, - state: &State<'a>, - ch: char, - chars: &mut Peekable, - buf: &mut String<'a>, - problems: &mut Problems, -) -> Result, (Fail, State<'a>)> +fn parse_block_string<'a, I>( + _arena: &'a Bump, + _state: State<'a>, + _chars: &mut I, +) -> ParseResult<'a, Expr<'a>> where I: Iterator, { - match ch { - '\\' => buf.push('\\'), - '"' => buf.push('"'), - 't' => buf.push('\t'), - 'n' => buf.push('\n'), - 'r' => buf.push('\r'), - '0' => buf.push('\0'), // We explicitly support null characters, as we - // can't be sure we won't receive them from Rust. - 'u' => handle_escaped_unicode(arena, &state, chars, buf, problems)?, - '(' => { - let ident = parse_interpolated_ident(arena, state, chars)?; - - return Ok(Some(ident)); - } - '\t' => { - // Report and continue. - // Tabs are syntax errors, but maybe the rest of the string is fine! - problems.push(loc_escaped_char(Problem::Tab, &state, buf.len())); - } - '\r' => { - // Report and continue. - // Carriage returns aren't allowed in string literals, - // but maybe the rest of the string is fine! - problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len())); - } - '\n' => { - // Report and bail out. - // We can't safely assume where the string was supposed to end. - problems.push(loc_escaped_char( - Problem::NewlineInLiteral, - &state, - buf.len(), - )); - - return Err(unexpected_eof( - buf.len(), - Attempting::UnicodeEscape, - state.clone(), - )); - } - _ => { - // Report and continue. - // An unsupported escaped char (e.g. \q) shouldn't halt parsing. 
- problems.push(loc_escaped_char( - Problem::UnsupportedEscapedChar, - &state, - buf.len(), - )); - } - } - - Ok(None) -} - -#[inline(always)] -fn handle_escaped_unicode<'a, I>( - arena: &'a Bump, - state: &State<'a>, - chars: &mut Peekable, - buf: &mut String<'a>, - problems: &mut Problems, -) -> Result<(), (Fail, State<'a>)> -where - I: Iterator, -{ - // \u{00A0} is how you specify a Unicode code point, - // so we should always see a '{' next. - if chars.next() != Some('{') { - let start_line = state.line; - // +1 due to the `"` which precedes buf - let start_col = state.column + 1 + buf.len() as u16; - let end_line = start_line; - - // All we parsed was `\u`, so end on the column after `\`'s column. - let end_col = start_col + 1; - - let region = Region { - start_line, - start_col, - end_line, - end_col, - }; - - problems.push(Loc { - region, - value: Problem::NoUnicodeDigits, - }); - - // The rest of the string literal might be fine. Keep parsing! - return Ok(()); - } - - // Record the point in the string literal where we started parsing `\u` - let start_of_unicode = buf.len(); - - // Stores the accumulated unicode digits - let mut hex_str = String::new_in(arena); - - while let Some(hex_char) = chars.next() { - match hex_char { - '}' => { - // Done! Validate and add it to the buffer. - match u32::from_str_radix(&hex_str, 16) { - Ok(code_pt) => { - if code_pt > 0x10FFFF { - let start_line = state.line; - // +1 due to the `"` which precedes buf - // +3 due to the `\u{` which precedes the hex digits - let start_col = state.column + 1 + buf.len() as u16 + 3; - let end_line = start_line; - - // We want to underline only the number. That's the error! - // -1 because we want to end on the last digit, not - // overshoot it. 
- let end_col = start_col + hex_str.len() as u16 - 1; - - let region = Region { - start_line, - start_col, - end_line, - end_col, - }; - - problems.push(Loc { - region, - value: Problem::UnicodeCodePointTooLarge, - }); - } else { - // If it all checked out, add it to - // the main buffer. - match char::from_u32(code_pt) { - Some(ch) => buf.push(ch), - None => { - problems.push(loc_escaped_unicode( - Problem::InvalidUnicodeCodePoint, - &state, - start_of_unicode, - hex_str.len(), - )); - } - } - } - } - Err(_) => { - let problem = if hex_str.is_empty() { - Problem::NoUnicodeDigits - } else { - Problem::NonHexCharsInUnicodeCodePoint - }; - - problems.push(loc_escaped_unicode( - problem, - &state, - start_of_unicode, - hex_str.len(), - )); - } - } - - // We are now done processing the unicode portion of the string, - // so exit the loop without further advancing the iterator. - return Ok(()); - } - '\t' => { - // Report and continue. - // Tabs are syntax errors, but maybe the rest of the string is fine! - problems.push(loc_escaped_unicode( - Problem::Tab, - &state, - start_of_unicode, - hex_str.len(), - )); - } - '\r' => { - // Report and continue. - // Carriage returns aren't allowed in string literals, - // but maybe the rest of the string is fine! - problems.push(loc_escaped_unicode( - Problem::CarriageReturn, - &state, - start_of_unicode, - hex_str.len(), - )); - } - '\n' => { - // Report and bail out. - // We can't safely assume where the string was supposed to end. - problems.push(loc_escaped_unicode( - Problem::NewlineInLiteral, - &state, - start_of_unicode, - hex_str.len(), - )); - - return Err(unexpected_eof( - buf.len(), - Attempting::UnicodeEscape, - state.clone(), - )); - } - normal_char => hex_str.push(normal_char), - } - - // If we're about to hit the end of the string, and we didn't already - // complete parsing a valid unicode escape sequence, this is a malformed - // escape sequence - it wasn't terminated! 
- if chars.peek() == Some(&'"') { - // Record a problem and exit the loop early, so the string literal - // parsing logic can consume the quote and do its job as normal. - let start_line = state.line; - // +1 due to the `"` which precedes buf. - let start_col = state.column + buf.len() as u16 + 1; - let end_line = start_line; - // +3 due to the `\u{` - // -1 to prevent overshooting because end col is inclusive. - let end_col = start_col + 3 + hex_str.len() as u16 - 1; - - let region = Region { - start_line, - start_col, - end_line, - end_col, - }; - - problems.push(Loc { - region, - value: Problem::MalformedEscapedUnicode, - }); - - return Ok(()); - } - } - - Ok(()) -} - -#[inline(always)] -fn parse_interpolated_ident<'a, I>( - arena: &'a Bump, - state: &State<'a>, - chars: &mut Peekable, -) -> Result<&'a str, (Fail, State<'a>)> -where - I: Iterator, -{ - // This will return Err on invalid identifiers like "if" - let ((string, next_char), state) = ident::parse_into(arena, chars, state.clone())?; - - // Make sure we got a closing ) to end the interpolation. - match next_char { - Some(')') => Ok(string), - Some(ch) => Err(unexpected(ch, 0, state, Attempting::InterpolatedString)), - None => Err(unexpected_eof(0, Attempting::InterpolatedString, state)), - } + // So far we have consumed the `"""` and that's it. 
+ let _parsed_chars = 3; + panic!("TODO parse block string, advance state, etc"); } diff --git a/src/region.rs b/src/region.rs index 0c10cbb4f2..21880667cc 100644 --- a/src/region.rs +++ b/src/region.rs @@ -52,7 +52,19 @@ pub struct Located { } impl Located { - pub fn new(value: T, region: Region) -> Located { + pub fn new( + start_line: u32, + start_col: u16, + end_line: u32, + end_col: u16, + value: T, + ) -> Located { + let region = Region { + start_line, + start_col, + end_line, + end_col, + }; Located { value, region } } } diff --git a/src/solve.rs b/src/solve.rs index 244edf1a5f..ce14e6afe3 100644 --- a/src/solve.rs +++ b/src/solve.rs @@ -11,7 +11,7 @@ // , _errors :: [Error.Error] // } -use canonicalize::Symbol; +use can::symbol::Symbol; use collections::ImMap; use subs::{Content, Descriptor, FlatType, Subs, Variable}; use types::Constraint::{self, *}; @@ -23,13 +23,15 @@ pub fn solve(env: &Env, subs: &mut Subs, constraint: Constraint) { // println!("\nSolving:\n\n\t{:?}\n\n", constraint); match constraint { True => (), - Eq(typ, expected_type, region) => { + Eq(typ, expected_type, _region) => { + // TODO use region? let actual = type_to_variable(subs, typ); let expected = type_to_variable(subs, expected_type.get_type()); subs.union(actual, expected); } - Lookup(symbol, expected_type, region) => { + Lookup(symbol, expected_type, _region) => { + // TODO use region? let actual = subs.copy_var(env.get(&symbol).unwrap_or_else(|| { panic!("Could not find symbol {:?} in env {:?}", symbol, env) diff --git a/src/string.rs b/src/string.rs index c664faf554..3d11b3cfea 100644 --- a/src/string.rs +++ b/src/string.rs @@ -192,7 +192,7 @@ impl Into for RocStr { fn into(self) -> String { let len_msbyte = self.len_msbyte(); - panic!("I'm not sure this works the way we want it to. Need to review."); + // TODO I'm not sure this works the way we want it to. Need to review. if flagged_as_short_string(len_msbyte) { // Drop the "is this a short string?" 
flag @@ -208,7 +208,6 @@ impl Into for RocStr { // same memory layout as a Rust &str slice. let str_slice = unsafe { mem::transmute::<[u8; 16], &str>(self.0.raw) }; let string = str_slice.to_string(); - let mut roc_str_mut = self; // Drop will deallocate the bytes, which we don't want in this case. // String is using those bytes now! @@ -241,14 +240,14 @@ impl From for RocStr { RocStr(InnerStr { raw: buffer }) } else { - let bytes_ptr = string.as_bytes().clone().as_ptr(); - let long = LongStr { - bytes: MaybeUninit::new(bytes_ptr), - length: str_len, - }; - panic!("TODO: use mem::forget on the string and steal its bytes!"); - RocStr(InnerStr { long }) + // let bytes_ptr = string.as_bytes().clone().as_ptr(); + // let long = LongStr { + // bytes: MaybeUninit::new(bytes_ptr), + // length: str_len, + // }; + + // RocStr(InnerStr { long }) } } } diff --git a/src/subs.rs b/src/subs.rs index f571561dd6..2f95c866fb 100644 --- a/src/subs.rs +++ b/src/subs.rs @@ -75,7 +75,6 @@ impl Subs { } pub fn mk_flex_var(&mut self) -> Variable { - /// TODO is "flex" the same as "unbound" and "rigid" the same as "bound"?! 
self.fresh(flex_var_descriptor()) } diff --git a/src/types.rs b/src/types.rs index 46fca73a29..a8e66d1ff0 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,4 +1,4 @@ -use canonicalize::Symbol; +use can::symbol::Symbol; use collections::ImMap; use operator::{ArgSide, Operator}; use region::Located; diff --git a/tests/helpers/mod.rs b/tests/helpers/mod.rs index 56c5a2a57c..2c12a83d83 100644 --- a/tests/helpers/mod.rs +++ b/tests/helpers/mod.rs @@ -1,183 +1,76 @@ -use combine::error::ParseError; -use combine::stream::state::State; -use combine::stream::Stream; -use combine::{eof, Parser}; -use roc::collections::MutMap; -use roc::deprecated::parse_state::IndentablePosition; -use roc::expr::{Expr, Pattern}; +extern crate bumpalo; +extern crate roc; + +use self::bumpalo::Bump; +use roc::can; +use roc::can::expr::Expr; +use roc::can::problem::Problem; +use roc::can::procedure::Procedure; +use roc::can::symbol::Symbol; +use roc::can::Output; +use roc::collections::{ImMap, MutMap}; +use roc::ident::Ident; +use roc::parse; +use roc::parse::ast::{self, Attempting}; +use roc::parse::parser::{Fail, Parser, State}; use roc::region::{Located, Region}; -use std::hash::Hash; -pub fn loc_box(val: T) -> Box> { - Box::new(loc(val)) +pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result, Fail> { + let state = State::new(&input, Attempting::Module); + let parser = parse::expr(); + let answer = parser.parse(&arena, state); + + answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail) } -pub fn loc(val: T) -> Located { - Located::new(val, Region::zero()) -} - -pub fn located( - start_line: u32, - start_col: u16, - end_line: u32, - end_col: u16, - val: T, -) -> Located { - Located::new( - val, - Region { - start_line, - start_col, - end_line, - end_col, - }, +#[allow(dead_code)] +pub fn can_expr(expr_str: &str) -> (Expr, Output, Vec, MutMap) { + can_expr_with( + &Bump::new(), + "blah", + expr_str, + &ImMap::default(), + &ImMap::default(), ) } -pub fn zero_loc(located_val: Located) 
-> Located { - loc(located_val.value) +#[allow(dead_code)] +pub fn can_expr_with( + arena: &Bump, + name: &str, + expr_str: &str, + declared_idents: &ImMap, + declared_variants: &ImMap>>, +) -> (Expr, Output, Vec, MutMap) { + let expr = parse_with(&arena, expr_str).unwrap_or_else(|_| { + panic!( + "can_expr_with() got a parse error when attempting to canonicalize:\n\n{:?}", + expr_str + ) + }); + + let home = "Test".to_string(); + let (loc_expr, output, problems, procedures) = can::canonicalize_declaration( + home, + name, + Located::new(0, 0, 0, 0, expr), + declared_idents, + declared_variants, + ); + + (loc_expr.value, output, problems, procedures) } -/// Zero out the parse locations on everything in this Expr, so we can compare expected/actual without -/// having to account for that. -pub fn zero_loc_expr(expr: Expr) -> Expr { - use roc::expr::Expr::*; +// pub fn mut_map_from_pairs(pairs: I) -> MutMap +// where +// I: IntoIterator, +// K: Hash + Eq, +// { +// let mut answer = MutMap::default(); - match expr { - Int(_) | Float(_) | EmptyStr | Str(_) | Char(_) | Var(_) | EmptyRecord | EmptyList => expr, - InterpolatedStr(pairs, string) => InterpolatedStr( - pairs - .into_iter() - .map(|(prefix, ident)| (prefix, zero_loc(ident))) - .collect(), - string, - ), - List(elems) => { - let zeroed_elems = elems - .into_iter() - .map(|loc_expr| loc(zero_loc_expr(loc_expr.value))) - .collect(); +// for (key, value) in pairs { +// answer.insert(key, value); +// } - List(zeroed_elems) - } - Assign(assignments, loc_ret) => { - let zeroed_assignments = assignments - .into_iter() - .map(|(pattern, loc_expr)| { - ( - zero_loc_pattern(pattern), - loc(zero_loc_expr(loc_expr.value)), - ) - }) - .collect(); - - Assign(zeroed_assignments, loc_box(zero_loc_expr((*loc_ret).value))) - } - Apply(fn_expr, args) => Apply( - loc_box(zero_loc_expr((*fn_expr).value)), - args.into_iter() - .map(|arg| loc(zero_loc_expr(arg.value))) - .collect(), - ), - Operator(left, op, right) => Operator( - 
loc_box(zero_loc_expr((*left).value)), - zero_loc(op), - loc_box(zero_loc_expr((*right).value)), - ), - Closure(patterns, body) => Closure( - patterns.into_iter().map(zero_loc).collect(), - loc_box(zero_loc_expr((*body).value)), - ), - ApplyVariant(_, None) => expr, - ApplyVariant(name, Some(args)) => ApplyVariant( - name, - Some( - args.into_iter() - .map(|arg| loc(zero_loc_expr(arg.value))) - .collect(), - ), - ), - If(condition, if_true, if_false) => If( - loc_box(zero_loc_expr((*condition).value)), - loc_box(zero_loc_expr((*if_true).value)), - loc_box(zero_loc_expr((*if_false).value)), - ), - Case(condition, branches) => Case( - loc_box(zero_loc_expr((*condition).value)), - branches - .into_iter() - .map(|(pattern, loc_expr)| { - ( - zero_loc_pattern(pattern), - loc(zero_loc_expr(loc_expr.value)), - ) - }) - .collect(), - ), - } -} - -/// Zero out the parse locations on everything in this Pattern, so we can compare expected/actual without -/// having to account for that. -pub fn zero_loc_pattern(loc_pattern: Located) -> Located { - use roc::expr::Pattern::*; - - let pattern = loc_pattern.value; - - match pattern { - Identifier(_) | IntLiteral(_) | FloatLiteral(_) | ExactString(_) | EmptyRecordLiteral - | Underscore => loc(pattern), - Variant(loc_name, None) => loc(Variant(loc(loc_name.value), None)), - Variant(loc_name, Some(opt_located_patterns)) => loc(Variant( - loc(loc_name.value), - Some( - opt_located_patterns - .into_iter() - .map(|loc_pat| zero_loc_pattern(loc_pat)) - .collect(), - ), - )), - } -} - -#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_canonicalize.rs -pub fn mut_map_from_pairs(pairs: I) -> MutMap -where - I: IntoIterator, - K: Hash + Eq, -{ - let mut answer = MutMap::default(); - - for (key, value) in pairs { - answer.insert(key, value); - } - - answer -} - -// PARSE HELPERS - -#[allow(dead_code)] // For some reason rustc thinks this isn't used. 
It is, though, in test_parse.rs -pub fn standalone_expr() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - roc::deprecated::parse::expr().skip(eof()) -} - -#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_parse.rs -pub fn parse_without_loc(actual_str: &str) -> Result<(Expr, String), String> { - parse_standalone(actual_str).map(|(expr, leftover)| (zero_loc_expr(expr), leftover)) -} - -#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_parse.rs -pub fn parse_standalone(actual_str: &str) -> Result<(Expr, String), String> { - let parse_state: State<&str, IndentablePosition> = - State::with_positioner(actual_str, IndentablePosition::default()); - - match standalone_expr().easy_parse(parse_state) { - Ok((expr, state)) => Ok((expr, state.input.to_string())), - Err(errors) => Err(errors.to_string()), - } -} +// answer +// } diff --git a/tests/test_canonicalize.rs b/tests/test_canonicalize.rs index e4aeee5059..8d6571200e 100644 --- a/tests/test_canonicalize.rs +++ b/tests/test_canonicalize.rs @@ -1,683 +1,902 @@ #[macro_use] extern crate pretty_assertions; -#[macro_use] -extern crate indoc; -extern crate combine; +// #[macro_use] +// extern crate indoc; +extern crate bumpalo; extern crate roc; mod helpers; #[cfg(test)] mod test_canonicalize { - use helpers::{loc, loc_box, mut_map_from_pairs, parse_without_loc, zero_loc_expr}; - use roc::canonicalize; - use roc::canonicalize::Expr::*; - use roc::canonicalize::Pattern::*; - use roc::canonicalize::{Expr, Output, Pattern, Problem, Procedure, References, Symbol}; - use roc::collections::{ImMap, ImSet, MutMap}; - use roc::expr::{Ident, VariantName}; - use roc::operator::Operator; - use roc::region::{Located, Region}; + use bumpalo::Bump; + use helpers::can_expr_with; + use roc::can::expr::Expr::{self, *}; + use roc::can::problem::RuntimeError; + use roc::collections::ImMap; + use std::{f64, i64}; - fn can_expr(expr_str: &str) -> 
(Expr, Output, Vec, MutMap) { - can_expr_with("blah", expr_str, &ImMap::default(), &ImMap::default()) + // fn sym(name: &str) -> Symbol { + // Symbol::new("Test$blah$", name) + // } + + // fn unqualified(string: &str) -> Ident { + // Ident::Unqualified(string.to_string()) + // } + + // fn unqualifieds(strings: Vec<&str>) -> Vec { + // strings.into_iter().map(unqualified).collect() + // } + + // fn loc_unqualifieds(strings: Vec<&str>) -> Vec> { + // strings + // .into_iter() + // .map(|string| loc(unqualified(string))) + // .collect() + // } + + // fn unused(string: &str) -> Problem { + // Problem::UnusedAssignment(loc(unqualified(string))) + // } + + // struct Out<'a> { + // locals: Vec<&'a str>, + // globals: Vec<&'a str>, + // variants: Vec<&'a str>, + // calls: Vec<&'a str>, + // tail_call: Option<&'a str>, + // } + + // impl<'a> Into for Out<'a> { + // fn into(self) -> Output { + // let references = References { + // locals: vec_to_set(self.locals), + // globals: vec_to_set(self.globals), + // variants: vec_to_set(self.variants), + // calls: vec_to_set(self.calls), + // }; + + // let tail_call = self.tail_call.map(sym); + + // Output { + // references, + // tail_call, + // } + // } + // } + + // fn vec_to_set<'a>(vec: Vec<&'a str>) -> ImSet { + // ImSet::from(vec.into_iter().map(sym).collect::>()) + // } + + fn assert_can(input: &str, expected: Expr) { + let arena = Bump::new(); + let (actual, _, _, _) = + can_expr_with(&arena, "Blah", input, &ImMap::default(), &ImMap::default()); + + assert_eq!(expected, actual); } - fn can_expr_with( - name: &str, - expr_str: &str, - declared_idents: &ImMap, - declared_variants: &ImMap>, - ) -> (Expr, Output, Vec, MutMap) { - let (expr, unparsed) = parse_without_loc(expr_str).unwrap_or_else(|errors| { - panic!( - "Parse error trying to parse \"{}\" - {}", - expr_str.to_string(), - errors.to_string() - ) - }); - - assert_eq!(unparsed, "".to_string()); - - let home = "Test".to_string(); - let (loc_expr, output, problems, 
procedures) = canonicalize::canonicalize_declaration( - home, - name, - loc(zero_loc_expr(expr)), - declared_idents, - declared_variants, - ); - - (loc_expr.value, output, problems, procedures) - } - - fn sym(name: &str) -> Symbol { - Symbol::new("Test$blah$", name) - } - - fn unqualified(string: &str) -> Ident { - Ident::Unqualified(string.to_string()) - } - - fn unqualifieds(strings: Vec<&str>) -> Vec { - strings.into_iter().map(unqualified).collect() - } - - fn loc_unqualifieds(strings: Vec<&str>) -> Vec> { - strings - .into_iter() - .map(|string| loc(unqualified(string))) - .collect() - } - - fn unused(string: &str) -> Problem { - Problem::UnusedAssignment(loc(unqualified(string))) - } - - struct Out<'a> { - locals: Vec<&'a str>, - globals: Vec<&'a str>, - variants: Vec<&'a str>, - calls: Vec<&'a str>, - tail_call: Option<&'a str>, - } - - impl<'a> Into for Out<'a> { - fn into(self) -> Output { - let references = References { - locals: vec_to_set(self.locals), - globals: vec_to_set(self.globals), - variants: vec_to_set(self.variants), - calls: vec_to_set(self.calls), - }; - - let tail_call = self.tail_call.map(sym); - - Output { - references, - tail_call, - } - } - } - - fn vec_to_set<'a>(vec: Vec<&'a str>) -> ImSet { - ImSet::from(vec.into_iter().map(sym).collect::>()) - } - - // BASIC CANONICALIZATION + // NUMBER LITERALS #[test] - fn closure_args_are_not_locals() { - // "arg" shouldn't make it into output.locals, because - // it only exists in the closure's arguments. 
- let (_, output, problems, procedures) = can_expr(indoc!( - r#" - func = \arg -> arg + 1 + fn int_too_large() { + let string = (i64::MAX as i128 + 1).to_string(); - 3 + func 2 - "# - )); - - assert_eq!(problems, vec![]); - - assert_eq!( - output, - Out { - locals: vec!["func"], - globals: vec![], - variants: vec![], - calls: vec!["func"], - tail_call: None - } - .into() - ); - - assert_eq!( - procedures, - mut_map_from_pairs(vec![( - sym("func"), - Procedure { - name: Some("func".to_string()), - is_self_tail_recursive: false, - definition: Region::zero(), - args: vec![loc(Pattern::Identifier(sym("arg")))], - body: loc(Expr::Operator( - loc_box(Expr::Var(sym("arg"))), - loc(Operator::Plus), - loc_box(Expr::Int(1)) - )), - references: References { - locals: vec_to_set(vec![]), - globals: vec_to_set(vec![]), - variants: vec_to_set(vec![]), - calls: vec_to_set(vec![]), - } - } - )]) + assert_can( + &string.clone(), + RuntimeError(RuntimeError::IntOutsideRange(string.into())), ); } #[test] - fn closing_over_locals() { - // "local" should be used, because the closure used it. - // However, "unused" should be unused. - let (_, output, problems, _) = can_expr(indoc!( - r#" - local = 5 - unused = 6 - func = \arg -> arg + local + fn int_too_small() { + let string = (i64::MIN as i128 - 1).to_string(); - 3 + func 2 - "# - )); - - assert_eq!( - problems, - vec![Problem::UnusedAssignment(loc(Ident::Unqualified( - "unused".to_string() - )))] - ); - - assert_eq!( - output, - Out { - locals: vec!["func", "local"], - globals: vec![], - variants: vec![], - calls: vec!["func"], - tail_call: None - } - .into() + assert_can( + &string.clone(), + RuntimeError(RuntimeError::IntOutsideRange(string.into())), ); } #[test] - fn unused_closure() { - // "unused" should be unused because it's in func, which is unused. 
- let (_, output, problems, _) = can_expr(indoc!( - r#" - local = 5 - unused = 6 - func = \arg -> arg + unused + fn float_too_large() { + let string = format!("{}1.0", f64::MAX); - local - "# - )); - - assert_eq!( - problems, - vec![ - Problem::UnusedAssignment(loc(Ident::Unqualified("unused".to_string()))), - Problem::UnusedAssignment(loc(Ident::Unqualified("func".to_string()))), - ] - ); - - assert_eq!( - output, - Out { - locals: vec!["local"], - globals: vec![], - variants: vec![], - calls: vec![], - tail_call: None - } - .into() - ); - } - - // UNRECOGNIZED - - #[test] - fn basic_unrecognized_constant() { - let (expr, output, problems, _) = can_expr(indoc!( - r#" - x - "# - )); - - assert_eq!( - problems, - vec![Problem::UnrecognizedConstant(loc(Ident::Unqualified( - "x".to_string() - )))] - ); - - assert_eq!( - expr, - UnrecognizedConstant(loc(Ident::Unqualified("x".to_string()))) - ); - - assert_eq!( - output, - Out { - locals: vec![], - globals: vec![], - variants: vec![], - calls: vec![], - tail_call: None - } - .into() + assert_can( + &string.clone(), + RuntimeError(RuntimeError::FloatOutsideRange(string.into())), ); } #[test] - fn complex_unrecognized_constant() { - let (_, output, problems, _) = can_expr(indoc!( - r#" - a = 5 - b = 6 + fn float_too_small() { + let string = format!("{}1.0", f64::MIN); - a + b * z - "# - )); - - assert_eq!( - problems, - vec![Problem::UnrecognizedConstant(loc(Ident::Unqualified( - "z".to_string() - )))] - ); - - assert_eq!( - output, - Out { - locals: vec!["a", "b"], - globals: vec![], - variants: vec![], - calls: vec![], - tail_call: None - } - .into() + assert_can( + &string.clone(), + RuntimeError(RuntimeError::FloatOutsideRange(string.into())), ); } - // UNUSED + // LOCALS - #[test] - fn mutual_unused_circular_vars() { - // This should report that both a and b are unused, since the return expr never references them. - // It should not report them as circular, since we haven't solved the halting problem here. 
- let (_, output, problems, _) = can_expr(indoc!( - r#" - a = \arg -> if arg > 0 then b 7 else 0 - b = \arg -> if arg > 0 then a (arg - 1) else 0 - c = 5 + //#[test] + //fn closure_args_are_not_locals() { + // // "arg" shouldn't make it into output.locals, because + // // it only exists in the closure's arguments. + // let (_, output, problems, procedures) = can_expr(indoc!( + // r#" + // func = \arg -> arg + 1 - c - "# - )); + // 3 + func 2 + // "# + // )); - assert_eq!(problems, vec![unused("a"), unused("b")]); + // assert_eq!(problems, vec![]); - assert_eq!( - output, - Out { - locals: vec!["c"], - globals: vec![], - variants: vec![], - calls: vec![], - tail_call: None - } - .into() - ); - } + // assert_eq!( + // output, + // Out { + // locals: vec!["func"], + // globals: vec![], + // variants: vec![], + // calls: vec!["func"], + // tail_call: None + // } + // .into() + // ); - #[test] - fn can_fibonacci() { - let (_, output, problems, _) = can_expr(indoc!( - r#" - fibonacci = \num -> - if num < 2 then - num - else - fibonacci (num - 1) + fibonacci (num - 2) + // assert_eq!( + // procedures, + // mut_map_from_pairs(vec![( + // sym("func"), + // Procedure { + // name: Some("func".to_string()), + // is_self_tail_recursive: false, + // definition: Region::zero(), + // args: vec![loc(Pattern::Identifier(sym("arg")))], + // body: loc(Expr::Operator( + // loc_box(Expr::Var(sym("arg"))), + // loc(Operator::Plus), + // loc_box(Expr::Int(1)) + // )), + // references: References { + // locals: vec_to_set(vec![]), + // globals: vec_to_set(vec![]), + // variants: vec_to_set(vec![]), + // calls: vec_to_set(vec![]), + // } + // } + // )]) + // ); + //} - fibonacci 9 - "# - )); + //#[test] + //fn closing_over_locals() { + // // "local" should be used, because the closure used it. + // // However, "unused" should be unused. 
+ // let (_, output, problems, _) = can_expr(indoc!( + // r#" + // local = 5 + // unused = 6 + // func = \arg -> arg + local - assert_eq!(problems, vec![]); + // 3 + func 2 + // "# + // )); - assert_eq!( - output, - Out { - locals: vec!["fibonacci"], - globals: vec![], - variants: vec![], - calls: vec!["fibonacci"], - tail_call: None - } - .into() - ); - } + // assert_eq!( + // problems, + // vec![Problem::UnusedAssignment(loc(Ident::Unqualified( + // "unused".to_string() + // )))] + // ); - #[test] - fn can_tail_call() { - // TODO check the global params - make sure this - // is considered a tail call, even though it only - // calls itself from one branch! - let (_, output, problems, _) = can_expr(indoc!( - r#" - factorial = \num -> - factorialHelp num 0 + // assert_eq!( + // output, + // Out { + // locals: vec!["func", "local"], + // globals: vec![], + // variants: vec![], + // calls: vec!["func"], + // tail_call: None + // } + // .into() + // ); + //} - factorialHelp = \num total -> - if num == 0 then - total - else - factorialHelp (num - 1) (total * num) + //#[test] + //fn unused_closure() { + // // "unused" should be unused because it's in func, which is unused. 
+ // let (_, output, problems, _) = can_expr(indoc!( + // r#" + // local = 5 + // unused = 6 + // func = \arg -> arg + unused - factorial 9 - "# - )); + // local + // "# + // )); - assert_eq!(problems, vec![]); + // assert_eq!( + // problems, + // vec![ + // Problem::UnusedAssignment(loc(Ident::Unqualified("unused".to_string()))), + // Problem::UnusedAssignment(loc(Ident::Unqualified("func".to_string()))), + // ] + // ); - assert_eq!( - output, - Out { - locals: vec!["factorial", "factorialHelp"], - globals: vec![], - variants: vec![], - calls: vec!["factorial", "factorialHelp"], - tail_call: None - } - .into() - ); - } + // assert_eq!( + // output, + // Out { + // locals: vec!["local"], + // globals: vec![], + // variants: vec![], + // calls: vec![], + // tail_call: None + // } + // .into() + // ); + //} - #[test] - fn transitively_used_function() { - // This should report that neither a nor b are unused, - // since if you never call a function but do return it, that's okay! - let (_, output, problems, _) = can_expr(indoc!( - r#" - a = \_ -> 42 - b = a + //// UNRECOGNIZED - b - "# - )); + //#[test] + //fn basic_unrecognized_constant() { + // let (expr, output, problems, _) = can_expr(indoc!( + // r#" + // x + // "# + // )); - assert_eq!(problems, Vec::new()); + // assert_eq!( + // problems, + // vec![Problem::UnrecognizedConstant(loc(Ident::Unqualified( + // "x".to_string() + // )))] + // ); - assert_eq!( - output, - Out { - locals: vec!["a", "b"], - globals: vec![], - variants: vec![], - calls: vec![], - tail_call: None - } - .into() - ); - } + // assert_eq!( + // expr, + // UnrecognizedConstant(loc(Ident::Unqualified("x".to_string()))) + // ); - // ASSIGNMENT REORDERING + // assert_eq!( + // output, + // Out { + // locals: vec![], + // globals: vec![], + // variants: vec![], + // calls: vec![], + // tail_call: None + // } + // .into() + // ); + //} - #[test] - fn reorder_assignments() { - let (expr, output, problems, _) = can_expr(indoc!( - r#" - increment = 
\arg -> arg + 1 - z = (increment 2) + y - y = x + 1 - x = 9 + //#[test] + //fn complex_unrecognized_constant() { + // let (_, output, problems, _) = can_expr(indoc!( + // r#" + // a = 5 + // b = 6 - z * 3 - "# - )); + // a + b * z + // "# + // )); - assert_eq!(problems, vec![]); + // assert_eq!( + // problems, + // vec![Problem::UnrecognizedConstant(loc(Ident::Unqualified( + // "z".to_string() + // )))] + // ); - assert_eq!( - output, - Out { - locals: vec!["increment", "x", "y", "z"], - globals: vec![], - variants: vec![], - calls: vec!["increment"], - tail_call: None - } - .into() - ); + // assert_eq!( + // output, + // Out { + // locals: vec!["a", "b"], + // globals: vec![], + // variants: vec![], + // calls: vec![], + // tail_call: None + // } + // .into() + // ); + //} - let symbols = assigned_symbols(expr); + //// UNUSED - // In code gen, for everything to have been set before it gets read, - // the following must be true about when things are assigned: - // - // x must be assigned before y - // y must be assigned before z - // - // The order of the increment function doesn't matter. - assert_before("x", "y", &symbols); - assert_before("y", "z", &symbols); - } + //#[test] + //fn mutual_unused_circular_vars() { + // // This should report that both a and b are unused, since the return expr never references them. + // // It should not report them as circular, since we haven't solved the halting problem here. 
+ // let (_, output, problems, _) = can_expr(indoc!( + // r#" + // a = \arg -> if arg > 0 then b 7 else 0 + // b = \arg -> if arg > 0 then a (arg - 1) else 0 + // c = 5 - #[test] - fn reorder_closed_over_assignments() { - let (expr, output, problems, _) = can_expr(indoc!( - r#" - z = func1 x - x = 9 - y = func2 3 - func1 = \arg -> func2 arg + y - func2 = \arg -> arg + x + // c + // "# + // )); - z - "# - )); + // assert_eq!(problems, vec![unused("a"), unused("b")]); - assert_eq!(problems, vec![]); + // assert_eq!( + // output, + // Out { + // locals: vec!["c"], + // globals: vec![], + // variants: vec![], + // calls: vec![], + // tail_call: None + // } + // .into() + // ); + //} - assert_eq!( - output, - Out { - locals: vec!["func1", "func2", "x", "y", "z"], - globals: vec![], - variants: vec![], - calls: vec!["func1", "func2"], - tail_call: None - } - .into() - ); + //#[test] + //fn can_fibonacci() { + // let (_, output, problems, _) = can_expr(indoc!( + // r#" + // fibonacci = \num -> + // if num < 2 then + // num + // else + // fibonacci (num - 1) + fibonacci (num - 2) - let symbols = assigned_symbols(expr); + // fibonacci 9 + // "# + // )); - // In code gen, for everything to have been set before it gets read, - // the following must be true about when things are assigned: - // - // x and func2 must be assigned (in either order) before y - // y and func1 must be assigned (in either order) before z - assert_before("x", "y", &symbols); - assert_before("func2", "y", &symbols); + // assert_eq!(problems, vec![]); - assert_before("func1", "z", &symbols); - assert_before("y", "z", &symbols); - } + // assert_eq!( + // output, + // Out { + // locals: vec!["fibonacci"], + // globals: vec![], + // variants: vec![], + // calls: vec!["fibonacci"], + // tail_call: None + // } + // .into() + // ); + //} - fn assert_before(before: &str, after: &str, symbols: &Vec) { - assert_ne!(before, after); + //#[test] + //fn can_tail_call() { + // // TODO check the global params - make 
sure this + // // is considered a tail call, even though it only + // // calls itself from one branch! + // let (_, output, problems, _) = can_expr(indoc!( + // r#" + // factorial = \num -> + // factorialHelp num 0 - let before_symbol = sym(before); - let after_symbol = sym(after); - let before_index = symbols - .iter() - .position(|symbol| symbol == &before_symbol) - .unwrap_or_else(|| { - panic!( - "error in assert_before({:?}, {:?}): {:?} could not be found in {:?}", - before, - after, - sym(before), - symbols - ) - }); - let after_index = symbols - .iter() - .position(|symbol| symbol == &after_symbol) - .unwrap_or_else(|| { - panic!( - "error in assert_before({:?}, {:?}): {:?} could not be found in {:?}", - before, - after, - sym(after), - symbols - ) - }); + // factorialHelp = \num total -> + // if num == 0 then + // total + // else + // factorialHelp (num - 1) (total * num) - if before_index == after_index { - panic!( - "error in assert_before({:?}, {:?}): both were at index {} in {:?}", - before, after, after_index, symbols - ); - } else if before_index > after_index { - panic!("error in assert_before: {:?} appeared *after* {:?} (not before, as expected) in {:?}", before, after, symbols); - } - } + // factorial 9 + // "# + // )); - fn assigned_symbols(expr: Expr) -> Vec { - match expr { - Assign(assignments, _) => { - assignments.into_iter().map(|(pattern, _)| { - match pattern.value { - Identifier(symbol) => { - symbol - }, - _ => { - panic!("Called assigned_symbols passing an Assign expr with non-Identifier patterns!"); - } - } - }).collect() - }, - _ => { - panic!("Called assigned_symbols passing a non-Assign expr!"); - } - } - } + // assert_eq!(problems, vec![]); - // CIRCULAR ASSIGNMENT + // assert_eq!( + // output, + // Out { + // locals: vec!["factorial", "factorialHelp"], + // globals: vec![], + // variants: vec![], + // calls: vec!["factorial", "factorialHelp"], + // tail_call: None + // } + // .into() + // ); + //} - #[test] - fn 
circular_assignment() { - let (_, _, problems, _) = can_expr(indoc!( - r#" - c = d + 3 - b = 2 + c - d = a + 7 - a = b + 1 + //#[test] + //fn transitively_used_function() { + // // This should report that neither a nor b are unused, + // // since if you never call a function but do return it, that's okay! + // let (_, output, problems, _) = can_expr(indoc!( + // r#" + // a = \_ -> 42 + // b = a - 2 + d - "# - )); + // b + // "# + // )); - assert_eq!( - problems, - vec![Problem::CircularAssignment(vec![ - // c should appear first because it's assigned first in the original expression. - loc(unqualified("c")), - loc(unqualified("d")), - loc(unqualified("a")), - loc(unqualified("b")), - ])] - ); - } + // assert_eq!(problems, Vec::new()); - #[test] - fn always_function() { - // There was a bug where this reported UnusedArgument("val") - // since it was used only in the returned function only. - let (_, _, problems, _) = can_expr(indoc!( - r#" - \val -> \_ -> val - "# - )); + // assert_eq!( + // output, + // Out { + // locals: vec!["a", "b"], + // globals: vec![], + // variants: vec![], + // calls: vec![], + // tail_call: None + // } + // .into() + // ); + //} - assert_eq!(problems, vec![]); - } + //// ASSIGNMENT REORDERING - // TODO verify that Apply handles output.references.calls correctly + //#[test] + //fn reorder_assignments() { + // let (expr, output, problems, _) = can_expr(indoc!( + // r#" + // increment = \arg -> arg + 1 + // z = (increment 2) + y + // y = x + 1 + // x = 9 - // UNSUPPORTED PATTERNS + // z * 3 + // "# + // )); - // TODO verify that in closures and assignments, you can't assign to int/string/underscore/etc + // assert_eq!(problems, vec![]); - // OPERATOR PRECEDENCE + // assert_eq!( + // output, + // Out { + // locals: vec!["increment", "x", "y", "z"], + // globals: vec![], + // variants: vec![], + // calls: vec!["increment"], + // tail_call: None + // } + // .into() + // ); - // fn parse_with_precedence(input: &str) -> Result<(Expr, &str), 
easy::Errors> { - // parse_without_loc(input) - // .map(|(expr, remaining)| (expr::apply_precedence_and_associativity(loc(expr)).unwrap().value, remaining)) + // let symbols = assigned_symbols(expr); + + // // In code gen, for everything to have been set before it gets read, + // // the following must be true about when things are assigned: + // // + // // x must be assigned before y + // // y must be assigned before z + // // + // // The order of the increment function doesn't matter. + // assert_before("x", "y", &symbols); + // assert_before("y", "z", &symbols); + //} + + //#[test] + //fn reorder_closed_over_assignments() { + // let (expr, output, problems, _) = can_expr(indoc!( + // r#" + // z = func1 x + // x = 9 + // y = func2 3 + // func1 = \arg -> func2 arg + y + // func2 = \arg -> arg + x + + // z + // "# + // )); + + // assert_eq!(problems, vec![]); + + // assert_eq!( + // output, + // Out { + // locals: vec!["func1", "func2", "x", "y", "z"], + // globals: vec![], + // variants: vec![], + // calls: vec!["func1", "func2"], + // tail_call: None + // } + // .into() + // ); + + // let symbols = assigned_symbols(expr); + + // // In code gen, for everything to have been set before it gets read, + // // the following must be true about when things are assigned: + // // + // // x and func2 must be assigned (in either order) before y + // // y and func1 must be assigned (in either order) before z + // assert_before("x", "y", &symbols); + // assert_before("func2", "y", &symbols); + + // assert_before("func1", "z", &symbols); + // assert_before("y", "z", &symbols); + //} + + //fn assert_before(before: &str, after: &str, symbols: &Vec) { + // assert_ne!(before, after); + + // let before_symbol = sym(before); + // let after_symbol = sym(after); + // let before_index = symbols + // .iter() + // .position(|symbol| symbol == &before_symbol) + // .unwrap_or_else(|| { + // panic!( + // "error in assert_before({:?}, {:?}): {:?} could not be found in {:?}", + // before, + // 
after, + // sym(before), + // symbols + // ) + // }); + // let after_index = symbols + // .iter() + // .position(|symbol| symbol == &after_symbol) + // .unwrap_or_else(|| { + // panic!( + // "error in assert_before({:?}, {:?}): {:?} could not be found in {:?}", + // before, + // after, + // sym(after), + // symbols + // ) + // }); + + // if before_index == after_index { + // panic!( + // "error in assert_before({:?}, {:?}): both were at index {} in {:?}", + // before, after, after_index, symbols + // ); + // } else if before_index > after_index { + // panic!("error in assert_before: {:?} appeared *after* {:?} (not before, as expected) in {:?}", before, after, symbols); + // } + //} + + //fn assigned_symbols(expr: Expr) -> Vec { + // match expr { + // Assign(assignments, _) => { + // assignments.into_iter().map(|(pattern, _)| { + // match pattern.value { + // Identifier(symbol) => { + // symbol + // }, + // _ => { + // panic!("Called assigned_symbols passing an Assign expr with non-Identifier patterns!"); + // } + // } + // }).collect() + // }, + // _ => { + // panic!("Called assigned_symbols passing a non-Assign expr!"); + // } + // } + //} + + //// CIRCULAR ASSIGNMENT + + //#[test] + //fn circular_assignment() { + // let (_, _, problems, _) = can_expr(indoc!( + // r#" + // c = d + 3 + // b = 2 + c + // d = a + 7 + // a = b + 1 + + // 2 + d + // "# + // )); + + // assert_eq!( + // problems, + // vec![Problem::CircularAssignment(vec![ + // // c should appear first because it's assigned first in the original expression. + // loc(unqualified("c")), + // loc(unqualified("d")), + // loc(unqualified("a")), + // loc(unqualified("b")), + // ])] + // ); + //} + + //#[test] + //fn always_function() { + // // There was a bug where this reported UnusedArgument("val") + // // since it was used only in the returned function only. 
+ // let (_, _, problems, _) = can_expr(indoc!( + // r#" + // \val -> \_ -> val + // "# + // )); + + // assert_eq!(problems, vec![]); + //} + + //// TODO verify that Apply handles output.references.calls correctly + + //// UNSUPPORTED PATTERNS + + //// TODO verify that in closures and assignments, you can't assign to int/string/underscore/etc + + //// OPERATOR PRECEDENCE + + //// fn parse_with_precedence(input: &str) -> Result<(Expr, &str), easy::Errors> { + //// parse_without_loc(input) + //// .map(|(expr, remaining)| (expr::apply_precedence_and_associativity(loc(expr)).unwrap().value, remaining)) + //// } + + //// #[test] + //// fn two_operator_precedence() { + //// assert_eq!( + //// parse_with_precedence("x + y * 5"), + //// Ok((Operator( + //// loc_box(var("x")), + //// loc(Plus), + //// loc_box( + //// Operator( + //// loc_box(var("y")), + //// loc(Star), + //// loc_box(Int(5)) + //// ) + //// ), + //// ), + //// "")) + //// ); + + //// assert_eq!( + //// parse_with_precedence("x * y + 5"), + //// Ok((Operator( + //// loc_box( + //// Operator( + //// loc_box(var("x")), + //// loc(Star), + //// loc_box(var("y")), + //// ) + //// ), + //// loc(Plus), + //// loc_box(Int(5)) + //// ), + //// "")) + //// ); + //// } + + //// #[test] + //// fn compare_and() { + //// assert_eq!( + //// parse_with_precedence("x > 1 || True"), + //// Ok((Operator( + //// loc_box( + //// Operator( + //// loc_box(var("x")), + //// loc(GreaterThan), + //// loc_box(Int(1)) + //// ) + //// ), + //// loc(Or), + //// loc_box(ApplyVariant(vname("True"), None)) + //// ), + //// "")) + //// ); + //// } + + //// HELPERS + + //#[test] + //fn sort_cyclic_idents() { + // let assigned_idents = unqualifieds(vec!["blah", "c", "b", "d", "a"]); + + // assert_eq!( + // can::sort_cyclic_idents( + // loc_unqualifieds(vec!["a", "b", "c", "d"]), + // &mut assigned_idents.iter() + // ), + // loc_unqualifieds(vec!["c", "d", "a", "b"]) + // ); + //} + // + // + //// STRING LITERALS + + // + // #[test] + // fn 
string_with_valid_unicode_escapes() { + // expect_parsed_str("x\u{00A0}x", r#""x\u{00A0}x""#); + // expect_parsed_str("x\u{101010}x", r#""x\u{101010}x""#); // } // #[test] - // fn two_operator_precedence() { - // assert_eq!( - // parse_with_precedence("x + y * 5"), - // Ok((Operator( - // loc_box(var("x")), - // loc(Plus), - // loc_box( - // Operator( - // loc_box(var("y")), - // loc(Star), - // loc_box(Int(5)) - // ) - // ), - // ), - // "")) - // ); - - // assert_eq!( - // parse_with_precedence("x * y + 5"), - // Ok((Operator( - // loc_box( - // Operator( - // loc_box(var("x")), - // loc(Star), - // loc_box(var("y")), - // ) - // ), - // loc(Plus), - // loc_box(Int(5)) - // ), - // "")) + // fn string_with_too_large_unicode_escape() { + // // Should be too big - max size should be 10FFFF. + // // (Rust has this restriction. I assume it's a good idea.) + // assert_malformed_str( + // r#""abc\u{110000}def""#, + // vec![Located::new(0, 7, 0, 12, Problem::UnicodeCodePointTooLarge)], // ); // } // #[test] - // fn compare_and() { - // assert_eq!( - // parse_with_precedence("x > 1 || True"), - // Ok((Operator( - // loc_box( - // Operator( - // loc_box(var("x")), - // loc(GreaterThan), - // loc_box(Int(1)) - // ) - // ), - // loc(Or), - // loc_box(ApplyVariant(vname("True"), None)) - // ), - // "")) + // fn string_with_no_unicode_digits() { + // // No digits specified + // assert_malformed_str( + // r#""blah\u{}foo""#, + // vec![Located::new(0, 5, 0, 8, Problem::NoUnicodeDigits)], // ); // } - // HELPERS + // #[test] + // fn string_with_no_unicode_opening_brace() { + // // No opening curly brace. It can't be sure if the closing brace + // // was intended to be a closing brace for the unicode escape, so + // // report that there were no digits specified. 
+ // assert_malformed_str( + // r#""abc\u00A0}def""#, + // vec![Located::new(0, 4, 0, 5, Problem::NoUnicodeDigits)], + // ); + // } - #[test] - fn sort_cyclic_idents() { - let assigned_idents = unqualifieds(vec!["blah", "c", "b", "d", "a"]); + // #[test] + // fn string_with_no_unicode_closing_brace() { + // // No closing curly brace + // assert_malformed_str( + // r#""blah\u{stuff""#, + // vec![Located::new(0, 5, 0, 12, Problem::MalformedEscapedUnicode)], + // ); + // } - assert_eq!( - canonicalize::sort_cyclic_idents( - loc_unqualifieds(vec!["a", "b", "c", "d"]), - &mut assigned_idents.iter() - ), - loc_unqualifieds(vec!["c", "d", "a", "b"]) - ); - } + // #[test] + // fn string_with_no_unicode_braces() { + // // No curly braces + // assert_malformed_str( + // r#""zzzz\uzzzzz""#, + // vec![Located::new(0, 5, 0, 6, Problem::NoUnicodeDigits)], + // ); + // } + + // #[test] + // fn string_with_interpolation_at_start() { + // let input = indoc!( + // r#" + // "\(abc)defg" + // "# + // ); + // let (args, ret) = (vec![("", Located::new(0, 2, 0, 4, Var("abc")))], "defg"); + // let arena = Bump::new(); + // let actual = parse_with(&arena, input); + + // assert_eq!( + // Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), + // actual + // ); + // } + + // #[test] + // fn string_with_interpolation_at_end() { + // let input = indoc!( + // r#" + // "abcd\(efg)" + // "# + // ); + // let (args, ret) = (vec![("abcd", Located::new(0, 6, 0, 8, Var("efg")))], ""); + // let arena = Bump::new(); + // let actual = parse_with(&arena, input); + + // assert_eq!( + // Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), + // actual + // ); + // } + + // #[test] + // fn string_with_interpolation_in_middle() { + // let input = indoc!( + // r#" + // "abc\(defg)hij" + // "# + // ); + // let (args, ret) = (vec![("abc", Located::new(0, 5, 0, 8, Var("defg")))], "hij"); + // let arena = Bump::new(); + // let actual = parse_with(&arena, input); + + // assert_eq!( + // 
Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), + // actual + // ); + // } + + // #[test] + // fn string_with_two_interpolations_in_middle() { + // let input = indoc!( + // r#" + // "abc\(defg)hi\(jkl)mn" + // "# + // ); + // let (args, ret) = ( + // vec![ + // ("abc", Located::new(0, 5, 0, 8, Var("defg"))), + // ("hi", Located::new(0, 14, 0, 16, Var("jkl"))), + // ], + // "mn", + // ); + // let arena = Bump::new(); + // let actual = parse_with(&arena, input); + + // assert_eq!( + // Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), + // actual + // ); + // } + + // #[test] + // fn string_with_four_interpolations() { + // let input = indoc!( + // r#" + // "\(abc)def\(ghi)jkl\(mno)pqrs\(tuv)" + // "# + // ); + // let (args, ret) = ( + // vec![ + // ("", Located::new(0, 2, 0, 4, Var("abc"))), + // ("def", Located::new(0, 11, 0, 13, Var("ghi"))), + // ("jkl", Located::new(0, 20, 0, 22, Var("mno"))), + // ("pqrs", Located::new(0, 30, 0, 32, Var("tuv"))), + // ], + // "", + // ); + // let arena = Bump::new(); + // let actual = parse_with(&arena, input); + + // assert_eq!( + // Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), + // actual + // ); + // } + + // #[test] + // fn string_with_escaped_interpolation() { + // assert_parses_to( + // // This should NOT be string interpolation, because of the \\ + // indoc!( + // r#" + // "abcd\\(efg)hij" + // "# + // ), + // Str(r#"abcd\(efg)hij"#.into()), + // ); + // } + // + + // #[test] + // fn string_without_escape() { + // expect_parsed_str("a", r#""a""#); + // expect_parsed_str("ab", r#""ab""#); + // expect_parsed_str("abc", r#""abc""#); + // expect_parsed_str("123", r#""123""#); + // expect_parsed_str("abc123", r#""abc123""#); + // expect_parsed_str("123abc", r#""123abc""#); + // expect_parsed_str("123 abc 456 def", r#""123 abc 456 def""#); + // } + + // #[test] + // fn string_with_special_escapes() { + // expect_parsed_str(r#"x\x"#, r#""x\\x""#); + // expect_parsed_str(r#"x"x"#, 
r#""x\"x""#); + // expect_parsed_str("x\tx", r#""x\tx""#); + // expect_parsed_str("x\rx", r#""x\rx""#); + // expect_parsed_str("x\nx", r#""x\nx""#); + // } + + // fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec>) { + // let arena = Bump::new(); + // let actual = parse_with(&arena, input); + + // assert_eq!( + // Ok(Expr::MalformedStr(expected_probs.into_boxed_slice())), + // actual + // ); + // } + // + // TODO test what happens when interpolated strings contain 1+ malformed idents + // TODO test hex/oct/binary conversion to numbers } diff --git a/tests/test_deprecated_parse.rs b/tests/test_deprecated_parse.rs deleted file mode 100644 index 39fd798596..0000000000 --- a/tests/test_deprecated_parse.rs +++ /dev/null @@ -1,1449 +0,0 @@ -#[macro_use] -extern crate pretty_assertions; -#[macro_use] -extern crate indoc; -extern crate combine; -extern crate roc; - -mod helpers; - -#[cfg(test)] -mod test_parse { - use combine::easy; - use combine::stream::state::State; - use combine::Parser; - use helpers::{loc, loc_box, parse_without_loc, standalone_expr, zero_loc_expr}; - use roc::deprecated::parse_state::IndentablePosition; - use roc::expr::Expr::*; - use roc::expr::Pattern::*; - use roc::expr::{Expr, Ident, VariantName}; - use roc::operator::Operator::*; - use roc::region::Located; - - // PARSE TEST HELPERS - - fn easy_parse_standalone( - actual_str: &str, - ) -> Result<(Expr, &str), easy::Errors> { - let parse_state = State::with_positioner(actual_str, IndentablePosition::default()); - - standalone_expr() - .easy_parse(parse_state) - .map(|(expr, state)| (expr, state.input)) - } - - fn assert_fully_parses(actual_str: &str, expected_expr: Expr) { - assert_eq!( - Ok((expected_expr, "".to_string())), - parse_without_loc(actual_str) - ); - } - - fn var(name: &str) -> Expr { - Var(Ident::Unqualified(name.to_string())) - } - - fn call_by_name(name: &str, args: Vec>) -> Expr { - Apply( - loc_box(Var(raw(name))), - args.into_iter() - .map(|loc_expr| 
loc(zero_loc_expr(loc_expr.value))) - .collect(), - ) - } - - // LIST LITERALS - - #[test] - fn empty_list() { - assert_fully_parses( - indoc!( - r#" - [] - "# - ), - EmptyList, - ); - } - - #[test] - fn single_list() { - assert_fully_parses( - indoc!( - r#" - [ 1 ] - "# - ), - List(vec![loc(Int(1))]), - ); - } - - #[test] - fn multi_list() { - assert_fully_parses( - indoc!( - r#" - [1 , 2,3] - "# - ), - List(vec![loc(Int(1)), loc(Int(2)), loc(Int(3))]), - ); - } - - #[test] - fn list_as_arg() { - assert_fully_parses( - indoc!( - r#" - func [ 1, 2,3 ] - "# - ), - call_by_name( - "func", - vec![loc(List(vec![loc(Int(1)), loc(Int(2)), loc(Int(3))]))], - ), - ); - } - - #[test] - fn list_with_function_call() { - assert_fully_parses( - indoc!( - r#" - [ foo 1 ] - "# - ), - List(vec![loc(call_by_name("foo", vec![loc(Int(1))]))]), - ); - } - - #[test] - fn list_with_multiple_function_calls() { - assert_fully_parses( - indoc!( - r#" - [ foo 1, bar 2 ] - "# - ), - List(vec![ - loc(call_by_name("foo", vec![loc(Int(1))])), - loc(call_by_name("bar", vec![loc(Int(2))])), - ]), - ); - } - - // STRING LITERALS - - fn expect_parsed_str<'a>(expected_str: &'a str, actual_str: &'a str) { - assert_fully_parses(actual_str, Expr::Str(expected_str.to_string())) - } - - fn expect_parsed_str_error<'a>(actual_str: &'a str) { - assert!( - parse_without_loc(actual_str).is_err(), - "Expected parsing error" - ); - } - - #[test] - fn empty_string() { - assert_fully_parses( - indoc!( - r#" - "" - "# - ), - EmptyStr, - ); - } - - #[test] - fn string_without_escape() { - expect_parsed_str("a", "\"a\""); - expect_parsed_str("ab", "\"ab\""); - expect_parsed_str("abc", "\"abc\""); - expect_parsed_str("123", "\"123\""); - expect_parsed_str("abc123", "\"abc123\""); - expect_parsed_str("123abc", "\"123abc\""); - expect_parsed_str("123 abc 456 def", "\"123 abc 456 def\""); - } - - #[test] - fn string_with_special_escapes() { - expect_parsed_str("x\\x", "\"x\\\\x\""); - expect_parsed_str("x\"x", 
"\"x\\\"x\""); - expect_parsed_str("x\tx", "\"x\\tx\""); - expect_parsed_str("x\rx", "\"x\\rx\""); - expect_parsed_str("x\nx", "\"x\\nx\""); - } - - #[test] - fn string_with_escaped_interpolation() { - assert_fully_parses( - // This should NOT be string interpolation, because of the \\ - indoc!( - r#" - "abcd\\(efg)hij" - "# - ), - Expr::Str(r#"abcd\(efg)hij"#.to_string()), - ); - } - - fn raw(string: &str) -> Ident { - Ident::Unqualified(string.to_string()) - } - - #[test] - fn string_with_interpolation_at_end() { - assert_fully_parses( - indoc!( - r#" - "abcd\(efg)" - "# - ), - InterpolatedStr(vec![("abcd".to_string(), loc(raw("efg")))], "".to_string()), - ); - } - - #[test] - fn string_with_interpolation() { - assert_fully_parses( - indoc!( - r#" - "abcd\(efg)hij" - "# - ), - InterpolatedStr( - vec![("abcd".to_string(), loc(raw("efg")))], - "hij".to_string(), - ), - ); - } - - #[test] - fn string_with_single_qoute() { - // This shoud NOT be escaped in a string. - expect_parsed_str("x'x", r#""x'x""#); - } - - #[test] - fn string_with_valid_unicode_escapes() { - expect_parsed_str("x\u{00A0}x", r#""x\u{00A0}x""#); - expect_parsed_str("x\u{101010}x", r#""x\u{101010}x""#); - } - - #[test] - fn string_with_invalid_unicode_escapes() { - // Should be too big - max size should be 10FFFF. - // (Rust has this restriction. I assume it's a good idea.) 
- expect_parsed_str_error(r#""x\u{110000}x""#); - - // No digits specified - expect_parsed_str_error(r#""x\u{}x""#); - - // No opening curly brace - expect_parsed_str_error(r#""x\u}x""#); - - // No closing curly brace - expect_parsed_str_error(r#""x\u{x""#); - - // No curly braces - expect_parsed_str_error(r#""x\ux""#); - } - - // // CHAR LITERALS - - fn expect_parsed_char<'a>(expected: char, actual_str: &'a str) { - assert_fully_parses(actual_str, Char(expected)) - } - - fn expect_parsed_char_error<'a>(actual_str: &'a str) { - assert!( - parse_without_loc(actual_str).is_err(), - "Expected parsing error" - ); - } - - #[test] - fn empty_char() { - if easy_parse_standalone("''").is_ok() { - panic!("Expected parse error"); - } - } - - #[test] - fn char_without_escape() { - expect_parsed_char('a', "'a'"); - expect_parsed_char('1', "'1'"); - expect_parsed_char(' ', "' '"); - } - - #[test] - fn char_with_special_escapes() { - expect_parsed_char('\\', r#"'\\'"#); - expect_parsed_char('\'', r#"'\''"#); - expect_parsed_char('\t', r#"'\t'"#); - expect_parsed_char('\r', r#"'\r'"#); - expect_parsed_char('\n', r#"'\n'"#); - } - - #[test] - fn char_with_double_qoute() { - // This shoud NOT be escaped in a char. - expect_parsed_char('"', r#"'"'"#); - } - - #[test] - fn char_with_unicode_escapes() { - expect_parsed_char('\u{00A0}', r#"'\u{00A0}'"#); - expect_parsed_char('\u{101010}', r#"'\u{101010}'"#); - } - - #[test] - fn char_with_invalid_unicode_escapes() { - // Should be too big - max size should be 10FFFF. - // (Rust has this rechariction. I assume it's a good idea.) 
- expect_parsed_char_error(r#""x\u{110000}x""#); - - // No digits specified - expect_parsed_char_error(r#""x\u{}x""#); - - // No opening curly brace - expect_parsed_char_error(r#""x\u}x""#); - - // No closing curly brace - expect_parsed_char_error(r#""x\u{x""#); - - // No curly braces - expect_parsed_char_error(r#""x\ux""#); - } - - // // NUMBER LITERALS - - fn expect_parsed_float<'a>(expected: f64, actual: &str) { - assert_eq!( - Ok((Float(expected), "".to_string())), - parse_without_loc(actual) - ); - } - - fn expect_parsed_int<'a>(expected: i64, actual: &str) { - assert_eq!( - Ok((Int(expected), "".to_string())), - parse_without_loc(actual) - ); - } - - #[test] - fn positive_int() { - expect_parsed_int(1234, "1234"); - } - - #[test] - fn negative_int() { - expect_parsed_int(-1234, "-1234"); - } - - #[test] - fn positive_float() { - expect_parsed_float(123.45, "123.45"); - expect_parsed_float(42.00, "42.00"); - } - - #[test] - fn negative_float() { - expect_parsed_float(-1234.567, "-1234.567"); - expect_parsed_float(-192.0, "-192.0"); - } - - #[test] - fn ints_with_underscores() { - expect_parsed_int(987654321, "987_6_5_432_1"); - expect_parsed_int(-1234567890, "-1_234_567_890"); - } - - #[test] - fn fracs_with_spaces() { - expect_parsed_float(-1234.567, "-1_23_4.567"); - expect_parsed_float(-192.0, "-19_2.0"); - expect_parsed_float(123.45, "1_2_3.45"); - expect_parsed_float(42.00, "4_2.00"); - } - - #[test] - fn single_operator_with_var() { - assert_eq!( - // It's important that this isn't mistaken for - // a declaration like (x = 1) - parse_without_loc("x == 1"), - Ok(( - Operator(loc_box(var("x")), loc(Equals), loc_box(Int(1))), - "".to_string() - )) - ); - } - - #[test] - fn comparison_operators() { - assert_eq!( - parse_without_loc("x >= 0"), - Ok(( - Operator(loc_box(var("x")), loc(GreaterThanOrEq), loc_box(Int(0))), - "".to_string() - )) - ); - assert_eq!( - parse_without_loc("x > 0"), - Ok(( - Operator(loc_box(var("x")), loc(GreaterThan), 
loc_box(Int(0))), - "".to_string() - )) - ); - assert_eq!( - parse_without_loc("x <= 0"), - Ok(( - Operator(loc_box(var("x")), loc(LessThanOrEq), loc_box(Int(0))), - "".to_string() - )) - ); - assert_eq!( - parse_without_loc("x < 0"), - Ok(( - Operator(loc_box(var("x")), loc(LessThan), loc_box(Int(0))), - "".to_string() - )) - ); - } - - #[test] - fn single_operator() { - match parse_without_loc("1234 + 567") { - Ok((Operator(v1, op, v2), string)) => { - assert_eq!(string, "".to_string()); - assert_eq!((*v1).value, Int(1234)); - assert_eq!(op.value, Plus); - assert_eq!((*v2).value, Int(567)); - } - - _ => panic!("Expression didn't parse"), - } - } - - #[test] - fn multiple_operators() { - assert_eq!( - parse_without_loc("1 + 2 ~/ 3"), - Ok(( - Operator( - loc_box(Int(1)), - loc(Plus), - loc_box(Operator(loc_box(Int(2)), loc(TildeSlash), loc_box(Int(3)))) - ), - "".to_string() - )) - ); - } - - #[test] - fn operators_with_parens() { - assert_eq!( - parse_without_loc("(1 + 2)"), - Ok(( - Operator(loc_box(Int(1)), loc(Plus), loc_box(Int(2))), - "".to_string() - )) - ); - - assert_eq!( - parse_without_loc("(1 - 2)"), - Ok(( - Operator(loc_box(Int(1)), loc(Minus), loc_box(Int(2))), - "".to_string() - )) - ); - - assert_eq!( - parse_without_loc("(1 + 2 * 3)"), - Ok(( - Operator( - loc_box(Int(1)), - loc(Plus), - loc_box(Operator(loc_box(Int(2)), loc(Star), loc_box(Int(3)))) - ), - "".to_string() - )) - ); - - assert_eq!( - parse_without_loc("1 + (2 * 3)"), - Ok(( - Operator( - loc_box(Int(1)), - loc(Plus), - loc_box(Operator(loc_box(Int(2)), loc(Star), loc_box(Int(3)))) - ), - "".to_string() - )) - ); - - assert_eq!( - parse_without_loc("(1 + 2) * 3"), - Ok(( - Operator( - loc_box(Operator(loc_box(Int(1)), loc(Plus), loc_box(Int(2)))), - loc(Star), - loc_box(Int(3)), - ), - "".to_string() - )) - ); - } - - // VAR - - fn expect_parsed_var<'a>(expected_str: &'a str) { - assert_eq!( - Ok((var(expected_str), "".to_string())), - parse_without_loc(expected_str) - ); - } - - fn 
expect_parsed_var_error<'a>(actual_str: &'a str) { - assert!( - parse_without_loc(actual_str).is_err(), - "Expected parsing error" - ); - } - - #[test] - fn basic_var() { - expect_parsed_var("x"); - expect_parsed_var("x2"); - expect_parsed_var("foo"); - expect_parsed_var("foo2furious"); - } - - #[test] - fn invalid_var() { - expect_parsed_var_error("5x"); - expect_parsed_var_error("2foo2furious"); - expect_parsed_var_error("2Foo2Furious"); - } - - #[test] - fn var_with_parens() { - assert_eq!(parse_without_loc("( x)"), Ok((var("x"), "".to_string()))); - assert_eq!(parse_without_loc("(x )"), Ok((var("x"), "".to_string()))); - assert_eq!(parse_without_loc("( x )"), Ok((var("x"), "".to_string()))); - } - - fn vname(name: &str) -> VariantName { - VariantName::Unqualified(name.to_string()) - } - - // APPLY - - fn expect_parsed_apply<'a>(parse_str: &'a str, expr1: Expr, expr2: Expr) { - assert_eq!( - Ok((Apply(loc_box(expr1), vec![loc(expr2)]), "".to_string())), - parse_without_loc(parse_str) - ); - } - - fn expect_parsed_apply_error<'a>(actual_str: &'a str) { - assert!( - parse_without_loc(actual_str).is_err(), - "Expected parsing error" - ); - } - - #[test] - fn apply() { - expect_parsed_apply("(x) y", var("x"), var("y")); - - expect_parsed_apply("(x 5) y", call_by_name("x", vec![loc(Int(5))]), var("y")); - - expect_parsed_apply( - "(x 5) (y 6)", - call_by_name("x", vec![loc(Int(5))]), - call_by_name("y", vec![loc(Int(6))]), - ); - - expect_parsed_apply("(5) (6)", Int(5), Int(6)); - } - - #[test] - fn invalid_apply() { - expect_parsed_apply_error("(x 5)y"); - } - - // TODO write a bunch of parenthetical expression tests - try to repeat - // all of the above tests except with parens too! - // Also, verify them all with variable paren counts; ((foo)) should work. 
- - // CLOSURE - - #[test] - fn single_arg_closure() { - assert_fully_parses( - indoc!( - r#" - \a -> b - "# - ), - Closure(vec![loc(Identifier("a".to_string()))], loc_box(var("b"))), - ); - } - - #[test] - fn multi_arg_closure() { - assert_fully_parses( - indoc!( - r#" - \a b -> c - "# - ), - Closure( - vec![ - loc(Identifier("a".to_string())), - loc(Identifier("b".to_string())), - ], - loc_box(var("c")), - ), - ); - } - - #[test] - fn assign_closure() { - assert_fully_parses( - indoc!( - r#" - foo = \a b -> c - - foo - "# - ), - Assign( - vec![( - loc(Identifier("foo".to_string())), - loc(Closure( - vec![ - loc(Identifier("a".to_string())), - loc(Identifier("b".to_string())), - ], - loc_box(var("c")), - )), - )], - loc_box(var("foo")), - ), - ); - } - - #[test] - fn call_named_closure() { - assert_fully_parses( - indoc!( - r#" - x = \a b -> 5 - - foo 1 - "# - ), - Assign( - vec![( - loc(Identifier("x".to_string())), - loc(Closure( - vec![ - loc(Identifier("a".to_string())), - loc(Identifier("b".to_string())), - ], - loc_box(Int(5)), - )), - )], - loc_box(call_by_name("foo", vec![loc(Int(1))])), - ), - ); - } - - #[test] - fn multiple_assign_call_closure() { - assert_fully_parses( - indoc!( - r#" - foo = \a b -> 7 - bar = 5 - - baz 1 - "# - ), - Assign( - vec![ - ( - loc(Identifier("foo".to_string())), - loc(Closure( - vec![ - loc(Identifier("a".to_string())), - loc(Identifier("b".to_string())), - ], - loc_box(Int(7)), - )), - ), - (loc(Identifier("bar".to_string())), loc(Int(5))), - ], - loc_box(call_by_name("baz", vec![loc(Int(1))])), - ), - ); - } - - // FUNC - - fn expect_parsed_func<'a>(parse_str: &'a str, func_str: &'a str, args: Vec>) { - assert_eq!( - Ok((call_by_name(func_str, args), "".to_string())), - parse_without_loc(parse_str) - ); - } - - fn expect_parsed_func_syntax_problem<'a>(actual_str: &'a str) { - assert!( - parse_without_loc(actual_str).is_err(), - "Expected parsing error" - ); - } - - fn expect_parsed_func_error<'a>(actual_str: &'a str) { - 
assert!( - parse_without_loc(actual_str).is_err(), - "Expected parsing error" - ); - } - - #[test] - fn single_arg_func() { - expect_parsed_func("f 1", "f", vec![loc(Int(1))]); - expect_parsed_func("foo bar", "foo", vec![loc(var("bar"))]); - expect_parsed_func("foo \"hi\"", "foo", vec![loc(Str("hi".to_string()))]); - } - - #[test] - fn multi_arg_func() { - expect_parsed_func( - "f 1 23 456", - "f", - vec![loc(Int(1)), loc(Int(23)), loc(Int(456))], - ); - expect_parsed_func("foo bar 'z'", "foo", vec![loc(var("bar")), loc(Char('z'))]); - expect_parsed_func( - "foo \"hi\" 1 blah", - "foo", - vec![loc(Str("hi".to_string())), loc(Int(1)), loc(var("blah"))], - ); - } - - #[test] - fn multi_arg_func_with_parens() { - expect_parsed_func( - "f (1) 23 456", - "f", - vec![loc(Int(1)), loc(Int(23)), loc(Int(456))], - ); - expect_parsed_func( - "foo bar ('z')", - "foo", - vec![loc(var("bar")), loc(Char('z'))], - ); - expect_parsed_func( - "foo 1 (bar \"hi\") 2 (blah)", - "foo", - vec![ - loc(Int(1)), - loc(call_by_name("bar", vec![loc(Str("hi".to_string()))])), - loc(Int(2)), - loc(var("blah")), - ], - ); - } - - #[test] - fn multiline_func() { - expect_parsed_func("f\n 1", "f", vec![loc(Int(1))]); - expect_parsed_func( - "foo bar\n 'z'", - "foo", - vec![loc(var("bar")), loc(Char('z'))], - ); - expect_parsed_func( - "foo \"hi\"\n 1\n blah", - "foo", - vec![loc(Str("hi".to_string())), loc(Int(1)), loc(var("blah"))], - ); - } - - #[test] - fn func_with_operator() { - assert_eq!( - parse_without_loc("f 5 + 6"), - Ok(( - Operator( - loc_box(call_by_name("f", vec![loc(Int(5))],)), - loc(Plus), - loc_box(Int(6)) - ), - "".to_string() - )) - ); - } - - #[test] - fn func_with_operator_and_multiple_args() { - assert_eq!( - parse_without_loc("f 1 2 3 + 6"), - Ok(( - Operator( - loc_box(call_by_name( - "f", - vec![loc(Int(1)), loc(Int(2)), loc(Int(3))], - )), - loc(Plus), - loc_box(Int(6)) - ), - "".to_string() - )) - ); - } - - #[test] - fn invalid_func() { - 
expect_parsed_func_syntax_problem("1 f"); - expect_parsed_func_syntax_problem("(1 f)"); - } - - // PARENS - - #[test] - fn basic_parens() { - expect_parsed_int(1, "(1)"); - expect_parsed_int(-2, "((-2))"); - expect_parsed_str("a", "(\"a\")"); - expect_parsed_str("abc", "((\"abc\"))"); - expect_parsed_func("(f 1)", "f", vec![loc(Int(1))]); - expect_parsed_func("(foo bar)", "foo", vec![loc(var("bar"))]); - } - - #[test] - fn parens_with_spaces() { - expect_parsed_func("(a 1 )", "a", vec![loc(Int(1))]); - expect_parsed_func("( b \"y\")", "b", vec![loc(Str("y".to_string()))]); - expect_parsed_func("( c \"z\" )", "c", vec![loc(Str("z".to_string()))]); - } - - #[test] - fn invalid_parens_func() { - expect_parsed_func_error("(1 f"); - expect_parsed_func_error("(f 1"); - } - - // CASE - - #[test] - fn one_branch_case() { - assert_eq!( - parse_without_loc("case 1 when x then 2"), - Ok(( - Case( - loc_box(Int(1)), - vec![(loc(Identifier("x".to_string())), loc(Int(2)))] - ), - "".to_string() - )) - ); - } - - #[test] - fn case_matching_multi_arg_variant() { - assert_eq!( - parse_without_loc("case 1 when Foo bar baz then 2"), - Ok(( - Case( - loc_box(Int(1)), - vec![( - loc(Variant( - loc(vname("Foo")), - Some(vec![ - loc(Identifier("bar".to_string())), - loc(Identifier("baz".to_string())) - ]) - )), - loc(Int(2)) - )] - ), - "".to_string() - )) - ); - } - - #[test] - fn two_branch_case() { - assert_eq!( - parse_without_loc("case 1 when x then 2 when y then 3"), - Ok(( - Case( - loc_box(Int(1)), - vec![ - (loc(Identifier("x".to_string())), loc(Int(2))), - (loc(Identifier("y".to_string())), loc(Int(3))) - ] - ), - "".to_string() - )) - ); - } - - #[test] - fn two_branch_case_with_two_newlines() { - assert_eq!( - parse_without_loc("case a\n\n when b then 1\n\n when\n c then 2"), - Ok(( - Case( - loc_box(var("a")), - vec![ - (loc(Identifier("b".to_string())), loc(Int(1))), - (loc(Identifier("c".to_string())), loc(Int(2))), - ] - ), - "".to_string() - )) - ); - } - - #[test] - fn 
multi_newline_case_regression() { - assert_eq!( - parse_without_loc("a =\n case x\n when b then 1\n\n when c then 2\na"), - Ok(( - Assign( - vec![( - loc(Identifier("a".to_string())), - loc(Case( - loc_box(var("x")), - vec![ - (loc(Identifier("b".to_string())), loc(Int(1))), - (loc(Identifier("c".to_string())), loc(Int(2))), - ] - )) - )], - loc_box(var("a")) - ), - "".to_string() - )) - ); - } - - #[test] - fn case_with_two_newlines() { - assert_eq!( - parse_without_loc("case a\n\n when b then 1"), - Ok(( - Case( - loc_box(var("a")), - vec![(loc(Identifier("b".to_string())), loc(Int(1))),] - ), - "".to_string() - )) - ); - } - - #[test] - fn case_with_number_pattern() { - assert_eq!( - parse_without_loc("case 1 when 2 then 3"), - Ok(( - Case(loc_box(Int(1)), vec![(loc(IntLiteral(2)), loc(Int(3))),]), - "".to_string() - )) - ); - } - - #[test] - fn case_with_empty_variant() { - assert_eq!( - parse_without_loc("case 1 when Foo then 3"), - Ok(( - Case( - loc_box(Int(1)), - vec![(loc(Variant(loc(vname("Foo")), None)), loc(Int(3))),] - ), - "".to_string() - )) - ); - } - - #[test] - fn case_with_nonempty_variant() { - assert_eq!( - parse_without_loc("case 1 when Foo x then 3"), - Ok(( - Case( - loc_box(Int(1)), - vec![( - loc(Variant( - loc(vname("Foo")), - Some(vec![loc(Identifier("x".to_string()))]) - )), - loc(Int(3)) - ),] - ), - "".to_string() - )) - ); - } - - #[test] - fn case_with_two_branches_and_function_call() { - assert_eq!( - parse_without_loc("case 0 when 2 then foo 9 when 1 then bar 8"), - Ok(( - Case( - loc_box(Int(0)), - vec![ - ( - loc(IntLiteral(2)), - loc(call_by_name("foo", vec![loc(Int(9))])) - ), - ( - loc(IntLiteral(1)), - loc(call_by_name("bar", vec![loc(Int(8))])) - ), - ] - ), - "".to_string() - )) - ); - } - - // IF - - #[test] - fn indented_if() { - assert_eq!( - parse_without_loc("if 12345 then\n 54321\n else 1337"), - Ok(( - If(loc_box(Int(12345)), loc_box(Int(54321)), loc_box(Int(1337))), - "".to_string() - )) - ); - } - - #[test] - fn 
if_underscore_separated_number() { - assert_eq!( - parse_without_loc("if 12_34_5 then 5_4_32_1 else 1_3_37"), - Ok(( - If(loc_box(Int(12345)), loc_box(Int(54321)), loc_box(Int(1337))), - "".to_string() - )) - ); - } - - #[test] - fn single_line_if() { - assert_eq!( - parse_without_loc("if foo then 1 else 2"), - Ok(( - If(loc_box(var("foo")), loc_box(Int(1)), loc_box(Int(2))), - "".to_string() - )) - ); - } - - // INLINE COMMENT - - #[test] - fn inline_comment() { - assert_eq!( - parse_without_loc("if 12345 then # blah blah\n 54321 #whee!\n else 1337"), - Ok(( - If(loc_box(Int(12345)), loc_box(Int(54321)), loc_box(Int(1337))), - "".to_string() - )) - ); - } - - #[test] - fn inline_comment_in_assignment() { - assert_eq!( - parse_without_loc("foo = 1\n# comment\nbar"), - Ok(( - Assign( - vec![(loc(Identifier("foo".to_string())), loc(Int(1)))], - loc_box(var("bar")), - ), - "".to_string() - )) - ); - } - - #[test] - fn horizontal_line_comment() { - assert_eq!( - parse_without_loc("if 12345 then ##### Heading #####\n 54321 #whee!\n else 1337"), - Ok(( - If(loc_box(Int(12345)), loc_box(Int(54321)), loc_box(Int(1337))), - "".to_string() - )) - ); - } - - // BLOCK COMMENT - - #[test] - fn block_comment() { - assert_eq!( - parse_without_loc( - "if 12345### blah\n\nblah etc\nwhee #comment ###then\n 54321\n else 1337" - ), - Ok(( - If(loc_box(Int(12345)), loc_box(Int(54321)), loc_box(Int(1337))), - "".to_string() - )) - ); - } - - // VARIANT - - #[test] - fn basic_variant() { - assert_eq!( - parse_without_loc("Abc"), - Ok((ApplyVariant(vname("Abc"), None), "".to_string())) - ); - } - - #[test] - fn variant_with_one_arg() { - assert_eq!( - parse_without_loc("Bbc 1"), - Ok(( - ApplyVariant(vname("Bbc"), Some(vec![loc(Int(1))])), - "".to_string() - )) - ); - } - - #[test] - fn variant_with_two_args() { - assert_eq!( - parse_without_loc("Bbc 1 2"), - Ok(( - ApplyVariant(vname("Bbc"), Some(vec![loc(Int(1)), loc(Int(2))])), - "".to_string() - )) - ); - } - - #[test] - fn 
variant_regression() { - // Somehow parsing the variant "Abc" worked but "Foo" failed (?!) - assert_eq!( - parse_without_loc("F"), - Ok((ApplyVariant(vname("F"), None), "".to_string())) - ); - } - - // COMPLEX EXPRESSIONS - - #[test] - fn nested_let_variant() { - assert_eq!( - parse_without_loc("one = Abc\n\ntwo = Bar\n\none"), - Ok(( - Assign( - vec![ - ( - loc(Identifier("one".to_string())), - loc(ApplyVariant(vname("Abc"), None)), - ), - ( - loc(Identifier("two".to_string())), - loc(ApplyVariant(vname("Bar"), None)), - ) - ], - loc_box(var("one")) - ), - "".to_string() - )) - ); - } - - #[test] - fn complex_expressions() { - expect_parsed_apply( - "(x 5) (y + (f 6))", - call_by_name("x", vec![loc(Int(5))]), - Operator( - loc_box(var("y")), - loc(Plus), - loc_box(call_by_name("f", vec![loc(Int(6))])), - ), - ); - - assert_eq!( - parse_without_loc("(x 5)"), - Ok((call_by_name("x", vec![loc(Int(5))]), "".to_string())) - ); - - assert_eq!(parse_without_loc("(5)"), Ok((Int(5), "".to_string()))); - - assert_eq!( - parse_without_loc("((1905))"), - Ok((Int(1905), "".to_string())) - ); - - assert_eq!( - parse_without_loc("6 + (685)"), - Ok(( - Operator(loc_box(Int(6)), loc(Plus), loc_box(Int(685))), - "".to_string() - )) - ); - - assert_eq!( - parse_without_loc("12 + 34"), - Ok(( - Operator(loc_box(Int(12)), loc(Plus), loc_box(Int(34))), - "".to_string() - )) - ); - - assert_eq!( - parse_without_loc("(51) + 19"), - Ok(( - Operator(loc_box(Int(51)), loc(Plus), loc_box(Int(19))), - "".to_string() - )) - ); - - assert_eq!( - parse_without_loc("(x 5) + 123"), - Ok(( - Operator( - loc_box(call_by_name("x", vec![loc(Int(5))])), - loc(Plus), - loc_box(Int(123)) - ), - "".to_string() - )) - ); - - assert_eq!( - parse_without_loc("(x 5) + (2 * y)"), - Ok(( - Operator( - loc_box(call_by_name("x", vec![loc(Int(5))])), - loc(Plus), - loc_box(Operator(loc_box(Int(2)), loc(Star), loc_box(var("y")))) - ), - "".to_string() - )) - ); - } - - // ASSIGN - - #[test] - fn 
assign_with_function_application() { - assert_eq!( - parse_without_loc("abc =\n y 1\n\nabc"), - Ok(( - Assign( - vec![( - loc(Identifier("abc".to_string())), - loc(call_by_name("y", vec![loc(Int(1))])) - )], - loc_box(var("abc")) - ), - "".to_string() - )) - ) - } - - #[test] - fn assign_returning_number() { - assert_eq!( - // let x = 5 in -10 - parse_without_loc("x = 5\n-10"), - Ok(( - Assign( - vec![(loc(Identifier("x".to_string())), loc(Int(5)))], - loc_box(Int(-10)) - ), - "".to_string() - )) - ); - - assert_eq!( - // let x = 5 in 10 - parse_without_loc("x=5\n-10"), - Ok(( - Assign( - vec![(loc(Identifier("x".to_string())), loc(Int(5)))], - loc_box(Int(-10)) - ), - "".to_string() - )) - ); - } - - #[test] - fn assign_with_operator() { - assert_eq!( - // let x = 5 + 10 in -20 - parse_without_loc("x =(5 + 10)\n-20"), - Ok(( - Assign( - vec![( - loc(Identifier("x".to_string())), - loc(Operator(loc_box(Int(5)), loc(Plus), loc_box(Int(10)))), - )], - loc_box(Int(-20)) - ), - "".to_string() - )) - ); - - assert_eq!( - // let x = 5 + 10 in -20 - parse_without_loc("x= 5 + 10\n-20"), - Ok(( - Assign( - vec![( - loc(Identifier("x".to_string())), - loc(Operator(loc_box(Int(5)), loc(Plus), loc_box(Int(10)))), - )], - loc_box(Int(-20)) - ), - "".to_string() - )) - ); - - assert_eq!( - // let x = 5 + 10 in -20 - parse_without_loc("x=5\n + 10\n-20"), - Ok(( - Assign( - vec![( - loc(Identifier("x".to_string())), - loc(Operator(loc_box(Int(5)), loc(Plus), loc_box(Int(10)))), - )], - loc_box(Int(-20)) - ), - "".to_string() - )) - ); - } - - #[test] - fn invalid_assign_returning_number() { - assert!( - parse_without_loc("x=5\n -10").is_err(), - "Expected parsing error" - ); - } - - #[test] - fn assign_multiple() { - assert_fully_parses( - indoc!( - r#" - x = 5 - y = 12 - z = 7 - 3 - "# - ), - Assign( - vec![ - (loc(Identifier("x".to_string())), loc(Int(5))), - (loc(Identifier("y".to_string())), loc(Int(12))), - (loc(Identifier("z".to_string())), loc(Int(7))), - ], - 
loc_box(Int(3)), - ), - ); - - assert_eq!( - // let x = 5 in let y = 12 in 3 - parse_without_loc("x = 5 - -3\ny = 12 + 7\n3 * -5"), - Ok(( - Assign( - vec![ - ( - loc(Identifier("x".to_string())), - loc(Operator(loc_box(Int(5)), loc(Minus), loc_box(Int(-3)))) - ), - ( - loc(Identifier("y".to_string())), - loc(Operator(loc_box(Int(12)), loc(Plus), loc_box(Int(7)))) - ) - ], - loc_box(Operator(loc_box(Int(3)), loc(Star), loc_box(Int(-5)))), - ), - "".to_string() - )) - ); - } - - #[test] - fn assign_returning_var() { - assert_eq!( - parse_without_loc("x=5\nx"), - Ok(( - Assign( - vec![(loc(Identifier("x".to_string())), loc(Int(5)))], - loc_box(var("x")) - ), - "".to_string() - )) - ); - } - - #[test] - fn bad_equals_indent_let() { - assert!( - parse_without_loc(" x=\n5\n\n5").is_err(), - "Expected parsing error" - ); - } - - #[test] - fn regression_on_calling_function_named_c() { - // This was broken because case-expressions were greedily consuming 'c' characters for "case" - assert_eq!( - parse_without_loc("f = \\x -> c 1\n\nf"), - Ok(( - Assign( - vec![( - loc(Identifier("f".to_string())), - loc(Closure( - vec![loc(Identifier("x".to_string()))], - loc_box(call_by_name("c", vec![loc(Int(1))])) - )), - )], - loc_box(var("f")) - ), - "".to_string() - )) - ); - } - - #[test] - fn regression_on_passing_arguments_named_i() { - // This was broken because if-expressions were greedily consuming 'i' characters for "if" - assert_eq!( - parse_without_loc("x i"), - Ok((call_by_name("x", vec![loc(var("i"))]), "".to_string())) - ); - } - -} diff --git a/tests/test_eval.rs b/tests/test_eval.rs deleted file mode 100644 index f891a6185a..0000000000 --- a/tests/test_eval.rs +++ /dev/null @@ -1,127 +0,0 @@ -// #[macro_use] extern crate pretty_assertions; -extern crate combine; -extern crate fraction; - -extern crate roc; - -#[cfg(test)] -mod test_eval { - // use roc::operator::Operator::*; - // use roc::expr::Pattern::*; - // use roc::expr::Expr::*; - // use roc::expr::{Expr, Raw}; - 
// use roc::eval; - // use roc::eval::Evaluated; - // use roc::region::{Located, Region}; - // use fraction::Fraction; - - // fn loc_box(val: T) -> Box> { - // Box::new(loc(val)) - // } - - // fn eval(expr: Expr) -> Evaluated { - // eval::eval(loc(expr)) - // } - - // fn loc(val: T) -> Located { - // Located::new(val, Region { - // start_line: 0, - // start_col: 0, - - // end_line: 0, - // end_col: 0, - // }) - // } - - // #[test] - // fn one_plus_one() { - // assert_eq!( - // eval(Operator(loc_box(Int(1)), loc(Plus), loc_box(Int(1)))), - // Evaluated::Int(2) - // ); - // } - - // #[test] - // fn point_one_plus_point_two() { - // // 0.1 + 0.2 == 0.3 THAT'S WHAT'S UP - // assert_eq!( - // eval(Operator(loc_box(Frac(1, 10)), loc(Plus), loc_box(Frac(2, 10)))), - // Evaluated::Frac(Fraction::new(3u64, 10u64)) - // ); - // } - - // #[test] - // fn addition_reduces() { - // assert_eq!( - // eval(Operator(loc_box(Frac(1, 3)), loc(Plus), loc_box(Frac(7, 14)))), - // Evaluated::Frac(Fraction::new(5u64, 6u64)) - // ); - // } - - // #[test] - // fn division_reduces() { - // assert_eq!( - // eval(Operator(loc_box(Frac(1, 3)), loc(Slash), loc_box(Frac(7, 14)))), - // Evaluated::ApplyVariant( - // "Ok".to_string(), - // Some(vec![Evaluated::Frac(Fraction::new(2u64, 3u64))]) - // ) - // ); - // } - - // #[test] - // fn division_by_zero() { - // assert_eq!( - // eval(Operator(loc_box(Frac(1, 10)), loc(Slash), loc_box(Frac(0, 10)))), - // Evaluated::ApplyVariant( - // "Err".to_string(), - // Some(vec![Evaluated::ApplyVariant("DivisionByZero".to_string(), None)]) - // ) - // ); - // } - - // #[test] - // fn string_interpolation() { - // assert_eq!( - // eval( - // Assign(loc(Identifier("foo".to_string())), loc_box(Str("one".to_string())), - // loc_box(Assign(loc(Identifier("bar".to_string())), loc_box(Str("two".to_string())), - // loc_box(Assign(loc(Identifier("baz".to_string())), loc_box(Str("three".to_string())), - // loc_box(InterpolatedStr( - // // 
"hi_\(foo)_\(bar)_\(baz)_string!" - // vec![ - // ("hi_".to_string(), loc(Raw::new("foo".to_string()))), - // ("_".to_string(), loc(Raw::new("bar".to_string()))), - // ("_".to_string(), loc(Raw::new("baz".to_string()))), - // ], - // "_string!".to_string() - // )) - // ))))) - // ), - // Evaluated::Str("hi_one_two_three_string!".to_string()) - // ); - // } - - // #[test] - // fn if_else() { - // assert_eq!( - // eval( - // If(loc_box(ApplyVariant("True".to_string(), None)), - // loc_box(Operator(loc_box(Int(1)), loc(Plus), loc_box(Int(2)))), - // loc_box(Operator(loc_box(Int(4)), loc(Plus), loc_box(Int(5)))) - // ) - // ), - // Evaluated::Int(3) - // ); - - // assert_eq!( - // eval( - // If(loc_box(ApplyVariant("False".to_string(), None)), - // loc_box(Operator(loc_box(Int(1)), loc(Plus), loc_box(Int(2)))), - // loc_box(Operator(loc_box(Int(4)), loc(Plus), loc_box(Int(5)))) - // ) - // ), - // Evaluated::Int(9) - // ); - // } -} diff --git a/tests/test_format.rs b/tests/test_format.rs index 01307f5370..1bed0986a8 100644 --- a/tests/test_format.rs +++ b/tests/test_format.rs @@ -3,13 +3,10 @@ extern crate pretty_assertions; #[macro_use] extern crate indoc; extern crate bumpalo; -extern crate combine; // OBSOLETE extern crate roc; -mod helpers; - #[cfg(test)] -mod test_formatter { +mod test_format { use bumpalo::Bump; use roc::parse; use roc::parse::ast::{Attempting, Expr}; @@ -85,21 +82,28 @@ mod test_formatter { )); } - // #[test] - // fn basic_string() { - // assert_formats_same(indoc!( - // r#" - // "blah" - // "# - // )); - // } + #[test] + fn basic_string() { + assert_formats_same(indoc!( + r#" + "blah" + "# + )); + } - // #[test] - // fn escaped_unicode_string() { - // assert_formats_same(indoc!( - // r#" - // "unicode: \u{A00A}!" - // "# - // )); - // } + #[test] + fn escaped_unicode_string() { + assert_formats_same(indoc!( + r#" + "unicode: \u{A00A}!" 
+ "# + )); + } + + // RECORD LITERALS + + #[test] + fn empty_record() { + assert_formats_same("{}"); + } } diff --git a/tests/test_infer.rs b/tests/test_infer.rs index 62a33283df..3867722415 100644 --- a/tests/test_infer.rs +++ b/tests/test_infer.rs @@ -3,83 +3,47 @@ extern crate pretty_assertions; #[macro_use] extern crate indoc; -extern crate combine; extern crate roc; mod helpers; #[cfg(test)] mod test_infer { - use helpers::{loc, parse_without_loc}; - use roc::canonicalize::{self, Expr, Procedure, Symbol}; - use roc::collections::{ImMap, MutMap}; - use roc::expr::{Ident, VariantName}; + use helpers::can_expr; + // use roc::can::symbol::Symbol; + // use roc::ident::{Ident, VariantName}; use roc::infer::infer_expr; use roc::pretty_print_types::content_to_string; - use roc::region::{Located, Region}; - use roc::subs::Content::{self, *}; + use roc::region::Located; + // use roc::subs::Content::{self, *}; use roc::subs::Subs; - use roc::subs::{FlatType, Variable}; - use roc::types::Type::*; - use roc::types::{Problem, Type}; + // use roc::subs::{FlatType, Variable}; + // use roc::types::Type::*; + // use roc::types::{Problem, Type}; // HELPERS fn infer_eq(src: &str, expected: &str) { - let (expr, procedures) = can_expr(src); + let (expr, _, _, procedures) = can_expr(src); let mut subs = Subs::new(); - let content = infer_expr(&mut subs, loc(expr), procedures); + let content = infer_expr(&mut subs, Located::new(0, 0, 0, 0, expr), procedures); let actual_str = content_to_string(content, &mut subs); assert_eq!(actual_str, expected.to_string()); } - fn can_expr(expr_str: &str) -> (Expr, MutMap) { - can_expr_with("blah", expr_str, &ImMap::default(), &ImMap::default()) - } + // fn apply(module_name: &str, type_name: &str, args: Vec) -> Content { + // Structure(FlatType::Apply( + // module_name.to_string(), + // type_name.to_string(), + // args, + // )) + // } - fn can_expr_with( - name: &str, - expr_str: &str, - declared_idents: &ImMap, - declared_variants: &ImMap>, - ) 
-> (Expr, MutMap) { - let (expr, unparsed) = parse_without_loc(expr_str).unwrap_or_else(|errors| { - panic!( - "Parse error trying to parse \"{}\" - {}", - expr_str.to_string(), - errors.to_string() - ) - }); - - assert_eq!(unparsed, "".to_string()); - - let home = "Test".to_string(); - let (loc_expr, _, problems, procedures) = canonicalize::canonicalize_declaration( - home, - name, - loc(expr), - declared_idents, - declared_variants, - ); - - assert_eq!(problems, Vec::new()); - - (loc_expr.value, procedures) - } - - fn apply(module_name: &str, type_name: &str, args: Vec) -> Content { - Structure(FlatType::Apply( - module_name.to_string(), - type_name.to_string(), - args, - )) - } - - fn var(num: u32) -> Variable { - Variable::new_for_testing_only(num) - } + // fn var(num: u32) -> Variable { + // Variable::new_for_testing_only(num) + // } #[test] fn empty_record() { @@ -92,7 +56,7 @@ mod test_infer { } #[test] - fn fractional_literal() { + fn float_literal() { infer_eq("0.5", "Float"); } @@ -108,459 +72,459 @@ mod test_infer { ); } - #[test] - fn empty_string() { - infer_eq( - indoc!( - r#" - "" - "# - ), - "String", - ); - } + // #[test] + // fn empty_string() { + // infer_eq( + // indoc!( + // r#" + // "" + // "# + // ), + // "String", + // ); + // } - // LIST + // // LIST - #[test] - fn empty_list() { - infer_eq( - indoc!( - r#" - [] - "# - ), - "List *", - ); - } + // #[test] + // fn empty_list() { + // infer_eq( + // indoc!( + // r#" + // [] + // "# + // ), + // "List *", + // ); + // } - #[test] - fn list_of_lists() { - infer_eq( - indoc!( - r#" - [[]] - "# - ), - "List (List *)", - ); - } + // #[test] + // fn list_of_lists() { + // infer_eq( + // indoc!( + // r#" + // [[]] + // "# + // ), + // "List (List *)", + // ); + // } - #[test] - fn triple_nested_list() { - infer_eq( - indoc!( - r#" - [[[]]] - "# - ), - "List (List (List *))", - ); - } + // #[test] + // fn triple_nested_list() { + // infer_eq( + // indoc!( + // r#" + // [[[]]] + // "# + // ), + // 
"List (List (List *))", + // ); + // } - #[test] - fn nested_empty_list() { - infer_eq( - indoc!( - r#" - [ [], [ [] ] ] - "# - ), - "List (List (List *))", - ); - } + // #[test] + // fn nested_empty_list() { + // infer_eq( + // indoc!( + // r#" + // [ [], [ [] ] ] + // "# + // ), + // "List (List (List *))", + // ); + // } - #[test] - fn list_of_one_int() { - infer_eq( - indoc!( - r#" - [42] - "# - ), - "List Int", - ); - } + // #[test] + // fn list_of_one_int() { + // infer_eq( + // indoc!( + // r#" + // [42] + // "# + // ), + // "List Int", + // ); + // } - #[test] - fn triple_nested_int_list() { - infer_eq( - indoc!( - r#" - [[[ 5 ]]] - "# - ), - "List (List (List Int))", - ); - } + // #[test] + // fn triple_nested_int_list() { + // infer_eq( + // indoc!( + // r#" + // [[[ 5 ]]] + // "# + // ), + // "List (List (List Int))", + // ); + // } - #[test] - fn list_of_ints() { - infer_eq( - indoc!( - r#" - [ 1, 2, 3 ] - "# - ), - "List Int", - ); - } + // #[test] + // fn list_of_ints() { + // infer_eq( + // indoc!( + // r#" + // [ 1, 2, 3 ] + // "# + // ), + // "List Int", + // ); + // } - #[test] - fn nested_list_of_ints() { - infer_eq( - indoc!( - r#" - [ [ 1 ], [ 2, 3 ] ] - "# - ), - "List (List Int)", - ); - } + // #[test] + // fn nested_list_of_ints() { + // infer_eq( + // indoc!( + // r#" + // [ [ 1 ], [ 2, 3 ] ] + // "# + // ), + // "List (List Int)", + // ); + // } - #[test] - fn list_of_one_string() { - infer_eq( - indoc!( - r#" - [ "cowabunga" ] - "# - ), - "List String", - ); - } + // #[test] + // fn list_of_one_string() { + // infer_eq( + // indoc!( + // r#" + // [ "cowabunga" ] + // "# + // ), + // "List String", + // ); + // } - #[test] - fn triple_nested_string_list() { - infer_eq( - indoc!( - r#" - [[[ "foo" ]]] - "# - ), - "List (List (List String))", - ); - } + // #[test] + // fn triple_nested_string_list() { + // infer_eq( + // indoc!( + // r#" + // [[[ "foo" ]]] + // "# + // ), + // "List (List (List String))", + // ); + // } - #[test] - fn 
list_of_strings() { - infer_eq( - indoc!( - r#" - [ "foo", "bar" ] - "# - ), - "List String", - ); - } + // #[test] + // fn list_of_strings() { + // infer_eq( + // indoc!( + // r#" + // [ "foo", "bar" ] + // "# + // ), + // "List String", + // ); + // } - // INTERPOLATED STRING + // // INTERPOLATED STRING - #[test] - fn infer_interpolated_string() { - infer_eq( - indoc!( - r#" - whatItIs = "great" + // #[test] + // fn infer_interpolated_string() { + // infer_eq( + // indoc!( + // r#" + // whatItIs = "great" - "type inference is \(whatItIs)!" - "# - ), - "String", - ); - } + // "type inference is \(whatItIs)!" + // "# + // ), + // "String", + // ); + // } - // LIST MISMATCH + // // LIST MISMATCH - #[test] - fn mismatch_heterogeneous_list() { - infer_eq( - indoc!( - r#" - [ "foo", 5 ] - "# - ), - "List ", - ); - } + // #[test] + // fn mismatch_heterogeneous_list() { + // infer_eq( + // indoc!( + // r#" + // [ "foo", 5 ] + // "# + // ), + // "List ", + // ); + // } - #[test] - fn mismatch_heterogeneous_nested_list() { - infer_eq( - indoc!( - r#" - [ [ "foo", 5 ] ] - "# - ), - "List (List )", - ); - } + // #[test] + // fn mismatch_heterogeneous_nested_list() { + // infer_eq( + // indoc!( + // r#" + // [ [ "foo", 5 ] ] + // "# + // ), + // "List (List )", + // ); + // } - #[test] - fn mismatch_heterogeneous_nested_empty_list() { - infer_eq( - indoc!( - r#" - [ [ 1 ], [ [] ] ] - "# - ), - "List (List )", - ); - } + // #[test] + // fn mismatch_heterogeneous_nested_empty_list() { + // infer_eq( + // indoc!( + // r#" + // [ [ 1 ], [ [] ] ] + // "# + // ), + // "List (List )", + // ); + // } - // CLOSURE + // // CLOSURE - #[test] - fn always_return_empty_record() { - infer_eq( - indoc!( - r#" - \_ -> {} - "# - ), - "* -> {}", - ); - } + // #[test] + // fn always_return_empty_record() { + // infer_eq( + // indoc!( + // r#" + // \_ -> {} + // "# + // ), + // "* -> {}", + // ); + // } - #[test] - fn two_arg_return_int() { - infer_eq( - indoc!( - r#" - \_ _ -> 42 - "# - ), - "*, 
* -> Int", - ); - } + // #[test] + // fn two_arg_return_int() { + // infer_eq( + // indoc!( + // r#" + // \_ _ -> 42 + // "# + // ), + // "*, * -> Int", + // ); + // } - #[test] - fn three_arg_return_string() { - infer_eq( - indoc!( - r#" - \_ _ _ -> "test!" - "# - ), - "*, *, * -> String", - ); - } + // #[test] + // fn three_arg_return_string() { + // infer_eq( + // indoc!( + // r#" + // \_ _ _ -> "test!" + // "# + // ), + // "*, *, * -> String", + // ); + // } - // ASSIGN + // // ASSIGN - #[test] - fn assign_empty_record() { - infer_eq( - indoc!( - r#" - foo = {} + // #[test] + // fn assign_empty_record() { + // infer_eq( + // indoc!( + // r#" + // foo = {} - foo - "# - ), - "{}", - ); - } + // foo + // "# + // ), + // "{}", + // ); + // } - #[test] - fn assign_string() { - infer_eq( - indoc!( - r#" - str = "thing" + // #[test] + // fn assign_string() { + // infer_eq( + // indoc!( + // r#" + // str = "thing" - str - "# - ), - "String", - ); - } + // str + // "# + // ), + // "String", + // ); + // } - #[test] - fn assign_1_arg_closure() { - infer_eq( - indoc!( - r#" - fn = \_ -> {} + // #[test] + // fn assign_1_arg_closure() { + // infer_eq( + // indoc!( + // r#" + // fn = \_ -> {} - fn - "# - ), - "* -> {}", - ); - } + // fn + // "# + // ), + // "* -> {}", + // ); + // } - #[test] - fn assign_2_arg_closure() { - infer_eq( - indoc!( - r#" - func = \_ _ -> 42 + // #[test] + // fn assign_2_arg_closure() { + // infer_eq( + // indoc!( + // r#" + // func = \_ _ -> 42 - func - "# - ), - "*, * -> Int", - ); - } + // func + // "# + // ), + // "*, * -> Int", + // ); + // } - #[test] - fn assign_3_arg_closure() { - infer_eq( - indoc!( - r#" - f = \_ _ _ -> "test!" + // #[test] + // fn assign_3_arg_closure() { + // infer_eq( + // indoc!( + // r#" + // f = \_ _ _ -> "test!" 
- f - "# - ), - "*, *, * -> String", - ); - } + // f + // "# + // ), + // "*, *, * -> String", + // ); + // } - #[test] - fn assign_multiple_functions() { - infer_eq( - indoc!( - r#" - a = \_ _ _ -> "test!" + // #[test] + // fn assign_multiple_functions() { + // infer_eq( + // indoc!( + // r#" + // a = \_ _ _ -> "test!" - b = a + // b = a - b - "# - ), - "*, *, * -> String", - ); - } + // b + // "# + // ), + // "*, *, * -> String", + // ); + // } - #[test] - fn assign_multiple_strings() { - infer_eq( - indoc!( - r#" - a = "test!" + // #[test] + // fn assign_multiple_strings() { + // infer_eq( + // indoc!( + // r#" + // a = "test!" - b = a + // b = a - b - "# - ), - "String", - ); - } + // b + // "# + // ), + // "String", + // ); + // } - #[test] - fn assign_multiple_nums() { - infer_eq( - indoc!( - r#" - c = b + // #[test] + // fn assign_multiple_nums() { + // infer_eq( + // indoc!( + // r#" + // c = b - b = a + // b = a - a = 42 + // a = 42 - c - "# - ), - "Int", - ); - } + // c + // "# + // ), + // "Int", + // ); + // } - // CALLING FUNCTIONS + // // CALLING FUNCTIONS - #[test] - fn call_returns_num() { - infer_eq( - indoc!( - r#" - alwaysFive = \_ -> 5 + // #[test] + // fn call_returns_num() { + // infer_eq( + // indoc!( + // r#" + // alwaysFive = \_ -> 5 - alwaysFive "stuff" - "# - ), - "Int", - ); - } + // alwaysFive "stuff" + // "# + // ), + // "Int", + // ); + // } - #[test] - fn call_returns_list() { - infer_eq( - indoc!( - r#" - enlist = \val -> [ val ] + // #[test] + // fn call_returns_list() { + // infer_eq( + // indoc!( + // r#" + // enlist = \val -> [ val ] - enlist 5 - "# - ), - "List Int", - ); - } + // enlist 5 + // "# + // ), + // "List Int", + // ); + // } - // TODO type annotations - // TODO fix identity inference - // TODO BoundTypeVariables - // TODO conditionals + // // TODO type annotations + // // TODO fix identity inference + // // TODO BoundTypeVariables + // // TODO conditionals - // #[test] - // fn indirect_always() { - // infer_eq( - // 
indoc!(r#" - // always = \val -> (\_ -> val) - // alwaysFoo = always "foo" + // // #[test] + // // fn indirect_always() { + // // infer_eq( + // // indoc!(r#" + // // always = \val -> (\_ -> val) + // // alwaysFoo = always "foo" - // alwaysFoo 42 - // "#), - // "String" - // ); - // } + // // alwaysFoo 42 + // // "#), + // // "String" + // // ); + // // } - // #[test] - // fn identity() { - // infer_eq( - // indoc!(r#" - // \val -> val - // "#), - // "a -> a" - // ); - // } + // // #[test] + // // fn identity() { + // // infer_eq( + // // indoc!(r#" + // // \val -> val + // // "#), + // // "a -> a" + // // ); + // // } - // #[test] - // fn always_function() { - // infer_eq( - // indoc!(r#" - // \val -> \_ -> val - // "#), - // "a -> (* -> a)" - // ); - // } + // // #[test] + // // fn always_function() { + // // infer_eq( + // // indoc!(r#" + // // \val -> \_ -> val + // // "#), + // // "a -> (* -> a)" + // // ); + // // } - // OPERATORS + // // OPERATORS - #[test] - fn div_operator() { - infer_eq( - indoc!( - r#" - \l r -> l / r - "# - ), - "Float, Float -> Float", - ); - } + // #[test] + // fn div_operator() { + // infer_eq( + // indoc!( + // r#" + // \l r -> l / r + // "# + // ), + // "Float, Float -> Float", + // ); + // } - #[test] - fn basic_division() { - infer_eq( - indoc!( - r#" - 1 / 2 - "# - ), - "Float", - ); - } + // #[test] + // fn basic_division() { + // infer_eq( + // indoc!( + // r#" + // 1 / 2 + // "# + // ), + // "Float", + // ); + // } // #[test] // fn basic_addition() { diff --git a/tests/test_parse.rs b/tests/test_parse.rs index 56337b8830..e03652e353 100644 --- a/tests/test_parse.rs +++ b/tests/test_parse.rs @@ -3,7 +3,6 @@ extern crate pretty_assertions; #[macro_use] extern crate indoc; extern crate bumpalo; -extern crate combine; // OBSOLETE extern crate roc; extern crate quickcheck; @@ -14,25 +13,15 @@ extern crate quickcheck_macros; mod helpers; #[cfg(test)] -mod test_parser { +mod test_parse { use bumpalo::Bump; - use helpers::located; - 
use roc::parse; + use helpers::parse_with; use roc::parse::ast::Attempting; use roc::parse::ast::Expr::{self, *}; - use roc::parse::parser::{Fail, FailReason, Parser, State}; - use roc::parse::problems::Problem; - use roc::region::{Located, Region}; + use roc::parse::parser::{Fail, FailReason}; + use roc::region::Region; use std::{f64, i64}; - fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result, Fail> { - let state = State::new(&input, Attempting::Module); - let parser = parse::expr(); - let answer = parser.parse(&arena, state); - - answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail) - } - fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) { let arena = Bump::new(); let actual = parse_with(&arena, input); @@ -48,16 +37,6 @@ mod test_parser { assert_eq!(Err(expected_fail), actual); } - fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec>) { - let arena = Bump::new(); - let actual = parse_with(&arena, input); - - assert_eq!( - Ok(Expr::MalformedStr(expected_probs.into_boxed_slice())), - actual - ); - } - // STRING LITERALS fn expect_parsed_str(input: &str, expected: &str) { @@ -77,7 +56,7 @@ mod test_parser { } #[test] - fn one_char_list() { + fn one_char_string() { assert_parses_to( indoc!( r#" @@ -89,7 +68,7 @@ mod test_parser { } #[test] - fn multi_char_list() { + fn multi_char_string() { assert_parses_to( indoc!( r#" @@ -113,24 +92,11 @@ mod test_parser { #[test] fn string_with_special_escapes() { - expect_parsed_str(r#"x\x"#, r#""x\\x""#); - expect_parsed_str(r#"x"x"#, r#""x\"x""#); - expect_parsed_str("x\tx", r#""x\tx""#); - expect_parsed_str("x\rx", r#""x\rx""#); - expect_parsed_str("x\nx", r#""x\nx""#); - } - - #[test] - fn string_with_escaped_interpolation() { - assert_parses_to( - // This should NOT be string interpolation, because of the \\ - indoc!( - r#" - "abcd\\(efg)hij" - "# - ), - Str(r#"abcd\(efg)hij"#.into()), - ); + expect_parsed_str(r#"x\\x"#, r#""x\\x""#); + expect_parsed_str(r#"x\"x"#, r#""x\"x""#); + 
expect_parsed_str(r#"x\tx"#, r#""x\tx""#); + expect_parsed_str(r#"x\rx"#, r#""x\rx""#); + expect_parsed_str(r#"x\nx"#, r#""x\nx""#); } #[test] @@ -139,159 +105,6 @@ mod test_parser { expect_parsed_str("x'x", r#""x'x""#); } - #[test] - fn string_with_valid_unicode_escapes() { - expect_parsed_str("x\u{00A0}x", r#""x\u{00A0}x""#); - expect_parsed_str("x\u{101010}x", r#""x\u{101010}x""#); - } - - #[test] - fn string_with_too_large_unicode_escape() { - // Should be too big - max size should be 10FFFF. - // (Rust has this restriction. I assume it's a good idea.) - assert_malformed_str( - r#""abc\u{110000}def""#, - vec![located(0, 7, 0, 12, Problem::UnicodeCodePointTooLarge)], - ); - } - - #[test] - fn string_with_no_unicode_digits() { - // No digits specified - assert_malformed_str( - r#""blah\u{}foo""#, - vec![located(0, 5, 0, 8, Problem::NoUnicodeDigits)], - ); - } - - #[test] - fn string_with_no_unicode_opening_brace() { - // No opening curly brace. It can't be sure if the closing brace - // was intended to be a closing brace for the unicode escape, so - // report that there were no digits specified. 
- assert_malformed_str( - r#""abc\u00A0}def""#, - vec![located(0, 4, 0, 5, Problem::NoUnicodeDigits)], - ); - } - - #[test] - fn string_with_no_unicode_closing_brace() { - // No closing curly brace - assert_malformed_str( - r#""blah\u{stuff""#, - vec![located(0, 5, 0, 12, Problem::MalformedEscapedUnicode)], - ); - } - - #[test] - fn string_with_no_unicode_braces() { - // No curly braces - assert_malformed_str( - r#""zzzz\uzzzzz""#, - vec![located(0, 5, 0, 6, Problem::NoUnicodeDigits)], - ); - } - - #[test] - fn string_with_interpolation_at_start() { - let input = indoc!( - r#" - "\(abc)defg" - "# - ); - let (args, ret) = (vec![("", located(0, 2, 0, 4, Var("abc")))], "defg"); - let arena = Bump::new(); - let actual = parse_with(&arena, input); - - assert_eq!( - Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), - actual - ); - } - - #[test] - fn string_with_interpolation_at_end() { - let input = indoc!( - r#" - "abcd\(efg)" - "# - ); - let (args, ret) = (vec![("abcd", located(0, 6, 0, 8, Var("efg")))], ""); - let arena = Bump::new(); - let actual = parse_with(&arena, input); - - assert_eq!( - Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), - actual - ); - } - - #[test] - fn string_with_interpolation_in_middle() { - let input = indoc!( - r#" - "abc\(defg)hij" - "# - ); - let (args, ret) = (vec![("abc", located(0, 5, 0, 8, Var("defg")))], "hij"); - let arena = Bump::new(); - let actual = parse_with(&arena, input); - - assert_eq!( - Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), - actual - ); - } - - #[test] - fn string_with_two_interpolations_in_middle() { - let input = indoc!( - r#" - "abc\(defg)hi\(jkl)mn" - "# - ); - let (args, ret) = ( - vec![ - ("abc", located(0, 5, 0, 8, Var("defg"))), - ("hi", located(0, 14, 0, 16, Var("jkl"))), - ], - "mn", - ); - let arena = Bump::new(); - let actual = parse_with(&arena, input); - - assert_eq!( - Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), - actual - ); - } - - #[test] - 
fn string_with_four_interpolations() { - let input = indoc!( - r#" - "\(abc)def\(ghi)jkl\(mno)pqrs\(tuv)" - "# - ); - let (args, ret) = ( - vec![ - ("", located(0, 2, 0, 4, Var("abc"))), - ("def", located(0, 11, 0, 13, Var("ghi"))), - ("jkl", located(0, 20, 0, 22, Var("mno"))), - ("pqrs", located(0, 30, 0, 32, Var("tuv"))), - ], - "", - ); - let arena = Bump::new(); - let actual = parse_with(&arena, input); - - assert_eq!( - Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), - actual - ); - } - #[test] fn empty_source_file() { assert_parsing_fails("", FailReason::Eof(Region::zero()), Attempting::Expression); @@ -322,124 +135,109 @@ mod test_parser { #[test] fn zero_int() { - assert_parses_to("0", Int(0)); + assert_parses_to("0", Int("0")); } #[test] fn positive_int() { - assert_parses_to("1", Int(1)); - assert_parses_to("42", Int(42)); + assert_parses_to("1", Int("1")); + assert_parses_to("42", Int("42")); } #[test] fn negative_int() { - assert_parses_to("-1", Int(-1)); - assert_parses_to("-42", Int(-42)); + assert_parses_to("-1", Int("-1")); + assert_parses_to("-42", Int("-42")); } #[test] fn highest_int() { - assert_parses_to(i64::MAX.to_string().as_str(), Int(i64::MAX)); + assert_parses_to( + i64::MAX.to_string().as_str(), + Int(i64::MAX.to_string().as_str()), + ); } #[test] fn lowest_int() { - assert_parses_to(i64::MIN.to_string().as_str(), Int(i64::MIN)); + assert_parses_to( + i64::MIN.to_string().as_str(), + Int(i64::MIN.to_string().as_str()), + ); } #[test] fn int_with_underscore() { - assert_parses_to("1_2_34_567", Int(1234567)); - assert_parses_to("-1_2_34_567", Int(-1234567)); + assert_parses_to("1_2_34_567", Int("1_2_34_567")); + assert_parses_to("-1_2_34_567", Int("-1_2_34_567")); // The following cases are silly. They aren't supported on purpose, // but there would be a performance cost to explicitly disallowing them, // which doesn't seem like it would benefit anyone. 
- assert_parses_to("1_", Int(1)); - assert_parses_to("1__23", Int(123)); + assert_parses_to("1_", Int("1_")); + assert_parses_to("1__23", Int("1__23")); } #[quickcheck] fn all_i64_values_parse(num: i64) { - assert_parses_to(num.to_string().as_str(), Int(num)); - } - - #[test] - fn int_too_large() { - assert_parses_to( - (i64::MAX as i128 + 1).to_string().as_str(), - MalformedInt(Problem::OutsideSupportedRange), - ); - } - - #[test] - fn int_too_small() { - assert_parses_to( - (i64::MIN as i128 - 1).to_string().as_str(), - MalformedInt(Problem::OutsideSupportedRange), - ); + assert_parses_to(num.to_string().as_str(), Int(num.to_string().as_str())); } // FLOAT LITERALS #[test] fn zero_float() { - assert_parses_to("0.0", Float(0.0)); + assert_parses_to("0.0", Float("0.0")); } #[test] fn positive_float() { - assert_parses_to("1.0", Float(1.0)); - assert_parses_to("1.1", Float(1.1)); - assert_parses_to("42.0", Float(42.0)); - assert_parses_to("42.9", Float(42.9)); - } - - #[test] - fn highest_float() { - assert_parses_to(&format!("{}.0", f64::MAX), Float(f64::MAX)); + assert_parses_to("1.0", Float("1.0")); + assert_parses_to("1.1", Float("1.1")); + assert_parses_to("42.0", Float("42.0")); + assert_parses_to("42.9", Float("42.9")); } #[test] fn negative_float() { - assert_parses_to("-1.0", Float(-1.0)); - assert_parses_to("-1.1", Float(-1.1)); - assert_parses_to("-42.0", Float(-42.0)); - assert_parses_to("-42.9", Float(-42.9)); - } - - #[test] - fn lowest_float() { - assert_parses_to(&format!("{}.0", f64::MIN), Float(f64::MIN)); + assert_parses_to("-1.0", Float("-1.0")); + assert_parses_to("-1.1", Float("-1.1")); + assert_parses_to("-42.0", Float("-42.0")); + assert_parses_to("-42.9", Float("-42.9")); } #[test] fn float_with_underscores() { - assert_parses_to("1_23_456.0_1_23_456", Float(123456.0123456)); - assert_parses_to("-1_23_456.0_1_23_456", Float(-123456.0123456)); + assert_parses_to("1_23_456.0_1_23_456", Float("1_23_456.0_1_23_456")); + 
assert_parses_to("-1_23_456.0_1_23_456", Float("-1_23_456.0_1_23_456")); + } + + #[test] + fn highest_float() { + let string = format!("{}.0", f64::MAX); + + assert_parses_to(&string, Float(&string)); + } + + #[test] + fn lowest_float() { + let string = format!("{}.0", f64::MIN); + + assert_parses_to(&string, Float(&string)); } #[quickcheck] fn all_f64_values_parse(num: f64) { - assert_parses_to(num.to_string().as_str(), Float(num)); + assert_parses_to(num.to_string().as_str(), Float(num.to_string().as_str())); } + // RECORD LITERALS + #[test] - fn float_too_large() { - assert_parses_to( - format!("{}1.0", f64::MAX).as_str(), - MalformedFloat(Problem::OutsideSupportedRange), - ); + fn empty_record() { + assert_parses_to("{}", EmptyRecord); } - #[test] - fn float_too_small() { - assert_parses_to( - format!("{}1.0", f64::MIN).as_str(), - MalformedFloat(Problem::OutsideSupportedRange), - ); - } - - // TODO test what happens when interpolated strings contain 1+ malformed idents + // TODO test hex/oct/binary parsing // // TODO test for \t \r and \n in string literals *outside* unicode escape sequence! //