Record raw strings during parse step

This commit is contained in:
Richard Feldman 2019-09-16 00:25:31 -04:00
parent fa9e074488
commit d54cf81f7b
40 changed files with 4111 additions and 7400 deletions

41
Cargo.lock generated
View file

@ -16,11 +16,6 @@ dependencies = [
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ascii"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "autocfg"
version = "0.1.4"
@ -54,18 +49,6 @@ dependencies = [
"bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "combine"
version = "3.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ascii 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "difference"
version = "2.0.0"
@ -76,11 +59,6 @@ name = "dogged"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "either"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "env_logger"
version = "0.6.2"
@ -440,7 +418,6 @@ name = "roc"
version = "0.1.0"
dependencies = [
"bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"dogged 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fraction 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -517,19 +494,6 @@ name = "unindent"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unreachable"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "void"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi"
version = "0.3.6"
@ -552,17 +516,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum ascii 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "91e320562a8fa3286a481b7189f89578ace6b20df99e123c87f2f509c957c5d6"
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd"
"checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "da3da6baa321ec19e1cc41d31bf599f00c783d0517095cdaf0332e3fe8d20680"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum dogged 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2638df109789fe360f0d9998c5438dd19a36678aaf845e46f285b688b1a1657a"
"checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b"
"checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
"checksum fraction 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1055159ac82fb210c813303f716b6c8db57ace9d5ec2dbbc2e1d7a864c1dd74e"
@ -613,8 +574,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum unindent 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "834b4441326c660336850c5c0926cc20548e848967a5f57bc20c2b741c8d41f4"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -7,7 +7,6 @@ authors = ["Richard Feldman <oss@rtfeldman.com>"]
dogged = { version = "0.2.0", optional = true }
log = "0.4.8"
petgraph = { version = "0.4.5", optional = true }
combine = "3.8.1"
im-rc = "13.0.0"
fraction = "0.6.2"
num = "0.2.0"

66
src/can/env.rs Normal file
View file

@ -0,0 +1,66 @@
use can::expr::Expr;
use can::pattern::Pattern;
use can::problem::Problem;
use can::procedure::{Procedure, References};
use can::symbol::Symbol;
use collections::{ImMap, MutMap};
use parse::ast;
use region::{Located, Region};
/// The canonicalization environment for a particular module.
pub struct Env {
/// The module's path. Unqualified references to identifiers and variant names are assumed
/// to be relative to this path.
pub home: String,
/// Problems we've encountered along the way, which will be reported to the user at the end.
pub problems: Vec<Problem>,
/// Variants either declared in this module, or imported.
pub variants: ImMap<Symbol, Located<Box<ast::VariantName>>>,
/// Former closures converted to top-level procedures.
pub procedures: MutMap<Symbol, Procedure>,
}
impl Env {
pub fn new(
home: String,
declared_variants: ImMap<Symbol, Located<Box<ast::VariantName>>>,
) -> Env {
Env {
home,
variants: declared_variants,
problems: Vec::new(),
procedures: MutMap::default(),
}
}
pub fn problem(&mut self, problem: Problem) -> () {
self.problems.push(problem)
}
pub fn register_closure(
&mut self,
symbol: Symbol,
args: Vec<Located<Pattern>>,
body: Located<Expr>,
definition: Region,
references: References,
) -> () {
// We can't if the closure is self tail recursive yet, because it doesn't know its final name yet.
// (Assign sets that.) Assume this is false, and let Assign change it to true after it sets final name.
let is_self_tail_recursive = false;
let name = None; // The Assign logic is also responsible for setting names after the fact.
let procedure = Procedure {
args,
name,
body,
is_self_tail_recursive,
definition,
references,
};
self.procedures.insert(symbol, procedure);
}
}

46
src/can/expr.rs Normal file
View file

@ -0,0 +1,46 @@
use can::pattern::Pattern;
use can::problem::RuntimeError;
use can::symbol::Symbol;
use operator::Operator;
use region::Located;
use std::i64;
/// The expression type of the `can` module.
///
/// Variants are grouped below into literals, lookups, pattern matching,
/// application, product types, sugar, and a marker for expressions that
/// compile but crash if reached. Sub-expressions are wrapped in `Located`.
#[derive(Clone, Debug, PartialEq)]
pub enum Expr {
// Literals
/// An integer literal, stored as an `i64`.
Int(i64),
/// A floating-point literal, stored as an `f64`.
Float(f64),
/// The empty string literal.
EmptyStr,
/// A string literal with its contents.
Str(Box<str>),
Char(char), // OBSOLETE
/// A list literal holding its element expressions.
List(Vec<Located<Expr>>),
/// The empty list literal.
EmptyList,
// Lookups
/// A lookup of the value identified by the given symbol.
Var(Symbol),
/// Works the same as Var, but has an important marking purpose.
/// See 13623e3f5f65ea2d703cf155f16650c1e8246502 for the bug this fixed.
FunctionPointer(Symbol),
/// We have a separate variant for this so that we can report errors
/// (including type errors later) in the context of the sugar rather than
/// confusingly talking about the desugared version the user can't see.
///
/// NOTE(review): presumably the pairs are (text preceding an interpolation,
/// interpolated expression) and the final `Box<str>` is the trailing text;
/// confirm against the code that builds this variant.
InterpolatedStr(Vec<(Box<str>, Located<Expr>)>, Box<str>),
// Pattern Matching
/// A case expression: the expression being matched, then the
/// (pattern, branch expression) pairs.
Case(Box<Located<Expr>>, Vec<(Located<Pattern>, Located<Expr>)>),
/// A group of (pattern, expression) assignments followed by one more
/// expression (presumably the body evaluated with them in scope; confirm).
Assign(Vec<(Located<Pattern>, Located<Expr>)>, Box<Located<Expr>>),
// Application
/// A call: the expression being called, then its argument expressions.
Call(Box<Located<Expr>>, Vec<Located<Expr>>),
/// A variant identified by symbol, with optional argument expressions.
/// NOTE(review): `None` presumably means the variant was used without
/// arguments; confirm.
ApplyVariant(Symbol, Option<Vec<Located<Expr>>>),
// Product Types
/// The empty record.
EmptyRecord,
// Sugar
/// An `if` with three sub-expressions (presumably condition, then-branch,
/// else-branch, in that order; confirm).
If(Box<Located<Expr>>, Box<Located<Expr>>, Box<Located<Expr>>),
/// A binary operator application: left operand, operator, right operand.
Operator(Box<Located<Expr>>, Located<Operator>, Box<Located<Expr>>),
// Compiles, but will crash if reached
RuntimeError(RuntimeError),
}

1334
src/can/mod.rs Normal file

File diff suppressed because it is too large Load diff

203
src/can/pattern.rs Normal file
View file

@ -0,0 +1,203 @@
use can::env::Env;
use can::problem::Problem;
use can::scope::Scope;
use can::symbol::Symbol;
use collections::ImMap;
use ident::{Ident, VariantName};
use parse::ast;
use region::{Located, Region};
/// A pattern, including possible problems (e.g. shadowing) so that
/// codegen can generate a runtime error if this pattern is reached.
#[derive(Clone, Debug, PartialEq)]
pub enum Pattern {
/// An identifier, represented by its symbol.
Identifier(Symbol),
/// A variant with no arguments, represented by its symbol.
Variant(Symbol),
/// A variant (by symbol) applied to argument patterns.
AppliedVariant(Symbol, Vec<Located<Pattern>>),
/// An integer literal pattern.
IntLiteral(i64),
/// A floating-point literal pattern.
FloatLiteral(f64),
/// A string literal pattern.
ExactString(String),
/// The empty record literal pattern.
EmptyRecordLiteral,
/// The underscore pattern.
Underscore,
// Runtime Exceptions
/// The identifier this pattern would have shadowed; shadowing is reported
/// as a problem rather than allowed.
Shadowed(Located<Ident>),
/// A variant name that could not be found in scope.
UnrecognizedVariant(Located<VariantName>),
// Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments!
/// The region of a pattern that is not supported where it was used.
UnsupportedPattern(Region),
}
/// Different patterns are supported in different circumstances.
/// For example, case branches can pattern match on number literals, but
/// assignments and function args can't. Underscore is supported in function
/// arg patterns and in case branch patterns, but not in assignments.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PatternType {
/// The pattern is the target of an assignment.
Assignment,
/// The pattern is a function argument.
FunctionArg,
/// The pattern is a branch of a case expression.
CaseBranch,
}
/// Canonicalizes a parsed pattern into a `Located<Pattern>`.
///
/// This is currently a stub: every parameter is unused (hence the leading
/// underscores) and the body unconditionally panics. The previous
/// implementation is kept below as commented-out code so it can be restored.
///
/// # Panics
///
/// Always panics with "TODO restore can_pattern".
pub fn canonicalize_pattern(
_env: &mut Env,
_scope: &mut Scope,
_pattern_type: &PatternType,
_loc_pattern: &Located<ast::Pattern>,
_shadowable_idents: &mut ImMap<Ident, (Symbol, Region)>,
) -> Located<Pattern> {
panic!("TODO restore can_pattern");
// Everything from here to the end of the function is the disabled former
// implementation; none of it is compiled.
//use can::ast::Pattern::*;
//let region = loc_pattern.region.clone();
//let pattern = match &loc_pattern.value {
// &Identifier(ref name) => {
// let unqualified_ident = Ident::Unqualified(name.to_string());
// // We use shadowable_idents for this, and not scope, because for assignments
// // they are different. When canonicalizing a particular assignment, that new
// // ident is in scope (for recursion) but not shadowable.
// //
// // For example, when canonicalizing (fibonacci = ...), `fibonacci` should be in scope
// // so that it can refer to itself without getting a naming problem, but it should not
// // be in the collection of shadowable idents because you can't shadow yourself!
// match shadowable_idents.get(&unqualified_ident) {
// Some((_, region)) => {
// let loc_shadowed_ident = Located {
// region: region.clone(),
// value: unqualified_ident,
// };
// // This is already in scope, meaning it's about to be shadowed.
// // Shadowing is not allowed!
// env.problem(Problem::Shadowing(loc_shadowed_ident.clone()));
// // Change this Pattern to a Shadowed variant, so that
// // codegen knows to generate a runtime exception here.
// Pattern::Shadowed(loc_shadowed_ident)
// }
// None => {
// // Make sure we aren't shadowing something in the home module's scope.
// let qualified_ident =
// Ident::Qualified(env.home.clone(), unqualified_ident.name());
// match scope.idents.get(&qualified_ident) {
// Some((_, region)) => {
// let loc_shadowed_ident = Located {
// region: region.clone(),
// value: qualified_ident,
// };
// // This is already in scope, meaning it's about to be shadowed.
// // Shadowing is not allowed!
// env.problem(Problem::Shadowing(loc_shadowed_ident.clone()));
// // Change this Pattern to a Shadowed variant, so that
// // codegen knows to generate a runtime exception here.
// Pattern::Shadowed(loc_shadowed_ident)
// }
// None => {
// let new_ident = qualified_ident.clone();
// let new_name = qualified_ident.name();
// let symbol = scope.symbol(&new_name);
// // This is a fresh identifier that wasn't already in scope.
// // Add it to scope!
// let symbol_and_region = (symbol.clone(), region.clone());
// // Add this to both scope.idents *and* shadowable_idents.
// // The latter is relevant when recursively canonicalizing Variant patterns,
// // which can bring multiple new idents into scope. For example, it's important
// // that we catch (Blah foo foo) as being an example of shadowing.
// scope
// .idents
// .insert(new_ident.clone(), symbol_and_region.clone());
// shadowable_idents.insert(new_ident, symbol_and_region);
// Pattern::Identifier(symbol)
// }
// }
// }
// }
// }
// &AppliedVariant((ref loc_name, ref loc_args)) => {
// // Canonicalize the variant's arguments.
// let mut can_args: Vec<Located<Pattern>> = Vec::new();
// for loc_arg in loc_args {
// let loc_can_arg =
// canonicalize_pattern(env, scope, pattern_type, &loc_arg, shadowable_idents);
// can_args.push(loc_can_arg);
// }
// // Canonicalize the variant's name.
// let symbol = Symbol::from_variant(&loc_name.value, &env.home);
// if env.variants.contains_key(&symbol) {
// // No problems; the qualified variant name was in scope!
// Pattern::AppliedVariant(symbol, can_args)
// } else {
// // We couldn't find the variant name in scope. NAMING PROBLEM!
// env.problem(Problem::UnrecognizedVariant(loc_name.clone()));
// Pattern::UnrecognizedVariant(loc_name.clone())
// }
// }
// &Variant(ref loc_name) => {
// // Canonicalize the variant's name.
// let symbol = Symbol::from_variant(&loc_name.value, &env.home);
// if env.variants.contains_key(&symbol) {
// // No problems; the qualified variant name was in scope!
// Pattern::Variant(symbol)
// } else {
// // We couldn't find the variant name in scope. NAMING PROBLEM!
// env.problem(Problem::UnrecognizedVariant(loc_name.clone()));
// Pattern::UnrecognizedVariant(loc_name.clone())
// }
// }
// &IntLiteral(ref num) => match pattern_type {
// CaseBranch => Pattern::IntLiteral(*num),
// ptype @ Assignment | ptype @ FunctionArg => {
// unsupported_pattern(env, *ptype, region)
// }
// },
// &FloatLiteral(ref num) => match pattern_type {
// CaseBranch => Pattern::FloatLiteral(*num),
// ptype @ Assignment | ptype @ FunctionArg => {
// unsupported_pattern(env, *ptype, region)
// }
// },
// &ExactString(ref string) => match pattern_type {
// CaseBranch => Pattern::ExactString(string.clone()),
// ptype @ Assignment | ptype @ FunctionArg => {
// unsupported_pattern(env, *ptype, region)
// }
// },
// &Underscore => match pattern_type {
// CaseBranch | FunctionArg => Pattern::Underscore,
// Assignment => unsupported_pattern(env, Assignment, region),
// },
// &EmptyRecordLiteral => Pattern::EmptyRecordLiteral,
//};
//Located {
// region,
// value: pattern,
//}
}
/// When we detect an unsupported pattern type (e.g. `5 = 1 + 2` is unsupported because you
/// can't assign to Int patterns), report it to `Env` and return an `UnsupportedPattern`
/// runtime error pattern.
///
/// `pattern_type` says where the pattern appeared and `region` says where it is; both are
/// recorded in the reported `Problem::UnsupportedPattern`, and `region` is also carried by
/// the returned pattern.
fn _unsupported_pattern(env: &mut Env, pattern_type: PatternType, region: Region) -> Pattern {
    // The problem report and the returned pattern each need their own region value.
    env.problem(Problem::UnsupportedPattern(pattern_type, region.clone()));

    Pattern::UnsupportedPattern(region)
}

44
src/can/problem.rs Normal file
View file

@ -0,0 +1,44 @@
use can::expr::Expr;
use can::pattern::{Pattern, PatternType};
use ident::{Ident, VariantName};
use operator::Operator;
use region::{Located, Region};
/// Problems that can occur in the course of canonicalization.
///
/// Each variant carries the located identifier, variant name, region or
/// nested problem value it refers to.
#[derive(Clone, Debug, PartialEq)]
pub enum Problem {
/// An identifier that is already in scope was about to be shadowed.
Shadowing(Located<Ident>),
/// NOTE(review): presumably a function name that could not be resolved; confirm.
UnrecognizedFunctionName(Located<Ident>),
/// NOTE(review): presumably a constant that could not be resolved; confirm.
UnrecognizedConstant(Located<Ident>),
/// A variant name that could not be found in scope.
UnrecognizedVariant(Located<VariantName>),
/// NOTE(review): presumably an assignment whose name is never referenced; confirm.
UnusedAssignment(Located<Ident>),
/// NOTE(review): presumably a function argument that is never referenced; confirm.
UnusedArgument(Located<Ident>),
/// Wraps a `PrecedenceProblem`.
PrecedenceProblem(PrecedenceProblem),
// Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments!
/// A pattern used in a position (`PatternType`) that does not support it,
/// together with the pattern's region.
UnsupportedPattern(PatternType, Region),
/// The located identifiers involved in a circular assignment.
CircularAssignment(Vec<Located<Ident>>),
/// Wraps a `RuntimeError`.
RuntimeError(RuntimeError),
}
/// A problem with operator precedence.
#[derive(Clone, Debug, PartialEq)]
pub enum PrecedenceProblem {
/// Carries two located operators.
/// NOTE(review): presumably two non-associative operators that were used
/// together without disambiguation; confirm against the code that reports this.
BothNonAssociative(Located<Operator>, Located<Operator>),
}
/// The payload of `Expr::RuntimeError` and `Problem::RuntimeError`: an error
/// attached to code that compiles but will crash if it is reached.
#[derive(Clone, Debug, PartialEq)]
pub enum RuntimeError {
/// A precedence problem together with a located expression
/// (presumably the expression in which it occurred; confirm).
InvalidPrecedence(PrecedenceProblem, Box<Located<Expr>>),
/// NOTE(review): presumably a function name that could not be resolved; confirm.
UnrecognizedFunctionName(Located<Ident>),
/// NOTE(review): presumably a constant that could not be resolved; confirm.
UnrecognizedConstant(Located<Ident>),
/// A variant name that could not be found in scope.
UnrecognizedVariant(Located<VariantName>),
/// Carries a string (presumably the raw text of an out-of-range float literal; confirm).
FloatOutsideRange(Box<str>),
/// Carries a string (presumably the raw text of an out-of-range integer literal; confirm).
IntOutsideRange(Box<str>),
/// The integer-parsing error and a string (presumably the raw hexadecimal literal; confirm).
InvalidHex(std::num::ParseIntError, Box<str>),
/// The integer-parsing error and a string (presumably the raw octal literal; confirm).
InvalidOctal(std::num::ParseIntError, Box<str>),
/// The integer-parsing error and a string (presumably the raw binary literal; confirm).
InvalidBinary(std::num::ParseIntError, Box<str>),
/// A circular assignment: the identifiers involved, followed by data with
/// the same shape as the two fields of `Expr::Assign` (assignment pairs
/// and a boxed expression).
CircularAssignment(
Vec<Located<Ident>>,
Vec<(Located<Pattern>, Located<Expr>)>,
Box<Located<Expr>>,
),
}

72
src/can/procedure.rs Normal file
View file

@ -0,0 +1,72 @@
use can::expr::Expr;
use can::pattern::Pattern;
use can::symbol::Symbol;
use collections::ImSet;
use region::{Located, Region};
/// A procedure: argument patterns and a body expression, together with the
/// region of its definition and the references recorded for it.
#[derive(Clone, Debug, PartialEq)]
pub struct Procedure {
    /// The procedure's name, if one has been set; `new` leaves it as `None`.
    pub name: Option<String>,
    /// Whether the procedure is self tail recursive; `new` starts it as `false`.
    pub is_self_tail_recursive: bool,
    /// The region of the procedure's definition.
    pub definition: Region,
    /// The argument patterns.
    pub args: Vec<Located<Pattern>>,
    /// The body expression.
    pub body: Located<Expr>,
    /// The references recorded for this procedure.
    pub references: References,
}

impl Procedure {
    /// Builds a procedure that has no name yet and is not marked as self
    /// tail recursive; every other field is taken from the arguments.
    pub fn new(
        definition: Region,
        args: Vec<Located<Pattern>>,
        body: Located<Expr>,
        references: References,
    ) -> Procedure {
        // Neither of these is supplied by the caller; they start out "unset".
        let name = None;
        let is_self_tail_recursive = false;

        Self {
            definition,
            args,
            body,
            references,
            name,
            is_self_tail_recursive,
        }
    }
}
/// These are all ordered sets because they end up getting traversed in a graph search
/// to determine how assignments shuold be ordered. We want builds to be reproducible,
/// so it's important that building the same code gives the same order every time!
#[derive(Clone, Debug, PartialEq)]
pub struct References {
pub locals: ImSet<Symbol>,
pub globals: ImSet<Symbol>,
pub variants: ImSet<Symbol>,
pub calls: ImSet<Symbol>,
}
impl References {
pub fn new() -> References {
References {
locals: ImSet::default(),
globals: ImSet::default(),
variants: ImSet::default(),
calls: ImSet::default(),
}
}
pub fn union(mut self, other: References) -> Self {
self.locals = self.locals.union(other.locals);
self.globals = self.globals.union(other.globals);
self.variants = self.variants.union(other.variants);
self.calls = self.calls.union(other.calls);
self
}
pub fn has_local(&self, symbol: &Symbol) -> bool {
self.locals.contains(symbol)
}
pub fn has_variant(&self, symbol: &Symbol) -> bool {
self.variants.contains(symbol)
}
}

35
src/can/scope.rs Normal file
View file

@ -0,0 +1,35 @@
use can::symbol::Symbol;
use collections::ImMap;
use ident::Ident;
use region::Region;
/// The identifiers currently in scope, plus what is needed to create new
/// symbols: a prefix shared by every symbol and a counter for unique names.
#[derive(Clone, Debug, PartialEq)]
pub struct Scope {
    /// Identifiers in scope, each mapped to its symbol and a region.
    pub idents: ImMap<Ident, (Symbol, Region)>,
    /// Prefix passed to `Symbol::new` for every symbol this scope creates.
    symbol_prefix: String,
    /// The most recently handed-out unique id (0 when none has been generated).
    next_unique_id: u64,
}

impl Scope {
    /// Creates a scope containing `declared_idents`, whose symbols will all
    /// be built with `symbol_prefix`.
    pub fn new(symbol_prefix: String, declared_idents: ImMap<Ident, (Symbol, Region)>) -> Scope {
        // This is used to generate unique names for anonymous closures.
        // The counter always begins at 0; since it is incremented before each
        // use, the first generated symbol carries the id 1.
        let next_unique_id = 0;

        Self {
            idents: declared_idents,
            symbol_prefix,
            next_unique_id,
        }
    }

    /// Builds the symbol for `name` using this scope's prefix.
    pub fn symbol(&self, name: &str) -> Symbol {
        let prefix = self.symbol_prefix.as_str();

        Symbol::new(prefix, name)
    }

    /// Bumps the unique-id counter and returns a symbol made from this
    /// scope's prefix and the new counter value in decimal.
    pub fn gen_unique_symbol(&mut self) -> Symbol {
        self.next_unique_id += 1;

        let unique_name = self.next_unique_id.to_string();

        Symbol::new(self.symbol_prefix.as_str(), unique_name.as_str())
    }
}

445
src/can/string.rs Normal file
View file

@ -0,0 +1,445 @@
// use bumpalo::collections::string::String;
// use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use parse::ast::Expr;
// use parse::ast::{Attempting, Expr};
// use parse::ident;
// use parse::parser::{unexpected, unexpected_eof, Fail, Parser, State};
// use parse::problems::{Problem, Problems};
// use region::{Loc, Region};
use region::Region;
// use std::char;
// use std::iter::Peekable;
/// Stub for turning a raw string literal into a canonical `Expr`.
///
/// Not implemented at the moment: `_arena`, `_raw` and `_region` are all
/// unused and the body unconditionally panics. The commented-out code that
/// follows this function appears to be the former implementation, kept so it
/// can be restored.
///
/// # Panics
///
/// Always panics with "TODO restore canonicalization".
pub fn canonical_string_literal<'a>(_arena: &Bump, _raw: &'a str, _region: Region) -> Expr<'a> {
panic!("TODO restore canonicalization");
}
// let mut problems = std::vec::Vec::new();
// // Stores the accumulated string characters
// let mut buf = String::new_in(arena);
// // This caches the total string length of interpolated_pairs. Every
// // time we add a new pair to interpolated_pairs, we increment this
// // by the sum of whatever we parsed in order to obtain that pair.
// let mut buf_col_offset: usize = 0;
// // Stores interpolated identifiers, if any.
// let mut interpolated_pairs = Vec::new_in(arena);
// let mut chars = raw.chars();
// while let Some(ch) = chars.next() {
// match ch {
// // If it's a backslash, escape things.
// '\\' => match chars.next() {
// Some(next_ch) => {
// if let Some(ident) = handle_escaped_char(
// arena,
// &state,
// next_ch,
// &mut chars,
// &mut buf,
// &mut problems,
// )? {
// let expr = Expr::Var(ident);
// // +2 for `\(` and then another +1 for `)` at the end
// let parsed_length = buf.len() + 2 + ident.len() + 1;
// // Casting should always succeed in this section, because
// // if this string literal overflowed our maximum
// // line length, that would have already happened back
// // in the parsing step, and we never would have reached
// // this code. Still, debug_assert that they won't!
// debug_assert!(buf_col_offset <= std::u16::MAX as usize);
// debug_assert!(ident.len() <= std::u16::MAX as usize);
// debug_assert!((parsed_length - ident.len() - 1) <= std::u16::MAX as usize);
// let start_line = state.line;
// // Subtract ident length and another 1 for the `)`
// let start_col = state.column
// + buf_col_offset as u16
// + (parsed_length - ident.len() - 1) as u16;
// let ident_region = Region {
// start_line,
// start_col,
// end_line: start_line,
// end_col: start_col + ident.len() as u16 - 1,
// };
// let loc_expr = Loc {
// region: ident_region,
// value: expr,
// };
// // Push the accumulated string into the pairs list,
// // along with the ident that came after it.
// interpolated_pairs.push((buf.into_bump_str(), loc_expr));
// // Reset the buffer so we start working on a new string.
// buf = String::new_in(arena);
// // Advance the cached offset of how many chars we've parsed,
// // so the next time we see an interpolated ident, we can
// // correctly calculate its region.
// buf_col_offset += parsed_length;
// }
// }
// None => {
// problems.push(loc_char(Problem::TrailingBackslash, &state, buf.len()));
// }
// },
// '\t' => {
// // Tabs are syntax errors.
// problems.push(loc_char(Problem::Tab, &state, buf.len()));
// }
// '\r' => {
// // Carriage returns aren't allowed in string literals.
// problems.push(loc_char(Problem::CarriageReturn, &state, buf.len()));
// }
// normal_char => buf.push(normal_char),
// }
// }
// // We ran out of characters; this is the end of the string!
// if problems.is_empty() {
// let final_str = buf.into_bump_str();
// if interpolated_pairs.is_empty() {
// Expr::Str(final_str)
// } else {
// let tuple_ref = arena.alloc((interpolated_pairs.into_bump_slice(), final_str));
// Expr::InterpolatedStr(tuple_ref)
// }
// } else {
// Expr::MalformedStr(problems.into_boxed_slice())
// }
// }
// fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
// let start_line = state.line;
// let start_col = state.column + buf_len as u16;
// let end_line = start_line;
// // All invalid chars should have a length of 1
// let end_col = state.column + 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// Loc { region, value }
// }
// fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
// let start_line = state.line;
// let start_col = state.column + buf_len as u16;
// let end_line = start_line;
// // escapes should all be 2 chars long
// let end_col = state.column + 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// Loc { region, value }
// }
// fn loc_escaped_unicode<'a, V>(
// value: V,
// state: &State<'a>,
// buf_len: usize,
// hex_str_len: usize,
// ) -> Loc<V> {
// let start_line = state.line;
// // +1 due to the `"` which precedes buf.
// let start_col = state.column + buf_len as u16 + 1;
// let end_line = start_line;
// // +3 due to the `\u{` and another + 1 due to the `}`
// // -1 to prevent overshooting because end col is inclusive.
// let end_col = start_col + 3 + hex_str_len as u16 + 1 - 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// Loc { region, value }
// }
// #[inline(always)]
// fn handle_escaped_char<'a, I>(
// arena: &'a Bump,
// state: &State<'a>,
// ch: char,
// chars: &mut Peekable<I>,
// buf: &mut String<'a>,
// problems: &mut Problems,
// ) -> Result<Option<&'a str>, (Fail, State<'a>)>
// where
// I: Iterator<Item = char>,
// {
// match ch {
// '\\' => buf.push('\\'),
// '"' => buf.push('"'),
// 't' => buf.push('\t'),
// 'n' => buf.push('\n'),
// 'r' => buf.push('\r'),
// '0' => buf.push('\0'), // We explicitly support null characters, as we
// // can't be sure we won't receive them from Rust.
// 'u' => handle_escaped_unicode(arena, &state, chars, buf, problems)?,
// '(' => {
// let ident = parse_interpolated_ident(arena, state, chars)?;
// return Ok(Some(ident));
// }
// '\t' => {
// // Report and continue.
// // Tabs are syntax errors, but maybe the rest of the string is fine!
// problems.push(loc_escaped_char(Problem::Tab, &state, buf.len()));
// }
// '\r' => {
// // Report and continue.
// // Carriage returns aren't allowed in string literals,
// // but maybe the rest of the string is fine!
// problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len()));
// }
// '\n' => {
// // Report and bail out.
// // We can't safely assume where the string was supposed to end.
// problems.push(loc_escaped_char(
// Problem::NewlineInLiteral,
// &state,
// buf.len(),
// ));
// return Err(unexpected_eof(
// buf.len(),
// Attempting::UnicodeEscape,
// state.clone(),
// ));
// }
// _ => {
// // Report and continue.
// // An unsupported escaped char (e.g. \q) shouldn't halt parsing.
// problems.push(loc_escaped_char(
// Problem::UnsupportedEscapedChar,
// &state,
// buf.len(),
// ));
// }
// }
// Ok(None)
// }
// #[inline(always)]
// fn handle_escaped_unicode<'a, I>(
// arena: &'a Bump,
// state: &State<'a>,
// chars: &mut Peekable<I>,
// buf: &mut String<'a>,
// problems: &mut Problems,
// ) -> Result<(), (Fail, State<'a>)>
// where
// I: Iterator<Item = char>,
// {
// // \u{00A0} is how you specify a Unicode code point,
// // so we should always see a '{' next.
// if chars.next() != Some('{') {
// let start_line = state.line;
// // +1 due to the `"` which precedes buf
// let start_col = state.column + 1 + buf.len() as u16;
// let end_line = start_line;
// // All we parsed was `\u`, so end on the column after `\`'s column.
// let end_col = start_col + 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// problems.push(Loc {
// region,
// value: Problem::NoUnicodeDigits,
// });
// // The rest of the string literal might be fine. Keep parsing!
// return Ok(());
// }
// // Record the point in the string literal where we started parsing `\u`
// let start_of_unicode = buf.len();
// // Stores the accumulated unicode digits
// let mut hex_str = String::new_in(arena);
// while let Some(hex_char) = chars.next() {
// match hex_char {
// '}' => {
// // Done! Validate and add it to the buffer.
// match u32::from_str_radix(&hex_str, 16) {
// Ok(code_pt) => {
// if code_pt > 0x10FFFF {
// let start_line = state.line;
// // +1 due to the `"` which precedes buf
// // +3 due to the `\u{` which precedes the hex digits
// let start_col = state.column + 1 + buf.len() as u16 + 3;
// let end_line = start_line;
// // We want to underline only the number. That's the error!
// // -1 because we want to end on the last digit, not
// // overshoot it.
// let end_col = start_col + hex_str.len() as u16 - 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// problems.push(Loc {
// region,
// value: Problem::UnicodeCodePointTooLarge,
// });
// } else {
// // If it all checked out, add it to
// // the main buffer.
// match char::from_u32(code_pt) {
// Some(ch) => buf.push(ch),
// None => {
// problems.push(loc_escaped_unicode(
// Problem::InvalidUnicodeCodePoint,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// }
// }
// }
// }
// Err(_) => {
// let problem = if hex_str.is_empty() {
// Problem::NoUnicodeDigits
// } else {
// Problem::NonHexCharsInUnicodeCodePoint
// };
// problems.push(loc_escaped_unicode(
// problem,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// }
// }
// // We are now done processing the unicode portion of the string,
// // so exit the loop without further advancing the iterator.
// return Ok(());
// }
// '\t' => {
// // Report and continue.
// // Tabs are syntax errors, but maybe the rest of the string is fine!
// problems.push(loc_escaped_unicode(
// Problem::Tab,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// }
// '\r' => {
// // Report and continue.
// // Carriage returns aren't allowed in string literals,
// // but maybe the rest of the string is fine!
// problems.push(loc_escaped_unicode(
// Problem::CarriageReturn,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// }
// '\n' => {
// // Report and bail out.
// // We can't safely assume where the string was supposed to end.
// problems.push(loc_escaped_unicode(
// Problem::NewlineInLiteral,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// return Err(unexpected_eof(
// buf.len(),
// Attempting::UnicodeEscape,
// state.clone(),
// ));
// }
// normal_char => hex_str.push(normal_char),
// }
// // If we're about to hit the end of the string, and we didn't already
// // complete parsing a valid unicode escape sequence, this is a malformed
// // escape sequence - it wasn't terminated!
// if chars.peek() == Some(&'"') {
// // Record a problem and exit the loop early, so the string literal
// // parsing logic can consume the quote and do its job as normal.
// let start_line = state.line;
// // +1 due to the `"` which precedes buf.
// let start_col = state.column + buf.len() as u16 + 1;
// let end_line = start_line;
// // +3 due to the `\u{`
// // -1 to prevent overshooting because end col is inclusive.
// let end_col = start_col + 3 + hex_str.len() as u16 - 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// problems.push(Loc {
// region,
// value: Problem::MalformedEscapedUnicode,
// });
// return Ok(());
// }
// }
// Ok(())
// }
// #[inline(always)]
// fn parse_interpolated_ident<'a, I>(
// arena: &'a Bump,
// state: &State<'a>,
// chars: &mut Peekable<I>,
// ) -> Result<&'a str, (Fail, State<'a>)>
// where
// I: Iterator<Item = char>,
// {
// // This will return Err on invalid identifiers like "if"
// let ((string, next_char), state) = ident::parse_into(arena, chars, state.clone())?;
// // Make sure we got a closing ) to end the interpolation.
// match next_char {
// Some(')') => Ok(string),
// Some(ch) => Err(unexpected(ch, 0, state, Attempting::InterpolatedString)),
// None => Err(unexpected_eof(0, Attempting::InterpolatedString, state)),
// }
// }

28
src/can/symbol.rs Normal file
View file

@ -0,0 +1,28 @@
use ident::VariantName;
/// A globally unique identifier, used for both vars and variants.
/// It will be used directly in code gen.
///
/// This is a newtype around a `String`; the wrapped field is private, so
/// values are built through `Symbol::new` / `Symbol::from_variant`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Symbol(String);
impl Symbol {
    /// Builds a symbol whose text is `prefix` immediately followed by `name`.
    /// No separator is inserted between the two parts.
    pub fn new(prefix: &str, name: &str) -> Symbol {
        let mut text = String::with_capacity(prefix.len() + name.len());
        text.push_str(prefix);
        text.push_str(name);

        Symbol(text)
    }

    /// Builds the symbol for a variant name.
    ///
    /// An unqualified variant is prefixed with `home`; a qualified variant is
    /// prefixed with its own path and `home` is ignored.
    pub fn from_variant(variant_name: &VariantName, home: &str) -> Symbol {
        let (prefix, name) = match *variant_name {
            VariantName::Unqualified(ref name) => (home, name),
            VariantName::Qualified(ref path, ref name) => (path.as_str(), name),
        };

        Symbol::new(prefix, name)
    }
}
impl Into<String> for Symbol {
fn into(self) -> String {
let Symbol(string) = self;
string
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,7 @@
use canonicalize::Expr::{self, *};
use canonicalize::{Pattern, Procedure, Symbol};
use can::expr::Expr::{self, *};
use can::pattern::Pattern;
use can::procedure::Procedure;
use can::symbol::Symbol;
use collections::ImMap;
use operator::{ArgSide, Operator};
use region::{Located, Region};
@ -93,13 +95,14 @@ fn constrain_op(
) -> Constraint {
let op = loc_op.value;
let op_types = Type::for_operator(op);
let fn_var = subs.mk_flex_var();
// TODO use fn_var
let _fn_var = subs.mk_flex_var();
let ret_var = subs.mk_flex_var();
let ret_type = Variable(ret_var);
let ret_reason = Reason::OperatorRet(op);
let expected_ret_type = ForReason(ret_reason, op_types.ret, region.clone());
let (l_var, l_con) = constrain_op_arg(
let (_l_var, l_con) = constrain_op_arg(
ArgSide::Left,
bound_vars,
subs,
@ -107,7 +110,7 @@ fn constrain_op(
op_types.left,
l_loc_expr,
);
let (r_var, r_con) = constrain_op_arg(
let (_r_var, r_con) = constrain_op_arg(
ArgSide::Right,
bound_vars,
subs,
@ -116,8 +119,8 @@ fn constrain_op(
r_loc_expr,
);
let vars = vec![fn_var, ret_var, l_var, r_var];
// TODO occurs check!
// let vars = vec![fn_var, ret_var, l_var, r_var];
// return $ exists (funcVar:resultVar:argVars) $ CAnd ...
And(vec![
@ -226,7 +229,7 @@ pub fn constrain_defs(
subs: &mut Subs,
ret_con: Constraint,
) -> Constraint {
let mut rigid_info = Info::with_capacity(assignments.len());
let rigid_info = Info::with_capacity(assignments.len());
let mut flex_info = Info::with_capacity(assignments.len());
for (loc_pattern, loc_expr) in assignments {
@ -320,7 +323,7 @@ fn string() -> Type {
builtin_type("String", "String", Vec::new())
}
fn num(var: Variable) -> Type {
fn _num(var: Variable) -> Type {
builtin_type("Num", "Num", vec![Type::Variable(var)])
}
@ -514,9 +517,9 @@ pub fn constrain_procedure(
}
struct Args {
vars: Vec<Variable>,
typ: Type,
ret_type: Type,
pub vars: Vec<Variable>,
pub typ: Type,
pub ret_type: Type,
}
fn constrain_args<I>(args: I, subs: &mut Subs, state: &mut PatternState) -> Args
@ -571,7 +574,7 @@ struct PatternState {
impl PatternState {
pub fn add_pattern(&mut self, loc_pattern: Located<Pattern>, expected: Expected<Type>) {
use canonicalize::Pattern::*;
use can::pattern::Pattern::*;
let region = loc_pattern.region;

View file

@ -1,2 +0,0 @@
pub mod parse;
pub mod parse_state;

File diff suppressed because it is too large Load diff

View file

@ -1,100 +0,0 @@
use combine::lib::fmt;
use combine::stream::state::{Positioner, RangePositioner};
use combine::stream::Resetable;
// Plan:
//
// 1. Let space parsers check indentation. They should expect indentation to only ever increase (right?) when
// doing a many_whitespaces or many1_whitespaces. Multiline strings can have separate whitespace parsers.
// 2. For any expression that has subexpressions (e.g. ifs, parens, operators) record their indentation levels
// by doing .and(position()) followed by .and_then() which says "I can have a declaration inside me as
// long as the entire decl is indented more than me."
// 3. Make an alternative to RangeStreamOnce where uncons_while barfs on \t (or maybe just do this in whitespaces?)
/// Struct which represents a position in a source file.
///
/// Besides the usual line/column pair it tracks the indentation column of the
/// current line, which is recorded when the first non-space character of that
/// line is consumed.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct IndentablePosition {
/// Current line of the input
pub line: u32,
/// Current column of the input
pub column: u16,
/// Current indentation level, in columns (so no indent is col 1 - this saves an arithmetic operation.)
pub indent_col: u16,
/// True at the beginning of each line, then false after encountering the first nonspace char.
pub is_indenting: bool,
}
clone_resetable! { () IndentablePosition }
impl Default for IndentablePosition {
fn default() -> Self {
IndentablePosition {
line: 1,
column: 1,
indent_col: 1,
is_indenting: true,
}
}
}
impl fmt::Display for IndentablePosition {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"line: {}, column: {}, indent_col: {}, is_indenting: {}",
self.line, self.column, self.indent_col, self.is_indenting
)
}
}
impl IndentablePosition {
pub fn new() -> Self {
IndentablePosition::default()
}
}
impl Positioner<char> for IndentablePosition {
    type Position = IndentablePosition;

    /// Returns a copy of the current position.
    #[inline(always)]
    fn position(&self) -> IndentablePosition {
        *self
    }

    /// Advances the position past one character.
    ///
    /// * A newline moves to column 1 of the next line and restarts indentation tracking.
    /// * A space only advances the column.
    /// * Any other character advances the column; if it is the first such
    ///   character on the line, its column is recorded as the line's indentation.
    #[inline]
    fn update(&mut self, item: &char) {
        let ch = *item;

        if ch == '\n' {
            self.column = 1;
            self.line += 1;
            self.indent_col = 1;
            self.is_indenting = true;
            return;
        }

        // A line does not count as indented until a nonspace character shows
        // up; until then it is still a blank line.
        if ch != ' ' && self.is_indenting {
            self.indent_col = self.column;
            self.is_indenting = false;
        }

        self.column += 1;
    }
}
impl<'a> RangePositioner<char, &'a str> for IndentablePosition {
    /// Advances the position past every character of `range`, in order.
    fn update_range(&mut self, range: &&'a str) {
        range.chars().for_each(|ch| self.update(&ch));
    }
}

View file

@ -1,721 +0,0 @@
use expr::{Expr, Ident, Pattern, VariantName};
use expr::Pattern::*;
use expr;
use operator::Operator::*;
use operator::Operator;
use std::rc::Rc;
use std::fmt;
use collections::ImMap;
use self::Problem::*;
use fraction::Fraction;
use region::{Located, Region};
/// Evaluates a whole expression, starting from an empty variable scope.
///
/// The expression is first passed through `prepare_for_eval`.
pub fn eval(expr: Located<Expr>) -> Evaluated {
    let prepared = prepare_for_eval(expr);
    let empty_scope = ImMap::default();

    scoped_eval(prepared, &empty_scope)
}
/// Rewrites an expression into the form `scoped_eval` expects: operator
/// precedence/associativity is applied first, then `apply_pizza` is mapped
/// over the result.
///
/// Panics if `apply_precedence_and_associativity` does not produce a value
/// (its result is unwrapped).
fn prepare_for_eval(expr: Located<Expr>) -> Located<Expr> {
    let reordered = expr::apply_precedence_and_associativity(expr).unwrap();

    reordered.map(&apply_pizza)
}
/// The result of evaluating an expression: either a value or an `EvalError`.
#[derive(Clone, Debug, PartialEq)]
pub enum Evaluated {
// Literals
Int(i64),
Frac(Fraction),
Approx(f64),
EmptyStr,
Str(String),
InterpolatedStr(Vec<(String, Ident)>, String),
Char(char),
// A function value: argument patterns, body, and the scope captured when the
// closure expression was evaluated.
Closure(Vec<Pattern>, Box<Located<Expr>>, Scope),
// Sum Types
// A variant name with its already-evaluated arguments (`None` when the
// variant was written without any).
ApplyVariant(VariantName, Option<Vec<Evaluated>>),
// Product Types
EmptyRecord,
// Errors
// Where evaluation failed, and why.
EvalError(Region, Problem)
}
/// The reasons evaluation or pattern matching can fail.
#[derive(Clone, Debug, PartialEq)]
pub enum Problem {
// A variable was referenced that is not in scope.
UnrecognizedVarName(String),
// Carries a human-readable description of the mismatch.
TypeMismatch(String),
// An assignment tried to bind a name that is already in scope.
ReassignedVarName(String),
WrongArity(u32 /* Expected */, u32 /* Provided */),
NotEqual, // Used when (for example) a string literal pattern match fails
// No branch of a case-expression matched the value.
NoBranchesMatched,
}
/// The variables visible during evaluation: variable name -> evaluated value.
type Scope = ImMap<String, Rc<Evaluated>>;
/// Evaluates `expr` with the variables in `vars` in scope.
///
/// Failures are reported in-band as `Evaluated::EvalError` carrying the region
/// of `expr`; this function itself does not return a `Result`.
pub fn scoped_eval(expr: Located<Expr>, vars: &Scope) -> Evaluated {
    use self::Evaluated::*;

    let region = expr.region;

    match expr.value {
        // Literals map directly onto their evaluated counterparts.
        Expr::Int(num) => Int(num),
        Expr::Approx(num) => Approx(num),
        Expr::Char(ch) => Char(ch),
        Expr::EmptyStr => EmptyStr,
        Expr::Str(string) => Str(string),
        Expr::EmptyRecord => EmptyRecord,
        Expr::Frac(numerator, denominator) => Frac(fraction_from_i64s(numerator, denominator)),

        // A closure captures a copy of the scope it was created in.
        Expr::Closure(args, body) => {
            let arg_patterns = args.into_iter().map(|loc_pattern| loc_pattern.value).collect();

            Closure(arg_patterns, body, vars.clone())
        }

        // Resolve variable names
        Expr::Var(ident) => {
            let var_name = ident.name();

            match vars.get(&var_name) {
                Some(resolved) => (**resolved).clone(),
                None => EvalError(region, UnrecognizedVarName(var_name)),
            }
        }

        // Each (literal segment, variable) pair is appended in order, followed
        // by the trailing literal. Every interpolated variable must be a Str.
        Expr::InterpolatedStr(pairs, trailing_str) => {
            let mut output = String::new();

            for (segment, loc_ident) in pairs {
                let var_name = loc_ident.value.name();
                let resolved = match vars.get(&var_name) {
                    Some(resolved) => resolved,
                    None => return EvalError(region, UnrecognizedVarName(var_name)),
                };

                match **resolved {
                    Str(ref var_string) => {
                        output.push_str(segment.as_str());
                        output.push_str(var_string.as_str());
                    }
                    _ => return EvalError(region, TypeMismatch(var_name)),
                }
            }

            output.push_str(trailing_str.as_str());

            Str(output)
        }

        Expr::Assign(located_pattern, assigned_expr, returned_expr) => {
            eval_assign(located_pattern, *assigned_expr, *returned_expr, vars)
        }

        // An unknown function name becomes an EvalError value, which
        // eval_apply then rejects as a non-function.
        Expr::CallByName(ident, args) => {
            let fn_name = ident.name();
            let callee = match vars.get(&fn_name) {
                Some(resolved) => (**resolved).clone(),
                None => EvalError(region, UnrecognizedVarName(fn_name)),
            };

            eval_apply(region, callee, args, vars)
        }

        Expr::ApplyVariant(name, None) => ApplyVariant(name, None),
        Expr::ApplyVariant(name, Some(exprs)) => {
            let evaluated_args = exprs.into_iter().map(|arg| scoped_eval(arg, vars)).collect();

            ApplyVariant(name, Some(evaluated_args))
        }

        Expr::Apply(func_expr, args) => {
            let callee = scoped_eval(*func_expr, vars);

            eval_apply(region, callee, args, vars)
        }

        Expr::Case(condition, branches) => {
            let scrutinee = scoped_eval(*condition, vars);

            eval_case(region, scrutinee, branches, vars)
        }

        // Both operands are evaluated (left first) before the operator is applied.
        Expr::Operator(left_arg, op, right_arg) => {
            let left = scoped_eval(*left_arg, vars);
            let right = scoped_eval(*right_arg, vars);

            eval_operator(region, &left, op.value, &right)
        }

        // Only the argument-less variants named True / False are accepted as
        // conditions; only the selected branch is evaluated.
        Expr::If(condition, if_true, if_false) => {
            let chosen_branch = match scoped_eval(*condition, vars) {
                ApplyVariant(variant_name, None) => match variant_name.name().as_str() {
                    "True" => Some(if_true),
                    "False" => Some(if_false),
                    _ => None,
                },
                _ => None,
            };

            match chosen_branch {
                Some(branch) => scoped_eval(*branch, vars),
                None => EvalError(region, TypeMismatch("non-Bool used in `if` condition".to_string())),
            }
        }
    }
}
fn eval_assign(pattern: Located<Pattern>, assigned_expr: Located<Expr>, returned_expr: Located<Expr>, vars: &Scope) -> Evaluated {
use self::Evaluated::*;
let pattern_region = pattern.region;
match pattern.value {
Identifier(ident) => {
let ident_name = ident.name();
if vars.contains_key(&ident_name) {
EvalError(pattern_region, ReassignedVarName(ident_name))
} else {
// Create a new scope containing the new declaration.
let mut new_vars = vars.clone();
let evaluated_defn = scoped_eval(assigned_expr, vars);
new_vars.insert(ident_name, Rc::new(evaluated_defn));
// Evaluate in_expr with that new scope's variables.
scoped_eval(returned_expr, &new_vars)
}
},
Integer(_) => {
panic!("You cannot assign integers to other values!");
},
Fraction(_, _) => {
panic!("You cannot assign fractions to other values!");
},
Variant(_name, _patterns) => {
panic!("Pattern matching on variants is not yet supported!");
},
Underscore => {
panic!("Cannot assign to the _ pattern!");
},
Pattern::EmptyRecordLiteral => {
panic!("Cannot assign to the {} pattern!");
},
}
}
#[inline(always)]
pub fn call(region: Region, evaluated: Evaluated, args: Vec<Located<Expr>>) -> Evaluated {
eval_apply(region, evaluated, args, &HashMap::new())
}
/// Applies an evaluated function value to (unevaluated) arguments.
///
/// The arguments are evaluated in the union of `vars` and the closure's
/// captured scope, matched against the closure's patterns, and the body is
/// then evaluated in the resulting scope. Anything other than a closure
/// yields an `EvalError(TypeMismatch)`.
#[inline(always)]
fn eval_apply(region: Region, evaluated: Evaluated, args: Vec<Located<Expr>>, vars: &Scope) -> Evaluated {
    use self::Evaluated::*;

    let (arg_patterns, body, closure_vars) = match evaluated {
        Closure(arg_patterns, body, closure_vars) => (arg_patterns, body, closure_vars),
        val => {
            return EvalError(region, TypeMismatch(format!("Tried to call a non-function: {}", val)));
        }
    };

    let combined_vars = vars.clone().union(closure_vars);

    let mut evaluated_args = Vec::with_capacity(args.len());
    for arg in args {
        evaluated_args.push(scoped_eval(arg, &combined_vars));
    }

    match eval_closure(evaluated_args, arg_patterns, &combined_vars) {
        Ok(new_vars) => scoped_eval(*body, &new_vars),
        Err(prob) => EvalError(region, prob),
    }
}
/// Builds the scope a closure body runs in by matching each argument against
/// the corresponding parameter pattern, on top of a copy of `vars`.
///
/// Returns `WrongArity(expected, provided)` when the counts differ, or the
/// first problem reported by `pattern_match`.
#[inline(always)]
fn eval_closure(args: Vec<Evaluated>, arg_patterns: Vec<Pattern>, vars: &Scope)
    -> Result<Scope, Problem>
{
    let expected = arg_patterns.len();
    let provided = args.len();

    if expected != provided {
        return Err(WrongArity(expected as u32, provided as u32));
    }

    // Create a new scope for the function to use.
    let mut new_vars = vars.clone();

    for (arg, pattern) in args.into_iter().zip(arg_patterns) {
        pattern_match(&arg, &pattern, &mut new_vars)?;
    }

    Ok(new_vars)
}
/// Converts a Rust `bool` into the argument-less variant `True` or `False`.
fn bool_variant(is_true: bool) -> Evaluated {
    let variant = if is_true { "True" } else { "False" };

    Evaluated::ApplyVariant(VariantName::Unqualified(variant.to_string()), None)
}
/// Implements `==`, producing a `True`/`False` variant, or an
/// `EvalError(TypeMismatch)` for pairs of values that are not comparable here.
///
/// NOTE(review): two variants that both carry arguments are compared by name
/// and argument *count* only - the argument values themselves are not
/// inspected. Confirm whether that is intended.
fn eq(region: Region, evaluated1: &Evaluated, evaluated2: &Evaluated) -> Evaluated {
    use self::Evaluated::*;

    // `None` means "these two values cannot be compared".
    let verdict = match (evaluated1, evaluated2) {
        // All functions are defined as equal
        (Closure(..), Closure(..)) => Some(true),

        (ApplyVariant(left, None), ApplyVariant(right, None)) => Some(left == right),
        (ApplyVariant(left, Some(left_args)), ApplyVariant(right, Some(right_args))) => {
            Some(left == right && left_args.len() == right_args.len())
        }
        // One side has arguments and the other does not.
        (ApplyVariant(_, None), ApplyVariant(_, Some(_)))
        | (ApplyVariant(_, Some(_)), ApplyVariant(_, None)) => Some(false),

        (Int(left), Int(right)) => Some(left == right),
        (Str(left), Str(right)) => Some(left == right),
        (Char(left), Char(right)) => Some(left == right),
        (Frac(left), Frac(right)) => Some(left == right),

        (_, _) => None,
    };

    match verdict {
        Some(is_equal) => bool_variant(is_equal),
        None => EvalError(region, TypeMismatch("tried to use == on two values with incompatible types".to_string())),
    }
}
/// Interprets a variant name as a boolean: `True` -> `Some(true)`,
/// `False` -> `Some(false)`, anything else -> `None`.
fn bool_from_variant_name(name: &VariantName) -> Option<bool> {
    let text = name.clone().name();

    if text.as_str() == "True" {
        Some(true)
    } else if text.as_str() == "False" {
        Some(false)
    } else {
        None
    }
}
/// Applies a binary operator to two already-evaluated operands.
///
/// Operands of the wrong type produce an `EvalError(TypeMismatch)` at `region`.
/// `/`, `~/` and `%` on Frac/Approx operands return `Ok(value)` or
/// `Err(DivisionByZero)` variants depending on whether the result is finite.
///
/// # Panics
///
/// * on Int overflow in `+`, `-`, `*`, `^`;
/// * on Int `//` or `%` when the divisor is zero or the operation overflows;
/// * on `^` when the exponent is negative or does not fit in a `u32`;
/// * if a `|>` operator reaches evaluation (it should have been rewritten by
///   `apply_pizza` beforehand).
#[inline(always)]
fn eval_operator(region: Region, left_expr: &Evaluated, op: Operator, right_expr: &Evaluated) -> Evaluated {
    use self::Evaluated::*;

    // TODO in the future, replace these with named function calls to stdlib
    match (left_expr, op, right_expr) {
        // Equals
        (_, Equals, _) => eq(region, left_expr, right_expr),

        // And
        (ApplyVariant(left_name, None), And, ApplyVariant(right_name, None)) => {
            match (bool_from_variant_name(left_name), bool_from_variant_name(right_name)) {
                (Some(left_bool), Some(right_bool)) => bool_variant(left_bool && right_bool),
                _ => operator_type_mismatch(region, "tried to use && on non-bools"),
            }
        }
        (_, And, _) => operator_type_mismatch(region, "tried to use && on non-bools"),

        // Or
        (ApplyVariant(left_name, None), Or, ApplyVariant(right_name, None)) => {
            match (bool_from_variant_name(left_name), bool_from_variant_name(right_name)) {
                (Some(left_bool), Some(right_bool)) => bool_variant(left_bool || right_bool),
                _ => operator_type_mismatch(region, "tried to use || on non-bools"),
            }
        }
        (_, Or, _) => operator_type_mismatch(region, "tried to use || on non-bools"),

        // LessThan
        (Int(left_num), LessThan, Int(right_num)) => bool_variant(left_num < right_num),
        (Frac(left_num), LessThan, Frac(right_num)) => bool_variant(left_num < right_num),
        (Int(_), LessThan, Frac(_)) => operator_type_mismatch(region, "tried check Int < Frac. Explicitly convert them to the same type first!"),
        (Frac(_), LessThan, Int(_)) => operator_type_mismatch(region, "tried check Frac < Int. Explicitly convert them to the same type first!"),
        (_, LessThan, _) => operator_type_mismatch(region, "tried to check if one non-number < another non-number"),

        // LessThanOrEq
        (Int(left_num), LessThanOrEq, Int(right_num)) => bool_variant(left_num <= right_num),
        (Frac(left_num), LessThanOrEq, Frac(right_num)) => bool_variant(left_num <= right_num),
        (Int(_), LessThanOrEq, Frac(_)) => operator_type_mismatch(region, "tried check Int <= Frac. Explicitly convert them to the same type first!"),
        (Frac(_), LessThanOrEq, Int(_)) => operator_type_mismatch(region, "tried check Frac <= Int. Explicitly convert them to the same type first!"),
        (_, LessThanOrEq, _) => operator_type_mismatch(region, "tried to check if one non-number <= another non-number"),

        // GreaterThan
        (Int(left_num), GreaterThan, Int(right_num)) => bool_variant(left_num > right_num),
        (Frac(left_num), GreaterThan, Frac(right_num)) => bool_variant(left_num > right_num),
        (Int(_), GreaterThan, Frac(_)) => operator_type_mismatch(region, "tried check Int > Frac. Explicitly convert them to the same type first!"),
        (Frac(_), GreaterThan, Int(_)) => operator_type_mismatch(region, "tried check Frac > Int. Explicitly convert them to the same type first!"),
        (_, GreaterThan, _) => operator_type_mismatch(region, "tried to check if one non-number > another non-number"),

        // GreaterThanOrEq
        (Int(left_num), GreaterThanOrEq, Int(right_num)) => bool_variant(left_num >= right_num),
        (Frac(left_num), GreaterThanOrEq, Frac(right_num)) => bool_variant(left_num >= right_num),
        (Int(_), GreaterThanOrEq, Frac(_)) => operator_type_mismatch(region, "tried check Int >= Frac. Explicitly convert them to the same type first!"),
        (Frac(_), GreaterThanOrEq, Int(_)) => operator_type_mismatch(region, "tried check Frac >= Int. Explicitly convert them to the same type first!"),
        (_, GreaterThanOrEq, _) => operator_type_mismatch(region, "tried to check if one non-number >= another non-number"),

        // Plus
        (Int(left_num), Plus, Int(right_num)) => Int(left_num.checked_add(*right_num).unwrap_or_else(|| panic!("Integer overflow on +"))),
        (Frac(left_num), Plus, Frac(right_num)) => Frac(left_num + right_num),
        (Int(_), Plus, Frac(_)) => operator_type_mismatch(region, "tried to add Frac to Int. Explicitly convert them to the same type first!"),
        (Frac(_), Plus, Int(_)) => operator_type_mismatch(region, "tried to add Int to Frac. Explicitly convert them to the same type first!"),
        (_, Plus, _) => operator_type_mismatch(region, "tried to add non-numbers"),

        // Star
        (Int(left_num), Star, Int(right_num)) => Int(left_num.checked_mul(*right_num).unwrap_or_else(|| panic!("Integer overflow on *"))),
        (Frac(left_num), Star, Frac(right_num)) => Frac(left_num * right_num),
        (Int(_), Star, Frac(_)) => operator_type_mismatch(region, "tried to multiply Int by Frac. Explicitly convert them to the same type first!"),
        (Frac(_), Star, Int(_)) => operator_type_mismatch(region, "tried to multiply Frac by Int. Explicitly convert them to the same type first!"),
        (_, Star, _) => operator_type_mismatch(region, "tried to multiply non-numbers"),

        // Minus
        (Int(left_num), Minus, Int(right_num)) => Int(left_num.checked_sub(*right_num).unwrap_or_else(|| panic!("Integer underflow on -"))),
        (Frac(left_num), Minus, Frac(right_num)) => Frac(left_num - right_num),
        (Int(_), Minus, Frac(_)) => operator_type_mismatch(region, "tried to subtract Frac from Int. Explicitly convert them to the same type first!"),
        (Frac(_), Minus, Int(_)) => operator_type_mismatch(region, "tried to subtract Int from Frac. Explicitly convert them to the same type first!"),
        (_, Minus, _) => operator_type_mismatch(region, "tried to subtract non-numbers"),

        // Caret
        (Int(left_num), Caret, Int(right_num)) => {
            let exponent = *right_num;

            // A plain `as u32` would silently wrap negative or oversized
            // exponents into a different, valid-looking exponent.
            if exponent < 0 || exponent > i64::from(u32::max_value()) {
                panic!("Exponent out of range on ^");
            }

            Int(left_num.checked_pow(exponent as u32).unwrap_or_else(|| panic!("Integer overflow on ^")))
        }
        (Frac(_), Caret, Frac(_)) => operator_type_mismatch(region, "tried to use ^ with a Frac, which is not yet supported on either side of the ^ operator."),
        (_, Caret, _) => operator_type_mismatch(region, "tried to use ^ on non-numbers"),

        // Slash
        (Frac(left_num), Slash, Frac(right_num)) => {
            let answer = left_num / right_num;

            if answer.is_finite() {
                ok_variant(Frac(answer))
            } else {
                err_variant(ApplyVariant(VariantName::Unqualified("DivisionByZero".to_string()), None))
            }
        }
        (Int(_), Slash, Int(_)) => operator_type_mismatch(region, "tried to divide two Int values. Explicitly convert them to Frac values, or use Int division (the // operator)."),
        (Approx(_), Slash, Approx(_)) => operator_type_mismatch(region, "tried to divide two Approx values. Explicitly convert them to Frac values, or use Approx division (the ~/ operator)."),
        (Int(_), Slash, Frac(_)) => operator_type_mismatch(region, "tried to divide Int by Frac. Explicitly convert them to the same type first!"),
        (Frac(_), Slash, Int(_)) => operator_type_mismatch(region, "tried to divide Frac by Int. Explicitly convert them to the same type first!"),
        (_, Slash, _) => operator_type_mismatch(region, "tried to divide non-numbers"),

        // DoubleSlash
        (Int(left_num), DoubleSlash, Int(right_num)) => Int(left_num.checked_div(*right_num).unwrap_or_else(|| panic!("Division by zero or integer overflow on //"))),
        (Approx(_), DoubleSlash, Approx(_)) => operator_type_mismatch(region, "tried to do integer division on two Approx values. Explicitly convert them to Int values, or use Approx division (the ~/ operator)."),
        (Frac(_), DoubleSlash, Frac(_)) => operator_type_mismatch(region, "tried to do integer division on two Frac values. Explicitly conver them to Int values, or use Frac division (the / operator)."),
        (Int(_), DoubleSlash, Frac(_)) => operator_type_mismatch(region, "tried to integer-divide Int by Frac"),
        (Frac(_), DoubleSlash, Int(_)) => operator_type_mismatch(region, "tried to integer-divide Frac by Int"),
        (_, DoubleSlash, _) => operator_type_mismatch(region, "tried to do integer division on two non-numbers"),

        // TildeSlash
        (Approx(left_num), TildeSlash, Approx(right_num)) => {
            let answer = left_num / right_num;

            if answer.is_finite() {
                ok_variant(Approx(answer))
            } else {
                err_variant(ApplyVariant(VariantName::Unqualified("DivisionByZero".to_string()), None))
            }
        }
        (Int(_), TildeSlash, Int(_)) => operator_type_mismatch(region, "tried to do Approx division on two Int values. Explicitly convert them to Approx values, or use Int division (the // operator)."),
        (Frac(_), TildeSlash, Frac(_)) => operator_type_mismatch(region, "tried to do Approx division on two Frac values. Explicitly conver them to Approx values, or use Frac division (the / operator)."),
        (Int(_), TildeSlash, Approx(_)) => operator_type_mismatch(region, "tried to do Int ~/ Approx. Explicitly convert both to Approx first!"),
        (Frac(_), TildeSlash, Approx(_)) => operator_type_mismatch(region, "tried to do Frac ~/ Approx. Explicitly convert both to Approx first!"),
        (Approx(_), TildeSlash, Int(_)) => operator_type_mismatch(region, "tried to divide Approx ~/ Int. Explicitly convert both to Approx first!"),
        (Approx(_), TildeSlash, Frac(_)) => operator_type_mismatch(region, "tried to divide Approx ~/ Frac. Explicitly convert both to Approx first!"),
        (_, TildeSlash, _) => operator_type_mismatch(region, "tried to divide non-numbers"),

        // Percent
        (Int(left_num), Percent, Int(right_num)) => Int(left_num.checked_rem(*right_num).unwrap_or_else(|| panic!("Division by zero or integer overflow on %"))),
        (Frac(left_num), Percent, Frac(right_num)) => {
            let answer = left_num % right_num;

            if answer.is_finite() {
                ok_variant(Frac(answer))
            } else {
                err_variant(ApplyVariant(VariantName::Unqualified("DivisionByZero".to_string()), None))
            }
        }
        (Int(_), Percent, Frac(_)) => operator_type_mismatch(region, "tried to do Int % Frac. Explicitly convert them to the same type first!"),
        (Frac(_), Percent, Int(_)) => operator_type_mismatch(region, "tried to do Frac % Int. Explicitly convert them to the same type first!"),
        (_, Percent, _) => operator_type_mismatch(region, "tried to use % on non-numbers"),

        // Pizza
        (_, Pizza, _) => { panic!("There was a |> operator that hadn't been removed prior to eval time. This should never happen!"); }
    }
}

/// Builds the `EvalError(TypeMismatch)` that `eval_operator` reports for
/// operands of the wrong type.
fn operator_type_mismatch(region: Region, message: &str) -> Evaluated {
    Evaluated::EvalError(region, TypeMismatch(message.to_string()))
}
#[inline(always)]
fn eval_case(region: Region, evaluated: Evaluated, branches: Vec<(Located<Pattern>, Located<Expr>)>, vars: &Scope) -> Evaluated {
use self::Evaluated::*;
for (pattern, definition) in branches {
let mut branch_vars = vars.clone();
if pattern_match(&evaluated, &pattern.value, &mut branch_vars).is_ok() {
return scoped_eval(definition, &branch_vars);
}
}
EvalError(region, NoBranchesMatched)
}
/// Matches `evaluated` against `pattern`, inserting any variables the pattern
/// binds into `vars`.
///
/// Returns `Ok(())` on a match. On failure the problem describes why:
/// `NotEqual` for a literal of the right type but a different value,
/// `TypeMismatch` for a value of the wrong kind, and
/// `WrongArity(expected, provided)` when a variant pattern and the value carry
/// different numbers of arguments ("expected" is always the pattern's count).
///
/// Note that bindings made by earlier sub-patterns stay in `vars` even when a
/// later sub-pattern fails.
fn pattern_match(evaluated: &Evaluated, pattern: &Pattern, vars: &mut Scope) -> Result<(), Problem> {
    use self::Evaluated::*;

    match pattern {
        Identifier(name) => {
            vars.insert(name.clone().name(), Rc::new(evaluated.clone()));

            Ok(())
        }
        Underscore => {
            // Underscore matches anything, and records no new vars.
            Ok(())
        }
        EmptyRecordLiteral => {
            match evaluated {
                EmptyRecord => Ok(()),
                expr => Err(TypeMismatch(
                    format!("Wanted a `{}`, but was given `{}`.", "{}", expr)
                )),
            }
        }
        Integer(pattern_num) => {
            match evaluated {
                Int(evaluated_num) => {
                    if *pattern_num == *evaluated_num {
                        Ok(())
                    } else {
                        Err(Problem::NotEqual)
                    }
                }
                // Report the integer that was wanted, not the `{}` record.
                expr => Err(TypeMismatch(
                    format!("Wanted a `{}`, but was given `{}`.", pattern_num, expr)
                )),
            }
        }
        Fraction(numerator, denominator) => {
            match evaluated {
                Frac(actual_frac) => {
                    let expected_frac = fraction_from_i64s(*numerator, *denominator);

                    if expected_frac == *actual_frac {
                        Ok(())
                    } else {
                        Err(Problem::NotEqual)
                    }
                }
                // Report the fraction that was wanted, not the `{}` record.
                expr => Err(TypeMismatch(
                    format!("Wanted a `{}/{}`, but was given `{}`.", numerator, denominator, expr)
                )),
            }
        }
        Variant(pattern_variant_name, opt_pattern_contents) => {
            match evaluated {
                ApplyVariant(applied_variant_name, opt_applied_contents) => {
                    if *pattern_variant_name != *applied_variant_name {
                        return Err(TypeMismatch(
                            format!(
                                "Wanted a `{}` variant, but was given a `{}` variant.",
                                pattern_variant_name,
                                applied_variant_name
                            )
                        ));
                    }

                    match (opt_pattern_contents, opt_applied_contents) {
                        (Some(ref pattern_contents), Some(applied_contents)) => {
                            if pattern_contents.len() == applied_contents.len() {
                                // Recursively pattern match
                                for (pattern_val, applied_val) in pattern_contents.into_iter().zip(applied_contents) {
                                    pattern_match(applied_val, &pattern_val.value, vars)?;
                                }

                                Ok(())
                            } else {
                                Err(WrongArity(
                                    pattern_contents.len() as u32,
                                    applied_contents.len() as u32
                                ))
                            }
                        }
                        (None, None) => {
                            // It's the variant we expected, but it has no values in it,
                            // so we don't insert anything into vars.
                            Ok(())
                        }
                        (None, Some(contents)) => {
                            // It's the variant we expected, but the arity is wrong:
                            // the pattern expects no arguments, the value has some.
                            Err(WrongArity(0, contents.len() as u32))
                        }
                        (Some(patterns), None) => {
                            // It's the variant we expected, but the arity is wrong:
                            // the pattern expects arguments, the value has none.
                            Err(WrongArity(patterns.len() as u32, 0))
                        }
                    }
                }
                _ => {
                    Err(TypeMismatch(format!("Wanted to destructure a `{}` variant, but was given a non-variant.", pattern_variant_name)))
                }
            }
        }
    }
}
impl fmt::Display for Evaluated {
    /// Renders an evaluated value for display to the user.
    ///
    /// Variants without a dedicated rendering fall through to the
    /// `<partially evaluated expression>` placeholder.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Evaluated::*;

        match self {
            // PRIMITIVES
            Int(num) => write!(f, "{}", *num),
            Frac(fraction) => {
                let numerator = *fraction.numer().unwrap();
                let denominator = *fraction.denom().unwrap();

                // Tenths are shown in decimal notation, everything else as n/d.
                if denominator != 10 {
                    return write!(f, "{}/{}", numerator, denominator);
                }

                write!(f, "{}", (numerator as f64 / 10.0))
            }
            Approx(num) => write!(f, "~{}", *num),
            Str(string) => {
                // Backslashes go first so the escapes added afterwards are
                // not themselves escaped again.
                let mut escaped_str = string.replace("\\", "\\\\");

                for &(raw, escaped) in [("\"", "\\\""), ("\t", "\\t"), ("\n", "\\n"), ("\r", "\\r")].iter() {
                    escaped_str = escaped_str.replace(raw, escaped);
                }

                write!(f, "\"{}\"", escaped_str)
            }
            Char(ch) => write!(f, "'{}'", *ch),
            Closure(args, _, _) => write!(f, "<{}-argument function>", args.len()),
            ApplyVariant(name, opt_exprs) => {
                let variant = name.clone().name();

                match opt_exprs {
                    None => write!(f, "{}", variant),
                    Some(exprs) => {
                        let rendered_args: Vec<String> =
                            exprs.iter().map(|expr| format!(" {}", expr)).collect();

                        write!(f, "{}{}", variant, rendered_args.join(","))
                    }
                }
            }

            // ERRORS
            EvalError(region, problem) => write!(
                f,
                "ERROR: {} at line {}, column {}",
                problem, region.start_line, region.start_col
            ),

            // UNFORMATTED
            _ => write!(f, "<partially evaluated expression>"),
        }
    }
}
impl fmt::Display for Problem {
    /// Renders a one-line, human-readable description of the problem.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Problem::UnrecognizedVarName(name) => write!(f, "Unrecognized var name `{}`", name),
            Problem::ReassignedVarName(name) => write!(f, "Reassigned constant - {}", name),
            Problem::TypeMismatch(info) => write!(f, "Type Mismatch - {}", info),
            Problem::NotEqual => write!(f, "Pattern match on literal value failed; the branch wasn't equal."),
            Problem::NoBranchesMatched => write!(f, "No branches matched in this case-expression"),
            Problem::WrongArity(expected_arity, provided_arity) => {
                // Equal counts are reported with the "Missing" wording.
                let headline = if provided_arity > expected_arity {
                    "Too many arguments!"
                } else {
                    "Missing arguments!"
                };

                write!(f, "{} Needed {} arguments, but got {}", headline, expected_arity, provided_arity)
            }
        }
    }
}
/// Wraps `contents` as the single argument of an unqualified `Ok` variant.
fn ok_variant(contents: Evaluated) -> Evaluated {
    let name = VariantName::Unqualified(String::from("Ok"));

    Evaluated::ApplyVariant(name, Some(vec![contents]))
}
/// Wraps `contents` as the single argument of an unqualified `Err` variant.
fn err_variant(contents: Evaluated) -> Evaluated {
    let name = VariantName::Unqualified(String::from("Err"));

    Evaluated::ApplyVariant(name, Some(vec![contents]))
}
/// Builds a `Fraction` from a signed numerator and denominator.
///
/// `Fraction::new` / `Fraction::new_neg` take unsigned magnitudes, so the sign
/// is worked out separately from both inputs and the absolute values are
/// passed on. A zero numerator is always built as non-negative.
fn fraction_from_i64s(numerator: i64, denominator: i64) -> Fraction {
    let is_negative = numerator != 0 && (numerator.is_negative() != denominator.is_negative());

    // `wrapping_abs() as u64` gives the correct magnitude even for i64::MIN,
    // whereas casting a negative value directly would wrap to a huge number.
    let numerator_magnitude = numerator.wrapping_abs() as u64;
    let denominator_magnitude = denominator.wrapping_abs() as u64;

    if is_negative {
        Fraction::new_neg(numerator_magnitude, denominator_magnitude)
    } else {
        Fraction::new(numerator_magnitude, denominator_magnitude)
    }
}
/// Rewrites every `|>` operator into the call it stands for, walking all
/// sub-expressions:
///
/// * `x |> f`          becomes `CallByName(f, [x])`
/// * `x |> f a b`      becomes `CallByName(f, [a, b, x])` (left side appended last)
/// * `x |> (expr) a b` becomes `Apply(expr, [a, b, x])`
///
/// Any other operator expression is rebuilt unchanged, and non-operator
/// expressions are returned as they are.
fn apply_pizza(expr: &Expr) -> Expr {
    use expr::Expr::*;

    expr.walk(&|sub_expr| {
        // TODO can we avoid cloning here somehow, without resorting to a macro?
        match sub_expr.clone() {
            Operator(boxed_loc_left, loc_op, boxed_loc_right) => {
                let loc_left = *boxed_loc_left;
                let loc_right = *boxed_loc_right;
                let left_region = loc_left.region;
                // Taken from the right operand so that a rebuilt operator
                // keeps the right-hand side's own source location.
                let right_region = loc_right.region;
                let op_region = loc_op.region;

                match (loc_left.value, loc_op.value, loc_right.value) {
                    (left_arg, Pizza, Expr::Var(name)) => {
                        Expr::CallByName(
                            name,
                            vec![Located { region: left_region, value: left_arg }]
                        )
                    }
                    (left_arg, Pizza, Expr::CallByName(name, mut args)) => {
                        args.push(Located { region: left_region, value: left_arg });

                        CallByName(name, args)
                    }
                    (left_arg, Pizza, Expr::Apply(applied_expr, mut args)) => {
                        args.push(Located { region: left_region, value: left_arg });

                        Apply(applied_expr, args)
                    }
                    (left, op, right) => {
                        Operator(
                            Box::new(Located { region: left_region, value: left }),
                            Located { region: op_region, value: op },
                            Box::new(Located { region: right_region, value: right }),
                        )
                    }
                }
            }
            other => other,
        }
    })
}

View file

@ -34,58 +34,6 @@ pub enum Expr {
Operator(Box<Located<Expr>>, Located<Operator>, Box<Located<Expr>>),
}
/// A variant name, possibly fully-qualified with a module name
/// e.g. (Result.Ok)
///
/// `Unqualified` holds just the name; `Qualified` holds the path followed by
/// the name.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum VariantName {
Unqualified(String),
Qualified(String, String),
}
/// An identifier, possibly fully-qualified with a module name
/// e.g. (Http.Request from http)
///
/// `Unqualified` holds just the name; `Qualified` holds the path followed by
/// the name.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Ident {
    Unqualified(String),
    Qualified(String, String),
}

impl Ident {
    /// Returns `true` when the identifier carries a module path.
    pub fn is_qualified(&self) -> bool {
        match *self {
            Ident::Qualified(..) => true,
            Ident::Unqualified(..) => false,
        }
    }

    /// Consumes the identifier and returns its name, dropping any module path.
    pub fn name(self) -> String {
        match self {
            Ident::Unqualified(name) | Ident::Qualified(_, name) => name,
        }
    }
}

impl fmt::Display for Ident {
    /// Writes `name` for an unqualified identifier and `path.name` for a
    /// qualified one.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Ident::Unqualified(ref name) => f.write_str(name),
            Ident::Qualified(ref path, ref name) => write!(f, "{}.{}", path, name),
        }
    }
}
impl fmt::Display for VariantName {
    /// Writes `name` for an unqualified variant and `path.name` for a
    /// qualified one.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            VariantName::Unqualified(ref name) => f.write_str(name),
            VariantName::Qualified(ref path, ref name) => write!(f, "{}.{}", path, name),
        }
    }
}
#[derive(Clone, Debug, PartialEq)]
pub enum Pattern {
Identifier(String),

53
src/ident.rs Normal file
View file

@ -0,0 +1,53 @@
use std::fmt::{self, Display, Formatter};
/// A variant name, possibly fully-qualified with a module name
/// e.g. (Result.Ok)
///
/// `Unqualified` holds just the name; `Qualified` holds the path followed by
/// the name.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum VariantName {
Unqualified(String),
Qualified(String, String),
}
/// An identifier, possibly fully-qualified with a module name
/// e.g. (Http.Request from http)
///
/// `Unqualified` holds just the name; `Qualified` holds the path followed by
/// the name.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Ident {
    Unqualified(String),
    Qualified(String, String),
}

impl Ident {
    /// Returns `true` when the identifier carries a module path.
    pub fn is_qualified(&self) -> bool {
        match *self {
            Ident::Qualified(..) => true,
            Ident::Unqualified(..) => false,
        }
    }

    /// Consumes the identifier and returns its name, dropping any module path.
    pub fn name(self) -> String {
        match self {
            Ident::Unqualified(name) | Ident::Qualified(_, name) => name,
        }
    }
}

impl Display for Ident {
    /// Writes `name` for an unqualified identifier and `path.name` for a
    /// qualified one.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match *self {
            Ident::Unqualified(ref name) => f.write_str(name),
            Ident::Qualified(ref path, ref name) => write!(f, "{}.{}", path, name),
        }
    }
}
impl Display for VariantName {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
VariantName::Unqualified(name) => write!(f, "{}", name),
VariantName::Qualified(path, name) => write!(f, "{}.{}", path, name),
}
}
}

View file

@ -1,4 +1,6 @@
use canonicalize::{Expr, Procedure, Symbol};
use can::expr::Expr;
use can::procedure::Procedure;
use can::symbol::Symbol;
use collections::{ImMap, MutMap};
use constrain::{constrain, constrain_procedure};
use region::Located;

View file

@ -1,14 +1,12 @@
// pub mod eval;
pub mod canonicalize;
pub mod can;
pub mod collections;
pub mod deprecated;
pub mod expr;
pub mod graph;
pub mod ident;
pub mod operator;
pub mod parse; // DEPRECATED
pub mod parse;
pub mod region;
// pub mod string;
pub mod string;
pub mod constrain;
pub mod ena;
@ -25,7 +23,5 @@ extern crate fxhash;
extern crate im_rc;
extern crate num;
#[macro_use]
extern crate combine; // OBSOLETE
#[macro_use]
extern crate log;

View file

@ -1,15 +0,0 @@
use expr::{Pattern, Ident};
/// A module: its name, two lists of identifiers (`exposes` and `uses`), and
/// its declarations.
///
/// All fields are private.
pub struct Module {
/// The module's own name.
name: Ident,
/// Presumably the identifiers this module makes available to others — TODO confirm.
exposes: Vec<Ident>,
/// Presumably the identifiers this module brings in from elsewhere — TODO confirm.
uses: Vec<Ident>,
/// The declarations contained in the module.
decls: Vec<Decl>,
}
/// A declaration inside a `Module`. Only assignment exists so far; aliases
/// and sum types are still TODO.
///
/// NOTE(review): `Expr` is not among the names imported at the top of this
/// file (only `Pattern` and `Ident` are) — confirm where it is meant to come from.
#[derive(Clone, Debug, PartialEq)]
pub enum Decl {
/// A pattern together with two boxed expressions — presumably the value
/// being assigned and the expression that follows it; TODO confirm.
Assign(Pattern, Box<Expr>, Box<Expr>),
// TODO Alias
// TODO SumType
}

View file

@ -1,9 +1,8 @@
use bumpalo::collections::vec::Vec;
use operator::Operator;
use parse::problems::Problem;
use region::Loc;
use std::fmt::{self, Display, Formatter};
pub type Ident = str;
pub type VariantName = str;
/// A parsed expression. This uses lifetimes extensively for two reasons:
@ -23,50 +22,45 @@ pub type VariantName = str;
#[derive(Clone, Debug, PartialEq)]
pub enum Expr<'a> {
// Number Literals
Int(i64),
Float(f64),
Float(&'a str),
Int(&'a str),
HexInt(&'a str),
OctalInt(&'a str),
BinaryInt(&'a str),
// String Literals
EmptyStr,
Str(&'a str),
/// basically InterpolatedStr(Vec<(String, Loc<Expr>)>, String)
InterpolatedStr(&'a (&'a [(&'a str, Loc<Expr<'a>>)], &'a str)),
BlockStr(&'a [&'a str]),
// List literals
EmptyList,
List(&'a [Loc<Expr<'a>>]),
List(Vec<'a, Loc<Expr<'a>>>),
// // Lookups
// Var(&'a str),
// Lookups
Var(&'a Ident),
// // Pattern Matching
// Case(&'a (Loc<Expr<'a>>, [(Loc<Pattern<'a>>, Loc<Expr<'a>>)])),
// Closure(&'a (&'a [Loc<Pattern<'a>>], Loc<Expr<'a>>)),
// /// basically Assign(Vec<(Loc<Pattern>, Loc<Expr>)>, Loc<Expr>)
// Assign(&'a (&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)], Loc<Expr<'a>>)),
// Pattern Matching
Case(&'a (Loc<Expr<'a>>, [(Loc<Pattern<'a>>, Loc<Expr<'a>>)])),
Closure(&'a (&'a [Loc<Pattern<'a>>], Loc<Expr<'a>>)),
/// basically Assign(Vec<(Loc<Pattern>, Loc<Expr>)>, Loc<Expr>)
Assign(&'a (&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)], Loc<Expr<'a>>)),
// Application
Call(&'a (Loc<Expr<'a>>, [Loc<Expr<'a>>])),
ApplyVariant(&'a (&'a VariantName, [Loc<Expr<'a>>])),
Variant(&'a VariantName),
// // Application
// Call(&'a (Loc<Expr<'a>>, [Loc<Expr<'a>>])),
// ApplyVariant(&'a (&'a VariantName, [Loc<Expr<'a>>])),
// Variant(&'a VariantName),
// Product Types
EmptyRecord,
// Sugar
If(&'a (Loc<Expr<'a>>, Loc<Expr<'a>>, Loc<Expr<'a>>)),
// // Sugar
// If(&'a (Loc<Expr<'a>>, Loc<Expr<'a>>, Loc<Expr<'a>>)),
Operator(&'a (Loc<Expr<'a>>, Loc<Operator>, Loc<Expr<'a>>)),
// Runtime errors
MalformedStr(Box<[Loc<Problem>]>),
MalformedInt(Problem),
MalformedFloat(Problem),
}
#[derive(Clone, Debug, PartialEq)]
pub enum Pattern<'a> {
// Identifier
Identifier(&'a Ident),
Identifier(&'a str),
// Variant
Variant(&'a VariantName),
@ -82,33 +76,35 @@ pub enum Pattern<'a> {
#[test]
fn expr_size() {
// The size of the Expr data structure should be exactly 3 machine words.
// The size of the Expr data structure should be exactly 5 machine words.
// This test helps avoid regressions which accidentally increase its size!
//
// Worth noting that going up to 4 machine words is probably not a big deal;
// a 64-byte cache line will only fit 2 of these regardless.
assert_eq!(
std::mem::size_of::<Expr>(),
// TODO [move this comment to an issue] We should be able to get this
// down to 2, which would mean we could fit 4 of these nodes in a single
// 64-byte cache line instead of only being able to fit 2.
// 64-byte cache line instead of only being able to fit 1.
//
// Doing this would require, among other things:
// 1. Making a str replacement where the length is stored as u32 instead of usize,
// to leave room for the tagged union's u8 tag.
// (Alternatively could store it as (&'a &'a str), but ew.)
// 2. Figuring out why &'a (Foo, Bar) by default takes up 24 bytes in Rust.
// 2. Similarly, making a slice replacement like that str replacement, and
// also where it doesn't share the bytes with anything else - so its
// elements can be consumed without having to clone them (unlike a slice).
// That's the only reason we're using Vec right now instead of slices -
// if we used slices, we'd have to clone their elements during canonicalization
// just to iterate over them and canonicalize them normally.
// 3. Figuring out why (&'a (Foo, Bar)) by default takes up 24 bytes in Rust.
// I assume it's because the struct is being stored inline instead of
// as a pointer, but in this case we actually do want the pointer!
// We want to have the lifetime and we want to avoid using the unsafe keyword,
// but we also want this to only store 1 pointer in the AST node.
// Hopefully there's a way!
//
// It's also possible that going up to 4 machine words might yield even
// better performance, due to more data structures being inlinable,
// and therefore having fewer pointers to chase. This seems worth
// investigating as well.
std::mem::size_of::<usize>() * 3
// It's also possible that 4 machine words might yield better performance
// than 2, due to more data structures being inlinable, and therefore
// having fewer pointers to chase. This seems worth investigating as well.
std::mem::size_of::<usize>() * 5
);
}
@ -151,6 +147,7 @@ pub enum Attempting {
List,
Keyword,
StringLiteral,
RecordLiteral,
InterpolatedString,
NumberLiteral,
UnicodeEscape,
@ -165,7 +162,15 @@ impl<'a> Display for Expr<'a> {
match self {
EmptyStr => write!(f, "\"\""),
_ => panic!("TODO"),
Str(string) => write!(f, "\"{}\"", string),
BlockStr(lines) => write!(f, "\"\"\"{}\"\"\"", lines.join("\n")),
Int(string) => string.fmt(f),
Float(string) => string.fmt(f),
HexInt(string) => write!(f, "0x{}", string),
BinaryInt(string) => write!(f, "0b{}", string),
OctalInt(string) => write!(f, "0o{}", string),
EmptyRecord => write!(f, "{}", "{}"),
other => panic!("TODO implement Display for AST variant {:?}", other),
}
}
}

View file

@ -1,18 +1,76 @@
pub mod ast;
pub mod ident;
pub mod module;
pub mod number_literal;
pub mod parser;
pub mod problems;
pub mod string_literal;
use bumpalo::Bump;
use operator::Operator;
use parse::ast::{Attempting, Expr};
use parse::number_literal::number_literal;
use parse::parser::{attempt, one_of2, Parser};
use parse::parser::{
and, attempt, lazy, loc, map, map_with_arena, one_of3, optional, string, unexpected,
unexpected_eof, val, Parser, State,
};
use parse::string_literal::string_literal;
/// Builds the parser for an expression.
///
/// The parser is an `and` of two parts, post-processed by `map_with_arena`:
/// 1. an `attempt(Attempting::Expression, ...)` around the literal parsers, and
/// 2. an `optional` operator followed by a right-hand side.
///
/// If the optional part produced something, the left expression, the operator
/// and the right expression are allocated as one tuple in the arena and
/// returned as `Expr::Operator`. Otherwise the left expression's value is
/// returned on its own.
///
/// NOTE(review): the right-hand side is currently the placeholder
/// `val(Expr::Str("blah"))`, not a real expression parser.
/// NOTE(review): this span is a diff hunk; the `one_of2(...)` line appears to
/// be the removed predecessor of the `loc(one_of3(...))` lines that follow it.
pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
map_with_arena(
and(
attempt(
Attempting::Expression,
one_of2(number_literal(), string_literal()),
loc(one_of3(
record_literal(),
number_literal(),
string_literal(),
)),
),
optional(and(loc(operator()), loc(val(Expr::Str("blah"))))),
),
|arena, (loc_expr1, opt_operator)| match opt_operator {
Some((loc_op, loc_expr2)) => {
// Store the three located pieces together in the arena so the
// AST node only has to hold a single reference.
let tuple = arena.alloc((loc_expr1, loc_op, loc_expr2));
Expr::Operator(tuple)
}
// No operator followed: drop the location wrapper and return the bare expression.
None => loc_expr1.value,
},
)
}
/// Builds the parser for a binary operator.
///
/// Placeholder: it is currently `val(Operator::Plus)`, so it yields
/// `Operator::Plus` without inspecting the input. The commented-out code
/// below sketches matching the literal strings `+`, `-` and `*` instead.
pub fn operator<'a>() -> impl Parser<'a, Operator> {
val(Operator::Plus)
// one_of3(
// map(string("+"), |_| Operator::Plus),
// map(string("-"), |_| Operator::Minus),
// map(string("*"), |_| Operator::Star),
// )
}
/// Builds the parser for a record literal.
///
/// Only the empty record is recognized: the input must begin with exactly
/// `{}`. On success the parser yields `Expr::EmptyRecord` and advances the
/// state by 2. Any other first or second character produces an `unexpected`
/// failure for that character, and running out of input produces an
/// `unexpected_eof` failure; both are tagged `Attempting::RecordLiteral`.
pub fn record_literal<'a>() -> impl Parser<'a, Expr<'a>> {
    move |_arena: &'a Bump, state: State<'a>| {
        let mut input_chars = state.input.chars();

        // Look at the first two characters together. Arm order matters: the
        // end-of-input cases must be tested before the catch-all
        // "unexpected character" patterns.
        match (input_chars.next(), input_chars.next()) {
            (Some('{'), Some('}')) => {
                let next_state = state.advance_without_indenting(2)?;

                Ok((Expr::EmptyRecord, next_state))
            }
            (Some('{'), None) | (None, _) => {
                Err(unexpected_eof(0, Attempting::RecordLiteral, state))
            }
            (Some('{'), Some(other_char)) | (Some(other_char), _) => {
                Err(unexpected(other_char, 0, state, Attempting::RecordLiteral))
            }
        }
    }
}

16
src/parse/module.rs Normal file
View file

@ -0,0 +1,16 @@
use ident::Ident;
use parse::ast::{Expr, Pattern};
/// A parsed module: its name, two lists of identifiers (`exposes` and
/// `uses`), and its declarations.
pub struct Module<'a> {
/// The module's own name.
pub name: Ident,
/// Presumably the identifiers this module makes available to others — TODO confirm.
pub exposes: Vec<Ident>,
/// Presumably the identifiers this module brings in from elsewhere — TODO confirm.
pub uses: Vec<Ident>,
/// The declarations contained in the module.
pub decls: Vec<Decl<'a>>,
}
/// A declaration inside a `Module`. Only definitions exist so far; aliases
/// and sum types are still TODO.
#[derive(Clone, Debug, PartialEq)]
pub enum Decl<'a> {
/// A pattern together with two expressions — presumably the value being
/// defined and the expression that follows it; TODO confirm.
Def(Pattern<'a>, Expr<'a>, Expr<'a>),
// TODO Alias
// TODO SumType
}

View file

@ -1,19 +1,16 @@
use bumpalo::collections::string::String;
use bumpalo::Bump;
use parse::ast::{Attempting, Expr};
use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use parse::problems::Problem;
use std::char;
pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
move |arena: &'a Bump, state: State<'a>| {
move |_arena, state: State<'a>| {
let mut chars = state.input.chars();
match chars.next() {
Some(first_ch) => {
// Number literals must start with either an '-' or a digit.
if first_ch == '-' || first_ch.is_ascii_digit() {
parse_number_literal(first_ch, &mut chars, arena, state)
parse_number_literal(first_ch, &mut chars, state)
} else {
Err(unexpected(
first_ch,
@ -32,61 +29,69 @@ pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
fn parse_number_literal<'a, I>(
first_ch: char,
chars: &mut I,
arena: &'a Bump,
state: State<'a>,
) -> ParseResult<'a, Expr<'a>>
where
I: Iterator<Item = char>,
{
let mut before_decimal = String::with_capacity_in(1, arena);
let mut after_decimal = String::new_in(arena);
let mut has_decimal_point = false;
let mut chars_skipped = 0;
use self::LiteralType::*;
// Put the first character into the buffer, even if all we've parsed so
// far is a minus sign.
//
// We have to let i64::parse handle the minus sign (if it's there), because
// otherwise if we ask it to parse i64::MIN.to_string() as a positive i64,
// it errors because that positive number doesn't fit in an i64!
before_decimal.push(first_ch);
let mut typ = Int;
// We already parsed 1 character (which may have been a minus sign).
let mut chars_parsed = 1;
while let Some(next_ch) = chars.next() {
match next_ch {
digit if next_ch.is_ascii_digit() => {
if has_decimal_point {
after_decimal.push(digit);
} else {
before_decimal.push(digit);
}
}
'_' => {
// Underscores are allowed, and disregarded.
chars_skipped += 1;
}
'.' => {
if has_decimal_point {
// You only get one decimal point!
let len = before_decimal.len() + after_decimal.len() + chars_skipped;
chars_parsed += 1;
return Err(unexpected('.', len, state, Attempting::NumberLiteral));
} else {
chars_skipped += 1;
has_decimal_point = true;
}
}
invalid_char => {
if before_decimal.is_empty() {
// No digits! We likely parsed a minus sign that's actually an operator.
let len = before_decimal.len() + after_decimal.len() + chars_skipped;
return Err(unexpected(
invalid_char,
len,
state,
let err_unexpected = || {
Err(unexpected(
next_ch,
chars_parsed,
state.clone(),
Attempting::NumberLiteral,
));
}
))
};
// Returns true iff so far we have parsed the given char and no other chars.
let so_far_parsed = |ch| chars_parsed == 2 && first_ch == ch;
// We don't support negative escaped ints (e.g. 0x01 is supported but -0x01 is not).
// If you want that, do something like (negate 0x01).
//
// I'm open to changing this policy (that is, allowing support for
// negative escaped ints), but it'll complicate parsing logic and seems
// nonessential, so I'm leaving it out for now.
if next_ch == '.' {
if typ == Float {
// You only get one decimal point!
return err_unexpected();
} else {
typ = Float;
}
} else if next_ch == 'x' {
if so_far_parsed('0') {
typ = Hex;
} else {
return err_unexpected();
}
} else if next_ch == 'b' {
if so_far_parsed('0') {
typ = Binary;
} else {
return err_unexpected();
}
} else if next_ch == 'o' {
if so_far_parsed('0') {
typ = Octal;
} else {
return err_unexpected();
}
} else if !next_ch.is_ascii_digit() && next_ch != '_' {
if so_far_parsed('-') {
// No digits! We likely parsed a minus sign that's actually an operator.
return err_unexpected();
} else {
// We hit an invalid number literal character; we're done!
break;
}
@ -96,41 +101,25 @@ where
// At this point we have a number, and will definitely succeed.
// If the number is malformed (outside the supported range),
// we'll succeed with an appropriate Expr which records that.
let expr = if has_decimal_point {
let mut f64_buf = String::with_capacity_in(
before_decimal.len()
// +1 for the decimal point itself
+ 1
+ after_decimal.len(),
arena,
);
f64_buf.push_str(&before_decimal);
f64_buf.push('.');
f64_buf.push_str(&after_decimal);
// TODO [convert this comment to an issue] - we can get better
// performance here by inlining string.parse() for the f64 case,
// since we've already done the work of validating that each char
// is a digit, plus we also already separately parsed the minus
// sign and dot.
match f64_buf.parse::<f64>() {
Ok(float) if float.is_finite() => Expr::Float(float),
_ => Expr::MalformedFloat(Problem::OutsideSupportedRange),
}
} else {
// TODO [convert this comment to an issue] - we can get better
// performance here by inlining string.parse() for the i64 case,
// since we've already done the work of validating that each char
// is a digit.
match before_decimal.parse::<i64>() {
Ok(int_val) => Expr::Int(int_val),
Err(_) => Expr::MalformedInt(Problem::OutsideSupportedRange),
}
let expr = match typ {
Int => Expr::Int(&state.input[0..chars_parsed]),
Float => Expr::Float(&state.input[0..chars_parsed]),
// For these we trim off the 0x/0o/0b part
Hex => Expr::HexInt(&state.input[2..chars_parsed - 1]),
Binary => Expr::BinaryInt(&state.input[2..chars_parsed - 1]),
Octal => Expr::OctalInt(&state.input[2..chars_parsed - 1]),
};
let total_chars_parsed = before_decimal.len() + chars_skipped;
let state = state.advance_without_indenting(total_chars_parsed)?;
let next_state = state.advance_without_indenting(chars_parsed)?;
Ok((expr, state))
Ok((expr, next_state))
}
/// The kind of number literal `parse_number_literal` has recognized so far.
/// It picks which `Expr` variant the raw text is wrapped in.
#[derive(Debug, PartialEq, Eq)]
enum LiteralType {
/// The starting assumption, kept when no `.`, `x`, `o` or `b` is encountered.
Int,
/// Selected once a `.` has been seen.
Float,
/// Selected by an `x` directly after a leading `0`.
Hex,
/// Selected by an `o` directly after a leading `0`.
Octal,
/// Selected by a `b` directly after a leading `0`.
Binary,
}

View file

@ -1,7 +1,7 @@
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use parse::ast::Attempting;
use region::Region;
use region::{Located, Region};
use std::char;
// Strategy:
@ -190,6 +190,21 @@ pub trait Parser<'a, Output> {
fn parse(&self, &'a Bump, State<'a>) -> ParseResult<'a, Output>;
}
/// A parser held behind a `dyn Parser` reference, so the concrete parser
/// type is erased. The referenced parser is stored in the arena.
pub struct BoxedParser<'a, Output> {
    parser: &'a (dyn Parser<'a, Output> + 'a),
}

impl<'a, Output> BoxedParser<'a, Output> {
    /// Moves `parser` into `arena` and wraps the resulting reference.
    fn new<P: Parser<'a, Output> + 'a>(arena: &'a Bump, parser: P) -> Self {
        // `alloc` hands back a reference to the concrete `P`; the annotation
        // erases it to a trait object.
        let parser: &'a (dyn Parser<'a, Output> + 'a) = arena.alloc(parser);

        BoxedParser { parser }
    }
}
impl<'a, F, Output> Parser<'a, Output> for F
where
F: Fn(&'a Bump, State<'a>) -> ParseResult<'a, Output>,
@ -199,6 +214,22 @@ where
}
}
/// Builds a parser that always succeeds with a clone of `value`, returning
/// the state it was given unchanged.
pub fn val<'a, Val: Clone>(value: Val) -> impl Parser<'a, Val> {
    move |_, state| {
        // Clone on every run so the parser can be invoked more than once.
        let output = value.clone();

        Ok((output, state))
    }
}
/// Defers building a parser until it is actually run: `get_parser` is called
/// on every parse, and the parser it returns is run on the given arena and state.
///
/// Needed for recursive parsers.
pub fn lazy<'a, F, P, Val>(get_parser: F) -> impl Parser<'a, Val>
where
    P: Parser<'a, Val>,
    F: Fn() -> P,
{
    move |arena, state| {
        let parser = get_parser();

        parser.parse(arena, state)
    }
}
pub fn map<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After>
where
P: Parser<'a, Before>,
@ -211,6 +242,18 @@ where
}
}
/// Runs `parser` and, on success, passes its output through `transform`,
/// which also receives the arena (e.g. so it can allocate into it).
/// Failures are passed through unchanged.
pub fn map_with_arena<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After>
where
    P: Parser<'a, Before>,
    F: Fn(&'a Bump, Before) -> After,
{
    move |arena, state| match parser.parse(arena, state) {
        Ok((output, next_state)) => Ok((transform(arena, output), next_state)),
        Err(failure) => Err(failure),
    }
}
pub fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, Val>
where
P: Parser<'a, Val>,
@ -226,6 +269,32 @@ where
}
}
/// Wraps `parser` so that a successful result is returned as a `Located`.
/// Its region starts at the line and column of the state before parsing and
/// ends at the line and column of the state after parsing.
/// Failures are passed through unchanged.
pub fn loc<'a, P, Val>(parser: P) -> impl Parser<'a, Located<Val>>
where
    P: Parser<'a, Val>,
{
    move |arena, state: State<'a>| {
        // Capture the starting position before `state` is moved into the
        // inner parser.
        let (start_line, start_col) = (state.line, state.column);

        parser.parse(arena, state).map(|(value, end_state)| {
            let region = Region {
                start_col,
                start_line,
                end_col: end_state.column,
                end_line: end_state.line,
            };

            (Located { region, value }, end_state)
        })
    }
}
pub fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<'a, A>>
where
P: Parser<'a, A>,
@ -317,6 +386,7 @@ pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
let input = state.input;
let len = string.len();
// TODO do this comparison in one SIMD instruction (on supported systems)
match input.get(0..len) {
Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len)?)),
_ => Err(unexpected_eof(len, Attempting::Keyword, state)),
@ -378,6 +448,46 @@ where
// satisfies(any, |ch| ch.is_whitespace())
// }
pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)>
where
P1: Parser<'a, A>,
P2: Parser<'a, B>,
{
move |arena: &'a Bump, state: State<'a>| {
let original_attempting = state.attempting;
match p1.parse(arena, state) {
Ok((out1, state)) => match p2.parse(arena, state) {
Ok((out2, state)) => Ok(((out1, out2), state)),
Err((fail, state)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
)),
},
Err((fail, state)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
)),
}
}
}
/// Makes `parser` optional. A success becomes `Some(output)`. A failure
/// becomes a success with `None`, continuing from the state that came back
/// with the failure. The resulting parser never fails.
pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>>
where
    P: Parser<'a, T>,
{
    move |arena: &'a Bump, state: State<'a>| {
        let (maybe_output, next_state) = match parser.parse(arena, state) {
            Ok((output, state)) => (Some(output), state),
            // The failure itself is discarded; only its state is kept.
            Err((_, state)) => (None, state),
        };

        Ok((maybe_output, next_state))
    }
}
pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A>
where
P1: Parser<'a, A>,

View file

@ -1,18 +1,11 @@
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use parse::ast::{Attempting, Expr};
use parse::ident;
use parse::parser::{unexpected, unexpected_eof, Fail, Parser, State};
use parse::problems::{Problem, Problems};
use region::{Loc, Region};
use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use std::char;
use std::iter::Peekable;
pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
move |arena: &'a Bump, state: State<'a>| {
let mut problems = std::vec::Vec::new();
let mut chars = state.input.chars().peekable();
let mut chars = state.input.chars();
// String literals must start with a quote.
// If this doesn't, it must not be a string literal!
@ -26,464 +19,75 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
}
}
// If we have precisely an empty string here, don't bother allocating
// a buffer; instead, return EmptyStr immediately.
if chars.peek() == Some(&'"') {
return Ok((
Expr::EmptyStr,
// 2 because `""` has length 2
state.advance_without_indenting(2)?,
));
}
// Stores the accumulated string characters
let mut buf = String::new_in(arena);
// This caches the total string length of interpolated_pairs. Every
// time we add a new pair to interpolated_pairs, we increment this
// by the sum of whatever we parsed in order to obtain that pair.
let mut buf_col_offset: usize = 0;
// Stores interpolated identifiers, if any.
let mut interpolated_pairs = Vec::new_in(arena);
// At the parsing stage we keep the entire raw string, because the formatter
// needs the raw string. (For example, so it can "remember" whether you
// wrote \u{...} or the actual unicode character itself.)
//
// Later, in canonicalization, we'll do things like resolving
// unicode escapes and string interpolation.
//
// Since we're keeping the entire raw string, all we need to track is
// how many characters we've parsed. So far, that's 1 (the opening `"`).
let mut parsed_chars = 1;
let mut prev_ch = '"';
while let Some(ch) = chars.next() {
match ch {
// If it's a backslash, escape things.
'\\' => match chars.next() {
Some(next_ch) => {
if let Some(ident) = handle_escaped_char(
arena,
&state,
next_ch,
&mut chars,
&mut buf,
&mut problems,
)? {
let expr = Expr::Var(ident);
parsed_chars += 1;
// +2 for `\(` and then another +1 for `)` at the end
let parsed_length = buf.len() + 2 + ident.len() + 1;
// It's okay if casting fails in this section, because
// we're going to check for line length overflow at the
// end anyway. That will render this region useless,
// but the user wasn't going to see this region
// anyway if the line length overflowed.
let start_line = state.line;
// Subtract ident length and another 1 for the `)`
let start_col = state.column
+ buf_col_offset as u16
+ (parsed_length - ident.len() - 1) as u16;
let ident_region = Region {
start_line,
start_col,
end_line: start_line,
end_col: start_col + ident.len() as u16 - 1,
};
let loc_expr = Loc {
region: ident_region,
value: expr,
};
// Push the accumulated string into the pairs list,
// along with the ident that came after it.
interpolated_pairs.push((buf.into_bump_str(), loc_expr));
// Reset the buffer so we start working on a new string.
buf = String::new_in(arena);
// Advance the cached offset of how many chars we've parsed,
// so the next time we see an interpolated ident, we can
// correctly calculate its region.
buf_col_offset += parsed_length;
}
}
None => {
// We ran out of characters before finding a closed quote;
// let the loop finish normally, so we end up returning
// the error that the string was not terminated.
//
// (There's the separate problem of a trailing backslash,
// but often that will get fixed in the course of
// addressing the missing closed quote.)
()
}
},
'"' => {
// We found a closed quote; this is the end of the string!
let len_with_quotes = buf.len() + 2;
let expr = if problems.is_empty() {
let final_str = buf.into_bump_str();
if interpolated_pairs.is_empty() {
Expr::Str(final_str)
// Potentially end the string (unless this is an escaped `"`!)
if ch == '"' && prev_ch != '\\' {
let expr = if parsed_chars == 2 {
if let Some('"') = chars.next() {
// If the first three chars were all `"`, then this
// literal begins with `"""` and is a block string.
return parse_block_string(arena, state, &mut chars);
} else {
let tuple_ref =
arena.alloc((interpolated_pairs.into_bump_slice(), final_str));
Expr::InterpolatedStr(tuple_ref)
Expr::EmptyStr
}
} else {
Expr::MalformedStr(problems.into_boxed_slice())
// Start at 1 so we omit the opening `"`.
// Subtract 1 from parsed_chars so we omit the closing `"`.
Expr::Str(&state.input[1..(parsed_chars - 1)])
};
let next_state = state.advance_without_indenting(len_with_quotes)?;
let next_state = state.advance_without_indenting(parsed_chars)?;
return Ok((expr, next_state));
}
'\t' => {
// Report the problem and continue. Tabs are syntax errors,
// but maybe the rest of the string is fine!
problems.push(loc_char(Problem::Tab, &state, buf.len()));
}
'\r' => {
// Carriage returns aren't allowed in string literals,
// but maybe the rest of the string is fine!
problems.push(loc_char(Problem::CarriageReturn, &state, buf.len()));
}
'\n' => {
// We hit a newline before a close quote.
// We can't safely assume where the string was supposed
// to end, so this is an unrecoverable error.
return Err(unexpected('\n', 0, state, Attempting::StringLiteral));
}
normal_char => buf.push(normal_char),
} else if ch == '\n' {
// This is a single-line string, which cannot have newlines!
// Treat this as an unclosed string literal, and consume
// all remaining chars. This will mask all other errors, but
// it should make it easiest to debug; the file will be a giant
// error starting from where the open quote appeared.
return Err(unexpected(
'\n',
state.input.len() - 1,
state,
Attempting::StringLiteral,
));
} else {
prev_ch = ch;
}
}
// We ran out of characters before finding a closed quote
Err(unexpected_eof(
buf.len(),
parsed_chars,
Attempting::StringLiteral,
state.clone(),
))
}
}
fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
let start_line = state.line;
let start_col = state.column + buf_len as u16;
let end_line = start_line;
// All invalid chars should have a length of 1
let end_col = state.column + 1;
let region = Region {
start_line,
start_col,
end_line,
end_col,
};
Loc { region, value }
}
fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
let start_line = state.line;
let start_col = state.column + buf_len as u16;
let end_line = start_line;
// escapes should all be 2 chars long
let end_col = state.column + 1;
let region = Region {
start_line,
start_col,
end_line,
end_col,
};
Loc { region, value }
}
fn loc_escaped_unicode<'a, V>(
value: V,
state: &State<'a>,
buf_len: usize,
hex_str_len: usize,
) -> Loc<V> {
let start_line = state.line;
// +1 due to the `"` which precedes buf.
let start_col = state.column + buf_len as u16 + 1;
let end_line = start_line;
// +3 due to the `\u{` and another + 1 due to the `}`
// -1 to prevent overshooting because end col is inclusive.
let end_col = start_col + 3 + hex_str_len as u16 + 1 - 1;
let region = Region {
start_line,
start_col,
end_line,
end_col,
};
Loc { region, value }
}
#[inline(always)]
fn handle_escaped_char<'a, I>(
arena: &'a Bump,
state: &State<'a>,
ch: char,
chars: &mut Peekable<I>,
buf: &mut String<'a>,
problems: &mut Problems,
) -> Result<Option<&'a str>, (Fail, State<'a>)>
fn parse_block_string<'a, I>(
_arena: &'a Bump,
_state: State<'a>,
_chars: &mut I,
) -> ParseResult<'a, Expr<'a>>
where
I: Iterator<Item = char>,
{
match ch {
'\\' => buf.push('\\'),
'"' => buf.push('"'),
't' => buf.push('\t'),
'n' => buf.push('\n'),
'r' => buf.push('\r'),
'0' => buf.push('\0'), // We explicitly support null characters, as we
// can't be sure we won't receive them from Rust.
'u' => handle_escaped_unicode(arena, &state, chars, buf, problems)?,
'(' => {
let ident = parse_interpolated_ident(arena, state, chars)?;
return Ok(Some(ident));
}
'\t' => {
// Report and continue.
// Tabs are syntax errors, but maybe the rest of the string is fine!
problems.push(loc_escaped_char(Problem::Tab, &state, buf.len()));
}
'\r' => {
// Report and continue.
// Carriage returns aren't allowed in string literals,
// but maybe the rest of the string is fine!
problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len()));
}
'\n' => {
// Report and bail out.
// We can't safely assume where the string was supposed to end.
problems.push(loc_escaped_char(
Problem::NewlineInLiteral,
&state,
buf.len(),
));
return Err(unexpected_eof(
buf.len(),
Attempting::UnicodeEscape,
state.clone(),
));
}
_ => {
// Report and continue.
// An unsupported escaped char (e.g. \q) shouldn't halt parsing.
problems.push(loc_escaped_char(
Problem::UnsupportedEscapedChar,
&state,
buf.len(),
));
}
}
Ok(None)
}
#[inline(always)]
fn handle_escaped_unicode<'a, I>(
arena: &'a Bump,
state: &State<'a>,
chars: &mut Peekable<I>,
buf: &mut String<'a>,
problems: &mut Problems,
) -> Result<(), (Fail, State<'a>)>
where
I: Iterator<Item = char>,
{
// \u{00A0} is how you specify a Unicode code point,
// so we should always see a '{' next.
if chars.next() != Some('{') {
let start_line = state.line;
// +1 due to the `"` which precedes buf
let start_col = state.column + 1 + buf.len() as u16;
let end_line = start_line;
// All we parsed was `\u`, so end on the column after `\`'s column.
let end_col = start_col + 1;
let region = Region {
start_line,
start_col,
end_line,
end_col,
};
problems.push(Loc {
region,
value: Problem::NoUnicodeDigits,
});
// The rest of the string literal might be fine. Keep parsing!
return Ok(());
}
// Record the point in the string literal where we started parsing `\u`
let start_of_unicode = buf.len();
// Stores the accumulated unicode digits
let mut hex_str = String::new_in(arena);
while let Some(hex_char) = chars.next() {
match hex_char {
'}' => {
// Done! Validate and add it to the buffer.
match u32::from_str_radix(&hex_str, 16) {
Ok(code_pt) => {
if code_pt > 0x10FFFF {
let start_line = state.line;
// +1 due to the `"` which precedes buf
// +3 due to the `\u{` which precedes the hex digits
let start_col = state.column + 1 + buf.len() as u16 + 3;
let end_line = start_line;
// We want to underline only the number. That's the error!
// -1 because we want to end on the last digit, not
// overshoot it.
let end_col = start_col + hex_str.len() as u16 - 1;
let region = Region {
start_line,
start_col,
end_line,
end_col,
};
problems.push(Loc {
region,
value: Problem::UnicodeCodePointTooLarge,
});
} else {
// If it all checked out, add it to
// the main buffer.
match char::from_u32(code_pt) {
Some(ch) => buf.push(ch),
None => {
problems.push(loc_escaped_unicode(
Problem::InvalidUnicodeCodePoint,
&state,
start_of_unicode,
hex_str.len(),
));
}
}
}
}
Err(_) => {
let problem = if hex_str.is_empty() {
Problem::NoUnicodeDigits
} else {
Problem::NonHexCharsInUnicodeCodePoint
};
problems.push(loc_escaped_unicode(
problem,
&state,
start_of_unicode,
hex_str.len(),
));
}
}
// We are now done processing the unicode portion of the string,
// so exit the loop without further advancing the iterator.
return Ok(());
}
'\t' => {
// Report and continue.
// Tabs are syntax errors, but maybe the rest of the string is fine!
problems.push(loc_escaped_unicode(
Problem::Tab,
&state,
start_of_unicode,
hex_str.len(),
));
}
'\r' => {
// Report and continue.
// Carriage returns aren't allowed in string literals,
// but maybe the rest of the string is fine!
problems.push(loc_escaped_unicode(
Problem::CarriageReturn,
&state,
start_of_unicode,
hex_str.len(),
));
}
'\n' => {
// Report and bail out.
// We can't safely assume where the string was supposed to end.
problems.push(loc_escaped_unicode(
Problem::NewlineInLiteral,
&state,
start_of_unicode,
hex_str.len(),
));
return Err(unexpected_eof(
buf.len(),
Attempting::UnicodeEscape,
state.clone(),
));
}
normal_char => hex_str.push(normal_char),
}
// If we're about to hit the end of the string, and we didn't already
// complete parsing a valid unicode escape sequence, this is a malformed
// escape sequence - it wasn't terminated!
if chars.peek() == Some(&'"') {
// Record a problem and exit the loop early, so the string literal
// parsing logic can consume the quote and do its job as normal.
let start_line = state.line;
// +1 due to the `"` which precedes buf.
let start_col = state.column + buf.len() as u16 + 1;
let end_line = start_line;
// +3 due to the `\u{`
// -1 to prevent overshooting because end col is inclusive.
let end_col = start_col + 3 + hex_str.len() as u16 - 1;
let region = Region {
start_line,
start_col,
end_line,
end_col,
};
problems.push(Loc {
region,
value: Problem::MalformedEscapedUnicode,
});
return Ok(());
}
}
Ok(())
}
#[inline(always)]
fn parse_interpolated_ident<'a, I>(
arena: &'a Bump,
state: &State<'a>,
chars: &mut Peekable<I>,
) -> Result<&'a str, (Fail, State<'a>)>
where
I: Iterator<Item = char>,
{
// This will return Err on invalid identifiers like "if"
let ((string, next_char), state) = ident::parse_into(arena, chars, state.clone())?;
// Make sure we got a closing ) to end the interpolation.
match next_char {
Some(')') => Ok(string),
Some(ch) => Err(unexpected(ch, 0, state, Attempting::InterpolatedString)),
None => Err(unexpected_eof(0, Attempting::InterpolatedString, state)),
}
// So far we have consumed the `"""` and that's it.
let _parsed_chars = 3;
panic!("TODO parse block string, advance state, etc");
}

View file

@ -52,7 +52,19 @@ pub struct Located<T> {
}
impl<T> Located<T> {
// NOTE(review): this span is a diff hunk; the one-line signature taking
// `(value, region)` appears to be the removed predecessor of the
// multi-line signature that follows it.
pub fn new(value: T, region: Region) -> Located<T> {
/// Builds a `Located` from the four coordinates of its region (start
/// line/column and end line/column) plus the value it wraps.
pub fn new(
start_line: u32,
start_col: u16,
end_line: u32,
end_col: u16,
value: T,
) -> Located<T> {
// Assemble the region here so callers don't have to build a `Region` themselves.
let region = Region {
start_line,
start_col,
end_line,
end_col,
};
Located { value, region }
}
}

View file

@ -11,7 +11,7 @@
// , _errors :: [Error.Error]
// }
use canonicalize::Symbol;
use can::symbol::Symbol;
use collections::ImMap;
use subs::{Content, Descriptor, FlatType, Subs, Variable};
use types::Constraint::{self, *};
@ -23,13 +23,15 @@ pub fn solve(env: &Env, subs: &mut Subs, constraint: Constraint) {
// println!("\nSolving:\n\n\t{:?}\n\n", constraint);
match constraint {
True => (),
Eq(typ, expected_type, region) => {
Eq(typ, expected_type, _region) => {
// TODO use region?
let actual = type_to_variable(subs, typ);
let expected = type_to_variable(subs, expected_type.get_type());
subs.union(actual, expected);
}
Lookup(symbol, expected_type, region) => {
Lookup(symbol, expected_type, _region) => {
// TODO use region?
let actual =
subs.copy_var(env.get(&symbol).unwrap_or_else(|| {
panic!("Could not find symbol {:?} in env {:?}", symbol, env)

View file

@ -192,7 +192,7 @@ impl Into<String> for RocStr {
fn into(self) -> String {
let len_msbyte = self.len_msbyte();
panic!("I'm not sure this works the way we want it to. Need to review.");
// TODO I'm not sure this works the way we want it to. Need to review.
if flagged_as_short_string(len_msbyte) {
// Drop the "is this a short string?" flag
@ -208,7 +208,6 @@ impl Into<String> for RocStr {
// same memory layout as a Rust &str slice.
let str_slice = unsafe { mem::transmute::<[u8; 16], &str>(self.0.raw) };
let string = str_slice.to_string();
let mut roc_str_mut = self;
// Drop will deallocate the bytes, which we don't want in this case.
// String is using those bytes now!
@ -241,14 +240,14 @@ impl From<String> for RocStr {
RocStr(InnerStr { raw: buffer })
} else {
let bytes_ptr = string.as_bytes().clone().as_ptr();
let long = LongStr {
bytes: MaybeUninit::new(bytes_ptr),
length: str_len,
};
panic!("TODO: use mem::forget on the string and steal its bytes!");
RocStr(InnerStr { long })
// let bytes_ptr = string.as_bytes().clone().as_ptr();
// let long = LongStr {
// bytes: MaybeUninit::new(bytes_ptr),
// length: str_len,
// };
// RocStr(InnerStr { long })
}
}
}

View file

@ -75,7 +75,6 @@ impl Subs {
}
pub fn mk_flex_var(&mut self) -> Variable {
/// TODO is "flex" the same as "unbound" and "rigid" the same as "bound"?!
self.fresh(flex_var_descriptor())
}

View file

@ -1,4 +1,4 @@
use canonicalize::Symbol;
use can::symbol::Symbol;
use collections::ImMap;
use operator::{ArgSide, Operator};
use region::Located;

View file

@ -1,183 +1,76 @@
use combine::error::ParseError;
use combine::stream::state::State;
use combine::stream::Stream;
use combine::{eof, Parser};
use roc::collections::MutMap;
use roc::deprecated::parse_state::IndentablePosition;
use roc::expr::{Expr, Pattern};
extern crate bumpalo;
extern crate roc;
use self::bumpalo::Bump;
use roc::can;
use roc::can::expr::Expr;
use roc::can::problem::Problem;
use roc::can::procedure::Procedure;
use roc::can::symbol::Symbol;
use roc::can::Output;
use roc::collections::{ImMap, MutMap};
use roc::ident::Ident;
use roc::parse;
use roc::parse::ast::{self, Attempting};
use roc::parse::parser::{Fail, Parser, State};
use roc::region::{Located, Region};
use std::hash::Hash;
pub fn loc_box<T>(val: T) -> Box<Located<T>> {
Box::new(loc(val))
/// Runs the expression parser over `input`, allocating into `arena`.
///
/// Yields the parsed expression on success and the `Fail` on error; the parser
/// state that accompanies either outcome is discarded.
pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> {
    let initial_state = State::new(&input, Attempting::Module);
    let expr_parser = parse::expr();

    match expr_parser.parse(&arena, initial_state) {
        Ok((expr, _)) => Ok(expr),
        Err((fail, _)) => Err(fail),
    }
}
/// Wraps `val` in a `Located` whose region is `Region::zero()`.
pub fn loc<T>(val: T) -> Located<T> {
    let region = Region::zero();

    Located::new(val, region)
}
pub fn located<T>(
start_line: u32,
start_col: u16,
end_line: u32,
end_col: u16,
val: T,
) -> Located<T> {
Located::new(
val,
Region {
start_line,
start_col,
end_line,
end_col,
},
/// Convenience wrapper around `can_expr_with`: uses a fresh arena, the name
/// `"blah"`, and default (empty) maps for the declared idents and variants.
#[allow(dead_code)]
pub fn can_expr(expr_str: &str) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
    let arena = Bump::new();
    let declared_idents = ImMap::default();
    let declared_variants = ImMap::default();

    can_expr_with(
        &arena,
        "blah",
        expr_str,
        &declared_idents,
        &declared_variants,
    )
}
/// Re-wraps the value of `located_val` with `loc`, dropping its original region.
pub fn zero_loc<T>(located_val: Located<T>) -> Located<T> {
    let value = located_val.value;

    loc(value)
}
/// Zero out the parse locations on everything in this Expr, so we can compare expected/actual without
/// having to account for that.
pub fn zero_loc_expr(expr: Expr) -> Expr {
use roc::expr::Expr::*;
match expr {
Int(_) | Float(_) | EmptyStr | Str(_) | Char(_) | Var(_) | EmptyRecord | EmptyList => expr,
InterpolatedStr(pairs, string) => InterpolatedStr(
pairs
.into_iter()
.map(|(prefix, ident)| (prefix, zero_loc(ident)))
.collect(),
string,
),
List(elems) => {
let zeroed_elems = elems
.into_iter()
.map(|loc_expr| loc(zero_loc_expr(loc_expr.value)))
.collect();
List(zeroed_elems)
}
Assign(assignments, loc_ret) => {
let zeroed_assignments = assignments
.into_iter()
.map(|(pattern, loc_expr)| {
(
zero_loc_pattern(pattern),
loc(zero_loc_expr(loc_expr.value)),
#[allow(dead_code)]
pub fn can_expr_with(
arena: &Bump,
name: &str,
expr_str: &str,
declared_idents: &ImMap<Ident, (Symbol, Region)>,
declared_variants: &ImMap<Symbol, Located<Box<ast::VariantName>>>,
) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
let expr = parse_with(&arena, expr_str).unwrap_or_else(|_| {
panic!(
"can_expr_with() got a parse error when attempting to canonicalize:\n\n{:?}",
expr_str
)
})
.collect();
});
Assign(zeroed_assignments, loc_box(zero_loc_expr((*loc_ret).value)))
}
Apply(fn_expr, args) => Apply(
loc_box(zero_loc_expr((*fn_expr).value)),
args.into_iter()
.map(|arg| loc(zero_loc_expr(arg.value)))
.collect(),
),
Operator(left, op, right) => Operator(
loc_box(zero_loc_expr((*left).value)),
zero_loc(op),
loc_box(zero_loc_expr((*right).value)),
),
Closure(patterns, body) => Closure(
patterns.into_iter().map(zero_loc).collect(),
loc_box(zero_loc_expr((*body).value)),
),
ApplyVariant(_, None) => expr,
ApplyVariant(name, Some(args)) => ApplyVariant(
let home = "Test".to_string();
let (loc_expr, output, problems, procedures) = can::canonicalize_declaration(
home,
name,
Some(
args.into_iter()
.map(|arg| loc(zero_loc_expr(arg.value)))
.collect(),
),
),
If(condition, if_true, if_false) => If(
loc_box(zero_loc_expr((*condition).value)),
loc_box(zero_loc_expr((*if_true).value)),
loc_box(zero_loc_expr((*if_false).value)),
),
Case(condition, branches) => Case(
loc_box(zero_loc_expr((*condition).value)),
branches
.into_iter()
.map(|(pattern, loc_expr)| {
(
zero_loc_pattern(pattern),
loc(zero_loc_expr(loc_expr.value)),
)
})
.collect(),
),
}
Located::new(0, 0, 0, 0, expr),
declared_idents,
declared_variants,
);
(loc_expr.value, output, problems, procedures)
}
/// Zero out the parse locations on everything in this Pattern, so we can compare expected/actual without
/// having to account for that.
///
/// Patterns without nested patterns are re-wrapped with `loc` as they are.
/// `Variant` patterns get their name re-wrapped with `loc`, and any argument
/// patterns are processed recursively.
pub fn zero_loc_pattern(loc_pattern: Located<Pattern>) -> Located<Pattern> {
use roc::expr::Pattern::*;
// NOTE(review): the commented-out `mut_map_from_pairs` lines interleaved in
// this body are unrelated to pattern handling; they appear to have been mixed
// in by the rendering of this listing — confirm against the real file.
// pub fn mut_map_from_pairs<K, V, I>(pairs: I) -> MutMap<K, V>
// where
// I: IntoIterator<Item = (K, V)>,
// K: Hash + Eq,
// {
// let mut answer = MutMap::default();
let pattern = loc_pattern.value;
// for (key, value) in pairs {
// answer.insert(key, value);
// }
match pattern {
// Nothing nested inside these, so only the outer location is replaced.
Identifier(_) | IntLiteral(_) | FloatLiteral(_) | ExactString(_) | EmptyRecordLiteral
| Underscore => loc(pattern),
Variant(loc_name, None) => loc(Variant(loc(loc_name.value), None)),
// Recurse into each argument pattern of the variant.
Variant(loc_name, Some(opt_located_patterns)) => loc(Variant(
loc(loc_name.value),
Some(
opt_located_patterns
.into_iter()
.map(|loc_pat| zero_loc_pattern(loc_pat))
.collect(),
),
)),
}
}
/// Builds a `MutMap` by inserting every `(key, value)` pair from `pairs`,
/// in iteration order, into an initially default map.
#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_canonicalize.rs
pub fn mut_map_from_pairs<K, V, I>(pairs: I) -> MutMap<K, V>
where
    I: IntoIterator<Item = (K, V)>,
    K: Hash + Eq,
{
    pairs
        .into_iter()
        .fold(MutMap::default(), |mut map, (key, value)| {
            map.insert(key, value);
            map
        })
}
// PARSE HELPERS
/// Parser for a single expression that must be followed by end of input:
/// the deprecated expression parser with `eof()` skipped afterwards.
#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_parse.rs
pub fn standalone_expr<I>() -> impl Parser<Input = I, Output = Expr>
where
    I: Stream<Item = char, Position = IndentablePosition>,
    I::Error: ParseError<I::Item, I::Range, I::Position>,
{
    let expr_parser = roc::deprecated::parse::expr();

    expr_parser.skip(eof())
}
/// Like `parse_standalone`, but passes the parsed expression through
/// `zero_loc_expr` before returning it. Errors are returned unchanged.
#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_parse.rs
pub fn parse_without_loc(actual_str: &str) -> Result<(Expr, String), String> {
    let (expr, leftover) = parse_standalone(actual_str)?;

    Ok((zero_loc_expr(expr), leftover))
}
/// Parses `actual_str` with `standalone_expr`.
///
/// On success returns the expression together with the remaining input as a
/// `String`; on failure returns the parse errors rendered with `to_string()`.
#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_parse.rs
pub fn parse_standalone(actual_str: &str) -> Result<(Expr, String), String> {
    let positioner = IndentablePosition::default();
    let parse_state: State<&str, IndentablePosition> =
        State::with_positioner(actual_str, positioner);

    standalone_expr()
        .easy_parse(parse_state)
        .map(|(expr, state)| (expr, state.input.to_string()))
        .map_err(|errors| errors.to_string())
}
// answer
// }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,127 +0,0 @@
// #[macro_use] extern crate pretty_assertions;
extern crate combine;
extern crate fraction;
extern crate roc;
// Evaluation tests. Every helper and test case inside this module is
// commented out, so the module currently contributes no code of its own.
#[cfg(test)]
mod test_eval {
// use roc::operator::Operator::*;
// use roc::expr::Pattern::*;
// use roc::expr::Expr::*;
// use roc::expr::{Expr, Raw};
// use roc::eval;
// use roc::eval::Evaluated;
// use roc::region::{Located, Region};
// use fraction::Fraction;
// fn loc_box<T>(val: T) -> Box<Located<T>> {
// Box::new(loc(val))
// }
// fn eval(expr: Expr<Raw>) -> Evaluated {
// eval::eval(loc(expr))
// }
// fn loc<T>(val: T) -> Located<T> {
// Located::new(val, Region {
// start_line: 0,
// start_col: 0,
// end_line: 0,
// end_col: 0,
// })
// }
// #[test]
// fn one_plus_one() {
// assert_eq!(
// eval(Operator(loc_box(Int(1)), loc(Plus), loc_box(Int(1)))),
// Evaluated::Int(2)
// );
// }
// #[test]
// fn point_one_plus_point_two() {
// // 0.1 + 0.2 == 0.3 THAT'S WHAT'S UP
// assert_eq!(
// eval(Operator(loc_box(Frac(1, 10)), loc(Plus), loc_box(Frac(2, 10)))),
// Evaluated::Frac(Fraction::new(3u64, 10u64))
// );
// }
// #[test]
// fn addition_reduces() {
// assert_eq!(
// eval(Operator(loc_box(Frac(1, 3)), loc(Plus), loc_box(Frac(7, 14)))),
// Evaluated::Frac(Fraction::new(5u64, 6u64))
// );
// }
// #[test]
// fn division_reduces() {
// assert_eq!(
// eval(Operator(loc_box(Frac(1, 3)), loc(Slash), loc_box(Frac(7, 14)))),
// Evaluated::ApplyVariant(
// "Ok".to_string(),
// Some(vec![Evaluated::Frac(Fraction::new(2u64, 3u64))])
// )
// );
// }
// #[test]
// fn division_by_zero() {
// assert_eq!(
// eval(Operator(loc_box(Frac(1, 10)), loc(Slash), loc_box(Frac(0, 10)))),
// Evaluated::ApplyVariant(
// "Err".to_string(),
// Some(vec![Evaluated::ApplyVariant("DivisionByZero".to_string(), None)])
// )
// );
// }
// #[test]
// fn string_interpolation() {
// assert_eq!(
// eval(
// Assign(loc(Identifier("foo".to_string())), loc_box(Str("one".to_string())),
// loc_box(Assign(loc(Identifier("bar".to_string())), loc_box(Str("two".to_string())),
// loc_box(Assign(loc(Identifier("baz".to_string())), loc_box(Str("three".to_string())),
// loc_box(InterpolatedStr(
// // "hi_\(foo)_\(bar)_\(baz)_string!"
// vec![
// ("hi_".to_string(), loc(Raw::new("foo".to_string()))),
// ("_".to_string(), loc(Raw::new("bar".to_string()))),
// ("_".to_string(), loc(Raw::new("baz".to_string()))),
// ],
// "_string!".to_string()
// ))
// )))))
// ),
// Evaluated::Str("hi_one_two_three_string!".to_string())
// );
// }
// #[test]
// fn if_else() {
// assert_eq!(
// eval(
// If(loc_box(ApplyVariant("True".to_string(), None)),
// loc_box(Operator(loc_box(Int(1)), loc(Plus), loc_box(Int(2)))),
// loc_box(Operator(loc_box(Int(4)), loc(Plus), loc_box(Int(5))))
// )
// ),
// Evaluated::Int(3)
// );
// assert_eq!(
// eval(
// If(loc_box(ApplyVariant("False".to_string(), None)),
// loc_box(Operator(loc_box(Int(1)), loc(Plus), loc_box(Int(2)))),
// loc_box(Operator(loc_box(Int(4)), loc(Plus), loc_box(Int(5))))
// )
// ),
// Evaluated::Int(9)
// );
// }
}

View file

@ -3,13 +3,10 @@ extern crate pretty_assertions;
#[macro_use]
extern crate indoc;
extern crate bumpalo;
extern crate combine; // OBSOLETE
extern crate roc;
mod helpers;
#[cfg(test)]
mod test_formatter {
mod test_format {
use bumpalo::Bump;
use roc::parse;
use roc::parse::ast::{Attempting, Expr};
@ -85,21 +82,28 @@ mod test_formatter {
));
}
// #[test]
// fn basic_string() {
// assert_formats_same(indoc!(
// r#"
// "blah"
// "#
// ));
// }
// A plain string literal passed to `assert_formats_same`.
#[test]
fn basic_string() {
    let source = indoc!(
        r#"
"blah"
"#
    );

    assert_formats_same(source);
}
// #[test]
// fn escaped_unicode_string() {
// assert_formats_same(indoc!(
// r#"
// "unicode: \u{A00A}!"
// "#
// ));
// }
// A string literal containing a `\u{...}` escape passed to `assert_formats_same`.
#[test]
fn escaped_unicode_string() {
    let source = indoc!(
        r#"
"unicode: \u{A00A}!"
"#
    );

    assert_formats_same(source);
}
// RECORD LITERALS
// The empty record literal `{}` passed to `assert_formats_same`.
#[test]
fn empty_record() {
    let source = "{}";

    assert_formats_same(source);
}
}

File diff suppressed because it is too large Load diff

View file

@ -3,7 +3,6 @@ extern crate pretty_assertions;
#[macro_use]
extern crate indoc;
extern crate bumpalo;
extern crate combine; // OBSOLETE
extern crate roc;
extern crate quickcheck;
@ -14,25 +13,15 @@ extern crate quickcheck_macros;
mod helpers;
#[cfg(test)]
mod test_parser {
mod test_parse {
use bumpalo::Bump;
use helpers::located;
use roc::parse;
use helpers::parse_with;
use roc::parse::ast::Attempting;
use roc::parse::ast::Expr::{self, *};
use roc::parse::parser::{Fail, FailReason, Parser, State};
use roc::parse::problems::Problem;
use roc::region::{Located, Region};
use roc::parse::parser::{Fail, FailReason};
use roc::region::Region;
use std::{f64, i64};
// Runs the expression parser over `input`, allocating into `arena`; keeps the
// expression or the `Fail` and discards the accompanying parser state.
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
    let initial_state = State::new(&input, Attempting::Module);
    let expr_parser = parse::expr();

    match expr_parser.parse(&arena, initial_state) {
        Ok((expr, _)) => Ok(expr),
        Err((fail, _)) => Err(fail),
    }
}
fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) {
let arena = Bump::new();
let actual = parse_with(&arena, input);
@ -48,16 +37,6 @@ mod test_parser {
assert_eq!(Err(expected_fail), actual);
}
// Asserts that parsing `input` succeeds with `Expr::MalformedStr` carrying
// exactly `expected_probs`.
fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec<Located<Problem>>) {
    let arena = Bump::new();
    let parsed = parse_with(&arena, input);
    let problems = expected_probs.into_boxed_slice();

    assert_eq!(Ok(Expr::MalformedStr(problems)), parsed);
}
// STRING LITERALS
fn expect_parsed_str(input: &str, expected: &str) {
@ -77,7 +56,7 @@ mod test_parser {
}
#[test]
fn one_char_list() {
fn one_char_string() {
assert_parses_to(
indoc!(
r#"
@ -89,7 +68,7 @@ mod test_parser {
}
#[test]
fn multi_char_list() {
fn multi_char_string() {
assert_parses_to(
indoc!(
r#"
@ -113,24 +92,11 @@ mod test_parser {
// Each pair is handed to `expect_parsed_str` in order: backslash, double
// quote, tab, carriage return and newline.
#[test]
fn string_with_special_escapes() {
    let cases: [(&str, &str); 5] = [
        (r#"x\x"#, r#""x\\x""#),
        (r#"x"x"#, r#""x\"x""#),
        ("x\tx", r#""x\tx""#),
        ("x\rx", r#""x\rx""#),
        ("x\nx", r#""x\nx""#),
    ];

    for &(first, second) in cases.iter() {
        expect_parsed_str(first, second);
    }
}
// Checks that `\\(` inside a string literal is parsed as an escaped backslash
// followed by plain text, producing a `Str` rather than an interpolation.
#[test]
fn string_with_escaped_interpolation() {
assert_parses_to(
// This should NOT be string interpolation, because of the \\
indoc!(
r#"
"abcd\\(efg)hij"
"#
),
Str(r#"abcd\(efg)hij"#.into()),
);
// NOTE(review): the `expect_parsed_str` calls below exercise the special
// escapes (\\, \", \t, \r, \n) rather than interpolation; they look like they
// belong to `string_with_special_escapes` — confirm against the real file.
expect_parsed_str(r#"x\\x"#, r#""x\\x""#);
expect_parsed_str(r#"x\"x"#, r#""x\"x""#);
expect_parsed_str(r#"x\tx"#, r#""x\tx""#);
expect_parsed_str(r#"x\rx"#, r#""x\rx""#);
expect_parsed_str(r#"x\nx"#, r#""x\nx""#);
}
#[test]
@ -139,159 +105,6 @@ mod test_parser {
expect_parsed_str("x'x", r#""x'x""#);
}
// Two in-range unicode escapes, each handed to `expect_parsed_str`.
#[test]
fn string_with_valid_unicode_escapes() {
    let cases: [(&str, &str); 2] = [
        ("x\u{00A0}x", r#""x\u{00A0}x""#),
        ("x\u{101010}x", r#""x\u{101010}x""#),
    ];

    for &(first, second) in cases.iter() {
        expect_parsed_str(first, second);
    }
}
#[test]
fn string_with_too_large_unicode_escape() {
    // 110000 is one past the largest accepted code point (10FFFF) — the same
    // limit Rust itself enforces — so this must be reported as too large.
    let input = r#""abc\u{110000}def""#;
    let expected_problem = located(0, 7, 0, 12, Problem::UnicodeCodePointTooLarge);

    assert_malformed_str(input, vec![expected_problem]);
}
#[test]
fn string_with_no_unicode_digits() {
    // The braces are present but nothing is between them.
    let input = r#""blah\u{}foo""#;
    let expected_problem = located(0, 5, 0, 8, Problem::NoUnicodeDigits);

    assert_malformed_str(input, vec![expected_problem]);
}
#[test]
fn string_with_no_unicode_opening_brace() {
    // The opening curly brace is missing. The parser cannot know whether the
    // later `}` was meant to close the escape, so the expected report is that
    // no digits were specified.
    let input = r#""abc\u00A0}def""#;
    let expected_problem = located(0, 4, 0, 5, Problem::NoUnicodeDigits);

    assert_malformed_str(input, vec![expected_problem]);
}
#[test]
fn string_with_no_unicode_closing_brace() {
    // The escape is opened with `{` but never closed.
    let input = r#""blah\u{stuff""#;
    let expected_problem = located(0, 5, 0, 12, Problem::MalformedEscapedUnicode);

    assert_malformed_str(input, vec![expected_problem]);
}
#[test]
fn string_with_no_unicode_braces() {
    // Neither curly brace is present after `\u`.
    let input = r#""zzzz\uzzzzz""#;
    let expected_problem = located(0, 5, 0, 6, Problem::NoUnicodeDigits);

    assert_malformed_str(input, vec![expected_problem]);
}
// Interpolation is the very first thing in the string, so its prefix is empty.
#[test]
fn string_with_interpolation_at_start() {
    let source = indoc!(
        r#"
"\(abc)defg"
"#
    );
    let prefixed_idents = vec![("", located(0, 2, 0, 4, Var("abc")))];
    let suffix = "defg";
    let arena = Bump::new();
    let parsed = parse_with(&arena, source);

    assert_eq!(
        Ok(InterpolatedStr(&(
            arena.alloc_slice_clone(&prefixed_idents),
            suffix
        ))),
        parsed
    );
}
// Interpolation is the last thing in the string, so the trailing text is empty.
#[test]
fn string_with_interpolation_at_end() {
    let source = indoc!(
        r#"
"abcd\(efg)"
"#
    );
    let prefixed_idents = vec![("abcd", located(0, 6, 0, 8, Var("efg")))];
    let suffix = "";
    let arena = Bump::new();
    let parsed = parse_with(&arena, source);

    assert_eq!(
        Ok(InterpolatedStr(&(
            arena.alloc_slice_clone(&prefixed_idents),
            suffix
        ))),
        parsed
    );
}
// Interpolation with plain text on both sides.
#[test]
fn string_with_interpolation_in_middle() {
    let source = indoc!(
        r#"
"abc\(defg)hij"
"#
    );
    let prefixed_idents = vec![("abc", located(0, 5, 0, 8, Var("defg")))];
    let suffix = "hij";
    let arena = Bump::new();
    let parsed = parse_with(&arena, source);

    assert_eq!(
        Ok(InterpolatedStr(&(
            arena.alloc_slice_clone(&prefixed_idents),
            suffix
        ))),
        parsed
    );
}
// Two interpolations, each preceded by plain text, with trailing text at the end.
#[test]
fn string_with_two_interpolations_in_middle() {
    let source = indoc!(
        r#"
"abc\(defg)hi\(jkl)mn"
"#
    );
    let prefixed_idents = vec![
        ("abc", located(0, 5, 0, 8, Var("defg"))),
        ("hi", located(0, 14, 0, 16, Var("jkl"))),
    ];
    let suffix = "mn";
    let arena = Bump::new();
    let parsed = parse_with(&arena, source);

    assert_eq!(
        Ok(InterpolatedStr(&(
            arena.alloc_slice_clone(&prefixed_idents),
            suffix
        ))),
        parsed
    );
}
// Four interpolations: the first has no prefix and the last has no trailing text.
#[test]
fn string_with_four_interpolations() {
    let source = indoc!(
        r#"
"\(abc)def\(ghi)jkl\(mno)pqrs\(tuv)"
"#
    );
    let prefixed_idents = vec![
        ("", located(0, 2, 0, 4, Var("abc"))),
        ("def", located(0, 11, 0, 13, Var("ghi"))),
        ("jkl", located(0, 20, 0, 22, Var("mno"))),
        ("pqrs", located(0, 30, 0, 32, Var("tuv"))),
    ];
    let suffix = "";
    let arena = Bump::new();
    let parsed = parse_with(&arena, source);

    assert_eq!(
        Ok(InterpolatedStr(&(
            arena.alloc_slice_clone(&prefixed_idents),
            suffix
        ))),
        parsed
    );
}
#[test]
fn empty_source_file() {
assert_parsing_fails("", FailReason::Eof(Region::zero()), Attempting::Expression);
@ -322,124 +135,109 @@ mod test_parser {
// Checks that the literal `0` parses to an `Int` expression.
// NOTE(review): the two assertions give `Int` an integer payload and a string
// payload for the same input; presumably one supersedes the other — confirm.
#[test]
fn zero_int() {
assert_parses_to("0", Int(0));
assert_parses_to("0", Int("0"));
}
// Checks that positive integer literals parse to `Int` expressions.
// NOTE(review): assertions with integer payloads and with string payloads
// appear together; presumably one set supersedes the other — confirm.
#[test]
fn positive_int() {
assert_parses_to("1", Int(1));
assert_parses_to("42", Int(42));
assert_parses_to("1", Int("1"));
assert_parses_to("42", Int("42"));
}
// Checks that negative integer literals parse to `Int` expressions.
// NOTE(review): assertions with integer payloads and with string payloads
// appear together; presumably one set supersedes the other — confirm.
#[test]
fn negative_int() {
assert_parses_to("-1", Int(-1));
assert_parses_to("-42", Int(-42));
assert_parses_to("-1", Int("-1"));
assert_parses_to("-42", Int("-42"));
}
// Checks that the decimal rendering of `i64::MAX` parses to an `Int` expression.
// NOTE(review): one assertion expects the numeric value, the other the source
// text; presumably one supersedes the other — confirm.
#[test]
fn highest_int() {
assert_parses_to(i64::MAX.to_string().as_str(), Int(i64::MAX));
assert_parses_to(
i64::MAX.to_string().as_str(),
Int(i64::MAX.to_string().as_str()),
);
}
// Checks that the decimal rendering of `i64::MIN` parses to an `Int` expression.
// NOTE(review): one assertion expects the numeric value, the other the source
// text; presumably one supersedes the other — confirm.
#[test]
fn lowest_int() {
assert_parses_to(i64::MIN.to_string().as_str(), Int(i64::MIN));
assert_parses_to(
i64::MIN.to_string().as_str(),
Int(i64::MIN.to_string().as_str()),
);
}
// Checks integer literals that contain underscores, including trailing and
// doubled underscores.
// NOTE(review): for every input there is one assertion with an integer payload
// and one with a string payload; presumably one set supersedes the other —
// confirm.
#[test]
fn int_with_underscore() {
assert_parses_to("1_2_34_567", Int(1234567));
assert_parses_to("-1_2_34_567", Int(-1234567));
assert_parses_to("1_2_34_567", Int("1_2_34_567"));
assert_parses_to("-1_2_34_567", Int("-1_2_34_567"));
// The following cases are silly. They aren't supported on purpose,
// but there would be a performance cost to explicitly disallowing them,
// which doesn't seem like it would benefit anyone.
assert_parses_to("1_", Int(1));
assert_parses_to("1__23", Int(123));
assert_parses_to("1_", Int("1_"));
assert_parses_to("1__23", Int("1__23"));
}
// Property: the decimal rendering of any `i64` parses to `Int` of that value.
#[quickcheck]
fn all_i64_values_parse(num: i64) {
    let source = num.to_string();

    assert_parses_to(source.as_str(), Int(num));
}
// One past `i64::MAX` must be reported as outside the supported range.
#[test]
fn int_too_large() {
    let one_past_max = (i64::MAX as i128 + 1).to_string();

    assert_parses_to(
        one_past_max.as_str(),
        MalformedInt(Problem::OutsideSupportedRange),
    );
}
// One below `i64::MIN` is expected to parse to
// `MalformedInt(Problem::OutsideSupportedRange)`.
#[test]
fn int_too_small() {
assert_parses_to(
(i64::MIN as i128 - 1).to_string().as_str(),
MalformedInt(Problem::OutsideSupportedRange),
);
// NOTE(review): `num` is not defined anywhere in this function; the line below
// looks like it belongs to `all_i64_values_parse` — confirm against the real
// file.
assert_parses_to(num.to_string().as_str(), Int(num.to_string().as_str()));
}
// FLOAT LITERALS
// Checks that `0.0` parses to a `Float` expression.
// NOTE(review): the two assertions give `Float` a numeric payload and a string
// payload for the same input; presumably one supersedes the other — confirm.
#[test]
fn zero_float() {
assert_parses_to("0.0", Float(0.0));
assert_parses_to("0.0", Float("0.0"));
}
// Positive float literals, each expected to parse to `Float` of its value.
#[test]
fn positive_float() {
    let cases = [("1.0", 1.0), ("1.1", 1.1), ("42.0", 42.0), ("42.9", 42.9)];

    for &(source, value) in cases.iter() {
        assert_parses_to(source, Float(value));
    }
}
// Checks that `f64::MAX` rendered with a trailing `.0` parses to a `Float`.
#[test]
fn highest_float() {
assert_parses_to(&format!("{}.0", f64::MAX), Float(f64::MAX));
// NOTE(review): the assertions below use small positive literals with string
// payloads; they look like they belong to `positive_float` — confirm against
// the real file.
assert_parses_to("1.0", Float("1.0"));
assert_parses_to("1.1", Float("1.1"));
assert_parses_to("42.0", Float("42.0"));
assert_parses_to("42.9", Float("42.9"));
}
// Negative float literals, each expected to parse to `Float` of its value.
#[test]
fn negative_float() {
    let cases = [
        ("-1.0", -1.0),
        ("-1.1", -1.1),
        ("-42.0", -42.0),
        ("-42.9", -42.9),
    ];

    for &(source, value) in cases.iter() {
        assert_parses_to(source, Float(value));
    }
}
// Checks that `f64::MIN` rendered with a trailing `.0` parses to a `Float`.
#[test]
fn lowest_float() {
assert_parses_to(&format!("{}.0", f64::MIN), Float(f64::MIN));
// NOTE(review): the assertions below use small negative literals with string
// payloads; they look like they belong to `negative_float` — confirm against
// the real file.
assert_parses_to("-1.0", Float("-1.0"));
assert_parses_to("-1.1", Float("-1.1"));
assert_parses_to("-42.0", Float("-42.0"));
assert_parses_to("-42.9", Float("-42.9"));
}
// Checks float literals with underscores on both sides of the decimal point.
// NOTE(review): assertions with numeric payloads and with string payloads
// appear together; presumably one set supersedes the other — confirm.
#[test]
fn float_with_underscores() {
assert_parses_to("1_23_456.0_1_23_456", Float(123456.0123456));
assert_parses_to("-1_23_456.0_1_23_456", Float(-123456.0123456));
assert_parses_to("1_23_456.0_1_23_456", Float("1_23_456.0_1_23_456"));
assert_parses_to("-1_23_456.0_1_23_456", Float("-1_23_456.0_1_23_456"));
}
// `f64::MAX` rendered with a trailing `.0` must parse to `Float` of that same text.
#[test]
fn highest_float() {
    let source = format!("{}.0", f64::MAX);
    let expected = Float(&source);

    assert_parses_to(&source, expected);
}
// `f64::MIN` rendered with a trailing `.0` must parse to `Float` of that same text.
#[test]
fn lowest_float() {
    let source = format!("{}.0", f64::MIN);
    let expected = Float(&source);

    assert_parses_to(&source, expected);
}
// Property: the `to_string()` rendering of any `f64` parses to a `Float`.
// NOTE(review): one assertion expects the numeric value, the other the source
// text; presumably one supersedes the other — confirm.
#[quickcheck]
fn all_f64_values_parse(num: f64) {
assert_parses_to(num.to_string().as_str(), Float(num));
assert_parses_to(num.to_string().as_str(), Float(num.to_string().as_str()));
}
// RECORD LITERALS
#[test]
fn float_too_large() {
assert_parses_to(
format!("{}1.0", f64::MAX).as_str(),
MalformedFloat(Problem::OutsideSupportedRange),
);
fn empty_record() {
assert_parses_to("{}", EmptyRecord);
}
// `f64::MIN` rendered with `1.0` appended must be reported as outside the
// supported range.
#[test]
fn float_too_small() {
    let source = format!("{}1.0", f64::MIN);

    assert_parses_to(
        source.as_str(),
        MalformedFloat(Problem::OutsideSupportedRange),
    );
}
// TODO test what happens when interpolated strings contain 1+ malformed idents
// TODO test hex/oct/binary parsing
//
// TODO test for \t \r and \n in string literals *outside* unicode escape sequence!
//