Merge remote-tracking branch 'origin/trunk' into refcount

This commit is contained in:
Folkert 2020-08-08 22:34:14 +02:00
commit 078c6df677
53 changed files with 3604 additions and 2377 deletions

31
Cargo.lock generated
View file

@ -69,6 +69,13 @@ version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034" checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034"
[[package]]
name = "arena-pool"
version = "0.1.0"
dependencies = [
"pretty_assertions",
]
[[package]] [[package]]
name = "arrayvec" name = "arrayvec"
version = "0.5.1" version = "0.5.1"
@ -386,6 +393,20 @@ dependencies = [
"itertools", "itertools",
] ]
[[package]]
name = "crossbeam"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69323bff1fb41c635347b8ead484a5ca6c3f11914d784170b158d8449ab07f8e"
dependencies = [
"cfg-if",
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.4.3" version = "0.4.3"
@ -527,6 +548,12 @@ version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]] [[package]]
name = "env_logger" name = "env_logger"
version = "0.6.2" version = "0.6.2"
@ -2167,9 +2194,11 @@ name = "roc_load"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"crossbeam",
"indoc", "indoc",
"inlinable_string", "inlinable_string",
"maplit", "maplit",
"num_cpus",
"pretty_assertions", "pretty_assertions",
"quickcheck", "quickcheck",
"quickcheck_macros", "quickcheck_macros",
@ -2184,7 +2213,6 @@ dependencies = [
"roc_solve", "roc_solve",
"roc_types", "roc_types",
"roc_unify", "roc_unify",
"tokio",
] ]
[[package]] [[package]]
@ -2230,6 +2258,7 @@ name = "roc_parse"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"encode_unicode",
"indoc", "indoc",
"inlinable_string", "inlinable_string",
"pretty_assertions", "pretty_assertions",

View file

@ -20,6 +20,7 @@ members = [
"compiler/load", "compiler/load",
"compiler/gen", "compiler/gen",
"compiler/build", "compiler/build",
"compiler/arena_pool",
"vendor/ena", "vendor/ena",
"vendor/pathfinding", "vendor/pathfinding",
"vendor/pretty", "vendor/pretty",

View file

@ -2,19 +2,17 @@
extern crate clap; extern crate clap;
use bumpalo::Bump; use bumpalo::Bump;
use clap::{App, Arg, ArgMatches};
use roc_build::program::gen; use roc_build::program::gen;
use roc_collections::all::MutMap; use roc_collections::all::MutMap;
use roc_gen::llvm::build::OptLevel; use roc_gen::llvm::build::OptLevel;
use roc_load::file::LoadingProblem; use roc_load::file::LoadingProblem;
use std::time::SystemTime;
use clap::{App, Arg, ArgMatches};
use std::io::{self, ErrorKind}; use std::io::{self, ErrorKind};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::process; use std::process;
use std::process::Command;
use std::time::{Duration, SystemTime};
use target_lexicon::Triple; use target_lexicon::Triple;
use tokio::process::Command;
use tokio::runtime::Builder;
pub mod repl; pub mod repl;
@ -105,14 +103,6 @@ pub fn build(matches: &ArgMatches, run_after_build: bool) -> io::Result<()> {
let path = Path::new(filename).canonicalize().unwrap(); let path = Path::new(filename).canonicalize().unwrap();
let src_dir = path.parent().unwrap().canonicalize().unwrap(); let src_dir = path.parent().unwrap().canonicalize().unwrap();
// Create the runtime
let mut rt = Builder::new()
.thread_name("roc")
.threaded_scheduler()
.enable_io()
.build()
.expect("Error spawning initial compiler thread."); // TODO make this error nicer.
// Spawn the root task // Spawn the root task
let path = path.canonicalize().unwrap_or_else(|err| { let path = path.canonicalize().unwrap_or_else(|err| {
use ErrorKind::*; use ErrorKind::*;
@ -131,28 +121,31 @@ pub fn build(matches: &ArgMatches, run_after_build: bool) -> io::Result<()> {
} }
} }
}); });
let binary_path = rt
.block_on(build_file(src_dir, path, opt_level)) let binary_path =
.expect("TODO gracefully handle block_on failing"); build_file(src_dir, path, opt_level).expect("TODO gracefully handle build_file failing");
if run_after_build { if run_after_build {
// Run the compiled app // Run the compiled app
rt.block_on(async { Command::new(binary_path)
Command::new(binary_path) .spawn()
.spawn() .unwrap_or_else(|err| panic!("Failed to run app after building it: {:?}", err))
.unwrap_or_else(|err| panic!("Failed to run app after building it: {:?}", err)) .wait()
.await .expect("TODO gracefully handle block_on failing");
.map_err(|_| {
todo!("gracefully handle error after `app` spawned");
})
})
.expect("TODO gracefully handle block_on failing");
} }
Ok(()) Ok(())
} }
async fn build_file( fn report_timing(buf: &mut String, label: &str, duration: Duration) {
buf.push_str(&format!(
" {:.3} ms {}\n",
duration.as_secs_f64() * 1000.0,
label,
));
}
fn build_file(
src_dir: PathBuf, src_dir: PathBuf,
filename: PathBuf, filename: PathBuf,
opt_level: OptLevel, opt_level: OptLevel,
@ -168,9 +161,35 @@ async fn build_file(
OptLevel::Normal => roc_builtins::std::standard_stdlib(), OptLevel::Normal => roc_builtins::std::standard_stdlib(),
OptLevel::Optimize => roc_builtins::unique::uniq_stdlib(), OptLevel::Optimize => roc_builtins::unique::uniq_stdlib(),
}; };
let loaded = roc_load::file::load(&stdlib, src_dir, filename.clone(), subs_by_module).await?; let loaded =
roc_load::file::load(filename.clone(), &stdlib, src_dir.as_path(), subs_by_module)?;
let dest_filename = filename.with_extension("o"); let dest_filename = filename.with_extension("o");
let buf = &mut String::with_capacity(1024);
for (module_id, module_timing) in loaded.timings.iter() {
let module_name = loaded.interns.module_name(*module_id);
buf.push_str(" ");
buf.push_str(module_name);
buf.push_str("\n");
report_timing(buf, "Read .roc file from disk", module_timing.read_roc_file);
report_timing(buf, "Parse header", module_timing.parse_header);
report_timing(buf, "Parse body", module_timing.parse_body);
report_timing(buf, "Canonicalize", module_timing.canonicalize);
report_timing(buf, "Constrain", module_timing.constrain);
report_timing(buf, "Solve", module_timing.solve);
report_timing(buf, "Other", module_timing.other());
buf.push('\n');
report_timing(buf, "Total", module_timing.total());
}
println!(
"\n\nCompilation finished! Here's how long each module took to compile:\n\n{}",
buf
);
gen( gen(
&arena, &arena,
loaded, loaded,
@ -201,7 +220,7 @@ async fn build_file(
.map_err(|_| { .map_err(|_| {
todo!("gracefully handle `ar` failing to spawn."); todo!("gracefully handle `ar` failing to spawn.");
})? })?
.await .wait()
.map_err(|_| { .map_err(|_| {
todo!("gracefully handle error after `ar` spawned"); todo!("gracefully handle error after `ar` spawned");
})?; })?;
@ -224,7 +243,7 @@ async fn build_file(
.map_err(|_| { .map_err(|_| {
todo!("gracefully handle `rustc` failing to spawn."); todo!("gracefully handle `rustc` failing to spawn.");
})? })?
.await .wait()
.map_err(|_| { .map_err(|_| {
todo!("gracefully handle error after `rustc` spawned"); todo!("gracefully handle error after `rustc` spawned");
})?; })?;

View file

@ -33,6 +33,7 @@ use roc_types::types::Type;
use std::hash::Hash; use std::hash::Hash;
use std::io::{self, Write}; use std::io::{self, Write};
use std::path::PathBuf; use std::path::PathBuf;
use std::str::from_utf8_unchecked;
use target_lexicon::Triple; use target_lexicon::Triple;
pub fn main() -> io::Result<()> { pub fn main() -> io::Result<()> {
@ -145,7 +146,7 @@ fn report_parse_error(fail: Fail) {
} }
fn print_output(src: &str) -> Result<String, Fail> { fn print_output(src: &str) -> Result<String, Fail> {
gen(src, Triple::host(), OptLevel::Normal).map(|(answer, answer_type)| { gen(src.as_bytes(), Triple::host(), OptLevel::Normal).map(|(answer, answer_type)| {
format!("\n{} \u{001b}[35m:\u{001b}[0m {}", answer, answer_type) format!("\n{} \u{001b}[35m:\u{001b}[0m {}", answer, answer_type)
}) })
} }
@ -154,7 +155,7 @@ pub fn repl_home() -> ModuleId {
ModuleIds::default().get_or_insert(&"REPL".into()) ModuleIds::default().get_or_insert(&"REPL".into())
} }
pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, String), Fail> { pub fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result<(String, String), Fail> {
use roc_reporting::report::{can_problem, type_problem, RocDocAllocator, DEFAULT_PALETTE}; use roc_reporting::report::{can_problem, type_problem, RocDocAllocator, DEFAULT_PALETTE};
// Look up the types and expressions of the `provided` values // Look up the types and expressions of the `provided` values
@ -169,13 +170,16 @@ pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, St
interns, interns,
problems: can_problems, problems: can_problems,
.. ..
} = can_expr(src)?; } = can_expr(src)?; // IMPORTANT: we must bail out here if there were UTF-8 errors!
let subs = Subs::new(var_store.into()); let subs = Subs::new(var_store.into());
let mut type_problems = Vec::new(); let mut type_problems = Vec::new();
let (content, mut subs) = infer_expr(subs, &mut type_problems, &constraint, var); let (content, mut subs) = infer_expr(subs, &mut type_problems, &constraint, var);
// SAFETY: we've already verified that this is valid UTF-8 during parsing.
let src_lines: Vec<&str> = unsafe { from_utf8_unchecked(src).split('\n').collect() };
// Report problems // Report problems
let src_lines: Vec<&str> = src.split('\n').collect();
let palette = DEFAULT_PALETTE; let palette = DEFAULT_PALETTE;
// Report parsing and canonicalization problems // Report parsing and canonicalization problems
@ -219,7 +223,7 @@ pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, St
let expr_type_str = content_to_string(content.clone(), &subs, home, &interns); let expr_type_str = content_to_string(content.clone(), &subs, home, &interns);
// Compute main_fn_type before moving subs to Env // Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs, ptr_bytes).unwrap_or_else(|err| { let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
panic!( panic!(
"Code gen error in test: could not convert to layout. Err was {:?}", "Code gen error in test: could not convert to layout. Err was {:?}",
err err
@ -255,7 +259,6 @@ pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, St
problems: &mut mono_problems, problems: &mut mono_problems,
home, home,
ident_ids: &mut ident_ids, ident_ids: &mut ident_ids,
pointer_size: ptr_bytes,
jump_counter: arena.alloc(0), jump_counter: arena.alloc(0),
}; };
@ -391,8 +394,11 @@ pub fn infer_expr(
(content, solved.into_inner()) (content, solved.into_inner())
} }
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(
let state = State::new(&input, Attempting::Module); arena: &'a Bump,
bytes: &'a [u8],
) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&bytes, Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);
@ -401,14 +407,14 @@ pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast
.map_err(|(fail, _)| fail) .map_err(|(fail, _)| fail)
} }
pub fn can_expr(expr_str: &str) -> Result<CanExprOut, Fail> { pub fn can_expr(expr_bytes: &[u8]) -> Result<CanExprOut, Fail> {
can_expr_with(&Bump::new(), repl_home(), expr_str) can_expr_with(&Bump::new(), repl_home(), expr_bytes)
} }
// TODO make this return a named struct instead of a big tuple // TODO make this return a named struct instead of a big tuple
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub fn uniq_expr( pub fn uniq_expr(
expr_str: &str, expr_bytes: &[u8],
) -> Result< ) -> Result<
( (
Located<roc_can::expr::Expr>, Located<roc_can::expr::Expr>,
@ -424,14 +430,14 @@ pub fn uniq_expr(
> { > {
let declared_idents: &ImMap<Ident, (Symbol, Region)> = &ImMap::default(); let declared_idents: &ImMap<Ident, (Symbol, Region)> = &ImMap::default();
uniq_expr_with(&Bump::new(), expr_str, declared_idents) uniq_expr_with(&Bump::new(), expr_bytes, declared_idents)
} }
// TODO make this return a named struct instead of a big tuple // TODO make this return a named struct instead of a big tuple
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub fn uniq_expr_with( pub fn uniq_expr_with(
arena: &Bump, arena: &Bump,
expr_str: &str, expr_bytes: &[u8],
declared_idents: &ImMap<Ident, (Symbol, Region)>, declared_idents: &ImMap<Ident, (Symbol, Region)>,
) -> Result< ) -> Result<
( (
@ -455,7 +461,7 @@ pub fn uniq_expr_with(
var, var,
interns, interns,
.. ..
} = can_expr_with(arena, home, expr_str)?; } = can_expr_with(arena, home, expr_bytes)?;
// double check // double check
let mut var_store = VarStore::new(old_var_store.fresh()); let mut var_store = VarStore::new(old_var_store.fresh());
@ -510,8 +516,8 @@ pub struct CanExprOut {
pub constraint: Constraint, pub constraint: Constraint,
} }
pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_str: &str) -> Result<CanExprOut, Fail> { pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_bytes: &[u8]) -> Result<CanExprOut, Fail> {
let loc_expr = parse_loc_with(&arena, expr_str)?; let loc_expr = parse_loc_with(&arena, expr_bytes)?;
let mut var_store = VarStore::default(); let mut var_store = VarStore::default();
let var = var_store.fresh(); let var = var_store.fresh();
let expected = Expected::NoExpectation(Type::Variable(var)); let expected = Expected::NoExpectation(Type::Variable(var));

View file

@ -0,0 +1,11 @@
[package]
name = "arena-pool"
version = "0.1.0"
authors = ["Richard Feldman <oss@rtfeldman.com>"]
repository = "https://github.com/rtfeldman/roc"
edition = "2018"
description = "A pool of arenas that can be leased out to threads and later reabsorbed"
license = "Apache-2.0"
[dev-dependencies]
pretty_assertions = "0.5.1"

View file

@ -0,0 +1 @@
pub mod pool;

View file

@ -0,0 +1,396 @@
use std::marker::PhantomPinned;
use std::ptr::{copy_nonoverlapping, NonNull};
/// A handle to a single value stored in an arena.
///
/// The handle is just a pointer. To read through it, the caller must present
/// the `Arena` (or `ArenaPool`) whose chunks contain that pointer.
pub struct ArenaRef<T> {
    ptr: NonNull<T>,
    _pin: PhantomPinned,
}

impl<T> ArenaRef<T> {
    /// Borrows the referenced value.
    ///
    /// # Panics
    ///
    /// Panics if `arena.verify_ownership` rejects this handle's pointer.
    pub fn get<'a, A: AsArena<T>>(&'a self, arena: &A) -> &'a T {
        let target = self.ptr;

        arena.verify_ownership(target);

        // SAFETY: `arena` just accepted the pointer as lying inside one of its
        // chunks. Soundness additionally relies on that memory staying
        // allocated for as long as the handle is in use, which the pool is
        // designed to guarantee by refusing to drop while arenas are leased.
        unsafe { &*target.as_ptr() }
    }

    /// Mutably borrows the referenced value.
    ///
    /// # Panics
    ///
    /// Panics if `arena.verify_ownership` rejects this handle's pointer.
    pub fn get_mut<'a, A: AsArena<T>>(&'a mut self, arena: &A) -> &'a mut T {
        let target = self.ptr;

        arena.verify_ownership(target);

        // SAFETY: same reasoning as `get`; `&mut self` ensures this handle is
        // not being read through at the same time.
        unsafe { &mut *target.as_ptr() }
    }
}
/// Like a `Vec`, except its buffer is reserved from an [`Arena`].
///
/// Every operation takes the arena as an argument and panics if the buffer
/// was not reserved from it. When a push would exceed the current capacity,
/// a block of twice the size is reserved from the arena and the existing
/// elements are moved into it.
pub struct ArenaVec<T> {
    buffer_ptr: NonNull<T>,
    len: usize,
    capacity: usize,
    _pin: PhantomPinned,
}

impl<T> ArenaVec<T> {
    /// Creates an empty vector with zero capacity inside `arena`.
    pub fn new_in(arena: &mut Arena<T>) -> Self {
        // We can't start with a NonNull::dangling pointer because when we go
        // to push elements into this, they'll try to verify the dangling
        // pointer resides in the arena it was given, which will likely panic.
        //
        // Instead, we ask the arena for a zero-length block, so the pointer
        // is only ever used for verification until the first push grows it.
        Self::with_capacity_in(0, arena)
    }

    /// Creates an empty vector with room for `capacity` elements, reserved
    /// from `arena`.
    pub fn with_capacity_in(capacity: usize, arena: &mut Arena<T>) -> Self {
        let ptr = arena.alloc_vec(capacity);

        Self {
            // SAFETY: `alloc_vec` derives its result from the buffer pointer
            // of a `Vec`, which is never null.
            buffer_ptr: unsafe { NonNull::new_unchecked(ptr) },
            capacity,
            len: 0,
            _pin: PhantomPinned,
        }
    }

    /// Appends `val`, growing the buffer inside `arena` if it is full.
    ///
    /// # Panics
    ///
    /// Panics if `arena` does not own this vector's buffer, or if doubling
    /// the capacity overflows `usize`.
    pub fn push<'a>(&'a mut self, val: T, arena: &mut Arena<T>) {
        // Verify that this is the arena where we originally got our buffer,
        // and is therefore safe to read and to write to. (If we have sufficient
        // capacity, we'll write to it, and otherwise we'll read from it when
        // copying our buffer over to the new reserved block.)
        arena.verify_ownership(self.buffer_ptr);

        // The buffer is full exactly when len == capacity; writing in that
        // state would land one slot past the end of the reserved block.
        if self.len == self.capacity {
            // Double the capacity, but make sure a zero-capacity vector
            // actually grows (0 * 2 would stay 0 forever).
            let new_capacity = self
                .capacity
                .checked_mul(2)
                .expect("ArenaVec capacity overflow")
                .max(1);
            let new_ptr = arena.alloc_vec(new_capacity);

            // SAFETY: the existing buffer holds self.len initialized elements,
            // the new block has room for at least that many, and it was
            // freshly reserved so the two regions cannot overlap.
            unsafe {
                copy_nonoverlapping(self.buffer_ptr.as_ptr(), new_ptr, self.len);
            }

            // SAFETY: see `with_capacity_in`.
            self.buffer_ptr = unsafe { NonNull::new_unchecked(new_ptr) };
            self.capacity = new_capacity;
        }

        // SAFETY: self.len < self.capacity here, so the slot is inside the
        // reserved block. `write` is used instead of assignment because the
        // slot is uninitialized, and assignment would drop its old contents.
        unsafe {
            self.buffer_ptr.as_ptr().add(self.len).write(val);
        }

        self.len += 1;
    }

    /// Returns the element at `index`, or `None` if `index >= len`.
    ///
    /// # Panics
    ///
    /// Panics if `arena` does not own this vector's buffer.
    pub fn get<'a>(&'a self, index: usize, arena: &Arena<T>) -> Option<&'a T> {
        arena.verify_ownership(self.buffer_ptr);

        if index < self.len {
            // SAFETY: the bounds check above means this slot has been written
            // by `push`, and the arena just confirmed it owns the buffer.
            Some(unsafe { &*self.buffer_ptr.as_ptr().add(index) })
        } else {
            None
        }
    }

    /// Returns the element at `index` mutably, or `None` if `index >= len`.
    ///
    /// # Panics
    ///
    /// Panics if `arena` does not own this vector's buffer.
    pub fn get_mut<'a>(&'a mut self, index: usize, arena: &Arena<T>) -> Option<&'a mut T> {
        arena.verify_ownership(self.buffer_ptr);

        if index < self.len {
            // SAFETY: same reasoning as `get`; `&mut self` rules out aliasing
            // through this vector.
            Some(unsafe { &mut *self.buffer_ptr.as_ptr().add(index) })
        } else {
            None
        }
    }
}
/// Owns the memory that a group of [`Arena`]s allocate into.
///
/// The pool holds one large first chunk that is divided between the arenas
/// it leases out, plus any extra chunks handed back by reabsorbed arenas.
#[derive(PartialEq, Eq)]
pub struct ArenaPool<T> {
    first_chunk: Vec<T>,
    extra_chunks: Vec<Vec<T>>,
    num_leased: usize,
    default_chunk_capacity: usize,
}

impl<T> ArenaPool<T> {
    const DEFAULT_CHUNK_SIZE: usize = 1024;

    /// Be careful! Both of these arguments are of type usize.
    ///
    /// The first is the number of arenas.
    /// The second is the number of elements that will be in each arena.
    ///
    /// This returns a new Pool, and also an iterator of Arenas. These Arenas can
    /// be given to different threads, where they can be used to allocate
    /// ArenaRef and ArenaVec values which can then be dereferenced by the Arena
    /// that created them, or by this pool once those Arenas have been
    /// reabsorbed back into it.
    ///
    /// (A word of warning: if you try to use this pool to dereference ArenaRef
    /// and ArenaVec values which were allocated by arenas that have *not* yet
    /// been reabsorbed, it may work some of the time and panic other times,
    /// depending on whether the arena needed to allocate extra chunks beyond
    /// its initial chunk. tl;dr - doing that may panic, so don't try it!)
    ///
    /// Before this pool gets dropped, you must call reabsorb() on every
    /// arena that has been leased - otherwise, you'll get a panic when this
    /// gets dropped! The memory safety of the system depends on all arenas
    /// having been reabsorbed before the pool gets deallocated, which is why
    /// the pool's Drop implementation enforces it.
    pub fn new(num_arenas: usize, elems_per_arena: usize) -> (ArenaPool<T>, ArenaIter<T>) {
        Self::with_chunk_size(num_arenas, elems_per_arena, Self::DEFAULT_CHUNK_SIZE)
    }

    /// Like `new`, except you can also specify a chunk size, which is stored
    /// as the pool's default chunk capacity.
    ///
    /// NOTE(review): no code visible in this file reads that value back, so
    /// it currently appears to have no effect - confirm the intended use.
    ///
    /// # Panics
    ///
    /// Panics if `elems_per_arena * num_arenas` overflows `usize`.
    pub fn with_chunk_size(
        num_arenas: usize,
        elems_per_arena: usize,
        chunk_size: usize,
    ) -> (ArenaPool<T>, ArenaIter<T>) {
        // A wrapped product would allocate a buffer smaller than the arenas
        // believe they have, so overflow has to be a hard error even in
        // release builds.
        let total_elems = elems_per_arena
            .checked_mul(num_arenas)
            .expect("ArenaPool capacity overflow: elems_per_arena * num_arenas");
        let mut first_chunk = Vec::with_capacity(total_elems);
        let iter = ArenaIter {
            ptr: first_chunk.as_mut_ptr(),
            quantity_remaining: num_arenas,
            first_chunk_capacity: elems_per_arena,
        };
        let pool = Self {
            first_chunk,
            extra_chunks: Vec::new(),
            // Every arena the iterator can yield counts as leased up front.
            num_leased: num_arenas,
            default_chunk_capacity: chunk_size,
        };

        (pool, iter)
    }

    /// Return an arena to the pool. (This would have been called "return" but
    /// that's a reserved keyword.)
    ///
    /// # Panics
    ///
    /// Panics if the arena's first chunk does not lie inside this pool.
    pub fn reabsorb(&mut self, arena: Arena<T>) {
        // Ensure we're reabsorbing an arena that was
        // actually leased by this pool in the first place!
        verify_ownership(
            self.first_chunk.as_ptr(),
            self.first_chunk.capacity(),
            &self.extra_chunks,
            arena.first_chunk_ptr,
        );

        // Add the arena's extra chunks to our own, so their memory remains live
        // after the arena gets dropped. This is important, because at this
        // point their pointers can still potentially be dereferenced!
        self.extra_chunks.extend(arena.extra_chunks);

        self.num_leased -= 1;
    }
}

impl<T> Drop for ArenaPool<T> {
    fn drop(&mut self) {
        // When an ArenaPool gets dropped, it must not have any leased
        // arenas remaining. If it does, those non-reabsorbed Arenas still
        // hold pointers into memory this pool is about to free!
        // This would be a use-after-free; we panic rather than permit that.
        assert_eq!(self.num_leased, 0);
    }
}
/// Yields the arenas of a pool, one per call to `next`.
///
/// Each arena is given its own consecutive run of `first_chunk_capacity`
/// elements, starting at `ptr` and advancing after every arena handed out.
pub struct ArenaIter<T> {
    ptr: *mut T,
    quantity_remaining: usize,
    first_chunk_capacity: usize,
}

impl<T> Iterator for ArenaIter<T> {
    type Item = Arena<T>;

    /// Returns the next empty arena, or `None` once `quantity_remaining`
    /// arenas have been handed out.
    fn next(&mut self) -> Option<Arena<T>> {
        if self.quantity_remaining == 0 {
            return None;
        }

        let chunk_start = self.ptr;
        let chunk_cap = self.first_chunk_capacity;

        self.quantity_remaining -= 1;

        // SAFETY: `with_chunk_size` reserves elems_per_arena * num_arenas
        // elements and this steps forward at most num_arenas times, so the
        // result stays within (or one past the end of) that allocation.
        self.ptr = unsafe { chunk_start.add(chunk_cap) };

        Some(Arena {
            first_chunk_ptr: chunk_start,
            first_chunk_len: 0,
            first_chunk_cap: chunk_cap,
            extra_chunks: Vec::new(),
        })
    }
}
/// An allocator over one slice of a pool's first chunk, plus any extra
/// chunks it has to create once that slice is used up.
#[derive(PartialEq, Eq)]
pub struct Arena<T> {
    first_chunk_ptr: *mut T,
    first_chunk_len: usize,
    first_chunk_cap: usize,
    extra_chunks: Vec<Vec<T>>,
}

impl<T> Arena<T> {
    /// Stores `val` in the arena and returns a handle to it.
    pub fn alloc(&mut self, val: T) -> ArenaRef<T> {
        let ptr: *mut T = if self.first_chunk_len < self.first_chunk_cap {
            // We have enough room in the first chunk for 1 allocation.
            //
            // SAFETY: first_chunk_len < first_chunk_cap, so this slot is
            // inside the first chunk. The slot is computed *before* bumping
            // the length so that slot 0 is used and we never step past the
            // end of the chunk.
            let slot = unsafe { self.first_chunk_ptr.add(self.first_chunk_len) };

            // SAFETY: the slot is unused, so it must be initialized with
            // `write` (plain assignment would drop garbage).
            unsafe { slot.write(val) };

            self.first_chunk_len += 1;

            slot
        } else {
            // We ran out of space in the first chunk, so we turn to extra chunks.
            // First, ensure that the last extra chunk has room for one more
            // element; pushing into a full Vec would reallocate it and
            // invalidate every pointer previously handed out from it.
            let needs_new_chunk = self
                .extra_chunks
                .last()
                .map_or(true, |chunk| chunk.len() >= chunk.capacity());

            if needs_new_chunk {
                self.extra_chunks
                    .push(Vec::with_capacity(self.first_chunk_cap));
            }

            let chunk = self
                .extra_chunks
                .last_mut()
                .expect("an extra chunk was just ensured to exist");
            let index = chunk.len();

            chunk.push(val);

            // Get a pointer to a memory address within our particular chunk.
            &mut chunk[index]
        };

        ArenaRef {
            // SAFETY: both branches derive `ptr` from a Vec buffer or a
            // reference, neither of which can be null.
            ptr: unsafe { NonNull::new_unchecked(ptr) },
            _pin: PhantomPinned,
        }
    }

    /// Reserves an uninitialized block of `num_elems` elements and returns a
    /// pointer to its first slot.
    fn alloc_vec(&mut self, num_elems: usize) -> *mut T {
        let start = self.first_chunk_len;

        // Requiring start < cap (even for zero-length requests) keeps the
        // returned pointer strictly inside the first chunk, so it passes
        // ownership verification later.
        let fits_in_first_chunk =
            start < self.first_chunk_cap && num_elems <= self.first_chunk_cap - start;

        if fits_in_first_chunk {
            // We have enough room in the first chunk for this vec.
            self.first_chunk_len = start + num_elems;

            // SAFETY: start < first_chunk_cap, so this is inside the chunk.
            // The pointer is to the *start* of the reserved region.
            unsafe { self.first_chunk_ptr.add(start) }
        } else {
            // We ran out of space in the first chunk. The block's elements
            // are uninitialized, so they can't be tracked through a Vec's
            // length; instead, the block gets a dedicated chunk of its own
            // (at least one element, so its address range is never empty).
            let mut block: Vec<T> = Vec::with_capacity(num_elems.max(1));
            let ptr = block.as_mut_ptr();

            // `alloc` pushes values into whichever chunk is last. Keep the
            // dedicated block out of that position (using an empty
            // placeholder if there was no chunk yet), so `alloc` can never
            // write into the block's reserved slots. Moving the Vec values
            // around does not move their heap buffers.
            let bump_chunk = self.extra_chunks.pop().unwrap_or_default();

            self.extra_chunks.push(block);
            self.extra_chunks.push(bump_chunk);

            ptr
        }
    }
}
pub trait AsArena<T> {
fn verify_ownership(&self, ptr: NonNull<T>);
}
impl<T> AsArena<T> for ArenaPool<T> {
fn verify_ownership(&self, ptr: NonNull<T>) {
verify_ownership(
self.first_chunk.as_ptr(),
self.first_chunk.capacity(),
&self.extra_chunks,
ptr.as_ptr(),
);
}
}
impl<T> AsArena<T> for Arena<T> {
fn verify_ownership(&self, ptr: NonNull<T>) {
verify_ownership(
self.first_chunk_ptr,
self.first_chunk_cap,
&self.extra_chunks,
ptr.as_ptr(),
);
}
}
/// Panics unless `ptr` points into the first chunk or into one of
/// `extra_chunks`.
///
/// `first_chunk_cap` and each chunk's capacity are counted in elements, so
/// they are scaled by `size_of::<T>()` to get the byte range each chunk
/// covers. Zero-sized types occupy no address range and are always accepted.
///
/// # Panics
///
/// Panics with "Pointer ownership verification failed." if the address is
/// not inside any of the given chunks.
fn verify_ownership<T>(
    first_chunk_ptr: *const T,
    first_chunk_cap: usize,
    extra_chunks: &[Vec<T>],
    ptr: *const T,
) {
    let elem_size = std::mem::size_of::<T>();

    if elem_size == 0 {
        // Zero-sized values take up no memory, so there is no address range
        // to check (and a Vec of them reports a capacity of usize::MAX).
        return;
    }

    let addr = ptr as usize;

    // Whether `addr` falls inside the `cap`-element block starting at `start`.
    let contains = |start: *const T, cap: usize| {
        let start_addr = start as usize;
        let end_addr = start_addr.saturating_add(cap.saturating_mul(elem_size));

        start_addr <= addr && addr < end_addr
    };

    let owned = contains(first_chunk_ptr, first_chunk_cap)
        || extra_chunks
            .iter()
            .any(|chunk| contains(chunk.as_ptr(), chunk.capacity()));

    if !owned {
        // The address wasn't within any of our chunks' bounds.
        // Panic to avoid use-after-free errors!
        panic!("Pointer ownership verification failed.");
    }
}

View file

@ -0,0 +1,17 @@
// #[macro_use]
// extern crate pretty_assertions;
extern crate arena_pool;
#[cfg(test)]
mod test_arena_pool {
    use arena_pool::pool::{ArenaIter, ArenaPool};

    /// A pool of unit values together with the iterator over its arenas.
    type UnitPool = (ArenaPool<()>, ArenaIter<()>);

    /// Building a pool with zero arenas, and immediately dropping it, must
    /// not panic with either constructor.
    #[test]
    fn empty_pool() {
        let _: UnitPool = ArenaPool::new(0, 0);
        let _: UnitPool = ArenaPool::with_chunk_size(0, 0, 0);
    }
}

View file

@ -136,14 +136,14 @@ pub fn gen(
fpm.initialize(); fpm.initialize();
// Compute main_fn_type before moving subs to Env // Compute main_fn_type before moving subs to Env
let ptr_bytes = target.pointer_width().unwrap().bytes() as u32; let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
let layout = Layout::new(&arena, content, &subs, ptr_bytes).unwrap_or_else(|err| {
panic!( panic!(
"Code gen error in Program: could not convert to layout. Err was {:?}", "Code gen error in Program: could not convert to layout. Err was {:?}",
err err
) )
}); });
let ptr_bytes = target.pointer_width().unwrap().bytes() as u32;
let main_fn_type = let main_fn_type =
basic_type_from_layout(&arena, &context, &layout, ptr_bytes).fn_type(&[], false); basic_type_from_layout(&arena, &context, &layout, ptr_bytes).fn_type(&[], false);
let main_fn_name = "$main"; let main_fn_name = "$main";
@ -169,7 +169,6 @@ pub fn gen(
problems: &mut mono_problems, problems: &mut mono_problems,
home, home,
ident_ids: &mut ident_ids, ident_ids: &mut ident_ids,
pointer_size: ptr_bytes,
jump_counter: arena.alloc(0), jump_counter: arena.alloc(0),
}; };

View file

@ -19,45 +19,24 @@ not : [True, False] -> [True, False]
## ##
## ## Performance Notes ## ## Performance Notes
## ##
## In dev builds, this works exactly as described. In release builds, as a ## In some languages, `&&` and `||` are special-cased in the compiler to skip
## performance optimization, the compiler translates calls to #Bool.and ## evaluating the expression after the operator under certain circumstances.
## (and #Bool.or) from function calls into conditionals. If its first ## For example, in some languages, `enablePets && likesDogs user` would compile
## argument evalutes to #False, then any function calls in the second argument ## to the equivalent of:
## get skipped, and the entire expression immediately evaluates to #False.
## ##
## For example: ## if enablePets then
## likesDogs user
## else
## False
## ##
## List.isEmpty list && Str.isEmpty str ## In Roc, however, `&&` and `||` are not special. They work the same way as
## other functions. Conditionals like `if` and `when` have a performance cost,
## and sometimes calling a function like `likesDogs user` can be faster across
## the board than doing an `if` to decide whether to skip calling it.
## ##
## In a release build, `Str.isEmpty` will only be called if `List.isEmpty list` ## (Naturally, if you expect the `if` to improve performance, you can always add
## returns #True. If `List.isEmpty list` returns #False, the entire expression ## one explicitly!)
## will immediately evaluate to #False. and : Bool, Bool -> Bool
##
## Since all Roc expressions are pure, this will always give the same answer
## as if both #Bool arguments had been fully evaluated (as they are in
## dev builds), but it can potentially avoid costly function calls in release builds.
##
## Because this optimization only skips function calls, you can opt out of it
## by calling the function up front, and giving its result a name. For example:
##
## emptyStr = Str.isEmpty str
##
## List.isEmpty list && emptyStr
##
## Here, `Str.isEmpty` will always be called no matter what, and the `&&` will
## not get compiled to a conditional because there are no function calls
## involved in its second argument.
##
## If you know the functions involved in the second argument are trivial
## (for example, they are other #&&, #||, and #Bool.not operations), then
## this can potentially be a (likely extremely minor) performance optimization
## because a logical `AND` instruction typically executes faster than a
## [branch misprediction](https://danluu.com/branch-prediction).
##
## That said, in practice the `&& Str.isEmpty str` approach will typically run
## faster than the `&& emptyStr` approach - both for `Str.isEmpty` in particular
## as well as for most functions in general.
and : [True, False], [True, False] -> [True, False]
## Returns #True when given #True for either argument, and #False only when given #False and #False. ## Returns #True when given #True for either argument, and #False only when given #False and #False.
@ -74,18 +53,19 @@ and : [True, False], [True, False] -> [True, False]
## ##
## ## Performance Notes ## ## Performance Notes
## ##
## #Bool.or does the same "compile to a conditional in release mode" optimization ## In some languages, `&&` and `||` are special-cased in the compiler to skip
## that #Bool.and does, except it short-circuits when the first argument is ## evaluating the expression after the operator under certain circumstances.
## #True (causing it to immediately returns #True).
## ##
## See the performance notes for #Bool.and for details. ## In Roc, this is not the case. See the performance notes for #Bool.and for details.
or : [True, False], [True, False] -> [True, False] or : Bool, Bool -> Bool
## Exclusive or ## Exclusive or
xor : [True, False], [True, False] -> [True, False] xor : Bool, Bool -> Bool
## Returns #True if the two values are *structurally equal*, and #False otherwise. ## Returns #True if the two values are *structurally equal*, and #False otherwise.
## ##
## `a == b` is shorthand for `Bool.isEq a b`
##
## Structural equality works as follows: ## Structural equality works as follows:
## ##
## 1. #Int and #Float values are equal if their numbers are equal. ## 1. #Int and #Float values are equal if their numbers are equal.
@ -93,18 +73,15 @@ xor : [True, False], [True, False] -> [True, False]
## 3. Global tags are equal if they are the same tag, and also their contents (if any) are equal. ## 3. Global tags are equal if they are the same tag, and also their contents (if any) are equal.
## 4. Private tags are equal if they are the same tag, in the same module, and also their contents (if any) are equal. ## 4. Private tags are equal if they are the same tag, in the same module, and also their contents (if any) are equal.
## 5. Collections (#String, #List, #Map, #Set, and #Bytes) are equal if they are the same length, and also all their corresponding elements are equal. ## 5. Collections (#String, #List, #Map, #Set, and #Bytes) are equal if they are the same length, and also all their corresponding elements are equal.
## 6. All functions are considered equal. (So `Bool.not == Bool.not` will return #True, as you might expect, but also `Num.abs == Num.negate` will return #True, as you might not. This design is because function equality has been formally proven to be undecidable in the general case, and returning #True in all cases turns out to be mostly harmless - especially compared to alternative designs like crashing, making #equal inconvenient to use, and so on.)
## ##
## This function always crashes when given two functions, or an erroneous ## Note that `isEq` takes `'val` instead of `val`, which means `isEq` does not
## #Float value (see #Float.isErroneous) ## accept arguments whose types contain functions.
## isEq : 'val, 'val -> Bool
## This is the same as the #== operator.
isEq : val, val -> [True, False]
## Calls #eq on the given values, then calls #not on the result. ## Calls #eq on the given values, then calls #not on the result.
## ##
## This is the same as the #!= operator. ## `a != b` is shorthand for `Bool.isNotEq a b`
isNe : val, val -> [True, False] ##
isNe = \left, right -> ## Note that `isNotEq` takes `'val` instead of `val`, which means `isNotEq` does not
not (equal left right) ## accept arguments whose types contain functions.
isNotEq : 'val, 'val -> Bool

View file

@ -2,17 +2,27 @@ interface Set
exposes [ Set, map, isEmpty ] exposes [ Set, map, isEmpty ]
imports [] imports []
## An empty set.
empty : Set *
## Check ## Check
# isEmpty : List * -> Bool isEmpty : Set * -> Bool
## Convert each element in the list to something new, by calling a conversion len : Set * -> Len
## function on each of them. Then return a new list of the converted values.
add : Set 'elem, 'elem -> Set 'elem
rem : Set 'elem, 'elem -> Set 'elem
## Convert each element in the set to something new, by calling a conversion
## function on each of them. Then return a new set of the converted values.
## ##
## >>> Set.map {[ -1, 1, 3 ]} Num.negate ## >>> Set.map {: -1, 1, 3 :} Num.negate
## ##
## >>> Set.map {[ "", "a", "bc" ]} Str.isEmpty ## >>> Set.map {: "", "a", "bc" :} Str.isEmpty
## ##
## `map` functions like this are common in Roc, and they all work similarly. ## `map` functions like this are common in Roc, and they all work similarly.
## See for example #Result.map, #List.map, and #Map.map. ## See for example #Result.map, #List.map, and #Map.map.
map : List before, (before -> after) -> List after map : Set 'before, ('before -> 'after) -> Set 'after

View file

@ -510,9 +510,18 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
), ),
); );
// push : List elem -> elem -> List elem // append : List elem, elem -> List elem
add_type( add_type(
Symbol::LIST_PUSH, Symbol::LIST_APPEND,
SolvedType::Func(
vec![list_type(flex(TVAR1)), flex(TVAR1)],
Box::new(list_type(flex(TVAR1))),
),
);
// prepend : List elem, elem -> List elem
add_type(
Symbol::LIST_PREPEND,
SolvedType::Func( SolvedType::Func(
vec![list_type(flex(TVAR1)), flex(TVAR1)], vec![list_type(flex(TVAR1)), flex(TVAR1)],
Box::new(list_type(flex(TVAR1))), Box::new(list_type(flex(TVAR1))),
@ -543,15 +552,6 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
), ),
); );
// append : List elem, List elem -> List elem
add_type(
Symbol::LIST_APPEND,
SolvedType::Func(
vec![list_type(flex(TVAR1)), list_type(flex(TVAR1))],
Box::new(list_type(flex(TVAR1))),
),
);
// len : List * -> Int // len : List * -> Int
add_type( add_type(
Symbol::LIST_LEN, Symbol::LIST_LEN,

View file

@ -638,8 +638,8 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
) )
}); });
// append : Attr * (List (Attr * a)), Attr * (List (Attr * a)) -> Attr * (List (Attr * a)) // concat : Attr * (List (Attr * a)), Attr * (List (Attr * a)) -> Attr * (List (Attr * a))
add_type(Symbol::LIST_APPEND, { add_type(Symbol::LIST_CONCAT, {
let_tvars! { a, star1, star2, star3 }; let_tvars! { a, star1, star2, star3 };
unique_function( unique_function(
@ -669,13 +669,43 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
) )
}); });
// push : Attr * (List a) // append : Attr * (List a)
// , a // , a
// -> Attr * (List a) // -> Attr * (List a)
// //
// NOTE: we demand the new item to have the same uniqueness as the other list items. // NOTE: we demand the new item to have the same uniqueness as the other list items.
// It could be allowed to add unique items to shared lists, but that requires special code gen // It could be allowed to add unique items to shared lists, but that requires special code gen
add_type(Symbol::LIST_PUSH, { add_type(Symbol::LIST_APPEND, {
let_tvars! { a, star1, star2 };
unique_function(
vec![
SolvedType::Apply(
Symbol::ATTR_ATTR,
vec![
flex(star1),
SolvedType::Apply(Symbol::LIST_LIST, vec![flex(a)]),
],
),
flex(a),
],
SolvedType::Apply(
Symbol::ATTR_ATTR,
vec![
boolean(star2),
SolvedType::Apply(Symbol::LIST_LIST, vec![flex(a)]),
],
),
)
});
// prepend : Attr * (List a)
// , a
// -> Attr * (List a)
//
// NOTE: we demand the new item to have the same uniqueness as the other list items.
// It could be allowed to add unique items to shared lists, but that requires special code gen
add_type(Symbol::LIST_PREPEND, {
let_tvars! { a, star1, star2 }; let_tvars! { a, star1, star2 };
unique_function( unique_function(

View file

@ -53,13 +53,14 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
Symbol::LIST_LEN => list_len, Symbol::LIST_LEN => list_len,
Symbol::LIST_GET => list_get, Symbol::LIST_GET => list_get,
Symbol::LIST_SET => list_set, Symbol::LIST_SET => list_set,
Symbol::LIST_PUSH => list_push, Symbol::LIST_APPEND => list_append,
Symbol::LIST_FIRST => list_first, Symbol::LIST_FIRST => list_first,
Symbol::LIST_IS_EMPTY => list_is_empty, Symbol::LIST_IS_EMPTY => list_is_empty,
Symbol::LIST_SINGLE => list_single, Symbol::LIST_SINGLE => list_single,
Symbol::LIST_REPEAT => list_repeat, Symbol::LIST_REPEAT => list_repeat,
Symbol::LIST_REVERSE => list_reverse, Symbol::LIST_REVERSE => list_reverse,
Symbol::LIST_APPEND => list_append, Symbol::LIST_CONCAT => list_concat,
Symbol::LIST_PREPEND => list_prepend,
Symbol::NUM_ADD => num_add, Symbol::NUM_ADD => num_add,
Symbol::NUM_SUB => num_sub, Symbol::NUM_SUB => num_sub,
Symbol::NUM_MUL => num_mul, Symbol::NUM_MUL => num_mul,
@ -617,12 +618,12 @@ fn list_reverse(symbol: Symbol, var_store: &mut VarStore) -> Def {
) )
} }
/// List.append : List elem, List elem -> List elem /// List.concat : List elem, List elem -> List elem
fn list_append(symbol: Symbol, var_store: &mut VarStore) -> Def { fn list_concat(symbol: Symbol, var_store: &mut VarStore) -> Def {
let list_var = var_store.fresh(); let list_var = var_store.fresh();
let body = RunLowLevel { let body = RunLowLevel {
op: LowLevel::ListAppend, op: LowLevel::ListConcat,
args: vec![ args: vec![
(list_var, Var(Symbol::ARG_1)), (list_var, Var(Symbol::ARG_1)),
(list_var, Var(Symbol::ARG_2)), (list_var, Var(Symbol::ARG_2)),
@ -856,13 +857,36 @@ fn list_set(symbol: Symbol, var_store: &mut VarStore) -> Def {
) )
} }
/// List.push : List elem, elem -> List elem /// List.append : List elem, elem -> List elem
fn list_push(symbol: Symbol, var_store: &mut VarStore) -> Def { fn list_append(symbol: Symbol, var_store: &mut VarStore) -> Def {
let list_var = var_store.fresh(); let list_var = var_store.fresh();
let elem_var = var_store.fresh(); let elem_var = var_store.fresh();
let body = RunLowLevel { let body = RunLowLevel {
op: LowLevel::ListPush, op: LowLevel::ListAppend,
args: vec![
(list_var, Var(Symbol::ARG_1)),
(elem_var, Var(Symbol::ARG_2)),
],
ret_var: list_var,
};
defn(
symbol,
vec![(list_var, Symbol::ARG_1), (elem_var, Symbol::ARG_2)],
var_store,
body,
list_var,
)
}
/// List.prepend : List elem, elem -> List elem
fn list_prepend(symbol: Symbol, var_store: &mut VarStore) -> Def {
let list_var = var_store.fresh();
let elem_var = var_store.fresh();
let body = RunLowLevel {
op: LowLevel::ListPrepend,
args: vec![ args: vec![
(list_var, Var(Symbol::ARG_1)), (list_var, Var(Symbol::ARG_1)),
(elem_var, Var(Symbol::ARG_2)), (elem_var, Var(Symbol::ARG_2)),

View file

@ -27,7 +27,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)] #[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);

View file

@ -20,7 +20,7 @@ mod test_fmt {
use roc_parse::parser::{Fail, Parser, State}; use roc_parse::parser::{Fail, Parser, State};
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> { fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc!(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc!(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);
@ -55,7 +55,7 @@ mod test_fmt {
let src = src.trim_end(); let src = src.trim_end();
let expected = expected.trim_end(); let expected = expected.trim_end();
match module::header().parse(&arena, State::new(&src, Attempting::Module)) { match module::header().parse(&arena, State::new(src.as_bytes(), Attempting::Module)) {
Ok((actual, state)) => { Ok((actual, state)) => {
let mut buf = String::new_in(&arena); let mut buf = String::new_in(&arena);

View file

@ -1828,7 +1828,7 @@ fn list_push<'a, 'ctx, 'env>(
let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes); let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes);
let ptr_type = get_ptr_type(&elem_type, AddressSpace::Generic); let ptr_type = get_ptr_type(&elem_type, AddressSpace::Generic);
let elems_ptr = load_list_ptr(builder, original_wrapper, ptr_type); let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type);
// The output list length, which is the old list length + 1 // The output list length, which is the old list length + 1
let new_list_len = env.builder.build_int_add( let new_list_len = env.builder.build_int_add(
@ -1837,7 +1837,6 @@ fn list_push<'a, 'ctx, 'env>(
"new_list_length", "new_list_length",
); );
let ctx = env.context;
let ptr_bytes = env.ptr_bytes; let ptr_bytes = env.ptr_bytes;
// Calculate the number of bytes we'll need to allocate. // Calculate the number of bytes we'll need to allocate.
@ -1863,7 +1862,7 @@ fn list_push<'a, 'ctx, 'env>(
// one we just malloc'd. // one we just malloc'd.
// //
// TODO how do we decide when to do the small memcpy vs the normal one? // TODO how do we decide when to do the small memcpy vs the normal one?
builder.build_memcpy(clone_ptr, ptr_bytes, elems_ptr, ptr_bytes, list_size); builder.build_memcpy(clone_ptr, ptr_bytes, list_ptr, ptr_bytes, list_size);
} else { } else {
panic!("TODO Cranelift currently only knows how to clone list elements that are Copy."); panic!("TODO Cranelift currently only knows how to clone list elements that are Copy.");
} }
@ -1887,17 +1886,105 @@ fn list_push<'a, 'ctx, 'env>(
.build_insert_value(struct_val, new_list_len, Builtin::WRAPPER_LEN, "insert_len") .build_insert_value(struct_val, new_list_len, Builtin::WRAPPER_LEN, "insert_len")
.unwrap(); .unwrap();
let answer = builder.build_bitcast(
struct_val.into_struct_value(),
collection(ctx, ptr_bytes),
"cast_collection",
);
let elem_ptr = unsafe { builder.build_in_bounds_gep(clone_ptr, &[list_len], "load_index") }; let elem_ptr = unsafe { builder.build_in_bounds_gep(clone_ptr, &[list_len], "load_index") };
builder.build_store(elem_ptr, elem); builder.build_store(elem_ptr, elem);
answer builder.build_bitcast(
struct_val.into_struct_value(),
collection(ctx, ptr_bytes),
"cast_collection",
)
}
/// List.prepend : List elem, elem -> List elem
///
/// Emits code that allocates a new array one element longer than the input
/// list, stores `elem` at index 0, copies the input list's elements in after
/// it (starting at index 1), and returns a fresh list wrapper holding the new
/// pointer and the new length.
///
/// * `env` - supplies the builder, context, arena and pointer size used below.
/// * `original_wrapper` - the input list's wrapper struct; its length and
///   element pointer are read with `load_list_len` / `load_list_ptr`.
/// * `elem` - the value to store in the first slot of the new list.
/// * `elem_layout` - layout of one element; determines the element type and
///   the per-element byte size used for the copy.
///
/// Returns the new wrapper struct bitcast to `collection(ctx, ptr_bytes)`.
///
/// # Panics
///
/// Panics at code-generation time if `elem_layout.safe_to_memcpy()` is false.
fn list_prepend<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
original_wrapper: StructValue<'ctx>,
elem: BasicValueEnum<'ctx>,
elem_layout: &Layout<'a>,
) -> BasicValueEnum<'ctx> {
let builder = env.builder;
let ctx = env.context;
// Load the usize length from the wrapper.
let list_len = load_list_len(builder, original_wrapper);
// Element type and a pointer-to-element type, needed to load the input
// list's element pointer out of the wrapper.
let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes);
let ptr_type = get_ptr_type(&elem_type, AddressSpace::Generic);
let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type);
// The output list length, which is the old list length + 1
let new_list_len = env.builder.build_int_add(
ctx.i64_type().const_int(1 as u64, false),
list_len,
"new_list_length",
);
let ptr_bytes = env.ptr_bytes;
// Allocate space for the new array that we'll copy into.
// NOTE: this is the same call as the `elem_type` computed above and simply
// shadows that binding.
let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes);
let clone_ptr = builder
.build_array_malloc(elem_type, new_list_len, "list_ptr")
.unwrap();
// The wrapper stores the element pointer as a pointer-sized integer.
let int_type = ptr_int(ctx, ptr_bytes);
let ptr_as_int = builder.build_ptr_to_int(clone_ptr, int_type, "list_cast_ptr");
// The prepended element goes in slot 0 of the new array.
builder.build_store(clone_ptr, elem);
// Pointer to slot 1 of the new array: the destination for the old elements.
// SAFETY: the allocation above holds `list_len + 1` elements, so index 1 is
// at most one past the end of the new array (when the input list is empty).
let index_1_ptr = unsafe {
builder.build_in_bounds_gep(
clone_ptr,
&[ctx.i64_type().const_int(1 as u64, false)],
"load_index",
)
};
// Size in bytes of a single element; used to compute how many bytes to copy
// (the allocation itself was already done by `build_array_malloc` above).
let elem_bytes = env
.ptr_int()
.const_int(elem_layout.stack_size(env.ptr_bytes) as u64, false);
// This is the size of the list coming in, before we have added an element
// to the beginning.
let list_size = env
.builder
.build_int_mul(elem_bytes, list_len, "mul_old_len_by_elem_bytes");
if elem_layout.safe_to_memcpy() {
// Copy the bytes from the original array into the new
// one we just malloc'd.
//
// TODO how do we decide when to do the small memcpy vs the normal one?
//
// NOTE(review): `ptr_bytes` is passed where the memcpy alignments appear to
// be expected, but `index_1_ptr` is offset by one element from the start of
// the allocation, so for elements smaller than a pointer it may not be
// pointer-aligned - confirm against the `build_memcpy` signature.
builder.build_memcpy(index_1_ptr, ptr_bytes, list_ptr, ptr_bytes, list_size);
} else {
// NOTE(review): the message below mentions Cranelift; presumably it was
// carried over from another backend - confirm before relying on it.
panic!("TODO Cranelift currently only knows how to clone list elements that are Copy.");
}
// Create a fresh wrapper struct for the newly populated array
let struct_type = collection(ctx, env.ptr_bytes);
let mut struct_val;
// Store the pointer
struct_val = builder
.build_insert_value(
struct_type.get_undef(),
ptr_as_int,
Builtin::WRAPPER_PTR,
"insert_ptr",
)
.unwrap();
// Store the length
struct_val = builder
.build_insert_value(struct_val, new_list_len, Builtin::WRAPPER_LEN, "insert_len")
.unwrap();
// Hand the wrapper back as the generic collection type.
builder.build_bitcast(
struct_val.into_struct_value(),
collection(ctx, ptr_bytes),
"cast_collection",
)
} }
fn list_set<'a, 'ctx, 'env>( fn list_set<'a, 'ctx, 'env>(
@ -2177,8 +2264,8 @@ fn run_low_level<'a, 'ctx, 'env>(
} }
} }
} }
ListAppend => list_append(env, scope, parent, args), ListConcat => list_concat(env, scope, parent, args),
ListPush => { ListAppend => {
// List.append : List elem, elem -> List elem
debug_assert_eq!(args.len(), 2); debug_assert_eq!(args.len(), 2);
@ -2187,6 +2274,15 @@ fn run_low_level<'a, 'ctx, 'env>(
list_push(env, original_wrapper, elem, elem_layout) list_push(env, original_wrapper, elem, elem_layout)
} }
ListPrepend => {
// List.prepend List elem, elem -> List elem
debug_assert_eq!(args.len(), 2);
let original_wrapper = load_symbol(env, scope, &args[0]).into_struct_value();
let (elem, elem_layout) = load_symbol_and_layout(env, scope, &args[1]);
list_prepend(env, original_wrapper, elem, elem_layout)
}
NumAbs | NumNeg | NumRound | NumSqrtUnchecked | NumSin | NumCos | NumToFloat => { NumAbs | NumNeg | NumRound | NumSqrtUnchecked | NumSin | NumCos | NumToFloat => {
debug_assert_eq!(args.len(), 1); debug_assert_eq!(args.len(), 1);
@ -2395,13 +2491,13 @@ fn build_int_binop<'a, 'ctx, 'env>(
} }
} }
fn list_append<'a, 'ctx, 'env>( fn list_concat<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>, env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>, scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>, parent: FunctionValue<'ctx>,
args: &[Symbol], args: &[Symbol],
) -> BasicValueEnum<'ctx> { ) -> BasicValueEnum<'ctx> {
// List.append : List elem, List elem -> List elem // List.concat : List elem, List elem -> List elem
debug_assert_eq!(args.len(), 2); debug_assert_eq!(args.len(), 2);
// This implementation is quite long, let me explain what is complicating it. Here are our // This implementation is quite long, let me explain what is complicating it. Here are our
@ -2489,7 +2585,7 @@ fn list_append<'a, 'ctx, 'env>(
} }
_ => { _ => {
unreachable!( unreachable!(
"Invalid List layout for second input list of List.append: {:?}", "Invalid List layout for second input list of List.concat: {:?}",
second_list_layout second_list_layout
); );
} }
@ -2557,7 +2653,7 @@ fn list_append<'a, 'ctx, 'env>(
// FIRST LOOP // FIRST LOOP
{ {
let first_loop_bb = let first_loop_bb =
ctx.append_basic_block(parent, "first_list_append_loop"); ctx.append_basic_block(parent, "first_list_concat_loop");
builder.build_unconditional_branch(first_loop_bb); builder.build_unconditional_branch(first_loop_bb);
builder.position_at_end(first_loop_bb); builder.position_at_end(first_loop_bb);
@ -2628,7 +2724,7 @@ fn list_append<'a, 'ctx, 'env>(
// SECOND LOOP // SECOND LOOP
{ {
let second_loop_bb = let second_loop_bb =
ctx.append_basic_block(parent, "second_list_append_loop"); ctx.append_basic_block(parent, "second_list_concat_loop");
builder.build_unconditional_branch(second_loop_bb); builder.build_unconditional_branch(second_loop_bb);
builder.position_at_end(second_loop_bb); builder.position_at_end(second_loop_bb);
@ -2754,7 +2850,7 @@ fn list_append<'a, 'ctx, 'env>(
} }
_ => { _ => {
unreachable!( unreachable!(
"Invalid List layout for second input list of List.append: {:?}", "Invalid List layout for second input list of List.concat: {:?}",
second_list_layout second_list_layout
); );
} }
@ -2799,7 +2895,7 @@ fn list_append<'a, 'ctx, 'env>(
} }
_ => { _ => {
unreachable!( unreachable!(
"Invalid List layout for second input list of List.append: {:?}", "Invalid List layout for second input list of List.concat: {:?}",
second_list_layout second_list_layout
); );
} }
@ -2817,7 +2913,7 @@ fn list_append<'a, 'ctx, 'env>(
} }
_ => { _ => {
unreachable!( unreachable!(
"Invalid List layout for first list in List.append : {:?}", "Invalid List layout for first list in List.concat : {:?}",
first_list_layout first_list_layout
); );
} }

View file

@ -31,10 +31,10 @@ mod gen_list {
} }
#[test] #[test]
fn list_push() { fn list_append() {
assert_evals_to!("List.push [1] 2", &[1, 2], &'static [i64]); assert_evals_to!("List.append [1] 2", &[1, 2], &'static [i64]);
assert_evals_to!("List.push [1, 1] 2", &[1, 1, 2], &'static [i64]); assert_evals_to!("List.append [1, 1] 2", &[1, 1, 2], &'static [i64]);
assert_evals_to!("List.push [] 3", &[3], &'static [i64]); assert_evals_to!("List.append [] 3", &[3], &'static [i64]);
assert_evals_to!( assert_evals_to!(
indoc!( indoc!(
r#" r#"
@ -42,24 +42,55 @@ mod gen_list {
initThrees = initThrees =
[] []
List.push (List.push initThrees 3) 3 List.append (List.append initThrees 3) 3
"# "#
), ),
&[3, 3], &[3, 3],
&'static [i64] &'static [i64]
); );
assert_evals_to!( assert_evals_to!(
"List.push [ True, False ] True", "List.append [ True, False ] True",
&[true, false, true], &[true, false, true],
&'static [bool] &'static [bool]
); );
assert_evals_to!( assert_evals_to!(
"List.push [ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 ] 23", "List.append [ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 ] 23",
&[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], &[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
&'static [i64] &'static [i64]
); );
} }
/// Checks `List.prepend` on empty, single-element, nested, boolean and longer
/// lists. Each case passes `assert_evals_to!` a Roc expression, the expected
/// elements, and the expected Rust slice type.
#[test]
fn list_prepend() {
// Prepending to an empty list yields a one-element list.
assert_evals_to!("List.prepend [] 1", &[1], &'static [i64]);
// The new element comes before the existing one.
assert_evals_to!("List.prepend [2] 1", &[1, 2], &'static [i64]);
// Two nested prepends onto an annotated empty list: the element prepended
// last (6) ends up first.
assert_evals_to!(
indoc!(
r#"
init : List Int
init =
[]
List.prepend (List.prepend init 4) 6
"#
),
&[6, 4],
&'static [i64]
);
// Boolean elements rather than integers.
assert_evals_to!(
"List.prepend [ True, False ] True",
&[true, true, false],
&'static [bool]
);
// A longer (16-element) input list.
assert_evals_to!(
"List.prepend [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 100, 100, 100, 100 ] 9",
&[9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 100, 100, 100, 100],
&'static [i64]
);
}
#[test] #[test]
fn list_single() { fn list_single() {
assert_evals_to!("List.single 1", &[1], &'static [i64]); assert_evals_to!("List.single 1", &[1], &'static [i64]);
@ -119,8 +150,8 @@ mod gen_list {
} }
#[test] #[test]
fn list_append() { fn list_concat() {
assert_evals_to!("List.append [] []", &[], &'static [i64]); assert_evals_to!("List.concat [] []", &[], &'static [i64]);
assert_evals_to!( assert_evals_to!(
indoc!( indoc!(
@ -133,30 +164,30 @@ mod gen_list {
secondList = secondList =
[] []
List.append firstList secondList List.concat firstList secondList
"# "#
), ),
&[], &[],
&'static [i64] &'static [i64]
); );
assert_evals_to!("List.append [ 12, 13 ] []", &[12, 13], &'static [i64]); assert_evals_to!("List.concat [ 12, 13 ] []", &[12, 13], &'static [i64]);
assert_evals_to!( assert_evals_to!(
"List.append [ 34, 43 ] [ 64, 55, 66 ]", "List.concat [ 34, 43 ] [ 64, 55, 66 ]",
&[34, 43, 64, 55, 66], &[34, 43, 64, 55, 66],
&'static [i64] &'static [i64]
); );
assert_evals_to!("List.append [] [ 23, 24 ]", &[23, 24], &'static [i64]); assert_evals_to!("List.concat [] [ 23, 24 ]", &[23, 24], &'static [i64]);
assert_evals_to!( assert_evals_to!(
"List.append [ 1, 2 ] [ 3, 4 ]", "List.concat [ 1, 2 ] [ 3, 4 ]",
&[1, 2, 3, 4], &[1, 2, 3, 4],
&'static [i64] &'static [i64]
); );
} }
fn assert_append_worked(num_elems1: i64, num_elems2: i64) { fn assert_concat_worked(num_elems1: i64, num_elems2: i64) {
let vec1: Vec<i64> = (0..num_elems1) let vec1: Vec<i64> = (0..num_elems1)
.map(|i| 12345 % (i + num_elems1 + num_elems2 + 1)) .map(|i| 12345 % (i + num_elems1 + num_elems2 + 1))
.collect(); .collect();
@ -172,51 +203,51 @@ mod gen_list {
let expected_slice: &[i64] = expected.as_ref(); let expected_slice: &[i64] = expected.as_ref();
assert_evals_to!( assert_evals_to!(
&format!("List.append {} {}", slice_str1, slice_str2), &format!("List.concat {} {}", slice_str1, slice_str2),
expected_slice, expected_slice,
&'static [i64] &'static [i64]
); );
} }
#[test] #[test]
fn list_append_empty_list() { fn list_concat_empty_list() {
assert_append_worked(0, 0); assert_concat_worked(0, 0);
assert_append_worked(1, 0); assert_concat_worked(1, 0);
assert_append_worked(2, 0); assert_concat_worked(2, 0);
assert_append_worked(3, 0); assert_concat_worked(3, 0);
assert_append_worked(4, 0); assert_concat_worked(4, 0);
assert_append_worked(7, 0); assert_concat_worked(7, 0);
assert_append_worked(8, 0); assert_concat_worked(8, 0);
assert_append_worked(9, 0); assert_concat_worked(9, 0);
assert_append_worked(25, 0); assert_concat_worked(25, 0);
assert_append_worked(150, 0); assert_concat_worked(150, 0);
assert_append_worked(0, 1); assert_concat_worked(0, 1);
assert_append_worked(0, 2); assert_concat_worked(0, 2);
assert_append_worked(0, 3); assert_concat_worked(0, 3);
assert_append_worked(0, 4); assert_concat_worked(0, 4);
assert_append_worked(0, 7); assert_concat_worked(0, 7);
assert_append_worked(0, 8); assert_concat_worked(0, 8);
assert_append_worked(0, 9); assert_concat_worked(0, 9);
assert_append_worked(0, 25); assert_concat_worked(0, 25);
assert_append_worked(0, 150); assert_concat_worked(0, 150);
} }
#[test] #[test]
fn list_append_nonempty_lists() { fn list_concat_nonempty_lists() {
assert_append_worked(1, 1); assert_concat_worked(1, 1);
assert_append_worked(1, 2); assert_concat_worked(1, 2);
assert_append_worked(1, 3); assert_concat_worked(1, 3);
assert_append_worked(2, 3); assert_concat_worked(2, 3);
assert_append_worked(2, 1); assert_concat_worked(2, 1);
assert_append_worked(2, 2); assert_concat_worked(2, 2);
assert_append_worked(3, 1); assert_concat_worked(3, 1);
assert_append_worked(3, 2); assert_concat_worked(3, 2);
assert_append_worked(2, 3); assert_concat_worked(2, 3);
assert_append_worked(3, 3); assert_concat_worked(3, 3);
assert_append_worked(4, 4); assert_concat_worked(4, 4);
assert_append_worked(150, 150); assert_concat_worked(150, 150);
assert_append_worked(129, 350); assert_concat_worked(129, 350);
assert_append_worked(350, 129); assert_concat_worked(350, 129);
} }
#[test] #[test]

View file

@ -67,7 +67,7 @@ pub fn helper_without_uniqueness<'a>(
fpm.initialize(); fpm.initialize();
// Compute main_fn_type before moving subs to Env // Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs, ptr_bytes).unwrap_or_else(|err| { let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
panic!( panic!(
"Code gen error in NON-OPTIMIZED test: could not convert to layout. Err was {:?}", "Code gen error in NON-OPTIMIZED test: could not convert to layout. Err was {:?}",
err err
@ -103,7 +103,6 @@ pub fn helper_without_uniqueness<'a>(
problems: &mut mono_problems, problems: &mut mono_problems,
home, home,
ident_ids: &mut ident_ids, ident_ids: &mut ident_ids,
pointer_size: ptr_bytes,
jump_counter: arena.alloc(0), jump_counter: arena.alloc(0),
}; };
@ -258,7 +257,7 @@ pub fn helper_with_uniqueness<'a>(
fpm.initialize(); fpm.initialize();
// Compute main_fn_type before moving subs to Env // Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs, ptr_bytes).unwrap_or_else(|err| { let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
panic!( panic!(
"Code gen error in OPTIMIZED test: could not convert to layout. Err was {:?}", "Code gen error in OPTIMIZED test: could not convert to layout. Err was {:?}",
err err
@ -296,7 +295,6 @@ pub fn helper_with_uniqueness<'a>(
problems: &mut mono_problems, problems: &mut mono_problems,
home, home,
ident_ids: &mut ident_ids, ident_ids: &mut ident_ids,
pointer_size: ptr_bytes,
jump_counter: arena.alloc(0), jump_counter: arena.alloc(0),
}; };

View file

@ -87,7 +87,7 @@ pub fn infer_expr(
} }
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);

View file

@ -19,7 +19,8 @@ roc_parse = { path = "../parse" }
roc_solve = { path = "../solve" } roc_solve = { path = "../solve" }
bumpalo = { version = "3.2", features = ["collections"] } bumpalo = { version = "3.2", features = ["collections"] }
inlinable_string = "0.1" inlinable_string = "0.1"
tokio = { version = "0.2", features = ["blocking", "fs", "sync", "rt-threaded"] } crossbeam = "0.7"
num_cpus = "1"
[dev-dependencies] [dev-dependencies]
pretty_assertions = "0.5.1" pretty_assertions = "0.5.1"

File diff suppressed because it is too large Load diff

View file

@ -54,7 +54,7 @@ reconstructPath = \cameFrom, goal ->
[] []
Ok next -> Ok next ->
List.push (reconstructPath cameFrom next) goal List.append (reconstructPath cameFrom next) goal
updateCost : position, position, Model position -> Model position updateCost : position, position, Model position -> Model position
updateCost = \current, neighbour, model -> updateCost = \current, neighbour, model ->

View file

@ -54,7 +54,7 @@ reconstructPath = \cameFrom, goal ->
[] []
Ok next -> Ok next ->
List.push (reconstructPath cameFrom next) goal List.append (reconstructPath cameFrom next) goal
updateCost : position, position, Model position -> Model position updateCost : position, position, Model position -> Model position
updateCost = \current, neighbour, model -> updateCost = \current, neighbour, model ->

View file

@ -29,29 +29,6 @@ pub fn test_home() -> ModuleId {
ModuleIds::default().get_or_insert(&"Test".into()) ModuleIds::default().get_or_insert(&"Test".into())
} }
/// Without a larger-than-default stack size, some tests
/// run out of stack space in debug builds (but don't in --release builds)
#[allow(dead_code)]
const THREAD_STACK_SIZE: usize = 4 * 1024 * 1024;
pub fn test_async<F: std::future::Future>(future: F) -> F::Output {
use tokio::runtime::Builder;
// Create the runtime
let mut rt = Builder::new()
.thread_name("tokio-thread-for-tests")
.thread_stack_size(THREAD_STACK_SIZE)
// DEBUG: Replace this with .basic_scheduler() to make tests run single-threaded on the main thread.
// Doing this makes assertion failures easier to read, but means
// the tests can't reveal concurrency bugs, so leave this off by default!
.threaded_scheduler()
.build()
.expect("Error initializing Tokio runtime.");
// Spawn the root task
rt.block_on(future)
}
#[allow(dead_code)] #[allow(dead_code)]
pub fn infer_expr( pub fn infer_expr(
subs: Subs, subs: Subs,
@ -92,7 +69,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)] #[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);

View file

@ -13,7 +13,7 @@ mod helpers;
#[cfg(test)] #[cfg(test)]
mod test_load { mod test_load {
use crate::helpers::{fixtures_dir, test_async}; use crate::helpers::fixtures_dir;
use inlinable_string::InlinableString; use inlinable_string::InlinableString;
use roc_can::def::Declaration::*; use roc_can::def::Declaration::*;
use roc_can::def::Def; use roc_can::def::Def;
@ -27,7 +27,7 @@ mod test_load {
// HELPERS // HELPERS
async fn load_fixture( fn load_fixture(
dir_name: &str, dir_name: &str,
module_name: &str, module_name: &str,
subs_by_module: SubsByModule, subs_by_module: SubsByModule,
@ -35,12 +35,11 @@ mod test_load {
let src_dir = fixtures_dir().join(dir_name); let src_dir = fixtures_dir().join(dir_name);
let filename = src_dir.join(format!("{}.roc", module_name)); let filename = src_dir.join(format!("{}.roc", module_name));
let loaded = load( let loaded = load(
&roc_builtins::std::standard_stdlib(),
src_dir,
filename, filename,
&roc_builtins::std::standard_stdlib(),
src_dir.as_path(),
subs_by_module, subs_by_module,
) );
.await;
let loaded_module = loaded.expect("Test module failed to load"); let loaded_module = loaded.expect("Test module failed to load");
assert_eq!(loaded_module.can_problems, Vec::new()); assert_eq!(loaded_module.can_problems, Vec::new());
@ -129,286 +128,256 @@ mod test_load {
let subs_by_module = MutMap::default(); let subs_by_module = MutMap::default();
let src_dir = fixtures_dir().join("interface_with_deps"); let src_dir = fixtures_dir().join("interface_with_deps");
let filename = src_dir.join("Primary.roc"); let filename = src_dir.join("Primary.roc");
let loaded = load(
filename,
&roc_builtins::std::standard_stdlib(),
src_dir.as_path(),
subs_by_module,
);
test_async(async { let mut loaded_module = loaded.expect("Test module failed to load");
let loaded = load(
&roc_builtins::std::standard_stdlib(),
src_dir,
filename,
subs_by_module,
)
.await;
let mut loaded_module = loaded.expect("Test module failed to load"); assert_eq!(loaded_module.can_problems, Vec::new());
assert_eq!(loaded_module.type_problems, Vec::new());
assert_eq!(loaded_module.can_problems, Vec::new()); let def_count: usize = loaded_module
assert_eq!(loaded_module.type_problems, Vec::new()); .declarations_by_id
.remove(&loaded_module.module_id)
.unwrap()
.into_iter()
.map(|decl| decl.def_count())
.sum();
let def_count: usize = loaded_module let expected_name = loaded_module
.declarations_by_id .interns
.remove(&loaded_module.module_id) .module_ids
.unwrap() .get_name(loaded_module.module_id)
.into_iter() .expect("Test ModuleID not found in module_ids");
.map(|decl| decl.def_count())
.sum();
let expected_name = loaded_module assert_eq!(expected_name, &InlinableString::from("Primary"));
.interns assert_eq!(def_count, 10);
.module_ids
.get_name(loaded_module.module_id)
.expect("Test ModuleID not found in module_ids");
assert_eq!(expected_name, &InlinableString::from("Primary"));
assert_eq!(def_count, 10);
});
} }
#[test] #[test]
fn load_unit() { fn load_unit() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("no_deps", "Unit", subs_by_module);
let loaded_module = load_fixture("no_deps", "Unit", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"unit" => "Unit", "unit" => "Unit",
}, },
); );
});
} }
#[test] #[test]
fn import_alias() { fn import_alias() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "ImportAlias", subs_by_module);
let loaded_module =
load_fixture("interface_with_deps", "ImportAlias", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"unit" => "Dep1.Unit", "unit" => "Dep1.Unit",
}, },
); );
});
} }
#[test] #[test]
fn load_and_typecheck() { fn load_and_typecheck() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "WithBuiltins", subs_by_module);
let loaded_module =
load_fixture("interface_with_deps", "WithBuiltins", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"floatTest" => "Float", "floatTest" => "Float",
"divisionFn" => "Float, Float -> Result Float [ DivByZero ]*", "divisionFn" => "Float, Float -> Result Float [ DivByZero ]*",
"divisionTest" => "Result Float [ DivByZero ]*", "divisionTest" => "Result Float [ DivByZero ]*",
"intTest" => "Int", "intTest" => "Int",
"x" => "Float", "x" => "Float",
"constantNum" => "Num *", "constantNum" => "Num *",
"divDep1ByDep2" => "Result Float [ DivByZero ]*", "divDep1ByDep2" => "Result Float [ DivByZero ]*",
"fromDep2" => "Float", "fromDep2" => "Float",
}, },
); );
});
} }
#[test] #[test]
fn iface_quicksort() { fn iface_quicksort() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "Quicksort", subs_by_module);
let loaded_module =
load_fixture("interface_with_deps", "Quicksort", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"swap" => "Int, Int, List a -> List a", "swap" => "Int, Int, List a -> List a",
"partition" => "Int, Int, List (Num a) -> [ Pair Int (List (Num a)) ]", "partition" => "Int, Int, List (Num a) -> [ Pair Int (List (Num a)) ]",
"quicksort" => "List (Num a), Int, Int -> List (Num a)", "quicksort" => "List (Num a), Int, Int -> List (Num a)",
}, },
); );
});
} }
#[test] #[test]
fn app_quicksort() { fn app_quicksort() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("app_with_deps", "Quicksort", subs_by_module);
let loaded_module = load_fixture("app_with_deps", "Quicksort", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"swap" => "Int, Int, List a -> List a", "swap" => "Int, Int, List a -> List a",
"partition" => "Int, Int, List (Num a) -> [ Pair Int (List (Num a)) ]", "partition" => "Int, Int, List (Num a) -> [ Pair Int (List (Num a)) ]",
"quicksort" => "List (Num a), Int, Int -> List (Num a)", "quicksort" => "List (Num a), Int, Int -> List (Num a)",
}, },
); );
});
} }
#[test] #[test]
fn load_astar() { fn load_astar() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "AStar", subs_by_module);
let loaded_module = load_fixture("interface_with_deps", "AStar", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"findPath" => "{ costFunction : (position, position -> Float), end : position, moveFunction : (position -> Set position), start : position } -> Result (List position) [ KeyNotFound ]*", "findPath" => "{ costFunction : (position, position -> Float), end : position, moveFunction : (position -> Set position), start : position } -> Result (List position) [ KeyNotFound ]*",
"initialModel" => "position -> Model position", "initialModel" => "position -> Model position",
"reconstructPath" => "Map position position, position -> List position", "reconstructPath" => "Map position position, position -> List position",
"updateCost" => "position, position, Model position -> Model position", "updateCost" => "position, position, Model position -> Model position",
"cheapestOpen" => "(position -> Float), Model position -> Result position [ KeyNotFound ]*", "cheapestOpen" => "(position -> Float), Model position -> Result position [ KeyNotFound ]*",
"astar" => "(position, position -> Float), (position -> Set position), position, Model position -> [ Err [ KeyNotFound ]*, Ok (List position) ]*", "astar" => "(position, position -> Float), (position -> Set position), position, Model position -> [ Err [ KeyNotFound ]*, Ok (List position) ]*",
}, },
); );
});
} }
#[test] #[test]
fn load_principal_types() { fn load_principal_types() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("no_deps", "Principal", subs_by_module);
let loaded_module = load_fixture("no_deps", "Principal", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"intVal" => "Str", "intVal" => "Str",
"identity" => "a -> a", "identity" => "a -> a",
}, },
); );
});
} }
#[test] #[test]
fn iface_dep_types() { fn iface_dep_types() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "Primary", subs_by_module);
let loaded_module =
load_fixture("interface_with_deps", "Primary", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"blah2" => "Float", "blah2" => "Float",
"blah3" => "Str", "blah3" => "Str",
"str" => "Str", "str" => "Str",
"alwaysThree" => "* -> Str", "alwaysThree" => "* -> Str",
"identity" => "a -> a", "identity" => "a -> a",
"z" => "Str", "z" => "Str",
"w" => "Dep1.Identity {}", "w" => "Dep1.Identity {}",
"succeed" => "a -> Dep1.Identity a", "succeed" => "a -> Dep1.Identity a",
"yay" => "Res.Res {} err", "yay" => "Res.Res {} err",
"withDefault" => "Res.Res a *, a -> a", "withDefault" => "Res.Res a *, a -> a",
}, },
); );
});
} }
#[test] #[test]
fn app_dep_types() { fn app_dep_types() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("app_with_deps", "Primary", subs_by_module);
let loaded_module = load_fixture("app_with_deps", "Primary", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"blah2" => "Float", "blah2" => "Float",
"blah3" => "Str", "blah3" => "Str",
"str" => "Str", "str" => "Str",
"alwaysThree" => "* -> Str", "alwaysThree" => "* -> Str",
"identity" => "a -> a", "identity" => "a -> a",
"z" => "Str", "z" => "Str",
"w" => "Dep1.Identity {}", "w" => "Dep1.Identity {}",
"succeed" => "a -> Dep1.Identity a", "succeed" => "a -> Dep1.Identity a",
"yay" => "Res.Res {} err", "yay" => "Res.Res {} err",
"withDefault" => "Res.Res a *, a -> a", "withDefault" => "Res.Res a *, a -> a",
}, },
); );
});
} }
#[test] #[test]
fn imported_dep_regression() { fn imported_dep_regression() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "OneDep", subs_by_module);
let loaded_module = load_fixture("interface_with_deps", "OneDep", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"str" => "Str", "str" => "Str",
}, },
); );
});
} }
// #[test] // #[test]
// fn load_records() { // fn load_records() {
// test_async(async { // use roc::types::{ErrorType, Mismatch, Problem, TypeExt};
// use roc::types::{ErrorType, Mismatch, Problem, TypeExt};
// let subs_by_module = MutMap::default(); // let subs_by_module = MutMap::default();
// let loaded_module = // let loaded_module =
// load_fixture("interface_with_deps", "Records", subs_by_module).await; // load_fixture("interface_with_deps", "Records", subs_by_module);
// // NOTE: `a` here is unconstrained, so unifies with <type error> // // NOTE: `a` here is unconstrained, so unifies with <type error>
// let expected_types = hashmap! { // let expected_types = hashmap! {
// "Records.intVal" => "a", // "Records.intVal" => "a",
// };
// let a = ErrorType::FlexVar("a".into());
// let mut record = SendMap::default();
// record.insert("x".into(), a);
// let problem = Problem::Mismatch(
// Mismatch::TypeMismatch,
// ErrorType::Record(SendMap::default(), TypeExt::Closed),
// ErrorType::Record(record, TypeExt::FlexOpen("b".into())),
// );
// assert_eq!(loaded_module.problems, vec![problem]);
// assert_eq!(expected_types.len(), loaded_module.declarations.len());
// let mut subs = loaded_module.solved.into_inner();
// for decl in loaded_module.declarations {
// let def = match decl {
// Declare(def) => def,
// rec_decl @ DeclareRec(_) => {
// panic!(
// "Unexpected recursive def in module declarations: {:?}",
// rec_decl
// );
// }
// cycle @ InvalidCycle(_, _) => {
// panic!("Unexpected cyclic def in module declarations: {:?}", cycle);
// }
// }; // };
// let a = ErrorType::FlexVar("a".into()); // for (symbol, expr_var) in def.pattern_vars {
// let content = subs.get(expr_var).content;
// let mut record = SendMap::default(); // name_all_type_vars(expr_var, &mut subs);
// record.insert("x".into(), a);
// let problem = Problem::Mismatch( // let actual_str = content_to_string(content, &mut subs);
// Mismatch::TypeMismatch, // let expected_type = expected_types.get(symbol.as_str()).unwrap_or_else(|| {
// ErrorType::Record(SendMap::default(), TypeExt::Closed), // panic!("Defs included an unexpected symbol: {:?}", symbol)
// ErrorType::Record(record, TypeExt::FlexOpen("b".into())), // });
// );
// assert_eq!(loaded_module.problems, vec![problem]); // assert_eq!((&symbol, expected_type), (&symbol, &actual_str.as_str()));
// assert_eq!(expected_types.len(), loaded_module.declarations.len());
// let mut subs = loaded_module.solved.into_inner();
// for decl in loaded_module.declarations {
// let def = match decl {
// Declare(def) => def,
// rec_decl @ DeclareRec(_) => {
// panic!(
// "Unexpected recursive def in module declarations: {:?}",
// rec_decl
// );
// }
// cycle @ InvalidCycle(_, _) => {
// panic!("Unexpected cyclic def in module declarations: {:?}", cycle);
// }
// };
// for (symbol, expr_var) in def.pattern_vars {
// let content = subs.get(expr_var).content;
// name_all_type_vars(expr_var, &mut subs);
// let actual_str = content_to_string(content, &mut subs);
// let expected_type = expected_types.get(symbol.as_str()).unwrap_or_else(|| {
// panic!("Defs included an unexpected symbol: {:?}", symbol)
// });
// assert_eq!((&symbol, expected_type), (&symbol, &actual_str.as_str()));
// }
// } // }
// }); // }
// } // }
} }

View file

@ -13,7 +13,7 @@ mod helpers;
#[cfg(test)] #[cfg(test)]
mod test_uniq_load { mod test_uniq_load {
use crate::helpers::{fixtures_dir, test_async}; use crate::helpers::fixtures_dir;
use inlinable_string::InlinableString; use inlinable_string::InlinableString;
use roc_builtins::unique; use roc_builtins::unique;
use roc_can::def::Declaration::*; use roc_can::def::Declaration::*;
@ -28,14 +28,19 @@ mod test_uniq_load {
// HELPERS // HELPERS
async fn load_fixture( fn load_fixture(
dir_name: &str, dir_name: &str,
module_name: &str, module_name: &str,
subs_by_module: SubsByModule, subs_by_module: SubsByModule,
) -> LoadedModule { ) -> LoadedModule {
let src_dir = fixtures_dir().join(dir_name); let src_dir = fixtures_dir().join(dir_name);
let filename = src_dir.join(format!("{}.roc", module_name)); let filename = src_dir.join(format!("{}.roc", module_name));
let loaded = load(&unique::uniq_stdlib(), src_dir, filename, subs_by_module).await; let loaded = load(
filename,
&unique::uniq_stdlib(),
src_dir.as_path(),
subs_by_module,
);
let loaded_module = loaded.expect("Test module failed to load"); let loaded_module = loaded.expect("Test module failed to load");
assert_eq!(loaded_module.can_problems, Vec::new()); assert_eq!(loaded_module.can_problems, Vec::new());
@ -124,205 +129,179 @@ mod test_uniq_load {
let subs_by_module = MutMap::default(); let subs_by_module = MutMap::default();
let src_dir = fixtures_dir().join("interface_with_deps"); let src_dir = fixtures_dir().join("interface_with_deps");
let filename = src_dir.join("Primary.roc"); let filename = src_dir.join("Primary.roc");
let loaded = load(
filename,
&roc_builtins::std::standard_stdlib(),
src_dir.as_path(),
subs_by_module,
);
test_async(async { let mut loaded_module = loaded.expect("Test module failed to load");
let loaded = load(
&roc_builtins::std::standard_stdlib(),
src_dir,
filename,
subs_by_module,
)
.await;
let mut loaded_module = loaded.expect("Test module failed to load"); assert_eq!(loaded_module.can_problems, Vec::new());
assert_eq!(loaded_module.type_problems, Vec::new());
assert_eq!(loaded_module.can_problems, Vec::new()); let def_count: usize = loaded_module
assert_eq!(loaded_module.type_problems, Vec::new()); .declarations_by_id
.remove(&loaded_module.module_id)
.unwrap()
.into_iter()
.map(|decl| decl.def_count())
.sum();
let def_count: usize = loaded_module let expected_name = loaded_module
.declarations_by_id .interns
.remove(&loaded_module.module_id) .module_ids
.unwrap() .get_name(loaded_module.module_id)
.into_iter() .expect("Test ModuleID not found in module_ids");
.map(|decl| decl.def_count())
.sum();
let expected_name = loaded_module assert_eq!(expected_name, &InlinableString::from("Primary"));
.interns assert_eq!(def_count, 10);
.module_ids
.get_name(loaded_module.module_id)
.expect("Test ModuleID not found in module_ids");
assert_eq!(expected_name, &InlinableString::from("Primary"));
assert_eq!(def_count, 10);
});
} }
#[test] #[test]
fn load_unit() { fn load_unit() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("no_deps", "Unit", subs_by_module);
let loaded_module = load_fixture("no_deps", "Unit", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"unit" => "Attr * Unit", "unit" => "Attr * Unit",
}, },
); );
});
} }
#[test] #[test]
fn import_alias() { fn import_alias() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "ImportAlias", subs_by_module);
let loaded_module =
load_fixture("interface_with_deps", "ImportAlias", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"unit" => "Attr * Dep1.Unit", "unit" => "Attr * Dep1.Unit",
}, },
); );
});
} }
#[test] #[test]
fn load_and_typecheck() { fn load_and_typecheck() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "WithBuiltins", subs_by_module);
let loaded_module =
load_fixture("interface_with_deps", "WithBuiltins", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"floatTest" => "Attr Shared Float", "floatTest" => "Attr Shared Float",
"divisionFn" => "Attr Shared (Attr * Float, Attr * Float -> Attr * (Result (Attr * Float) (Attr * [ DivByZero ]*)))", "divisionFn" => "Attr Shared (Attr * Float, Attr * Float -> Attr * (Result (Attr * Float) (Attr * [ DivByZero ]*)))",
"divisionTest" => "Attr * (Result (Attr * Float) (Attr * [ DivByZero ]*))", "divisionTest" => "Attr * (Result (Attr * Float) (Attr * [ DivByZero ]*))",
"intTest" => "Attr * Int", "intTest" => "Attr * Int",
"x" => "Attr * Float", "x" => "Attr * Float",
"constantNum" => "Attr * (Num (Attr * *))", "constantNum" => "Attr * (Num (Attr * *))",
"divDep1ByDep2" => "Attr * (Result (Attr * Float) (Attr * [ DivByZero ]*))", "divDep1ByDep2" => "Attr * (Result (Attr * Float) (Attr * [ DivByZero ]*))",
"fromDep2" => "Attr * Float", "fromDep2" => "Attr * Float",
}, },
); );
});
} }
#[test] #[test]
fn load_astar() { fn load_astar() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "AStar", subs_by_module);
let loaded_module = load_fixture("interface_with_deps", "AStar", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"findPath" => "Attr * (Attr * { costFunction : (Attr Shared (Attr Shared position, Attr Shared position -> Attr * Float)), end : (Attr Shared position), moveFunction : (Attr Shared (Attr Shared position -> Attr * (Set (Attr * position)))), start : (Attr Shared position) } -> Attr * (Result (Attr * (List (Attr Shared position))) (Attr * [ KeyNotFound ]*)))", "findPath" => "Attr * (Attr * { costFunction : (Attr Shared (Attr Shared position, Attr Shared position -> Attr * Float)), end : (Attr Shared position), moveFunction : (Attr Shared (Attr Shared position -> Attr * (Set (Attr * position)))), start : (Attr Shared position) } -> Attr * (Result (Attr * (List (Attr Shared position))) (Attr * [ KeyNotFound ]*)))",
"initialModel" => "Attr * (Attr Shared position -> Attr * (Model (Attr Shared position)))", "initialModel" => "Attr * (Attr Shared position -> Attr * (Model (Attr Shared position)))",
"reconstructPath" => "Attr Shared (Attr Shared (Map (Attr * position) (Attr Shared position)), Attr Shared position -> Attr * (List (Attr Shared position)))", "reconstructPath" => "Attr Shared (Attr Shared (Map (Attr * position) (Attr Shared position)), Attr Shared position -> Attr * (List (Attr Shared position)))",
"updateCost" => "Attr * (Attr Shared position, Attr Shared position, Attr Shared (Model (Attr Shared position)) -> Attr Shared (Model (Attr Shared position)))", "updateCost" => "Attr * (Attr Shared position, Attr Shared position, Attr Shared (Model (Attr Shared position)) -> Attr Shared (Model (Attr Shared position)))",
"cheapestOpen" => "Attr * (Attr * (Attr Shared position -> Attr * Float), Attr (* | a | b | c) (Model (Attr Shared position)) -> Attr * (Result (Attr Shared position) (Attr * [ KeyNotFound ]*)))", "cheapestOpen" => "Attr * (Attr * (Attr Shared position -> Attr * Float), Attr (* | a | b | c) (Model (Attr Shared position)) -> Attr * (Result (Attr Shared position) (Attr * [ KeyNotFound ]*)))",
"astar" => "Attr Shared (Attr Shared (Attr Shared position, Attr Shared position -> Attr * Float), Attr Shared (Attr Shared position -> Attr * (Set (Attr * position))), Attr Shared position, Attr Shared (Model (Attr Shared position)) -> Attr * [ Err (Attr * [ KeyNotFound ]*), Ok (Attr * (List (Attr Shared position))) ]*)", "astar" => "Attr Shared (Attr Shared (Attr Shared position, Attr Shared position -> Attr * Float), Attr Shared (Attr Shared position -> Attr * (Set (Attr * position))), Attr Shared position, Attr Shared (Model (Attr Shared position)) -> Attr * [ Err (Attr * [ KeyNotFound ]*), Ok (Attr * (List (Attr Shared position))) ]*)",
}, },
); );
});
} }
#[test] #[test]
fn load_and_typecheck_quicksort() { fn load_and_typecheck_quicksort() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "Quicksort", subs_by_module);
let loaded_module =
load_fixture("interface_with_deps", "Quicksort", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"swap" => "Attr * (Attr * Int, Attr * Int, Attr * (List (Attr Shared a)) -> Attr * (List (Attr Shared a)))", "swap" => "Attr * (Attr * Int, Attr * Int, Attr * (List (Attr Shared a)) -> Attr * (List (Attr Shared a)))",
"partition" => "Attr * (Attr Shared Int, Attr Shared Int, Attr b (List (Attr Shared (Num (Attr Shared a)))) -> Attr * [ Pair (Attr * Int) (Attr b (List (Attr Shared (Num (Attr Shared a))))) ])", "partition" => "Attr * (Attr Shared Int, Attr Shared Int, Attr b (List (Attr Shared (Num (Attr Shared a)))) -> Attr * [ Pair (Attr * Int) (Attr b (List (Attr Shared (Num (Attr Shared a))))) ])",
"quicksort" => "Attr Shared (Attr b (List (Attr Shared (Num (Attr Shared a)))), Attr Shared Int, Attr Shared Int -> Attr b (List (Attr Shared (Num (Attr Shared a)))))", "quicksort" => "Attr Shared (Attr b (List (Attr Shared (Num (Attr Shared a)))), Attr Shared Int, Attr Shared Int -> Attr b (List (Attr Shared (Num (Attr Shared a)))))",
}, },
); );
});
} }
#[test] #[test]
fn load_principal_types() { fn load_principal_types() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("no_deps", "Principal", subs_by_module);
let loaded_module = load_fixture("no_deps", "Principal", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"intVal" => "Attr * Str", "intVal" => "Attr * Str",
"identity" => "Attr * (a -> a)", "identity" => "Attr * (a -> a)",
}, },
); );
});
} }
#[test] #[test]
fn load_dep_types() { fn load_dep_types() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "Primary", subs_by_module);
let loaded_module =
load_fixture("interface_with_deps", "Primary", subs_by_module).await;
// the inferred signature for withDefault is wrong, part of the alias in alias issue. // the inferred signature for withDefault is wrong, part of the alias in alias issue.
// "withDefault" => "Attr * (Attr * (Res.Res (Attr a b) (Attr * *)), Attr a b -> Attr a b)", // "withDefault" => "Attr * (Attr * (Res.Res (Attr a b) (Attr * *)), Attr a b -> Attr a b)",
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"blah2" => "Attr * Float", "blah2" => "Attr * Float",
"blah3" => "Attr * Str", "blah3" => "Attr * Str",
"str" => "Attr * Str", "str" => "Attr * Str",
"alwaysThree" => "Attr * (* -> Attr * Str)", "alwaysThree" => "Attr * (* -> Attr * Str)",
"identity" => "Attr * (a -> a)", "identity" => "Attr * (a -> a)",
"z" => "Attr * Str", "z" => "Attr * Str",
"w" => "Attr * (Dep1.Identity (Attr * {}))", "w" => "Attr * (Dep1.Identity (Attr * {}))",
"succeed" => "Attr * (Attr b a -> Attr * (Dep1.Identity (Attr b a)))", "succeed" => "Attr * (Attr b a -> Attr * (Dep1.Identity (Attr b a)))",
"yay" => "Attr * (Res.Res (Attr * {}) (Attr * err))", "yay" => "Attr * (Res.Res (Attr * {}) (Attr * err))",
"withDefault" => "Attr * (Attr (* | b | c) (Res.Res (Attr b a) (Attr c *)), Attr b a -> Attr b a)", "withDefault" => "Attr * (Attr (* | b | c) (Res.Res (Attr b a) (Attr c *)), Attr b a -> Attr b a)",
}, },
); );
});
} }
#[test] #[test]
fn load_custom_res() { fn load_custom_res() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "Res", subs_by_module);
let loaded_module = load_fixture("interface_with_deps", "Res", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"withDefault" =>"Attr * (Attr (* | b | c) (Res (Attr b a) (Attr c err)), Attr b a -> Attr b a)", "withDefault" =>"Attr * (Attr (* | b | c) (Res (Attr b a) (Attr c err)), Attr b a -> Attr b a)",
"map" => "Attr * (Attr (* | c | d) (Res (Attr c a) (Attr d err)), Attr * (Attr c a -> Attr e b) -> Attr * (Res (Attr e b) (Attr d err)))", "map" => "Attr * (Attr (* | c | d) (Res (Attr c a) (Attr d err)), Attr * (Attr c a -> Attr e b) -> Attr * (Res (Attr e b) (Attr d err)))",
"andThen" => "Attr * (Attr (* | c | d) (Res (Attr c a) (Attr d err)), Attr * (Attr c a -> Attr f (Res (Attr e b) (Attr d err))) -> Attr f (Res (Attr e b) (Attr d err)))", "andThen" => "Attr * (Attr (* | c | d) (Res (Attr c a) (Attr d err)), Attr * (Attr c a -> Attr f (Res (Attr e b) (Attr d err))) -> Attr f (Res (Attr e b) (Attr d err)))",
}, },
); );
});
} }
#[test] #[test]
fn imported_dep_regression() { fn imported_dep_regression() {
test_async(async { let subs_by_module = MutMap::default();
let subs_by_module = MutMap::default(); let loaded_module = load_fixture("interface_with_deps", "OneDep", subs_by_module);
let loaded_module = load_fixture("interface_with_deps", "OneDep", subs_by_module).await;
expect_types( expect_types(
loaded_module, loaded_module,
hashmap! { hashmap! {
"str" => "Attr * Str", "str" => "Attr * Str",
}, },
); );
});
} }
// #[test] // #[test]
@ -332,7 +311,7 @@ mod test_uniq_load {
// let subs_by_module = MutMap::default(); // let subs_by_module = MutMap::default();
// let loaded_module = // let loaded_module =
// load_fixture("interface_with_deps", "Records", subs_by_module).await; // load_fixture("interface_with_deps", "Records", subs_by_module);
// // NOTE: `a` here is unconstrained, so unifies with <type error> // // NOTE: `a` here is unconstrained, so unifies with <type error>
// let expected_types = hashmap! { // let expected_types = hashmap! {

View file

@ -10,8 +10,9 @@ pub enum LowLevel {
ListSingle, ListSingle,
ListRepeat, ListRepeat,
ListReverse, ListReverse,
ListConcat,
ListAppend, ListAppend,
ListPush, ListPrepend,
NumAdd, NumAdd,
NumSub, NumSub,
NumMul, NumMul,

View file

@ -659,7 +659,7 @@ define_builtins! {
2 LIST_IS_EMPTY: "isEmpty" 2 LIST_IS_EMPTY: "isEmpty"
3 LIST_GET: "get" 3 LIST_GET: "get"
4 LIST_SET: "set" 4 LIST_SET: "set"
5 LIST_PUSH: "push" 5 LIST_APPEND: "append"
6 LIST_MAP: "map" 6 LIST_MAP: "map"
7 LIST_LEN: "len" 7 LIST_LEN: "len"
8 LIST_FOLDL: "foldl" 8 LIST_FOLDL: "foldl"
@ -669,7 +669,7 @@ define_builtins! {
12 LIST_SINGLE: "single" 12 LIST_SINGLE: "single"
13 LIST_REPEAT: "repeat" 13 LIST_REPEAT: "repeat"
14 LIST_REVERSE: "reverse" 14 LIST_REVERSE: "reverse"
15 LIST_APPEND: "append" 15 LIST_PREPEND: "prepend"
} }
5 RESULT: "Result" => { 5 RESULT: "Result" => {
0 RESULT_RESULT: "Result" imported // the Result.Result type alias 0 RESULT_RESULT: "Result" imported // the Result.Result type alias

View file

@ -154,9 +154,8 @@ fn to_decision_tree(raw_branches: Vec<Branch>) -> DecisionTree {
match check_for_match(&branches) { match check_for_match(&branches) {
Some(goal) => DecisionTree::Match(goal), Some(goal) => DecisionTree::Match(goal),
None => { None => {
// TODO remove clone // must clone here to release the borrow on `branches`
let path = pick_path(branches.clone()); let path = pick_path(&branches).clone();
let (edges, fallback) = gather_edges(branches, &path); let (edges, fallback) = gather_edges(branches, &path);
let mut decision_edges: Vec<_> = edges let mut decision_edges: Vec<_> = edges
@ -218,43 +217,47 @@ fn flatten<'a>(
path_pattern: (Path, Guard<'a>, Pattern<'a>), path_pattern: (Path, Guard<'a>, Pattern<'a>),
path_patterns: &mut Vec<(Path, Guard<'a>, Pattern<'a>)>, path_patterns: &mut Vec<(Path, Guard<'a>, Pattern<'a>)>,
) { ) {
match &path_pattern.2 { match path_pattern.2 {
Pattern::AppliedTag { Pattern::AppliedTag {
union, union,
arguments, arguments,
tag_id, tag_id,
.. tag_name,
} => { layout,
// TODO do we need to check that guard.is_none() here? } if union.alternatives.len() == 1 => {
if union.alternatives.len() == 1 { // TODO ^ do we need to check that guard.is_none() here?
let path = path_pattern.0;
// Theory: unbox doesn't have any value for us, because one-element tag unions let path = path_pattern.0;
// don't store the tag anyway. // Theory: unbox doesn't have any value for us, because one-element tag unions
if arguments.len() == 1 { // don't store the tag anyway.
path_patterns.push(( if arguments.len() == 1 {
Path::Unbox(Box::new(path)), path_patterns.push((
path_pattern.1.clone(), Path::Unbox(Box::new(path)),
path_pattern.2.clone(), path_pattern.1.clone(),
)); Pattern::AppliedTag {
} else { union,
for (index, (arg_pattern, _)) in arguments.iter().enumerate() { arguments,
flatten( tag_id,
( tag_name,
Path::Index { layout,
index: index as u64, },
tag_id: *tag_id, ));
path: Box::new(path.clone()),
},
// same guard here?
path_pattern.1.clone(),
arg_pattern.clone(),
),
path_patterns,
);
}
}
} else { } else {
path_patterns.push(path_pattern); for (index, (arg_pattern, _)) in arguments.iter().enumerate() {
flatten(
(
Path::Index {
index: index as u64,
tag_id,
path: Box::new(path.clone()),
},
// same guard here?
path_pattern.1.clone(),
arg_pattern.clone(),
),
path_patterns,
);
}
} }
} }
@ -289,8 +292,7 @@ fn gather_edges<'a>(
branches: Vec<Branch<'a>>, branches: Vec<Branch<'a>>,
path: &Path, path: &Path,
) -> (Vec<(Test<'a>, Vec<Branch<'a>>)>, Vec<Branch<'a>>) { ) -> (Vec<(Test<'a>, Vec<Branch<'a>>)>, Vec<Branch<'a>>) {
// TODO remove clone let relevant_tests = tests_at_path(path, &branches);
let relevant_tests = tests_at_path(path, branches.clone());
let check = is_complete(&relevant_tests); let check = is_complete(&relevant_tests);
@ -314,12 +316,12 @@ fn gather_edges<'a>(
/// FIND RELEVANT TESTS /// FIND RELEVANT TESTS
fn tests_at_path<'a>(selected_path: &Path, branches: Vec<Branch<'a>>) -> Vec<Test<'a>> { fn tests_at_path<'a>(selected_path: &Path, branches: &[Branch<'a>]) -> Vec<Test<'a>> {
// NOTE the ordering of the result is important! // NOTE the ordering of the result is important!
let mut all_tests = Vec::new(); let mut all_tests = Vec::new();
for branch in branches.into_iter() { for branch in branches {
test_at_path(selected_path, branch, &mut all_tests); test_at_path(selected_path, branch, &mut all_tests);
} }
@ -348,7 +350,7 @@ fn tests_at_path<'a>(selected_path: &Path, branches: Vec<Branch<'a>>) -> Vec<Tes
unique unique
} }
fn test_at_path<'a>(selected_path: &Path, branch: Branch<'a>, all_tests: &mut Vec<Test<'a>>) { fn test_at_path<'a>(selected_path: &Path, branch: &Branch<'a>, all_tests: &mut Vec<Test<'a>>) {
use Pattern::*; use Pattern::*;
use Test::*; use Test::*;
@ -466,7 +468,7 @@ fn edges_for<'a>(
) -> (Test<'a>, Vec<Branch<'a>>) { ) -> (Test<'a>, Vec<Branch<'a>>) {
let mut new_branches = Vec::new(); let mut new_branches = Vec::new();
for branch in branches.into_iter() { for branch in branches.iter() {
to_relevant_branch(&test, path, branch, &mut new_branches); to_relevant_branch(&test, path, branch, &mut new_branches);
} }
@ -476,13 +478,13 @@ fn edges_for<'a>(
fn to_relevant_branch<'a>( fn to_relevant_branch<'a>(
test: &Test<'a>, test: &Test<'a>,
path: &Path, path: &Path,
branch: Branch<'a>, branch: &Branch<'a>,
new_branches: &mut Vec<Branch<'a>>, new_branches: &mut Vec<Branch<'a>>,
) { ) {
// TODO remove clone // TODO remove clone
match extract(path, branch.patterns.clone()) { match extract(path, branch.patterns.clone()) {
Extract::NotFound => { Extract::NotFound => {
new_branches.push(branch); new_branches.push(branch.clone());
} }
Extract::Found { Extract::Found {
start, start,
@ -518,7 +520,7 @@ fn to_relevant_branch_help<'a>(
path: &Path, path: &Path,
mut start: Vec<(Path, Guard<'a>, Pattern<'a>)>, mut start: Vec<(Path, Guard<'a>, Pattern<'a>)>,
end: Vec<(Path, Guard<'a>, Pattern<'a>)>, end: Vec<(Path, Guard<'a>, Pattern<'a>)>,
branch: Branch<'a>, branch: &Branch<'a>,
guard: Guard<'a>, guard: Guard<'a>,
pattern: Pattern<'a>, pattern: Pattern<'a>,
) -> Option<Branch<'a>> { ) -> Option<Branch<'a>> {
@ -526,7 +528,7 @@ fn to_relevant_branch_help<'a>(
use Test::*; use Test::*;
match pattern { match pattern {
Identifier(_) | Underscore | Shadowed(_, _) | UnsupportedPattern(_) => Some(branch), Identifier(_) | Underscore | Shadowed(_, _) | UnsupportedPattern(_) => Some(branch.clone()),
RecordDestructure(destructs, _) => match test { RecordDestructure(destructs, _) => match test {
IsCtor { IsCtor {
@ -689,19 +691,14 @@ fn extract<'a>(
) -> Extract<'a> { ) -> Extract<'a> {
let mut start = Vec::new(); let mut start = Vec::new();
// TODO remove this clone
let mut copy = path_patterns.clone();
// TODO potential ordering problem // TODO potential ordering problem
for (index, current) in path_patterns.into_iter().enumerate() { let mut it = path_patterns.into_iter();
while let Some(current) = it.next() {
if &current.0 == selected_path { if &current.0 == selected_path {
return Extract::Found { return Extract::Found {
start, start,
found_pattern: (current.1, current.2), found_pattern: (current.1, current.2),
end: { end: it.collect::<Vec<_>>(),
copy.drain(0..=index);
copy
},
}; };
} else { } else {
start.push(current); start.push(current);
@ -742,22 +739,27 @@ fn needs_tests<'a>(pattern: &Pattern<'a>) -> bool {
/// PICK A PATH /// PICK A PATH
fn pick_path(branches: Vec<Branch>) -> Path { fn pick_path<'a>(branches: &'a [Branch]) -> &'a Path {
// TODO remove this clone let mut all_paths = Vec::with_capacity(branches.len());
let all_paths = branches
.clone()
.into_iter()
.map(|v| v.patterns)
.flatten()
.filter_map(is_choice_path);
let mut by_small_defaults = bests_by_small_defaults(&branches, all_paths); // is choice path
for branch in branches {
for (path, guard, pattern) in &branch.patterns {
if !guard.is_none() || needs_tests(&pattern) {
all_paths.push(path);
} else {
// do nothing
}
}
}
let mut by_small_defaults = bests_by_small_defaults(branches, all_paths.into_iter());
if by_small_defaults.len() == 1 { if by_small_defaults.len() == 1 {
by_small_defaults.remove(0) by_small_defaults.remove(0)
} else { } else {
debug_assert!(!by_small_defaults.is_empty()); debug_assert!(!by_small_defaults.is_empty());
let mut result = bests_by_small_branching_factor(&branches, by_small_defaults.into_iter()); let mut result = bests_by_small_branching_factor(branches, by_small_defaults.into_iter());
match result.pop() { match result.pop() {
None => unreachable!("bests_by will always return at least one value in the vec"), None => unreachable!("bests_by will always return at least one value in the vec"),
@ -766,33 +768,23 @@ fn pick_path(branches: Vec<Branch>) -> Path {
} }
} }
fn is_choice_path<'a>(path_and_pattern: (Path, Guard<'a>, Pattern<'a>)) -> Option<Path> { fn bests_by_small_branching_factor<'a, I>(branches: &[Branch], mut all_paths: I) -> Vec<&'a Path>
let (path, guard, pattern) = path_and_pattern;
if !guard.is_none() || needs_tests(&pattern) {
Some(path)
} else {
None
}
}
fn bests_by_small_branching_factor<I>(branches: &Vec<Branch>, mut all_paths: I) -> Vec<Path>
where where
I: Iterator<Item = Path>, I: Iterator<Item = &'a Path>,
{ {
match all_paths.next() { match all_paths.next() {
None => panic!("Cannot choose the best of zero paths. This should never happen."), None => panic!("Cannot choose the best of zero paths. This should never happen."),
Some(first_path) => { Some(first_path) => {
let mut min_weight = small_branching_factor(branches, &first_path); let mut min_weight = small_branching_factor(branches, first_path);
let mut min_paths = vec![first_path]; let mut min_paths = vec![first_path];
for path in all_paths { for path in all_paths {
let weight = small_branching_factor(branches, &path); let weight = small_branching_factor(branches, path);
use std::cmp::Ordering; use std::cmp::Ordering;
match weight.cmp(&min_weight) { match weight.cmp(&min_weight) {
Ordering::Equal => { Ordering::Equal => {
min_paths.push(path.clone()); min_paths.push(path);
} }
Ordering::Less => { Ordering::Less => {
min_weight = weight; min_weight = weight;
@ -808,14 +800,14 @@ where
} }
} }
fn bests_by_small_defaults<I>(branches: &Vec<Branch>, mut all_paths: I) -> Vec<Path> fn bests_by_small_defaults<'a, I>(branches: &[Branch], mut all_paths: I) -> Vec<&'a Path>
where where
I: Iterator<Item = Path>, I: Iterator<Item = &'a Path>,
{ {
match all_paths.next() { match all_paths.next() {
None => panic!("Cannot choose the best of zero paths. This should never happen."), None => panic!("Cannot choose the best of zero paths. This should never happen."),
Some(first_path) => { Some(first_path) => {
let mut min_weight = small_defaults(branches, &first_path); let mut min_weight = small_defaults(branches, first_path);
let mut min_paths = vec![first_path]; let mut min_paths = vec![first_path];
for path in all_paths { for path in all_paths {
@ -824,7 +816,7 @@ where
use std::cmp::Ordering; use std::cmp::Ordering;
match weight.cmp(&min_weight) { match weight.cmp(&min_weight) {
Ordering::Equal => { Ordering::Equal => {
min_paths.push(path.clone()); min_paths.push(path);
} }
Ordering::Less => { Ordering::Less => {
min_weight = weight; min_weight = weight;
@ -842,7 +834,7 @@ where
/// PATH PICKING HEURISTICS /// PATH PICKING HEURISTICS
fn small_defaults(branches: &Vec<Branch>, path: &Path) -> usize { fn small_defaults(branches: &[Branch], path: &Path) -> usize {
branches branches
.iter() .iter()
.filter(|b| is_irrelevant_to(path, b)) .filter(|b| is_irrelevant_to(path, b))
@ -850,7 +842,7 @@ fn small_defaults(branches: &Vec<Branch>, path: &Path) -> usize {
.sum() .sum()
} }
fn small_branching_factor(branches: &Vec<Branch>, path: &Path) -> usize { fn small_branching_factor(branches: &[Branch], path: &Path) -> usize {
// TODO remove clone // TODO remove clone
let (edges, fallback) = gather_edges(branches.to_vec(), path); let (edges, fallback) = gather_edges(branches.to_vec(), path);

View file

@ -53,7 +53,6 @@ fn simplify<'a>(pattern: &crate::ir::Pattern<'a>) -> Pattern {
StrLiteral(v) => Literal(Literal::Str(v.clone())), StrLiteral(v) => Literal(Literal::Str(v.clone())),
// To make sure these are exhaustive, we have to "fake" a union here // To make sure these are exhaustive, we have to "fake" a union here
// TODO: use the hash or some other integer to discriminate between constructors
BitLiteral { value, union, .. } => Ctor(union.clone(), TagId(*value as u8), vec![]), BitLiteral { value, union, .. } => Ctor(union.clone(), TagId(*value as u8), vec![]),
EnumLiteral { tag_id, union, .. } => Ctor(union.clone(), TagId(*tag_id), vec![]), EnumLiteral { tag_id, union, .. } => Ctor(union.clone(), TagId(*tag_id), vec![]),
@ -217,7 +216,7 @@ fn is_exhaustive(matrix: &PatternMatrix, n: usize) -> PatternMatrix {
let last: _ = alt_list let last: _ = alt_list
.iter() .iter()
.filter_map(|r| is_missing(alts.clone(), ctors.clone(), r)); .filter_map(|r| is_missing(alts.clone(), &ctors, r));
let mut result = Vec::new(); let mut result = Vec::new();
@ -257,7 +256,7 @@ fn is_exhaustive(matrix: &PatternMatrix, n: usize) -> PatternMatrix {
} }
} }
fn is_missing<T>(union: Union, ctors: MutMap<TagId, T>, ctor: &Ctor) -> Option<Pattern> { fn is_missing<T>(union: Union, ctors: &MutMap<TagId, T>, ctor: &Ctor) -> Option<Pattern> {
let Ctor { arity, tag_id, .. } = ctor; let Ctor { arity, tag_id, .. } = ctor;
if ctors.contains_key(tag_id) { if ctors.contains_key(tag_id) {
@ -336,7 +335,7 @@ fn to_nonredundant_rows<'a>(
vec![simplify(&loc_pat.value)] vec![simplify(&loc_pat.value)]
}; };
if is_useful(&checked_rows, &next_row) { if is_useful(checked_rows.clone(), next_row.clone()) {
checked_rows.push(next_row); checked_rows.push(next_row);
} else { } else {
return Err(Error::Redundant { return Err(Error::Redundant {
@ -351,83 +350,142 @@ fn to_nonredundant_rows<'a>(
} }
/// Check if a new row "vector" is useful given previous rows "matrix" /// Check if a new row "vector" is useful given previous rows "matrix"
fn is_useful(matrix: &PatternMatrix, vector: &Row) -> bool { fn is_useful(mut old_matrix: PatternMatrix, mut vector: Row) -> bool {
if matrix.is_empty() { let mut matrix = Vec::with_capacity(old_matrix.len());
// No rows are the same as the new vector! The vector is useful!
true
} else if vector.is_empty() {
// There is nothing left in the new vector, but we still have
// rows that match the same things. This is not a useful vector!
false
} else {
// NOTE: if there are bugs in this code, look at the ordering of the row/matrix
let mut vector = vector.clone();
let first_pattern = vector.remove(0);
let patterns = vector;
match first_pattern { // this loop ping-pongs the rows between old_matrix and matrix
// keep checking rows that start with this Ctor or Anything 'outer: loop {
Ctor(_, id, args) => { match vector.pop() {
let new_matrix: Vec<_> = matrix _ if old_matrix.is_empty() => {
.iter() // No rows are the same as the new vector! The vector is useful!
.filter_map(|r| specialize_row_by_ctor(id, args.len(), r)) break true;
.collect();
let mut new_row = Vec::new();
new_row.extend(patterns);
new_row.extend(args);
is_useful(&new_matrix, &new_row)
} }
None => {
// There is nothing left in the new vector, but we still have
// rows that match the same things. This is not a useful vector!
break false;
}
Some(first_pattern) => {
// NOTE: if there are bugs in this code, look at the ordering of the row/matrix
Anything => { match first_pattern {
// check if all alts appear in matrix // keep checking rows that start with this Ctor or Anything
match is_complete(matrix) { Ctor(_, id, args) => {
Complete::No => { specialize_row_by_ctor2(id, args.len(), &mut old_matrix, &mut matrix);
// This Anything is useful because some Ctors are missing.
// But what if a previous row has an Anything?
// If so, this one is not useful.
let new_matrix: Vec<_> = matrix
.iter()
.filter_map(|r| specialize_row_by_anything(r))
.collect();
is_useful(&new_matrix, &patterns) std::mem::swap(&mut old_matrix, &mut matrix);
vector.extend(args);
} }
Complete::Yes(alts) => {
// All Ctors are covered, so this Anything is not needed for any
// of those. But what if some of those Ctors have subpatterns
// that make them less general? If so, this actually is useful!
let is_useful_alt = |Ctor { arity, tag_id, .. }| {
let new_matrix = matrix
.iter()
.filter_map(|r| specialize_row_by_ctor(tag_id, arity, r))
.collect();
let mut new_row: Vec<Pattern> =
std::iter::repeat(Anything).take(arity).collect::<Vec<_>>();
new_row.extend(patterns.clone()); Anything => {
// check if all alternatives appear in matrix
match is_complete(&old_matrix) {
Complete::No => {
// This Anything is useful because some Ctors are missing.
// But what if a previous row has an Anything?
// If so, this one is not useful.
for mut row in old_matrix.drain(..) {
if let Some(Anything) = row.pop() {
matrix.push(row);
}
}
is_useful(&new_matrix, &new_row) std::mem::swap(&mut old_matrix, &mut matrix);
}; }
Complete::Yes(alternatives) => {
// All Ctors are covered, so this Anything is not needed for any
// of those. But what if some of those Ctors have subpatterns
// that make them less general? If so, this actually is useful!
for alternative in alternatives {
let Ctor { arity, tag_id, .. } = alternative;
alts.iter().cloned().any(is_useful_alt) let mut old_matrix = old_matrix.clone();
let mut matrix = vec![];
specialize_row_by_ctor2(
tag_id,
arity,
&mut old_matrix,
&mut matrix,
);
let mut vector = vector.clone();
vector.extend(std::iter::repeat(Anything).take(arity));
if is_useful(matrix, vector) {
break 'outer true;
}
}
break false;
}
}
}
Literal(literal) => {
// keep checking rows that start with this Literal or Anything
for mut row in old_matrix.drain(..) {
let head = row.pop();
let patterns = row;
match head {
Some(Literal(lit)) => {
if lit == literal {
matrix.push(patterns);
} else {
// do nothing
}
}
Some(Anything) => matrix.push(patterns),
Some(Ctor(_, _, _)) => panic!(
r#"Compiler bug! After type checking, constructors and literals should never align in pattern match exhaustiveness checks."#
),
None => panic!(
"Compiler error! Empty matrices should not get specialized."
),
}
}
std::mem::swap(&mut old_matrix, &mut matrix);
} }
} }
} }
Literal(literal) => {
// keep checking rows that start with this Literal or Anything
let new_matrix = matrix
.iter()
.filter_map(|r| specialize_row_by_literal(&literal, r))
.collect();
is_useful(&new_matrix, &patterns)
}
} }
} }
} }
/// Specializes every row of `old_matrix` by the constructor `tag_id`, moving the
/// rows that survive into `matrix`. All rows are drained out of `old_matrix`.
///
/// The head of a row is its *last* element (it is taken with `pop`):
/// - a `Ctor` head whose id equals `tag_id` is replaced by its own arguments,
/// - a `Ctor` head with any other id causes the row to be discarded,
/// - an `Anything` head is replaced by `arity` copies of `Anything`.
///
/// INVARIANT: (length row == N) ==> (length of a kept row == arity + N - 1),
/// presuming a matching `Ctor` carries exactly `arity` arguments.
///
/// Panics if a row starts with a `Literal` or is empty.
fn specialize_row_by_ctor2(
    tag_id: TagId,
    arity: usize,
    old_matrix: &mut PatternMatrix,
    matrix: &mut PatternMatrix,
) {
    for mut remaining in old_matrix.drain(..) {
        // After the pop, `remaining` holds the row without its head.
        match remaining.pop() {
            Some(Ctor(_, id, args)) => {
                // Rows headed by a different constructor are simply dropped.
                if id == tag_id {
                    remaining.extend(args);
                    matrix.push(remaining);
                }
            }
            Some(Anything) => {
                // TODO order!
                // A wildcard head stands in for every argument of the constructor.
                let wildcards = std::iter::repeat(Anything).take(arity);
                remaining.extend(wildcards);
                matrix.push(remaining);
            }
            Some(Literal(_)) => panic!( "Compiler bug! After type checking, constructors and literal should never align in pattern match exhaustiveness checks."),
            None => panic!("Compiler error! Empty matrices should not get specialized."),
        }
    }
}
/// INVARIANT: (length row == N) ==> (length result == arity + N - 1) /// INVARIANT: (length row == N) ==> (length result == arity + N - 1)
fn specialize_row_by_ctor(tag_id: TagId, arity: usize, row: &Row) -> Option<Row> { fn specialize_row_by_ctor(tag_id: TagId, arity: usize, row: &Row) -> Option<Row> {
let mut row = row.clone(); let mut row = row.clone();
@ -436,7 +494,7 @@ fn specialize_row_by_ctor(tag_id: TagId, arity: usize, row: &Row) -> Option<Row>
let patterns = row; let patterns = row;
match head { match head {
Some(Ctor(_, id, args)) => Some(Ctor(_, id, args)) => {
if id == tag_id { if id == tag_id {
// TODO order! // TODO order!
let mut new_patterns = Vec::new(); let mut new_patterns = Vec::new();
@ -446,38 +504,18 @@ fn specialize_row_by_ctor(tag_id: TagId, arity: usize, row: &Row) -> Option<Row>
} else { } else {
None None
} }
}
Some(Anything) => { Some(Anything) => {
// TODO order! // TODO order!
let new_patterns = let new_patterns = std::iter::repeat(Anything)
std::iter::repeat(Anything).take(arity).chain(patterns).collect(); .take(arity)
.chain(patterns)
.collect();
Some(new_patterns) Some(new_patterns)
}
Some(Literal(_)) => panic!( "Compiler bug! After type checking, constructors and literal should never align in pattern match exhaustiveness checks."),
None => panic!("Compiler error! Empty matrices should not get specialized."),
}
}
/// INVARIANT: (length row == N) ==> (length result == N-1)
fn specialize_row_by_literal(literal: &Literal, row: &Row) -> Option<Row> {
let mut row = row.clone();
let head = row.pop();
let patterns = row;
match head {
Some(Literal(lit)) => {
if &lit == literal {
Some(patterns)
} else {
None
}
} }
Some(Anything) => Some(patterns), Some(Literal(_)) => unreachable!(
r#"Compiler bug! After type checking, a constructor can never align with a literal: that should be a type error!"#
Some(Ctor(_, _, _)) => panic!(
r#"Compiler bug! After type checking, constructors and literals should never align in pattern match exhaustiveness checks."#
), ),
None => panic!("Compiler error! Empty matrices should not get specialized."), None => panic!("Compiler error! Empty matrices should not get specialized."),
} }
} }
@ -501,14 +539,14 @@ pub enum Complete {
fn is_complete(matrix: &PatternMatrix) -> Complete { fn is_complete(matrix: &PatternMatrix) -> Complete {
let ctors = collect_ctors(matrix); let ctors = collect_ctors(matrix);
let length = ctors.len();
let mut it = ctors.values(); let mut it = ctors.into_iter();
match it.next() { match it.next() {
None => Complete::No, None => Complete::No,
Some(Union { alternatives, .. }) => { Some((_, Union { alternatives, .. })) => {
if ctors.len() == alternatives.len() { if length == alternatives.len() {
Complete::Yes(alternatives.to_vec()) Complete::Yes(alternatives)
} else { } else {
Complete::No Complete::No
} }

View file

@ -168,7 +168,7 @@ impl<'a> Procs<'a> {
// by the surrounding context, so we can add pending specializations // by the surrounding context, so we can add pending specializations
// for them immediately. // for them immediately.
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, annotation, env.subs, env.pointer_size) .from_var(env.arena, annotation, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err)); .unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
// if we've already specialized this one, no further work is needed. // if we've already specialized this one, no further work is needed.
@ -306,7 +306,6 @@ pub struct Env<'a, 'i> {
pub problems: &'i mut std::vec::Vec<MonoProblem>, pub problems: &'i mut std::vec::Vec<MonoProblem>,
pub home: ModuleId, pub home: ModuleId,
pub ident_ids: &'i mut IdentIds, pub ident_ids: &'i mut IdentIds,
pub pointer_size: u32,
pub jump_counter: &'a mut u64, pub jump_counter: &'a mut u64,
} }
@ -976,13 +975,13 @@ fn specialize<'a>(
); );
for (arg_var, arg_name) in pattern_vars.iter().zip(pattern_symbols.iter()) { for (arg_var, arg_name) in pattern_vars.iter().zip(pattern_symbols.iter()) {
let layout = layout_cache.from_var(&env.arena, *arg_var, env.subs, env.pointer_size)?; let layout = layout_cache.from_var(&env.arena, *arg_var, env.subs)?;
proc_args.push((layout, *arg_name)); proc_args.push((layout, *arg_name));
} }
let ret_layout = layout_cache let ret_layout = layout_cache
.from_var(&env.arena, ret_var, env.subs, env.pointer_size) .from_var(&env.arena, ret_var, env.subs)
.unwrap_or_else(|err| panic!("TODO handle invalid function {:?}", err)); .unwrap_or_else(|err| panic!("TODO handle invalid function {:?}", err));
// TODO WRONG // TODO WRONG
@ -1107,12 +1106,7 @@ pub fn with_hole<'a>(
use crate::layout::UnionVariant::*; use crate::layout::UnionVariant::*;
let arena = env.arena; let arena = env.arena;
let variant = crate::layout::union_sorted_tags( let variant = crate::layout::union_sorted_tags(env.arena, variant_var, env.subs);
env.arena,
variant_var,
env.subs,
env.pointer_size,
);
match variant { match variant {
Never => unreachable!("The `[]` type has no constructors"), Never => unreachable!("The `[]` type has no constructors"),
@ -1150,7 +1144,7 @@ pub fn with_hole<'a>(
// Layout will unpack this unwrapped tag if it only has one (non-zero-sized) field // Layout will unpack this unwrapped tag if it only has one (non-zero-sized) field
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, variant_var, env.subs, env.pointer_size) .from_var(env.arena, variant_var, env.subs)
.unwrap_or_else(|err| { .unwrap_or_else(|err| {
panic!("TODO turn fn_var into a RuntimeError {:?}", err) panic!("TODO turn fn_var into a RuntimeError {:?}", err)
}); });
@ -1255,12 +1249,7 @@ pub fn with_hole<'a>(
mut fields, mut fields,
.. ..
} => { } => {
let sorted_fields = crate::layout::sort_record_fields( let sorted_fields = crate::layout::sort_record_fields(env.arena, record_var, env.subs);
env.arena,
record_var,
env.subs,
env.pointer_size,
);
let mut field_symbols = Vec::with_capacity_in(fields.len(), env.arena); let mut field_symbols = Vec::with_capacity_in(fields.len(), env.arena);
let mut field_layouts = Vec::with_capacity_in(fields.len(), env.arena); let mut field_layouts = Vec::with_capacity_in(fields.len(), env.arena);
@ -1281,7 +1270,7 @@ pub fn with_hole<'a>(
// creating a record from the var will unpack it if it's just a single field. // creating a record from the var will unpack it if it's just a single field.
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, record_var, env.subs, env.pointer_size) .from_var(env.arena, record_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err)); .unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let field_symbols = field_symbols.into_bump_slice(); let field_symbols = field_symbols.into_bump_slice();
@ -1313,10 +1302,10 @@ pub fn with_hole<'a>(
final_else, final_else,
} => { } => {
let ret_layout = layout_cache let ret_layout = layout_cache
.from_var(env.arena, branch_var, env.subs, env.pointer_size) .from_var(env.arena, branch_var, env.subs)
.expect("invalid ret_layout"); .expect("invalid ret_layout");
let cond_layout = layout_cache let cond_layout = layout_cache
.from_var(env.arena, cond_var, env.subs, env.pointer_size) .from_var(env.arena, cond_var, env.subs)
.expect("invalid cond_layout"); .expect("invalid cond_layout");
let assigned_in_jump = env.unique_symbol(); let assigned_in_jump = env.unique_symbol();
@ -1367,7 +1356,7 @@ pub fn with_hole<'a>(
} }
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, branch_var, env.subs, env.pointer_size) .from_var(env.arena, branch_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err)); .unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let param = Param { let param = Param {
@ -1426,7 +1415,7 @@ pub fn with_hole<'a>(
}; };
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, expr_var, env.subs, env.pointer_size) .from_var(env.arena, expr_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err)); .unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let param = Param { let param = Param {
@ -1464,7 +1453,7 @@ pub fn with_hole<'a>(
let arg_symbols = arg_symbols.into_bump_slice(); let arg_symbols = arg_symbols.into_bump_slice();
let elem_layout = layout_cache let elem_layout = layout_cache
.from_var(env.arena, elem_var, env.subs, env.pointer_size) .from_var(env.arena, elem_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err)); .unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let expr = Expr::Array { let expr = Expr::Array {
@ -1508,12 +1497,7 @@ pub fn with_hole<'a>(
loc_expr, loc_expr,
.. ..
} => { } => {
let sorted_fields = crate::layout::sort_record_fields( let sorted_fields = crate::layout::sort_record_fields(env.arena, record_var, env.subs);
env.arena,
record_var,
env.subs,
env.pointer_size,
);
let mut index = None; let mut index = None;
let mut field_layouts = Vec::with_capacity_in(sorted_fields.len(), env.arena); let mut field_layouts = Vec::with_capacity_in(sorted_fields.len(), env.arena);
@ -1540,7 +1524,7 @@ pub fn with_hole<'a>(
}; };
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, field_var, env.subs, env.pointer_size) .from_var(env.arena, field_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err)); .unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let mut stmt = Stmt::Let(assigned, expr, layout, hole); let mut stmt = Stmt::Let(assigned, expr, layout, hole);
@ -1646,7 +1630,7 @@ pub fn with_hole<'a>(
} }
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, fn_var, env.subs, env.pointer_size) .from_var(env.arena, fn_var, env.subs)
.unwrap_or_else(|err| { .unwrap_or_else(|err| {
panic!("TODO turn fn_var into a RuntimeError {:?}", err) panic!("TODO turn fn_var into a RuntimeError {:?}", err)
}); });
@ -1657,7 +1641,7 @@ pub fn with_hole<'a>(
}; };
let ret_layout = layout_cache let ret_layout = layout_cache
.from_var(env.arena, ret_var, env.subs, env.pointer_size) .from_var(env.arena, ret_var, env.subs)
.unwrap_or_else(|err| { .unwrap_or_else(|err| {
panic!("TODO turn fn_var into a RuntimeError {:?}", err) panic!("TODO turn fn_var into a RuntimeError {:?}", err)
}); });
@ -1720,7 +1704,7 @@ pub fn with_hole<'a>(
// layout of the return type // layout of the return type
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, ret_var, env.subs, env.pointer_size) .from_var(env.arena, ret_var, env.subs)
.unwrap_or_else(|err| todo!("TODO turn fn_var into a RuntimeError {:?}", err)); .unwrap_or_else(|err| todo!("TODO turn fn_var into a RuntimeError {:?}", err));
let mut result = Stmt::Let(assigned, Expr::RunLowLevel(op, arg_symbols), layout, hole); let mut result = Stmt::Let(assigned, Expr::RunLowLevel(op, arg_symbols), layout, hole);
@ -1857,7 +1841,7 @@ pub fn from_can<'a>(
} }
let layout = layout_cache let layout = layout_cache
.from_var(env.arena, def.expr_var, env.subs, env.pointer_size) .from_var(env.arena, def.expr_var, env.subs)
.expect("invalid layout"); .expect("invalid layout");
// convert the continuation // convert the continuation
@ -1998,11 +1982,11 @@ fn from_can_when<'a>(
let opt_branches = to_opt_branches(env, region, branches, layout_cache); let opt_branches = to_opt_branches(env, region, branches, layout_cache);
let cond_layout = layout_cache let cond_layout = layout_cache
.from_var(env.arena, cond_var, env.subs, env.pointer_size) .from_var(env.arena, cond_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn this into a RuntimeError {:?}", err)); .unwrap_or_else(|err| panic!("TODO turn this into a RuntimeError {:?}", err));
let ret_layout = layout_cache let ret_layout = layout_cache
.from_var(env.arena, expr_var, env.subs, env.pointer_size) .from_var(env.arena, expr_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn this into a RuntimeError {:?}", err)); .unwrap_or_else(|err| panic!("TODO turn this into a RuntimeError {:?}", err));
let arena = env.arena; let arena = env.arena;
@ -2291,7 +2275,7 @@ fn call_by_name<'a>(
hole: &'a Stmt<'a>, hole: &'a Stmt<'a>,
) -> Stmt<'a> { ) -> Stmt<'a> {
// Register a pending_specialization for this function // Register a pending_specialization for this function
match layout_cache.from_var(env.arena, fn_var, env.subs, env.pointer_size) { match layout_cache.from_var(env.arena, fn_var, env.subs) {
Ok(layout) => { Ok(layout) => {
// Build the CallByName node // Build the CallByName node
let arena = env.arena; let arena = env.arena;
@ -2309,7 +2293,7 @@ fn call_by_name<'a>(
let field_symbols = field_symbols.into_bump_slice(); let field_symbols = field_symbols.into_bump_slice();
for (var, _) in &loc_args { for (var, _) in &loc_args {
match layout_cache.from_var(&env.arena, *var, &env.subs, env.pointer_size) { match layout_cache.from_var(&env.arena, *var, &env.subs) {
Ok(_) => { Ok(_) => {
pattern_vars.push(*var); pattern_vars.push(*var);
} }
@ -2591,8 +2575,7 @@ pub fn from_can_pattern<'a>(
use crate::exhaustive::Union; use crate::exhaustive::Union;
use crate::layout::UnionVariant::*; use crate::layout::UnionVariant::*;
let variant = let variant = crate::layout::union_sorted_tags(env.arena, *whole_var, env.subs);
crate::layout::union_sorted_tags(env.arena, *whole_var, env.subs, env.pointer_size);
match variant { match variant {
Never => unreachable!("there is no pattern of type `[]`"), Never => unreachable!("there is no pattern of type `[]`"),
@ -2745,12 +2728,7 @@ pub fn from_can_pattern<'a>(
let mut it = destructs.iter(); let mut it = destructs.iter();
let mut opt_destruct = it.next(); let mut opt_destruct = it.next();
let sorted_fields = crate::layout::sort_record_fields( let sorted_fields = crate::layout::sort_record_fields(env.arena, *whole_var, env.subs);
env.arena,
*whole_var,
env.subs,
env.pointer_size,
);
let mut field_layouts = Vec::with_capacity_in(sorted_fields.len(), env.arena); let mut field_layouts = Vec::with_capacity_in(sorted_fields.len(), env.arena);

View file

@ -64,17 +64,12 @@ pub enum Builtin<'a> {
} }
impl<'a> Layout<'a> { impl<'a> Layout<'a> {
pub fn new( pub fn new(arena: &'a Bump, content: Content, subs: &Subs) -> Result<Self, LayoutProblem> {
arena: &'a Bump,
content: Content,
subs: &Subs,
pointer_size: u32,
) -> Result<Self, LayoutProblem> {
use roc_types::subs::Content::*; use roc_types::subs::Content::*;
match content { match content {
FlexVar(_) | RigidVar(_) => Err(LayoutProblem::UnresolvedTypeVar), FlexVar(_) | RigidVar(_) => Err(LayoutProblem::UnresolvedTypeVar),
Structure(flat_type) => layout_from_flat_type(arena, flat_type, subs, pointer_size), Structure(flat_type) => layout_from_flat_type(arena, flat_type, subs),
Alias(Symbol::NUM_INT, args, _) => { Alias(Symbol::NUM_INT, args, _) => {
debug_assert!(args.is_empty()); debug_assert!(args.is_empty());
@ -84,12 +79,7 @@ impl<'a> Layout<'a> {
debug_assert!(args.is_empty()); debug_assert!(args.is_empty());
Ok(Layout::Builtin(Builtin::Float64)) Ok(Layout::Builtin(Builtin::Float64))
} }
Alias(_, _, var) => Self::new( Alias(_, _, var) => Self::new(arena, subs.get_without_compacting(var).content, subs),
arena,
subs.get_without_compacting(var).content,
subs,
pointer_size,
),
Error => Err(LayoutProblem::Erroneous), Error => Err(LayoutProblem::Erroneous),
} }
} }
@ -97,15 +87,10 @@ impl<'a> Layout<'a> {
/// Returns Err(()) if given an error, or Ok(Layout) if given a non-erroneous Structure. /// Returns Err(()) if given an error, or Ok(Layout) if given a non-erroneous Structure.
/// Panics if given a FlexVar or RigidVar, since those should have been /// Panics if given a FlexVar or RigidVar, since those should have been
/// monomorphized away already! /// monomorphized away already!
fn from_var( fn from_var(arena: &'a Bump, var: Variable, subs: &Subs) -> Result<Self, LayoutProblem> {
arena: &'a Bump,
var: Variable,
subs: &Subs,
pointer_size: u32,
) -> Result<Self, LayoutProblem> {
let content = subs.get_without_compacting(var).content; let content = subs.get_without_compacting(var).content;
Self::new(arena, content, subs, pointer_size) Self::new(arena, content, subs)
} }
pub fn safe_to_memcpy(&self) -> bool { pub fn safe_to_memcpy(&self) -> bool {
@ -130,6 +115,13 @@ impl<'a> Layout<'a> {
} }
} }
/// Returns `true` when this layout's stack size is zero.
pub fn is_zero_sized(&self) -> bool {
    // Only "zero or non-zero" matters for this check, not an accurate
    // stack size, so a placeholder pointer size of 1 is passed in.
    let size_with_placeholder_pointers = self.stack_size(1);

    size_with_placeholder_pointers == 0
}
pub fn stack_size(&self, pointer_size: u32) -> u32 { pub fn stack_size(&self, pointer_size: u32) -> u32 {
use Layout::*; use Layout::*;
@ -175,7 +167,6 @@ impl<'a> LayoutCache<'a> {
arena: &'a Bump, arena: &'a Bump,
var: Variable, var: Variable,
subs: &Subs, subs: &Subs,
pointer_size: u32,
) -> Result<Layout<'a>, LayoutProblem> { ) -> Result<Layout<'a>, LayoutProblem> {
// Store things according to the root Variable, to avoid duplicate work. // Store things according to the root Variable, to avoid duplicate work.
let var = subs.get_root_key_without_compacting(var); let var = subs.get_root_key_without_compacting(var);
@ -185,7 +176,7 @@ impl<'a> LayoutCache<'a> {
.or_insert_with(|| { .or_insert_with(|| {
let content = subs.get_without_compacting(var).content; let content = subs.get_without_compacting(var).content;
Layout::new(arena, content, subs, pointer_size) Layout::new(arena, content, subs)
}) })
.clone() .clone()
} }
@ -252,7 +243,6 @@ fn layout_from_flat_type<'a>(
arena: &'a Bump, arena: &'a Bump,
flat_type: FlatType, flat_type: FlatType,
subs: &Subs, subs: &Subs,
pointer_size: u32,
) -> Result<Layout<'a>, LayoutProblem> { ) -> Result<Layout<'a>, LayoutProblem> {
use roc_types::subs::FlatType::*; use roc_types::subs::FlatType::*;
@ -277,7 +267,7 @@ fn layout_from_flat_type<'a>(
layout_from_num_content(content) layout_from_num_content(content)
} }
Symbol::STR_STR => Ok(Layout::Builtin(Builtin::Str)), Symbol::STR_STR => Ok(Layout::Builtin(Builtin::Str)),
Symbol::LIST_LIST => list_layout_from_elem(arena, subs, args[0], pointer_size), Symbol::LIST_LIST => list_layout_from_elem(arena, subs, args[0]),
Symbol::ATTR_ATTR => { Symbol::ATTR_ATTR => {
debug_assert_eq!(args.len(), 2); debug_assert_eq!(args.len(), 2);
@ -288,7 +278,7 @@ fn layout_from_flat_type<'a>(
// For now, layout is unaffected by uniqueness. // For now, layout is unaffected by uniqueness.
// (Incorporating refcounting may change this.) // (Incorporating refcounting may change this.)
// Unwrap and continue // Unwrap and continue
Layout::from_var(arena, wrapped_var, subs, pointer_size) Layout::from_var(arena, wrapped_var, subs)
} }
_ => { _ => {
panic!("TODO layout_from_flat_type for {:?}", Apply(symbol, args)); panic!("TODO layout_from_flat_type for {:?}", Apply(symbol, args));
@ -301,11 +291,11 @@ fn layout_from_flat_type<'a>(
for arg_var in args { for arg_var in args {
let arg_content = subs.get_without_compacting(arg_var).content; let arg_content = subs.get_without_compacting(arg_var).content;
fn_args.push(Layout::new(arena, arg_content, subs, pointer_size)?); fn_args.push(Layout::new(arena, arg_content, subs)?);
} }
let ret_content = subs.get_without_compacting(ret_var).content; let ret_content = subs.get_without_compacting(ret_var).content;
let ret = Layout::new(arena, ret_content, subs, pointer_size)?; let ret = Layout::new(arena, ret_content, subs)?;
Ok(Layout::FunctionPointer( Ok(Layout::FunctionPointer(
fn_args.into_bump_slice(), fn_args.into_bump_slice(),
@ -333,10 +323,10 @@ fn layout_from_flat_type<'a>(
let field_var = field.into_inner(); let field_var = field.into_inner();
let field_content = subs.get_without_compacting(field_var).content; let field_content = subs.get_without_compacting(field_var).content;
match Layout::new(arena, field_content, subs, pointer_size) { match Layout::new(arena, field_content, subs) {
Ok(layout) => { Ok(layout) => {
// Drop any zero-sized fields like {} // Drop any zero-sized fields like {}.
if layout.stack_size(pointer_size) != 0 { if !layout.is_zero_sized() {
layouts.push(layout); layouts.push(layout);
} }
} }
@ -358,7 +348,7 @@ fn layout_from_flat_type<'a>(
TagUnion(tags, ext_var) => { TagUnion(tags, ext_var) => {
debug_assert!(ext_var_is_empty_tag_union(subs, ext_var)); debug_assert!(ext_var_is_empty_tag_union(subs, ext_var));
Ok(layout_from_tag_union(arena, tags, subs, pointer_size)) Ok(layout_from_tag_union(arena, tags, subs))
} }
RecursiveTagUnion(_rec_var, _tags, _ext_var) => { RecursiveTagUnion(_rec_var, _tags, _ext_var) => {
panic!("TODO make Layout for empty RecursiveTagUnion"); panic!("TODO make Layout for empty RecursiveTagUnion");
@ -378,7 +368,6 @@ pub fn sort_record_fields<'a>(
arena: &'a Bump, arena: &'a Bump,
var: Variable, var: Variable,
subs: &Subs, subs: &Subs,
pointer_size: u32,
) -> Vec<'a, (Lowercase, Layout<'a>)> { ) -> Vec<'a, (Lowercase, Layout<'a>)> {
let mut fields_map = MutMap::default(); let mut fields_map = MutMap::default();
@ -389,11 +378,10 @@ pub fn sort_record_fields<'a>(
for (label, field) in fields_map { for (label, field) in fields_map {
let var = field.into_inner(); let var = field.into_inner();
let layout = Layout::from_var(arena, var, subs, pointer_size) let layout = Layout::from_var(arena, var, subs).expect("invalid layout from var");
.expect("invalid layout from var");
// Drop any zero-sized fields like {} // Drop any zero-sized fields like {}
if layout.stack_size(pointer_size) != 0 { if !layout.is_zero_sized() {
sorted_fields.push((label, layout)); sorted_fields.push((label, layout));
} }
} }
@ -416,17 +404,10 @@ pub enum UnionVariant<'a> {
Wrapped(Vec<'a, (TagName, &'a [Layout<'a>])>), Wrapped(Vec<'a, (TagName, &'a [Layout<'a>])>),
} }
pub fn union_sorted_tags<'a>( pub fn union_sorted_tags<'a>(arena: &'a Bump, var: Variable, subs: &Subs) -> UnionVariant<'a> {
arena: &'a Bump,
var: Variable,
subs: &Subs,
pointer_size: u32,
) -> UnionVariant<'a> {
let mut tags_vec = std::vec::Vec::new(); let mut tags_vec = std::vec::Vec::new();
match roc_types::pretty_print::chase_ext_tag_union(subs, var, &mut tags_vec) { match roc_types::pretty_print::chase_ext_tag_union(subs, var, &mut tags_vec) {
Ok(()) | Err((_, Content::FlexVar(_))) => { Ok(()) | Err((_, Content::FlexVar(_))) => union_sorted_tags_help(arena, tags_vec, subs),
union_sorted_tags_help(arena, tags_vec, subs, pointer_size)
}
Err(other) => panic!("invalid content in tag union variable: {:?}", other), Err(other) => panic!("invalid content in tag union variable: {:?}", other),
} }
} }
@ -435,7 +416,6 @@ fn union_sorted_tags_help<'a>(
arena: &'a Bump, arena: &'a Bump,
mut tags_vec: std::vec::Vec<(TagName, std::vec::Vec<Variable>)>, mut tags_vec: std::vec::Vec<(TagName, std::vec::Vec<Variable>)>,
subs: &Subs, subs: &Subs,
pointer_size: u32,
) -> UnionVariant<'a> { ) -> UnionVariant<'a> {
// sort up front; make sure the ordering stays intact! // sort up front; make sure the ordering stays intact!
tags_vec.sort(); tags_vec.sort();
@ -458,10 +438,10 @@ fn union_sorted_tags_help<'a>(
} }
_ => { _ => {
for var in arguments { for var in arguments {
match Layout::from_var(arena, var, subs, pointer_size) { match Layout::from_var(arena, var, subs) {
Ok(layout) => { Ok(layout) => {
// Drop any zero-sized arguments like {} // Drop any zero-sized arguments like {}
if layout.stack_size(pointer_size) != 0 { if !layout.is_zero_sized() {
layouts.push(layout); layouts.push(layout);
} }
} }
@ -497,10 +477,10 @@ fn union_sorted_tags_help<'a>(
arg_layouts.push(Layout::Builtin(Builtin::Int64)); arg_layouts.push(Layout::Builtin(Builtin::Int64));
for var in arguments { for var in arguments {
match Layout::from_var(arena, var, subs, pointer_size) { match Layout::from_var(arena, var, subs) {
Ok(layout) => { Ok(layout) => {
// Drop any zero-sized arguments like {} // Drop any zero-sized arguments like {}
if layout.stack_size(pointer_size) != 0 { if !layout.is_zero_sized() {
has_any_arguments = true; has_any_arguments = true;
arg_layouts.push(layout); arg_layouts.push(layout);
@ -551,14 +531,13 @@ pub fn layout_from_tag_union<'a>(
arena: &'a Bump, arena: &'a Bump,
tags: MutMap<TagName, std::vec::Vec<Variable>>, tags: MutMap<TagName, std::vec::Vec<Variable>>,
subs: &Subs, subs: &Subs,
pointer_size: u32,
) -> Layout<'a> { ) -> Layout<'a> {
use UnionVariant::*; use UnionVariant::*;
let tags_vec: std::vec::Vec<_> = tags.into_iter().collect(); let tags_vec: std::vec::Vec<_> = tags.into_iter().collect();
if tags_vec[0].0 != TagName::Private(Symbol::NUM_AT_NUM) { if tags_vec[0].0 != TagName::Private(Symbol::NUM_AT_NUM) {
let variant = union_sorted_tags_help(arena, tags_vec, subs, pointer_size); let variant = union_sorted_tags_help(arena, tags_vec, subs);
match variant { match variant {
Never => panic!("TODO gracefully handle trying to instantiate Never"), Never => panic!("TODO gracefully handle trying to instantiate Never"),
@ -692,7 +671,6 @@ pub fn list_layout_from_elem<'a>(
arena: &'a Bump, arena: &'a Bump,
subs: &Subs, subs: &Subs,
var: Variable, var: Variable,
pointer_size: u32,
) -> Result<Layout<'a>, LayoutProblem> { ) -> Result<Layout<'a>, LayoutProblem> {
match subs.get_without_compacting(var).content { match subs.get_without_compacting(var).content {
Content::Structure(FlatType::Apply(Symbol::ATTR_ATTR, args)) => { Content::Structure(FlatType::Apply(Symbol::ATTR_ATTR, args)) => {
@ -700,14 +678,14 @@ pub fn list_layout_from_elem<'a>(
let arg_var = args.get(1).unwrap(); let arg_var = args.get(1).unwrap();
list_layout_from_elem(arena, subs, *arg_var, pointer_size) list_layout_from_elem(arena, subs, *arg_var)
} }
Content::FlexVar(_) | Content::RigidVar(_) => { Content::FlexVar(_) | Content::RigidVar(_) => {
// If this was still a (List *) then it must have been an empty list // If this was still a (List *) then it must have been an empty list
Ok(Layout::Builtin(Builtin::EmptyList)) Ok(Layout::Builtin(Builtin::EmptyList))
} }
content => { content => {
let elem_layout = Layout::new(arena, content, subs, pointer_size)?; let elem_layout = Layout::new(arena, content, subs)?;
// This is a normal list. // This is a normal list.
Ok(Layout::Builtin(Builtin::List( Ok(Layout::Builtin(Builtin::List(

View file

@ -53,7 +53,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)] #[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);

View file

@ -14,7 +14,6 @@ mod helpers;
mod test_mono { mod test_mono {
use crate::helpers::{can_expr, infer_expr, CanExprOut}; use crate::helpers::{can_expr, infer_expr, CanExprOut};
use bumpalo::Bump; use bumpalo::Bump;
use roc_collections::all::MutMap;
use roc_mono::layout::LayoutCache; use roc_mono::layout::LayoutCache;
use roc_types::subs::Subs; use roc_types::subs::Subs;
@ -38,8 +37,8 @@ mod test_mono {
let mut procs = roc_mono::ir::Procs::default(); let mut procs = roc_mono::ir::Procs::default();
let mut ident_ids = interns.all_ident_ids.remove(&home).unwrap(); let mut ident_ids = interns.all_ident_ids.remove(&home).unwrap();
// assume 64-bit pointers // Put this module's ident_ids back in the interns
let pointer_size = std::mem::size_of::<u64>() as u32; interns.all_ident_ids.insert(home, ident_ids.clone());
// Populate Procs and Subs, and get the low-level Expr from the canonical Expr // Populate Procs and Subs, and get the low-level Expr from the canonical Expr
let mut mono_problems = Vec::new(); let mut mono_problems = Vec::new();
@ -49,7 +48,6 @@ mod test_mono {
problems: &mut mono_problems, problems: &mut mono_problems,
home, home,
ident_ids: &mut ident_ids, ident_ids: &mut ident_ids,
pointer_size,
jump_counter: arena.alloc(0), jump_counter: arena.alloc(0),
}; };
@ -69,9 +67,6 @@ mod test_mono {
roc_collections::all::MutMap::default() roc_collections::all::MutMap::default()
); );
// Put this module's ident_ids back in the interns
interns.all_ident_ids.insert(home, ident_ids);
let mut procs_string = procs let mut procs_string = procs
.get_specialized_procs(mono_env.arena) .get_specialized_procs(mono_env.arena)
.values() .values()

View file

@ -11,6 +11,7 @@ roc_region = { path = "../region" }
roc_module = { path = "../module" } roc_module = { path = "../module" }
bumpalo = { version = "3.2", features = ["collections"] } bumpalo = { version = "3.2", features = ["collections"] }
inlinable_string = "0.1" inlinable_string = "0.1"
encode_unicode = "0.3"
[dev-dependencies] [dev-dependencies]
pretty_assertions = "0.5.1" pretty_assertions = "0.5.1"

View file

@ -1,6 +1,8 @@
use crate::ast::CommentOrNewline::{self, *}; use crate::ast::CommentOrNewline::{self, *};
use crate::ast::Spaceable; use crate::ast::Spaceable;
use crate::parser::{self, and, unexpected, unexpected_eof, Parser, State}; use crate::parser::{
self, and, peek_utf8_char, unexpected, unexpected_eof, FailReason, Parser, State,
};
use bumpalo::collections::string::String; use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
@ -216,147 +218,179 @@ fn spaces<'a>(
) -> impl Parser<'a, &'a [CommentOrNewline<'a>]> { ) -> impl Parser<'a, &'a [CommentOrNewline<'a>]> {
move |arena: &'a Bump, state: State<'a>| { move |arena: &'a Bump, state: State<'a>| {
let original_state = state.clone(); let original_state = state.clone();
let chars = state.input.chars().peekable();
let mut space_list = Vec::new_in(arena); let mut space_list = Vec::new_in(arena);
let mut chars_parsed = 0; let mut bytes_parsed = 0;
let mut comment_line_buf = String::new_in(arena); let mut comment_line_buf = String::new_in(arena);
let mut line_state = LineState::Normal; let mut line_state = LineState::Normal;
let mut state = state; let mut state = state;
let mut any_newlines = false; let mut any_newlines = false;
for ch in chars { while !state.bytes.is_empty() {
chars_parsed += 1; match peek_utf8_char(&state) {
Ok((ch, utf8_len)) => {
bytes_parsed += utf8_len;
match line_state { match line_state {
LineState::Normal => { LineState::Normal => {
match ch { match ch {
' ' => { ' ' => {
// Don't check indentation here; it might not be enough // Don't check indentation here; it might not be enough
// indentation yet, but maybe it will be after more spaces happen! // indentation yet, but maybe it will be after more spaces happen!
state = state.advance_spaces(1)?; state = state.advance_spaces(1)?;
} }
'\r' => { '\r' => {
// Ignore carriage returns. // Ignore carriage returns.
state = state.advance_spaces(1)?; state = state.advance_spaces(1)?;
} }
'\n' => { '\n' => {
// No need to check indentation because we're about to reset it anyway. // No need to check indentation because we're about to reset it anyway.
state = state.newline()?; state = state.newline()?;
// Newlines only get added to the list when they're outside comments. // Newlines only get added to the list when they're outside comments.
space_list.push(Newline); space_list.push(Newline);
any_newlines = true; any_newlines = true;
} }
'#' => { '#' => {
// Check indentation to make sure we were indented enough // Check indentation to make sure we were indented enough
// before this comment began. // before this comment began.
state = state
.check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state.clone()))?
.advance_without_indenting(1)?;
// We're now parsing a line comment!
line_state = LineState::Comment;
}
nonblank => {
return if require_at_least_one && chars_parsed <= 1 {
// We've parsed 1 char and it was not a space,
// but we require parsing at least one space!
Err(unexpected(nonblank, 0, state.clone(), state.attempting))
} else {
// First make sure we were indented enough!
//
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
if any_newlines {
state = state state = state
.check_indent(min_indent) .check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state))?; .map_err(|(fail, _)| (fail, original_state.clone()))?
.advance_without_indenting(1)?;
// We're now parsing a line comment!
line_state = LineState::Comment;
} }
_ => {
Ok((space_list.into_bump_slice(), state)) return if require_at_least_one && bytes_parsed <= 1 {
}; // We've parsed 1 char and it was not a space,
} // but we require parsing at least one space!
} Err(unexpected(0, state.clone(), state.attempting))
} } else {
LineState::Comment => { // First make sure we were indented enough!
match ch {
' ' => {
// If we're in a line comment, this won't affect indentation anyway.
state = state.advance_without_indenting(1)?;
if comment_line_buf.len() == 1 {
match comment_line_buf.chars().next() {
Some('#') => {
// This is a comment begining with `## ` - that is,
// a doc comment.
// //
// (The space is important; otherwise, this is not // (We only do this if we've encountered any newlines.
// a doc comment, but rather something like a // Otherwise, we assume indentation is already correct.
// big separator block, e.g. ############) // It's actively important for correctness that we skip
line_state = LineState::DocComment; // this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
if any_newlines {
state = state
.check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state))?;
}
// This is now the beginning of the doc comment. Ok((space_list.into_bump_slice(), state))
comment_line_buf.clear(); };
} }
_ => { }
}
LineState::Comment => {
match ch {
' ' => {
// If we're in a line comment, this won't affect indentation anyway.
state = state.advance_without_indenting(1)?;
if comment_line_buf.len() == 1 {
match comment_line_buf.chars().next() {
Some('#') => {
// This is a comment begining with `## ` - that is,
// a doc comment.
//
// (The space is important; otherwise, this is not
// a doc comment, but rather something like a
// big separator block, e.g. ############)
line_state = LineState::DocComment;
// This is now the beginning of the doc comment.
comment_line_buf.clear();
}
_ => {
comment_line_buf.push(ch);
}
}
} else {
comment_line_buf.push(ch); comment_line_buf.push(ch);
} }
} }
} else { '\n' => {
comment_line_buf.push(ch); state = state.newline()?;
// This was a newline, so end this line comment.
space_list.push(LineComment(comment_line_buf.into_bump_str()));
comment_line_buf = String::new_in(arena);
line_state = LineState::Normal;
}
nonblank => {
// Chars can have btye lengths of more than 1!
state = state.advance_without_indenting(nonblank.len_utf8())?;
comment_line_buf.push(nonblank);
}
} }
} }
'\n' => { LineState::DocComment => {
state = state.newline()?; match ch {
' ' => {
// If we're in a doc comment, this won't affect indentation anyway.
state = state.advance_without_indenting(1)?;
// This was a newline, so end this line comment. comment_line_buf.push(ch);
space_list.push(LineComment(comment_line_buf.into_bump_str())); }
comment_line_buf = String::new_in(arena); '\n' => {
state = state.newline()?;
line_state = LineState::Normal; // This was a newline, so end this doc comment.
} space_list.push(DocComment(comment_line_buf.into_bump_str()));
nonblank => { comment_line_buf = String::new_in(arena);
// Chars can have btye lengths of more than 1!
state = state.advance_without_indenting(nonblank.len_utf8())?;
comment_line_buf.push(nonblank); line_state = LineState::Normal;
}
nonblank => {
state = state.advance_without_indenting(utf8_len)?;
comment_line_buf.push(nonblank);
}
}
} }
} }
} }
LineState::DocComment => { Err(FailReason::BadUtf8) => {
match ch { // If we hit an invalid UTF-8 character, bail out immediately.
' ' => { return state.fail(FailReason::BadUtf8);
// If we're in a doc comment, this won't affect indentation anyway. }
state = state.advance_without_indenting(1)?; Err(_) => {
if require_at_least_one && bytes_parsed == 0 {
return Err(unexpected_eof(0, state.attempting, state));
} else {
let space_slice = space_list.into_bump_slice();
comment_line_buf.push(ch); // First make sure we were indented enough!
//
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
if any_newlines {
return Ok((
space_slice,
state
.check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state))?,
));
} }
'\n' => {
state = state.newline()?;
// This was a newline, so end this doc comment. return Ok((space_slice, state));
space_list.push(DocComment(comment_line_buf.into_bump_str()));
comment_line_buf = String::new_in(arena);
line_state = LineState::Normal;
}
nonblank => {
// Chars can have btye lengths of more than 1!
state = state.advance_without_indenting(nonblank.len_utf8())?;
comment_line_buf.push(nonblank);
}
} }
} }
} };
} }
if require_at_least_one && chars_parsed == 0 { // If we didn't parse anything, return unexpected EOF
if require_at_least_one && original_state.bytes.len() == state.bytes.len() {
Err(unexpected_eof(0, state.attempting, state)) Err(unexpected_eof(0, state.attempting, state))
} else { } else {
// First make sure we were indented enough! // First make sure we were indented enough!

View file

@ -8,8 +8,8 @@ use crate::ident::{global_tag_or_ident, ident, lowercase_ident, Ident};
use crate::keyword; use crate::keyword;
use crate::number_literal::number_literal; use crate::number_literal::number_literal;
use crate::parser::{ use crate::parser::{
self, allocated, char, fail, not, not_followed_by, optional, sep_by1, string, then, unexpected, self, allocated, ascii_char, ascii_string, fail, not, not_followed_by, optional, sep_by1, then,
unexpected_eof, Either, Fail, FailReason, ParseResult, Parser, State, unexpected, unexpected_eof, Either, Fail, FailReason, ParseResult, Parser, State,
}; };
use crate::type_annotation; use crate::type_annotation;
use bumpalo::collections::string::String; use bumpalo::collections::string::String;
@ -22,7 +22,7 @@ pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
// Recursive parsers must not directly invoke functions which return (impl Parser), // Recursive parsers must not directly invoke functions which return (impl Parser),
// as this causes rustc to stack overflow. Thus, parse_expr must be a // as this causes rustc to stack overflow. Thus, parse_expr must be a
// separate function which recurses by calling itself directly. // separate function which recurses by calling itself directly.
move |arena, state| parse_expr(min_indent, arena, state) move |arena, state: State<'a>| parse_expr(min_indent, arena, state)
} }
macro_rules! loc_parenthetical_expr { macro_rules! loc_parenthetical_expr {
@ -30,7 +30,7 @@ macro_rules! loc_parenthetical_expr {
then( then(
loc!(and!( loc!(and!(
between!( between!(
char('('), ascii_char('(' ),
map_with_arena!( map_with_arena!(
space0_around( space0_around(
loc!(move |arena, state| parse_expr($min_indent, arena, state)), loc!(move |arena, state| parse_expr($min_indent, arena, state)),
@ -43,7 +43,7 @@ macro_rules! loc_parenthetical_expr {
} }
} }
), ),
char(')') ascii_char(')' )
), ),
optional(either!( optional(either!(
// There may optionally be function args after the ')' // There may optionally be function args after the ')'
@ -59,7 +59,7 @@ macro_rules! loc_parenthetical_expr {
// as if there were any args they'd have consumed it anyway // as if there were any args they'd have consumed it anyway
// e.g. in `((foo bar) baz.blah)` the `.blah` will be consumed by the `baz` parser // e.g. in `((foo bar) baz.blah)` the `.blah` will be consumed by the `baz` parser
either!( either!(
one_or_more!(skip_first!(char('.'), lowercase_ident())), one_or_more!(skip_first!(ascii_char('.' ), lowercase_ident())),
and!(space0($min_indent), equals_with_indent()) and!(space0($min_indent), equals_with_indent())
) )
)) ))
@ -170,7 +170,7 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
one_of!( one_of!(
map_with_arena!( map_with_arena!(
and!( and!(
loc!(char('!')), loc!(ascii_char('!')),
loc!(move |arena, state| parse_expr(min_indent, arena, state)) loc!(move |arena, state| parse_expr(min_indent, arena, state))
), ),
|arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| { |arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| {
@ -179,7 +179,7 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
), ),
map_with_arena!( map_with_arena!(
and!( and!(
loc!(char('-')), loc!(ascii_char('-')),
loc!(move |arena, state| parse_expr(min_indent, arena, state)) loc!(move |arena, state| parse_expr(min_indent, arena, state))
), ),
|arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| { |arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| {
@ -450,9 +450,9 @@ pub fn loc_parenthetical_def<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
let (loc_tuple, state) = loc!(and!( let (loc_tuple, state) = loc!(and!(
space0_after( space0_after(
between!( between!(
char('('), ascii_char('('),
space0_around(loc_pattern(min_indent), min_indent), space0_around(loc_pattern(min_indent), min_indent),
char(')') ascii_char(')')
), ),
min_indent, min_indent,
), ),
@ -482,7 +482,7 @@ pub fn loc_parenthetical_def<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
/// The '=' used in a def can't be followed by another '=' (or else it's actually /// The '=' used in a def can't be followed by another '=' (or else it's actually
/// an "==") and also it can't be followed by '>' (or else it's actually an "=>") /// an "==") and also it can't be followed by '>' (or else it's actually an "=>")
fn equals_for_def<'a>() -> impl Parser<'a, ()> { fn equals_for_def<'a>() -> impl Parser<'a, ()> {
not_followed_by(char('='), one_of!(char('='), char('>'))) not_followed_by(ascii_char('='), one_of!(ascii_char('='), ascii_char('>')))
} }
/// A definition, consisting of one of these: /// A definition, consisting of one of these:
@ -513,7 +513,7 @@ pub fn def<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>> {
), ),
// Annotation // Annotation
skip_first!( skip_first!(
char(':'), ascii_char(':'),
// Spaces after the ':' (at a normal indentation level) and then the type. // Spaces after the ':' (at a normal indentation level) and then the type.
// The type itself must be indented more than the pattern and ':' // The type itself must be indented more than the pattern and ':'
space0_before(type_annotation::located(indented_more), indented_more) space0_before(type_annotation::located(indented_more), indented_more)
@ -811,12 +811,12 @@ fn loc_parse_function_arg<'a>(
fn reserved_keyword<'a>() -> impl Parser<'a, ()> { fn reserved_keyword<'a>() -> impl Parser<'a, ()> {
one_of!( one_of!(
string(keyword::IF), ascii_string(keyword::IF),
string(keyword::THEN), ascii_string(keyword::THEN),
string(keyword::ELSE), ascii_string(keyword::ELSE),
string(keyword::WHEN), ascii_string(keyword::WHEN),
string(keyword::IS), ascii_string(keyword::IS),
string(keyword::AS) ascii_string(keyword::AS)
) )
} }
@ -824,7 +824,7 @@ fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
map_with_arena!( map_with_arena!(
skip_first!( skip_first!(
// All closures start with a '\' - e.g. (\x -> x + 1) // All closures start with a '\' - e.g. (\x -> x + 1)
char('\\'), ascii_char('\\'),
// Once we see the '\', we're committed to parsing this as a closure. // Once we see the '\', we're committed to parsing this as a closure.
// It may turn out to be malformed, but it is definitely a closure. // It may turn out to be malformed, but it is definitely a closure.
optional(and!( optional(and!(
@ -833,13 +833,13 @@ fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
Attempting::ClosureParams, Attempting::ClosureParams,
// Params are comma-separated // Params are comma-separated
sep_by1( sep_by1(
char(','), ascii_char(','),
space0_around(loc_closure_param(min_indent), min_indent) space0_around(loc_closure_param(min_indent), min_indent)
) )
), ),
skip_first!( skip_first!(
// Parse the -> which separates params from body // Parse the -> which separates params from body
string("->"), ascii_string("->"),
// Parse the body // Parse the body
attempt!( attempt!(
Attempting::ClosureBody, Attempting::ClosureBody,
@ -877,9 +877,9 @@ fn parse_closure_param<'a>(
// If you wrap it in parens, you can match any arbitrary pattern at all. // If you wrap it in parens, you can match any arbitrary pattern at all.
// e.g. \User.UserId userId -> ... // e.g. \User.UserId userId -> ...
between!( between!(
char('('), ascii_char('('),
space0_around(loc_pattern(min_indent), min_indent), space0_around(loc_pattern(min_indent), min_indent),
char(')') ascii_char(')')
) )
) )
.parse(arena, state) .parse(arena, state)
@ -903,9 +903,9 @@ fn loc_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> {
fn loc_parenthetical_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> { fn loc_parenthetical_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> {
between!( between!(
char('('), ascii_char('('),
move |arena, state| loc_pattern(min_indent).parse(arena, state), move |arena, state| loc_pattern(min_indent).parse(arena, state),
char(')') ascii_char(')')
) )
} }
@ -923,13 +923,13 @@ fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
} }
fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> { fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
map!(char('_'), |_| Pattern::Underscore) map!(ascii_char('_'), |_| Pattern::Underscore)
} }
fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> { fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
then( then(
collection!( collection!(
char('{'), ascii_char('{'),
move |arena: &'a bumpalo::Bump, move |arena: &'a bumpalo::Bump,
state: crate::parser::State<'a>| state: crate::parser::State<'a>|
-> crate::parser::ParseResult<'a, Located<crate::ast::Pattern<'a>>> { -> crate::parser::ParseResult<'a, Located<crate::ast::Pattern<'a>>> {
@ -947,10 +947,13 @@ fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
// (This is true in both literals and types.) // (This is true in both literals and types.)
let (opt_loc_val, state) = crate::parser::optional(either!( let (opt_loc_val, state) = crate::parser::optional(either!(
skip_first!( skip_first!(
char(':'), ascii_char(':'),
space0_before(loc_pattern(min_indent), min_indent) space0_before(loc_pattern(min_indent), min_indent)
), ),
skip_first!(char('?'), space0_before(loc!(expr(min_indent)), min_indent)) skip_first!(
ascii_char('?'),
space0_before(loc!(expr(min_indent)), min_indent)
)
)) ))
.parse(arena, state)?; .parse(arena, state)?;
@ -987,8 +990,8 @@ fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
Ok((answer, state)) Ok((answer, state))
}, },
char(','), ascii_char(','),
char('}'), ascii_char('}'),
min_indent min_indent
), ),
move |_arena, state, loc_patterns| { move |_arena, state, loc_patterns| {
@ -1109,7 +1112,7 @@ mod when {
loc!(move |arena, state| parse_expr(min_indent, arena, state)), loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent, min_indent,
), ),
string(keyword::IS) ascii_string(keyword::IS)
) )
) )
), ),
@ -1132,7 +1135,7 @@ mod when {
/// Parsing when with indentation. /// Parsing when with indentation.
fn when_with_indent<'a>() -> impl Parser<'a, u16> { fn when_with_indent<'a>() -> impl Parser<'a, u16> {
move |arena, state: State<'a>| { move |arena, state: State<'a>| {
string(keyword::WHEN) ascii_string(keyword::WHEN)
.parse(arena, state) .parse(arena, state)
.map(|((), state)| (state.indent_col, state)) .map(|((), state)| (state.indent_col, state))
} }
@ -1185,7 +1188,7 @@ mod when {
} }
); );
loop { while !state.bytes.is_empty() {
match branch_parser.parse(arena, state) { match branch_parser.parse(arena, state) {
Ok((next_output, next_state)) => { Ok((next_output, next_state)) => {
state = next_state; state = next_state;
@ -1210,11 +1213,11 @@ mod when {
) -> impl Parser<'a, (Vec<'a, Located<Pattern<'a>>>, Option<Located<Expr<'a>>>)> { ) -> impl Parser<'a, (Vec<'a, Located<Pattern<'a>>>, Option<Located<Expr<'a>>>)> {
and!( and!(
sep_by1( sep_by1(
char('|'), ascii_char('|'),
space0_around(loc_pattern(min_indent), min_indent), space0_around(loc_pattern(min_indent), min_indent),
), ),
optional(skip_first!( optional(skip_first!(
string(keyword::IF), ascii_string(keyword::IF),
// TODO we should require space before the expression but not after // TODO we should require space before the expression but not after
space1_around( space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)), loc!(move |arena, state| parse_expr(min_indent, arena, state)),
@ -1240,7 +1243,7 @@ mod when {
/// Parsing the righthandside of a branch in a when conditional. /// Parsing the righthandside of a branch in a when conditional.
fn branch_result<'a>(indent: u16) -> impl Parser<'a, Located<Expr<'a>>> { fn branch_result<'a>(indent: u16) -> impl Parser<'a, Located<Expr<'a>>> {
skip_first!( skip_first!(
string("->"), ascii_string("->"),
space0_before( space0_before(
loc!(move |arena, state| parse_expr(indent, arena, state)), loc!(move |arena, state| parse_expr(indent, arena, state)),
indent, indent,
@ -1253,7 +1256,7 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
map_with_arena!( map_with_arena!(
and!( and!(
skip_first!( skip_first!(
string(keyword::IF), ascii_string(keyword::IF),
space1_around( space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)), loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent, min_indent,
@ -1261,14 +1264,14 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
), ),
and!( and!(
skip_first!( skip_first!(
string(keyword::THEN), ascii_string(keyword::THEN),
space1_around( space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)), loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent, min_indent,
) )
), ),
skip_first!( skip_first!(
string(keyword::ELSE), ascii_string(keyword::ELSE),
space1_before( space1_before(
loc!(move |arena, state| parse_expr(min_indent, arena, state)), loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent, min_indent,
@ -1310,10 +1313,15 @@ fn unary_negate_function_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
// Try to parse a number literal *before* trying to parse unary negate, // Try to parse a number literal *before* trying to parse unary negate,
// because otherwise (foo -1) will parse as (foo (Num.neg 1)) // because otherwise (foo -1) will parse as (foo (Num.neg 1))
loc!(number_literal()), loc!(number_literal()),
loc!(char('-')) loc!(ascii_char('-'))
) )
), ),
one_of!(char(' '), char('#'), char('\n'), char('>')), one_of!(
ascii_char(' '),
ascii_char('#'),
ascii_char('\n'),
ascii_char('>')
),
), ),
move |arena, state, (spaces, num_or_minus_char)| { move |arena, state, (spaces, num_or_minus_char)| {
match num_or_minus_char { match num_or_minus_char {
@ -1530,17 +1538,15 @@ pub fn ident_without_apply<'a>() -> impl Parser<'a, Expr<'a>> {
/// Like equals_for_def(), except it produces the indent_col of the state rather than () /// Like equals_for_def(), except it produces the indent_col of the state rather than ()
pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> { pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
move |_arena, state: State<'a>| { move |_arena, state: State<'a>| {
let mut iter = state.input.chars(); match state.bytes.first() {
Some(&byte) if byte == b'=' => {
match iter.next() { match state.bytes.get(1) {
Some(ch) if ch == '=' => {
match iter.peekable().peek() {
// The '=' must not be followed by another `=` or `>` // The '=' must not be followed by another `=` or `>`
// (See equals_for_def() for explanation) // (See equals_for_def() for explanation)
Some(next_ch) if next_ch != &'=' && next_ch != &'>' => { Some(&next_byte) if next_byte != b'=' && next_byte != b'>' => {
Ok((state.indent_col, state.advance_without_indenting(1)?)) Ok((state.indent_col, state.advance_without_indenting(1)?))
} }
Some(next_ch) => Err(unexpected(*next_ch, 0, state, Attempting::Def)), Some(_) => Err(unexpected(0, state, Attempting::Def)),
None => Err(unexpected_eof( None => Err(unexpected_eof(
1, 1,
Attempting::Def, Attempting::Def,
@ -1548,21 +1554,17 @@ pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
)), )),
} }
} }
Some(ch) => Err(unexpected(ch, 0, state, Attempting::Def)), Some(_) => Err(unexpected(0, state, Attempting::Def)),
None => Err(unexpected_eof(0, Attempting::Def, state)), None => Err(unexpected_eof(0, Attempting::Def, state)),
} }
} }
} }
pub fn colon_with_indent<'a>() -> impl Parser<'a, u16> { pub fn colon_with_indent<'a>() -> impl Parser<'a, u16> {
move |_arena, state: State<'a>| { move |_arena, state: State<'a>| match state.bytes.first() {
let mut iter = state.input.chars(); Some(&byte) if byte == b':' => Ok((state.indent_col, state.advance_without_indenting(1)?)),
Some(_) => Err(unexpected(0, state, Attempting::Def)),
match iter.next() { None => Err(unexpected_eof(0, Attempting::Def, state)),
Some(ch) if ch == ':' => Ok((state.indent_col, state.advance_without_indenting(1)?)),
Some(ch) => Err(unexpected(ch, 0, state, Attempting::Def)),
None => Err(unexpected_eof(0, Attempting::Def, state)),
}
} }
} }
@ -1606,32 +1608,32 @@ fn binop<'a>() -> impl Parser<'a, BinOp> {
// with other valid operators (e.g. "<=" begins with "<") must // with other valid operators (e.g. "<=" begins with "<") must
// come before the shorter ones; otherwise, they will never // come before the shorter ones; otherwise, they will never
// be reached because the shorter one will pass and consume! // be reached because the shorter one will pass and consume!
map!(string("|>"), |_| BinOp::Pizza), map!(ascii_string("|>"), |_| BinOp::Pizza),
map!(string("=="), |_| BinOp::Equals), map!(ascii_string("=="), |_| BinOp::Equals),
map!(string("!="), |_| BinOp::NotEquals), map!(ascii_string("!="), |_| BinOp::NotEquals),
map!(string("&&"), |_| BinOp::And), map!(ascii_string("&&"), |_| BinOp::And),
map!(string("||"), |_| BinOp::Or), map!(ascii_string("||"), |_| BinOp::Or),
map!(char('+'), |_| BinOp::Plus), map!(ascii_char('+'), |_| BinOp::Plus),
map!(char('*'), |_| BinOp::Star), map!(ascii_char('*'), |_| BinOp::Star),
map!(char('-'), |_| BinOp::Minus), map!(ascii_char('-'), |_| BinOp::Minus),
map!(string("//"), |_| BinOp::DoubleSlash), map!(ascii_string("//"), |_| BinOp::DoubleSlash),
map!(char('/'), |_| BinOp::Slash), map!(ascii_char('/'), |_| BinOp::Slash),
map!(string("<="), |_| BinOp::LessThanOrEq), map!(ascii_string("<="), |_| BinOp::LessThanOrEq),
map!(char('<'), |_| BinOp::LessThan), map!(ascii_char('<'), |_| BinOp::LessThan),
map!(string(">="), |_| BinOp::GreaterThanOrEq), map!(ascii_string(">="), |_| BinOp::GreaterThanOrEq),
map!(char('>'), |_| BinOp::GreaterThan), map!(ascii_char('>'), |_| BinOp::GreaterThan),
map!(char('^'), |_| BinOp::Caret), map!(ascii_char('^'), |_| BinOp::Caret),
map!(string("%%"), |_| BinOp::DoublePercent), map!(ascii_string("%%"), |_| BinOp::DoublePercent),
map!(char('%'), |_| BinOp::Percent) map!(ascii_char('%'), |_| BinOp::Percent)
) )
} }
pub fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { pub fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
let elems = collection!( let elems = collection!(
char('['), ascii_char('['),
loc!(expr(min_indent)), loc!(expr(min_indent)),
char(','), ascii_char(','),
char(']'), ascii_char(']'),
min_indent min_indent
); );
@ -1673,9 +1675,11 @@ pub fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
}; };
// there can be field access, e.g. `{ x : 4 }.x` // there can be field access, e.g. `{ x : 4 }.x`
let (accesses, state) = let (accesses, state) = optional(one_or_more!(skip_first!(
optional(one_or_more!(skip_first!(char('.'), lowercase_ident()))) ascii_char('.'),
.parse(arena, state)?; lowercase_ident()
)))
.parse(arena, state)?;
if let Some(fields) = accesses { if let Some(fields) = accesses {
for field in fields { for field in fields {
@ -1768,7 +1772,7 @@ pub fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
/// This is mainly for matching tags in closure params, e.g. \@Foo -> ... /// This is mainly for matching tags in closure params, e.g. \@Foo -> ...
pub fn private_tag<'a>() -> impl Parser<'a, &'a str> { pub fn private_tag<'a>() -> impl Parser<'a, &'a str> {
map_with_arena!( map_with_arena!(
skip_first!(char('@'), global_tag()), skip_first!(ascii_char('@'), global_tag()),
|arena: &'a Bump, name: &'a str| { |arena: &'a Bump, name: &'a str| {
let mut buf = String::with_capacity_in(1 + name.len(), arena); let mut buf = String::with_capacity_in(1 + name.len(), arena);

View file

@ -1,6 +1,6 @@
use crate::ast::Attempting; use crate::ast::Attempting;
use crate::keyword; use crate::keyword;
use crate::parser::{unexpected, unexpected_eof, Fail, FailReason, ParseResult, Parser, State}; use crate::parser::{peek_utf8_char, unexpected, Fail, FailReason, ParseResult, Parser, State};
use bumpalo::collections::string::String; use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
@ -67,129 +67,126 @@ impl<'a> Ident<'a> {
/// Sometimes we may want to check for those later in the process, and give /// Sometimes we may want to check for those later in the process, and give
/// more contextually-aware error messages than "unexpected `if`" or the like. /// more contextually-aware error messages than "unexpected `if`" or the like.
#[inline(always)] #[inline(always)]
pub fn parse_ident<'a, I>( pub fn parse_ident<'a>(
arena: &'a Bump, arena: &'a Bump,
chars: &mut I, mut state: State<'a>,
state: State<'a>, ) -> ParseResult<'a, (Ident<'a>, Option<char>)> {
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
where
I: Iterator<Item = char>,
{
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.) let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena); let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena); let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
let mut is_capitalized; let mut is_capitalized;
let is_accessor_fn; let is_accessor_fn;
let mut is_private_tag = false; let mut is_private_tag = false;
let mut chars_parsed;
// Identifiers and accessor functions must start with either a letter or a dot. // Identifiers and accessor functions must start with either a letter or a dot.
// If this starts with neither, it must be something else! // If this starts with neither, it must be something else!
match chars.next() { match peek_utf8_char(&state) {
Some(ch) => { Ok((first_ch, bytes_parsed)) => {
if ch == '@' { if first_ch.is_alphabetic() {
// '@' must always be followed by a capital letter! part_buf.push(first_ch);
match chars.next() {
Some(ch) if ch.is_uppercase() => {
part_buf.push('@');
part_buf.push(ch);
is_private_tag = true; is_capitalized = first_ch.is_uppercase();
is_capitalized = true;
is_accessor_fn = false;
chars_parsed = 2;
}
Some(ch) => {
return Err(unexpected(ch, 0, state, Attempting::Identifier));
}
None => {
return Err(unexpected_eof(0, Attempting::Identifier, state));
}
}
} else if ch.is_alphabetic() {
part_buf.push(ch);
is_capitalized = ch.is_uppercase();
is_accessor_fn = false; is_accessor_fn = false;
chars_parsed = 1; state = state.advance_without_indenting(bytes_parsed)?;
} else if ch == '.' { } else if first_ch == '.' {
is_capitalized = false; is_capitalized = false;
is_accessor_fn = true; is_accessor_fn = true;
chars_parsed = 1; state = state.advance_without_indenting(bytes_parsed)?;
} else if first_ch == '@' {
state = state.advance_without_indenting(bytes_parsed)?;
// '@' must always be followed by a capital letter!
match peek_utf8_char(&state) {
Ok((next_ch, next_bytes_parsed)) => {
if next_ch.is_uppercase() {
state = state.advance_without_indenting(next_bytes_parsed)?;
part_buf.push('@');
part_buf.push(next_ch);
is_private_tag = true;
is_capitalized = true;
is_accessor_fn = false;
} else {
return Err(unexpected(
bytes_parsed + next_bytes_parsed,
state,
Attempting::Identifier,
));
}
}
Err(reason) => return state.fail(reason),
}
} else { } else {
return Err(unexpected(ch, 0, state, Attempting::Identifier)); return Err(unexpected(0, state, Attempting::Identifier));
} }
} }
None => { Err(reason) => return state.fail(reason),
return Err(unexpected_eof(0, Attempting::Identifier, state)); }
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A dot ('.')
if ch.is_alphabetic() {
if part_buf.is_empty() {
// Capitalization is determined by the first character in the part.
is_capitalized = ch.is_uppercase();
}
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(
Some(ch),
arena,
state,
capitalized_parts,
noncapitalized_parts,
);
}
part_buf.push(ch);
} else if ch == '.' {
// There are two possible errors here:
//
// 1. Having two consecutive dots is an error.
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
return malformed(
Some(ch),
arena,
state,
capitalized_parts,
noncapitalized_parts,
);
}
if is_capitalized {
capitalized_parts.push(part_buf.into_bump_str());
} else {
noncapitalized_parts.push(part_buf.into_bump_str());
}
// Now that we've recorded the contents of the current buffer, reset it.
part_buf = String::new_in(arena);
} else {
// This must be the end of the identifier. We're done!
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
} }
};
let mut next_char = None;
while let Some(ch) = chars.next() {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A dot ('.')
if ch.is_alphabetic() {
if part_buf.is_empty() {
// Capitalization is determined by the first character in the part.
is_capitalized = ch.is_uppercase();
}
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(
Some(ch),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
}
part_buf.push(ch);
} else if ch == '.' {
// There are two possible errors here:
//
// 1. Having two consecutive dots is an error.
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
return malformed(
Some(ch),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
}
if is_capitalized {
capitalized_parts.push(part_buf.into_bump_str());
} else {
noncapitalized_parts.push(part_buf.into_bump_str());
}
// Now that we've recorded the contents of the current buffer, reset it.
part_buf = String::new_in(arena);
} else {
// This must be the end of the identifier. We're done!
next_char = Some(ch);
break;
}
chars_parsed += 1;
} }
if part_buf.is_empty() { if part_buf.is_empty() {
@ -200,10 +197,9 @@ where
// If we made it this far and don't have a next_char, then necessarily // If we made it this far and don't have a next_char, then necessarily
// we have consumed a '.' char previously. // we have consumed a '.' char previously.
return malformed( return malformed(
next_char.or_else(|| Some('.')), Some('.'),
arena, arena,
state, state,
chars,
capitalized_parts, capitalized_parts,
noncapitalized_parts, noncapitalized_parts,
); );
@ -224,14 +220,7 @@ where
Ident::AccessorFunction(value) Ident::AccessorFunction(value)
} else { } else {
return malformed( return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
} }
} else if noncapitalized_parts.is_empty() { } else if noncapitalized_parts.is_empty() {
// We have capitalized parts only, so this must be a tag. // We have capitalized parts only, so this must be a tag.
@ -245,33 +234,19 @@ where
} }
} else { } else {
// This is a qualified tag, which is not allowed! // This is a qualified tag, which is not allowed!
return malformed( return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
} }
} }
None => { None => {
// We had neither capitalized nor noncapitalized parts, // We had neither capitalized nor noncapitalized parts,
// yet we made it this far. The only explanation is that this was // yet we made it this far. The only explanation is that this was
// a stray '.' drifting through the cosmos. // a stray '.' drifting through the cosmos.
return Err(unexpected('.', 1, state, Attempting::Identifier)); return Err(unexpected(1, state, Attempting::Identifier));
} }
} }
} else if is_private_tag { } else if is_private_tag {
// This is qualified field access with an '@' in front, which does not make sense! // This is qualified field access with an '@' in front, which does not make sense!
return malformed( return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
} else { } else {
// We have multiple noncapitalized parts, so this must be field access. // We have multiple noncapitalized parts, so this must be field access.
Ident::Access { Ident::Access {
@ -280,22 +255,16 @@ where
} }
}; };
let state = state.advance_without_indenting(chars_parsed)?; Ok(((answer, None), state))
Ok(((answer, next_char), state))
} }
fn malformed<'a, I>( fn malformed<'a>(
opt_bad_char: Option<char>, opt_bad_char: Option<char>,
arena: &'a Bump, arena: &'a Bump,
state: State<'a>, mut state: State<'a>,
chars: &mut I,
capitalized_parts: Vec<&'a str>, capitalized_parts: Vec<&'a str>,
noncapitalized_parts: Vec<&'a str>, noncapitalized_parts: Vec<&'a str>,
) -> ParseResult<'a, (Ident<'a>, Option<char>)> ) -> ParseResult<'a, (Ident<'a>, Option<char>)> {
where
I: Iterator<Item = char>,
{
// Reconstruct the original string that we've been parsing. // Reconstruct the original string that we've been parsing.
let mut full_string = String::new_in(arena); let mut full_string = String::new_in(arena);
@ -311,30 +280,35 @@ where
// Consume the remaining chars in the identifier. // Consume the remaining chars in the identifier.
let mut next_char = None; let mut next_char = None;
for ch in chars { while !state.bytes.is_empty() {
// We can't use ch.is_alphanumeric() here because that passes for match peek_utf8_char(&state) {
// things that are "numeric" but not ASCII digits, like `¾` Ok((ch, bytes_parsed)) => {
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() { // We can't use ch.is_alphanumeric() here because that passes for
full_string.push(ch); // things that are "numeric" but not ASCII digits, like `¾`
} else { if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
next_char = Some(ch); full_string.push(ch);
} else {
next_char = Some(ch);
break; break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
} }
} }
let chars_parsed = full_string.len();
Ok(( Ok((
(Ident::Malformed(full_string.into_bump_str()), next_char), (Ident::Malformed(full_string.into_bump_str()), next_char),
state.advance_without_indenting(chars_parsed)?, state,
)) ))
} }
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> { pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
move |arena: &'a Bump, state: State<'a>| { move |arena: &'a Bump, state: State<'a>| {
// Discard next_char; we don't need it. // Discard next_char; we don't need it.
let ((string, _), state) = parse_ident(arena, &mut state.input.chars(), state)?; let ((string, _), state) = parse_ident(arena, state)?;
Ok((string, state)) Ok((string, state))
} }
@ -344,52 +318,47 @@ pub fn global_tag_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str>
where where
F: Fn(char) -> bool, F: Fn(char) -> bool,
{ {
move |arena, state: State<'a>| { move |arena, mut state: State<'a>| {
let mut chars = state.input.chars();
// pred will determine if this is a tag or ident (based on capitalization) // pred will determine if this is a tag or ident (based on capitalization)
let first_letter = match chars.next() { let (first_letter, bytes_parsed) = match peek_utf8_char(&state) {
Some(first_char) => { Ok((first_letter, bytes_parsed)) => {
if pred(first_char) { if !pred(first_letter) {
first_char return Err(unexpected(0, state, Attempting::RecordFieldLabel));
} else {
return Err(unexpected(
first_char,
0,
state,
Attempting::RecordFieldLabel,
));
} }
(first_letter, bytes_parsed)
} }
None => { Err(reason) => return state.fail(reason),
return Err(unexpected_eof(0, Attempting::RecordFieldLabel, state));
}
}; };
let mut buf = String::with_capacity_in(1, arena); let mut buf = String::with_capacity_in(1, arena);
buf.push(first_letter); buf.push(first_letter);
for ch in chars { state = state.advance_without_indenting(bytes_parsed)?;
// After the first character, only these are allowed:
// while !state.bytes.is_empty() {
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers match peek_utf8_char(&state) {
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric() Ok((ch, bytes_parsed)) => {
// * A ':' indicating the end of the field // After the first character, only these are allowed:
if ch.is_alphabetic() || ch.is_ascii_digit() { //
buf.push(ch); // * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
} else { // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// This is the end of the field. We're done! // * A ':' indicating the end of the field
break; if ch.is_alphabetic() || ch.is_ascii_digit() {
} buf.push(ch);
state = state.advance_without_indenting(bytes_parsed)?;
} else {
// This is the end of the field. We're done!
break;
}
}
Err(reason) => return state.fail(reason),
};
} }
let chars_parsed = buf.len(); Ok((buf.into_bump_str(), state))
Ok((
buf.into_bump_str(),
state.advance_without_indenting(chars_parsed)?,
))
} }
} }

View file

@ -6,7 +6,10 @@ use crate::blankspace::{space0_around, space1};
use crate::expr::def; use crate::expr::def;
use crate::header::ModuleName; use crate::header::ModuleName;
use crate::ident::unqualified_ident; use crate::ident::unqualified_ident;
use crate::parser::{self, char, loc, optional, string, unexpected, unexpected_eof, Parser, State}; use crate::parser::{
self, ascii_char, ascii_string, loc, optional, peek_utf8_char, peek_utf8_char_at, unexpected,
Parser, State,
};
use bumpalo::collections::{String, Vec}; use bumpalo::collections::{String, Vec};
use roc_region::all::Located; use roc_region::all::Located;
@ -30,7 +33,10 @@ pub fn app_module<'a>() -> impl Parser<'a, Module<'a>> {
pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> { pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> {
parser::map( parser::map(
and!( and!(
skip_first!(string("interface"), and!(space1(1), loc!(module_name()))), skip_first!(
ascii_string("interface"),
and!(space1(1), loc!(module_name()))
),
and!(exposes(), imports()) and!(exposes(), imports())
), ),
|( |(
@ -56,72 +62,68 @@ pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> {
#[inline(always)] #[inline(always)]
pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> { pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> {
move |arena, state: State<'a>| { move |arena, mut state: State<'a>| {
let mut chars = state.input.chars(); match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
if !first_letter.is_uppercase() {
return Err(unexpected(0, state, Attempting::Module));
};
let first_letter = match chars.next() { let mut buf = String::with_capacity_in(4, arena);
Some(first_char) => {
// Module names must all be uppercase
if first_char.is_uppercase() {
first_char
} else {
return Err(unexpected(
first_char,
0,
state,
Attempting::RecordFieldLabel,
));
}
}
None => {
return Err(unexpected_eof(0, Attempting::Identifier, state));
}
};
let mut buf = String::with_capacity_in(1, arena); buf.push(first_letter);
buf.push(first_letter); state = state.advance_without_indenting(bytes_parsed)?;
while let Some(ch) = chars.next() { while !state.bytes.is_empty() {
// After the first character, only these are allowed: match peek_utf8_char(&state) {
// Ok((ch, bytes_parsed)) => {
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers // After the first character, only these are allowed:
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A '.' separating module parts
if ch.is_alphabetic() || ch.is_ascii_digit() {
buf.push(ch);
} else if ch == '.' {
match chars.next() {
Some(next) => {
if next.is_uppercase() {
// If we hit another uppercase letter, keep going!
buf.push('.');
buf.push(next);
} else {
// We have finished parsing the module name.
// //
// There may be an identifier after this '.', // * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
// e.g. "baz" in `Foo.Bar.baz` // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
break; // * A '.' separating module parts
if ch.is_alphabetic() || ch.is_ascii_digit() {
state = state.advance_without_indenting(bytes_parsed)?;
buf.push(ch);
} else if ch == '.' {
match peek_utf8_char_at(&state, 1) {
Ok((next, next_bytes_parsed)) => {
if next.is_uppercase() {
// If we hit another uppercase letter, keep going!
buf.push('.');
buf.push(next);
state = state.advance_without_indenting(
bytes_parsed + next_bytes_parsed,
)?;
} else {
// We have finished parsing the module name.
//
// There may be an identifier after this '.',
// e.g. "baz" in `Foo.Bar.baz`
return Ok((
ModuleName::new(buf.into_bump_str()),
state,
));
}
}
Err(reason) => return state.fail(reason),
}
} else {
// This is the end of the module name. We're done!
break;
}
} }
} Err(reason) => return state.fail(reason),
None => {
// A module name can't end with a '.'
return Err(unexpected_eof(0, Attempting::Identifier, state));
} }
} }
} else {
// This is the end of the module name. We're done! Ok((ModuleName::new(buf.into_bump_str()), state))
break;
} }
Err(reason) => state.fail(reason),
} }
let chars_parsed = buf.len();
Ok((
ModuleName::new(buf.into_bump_str()),
state.advance_without_indenting(chars_parsed)?,
))
} }
} }
@ -129,7 +131,7 @@ pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> {
fn app_header<'a>() -> impl Parser<'a, AppHeader<'a>> { fn app_header<'a>() -> impl Parser<'a, AppHeader<'a>> {
parser::map( parser::map(
and!( and!(
skip_first!(string("app"), and!(space1(1), loc!(module_name()))), skip_first!(ascii_string("app"), and!(space1(1), loc!(module_name()))),
and!(provides(), imports()) and!(provides(), imports())
), ),
|( |(
@ -167,8 +169,14 @@ fn provides<'a>() -> impl Parser<
), ),
> { > {
and!( and!(
and!(skip_second!(space1(1), string("provides")), space1(1)), and!(skip_second!(space1(1), ascii_string("provides")), space1(1)),
collection!(char('['), loc!(exposes_entry()), char(','), char(']'), 1) collection!(
ascii_char('['),
loc!(exposes_entry()),
ascii_char(','),
ascii_char(']'),
1
)
) )
} }
@ -181,8 +189,14 @@ fn exposes<'a>() -> impl Parser<
), ),
> { > {
and!( and!(
and!(skip_second!(space1(1), string("exposes")), space1(1)), and!(skip_second!(space1(1), ascii_string("exposes")), space1(1)),
collection!(char('['), loc!(exposes_entry()), char(','), char(']'), 1) collection!(
ascii_char('['),
loc!(exposes_entry()),
ascii_char(','),
ascii_char(']'),
1
)
) )
} }
@ -195,8 +209,14 @@ fn imports<'a>() -> impl Parser<
), ),
> { > {
and!( and!(
and!(skip_second!(space1(1), string("imports")), space1(1)), and!(skip_second!(space1(1), ascii_string("imports")), space1(1)),
collection!(char('['), loc!(imports_entry()), char(','), char(']'), 1) collection!(
ascii_char('['),
loc!(imports_entry()),
ascii_char(','),
ascii_char(']'),
1
)
) )
} }
@ -213,8 +233,14 @@ fn imports_entry<'a>() -> impl Parser<'a, ImportsEntry<'a>> {
module_name(), module_name(),
// e.g. `.{ Task, after}` // e.g. `.{ Task, after}`
optional(skip_first!( optional(skip_first!(
char('.'), ascii_char('.'),
collection!(char('{'), loc!(exposes_entry()), char(','), char('}'), 1) collection!(
ascii_char('{'),
loc!(exposes_entry()),
ascii_char(','),
ascii_char('}'),
1
)
)) ))
), ),
|arena, |arena,

View file

@ -1,23 +1,19 @@
use crate::ast::{Attempting, Base, Expr}; use crate::ast::{Attempting, Base, Expr};
use crate::parser::{unexpected, unexpected_eof, ParseResult, Parser, State}; use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use std::char; use std::char;
use std::str::from_utf8_unchecked;
pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> { pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
move |_arena, state: State<'a>| { move |_arena, state: State<'a>| {
let mut chars = state.input.chars(); let bytes = &mut state.bytes.iter();
match chars.next() { match bytes.next() {
Some(first_ch) => { Some(&first_byte) => {
// Number literals must start with either an '-' or a digit. // Number literals must start with either an '-' or a digit.
if first_ch == '-' || first_ch.is_ascii_digit() { if first_byte == b'-' || (first_byte as char).is_ascii_digit() {
parse_number_literal(first_ch, &mut chars, state) parse_number_literal(first_byte as char, bytes, state)
} else { } else {
Err(unexpected( Err(unexpected(1, state, Attempting::NumberLiteral))
first_ch,
first_ch.len_utf8(),
state,
Attempting::NumberLiteral,
))
} }
} }
None => Err(unexpected_eof(0, state.attempting, state)), None => Err(unexpected_eof(0, state.attempting, state)),
@ -28,11 +24,11 @@ pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
#[inline(always)] #[inline(always)]
fn parse_number_literal<'a, I>( fn parse_number_literal<'a, I>(
first_ch: char, first_ch: char,
chars: &mut I, bytes: &mut I,
state: State<'a>, state: State<'a>,
) -> ParseResult<'a, Expr<'a>> ) -> ParseResult<'a, Expr<'a>>
where where
I: Iterator<Item = char>, I: Iterator<Item = &'a u8>,
{ {
use self::LiteralType::*; use self::LiteralType::*;
@ -40,13 +36,12 @@ where
// We already parsed 1 character (which may have been a minus sign). // We already parsed 1 character (which may have been a minus sign).
let mut bytes_parsed = 1; let mut bytes_parsed = 1;
let mut prev_ch = first_ch; let mut prev_byte = first_ch as u8;
let mut has_parsed_digits = first_ch.is_ascii_digit(); let mut has_parsed_digits = first_ch.is_ascii_digit();
for next_ch in chars { for &next_byte in bytes {
let err_unexpected = || { let err_unexpected = || {
Err(unexpected( Err(unexpected(
next_ch,
bytes_parsed, bytes_parsed,
state.clone(), state.clone(),
Attempting::NumberLiteral, Attempting::NumberLiteral,
@ -55,91 +50,91 @@ where
let is_potentially_non_base10 = || { let is_potentially_non_base10 = || {
(bytes_parsed == 1 && first_ch == '0') (bytes_parsed == 1 && first_ch == '0')
|| (bytes_parsed == 2 && first_ch == '-' && prev_ch == '0') || (bytes_parsed == 2 && first_ch == '-' && prev_byte == b'0')
}; };
if next_ch == '.' { match next_byte as char {
if typ == Float { '.' => {
// You only get one decimal point! if typ == Float {
return err_unexpected(); // You only get one decimal point!
} else { return err_unexpected();
typ = Float; } else {
typ = Float;
}
} }
} else if next_ch == 'x' { 'x' => {
if is_potentially_non_base10() { if is_potentially_non_base10() {
typ = Hex; typ = Hex;
} else { } else {
return err_unexpected(); return err_unexpected();
}
} }
} else if next_ch == 'b' && typ == Num { 'b' if typ == Num => {
// We have to check for typ == Num because otherwise we get a false // We have to check for typ == Num because otherwise we get a false
// positive here when parsing a hex literal that happens to have // positive here when parsing a hex literal that happens to have
// a 'b' in it, e.g. 0xbbbb // a 'b' in it, e.g. 0xbbbb
if is_potentially_non_base10() { if is_potentially_non_base10() {
typ = Binary; typ = Binary;
} else { } else {
return err_unexpected(); return err_unexpected();
}
} }
} else if next_ch == 'o' { 'o' => {
if is_potentially_non_base10() { if is_potentially_non_base10() {
typ = Octal; typ = Octal;
} else { } else {
return err_unexpected(); return err_unexpected();
}
} }
} else if next_ch.is_ascii_digit() { next_ch if next_ch.is_ascii_digit() => {
has_parsed_digits = true; has_parsed_digits = true;
} else if next_ch != '_' && }
next_ch
if next_ch != '_' &&
// ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals. // ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals.
// We parse them in any int literal, so we can give a more helpful error // We parse them in any int literal, so we can give a more helpful error
// in canonicalization (e.g. "the character 'f' is not allowed in Octal literals" // in canonicalization (e.g. "the character 'f' is not allowed in Octal literals"
// or "the character 'g' is outside the range of valid Hex literals") // or "the character 'g' is outside the range of valid Hex literals")
!next_ch.is_ascii_alphabetic() !next_ch.is_ascii_alphabetic() =>
{ {
if has_parsed_digits { if has_parsed_digits {
// We hit an invalid number literal character; we're done! // We hit an invalid number literal character; we're done!
break; break;
} else { } else {
// No digits! We likely parsed a minus sign that's actually an operator. // No digits! We likely parsed a minus sign that's actually an operator.
return err_unexpected(); return err_unexpected();
}
} }
_ => {}
} }
// Since we only consume characters in the ASCII range for number literals, // Since we only consume characters in the ASCII range for number literals,
// this will always be exactly 1. There's no need to call next_ch.utf8_len(). // this will always be exactly 1. There's no need to call next_ch.utf8_len().
bytes_parsed += 1; bytes_parsed += 1;
prev_ch = next_ch; prev_byte = next_byte;
} }
let from_base = |base| {
let is_negative = first_ch == '-';
let string = if is_negative {
&state.input[3..bytes_parsed]
} else {
&state.input[2..bytes_parsed]
};
Expr::NonBase10Int {
is_negative,
string,
base,
}
};
// At this point we have a number, and will definitely succeed. // At this point we have a number, and will definitely succeed.
// If the number is malformed (outside the supported range), // If the number is malformed (outside the supported range),
// we'll succeed with an appropriate Expr which records that. // we'll succeed with an appropriate Expr which records that.
let expr = match typ { match typ {
Num => Expr::Num(&state.input[0..bytes_parsed]), Num => Ok((
Float => Expr::Float(&state.input[0..bytes_parsed]), // SAFETY: it's safe to use from_utf8_unchecked here, because we've
// already validated that this range contains only ASCII characters
Expr::Num(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
state.advance_without_indenting(bytes_parsed)?,
)),
Float => Ok((
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
// already validated that this range contains only ASCII characters
Expr::Float(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
state.advance_without_indenting(bytes_parsed)?,
)),
// For these we trim off the 0x/0o/0b part // For these we trim off the 0x/0o/0b part
Hex => from_base(Base::Hex), Hex => from_base(Base::Hex, first_ch, bytes_parsed, state),
Octal => from_base(Base::Octal), Octal => from_base(Base::Octal, first_ch, bytes_parsed, state),
Binary => from_base(Base::Binary), Binary => from_base(Base::Binary, first_ch, bytes_parsed, state),
}; }
let next_state = state.advance_without_indenting(bytes_parsed)?;
Ok((expr, next_state))
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -150,3 +145,29 @@ enum LiteralType {
Octal, Octal,
Binary, Binary,
} }
/// Builds the `Expr::NonBase10Int` for a hex, octal, or binary literal.
///
/// The two-byte base prefix (`0x`/`0o`/`0b`), and the leading `-` if there is
/// one, are trimmed off; the remaining bytes up to `bytes_parsed` become the
/// literal's `string`. On success the returned state has been advanced past
/// all `bytes_parsed` bytes. If the trimmed bytes are not valid UTF-8, the
/// parse fails with the reason reported by `parse_utf8`.
fn from_base(
    base: Base,
    first_ch: char,
    bytes_parsed: usize,
    state: State<'_>,
) -> ParseResult<'_, Expr<'_>> {
    let is_negative = first_ch == '-';

    // Skip the base prefix: 2 bytes normally, 3 when a '-' precedes it.
    let digits_start = if is_negative { 3 } else { 2 };

    let string = match parse_utf8(&state.bytes[digits_start..bytes_parsed]) {
        Ok(valid) => valid,
        Err(reason) => return state.fail(reason),
    };

    let next_state = state.advance_without_indenting(bytes_parsed)?;

    Ok((
        Expr::NonBase10Int {
            is_negative,
            string,
            base,
        },
        next_state,
    ))
}

View file

@ -1,14 +1,17 @@
use crate::ast::Attempting; use crate::ast::Attempting;
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
use encode_unicode::CharExt;
use roc_region::all::{Located, Region}; use roc_region::all::{Located, Region};
use std::fmt;
use std::str::from_utf8;
use std::{char, u16}; use std::{char, u16};
/// A position in a source file. /// A position in a source file.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Clone, PartialEq, Eq)]
pub struct State<'a> { pub struct State<'a> {
/// The raw input string. /// The raw input bytes from the file.
pub input: &'a str, pub bytes: &'a [u8],
/// Current line of the input /// Current line of the input
pub line: u32, pub line: u32,
@ -39,15 +42,15 @@ pub enum Either<First, Second> {
} }
impl<'a> State<'a> { impl<'a> State<'a> {
pub fn new(input: &'a str, attempting: Attempting) -> State<'a> { pub fn new(bytes: &'a [u8], attempting: Attempting) -> State<'a> {
State { State {
input, bytes,
line: 0, line: 0,
column: 0, column: 0,
indent_col: 0, indent_col: 0,
is_indenting: true, is_indenting: true,
attempting, attempting,
original_len: input.len(), original_len: bytes.len(),
} }
} }
@ -69,7 +72,7 @@ impl<'a> State<'a> {
/// ///
/// So if the parser has consumed 8 bytes, this function will return 8. /// So if the parser has consumed 8 bytes, this function will return 8.
pub fn bytes_consumed(&self) -> usize { pub fn bytes_consumed(&self) -> usize {
self.original_len - self.input.len() self.original_len - self.bytes.len()
} }
/// Increments the line, then resets column, indent_col, and is_indenting. /// Increments the line, then resets column, indent_col, and is_indenting.
@ -77,7 +80,7 @@ impl<'a> State<'a> {
pub fn newline(&self) -> Result<Self, (Fail, Self)> { pub fn newline(&self) -> Result<Self, (Fail, Self)> {
match self.line.checked_add(1) { match self.line.checked_add(1) {
Some(line) => Ok(State { Some(line) => Ok(State {
input: &self.input[1..], bytes: &self.bytes[1..],
line, line,
column: 0, column: 0,
indent_col: 0, indent_col: 0,
@ -99,11 +102,11 @@ impl<'a> State<'a> {
/// This assumes we are *not* advancing with spaces, or at least that /// This assumes we are *not* advancing with spaces, or at least that
/// any spaces on the line were preceded by non-spaces - which would mean /// any spaces on the line were preceded by non-spaces - which would mean
/// they weren't eligible to indent anyway. /// they weren't eligible to indent anyway.
pub fn advance_without_indenting(&self, quantity: usize) -> Result<Self, (Fail, Self)> { pub fn advance_without_indenting(self, quantity: usize) -> Result<Self, (Fail, Self)> {
match (self.column as usize).checked_add(quantity) { match (self.column as usize).checked_add(quantity) {
Some(column_usize) if column_usize <= u16::MAX as usize => { Some(column_usize) if column_usize <= u16::MAX as usize => {
Ok(State { Ok(State {
input: &self.input[quantity..], bytes: &self.bytes[quantity..],
line: self.line, line: self.line,
column: column_usize as u16, column: column_usize as u16,
indent_col: self.indent_col, indent_col: self.indent_col,
@ -141,7 +144,7 @@ impl<'a> State<'a> {
}; };
Ok(State { Ok(State {
input: &self.input[spaces..], bytes: &self.bytes[spaces..],
line: self.line, line: self.line,
column: column_usize as u16, column: column_usize as u16,
indent_col, indent_col,
@ -169,6 +172,35 @@ impl<'a> State<'a> {
end_line: self.line, end_line: self.line,
} }
} }
/// Return a failing ParseResult for the given FailReason.
///
/// Consumes this state and hands it back, unchanged, as the second element
/// of the error tuple. The accompanying `Fail` carries `reason` together
/// with this state's `attempting` value.
pub fn fail<T>(self, reason: FailReason) -> Result<(T, Self), (Fail, Self)> {
    let attempting = self.attempting;
    let failure = Fail { reason, attempting };

    Err((failure, self))
}
}
impl<'a> fmt::Debug for State<'a> {
/// Writes a multi-line, tab-indented dump of every field in this state.
///
/// The remaining input is printed as a string when it is valid UTF-8,
/// and as a raw byte slice otherwise.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    write!(f, "State {{")?;

    // Prefer the readable form; fall back to raw bytes for invalid UTF-8.
    if let Ok(string) = from_utf8(self.bytes) {
        write!(f, "\n\tbytes: [utf8] {:?}", string)?;
    } else {
        write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?;
    }

    write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?;
    write!(f, "\n\tindent_col: {}", self.indent_col)?;
    write!(f, "\n\tis_indenting: {:?}", self.is_indenting)?;
    write!(f, "\n\tattempting: {:?}", self.attempting)?;
    write!(f, "\n\toriginal_len: {}", self.original_len)?;

    write!(f, "\n}}")
}
} }
#[test] #[test]
@ -182,13 +214,14 @@ pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum FailReason { pub enum FailReason {
Unexpected(char, Region), Unexpected(Region),
OutdentedTooFar, OutdentedTooFar,
ConditionFailed, ConditionFailed,
LineTooLong(u32 /* which line was too long */), LineTooLong(u32 /* which line was too long */),
TooManyLines, TooManyLines,
Eof(Region), Eof(Region),
InvalidPattern, InvalidPattern,
BadUtf8,
ReservedKeyword(Region), ReservedKeyword(Region),
ArgumentsBeforeEquals(Region), ArgumentsBeforeEquals(Region),
} }
@ -332,13 +365,12 @@ pub fn unexpected_eof(
} }
pub fn unexpected( pub fn unexpected(
ch: char,
chars_consumed: usize, chars_consumed: usize,
state: State<'_>, state: State<'_>,
attempting: Attempting, attempting: Attempting,
) -> (Fail, State<'_>) { ) -> (Fail, State<'_>) {
checked_unexpected(chars_consumed, state, |region| Fail { checked_unexpected(chars_consumed, state, |region| Fail {
reason: FailReason::Unexpected(ch, region), reason: FailReason::Unexpected(region),
attempting, attempting,
}) })
} }
@ -385,9 +417,9 @@ fn line_too_long(attempting: Attempting, state: State<'_>) -> (Fail, State<'_>)
// (for example) the LineTooLong initially occurs in the middle of // (for example) the LineTooLong initially occurs in the middle of
// a one_of chain, which would otherwise prevent it from propagating. // a one_of chain, which would otherwise prevent it from propagating.
let column = u16::MAX; let column = u16::MAX;
let input = state.input.get(0..state.input.len()).unwrap(); let bytes = state.bytes.get(0..state.bytes.len()).unwrap();
let state = State { let state = State {
input, bytes,
line: state.line, line: state.line,
indent_col: state.indent_col, indent_col: state.indent_col,
is_indenting: state.is_indenting, is_indenting: state.is_indenting,
@ -399,29 +431,75 @@ fn line_too_long(attempting: Attempting, state: State<'_>) -> (Fail, State<'_>)
(fail, state) (fail, state)
} }
/// A single char. /// A single ASCII char.
pub fn char<'a>(expected: char) -> impl Parser<'a, ()> { pub fn ascii_char<'a>(expected: char) -> impl Parser<'a, ()> {
move |_arena, state: State<'a>| match state.input.chars().next() { // Make sure this really is an ASCII char!
Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)), debug_assert!(expected.len_utf8() == 1);
Some(other_ch) => Err(unexpected(other_ch, 0, state, Attempting::Keyword)),
move |_arena, state: State<'a>| match state.bytes.first() {
Some(&actual) if expected == actual as char => {
Ok(((), state.advance_without_indenting(1)?))
}
Some(_) => Err(unexpected(0, state, Attempting::Keyword)),
_ => Err(unexpected_eof(0, Attempting::Keyword, state)), _ => Err(unexpected_eof(0, Attempting::Keyword, state)),
} }
} }
/// A hardcoded keyword string with no newlines in it. /// A single UTF-8-encoded char. This will both parse *and* validate that the
pub fn string<'a>(keyword: &'static str) -> impl Parser<'a, ()> { /// char is valid UTF-8, but it will *not* advance the state.
// We can't have newlines because we don't attempt to advance the row pub fn peek_utf8_char<'a>(state: &State<'a>) -> Result<(char, usize), FailReason> {
// in the state, only the column. if !state.bytes.is_empty() {
debug_assert!(!keyword.contains('\n')); match char::from_utf8_slice_start(state.bytes) {
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
Err(_) => Err(FailReason::BadUtf8),
}
} else {
Err(FailReason::Eof(
Region::zero(), /* TODO get a better region */
))
}
}
/// Decodes the UTF-8-encoded char that starts `offset` bytes into the
/// state's remaining input, without advancing the state.
///
/// On success, returns the char together with its encoded length in bytes.
/// Returns `FailReason::BadUtf8` if the bytes at `offset` do not begin with a
/// valid UTF-8 sequence, and `FailReason::Eof` if `offset` is at or past the
/// end of the input.
pub fn peek_utf8_char_at<'a>(
    state: &State<'a>,
    offset: usize,
) -> Result<(char, usize), FailReason> {
    match state.bytes.get(offset..) {
        // At least one byte remains at `offset`, so try to decode it.
        Some(remaining) if !remaining.is_empty() => {
            char::from_utf8_slice_start(remaining).map_err(|_| FailReason::BadUtf8)
        }
        // `offset` is at or beyond the end of the input.
        _ => Err(FailReason::Eof(
            Region::zero(), /* TODO get a better region */
        )),
    }
}
/// A hardcoded string with no newlines, consisting only of ASCII characters
pub fn ascii_string<'a>(keyword: &'static str) -> impl Parser<'a, ()> {
// Verify that this really is exclusively ASCII characters.
// The `unsafe` block in this function relies upon this assumption!
//
// Also, this can't have newlines because we don't attempt to advance
// the row in the state, only the column.
debug_assert!(keyword.chars().all(|ch| ch.len_utf8() == 1 && ch != '\n'));
move |_arena, state: State<'a>| { move |_arena, state: State<'a>| {
let input = state.input;
let len = keyword.len(); let len = keyword.len();
// TODO do this comparison in one SIMD instruction (on supported systems) // TODO do this comparison in one SIMD instruction (on supported systems)
match input.get(0..len) { match state.bytes.get(0..len) {
Some(next_str) if next_str == keyword => { Some(next_str) => {
Ok(((), state.advance_without_indenting(len)?)) if next_str == keyword.as_bytes() {
Ok(((), state.advance_without_indenting(len)?))
} else {
Err(unexpected(len, state, Attempting::Keyword))
}
} }
_ => Err(unexpected_eof(0, Attempting::Keyword, state)), _ => Err(unexpected_eof(0, Attempting::Keyword, state)),
} }
@ -686,7 +764,7 @@ macro_rules! collection {
// We could change the AST to add extra storage specifically to // We could change the AST to add extra storage specifically to
// support empty literals containing newlines or comments, but this // support empty literals containing newlines or comments, but this
// does not seem worth even the tiniest regression in compiler performance. // does not seem worth even the tiniest regression in compiler performance.
zero_or_more!($crate::parser::char(' ')), zero_or_more!($crate::parser::ascii_char(' ')),
skip_second!( skip_second!(
$crate::parser::sep_by0( $crate::parser::sep_by0(
$delimiter, $delimiter,
@ -912,6 +990,7 @@ macro_rules! record_field {
use $crate::ast::AssignedField::*; use $crate::ast::AssignedField::*;
use $crate::blankspace::{space0, space0_before}; use $crate::blankspace::{space0, space0_before};
use $crate::ident::lowercase_ident; use $crate::ident::lowercase_ident;
use $crate::parser::ascii_char;
use $crate::parser::Either::*; use $crate::parser::Either::*;
// You must have a field name, e.g. "email" // You must have a field name, e.g. "email"
@ -922,8 +1001,8 @@ macro_rules! record_field {
// Having a value is optional; both `{ email }` and `{ email: blah }` work. // Having a value is optional; both `{ email }` and `{ email: blah }` work.
// (This is true in both literals and types.) // (This is true in both literals and types.)
let (opt_loc_val, state) = $crate::parser::optional(either!( let (opt_loc_val, state) = $crate::parser::optional(either!(
skip_first!(char(':'), space0_before($val_parser, $min_indent)), skip_first!(ascii_char(':'), space0_before($val_parser, $min_indent)),
skip_first!(char('?'), space0_before($val_parser, $min_indent)) skip_first!(ascii_char('?'), space0_before($val_parser, $min_indent))
)) ))
.parse(arena, state)?; .parse(arena, state)?;
@ -952,10 +1031,10 @@ macro_rules! record_field {
macro_rules! record_without_update { macro_rules! record_without_update {
($val_parser:expr, $min_indent:expr) => { ($val_parser:expr, $min_indent:expr) => {
collection!( collection!(
char('{'), ascii_char('{'),
loc!(record_field!($val_parser, $min_indent)), loc!(record_field!($val_parser, $min_indent)),
char(','), ascii_char(','),
char('}'), ascii_char('}'),
$min_indent $min_indent
) )
}; };
@ -965,7 +1044,7 @@ macro_rules! record_without_update {
macro_rules! record { macro_rules! record {
($val_parser:expr, $min_indent:expr) => { ($val_parser:expr, $min_indent:expr) => {
skip_first!( skip_first!(
$crate::parser::char('{'), $crate::parser::ascii_char('{'),
and!( and!(
// You can optionally have an identifier followed by an '&' to // You can optionally have an identifier followed by an '&' to
// make this a record update, e.g. { Foo.user & username: "blah" }. // make this a record update, e.g. { Foo.user & username: "blah" }.
@ -981,7 +1060,7 @@ macro_rules! record {
)), )),
$min_indent $min_indent
), ),
$crate::parser::char('&') $crate::parser::ascii_char('&')
)), )),
loc!(skip_first!( loc!(skip_first!(
// We specifically allow space characters inside here, so that // We specifically allow space characters inside here, so that
@ -995,16 +1074,16 @@ macro_rules! record {
// We could change the AST to add extra storage specifically to // We could change the AST to add extra storage specifically to
// support empty literals containing newlines or comments, but this // support empty literals containing newlines or comments, but this
// does not seem worth even the tiniest regression in compiler performance. // does not seem worth even the tiniest regression in compiler performance.
zero_or_more!($crate::parser::char(' ')), zero_or_more!($crate::parser::ascii_char(' ')),
skip_second!( skip_second!(
$crate::parser::sep_by0( $crate::parser::sep_by0(
$crate::parser::char(','), $crate::parser::ascii_char(','),
$crate::blankspace::space0_around( $crate::blankspace::space0_around(
loc!(record_field!($val_parser, $min_indent)), loc!(record_field!($val_parser, $min_indent)),
$min_indent $min_indent
) )
), ),
$crate::parser::char('}') $crate::parser::ascii_char('}')
) )
)) ))
) )
@ -1067,3 +1146,10 @@ where
{ {
attempt!(attempting, parser) attempt!(attempting, parser)
} }
/// Interprets `bytes` as a UTF-8 string slice.
///
/// Returns the borrowed `&str` when the bytes are valid UTF-8, and
/// `FailReason::BadUtf8` otherwise. The underlying decoding error is discarded.
pub fn parse_utf8(bytes: &[u8]) -> Result<&str, FailReason> {
    from_utf8(bytes).map_err(|_| FailReason::BadUtf8)
}

View file

@ -1,8 +1,7 @@
use crate::ast::Attempting; use crate::ast::Attempting;
use crate::parser::{unexpected, unexpected_eof, ParseResult, Parser, State}; use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
use std::char;
pub enum StringLiteral<'a> { pub enum StringLiteral<'a> {
Line(&'a str), Line(&'a str),
@ -11,14 +10,15 @@ pub enum StringLiteral<'a> {
pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> { pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
move |arena: &'a Bump, state: State<'a>| { move |arena: &'a Bump, state: State<'a>| {
let mut chars = state.input.chars(); let mut bytes = state.bytes.iter();
// String literals must start with a quote. // String literals must start with a quote.
// If this doesn't, it must not be a string literal! // If this doesn't, it must not be a string literal!
match chars.next() { match bytes.next() {
Some('"') => (), Some(&byte) => {
Some(other_char) => { if byte != b'"' {
return Err(unexpected(other_char, 0, state, Attempting::StringLiteral)); return Err(unexpected(0, state, Attempting::StringLiteral));
}
} }
None => { None => {
return Err(unexpected_eof(0, Attempting::StringLiteral, state)); return Err(unexpected_eof(0, Attempting::StringLiteral, state));
@ -35,44 +35,49 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
// Since we're keeping the entire raw string, all we need to track is // Since we're keeping the entire raw string, all we need to track is
// how many characters we've parsed. So far, that's 1 (the opening `"`). // how many characters we've parsed. So far, that's 1 (the opening `"`).
let mut parsed_chars = 1; let mut parsed_chars = 1;
let mut prev_ch = '"'; let mut prev_byte = b'"';
while let Some(ch) = chars.next() { while let Some(&byte) = bytes.next() {
parsed_chars += 1; parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!) // Potentially end the string (unless this is an escaped `"`!)
if ch == '"' && prev_ch != '\\' { if byte == b'"' && prev_byte != b'\\' {
let string = if parsed_chars == 2 { let (string, state) = if parsed_chars == 2 {
if let Some('"') = chars.next() { match bytes.next() {
// If the first three chars were all `"`, then this Some(byte) if *byte == b'"' => {
// literal begins with `"""` and is a block string. // If the first three chars were all `"`, then this
return parse_block_string(arena, state, &mut chars); // literal begins with `"""` and is a block string.
} else { return parse_block_string(arena, state, &mut bytes);
"" }
_ => ("", state.advance_without_indenting(2)?),
} }
} else { } else {
// Start at 1 so we omit the opening `"`. // Start at 1 so we omit the opening `"`.
// Subtract 1 from parsed_chars so we omit the closing `"`. // Subtract 1 from parsed_chars so we omit the closing `"`.
&state.input[1..(parsed_chars - 1)] let string_bytes = &state.bytes[1..(parsed_chars - 1)];
match parse_utf8(string_bytes) {
Ok(string) => (string, state.advance_without_indenting(parsed_chars)?),
Err(reason) => {
return state.fail(reason);
}
}
}; };
let next_state = state.advance_without_indenting(parsed_chars)?; return Ok((StringLiteral::Line(string), state));
} else if byte == b'\n' {
return Ok((StringLiteral::Line(string), next_state));
} else if ch == '\n' {
// This is a single-line string, which cannot have newlines! // This is a single-line string, which cannot have newlines!
// Treat this as an unclosed string literal, and consume // Treat this as an unclosed string literal, and consume
// all remaining chars. This will mask all other errors, but // all remaining chars. This will mask all other errors, but
// it should make it easiest to debug; the file will be a giant // it should make it easiest to debug; the file will be a giant
// error starting from where the open quote appeared. // error starting from where the open quote appeared.
return Err(unexpected( return Err(unexpected(
'\n', state.bytes.len() - 1,
state.input.len() - 1,
state, state,
Attempting::StringLiteral, Attempting::StringLiteral,
)); ));
} else { } else {
prev_ch = ch; prev_byte = byte;
} }
} }
@ -88,48 +93,64 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
fn parse_block_string<'a, I>( fn parse_block_string<'a, I>(
arena: &'a Bump, arena: &'a Bump,
state: State<'a>, state: State<'a>,
chars: &mut I, bytes: &mut I,
) -> ParseResult<'a, StringLiteral<'a>> ) -> ParseResult<'a, StringLiteral<'a>>
where where
I: Iterator<Item = char>, I: Iterator<Item = &'a u8>,
{ {
// So far we have consumed the `"""` and that's it. // So far we have consumed the `"""` and that's it.
let mut parsed_chars = 3; let mut parsed_chars = 3;
let mut prev_ch = '"'; let mut prev_byte = b'"';
let mut quotes_seen = 0; let mut quotes_seen = 0;
// start at 3 to omit the opening `"`. // start at 3 to omit the opening `"`.
let mut line_start = 3; let mut line_start = 3;
let mut lines = Vec::new_in(arena); let mut lines: Vec<'a, &'a str> = Vec::new_in(arena);
for ch in chars { for byte in bytes {
parsed_chars += 1; parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!) // Potentially end the string (unless this is an escaped `"`!)
if ch == '"' && prev_ch != '\\' { if *byte == b'"' && prev_byte != b'\\' {
if quotes_seen == 2 { if quotes_seen == 2 {
// three consecutive quotes, end string // three consecutive quotes, end string
// Subtract 3 from parsed_chars so we omit the closing `"`. // Subtract 3 from parsed_chars so we omit the closing `"`.
let string = &state.input[line_start..(parsed_chars - 3)]; let line_bytes = &state.bytes[line_start..(parsed_chars - 3)];
lines.push(string);
let next_state = state.advance_without_indenting(parsed_chars)?; return match parse_utf8(line_bytes) {
Ok(line) => {
let state = state.advance_without_indenting(parsed_chars)?;
return Ok((StringLiteral::Block(arena.alloc(lines)), next_state)); lines.push(line);
Ok((StringLiteral::Block(arena.alloc(lines)), state))
}
Err(reason) => state.fail(reason),
};
} }
quotes_seen += 1; quotes_seen += 1;
} else if ch == '\n' { } else if *byte == b'\n' {
// note this includes the newline // note this includes the newline
let string = &state.input[line_start..parsed_chars]; let line_bytes = &state.bytes[line_start..parsed_chars];
lines.push(string);
quotes_seen = 0; match parse_utf8(line_bytes) {
line_start = parsed_chars; Ok(line) => {
lines.push(line);
quotes_seen = 0;
line_start = parsed_chars;
}
Err(reason) => {
return state.fail(reason);
}
}
} else { } else {
quotes_seen = 0; quotes_seen = 0;
} }
prev_ch = ch;
prev_byte = *byte;
} }
// We ran out of characters before finding 3 closing quotes // We ran out of characters before finding 3 closing quotes
@ -137,6 +158,6 @@ where
parsed_chars, parsed_chars,
// TODO custom BlockStringLiteral? // TODO custom BlockStringLiteral?
Attempting::StringLiteral, Attempting::StringLiteral,
state.clone(), state,
)) ))
} }

View file

@ -4,8 +4,8 @@ use crate::expr::{global_tag, private_tag};
use crate::ident::join_module_parts; use crate::ident::join_module_parts;
use crate::keyword; use crate::keyword;
use crate::parser::{ use crate::parser::{
allocated, char, not, optional, string, unexpected, unexpected_eof, Either, ParseResult, allocated, ascii_char, ascii_string, not, optional, peek_utf8_char, unexpected, Either,
Parser, State, ParseResult, Parser, State,
}; };
use bumpalo::collections::string::String; use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
@ -22,10 +22,10 @@ macro_rules! tag_union {
map!( map!(
and!( and!(
collection!( collection!(
char('['), ascii_char('['),
loc!(tag_type($min_indent)), loc!(tag_type($min_indent)),
char(','), ascii_char(','),
char(']'), ascii_char(']'),
$min_indent $min_indent
), ),
optional( optional(
@ -61,7 +61,7 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>>
and!( and!(
space1(min_indent), space1(min_indent),
skip_first!( skip_first!(
string(keyword::AS), ascii_string(keyword::AS),
space1_before(term(min_indent), min_indent) space1_before(term(min_indent), min_indent)
) )
) )
@ -89,7 +89,7 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>>
/// The `*` type variable, e.g. in (List *) Wildcard, /// The `*` type variable, e.g. in (List *) Wildcard,
fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> { fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
map!(loc!(char('*')), |loc_val: Located<()>| { map!(loc!(ascii_char('*')), |loc_val: Located<()>| {
loc_val.map(|_| TypeAnnotation::Wildcard) loc_val.map(|_| TypeAnnotation::Wildcard)
}) })
} }
@ -97,7 +97,7 @@ fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> { pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
skip_first!( skip_first!(
// Once we hit an "as", stop parsing args // Once we hit an "as", stop parsing args
not(string(keyword::AS)), not(ascii_string(keyword::AS)),
one_of!( one_of!(
loc_wildcard(), loc_wildcard(),
loc_parenthetical_type(min_indent), loc_parenthetical_type(min_indent),
@ -112,12 +112,12 @@ pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnot
#[inline(always)] #[inline(always)]
fn loc_parenthetical_type<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> { fn loc_parenthetical_type<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
between!( between!(
char('('), ascii_char('('),
space0_around( space0_around(
move |arena, state| expression(min_indent).parse(arena, state), move |arena, state| expression(min_indent).parse(arena, state),
min_indent, min_indent,
), ),
char(')') ascii_char(')')
) )
} }
@ -208,7 +208,7 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
move |arena, state: State<'a>| { move |arena, state: State<'a>| {
let (first, state) = space0_before(term(min_indent), min_indent).parse(arena, state)?; let (first, state) = space0_before(term(min_indent), min_indent).parse(arena, state)?;
let (rest, state) = zero_or_more!(skip_first!( let (rest, state) = zero_or_more!(skip_first!(
char(','), ascii_char(','),
space0_around(term(min_indent), min_indent) space0_around(term(min_indent), min_indent)
)) ))
.parse(arena, state)?; .parse(arena, state)?;
@ -216,7 +216,7 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
// TODO this space0 is dropped, so newlines just before the function arrow when there // TODO this space0 is dropped, so newlines just before the function arrow when there
// is only one argument are not seen by the formatter. Can we do better? // is only one argument are not seen by the formatter. Can we do better?
let (is_function, state) = let (is_function, state) =
optional(skip_first!(space0(min_indent), string("->"))).parse(arena, state)?; optional(skip_first!(space0(min_indent), ascii_string("->"))).parse(arena, state)?;
if is_function.is_some() { if is_function.is_some() {
let (return_type, state) = let (return_type, state) =
@ -263,67 +263,70 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
fn parse_concrete_type<'a>( fn parse_concrete_type<'a>(
arena: &'a Bump, arena: &'a Bump,
state: State<'a>, mut state: State<'a>,
) -> ParseResult<'a, TypeAnnotation<'a>> { ) -> ParseResult<'a, TypeAnnotation<'a>> {
let mut chars = state.input.chars();
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.) let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
let mut parts: Vec<&'a str> = Vec::new_in(arena); let mut parts: Vec<&'a str> = Vec::new_in(arena);
// Qualified types must start with a capitalized letter. // Qualified types must start with a capitalized letter.
match chars.next() { match peek_utf8_char(&state) {
Some(ch) => { Ok((first_letter, bytes_parsed)) => {
if ch.is_alphabetic() && ch.is_uppercase() { if first_letter.is_alphabetic() && first_letter.is_uppercase() {
part_buf.push(ch); part_buf.push(first_letter);
} else { } else {
return Err(unexpected(ch, 0, state, Attempting::ConcreteType)); return Err(unexpected(0, state, Attempting::ConcreteType));
} }
}
None => {
return Err(unexpected_eof(0, Attempting::ConcreteType, state));
}
};
let mut chars_parsed = 1; state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
let mut next_char = None; let mut next_char = None;
while let Some(ch) = chars.next() { while !state.bytes.is_empty() {
// After the first character, only these are allowed: match peek_utf8_char(&state) {
// Ok((ch, bytes_parsed)) => {
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers // After the first character, only these are allowed:
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric() //
// * A dot ('.') // * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
if ch.is_alphabetic() { // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
if part_buf.is_empty() && !ch.is_uppercase() { // * A dot ('.')
// Each part must begin with a capital letter. if ch.is_alphabetic() {
return malformed(Some(ch), arena, state, &mut chars, parts); if part_buf.is_empty() && !ch.is_uppercase() {
// Each part must begin with a capital letter.
return malformed(Some(ch), arena, state, parts);
}
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, parts);
}
part_buf.push(ch);
} else if ch == '.' {
// Having two consecutive dots is an error.
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, parts);
}
parts.push(part_buf.into_bump_str());
// Now that we've recorded the contents of the current buffer, reset it.
part_buf = String::new_in(arena);
} else {
// This must be the end of the type. We're done!
next_char = Some(ch);
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
} }
Err(reason) => return state.fail(reason),
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, &mut chars, parts);
}
part_buf.push(ch);
} else if ch == '.' {
// Having two consecutive dots is an error.
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, &mut chars, parts);
}
parts.push(part_buf.into_bump_str());
// Now that we've recorded the contents of the current buffer, reset it.
part_buf = String::new_in(arena);
} else {
// This must be the end of the type. We're done!
next_char = Some(ch);
break;
} }
chars_parsed += 1;
} }
if part_buf.is_empty() { if part_buf.is_empty() {
@ -333,23 +336,16 @@ fn parse_concrete_type<'a>(
// //
// If we made it this far and don't have a next_char, then necessarily // If we made it this far and don't have a next_char, then necessarily
// we have consumed a '.' char previously. // we have consumed a '.' char previously.
return malformed( return malformed(next_char.or_else(|| Some('.')), arena, state, parts);
next_char.or_else(|| Some('.')),
arena,
state,
&mut chars,
parts,
);
} }
if part_buf.is_empty() { if part_buf.is_empty() {
// We had neither capitalized nor noncapitalized parts, // We had neither capitalized nor noncapitalized parts,
// yet we made it this far. The only explanation is that this was // yet we made it this far. The only explanation is that this was
// a stray '.' drifting through the cosmos. // a stray '.' drifting through the cosmos.
return Err(unexpected('.', 1, state, Attempting::Identifier)); return Err(unexpected(1, state, Attempting::Identifier));
} }
let state = state.advance_without_indenting(chars_parsed)?;
let answer = TypeAnnotation::Apply( let answer = TypeAnnotation::Apply(
join_module_parts(arena, parts.into_bump_slice()), join_module_parts(arena, parts.into_bump_slice()),
part_buf.into_bump_str(), part_buf.into_bump_str(),
@ -361,58 +357,55 @@ fn parse_concrete_type<'a>(
fn parse_type_variable<'a>( fn parse_type_variable<'a>(
arena: &'a Bump, arena: &'a Bump,
state: State<'a>, mut state: State<'a>,
) -> ParseResult<'a, TypeAnnotation<'a>> { ) -> ParseResult<'a, TypeAnnotation<'a>> {
let mut chars = state.input.chars();
let mut buf = String::new_in(arena); let mut buf = String::new_in(arena);
// Type variables must start with a lowercase letter. match peek_utf8_char(&state) {
match chars.next() { Ok((first_letter, bytes_parsed)) => {
Some(ch) => { // Type variables must start with a lowercase letter.
if ch.is_alphabetic() && ch.is_lowercase() { if first_letter.is_alphabetic() && first_letter.is_lowercase() {
buf.push(ch); buf.push(first_letter);
} else { } else {
return Err(unexpected(ch, 0, state, Attempting::TypeVariable)); return Err(unexpected(0, state, Attempting::TypeVariable));
} }
}
None => {
return Err(unexpected_eof(0, Attempting::TypeVariable, state));
}
};
let mut chars_parsed = 1; state = state.advance_without_indenting(bytes_parsed)?;
}
for ch in chars { Err(reason) => return state.fail(reason),
// After the first character, only these are allowed: }
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers while !state.bytes.is_empty() {
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric() match peek_utf8_char(&state) {
if ch.is_alphabetic() || ch.is_ascii_digit() { Ok((ch, bytes_parsed)) => {
buf.push(ch); // After the first character, only these are allowed:
} else { //
// This must be the end of the type. We're done! // * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
break; // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
if ch.is_alphabetic() || ch.is_ascii_digit() {
buf.push(ch);
} else {
// This must be the end of the type. We're done!
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
} }
chars_parsed += 1;
} }
let state = state.advance_without_indenting(chars_parsed)?;
let answer = TypeAnnotation::BoundVariable(buf.into_bump_str()); let answer = TypeAnnotation::BoundVariable(buf.into_bump_str());
Ok((answer, state)) Ok((answer, state))
} }
fn malformed<'a, I>( fn malformed<'a>(
opt_bad_char: Option<char>, opt_bad_char: Option<char>,
arena: &'a Bump, arena: &'a Bump,
state: State<'a>, mut state: State<'a>,
chars: &mut I,
parts: Vec<&'a str>, parts: Vec<&'a str>,
) -> ParseResult<'a, TypeAnnotation<'a>> ) -> ParseResult<'a, TypeAnnotation<'a>> {
where
I: Iterator<Item = char>,
{
// Reconstruct the original string that we've been parsing. // Reconstruct the original string that we've been parsing.
let mut full_string = String::new_in(arena); let mut full_string = String::new_in(arena);
@ -423,20 +416,25 @@ where
} }
// Consume the remaining chars in the identifier. // Consume the remaining chars in the identifier.
for ch in chars { while !state.bytes.is_empty() {
// We can't use ch.is_alphanumeric() here because that passes for match peek_utf8_char(&state) {
// things that are "numeric" but not ASCII digits, like `¾` Ok((ch, bytes_parsed)) => {
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() { // We can't use ch.is_alphanumeric() here because that passes for
full_string.push(ch); // things that are "numeric" but not ASCII digits, like `¾`
} else { if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
break; full_string.push(ch);
} else {
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
} }
} }
let chars_parsed = full_string.len();
Ok(( Ok((
TypeAnnotation::Malformed(full_string.into_bump_str()), TypeAnnotation::Malformed(full_string.into_bump_str()),
state.advance_without_indenting(chars_parsed)?, state,
)) ))
} }

View file

@ -13,7 +13,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)] #[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);

View file

@ -918,17 +918,12 @@ mod test_parse {
let arena = Bump::new(); let arena = Bump::new();
let arg = arena.alloc(Located::new(0, 0, 5, 6, Num("1"))); let arg = arena.alloc(Located::new(0, 0, 5, 6, Num("1")));
let args = bumpalo::vec![in &arena; &*arg]; let args = bumpalo::vec![in &arena; &*arg];
let expr = Var {
module_name: "",
ident: "whee",
};
let expected = Expr::Apply( let expected = Expr::Apply(
arena.alloc(Located::new( arena.alloc(Located::new(0, 0, 0, 4, expr)),
0,
0,
0,
4,
Var {
module_name: "",
ident: "whee",
},
)),
args, args,
CalledVia::Space, CalledVia::Space,
); );
@ -1040,16 +1035,11 @@ mod test_parse {
fn unary_negation() { fn unary_negation() {
let arena = Bump::new(); let arena = Bump::new();
let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Negate); let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Negate);
let loc_arg1_expr = Located::new( let arg1_expr = Var {
0, module_name: "",
0, ident: "foo",
1, };
4, let loc_arg1_expr = Located::new(0, 0, 1, 4, arg1_expr);
Var {
module_name: "",
ident: "foo",
},
);
let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op); let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op);
let actual = parse_with(&arena, "-foo"); let actual = parse_with(&arena, "-foo");
@ -1060,16 +1050,11 @@ mod test_parse {
fn unary_not() { fn unary_not() {
let arena = Bump::new(); let arena = Bump::new();
let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Not); let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Not);
let loc_arg1_expr = Located::new( let arg1_expr = Var {
0, module_name: "",
0, ident: "blah",
1, };
5, let loc_arg1_expr = Located::new(0, 0, 1, 5, arg1_expr);
Var {
module_name: "",
ident: "blah",
},
);
let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op); let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op);
let actual = parse_with(&arena, "!blah"); let actual = parse_with(&arena, "!blah");
@ -2092,7 +2077,7 @@ mod test_parse {
"# "#
); );
let actual = interface_header() let actual = interface_header()
.parse(&arena, State::new(&src, Attempting::Module)) .parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0); .map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual); assert_eq!(Ok(expected), actual);
@ -2121,7 +2106,7 @@ mod test_parse {
"# "#
); );
let actual = interface_header() let actual = interface_header()
.parse(&arena, State::new(&src, Attempting::Module)) .parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0); .map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual); assert_eq!(Ok(expected), actual);
@ -2174,7 +2159,7 @@ mod test_parse {
"# "#
); );
let actual = module_defs() let actual = module_defs()
.parse(&arena, State::new(&src, Attempting::Module)) .parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0); .map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual); assert_eq!(Ok(expected), actual);

View file

@ -91,7 +91,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)] #[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);

View file

@ -86,9 +86,6 @@ mod test_reporting {
let mut procs = Procs::default(); let mut procs = Procs::default();
let mut ident_ids = interns.all_ident_ids.remove(&home).unwrap(); let mut ident_ids = interns.all_ident_ids.remove(&home).unwrap();
// assume 64-bit pointers
let pointer_size = std::mem::size_of::<u64>() as u32;
// Populate Procs and Subs, and get the low-level Expr from the canonical Expr // Populate Procs and Subs, and get the low-level Expr from the canonical Expr
let mut mono_env = roc_mono::ir::Env { let mut mono_env = roc_mono::ir::Env {
arena: &arena, arena: &arena,
@ -96,7 +93,6 @@ mod test_reporting {
problems: &mut mono_problems, problems: &mut mono_problems,
home, home,
ident_ids: &mut ident_ids, ident_ids: &mut ident_ids,
pointer_size,
jump_counter: arena.alloc(0), jump_counter: arena.alloc(0),
}; };
let _mono_expr = Stmt::new(&mut mono_env, loc_expr.value, &mut procs); let _mono_expr = Stmt::new(&mut mono_env, loc_expr.value, &mut procs);
@ -3771,4 +3767,48 @@ mod test_reporting {
), ),
) )
} }
// Checks the case where a `when` has a specific `Foo _ 1 _` branch followed by a
// single catch-all `_` branch. The expected report passed here is the empty string.
// NOTE(review): presumably `report_problem_as` compares the rendered problem report
// against its second argument, so "" means "no problem reported" — confirm against
// the helper's definition.
#[test]
fn first_wildcard_is_required() {
report_problem_as(
indoc!(
r#"
when Foo 1 2 3 is
Foo _ 1 _ -> 1
_ -> 2
"#
),
"",
)
}
// Checks the case where a `when` has two consecutive catch-all `_` branches after a
// specific `Foo _ 1 _` branch. The expected text is a REDUNDANT PATTERN report that
// points at the 3rd pattern (the second `_`).
// NOTE(review): presumably `report_problem_as` compares the rendered problem report
// against its second argument — confirm against the helper's definition.
#[test]
fn second_wildcard_is_redundant() {
report_problem_as(
indoc!(
r#"
when Foo 1 2 3 is
Foo _ 1 _ -> 1
_ -> 2
_ -> 3
"#
),
indoc!(
r#"
-- REDUNDANT PATTERN -----------------------------------------------------------
The 3rd pattern is redundant:
1 when Foo 1 2 3 is
2 Foo _ 1 _ -> 1
3 _ -> 2
4 _ -> 3
^
Any value of this shape will be handled by a previous pattern, so this
one should be removed.
"#
),
)
}
} }

View file

@ -93,7 +93,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)] #[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);

View file

@ -2362,7 +2362,7 @@ mod solve_expr {
[] []
Ok next -> Ok next ->
List.push (reconstructPath cameFrom next) goal List.append (reconstructPath cameFrom next) goal
reconstructPath reconstructPath
"# "#
@ -2534,7 +2534,7 @@ mod solve_expr {
x = [] x = []
when List.get input 0 is when List.get input 0 is
Ok val -> List.push x val Ok val -> List.append x val
Err _ -> f input Err _ -> f input
f f
"# "#

View file

@ -2275,9 +2275,9 @@ mod solve_uniq_expr {
} }
#[test] #[test]
fn list_push() { fn list_append() {
infer_eq( infer_eq(
"List.push", "List.append",
"Attr * (Attr * (List a), a -> Attr * (List a))", "Attr * (Attr * (List a), a -> Attr * (List a))",
); );
} }
@ -2303,7 +2303,7 @@ mod solve_uniq_expr {
infer_eq( infer_eq(
indoc!( indoc!(
r#" r#"
singleton = \x -> List.push [] x singleton = \x -> List.append [] x
singleton singleton
"# "#
@ -2317,7 +2317,7 @@ mod solve_uniq_expr {
infer_eq( infer_eq(
indoc!( indoc!(
r#" r#"
reverse = \list -> List.foldr list (\e, l -> List.push l e) [] reverse = \list -> List.foldr list (\e, l -> List.append l e) []
reverse reverse
"# "#
@ -2742,7 +2742,7 @@ mod solve_uniq_expr {
[] []
Ok next -> Ok next ->
List.push (reconstructPath cameFrom next) goal List.append (reconstructPath cameFrom next) goal
reconstructPath reconstructPath
"# "#
@ -2812,7 +2812,7 @@ mod solve_uniq_expr {
[] []
Ok next -> Ok next ->
List.push (reconstructPath cameFrom next) goal List.append (reconstructPath cameFrom next) goal
updateCost : position, position, Model position -> Model position updateCost : position, position, Model position -> Model position
updateCost = \current, neighbour, model -> updateCost = \current, neighbour, model ->
@ -2897,7 +2897,7 @@ mod solve_uniq_expr {
[] []
Ok next -> Ok next ->
List.push (reconstructPath cameFrom next) goal List.append (reconstructPath cameFrom next) goal
updateCost : position, position, Model position -> Model position updateCost : position, position, Model position -> Model position

View file

@ -93,7 +93,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)] #[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> { pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);

View file

@ -718,6 +718,64 @@ However, it cannot involve record field access. So this would *not* compile:
{ Foo.defaults.config & timeZone: utc } { Foo.defaults.config & timeZone: utc }
``` ```
## Optional Record Fields
There's a pattern in Elm where you pass a function a record of configuration
values, some of which you don't really care about and want to leave as defaults.
To incorporate the default config options, you call the function like so:
```elm
table { defaultConfig | height = 800, width = 600 }
```
This way, as the caller I'm specifying only the `height` and `width` fields,
and leaving the others to whatever is inside `defaultConfig`. Perhaps it also
has the fields `x` and `y`.
In Roc, you can do this like so:
```elm
table { height: 800, width: 600 }
```
...and the `table` function will fill in its default values for `x` and `y`.
There is no need to use a `defaultConfig` record.
Here's how `table` would be defined in Roc:
```
table = \{ height, width, x ? 0.0, y ? 0.0 } ->
```
This is using *optional field destructuring* to destructure a record while
also providing default values for any fields that might be missing.
Here's the type of `table`:
```
table : { height : Float, width : Float, x ? Float, y ? Float } -> Table
table = \{ height, width, x ? 0.0, y ? 0.0 } ->
```
This says that `table` takes a record with two *required* fields (`height` and
`width`) and two *optional* fields (`x` and `y`). It also says that all of those
fields have the type `Float`. This means you can choose to omit `x`, `y`, or both,
when calling the function...but if you provide them, they must be numbers.
This is also the type that would have been inferred for `table` if no annotation
had been written. Roc's compiler can tell from the destructuring syntax
`x ? 0.0` that `x` is an optional field, and that it has the type `Float`. These
default values can reference other expressions in the record destructure; if you
wanted, you could write `{ height, width, x ? 0.0, y ? x + 1 }`.
Destructuring is the only way to implement a record with optional fields.
(For example, if you write the expression `config.x` and `x` is an optional field,
you'll get a compile error.)
This means it's never possible to end up with an "optional value" that exists
outside a record field. Optionality is a concept that exists only in record fields,
and it's intended for the use case of config records like this. The ergonomics
of destructuring mean this wouldn't be a good fit for data modeling.
## Standard Data Structures ## Standard Data Structures
Elm has `List`, `Array`, `Set`, and `Dict` in the standard library. Elm has `List`, `Array`, `Set`, and `Dict` in the standard library.