Merge remote-tracking branch 'origin/trunk' into refcount

This commit is contained in:
Folkert 2020-08-08 22:34:14 +02:00
commit 078c6df677
53 changed files with 3604 additions and 2377 deletions

31
Cargo.lock generated
View file

@ -69,6 +69,13 @@ version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034"
[[package]]
name = "arena-pool"
version = "0.1.0"
dependencies = [
"pretty_assertions",
]
[[package]]
name = "arrayvec"
version = "0.5.1"
@ -386,6 +393,20 @@ dependencies = [
"itertools",
]
[[package]]
name = "crossbeam"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69323bff1fb41c635347b8ead484a5ca6c3f11914d784170b158d8449ab07f8e"
dependencies = [
"cfg-if",
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-channel"
version = "0.4.3"
@ -527,6 +548,12 @@ version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "env_logger"
version = "0.6.2"
@ -2167,9 +2194,11 @@ name = "roc_load"
version = "0.1.0"
dependencies = [
"bumpalo",
"crossbeam",
"indoc",
"inlinable_string",
"maplit",
"num_cpus",
"pretty_assertions",
"quickcheck",
"quickcheck_macros",
@ -2184,7 +2213,6 @@ dependencies = [
"roc_solve",
"roc_types",
"roc_unify",
"tokio",
]
[[package]]
@ -2230,6 +2258,7 @@ name = "roc_parse"
version = "0.1.0"
dependencies = [
"bumpalo",
"encode_unicode",
"indoc",
"inlinable_string",
"pretty_assertions",

View file

@ -20,6 +20,7 @@ members = [
"compiler/load",
"compiler/gen",
"compiler/build",
"compiler/arena_pool",
"vendor/ena",
"vendor/pathfinding",
"vendor/pretty",

View file

@ -2,19 +2,17 @@
extern crate clap;
use bumpalo::Bump;
use clap::{App, Arg, ArgMatches};
use roc_build::program::gen;
use roc_collections::all::MutMap;
use roc_gen::llvm::build::OptLevel;
use roc_load::file::LoadingProblem;
use std::time::SystemTime;
use clap::{App, Arg, ArgMatches};
use std::io::{self, ErrorKind};
use std::path::{Path, PathBuf};
use std::process;
use std::process::Command;
use std::time::{Duration, SystemTime};
use target_lexicon::Triple;
use tokio::process::Command;
use tokio::runtime::Builder;
pub mod repl;
@ -105,14 +103,6 @@ pub fn build(matches: &ArgMatches, run_after_build: bool) -> io::Result<()> {
let path = Path::new(filename).canonicalize().unwrap();
let src_dir = path.parent().unwrap().canonicalize().unwrap();
// Create the runtime
let mut rt = Builder::new()
.thread_name("roc")
.threaded_scheduler()
.enable_io()
.build()
.expect("Error spawning initial compiler thread."); // TODO make this error nicer.
// Spawn the root task
let path = path.canonicalize().unwrap_or_else(|err| {
use ErrorKind::*;
@ -131,28 +121,31 @@ pub fn build(matches: &ArgMatches, run_after_build: bool) -> io::Result<()> {
}
}
});
let binary_path = rt
.block_on(build_file(src_dir, path, opt_level))
.expect("TODO gracefully handle block_on failing");
let binary_path =
build_file(src_dir, path, opt_level).expect("TODO gracefully handle build_file failing");
if run_after_build {
// Run the compiled app
rt.block_on(async {
Command::new(binary_path)
.spawn()
.unwrap_or_else(|err| panic!("Failed to run app after building it: {:?}", err))
.await
.map_err(|_| {
todo!("gracefully handle error after `app` spawned");
})
})
.wait()
.expect("TODO gracefully handle block_on failing");
}
Ok(())
}
async fn build_file(
fn report_timing(buf: &mut String, label: &str, duration: Duration) {
buf.push_str(&format!(
" {:.3} ms {}\n",
duration.as_secs_f64() * 1000.0,
label,
));
}
fn build_file(
src_dir: PathBuf,
filename: PathBuf,
opt_level: OptLevel,
@ -168,9 +161,35 @@ async fn build_file(
OptLevel::Normal => roc_builtins::std::standard_stdlib(),
OptLevel::Optimize => roc_builtins::unique::uniq_stdlib(),
};
let loaded = roc_load::file::load(&stdlib, src_dir, filename.clone(), subs_by_module).await?;
let loaded =
roc_load::file::load(filename.clone(), &stdlib, src_dir.as_path(), subs_by_module)?;
let dest_filename = filename.with_extension("o");
let buf = &mut String::with_capacity(1024);
for (module_id, module_timing) in loaded.timings.iter() {
let module_name = loaded.interns.module_name(*module_id);
buf.push_str(" ");
buf.push_str(module_name);
buf.push_str("\n");
report_timing(buf, "Read .roc file from disk", module_timing.read_roc_file);
report_timing(buf, "Parse header", module_timing.parse_header);
report_timing(buf, "Parse body", module_timing.parse_body);
report_timing(buf, "Canonicalize", module_timing.canonicalize);
report_timing(buf, "Constrain", module_timing.constrain);
report_timing(buf, "Solve", module_timing.solve);
report_timing(buf, "Other", module_timing.other());
buf.push('\n');
report_timing(buf, "Total", module_timing.total());
}
println!(
"\n\nCompilation finished! Here's how long each module took to compile:\n\n{}",
buf
);
gen(
&arena,
loaded,
@ -201,7 +220,7 @@ async fn build_file(
.map_err(|_| {
todo!("gracefully handle `ar` failing to spawn.");
})?
.await
.wait()
.map_err(|_| {
todo!("gracefully handle error after `ar` spawned");
})?;
@ -224,7 +243,7 @@ async fn build_file(
.map_err(|_| {
todo!("gracefully handle `rustc` failing to spawn.");
})?
.await
.wait()
.map_err(|_| {
todo!("gracefully handle error after `rustc` spawned");
})?;

View file

@ -33,6 +33,7 @@ use roc_types::types::Type;
use std::hash::Hash;
use std::io::{self, Write};
use std::path::PathBuf;
use std::str::from_utf8_unchecked;
use target_lexicon::Triple;
pub fn main() -> io::Result<()> {
@ -145,7 +146,7 @@ fn report_parse_error(fail: Fail) {
}
fn print_output(src: &str) -> Result<String, Fail> {
gen(src, Triple::host(), OptLevel::Normal).map(|(answer, answer_type)| {
gen(src.as_bytes(), Triple::host(), OptLevel::Normal).map(|(answer, answer_type)| {
format!("\n{} \u{001b}[35m:\u{001b}[0m {}", answer, answer_type)
})
}
@ -154,7 +155,7 @@ pub fn repl_home() -> ModuleId {
ModuleIds::default().get_or_insert(&"REPL".into())
}
pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, String), Fail> {
pub fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result<(String, String), Fail> {
use roc_reporting::report::{can_problem, type_problem, RocDocAllocator, DEFAULT_PALETTE};
// Look up the types and expressions of the `provided` values
@ -169,13 +170,16 @@ pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, St
interns,
problems: can_problems,
..
} = can_expr(src)?;
} = can_expr(src)?; // IMPORTANT: we must bail out here if there were UTF-8 errors!
let subs = Subs::new(var_store.into());
let mut type_problems = Vec::new();
let (content, mut subs) = infer_expr(subs, &mut type_problems, &constraint, var);
// SAFETY: we've already verified that this is valid UTF-8 during parsing.
let src_lines: Vec<&str> = unsafe { from_utf8_unchecked(src).split('\n').collect() };
// Report problems
let src_lines: Vec<&str> = src.split('\n').collect();
let palette = DEFAULT_PALETTE;
// Report parsing and canonicalization problems
@ -219,7 +223,7 @@ pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, St
let expr_type_str = content_to_string(content.clone(), &subs, home, &interns);
// Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs, ptr_bytes).unwrap_or_else(|err| {
let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
panic!(
"Code gen error in test: could not convert to layout. Err was {:?}",
err
@ -255,7 +259,6 @@ pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, St
problems: &mut mono_problems,
home,
ident_ids: &mut ident_ids,
pointer_size: ptr_bytes,
jump_counter: arena.alloc(0),
};
@ -391,8 +394,11 @@ pub fn infer_expr(
(content, solved.into_inner())
}
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
pub fn parse_loc_with<'a>(
arena: &'a Bump,
bytes: &'a [u8],
) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&bytes, Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);
@ -401,14 +407,14 @@ pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast
.map_err(|(fail, _)| fail)
}
pub fn can_expr(expr_str: &str) -> Result<CanExprOut, Fail> {
can_expr_with(&Bump::new(), repl_home(), expr_str)
pub fn can_expr(expr_bytes: &[u8]) -> Result<CanExprOut, Fail> {
can_expr_with(&Bump::new(), repl_home(), expr_bytes)
}
// TODO make this return a named struct instead of a big tuple
#[allow(clippy::type_complexity)]
pub fn uniq_expr(
expr_str: &str,
expr_bytes: &[u8],
) -> Result<
(
Located<roc_can::expr::Expr>,
@ -424,14 +430,14 @@ pub fn uniq_expr(
> {
let declared_idents: &ImMap<Ident, (Symbol, Region)> = &ImMap::default();
uniq_expr_with(&Bump::new(), expr_str, declared_idents)
uniq_expr_with(&Bump::new(), expr_bytes, declared_idents)
}
// TODO make this return a named struct instead of a big tuple
#[allow(clippy::type_complexity)]
pub fn uniq_expr_with(
arena: &Bump,
expr_str: &str,
expr_bytes: &[u8],
declared_idents: &ImMap<Ident, (Symbol, Region)>,
) -> Result<
(
@ -455,7 +461,7 @@ pub fn uniq_expr_with(
var,
interns,
..
} = can_expr_with(arena, home, expr_str)?;
} = can_expr_with(arena, home, expr_bytes)?;
// double check
let mut var_store = VarStore::new(old_var_store.fresh());
@ -510,8 +516,8 @@ pub struct CanExprOut {
pub constraint: Constraint,
}
pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_str: &str) -> Result<CanExprOut, Fail> {
let loc_expr = parse_loc_with(&arena, expr_str)?;
pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_bytes: &[u8]) -> Result<CanExprOut, Fail> {
let loc_expr = parse_loc_with(&arena, expr_bytes)?;
let mut var_store = VarStore::default();
let var = var_store.fresh();
let expected = Expected::NoExpectation(Type::Variable(var));

View file

@ -0,0 +1,11 @@
[package]
name = "arena-pool"
version = "0.1.0"
authors = ["Richard Feldman <oss@rtfeldman.com>"]
repository = "https://github.com/rtfeldman/roc"
edition = "2018"
description = "An arena allocator pool for Roc"
license = "Apache-2.0"
[dev-dependencies]
pretty_assertions = "0.5.1"

View file

@ -0,0 +1 @@
pub mod pool;

View file

@ -0,0 +1,396 @@
use std::marker::PhantomPinned;
use std::ptr::{copy_nonoverlapping, NonNull};
/// A handle to a single value allocated inside an `Arena`.
///
/// The handle does not borrow the arena. Instead, every dereference
/// (`get`/`get_mut`) takes an arena (or pool) and first verifies that the
/// pointer really lies inside memory that arena owns, panicking otherwise.
pub struct ArenaRef<T> {
    // Raw pointer into an arena chunk; only followed after verify_ownership.
    ptr: NonNull<T>,
    _pin: PhantomPinned,
}

impl<T> ArenaRef<T> {
    /// Immutably borrow the referenced value.
    ///
    /// Panics (inside `verify_ownership`) if `arena` does not own the memory
    /// this handle points into.
    pub fn get<'a, A: AsArena<T>>(&'a self, arena: &A) -> &'a T {
        arena.verify_ownership(self.ptr);

        // SAFETY: we know this pointer is safe to follow because it will only
        // get deallocated once the pool where it was created gets deallocated
        // (along with all of the Arenas it detached), and we just verified that
        // this ArenaRef's ID matches a pool which has not yet been deallocated.
        unsafe { self.ptr.as_ref() }
    }

    /// Mutably borrow the referenced value.
    ///
    /// Panics (inside `verify_ownership`) if `arena` does not own the memory
    /// this handle points into.
    ///
    /// NOTE(review): this hands out `&mut T` while taking `arena` by shared
    /// reference, so aliasing `&mut`s to the same slot are possible via two
    /// clones of a handle — confirm the intended aliasing discipline.
    pub fn get_mut<'a, A: AsArena<T>>(&'a mut self, arena: &A) -> &'a mut T {
        arena.verify_ownership(self.ptr);

        // SAFETY: we know this pointer is safe to follow because it will only
        // get deallocated once the pool where it was created gets deallocated
        // (along with all of the Arenas it detached), and we just verified that
        // this ArenaRef's ID matches a pool which has not yet been deallocated.
        unsafe { self.ptr.as_mut() }
    }
}
/// Like a Vec, except the capacity you give it initially is its maximum
/// capacity forever. If you ever exceed it, it'll panic!
///
/// NOTE(review): the doc above and the implementation disagree — `push`
/// below attempts to *grow* the buffer when capacity is exceeded rather
/// than panicking. Confirm which behavior is intended.
pub struct ArenaVec<T> {
    // Pointer into the owning Arena's memory; only dereferenced after
    // `verify_ownership` confirms the arena still owns it.
    buffer_ptr: NonNull<T>,
    // Number of initialized elements.
    len: usize,
    // Number of slots reserved starting at `buffer_ptr`.
    capacity: usize,
    _pin: PhantomPinned,
}
impl<T> ArenaVec<T> {
    /// Create an empty vec backed by the given arena.
    pub fn new_in(arena: &mut Arena<T>) -> Self {
        // We can't start with a NonNull::dangling pointer because when we go
        // to push elements into this, they'll try to verify the dangling
        // pointer resides in the arena it was given, which will likely panic.
        //
        // Instead, we'll take a pointer inside the arena but never use it
        // other than for verification, because our capacity is 0.
        Self::with_capacity_in(0, arena)
    }

    /// Create a vec with `capacity` slots reserved inside `arena`.
    pub fn with_capacity_in(capacity: usize, arena: &mut Arena<T>) -> Self {
        let ptr = arena.alloc_vec(capacity);

        Self {
            buffer_ptr: unsafe { NonNull::new_unchecked(ptr) },
            capacity,
            len: 0,
            _pin: PhantomPinned,
        }
    }

    /// Append `val`, growing the buffer (inside `arena`) if it is full.
    pub fn push<'a>(&'a mut self, val: T, arena: &mut Arena<T>) {
        // Verify that this is the arena where we originally got our buffer,
        // and is therefore safe to read and to write to. (If we have sufficient
        // capacity, we'll write to it, and otherwise we'll read from it when
        // copying our buffer over to the new reserved block.)
        arena.verify_ownership(self.buffer_ptr);

        if self.len >= self.capacity {
            // The buffer is full, so grow it.
            //
            // BUGFIX: the original check was `self.len <= self.capacity` with
            // the *empty* branch taken in that case, which (a) wrote one slot
            // past the end whenever len == capacity, and (b) made the growth
            // branch unreachable until len had already overrun the buffer.
            //
            // Double our capacity — taking care that 0 doubles to 1, not 0 —
            // and reserve a new block.
            self.capacity = (self.capacity * 2).max(1);

            let ptr = arena.alloc_vec(self.capacity);

            // SAFETY: the existing buffer must have at least self.len elements,
            // as must the new one, so copying that many between them is safe.
            unsafe {
                // Copy all elements from the current buffer into the new one
                copy_nonoverlapping(self.buffer_ptr.as_ptr(), ptr, self.len);
            }

            self.buffer_ptr = unsafe { NonNull::new_unchecked(ptr) };
        }

        // Store the element in the first free slot. `ptr::write` is required
        // here (the original did `*elem_ptr = val`): the slot is uninitialized
        // memory, and a normal assignment would drop the garbage "previous
        // value", which is undefined behavior.
        unsafe { self.buffer_ptr.as_ptr().add(self.len).write(val) };

        self.len += 1;
    }

    /// Bounds-checked read; returns `None` when `index >= len`.
    pub fn get<'a>(&'a self, index: usize, arena: &Arena<T>) -> Option<&'a T> {
        arena.verify_ownership(self.buffer_ptr);

        if index < self.len {
            // SAFETY: we know this pointer is safe to follow because we've
            // done a bounds check, and because we know it will only get
            // deallocated once the pool where it was created gets deallocated
            // (along with all of the Arenas it detached), and we just verified that
            // this ArenaRef's ID matches a pool which has not yet been deallocated.
            Some(unsafe { &*self.buffer_ptr.as_ptr().add(index) })
        } else {
            None
        }
    }

    /// Bounds-checked mutable read; returns `None` when `index >= len`.
    pub fn get_mut<'a>(&'a mut self, index: usize, arena: &Arena<T>) -> Option<&'a mut T> {
        arena.verify_ownership(self.buffer_ptr);

        if index < self.len {
            // SAFETY: we know this pointer is safe to follow because we've
            // done a bounds check, and because we know it will only get
            // deallocated once the pool where it was created gets deallocated
            // (along with all of the Arenas it detached), and we just verified that
            // this ArenaRef's ID matches a pool which has not yet been deallocated.
            Some(unsafe { &mut *self.buffer_ptr.as_ptr().add(index) })
        } else {
            None
        }
    }
}
/// Owner of all arena memory: one large first chunk, sliced up between the
/// leased `Arena`s, plus every extra chunk reabsorbed from them.
#[derive(PartialEq, Eq)]
pub struct ArenaPool<T> {
    // Backing storage for all arenas' first chunks. Its *capacity* is the
    // storage; its len stays 0 — writes happen through raw pointers.
    first_chunk: Vec<T>,
    // Extra chunks reclaimed from reabsorbed arenas (kept alive so their
    // pointers stay dereferenceable).
    extra_chunks: Vec<Vec<T>>,
    // Arenas currently leased out and not yet reabsorbed.
    num_leased: usize,
    // Capacity used for arenas' overflow chunks.
    default_chunk_capacity: usize,
}

impl<T> ArenaPool<T> {
    const DEFAULT_CHUNK_SIZE: usize = 1024;

    /// Be careful! Both of these arguments are of type usize.
    ///
    /// The first is the number of arenas.
    /// The second is the number of elements that will be in each arena.
    ///
    /// This returns a new Pool, and also an iterator of Arenas. These Arenas can
    /// be given to different threads, where they can be used to allocate
    /// ArenaRef and ArenaVec values which can then be dereferenced by the Arena
    /// that created them, or by this pool once those Arenas have been
    /// reabsorbed back into it.
    ///
    /// (A word of warning: if you try to use this pool to dereference ArenaRef
    /// and ArenaVec values which were allocated by arenas that have *not* yet
    /// been reabsorbed, it may work some of the time and panic other times,
    /// depending on whether the arena needed to allocate extra chunks beyond
    /// its initial chunk. tl;dr - doing that may panic, so don't try it!)
    ///
    /// Before this pool gets dropped, you must call reabsorb() on every
    /// arena that has been leased - otherwise, you'll get a panic when this
    /// gets dropped! The memory safety of the system depends on all arenas
    /// having been reabsorbed before the pool gets deallocated, which is why
    /// the pool's Drop implementation enforces it.
    pub fn new(num_arenas: usize, elems_per_arena: usize) -> (ArenaPool<T>, ArenaIter<T>) {
        Self::with_chunk_size(num_arenas, elems_per_arena, Self::DEFAULT_CHUNK_SIZE)
    }

    /// Like `new`, except you can also specify the chunk size that each
    /// arena will use to allocate its extra chunks if it runs out of space
    /// in its main buffer.
    ///
    /// Things will run fastest if that main buffer never runs out, though!
    pub fn with_chunk_size(
        num_arenas: usize,
        elems_per_arena: usize,
        chunk_size: usize,
    ) -> (ArenaPool<T>, ArenaIter<T>) {
        // By design this Vec's *capacity* is the storage; its len stays 0
        // and elements are only ever written through the raw pointers the
        // arenas hand out.
        //
        // NOTE(review): `elems_per_arena * num_arenas` can overflow usize
        // for huge inputs — confirm callers pass sane sizes.
        let mut first_chunk = Vec::with_capacity(elems_per_arena * num_arenas);
        // Each arena gets a disjoint elems_per_arena-sized slice of
        // first_chunk, handed out lazily by the iterator.
        let iter = ArenaIter {
            ptr: first_chunk.as_mut_ptr(),
            quantity_remaining: num_arenas,
            first_chunk_capacity: elems_per_arena,
        };
        let pool = Self {
            first_chunk,
            extra_chunks: Vec::new(),
            num_leased: num_arenas,
            default_chunk_capacity: chunk_size,
        };

        (pool, iter)
    }

    /// Return an arena to the pool. (This would have been called "return" but
    /// that's a reserved keyword.)
    pub fn reabsorb(&mut self, arena: Arena<T>) {
        // Ensure we're reabsorbing an arena that was
        // actually leased by this pool in the first place!
        verify_ownership(
            self.first_chunk.as_ptr(),
            self.first_chunk.capacity(),
            &self.extra_chunks,
            arena.first_chunk_ptr,
        );

        // Add the arena's extra chunks to our own, so their memory remains live
        // after the arena gets dropped. This is important, because at this
        // point their pointers can still potentially be dereferenced!
        self.extra_chunks.extend(arena.extra_chunks.into_iter());

        self.num_leased -= 1;
    }
}
impl<T> Drop for ArenaPool<T> {
    /// Enforce that every leased arena was reabsorbed before the pool dies.
    fn drop(&mut self) {
        // When an ArenaPool gets dropped, it must not have any leased
        // arenas remaining. If it does, there will be outstanding IDs which
        // could be used with those non-reabsorbed Arenas to read freed memory!
        // This would be a use-after-free; we panic rather than permit that.
        //
        // Include a diagnostic message: a bare `assert_eq!(x, 0)` failure
        // gives no hint about *why* the invariant matters or how to fix it.
        assert_eq!(
            self.num_leased, 0,
            "ArenaPool dropped with {} arena(s) still leased; every leased Arena \
             must be returned via `reabsorb` before the pool is dropped",
            self.num_leased
        );
    }
}
/// Iterator returned by `ArenaPool::new` / `with_chunk_size`; yields the
/// leased `Arena`s, each owning a disjoint `first_chunk_capacity`-element
/// slice of the pool's first chunk.
pub struct ArenaIter<T> {
    // Start of the *next* arena's slice within the pool's first chunk.
    ptr: *mut T,
    // How many more arenas remain to be leased.
    quantity_remaining: usize,
    // Elements per arena; also the stride between consecutive slices.
    first_chunk_capacity: usize,
}

impl<T> Iterator for ArenaIter<T> {
    type Item = Arena<T>;

    /// Lease the next arena, or return `None` once all
    /// `quantity_remaining` arenas have been handed out.
    fn next(&mut self) -> Option<Arena<T>> {
        if self.quantity_remaining != 0 {
            let first_chunk_ptr = self.ptr;

            // Advance by one arena's worth of elements, so the next arena
            // receives the following (disjoint) slice of the first chunk.
            self.ptr = unsafe { self.ptr.add(self.first_chunk_capacity) };
            self.quantity_remaining -= 1;

            Some(Arena {
                first_chunk_ptr,
                first_chunk_len: 0,
                first_chunk_cap: self.first_chunk_capacity,
                extra_chunks: Vec::new(),
            })
        } else {
            None
        }
    }
}
/// A thread-local allocator leased from an `ArenaPool`. It owns one slice of
/// the pool's first chunk, and allocates overflow into `extra_chunks`.
#[derive(PartialEq, Eq)]
pub struct Arena<T> {
    // Start of this arena's slice of the pool's first chunk.
    first_chunk_ptr: *mut T,
    // Slots already handed out from the first chunk.
    first_chunk_len: usize,
    // Total slots in this arena's slice of the first chunk.
    first_chunk_cap: usize,
    // Overflow storage, kept alive until reabsorbed by the pool.
    extra_chunks: Vec<Vec<T>>,
}

impl<T> Arena<T> {
    /// Allocate `val` in this arena and return a handle to it.
    pub fn alloc(&mut self, val: T) -> ArenaRef<T> {
        let ptr: *mut T = if self.first_chunk_len < self.first_chunk_cap {
            // We have enough room in the first chunk for 1 allocation.
            //
            // BUGFIX: take the pointer to the next free slot *before* bumping
            // the length — the original bumped first, which skipped slot 0
            // and wrote one element past the end of the chunk.
            let slot = unsafe { self.first_chunk_ptr.add(self.first_chunk_len) };

            // BUGFIX: the original never stored `val` on this path at all.
            // The slot is uninitialized memory, so use a raw write; a normal
            // `*slot = val` would drop the uninitialized "previous" value,
            // which is undefined behavior.
            unsafe { slot.write(val) };

            self.first_chunk_len += 1;

            slot
        } else {
            // We ran out of space in the first chunk, so we turn to extra chunks.
            // First, ensure that we have an extra chunk with enough space in it.
            match self.extra_chunks.last() {
                Some(chunk) => {
                    if chunk.len() >= chunk.capacity() {
                        // We've run out of space in our last chunk. Create a new
                        // one! (Pushing into a full Vec would reallocate it and
                        // dangle every pointer previously handed out from it.)
                        self.extra_chunks
                            .push(Vec::with_capacity(self.first_chunk_cap));
                    }
                }
                None => {
                    // We've never had extra chunks until now. Create the first one!
                    self.extra_chunks
                        .push(Vec::with_capacity(self.first_chunk_cap));
                }
            }

            let chunk = self.extra_chunks.last_mut().unwrap();
            let index = chunk.len();

            chunk.push(val);

            // Get a pointer to the element we just stored in our chunk.
            &mut chunk[index]
        };

        ArenaRef {
            ptr: unsafe { NonNull::new_unchecked(ptr) },
            _pin: PhantomPinned,
        }
    }

    /// Reserve `num_elems` contiguous slots and return a pointer to the first.
    fn alloc_vec(&mut self, num_elems: usize) -> *mut T {
        if self.first_chunk_len + num_elems <= self.first_chunk_cap {
            // We have enough room in the first chunk for this vec.
            //
            // BUGFIX: as in `alloc`, grab the pointer *before* bumping the
            // length; the original bumped first and handed back a pointer
            // past the end of the reserved region.
            let ptr = unsafe { self.first_chunk_ptr.add(self.first_chunk_len) };

            self.first_chunk_len += num_elems;

            ptr
        } else {
            let new_chunk_cap = self.first_chunk_cap.max(num_elems);

            // We ran out of space in the first chunk, so we turn to extra chunks.
            // First, ensure that we have an extra chunk with enough space in it.
            // (`>` rather than the original `>=`: an exact fit is fine.)
            let needs_new_chunk = match self.extra_chunks.last() {
                Some(chunk) => chunk.len() + num_elems > chunk.capacity(),
                None => true,
            };

            if needs_new_chunk {
                self.extra_chunks.push(Vec::with_capacity(new_chunk_cap));
            }

            let chunk = self.extra_chunks.last_mut().unwrap();
            let index = chunk.len();

            // BUGFIX: hand out a raw pointer into the chunk's spare capacity.
            // The original did `&mut chunk[index]`, which always panics,
            // because `index == chunk.len()` is out of bounds.
            //
            // NOTE(review): the chunk's len is not advanced here, so a later
            // `alloc`/`alloc_vec` can hand out overlapping memory from this
            // same chunk — the extra-chunk reservation accounting needs a
            // design pass.
            unsafe { chunk.as_mut_ptr().add(index) }
        }
    }
}
/// Something that can vouch for ownership of arena-allocated memory:
/// either a leased `Arena`, or the `ArenaPool` the arenas came from.
pub trait AsArena<T> {
    /// Panics unless `ptr` points into memory owned by `self`.
    fn verify_ownership(&self, ptr: NonNull<T>);
}

impl<T> AsArena<T> for ArenaPool<T> {
    // The pool owns the entire first chunk, plus every extra chunk it has
    // reabsorbed from returned arenas.
    fn verify_ownership(&self, ptr: NonNull<T>) {
        verify_ownership(
            self.first_chunk.as_ptr(),
            self.first_chunk.capacity(),
            &self.extra_chunks,
            ptr.as_ptr(),
        );
    }
}

impl<T> AsArena<T> for Arena<T> {
    // The arena owns only its own slice of the first chunk, plus the extra
    // chunks it has allocated (and not yet returned to the pool).
    fn verify_ownership(&self, ptr: NonNull<T>) {
        verify_ownership(
            self.first_chunk_ptr,
            self.first_chunk_cap,
            &self.extra_chunks,
            ptr.as_ptr(),
        );
    }
}
/// Panic unless `ptr` points into the first chunk (given by its base pointer
/// and capacity *in elements*) or into one of `extra_chunks`.
///
/// This is the use-after-free guard: an ArenaRef/ArenaVec pointer may only
/// be dereferenced through an arena/pool that actually owns its memory.
fn verify_ownership<T>(
    first_chunk_ptr: *const T,
    first_chunk_cap: usize,
    extra_chunks: &[Vec<T>],
    ptr: *const T,
) {
    // BUGFIX: capacities are measured in elements but addresses in bytes, so
    // the range end must be scaled by size_of::<T>(). The original added the
    // element count directly to the byte address, so for any T wider than one
    // byte it rejected valid pointers in the tail of each chunk (and could
    // accept stray pointers just past the start).
    //
    // NOTE(review): for zero-sized T every range here is empty and this
    // always panics — confirm ZSTs are never stored in arenas.
    let elem_size = std::mem::size_of::<T>();
    let addr = ptr as usize;

    let start_addr = first_chunk_ptr as usize;
    let end_addr = start_addr + first_chunk_cap * elem_size;

    if start_addr <= addr && addr < end_addr {
        // This is within our first chunk's address space, so it's verified!
        return;
    }

    // This wasn't within our first chunk's address space, so we need
    // to see if we can find it in one of our extra_chunks.
    for chunk in extra_chunks {
        let start_addr = chunk.as_ptr() as usize;
        let end_addr = start_addr + chunk.capacity() * elem_size;

        if start_addr <= addr && addr < end_addr {
            // Found it! No need to loop anymore; verification passed.
            return;
        }
    }

    // The address wasn't within any of our chunks' bounds.
    // Panic to avoid use-after-free errors!
    panic!("Pointer ownership verification failed.");
}

View file

@ -0,0 +1,17 @@
extern crate arena_pool;

#[cfg(test)]
mod test_arena_pool {
    use arena_pool::pool::{ArenaIter, ArenaPool};

    /// Zero-sized pools are a degenerate case: no arenas are leased, so
    /// constructing (and immediately dropping) them should be entirely
    /// uneventful — in particular, neither constructor should panic.
    #[test]
    fn empty_pool() {
        let _: (ArenaPool<()>, ArenaIter<()>) = ArenaPool::new(0, 0);
        let _: (ArenaPool<()>, ArenaIter<()>) = ArenaPool::with_chunk_size(0, 0, 0);
    }
}

View file

@ -136,14 +136,14 @@ pub fn gen(
fpm.initialize();
// Compute main_fn_type before moving subs to Env
let ptr_bytes = target.pointer_width().unwrap().bytes() as u32;
let layout = Layout::new(&arena, content, &subs, ptr_bytes).unwrap_or_else(|err| {
let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
panic!(
"Code gen error in Program: could not convert to layout. Err was {:?}",
err
)
});
let ptr_bytes = target.pointer_width().unwrap().bytes() as u32;
let main_fn_type =
basic_type_from_layout(&arena, &context, &layout, ptr_bytes).fn_type(&[], false);
let main_fn_name = "$main";
@ -169,7 +169,6 @@ pub fn gen(
problems: &mut mono_problems,
home,
ident_ids: &mut ident_ids,
pointer_size: ptr_bytes,
jump_counter: arena.alloc(0),
};

View file

@ -19,45 +19,24 @@ not : [True, False] -> [True, False]
##
## ## Performance Notes
##
## In dev builds, this works exactly as described. In release builds, as a
## performance optimization, the compiler translates calls to #Bool.and
## (and #Bool.or) from function calls into conditionals. If its first
## argument evaluates to #False, then any function calls in the second argument
## get skipped, and the entire expression immediately evaluates to #False.
## In some languages, `&&` and `||` are special-cased in the compiler to skip
## evaluating the expression after the operator under certain circumstances.
## For example, in some languages, `enablePets && likesDogs user` would compile
## to the equivalent of:
##
## For example:
## if enablePets then
## likesDogs user
## else
## False
##
## List.isEmpty list && Str.isEmpty str
## In Roc, however, `&&` and `||` are not special. They work the same way as
## other functions. Conditionals like `if` and `when` have a performance cost,
## and sometimes calling a function like `likesDogs user` can be faster across
## the board than doing an `if` to decide whether to skip calling it.
##
## In a release build, `Str.isEmpty` will only be called if `List.isEmpty list`
## returns #True. If `List.isEmpty list` returns #False, the entire expression
## will immediately evaluate to #False.
##
## Since all Roc expressions are pure, this will always give the same answer
## as if both #Bool arguments had been fully evaluated (as they are in
## dev builds), but it can potentially avoid costly function calls in release builds.
##
## Because this optimization only skips function calls, you can opt out of it
## by calling the function up front, and giving its result a name. For example:
##
## emptyStr = Str.isEmpty str
##
## List.isEmpty list && emptyStr
##
## Here, `Str.isEmpty` will always be called no matter what, and the `&&` will
## not get compiled to a conditional because there are no function calls
## involved in its second argument.
##
## If you know the functions involved in the second argument are trivial
## (for example, they are other #&&, #||, and #Bool.not operations), then
## this can potentially be a (likely extremely minor) performance optimization
## because a logical `AND` instruction typically executes faster than a
## [branch misprediction](https://danluu.com/branch-prediction).
##
## That said, in practice the `&& Str.isEmpty str` approach will typically run
## faster than the `&& emptyStr` approach - both for `Str.isEmpty` in particular
## as well as for most functions in general.
and : [True, False], [True, False] -> [True, False]
## (Naturally, if you expect the `if` to improve performance, you can always add
## one explicitly!)
and : Bool, Bool -> Bool
## Returns #True when given #True for either argument, and #False only when given #False and #False.
@ -74,18 +53,19 @@ and : [True, False], [True, False] -> [True, False]
##
## ## Performance Notes
##
## #Bool.or does the same "compile to a conditional in release mode" optimization
## that #Bool.and does, except it short-circuits when the first argument is
## #True (causing it to immediately return #True).
## In some languages, `&&` and `||` are special-cased in the compiler to skip
## evaluating the expression after the operator under certain circumstances.
##
## See the performance notes for #Bool.and for details.
or : [True, False], [True, False] -> [True, False]
## In Roc, this is not the case. See the performance notes for #Bool.and for details.
or : Bool, Bool -> Bool
## Exclusive or
xor : [True, False], [True, False] -> [True, False]
xor : Bool, Bool -> Bool
## Returns #True if the two values are *structurally equal*, and #False otherwise.
##
## `a == b` is shorthand for `Bool.isEq a b`
##
## Structural equality works as follows:
##
## 1. #Int and #Float values are equal if their numbers are equal.
@ -93,18 +73,15 @@ xor : [True, False], [True, False] -> [True, False]
## 3. Global tags are equal if they are the same tag, and also their contents (if any) are equal.
## 4. Private tags are equal if they are the same tag, in the same module, and also their contents (if any) are equal.
## 5. Collections (#String, #List, #Map, #Set, and #Bytes) are equal if they are the same length, and also all their corresponding elements are equal.
## 6. All functions are considered equal. (So `Bool.not == Bool.not` will return #True, as you might expect, but also `Num.abs == Num.negate` will return #True, as you might not. This design is because function equality has been formally proven to be undecidable in the general case, and returning #True in all cases turns out to be mostly harmless - especially compared to alternative designs like crashing, making #equal inconvenient to use, and so on.)
##
## This function always crashes when given two functions, or an erroneous
## #Float value (see #Float.isErroneous)
##
## This is the same as the #== operator.
isEq : val, val -> [True, False]
## Note that `isEq` takes `'val` instead of `val`, which means `isEq` does not
## accept arguments whose types contain functions.
isEq : 'val, 'val -> Bool
## Calls #eq on the given values, then calls #not on the result.
##
## This is the same as the #!= operator.
isNe : val, val -> [True, False]
isNe = \left, right ->
not (equal left right)
## `a != b` is shorthand for `Bool.isNotEq a b`
##
## Note that `isNotEq` takes `'val` instead of `val`, which means `isNotEq` does not
## accept arguments whose types contain functions.
isNotEq : 'val, 'val -> Bool

View file

@ -2,17 +2,27 @@ interface Set
exposes [ Set, map, isEmpty ]
imports []
## An empty set.
empty : Set *
## Check
# isEmpty : List * -> Bool
isEmpty : Set * -> Bool
## Convert each element in the list to something new, by calling a conversion
## function on each of them. Then return a new list of the converted values.
len : Set * -> Len
add : Set 'elem, 'elem -> Set 'elem
rem : Set 'elem, 'elem -> Set 'elem
## Convert each element in the set to something new, by calling a conversion
## function on each of them. Then return a new set of the converted values.
##
## >>> Set.map {[ -1, 1, 3 ]} Num.negate
## >>> Set.map {: -1, 1, 3 :} Num.negate
##
## >>> Set.map {[ "", "a", "bc" ]} Str.isEmpty
## >>> Set.map {: "", "a", "bc" :} Str.isEmpty
##
## `map` functions like this are common in Roc, and they all work similarly.
## See for example #Result.map, #List.map, and #Map.map.
map : List before, (before -> after) -> List after
map : Set 'elem, ('before -> 'after) -> Set 'after

View file

@ -510,9 +510,18 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
),
);
// push : List elem -> elem -> List elem
// append : List elem, elem -> List elem
add_type(
Symbol::LIST_PUSH,
Symbol::LIST_APPEND,
SolvedType::Func(
vec![list_type(flex(TVAR1)), flex(TVAR1)],
Box::new(list_type(flex(TVAR1))),
),
);
// prepend : List elem, elem -> List elem
add_type(
Symbol::LIST_PREPEND,
SolvedType::Func(
vec![list_type(flex(TVAR1)), flex(TVAR1)],
Box::new(list_type(flex(TVAR1))),
@ -543,15 +552,6 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
),
);
// append : List elem, List elem -> List elem
add_type(
Symbol::LIST_APPEND,
SolvedType::Func(
vec![list_type(flex(TVAR1)), list_type(flex(TVAR1))],
Box::new(list_type(flex(TVAR1))),
),
);
// len : List * -> Int
add_type(
Symbol::LIST_LEN,

View file

@ -638,8 +638,8 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
)
});
// append : Attr * (List (Attr * a)), Attr * (List (Attr * a)) -> Attr * (List (Attr * a))
add_type(Symbol::LIST_APPEND, {
// concat : Attr * (List (Attr * a)), Attr * (List (Attr * a)) -> Attr * (List (Attr * a))
add_type(Symbol::LIST_CONCAT, {
let_tvars! { a, star1, star2, star3 };
unique_function(
@ -669,13 +669,43 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
)
});
// push : Attr * (List a)
// append : Attr * (List a)
// , a
// -> Attr * (List a)
//
// NOTE: we demand the new item to have the same uniqueness as the other list items.
// It could be allowed to add unique items to shared lists, but that requires special code gen
add_type(Symbol::LIST_PUSH, {
add_type(Symbol::LIST_APPEND, {
let_tvars! { a, star1, star2 };
unique_function(
vec![
SolvedType::Apply(
Symbol::ATTR_ATTR,
vec![
flex(star1),
SolvedType::Apply(Symbol::LIST_LIST, vec![flex(a)]),
],
),
flex(a),
],
SolvedType::Apply(
Symbol::ATTR_ATTR,
vec![
boolean(star2),
SolvedType::Apply(Symbol::LIST_LIST, vec![flex(a)]),
],
),
)
});
// prepend : Attr * (List a)
// , a
// -> Attr * (List a)
//
// NOTE: we demand the new item to have the same uniqueness as the other list items.
// It could be allowed to add unique items to shared lists, but that requires special code gen
add_type(Symbol::LIST_PREPEND, {
let_tvars! { a, star1, star2 };
unique_function(

View file

@ -53,13 +53,14 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
Symbol::LIST_LEN => list_len,
Symbol::LIST_GET => list_get,
Symbol::LIST_SET => list_set,
Symbol::LIST_PUSH => list_push,
Symbol::LIST_APPEND => list_append,
Symbol::LIST_FIRST => list_first,
Symbol::LIST_IS_EMPTY => list_is_empty,
Symbol::LIST_SINGLE => list_single,
Symbol::LIST_REPEAT => list_repeat,
Symbol::LIST_REVERSE => list_reverse,
Symbol::LIST_APPEND => list_append,
Symbol::LIST_CONCAT => list_concat,
Symbol::LIST_PREPEND => list_prepend,
Symbol::NUM_ADD => num_add,
Symbol::NUM_SUB => num_sub,
Symbol::NUM_MUL => num_mul,
@ -617,12 +618,12 @@ fn list_reverse(symbol: Symbol, var_store: &mut VarStore) -> Def {
)
}
/// List.append : List elem, List elem -> List elem
fn list_append(symbol: Symbol, var_store: &mut VarStore) -> Def {
/// List.concat : List elem, List elem -> List elem
fn list_concat(symbol: Symbol, var_store: &mut VarStore) -> Def {
let list_var = var_store.fresh();
let body = RunLowLevel {
op: LowLevel::ListAppend,
op: LowLevel::ListConcat,
args: vec![
(list_var, Var(Symbol::ARG_1)),
(list_var, Var(Symbol::ARG_2)),
@ -856,13 +857,36 @@ fn list_set(symbol: Symbol, var_store: &mut VarStore) -> Def {
)
}
/// List.push : List elem, elem -> List elem
fn list_push(symbol: Symbol, var_store: &mut VarStore) -> Def {
/// List.append : List elem, elem -> List elem
fn list_append(symbol: Symbol, var_store: &mut VarStore) -> Def {
    // Fresh type variables: one for the list (also the return type),
    // one for the element being appended.
    let list_var = var_store.fresh();
    let elem_var = var_store.fresh();

    // The builtin's body is a direct call into the ListAppend low-level op.
    let low_level_call = RunLowLevel {
        op: LowLevel::ListAppend,
        ret_var: list_var,
        args: vec![
            (list_var, Var(Symbol::ARG_1)),
            (elem_var, Var(Symbol::ARG_2)),
        ],
    };

    defn(
        symbol,
        vec![(list_var, Symbol::ARG_1), (elem_var, Symbol::ARG_2)],
        var_store,
        low_level_call,
        list_var,
    )
}
/// List.prepend : List elem, elem -> List elem
fn list_prepend(symbol: Symbol, var_store: &mut VarStore) -> Def {
let list_var = var_store.fresh();
let elem_var = var_store.fresh();
let body = RunLowLevel {
op: LowLevel::ListPrepend,
args: vec![
(list_var, Var(Symbol::ARG_1)),
(elem_var, Var(Symbol::ARG_2)),

View file

@ -27,7 +27,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View file

@ -20,7 +20,7 @@ mod test_fmt {
use roc_parse::parser::{Fail, Parser, State};
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc!(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);
@ -55,7 +55,7 @@ mod test_fmt {
let src = src.trim_end();
let expected = expected.trim_end();
match module::header().parse(&arena, State::new(&src, Attempting::Module)) {
match module::header().parse(&arena, State::new(src.as_bytes(), Attempting::Module)) {
Ok((actual, state)) => {
let mut buf = String::new_in(&arena);

View file

@ -1828,7 +1828,7 @@ fn list_push<'a, 'ctx, 'env>(
let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes);
let ptr_type = get_ptr_type(&elem_type, AddressSpace::Generic);
let elems_ptr = load_list_ptr(builder, original_wrapper, ptr_type);
let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type);
// The output list length, which is the old list length + 1
let new_list_len = env.builder.build_int_add(
@ -1837,7 +1837,6 @@ fn list_push<'a, 'ctx, 'env>(
"new_list_length",
);
let ctx = env.context;
let ptr_bytes = env.ptr_bytes;
// Calculate the number of bytes we'll need to allocate.
@ -1863,7 +1862,7 @@ fn list_push<'a, 'ctx, 'env>(
// one we just malloc'd.
//
// TODO how do we decide when to do the small memcpy vs the normal one?
builder.build_memcpy(clone_ptr, ptr_bytes, elems_ptr, ptr_bytes, list_size);
builder.build_memcpy(clone_ptr, ptr_bytes, list_ptr, ptr_bytes, list_size);
} else {
panic!("TODO Cranelift currently only knows how to clone list elements that are Copy.");
}
@ -1887,17 +1886,105 @@ fn list_push<'a, 'ctx, 'env>(
.build_insert_value(struct_val, new_list_len, Builtin::WRAPPER_LEN, "insert_len")
.unwrap();
let answer = builder.build_bitcast(
struct_val.into_struct_value(),
collection(ctx, ptr_bytes),
"cast_collection",
);
let elem_ptr = unsafe { builder.build_in_bounds_gep(clone_ptr, &[list_len], "load_index") };
builder.build_store(elem_ptr, elem);
answer
builder.build_bitcast(
struct_val.into_struct_value(),
collection(ctx, ptr_bytes),
"cast_collection",
)
}
/// List.prepend : List elem, elem -> List elem
///
/// Builds a brand-new list whose first slot holds `elem`, followed by a
/// copy of every element of the incoming list. The input wrapper is not
/// mutated; a fresh allocation of length `len + 1` is returned.
fn list_prepend<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    original_wrapper: StructValue<'ctx>,
    elem: BasicValueEnum<'ctx>,
    elem_layout: &Layout<'a>,
) -> BasicValueEnum<'ctx> {
    let builder = env.builder;
    let ctx = env.context;

    // Load the usize length from the wrapper.
    let list_len = load_list_len(builder, original_wrapper);
    let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes);
    let ptr_type = get_ptr_type(&elem_type, AddressSpace::Generic);
    let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type);

    // The output list length, which is the old list length + 1
    let new_list_len = env.builder.build_int_add(
        ctx.i64_type().const_int(1, false),
        list_len,
        "new_list_length",
    );

    let ptr_bytes = env.ptr_bytes;

    // Allocate space for the new array that we'll copy into.
    // NOTE: reuses `elem_type` computed above — the original recomputed the
    // identical value here via a second `basic_type_from_layout` call.
    let clone_ptr = builder
        .build_array_malloc(elem_type, new_list_len, "list_ptr")
        .unwrap();

    let int_type = ptr_int(ctx, ptr_bytes);
    let ptr_as_int = builder.build_ptr_to_int(clone_ptr, int_type, "list_cast_ptr");

    // The prepended element goes in slot 0 of the fresh allocation...
    builder.build_store(clone_ptr, elem);

    // ...and the original elements are copied in starting at slot 1.
    let index_1_ptr = unsafe {
        builder.build_in_bounds_gep(
            clone_ptr,
            &[ctx.i64_type().const_int(1, false)],
            "load_index",
        )
    };

    // Calculate the number of bytes we'll need to allocate.
    let elem_bytes = env
        .ptr_int()
        .const_int(elem_layout.stack_size(env.ptr_bytes) as u64, false);

    // This is the size of the list coming in, before we have added an element
    // to the beginning.
    let list_size = env
        .builder
        .build_int_mul(elem_bytes, list_len, "mul_old_len_by_elem_bytes");

    if elem_layout.safe_to_memcpy() {
        // Copy the bytes from the original array into the new
        // one we just malloc'd.
        //
        // TODO how do we decide when to do the small memcpy vs the normal one?
        builder.build_memcpy(index_1_ptr, ptr_bytes, list_ptr, ptr_bytes, list_size);
    } else {
        panic!("TODO Cranelift currently only knows how to clone list elements that are Copy.");
    }

    // Create a fresh wrapper struct for the newly populated array
    let struct_type = collection(ctx, env.ptr_bytes);
    let mut struct_val;

    // Store the pointer
    struct_val = builder
        .build_insert_value(
            struct_type.get_undef(),
            ptr_as_int,
            Builtin::WRAPPER_PTR,
            "insert_ptr",
        )
        .unwrap();

    // Store the length
    struct_val = builder
        .build_insert_value(struct_val, new_list_len, Builtin::WRAPPER_LEN, "insert_len")
        .unwrap();

    builder.build_bitcast(
        struct_val.into_struct_value(),
        collection(ctx, ptr_bytes),
        "cast_collection",
    )
}
fn list_set<'a, 'ctx, 'env>(
@ -2177,8 +2264,8 @@ fn run_low_level<'a, 'ctx, 'env>(
}
}
}
ListAppend => list_append(env, scope, parent, args),
ListPush => {
ListConcat => list_concat(env, scope, parent, args),
ListAppend => {
// List.append : List elem, elem -> List elem
debug_assert_eq!(args.len(), 2);
@ -2187,6 +2274,15 @@ fn run_low_level<'a, 'ctx, 'env>(
list_push(env, original_wrapper, elem, elem_layout)
}
ListPrepend => {
// List.prepend List elem, elem -> List elem
debug_assert_eq!(args.len(), 2);
let original_wrapper = load_symbol(env, scope, &args[0]).into_struct_value();
let (elem, elem_layout) = load_symbol_and_layout(env, scope, &args[1]);
list_prepend(env, original_wrapper, elem, elem_layout)
}
NumAbs | NumNeg | NumRound | NumSqrtUnchecked | NumSin | NumCos | NumToFloat => {
debug_assert_eq!(args.len(), 1);
@ -2395,13 +2491,13 @@ fn build_int_binop<'a, 'ctx, 'env>(
}
}
fn list_append<'a, 'ctx, 'env>(
fn list_concat<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
args: &[Symbol],
) -> BasicValueEnum<'ctx> {
// List.append : List elem, List elem -> List elem
// List.concat : List elem, List elem -> List elem
debug_assert_eq!(args.len(), 2);
// This implementation is quite long, let me explain what is complicating it. Here are our
@ -2489,7 +2585,7 @@ fn list_append<'a, 'ctx, 'env>(
}
_ => {
unreachable!(
"Invalid List layout for second input list of List.append: {:?}",
"Invalid List layout for second input list of List.concat: {:?}",
second_list_layout
);
}
@ -2557,7 +2653,7 @@ fn list_append<'a, 'ctx, 'env>(
// FIRST LOOP
{
let first_loop_bb =
ctx.append_basic_block(parent, "first_list_append_loop");
ctx.append_basic_block(parent, "first_list_concat_loop");
builder.build_unconditional_branch(first_loop_bb);
builder.position_at_end(first_loop_bb);
@ -2628,7 +2724,7 @@ fn list_append<'a, 'ctx, 'env>(
// SECOND LOOP
{
let second_loop_bb =
ctx.append_basic_block(parent, "second_list_append_loop");
ctx.append_basic_block(parent, "second_list_concat_loop");
builder.build_unconditional_branch(second_loop_bb);
builder.position_at_end(second_loop_bb);
@ -2754,7 +2850,7 @@ fn list_append<'a, 'ctx, 'env>(
}
_ => {
unreachable!(
"Invalid List layout for second input list of List.append: {:?}",
"Invalid List layout for second input list of List.concat: {:?}",
second_list_layout
);
}
@ -2799,7 +2895,7 @@ fn list_append<'a, 'ctx, 'env>(
}
_ => {
unreachable!(
"Invalid List layout for second input list of List.append: {:?}",
"Invalid List layout for second input list of List.concat: {:?}",
second_list_layout
);
}
@ -2817,7 +2913,7 @@ fn list_append<'a, 'ctx, 'env>(
}
_ => {
unreachable!(
"Invalid List layout for first list in List.append : {:?}",
"Invalid List layout for first list in List.concat : {:?}",
first_list_layout
);
}

View file

@ -31,10 +31,10 @@ mod gen_list {
}
#[test]
fn list_push() {
assert_evals_to!("List.push [1] 2", &[1, 2], &'static [i64]);
assert_evals_to!("List.push [1, 1] 2", &[1, 1, 2], &'static [i64]);
assert_evals_to!("List.push [] 3", &[3], &'static [i64]);
fn list_append() {
assert_evals_to!("List.append [1] 2", &[1, 2], &'static [i64]);
assert_evals_to!("List.append [1, 1] 2", &[1, 1, 2], &'static [i64]);
assert_evals_to!("List.append [] 3", &[3], &'static [i64]);
assert_evals_to!(
indoc!(
r#"
@ -42,24 +42,55 @@ mod gen_list {
initThrees =
[]
List.push (List.push initThrees 3) 3
List.append (List.append initThrees 3) 3
"#
),
&[3, 3],
&'static [i64]
);
assert_evals_to!(
"List.push [ True, False ] True",
"List.append [ True, False ] True",
&[true, false, true],
&'static [bool]
);
assert_evals_to!(
"List.push [ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 ] 23",
"List.append [ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 ] 23",
&[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
&'static [i64]
);
}
#[test]
fn list_prepend() {
    // Prepending onto an empty list and a one-element list.
    assert_evals_to!("List.prepend [] 1", &[1], &'static [i64]);
    assert_evals_to!("List.prepend [2] 1", &[1, 2], &'static [i64]);

    // Nested prepends onto an annotated empty list:
    // the outermost prepend's element ends up first.
    assert_evals_to!(
        indoc!(
            r#"
init : List Int
init =
[]
List.prepend (List.prepend init 4) 6
"#
        ),
        &[6, 4],
        &'static [i64]
    );

    // Non-numeric elements work too.
    assert_evals_to!(
        "List.prepend [ True, False ] True",
        &[true, true, false],
        &'static [bool]
    );

    // A longer list, exercising the bulk element-copy path.
    assert_evals_to!(
        "List.prepend [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 100, 100, 100, 100 ] 9",
        &[9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 100, 100, 100, 100],
        &'static [i64]
    );
}
#[test]
fn list_single() {
assert_evals_to!("List.single 1", &[1], &'static [i64]);
@ -119,8 +150,8 @@ mod gen_list {
}
#[test]
fn list_append() {
assert_evals_to!("List.append [] []", &[], &'static [i64]);
fn list_concat() {
assert_evals_to!("List.concat [] []", &[], &'static [i64]);
assert_evals_to!(
indoc!(
@ -133,30 +164,30 @@ mod gen_list {
secondList =
[]
List.append firstList secondList
List.concat firstList secondList
"#
),
&[],
&'static [i64]
);
assert_evals_to!("List.append [ 12, 13 ] []", &[12, 13], &'static [i64]);
assert_evals_to!("List.concat [ 12, 13 ] []", &[12, 13], &'static [i64]);
assert_evals_to!(
"List.append [ 34, 43 ] [ 64, 55, 66 ]",
"List.concat [ 34, 43 ] [ 64, 55, 66 ]",
&[34, 43, 64, 55, 66],
&'static [i64]
);
assert_evals_to!("List.append [] [ 23, 24 ]", &[23, 24], &'static [i64]);
assert_evals_to!("List.concat [] [ 23, 24 ]", &[23, 24], &'static [i64]);
assert_evals_to!(
"List.append [ 1, 2 ] [ 3, 4 ]",
"List.concat [ 1, 2 ] [ 3, 4 ]",
&[1, 2, 3, 4],
&'static [i64]
);
}
fn assert_append_worked(num_elems1: i64, num_elems2: i64) {
fn assert_concat_worked(num_elems1: i64, num_elems2: i64) {
let vec1: Vec<i64> = (0..num_elems1)
.map(|i| 12345 % (i + num_elems1 + num_elems2 + 1))
.collect();
@ -172,51 +203,51 @@ mod gen_list {
let expected_slice: &[i64] = expected.as_ref();
assert_evals_to!(
&format!("List.append {} {}", slice_str1, slice_str2),
&format!("List.concat {} {}", slice_str1, slice_str2),
expected_slice,
&'static [i64]
);
}
#[test]
fn list_append_empty_list() {
assert_append_worked(0, 0);
assert_append_worked(1, 0);
assert_append_worked(2, 0);
assert_append_worked(3, 0);
assert_append_worked(4, 0);
assert_append_worked(7, 0);
assert_append_worked(8, 0);
assert_append_worked(9, 0);
assert_append_worked(25, 0);
assert_append_worked(150, 0);
assert_append_worked(0, 1);
assert_append_worked(0, 2);
assert_append_worked(0, 3);
assert_append_worked(0, 4);
assert_append_worked(0, 7);
assert_append_worked(0, 8);
assert_append_worked(0, 9);
assert_append_worked(0, 25);
assert_append_worked(0, 150);
fn list_concat_empty_list() {
    // Concatenating with an empty list on either side must yield the other
    // operand unchanged; the sizes below sweep small and large non-empty
    // operands (see assert_concat_worked for how the inputs are built).

    // Empty list on the right.
    assert_concat_worked(0, 0);
    assert_concat_worked(1, 0);
    assert_concat_worked(2, 0);
    assert_concat_worked(3, 0);
    assert_concat_worked(4, 0);
    assert_concat_worked(7, 0);
    assert_concat_worked(8, 0);
    assert_concat_worked(9, 0);
    assert_concat_worked(25, 0);
    assert_concat_worked(150, 0);

    // Empty list on the left.
    assert_concat_worked(0, 1);
    assert_concat_worked(0, 2);
    assert_concat_worked(0, 3);
    assert_concat_worked(0, 4);
    assert_concat_worked(0, 7);
    assert_concat_worked(0, 8);
    assert_concat_worked(0, 9);
    assert_concat_worked(0, 25);
    assert_concat_worked(0, 150);
}
#[test]
fn list_append_nonempty_lists() {
assert_append_worked(1, 1);
assert_append_worked(1, 2);
assert_append_worked(1, 3);
assert_append_worked(2, 3);
assert_append_worked(2, 1);
assert_append_worked(2, 2);
assert_append_worked(3, 1);
assert_append_worked(3, 2);
assert_append_worked(2, 3);
assert_append_worked(3, 3);
assert_append_worked(4, 4);
assert_append_worked(150, 150);
assert_append_worked(129, 350);
assert_append_worked(350, 129);
/// Concatenating two non-empty lists across a range of size pairs,
/// including large lists that exercise the bulk-copy path.
/// (The original listed `(2, 3)` twice; the duplicate is removed and the
/// small cases are ordered so gaps in coverage are easy to spot.)
fn list_concat_nonempty_lists() {
    assert_concat_worked(1, 1);
    assert_concat_worked(1, 2);
    assert_concat_worked(1, 3);
    assert_concat_worked(2, 1);
    assert_concat_worked(2, 2);
    assert_concat_worked(2, 3);
    assert_concat_worked(3, 1);
    assert_concat_worked(3, 2);
    assert_concat_worked(3, 3);
    assert_concat_worked(4, 4);
    assert_concat_worked(150, 150);
    assert_concat_worked(129, 350);
    assert_concat_worked(350, 129);
}
#[test]

View file

@ -67,7 +67,7 @@ pub fn helper_without_uniqueness<'a>(
fpm.initialize();
// Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs, ptr_bytes).unwrap_or_else(|err| {
let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
panic!(
"Code gen error in NON-OPTIMIZED test: could not convert to layout. Err was {:?}",
err
@ -103,7 +103,6 @@ pub fn helper_without_uniqueness<'a>(
problems: &mut mono_problems,
home,
ident_ids: &mut ident_ids,
pointer_size: ptr_bytes,
jump_counter: arena.alloc(0),
};
@ -258,7 +257,7 @@ pub fn helper_with_uniqueness<'a>(
fpm.initialize();
// Compute main_fn_type before moving subs to Env
let layout = Layout::new(&arena, content, &subs, ptr_bytes).unwrap_or_else(|err| {
let layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| {
panic!(
"Code gen error in OPTIMIZED test: could not convert to layout. Err was {:?}",
err
@ -296,7 +295,6 @@ pub fn helper_with_uniqueness<'a>(
problems: &mut mono_problems,
home,
ident_ids: &mut ident_ids,
pointer_size: ptr_bytes,
jump_counter: arena.alloc(0),
};

View file

@ -87,7 +87,7 @@ pub fn infer_expr(
}
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View file

@ -19,7 +19,8 @@ roc_parse = { path = "../parse" }
roc_solve = { path = "../solve" }
bumpalo = { version = "3.2", features = ["collections"] }
inlinable_string = "0.1"
tokio = { version = "0.2", features = ["blocking", "fs", "sync", "rt-threaded"] }
crossbeam = "0.7"
num_cpus = "1"
[dev-dependencies]
pretty_assertions = "0.5.1"

File diff suppressed because it is too large Load diff

View file

@ -54,7 +54,7 @@ reconstructPath = \cameFrom, goal ->
[]
Ok next ->
List.push (reconstructPath cameFrom next) goal
List.append (reconstructPath cameFrom next) goal
updateCost : position, position, Model position -> Model position
updateCost = \current, neighbour, model ->

View file

@ -54,7 +54,7 @@ reconstructPath = \cameFrom, goal ->
[]
Ok next ->
List.push (reconstructPath cameFrom next) goal
List.append (reconstructPath cameFrom next) goal
updateCost : position, position, Model position -> Model position
updateCost = \current, neighbour, model ->

View file

@ -29,29 +29,6 @@ pub fn test_home() -> ModuleId {
ModuleIds::default().get_or_insert(&"Test".into())
}
/// Without a larger-than-default stack size, some tests
/// run out of stack space in debug builds (but don't in --release builds)
#[allow(dead_code)]
const THREAD_STACK_SIZE: usize = 4 * 1024 * 1024;
/// Run `future` to completion on a dedicated multi-threaded Tokio runtime.
///
/// Tests go through this helper (rather than a default runtime) so their
/// worker threads get the larger THREAD_STACK_SIZE stack — per the note on
/// that constant, some tests overflow the default stack in debug builds.
pub fn test_async<F: std::future::Future>(future: F) -> F::Output {
    use tokio::runtime::Builder;

    // Create the runtime
    let mut rt = Builder::new()
        .thread_name("tokio-thread-for-tests")
        .thread_stack_size(THREAD_STACK_SIZE)
        // DEBUG: Replace this with .basic_scheduler() to make tests run single-threaded on the main thread.
        // Doing this makes assertion failures easier to read, but means
        // the tests can't reveal concurrency bugs, so leave this off by default!
        .threaded_scheduler()
        .build()
        .expect("Error initializing Tokio runtime.");

    // Spawn the root task
    rt.block_on(future)
}
#[allow(dead_code)]
pub fn infer_expr(
subs: Subs,
@ -92,7 +69,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View file

@ -13,7 +13,7 @@ mod helpers;
#[cfg(test)]
mod test_load {
use crate::helpers::{fixtures_dir, test_async};
use crate::helpers::fixtures_dir;
use inlinable_string::InlinableString;
use roc_can::def::Declaration::*;
use roc_can::def::Def;
@ -27,7 +27,7 @@ mod test_load {
// HELPERS
async fn load_fixture(
fn load_fixture(
dir_name: &str,
module_name: &str,
subs_by_module: SubsByModule,
@ -35,12 +35,11 @@ mod test_load {
let src_dir = fixtures_dir().join(dir_name);
let filename = src_dir.join(format!("{}.roc", module_name));
let loaded = load(
&roc_builtins::std::standard_stdlib(),
src_dir,
filename,
&roc_builtins::std::standard_stdlib(),
src_dir.as_path(),
subs_by_module,
)
.await;
);
let loaded_module = loaded.expect("Test module failed to load");
assert_eq!(loaded_module.can_problems, Vec::new());
@ -129,15 +128,12 @@ mod test_load {
let subs_by_module = MutMap::default();
let src_dir = fixtures_dir().join("interface_with_deps");
let filename = src_dir.join("Primary.roc");
test_async(async {
let loaded = load(
&roc_builtins::std::standard_stdlib(),
src_dir,
filename,
&roc_builtins::std::standard_stdlib(),
src_dir.as_path(),
subs_by_module,
)
.await;
);
let mut loaded_module = loaded.expect("Test module failed to load");
@ -160,14 +156,12 @@ mod test_load {
assert_eq!(expected_name, &InlinableString::from("Primary"));
assert_eq!(def_count, 10);
});
}
#[test]
fn load_unit() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("no_deps", "Unit", subs_by_module).await;
let loaded_module = load_fixture("no_deps", "Unit", subs_by_module);
expect_types(
loaded_module,
@ -175,15 +169,12 @@ mod test_load {
"unit" => "Unit",
},
);
});
}
#[test]
fn import_alias() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module =
load_fixture("interface_with_deps", "ImportAlias", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "ImportAlias", subs_by_module);
expect_types(
loaded_module,
@ -191,15 +182,12 @@ mod test_load {
"unit" => "Dep1.Unit",
},
);
});
}
#[test]
fn load_and_typecheck() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module =
load_fixture("interface_with_deps", "WithBuiltins", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "WithBuiltins", subs_by_module);
expect_types(
loaded_module,
@ -214,15 +202,12 @@ mod test_load {
"fromDep2" => "Float",
},
);
});
}
#[test]
fn iface_quicksort() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module =
load_fixture("interface_with_deps", "Quicksort", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "Quicksort", subs_by_module);
expect_types(
loaded_module,
@ -232,14 +217,12 @@ mod test_load {
"quicksort" => "List (Num a), Int, Int -> List (Num a)",
},
);
});
}
#[test]
fn app_quicksort() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("app_with_deps", "Quicksort", subs_by_module).await;
let loaded_module = load_fixture("app_with_deps", "Quicksort", subs_by_module);
expect_types(
loaded_module,
@ -249,14 +232,12 @@ mod test_load {
"quicksort" => "List (Num a), Int, Int -> List (Num a)",
},
);
});
}
#[test]
fn load_astar() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("interface_with_deps", "AStar", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "AStar", subs_by_module);
expect_types(
loaded_module,
@ -269,14 +250,12 @@ mod test_load {
"astar" => "(position, position -> Float), (position -> Set position), position, Model position -> [ Err [ KeyNotFound ]*, Ok (List position) ]*",
},
);
});
}
#[test]
fn load_principal_types() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("no_deps", "Principal", subs_by_module).await;
let loaded_module = load_fixture("no_deps", "Principal", subs_by_module);
expect_types(
loaded_module,
@ -285,15 +264,12 @@ mod test_load {
"identity" => "a -> a",
},
);
});
}
#[test]
fn iface_dep_types() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module =
load_fixture("interface_with_deps", "Primary", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "Primary", subs_by_module);
expect_types(
loaded_module,
@ -310,14 +286,12 @@ mod test_load {
"withDefault" => "Res.Res a *, a -> a",
},
);
});
}
#[test]
fn app_dep_types() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("app_with_deps", "Primary", subs_by_module).await;
let loaded_module = load_fixture("app_with_deps", "Primary", subs_by_module);
expect_types(
loaded_module,
@ -334,14 +308,12 @@ mod test_load {
"withDefault" => "Res.Res a *, a -> a",
},
);
});
}
#[test]
fn imported_dep_regression() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("interface_with_deps", "OneDep", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "OneDep", subs_by_module);
expect_types(
loaded_module,
@ -349,17 +321,15 @@ mod test_load {
"str" => "Str",
},
);
});
}
// #[test]
// fn load_records() {
// test_async(async {
// use roc::types::{ErrorType, Mismatch, Problem, TypeExt};
// let subs_by_module = MutMap::default();
// let loaded_module =
// load_fixture("interface_with_deps", "Records", subs_by_module).await;
// load_fixture("interface_with_deps", "Records", subs_by_module);
// // NOTE: `a` here is unconstrained, so unifies with <type error>
// let expected_types = hashmap! {
@ -409,6 +379,5 @@ mod test_load {
// assert_eq!((&symbol, expected_type), (&symbol, &actual_str.as_str()));
// }
// }
// });
// }
}

View file

@ -13,7 +13,7 @@ mod helpers;
#[cfg(test)]
mod test_uniq_load {
use crate::helpers::{fixtures_dir, test_async};
use crate::helpers::fixtures_dir;
use inlinable_string::InlinableString;
use roc_builtins::unique;
use roc_can::def::Declaration::*;
@ -28,14 +28,19 @@ mod test_uniq_load {
// HELPERS
async fn load_fixture(
fn load_fixture(
dir_name: &str,
module_name: &str,
subs_by_module: SubsByModule,
) -> LoadedModule {
let src_dir = fixtures_dir().join(dir_name);
let filename = src_dir.join(format!("{}.roc", module_name));
let loaded = load(&unique::uniq_stdlib(), src_dir, filename, subs_by_module).await;
let loaded = load(
filename,
&unique::uniq_stdlib(),
src_dir.as_path(),
subs_by_module,
);
let loaded_module = loaded.expect("Test module failed to load");
assert_eq!(loaded_module.can_problems, Vec::new());
@ -124,15 +129,12 @@ mod test_uniq_load {
let subs_by_module = MutMap::default();
let src_dir = fixtures_dir().join("interface_with_deps");
let filename = src_dir.join("Primary.roc");
test_async(async {
let loaded = load(
&roc_builtins::std::standard_stdlib(),
src_dir,
filename,
&roc_builtins::std::standard_stdlib(),
src_dir.as_path(),
subs_by_module,
)
.await;
);
let mut loaded_module = loaded.expect("Test module failed to load");
@ -155,14 +157,12 @@ mod test_uniq_load {
assert_eq!(expected_name, &InlinableString::from("Primary"));
assert_eq!(def_count, 10);
});
}
#[test]
fn load_unit() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("no_deps", "Unit", subs_by_module).await;
let loaded_module = load_fixture("no_deps", "Unit", subs_by_module);
expect_types(
loaded_module,
@ -170,15 +170,12 @@ mod test_uniq_load {
"unit" => "Attr * Unit",
},
);
});
}
#[test]
fn import_alias() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module =
load_fixture("interface_with_deps", "ImportAlias", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "ImportAlias", subs_by_module);
expect_types(
loaded_module,
@ -186,15 +183,12 @@ mod test_uniq_load {
"unit" => "Attr * Dep1.Unit",
},
);
});
}
#[test]
fn load_and_typecheck() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module =
load_fixture("interface_with_deps", "WithBuiltins", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "WithBuiltins", subs_by_module);
expect_types(
loaded_module,
@ -209,14 +203,12 @@ mod test_uniq_load {
"fromDep2" => "Attr * Float",
},
);
});
}
#[test]
fn load_astar() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("interface_with_deps", "AStar", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "AStar", subs_by_module);
expect_types(
loaded_module,
@ -229,15 +221,12 @@ mod test_uniq_load {
"astar" => "Attr Shared (Attr Shared (Attr Shared position, Attr Shared position -> Attr * Float), Attr Shared (Attr Shared position -> Attr * (Set (Attr * position))), Attr Shared position, Attr Shared (Model (Attr Shared position)) -> Attr * [ Err (Attr * [ KeyNotFound ]*), Ok (Attr * (List (Attr Shared position))) ]*)",
},
);
});
}
#[test]
fn load_and_typecheck_quicksort() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module =
load_fixture("interface_with_deps", "Quicksort", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "Quicksort", subs_by_module);
expect_types(
loaded_module,
@ -247,14 +236,12 @@ mod test_uniq_load {
"quicksort" => "Attr Shared (Attr b (List (Attr Shared (Num (Attr Shared a)))), Attr Shared Int, Attr Shared Int -> Attr b (List (Attr Shared (Num (Attr Shared a)))))",
},
);
});
}
#[test]
fn load_principal_types() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("no_deps", "Principal", subs_by_module).await;
let loaded_module = load_fixture("no_deps", "Principal", subs_by_module);
expect_types(
loaded_module,
@ -263,15 +250,12 @@ mod test_uniq_load {
"identity" => "Attr * (a -> a)",
},
);
});
}
#[test]
fn load_dep_types() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module =
load_fixture("interface_with_deps", "Primary", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "Primary", subs_by_module);
// the inferred signature for withDefault is wrong, part of the alias in alias issue.
// "withDefault" => "Attr * (Attr * (Res.Res (Attr a b) (Attr * *)), Attr a b -> Attr a b)",
@ -290,14 +274,12 @@ mod test_uniq_load {
"withDefault" => "Attr * (Attr (* | b | c) (Res.Res (Attr b a) (Attr c *)), Attr b a -> Attr b a)",
},
);
});
}
#[test]
fn load_custom_res() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("interface_with_deps", "Res", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "Res", subs_by_module);
expect_types(
loaded_module,
@ -307,14 +289,12 @@ mod test_uniq_load {
"andThen" => "Attr * (Attr (* | c | d) (Res (Attr c a) (Attr d err)), Attr * (Attr c a -> Attr f (Res (Attr e b) (Attr d err))) -> Attr f (Res (Attr e b) (Attr d err)))",
},
);
});
}
#[test]
fn imported_dep_regression() {
test_async(async {
let subs_by_module = MutMap::default();
let loaded_module = load_fixture("interface_with_deps", "OneDep", subs_by_module).await;
let loaded_module = load_fixture("interface_with_deps", "OneDep", subs_by_module);
expect_types(
loaded_module,
@ -322,7 +302,6 @@ mod test_uniq_load {
"str" => "Attr * Str",
},
);
});
}
// #[test]
@ -332,7 +311,7 @@ mod test_uniq_load {
// let subs_by_module = MutMap::default();
// let loaded_module =
// load_fixture("interface_with_deps", "Records", subs_by_module).await;
// load_fixture("interface_with_deps", "Records", subs_by_module);
// // NOTE: `a` here is unconstrained, so unifies with <type error>
// let expected_types = hashmap! {

View file

@ -10,8 +10,9 @@ pub enum LowLevel {
ListSingle,
ListRepeat,
ListReverse,
ListConcat,
ListAppend,
ListPush,
ListPrepend,
NumAdd,
NumSub,
NumMul,

View file

@ -659,7 +659,7 @@ define_builtins! {
2 LIST_IS_EMPTY: "isEmpty"
3 LIST_GET: "get"
4 LIST_SET: "set"
5 LIST_PUSH: "push"
5 LIST_APPEND: "append"
6 LIST_MAP: "map"
7 LIST_LEN: "len"
8 LIST_FOLDL: "foldl"
@ -669,7 +669,7 @@ define_builtins! {
12 LIST_SINGLE: "single"
13 LIST_REPEAT: "repeat"
14 LIST_REVERSE: "reverse"
15 LIST_APPEND: "append"
15 LIST_PREPEND: "prepend"
}
5 RESULT: "Result" => {
0 RESULT_RESULT: "Result" imported // the Result.Result type alias

View file

@ -154,9 +154,8 @@ fn to_decision_tree(raw_branches: Vec<Branch>) -> DecisionTree {
match check_for_match(&branches) {
Some(goal) => DecisionTree::Match(goal),
None => {
// TODO remove clone
let path = pick_path(branches.clone());
// must clone here to release the borrow on `branches`
let path = pick_path(&branches).clone();
let (edges, fallback) = gather_edges(branches, &path);
let mut decision_edges: Vec<_> = edges
@ -218,15 +217,16 @@ fn flatten<'a>(
path_pattern: (Path, Guard<'a>, Pattern<'a>),
path_patterns: &mut Vec<(Path, Guard<'a>, Pattern<'a>)>,
) {
match &path_pattern.2 {
match path_pattern.2 {
Pattern::AppliedTag {
union,
arguments,
tag_id,
..
} => {
// TODO do we need to check that guard.is_none() here?
if union.alternatives.len() == 1 {
tag_name,
layout,
} if union.alternatives.len() == 1 => {
// TODO ^ do we need to check that guard.is_none() here?
let path = path_pattern.0;
// Theory: unbox doesn't have any value for us, because one-element tag unions
// don't store the tag anyway.
@ -234,7 +234,13 @@ fn flatten<'a>(
path_patterns.push((
Path::Unbox(Box::new(path)),
path_pattern.1.clone(),
path_pattern.2.clone(),
Pattern::AppliedTag {
union,
arguments,
tag_id,
tag_name,
layout,
},
));
} else {
for (index, (arg_pattern, _)) in arguments.iter().enumerate() {
@ -242,7 +248,7 @@ fn flatten<'a>(
(
Path::Index {
index: index as u64,
tag_id: *tag_id,
tag_id,
path: Box::new(path.clone()),
},
// same guard here?
@ -253,9 +259,6 @@ fn flatten<'a>(
);
}
}
} else {
path_patterns.push(path_pattern);
}
}
_ => {
@ -289,8 +292,7 @@ fn gather_edges<'a>(
branches: Vec<Branch<'a>>,
path: &Path,
) -> (Vec<(Test<'a>, Vec<Branch<'a>>)>, Vec<Branch<'a>>) {
// TODO remove clone
let relevant_tests = tests_at_path(path, branches.clone());
let relevant_tests = tests_at_path(path, &branches);
let check = is_complete(&relevant_tests);
@ -314,12 +316,12 @@ fn gather_edges<'a>(
/// FIND RELEVANT TESTS
fn tests_at_path<'a>(selected_path: &Path, branches: Vec<Branch<'a>>) -> Vec<Test<'a>> {
fn tests_at_path<'a>(selected_path: &Path, branches: &[Branch<'a>]) -> Vec<Test<'a>> {
// NOTE the ordering of the result is important!
let mut all_tests = Vec::new();
for branch in branches.into_iter() {
for branch in branches {
test_at_path(selected_path, branch, &mut all_tests);
}
@ -348,7 +350,7 @@ fn tests_at_path<'a>(selected_path: &Path, branches: Vec<Branch<'a>>) -> Vec<Tes
unique
}
fn test_at_path<'a>(selected_path: &Path, branch: Branch<'a>, all_tests: &mut Vec<Test<'a>>) {
fn test_at_path<'a>(selected_path: &Path, branch: &Branch<'a>, all_tests: &mut Vec<Test<'a>>) {
use Pattern::*;
use Test::*;
@ -466,7 +468,7 @@ fn edges_for<'a>(
) -> (Test<'a>, Vec<Branch<'a>>) {
let mut new_branches = Vec::new();
for branch in branches.into_iter() {
for branch in branches.iter() {
to_relevant_branch(&test, path, branch, &mut new_branches);
}
@ -476,13 +478,13 @@ fn edges_for<'a>(
fn to_relevant_branch<'a>(
test: &Test<'a>,
path: &Path,
branch: Branch<'a>,
branch: &Branch<'a>,
new_branches: &mut Vec<Branch<'a>>,
) {
// TODO remove clone
match extract(path, branch.patterns.clone()) {
Extract::NotFound => {
new_branches.push(branch);
new_branches.push(branch.clone());
}
Extract::Found {
start,
@ -518,7 +520,7 @@ fn to_relevant_branch_help<'a>(
path: &Path,
mut start: Vec<(Path, Guard<'a>, Pattern<'a>)>,
end: Vec<(Path, Guard<'a>, Pattern<'a>)>,
branch: Branch<'a>,
branch: &Branch<'a>,
guard: Guard<'a>,
pattern: Pattern<'a>,
) -> Option<Branch<'a>> {
@ -526,7 +528,7 @@ fn to_relevant_branch_help<'a>(
use Test::*;
match pattern {
Identifier(_) | Underscore | Shadowed(_, _) | UnsupportedPattern(_) => Some(branch),
Identifier(_) | Underscore | Shadowed(_, _) | UnsupportedPattern(_) => Some(branch.clone()),
RecordDestructure(destructs, _) => match test {
IsCtor {
@ -689,19 +691,14 @@ fn extract<'a>(
) -> Extract<'a> {
let mut start = Vec::new();
// TODO remove this clone
let mut copy = path_patterns.clone();
// TODO potential ordering problem
for (index, current) in path_patterns.into_iter().enumerate() {
let mut it = path_patterns.into_iter();
while let Some(current) = it.next() {
if &current.0 == selected_path {
return Extract::Found {
start,
found_pattern: (current.1, current.2),
end: {
copy.drain(0..=index);
copy
},
end: it.collect::<Vec<_>>(),
};
} else {
start.push(current);
@ -742,22 +739,27 @@ fn needs_tests<'a>(pattern: &Pattern<'a>) -> bool {
/// PICK A PATH
fn pick_path(branches: Vec<Branch>) -> Path {
// TODO remove this clone
let all_paths = branches
.clone()
.into_iter()
.map(|v| v.patterns)
.flatten()
.filter_map(is_choice_path);
fn pick_path<'a>(branches: &'a [Branch]) -> &'a Path {
let mut all_paths = Vec::with_capacity(branches.len());
let mut by_small_defaults = bests_by_small_defaults(&branches, all_paths);
// is choice path
for branch in branches {
for (path, guard, pattern) in &branch.patterns {
if !guard.is_none() || needs_tests(&pattern) {
all_paths.push(path);
} else {
// do nothing
}
}
}
let mut by_small_defaults = bests_by_small_defaults(branches, all_paths.into_iter());
if by_small_defaults.len() == 1 {
by_small_defaults.remove(0)
} else {
debug_assert!(!by_small_defaults.is_empty());
let mut result = bests_by_small_branching_factor(&branches, by_small_defaults.into_iter());
let mut result = bests_by_small_branching_factor(branches, by_small_defaults.into_iter());
match result.pop() {
None => unreachable!("bests_by will always return at least one value in the vec"),
@ -766,33 +768,23 @@ fn pick_path(branches: Vec<Branch>) -> Path {
}
}
fn is_choice_path<'a>(path_and_pattern: (Path, Guard<'a>, Pattern<'a>)) -> Option<Path> {
let (path, guard, pattern) = path_and_pattern;
if !guard.is_none() || needs_tests(&pattern) {
Some(path)
} else {
None
}
}
fn bests_by_small_branching_factor<I>(branches: &Vec<Branch>, mut all_paths: I) -> Vec<Path>
fn bests_by_small_branching_factor<'a, I>(branches: &[Branch], mut all_paths: I) -> Vec<&'a Path>
where
I: Iterator<Item = Path>,
I: Iterator<Item = &'a Path>,
{
match all_paths.next() {
None => panic!("Cannot choose the best of zero paths. This should never happen."),
Some(first_path) => {
let mut min_weight = small_branching_factor(branches, &first_path);
let mut min_weight = small_branching_factor(branches, first_path);
let mut min_paths = vec![first_path];
for path in all_paths {
let weight = small_branching_factor(branches, &path);
let weight = small_branching_factor(branches, path);
use std::cmp::Ordering;
match weight.cmp(&min_weight) {
Ordering::Equal => {
min_paths.push(path.clone());
min_paths.push(path);
}
Ordering::Less => {
min_weight = weight;
@ -808,14 +800,14 @@ where
}
}
fn bests_by_small_defaults<I>(branches: &Vec<Branch>, mut all_paths: I) -> Vec<Path>
fn bests_by_small_defaults<'a, I>(branches: &[Branch], mut all_paths: I) -> Vec<&'a Path>
where
I: Iterator<Item = Path>,
I: Iterator<Item = &'a Path>,
{
match all_paths.next() {
None => panic!("Cannot choose the best of zero paths. This should never happen."),
Some(first_path) => {
let mut min_weight = small_defaults(branches, &first_path);
let mut min_weight = small_defaults(branches, first_path);
let mut min_paths = vec![first_path];
for path in all_paths {
@ -824,7 +816,7 @@ where
use std::cmp::Ordering;
match weight.cmp(&min_weight) {
Ordering::Equal => {
min_paths.push(path.clone());
min_paths.push(path);
}
Ordering::Less => {
min_weight = weight;
@ -842,7 +834,7 @@ where
/// PATH PICKING HEURISTICS
fn small_defaults(branches: &Vec<Branch>, path: &Path) -> usize {
fn small_defaults(branches: &[Branch], path: &Path) -> usize {
branches
.iter()
.filter(|b| is_irrelevant_to(path, b))
@ -850,7 +842,7 @@ fn small_defaults(branches: &Vec<Branch>, path: &Path) -> usize {
.sum()
}
fn small_branching_factor(branches: &Vec<Branch>, path: &Path) -> usize {
fn small_branching_factor(branches: &[Branch], path: &Path) -> usize {
// TODO remove clone
let (edges, fallback) = gather_edges(branches.to_vec(), path);

View file

@ -53,7 +53,6 @@ fn simplify<'a>(pattern: &crate::ir::Pattern<'a>) -> Pattern {
StrLiteral(v) => Literal(Literal::Str(v.clone())),
// To make sure these are exhaustive, we have to "fake" a union here
// TODO: use the hash or some other integer to discriminate between constructors
BitLiteral { value, union, .. } => Ctor(union.clone(), TagId(*value as u8), vec![]),
EnumLiteral { tag_id, union, .. } => Ctor(union.clone(), TagId(*tag_id), vec![]),
@ -217,7 +216,7 @@ fn is_exhaustive(matrix: &PatternMatrix, n: usize) -> PatternMatrix {
let last: _ = alt_list
.iter()
.filter_map(|r| is_missing(alts.clone(), ctors.clone(), r));
.filter_map(|r| is_missing(alts.clone(), &ctors, r));
let mut result = Vec::new();
@ -257,7 +256,7 @@ fn is_exhaustive(matrix: &PatternMatrix, n: usize) -> PatternMatrix {
}
}
fn is_missing<T>(union: Union, ctors: MutMap<TagId, T>, ctor: &Ctor) -> Option<Pattern> {
fn is_missing<T>(union: Union, ctors: &MutMap<TagId, T>, ctor: &Ctor) -> Option<Pattern> {
let Ctor { arity, tag_id, .. } = ctor;
if ctors.contains_key(tag_id) {
@ -336,7 +335,7 @@ fn to_nonredundant_rows<'a>(
vec![simplify(&loc_pat.value)]
};
if is_useful(&checked_rows, &next_row) {
if is_useful(checked_rows.clone(), next_row.clone()) {
checked_rows.push(next_row);
} else {
return Err(Error::Redundant {
@ -351,80 +350,139 @@ fn to_nonredundant_rows<'a>(
}
/// Check if a new row "vector" is useful given previous rows "matrix"
fn is_useful(matrix: &PatternMatrix, vector: &Row) -> bool {
if matrix.is_empty() {
fn is_useful(mut old_matrix: PatternMatrix, mut vector: Row) -> bool {
let mut matrix = Vec::with_capacity(old_matrix.len());
// this loop ping-pongs the rows between old_matrix and matrix
'outer: loop {
match vector.pop() {
_ if old_matrix.is_empty() => {
// No rows are the same as the new vector! The vector is useful!
true
} else if vector.is_empty() {
break true;
}
None => {
// There is nothing left in the new vector, but we still have
// rows that match the same things. This is not a useful vector!
false
} else {
break false;
}
Some(first_pattern) => {
// NOTE: if there are bugs in this code, look at the ordering of the row/matrix
let mut vector = vector.clone();
let first_pattern = vector.remove(0);
let patterns = vector;
match first_pattern {
// keep checking rows that start with this Ctor or Anything
Ctor(_, id, args) => {
let new_matrix: Vec<_> = matrix
.iter()
.filter_map(|r| specialize_row_by_ctor(id, args.len(), r))
.collect();
specialize_row_by_ctor2(id, args.len(), &mut old_matrix, &mut matrix);
let mut new_row = Vec::new();
new_row.extend(patterns);
new_row.extend(args);
std::mem::swap(&mut old_matrix, &mut matrix);
is_useful(&new_matrix, &new_row)
vector.extend(args);
}
Anything => {
// check if all alts appear in matrix
match is_complete(matrix) {
// check if all alternatives appear in matrix
match is_complete(&old_matrix) {
Complete::No => {
// This Anything is useful because some Ctors are missing.
// But what if a previous row has an Anything?
// If so, this one is not useful.
let new_matrix: Vec<_> = matrix
.iter()
.filter_map(|r| specialize_row_by_anything(r))
.collect();
is_useful(&new_matrix, &patterns)
for mut row in old_matrix.drain(..) {
if let Some(Anything) = row.pop() {
matrix.push(row);
}
Complete::Yes(alts) => {
}
std::mem::swap(&mut old_matrix, &mut matrix);
}
Complete::Yes(alternatives) => {
// All Ctors are covered, so this Anything is not needed for any
// of those. But what if some of those Ctors have subpatterns
// that make them less general? If so, this actually is useful!
let is_useful_alt = |Ctor { arity, tag_id, .. }| {
let new_matrix = matrix
.iter()
.filter_map(|r| specialize_row_by_ctor(tag_id, arity, r))
.collect();
let mut new_row: Vec<Pattern> =
std::iter::repeat(Anything).take(arity).collect::<Vec<_>>();
for alternative in alternatives {
let Ctor { arity, tag_id, .. } = alternative;
new_row.extend(patterns.clone());
let mut old_matrix = old_matrix.clone();
let mut matrix = vec![];
specialize_row_by_ctor2(
tag_id,
arity,
&mut old_matrix,
&mut matrix,
);
is_useful(&new_matrix, &new_row)
};
let mut vector = vector.clone();
vector.extend(std::iter::repeat(Anything).take(arity));
alts.iter().cloned().any(is_useful_alt)
if is_useful(matrix, vector) {
break 'outer true;
}
}
break false;
}
}
}
Literal(literal) => {
// keep checking rows that start with this Literal or Anything
let new_matrix = matrix
.iter()
.filter_map(|r| specialize_row_by_literal(&literal, r))
.collect();
is_useful(&new_matrix, &patterns)
for mut row in old_matrix.drain(..) {
let head = row.pop();
let patterns = row;
match head {
Some(Literal(lit)) => {
if lit == literal {
matrix.push(patterns);
} else {
// do nothing
}
}
Some(Anything) => matrix.push(patterns),
Some(Ctor(_, _, _)) => panic!(
r#"Compiler bug! After type checking, constructors and literals should never align in pattern match exhaustiveness checks."#
),
None => panic!(
"Compiler error! Empty matrices should not get specialized."
),
}
}
std::mem::swap(&mut old_matrix, &mut matrix);
}
}
}
}
}
}
/// INVARIANT: (length row == N) ==> (length result == arity + N - 1)
fn specialize_row_by_ctor2(
tag_id: TagId,
arity: usize,
old_matrix: &mut PatternMatrix,
matrix: &mut PatternMatrix,
) {
for mut row in old_matrix.drain(..) {
let head = row.pop();
let mut patterns = row;
match head {
Some(Ctor(_, id, args)) =>
if id == tag_id {
patterns.extend(args);
matrix.push(patterns);
} else {
// do nothing
}
Some(Anything) => {
// TODO order!
patterns.extend(std::iter::repeat(Anything).take(arity));
matrix.push(patterns);
}
Some(Literal(_)) => panic!( "Compiler bug! After type checking, constructors and literal should never align in pattern match exhaustiveness checks."),
None => panic!("Compiler error! Empty matrices should not get specialized."),
}
}
}
@ -436,7 +494,7 @@ fn specialize_row_by_ctor(tag_id: TagId, arity: usize, row: &Row) -> Option<Row>
let patterns = row;
match head {
Some(Ctor(_, id, args)) =>
Some(Ctor(_, id, args)) => {
if id == tag_id {
// TODO order!
let mut new_patterns = Vec::new();
@ -446,38 +504,18 @@ fn specialize_row_by_ctor(tag_id: TagId, arity: usize, row: &Row) -> Option<Row>
} else {
None
}
}
Some(Anything) => {
// TODO order!
let new_patterns =
std::iter::repeat(Anything).take(arity).chain(patterns).collect();
let new_patterns = std::iter::repeat(Anything)
.take(arity)
.chain(patterns)
.collect();
Some(new_patterns)
}
Some(Literal(_)) => panic!( "Compiler bug! After type checking, constructors and literal should never align in pattern match exhaustiveness checks."),
None => panic!("Compiler error! Empty matrices should not get specialized."),
}
}
/// INVARIANT: (length row == N) ==> (length result == N-1)
fn specialize_row_by_literal(literal: &Literal, row: &Row) -> Option<Row> {
let mut row = row.clone();
let head = row.pop();
let patterns = row;
match head {
Some(Literal(lit)) => {
if &lit == literal {
Some(patterns)
} else {
None
}
}
Some(Anything) => Some(patterns),
Some(Ctor(_, _, _)) => panic!(
r#"Compiler bug! After type checking, constructors and literals should never align in pattern match exhaustiveness checks."#
Some(Literal(_)) => unreachable!(
r#"Compiler bug! After type checking, a constructor can never align with a literal: that should be a type error!"#
),
None => panic!("Compiler error! Empty matrices should not get specialized."),
}
}
@ -501,14 +539,14 @@ pub enum Complete {
fn is_complete(matrix: &PatternMatrix) -> Complete {
let ctors = collect_ctors(matrix);
let mut it = ctors.values();
let length = ctors.len();
let mut it = ctors.into_iter();
match it.next() {
None => Complete::No,
Some(Union { alternatives, .. }) => {
if ctors.len() == alternatives.len() {
Complete::Yes(alternatives.to_vec())
Some((_, Union { alternatives, .. })) => {
if length == alternatives.len() {
Complete::Yes(alternatives)
} else {
Complete::No
}

View file

@ -168,7 +168,7 @@ impl<'a> Procs<'a> {
// by the surrounding context, so we can add pending specializations
// for them immediately.
let layout = layout_cache
.from_var(env.arena, annotation, env.subs, env.pointer_size)
.from_var(env.arena, annotation, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
// if we've already specialized this one, no further work is needed.
@ -306,7 +306,6 @@ pub struct Env<'a, 'i> {
pub problems: &'i mut std::vec::Vec<MonoProblem>,
pub home: ModuleId,
pub ident_ids: &'i mut IdentIds,
pub pointer_size: u32,
pub jump_counter: &'a mut u64,
}
@ -976,13 +975,13 @@ fn specialize<'a>(
);
for (arg_var, arg_name) in pattern_vars.iter().zip(pattern_symbols.iter()) {
let layout = layout_cache.from_var(&env.arena, *arg_var, env.subs, env.pointer_size)?;
let layout = layout_cache.from_var(&env.arena, *arg_var, env.subs)?;
proc_args.push((layout, *arg_name));
}
let ret_layout = layout_cache
.from_var(&env.arena, ret_var, env.subs, env.pointer_size)
.from_var(&env.arena, ret_var, env.subs)
.unwrap_or_else(|err| panic!("TODO handle invalid function {:?}", err));
// TODO WRONG
@ -1107,12 +1106,7 @@ pub fn with_hole<'a>(
use crate::layout::UnionVariant::*;
let arena = env.arena;
let variant = crate::layout::union_sorted_tags(
env.arena,
variant_var,
env.subs,
env.pointer_size,
);
let variant = crate::layout::union_sorted_tags(env.arena, variant_var, env.subs);
match variant {
Never => unreachable!("The `[]` type has no constructors"),
@ -1150,7 +1144,7 @@ pub fn with_hole<'a>(
// Layout will unpack this unwrapped tack if it only has one (non-zero-sized) field
let layout = layout_cache
.from_var(env.arena, variant_var, env.subs, env.pointer_size)
.from_var(env.arena, variant_var, env.subs)
.unwrap_or_else(|err| {
panic!("TODO turn fn_var into a RuntimeError {:?}", err)
});
@ -1255,12 +1249,7 @@ pub fn with_hole<'a>(
mut fields,
..
} => {
let sorted_fields = crate::layout::sort_record_fields(
env.arena,
record_var,
env.subs,
env.pointer_size,
);
let sorted_fields = crate::layout::sort_record_fields(env.arena, record_var, env.subs);
let mut field_symbols = Vec::with_capacity_in(fields.len(), env.arena);
let mut field_layouts = Vec::with_capacity_in(fields.len(), env.arena);
@ -1281,7 +1270,7 @@ pub fn with_hole<'a>(
// creating a record from the var will unpack it if it's just a single field.
let layout = layout_cache
.from_var(env.arena, record_var, env.subs, env.pointer_size)
.from_var(env.arena, record_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let field_symbols = field_symbols.into_bump_slice();
@ -1313,10 +1302,10 @@ pub fn with_hole<'a>(
final_else,
} => {
let ret_layout = layout_cache
.from_var(env.arena, branch_var, env.subs, env.pointer_size)
.from_var(env.arena, branch_var, env.subs)
.expect("invalid ret_layout");
let cond_layout = layout_cache
.from_var(env.arena, cond_var, env.subs, env.pointer_size)
.from_var(env.arena, cond_var, env.subs)
.expect("invalid cond_layout");
let assigned_in_jump = env.unique_symbol();
@ -1367,7 +1356,7 @@ pub fn with_hole<'a>(
}
let layout = layout_cache
.from_var(env.arena, branch_var, env.subs, env.pointer_size)
.from_var(env.arena, branch_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let param = Param {
@ -1426,7 +1415,7 @@ pub fn with_hole<'a>(
};
let layout = layout_cache
.from_var(env.arena, expr_var, env.subs, env.pointer_size)
.from_var(env.arena, expr_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let param = Param {
@ -1464,7 +1453,7 @@ pub fn with_hole<'a>(
let arg_symbols = arg_symbols.into_bump_slice();
let elem_layout = layout_cache
.from_var(env.arena, elem_var, env.subs, env.pointer_size)
.from_var(env.arena, elem_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let expr = Expr::Array {
@ -1508,12 +1497,7 @@ pub fn with_hole<'a>(
loc_expr,
..
} => {
let sorted_fields = crate::layout::sort_record_fields(
env.arena,
record_var,
env.subs,
env.pointer_size,
);
let sorted_fields = crate::layout::sort_record_fields(env.arena, record_var, env.subs);
let mut index = None;
let mut field_layouts = Vec::with_capacity_in(sorted_fields.len(), env.arena);
@ -1540,7 +1524,7 @@ pub fn with_hole<'a>(
};
let layout = layout_cache
.from_var(env.arena, field_var, env.subs, env.pointer_size)
.from_var(env.arena, field_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err));
let mut stmt = Stmt::Let(assigned, expr, layout, hole);
@ -1646,7 +1630,7 @@ pub fn with_hole<'a>(
}
let layout = layout_cache
.from_var(env.arena, fn_var, env.subs, env.pointer_size)
.from_var(env.arena, fn_var, env.subs)
.unwrap_or_else(|err| {
panic!("TODO turn fn_var into a RuntimeError {:?}", err)
});
@ -1657,7 +1641,7 @@ pub fn with_hole<'a>(
};
let ret_layout = layout_cache
.from_var(env.arena, ret_var, env.subs, env.pointer_size)
.from_var(env.arena, ret_var, env.subs)
.unwrap_or_else(|err| {
panic!("TODO turn fn_var into a RuntimeError {:?}", err)
});
@ -1720,7 +1704,7 @@ pub fn with_hole<'a>(
// layout of the return type
let layout = layout_cache
.from_var(env.arena, ret_var, env.subs, env.pointer_size)
.from_var(env.arena, ret_var, env.subs)
.unwrap_or_else(|err| todo!("TODO turn fn_var into a RuntimeError {:?}", err));
let mut result = Stmt::Let(assigned, Expr::RunLowLevel(op, arg_symbols), layout, hole);
@ -1857,7 +1841,7 @@ pub fn from_can<'a>(
}
let layout = layout_cache
.from_var(env.arena, def.expr_var, env.subs, env.pointer_size)
.from_var(env.arena, def.expr_var, env.subs)
.expect("invalid layout");
// convert the continuation
@ -1998,11 +1982,11 @@ fn from_can_when<'a>(
let opt_branches = to_opt_branches(env, region, branches, layout_cache);
let cond_layout = layout_cache
.from_var(env.arena, cond_var, env.subs, env.pointer_size)
.from_var(env.arena, cond_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn this into a RuntimeError {:?}", err));
let ret_layout = layout_cache
.from_var(env.arena, expr_var, env.subs, env.pointer_size)
.from_var(env.arena, expr_var, env.subs)
.unwrap_or_else(|err| panic!("TODO turn this into a RuntimeError {:?}", err));
let arena = env.arena;
@ -2291,7 +2275,7 @@ fn call_by_name<'a>(
hole: &'a Stmt<'a>,
) -> Stmt<'a> {
// Register a pending_specialization for this function
match layout_cache.from_var(env.arena, fn_var, env.subs, env.pointer_size) {
match layout_cache.from_var(env.arena, fn_var, env.subs) {
Ok(layout) => {
// Build the CallByName node
let arena = env.arena;
@ -2309,7 +2293,7 @@ fn call_by_name<'a>(
let field_symbols = field_symbols.into_bump_slice();
for (var, _) in &loc_args {
match layout_cache.from_var(&env.arena, *var, &env.subs, env.pointer_size) {
match layout_cache.from_var(&env.arena, *var, &env.subs) {
Ok(_) => {
pattern_vars.push(*var);
}
@ -2591,8 +2575,7 @@ pub fn from_can_pattern<'a>(
use crate::exhaustive::Union;
use crate::layout::UnionVariant::*;
let variant =
crate::layout::union_sorted_tags(env.arena, *whole_var, env.subs, env.pointer_size);
let variant = crate::layout::union_sorted_tags(env.arena, *whole_var, env.subs);
match variant {
Never => unreachable!("there is no pattern of type `[]`"),
@ -2745,12 +2728,7 @@ pub fn from_can_pattern<'a>(
let mut it = destructs.iter();
let mut opt_destruct = it.next();
let sorted_fields = crate::layout::sort_record_fields(
env.arena,
*whole_var,
env.subs,
env.pointer_size,
);
let sorted_fields = crate::layout::sort_record_fields(env.arena, *whole_var, env.subs);
let mut field_layouts = Vec::with_capacity_in(sorted_fields.len(), env.arena);

View file

@ -64,17 +64,12 @@ pub enum Builtin<'a> {
}
impl<'a> Layout<'a> {
pub fn new(
arena: &'a Bump,
content: Content,
subs: &Subs,
pointer_size: u32,
) -> Result<Self, LayoutProblem> {
pub fn new(arena: &'a Bump, content: Content, subs: &Subs) -> Result<Self, LayoutProblem> {
use roc_types::subs::Content::*;
match content {
FlexVar(_) | RigidVar(_) => Err(LayoutProblem::UnresolvedTypeVar),
Structure(flat_type) => layout_from_flat_type(arena, flat_type, subs, pointer_size),
Structure(flat_type) => layout_from_flat_type(arena, flat_type, subs),
Alias(Symbol::NUM_INT, args, _) => {
debug_assert!(args.is_empty());
@ -84,12 +79,7 @@ impl<'a> Layout<'a> {
debug_assert!(args.is_empty());
Ok(Layout::Builtin(Builtin::Float64))
}
Alias(_, _, var) => Self::new(
arena,
subs.get_without_compacting(var).content,
subs,
pointer_size,
),
Alias(_, _, var) => Self::new(arena, subs.get_without_compacting(var).content, subs),
Error => Err(LayoutProblem::Erroneous),
}
}
@ -97,15 +87,10 @@ impl<'a> Layout<'a> {
/// Returns Err(()) if given an error, or Ok(Layout) if given a non-erroneous Structure.
/// Panics if given a FlexVar or RigidVar, since those should have been
/// monomorphized away already!
fn from_var(
arena: &'a Bump,
var: Variable,
subs: &Subs,
pointer_size: u32,
) -> Result<Self, LayoutProblem> {
fn from_var(arena: &'a Bump, var: Variable, subs: &Subs) -> Result<Self, LayoutProblem> {
let content = subs.get_without_compacting(var).content;
Self::new(arena, content, subs, pointer_size)
Self::new(arena, content, subs)
}
pub fn safe_to_memcpy(&self) -> bool {
@ -130,6 +115,13 @@ impl<'a> Layout<'a> {
}
}
pub fn is_zero_sized(&self) -> bool {
// For this calculation, we don't need an accurate
// stack size, we just need to know whether it's zero,
// so it's fine to use a pointer size of 1.
self.stack_size(1) == 0
}
pub fn stack_size(&self, pointer_size: u32) -> u32 {
use Layout::*;
@ -175,7 +167,6 @@ impl<'a> LayoutCache<'a> {
arena: &'a Bump,
var: Variable,
subs: &Subs,
pointer_size: u32,
) -> Result<Layout<'a>, LayoutProblem> {
// Store things according to the root Variable, to avoid duplicate work.
let var = subs.get_root_key_without_compacting(var);
@ -185,7 +176,7 @@ impl<'a> LayoutCache<'a> {
.or_insert_with(|| {
let content = subs.get_without_compacting(var).content;
Layout::new(arena, content, subs, pointer_size)
Layout::new(arena, content, subs)
})
.clone()
}
@ -252,7 +243,6 @@ fn layout_from_flat_type<'a>(
arena: &'a Bump,
flat_type: FlatType,
subs: &Subs,
pointer_size: u32,
) -> Result<Layout<'a>, LayoutProblem> {
use roc_types::subs::FlatType::*;
@ -277,7 +267,7 @@ fn layout_from_flat_type<'a>(
layout_from_num_content(content)
}
Symbol::STR_STR => Ok(Layout::Builtin(Builtin::Str)),
Symbol::LIST_LIST => list_layout_from_elem(arena, subs, args[0], pointer_size),
Symbol::LIST_LIST => list_layout_from_elem(arena, subs, args[0]),
Symbol::ATTR_ATTR => {
debug_assert_eq!(args.len(), 2);
@ -288,7 +278,7 @@ fn layout_from_flat_type<'a>(
// For now, layout is unaffected by uniqueness.
// (Incorporating refcounting may change this.)
// Unwrap and continue
Layout::from_var(arena, wrapped_var, subs, pointer_size)
Layout::from_var(arena, wrapped_var, subs)
}
_ => {
panic!("TODO layout_from_flat_type for {:?}", Apply(symbol, args));
@ -301,11 +291,11 @@ fn layout_from_flat_type<'a>(
for arg_var in args {
let arg_content = subs.get_without_compacting(arg_var).content;
fn_args.push(Layout::new(arena, arg_content, subs, pointer_size)?);
fn_args.push(Layout::new(arena, arg_content, subs)?);
}
let ret_content = subs.get_without_compacting(ret_var).content;
let ret = Layout::new(arena, ret_content, subs, pointer_size)?;
let ret = Layout::new(arena, ret_content, subs)?;
Ok(Layout::FunctionPointer(
fn_args.into_bump_slice(),
@ -333,10 +323,10 @@ fn layout_from_flat_type<'a>(
let field_var = field.into_inner();
let field_content = subs.get_without_compacting(field_var).content;
match Layout::new(arena, field_content, subs, pointer_size) {
match Layout::new(arena, field_content, subs) {
Ok(layout) => {
// Drop any zero-sized fields like {}
if layout.stack_size(pointer_size) != 0 {
// Drop any zero-sized fields like {}.
if !layout.is_zero_sized() {
layouts.push(layout);
}
}
@ -358,7 +348,7 @@ fn layout_from_flat_type<'a>(
TagUnion(tags, ext_var) => {
debug_assert!(ext_var_is_empty_tag_union(subs, ext_var));
Ok(layout_from_tag_union(arena, tags, subs, pointer_size))
Ok(layout_from_tag_union(arena, tags, subs))
}
RecursiveTagUnion(_rec_var, _tags, _ext_var) => {
panic!("TODO make Layout for empty RecursiveTagUnion");
@ -378,7 +368,6 @@ pub fn sort_record_fields<'a>(
arena: &'a Bump,
var: Variable,
subs: &Subs,
pointer_size: u32,
) -> Vec<'a, (Lowercase, Layout<'a>)> {
let mut fields_map = MutMap::default();
@ -389,11 +378,10 @@ pub fn sort_record_fields<'a>(
for (label, field) in fields_map {
let var = field.into_inner();
let layout = Layout::from_var(arena, var, subs, pointer_size)
.expect("invalid layout from var");
let layout = Layout::from_var(arena, var, subs).expect("invalid layout from var");
// Drop any zero-sized fields like {}
if layout.stack_size(pointer_size) != 0 {
if !layout.is_zero_sized() {
sorted_fields.push((label, layout));
}
}
@ -416,17 +404,10 @@ pub enum UnionVariant<'a> {
Wrapped(Vec<'a, (TagName, &'a [Layout<'a>])>),
}
pub fn union_sorted_tags<'a>(
arena: &'a Bump,
var: Variable,
subs: &Subs,
pointer_size: u32,
) -> UnionVariant<'a> {
pub fn union_sorted_tags<'a>(arena: &'a Bump, var: Variable, subs: &Subs) -> UnionVariant<'a> {
let mut tags_vec = std::vec::Vec::new();
match roc_types::pretty_print::chase_ext_tag_union(subs, var, &mut tags_vec) {
Ok(()) | Err((_, Content::FlexVar(_))) => {
union_sorted_tags_help(arena, tags_vec, subs, pointer_size)
}
Ok(()) | Err((_, Content::FlexVar(_))) => union_sorted_tags_help(arena, tags_vec, subs),
Err(other) => panic!("invalid content in tag union variable: {:?}", other),
}
}
@ -435,7 +416,6 @@ fn union_sorted_tags_help<'a>(
arena: &'a Bump,
mut tags_vec: std::vec::Vec<(TagName, std::vec::Vec<Variable>)>,
subs: &Subs,
pointer_size: u32,
) -> UnionVariant<'a> {
// sort up front; make sure the ordering stays intact!
tags_vec.sort();
@ -458,10 +438,10 @@ fn union_sorted_tags_help<'a>(
}
_ => {
for var in arguments {
match Layout::from_var(arena, var, subs, pointer_size) {
match Layout::from_var(arena, var, subs) {
Ok(layout) => {
// Drop any zero-sized arguments like {}
if layout.stack_size(pointer_size) != 0 {
if !layout.is_zero_sized() {
layouts.push(layout);
}
}
@ -497,10 +477,10 @@ fn union_sorted_tags_help<'a>(
arg_layouts.push(Layout::Builtin(Builtin::Int64));
for var in arguments {
match Layout::from_var(arena, var, subs, pointer_size) {
match Layout::from_var(arena, var, subs) {
Ok(layout) => {
// Drop any zero-sized arguments like {}
if layout.stack_size(pointer_size) != 0 {
if !layout.is_zero_sized() {
has_any_arguments = true;
arg_layouts.push(layout);
@ -551,14 +531,13 @@ pub fn layout_from_tag_union<'a>(
arena: &'a Bump,
tags: MutMap<TagName, std::vec::Vec<Variable>>,
subs: &Subs,
pointer_size: u32,
) -> Layout<'a> {
use UnionVariant::*;
let tags_vec: std::vec::Vec<_> = tags.into_iter().collect();
if tags_vec[0].0 != TagName::Private(Symbol::NUM_AT_NUM) {
let variant = union_sorted_tags_help(arena, tags_vec, subs, pointer_size);
let variant = union_sorted_tags_help(arena, tags_vec, subs);
match variant {
Never => panic!("TODO gracefully handle trying to instantiate Never"),
@ -692,7 +671,6 @@ pub fn list_layout_from_elem<'a>(
arena: &'a Bump,
subs: &Subs,
var: Variable,
pointer_size: u32,
) -> Result<Layout<'a>, LayoutProblem> {
match subs.get_without_compacting(var).content {
Content::Structure(FlatType::Apply(Symbol::ATTR_ATTR, args)) => {
@ -700,14 +678,14 @@ pub fn list_layout_from_elem<'a>(
let arg_var = args.get(1).unwrap();
list_layout_from_elem(arena, subs, *arg_var, pointer_size)
list_layout_from_elem(arena, subs, *arg_var)
}
Content::FlexVar(_) | Content::RigidVar(_) => {
// If this was still a (List *) then it must have been an empty list
Ok(Layout::Builtin(Builtin::EmptyList))
}
content => {
let elem_layout = Layout::new(arena, content, subs, pointer_size)?;
let elem_layout = Layout::new(arena, content, subs)?;
// This is a normal list.
Ok(Layout::Builtin(Builtin::List(

View file

@ -53,7 +53,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View file

@ -14,7 +14,6 @@ mod helpers;
mod test_mono {
use crate::helpers::{can_expr, infer_expr, CanExprOut};
use bumpalo::Bump;
use roc_collections::all::MutMap;
use roc_mono::layout::LayoutCache;
use roc_types::subs::Subs;
@ -38,8 +37,8 @@ mod test_mono {
let mut procs = roc_mono::ir::Procs::default();
let mut ident_ids = interns.all_ident_ids.remove(&home).unwrap();
// assume 64-bit pointers
let pointer_size = std::mem::size_of::<u64>() as u32;
// Put this module's ident_ids back in the interns
interns.all_ident_ids.insert(home, ident_ids.clone());
// Populate Procs and Subs, and get the low-level Expr from the canonical Expr
let mut mono_problems = Vec::new();
@ -49,7 +48,6 @@ mod test_mono {
problems: &mut mono_problems,
home,
ident_ids: &mut ident_ids,
pointer_size,
jump_counter: arena.alloc(0),
};
@ -69,9 +67,6 @@ mod test_mono {
roc_collections::all::MutMap::default()
);
// Put this module's ident_ids back in the interns
interns.all_ident_ids.insert(home, ident_ids);
let mut procs_string = procs
.get_specialized_procs(mono_env.arena)
.values()

View file

@ -11,6 +11,7 @@ roc_region = { path = "../region" }
roc_module = { path = "../module" }
bumpalo = { version = "3.2", features = ["collections"] }
inlinable_string = "0.1"
encode_unicode = "0.3"
[dev-dependencies]
pretty_assertions = "0.5.1"

View file

@ -1,6 +1,8 @@
use crate::ast::CommentOrNewline::{self, *};
use crate::ast::Spaceable;
use crate::parser::{self, and, unexpected, unexpected_eof, Parser, State};
use crate::parser::{
self, and, peek_utf8_char, unexpected, unexpected_eof, FailReason, Parser, State,
};
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
@ -216,16 +218,17 @@ fn spaces<'a>(
) -> impl Parser<'a, &'a [CommentOrNewline<'a>]> {
move |arena: &'a Bump, state: State<'a>| {
let original_state = state.clone();
let chars = state.input.chars().peekable();
let mut space_list = Vec::new_in(arena);
let mut chars_parsed = 0;
let mut bytes_parsed = 0;
let mut comment_line_buf = String::new_in(arena);
let mut line_state = LineState::Normal;
let mut state = state;
let mut any_newlines = false;
for ch in chars {
chars_parsed += 1;
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, utf8_len)) => {
bytes_parsed += utf8_len;
match line_state {
LineState::Normal => {
@ -259,11 +262,11 @@ fn spaces<'a>(
// We're now parsing a line comment!
line_state = LineState::Comment;
}
nonblank => {
return if require_at_least_one && chars_parsed <= 1 {
_ => {
return if require_at_least_one && bytes_parsed <= 1 {
// We've parsed 1 char and it was not a space,
// but we require parsing at least one space!
Err(unexpected(nonblank, 0, state.clone(), state.attempting))
Err(unexpected(0, state.clone(), state.attempting))
} else {
// First make sure we were indented enough!
//
@ -346,8 +349,7 @@ fn spaces<'a>(
line_state = LineState::Normal;
}
nonblank => {
// Chars can have btye lengths of more than 1!
state = state.advance_without_indenting(nonblank.len_utf8())?;
state = state.advance_without_indenting(utf8_len)?;
comment_line_buf.push(nonblank);
}
@ -355,8 +357,40 @@ fn spaces<'a>(
}
}
}
Err(FailReason::BadUtf8) => {
// If we hit an invalid UTF-8 character, bail out immediately.
return state.fail(FailReason::BadUtf8);
}
Err(_) => {
if require_at_least_one && bytes_parsed == 0 {
return Err(unexpected_eof(0, state.attempting, state));
} else {
let space_slice = space_list.into_bump_slice();
if require_at_least_one && chars_parsed == 0 {
// First make sure we were indented enough!
//
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
if any_newlines {
return Ok((
space_slice,
state
.check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state))?,
));
}
return Ok((space_slice, state));
}
}
};
}
// If we didn't parse anything, return unexpected EOF
if require_at_least_one && original_state.bytes.len() == state.bytes.len() {
Err(unexpected_eof(0, state.attempting, state))
} else {
// First make sure we were indented enough!

View file

@ -8,8 +8,8 @@ use crate::ident::{global_tag_or_ident, ident, lowercase_ident, Ident};
use crate::keyword;
use crate::number_literal::number_literal;
use crate::parser::{
self, allocated, char, fail, not, not_followed_by, optional, sep_by1, string, then, unexpected,
unexpected_eof, Either, Fail, FailReason, ParseResult, Parser, State,
self, allocated, ascii_char, ascii_string, fail, not, not_followed_by, optional, sep_by1, then,
unexpected, unexpected_eof, Either, Fail, FailReason, ParseResult, Parser, State,
};
use crate::type_annotation;
use bumpalo::collections::string::String;
@ -22,7 +22,7 @@ pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
// Recursive parsers must not directly invoke functions which return (impl Parser),
// as this causes rustc to stack overflow. Thus, parse_expr must be a
// separate function which recurses by calling itself directly.
move |arena, state| parse_expr(min_indent, arena, state)
move |arena, state: State<'a>| parse_expr(min_indent, arena, state)
}
macro_rules! loc_parenthetical_expr {
@ -30,7 +30,7 @@ macro_rules! loc_parenthetical_expr {
then(
loc!(and!(
between!(
char('('),
ascii_char('(' ),
map_with_arena!(
space0_around(
loc!(move |arena, state| parse_expr($min_indent, arena, state)),
@ -43,7 +43,7 @@ macro_rules! loc_parenthetical_expr {
}
}
),
char(')')
ascii_char(')' )
),
optional(either!(
// There may optionally be function args after the ')'
@ -59,7 +59,7 @@ macro_rules! loc_parenthetical_expr {
// as if there were any args they'd have consumed it anyway
// e.g. in `((foo bar) baz.blah)` the `.blah` will be consumed by the `baz` parser
either!(
one_or_more!(skip_first!(char('.'), lowercase_ident())),
one_or_more!(skip_first!(ascii_char('.' ), lowercase_ident())),
and!(space0($min_indent), equals_with_indent())
)
))
@ -170,7 +170,7 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
one_of!(
map_with_arena!(
and!(
loc!(char('!')),
loc!(ascii_char('!')),
loc!(move |arena, state| parse_expr(min_indent, arena, state))
),
|arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| {
@ -179,7 +179,7 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
),
map_with_arena!(
and!(
loc!(char('-')),
loc!(ascii_char('-')),
loc!(move |arena, state| parse_expr(min_indent, arena, state))
),
|arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| {
@ -450,9 +450,9 @@ pub fn loc_parenthetical_def<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
let (loc_tuple, state) = loc!(and!(
space0_after(
between!(
char('('),
ascii_char('('),
space0_around(loc_pattern(min_indent), min_indent),
char(')')
ascii_char(')')
),
min_indent,
),
@ -482,7 +482,7 @@ pub fn loc_parenthetical_def<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
/// The '=' used in a def can't be followed by another '=' (or else it's actually
/// an "==") and also it can't be followed by '>' (or else it's actually an "=>")
fn equals_for_def<'a>() -> impl Parser<'a, ()> {
not_followed_by(char('='), one_of!(char('='), char('>')))
not_followed_by(ascii_char('='), one_of!(ascii_char('='), ascii_char('>')))
}
/// A definition, consisting of one of these:
@ -513,7 +513,7 @@ pub fn def<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>> {
),
// Annotation
skip_first!(
char(':'),
ascii_char(':'),
// Spaces after the ':' (at a normal indentation level) and then the type.
// The type itself must be indented more than the pattern and ':'
space0_before(type_annotation::located(indented_more), indented_more)
@ -811,12 +811,12 @@ fn loc_parse_function_arg<'a>(
fn reserved_keyword<'a>() -> impl Parser<'a, ()> {
one_of!(
string(keyword::IF),
string(keyword::THEN),
string(keyword::ELSE),
string(keyword::WHEN),
string(keyword::IS),
string(keyword::AS)
ascii_string(keyword::IF),
ascii_string(keyword::THEN),
ascii_string(keyword::ELSE),
ascii_string(keyword::WHEN),
ascii_string(keyword::IS),
ascii_string(keyword::AS)
)
}
@ -824,7 +824,7 @@ fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
map_with_arena!(
skip_first!(
// All closures start with a '\' - e.g. (\x -> x + 1)
char('\\'),
ascii_char('\\'),
// Once we see the '\', we're committed to parsing this as a closure.
// It may turn out to be malformed, but it is definitely a closure.
optional(and!(
@ -833,13 +833,13 @@ fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
Attempting::ClosureParams,
// Params are comma-separated
sep_by1(
char(','),
ascii_char(','),
space0_around(loc_closure_param(min_indent), min_indent)
)
),
skip_first!(
// Parse the -> which separates params from body
string("->"),
ascii_string("->"),
// Parse the body
attempt!(
Attempting::ClosureBody,
@ -877,9 +877,9 @@ fn parse_closure_param<'a>(
// If you wrap it in parens, you can match any arbitrary pattern at all.
// e.g. \User.UserId userId -> ...
between!(
char('('),
ascii_char('('),
space0_around(loc_pattern(min_indent), min_indent),
char(')')
ascii_char(')')
)
)
.parse(arena, state)
@ -903,9 +903,9 @@ fn loc_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> {
fn loc_parenthetical_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> {
between!(
char('('),
ascii_char('('),
move |arena, state| loc_pattern(min_indent).parse(arena, state),
char(')')
ascii_char(')')
)
}
@ -923,13 +923,13 @@ fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
}
fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
map!(char('_'), |_| Pattern::Underscore)
map!(ascii_char('_'), |_| Pattern::Underscore)
}
fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
then(
collection!(
char('{'),
ascii_char('{'),
move |arena: &'a bumpalo::Bump,
state: crate::parser::State<'a>|
-> crate::parser::ParseResult<'a, Located<crate::ast::Pattern<'a>>> {
@ -947,10 +947,13 @@ fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
// (This is true in both literals and types.)
let (opt_loc_val, state) = crate::parser::optional(either!(
skip_first!(
char(':'),
ascii_char(':'),
space0_before(loc_pattern(min_indent), min_indent)
),
skip_first!(char('?'), space0_before(loc!(expr(min_indent)), min_indent))
skip_first!(
ascii_char('?'),
space0_before(loc!(expr(min_indent)), min_indent)
)
))
.parse(arena, state)?;
@ -987,8 +990,8 @@ fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
Ok((answer, state))
},
char(','),
char('}'),
ascii_char(','),
ascii_char('}'),
min_indent
),
move |_arena, state, loc_patterns| {
@ -1109,7 +1112,7 @@ mod when {
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
),
string(keyword::IS)
ascii_string(keyword::IS)
)
)
),
@ -1132,7 +1135,7 @@ mod when {
/// Parsing when with indentation.
fn when_with_indent<'a>() -> impl Parser<'a, u16> {
move |arena, state: State<'a>| {
string(keyword::WHEN)
ascii_string(keyword::WHEN)
.parse(arena, state)
.map(|((), state)| (state.indent_col, state))
}
@ -1185,7 +1188,7 @@ mod when {
}
);
loop {
while !state.bytes.is_empty() {
match branch_parser.parse(arena, state) {
Ok((next_output, next_state)) => {
state = next_state;
@ -1210,11 +1213,11 @@ mod when {
) -> impl Parser<'a, (Vec<'a, Located<Pattern<'a>>>, Option<Located<Expr<'a>>>)> {
and!(
sep_by1(
char('|'),
ascii_char('|'),
space0_around(loc_pattern(min_indent), min_indent),
),
optional(skip_first!(
string(keyword::IF),
ascii_string(keyword::IF),
// TODO we should require space before the expression but not after
space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
@ -1240,7 +1243,7 @@ mod when {
/// Parsing the righthandside of a branch in a when conditional.
fn branch_result<'a>(indent: u16) -> impl Parser<'a, Located<Expr<'a>>> {
skip_first!(
string("->"),
ascii_string("->"),
space0_before(
loc!(move |arena, state| parse_expr(indent, arena, state)),
indent,
@ -1253,7 +1256,7 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
map_with_arena!(
and!(
skip_first!(
string(keyword::IF),
ascii_string(keyword::IF),
space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
@ -1261,14 +1264,14 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
),
and!(
skip_first!(
string(keyword::THEN),
ascii_string(keyword::THEN),
space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
)
),
skip_first!(
string(keyword::ELSE),
ascii_string(keyword::ELSE),
space1_before(
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
@ -1310,10 +1313,15 @@ fn unary_negate_function_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
// Try to parse a number literal *before* trying to parse unary negate,
// because otherwise (foo -1) will parse as (foo (Num.neg 1))
loc!(number_literal()),
loc!(char('-'))
loc!(ascii_char('-'))
)
),
one_of!(char(' '), char('#'), char('\n'), char('>')),
one_of!(
ascii_char(' '),
ascii_char('#'),
ascii_char('\n'),
ascii_char('>')
),
),
move |arena, state, (spaces, num_or_minus_char)| {
match num_or_minus_char {
@ -1530,17 +1538,15 @@ pub fn ident_without_apply<'a>() -> impl Parser<'a, Expr<'a>> {
/// Like equals_for_def(), except it produces the indent_col of the state rather than ()
pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
move |_arena, state: State<'a>| {
let mut iter = state.input.chars();
match iter.next() {
Some(ch) if ch == '=' => {
match iter.peekable().peek() {
match state.bytes.first() {
Some(&byte) if byte == b'=' => {
match state.bytes.get(1) {
// The '=' must not be followed by another `=` or `>`
// (See equals_for_def() for explanation)
Some(next_ch) if next_ch != &'=' && next_ch != &'>' => {
Some(&next_byte) if next_byte != b'=' && next_byte != b'>' => {
Ok((state.indent_col, state.advance_without_indenting(1)?))
}
Some(next_ch) => Err(unexpected(*next_ch, 0, state, Attempting::Def)),
Some(_) => Err(unexpected(0, state, Attempting::Def)),
None => Err(unexpected_eof(
1,
Attempting::Def,
@ -1548,23 +1554,19 @@ pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
)),
}
}
Some(ch) => Err(unexpected(ch, 0, state, Attempting::Def)),
Some(_) => Err(unexpected(0, state, Attempting::Def)),
None => Err(unexpected_eof(0, Attempting::Def, state)),
}
}
}
pub fn colon_with_indent<'a>() -> impl Parser<'a, u16> {
move |_arena, state: State<'a>| {
let mut iter = state.input.chars();
match iter.next() {
Some(ch) if ch == ':' => Ok((state.indent_col, state.advance_without_indenting(1)?)),
Some(ch) => Err(unexpected(ch, 0, state, Attempting::Def)),
move |_arena, state: State<'a>| match state.bytes.first() {
Some(&byte) if byte == b':' => Ok((state.indent_col, state.advance_without_indenting(1)?)),
Some(_) => Err(unexpected(0, state, Attempting::Def)),
None => Err(unexpected_eof(0, Attempting::Def, state)),
}
}
}
pub fn ident_to_expr<'a>(arena: &'a Bump, src: Ident<'a>) -> Expr<'a> {
match src {
@ -1606,32 +1608,32 @@ fn binop<'a>() -> impl Parser<'a, BinOp> {
// with other valid operators (e.g. "<=" begins with "<") must
// come before the shorter ones; otherwise, they will never
// be reached because the shorter one will pass and consume!
map!(string("|>"), |_| BinOp::Pizza),
map!(string("=="), |_| BinOp::Equals),
map!(string("!="), |_| BinOp::NotEquals),
map!(string("&&"), |_| BinOp::And),
map!(string("||"), |_| BinOp::Or),
map!(char('+'), |_| BinOp::Plus),
map!(char('*'), |_| BinOp::Star),
map!(char('-'), |_| BinOp::Minus),
map!(string("//"), |_| BinOp::DoubleSlash),
map!(char('/'), |_| BinOp::Slash),
map!(string("<="), |_| BinOp::LessThanOrEq),
map!(char('<'), |_| BinOp::LessThan),
map!(string(">="), |_| BinOp::GreaterThanOrEq),
map!(char('>'), |_| BinOp::GreaterThan),
map!(char('^'), |_| BinOp::Caret),
map!(string("%%"), |_| BinOp::DoublePercent),
map!(char('%'), |_| BinOp::Percent)
map!(ascii_string("|>"), |_| BinOp::Pizza),
map!(ascii_string("=="), |_| BinOp::Equals),
map!(ascii_string("!="), |_| BinOp::NotEquals),
map!(ascii_string("&&"), |_| BinOp::And),
map!(ascii_string("||"), |_| BinOp::Or),
map!(ascii_char('+'), |_| BinOp::Plus),
map!(ascii_char('*'), |_| BinOp::Star),
map!(ascii_char('-'), |_| BinOp::Minus),
map!(ascii_string("//"), |_| BinOp::DoubleSlash),
map!(ascii_char('/'), |_| BinOp::Slash),
map!(ascii_string("<="), |_| BinOp::LessThanOrEq),
map!(ascii_char('<'), |_| BinOp::LessThan),
map!(ascii_string(">="), |_| BinOp::GreaterThanOrEq),
map!(ascii_char('>'), |_| BinOp::GreaterThan),
map!(ascii_char('^'), |_| BinOp::Caret),
map!(ascii_string("%%"), |_| BinOp::DoublePercent),
map!(ascii_char('%'), |_| BinOp::Percent)
)
}
pub fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
let elems = collection!(
char('['),
ascii_char('['),
loc!(expr(min_indent)),
char(','),
char(']'),
ascii_char(','),
ascii_char(']'),
min_indent
);
@ -1673,8 +1675,10 @@ pub fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
};
// there can be field access, e.g. `{ x : 4 }.x`
let (accesses, state) =
optional(one_or_more!(skip_first!(char('.'), lowercase_ident())))
let (accesses, state) = optional(one_or_more!(skip_first!(
ascii_char('.'),
lowercase_ident()
)))
.parse(arena, state)?;
if let Some(fields) = accesses {
@ -1768,7 +1772,7 @@ pub fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
/// This is mainly for matching tags in closure params, e.g. \@Foo -> ...
pub fn private_tag<'a>() -> impl Parser<'a, &'a str> {
map_with_arena!(
skip_first!(char('@'), global_tag()),
skip_first!(ascii_char('@'), global_tag()),
|arena: &'a Bump, name: &'a str| {
let mut buf = String::with_capacity_in(1 + name.len(), arena);

View file

@ -1,6 +1,6 @@
use crate::ast::Attempting;
use crate::keyword;
use crate::parser::{unexpected, unexpected_eof, Fail, FailReason, ParseResult, Parser, State};
use crate::parser::{peek_utf8_char, unexpected, Fail, FailReason, ParseResult, Parser, State};
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
@ -67,70 +67,68 @@ impl<'a> Ident<'a> {
/// Sometimes we may want to check for those later in the process, and give
/// more contextually-aware error messages than "unexpected `if`" or the like.
#[inline(always)]
pub fn parse_ident<'a, I>(
pub fn parse_ident<'a>(
arena: &'a Bump,
chars: &mut I,
state: State<'a>,
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
where
I: Iterator<Item = char>,
{
mut state: State<'a>,
) -> ParseResult<'a, (Ident<'a>, Option<char>)> {
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
let mut is_capitalized;
let is_accessor_fn;
let mut is_private_tag = false;
let mut chars_parsed;
// Identifiers and accessor functions must start with either a letter or a dot.
// If this starts with neither, it must be something else!
match chars.next() {
Some(ch) => {
if ch == '@' {
match peek_utf8_char(&state) {
Ok((first_ch, bytes_parsed)) => {
if first_ch.is_alphabetic() {
part_buf.push(first_ch);
is_capitalized = first_ch.is_uppercase();
is_accessor_fn = false;
state = state.advance_without_indenting(bytes_parsed)?;
} else if first_ch == '.' {
is_capitalized = false;
is_accessor_fn = true;
state = state.advance_without_indenting(bytes_parsed)?;
} else if first_ch == '@' {
state = state.advance_without_indenting(bytes_parsed)?;
// '@' must always be followed by a capital letter!
match chars.next() {
Some(ch) if ch.is_uppercase() => {
match peek_utf8_char(&state) {
Ok((next_ch, next_bytes_parsed)) => {
if next_ch.is_uppercase() {
state = state.advance_without_indenting(next_bytes_parsed)?;
part_buf.push('@');
part_buf.push(ch);
part_buf.push(next_ch);
is_private_tag = true;
is_capitalized = true;
is_accessor_fn = false;
chars_parsed = 2;
}
Some(ch) => {
return Err(unexpected(ch, 0, state, Attempting::Identifier));
}
None => {
return Err(unexpected_eof(0, Attempting::Identifier, state));
}
}
} else if ch.is_alphabetic() {
part_buf.push(ch);
is_capitalized = ch.is_uppercase();
is_accessor_fn = false;
chars_parsed = 1;
} else if ch == '.' {
is_capitalized = false;
is_accessor_fn = true;
chars_parsed = 1;
} else {
return Err(unexpected(ch, 0, state, Attempting::Identifier));
return Err(unexpected(
bytes_parsed + next_bytes_parsed,
state,
Attempting::Identifier,
));
}
}
None => {
return Err(unexpected_eof(0, Attempting::Identifier, state));
Err(reason) => return state.fail(reason),
}
} else {
return Err(unexpected(0, state, Attempting::Identifier));
}
}
Err(reason) => return state.fail(reason),
}
};
let mut next_char = None;
while let Some(ch) = chars.next() {
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
@ -150,7 +148,6 @@ where
Some(ch),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
@ -167,7 +164,6 @@ where
Some(ch),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
@ -184,12 +180,13 @@ where
} else {
// This must be the end of the identifier. We're done!
next_char = Some(ch);
break;
}
chars_parsed += 1;
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
}
if part_buf.is_empty() {
@ -200,10 +197,9 @@ where
// If we made it this far and don't have a next_char, then necessarily
// we have consumed a '.' char previously.
return malformed(
next_char.or_else(|| Some('.')),
Some('.'),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
@ -224,14 +220,7 @@ where
Ident::AccessorFunction(value)
} else {
return malformed(
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
}
} else if noncapitalized_parts.is_empty() {
// We have capitalized parts only, so this must be a tag.
@ -245,33 +234,19 @@ where
}
} else {
// This is a qualified tag, which is not allowed!
return malformed(
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
}
}
None => {
// We had neither capitalized nor noncapitalized parts,
// yet we made it this far. The only explanation is that this was
// a stray '.' drifting through the cosmos.
return Err(unexpected('.', 1, state, Attempting::Identifier));
return Err(unexpected(1, state, Attempting::Identifier));
}
}
} else if is_private_tag {
// This is qualified field access with an '@' in front, which does not make sense!
return malformed(
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
} else {
// We have multiple noncapitalized parts, so this must be field access.
Ident::Access {
@ -280,22 +255,16 @@ where
}
};
let state = state.advance_without_indenting(chars_parsed)?;
Ok(((answer, next_char), state))
Ok(((answer, None), state))
}
fn malformed<'a, I>(
fn malformed<'a>(
opt_bad_char: Option<char>,
arena: &'a Bump,
state: State<'a>,
chars: &mut I,
mut state: State<'a>,
capitalized_parts: Vec<&'a str>,
noncapitalized_parts: Vec<&'a str>,
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
where
I: Iterator<Item = char>,
{
) -> ParseResult<'a, (Ident<'a>, Option<char>)> {
// Reconstruct the original string that we've been parsing.
let mut full_string = String::new_in(arena);
@ -311,7 +280,9 @@ where
// Consume the remaining chars in the identifier.
let mut next_char = None;
for ch in chars {
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// We can't use ch.is_alphanumeric() here because that passes for
// things that are "numeric" but not ASCII digits, like `¾`
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
@ -321,20 +292,23 @@ where
break;
}
}
let chars_parsed = full_string.len();
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
}
Ok((
(Ident::Malformed(full_string.into_bump_str()), next_char),
state.advance_without_indenting(chars_parsed)?,
state,
))
}
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
move |arena: &'a Bump, state: State<'a>| {
// Discard next_char; we don't need it.
let ((string, _), state) = parse_ident(arena, &mut state.input.chars(), state)?;
let ((string, _), state) = parse_ident(arena, state)?;
Ok((string, state))
}
@ -344,33 +318,28 @@ pub fn global_tag_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str>
where
F: Fn(char) -> bool,
{
move |arena, state: State<'a>| {
let mut chars = state.input.chars();
move |arena, mut state: State<'a>| {
// pred will determine if this is a tag or ident (based on capitalization)
let first_letter = match chars.next() {
Some(first_char) => {
if pred(first_char) {
first_char
} else {
return Err(unexpected(
first_char,
0,
state,
Attempting::RecordFieldLabel,
));
let (first_letter, bytes_parsed) = match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
if !pred(first_letter) {
return Err(unexpected(0, state, Attempting::RecordFieldLabel));
}
(first_letter, bytes_parsed)
}
None => {
return Err(unexpected_eof(0, Attempting::RecordFieldLabel, state));
}
Err(reason) => return state.fail(reason),
};
let mut buf = String::with_capacity_in(1, arena);
buf.push(first_letter);
for ch in chars {
state = state.advance_without_indenting(bytes_parsed)?;
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
@ -378,18 +347,18 @@ where
// * A ':' indicating the end of the field
if ch.is_alphabetic() || ch.is_ascii_digit() {
buf.push(ch);
state = state.advance_without_indenting(bytes_parsed)?;
} else {
// This is the end of the field. We're done!
break;
}
}
Err(reason) => return state.fail(reason),
};
}
let chars_parsed = buf.len();
Ok((
buf.into_bump_str(),
state.advance_without_indenting(chars_parsed)?,
))
Ok((buf.into_bump_str(), state))
}
}

View file

@ -6,7 +6,10 @@ use crate::blankspace::{space0_around, space1};
use crate::expr::def;
use crate::header::ModuleName;
use crate::ident::unqualified_ident;
use crate::parser::{self, char, loc, optional, string, unexpected, unexpected_eof, Parser, State};
use crate::parser::{
self, ascii_char, ascii_string, loc, optional, peek_utf8_char, peek_utf8_char_at, unexpected,
Parser, State,
};
use bumpalo::collections::{String, Vec};
use roc_region::all::Located;
@ -30,7 +33,10 @@ pub fn app_module<'a>() -> impl Parser<'a, Module<'a>> {
pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> {
parser::map(
and!(
skip_first!(string("interface"), and!(space1(1), loc!(module_name()))),
skip_first!(
ascii_string("interface"),
and!(space1(1), loc!(module_name()))
),
and!(exposes(), imports())
),
|(
@ -56,72 +62,68 @@ pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> {
#[inline(always)]
pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> {
move |arena, state: State<'a>| {
let mut chars = state.input.chars();
let first_letter = match chars.next() {
Some(first_char) => {
// Module names must all be uppercase
if first_char.is_uppercase() {
first_char
} else {
return Err(unexpected(
first_char,
0,
state,
Attempting::RecordFieldLabel,
));
}
}
None => {
return Err(unexpected_eof(0, Attempting::Identifier, state));
}
move |arena, mut state: State<'a>| {
match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
if !first_letter.is_uppercase() {
return Err(unexpected(0, state, Attempting::Module));
};
let mut buf = String::with_capacity_in(1, arena);
let mut buf = String::with_capacity_in(4, arena);
buf.push(first_letter);
while let Some(ch) = chars.next() {
state = state.advance_without_indenting(bytes_parsed)?;
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A '.' separating module parts
if ch.is_alphabetic() || ch.is_ascii_digit() {
state = state.advance_without_indenting(bytes_parsed)?;
buf.push(ch);
} else if ch == '.' {
match chars.next() {
Some(next) => {
match peek_utf8_char_at(&state, 1) {
Ok((next, next_bytes_parsed)) => {
if next.is_uppercase() {
// If we hit another uppercase letter, keep going!
buf.push('.');
buf.push(next);
state = state.advance_without_indenting(
bytes_parsed + next_bytes_parsed,
)?;
} else {
// We have finished parsing the module name.
//
// There may be an identifier after this '.',
// e.g. "baz" in `Foo.Bar.baz`
break;
return Ok((
ModuleName::new(buf.into_bump_str()),
state,
));
}
}
None => {
// A module name can't end with a '.'
return Err(unexpected_eof(0, Attempting::Identifier, state));
}
Err(reason) => return state.fail(reason),
}
} else {
// This is the end of the module name. We're done!
break;
}
}
Err(reason) => return state.fail(reason),
}
}
let chars_parsed = buf.len();
Ok((
ModuleName::new(buf.into_bump_str()),
state.advance_without_indenting(chars_parsed)?,
))
Ok((ModuleName::new(buf.into_bump_str()), state))
}
Err(reason) => state.fail(reason),
}
}
}
@ -129,7 +131,7 @@ pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> {
fn app_header<'a>() -> impl Parser<'a, AppHeader<'a>> {
parser::map(
and!(
skip_first!(string("app"), and!(space1(1), loc!(module_name()))),
skip_first!(ascii_string("app"), and!(space1(1), loc!(module_name()))),
and!(provides(), imports())
),
|(
@ -167,8 +169,14 @@ fn provides<'a>() -> impl Parser<
),
> {
and!(
and!(skip_second!(space1(1), string("provides")), space1(1)),
collection!(char('['), loc!(exposes_entry()), char(','), char(']'), 1)
and!(skip_second!(space1(1), ascii_string("provides")), space1(1)),
collection!(
ascii_char('['),
loc!(exposes_entry()),
ascii_char(','),
ascii_char(']'),
1
)
)
}
@ -181,8 +189,14 @@ fn exposes<'a>() -> impl Parser<
),
> {
and!(
and!(skip_second!(space1(1), string("exposes")), space1(1)),
collection!(char('['), loc!(exposes_entry()), char(','), char(']'), 1)
and!(skip_second!(space1(1), ascii_string("exposes")), space1(1)),
collection!(
ascii_char('['),
loc!(exposes_entry()),
ascii_char(','),
ascii_char(']'),
1
)
)
}
@ -195,8 +209,14 @@ fn imports<'a>() -> impl Parser<
),
> {
and!(
and!(skip_second!(space1(1), string("imports")), space1(1)),
collection!(char('['), loc!(imports_entry()), char(','), char(']'), 1)
and!(skip_second!(space1(1), ascii_string("imports")), space1(1)),
collection!(
ascii_char('['),
loc!(imports_entry()),
ascii_char(','),
ascii_char(']'),
1
)
)
}
@ -213,8 +233,14 @@ fn imports_entry<'a>() -> impl Parser<'a, ImportsEntry<'a>> {
module_name(),
// e.g. `.{ Task, after}`
optional(skip_first!(
char('.'),
collection!(char('{'), loc!(exposes_entry()), char(','), char('}'), 1)
ascii_char('.'),
collection!(
ascii_char('{'),
loc!(exposes_entry()),
ascii_char(','),
ascii_char('}'),
1
)
))
),
|arena,

View file

@ -1,23 +1,19 @@
use crate::ast::{Attempting, Base, Expr};
use crate::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use std::char;
use std::str::from_utf8_unchecked;
pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
move |_arena, state: State<'a>| {
let mut chars = state.input.chars();
let bytes = &mut state.bytes.iter();
match chars.next() {
Some(first_ch) => {
match bytes.next() {
Some(&first_byte) => {
// Number literals must start with either an '-' or a digit.
if first_ch == '-' || first_ch.is_ascii_digit() {
parse_number_literal(first_ch, &mut chars, state)
if first_byte == b'-' || (first_byte as char).is_ascii_digit() {
parse_number_literal(first_byte as char, bytes, state)
} else {
Err(unexpected(
first_ch,
first_ch.len_utf8(),
state,
Attempting::NumberLiteral,
))
Err(unexpected(1, state, Attempting::NumberLiteral))
}
}
None => Err(unexpected_eof(0, state.attempting, state)),
@ -28,11 +24,11 @@ pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
#[inline(always)]
fn parse_number_literal<'a, I>(
first_ch: char,
chars: &mut I,
bytes: &mut I,
state: State<'a>,
) -> ParseResult<'a, Expr<'a>>
where
I: Iterator<Item = char>,
I: Iterator<Item = &'a u8>,
{
use self::LiteralType::*;
@ -40,13 +36,12 @@ where
// We already parsed 1 character (which may have been a minus sign).
let mut bytes_parsed = 1;
let mut prev_ch = first_ch;
let mut prev_byte = first_ch as u8;
let mut has_parsed_digits = first_ch.is_ascii_digit();
for next_ch in chars {
for &next_byte in bytes {
let err_unexpected = || {
Err(unexpected(
next_ch,
bytes_parsed,
state.clone(),
Attempting::NumberLiteral,
@ -55,23 +50,26 @@ where
let is_potentially_non_base10 = || {
(bytes_parsed == 1 && first_ch == '0')
|| (bytes_parsed == 2 && first_ch == '-' && prev_ch == '0')
|| (bytes_parsed == 2 && first_ch == '-' && prev_byte == b'0')
};
if next_ch == '.' {
match next_byte as char {
'.' => {
if typ == Float {
// You only get one decimal point!
return err_unexpected();
} else {
typ = Float;
}
} else if next_ch == 'x' {
}
'x' => {
if is_potentially_non_base10() {
typ = Hex;
} else {
return err_unexpected();
}
} else if next_ch == 'b' && typ == Num {
}
'b' if typ == Num => {
// We have to check for typ == Num because otherwise we get a false
// positive here when parsing a hex literal that happens to have
// a 'b' in it, e.g. 0xbbbb
@ -80,20 +78,24 @@ where
} else {
return err_unexpected();
}
} else if next_ch == 'o' {
}
'o' => {
if is_potentially_non_base10() {
typ = Octal;
} else {
return err_unexpected();
}
} else if next_ch.is_ascii_digit() {
}
next_ch if next_ch.is_ascii_digit() => {
has_parsed_digits = true;
} else if next_ch != '_' &&
}
next_ch
if next_ch != '_' &&
// ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals.
// We parse them in any int literal, so we can give a more helpful error
// in canonicalization (e.g. "the character 'f' is not allowed in Octal literals"
// or "the character 'g' is outside the range of valid Hex literals")
!next_ch.is_ascii_alphabetic()
!next_ch.is_ascii_alphabetic() =>
{
if has_parsed_digits {
// We hit an invalid number literal character; we're done!
@ -103,43 +105,36 @@ where
return err_unexpected();
}
}
_ => {}
}
// Since we only consume characters in the ASCII range for number literals,
// this will always be exactly 1. There's no need to call next_ch.utf8_len().
bytes_parsed += 1;
prev_ch = next_ch;
prev_byte = next_byte;
}
let from_base = |base| {
let is_negative = first_ch == '-';
let string = if is_negative {
&state.input[3..bytes_parsed]
} else {
&state.input[2..bytes_parsed]
};
Expr::NonBase10Int {
is_negative,
string,
base,
}
};
// At this point we have a number, and will definitely succeed.
// If the number is malformed (outside the supported range),
// we'll succeed with an appropriate Expr which records that.
let expr = match typ {
Num => Expr::Num(&state.input[0..bytes_parsed]),
Float => Expr::Float(&state.input[0..bytes_parsed]),
match typ {
Num => Ok((
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
// already validated that this range contains only ASCII digits
Expr::Num(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
state.advance_without_indenting(bytes_parsed)?,
)),
Float => Ok((
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
// already validated that this range contains only ASCII digits
Expr::Float(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
state.advance_without_indenting(bytes_parsed)?,
)),
// For these we trim off the 0x/0o/0b part
Hex => from_base(Base::Hex),
Octal => from_base(Base::Octal),
Binary => from_base(Base::Binary),
};
let next_state = state.advance_without_indenting(bytes_parsed)?;
Ok((expr, next_state))
Hex => from_base(Base::Hex, first_ch, bytes_parsed, state),
Octal => from_base(Base::Octal, first_ch, bytes_parsed, state),
Binary => from_base(Base::Binary, first_ch, bytes_parsed, state),
}
}
#[derive(Debug, PartialEq, Eq)]
@ -150,3 +145,29 @@ enum LiteralType {
Octal,
Binary,
}
fn from_base(
base: Base,
first_ch: char,
bytes_parsed: usize,
state: State<'_>,
) -> ParseResult<'_, Expr<'_>> {
let is_negative = first_ch == '-';
let bytes = if is_negative {
&state.bytes[3..bytes_parsed]
} else {
&state.bytes[2..bytes_parsed]
};
match parse_utf8(bytes) {
Ok(string) => Ok((
Expr::NonBase10Int {
is_negative,
string,
base,
},
state.advance_without_indenting(bytes_parsed)?,
)),
Err(reason) => state.fail(reason),
}
}

View file

@ -1,14 +1,17 @@
use crate::ast::Attempting;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use encode_unicode::CharExt;
use roc_region::all::{Located, Region};
use std::fmt;
use std::str::from_utf8;
use std::{char, u16};
/// A position in a source file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Clone, PartialEq, Eq)]
pub struct State<'a> {
/// The raw input string.
pub input: &'a str,
/// The raw input bytes from the file.
pub bytes: &'a [u8],
/// Current line of the input
pub line: u32,
@ -39,15 +42,15 @@ pub enum Either<First, Second> {
}
impl<'a> State<'a> {
pub fn new(input: &'a str, attempting: Attempting) -> State<'a> {
pub fn new(bytes: &'a [u8], attempting: Attempting) -> State<'a> {
State {
input,
bytes,
line: 0,
column: 0,
indent_col: 0,
is_indenting: true,
attempting,
original_len: input.len(),
original_len: bytes.len(),
}
}
@ -69,7 +72,7 @@ impl<'a> State<'a> {
///
/// So if the parser has consumed 8 bytes, this function will return 8.
pub fn bytes_consumed(&self) -> usize {
self.original_len - self.input.len()
self.original_len - self.bytes.len()
}
/// Increments the line, then resets column, indent_col, and is_indenting.
@ -77,7 +80,7 @@ impl<'a> State<'a> {
pub fn newline(&self) -> Result<Self, (Fail, Self)> {
match self.line.checked_add(1) {
Some(line) => Ok(State {
input: &self.input[1..],
bytes: &self.bytes[1..],
line,
column: 0,
indent_col: 0,
@ -99,11 +102,11 @@ impl<'a> State<'a> {
/// This assumes we are *not* advancing with spaces, or at least that
/// any spaces on the line were preceded by non-spaces - which would mean
/// they weren't eligible to indent anyway.
pub fn advance_without_indenting(&self, quantity: usize) -> Result<Self, (Fail, Self)> {
pub fn advance_without_indenting(self, quantity: usize) -> Result<Self, (Fail, Self)> {
match (self.column as usize).checked_add(quantity) {
Some(column_usize) if column_usize <= u16::MAX as usize => {
Ok(State {
input: &self.input[quantity..],
bytes: &self.bytes[quantity..],
line: self.line,
column: column_usize as u16,
indent_col: self.indent_col,
@ -141,7 +144,7 @@ impl<'a> State<'a> {
};
Ok(State {
input: &self.input[spaces..],
bytes: &self.bytes[spaces..],
line: self.line,
column: column_usize as u16,
indent_col,
@ -169,6 +172,35 @@ impl<'a> State<'a> {
end_line: self.line,
}
}
/// Return a failing ParseResult for the given FailReason
pub fn fail<T>(self, reason: FailReason) -> Result<(T, Self), (Fail, Self)> {
Err((
Fail {
reason,
attempting: self.attempting,
},
self,
))
}
}
impl<'a> fmt::Debug for State<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "State {{")?;
match from_utf8(self.bytes) {
Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?,
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?,
}
write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?;
write!(f, "\n\tindent_col: {}", self.indent_col)?;
write!(f, "\n\tis_indenting: {:?}", self.is_indenting)?;
write!(f, "\n\tattempting: {:?}", self.attempting)?;
write!(f, "\n\toriginal_len: {}", self.original_len)?;
write!(f, "\n}}")
}
}
#[test]
@ -182,13 +214,14 @@ pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FailReason {
Unexpected(char, Region),
Unexpected(Region),
OutdentedTooFar,
ConditionFailed,
LineTooLong(u32 /* which line was too long */),
TooManyLines,
Eof(Region),
InvalidPattern,
BadUtf8,
ReservedKeyword(Region),
ArgumentsBeforeEquals(Region),
}
@ -332,13 +365,12 @@ pub fn unexpected_eof(
}
pub fn unexpected(
ch: char,
chars_consumed: usize,
state: State<'_>,
attempting: Attempting,
) -> (Fail, State<'_>) {
checked_unexpected(chars_consumed, state, |region| Fail {
reason: FailReason::Unexpected(ch, region),
reason: FailReason::Unexpected(region),
attempting,
})
}
@ -385,9 +417,9 @@ fn line_too_long(attempting: Attempting, state: State<'_>) -> (Fail, State<'_>)
// (for example) the LineTooLong initially occurs in the middle of
// a one_of chain, which would otherwise prevent it from propagating.
let column = u16::MAX;
let input = state.input.get(0..state.input.len()).unwrap();
let bytes = state.bytes.get(0..state.bytes.len()).unwrap();
let state = State {
input,
bytes,
line: state.line,
indent_col: state.indent_col,
is_indenting: state.is_indenting,
@ -399,29 +431,75 @@ fn line_too_long(attempting: Attempting, state: State<'_>) -> (Fail, State<'_>)
(fail, state)
}
/// A single char.
pub fn char<'a>(expected: char) -> impl Parser<'a, ()> {
move |_arena, state: State<'a>| match state.input.chars().next() {
Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)),
Some(other_ch) => Err(unexpected(other_ch, 0, state, Attempting::Keyword)),
/// A single ASCII char.
pub fn ascii_char<'a>(expected: char) -> impl Parser<'a, ()> {
// Make sure this really is an ASCII char!
debug_assert!(expected.len_utf8() == 1);
move |_arena, state: State<'a>| match state.bytes.first() {
Some(&actual) if expected == actual as char => {
Ok(((), state.advance_without_indenting(1)?))
}
Some(_) => Err(unexpected(0, state, Attempting::Keyword)),
_ => Err(unexpected_eof(0, Attempting::Keyword, state)),
}
}
/// A hardcoded keyword string with no newlines in it.
pub fn string<'a>(keyword: &'static str) -> impl Parser<'a, ()> {
// We can't have newlines because we don't attempt to advance the row
// in the state, only the column.
debug_assert!(!keyword.contains('\n'));
/// A single UTF-8-encoded char. This will both parse *and* validate that the
/// char is valid UTF-8, but it will *not* advance the state.
pub fn peek_utf8_char<'a>(state: &State<'a>) -> Result<(char, usize), FailReason> {
if !state.bytes.is_empty() {
match char::from_utf8_slice_start(state.bytes) {
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
Err(_) => Err(FailReason::BadUtf8),
}
} else {
Err(FailReason::Eof(
Region::zero(), /* TODO get a better region */
))
}
}
/// A single UTF-8-encoded char, with an offset. This will both parse *and*
/// validate that the char is valid UTF-8, but it will *not* advance the state.
pub fn peek_utf8_char_at<'a>(
state: &State<'a>,
offset: usize,
) -> Result<(char, usize), FailReason> {
if state.bytes.len() > offset {
let bytes = &state.bytes[offset..];
match char::from_utf8_slice_start(bytes) {
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
Err(_) => Err(FailReason::BadUtf8),
}
} else {
Err(FailReason::Eof(
Region::zero(), /* TODO get a better region */
))
}
}
/// A hardcoded string with no newlines, consisting only of ASCII characters
pub fn ascii_string<'a>(keyword: &'static str) -> impl Parser<'a, ()> {
// Verify that this really is exclusively ASCII characters.
// The `unsafe` block in this function relies upon this assumption!
//
// Also, this can't have newlines because we don't attempt to advance
// the row in the state, only the column.
debug_assert!(keyword.chars().all(|ch| ch.len_utf8() == 1 && ch != '\n'));
move |_arena, state: State<'a>| {
let input = state.input;
let len = keyword.len();
// TODO do this comparison in one SIMD instruction (on supported systems)
match input.get(0..len) {
Some(next_str) if next_str == keyword => {
match state.bytes.get(0..len) {
Some(next_str) => {
if next_str == keyword.as_bytes() {
Ok(((), state.advance_without_indenting(len)?))
} else {
Err(unexpected(len, state, Attempting::Keyword))
}
}
_ => Err(unexpected_eof(0, Attempting::Keyword, state)),
}
@ -686,7 +764,7 @@ macro_rules! collection {
// We could change the AST to add extra storage specifically to
// support empty literals containing newlines or comments, but this
// does not seem worth even the tiniest regression in compiler performance.
zero_or_more!($crate::parser::char(' ')),
zero_or_more!($crate::parser::ascii_char(' ')),
skip_second!(
$crate::parser::sep_by0(
$delimiter,
@ -912,6 +990,7 @@ macro_rules! record_field {
use $crate::ast::AssignedField::*;
use $crate::blankspace::{space0, space0_before};
use $crate::ident::lowercase_ident;
use $crate::parser::ascii_char;
use $crate::parser::Either::*;
// You must have a field name, e.g. "email"
@ -922,8 +1001,8 @@ macro_rules! record_field {
// Having a value is optional; both `{ email }` and `{ email: blah }` work.
// (This is true in both literals and types.)
let (opt_loc_val, state) = $crate::parser::optional(either!(
skip_first!(char(':'), space0_before($val_parser, $min_indent)),
skip_first!(char('?'), space0_before($val_parser, $min_indent))
skip_first!(ascii_char(':'), space0_before($val_parser, $min_indent)),
skip_first!(ascii_char('?'), space0_before($val_parser, $min_indent))
))
.parse(arena, state)?;
@ -952,10 +1031,10 @@ macro_rules! record_field {
macro_rules! record_without_update {
($val_parser:expr, $min_indent:expr) => {
collection!(
char('{'),
ascii_char('{'),
loc!(record_field!($val_parser, $min_indent)),
char(','),
char('}'),
ascii_char(','),
ascii_char('}'),
$min_indent
)
};
@ -965,7 +1044,7 @@ macro_rules! record_without_update {
macro_rules! record {
($val_parser:expr, $min_indent:expr) => {
skip_first!(
$crate::parser::char('{'),
$crate::parser::ascii_char('{'),
and!(
// You can optionally have an identifier followed by an '&' to
// make this a record update, e.g. { Foo.user & username: "blah" }.
@ -981,7 +1060,7 @@ macro_rules! record {
)),
$min_indent
),
$crate::parser::char('&')
$crate::parser::ascii_char('&')
)),
loc!(skip_first!(
// We specifically allow space characters inside here, so that
@ -995,16 +1074,16 @@ macro_rules! record {
// We could change the AST to add extra storage specifically to
// support empty literals containing newlines or comments, but this
// does not seem worth even the tiniest regression in compiler performance.
zero_or_more!($crate::parser::char(' ')),
zero_or_more!($crate::parser::ascii_char(' ')),
skip_second!(
$crate::parser::sep_by0(
$crate::parser::char(','),
$crate::parser::ascii_char(','),
$crate::blankspace::space0_around(
loc!(record_field!($val_parser, $min_indent)),
$min_indent
)
),
$crate::parser::char('}')
$crate::parser::ascii_char('}')
)
))
)
@ -1067,3 +1146,10 @@ where
{
attempt!(attempting, parser)
}
pub fn parse_utf8(bytes: &[u8]) -> Result<&str, FailReason> {
match from_utf8(bytes) {
Ok(string) => Ok(string),
Err(_) => Err(FailReason::BadUtf8),
}
}

View file

@ -1,8 +1,7 @@
use crate::ast::Attempting;
use crate::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use std::char;
pub enum StringLiteral<'a> {
Line(&'a str),
@ -11,14 +10,15 @@ pub enum StringLiteral<'a> {
pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
move |arena: &'a Bump, state: State<'a>| {
let mut chars = state.input.chars();
let mut bytes = state.bytes.iter();
// String literals must start with a quote.
// If this doesn't, it must not be a string literal!
match chars.next() {
Some('"') => (),
Some(other_char) => {
return Err(unexpected(other_char, 0, state, Attempting::StringLiteral));
match bytes.next() {
Some(&byte) => {
if byte != b'"' {
return Err(unexpected(0, state, Attempting::StringLiteral));
}
}
None => {
return Err(unexpected_eof(0, Attempting::StringLiteral, state));
@ -35,44 +35,49 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
// Since we're keeping the entire raw string, all we need to track is
// how many characters we've parsed. So far, that's 1 (the opening `"`).
let mut parsed_chars = 1;
let mut prev_ch = '"';
let mut prev_byte = b'"';
while let Some(ch) = chars.next() {
while let Some(&byte) = bytes.next() {
parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!)
if ch == '"' && prev_ch != '\\' {
let string = if parsed_chars == 2 {
if let Some('"') = chars.next() {
if byte == b'"' && prev_byte != b'\\' {
let (string, state) = if parsed_chars == 2 {
match bytes.next() {
Some(byte) if *byte == b'"' => {
// If the first three chars were all `"`, then this
// literal begins with `"""` and is a block string.
return parse_block_string(arena, state, &mut chars);
} else {
""
return parse_block_string(arena, state, &mut bytes);
}
_ => ("", state.advance_without_indenting(2)?),
}
} else {
// Start at 1 so we omit the opening `"`.
// Subtract 1 from parsed_chars so we omit the closing `"`.
&state.input[1..(parsed_chars - 1)]
let string_bytes = &state.bytes[1..(parsed_chars - 1)];
match parse_utf8(string_bytes) {
Ok(string) => (string, state.advance_without_indenting(parsed_chars)?),
Err(reason) => {
return state.fail(reason);
}
}
};
let next_state = state.advance_without_indenting(parsed_chars)?;
return Ok((StringLiteral::Line(string), next_state));
} else if ch == '\n' {
return Ok((StringLiteral::Line(string), state));
} else if byte == b'\n' {
// This is a single-line string, which cannot have newlines!
// Treat this as an unclosed string literal, and consume
// all remaining chars. This will mask all other errors, but
// it should make it easiest to debug; the file will be a giant
// error starting from where the open quote appeared.
return Err(unexpected(
'\n',
state.input.len() - 1,
state.bytes.len() - 1,
state,
Attempting::StringLiteral,
));
} else {
prev_ch = ch;
prev_byte = byte;
}
}
@ -88,48 +93,64 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
fn parse_block_string<'a, I>(
arena: &'a Bump,
state: State<'a>,
chars: &mut I,
bytes: &mut I,
) -> ParseResult<'a, StringLiteral<'a>>
where
I: Iterator<Item = char>,
I: Iterator<Item = &'a u8>,
{
// So far we have consumed the `"""` and that's it.
let mut parsed_chars = 3;
let mut prev_ch = '"';
let mut prev_byte = b'"';
let mut quotes_seen = 0;
// start at 3 to omit the opening `"`.
let mut line_start = 3;
let mut lines = Vec::new_in(arena);
let mut lines: Vec<'a, &'a str> = Vec::new_in(arena);
for ch in chars {
for byte in bytes {
parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!)
if ch == '"' && prev_ch != '\\' {
if *byte == b'"' && prev_byte != b'\\' {
if quotes_seen == 2 {
// three consecutive qoutes, end string
// Subtract 3 from parsed_chars so we omit the closing `"`.
let string = &state.input[line_start..(parsed_chars - 3)];
lines.push(string);
let line_bytes = &state.bytes[line_start..(parsed_chars - 3)];
let next_state = state.advance_without_indenting(parsed_chars)?;
return match parse_utf8(line_bytes) {
Ok(line) => {
let state = state.advance_without_indenting(parsed_chars)?;
return Ok((StringLiteral::Block(arena.alloc(lines)), next_state));
lines.push(line);
Ok((StringLiteral::Block(arena.alloc(lines)), state))
}
Err(reason) => state.fail(reason),
};
}
quotes_seen += 1;
} else if ch == '\n' {
} else if *byte == b'\n' {
// note this includes the newline
let string = &state.input[line_start..parsed_chars];
lines.push(string);
let line_bytes = &state.bytes[line_start..parsed_chars];
match parse_utf8(line_bytes) {
Ok(line) => {
lines.push(line);
quotes_seen = 0;
line_start = parsed_chars;
}
Err(reason) => {
return state.fail(reason);
}
}
} else {
quotes_seen = 0;
}
prev_ch = ch;
prev_byte = *byte;
}
// We ran out of characters before finding 3 closing quotes
@ -137,6 +158,6 @@ where
parsed_chars,
// TODO custom BlockStringLiteral?
Attempting::StringLiteral,
state.clone(),
state,
))
}

View file

@ -4,8 +4,8 @@ use crate::expr::{global_tag, private_tag};
use crate::ident::join_module_parts;
use crate::keyword;
use crate::parser::{
allocated, char, not, optional, string, unexpected, unexpected_eof, Either, ParseResult,
Parser, State,
allocated, ascii_char, ascii_string, not, optional, peek_utf8_char, unexpected, Either,
ParseResult, Parser, State,
};
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
@ -22,10 +22,10 @@ macro_rules! tag_union {
map!(
and!(
collection!(
char('['),
ascii_char('['),
loc!(tag_type($min_indent)),
char(','),
char(']'),
ascii_char(','),
ascii_char(']'),
$min_indent
),
optional(
@ -61,7 +61,7 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>>
and!(
space1(min_indent),
skip_first!(
string(keyword::AS),
ascii_string(keyword::AS),
space1_before(term(min_indent), min_indent)
)
)
@ -89,7 +89,7 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>>
/// The `*` type variable, e.g. in (List *) Wildcard,
fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
map!(loc!(char('*')), |loc_val: Located<()>| {
map!(loc!(ascii_char('*')), |loc_val: Located<()>| {
loc_val.map(|_| TypeAnnotation::Wildcard)
})
}
@ -97,7 +97,7 @@ fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
skip_first!(
// Once we hit an "as", stop parsing args
not(string(keyword::AS)),
not(ascii_string(keyword::AS)),
one_of!(
loc_wildcard(),
loc_parenthetical_type(min_indent),
@ -112,12 +112,12 @@ pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnot
#[inline(always)]
fn loc_parenthetical_type<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
between!(
char('('),
ascii_char('('),
space0_around(
move |arena, state| expression(min_indent).parse(arena, state),
min_indent,
),
char(')')
ascii_char(')')
)
}
@ -208,7 +208,7 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
move |arena, state: State<'a>| {
let (first, state) = space0_before(term(min_indent), min_indent).parse(arena, state)?;
let (rest, state) = zero_or_more!(skip_first!(
char(','),
ascii_char(','),
space0_around(term(min_indent), min_indent)
))
.parse(arena, state)?;
@ -216,7 +216,7 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
// TODO this space0 is dropped, so newlines just before the function arrow when there
// is only one argument are not seen by the formatter. Can we do better?
let (is_function, state) =
optional(skip_first!(space0(min_indent), string("->"))).parse(arena, state)?;
optional(skip_first!(space0(min_indent), ascii_string("->"))).parse(arena, state)?;
if is_function.is_some() {
let (return_type, state) =
@ -263,30 +263,30 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
fn parse_concrete_type<'a>(
arena: &'a Bump,
state: State<'a>,
mut state: State<'a>,
) -> ParseResult<'a, TypeAnnotation<'a>> {
let mut chars = state.input.chars();
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
let mut parts: Vec<&'a str> = Vec::new_in(arena);
// Qualified types must start with a capitalized letter.
match chars.next() {
Some(ch) => {
if ch.is_alphabetic() && ch.is_uppercase() {
part_buf.push(ch);
match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
if first_letter.is_alphabetic() && first_letter.is_uppercase() {
part_buf.push(first_letter);
} else {
return Err(unexpected(ch, 0, state, Attempting::ConcreteType));
return Err(unexpected(0, state, Attempting::ConcreteType));
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
}
None => {
return Err(unexpected_eof(0, Attempting::ConcreteType, state));
}
};
let mut chars_parsed = 1;
let mut next_char = None;
while let Some(ch) = chars.next() {
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
@ -295,21 +295,21 @@ fn parse_concrete_type<'a>(
if ch.is_alphabetic() {
if part_buf.is_empty() && !ch.is_uppercase() {
// Each part must begin with a capital letter.
return malformed(Some(ch), arena, state, &mut chars, parts);
return malformed(Some(ch), arena, state, parts);
}
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, &mut chars, parts);
return malformed(Some(ch), arena, state, parts);
}
part_buf.push(ch);
} else if ch == '.' {
// Having two consecutive dots is an error.
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, &mut chars, parts);
return malformed(Some(ch), arena, state, parts);
}
parts.push(part_buf.into_bump_str());
@ -323,7 +323,10 @@ fn parse_concrete_type<'a>(
break;
}
chars_parsed += 1;
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
}
if part_buf.is_empty() {
@ -333,23 +336,16 @@ fn parse_concrete_type<'a>(
//
// If we made it this far and don't have a next_char, then necessarily
// we have consumed a '.' char previously.
return malformed(
next_char.or_else(|| Some('.')),
arena,
state,
&mut chars,
parts,
);
return malformed(next_char.or_else(|| Some('.')), arena, state, parts);
}
if part_buf.is_empty() {
// We had neither capitalized nor noncapitalized parts,
// yet we made it this far. The only explanation is that this was
// a stray '.' drifting through the cosmos.
return Err(unexpected('.', 1, state, Attempting::Identifier));
return Err(unexpected(1, state, Attempting::Identifier));
}
let state = state.advance_without_indenting(chars_parsed)?;
let answer = TypeAnnotation::Apply(
join_module_parts(arena, parts.into_bump_slice()),
part_buf.into_bump_str(),
@ -361,28 +357,27 @@ fn parse_concrete_type<'a>(
fn parse_type_variable<'a>(
arena: &'a Bump,
state: State<'a>,
mut state: State<'a>,
) -> ParseResult<'a, TypeAnnotation<'a>> {
let mut chars = state.input.chars();
let mut buf = String::new_in(arena);
match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
// Type variables must start with a lowercase letter.
match chars.next() {
Some(ch) => {
if ch.is_alphabetic() && ch.is_lowercase() {
buf.push(ch);
if first_letter.is_alphabetic() && first_letter.is_lowercase() {
buf.push(first_letter);
} else {
return Err(unexpected(ch, 0, state, Attempting::TypeVariable));
return Err(unexpected(0, state, Attempting::TypeVariable));
}
}
None => {
return Err(unexpected_eof(0, Attempting::TypeVariable, state));
}
};
let mut chars_parsed = 1;
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
for ch in chars {
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
@ -394,25 +389,23 @@ fn parse_type_variable<'a>(
break;
}
chars_parsed += 1;
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
}
let state = state.advance_without_indenting(chars_parsed)?;
let answer = TypeAnnotation::BoundVariable(buf.into_bump_str());
Ok((answer, state))
}
fn malformed<'a, I>(
fn malformed<'a>(
opt_bad_char: Option<char>,
arena: &'a Bump,
state: State<'a>,
chars: &mut I,
mut state: State<'a>,
parts: Vec<&'a str>,
) -> ParseResult<'a, TypeAnnotation<'a>>
where
I: Iterator<Item = char>,
{
) -> ParseResult<'a, TypeAnnotation<'a>> {
// Reconstruct the original string that we've been parsing.
let mut full_string = String::new_in(arena);
@ -423,7 +416,9 @@ where
}
// Consume the remaining chars in the identifier.
for ch in chars {
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// We can't use ch.is_alphanumeric() here because that passes for
// things that are "numeric" but not ASCII digits, like `¾`
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
@ -431,12 +426,15 @@ where
} else {
break;
}
}
let chars_parsed = full_string.len();
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
}
Ok((
TypeAnnotation::Malformed(full_string.into_bump_str()),
state.advance_without_indenting(chars_parsed)?,
state,
))
}

View file

@ -13,7 +13,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View file

@ -918,17 +918,12 @@ mod test_parse {
let arena = Bump::new();
let arg = arena.alloc(Located::new(0, 0, 5, 6, Num("1")));
let args = bumpalo::vec![in &arena; &*arg];
let expected = Expr::Apply(
arena.alloc(Located::new(
0,
0,
0,
4,
Var {
let expr = Var {
module_name: "",
ident: "whee",
},
)),
};
let expected = Expr::Apply(
arena.alloc(Located::new(0, 0, 0, 4, expr)),
args,
CalledVia::Space,
);
@ -1040,16 +1035,11 @@ mod test_parse {
fn unary_negation() {
let arena = Bump::new();
let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Negate);
let loc_arg1_expr = Located::new(
0,
0,
1,
4,
Var {
let arg1_expr = Var {
module_name: "",
ident: "foo",
},
);
};
let loc_arg1_expr = Located::new(0, 0, 1, 4, arg1_expr);
let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op);
let actual = parse_with(&arena, "-foo");
@ -1060,16 +1050,11 @@ mod test_parse {
fn unary_not() {
let arena = Bump::new();
let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Not);
let loc_arg1_expr = Located::new(
0,
0,
1,
5,
Var {
let arg1_expr = Var {
module_name: "",
ident: "blah",
},
);
};
let loc_arg1_expr = Located::new(0, 0, 1, 5, arg1_expr);
let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op);
let actual = parse_with(&arena, "!blah");
@ -2092,7 +2077,7 @@ mod test_parse {
"#
);
let actual = interface_header()
.parse(&arena, State::new(&src, Attempting::Module))
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual);
@ -2121,7 +2106,7 @@ mod test_parse {
"#
);
let actual = interface_header()
.parse(&arena, State::new(&src, Attempting::Module))
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual);
@ -2174,7 +2159,7 @@ mod test_parse {
"#
);
let actual = module_defs()
.parse(&arena, State::new(&src, Attempting::Module))
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual);

View file

@ -91,7 +91,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View file

@ -86,9 +86,6 @@ mod test_reporting {
let mut procs = Procs::default();
let mut ident_ids = interns.all_ident_ids.remove(&home).unwrap();
// assume 64-bit pointers
let pointer_size = std::mem::size_of::<u64>() as u32;
// Populate Procs and Subs, and get the low-level Expr from the canonical Expr
let mut mono_env = roc_mono::ir::Env {
arena: &arena,
@ -96,7 +93,6 @@ mod test_reporting {
problems: &mut mono_problems,
home,
ident_ids: &mut ident_ids,
pointer_size,
jump_counter: arena.alloc(0),
};
let _mono_expr = Stmt::new(&mut mono_env, loc_expr.value, &mut procs);
@ -3771,4 +3767,48 @@ mod test_reporting {
),
)
}
#[test]
// Exhaustiveness check: the `when` covers `Foo _ 1 _` and then a catch-all
// `_`, so the match is exhaustive and no problem should be reported — the
// expected report is the empty string.
fn first_wildcard_is_required() {
report_problem_as(
indoc!(
r#"
when Foo 1 2 3 is
Foo _ 1 _ -> 1
_ -> 2
"#
),
"",
)
}
#[test]
// Redundancy check: the second catch-all `_ -> 3` can never match because the
// earlier `_ -> 2` already handles every remaining value, so the compiler is
// expected to emit a REDUNDANT PATTERN report pointing at the 3rd branch.
fn second_wildcard_is_redundant() {
report_problem_as(
indoc!(
r#"
when Foo 1 2 3 is
Foo _ 1 _ -> 1
_ -> 2
_ -> 3
"#
),
indoc!(
r#"
-- REDUNDANT PATTERN -----------------------------------------------------------
The 3rd pattern is redundant:
1 when Foo 1 2 3 is
2 Foo _ 1 _ -> 1
3 _ -> 2
4 _ -> 3
^
Any value of this shape will be handled by a previous pattern, so this
one should be removed.
"#
),
)
}
}

View file

@ -93,7 +93,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View file

@ -2362,7 +2362,7 @@ mod solve_expr {
[]
Ok next ->
List.push (reconstructPath cameFrom next) goal
List.append (reconstructPath cameFrom next) goal
reconstructPath
"#
@ -2534,7 +2534,7 @@ mod solve_expr {
x = []
when List.get input 0 is
Ok val -> List.push x val
Ok val -> List.append x val
Err _ -> f input
f
"#

View file

@ -2275,9 +2275,9 @@ mod solve_uniq_expr {
}
#[test]
fn list_push() {
fn list_append() {
infer_eq(
"List.push",
"List.append",
"Attr * (Attr * (List a), a -> Attr * (List a))",
);
}
@ -2303,7 +2303,7 @@ mod solve_uniq_expr {
infer_eq(
indoc!(
r#"
singleton = \x -> List.push [] x
singleton = \x -> List.append [] x
singleton
"#
@ -2317,7 +2317,7 @@ mod solve_uniq_expr {
infer_eq(
indoc!(
r#"
reverse = \list -> List.foldr list (\e, l -> List.push l e) []
reverse = \list -> List.foldr list (\e, l -> List.append l e) []
reverse
"#
@ -2742,7 +2742,7 @@ mod solve_uniq_expr {
[]
Ok next ->
List.push (reconstructPath cameFrom next) goal
List.append (reconstructPath cameFrom next) goal
reconstructPath
"#
@ -2812,7 +2812,7 @@ mod solve_uniq_expr {
[]
Ok next ->
List.push (reconstructPath cameFrom next) goal
List.append (reconstructPath cameFrom next) goal
updateCost : position, position, Model position -> Model position
updateCost = \current, neighbour, model ->
@ -2897,7 +2897,7 @@ mod solve_uniq_expr {
[]
Ok next ->
List.push (reconstructPath cameFrom next) goal
List.append (reconstructPath cameFrom next) goal
updateCost : position, position, Model position -> Model position

View file

@ -93,7 +93,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View file

@ -718,6 +718,64 @@ However, it cannot involve record field access. So this would *not* compile:
{ Foo.defaults.config & timeZone: utc }
```
## Optional Record Fields
There's a pattern in Elm where you pass a function a record of configuration
values, some of which you don't really care about and want to leave as defaults.
To incorporate the default config options, you call the function like so:
```elm
table { defaultConfig | height = 800, width = 600 }
```
This way, as the caller I'm specifying only the `height` and `width` fields,
and leaving the others to whatever is inside `defaultConfig`. Perhaps it also
has the fields `x` and `y`.
In Roc, you can do this like so:
```elm
table { height = 800, width = 600 }
```
...and the `table` function will fill in its default values for `x` and `y`.
There is no need to use a `defaultConfig` record.
Here's how `table` would be defined in Roc:
```
table = \{ height, width, x ? 0.0, y ? 0.0 } ->
```
This is using *optional field destructuring* to destructure a record while
also providing default values for any fields that might be missing.
Here's the type of `table`:
```
table : { height : Float, width : Float, x ? Float, y ? Float } -> Table
table = \{ height, width, x ? 0.0, y ? 0.0 } ->
```
This says that `table` takes a record with two *required* fields (`height` and
`width`) and two *optional* fields (`x` and `y`). It also says that all of those
fields have the type `Float`. This means you can choose to omit `x`, `y`, or both,
when calling the function...but if you provide them, they must be numbers.
This is also the type that would have been inferred for `table` if no annotation
had been written. Roc's compiler can tell from the destructuring syntax
`x ? 0.0` that `x` is an optional field, and that it has the type `Float`. These
default values can reference other expressions in the record destructure; if you
wanted, you could write `{ height, width, x ? 0.0, y ? x + 1 }`.
Destructuring is the only way to implement a record with optional fields.
(For example, if you write the expression `config.x` and `x` is an optional field,
you'll get a compile error.)
This means it's never possible to end up with an "optional value" that exists
outside a record field. Optionality is a concept that exists only in record fields,
and it's intended for the use case of config records like this. The ergonomics
of destructuring mean this wouldn't be a good fit for data modeling.
## Standard Data Structures
Elm has `List`, `Array`, `Set`, and `Dict` in the standard library.