From b91359e4c7eedb4aaee540397bac53004eb67b45 Mon Sep 17 00:00:00 2001
From: Richard Feldman
Date: Sat, 1 Aug 2020 15:42:03 -0400
Subject: [PATCH] Initial pass at arena_pool

---
 Cargo.lock                            |   4 +
 Cargo.toml                            |   1 +
 compiler/arena_pool/Cargo.toml        |  11 ++
 compiler/arena_pool/src/arena_pool.rs | 268 ++++++++++++++++++++++++++
 compiler/arena_pool/src/lib.rs        |   2 +
 compiler/arena_pool/src/pool_id.rs    |  14 ++
 compiler/load/src/file.rs             |   5 -
 roc-for-elm-programmers.md            |  10 +
 8 files changed, 310 insertions(+), 5 deletions(-)
 create mode 100644 compiler/arena_pool/Cargo.toml
 create mode 100644 compiler/arena_pool/src/arena_pool.rs
 create mode 100644 compiler/arena_pool/src/lib.rs
 create mode 100644 compiler/arena_pool/src/pool_id.rs

diff --git a/Cargo.lock b/Cargo.lock
index f01301ee2d..42436daabc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -69,6 +69,10 @@ version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034"
 
+[[package]]
+name = "arena-pool"
+version = "0.1.0"
+
 [[package]]
 name = "arrayvec"
 version = "0.5.1"
diff --git a/Cargo.toml b/Cargo.toml
index 1bf23787c4..594ea02533 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,6 +20,7 @@ members = [
     "compiler/load",
     "compiler/gen",
     "compiler/build",
+    "compiler/arena_pool",
     "vendor/ena",
     "vendor/pathfinding",
     "vendor/pretty",
diff --git a/compiler/arena_pool/Cargo.toml b/compiler/arena_pool/Cargo.toml
new file mode 100644
index 0000000000..2d71a56226
--- /dev/null
+++ b/compiler/arena_pool/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "arena-pool"
+version = "0.1.0"
+authors = ["Richard Feldman"]
+repository = "https://github.com/rtfeldman/roc"
+edition = "2018"
+description = "An arena allocator pool for the Roc compiler"
+license = "Apache-2.0"
+
+[dependencies]
+smallvec = "1.4"
diff --git a/compiler/arena_pool/src/arena_pool.rs b/compiler/arena_pool/src/arena_pool.rs
new file mode 100644
index 0000000000..780e307e37
--- /dev/null
+++ b/compiler/arena_pool/src/arena_pool.rs
@@ -0,0 +1,268 @@
+use crate::pool_id::PoolId;
+use smallvec::SmallVec;
+use std::marker::PhantomPinned;
+use std::ptr::{copy_nonoverlapping, NonNull};
+
+pub struct ArenaRef<T> {
+    pool_id: PoolId,
+    ptr: NonNull<T>,
+    _pin: PhantomPinned,
+}
+
+impl<T> ArenaRef<T> {
+    pub fn get<'a>(&'a self, arena: &Arena<T>) -> &'a T {
+        self.verify_pool_id(arena);
+
+        // SAFETY: we know this pointer is safe to follow because it will only
+        // get deallocated once the pool where it was created gets deallocated
+        // (along with all of the Arenas it leased), and we just verified that
+        // this ArenaRef's ID matches a pool which has not yet been deallocated.
+        unsafe { self.ptr.as_ref() }
+    }
+
+    pub fn get_mut<'a>(&'a mut self, arena: &Arena<T>) -> &'a mut T {
+        self.verify_pool_id(arena);
+
+        // SAFETY: we know this pointer is safe to follow because it will only
+        // get deallocated once the pool where it was created gets deallocated
+        // (along with all of the Arenas it leased), and we just verified that
+        // this ArenaRef's ID matches a pool which has not yet been deallocated.
+        unsafe { self.ptr.as_mut() }
+    }
+
+    fn verify_pool_id(&self, arena: &Arena<T>) {
+        // Verify that this ArenaRef actually came from the same pool as the
+        // given Arena.
+        //
+        // If it didn't, we may have come from a pool that's been freed,
+        // so trying to dereference our pointer could cause a use-after-free!
+        assert_eq!(self.pool_id, arena.pool_id);
+    }
+}
+
+pub struct ArenaVec<T> {
+    pool_id: PoolId,
+    buffer_ptr: NonNull<T>,
+    len: usize,
+    capacity: usize,
+    _pin: PhantomPinned,
+}
+
+impl<T> ArenaVec<T> {
+    pub fn new_in(arena: &Arena<T>) -> Self {
+        Self {
+            pool_id: arena.pool_id,
+            buffer_ptr: NonNull::dangling(),
+            capacity: 0,
+            len: 0,
+            _pin: PhantomPinned,
+        }
+    }
+
+    pub fn push<'a>(&'a mut self, val: T, arena: &mut Arena<T>) {
+        self.verify_pool_id(arena);
+
+        if self.len < self.capacity {
+            // We're all set!
+            //
+            // This empty branch is just here for branch prediction,
+            // since this should be the most common case in practice.
+        } else {
+            // Grow our capacity (starting from a small nonzero value on the
+            // first push) and reserve a new block.
+            self.capacity = (self.capacity * 2).max(4);
+
+            let ptr = arena.alloc_array(self.capacity);
+
+            // SAFETY: the existing buffer must have at least self.len elements,
+            // as must the new one, so copying that many between them is safe.
+            unsafe {
+                // Copy all elements from the current buffer into the new one
+                copy_nonoverlapping(self.buffer_ptr.as_ptr(), ptr, self.len);
+            }
+
+            self.buffer_ptr = unsafe { NonNull::new_unchecked(ptr) };
+        }
+
+        // Store the element in the appropriate memory address. Use a raw
+        // write, because this slot is uninitialized memory; assigning through
+        // a &mut T would try to drop whatever garbage happened to be there.
+        unsafe { self.buffer_ptr.as_ptr().add(self.len).write(val) };
+
+        self.len += 1;
+    }
+
+    pub fn get<'a>(&'a self, index: usize, arena: &Arena<T>) -> Option<&'a T> {
+        self.verify_pool_id(arena);
+
+        if index < self.len {
+            // SAFETY: we know this pointer is safe to follow because we've
+            // done a bounds check, and because we know it will only get
+            // deallocated once the pool where it was created gets deallocated
+            // (along with all of the Arenas it leased), and we just verified that
+            // this ArenaVec's ID matches a pool which has not yet been deallocated.
+            Some(unsafe { &*self.buffer_ptr.as_ptr().add(index) })
+        } else {
+            None
+        }
+    }
+
+    pub fn get_mut<'a>(&'a mut self, index: usize, arena: &Arena<T>) -> Option<&'a mut T> {
+        self.verify_pool_id(arena);
+
+        if index < self.len {
+            // SAFETY: we know this pointer is safe to follow because we've
+            // done a bounds check, and because we know it will only get
+            // deallocated once the pool where it was created gets deallocated
+            // (along with all of the Arenas it leased), and we just verified that
+            // this ArenaVec's ID matches a pool which has not yet been deallocated.
+            Some(unsafe { &mut *self.buffer_ptr.as_ptr().add(index) })
+        } else {
+            None
+        }
+    }
+
+    fn verify_pool_id(&self, arena: &Arena<T>) {
+        // Verify that this ArenaVec actually came from the same pool as the
+        // given Arena.
+        //
+        // If it didn't, we may have come from a pool that's been freed,
+        // so trying to dereference our pointer could cause a use-after-free!
+        assert_eq!(self.pool_id, arena.pool_id);
+    }
+}
+
+#[derive(Default, PartialEq, Eq)]
+pub struct ArenaPool<T> {
+    pool_id: PoolId,
+    chunks: Vec<Vec<T>>,
+    num_leased: usize,
+    default_chunk_capacity: usize,
+}
+
+impl<T> ArenaPool<T> {
+    const DEFAULT_CHUNK_SIZE: usize = 4096;
+
+    pub fn with_capacity(capacity: usize) -> ArenaPool<T> {
+        Self::with_capacity_and_chunk_size(capacity, Self::DEFAULT_CHUNK_SIZE)
+    }
+
+    pub fn with_capacity_and_chunk_size(capacity: usize, chunk_size: usize) -> ArenaPool<T> {
+        Self {
+            pool_id: PoolId::default(),
+            chunks: Vec::with_capacity(capacity),
+            num_leased: 0,
+            default_chunk_capacity: chunk_size,
+        }
+    }
+
+    /// Return a new Arena, which can be given to another thread.
+    /// This arena can be used to allocate ArenaRef and ArenaVec values which
+    /// are compatible with any Arena leased from this pool.
+    ///
+    /// Before this pool gets dropped, you must call reabsorb() on every
+    /// arena that has been leased - otherwise, dropping the pool will panic!
+    /// The memory safety of the system depends on all arenas having been
+    /// reabsorbed before the pool gets deallocated, which is why the pool's
+    /// Drop implementation enforces it.
+    pub fn lease(&mut self) -> Arena<T> {
+        self.num_leased += 1;
+
+        let mut chunks = SmallVec::with_capacity(1);
+
+        chunks.push(Vec::with_capacity(self.default_chunk_capacity));
+
+        Arena {
+            pool_id: self.pool_id,
+            chunks,
+            default_chunk_capacity: self.default_chunk_capacity,
+        }
+    }
+
+    /// Return an arena to the pool. (This would have been called "return" but
+    /// that's a reserved keyword.)
+    pub fn reabsorb(&mut self, arena: Arena<T>) {
+        // Ensure we're reabsorbing an arena that was
+        // actually leased by this pool in the first place!
+        assert_eq!(arena.pool_id, self.pool_id);
+
+        // Add the arena's chunks to our own, so their memory remains live
+        // after the arena gets dropped. This is important, because at this
+        // point their pointers can still potentially be dereferenced!
+        self.chunks.extend(arena.chunks.into_iter());
+
+        self.num_leased -= 1;
+    }
+}
+
+impl<T> Drop for ArenaPool<T> {
+    fn drop(&mut self) {
+        // When an ArenaPool gets dropped, it must not have any leased
+        // arenas remaining. If it does, there will be outstanding IDs which
+        // could be used with those non-reabsorbed Arenas to read freed memory!
+        // This would be a use-after-free; we panic rather than permit that.
+        assert_eq!(self.num_leased, 0);
+    }
+}
+
+#[derive(PartialEq, Eq)]
+pub struct Arena<T> {
+    pool_id: PoolId,
+    chunks: SmallVec<[Vec<T>; 1]>,
+    default_chunk_capacity: usize,
+}
+
+impl<T> Arena<T> {
+    pub fn alloc(&mut self, val: T) -> ArenaRef<T> {
+        // Every Arena should always be initialized with at least one chunk.
+        debug_assert!(!self.chunks.is_empty());
+
+        let (chunk_len, chunk_capacity) = {
+            let chunk = self.chunks.last().unwrap();
+
+            (chunk.len(), chunk.capacity())
+        };
+
+        if chunk_len == chunk_capacity {
+            // We've run out of space in our last chunk. Create a new one!
+            self.chunks
+                .push(Vec::with_capacity(self.default_chunk_capacity));
+        }
+
+        let chunk = self.chunks.last_mut().unwrap();
+        let index = chunk.len();
+
+        chunk.push(val);
+
+        // Get a pointer to the memory address within our particular chunk.
+        let ptr: *mut T = &mut chunk[index];
+
+        ArenaRef {
+            pool_id: self.pool_id,
+            ptr: unsafe { NonNull::new_unchecked(ptr) },
+            _pin: PhantomPinned,
+        }
+    }
+
+    fn alloc_array(&mut self, num_elems: usize) -> *mut T {
+        let (chunk_len, chunk_capacity) = {
+            let chunk = self.chunks.last().unwrap();
+
+            (chunk.len(), chunk.capacity())
+        };
+
+        if chunk_len + num_elems <= chunk_capacity {
+            // This will fit in the current chunk, so we'll just use that.
+        } else {
+            // This won't fit in our current chunk, so we'll need a new one.
+            // If it won't even fit in a default-sized chunk, allocate a
+            // chunk that's big enough to hold it on its own.
+            let capacity = self.default_chunk_capacity.max(num_elems);
+
+            self.chunks.push(Vec::with_capacity(capacity));
+        }
+
+        // Hand back a pointer to the first free slot, and mark the reserved
+        // slots as used by advancing the chunk's length, so later allocations
+        // won't hand out the same memory. The caller is responsible for
+        // initializing these slots before reading them.
+        let chunk = self.chunks.last_mut().unwrap();
+        let start = chunk.len();
+
+        unsafe {
+            chunk.set_len(start + num_elems);
+
+            chunk.as_mut_ptr().add(start)
+        }
+    }
+}
diff --git a/compiler/arena_pool/src/lib.rs b/compiler/arena_pool/src/lib.rs
new file mode 100644
index 0000000000..c2b7822bf0
--- /dev/null
+++ b/compiler/arena_pool/src/lib.rs
@@ -0,0 +1,2 @@
+pub mod arena_pool;
+mod pool_id;
diff --git a/compiler/arena_pool/src/pool_id.rs b/compiler/arena_pool/src/pool_id.rs
new file mode 100644
index 0000000000..c78347bdf1
--- /dev/null
+++ b/compiler/arena_pool/src/pool_id.rs
@@ -0,0 +1,14 @@
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub struct PoolId(usize);
+
+impl Default for PoolId {
+    fn default() -> Self {
+        let id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
+
+        Self(id)
+    }
+}
diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs
index 24dca3718d..2751fd696b 100644
--- a/compiler/load/src/file.rs
+++ b/compiler/load/src/file.rs
@@ -579,11 +579,6 @@ fn parse_src(
 }
 
 /// Load a module by its filename
-///
-/// This has two unsafe calls:
-///
-/// * memory map the filename instead of doing a buffered read
-/// * assume the contents of the file are valid UTF-8
 fn load_filename(
     filename: PathBuf,
     msg_tx: MsgSender,
diff --git a/roc-for-elm-programmers.md b/roc-for-elm-programmers.md
index e4f733b786..5bae2651a2 100644
--- a/roc-for-elm-programmers.md
+++ b/roc-for-elm-programmers.md
@@ -718,6 +718,16 @@ However, it cannot involve record field access. So this would *not* compile:
 { Foo.defaults.config & timeZone: utc }
 ```
+## Optional Record Fields
+
+In Elm, all record fields are required. (Extensible record types can leave some fields unlisted in an annotation, but every record value still has all of its fields.)
+
+
+
+
+
+
+
 
 ## Standard Data Structures
 
 Elm has `List`, `Array`, `Set`, and `Dict` in the standard library.
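
For context, here is a minimal, single-threaded sketch of how the pieces introduced in this patch are intended to fit together: create a pool, lease an arena, allocate values and a growable vector from it, then reabsorb the arena before the pool is dropped. This is illustrative rather than part of the patch; the `arena_pool::arena_pool` module path and the `u64` element type are assumptions, and in the compiler a leased arena would typically be handed to a worker thread rather than used on the spot.

```rust
use arena_pool::arena_pool::{ArenaPool, ArenaVec};

fn main() {
    // A pool of u64 values, expecting to reabsorb chunks from a few arenas.
    let mut pool: ArenaPool<u64> = ArenaPool::with_capacity(8);

    // Lease an arena from the pool. In the compiler this would typically be
    // sent to a worker thread, which would use it for all of its allocations.
    let mut arena = pool.lease();

    // Allocate a single value; reads go back through the arena so the
    // pool ID can be verified.
    let arena_ref = arena.alloc(42);
    assert_eq!(*arena_ref.get(&arena), 42);

    // Build a growable vector backed by the same arena.
    let mut nums = ArenaVec::new_in(&arena);

    for n in 0..10 {
        nums.push(n, &mut arena);
    }

    assert_eq!(nums.get(3, &arena), Some(&3));
    assert_eq!(nums.get(99, &arena), None);

    // Every leased arena must be returned before the pool is dropped;
    // otherwise the pool's Drop implementation panics.
    pool.reabsorb(arena);
}
```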