mirror of https://github.com/roc-lang/roc.git
synced 2025-10-01 15:51:12 +00:00
Initial pass at arena_pool
This commit is contained in:
parent 8b3dd6c90c
commit b91359e4c7
8 changed files with 310 additions and 5 deletions
4  Cargo.lock  generated
@@ -69,6 +69,10 @@ version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034"

[[package]]
name = "arena-pool"
version = "0.1.0"

[[package]]
name = "arrayvec"
version = "0.5.1"

@@ -20,6 +20,7 @@ members = [
    "compiler/load",
    "compiler/gen",
    "compiler/build",
    "compiler/arena_pool",
    "vendor/ena",
    "vendor/pathfinding",
    "vendor/pretty",

11  compiler/arena_pool/Cargo.toml  Normal file
@@ -0,0 +1,11 @@
[package]
name = "arena-pool"
version = "0.1.0"
authors = ["Richard Feldman <oss@rtfeldman.com>"]
repository = "https://github.com/rtfeldman/roc"
edition = "2018"
description = "An arena allocator pool for Roc"
license = "Apache-2.0"

[dependencies]
smallvec = "1.4"

268  compiler/arena_pool/src/arena_pool.rs  Normal file

@@ -0,0 +1,268 @@
use crate::pool_id::PoolId;
use smallvec::SmallVec;
use std::marker::PhantomPinned;
use std::ptr::{copy_nonoverlapping, write, NonNull};

pub struct ArenaRef<T> {
    pool_id: PoolId,
    ptr: NonNull<T>,
    _pin: PhantomPinned,
}

impl<T> ArenaRef<T> {
    pub fn get<'a>(&'a self, arena: &Arena<T>) -> &'a T {
        self.verify_pool_id(arena);

        // SAFETY: we know this pointer is safe to follow because it will only
        // get deallocated once the pool where it was created gets deallocated
        // (along with all of the Arenas it leased), and we just verified that
        // this ArenaRef's ID matches a pool which has not yet been deallocated.
        unsafe { self.ptr.as_ref() }
    }

    pub fn get_mut<'a>(&'a mut self, arena: &Arena<T>) -> &'a mut T {
        self.verify_pool_id(arena);

        // SAFETY: we know this pointer is safe to follow because it will only
        // get deallocated once the pool where it was created gets deallocated
        // (along with all of the Arenas it leased), and we just verified that
        // this ArenaRef's ID matches a pool which has not yet been deallocated.
        unsafe { self.ptr.as_mut() }
    }

    fn verify_pool_id(&self, arena: &Arena<T>) {
        // Verify that this ArenaRef actually came from the same pool as the
        // given Arena.
        //
        // If it didn't, we may have come from a pool that's been freed,
        // so trying to dereference our pointer could cause a use-after-free!
        assert_eq!(self.pool_id, arena.pool_id);
    }
}

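For illustration, here is a hypothetical sketch of the pool-ID check in action (not part of this commit). It assumes the crate is consumed under the module path declared in lib.rs later in this commit, and it uses `ArenaPool` and `Arena`, which are defined further down in this file. Dereferencing an `ArenaRef` through an arena leased from a different pool panics instead of silently reading freed memory:

```rust
use arena_pool::arena_pool::ArenaPool;
use std::panic::{catch_unwind, AssertUnwindSafe};

fn main() {
    let mut pool_a: ArenaPool<u8> = ArenaPool::with_capacity(1);
    let mut pool_b: ArenaPool<u8> = ArenaPool::with_capacity(1);

    let mut arena_a = pool_a.lease();
    let arena_b = pool_b.lease();

    // This ArenaRef carries pool_a's ID.
    let r = arena_a.alloc(7);

    // Using it with an arena leased from pool_b trips the assert in verify_pool_id.
    let result = catch_unwind(AssertUnwindSafe(|| {
        let _ = r.get(&arena_b);
    }));
    assert!(result.is_err());

    // Every leased arena must go back to its pool before the pools are dropped.
    pool_a.reabsorb(arena_a);
    pool_b.reabsorb(arena_b);
}
```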
pub struct ArenaVec<T> {
    pool_id: PoolId,
    buffer_ptr: NonNull<T>,
    len: usize,
    capacity: usize,
    _pin: PhantomPinned,
}

impl<T> ArenaVec<T> {
    pub fn new_in(arena: &Arena<T>) -> Self {
        Self {
            pool_id: arena.pool_id,
            buffer_ptr: NonNull::dangling(),
            capacity: 0,
            len: 0,
            _pin: PhantomPinned,
        }
    }

    pub fn push<'a>(&'a mut self, val: T, arena: &mut Arena<T>) {
        self.verify_pool_id(arena);

        if self.len < self.capacity {
            // We're all set!
            //
            // This empty branch is just here for branch prediction,
            // since this should be the most common case in practice.
        } else {
            // Double our capacity (starting from 1 if it was 0) and reserve a new block.
            self.capacity = (self.capacity * 2).max(1);

            let ptr = arena.alloc_array(self.capacity);

            // SAFETY: the existing buffer must have at least self.len elements,
            // as must the new one, so copying that many between them is safe.
            unsafe {
                // Copy all elements from the current buffer into the new one
                copy_nonoverlapping(self.buffer_ptr.as_ptr(), ptr, self.len);
            }

            self.buffer_ptr = unsafe { NonNull::new_unchecked(ptr) };
        }

        // Store the element at the appropriate memory address. Use ptr::write
        // so we never drop whatever uninitialized memory happens to be there.
        unsafe {
            let elem_ptr = self.buffer_ptr.as_ptr().offset(self.len as isize);

            write(elem_ptr, val);
        }

        self.len += 1;
    }

    pub fn get<'a>(&'a self, index: usize, arena: &Arena<T>) -> Option<&'a T> {
        self.verify_pool_id(arena);

        if index < self.len {
            // SAFETY: we know this pointer is safe to follow because we've
            // done a bounds check, and because we know it will only get
            // deallocated once the pool where it was created gets deallocated
            // (along with all of the Arenas it leased), and we just verified that
            // this ArenaVec's ID matches a pool which has not yet been deallocated.
            Some(unsafe { &*self.buffer_ptr.as_ptr().offset(index as isize) })
        } else {
            None
        }
    }

    pub fn get_mut<'a>(&'a mut self, index: usize, arena: &Arena<T>) -> Option<&'a mut T> {
        self.verify_pool_id(arena);

        if index < self.len {
            // SAFETY: we know this pointer is safe to follow because we've
            // done a bounds check, and because we know it will only get
            // deallocated once the pool where it was created gets deallocated
            // (along with all of the Arenas it leased), and we just verified that
            // this ArenaVec's ID matches a pool which has not yet been deallocated.
            Some(unsafe { &mut *self.buffer_ptr.as_ptr().offset(index as isize) })
        } else {
            None
        }
    }

    fn verify_pool_id(&self, arena: &Arena<T>) {
        // Verify that this ArenaVec actually came from the same pool as the
        // given Arena.
        //
        // If it didn't, we may have come from a pool that's been freed,
        // so trying to dereference our pointer could cause a use-after-free!
        assert_eq!(self.pool_id, arena.pool_id);
    }
}

impl<T> ArenaVec<T> {}

#[derive(Default, PartialEq, Eq)]
pub struct ArenaPool<T> {
    pool_id: PoolId,
    chunks: Vec<Vec<T>>,
    num_leased: usize,
    default_chunk_capacity: usize,
}

impl<T> ArenaPool<T> {
    const DEFAULT_CHUNK_SIZE: usize = 4096;

    pub fn with_capacity(capacity: usize) -> ArenaPool<T> {
        Self::with_capacity_and_chunk_size(capacity, Self::DEFAULT_CHUNK_SIZE)
    }

    pub fn with_capacity_and_chunk_size(capacity: usize, chunk_size: usize) -> ArenaPool<T> {
        Self {
            pool_id: PoolId::default(),
            chunks: Vec::with_capacity(capacity),
            num_leased: 0,
            default_chunk_capacity: chunk_size,
        }
    }

    /// Return a new Arena, which can be given to different threads.
    /// This arena can be used to allocate ArenaRef and ArenaVec values which
    /// are compatible with any Arena leased from this pool.
    ///
    /// Before this pool gets dropped, you must call reabsorb() on every
    /// arena that has been leased - otherwise, you'll get a panic when this
    /// gets dropped! The memory safety of the system depends on all arenas
    /// having been reabsorbed before the pool gets deallocated, which is why
    /// the pool's Drop implementation enforces it.
    pub fn lease(&mut self) -> Arena<T> {
        self.num_leased += 1;

        let mut chunks = SmallVec::with_capacity(1);

        chunks.push(Vec::with_capacity(self.default_chunk_capacity));

        Arena {
            pool_id: self.pool_id,
            chunks,
            default_chunk_capacity: self.default_chunk_capacity,
        }
    }

    /// Return an arena to the pool. (This would have been called "return" but
    /// that's a reserved keyword.)
    pub fn reabsorb(&mut self, arena: Arena<T>) {
        // Ensure we're reabsorbing an arena that was
        // actually leased by this pool in the first place!
        assert_eq!(arena.pool_id, self.pool_id);

        // Add the arena's chunks to our own, so their memory remains live
        // after the arena gets dropped. This is important, because at this
        // point their pointers can still potentially be dereferenced!
        self.chunks.extend(arena.chunks.into_iter());

        self.num_leased -= 1;
    }
}

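To make the lease/reabsorb contract above concrete, here is a minimal hypothetical usage sketch (again not part of this commit, and assuming the same external module path):

```rust
use arena_pool::arena_pool::ArenaPool;

fn main() {
    // Room for roughly as many arenas as we expect to lease.
    let mut pool: ArenaPool<u64> = ArenaPool::with_capacity(2);

    // Lease an arena; it could be handed off to a worker thread.
    let mut arena = pool.lease();

    // Allocate into the arena and read the value back through the ArenaRef.
    let num = arena.alloc(42);
    assert_eq!(*num.get(&arena), 42);

    // Every leased arena must come back before the pool is dropped.
    pool.reabsorb(arena);
}
```

Skipping the final `reabsorb` call would make the pool's Drop implementation panic, as enforced below.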
impl<T> Drop for ArenaPool<T> {
    fn drop(&mut self) {
        // When an ArenaPool gets dropped, it must not have any leased
        // arenas remaining. If it does, there will be outstanding IDs which
        // could be used with those non-reabsorbed Arenas to read freed memory!
        // This would be a use-after-free; we panic rather than permit that.
        assert_eq!(self.num_leased, 0);
    }
}

#[derive(PartialEq, Eq)]
pub struct Arena<T> {
    pool_id: PoolId,
    chunks: SmallVec<[Vec<T>; 1]>,
    default_chunk_capacity: usize,
}

impl<T> Arena<T> {
    pub fn alloc(&mut self, val: T) -> ArenaRef<T> {
        // Every Arena should always be initialized with at least one chunk.
        debug_assert!(!self.chunks.is_empty());

        let (chunk_len, chunk_capacity) = {
            let chunk = self.chunks.last().unwrap();

            (chunk.len(), chunk.capacity())
        };

        if chunk_len == chunk_capacity {
            // We've run out of space in our last chunk. Create a new one!
            self.chunks
                .push(Vec::with_capacity(self.default_chunk_capacity));
        }

        let chunk = self.chunks.last_mut().unwrap();
        let index = chunk.len();

        chunk.push(val);

        // Get a pointer to the memory address within our particular chunk.
        let ptr: *mut T = &mut chunk[index];

        ArenaRef {
            pool_id: self.pool_id,
            ptr: unsafe { NonNull::new_unchecked(ptr) },
            _pin: PhantomPinned,
        }
    }

    fn alloc_array(&mut self, num_elems: usize) -> *mut T {
        let (chunk_len, chunk_capacity) = {
            let chunk = self.chunks.last().unwrap();

            (chunk.len(), chunk.capacity())
        };

        if chunk_len + num_elems <= chunk_capacity {
            // This will fit in the current chunk, so we'll just use that.
        } else {
            // This won't fit in our current chunk, so we'll need a new one.
            // Make the new chunk big enough for this array even if that
            // exceeds the default chunk size.
            let capacity = self.default_chunk_capacity.max(num_elems);

            self.chunks.push(Vec::with_capacity(capacity));
        }

        self.chunks.last_mut().unwrap().as_mut_ptr()
    }
}

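A last hypothetical sketch (not part of this commit) of the chunking behavior in `Arena::alloc`: with a deliberately tiny chunk size, the arena spills into a fresh chunk once the current one fills up, and previously returned `ArenaRef`s keep pointing into their original chunks:

```rust
use arena_pool::arena_pool::ArenaPool;

fn main() {
    // Chunk size of 2, so the third allocation forces a new chunk.
    let mut pool: ArenaPool<u32> = ArenaPool::with_capacity_and_chunk_size(1, 2);
    let mut arena = pool.lease();

    let refs: Vec<_> = (0..5u32).map(|n| arena.alloc(n)).collect();

    // Earlier ArenaRefs stay valid: full chunks are never reallocated,
    // new chunks are simply appended to the arena.
    for (i, r) in refs.iter().enumerate() {
        assert_eq!(*r.get(&arena), i as u32);
    }

    pool.reabsorb(arena);
}
```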
2  compiler/arena_pool/src/lib.rs  Normal file

@@ -0,0 +1,2 @@
pub mod arena_pool;
mod pool_id;

14  compiler/arena_pool/src/pool_id.rs  Normal file

@@ -0,0 +1,14 @@
use std::sync::atomic::{AtomicUsize, Ordering};

static NEXT_ID: AtomicUsize = AtomicUsize::new(0);

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct PoolId(usize);

impl Default for PoolId {
    fn default() -> Self {
        let id = NEXT_ID.fetch_add(1, Ordering::Relaxed);

        Self(id)
    }
}

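Since `pool_id` is a private module, the ID scheme can only be exercised from inside the crate. A hypothetical in-crate test (not part of this commit) might look like this:

```rust
#[cfg(test)]
mod tests {
    use crate::pool_id::PoolId;

    #[test]
    fn each_default_pool_id_is_unique() {
        // Every call bumps the shared atomic counter, so no two pools
        // (and therefore no two of their leased arenas) share an ID.
        let first = PoolId::default();
        let second = PoolId::default();

        assert_ne!(first, second);
    }
}
```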
@@ -579,11 +579,6 @@ fn parse_src(
}

/// Load a module by its filename
///
/// This has two unsafe calls:
///
/// * memory map the filename instead of doing a buffered read
/// * assume the contents of the file are valid UTF-8
fn load_filename(
    filename: PathBuf,
    msg_tx: MsgSender,

@@ -718,6 +718,16 @@ However, it cannot involve record field access. So this would *not* compile:
{ Foo.defaults.config & timeZone: utc }
```

## Optional Record Fields

In Elm, all record fields are required. [Kinda not really with Open Records though, yeah?]

## Standard Data Structures

Elm has `List`, `Array`, `Set`, and `Dict` in the standard library.