moved all crates into separate folder + related path fixes

This commit is contained in:
Anton-4 2022-07-01 17:37:43 +02:00
parent 12ef03bb86
commit eee85fa45d
No known key found for this signature in database
GPG key ID: C954D6E0F9C0ABFD
1063 changed files with 92 additions and 93 deletions

View file

@ -0,0 +1,4 @@
pub mod pool;
pub mod pool_str;
pub mod pool_vec;
pub mod shallow_clone;

View file

@ -0,0 +1,269 @@
//! A memory pool of 32-byte nodes. The node value 0 is reserved for the pool's
//! use, and valid nodes may never have that value.
//!
//! Internally, the pool is divided into pages of 4096 bytes. It stores nodes
//! into one page at a time, and when it runs out, it uses mmap to reserve an
//! anonymous memory page in which to store nodes.
//!
//! Since nodes are 32 bytes, one page can store 128 nodes; you can access a
//! particular node by its NodeId, which is an opaque wrapper around a u32 index.
//!
//! Pages also use the node value 0 (all 0 bits) to mark nodes as unoccupied.
//! This is important for performance: checking whether a slot is free is a
//! plain all-zeroes comparison, with no separate bookkeeping structure.
use std::any::type_name;
use std::ffi::c_void;
use std::marker::PhantomData;
use std::mem::{align_of, size_of, MaybeUninit};
pub const NODE_BYTES: usize = 32;
// Each page has 128 slots. Each slot holds one 32B node.
// This means each page is 4096B, which is the size of a memory page
// on typical systems where the compiler will be run.
//
// Nice things about this system include:
// * Allocating a new page is as simple as asking the OS for a memory page.
// * Since each node is 32B and pages are page-aligned, each node's memory
//   address will be a multiple of 32.
// * Thanks to the free lists and our consistent chunk sizes, we should
// end up with very little fragmentation.
// * Finding a slot for a given node should be very fast: see if the relevant
// free list has any openings; if not, try the next size up.
//
// Less nice things include:
// * This system makes it very hard to ever give a page back to the OS.
// We could try doing the Mesh Allocator strategy: whenever we allocate
// something, assign it to a random slot in the page, and then periodically
// try to merge two pages into one (by locking and remapping them in the OS)
// and then returning the redundant physical page back to the OS. This should
// work in theory, but is pretty complicated, and we'd need to schedule it.
//   Keep in mind that we can't use the Mesh Allocator itself, because it hands
//   out full usize pointers, which are too big for the compact u32 NodeIds we
//   rely on.
// On the plus side, we could be okay with higher memory usage early on,
// and then later use the Mesh strategy to reduce long-running memory usage.
//
// With this system, we can allocate up to 4B nodes. If we wanted to keep
// a generational index in there, like https://crates.io/crates/sharded-slab
// does, we could use some of the 32 bits for that. For example, if we wanted
// to have a 5-bit generational index (supporting up to 32 generations), then
// we would have 27 bits remaining, meaning we could only support at most
// 134M nodes. Since the editor has a separate Pool for each module, is that
// enough for any single module we'll encounter in practice? Probably, and
// especially if we allocate super large collection literals on the heap instead
// of in the pool.
//
// Another possible design is to try to catch reuse bugs using an "ASan" like
// approach: in development builds, whenever we "free" a particular slot, we
// can add it to a dev-build-only "freed nodes" list and don't hand it back
// out (so, we leak the memory.) Then we can (again, in development builds only)
// check to see if we're about to store something in zeroed-out memory; if so,
// check the freed-nodes list to see whether that slot was previously freed,
// and report the reuse as a bug.
#[derive(Debug, Eq)]
pub struct NodeId<T> {
pub(super) index: u32,
pub(super) _phantom: PhantomData<T>,
}
impl<T> Clone for NodeId<T> {
fn clone(&self) -> Self {
NodeId {
index: self.index,
_phantom: PhantomData::default(),
}
}
}
impl<T> PartialEq for NodeId<T> {
fn eq(&self, other: &Self) -> bool {
self.index == other.index
}
}
impl<T> Copy for NodeId<T> {}
#[derive(Debug)]
pub struct Pool {
pub(super) nodes: *mut [MaybeUninit<u8>; NODE_BYTES],
num_nodes: u32,
capacity: u32,
// free_1node_slots: Vec<NodeId<T>>,
}
impl Pool {
pub fn with_capacity(nodes: u32) -> Self {
// round up number of nodes requested to nearest page size in bytes
let bytes_per_page = page_size::get();
let node_bytes = NODE_BYTES * nodes as usize;
let leftover = node_bytes % bytes_per_page;
let bytes_to_mmap = if leftover == 0 {
node_bytes
} else {
node_bytes + bytes_per_page - leftover
};
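        // Worked example, assuming the typical 4096-byte page size: requesting
        // 100 nodes needs 3200 bytes; leftover = 3200 % 4096 = 3200, so we
        // mmap 3200 + 4096 - 3200 = 4096 bytes, i.e. capacity for 128 nodes.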
let nodes = unsafe {
// mmap anonymous memory pages - that is, contiguous virtual memory
// addresses from the OS which will be lazily translated into
// physical memory one 4096-byte page at a time, once we actually
// try to read or write in that page's address range.
#[cfg(unix)]
{
use libc::{MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
                libc::mmap(
                    std::ptr::null_mut(),
                    bytes_to_mmap,
                    PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS,
                    // For anonymous mappings, portable code passes fd = -1
                    // (some platforms reject other values) and offset 0.
                    -1,
                    0,
                )
}
#[cfg(windows)]
{
use winapi::um::memoryapi::VirtualAlloc;
use winapi::um::winnt::PAGE_READWRITE;
use winapi::um::winnt::{MEM_COMMIT, MEM_RESERVE};
VirtualAlloc(
std::ptr::null_mut(),
bytes_to_mmap,
MEM_COMMIT | MEM_RESERVE,
PAGE_READWRITE,
)
}
} as *mut [MaybeUninit<u8>; NODE_BYTES];
// This is our actual capacity, in nodes.
// It might be higher than the requested capacity due to rounding up
// to nearest page size.
let capacity = (bytes_to_mmap / NODE_BYTES) as u32;
Pool {
nodes,
num_nodes: 0,
capacity,
}
}
pub fn add<T>(&mut self, node: T) -> NodeId<T> {
// It's only safe to store this if T fits in S.
debug_assert!(
size_of::<T>() <= NODE_BYTES,
"{} has a size of {}, but it needs to be at most {}",
type_name::<T>(),
size_of::<T>(),
NODE_BYTES
);
let node_id = self.reserve(1);
let node_ptr = self.get_ptr(node_id);
unsafe { node_ptr.write(MaybeUninit::new(node)) };
node_id
}
    /// Reserves the given number of contiguous node slots, and returns
    /// the NodeId of the first one. Since the count is a u32, at most
    /// 2^32 - 1 slots can be reserved at once.
    pub(super) fn reserve<T>(&mut self, nodes: u32) -> NodeId<T> {
        // TODO once we have a free list, look in there for an open slot first!
        let index = self.num_nodes;
        // Check that the entire run of `nodes` slots fits in the remaining
        // capacity, not just the first slot.
        if nodes <= self.capacity - index {
            self.num_nodes = index + nodes;
NodeId {
index,
_phantom: PhantomData::default(),
}
} else {
todo!("pool ran out of capacity. TODO reallocate the nodes pointer to map to a bigger space. Can use mremap on Linux, but must memcpy lots of bytes on macOS and Windows.");
}
}
    pub fn get<'a, T>(&'a self, node_id: NodeId<T>) -> &'a T {
unsafe {
let node_ptr = self.get_ptr(node_id) as *const T;
&*node_ptr
}
}
pub fn get_mut<T>(&mut self, node_id: NodeId<T>) -> &mut T {
unsafe {
let node_ptr = self.get_ptr(node_id) as *mut T;
&mut *node_ptr
}
}
pub fn set<T>(&mut self, node_id: NodeId<T>, element: T) {
unsafe {
let node_ptr = self.get_ptr(node_id);
node_ptr.write(MaybeUninit::new(element));
}
}
fn get_ptr<T>(&self, node_id: NodeId<T>) -> *mut MaybeUninit<T> {
let node_offset = unsafe { self.nodes.offset(node_id.index as isize) };
// This checks if the node_offset is aligned to T
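        // (addr & (align - 1)) == 0 works because alignments are always powers
        // of two; e.g. for align_of::<u64>() == 8 we test the low 3 bits.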
assert!(0 == (node_offset as usize) & (align_of::<T>() - 1));
node_offset as *mut MaybeUninit<T>
}
// A node is available iff its bytes are all zeroes
#[allow(dead_code)]
fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
        debug_assert!(size_of::<T>() <= NODE_BYTES);
unsafe {
let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES];
*node_ptr == [0; NODE_BYTES]
}
}
}
impl<T> std::ops::Index<NodeId<T>> for Pool {
type Output = T;
fn index(&self, node_id: NodeId<T>) -> &Self::Output {
self.get(node_id)
}
}
impl<T> std::ops::IndexMut<NodeId<T>> for Pool {
fn index_mut(&mut self, node_id: NodeId<T>) -> &mut Self::Output {
self.get_mut(node_id)
}
}
impl Drop for Pool {
fn drop(&mut self) {
unsafe {
#[cfg(unix)]
{
libc::munmap(
self.nodes as *mut c_void,
NODE_BYTES * self.capacity as usize,
);
}
#[cfg(windows)]
{
use winapi::um::memoryapi::VirtualFree;
use winapi::um::winnt::MEM_RELEASE;
                VirtualFree(
                    self.nodes as *mut c_void,
                    // With MEM_RELEASE, dwSize must be 0; the entire region
                    // returned by VirtualAlloc is released at once.
                    0,
                    MEM_RELEASE,
                );
}
}
}
}
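// A minimal sketch of typical Pool usage, exercising the API described in the
// module docs: `add` stores a value and hands back a NodeId, and the
// Index/IndexMut impls (plus `set`) go from a NodeId back to the value.
#[test]
fn pool_add_get_set() {
    let mut pool = Pool::with_capacity(128);
    let node_id = pool.add(42u64);
    assert_eq!(pool[node_id], 42);
    pool.set(node_id, 99u64);
    assert_eq!(pool[node_id], 99);
}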

View file

@ -0,0 +1,86 @@
use super::pool::{NodeId, Pool, NODE_BYTES};
use super::shallow_clone::ShallowClone;
use std::marker::PhantomData;
use std::mem::size_of;
/// A string containing at most 2^32 pool-allocated bytes.
#[derive(Debug, Copy, Clone)]
pub struct PoolStr {
first_node_id: NodeId<()>,
len: u32,
}
#[test]
fn pool_str_size() {
assert_eq!(size_of::<PoolStr>(), 8);
}
impl PoolStr {
pub fn new(string: &str, pool: &mut Pool) -> Self {
debug_assert!(string.len() <= u32::MAX as usize);
        // Strings are stored as UTF-8 bytes, so we need
        // ceil(string.len() / NODE_BYTES) nodes to hold them.
        let number_of_nodes = ((string.len() + NODE_BYTES - 1) / NODE_BYTES) as u32;
if number_of_nodes > 0 {
let first_node_id = pool.reserve(number_of_nodes);
let index = first_node_id.index as isize;
            let next_node_ptr = unsafe { pool.nodes.offset(index) } as *mut u8;
            unsafe {
                std::ptr::copy_nonoverlapping(string.as_ptr(), next_node_ptr, string.len());
            }
PoolStr {
first_node_id,
len: string.len() as u32,
}
} else {
PoolStr {
first_node_id: NodeId {
index: 0,
_phantom: PhantomData::default(),
},
len: 0,
}
}
}
    pub fn as_str<'a>(&self, pool: &'a Pool) -> &'a str {
unsafe {
let node_ptr = pool.nodes.offset(self.first_node_id.index as isize) as *const u8;
let node_slice: &[u8] = std::slice::from_raw_parts(node_ptr, self.len as usize);
            std::str::from_utf8_unchecked(node_slice)
}
}
#[allow(clippy::len_without_is_empty)]
pub fn len(&self, pool: &Pool) -> usize {
let contents = self.as_str(pool);
contents.len()
}
pub fn is_empty(&self, pool: &Pool) -> bool {
self.len(pool) == 0
}
}
impl ShallowClone for PoolStr {
fn shallow_clone(&self) -> Self {
// Question: should this fully clone, or is a shallow copy
// (and the aliasing it entails) OK?
Self {
first_node_id: self.first_node_id,
len: self.len,
}
}
}
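// A minimal sketch of the round trip described above: PoolStr::new copies the
// string's UTF-8 bytes into reserved pool nodes, and as_str reads them back.
#[test]
fn pool_str_round_trip() {
    let mut pool = Pool::with_capacity(128);
    let pool_str = PoolStr::new("hello, pool!", &mut pool);
    assert_eq!(pool_str.as_str(&pool), "hello, pool!");
    assert_eq!(pool_str.len(&pool), "hello, pool!".len());
    assert!(!pool_str.is_empty(&pool));
}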

View file

@ -0,0 +1,323 @@
use super::pool::{NodeId, Pool, NODE_BYTES};
use super::shallow_clone::ShallowClone;
use std::any::type_name;
use std::cmp::Ordering;
use std::marker::PhantomData;
use std::mem::size_of;
/// An array of at most 2^32 pool-allocated nodes.
#[derive(Debug)]
pub struct PoolVec<T> {
first_node_id: NodeId<T>,
len: u32,
}
#[test]
fn pool_vec_size() {
assert_eq!(size_of::<PoolVec<()>>(), 8);
}
impl<'a, T: 'a + Sized> PoolVec<T> {
pub fn empty(pool: &mut Pool) -> Self {
Self::new(std::iter::empty(), pool)
}
pub fn with_capacity(len: u32, pool: &mut Pool) -> Self {
        debug_assert!(
            size_of::<T>() <= NODE_BYTES,
            "{} has a size of {}, but it needs to be at most {}",
            type_name::<T>(),
            size_of::<T>(),
            NODE_BYTES
        );
if len == 0 {
Self::empty(pool)
} else {
let first_node_id = pool.reserve(len);
PoolVec { first_node_id, len }
}
}
pub fn len(&self) -> usize {
self.len as usize
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
pub fn new<I: ExactSizeIterator<Item = T>>(nodes: I, pool: &mut Pool) -> Self {
debug_assert!(nodes.len() <= u32::MAX as usize);
debug_assert!(size_of::<T>() <= NODE_BYTES);
let len = nodes.len() as u32;
if len > 0 {
let first_node_id = pool.reserve(len);
let index = first_node_id.index as isize;
            let mut next_node_ptr = unsafe { pool.nodes.offset(index) } as *mut T;
            for node in nodes {
                unsafe {
                    // Write without reading the destination first: a normal
                    // assignment would try to drop the uninitialized slot.
                    next_node_ptr.write(node);
                    next_node_ptr = next_node_ptr.add(1);
                }
            }
PoolVec { first_node_id, len }
} else {
PoolVec {
first_node_id: NodeId {
index: 0,
_phantom: PhantomData::default(),
},
len: 0,
}
}
}
pub fn iter(&self, pool: &'a Pool) -> impl ExactSizeIterator<Item = &'a T> {
self.pool_list_iter(pool)
}
pub fn iter_mut(&self, pool: &'a mut Pool) -> impl ExactSizeIterator<Item = &'a mut T> {
self.pool_list_iter_mut(pool)
}
pub fn iter_node_ids(&self) -> impl ExactSizeIterator<Item = NodeId<T>> {
self.pool_list_iter_node_ids()
}
/// Private version of into_iter which exposes the implementation detail
/// of PoolVecIter. We don't want that struct to be public, but we
/// actually do want to have this separate function for code reuse
/// in the iterator's next() method.
#[inline(always)]
fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> {
PoolVecIter {
pool,
current_node_id: self.first_node_id,
len_remaining: self.len,
}
}
#[inline(always)]
    fn pool_list_iter_mut(&self, pool: &'a mut Pool) -> PoolVecIterMut<'a, T> {
PoolVecIterMut {
pool,
current_node_id: self.first_node_id,
len_remaining: self.len,
}
}
#[inline(always)]
fn pool_list_iter_node_ids(&self) -> PoolVecIterNodeIds<T> {
PoolVecIterNodeIds {
current_node_id: self.first_node_id,
len_remaining: self.len,
}
}
    pub fn free(self, pool: &'a mut Pool) {
        // zero out the memory, marking the slots unoccupied
        unsafe {
            let index = self.first_node_id.index as isize;
            let node_ptr = pool.nodes.offset(index) as *mut u8;
            let bytes = self.len as usize * NODE_BYTES;
            std::ptr::write_bytes(node_ptr, 0, bytes);
        }
// TODO insert it into the pool's free list
}
}
impl<T> ShallowClone for PoolVec<T> {
fn shallow_clone(&self) -> Self {
// Question: should this fully clone, or is a shallow copy
// (and the aliasing it entails) OK?
Self {
first_node_id: self.first_node_id,
len: self.len,
}
}
}
struct PoolVecIter<'a, T> {
pool: &'a Pool,
current_node_id: NodeId<T>,
len_remaining: u32,
}
impl<'a, T> ExactSizeIterator for PoolVecIter<'a, T>
where
T: 'a,
{
fn len(&self) -> usize {
self.len_remaining as usize
}
}
impl<'a, T> Iterator for PoolVecIter<'a, T>
where
T: 'a,
{
type Item = &'a T;
fn next(&mut self) -> Option<Self::Item> {
let len_remaining = self.len_remaining;
match len_remaining.cmp(&1) {
Ordering::Greater => {
// Get the current node
let index = self.current_node_id.index;
let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T;
// Advance the node pointer to the next node in the current page
self.current_node_id = NodeId {
index: index + 1,
_phantom: PhantomData::default(),
};
self.len_remaining = len_remaining - 1;
Some(unsafe { &*node_ptr })
}
Ordering::Equal => {
self.len_remaining = 0;
// Don't advance the node pointer's node, because that might
// advance past the end of the page!
let index = self.current_node_id.index;
let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T;
Some(unsafe { &*node_ptr })
}
Ordering::Less => {
// len_remaining was 0
None
}
}
}
    fn size_hint(&self) -> (usize, Option<usize>) {
        // ExactSizeIterator requires size_hint to be exact.
        let len = self.len_remaining as usize;
        (len, Some(len))
    }
}
struct PoolVecIterMut<'a, T> {
    pool: &'a mut Pool,
    current_node_id: NodeId<T>,
    len_remaining: u32,
}
impl<'a, T> ExactSizeIterator for PoolVecIterMut<'a, T>
where
T: 'a,
{
fn len(&self) -> usize {
self.len_remaining as usize
}
}
impl<'a, T> Iterator for PoolVecIterMut<'a, T>
where
T: 'a,
{
type Item = &'a mut T;
fn next(&mut self) -> Option<Self::Item> {
let len_remaining = self.len_remaining;
match len_remaining.cmp(&1) {
Ordering::Greater => {
// Get the current node
let index = self.current_node_id.index;
let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *mut T;
// Advance the node pointer to the next node in the current page
self.current_node_id = NodeId {
index: index + 1,
_phantom: PhantomData::default(),
};
self.len_remaining = len_remaining - 1;
Some(unsafe { &mut *node_ptr })
}
Ordering::Equal => {
self.len_remaining = 0;
// Don't advance the node pointer's node, because that might
// advance past the end of the page!
let index = self.current_node_id.index;
let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *mut T;
Some(unsafe { &mut *node_ptr })
}
Ordering::Less => {
// len_remaining was 0
None
}
}
}
    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.len_remaining as usize;
        (len, Some(len))
    }
}
struct PoolVecIterNodeIds<T> {
current_node_id: NodeId<T>,
len_remaining: u32,
}
impl<T> ExactSizeIterator for PoolVecIterNodeIds<T> {
fn len(&self) -> usize {
self.len_remaining as usize
}
}
impl<T> Iterator for PoolVecIterNodeIds<T> {
type Item = NodeId<T>;
fn next(&mut self) -> Option<Self::Item> {
let len_remaining = self.len_remaining;
match len_remaining.cmp(&1) {
Ordering::Greater => {
// Get the current node
let current = self.current_node_id;
let index = current.index;
// Advance the node pointer to the next node in the current page
self.current_node_id = NodeId {
index: index + 1,
_phantom: PhantomData::default(),
};
self.len_remaining = len_remaining - 1;
Some(current)
}
Ordering::Equal => {
self.len_remaining = 0;
// Don't advance the node pointer's node, because that might
// advance past the end of the page!
Some(self.current_node_id)
}
Ordering::Less => {
// len_remaining was 0
None
}
}
}
    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.len_remaining as usize;
        (len, Some(len))
    }
}
#[test]
fn pool_vec_iter_test() {
let expected_vec: Vec<usize> = vec![2, 4, 8, 16];
let mut test_pool = Pool::with_capacity(1024);
let pool_vec = PoolVec::new(expected_vec.clone().into_iter(), &mut test_pool);
let current_vec: Vec<usize> = pool_vec.iter(&test_pool).copied().collect();
assert_eq!(current_vec, expected_vec);
}
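// A minimal sketch of iter_node_ids: the yielded ids index the same slots the
// values occupy, so Pool's Index impl and `set` can read and update elements
// in place.
#[test]
fn pool_vec_node_ids_test() {
    let mut test_pool = Pool::with_capacity(1024);
    let pool_vec = PoolVec::new(vec![1u64, 2, 3].into_iter(), &mut test_pool);
    for node_id in pool_vec.iter_node_ids() {
        let doubled = test_pool[node_id] * 2;
        test_pool.set(node_id, doubled);
    }
    let current_vec: Vec<u64> = pool_vec.iter(&test_pool).copied().collect();
    assert_eq!(current_vec, vec![2u64, 4, 6]);
}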

View file

@ -0,0 +1,35 @@
use roc_can::expected::Expected;
use roc_can::expected::PExpected;
/// Clones the outer value, but does not clone anything the contained NodeIds
/// point to, so the clone aliases the same pool nodes.
pub trait ShallowClone {
fn shallow_clone(&self) -> Self;
}
impl<T> ShallowClone for Expected<T>
where
T: ShallowClone,
{
fn shallow_clone(&self) -> Self {
use Expected::*;
match self {
NoExpectation(t) => NoExpectation(t.shallow_clone()),
ForReason(reason, t, region) => ForReason(reason.clone(), t.shallow_clone(), *region),
FromAnnotation(loc_pat, n, source, t) => {
FromAnnotation(loc_pat.clone(), *n, *source, t.shallow_clone())
}
}
}
}
impl<T: ShallowClone> ShallowClone for PExpected<T> {
fn shallow_clone(&self) -> Self {
use PExpected::*;
match self {
NoExpectation(t) => NoExpectation(t.shallow_clone()),
ForReason(reason, t, region) => ForReason(reason.clone(), t.shallow_clone(), *region),
}
}
}
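// A hypothetical illustration (ListLiteral is invented here, not a real editor
// type) of the intended pattern: PoolVec and PoolStr shallow-clone by copying
// ids only, so the clone aliases the same pool nodes instead of duplicating
// their contents.
#[allow(dead_code)]
struct ListLiteral {
    elems: super::pool_vec::PoolVec<u64>,
    name: super::pool_str::PoolStr,
}
impl ShallowClone for ListLiteral {
    fn shallow_clone(&self) -> Self {
        Self {
            elems: self.elems.shallow_clone(),
            name: self.name.shallow_clone(),
        }
    }
}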