Add bucket.rs

This commit is contained in:
Richard Feldman 2020-10-21 21:48:01 -04:00
parent 9870aaf26c
commit bf7f1d49e2
5 changed files with 224 additions and 0 deletions

12
Cargo.lock generated
View file

@ -1639,6 +1639,16 @@ dependencies = [
"ttf-parser",
]
[[package]]
name = "page_size"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd"
dependencies = [
"libc",
"winapi 0.3.9",
]
[[package]]
name = "parking_lot"
version = "0.10.2"
@ -2377,8 +2387,10 @@ dependencies = [
"indoc",
"inkwell",
"inlinable_string",
"libc",
"log",
"maplit",
"page_size",
"pretty_assertions",
"quickcheck",
"quickcheck_macros",

View file

@ -29,6 +29,8 @@ im-rc = "14" # im and im-rc should always have the same version!
bumpalo = { version = "3.2", features = ["collections"] }
inlinable_string = "0.1"
arraystring = "0.3.0"
libc = "0.2"
page_size = "0.4"
# NOTE: rtfeldman/inkwell is a fork of TheDan64/inkwell which does not change anything.
#
# The reason for this fork is that the way Inkwell is designed, you have to use

View file

@ -345,6 +345,10 @@ pub struct Exprs {
// memory usage. We could in theory go up to free_128node_slots, but in
// practice it seems unlikely that it would be worth the bookkeeping
// effort to go that high.
//
// TODO: this could be refactored into `free_slots: [Vec<ExprId>; 5]`
// where (2 ^ index) is the node size for that slot. It's less
// self-documenting but might allow for better code reuse.
pub free_1node_slots: Vec<ExprId>,
pub free_2node_slots: Vec<ExprId>,
pub free_4node_slots: Vec<ExprId>,
@ -366,6 +370,26 @@ pub struct Exprs {
// (e.g. If, When, Record, Tag, Call, Closure) can only contain at most
// 255 nodes. So functions can have at most 255 arguments, records can have
// at most 255 fields, etc.
//
// Nice things about this system include:
// * Allocating a new bucket is as simple as asking the OS for a memory page.
// * Since each node is 16B, each node's memory address will be a multiple of 16.
// * Thanks to the free lists and our consistent chunk sizes, we should
// end up with very little fragmentation.
// * Finding a slot for a given node should be very fast: see if the relevant
// free list has any openings; if not, try the next size up.
//
// Less nice things include:
// * This system makes it very hard to ever give a page back to the OS.
// We could try doing the Mesh Allocator strategy: whenever we allocate
// something, assign it to a random slot in the bucket, and then periodically
// try to merge two pages into one (by locking and remapping them in the OS)
// and then returning the redundant physical page back to the OS. This should
// work in theory, but is pretty complicated, and we'd need to schedule it.
// Keep in mind that we can't use the Mesh Allocator itself because it returns
// usize pointers, which would be too big for us to have 16B nodes.
// On the plus side, we could be okay with higher memory usage early on,
// and then later use the Mesh strategy to reduce long-running memory usage.
type ExprBucketSlots = [Expr2; 256];
#[test]

185
editor/src/bucket.rs Normal file
View file

@ -0,0 +1,185 @@
//! A bucket: a page-sized block of memory holding fixed-size (16-byte) nodes.
use libc::{
    c_void, calloc, free, mmap, munmap, MAP_ANONYMOUS, MAP_FAILED, MAP_PRIVATE, PROT_READ,
    PROT_WRITE,
};
use std::marker::PhantomData;
use std::mem::{self, size_of};
use std::ptr::null;
use std::{u16, u8};
// Size of each bucket's allocation: one OS memory page on typical platforms
// (see the Default impl for the fallback when the page size differs),
// holding 256 slots of 16 bytes each.
const BUCKET_BYTES: usize = 4096;
/// The full address of a node: which bucket it lives in (`bucket_id`)
/// and which 16-byte slot within that bucket (`slot`).
pub struct NodeId<T> {
pub bucket_id: BucketId<T>,
pub slot: BucketSlot<T>,
}
/// Identifies one bucket within a `Buckets<T>` collection.
///
/// Stored as a `u16`, so at most `u16::MAX + 1` buckets can be addressed.
/// The phantom type parameter prevents an id for one node type from being
/// used with buckets of another node type.
pub struct BucketId<T> {
    value: u16,
    _phantom: PhantomData<T>,
}

impl<T> BucketId<T> {
    fn from_u16(value: u16) -> Self {
        BucketId {
            value,
            _phantom: PhantomData,
        }
    }
}

// Manual Clone/Copy impls: #[derive(Clone, Copy)] would add an unnecessary
// `T: Copy` bound, but the id is just a u16 regardless of T (PhantomData<T>
// is Copy for every T).
impl<T> Clone for BucketId<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for BucketId<T> {}
/// Identifies one of the 256 slots within a `Bucket<T>`.
///
/// Stored as a `u8`, so a bucket can never hold more than 256 nodes.
/// The phantom type parameter keeps slots for different node types from
/// being mixed up.
pub struct BucketSlot<T> {
    value: u8,
    _phantom: PhantomData<T>,
}

impl<T> BucketSlot<T> {
    fn from_u8(value: u8) -> Self {
        BucketSlot {
            value,
            _phantom: PhantomData,
        }
    }
}

// Manual Clone/Copy impls: #[derive(Clone, Copy)] would add an unnecessary
// `T: Copy` bound, but the slot is just a u8 regardless of T (PhantomData<T>
// is Copy for every T).
impl<T> Clone for BucketSlot<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for BucketSlot<T> {}
/// A growable collection of buckets holding nodes of type `T`.
pub struct Buckets<T> {
    buckets: Vec<Bucket<T>>,
}

// Manual impl: #[derive(Default)] would add an unneeded `T: Default` bound.
// Without this (or some constructor), the type cannot be created at all,
// since `buckets` is private.
impl<T> Default for Buckets<T> {
    fn default() -> Self {
        Buckets {
            buckets: Vec::new(),
        }
    }
}

impl<T> Buckets<T> {
    /// Allocates a fresh bucket and returns its id.
    ///
    /// Returns Err(()) if all representable bucket ids (a u16's worth)
    /// are already in use.
    pub fn add(&mut self) -> Result<BucketId<T>, ()> {
        let num_buckets = self.buckets.len();

        // The new bucket's index equals the current length, so it must
        // itself fit in a u16.
        if num_buckets <= u16::MAX as usize {
            let bucket_id = BucketId::from_u16(num_buckets as u16);

            self.buckets.push(Bucket::default());

            Ok(bucket_id)
        } else {
            Err(())
        }
    }

    /// Looks up the node at the given address. Returns None if the bucket
    /// id is out of range or the slot is unoccupied.
    pub fn get<'a>(&'a self, node_id: NodeId<T>) -> Option<&'a T> {
        self.buckets
            .get(node_id.bucket_id.value as usize)
            .and_then(|bucket| bucket.get(node_id.slot))
    }
}
/// One page-sized allocation holding up to 256 slots of 16-byte nodes.
pub struct Bucket<T> {
// Index of the next never-yet-used slot; slots are handed out in order
// by `add`. A u16 (not u8) so it can exceed u8::MAX to mean "full".
next_unused_slot: u16,
// Start of the backing allocation (an mmap'd page or calloc'd block —
// see the Default impl). Slot N lives at first_slot + N.
first_slot: *mut T,
// Phantom marker tying this bucket to its node type, since the raw
// pointer alone doesn't express ownership of the T values.
_phantom: PhantomData<T>,
}
impl<T> Bucket<T> {
    /// If there's room left in the bucket, adds the item and returns
    /// the slot where it was put. If there was no room left, returns Err(()).
    pub fn add(&mut self, node: T) -> Result<BucketSlot<T>, ()> {
        // Once next_unused_slot exceeds u8::MAX, we have no room left.
        if self.next_unused_slot <= u8::MAX as u16 {
            let chosen_slot = self.next_unused_slot as u8;

            // SAFETY: chosen_slot fits in a u8, so it is one of this
            // bucket's 256 slots.
            unsafe { self.put_unchecked(node, chosen_slot) };
            self.next_unused_slot += 1;

            Ok(BucketSlot::from_u8(chosen_slot))
        } else {
            // No room left!
            Err(())
        }
    }

    /// If the given slot is available, inserts the given node into it and
    /// returns Ok(()). Otherwise, drops `node` and returns Err with a
    /// reference to the node already occupying that slot.
    pub fn insert(&mut self, node: T, slot: BucketSlot<T>) -> Result<(), &T> {
        let slot = slot.value;

        // SAFETY: slot came from a BucketSlot, whose u8 value is always
        // within this bucket's 256 slots.
        unsafe {
            if self.is_available(slot) {
                self.put_unchecked(node, slot);

                Ok(())
            } else {
                Err(self.get_unchecked(slot))
            }
        }
    }

    /// Returns the node in the given slot, or None if the slot is
    /// unoccupied (that is, its bytes are still all zeroes).
    pub fn get<'a>(&'a self, slot: BucketSlot<T>) -> Option<&'a T> {
        // SAFETY: slot.value is a u8, so it is within the bucket's 256
        // slots. We only form a &T once we know the slot is occupied,
        // so we never hand out a reference to the zeroed filler bytes.
        unsafe {
            if self.is_available(slot.value) {
                None
            } else {
                Some(self.get_unchecked(slot.value))
            }
        }
    }

    /// Writes the node into the slot unconditionally.
    ///
    /// Uses ptr::write rather than `*slot_ptr = node`: plain assignment
    /// would first run T's destructor on the "previous value" at that
    /// address, but the slot holds zeroed filler bytes — not a valid T —
    /// so dropping it would be undefined behavior for any T with drop glue.
    unsafe fn put_unchecked(&mut self, node: T, slot: u8) {
        let slot_ptr = self.first_slot.offset(slot as isize);

        slot_ptr.write(node);
    }

    unsafe fn get_unchecked<'a>(&'a self, slot: u8) -> &'a T {
        &*self.first_slot.offset(slot as isize)
    }

    // A slot is available iff its bytes are all zeroes.
    // (Relies on T being exactly 16 bytes; see the debug_assert in Default.)
    unsafe fn is_available(&self, slot: u8) -> bool {
        let slot_ptr = self.first_slot.offset(slot as isize) as *const [u8; 16];

        *slot_ptr == [0; 16]
    }
}
impl<T> Default for Bucket<T> {
fn default() -> Self {
// It's only safe to store this as a *const T if T is 16 bytes.
// This is designed to be used exclusively with 16-byte nodes!
debug_assert_eq!(size_of::<T>(), 16);
let first_slot = if page_size::get() == 4096 {
unsafe {
// mmap exactly one memory page (4096 bytes)
mmap(
null::<c_void>() as *mut c_void,
BUCKET_BYTES,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
0,
0,
)
}
} else {
// Somehow the page size is not 4096 bytes, so fall back on calloc.
// (We use calloc over malloc because we rely on the bytes having
// been zeroed to tell which slots are available.)
unsafe { calloc(1, BUCKET_BYTES) }
} as *mut T;
Bucket {
next_unused_slot: 0,
first_slot,
_phantom: PhantomData::default(),
}
}
}
impl<T> Drop for Bucket<T> {
    /// Releases the backing memory using the same allocator that Default
    /// obtained it from: munmap for the page-sized mmap path, free for
    /// the calloc fallback.
    ///
    /// NOTE(review): the T values in the slots are never individually
    /// dropped here — fine for plain-data nodes, but a leak if T ever
    /// gains a destructor. Confirm that's intentional.
    fn drop(&mut self) {
        let ptr = self.first_slot as *mut c_void;

        unsafe {
            if page_size::get() == 4096 {
                munmap(ptr, BUCKET_BYTES);
            } else {
                free(ptr);
            }
        }
    }
}

View file

@ -20,6 +20,7 @@ use winit::event_loop::ControlFlow;
pub mod ast;
pub mod text_state;
pub mod bucket;
/// The editor is actually launched from the CLI if you pass it zero arguments,
/// or if you provide it 1 or more files or directories to open on launch.