Add bucket.rs

This commit is contained in:
Richard Feldman 2020-10-21 21:48:01 -04:00
parent 9870aaf26c
commit bf7f1d49e2
5 changed files with 224 additions and 0 deletions

12
Cargo.lock generated
View file

@ -1639,6 +1639,16 @@ dependencies = [
"ttf-parser",
]
[[package]]
name = "page_size"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd"
dependencies = [
"libc",
"winapi 0.3.9",
]
[[package]]
name = "parking_lot"
version = "0.10.2"
@ -2377,8 +2387,10 @@ dependencies = [
"indoc",
"inkwell",
"inlinable_string",
"libc",
"log",
"maplit",
"page_size",
"pretty_assertions",
"quickcheck",
"quickcheck_macros",

View file

@ -29,6 +29,8 @@ im-rc = "14" # im and im-rc should always have the same version!
bumpalo = { version = "3.2", features = ["collections"] }
inlinable_string = "0.1"
arraystring = "0.3.0"
libc = "0.2"
page_size = "0.4"
# NOTE: rtfeldman/inkwell is a fork of TheDan64/inkwell which does not change anything.
#
# The reason for this fork is that the way Inkwell is designed, you have to use

View file

@ -345,6 +345,10 @@ pub struct Exprs {
// memory usage. We could in theory go up to free_128node_slots, but in
// practice it seems unlikely that it would be worth the bookkeeping
// effort to go that high.
//
// TODO: this could be refactored into `free_slots: [Vec<ExprId>; 5]`
// where (2 ^ index) is the node size for that slot. It's less
// self-documenting but might allow for better code reuse.
pub free_1node_slots: Vec<ExprId>,
pub free_2node_slots: Vec<ExprId>,
pub free_4node_slots: Vec<ExprId>,
@ -366,6 +370,26 @@ pub struct Exprs {
// (e.g. If, When, Record, Tag, Call, Closure) can only contain at most
// 255 nodes. So functions can have at most 255 arguments, records can have
// at most 255 fields, etc.
//
// Nice things about this system include:
// * Allocating a new bucket is as simple as asking the OS for a memory page.
// * Since each node is 16B, each node's memory address will be a multiple of 16.
// * Thanks to the free lists and our consistent chunk sizes, we should
// end up with very little fragmentation.
// * Finding a slot for a given node should be very fast: see if the relevant
// free list has any openings; if not, try the next size up.
//
// Less nice things include:
// * This system makes it very hard to ever give a page back to the OS.
// We could try doing the Mesh Allocator strategy: whenever we allocate
// something, assign it to a random slot in the bucket, and then periodically
// try to merge two pages into one (by locking and remapping them in the OS)
// and then returning the redundant physical page back to the OS. This should
// work in theory, but is pretty complicated, and we'd need to schedule it.
// Keep in mind that we can't use the Mesh Allocator itself because it returns
// usize pointers, which would be too big for us to have 16B nodes.
// On the plus side, we could be okay with higher memory usage early on,
// and then later use the Mesh strategy to reduce long-running memory usage.
type ExprBucketSlots = [Expr2; 256];
#[test]

185
editor/src/bucket.rs Normal file
View file

@ -0,0 +1,185 @@
//! A bucket: a page-sized block of memory holding fixed-size (16-byte) nodes.
use libc::{
    c_void, calloc, free, mmap, munmap, MAP_ANONYMOUS, MAP_FAILED, MAP_PRIVATE, PROT_READ,
    PROT_WRITE,
};
use std::marker::PhantomData;
use std::mem::{self, size_of};
use std::ptr::null;
use std::{u16, u8};
// Size of each bucket's allocation: one OS memory page on typical platforms
// (see the Default impl for the fallback when the page size differs),
// holding 256 slots of 16 bytes each.
const BUCKET_BYTES: usize = 4096;
/// The full address of a node: which bucket it lives in (`bucket_id`)
/// and which 16-byte slot within that bucket (`slot`).
pub struct NodeId<T> {
pub bucket_id: BucketId<T>,
pub slot: BucketSlot<T>,
}
/// Identifies one bucket within a `Buckets<T>` collection.
///
/// Stored as a `u16`, so at most `u16::MAX + 1` buckets can be addressed.
/// The phantom type parameter prevents an id for one node type from being
/// used with buckets of another node type.
pub struct BucketId<T> {
    value: u16,
    _phantom: PhantomData<T>,
}

impl<T> BucketId<T> {
    fn from_u16(value: u16) -> Self {
        BucketId {
            value,
            _phantom: PhantomData,
        }
    }
}

// Manual Clone/Copy impls: #[derive(Clone, Copy)] would add an unnecessary
// `T: Copy` bound, but the id is just a u16 regardless of T (PhantomData<T>
// is Copy for every T).
impl<T> Clone for BucketId<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for BucketId<T> {}
/// Identifies one of the 256 slots within a `Bucket<T>`.
///
/// Stored as a `u8`, so a bucket can never hold more than 256 nodes.
/// The phantom type parameter keeps slots for different node types from
/// being mixed up.
pub struct BucketSlot<T> {
    value: u8,
    _phantom: PhantomData<T>,
}

impl<T> BucketSlot<T> {
    fn from_u8(value: u8) -> Self {
        BucketSlot {
            value,
            _phantom: PhantomData,
        }
    }
}

// Manual Clone/Copy impls: #[derive(Clone, Copy)] would add an unnecessary
// `T: Copy` bound, but the slot is just a u8 regardless of T (PhantomData<T>
// is Copy for every T).
impl<T> Clone for BucketSlot<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for BucketSlot<T> {}
/// A growable collection of buckets holding nodes of type `T`.
pub struct Buckets<T> {
    buckets: Vec<Bucket<T>>,
}

// Manual impl: #[derive(Default)] would add an unneeded `T: Default` bound.
// Without this (or some constructor), the type cannot be created at all,
// since `buckets` is private.
impl<T> Default for Buckets<T> {
    fn default() -> Self {
        Buckets {
            buckets: Vec::new(),
        }
    }
}

impl<T> Buckets<T> {
    /// Allocates a fresh bucket and returns its id.
    ///
    /// Returns Err(()) if all representable bucket ids (a u16's worth)
    /// are already in use.
    pub fn add(&mut self) -> Result<BucketId<T>, ()> {
        let num_buckets = self.buckets.len();

        // The new bucket's index equals the current length, so it must
        // itself fit in a u16.
        if num_buckets <= u16::MAX as usize {
            let bucket_id = BucketId::from_u16(num_buckets as u16);

            self.buckets.push(Bucket::default());

            Ok(bucket_id)
        } else {
            Err(())
        }
    }

    /// Looks up the node at the given address. Returns None if the bucket
    /// id is out of range or the slot is unoccupied.
    pub fn get<'a>(&'a self, node_id: NodeId<T>) -> Option<&'a T> {
        self.buckets
            .get(node_id.bucket_id.value as usize)
            .and_then(|bucket| bucket.get(node_id.slot))
    }
}
/// One page-sized allocation holding up to 256 slots of 16-byte nodes.
pub struct Bucket<T> {
// Index of the next never-yet-used slot; slots are handed out in order
// by `add`. A u16 (not u8) so it can exceed u8::MAX to mean "full".
next_unused_slot: u16,
// Start of the backing allocation (an mmap'd page or calloc'd block —
// see the Default impl). Slot N lives at first_slot + N.
first_slot: *mut T,
// Phantom marker tying this bucket to its node type, since the raw
// pointer alone doesn't express ownership of the T values.
_phantom: PhantomData<T>,
}
impl<T> Bucket<T> {
    /// If there's room left in the bucket, adds the item and returns
    /// the slot where it was put. If there was no room left, returns Err(()).
    pub fn add(&mut self, node: T) -> Result<BucketSlot<T>, ()> {
        // Once next_unused_slot exceeds u8::MAX, we have no room left.
        if self.next_unused_slot <= u8::MAX as u16 {
            let chosen_slot = self.next_unused_slot as u8;

            // SAFETY: chosen_slot fits in a u8, so it is one of this
            // bucket's 256 slots.
            unsafe { self.put_unchecked(node, chosen_slot) };
            self.next_unused_slot += 1;

            Ok(BucketSlot::from_u8(chosen_slot))
        } else {
            // No room left!
            Err(())
        }
    }

    /// If the given slot is available, inserts the given node into it and
    /// returns Ok(()). Otherwise, drops `node` and returns Err with a
    /// reference to the node already occupying that slot.
    pub fn insert(&mut self, node: T, slot: BucketSlot<T>) -> Result<(), &T> {
        let slot = slot.value;

        // SAFETY: slot came from a BucketSlot, whose u8 value is always
        // within this bucket's 256 slots.
        unsafe {
            if self.is_available(slot) {
                self.put_unchecked(node, slot);

                Ok(())
            } else {
                Err(self.get_unchecked(slot))
            }
        }
    }

    /// Returns the node in the given slot, or None if the slot is
    /// unoccupied (that is, its bytes are still all zeroes).
    pub fn get<'a>(&'a self, slot: BucketSlot<T>) -> Option<&'a T> {
        // SAFETY: slot.value is a u8, so it is within the bucket's 256
        // slots. We only form a &T once we know the slot is occupied,
        // so we never hand out a reference to the zeroed filler bytes.
        unsafe {
            if self.is_available(slot.value) {
                None
            } else {
                Some(self.get_unchecked(slot.value))
            }
        }
    }

    /// Writes the node into the slot unconditionally.
    ///
    /// Uses ptr::write rather than `*slot_ptr = node`: plain assignment
    /// would first run T's destructor on the "previous value" at that
    /// address, but the slot holds zeroed filler bytes — not a valid T —
    /// so dropping it would be undefined behavior for any T with drop glue.
    unsafe fn put_unchecked(&mut self, node: T, slot: u8) {
        let slot_ptr = self.first_slot.offset(slot as isize);

        slot_ptr.write(node);
    }

    unsafe fn get_unchecked<'a>(&'a self, slot: u8) -> &'a T {
        &*self.first_slot.offset(slot as isize)
    }

    // A slot is available iff its bytes are all zeroes.
    // (Relies on T being exactly 16 bytes; see the debug_assert in Default.)
    unsafe fn is_available(&self, slot: u8) -> bool {
        let slot_ptr = self.first_slot.offset(slot as isize) as *const [u8; 16];

        *slot_ptr == [0; 16]
    }
}
impl<T> Default for Bucket<T> {
fn default() -> Self {
// It's only safe to store this as a *const T if T is 16 bytes.
// This is designed to be used exclusively with 16-byte nodes!
debug_assert_eq!(size_of::<T>(), 16);
let first_slot = if page_size::get() == 4096 {
unsafe {
// mmap exactly one memory page (4096 bytes)
mmap(
null::<c_void>() as *mut c_void,
BUCKET_BYTES,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
0,
0,
)
}
} else {
// Somehow the page size is not 4096 bytes, so fall back on calloc.
// (We use calloc over malloc because we rely on the bytes having
// been zeroed to tell which slots are available.)
unsafe { calloc(1, BUCKET_BYTES) }
} as *mut T;
Bucket {
next_unused_slot: 0,
first_slot,
_phantom: PhantomData::default(),
}
}
}
impl<T> Drop for Bucket<T> {
    /// Releases the backing memory using the same allocator that Default
    /// obtained it from: munmap for the page-sized mmap path, free for
    /// the calloc fallback.
    ///
    /// NOTE(review): the T values in the slots are never individually
    /// dropped here — fine for plain-data nodes, but a leak if T ever
    /// gains a destructor. Confirm that's intentional.
    fn drop(&mut self) {
        let ptr = self.first_slot as *mut c_void;

        unsafe {
            if page_size::get() == 4096 {
                munmap(ptr, BUCKET_BYTES);
            } else {
                free(ptr);
            }
        }
    }
}

View file

@ -20,6 +20,7 @@ use winit::event_loop::ControlFlow;
pub mod ast;
pub mod text_state;
pub mod bucket;
/// The editor is actually launched from the CLI if you pass it zero arguments,
/// or if you provide it 1 or more files or directories to open on launch.