mirror of
https://github.com/roc-lang/roc.git
synced 2025-10-03 16:44:33 +00:00
Add bucket.rs
This commit is contained in:
parent
9870aaf26c
commit
bf7f1d49e2
5 changed files with 224 additions and 0 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
@ -1639,6 +1639,16 @@ dependencies = [
|
||||||
"ttf-parser",
|
"ttf-parser",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "page_size"
|
||||||
|
version = "0.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parking_lot"
|
name = "parking_lot"
|
||||||
version = "0.10.2"
|
version = "0.10.2"
|
||||||
|
@ -2377,8 +2387,10 @@ dependencies = [
|
||||||
"indoc",
|
"indoc",
|
||||||
"inkwell",
|
"inkwell",
|
||||||
"inlinable_string",
|
"inlinable_string",
|
||||||
|
"libc",
|
||||||
"log",
|
"log",
|
||||||
"maplit",
|
"maplit",
|
||||||
|
"page_size",
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
"quickcheck",
|
"quickcheck",
|
||||||
"quickcheck_macros",
|
"quickcheck_macros",
|
||||||
|
|
|
@ -29,6 +29,8 @@ im-rc = "14" # im and im-rc should always have the same version!
|
||||||
bumpalo = { version = "3.2", features = ["collections"] }
|
bumpalo = { version = "3.2", features = ["collections"] }
|
||||||
inlinable_string = "0.1"
|
inlinable_string = "0.1"
|
||||||
arraystring = "0.3.0"
|
arraystring = "0.3.0"
|
||||||
|
libc = "0.2"
|
||||||
|
page_size = "0.4"
|
||||||
# NOTE: rtfeldman/inkwell is a fork of TheDan64/inkwell which does not change anything.
|
# NOTE: rtfeldman/inkwell is a fork of TheDan64/inkwell which does not change anything.
|
||||||
#
|
#
|
||||||
# The reason for this fork is that the way Inkwell is designed, you have to use
|
# The reason for this fork is that the way Inkwell is designed, you have to use
|
||||||
|
|
|
@ -345,6 +345,10 @@ pub struct Exprs {
|
||||||
// memory usage. We could in theory go up to free_128node_slots, but in
|
// memory usage. We could in theory go up to free_128node_slots, but in
|
||||||
// practice it seems unlikely that it would be worth the bookkeeping
|
// practice it seems unlikely that it would be worth the bookkeeping
|
||||||
// effort to go that high.
|
// effort to go that high.
|
||||||
|
//
|
||||||
|
// TODO: this could be refactored into `free_slots: [Vec<ExprId>; 5]`
|
||||||
|
// where (2 ^ index) is the node size for that slot. It's less
|
||||||
|
// self-documenting but might allow for better code reuse.
|
||||||
pub free_1node_slots: Vec<ExprId>,
|
pub free_1node_slots: Vec<ExprId>,
|
||||||
pub free_2node_slots: Vec<ExprId>,
|
pub free_2node_slots: Vec<ExprId>,
|
||||||
pub free_4node_slots: Vec<ExprId>,
|
pub free_4node_slots: Vec<ExprId>,
|
||||||
|
@ -366,6 +370,26 @@ pub struct Exprs {
|
||||||
// (e.g. If, When, Record, Tag, Call, Closure) can only contain at most
|
// (e.g. If, When, Record, Tag, Call, Closure) can only contain at most
|
||||||
// 255 nodes. So functions can have at most 255 arguments, records can have
|
// 255 nodes. So functions can have at most 255 arguments, records can have
|
||||||
// at most 255 fields, etc.
|
// at most 255 fields, etc.
|
||||||
|
//
|
||||||
|
// Nice things about this system include:
|
||||||
|
// * Allocating a new bucket is as simple as asking the OS for a memory page.
|
||||||
|
// * Since each node is 16B, each node's memory address will be a multiple of 16.
|
||||||
|
// * Thanks to the free lists and our consistent chunk sizes, we should
|
||||||
|
// end up with very little fragmentation.
|
||||||
|
// * Finding a slot for a given node should be very fast: see if the relevant
|
||||||
|
// free list has any openings; if not, try the next size up.
|
||||||
|
//
|
||||||
|
// Less nice things include:
|
||||||
|
// * This system makes it very hard to ever give a page back to the OS.
|
||||||
|
// We could try doing the Mesh Allocator strategy: whenever we allocate
|
||||||
|
// something, assign it to a random slot in the bucket, and then periodically
|
||||||
|
// try to merge two pages into one (by locking and remapping them in the OS)
|
||||||
|
// and then returning the redundant physical page back to the OS. This should
|
||||||
|
// work in theory, but is pretty complicated, and we'd need to schedule it.
|
||||||
|
// Keep in mind that we can't use the Mesh Allocator itself because it returns
|
||||||
|
// usize pointers, which would be too big for us to have 16B nodes.
|
||||||
|
// On the plus side, we could be okay with higher memory usage early on,
|
||||||
|
// and then later use the Mesh strategy to reduce long-running memory usage.
|
||||||
type ExprBucketSlots = [Expr2; 256];
|
type ExprBucketSlots = [Expr2; 256];
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
185
editor/src/bucket.rs
Normal file
185
editor/src/bucket.rs
Normal file
|
@ -0,0 +1,185 @@
|
||||||
|
//! Buckets: fixed-size blocks of 16-byte node slots, addressed by a bucket id plus a slot index.
|
||||||
|
use libc::{c_void, calloc, free, mmap, munmap, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
use std::mem::{self, size_of};
|
||||||
|
use std::ptr::null;
|
||||||
|
use std::{u16, u8};
|
||||||
|
|
||||||
|
/// Size in bytes of the backing memory allocated for one bucket.
const BUCKET_BYTES: usize = 4096;
|
||||||
|
|
||||||
|
pub struct NodeId<T> {
|
||||||
|
pub bucket_id: BucketId<T>,
|
||||||
|
pub slot: BucketSlot<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A `u16` bucket index, tagged with the node type `T` so that ids for
/// different node types cannot be mixed up.
pub struct BucketId<T> {
    value: u16,
    _phantom: PhantomData<T>,
}

impl<T> BucketId<T> {
    /// Wraps a raw `u16` index as a typed bucket id.
    fn from_u16(value: u16) -> Self {
        Self {
            value,
            _phantom: PhantomData,
        }
    }
}
|
||||||
|
|
||||||
|
/// A `u8` slot index within a bucket, tagged with the node type `T`.
pub struct BucketSlot<T> {
    value: u8,
    _phantom: PhantomData<T>,
}

impl<T> BucketSlot<T> {
    /// Wraps a raw `u8` index as a typed slot.
    fn from_u8(value: u8) -> Self {
        Self {
            value,
            _phantom: PhantomData,
        }
    }
}
|
||||||
|
|
||||||
|
pub struct Buckets<T> {
|
||||||
|
buckets: Vec<Bucket<T>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Buckets<T> {
|
||||||
|
pub fn add(&mut self) -> Result<BucketId<T>, ()> {
|
||||||
|
let num_buckets = self.buckets.len();
|
||||||
|
|
||||||
|
if num_buckets <= u16::MAX as usize {
|
||||||
|
let bucket_id = BucketId::from_u16(num_buckets as u16);
|
||||||
|
|
||||||
|
self.buckets.push(Bucket::default());
|
||||||
|
|
||||||
|
Ok(bucket_id)
|
||||||
|
} else {
|
||||||
|
Err(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get<'a>(&'a self, node_id: NodeId<T>) -> Option<&'a T> {
|
||||||
|
self.buckets
|
||||||
|
.get(node_id.bucket_id.value as usize)
|
||||||
|
.and_then(|bucket| bucket.get(node_id.slot))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Bucket<T> {
|
||||||
|
next_unused_slot: u16,
|
||||||
|
first_slot: *mut T,
|
||||||
|
_phantom: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Bucket<T> {
|
||||||
|
/// If there's room left in the bucket, adds the item and returns
|
||||||
|
/// the slot where it was put. If there was no room left, returns Err(()).
|
||||||
|
pub fn add(&mut self, node: T) -> Result<BucketSlot<T>, ()> {
|
||||||
|
// Once next_unused_slot exceeds u8::MAX, we have no room left.
|
||||||
|
if self.next_unused_slot <= u8::MAX as u16 {
|
||||||
|
let chosen_slot = self.next_unused_slot as u8;
|
||||||
|
|
||||||
|
unsafe { self.put_unchecked(node, chosen_slot) };
|
||||||
|
self.next_unused_slot += 1;
|
||||||
|
|
||||||
|
Ok(BucketSlot::from_u8(chosen_slot))
|
||||||
|
} else {
|
||||||
|
// No room left!
|
||||||
|
Err(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If the given slot is available, inserts the given node into it.
|
||||||
|
/// Otherwise, returns the node that was in the already-occupied slot.
|
||||||
|
pub fn insert(&mut self, node: T, slot: BucketSlot<T>) -> Result<(), &T> {
|
||||||
|
let slot = slot.value;
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
if self.is_available(slot) {
|
||||||
|
self.put_unchecked(node, slot);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(self.get_unchecked(slot))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get<'a>(&'a self, slot: BucketSlot<T>) -> Option<&'a T> {
|
||||||
|
unsafe {
|
||||||
|
let slot_ptr = self.first_slot.offset(slot.value as isize) as *const T;
|
||||||
|
let value = &*slot_ptr;
|
||||||
|
|
||||||
|
if *mem::transmute::<&T, &[u8; 16]>(value) != [0; 16] {
|
||||||
|
Some(value)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn put_unchecked(&mut self, node: T, slot: u8) {
|
||||||
|
let slot_ptr = self.first_slot.offset(slot as isize);
|
||||||
|
|
||||||
|
*slot_ptr = node;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn get_unchecked<'a>(&'a self, slot: u8) -> &'a T {
|
||||||
|
&*self.first_slot.offset(slot as isize)
|
||||||
|
}
|
||||||
|
|
||||||
|
// A slot is available iff its bytes are all zeroes
|
||||||
|
unsafe fn is_available(&self, slot: u8) -> bool {
|
||||||
|
let slot_ptr = self.first_slot.offset(slot as isize) as *const [u8; 16];
|
||||||
|
|
||||||
|
*slot_ptr == [0; 16]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Default for Bucket<T> {
|
||||||
|
fn default() -> Self {
|
||||||
|
// It's only safe to store this as a *const T if T is 16 bytes.
|
||||||
|
// This is designed to be used exclusively with 16-byte nodes!
|
||||||
|
debug_assert_eq!(size_of::<T>(), 16);
|
||||||
|
|
||||||
|
let first_slot = if page_size::get() == 4096 {
|
||||||
|
unsafe {
|
||||||
|
// mmap exactly one memory page (4096 bytes)
|
||||||
|
mmap(
|
||||||
|
null::<c_void>() as *mut c_void,
|
||||||
|
BUCKET_BYTES,
|
||||||
|
PROT_READ | PROT_WRITE,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Somehow the page size is not 4096 bytes, so fall back on calloc.
|
||||||
|
// (We use calloc over malloc because we rely on the bytes having
|
||||||
|
// been zeroed to tell which slots are available.)
|
||||||
|
unsafe { calloc(1, BUCKET_BYTES) }
|
||||||
|
} as *mut T;
|
||||||
|
|
||||||
|
Bucket {
|
||||||
|
next_unused_slot: 0,
|
||||||
|
first_slot,
|
||||||
|
_phantom: PhantomData::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Drop for Bucket<T> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if page_size::get() == 4096 {
|
||||||
|
unsafe {
|
||||||
|
munmap(self.first_slot as *mut c_void, BUCKET_BYTES);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unsafe {
|
||||||
|
free(self.first_slot as *mut c_void);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -20,6 +20,7 @@ use winit::event_loop::ControlFlow;
|
||||||
|
|
||||||
pub mod ast;
|
pub mod ast;
|
||||||
pub mod text_state;
|
pub mod text_state;
|
||||||
|
pub mod bucket;
|
||||||
|
|
||||||
/// The editor is actually launched from the CLI if you pass it zero arguments,
|
/// The editor is actually launched from the CLI if you pass it zero arguments,
|
||||||
/// or if you provide it 1 or more files or directories to open on launch.
|
/// or if you provide it 1 or more files or directories to open on launch.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue