Merge pull request #785 from rtfeldman/pool

Use pool for editor IR
Richard Feldman 2020-12-09 21:58:26 -05:00 committed by GitHub
commit a1355a7557
4 changed files with 452 additions and 730 deletions

editor/src/ast.rs

@@ -1,9 +1,10 @@
use crate::bucket::{BucketId, BucketList, BucketSlot, BucketStr, NodeId}; use crate::pool::{NodeId, PoolStr, PoolVec};
use arraystring::{typenum::U14, ArrayString}; use arraystring::{typenum::U30, ArrayString};
use roc_can::def::Annotation; use roc_can::def::Annotation;
use roc_can::expr::{Field, Recursive}; use roc_can::expr::{Field, Recursive};
use roc_module::ident::Lowercase; use roc_module::ident::Lowercase;
use roc_module::low_level::LowLevel; use roc_module::low_level::LowLevel;
use roc_module::operator::CalledVia;
use roc_module::symbol::Symbol; use roc_module::symbol::Symbol;
use roc_types::subs::Variable; use roc_types::subs::Variable;
use roc_types::types::Alias; use roc_types::types::Alias;
@@ -23,47 +24,67 @@ pub enum IntStyle {
Binary, Binary,
} }
/// An Expr that fits in 16B. #[derive(Debug, Copy, Clone, PartialEq, Eq)]
/// It has a 1B discriminant and variants which hold payloads of at most 15B. pub enum IntVal {
I64(i64),
U64(u64),
I32(i32),
U32(u32),
I16(i16),
U16(u16),
I8(i8),
U8(u8),
}
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum FloatVal {
F64(f64),
F32(f32),
}
#[test]
fn size_of_intval() {
assert_eq!(std::mem::size_of::<IntVal>(), 16);
}
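As a quick sanity check on the layout reasoning behind these enums, here is an editor's sketch (not part of this diff, and assuming a typical 64-bit target): the widest payload (i64/u64/f64) is 8 bytes and 8-byte aligned, so the 1B discriminant plus padding rounds the whole enum up to the next multiple of 8, i.e. 16B.
#[test]
fn int_and_float_val_layout_sketch() {
    // Editor's sketch, not part of this commit: an 8-byte-aligned payload plus a
    // 1B discriminant rounds both enums up to 16B.
    assert_eq!(std::mem::size_of::<FloatVal>(), 16);
    assert_eq!(std::mem::align_of::<IntVal>(), std::mem::align_of::<i64>());
}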
/// An Expr that fits in 32B.
/// It has a 1B discriminant and variants which hold payloads of at most 31B.
#[derive(Debug)] #[derive(Debug)]
pub enum Expr2 { pub enum Expr2 {
/// A number literal (without a dot) containing no underscores /// A negative number literal without a dot
Num { SmallInt {
number: i64, // 8B number: IntVal, // 16B
var: Variable, // 4B var: Variable, // 4B
style: IntStyle, // 1B style: IntStyle, // 1B
text: PoolStr, // 8B
}, },
/// A floating-point literal (with a dot) containing no underscores /// A large (over 64-bit) negative number literal without a dot.
/// This variant can't use IntVal because if IntVal stored 128-bit
/// integers, it would be 32B on its own because of alignment.
I128 {
number: i128, // 16B
var: Variable, // 4B
style: IntStyle, // 1B
text: PoolStr, // 8B
},
/// A large (over 64-bit) nonnegative number literal without a dot
/// This variant can't use IntVal because if IntVal stored 128-bit
/// integers, it would be 32B on its own because of alignment.
U128 {
number: u128, // 16B
var: Variable, // 4B
style: IntStyle, // 1B
text: PoolStr, // 8B
},
/// A floating-point literal (with a dot)
Float { Float {
number: f64, // 8B number: FloatVal, // 16B
var: Variable, // 4B var: Variable, // 4B
},
/// A number literal (without a dot) containing underscores
NumWithUnderscores {
number: i64, // 8B
var: Variable, // 4B
text_id: BucketId<BucketStr>, // 2B
text_sl: BucketSlot<BucketStr>, // 1B
},
/// A float literal (with a dot) containing underscores
FloatWithUnderscores {
number: f64, // 8B
var: Variable, // 4B
text_id: BucketId<BucketStr>, // 2B
text_sl: BucketSlot<BucketStr>, // 1B
},
/// string literals of length up to 14B
SmallStr(ArrayString<U14>), // 15B
/// string literals of length up to 4094B
MedStr {
str_id: BucketId<BucketStr>,
str_sl: BucketSlot<BucketStr>,
}, // 4B
/// string literals of length over 4094B, but requires calling malloc/free
BigStr {
pointer: *const u8, // 8B on 64-bit systems
len: u32, // 4B, meaning maximum string literal size of 4GB. Could theoretically fit 7B here, which would get closer to the full isize::MAX
}, },
/// string literals of length up to 30B
SmallStr(ArrayString<U30>), // 31B
/// string literals of length 31B or more
Str(PoolStr), // 8B
// Lookups // Lookups
Var(Symbol), // 8B Var(Symbol), // 8B
@@ -74,160 +95,108 @@ pub enum Expr2 {
elem_var: Variable, // 4B elem_var: Variable, // 4B
}, },
List { List {
list_var: Variable, // 4B - required for uniqueness of the list list_var: Variable, // 4B - required for uniqueness of the list
elem_var: Variable, // 4B elem_var: Variable, // 4B
elems: BucketList<Expr2>, // 4B elems: PoolVec<Expr2>, // 8B
}, },
If { If {
cond_var: Variable, // 4B cond_var: Variable, // 4B
expr_var: Variable, // 4B expr_var: Variable, // 4B
branches: BucketList<(Expr2, Expr2)>, // 4B branches: PoolVec<(Expr2, Expr2)>, // 8B
final_else_id: BucketId<Expr2>, // 2B final_else: NodeId<Expr2>, // 4B
final_else_sl: BucketSlot<Expr2>, // 1B
}, },
When { When {
cond_var: Variable, // 4B cond_var: Variable, // 4B
expr_var: Variable, // 4B expr_var: Variable, // 4B
branches: BucketList<WhenBranch>, // 4B branches: PoolVec<WhenBranch>, // 8B
cond_id: BucketId<Expr2>, // 2B cond: NodeId<Expr2>, // 4B
cond_sl: BucketSlot<Expr2>, // 1B
}, },
LetRec { LetRec {
// TODO need to make this Alias type here bucket-friendly, which will be hard! // TODO need to make this Alias type here page-friendly, which will be hard!
aliases: BucketList<(Symbol, Alias)>, // 4B aliases: PoolVec<(Symbol, Alias)>, // 8B
defs: BucketList<Def>, // 4B defs: PoolVec<Def>, // 8B
body_var: Variable, // 4B body_var: Variable, // 8B
body_id: BucketId<Expr2>, // 2B body_id: NodeId<Expr2>, // 4B
body_sl: BucketSlot<Expr2>, // 1B
}, },
LetNonRec { LetNonRec {
// TODO need to make this Alias type here bucket-friendly, which will be hard! // TODO need to make this Alias type here page-friendly, which will be hard!
aliases: BucketList<(Symbol, Alias)>, // 4B aliases: PoolVec<(Symbol, Alias)>, // 8B
def_id: BucketId<Def>, // 2B def_id: NodeId<Def>, // 4B
def_sl: BucketSlot<Def>, // 1B body_id: NodeId<Expr2>, // 4B
body_id: BucketId<Expr2>, // 2B body_var: Variable, // 4B
body_sl: BucketSlot<Expr2>, // 1B
body_var: Variable, // 4B
}, },
Call { Call {
/// NOTE: the first elem in this list is the expression and its variable. args: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
/// The others are arguments. This is because we didn't have room for expr: NodeId<Expr2>, // 4B
/// both the expr and its variable otherwise. expr_var: Variable, // 4B
expr_and_args: BucketList<(Variable, NodeId<Expr2>)>, // 4B fn_var: Variable, // 4B
fn_var: Variable, // 4B closure_var: Variable, // 4B
closure_var: Variable, // 4B called_via: CalledVia, // 2B
/// Cached outside expr_and_args so we don't have to potentially
/// traverse that whole linked list chain to count all the args.
arity: u16, // 2B
called_via: CalledVia2, // 1B
}, },
RunLowLevel { RunLowLevel {
op: LowLevel, // 1B op: LowLevel, // 1B
args: BucketList<(Variable, NodeId<Expr2>)>, // 4B args: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
ret_var: Variable, // 4B ret_var: Variable, // 4B
}, },
Closure { Closure {
/// NOTE: the first elem in this list is the function's name Symbol, plus Variable::NONE args: PoolVec<(Variable, NodeId<Pat2>)>, // 8B
/// name: Symbol, // 8B
/// This is not ideal, but there's no room for an 8-byte Symbol body: NodeId<Expr2>, // 4B
/// in a 16B node that already needs to hold this much other data. function_type: Variable, // 4B
captured_symbols: BucketList<(Symbol, Variable)>, // 4B recursive: Recursive, // 1B
args: BucketList<(Variable, NodeId<Pat2>)>, // 4B extra: NodeId<ClosureExtra>, // 4B
recursive: Recursive, // 1B
body_id: BucketId<Expr2>, // 2B
body_sl: BucketSlot<Expr2>, // 1B
vars_id: BucketId<ClosureVars>, // 2B
vars_sl: BucketSlot<ClosureVars>, // 1B
}, },
// Product Types // Product Types
Record { Record {
record_var: Variable, // 4B record_var: Variable, // 4B
fields: BucketList<(BucketStr, Variable, NodeId<Expr2>)>, // 4B fields: PoolVec<(PoolStr, Variable, NodeId<Expr2>)>, // 8B
}, },
/// Empty record constant /// Empty record constant
EmptyRecord, EmptyRecord,
/// Look up exactly one field on a record, e.g. (expr).foo. /// Look up exactly one field on a record, e.g. (expr).foo.
Access { Access {
field_id: BucketId<BucketStr>, // 3B field: PoolStr, // 4B
field_sl: BucketSlot<BucketStr>, // 3B expr: NodeId<Expr2>, // 4B
expr_id: BucketId<Expr2>, // 2B record_var: Variable, // 4B
expr_sl: BucketSlot<Expr2>, // 1B ext_var: Variable, // 4B
vars_id: BucketId<AccessVars>, // 2B field_var: Variable, // 4B
vars_sl: BucketSlot<AccessVars>, // 1B
}, },
/// field accessor as a function, e.g. (.foo) expr /// field accessor as a function, e.g. (.foo) expr
Accessor { Accessor {
record_vars_id: BucketId<RecordVars>, // 3B function_var: Variable, // 4B
record_vars_sl: BucketSlot<RecordVars>, // 3B closure_var: Variable, // 4B
function_var: Variable, // 4B field: PoolStr, // 4B
closure_var: Variable, // 4B record_var: Variable, // 4B
field_id: BucketId<BucketStr>, // 2B ext_var: Variable, // 4B
field_sl: BucketSlot<BucketStr>, // 1B field_var: Variable, // 4B
}, },
Update { Update {
symbol: Symbol, // 8B symbol: Symbol, // 8B
updates: BucketList<(Lowercase, Field)>, // 4B updates: PoolVec<(Lowercase, Field)>, // 8B
vars_id: BucketId<UpdateVars>, // 2B record_var: Variable, // 4B
vars_sl: BucketSlot<UpdateVars>, // 1B ext_var: Variable, // 4B
}, },
// Sum Types // Sum Types
Tag { Tag {
// NOTE: A BucketStr node is a 2B length and then 14B bytes, name: PoolStr, // 4B
// plus more bytes in adjacent nodes if necessary. Thus we have variant_var: Variable, // 4B
// a hard cap of 4094 bytes as the maximum length of tags and fields. ext_var: Variable, // 4B
name_id: BucketId<BucketStr>, // 2B arguments: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
name_sl: BucketSlot<BucketStr>, // 1B
variant_var: Variable, // 4B
ext_var: Variable, // 4B
arguments: BucketList<(Variable, BucketId<Expr2>, BucketSlot<Expr2>)>, // 4B
}, },
// Compiles, but will crash if reached // Compiles, but will crash if reached
RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */), RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */),
} }
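To make the 32B constraint concrete, here is an editor's sketch (not part of the diff) that simply adds up the per-field byte counts from the comments above for two of the larger variants; with a 1B discriminant, each variant's payload has to stay within 31B.
#[test]
fn expr2_payload_budget_sketch() {
    // Editor's sketch using the byte counts from the comments above.
    let small_int = 16 + 4 + 1 + 8; // IntVal + Variable + IntStyle + PoolStr
    let call = 8 + 4 + 4 + 4 + 4 + 2; // args + expr + expr_var + fn_var + closure_var + called_via
    assert!(small_int <= 31);
    assert!(call <= 31);
}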
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// It's critical that this fit in 1 byte. If it takes 2B, Expr::Call is too big.
/// That's why we have all the variants in here, instead of having separate
/// UnaryOp and Binary
pub enum CalledVia2 {
/// Calling with space, e.g. (foo bar)
Space,
/// (-), e.g. (-x)
Negate,
/// (!), e.g. (!x)
Not,
// highest precedence binary op
Caret,
Star,
Slash,
DoubleSlash,
Percent,
DoublePercent,
Plus,
Minus,
Equals,
NotEquals,
LessThan,
GreaterThan,
LessThanOrEq,
GreaterThanOrEq,
And,
Or,
Pizza, // lowest precedence binary op
}
#[derive(Debug)] #[derive(Debug)]
pub struct Def { pub struct Def {
pub pattern: NodeId<Pat2>, // 3B pub pattern: NodeId<Pat2>, // 3B
pub expr: NodeId<Expr2>, // 3B pub expr: NodeId<Expr2>, // 3B
// TODO maybe need to combine these vars behind a pointer? // TODO maybe need to combine these vars behind a pointer?
pub expr_var: Variable, // 4B pub expr_var: Variable, // 4B
pub pattern_vars: BucketList<(Symbol, Variable)>, // 4B pub pattern_vars: PoolVec<(Symbol, Variable)>, // 4B
// TODO how big is an annotation? What about an Option<Annotation>? // TODO how big is an annotation? What about an Option<Annotation>?
pub annotation: Option<Annotation>, // ??? pub annotation: Option<Annotation>, // ???
} }
@@ -237,155 +206,52 @@ pub enum Pat2 {
Todo, Todo,
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] /// This is overflow data from a Closure variant, which needs to store
pub struct UpdateVars { /// more than 32B of total data
record_var: Variable, // 4B #[derive(Debug)]
ext_var: Variable, // 4B pub struct ClosureExtra {
} return_type: Variable, // 4B
captured_symbols: PoolVec<(Symbol, Variable)>, // 8B
#[derive(Debug, Copy, Clone, PartialEq, Eq)] closure_type: Variable, // 4B
pub struct RecordVars { closure_ext_var: Variable, // 4B
record_var: Variable, // 4B
ext_var: Variable, // 4B
field_var: Variable, // 4B
}
/// This is 15B, so it fits in a Node slot.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct AccessVars {
record_var: Variable, // 4B
ext_var: Variable, // 4B
field_var: Variable, // 4B
}
/// This is 16B, so it fits in a Node slot.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct ClosureVars {
function_type: Variable,
closure_type: Variable,
closure_ext_var: Variable,
return_type: Variable,
} }
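Since ClosureExtra lives behind a NodeId, reading it back goes through the pool. A minimal sketch of that lookup (editor's illustration, not part of the diff; it assumes a hypothetical helper living in the same module as Expr2, with `pool` being the Pool that owns these nodes):
fn closure_return_type(pool: &crate::pool::Pool, expr: &Expr2) -> Option<Variable> {
    match expr {
        // `extra` is a 4B NodeId; Pool::get dereferences it in place.
        Expr2::Closure { extra, .. } => Some(pool.get(*extra).return_type),
        _ => None,
    }
}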
#[derive(Debug)] #[derive(Debug)]
pub struct WhenBranch { pub struct WhenBranch {
pub patterns: BucketList<Pat2>, // 4B pub patterns: PoolVec<Pat2>, // 4B
pub body: NodeId<Expr2>, // 3B pub body: NodeId<Expr2>, // 3B
pub guard: Option<NodeId<Expr2>>, // 4B pub guard: Option<NodeId<Expr2>>, // 4B
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct PatternId { pub struct PatternId {
/// TODO: PatternBucketId /// TODO: PatternPoolId
bucket_id: ExprBucketId, page_id: ExprPoolId,
/// TODO: PatternBucketSlot /// TODO: PatternPoolSlot
slot: ExprBucketSlot, slot: ExprPoolSlot,
}
// Each bucket has metadata and slots.
// The metadata determines things like which slots are free.
#[derive(Debug)]
pub struct ExprBucket {
// We can store this as a u8 because whenever we create a bucket, we
// always fill at least one slot. So there will never be 256 unused slots
// remaining; the most there will ever be will be 255.
//
// Note that there can be "holes" in this as we remove nodes; those
// are recorded in the containing struct, not here.
//
// Also note that we can derive from this the next unused slot.
unused_slots_remaining: u8,
slots: Box<ExprBucketSlots>,
}
pub struct Exprs {
// Whenever we free a slot of a particular size, we make a note of it
// here, so we can reuse it later. This can lead to poor data locality
// over time, but the alternative is memory fragmentation and ever-growing
// memory usage. We could in theory go up to free_128node_slots, but in
// practice it seems unlikely that it would be worth the bookkeeping
// effort to go that high.
//
// TODO: this could be refactored into `free_slots: [Vec<ExprId>; 5]`
// where (2 ^ index) is the node size for that slot. It's less
// self-documenting but might allow for better code reuse.
pub free_1node_slots: Vec<ExprId>,
pub free_2node_slots: Vec<ExprId>,
pub free_4node_slots: Vec<ExprId>,
pub free_8node_slots: Vec<ExprId>,
pub free_16node_slots: Vec<ExprId>,
// Note that empty_buckets is equivalent to free_256node_slots - it means
// the entire bucket is empty, at which point we can fill it with
// whatever we please.
pub empty_buckets: Vec<ExprBucketId>,
pub buckets: Vec<ExprBucket>,
}
// Each bucket has 256 slots. Each slot holds one 16B node
// This means each bucket is 4096B, which is the size of a memory page
// on typical systems where the compiler will be run.
//
// Because each bucket has 256 slots, and arrays of nodes must fit inside
// a single bucket, this implies that nodes which contain arrays of nodes
// (e.g. If, When, Record, Tag, Call, Closure) can only contain at most
// 255 nodes. So functions can have at most 255 arguments, records can have
// at most 255 fields, etc.
//
// Nice things about this system include:
// * Allocating a new bucket is as simple as asking the OS for a memory page.
// * Since each node is 16B, each node's memory address will be a multiple of 16.
// * Thanks to the free lists and our consistent chunk sizes, we should
// end up with very little fragmentation.
// * Finding a slot for a given node should be very fast: see if the relevant
// free list has any openings; if not, try the next size up.
//
// Less nice things include:
// * This system makes it very hard to ever give a page back to the OS.
// We could try doing the Mesh Allocator strategy: whenever we allocate
// something, assign it to a random slot in the bucket, and then periodically
// try to merge two pages into one (by locking and remapping them in the OS)
// and then returning the redundant physical page back to the OS. This should
// work in theory, but is pretty complicated, and we'd need to schedule it.
// Keep in mind that we can't use the Mesh Allocator itself because it returns
// usize pointers, which would be too big for us to have 16B nodes.
// On the plus side, we could be okay with higher memory usage early on,
// and then later use the Mesh strategy to reduce long-running memory usage.
type ExprBucketSlots = [Expr2; 256];
#[test]
fn size_of_expr_bucket() {
assert_eq!(
std::mem::size_of::<ExprBucketSlots>(),
crate::bucket::BUCKET_BYTES
);
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct PatId { pub struct PatId {
bucket_id: ExprBucketId, // TODO PatBucketId page_id: ExprPoolId, // TODO PatPoolId
slot: ExprBucketSlot, // TODO PatBucketSlot slot: ExprPoolSlot, // TODO PatPoolSlot
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct ExprId { pub struct ExprId {
bucket_id: ExprBucketId, page_id: ExprPoolId,
slot: ExprBucketSlot, slot: ExprPoolSlot,
} }
// We have a maximum of 65K buckets. // We have a maximum of 65K pages.
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct ExprBucketId(u16); pub struct ExprPoolId(u16);
/// Each of these is the index of one 16B node inside a bucket's 4096B /// Each of these is the index of one 32B node inside a page's 4096B
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct ExprBucketSlot(u8); pub struct ExprPoolSlot(u8);
#[test] #[test]
fn size_of_expr() { fn size_of_expr() {
assert_eq!(std::mem::size_of::<Expr2>(), 16); assert_eq!(std::mem::size_of::<Expr2>(), crate::pool::NODE_BYTES);
}
#[test]
fn size_of_called_via() {
assert_eq!(std::mem::size_of::<CalledVia2>(), 1);
} }

editor/src/bucket.rs (deleted)

@@ -1,457 +0,0 @@
/// A bucket of 16-byte nodes. The node value 0 is reserved for the bucket's
/// use, and valid nodes may never have that value.
///
/// By design, each bucket is 4096 bytes large. When you make a bucket, it
/// uses mmap to reserve one anonymous memory page in which to store nodes.
/// Since nodes are 16 bytes, one bucket can store 256 nodes; you can access
/// a particular node by its BucketSlot, which is an opaque wrapper around a u8.
///
/// Buckets also use the node value 0 (all 0 bits) to mark slots as unoccupied.
/// This is important for performance.
use libc::{c_void, calloc, free, mmap, munmap, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
use std::marker::PhantomData;
use std::mem::size_of;
use std::ptr::null;
pub const BUCKET_BYTES: usize = 4096;
#[derive(Debug)]
pub struct NodeId<T: Sized> {
pub bucket_id: BucketId<T>,
pub slot: BucketSlot<T>,
}
#[test]
fn size_of_node_id() {
assert_eq!(std::mem::size_of::<NodeId<()>>(), 4);
}
impl<T> Clone for NodeId<T> {
fn clone(&self) -> Self {
*self
}
}
impl<T> Copy for NodeId<T> {}
impl<T: Sized> NodeId<T> {
fn next_slot(&self) -> Self {
NodeId {
bucket_id: self.bucket_id,
slot: self.slot.increment(),
}
}
}
impl<T> PartialEq for NodeId<T> {
fn eq(&self, other: &Self) -> bool {
self.bucket_id == other.bucket_id && self.slot == other.slot
}
}
impl<T> Eq for NodeId<T> {}
#[derive(Debug)]
#[repr(transparent)]
pub struct BucketId<T: Sized> {
value: u16,
_phantom: PhantomData<T>,
}
#[test]
fn size_of_bucket_id() {
assert_eq!(std::mem::size_of::<BucketId<()>>(), 2);
}
impl<T> Clone for BucketId<T> {
fn clone(&self) -> Self {
*self
}
}
impl<T> Copy for BucketId<T> {}
impl<T> PartialEq for BucketId<T> {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}
impl<T> Eq for BucketId<T> {}
impl<T: Sized> BucketId<T> {
fn from_u16(value: u16) -> Self {
BucketId {
value,
_phantom: PhantomData::default(),
}
}
}
#[derive(Debug)]
#[repr(transparent)]
pub struct BucketSlot<T: Sized> {
value: u8,
_phantom: PhantomData<T>,
}
#[test]
fn size_of_bucket_slot() {
assert_eq!(std::mem::size_of::<BucketSlot<()>>(), 1);
}
impl<T> Clone for BucketSlot<T> {
fn clone(&self) -> Self {
*self
}
}
impl<T> Copy for BucketSlot<T> {}
impl<T> PartialEq for BucketSlot<T> {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}
impl<T> Eq for BucketSlot<T> {}
impl<T: Sized> BucketSlot<T> {
#[allow(dead_code)]
fn from_u8(value: u8) -> Self {
BucketSlot {
value,
_phantom: PhantomData::default(),
}
}
fn increment(&self) -> Self {
BucketSlot {
value: self.value + 1,
_phantom: PhantomData::default(),
}
}
}
pub struct Buckets {
buckets: Vec<Bucket>,
// free_1node_slots: Vec<NodeId<T>>,
}
impl Buckets {
// fn find_space_for(&mut self, nodes: u8) -> Result<BucketId<T>, ()> {}
pub fn add<T: Sized>(&mut self) -> Result<BucketId<T>, ()> {
let num_buckets = self.buckets.len();
if num_buckets <= u16::MAX as usize {
let bucket_id = BucketId::from_u16(num_buckets as u16);
let bucket = Bucket::default();
self.buckets.push(bucket);
Ok(bucket_id)
} else {
Err(())
}
}
fn get_unchecked<'a, T: Sized>(&'a self, node_id: NodeId<T>) -> &'a T {
unsafe {
self.buckets
.get(node_id.bucket_id.value as usize)
.unwrap()
.get_unchecked(node_id.slot.value)
}
}
pub fn get<'a, T: Sized>(&'a self, node_id: NodeId<T>) -> Option<&'a T> {
self.buckets
.get(node_id.bucket_id.value as usize)
.and_then(|bucket| bucket.get(node_id.slot))
}
}
struct Bucket {
#[allow(dead_code)]
next_unused_slot: u16,
first_slot: *mut [u8; 16],
}
impl Bucket {
/// If there's room left in the bucket, adds the item and returns
/// the slot where it was put. If there was no room left, returns Err(()).
#[allow(dead_code)]
pub fn add<T: Sized>(&mut self, node: T) -> Result<BucketSlot<T>, ()> {
// It's only safe to store this as a *const T if T is 16 bytes.
// This is designed to be used exclusively with 16-byte nodes!
debug_assert_eq!(size_of::<T>(), 16);
// Once next_unused_slot exceeds u8::MAX, we have no room left.
if self.next_unused_slot <= u8::MAX as u16 {
let chosen_slot = self.next_unused_slot as u8;
unsafe { self.put_unchecked(node, chosen_slot) };
self.next_unused_slot += 1;
Ok(BucketSlot::from_u8(chosen_slot))
} else {
// No room left!
Err(())
}
}
/// If the given slot is available, inserts the given node into it.
/// Otherwise, returns the node that was in the already-occupied slot.
#[allow(dead_code)]
pub fn insert<T: Sized>(&mut self, node: T, slot: BucketSlot<T>) -> Result<(), &T> {
// It's only safe to use this if T is 16 bytes.
// This is designed to be used exclusively with 16-byte nodes!
debug_assert_eq!(size_of::<T>(), 16);
let slot = slot.value;
unsafe {
if self.is_available(slot) {
self.put_unchecked(node, slot);
Ok(())
} else {
Err(self.get_unchecked(slot))
}
}
}
pub fn get<'a, T: Sized>(&'a self, slot: BucketSlot<T>) -> Option<&'a T> {
// It's only safe to store this as a *const T if T is 16 bytes.
// This is designed to be used exclusively with 16-byte nodes!
debug_assert_eq!(size_of::<T>(), 16);
unsafe {
let slot_ptr = self.first_slot.offset(slot.value as isize) as *const T;
let value: &[u8; 16] = &*(slot_ptr as *const [u8; 16]);
if *value != [0; 16] {
Some(&*(value as *const [u8; 16] as *const T))
} else {
None
}
}
}
unsafe fn put_unchecked<T: Sized>(&mut self, node: T, slot: u8) {
// It's only safe to store this as a *const T if T is 16 bytes.
// This is designed to be used exclusively with 16-byte nodes!
debug_assert_eq!(size_of::<T>(), 16);
let slot_ptr = self.first_slot.offset(slot as isize) as *mut T;
*slot_ptr = node;
}
unsafe fn get_unchecked<T>(&self, slot: u8) -> &T {
&*(self.first_slot.offset(slot as isize) as *const T)
}
// A slot is available iff its bytes are all zeroes
unsafe fn is_available(&self, slot: u8) -> bool {
let slot_ptr = self.first_slot.offset(slot as isize) as *const [u8; 16];
*slot_ptr == [0; 16]
}
}
impl Default for Bucket {
fn default() -> Self {
let first_slot = if page_size::get() == 4096 {
unsafe {
// mmap exactly one memory page (4096 bytes)
mmap(
null::<c_void>() as *mut c_void,
BUCKET_BYTES,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
0,
0,
)
}
} else {
// Somehow the page size is not 4096 bytes, so fall back on calloc.
// (We use calloc over malloc because we rely on the bytes having
// been zeroed to tell which slots are available.)
unsafe { calloc(1, BUCKET_BYTES) }
} as *mut [u8; 16];
Bucket {
next_unused_slot: 0,
first_slot,
}
}
}
impl Drop for Bucket {
fn drop(&mut self) {
if page_size::get() == 4096 {
unsafe {
munmap(self.first_slot as *mut c_void, BUCKET_BYTES);
}
} else {
unsafe {
free(self.first_slot as *mut c_void);
}
}
}
}
#[derive(Debug)]
pub struct BucketStr {
first_node_id: NodeId<()>,
first_segment_len: u8,
}
#[test]
fn size_of_bucket_str() {
assert_eq!(std::mem::size_of::<BucketList<()>>(), 4);
}
/// A non-empty list inside a bucket. It takes 4B of memory.
///
/// This is internally represented as an array of at most 255 nodes, which
/// can grow to 256+ nodes by having the last element be a linked list Cons
/// cell which points to another such backing array which has more nodes.
///
/// In practice, these will almost always be far below 256 nodes, but in theory
/// they can be enormous in length thanks to the linked list fallback.
///
/// Since these are non-empty lists, we need separate variants for collections
/// that can be empty, e.g. EmptyRecord and EmptyList. In contrast, we don't
/// need an EmptyIf or EmptyWhen, since although those use BucketList
/// to store their branches, having zero branches is syntactically invalid.
/// Same with Call and Closure, since all functions must have 1+ arguments.
#[derive(Debug)]
pub struct BucketList<T: Sized> {
first_node_id: BucketId<T>,
first_node_sl: BucketSlot<T>,
first_segment_len: u8,
}
#[test]
fn size_of_bucket_list() {
assert_eq!(std::mem::size_of::<BucketList<()>>(), 4);
}
impl<'a, T: 'a + Sized> BucketList<T> {
/// If given a first_segment_len of 0, that means this is a BucketList
/// consisting of 256+ nodes. The first 255 are stored in the usual
/// array, and then there's one more element at the end which continues
/// the list with a new length and NodeId value. BucketList iterators
/// automatically do these jumps behind the scenes when necessary.
pub fn new(first_node_id: NodeId<T>, first_segment_len: u8) -> Self {
BucketList {
first_segment_len,
first_node_id: first_node_id.bucket_id,
first_node_sl: first_node_id.slot,
}
}
pub fn into_iter(self, buckets: &'a Buckets) -> impl Iterator<Item = &'a T> {
self.bucket_list_iter(buckets)
}
/// Private version of into_iter which exposes the implementation detail
/// of BucketListIter. We don't want that struct to be public, but we
/// actually do want to have this separate function for code reuse
/// in the iterator's next() method.
fn bucket_list_iter(&self, buckets: &'a Buckets) -> BucketListIter<'a, T> {
let first_segment_len = self.first_segment_len;
let continues_with_cons = first_segment_len == 0;
let len_remaining = if continues_with_cons {
// We have 255 nodes followed by a Cons cell continuing the list.
u8::MAX
} else {
first_segment_len
};
BucketListIter {
continues_with_cons,
len_remaining,
bucket_id: self.first_node_id,
slot: self.first_node_sl,
buckets,
}
}
}
struct BucketListIter<'a, T: Sized> {
bucket_id: BucketId<T>,
slot: BucketSlot<T>,
len_remaining: u8,
continues_with_cons: bool,
buckets: &'a Buckets,
}
impl<'a, T: Sized> Iterator for BucketListIter<'a, T>
where
T: 'a,
{
type Item = &'a T;
fn next(&mut self) -> Option<Self::Item> {
match self.len_remaining {
0 => match self.continues_with_cons {
// We're done! This is by far the most common case, so we put
// it first to avoid branch mispredictions.
false => None,
// We need to continue with a Cons cell.
true => {
let node_id = NodeId {
bucket_id: self.bucket_id,
slot: self.slot,
}
.next_slot();
// Since we have continues_with_cons set, the next slot
// will definitely be occupied with a BucketList struct.
let node = self.buckets.get_unchecked(node_id);
let next_list = unsafe { &*(node as *const T as *const BucketList<T>) };
// Replace the current iterator with an iterator into that
// list, and then continue with next() on that iterator.
let next_iter = next_list.bucket_list_iter(self.buckets);
self.bucket_id = next_iter.bucket_id;
self.slot = next_iter.slot;
self.len_remaining = next_iter.len_remaining;
self.continues_with_cons = next_iter.continues_with_cons;
self.next()
}
},
1 => {
self.len_remaining = 0;
// Don't advance the node pointer's slot, because that might
// advance past the end of the bucket!
Some(self.buckets.get_unchecked(NodeId {
bucket_id: self.bucket_id,
slot: self.slot,
}))
}
len_remaining => {
// Get the current node
let node_id = NodeId {
bucket_id: self.bucket_id,
slot: self.slot,
};
let node = self.buckets.get_unchecked(node_id);
// Advance the node pointer to the next slot in the current bucket
self.slot = self.slot.increment();
self.len_remaining = len_remaining - 1;
Some(node)
}
}
}
}

editor/src/lib.rs

@@ -27,10 +27,10 @@ use winit::event::{Event, ModifiersState};
use winit::event_loop::ControlFlow; use winit::event_loop::ControlFlow;
pub mod ast; pub mod ast;
pub mod bucket;
mod buffer; mod buffer;
pub mod file; pub mod file;
mod keyboard_input; mod keyboard_input;
pub mod pool;
mod rect; mod rect;
pub mod text; pub mod text;
mod util; mod util;

editor/src/pool.rs (new file)

@@ -0,0 +1,313 @@
/// A pool of 32-byte nodes. The node value 0 is reserved for the pool's
/// use, and valid nodes may never have that value.
///
/// Internally, the pool is divided into pages of 4096 bytes. It stores nodes
/// into one page at a time, and when it runs out, it uses mmap to reserve an
/// anonymous memory page in which to store nodes.
///
/// Since nodes are 32 bytes, one page can store 128 nodes; you can access a
/// particular node by its NodeId, which is an opaque wrapper around an index.
///
/// Pages also use the node value 0 (all 0 bits) to mark nodes as unoccupied.
/// This is important for performance.
use libc::{c_void, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
use std::cmp::Ordering;
use std::marker::PhantomData;
use std::mem::size_of;
use std::ptr::null;
pub const NODE_BYTES: usize = 32;
// Each page has 128 slots. Each slot holds one 32B node
// This means each page is 4096B, which is the size of a memory page
// on typical systems where the compiler will be run.
//
// Nice things about this system include:
// * Allocating a new page is as simple as asking the OS for a memory page.
// * Since each node is 32B, each node's memory address will be a multiple of 32.
// * Thanks to the free lists and our consistent chunk sizes, we should
// end up with very little fragmentation.
// * Finding a slot for a given node should be very fast: see if the relevant
// free list has any openings; if not, try the next size up.
//
// Less nice things include:
// * This system makes it very hard to ever give a page back to the OS.
// We could try doing the Mesh Allocator strategy: whenever we allocate
// something, assign it to a random slot in the page, and then periodically
// try to merge two pages into one (by locking and remapping them in the OS)
// and then returning the redundant physical page back to the OS. This should
// work in theory, but is pretty complicated, and we'd need to schedule it.
// Keep in mind that we can't use the Mesh Allocator itself because it returns
// usize pointers, which would be too big for us to have 32B nodes.
// On the plus side, we could be okay with higher memory usage early on,
// and then later use the Mesh strategy to reduce long-running memory usage.
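To put numbers on the comment above, here is an editor's sketch (not part of the commit, assuming it lived alongside this code in pool.rs and that OS pages are 4096B): a page holds 128 of these 32B nodes, and since NodeId indexes nodes with a u32, the pool tops out at 2^32 nodes, i.e. 128 GiB of node storage.
#[test]
fn page_arithmetic_sketch() {
    // Editor's sketch of the arithmetic described above.
    assert_eq!(4096 / NODE_BYTES, 128);
    assert_eq!((u32::MAX as u64 + 1) * NODE_BYTES as u64, 128 * 1024 * 1024 * 1024);
}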
#[derive(Debug, PartialEq, Eq)]
pub struct NodeId<T> {
index: u32,
_phantom: PhantomData<T>,
}
impl<T> Clone for NodeId<T> {
fn clone(&self) -> Self {
NodeId {
index: self.index,
_phantom: PhantomData::default(),
}
}
}
impl<T> Copy for NodeId<T> {}
pub struct Pool {
nodes: *mut [u8; NODE_BYTES],
num_nodes: u32,
capacity: u32,
// free_1node_slots: Vec<NodeId<T>>,
}
impl Pool {
pub fn with_capacity(nodes: u32) -> Self {
// round up number of nodes requested to nearest page size in bytes
let bytes_per_page = page_size::get();
let node_bytes = NODE_BYTES * nodes as usize;
let leftover = node_bytes % bytes_per_page;
let bytes_to_mmap = if leftover == 0 {
node_bytes
} else {
node_bytes + bytes_per_page - leftover
};
let nodes = unsafe {
// mmap anonymous memory pages - that is, contiguous virtual memory
// addresses from the OS which will be lazily translated into
// physical memory one 4096-byte page at a time, once we actually
// try to read or write in that page's address range.
libc::mmap(
null::<c_void>() as *mut c_void,
bytes_to_mmap,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
0,
0,
)
} as *mut [u8; NODE_BYTES];
// This is our actual capacity, in nodes.
// It might be higher than the requested capacity due to rounding up
// to nearest page size.
let capacity = (bytes_to_mmap / NODE_BYTES) as u32;
Pool {
nodes,
num_nodes: 0,
capacity,
}
}
pub fn add<T>(&mut self, node: T) -> NodeId<T> {
// It's only safe to store this if T is exactly NODE_BYTES in size.
debug_assert_eq!(size_of::<T>(), NODE_BYTES);
let node_id = self.reserve(1);
let node_ptr = unsafe { self.nodes.offset(node_id.index as isize) } as *mut T;
unsafe { *node_ptr = node };
node_id
}
/// Reserves the given number of contiguous node slots, and returns
/// the NodeId of the first one. We only allow reserving at most 2^32 nodes in a row.
fn reserve<T>(&mut self, nodes: u32) -> NodeId<T> {
// TODO once we have a free list, look in there for an open slot first!
let index = self.num_nodes;
if index < self.capacity {
self.num_nodes = index + nodes;
NodeId {
index,
_phantom: PhantomData::default(),
}
} else {
todo!("pool ran out of capacity. TODO reallocate the nodes pointer to map to a bigger space. Can use mremap on Linux, but must memcpy lots of bytes on macOS and Windows.");
}
}
pub fn get<'a, T>(&'a self, node_id: NodeId<T>) -> &'a T {
unsafe {
let node_ptr = self.nodes.offset(node_id.index as isize) as *mut T;
&*node_ptr
}
}
// A node is available iff its bytes are all zeroes
#[allow(dead_code)]
fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
debug_assert_eq!(size_of::<T>(), NODE_BYTES);
unsafe {
let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES];
*node_ptr == [0; NODE_BYTES]
}
}
}
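As a rough usage sketch (editor's illustration, not part of the commit, assuming it lived alongside this code in pool.rs): any 32-byte value can be stored as a node, so this uses [u64; 4] as a stand-in; in the editor the nodes are Expr2 values.
#[test]
fn pool_usage_sketch() {
    // Editor's sketch: with_capacity rounds up to whole 4096B pages, add writes a
    // node into the next free slot, and get reads it back in place by reference.
    let mut pool = Pool::with_capacity(1024);
    let id: NodeId<[u64; 4]> = pool.add([1, 2, 3, 4]);
    assert_eq!(pool.get(id)[0], 1);
}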
impl Drop for Pool {
fn drop(&mut self) {
unsafe {
libc::munmap(
self.nodes as *mut c_void,
NODE_BYTES * self.capacity as usize,
);
}
}
}
/// A string containing at most 2^32 pool-allocated bytes.
#[derive(Debug)]
pub struct PoolStr {
first_node_id: NodeId<()>,
len: u32,
}
#[test]
fn pool_str_size() {
assert_eq!(size_of::<PoolStr>(), 8);
}
/// An array of at most 2^32 pool-allocated nodes.
#[derive(Debug)]
pub struct PoolVec<T> {
first_node_id: NodeId<T>,
len: u32,
}
#[test]
fn pool_vec_size() {
assert_eq!(size_of::<PoolVec<()>>(), 8);
}
impl<'a, T: 'a + Sized> PoolVec<T> {
pub fn new<I: ExactSizeIterator<Item = T>, S>(nodes: I, pool: &mut Pool) -> Self {
debug_assert!(nodes.len() <= u32::MAX as usize);
debug_assert!(size_of::<T>() <= NODE_BYTES);
let len = nodes.len() as u32;
if len > 0 {
let first_node_id = pool.reserve(len);
let index = first_node_id.index as isize;
let mut next_node_ptr = unsafe { pool.nodes.offset(index) } as *mut T;
for node in nodes {
unsafe {
*next_node_ptr = node;
next_node_ptr = next_node_ptr.offset(1);
}
}
PoolVec { first_node_id, len }
} else {
PoolVec {
first_node_id: NodeId {
index: 0,
_phantom: PhantomData::default(),
},
len: 0,
}
}
}
pub fn iter<S>(self, pool: &'a Pool) -> impl ExactSizeIterator<Item = &'a T> {
self.pool_list_iter(pool)
}
/// Private version of iter which exposes the implementation detail
/// of PoolVecIter. We don't want that struct to be public, but we
/// actually do want to have this separate function for code reuse
/// in the iterator's next() method.
#[inline(always)]
fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> {
PoolVecIter {
pool,
current_node_id: self.first_node_id,
len_remaining: self.len,
}
}
pub fn free<S>(self, pool: &'a mut Pool) {
// zero out the memory
unsafe {
let index = self.first_node_id.index as isize;
let node_ptr = pool.nodes.offset(index) as *mut c_void;
let bytes = self.len as usize * NODE_BYTES;
libc::memset(node_ptr, 0, bytes);
}
// TODO insert it into the pool's free list
}
}
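And a matching sketch for PoolVec (editor's illustration, not part of the commit, again assuming it lived in pool.rs): because the unused `S` type parameters on `new` and `iter` cannot be inferred, they are filled in explicitly with `()` here.
#[test]
fn pool_vec_usage_sketch() {
    // Editor's sketch: new() writes the elements into contiguous pool slots,
    // and iter() hands back references that read them in place.
    let mut pool = Pool::with_capacity(64);
    let nodes: Vec<[u64; 4]> = vec![[1, 0, 0, 0], [2, 0, 0, 0]];
    let elems = PoolVec::new::<_, ()>(nodes.into_iter(), &mut pool);
    let sum: u64 = elems.iter::<()>(&pool).map(|node| node[0]).sum();
    assert_eq!(sum, 3);
}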
struct PoolVecIter<'a, T> {
pool: &'a Pool,
current_node_id: NodeId<T>,
len_remaining: u32,
}
impl<'a, T> ExactSizeIterator for PoolVecIter<'a, T>
where
T: 'a,
{
fn len(&self) -> usize {
self.len_remaining as usize
}
}
impl<'a, T> Iterator for PoolVecIter<'a, T>
where
T: 'a,
{
type Item = &'a T;
fn next(&mut self) -> Option<Self::Item> {
let len_remaining = self.len_remaining;
match len_remaining.cmp(&1) {
Ordering::Greater => {
// Get the current node
let index = self.current_node_id.index;
let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T;
// Advance the node pointer to the next node in the current page
self.current_node_id = NodeId {
index: index + 1,
_phantom: PhantomData::default(),
};
self.len_remaining = len_remaining - 1;
Some(unsafe { &*node_ptr })
}
Ordering::Equal => {
self.len_remaining = 0;
// Don't advance the node pointer, because that might
// advance past the end of the page!
let index = self.current_node_id.index;
let node_ptr = unsafe { self.pool.nodes.offset(index as isize) } as *const T;
Some(unsafe { &*node_ptr })
}
Ordering::Less => {
// len_remaining was 0
None
}
}
}
}