Fix Pool implementation

This commit is contained in:
Richard Feldman 2020-12-08 22:14:59 -05:00
parent fd47d6ee71
commit c1356f0b68
2 changed files with 133 additions and 157 deletions

View file

@ -28,33 +28,45 @@ pub enum IntStyle {
/// It has a 1B discriminant and variants which hold payloads of at most 31B. /// It has a 1B discriminant and variants which hold payloads of at most 31B.
#[derive(Debug)] #[derive(Debug)]
pub enum Expr2 { pub enum Expr2 {
/// A number literal (without a dot) containing no underscores /// A negative number literal without a dot
Num { I64 {
number: i64, // 8B number: i64, // 8B
var: Variable, // 4B var: Variable, // 4B
style: IntStyle, // 1B style: IntStyle, // 1B
text: PoolStr, // 8B
}, },
/// A floating-point literal (with a dot) containing no underscores /// A nonnegative number literal without a dot
U64 {
number: u64, // 8B
var: Variable, // 4B
style: IntStyle, // 1B
text: PoolStr, // 8B
},
/// A large (over 64-bit) negative number literal without a dot.
/// This only comes up for literals that won't fit in 64-bit integers.
I128 {
number: i128, // 16B
var: Variable, // 4B
style: IntStyle, // 1B
text: PoolStr, // 8B
},
/// A large (over 64-bit) nonnegative number literal without a dot
/// This only comes up for literals that won't fit in 64-bit integers.
U128 {
number: u128, // 16B
var: Variable, // 4B
style: IntStyle, // 1B
text: PoolStr, // 8B
},
/// A floating-point literal (with a dot)
Float { Float {
number: f64, // 8B number: f64, // 8B
var: Variable, // 4B var: Variable, // 4B
}, },
/// A number literal (without a dot) containing underscores
NumWithUnderscores {
number: i64, // 8B
var: Variable, // 4B
text: NodeId<PoolStr>, // 8B
},
/// A float literal (with a dot) containing underscores
FloatWithUnderscores {
number: f64, // 8B
var: Variable, // 4B
text: NodeId<PoolStr>, // 8B
},
/// string literals of length up to 30B /// string literals of length up to 30B
SmallStr(ArrayString<U30>), // 31B SmallStr(ArrayString<U30>), // 31B
/// string literals of length 31B or more /// string literals of length 31B or more
Str(NodeId<PoolStr>), // 8B Str(PoolStr), // 8B
// Lookups // Lookups
Var(Symbol), // 8B Var(Symbol), // 8B
@ -67,132 +79,101 @@ pub enum Expr2 {
List { List {
list_var: Variable, // 4B - required for uniqueness of the list list_var: Variable, // 4B - required for uniqueness of the list
elem_var: Variable, // 4B elem_var: Variable, // 4B
first_elem: PoolVec<Expr2>, // 16B first_elem: PoolVec<Expr2>, // 8B
}, },
If { If {
cond_var: Variable, // 4B cond_var: Variable, // 4B
expr_var: Variable, // 4B expr_var: Variable, // 4B
branches: PoolVec<(Expr2, Expr2)>, // 16B branches: PoolVec<(Expr2, Expr2)>, // 8B
final_else: NodeId<Expr2>, // 8B final_else: NodeId<Expr2>, // 4B
}, },
When { When {
cond_var: Variable, // 4B cond_var: Variable, // 4B
expr_var: Variable, // 4B expr_var: Variable, // 4B
branches: PoolVec<WhenBranch>, // 9B branches: PoolVec<WhenBranch>, // 8B
cond: NodeId<Expr2>, // 8B cond: NodeId<Expr2>, // 4B
}, },
LetRec { LetRec {
// TODO need to make this Alias type here page-friendly, which will be hard! // TODO need to make this Alias type here page-friendly, which will be hard!
aliases: PoolVec<(Symbol, Alias)>, // 9B aliases: PoolVec<(Symbol, Alias)>, // 8B
defs: PoolVec<Def>, // 9B defs: PoolVec<Def>, // 8B
body_var: Variable, // 4B body_var: Variable, // 4B
body_id: NodeId<Expr2>, // 8B body_id: NodeId<Expr2>, // 4B
},
LetNonRec {
// TODO need to make this Alias type here page-friendly, which will be hard!
aliases: PoolVec<(Symbol, Alias)>, // 8B
def_id: NodeId<Def>, // 4B
body_id: NodeId<Expr2>, // 4B
body_var: Variable, // 4B
},
Call {
/// NOTE: the first elem in this list is the expression and its variable.
/// The others are arguments. This is because we didn't have room for
/// both the expr and its variable otherwise.
expr_and_args: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
fn_var: Variable, // 4B
closure_var: Variable, // 4B
/// Cached outside expr_and_args so we don't have to potentially
/// traverse that whole linked list chain to count all the args.
arity: usize, // 8B - could make this smaller if need be
called_via: CalledVia, // 2B
},
RunLowLevel {
op: LowLevel, // 1B
args: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
ret_var: Variable, // 4B
},
Closure {
args: PoolVec<(Variable, NodeId<Pat2>)>, // 8B
name: Symbol, // 8B
body: NodeId<Expr2>, // 4B
function_type: Variable, // 4B
recursive: Recursive, // 1B
extra: NodeId<ClosureExtra>, // 4B
}, },
// LetNonRec {
// // TODO need to make this Alias type here page-friendly, which will be hard!
// aliases: PoolVec<(Symbol, Alias)>, // 9B
// def_id: NodeId<Def>, // 8B
// body_id: NodeId<Expr2>, // 8B
// body_var: Variable, // 4B
// },
// Call {
// /// NOTE: the first elem in this list is the expression and its variable.
// /// The others are arguments. This is because we didn't have room for
// /// both the expr and its variable otherwise.
// expr_and_args: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
// fn_var: Variable, // 4B
// closure_var: Variable, // 4B
// /// Cached outside expr_and_args so we don't have to potentially
// /// traverse that whole linked list chain to count all the args.
// arity: usize, // 8B - could make this smaller if need be
// called_via: CalledVia, // 2B
// },
// RunLowLevel {
// op: LowLevel, // 1B
// args: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
// ret_var: Variable, // 4B
// },
// Closure {
// captured_symbols: PoolVec<(Symbol, Variable)>, // 9B
// args: PoolVec<(Variable, NodeId<Pat2>)>, // 9B
// recursive: Recursive, // 1B
// extra: NodeId<ClosureExtra>, // 8B
// },
// Product Types // Product Types
// Record { Record {
// record_var: Variable, // 4B record_var: Variable, // 4B
// fields: PoolVec<(PoolStr, Variable, NodeId<Expr2>)>, // 9B fields: PoolVec<(PoolStr, Variable, NodeId<Expr2>)>, // 8B
// }, },
/// Empty record constant /// Empty record constant
// EmptyRecord, EmptyRecord,
// /// Look up exactly one field on a record, e.g. (expr).foo. /// Look up exactly one field on a record, e.g. (expr).foo.
// Access { Access {
// field: NodeId<PoolStr>, // 8B field: NodeId<PoolStr>, // 4B
// expr: NodeId<Expr2>, // 8B expr: NodeId<Expr2>, // 4B
// vars: NodeId<AccessVars>, // 8B vars: NodeId<AccessVars>, // 4B
// }, },
// /// field accessor as a function, e.g. (.foo) expr /// field accessor as a function, e.g. (.foo) expr
// Accessor { Accessor {
// record_vars_id: NodeId<RecordVars>, // 8B record_vars_id: NodeId<RecordVars>, // 4B
// function_var: Variable, // 4B function_var: Variable, // 4B
// closure_var: Variable, // 4B closure_var: Variable, // 4B
// field_id: NodeId<PoolStr>, // 8B field_id: NodeId<PoolStr>, // 4B
// }, },
// Update { Update {
// symbol: Symbol, // 8B symbol: Symbol, // 8B
// updates: PoolVec<(Lowercase, Field)>, // 9B updates: PoolVec<(Lowercase, Field)>, // 8B
// vars_id: NodeId<UpdateVars>, // 8B vars_id: NodeId<UpdateVars>, // 4B
// }, },
// Sum Types // Sum Types
// Tag { Tag {
// // NOTE: A PoolStr node is a 2B length and then 14B bytes, // NOTE: A PoolStr node is a 2B length and then 14B bytes,
// // plus more bytes in adjacent nodes if necessary. Thus we have // plus more bytes in adjacent nodes if necessary. Thus we have
// // a hard cap of 4094 bytes as the maximum length of tags and fields. // a hard cap of 4094 bytes as the maximum length of tags and fields.
// name_id: NodeId<PoolStr>, // 8B name_id: NodeId<PoolStr>, // 4B
// variant_var: Variable, // 4B variant_var: Variable, // 4B
// ext_var: Variable, // 4B ext_var: Variable, // 4B
// arguments: PoolVec<(Variable, NodeId<Expr2>)>, // 9B arguments: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
// }, },
// Compiles, but will crash if reached // Compiles, but will crash if reached
RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */), RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */),
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// It's critical that this fit in 1 byte. If it takes 2B, Expr::Call is too big.
/// That's why we have all the variants in here, instead of having separate
/// UnaryOp and Binary
pub enum CalledVia2 {
/// Calling with space, e.g. (foo bar)
Space,
/// (-), e.g. (-x)
Negate,
/// (!), e.g. (!x)
Not,
// highest precedence binary op
Caret,
Star,
Slash,
DoubleSlash,
Percent,
DoublePercent,
Plus,
Minus,
Equals,
NotEquals,
LessThan,
GreaterThan,
LessThanOrEq,
GreaterThanOrEq,
And,
Or,
Pizza, // lowest precedence binary op
}
#[derive(Debug)] #[derive(Debug)]
pub struct Def { pub struct Def {
pub pattern: NodeId<Pat2>, // 3B pub pattern: NodeId<Pat2>, // 3B
@ -230,15 +211,14 @@ pub struct AccessVars {
field_var: Variable, // 4B field_var: Variable, // 4B
} }
/// This is 32B, so it fits in a Node slot. /// This is overflow data from a Closure variant, which needs to store
/// more than 32B of total data
#[derive(Debug)] #[derive(Debug)]
pub struct ClosureExtra { pub struct ClosureExtra {
name: Symbol, // 8B return_type: Variable, // 4B
body: NodeId<Expr2>, // 8B captured_symbols: PoolVec<(Symbol, Variable)>, // 8B
function_type: Variable, // 4B closure_type: Variable, // 4B
closure_type: Variable, // 4B closure_ext_var: Variable, // 4B
closure_ext_var: Variable, // 4B
return_type: Variable, // 4B
} }
#[derive(Debug)] #[derive(Debug)]
@ -278,10 +258,5 @@ pub struct ExprPoolSlot(u8);
#[test] #[test]
fn size_of_expr() { fn size_of_expr() {
assert_eq!(std::mem::size_of::<Expr2>(), 32); assert_eq!(std::mem::size_of::<Expr2>(), crate::pool::NODE_BYTES);
}
#[test]
fn size_of_called_via() {
assert_eq!(std::mem::size_of::<CalledVia2>(), 1);
} }

View file

@ -15,6 +15,8 @@ use std::marker::PhantomData;
use std::mem::size_of; use std::mem::size_of;
use std::ptr::null; use std::ptr::null;
pub const NODE_BYTES: usize = 32;
// Each page has 128 slots. Each slot holds one 32B node // Each page has 128 slots. Each slot holds one 32B node
// This means each page is 4096B, which is the size of a memory page // This means each page is 4096B, which is the size of a memory page
// on typical systems where the compiler will be run. // on typical systems where the compiler will be run.
@ -56,19 +58,18 @@ impl<T> Clone for NodeId<T> {
impl<T> Copy for NodeId<T> {} impl<T> Copy for NodeId<T> {}
/// S is a slot size; e.g. Pool<[u8; 32]> for a pool of 32-byte slots pub struct Pool {
pub struct Pool<S> { nodes: *mut [u8; NODE_BYTES],
nodes: *mut S,
num_nodes: u32, num_nodes: u32,
capacity: u32, capacity: u32,
// free_1node_slots: Vec<NodeId<T>>, // free_1node_slots: Vec<NodeId<T>>,
} }
impl<S> Pool<S> { impl Pool {
pub fn with_capacity(&mut self, nodes: u32) -> Self { pub fn with_capacity(&mut self, nodes: u32) -> Self {
// round up number of nodes requested to nearest page size in bytes // round up number of nodes requested to nearest page size in bytes
let bytes_per_page = page_size::get(); let bytes_per_page = page_size::get();
let node_bytes = size_of::<S>() * nodes as usize; let node_bytes = NODE_BYTES * nodes as usize;
let leftover = node_bytes % bytes_per_page; let leftover = node_bytes % bytes_per_page;
let bytes_to_mmap = if leftover == 0 { let bytes_to_mmap = if leftover == 0 {
node_bytes node_bytes
@ -89,12 +90,12 @@ impl<S> Pool<S> {
0, 0,
0, 0,
) )
} as *mut S; } as *mut [u8; NODE_BYTES];
// This is our actual capacity, in nodes. // This is our actual capacity, in nodes.
// It might be higher than the requested capacity due to rounding up // It might be higher than the requested capacity due to rounding up
// to nearest page size. // to nearest page size.
let capacity = (bytes_to_mmap / size_of::<S>()) as u32; let capacity = (bytes_to_mmap / NODE_BYTES) as u32;
Pool { Pool {
nodes, nodes,
@ -105,7 +106,7 @@ impl<S> Pool<S> {
pub fn add<T>(&mut self, node: T) -> NodeId<T> { pub fn add<T>(&mut self, node: T) -> NodeId<T> {
// It's only safe to store this if T is the same size as S. // It's only safe to store this if T is exactly NODE_BYTES in size.
debug_assert_eq!(size_of::<T>(), size_of::<S>()); debug_assert_eq!(size_of::<T>(), NODE_BYTES);
let index = self.num_nodes; let index = self.num_nodes;
@ -141,23 +142,23 @@ impl<S> Pool<S> {
// A node is available iff its bytes are all zeroes // A node is available iff its bytes are all zeroes
#[allow(dead_code)] #[allow(dead_code)]
unsafe fn is_available<T>(&self, node_id: NodeId<T>) -> bool { fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
debug_assert_eq!(size_of::<T>(), size_of::<S>()); debug_assert_eq!(size_of::<T>(), NODE_BYTES);
unsafe { unsafe {
let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; size_of::<S>()]; let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES];
*node_ptr == [0; size_of::<S>()] *node_ptr == [0; NODE_BYTES]
} }
} }
} }
impl<S> Drop for Pool<S> { impl Drop for Pool {
fn drop(&mut self) { fn drop(&mut self) {
unsafe { unsafe {
libc::munmap( libc::munmap(
self.nodes as *mut c_void, self.nodes as *mut c_void,
size_of::<S>() * self.capacity as usize, NODE_BYTES * self.capacity as usize,
); );
} }
} }
@ -192,9 +193,9 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
/// the usual array, and then there's one more node at the end which /// the usual array, and then there's one more node at the end which
/// continues the list with a new length and NodeId value. PoolVec /// continues the list with a new length and NodeId value. PoolVec
/// iterators automatically do these jumps behind the scenes when necessary. /// iterators automatically do these jumps behind the scenes when necessary.
pub fn new<I: ExactSizeIterator<Item = T>, S>(nodes: I, pool: &mut Pool<S>) -> Self { pub fn new<I: ExactSizeIterator<Item = T>, S>(nodes: I, pool: &mut Pool) -> Self {
debug_assert!(nodes.len() <= u32::MAX as usize); debug_assert!(nodes.len() <= u32::MAX as usize);
debug_assert!(size_of::<T>() <= size_of::<S>()); debug_assert!(size_of::<T>() <= NODE_BYTES);
let len = nodes.len() as u32; let len = nodes.len() as u32;
@ -223,7 +224,7 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
} }
} }
pub fn iter<S>(self, pool: &'a Pool<S>) -> impl ExactSizeIterator<Item = &'a T> { pub fn iter<S>(self, pool: &'a Pool) -> impl ExactSizeIterator<Item = &'a T> {
self.pool_list_iter(pool) self.pool_list_iter(pool)
} }
@ -232,7 +233,7 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
/// actually do want to have this separate function for code reuse /// actually do want to have this separate function for code reuse
/// in the iterator's next() method. /// in the iterator's next() method.
#[inline(always)] #[inline(always)]
fn pool_list_iter<S>(&self, pool: &'a Pool<S>) -> PoolVecIter<'a, S, T> { fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> {
PoolVecIter { PoolVecIter {
pool, pool,
current_node_id: self.first_node_id, current_node_id: self.first_node_id,
@ -240,12 +241,12 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
} }
} }
pub fn free<S>(self, pool: &'a mut Pool<S>) { pub fn free<S>(self, pool: &'a mut Pool) {
// zero out the memory // zero out the memory
unsafe { unsafe {
let index = self.first_node_id.index as isize; let index = self.first_node_id.index as isize;
let node_ptr = pool.nodes.offset(index) as *mut c_void; let node_ptr = pool.nodes.offset(index) as *mut c_void;
let bytes = self.len as usize * size_of::<S>(); let bytes = self.len as usize * NODE_BYTES;
libc::memset(node_ptr, 0, bytes); libc::memset(node_ptr, 0, bytes);
} }
@ -254,13 +255,13 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
} }
} }
struct PoolVecIter<'a, S, T> { struct PoolVecIter<'a, T> {
pool: &'a Pool<S>, pool: &'a Pool,
current_node_id: NodeId<T>, current_node_id: NodeId<T>,
len_remaining: u32, len_remaining: u32,
} }
impl<'a, S, T> ExactSizeIterator for PoolVecIter<'a, S, T> impl<'a, T> ExactSizeIterator for PoolVecIter<'a, T>
where where
T: 'a, T: 'a,
{ {
@ -269,7 +270,7 @@ where
} }
} }
impl<'a, S, T> Iterator for PoolVecIter<'a, S, T> impl<'a, T> Iterator for PoolVecIter<'a, T>
where where
T: 'a, T: 'a,
{ {