From c1356f0b68bd3d850f4d971837bb06277988f0b4 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 8 Dec 2020 22:14:59 -0500 Subject: [PATCH] Fix Pool implementation --- editor/src/ast.rs | 241 ++++++++++++++++++++------------------------- editor/src/pool.rs | 49 ++++----- 2 files changed, 133 insertions(+), 157 deletions(-) diff --git a/editor/src/ast.rs b/editor/src/ast.rs index da28a70b59..9a70d2a7f0 100644 --- a/editor/src/ast.rs +++ b/editor/src/ast.rs @@ -28,33 +28,45 @@ pub enum IntStyle { /// It has a 1B discriminant and variants which hold payloads of at most 31B. #[derive(Debug)] pub enum Expr2 { - /// A number literal (without a dot) containing no underscores - Num { + /// A negative number literal without a dot + I64 { number: i64, // 8B var: Variable, // 4B style: IntStyle, // 1B + text: PoolStr, // 8B }, - /// A floating-point literal (with a dot) containing no underscores + /// A nonnegative number literal without a dot + U64 { + number: u64, // 8B + var: Variable, // 4B + style: IntStyle, // 1B + text: PoolStr, // 8B + }, + /// A large (over 64-bit) negative number literal without a dot. + /// This only comes up for literals that won't fit in 64-bit integers. + I128 { + number: i128, // 16B + var: Variable, // 4B + style: IntStyle, // 1B + text: PoolStr, // 8B + }, + /// A large (over 64-bit) nonnegative number literal without a dot + /// This only comes up for literals that won't fit in 64-bit integers. + U128 { + number: u128, // 16B + var: Variable, // 4B + style: IntStyle, // 1B + text: PoolStr, // 8B + }, + /// A floating-point literal (with a dot) Float { number: f64, // 8B var: Variable, // 4B }, - /// A number literal (without a dot) containing underscores - NumWithUnderscores { - number: i64, // 8B - var: Variable, // 4B - text: NodeId, // 8B - }, - /// A float literal (with a dot) containing underscores - FloatWithUnderscores { - number: f64, // 8B - var: Variable, // 4B - text: NodeId, // 8B - }, /// string literals of length up to 30B SmallStr(ArrayString), // 31B /// string literals of length 31B or more - Str(NodeId), // 8B + Str(PoolStr), // 8B // Lookups Var(Symbol), // 8B @@ -67,132 +79,101 @@ pub enum Expr2 { List { list_var: Variable, // 4B - required for uniqueness of the list elem_var: Variable, // 4B - first_elem: PoolVec, // 16B + first_elem: PoolVec, // 8B }, If { cond_var: Variable, // 4B expr_var: Variable, // 4B - branches: PoolVec<(Expr2, Expr2)>, // 16B - final_else: NodeId, // 8B + branches: PoolVec<(Expr2, Expr2)>, // 8B + final_else: NodeId, // 4B }, When { cond_var: Variable, // 4B expr_var: Variable, // 4B - branches: PoolVec, // 9B - cond: NodeId, // 8B + branches: PoolVec, // 8B + cond: NodeId, // 4B }, LetRec { // TODO need to make this Alias type here page-friendly, which will be hard! - aliases: PoolVec<(Symbol, Alias)>, // 9B - defs: PoolVec, // 9B - body_var: Variable, // 4B - body_id: NodeId, // 8B + aliases: PoolVec<(Symbol, Alias)>, // 8B + defs: PoolVec, // 8B + body_var: Variable, // 8B + body_id: NodeId, // 4B + }, + LetNonRec { + // TODO need to make this Alias type here page-friendly, which will be hard! + aliases: PoolVec<(Symbol, Alias)>, // 8B + def_id: NodeId, // 4B + body_id: NodeId, // 4B + body_var: Variable, // 4B + }, + Call { + /// NOTE: the first elem in this list is the expression and its variable. + /// The others are arguments. This is because we didn't have room for + /// both the expr and its variable otherwise. + expr_and_args: PoolVec<(Variable, NodeId)>, // 8B + fn_var: Variable, // 4B + closure_var: Variable, // 4B + /// Cached outside expr_and_args so we don't have to potentially + /// traverse that whole linked list chain to count all the args. + arity: usize, // 8B - could make this smaller if need be + called_via: CalledVia, // 2B + }, + RunLowLevel { + op: LowLevel, // 1B + args: PoolVec<(Variable, NodeId)>, // 8B + ret_var: Variable, // 4B + }, + Closure { + args: PoolVec<(Variable, NodeId)>, // 8B + name: Symbol, // 8B + body: NodeId, // 4B + function_type: Variable, // 4B + recursive: Recursive, // 1B + extra: NodeId, // 4B }, - // LetNonRec { - // // TODO need to make this Alias type here page-friendly, which will be hard! - // aliases: PoolVec<(Symbol, Alias)>, // 9B - // def_id: NodeId, // 8B - // body_id: NodeId, // 8B - // body_var: Variable, // 4B - // }, - // Call { - // /// NOTE: the first elem in this list is the expression and its variable. - // /// The others are arguments. This is because we didn't have room for - // /// both the expr and its variable otherwise. - // expr_and_args: PoolVec<(Variable, NodeId)>, // 9B - // fn_var: Variable, // 4B - // closure_var: Variable, // 4B - // /// Cached outside expr_and_args so we don't have to potentially - // /// traverse that whole linked list chain to count all the args. - // arity: usize, // 8B - could make this smaller if need be - // called_via: CalledVia, // 2B - // }, - // RunLowLevel { - // op: LowLevel, // 1B - // args: PoolVec<(Variable, NodeId)>, // 9B - // ret_var: Variable, // 4B - // }, - // Closure { - // captured_symbols: PoolVec<(Symbol, Variable)>, // 9B - // args: PoolVec<(Variable, NodeId)>, // 9B - // recursive: Recursive, // 1B - // extra: NodeId, // 8B - // }, // Product Types - // Record { - // record_var: Variable, // 4B - // fields: PoolVec<(PoolStr, Variable, NodeId)>, // 9B - // }, + Record { + record_var: Variable, // 4B + fields: PoolVec<(PoolStr, Variable, NodeId)>, // 8B + }, /// Empty record constant - // EmptyRecord, - // /// Look up exactly one field on a record, e.g. (expr).foo. - // Access { - // field: NodeId, // 8B - // expr: NodeId, // 8B - // vars: NodeId, // 8B - // }, + EmptyRecord, + /// Look up exactly one field on a record, e.g. (expr).foo. + Access { + field: NodeId, // 4B + expr: NodeId, // 4B + vars: NodeId, // 4B + }, - // /// field accessor as a function, e.g. (.foo) expr - // Accessor { - // record_vars_id: NodeId, // 8B - // function_var: Variable, // 4B - // closure_var: Variable, // 4B - // field_id: NodeId, // 8B - // }, - // Update { - // symbol: Symbol, // 8B - // updates: PoolVec<(Lowercase, Field)>, // 9B - // vars_id: NodeId, // 8B - // }, + /// field accessor as a function, e.g. (.foo) expr + Accessor { + record_vars_id: NodeId, // 4B + function_var: Variable, // 4B + closure_var: Variable, // 4B + field_id: NodeId, // 4B + }, + Update { + symbol: Symbol, // 8B + updates: PoolVec<(Lowercase, Field)>, // 8B + vars_id: NodeId, // 4B + }, // Sum Types - // Tag { - // // NOTE: A PoolStr node is a 2B length and then 14B bytes, - // // plus more bytes in adjacent nodes if necessary. Thus we have - // // a hard cap of 4094 bytes as the maximum length of tags and fields. - // name_id: NodeId, // 8B - // variant_var: Variable, // 4B - // ext_var: Variable, // 4B - // arguments: PoolVec<(Variable, NodeId)>, // 9B - // }, + Tag { + // NOTE: A PoolStr node is a 2B length and then 14B bytes, + // plus more bytes in adjacent nodes if necessary. Thus we have + // a hard cap of 4094 bytes as the maximum length of tags and fields. + name_id: NodeId, // 4B + variant_var: Variable, // 4B + ext_var: Variable, // 4B + arguments: PoolVec<(Variable, NodeId)>, // 8B + }, // Compiles, but will crash if reached RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */), } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -/// It's critical that this fit in 1 byte. If it takes 2B, Expr::Call is too big. -/// That's why we have all the variants in here, instead of having separate -/// UnaryOp and Binary -pub enum CalledVia2 { - /// Calling with space, e.g. (foo bar) - Space, - - /// (-), e.g. (-x) - Negate, - /// (!), e.g. (!x) - Not, - - // highest precedence binary op - Caret, - Star, - Slash, - DoubleSlash, - Percent, - DoublePercent, - Plus, - Minus, - Equals, - NotEquals, - LessThan, - GreaterThan, - LessThanOrEq, - GreaterThanOrEq, - And, - Or, - Pizza, // lowest precedence binary op -} - #[derive(Debug)] pub struct Def { pub pattern: NodeId, // 3B @@ -230,15 +211,14 @@ pub struct AccessVars { field_var: Variable, // 4B } -/// This is 32B, so it fits in a Node slot. +/// This is overflow data from a Closure variant, which needs to store +/// more than 32B of total data #[derive(Debug)] pub struct ClosureExtra { - name: Symbol, // 8B - body: NodeId, // 8B - function_type: Variable, // 4B - closure_type: Variable, // 4B - closure_ext_var: Variable, // 4B - return_type: Variable, // 4B + return_type: Variable, // 4B + captured_symbols: PoolVec<(Symbol, Variable)>, // 8B + closure_type: Variable, // 4B + closure_ext_var: Variable, // 4B } #[derive(Debug)] @@ -278,10 +258,5 @@ pub struct ExprPoolSlot(u8); #[test] fn size_of_expr() { - assert_eq!(std::mem::size_of::(), 32); -} - -#[test] -fn size_of_called_via() { - assert_eq!(std::mem::size_of::(), 1); + assert_eq!(std::mem::size_of::(), crate::pool::NODE_BYTES); } diff --git a/editor/src/pool.rs b/editor/src/pool.rs index 1c7d13e3fb..91dfde3789 100644 --- a/editor/src/pool.rs +++ b/editor/src/pool.rs @@ -15,6 +15,8 @@ use std::marker::PhantomData; use std::mem::size_of; use std::ptr::null; +pub const NODE_BYTES: usize = 32; + // Each page has 128 slots. Each slot holds one 32B node // This means each page is 4096B, which is the size of a memory page // on typical systems where the compiler will be run. @@ -56,19 +58,18 @@ impl Clone for NodeId { impl Copy for NodeId {} -/// S is a slot size; e.g. Pool<[u8; 32]> for a pool of 32-bit slots -pub struct Pool { - nodes: *mut S, +pub struct Pool { + nodes: *mut [u8; NODE_BYTES], num_nodes: u32, capacity: u32, // free_1node_slots: Vec>, } -impl Pool { +impl Pool { pub fn with_capacity(&mut self, nodes: u32) -> Self { // round up number of nodes requested to nearest page size in bytes let bytes_per_page = page_size::get(); - let node_bytes = size_of::() * nodes as usize; + let node_bytes = NODE_BYTES * nodes as usize; let leftover = node_bytes % bytes_per_page; let bytes_to_mmap = if leftover == 0 { node_bytes @@ -89,12 +90,12 @@ impl Pool { 0, 0, ) - } as *mut S; + } as *mut [u8; NODE_BYTES]; // This is our actual capacity, in nodes. // It might be higher than the requested capacity due to rounding up // to nearest page size. - let capacity = (bytes_to_mmap / size_of::()) as u32; + let capacity = (bytes_to_mmap / NODE_BYTES) as u32; Pool { nodes, @@ -105,7 +106,7 @@ impl Pool { pub fn add(&mut self, node: T) -> NodeId { // It's only safe to store this if T is the same size as S. - debug_assert_eq!(size_of::(), size_of::()); + debug_assert_eq!(size_of::(), NODE_BYTES); let index = self.num_nodes; @@ -141,23 +142,23 @@ impl Pool { // A node is available iff its bytes are all zeroes #[allow(dead_code)] - unsafe fn is_available(&self, node_id: NodeId) -> bool { - debug_assert_eq!(size_of::(), size_of::()); + fn is_available(&self, node_id: NodeId) -> bool { + debug_assert_eq!(size_of::(), NODE_BYTES); unsafe { - let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; size_of::()]; + let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES]; - *node_ptr == [0; size_of::()] + *node_ptr == [0; NODE_BYTES] } } } -impl Drop for Pool { +impl Drop for Pool { fn drop(&mut self) { unsafe { libc::munmap( self.nodes as *mut c_void, - size_of::() * self.capacity as usize, + NODE_BYTES * self.capacity as usize, ); } } @@ -192,9 +193,9 @@ impl<'a, T: 'a + Sized> PoolVec { /// the usual array, and then there's one more node at the end which /// continues the list with a new length and NodeId value. PoolVec /// iterators automatically do these jumps behind the scenes when necessary. - pub fn new, S>(nodes: I, pool: &mut Pool) -> Self { + pub fn new, S>(nodes: I, pool: &mut Pool) -> Self { debug_assert!(nodes.len() <= u32::MAX as usize); - debug_assert!(size_of::() <= size_of::()); + debug_assert!(size_of::() <= NODE_BYTES); let len = nodes.len() as u32; @@ -223,7 +224,7 @@ impl<'a, T: 'a + Sized> PoolVec { } } - pub fn iter(self, pool: &'a Pool) -> impl ExactSizeIterator { + pub fn iter(self, pool: &'a Pool) -> impl ExactSizeIterator { self.pool_list_iter(pool) } @@ -232,7 +233,7 @@ impl<'a, T: 'a + Sized> PoolVec { /// actually do want to have this separate function for code reuse /// in the iterator's next() method. #[inline(always)] - fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, S, T> { + fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> { PoolVecIter { pool, current_node_id: self.first_node_id, @@ -240,12 +241,12 @@ impl<'a, T: 'a + Sized> PoolVec { } } - pub fn free(self, pool: &'a mut Pool) { + pub fn free(self, pool: &'a mut Pool) { // zero out the memory unsafe { let index = self.first_node_id.index as isize; let node_ptr = pool.nodes.offset(index) as *mut c_void; - let bytes = self.len as usize * size_of::(); + let bytes = self.len as usize * NODE_BYTES; libc::memset(node_ptr, 0, bytes); } @@ -254,13 +255,13 @@ impl<'a, T: 'a + Sized> PoolVec { } } -struct PoolVecIter<'a, S, T> { - pool: &'a Pool, +struct PoolVecIter<'a, T> { + pool: &'a Pool, current_node_id: NodeId, len_remaining: u32, } -impl<'a, S, T> ExactSizeIterator for PoolVecIter<'a, S, T> +impl<'a, T> ExactSizeIterator for PoolVecIter<'a, T> where T: 'a, { @@ -269,7 +270,7 @@ where } } -impl<'a, S, T> Iterator for PoolVecIter<'a, S, T> +impl<'a, T> Iterator for PoolVecIter<'a, T> where T: 'a, {