Fix Pool implementation

2025-10-02 08:11:12 +00:00 · 2020-12-08 22:14:59 -05:00 · 2020-12-08 22:14:59 -05:00 · c1356f0b68
commit c1356f0b68
parent fd47d6ee71
2 changed files with 133 additions and 157 deletions
--- a/editor/src/ast.rs
+++ b/editor/src/ast.rs
@ -28,33 +28,45 @@ pub enum IntStyle {
 /// It has a 1B discriminant and variants which hold payloads of at most 31B.
 #[derive(Debug)]
 pub enum Expr2 {
-    /// A number literal (without a dot) containing no underscores
-    Num {
+    /// A negative number literal without a dot
+    I64 {
        number: i64,     // 8B
        var: Variable,   // 4B
        style: IntStyle, // 1B
+        text: PoolStr,   // 8B
    },
-    /// A floating-point literal (with a dot) containing no underscores
+    /// A nonnegative number literal without a dot
+    U64 {
+        number: u64,     // 8B
+        var: Variable,   // 4B
+        style: IntStyle, // 1B
+        text: PoolStr,   // 8B
+    },
+    /// A large (over 64-bit) negative number literal without a dot.
+    /// This only comes up for literals that won't fit in 64-bit integers.
+    I128 {
+        number: i128,    // 16B
+        var: Variable,   // 4B
+        style: IntStyle, // 1B
+        text: PoolStr,   // 8B
+    },
+    /// A large (over 64-bit) nonnegative number literal without a dot.
+    /// This only comes up for literals that won't fit in 64-bit integers.
+    U128 {
+        number: u128,    // 16B
+        var: Variable,   // 4B
+        style: IntStyle, // 1B
+        text: PoolStr,   // 8B
+    },
+    /// A floating-point literal (with a dot)
    Float {
        number: f64,   // 8B
        var: Variable, // 4B
    },
-    /// A number literal (without a dot) containing underscores
-    NumWithUnderscores {
-        number: i64,           // 8B
-        var: Variable,         // 4B
-        text: NodeId<PoolStr>, // 8B
-    },
-    /// A float literal (with a dot) containing underscores
-    FloatWithUnderscores {
-        number: f64,           // 8B
-        var: Variable,         // 4B
-        text: NodeId<PoolStr>, // 8B
-    },
    /// string literals of length up to 30B
    SmallStr(ArrayString<U30>), // 31B
    /// string literals of length 31B or more
-    Str(NodeId<PoolStr>), // 8B
+    Str(PoolStr), // 8B
    // Lookups
    Var(Symbol), // 8B

@ -67,132 +79,101 @@ pub enum Expr2 {
    List {
        list_var: Variable,         // 4B - required for uniqueness of the list
        elem_var: Variable,         // 4B
-        first_elem: PoolVec<Expr2>, // 16B
+        first_elem: PoolVec<Expr2>, // 8B
    },
    If {
        cond_var: Variable,                // 4B
        expr_var: Variable,                // 4B
-        branches: PoolVec<(Expr2, Expr2)>, // 16B
-        final_else: NodeId<Expr2>,         // 8B
+        branches: PoolVec<(Expr2, Expr2)>, // 8B
+        final_else: NodeId<Expr2>,         // 4B
    },
    When {
        cond_var: Variable,            // 4B
        expr_var: Variable,            // 4B
-        branches: PoolVec<WhenBranch>, // 9B
-        cond: NodeId<Expr2>,           // 8B
+        branches: PoolVec<WhenBranch>, // 8B
+        cond: NodeId<Expr2>,           // 4B
    },
    LetRec {
        // TODO need to make this Alias type here page-friendly, which will be hard!
-        aliases: PoolVec<(Symbol, Alias)>, // 9B
-        defs: PoolVec<Def>,                // 9B
-        body_var: Variable,                // 4B
-        body_id: NodeId<Expr2>,            // 8B
+        aliases: PoolVec<(Symbol, Alias)>, // 8B
+        defs: PoolVec<Def>,                // 8B
+        body_var: Variable,                // 4B
+        body_id: NodeId<Expr2>,            // 4B
+    },
+    LetNonRec {
+        // TODO need to make this Alias type here page-friendly, which will be hard!
+        aliases: PoolVec<(Symbol, Alias)>, // 8B
+        def_id: NodeId<Def>,               // 4B
+        body_id: NodeId<Expr2>,            // 4B
+        body_var: Variable,                // 4B
+    },
+    Call {
+        /// NOTE: the first elem in this list is the expression and its variable.
+        /// The others are arguments. This is because we didn't have room for
+        /// both the expr and its variable otherwise.
+        expr_and_args: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
+        fn_var: Variable,      // 4B
+        closure_var: Variable, // 4B
+        /// Cached outside expr_and_args so we don't have to potentially
+        /// traverse that whole linked list chain to count all the args.
+        arity: usize, // 8B - could make this smaller if need be
+        called_via: CalledVia, // 2B
+    },
+    RunLowLevel {
+        op: LowLevel,                             // 1B
+        args: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
+        ret_var: Variable,                        // 4B
+    },
+    Closure {
+        args: PoolVec<(Variable, NodeId<Pat2>)>, // 8B
+        name: Symbol,                            // 8B
+        body: NodeId<Expr2>,                     // 4B
+        function_type: Variable,                 // 4B
+        recursive: Recursive,                    // 1B
+        extra: NodeId<ClosureExtra>,             // 4B
    },
-    // LetNonRec {
-    //     // TODO need to make this Alias type here page-friendly, which will be hard!
-    //     aliases: PoolVec<(Symbol, Alias)>, // 9B
-    //     def_id: NodeId<Def>,               // 8B
-    //     body_id: NodeId<Expr2>,            // 8B
-    //     body_var: Variable,                // 4B
-    // },
-    // Call {
-    //     /// NOTE: the first elem in this list is the expression and its variable.
-    //     /// The others are arguments. This is because we didn't have room for
-    //     /// both the expr and its variable otherwise.
-    //     expr_and_args: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
-    //     fn_var: Variable,      // 4B
-    //     closure_var: Variable, // 4B
-    //     /// Cached outside expr_and_args so we don't have to potentially
-    //     /// traverse that whole linked list chain to count all the args.
-    //     arity: usize, // 8B - could make this smaller if need be
-    //     called_via: CalledVia, // 2B
-    // },
-    // RunLowLevel {
-    //     op: LowLevel,                             // 1B
-    //     args: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
-    //     ret_var: Variable,                        // 4B
-    // },
-    // Closure {
-    //     captured_symbols: PoolVec<(Symbol, Variable)>, // 9B
-    //     args: PoolVec<(Variable, NodeId<Pat2>)>,       // 9B
-    //     recursive: Recursive,                          // 1B
-    //     extra: NodeId<ClosureExtra>,                   // 8B
-    // },
    // Product Types
-    // Record {
-    //     record_var: Variable,                                // 4B
-    //     fields: PoolVec<(PoolStr, Variable, NodeId<Expr2>)>, // 9B
-    // },
+    Record {
+        record_var: Variable,                                // 4B
+        fields: PoolVec<(PoolStr, Variable, NodeId<Expr2>)>, // 8B
+    },
    /// Empty record constant
-    // EmptyRecord,
-    // /// Look up exactly one field on a record, e.g. (expr).foo.
-    // Access {
-    //     field: NodeId<PoolStr>,   // 8B
-    //     expr: NodeId<Expr2>,      // 8B
-    //     vars: NodeId<AccessVars>, // 8B
-    // },
+    EmptyRecord,
+    /// Look up exactly one field on a record, e.g. (expr).foo.
+    Access {
+        field: NodeId<PoolStr>,   // 4B
+        expr: NodeId<Expr2>,      // 4B
+        vars: NodeId<AccessVars>, // 4B
+    },

-    // /// field accessor as a function, e.g. (.foo) expr
-    // Accessor {
-    //     record_vars_id: NodeId<RecordVars>, // 8B
-    //     function_var: Variable,             // 4B
-    //     closure_var: Variable,              // 4B
-    //     field_id: NodeId<PoolStr>,          // 8B
-    // },
-    // Update {
-    //     symbol: Symbol,                       // 8B
-    //     updates: PoolVec<(Lowercase, Field)>, // 9B
-    //     vars_id: NodeId<UpdateVars>,          // 8B
-    // },
+    /// field accessor as a function, e.g. (.foo) expr
+    Accessor {
+        record_vars_id: NodeId<RecordVars>, // 4B
+        function_var: Variable,             // 4B
+        closure_var: Variable,              // 4B
+        field_id: NodeId<PoolStr>,          // 4B
+    },
+    Update {
+        symbol: Symbol,                       // 8B
+        updates: PoolVec<(Lowercase, Field)>, // 8B
+        vars_id: NodeId<UpdateVars>,          // 4B
+    },

    // Sum Types
-    // Tag {
-    //     // NOTE: A PoolStr node is a 2B length and then 14B bytes,
-    //     // plus more bytes in adjacent nodes if necessary. Thus we have
-    //     // a hard cap of 4094 bytes as the maximum length of tags and fields.
-    //     name_id: NodeId<PoolStr>,                      // 8B
-    //     variant_var: Variable,                         // 4B
-    //     ext_var: Variable,                             // 4B
-    //     arguments: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
-    // },
+    Tag {
+        // NOTE: A PoolStr node is a 2B length and then 14B bytes,
+        // plus more bytes in adjacent nodes if necessary. Thus we have
+        // a hard cap of 4094 bytes as the maximum length of tags and fields.
+        name_id: NodeId<PoolStr>,                      // 4B
+        variant_var: Variable,                         // 4B
+        ext_var: Variable,                             // 4B
+        arguments: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
+    },

    // Compiles, but will crash if reached
    RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */),
 }

-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-/// It's critical that this fit in 1 byte. If it takes 2B, Expr::Call is too big.
-/// That's why we have all the variants in here, instead of having separate
-/// UnaryOp and Binary
-pub enum CalledVia2 {
-    /// Calling with space, e.g. (foo bar)
-    Space,
-
-    /// (-), e.g. (-x)
-    Negate,
-    /// (!), e.g. (!x)
-    Not,
-
-    // highest precedence binary op
-    Caret,
-    Star,
-    Slash,
-    DoubleSlash,
-    Percent,
-    DoublePercent,
-    Plus,
-    Minus,
-    Equals,
-    NotEquals,
-    LessThan,
-    GreaterThan,
-    LessThanOrEq,
-    GreaterThanOrEq,
-    And,
-    Or,
-    Pizza, // lowest precedence binary op
-}
-
 #[derive(Debug)]
 pub struct Def {
    pub pattern: NodeId<Pat2>, // 3B
@ -230,15 +211,14 @@ pub struct AccessVars {
    field_var: Variable,  // 4B
 }

-/// This is 32B, so it fits in a Node slot.
+/// This is overflow data from a Closure variant, which needs to store
+/// more than 32B of total data
 #[derive(Debug)]
 pub struct ClosureExtra {
-    name: Symbol,              // 8B
-    body: NodeId<Expr2>,       // 8B
-    function_type: Variable,   // 4B
-    closure_type: Variable,    // 4B
-    closure_ext_var: Variable, // 4B
-    return_type: Variable,     // 4B
+    return_type: Variable,                         // 4B
+    captured_symbols: PoolVec<(Symbol, Variable)>, // 8B
+    closure_type: Variable,                        // 4B
+    closure_ext_var: Variable,                     // 4B
 }

 #[derive(Debug)]
@ -278,10 +258,5 @@ pub struct ExprPoolSlot(u8);

 #[test]
 fn size_of_expr() {
-    assert_eq!(std::mem::size_of::<Expr2>(), 32);
-}
-
-#[test]
-fn size_of_called_via() {
-    assert_eq!(std::mem::size_of::<CalledVia2>(), 1);
+    assert_eq!(std::mem::size_of::<Expr2>(), crate::pool::NODE_BYTES);
 }
--- a/editor/src/pool.rs
+++ b/editor/src/pool.rs
@ -15,6 +15,8 @@ use std::marker::PhantomData;
 use std::mem::size_of;
 use std::ptr::null;

+pub const NODE_BYTES: usize = 32;
+
 // Each page has 128 slots. Each slot holds one 32B node
 // This means each page is 4096B, which is the size of a memory page
 // on typical systems where the compiler will be run.
@ -56,19 +58,18 @@ impl<T> Clone for NodeId<T> {

 impl<T> Copy for NodeId<T> {}

-/// S is a slot size; e.g. Pool<[u8; 32]> for a pool of 32-bit slots
-pub struct Pool<S> {
-    nodes: *mut S,
+pub struct Pool {
+    nodes: *mut [u8; NODE_BYTES],
    num_nodes: u32,
    capacity: u32,
    // free_1node_slots: Vec<NodeId<T>>,
 }

-impl<S> Pool<S> {
+impl Pool {
    pub fn with_capacity(&mut self, nodes: u32) -> Self {
        // round up number of nodes requested to nearest page size in bytes
        let bytes_per_page = page_size::get();
-        let node_bytes = size_of::<S>() * nodes as usize;
+        let node_bytes = NODE_BYTES * nodes as usize;
        let leftover = node_bytes % bytes_per_page;
        let bytes_to_mmap = if leftover == 0 {
            node_bytes
@ -89,12 +90,12 @@ impl<S> Pool<S> {
                0,
                0,
            )
-        } as *mut S;
+        } as *mut [u8; NODE_BYTES];

        // This is our actual capacity, in nodes.
        // It might be higher than the requested capacity due to rounding up
        // to nearest page size.
-        let capacity = (bytes_to_mmap / size_of::<S>()) as u32;
+        let capacity = (bytes_to_mmap / NODE_BYTES) as u32;

        Pool {
            nodes,
@ -105,7 +106,7 @@ impl<S> Pool<S> {

    pub fn add<T>(&mut self, node: T) -> NodeId<T> {
        // It's only safe to store this if T is exactly NODE_BYTES in size.
-        debug_assert_eq!(size_of::<T>(), size_of::<S>());
+        debug_assert_eq!(size_of::<T>(), NODE_BYTES);

        let index = self.num_nodes;

@ -141,23 +142,23 @@ impl<S> Pool<S> {

    // A node is available iff its bytes are all zeroes
    #[allow(dead_code)]
-    unsafe fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
-        debug_assert_eq!(size_of::<T>(), size_of::<S>());
+    fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
+        debug_assert_eq!(size_of::<T>(), NODE_BYTES);

        unsafe {
-            let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; size_of::<S>()];
+            let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES];

-            *node_ptr == [0; size_of::<S>()]
+            *node_ptr == [0; NODE_BYTES]
        }
    }
 }

-impl<S> Drop for Pool<S> {
+impl Drop for Pool {
    fn drop(&mut self) {
        unsafe {
            libc::munmap(
                self.nodes as *mut c_void,
-                size_of::<S>() * self.capacity as usize,
+                NODE_BYTES * self.capacity as usize,
            );
        }
    }
@ -192,9 +193,9 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
    /// the usual array, and then there's one more node at the end which
    /// continues the list with a new length and NodeId value. PoolVec
    /// iterators automatically do these jumps behind the scenes when necessary.
-    pub fn new<I: ExactSizeIterator<Item = T>, S>(nodes: I, pool: &mut Pool<S>) -> Self {
+    pub fn new<I: ExactSizeIterator<Item = T>, S>(nodes: I, pool: &mut Pool) -> Self {
        debug_assert!(nodes.len() <= u32::MAX as usize);
-        debug_assert!(size_of::<T>() <= size_of::<S>());
+        debug_assert!(size_of::<T>() <= NODE_BYTES);

        let len = nodes.len() as u32;

@ -223,7 +224,7 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
        }
    }

-    pub fn iter<S>(self, pool: &'a Pool<S>) -> impl ExactSizeIterator<Item = &'a T> {
+    pub fn iter<S>(self, pool: &'a Pool) -> impl ExactSizeIterator<Item = &'a T> {
        self.pool_list_iter(pool)
    }

@ -232,7 +233,7 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
    /// actually do want to have this separate function for code reuse
    /// in the iterator's next() method.
    #[inline(always)]
-    fn pool_list_iter<S>(&self, pool: &'a Pool<S>) -> PoolVecIter<'a, S, T> {
+    fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> {
        PoolVecIter {
            pool,
            current_node_id: self.first_node_id,
@ -240,12 +241,12 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
        }
    }

-    pub fn free<S>(self, pool: &'a mut Pool<S>) {
+    pub fn free<S>(self, pool: &'a mut Pool) {
        // zero out the memory
        unsafe {
            let index = self.first_node_id.index as isize;
            let node_ptr = pool.nodes.offset(index) as *mut c_void;
-            let bytes = self.len as usize * size_of::<S>();
+            let bytes = self.len as usize * NODE_BYTES;

            libc::memset(node_ptr, 0, bytes);
        }
@ -254,13 +255,13 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
    }
 }

-struct PoolVecIter<'a, S, T> {
-    pool: &'a Pool<S>,
+struct PoolVecIter<'a, T> {
+    pool: &'a Pool,
    current_node_id: NodeId<T>,
    len_remaining: u32,
 }

-impl<'a, S, T> ExactSizeIterator for PoolVecIter<'a, S, T>
+impl<'a, T> ExactSizeIterator for PoolVecIter<'a, T>
 where
    T: 'a,
 {
@ -269,7 +270,7 @@ where
    }
 }

-impl<'a, S, T> Iterator for PoolVecIter<'a, S, T>
+impl<'a, T> Iterator for PoolVecIter<'a, T>
 where
    T: 'a,
 {