From c1356f0b68bd3d850f4d971837bb06277988f0b4 Mon Sep 17 00:00:00 2001
From: Richard Feldman <oss@rtfeldman.com>
Date: Tue, 8 Dec 2020 22:14:59 -0500
Subject: [PATCH] Fix Pool implementation

---
 editor/src/ast.rs  | 241 ++++++++++++++++++++-------------------------
 editor/src/pool.rs |  49 ++++-----
 2 files changed, 133 insertions(+), 157 deletions(-)
diff --git a/editor/src/ast.rs b/editor/src/ast.rs
index da28a70b59..9a70d2a7f0 100644
--- a/editor/src/ast.rs
+++ b/editor/src/ast.rs
@@ -28,33 +28,45 @@ pub enum IntStyle {
 /// It has a 1B discriminant and variants which hold payloads of at most 31B.
 #[derive(Debug)]
 pub enum Expr2 {
-    /// A number literal (without a dot) containing no underscores
-    Num {
+    /// A negative number literal without a dot
+    I64 {
         number: i64,     // 8B
         var: Variable,   // 4B
         style: IntStyle, // 1B
+        text: PoolStr,   // 8B
     },
-    /// A floating-point literal (with a dot) containing no underscores
+    /// A nonnegative number literal without a dot
+    U64 {
+        number: u64,     // 8B
+        var: Variable,   // 4B
+        style: IntStyle, // 1B
+        text: PoolStr,   // 8B
+    },
+    /// A large (over 64-bit) negative number literal without a dot.
+    /// This only comes up for literals that won't fit in 64-bit integers.
+    I128 {
+        number: i128,    // 16B
+        var: Variable,   // 4B
+        style: IntStyle, // 1B
+        text: PoolStr,   // 8B
+    },
+    /// A large (over 64-bit) nonnegative number literal without a dot.
+    /// This only comes up for literals that won't fit in 64-bit integers.
+    U128 {
+        number: u128,    // 16B
+        var: Variable,   // 4B
+        style: IntStyle, // 1B
+        text: PoolStr,   // 8B
+    },
+    /// A floating-point literal (with a dot)
     Float {
         number: f64,   // 8B
         var: Variable, // 4B
     },
-    /// A number literal (without a dot) containing underscores
-    NumWithUnderscores {
-        number: i64,           // 8B
-        var: Variable,         // 4B
-        text: NodeId<PoolStr>, // 8B
-    },
-    /// A float literal (with a dot) containing underscores
-    FloatWithUnderscores {
-        number: f64,           // 8B
-        var: Variable,         // 4B
-        text: NodeId<PoolStr>, // 8B
-    },
     /// string literals of length up to 30B
     SmallStr(ArrayString<U30>), // 31B
     /// string literals of length 31B or more
-    Str(NodeId<PoolStr>), // 8B
+    Str(PoolStr), // 8B
     // Lookups
     Var(Symbol), // 8B
 
@@ -67,132 +79,101 @@ pub enum Expr2 {
     List {
         list_var: Variable,         // 4B - required for uniqueness of the list
         elem_var: Variable,         // 4B
-        first_elem: PoolVec<Expr2>, // 16B
+        first_elem: PoolVec<Expr2>, // 8B
     },
     If {
         cond_var: Variable,                // 4B
         expr_var: Variable,                // 4B
-        branches: PoolVec<(Expr2, Expr2)>, // 16B
-        final_else: NodeId<Expr2>,         // 8B
+        branches: PoolVec<(Expr2, Expr2)>, // 8B
+        final_else: NodeId<Expr2>,         // 4B
     },
     When {
         cond_var: Variable,            // 4B
         expr_var: Variable,            // 4B
-        branches: PoolVec<WhenBranch>, // 9B
-        cond: NodeId<Expr2>,           // 8B
+        branches: PoolVec<WhenBranch>, // 8B
+        cond: NodeId<Expr2>,           // 4B
     },
     LetRec {
         // TODO need to make this Alias type here page-friendly, which will be hard!
-        aliases: PoolVec<(Symbol, Alias)>, // 9B
-        defs: PoolVec<Def>,                // 9B
-        body_var: Variable,                // 4B
-        body_id: NodeId<Expr2>,            // 8B
+        aliases: PoolVec<(Symbol, Alias)>, // 8B
+        defs: PoolVec<Def>,                // 8B
+        body_var: Variable,                // 4B
+        body_id: NodeId<Expr2>,            // 4B
+    },
+    LetNonRec {
+        // TODO need to make this Alias type here page-friendly, which will be hard!
+        aliases: PoolVec<(Symbol, Alias)>, // 8B
+        def_id: NodeId<Def>,               // 4B
+        body_id: NodeId<Expr2>,            // 4B
+        body_var: Variable,                // 4B
+    },
+    Call {
+        /// NOTE: the first elem in this list is the expression and its variable.
+        /// The others are arguments. This is because we didn't have room for
+        /// both the expr and its variable otherwise.
+        expr_and_args: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
+        fn_var: Variable,      // 4B
+        closure_var: Variable, // 4B
+        /// Cached outside expr_and_args so we don't have to potentially
+        /// traverse that whole linked list chain to count all the args.
+        arity: usize, // 8B - could make this smaller if need be
+        called_via: CalledVia, // 2B
+    },
+    RunLowLevel {
+        op: LowLevel,                             // 1B
+        args: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
+        ret_var: Variable,                        // 4B
+    },
+    Closure {
+        args: PoolVec<(Variable, NodeId<Pat2>)>, // 8B
+        name: Symbol,                            // 8B
+        body: NodeId<Expr2>,                     // 4B
+        function_type: Variable,                 // 4B
+        recursive: Recursive,                    // 1B
+        extra: NodeId<ClosureExtra>,             // 4B
     },
-    // LetNonRec {
-    //     // TODO need to make this Alias type here page-friendly, which will be hard!
-    //     aliases: PoolVec<(Symbol, Alias)>, // 9B
-    //     def_id: NodeId<Def>,               // 8B
-    //     body_id: NodeId<Expr2>,            // 8B
-    //     body_var: Variable,                // 4B
-    // },
-    // Call {
-    //     /// NOTE: the first elem in this list is the expression and its variable.
-    //     /// The others are arguments. This is because we didn't have room for
-    //     /// both the expr and its variable otherwise.
-    //     expr_and_args: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
-    //     fn_var: Variable,      // 4B
-    //     closure_var: Variable, // 4B
-    //     /// Cached outside expr_and_args so we don't have to potentially
-    //     /// traverse that whole linked list chain to count all the args.
-    //     arity: usize, // 8B - could make this smaller if need be
-    //     called_via: CalledVia, // 2B
-    // },
-    // RunLowLevel {
-    //     op: LowLevel,                             // 1B
-    //     args: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
-    //     ret_var: Variable,                        // 4B
-    // },
-    // Closure {
-    //     captured_symbols: PoolVec<(Symbol, Variable)>, // 9B
-    //     args: PoolVec<(Variable, NodeId<Pat2>)>,       // 9B
-    //     recursive: Recursive,                          // 1B
-    //     extra: NodeId<ClosureExtra>,                   // 8B
-    // },
     // Product Types
-    // Record {
-    //     record_var: Variable,                                // 4B
-    //     fields: PoolVec<(PoolStr, Variable, NodeId<Expr2>)>, // 9B
-    // },
+    Record {
+        record_var: Variable,                                // 4B
+        fields: PoolVec<(PoolStr, Variable, NodeId<Expr2>)>, // 8B
+    },
     /// Empty record constant
-    // EmptyRecord,
-    // /// Look up exactly one field on a record, e.g. (expr).foo.
-    // Access {
-    //     field: NodeId<PoolStr>,   // 8B
-    //     expr: NodeId<Expr2>,      // 8B
-    //     vars: NodeId<AccessVars>, // 8B
-    // },
+    EmptyRecord,
+    /// Look up exactly one field on a record, e.g. (expr).foo.
+    Access {
+        field: NodeId<PoolStr>,   // 4B
+        expr: NodeId<Expr2>,      // 4B
+        vars: NodeId<AccessVars>, // 4B
+    },
 
-    // /// field accessor as a function, e.g. (.foo) expr
-    // Accessor {
-    //     record_vars_id: NodeId<RecordVars>, // 8B
-    //     function_var: Variable,             // 4B
-    //     closure_var: Variable,              // 4B
-    //     field_id: NodeId<PoolStr>,          // 8B
-    // },
-    // Update {
-    //     symbol: Symbol,                       // 8B
-    //     updates: PoolVec<(Lowercase, Field)>, // 9B
-    //     vars_id: NodeId<UpdateVars>,          // 8B
-    // },
+    /// field accessor as a function, e.g. (.foo) expr
+    Accessor {
+        record_vars_id: NodeId<RecordVars>, // 4B
+        function_var: Variable,             // 4B
+        closure_var: Variable,              // 4B
+        field_id: NodeId<PoolStr>,          // 4B
+    },
+    Update {
+        symbol: Symbol,                       // 8B
+        updates: PoolVec<(Lowercase, Field)>, // 8B
+        vars_id: NodeId<UpdateVars>,          // 4B
+    },
 
     // Sum Types
-    // Tag {
-    //     // NOTE: A PoolStr node is a 2B length and then 14B bytes,
-    //     // plus more bytes in adjacent nodes if necessary. Thus we have
-    //     // a hard cap of 4094 bytes as the maximum length of tags and fields.
-    //     name_id: NodeId<PoolStr>,                      // 8B
-    //     variant_var: Variable,                         // 4B
-    //     ext_var: Variable,                             // 4B
-    //     arguments: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
-    // },
+    Tag {
+        // NOTE: A PoolStr node is a 2B length and then 14B bytes,
+        // plus more bytes in adjacent nodes if necessary. Thus we have
+        // a hard cap of 4094 bytes as the maximum length of tags and fields.
+        name_id: NodeId<PoolStr>,                      // 4B
+        variant_var: Variable,                         // 4B
+        ext_var: Variable,                             // 4B
+        arguments: PoolVec<(Variable, NodeId<Expr2>)>, // 8B
+    },
 
     // Compiles, but will crash if reached
     RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */),
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-/// It's critical that this fit in 1 byte. If it takes 2B, Expr::Call is too big.
-/// That's why we have all the variants in here, instead of having separate
-/// UnaryOp and Binary
-pub enum CalledVia2 {
-    /// Calling with space, e.g. (foo bar)
-    Space,
-
-    /// (-), e.g. (-x)
-    Negate,
-    /// (!), e.g. (!x)
-    Not,
-
-    // highest precedence binary op
-    Caret,
-    Star,
-    Slash,
-    DoubleSlash,
-    Percent,
-    DoublePercent,
-    Plus,
-    Minus,
-    Equals,
-    NotEquals,
-    LessThan,
-    GreaterThan,
-    LessThanOrEq,
-    GreaterThanOrEq,
-    And,
-    Or,
-    Pizza, // lowest precedence binary op
-}
-
 #[derive(Debug)]
 pub struct Def {
     pub pattern: NodeId<Pat2>, // 3B
@@ -230,15 +211,14 @@ pub struct AccessVars {
     field_var: Variable,  // 4B
 }
 
-/// This is 32B, so it fits in a Node slot.
+/// This is overflow data from a Closure variant, which needs to store
+/// more than 32B of total data
 #[derive(Debug)]
 pub struct ClosureExtra {
-    name: Symbol,              // 8B
-    body: NodeId<Expr2>,       // 8B
-    function_type: Variable,   // 4B
-    closure_type: Variable,    // 4B
-    closure_ext_var: Variable, // 4B
-    return_type: Variable,     // 4B
+    return_type: Variable,                         // 4B
+    captured_symbols: PoolVec<(Symbol, Variable)>, // 8B
+    closure_type: Variable,                        // 4B
+    closure_ext_var: Variable,                     // 4B
 }
 
 #[derive(Debug)]
@@ -278,10 +258,5 @@ pub struct ExprPoolSlot(u8);
 
 #[test]
 fn size_of_expr() {
-    assert_eq!(std::mem::size_of::<Expr2>(), 32);
-}
-
-#[test]
-fn size_of_called_via() {
-    assert_eq!(std::mem::size_of::<CalledVia2>(), 1);
+    assert_eq!(std::mem::size_of::<Expr2>(), crate::pool::NODE_BYTES);
 }
diff --git a/editor/src/pool.rs b/editor/src/pool.rs
index 1c7d13e3fb..91dfde3789 100644
--- a/editor/src/pool.rs
+++ b/editor/src/pool.rs
@@ -15,6 +15,8 @@ use std::marker::PhantomData;
 use std::mem::size_of;
 use std::ptr::null;
 
+pub const NODE_BYTES: usize = 32;
+
 // Each page has 128 slots. Each slot holds one 32B node
 // This means each page is 4096B, which is the size of a memory page
 // on typical systems where the compiler will be run.
@@ -56,19 +58,18 @@ impl<T> Clone for NodeId<T> {
 
 impl<T> Copy for NodeId<T> {}
 
-/// S is a slot size; e.g. Pool<[u8; 32]> for a pool of 32-bit slots
-pub struct Pool<S> {
-    nodes: *mut S,
+pub struct Pool {
+    nodes: *mut [u8; NODE_BYTES],
     num_nodes: u32,
     capacity: u32,
     // free_1node_slots: Vec<NodeId<T>>,
 }
 
-impl<S> Pool<S> {
+impl Pool {
     pub fn with_capacity(&mut self, nodes: u32) -> Self {
         // round up number of nodes requested to nearest page size in bytes
         let bytes_per_page = page_size::get();
-        let node_bytes = size_of::<S>() * nodes as usize;
+        let node_bytes = NODE_BYTES * nodes as usize;
         let leftover = node_bytes % bytes_per_page;
         let bytes_to_mmap = if leftover == 0 {
             node_bytes
@@ -89,12 +90,12 @@ impl<S> Pool<S> {
                 0,
                 0,
             )
-        } as *mut S;
+        } as *mut [u8; NODE_BYTES];
 
         // This is our actual capacity, in nodes.
         // It might be higher than the requested capacity due to rounding up
         // to nearest page size.
-        let capacity = (bytes_to_mmap / size_of::<S>()) as u32;
+        let capacity = (bytes_to_mmap / NODE_BYTES) as u32;
 
         Pool {
             nodes,
@@ -105,7 +106,7 @@ impl<S> Pool<S> {
 
     pub fn add<T>(&mut self, node: T) -> NodeId<T> {
         // It's only safe to store this if T is the same size as S.
-        debug_assert_eq!(size_of::<T>(), size_of::<S>());
+        debug_assert_eq!(size_of::<T>(), NODE_BYTES);
 
         let index = self.num_nodes;
 
@@ -141,23 +142,23 @@ impl<S> Pool<S> {
 
     // A node is available iff its bytes are all zeroes
     #[allow(dead_code)]
-    unsafe fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
-        debug_assert_eq!(size_of::<T>(), size_of::<S>());
+    fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
+        debug_assert_eq!(size_of::<T>(), NODE_BYTES);
 
         unsafe {
-            let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; size_of::<S>()];
+            let node_ptr = self.nodes.offset(node_id.index as isize) as *const [u8; NODE_BYTES];
 
-            *node_ptr == [0; size_of::<S>()]
+            *node_ptr == [0; NODE_BYTES]
         }
     }
 }
 
-impl<S> Drop for Pool<S> {
+impl Drop for Pool {
     fn drop(&mut self) {
         unsafe {
             libc::munmap(
                 self.nodes as *mut c_void,
-                size_of::<S>() * self.capacity as usize,
+                NODE_BYTES * self.capacity as usize,
             );
         }
     }
@@ -192,9 +193,9 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
     /// the usual array, and then there's one more node at the end which
     /// continues the list with a new length and NodeId value. PoolVec
     /// iterators automatically do these jumps behind the scenes when necessary.
-    pub fn new<I: ExactSizeIterator<Item = T>, S>(nodes: I, pool: &mut Pool<S>) -> Self {
+    pub fn new<I: ExactSizeIterator<Item = T>, S>(nodes: I, pool: &mut Pool) -> Self {
         debug_assert!(nodes.len() <= u32::MAX as usize);
-        debug_assert!(size_of::<T>() <= size_of::<S>());
+        debug_assert!(size_of::<T>() <= NODE_BYTES);
 
         let len = nodes.len() as u32;
 
@@ -223,7 +224,7 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
         }
     }
 
-    pub fn iter<S>(self, pool: &'a Pool<S>) -> impl ExactSizeIterator<Item = &'a T> {
+    pub fn iter<S>(self, pool: &'a Pool) -> impl ExactSizeIterator<Item = &'a T> {
         self.pool_list_iter(pool)
     }
 
@@ -232,7 +233,7 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
     /// actually do want to have this separate function for code reuse
     /// in the iterator's next() method.
     #[inline(always)]
-    fn pool_list_iter<S>(&self, pool: &'a Pool<S>) -> PoolVecIter<'a, S, T> {
+    fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> {
         PoolVecIter {
             pool,
             current_node_id: self.first_node_id,
@@ -240,12 +241,12 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
         }
     }
 
-    pub fn free<S>(self, pool: &'a mut Pool<S>) {
+    pub fn free<S>(self, pool: &'a mut Pool) {
         // zero out the memory
         unsafe {
             let index = self.first_node_id.index as isize;
             let node_ptr = pool.nodes.offset(index) as *mut c_void;
-            let bytes = self.len as usize * size_of::<S>();
+            let bytes = self.len as usize * NODE_BYTES;
 
             libc::memset(node_ptr, 0, bytes);
         }
@@ -254,13 +255,13 @@ impl<'a, T: 'a + Sized> PoolVec<T> {
     }
 }
 
-struct PoolVecIter<'a, S, T> {
-    pool: &'a Pool<S>,
+struct PoolVecIter<'a, T> {
+    pool: &'a Pool,
     current_node_id: NodeId<T>,
     len_remaining: u32,
 }
 
-impl<'a, S, T> ExactSizeIterator for PoolVecIter<'a, S, T>
+impl<'a, T> ExactSizeIterator for PoolVecIter<'a, T>
 where
     T: 'a,
 {
@@ -269,7 +270,7 @@ where
     }
 }
 
-impl<'a, S, T> Iterator for PoolVecIter<'a, S, T>
+impl<'a, T> Iterator for PoolVecIter<'a, T>
 where
     T: 'a,
 {