Add editor::pool

Richard Feldman 2020-12-08 22:16:14 -05:00
parent 63e91fb01e
commit ef45e77a35
3 changed files with 312 additions and 393 deletions


@@ -1,4 +1,4 @@
-use crate::bucket::{BucketList, BucketStr, NodeId};
+use crate::pool::{NodeId, PoolStr, PoolVec};
 use arraystring::{typenum::U30, ArrayString};
 use roc_can::def::Annotation;
 use roc_can::expr::{Field, Recursive};
@@ -6,7 +6,6 @@ use roc_module::ident::Lowercase;
 use roc_module::low_level::LowLevel;
 use roc_module::operator::CalledVia;
 use roc_module::symbol::Symbol;
-use roc_problem::can::RuntimeError;
 use roc_types::subs::Variable;
 use roc_types::types::Alias;
@@ -42,27 +41,20 @@ pub enum Expr2 {
     },
     /// A number literal (without a dot) containing underscores
     NumWithUnderscores {
         number: i64,   // 8B
         var: Variable, // 4B
-        text: NodeId<BucketStr>, // 8B
+        text: NodeId<PoolStr>, // 8B
     },
     /// A float literal (with a dot) containing underscores
     FloatWithUnderscores {
         number: f64,   // 8B
         var: Variable, // 4B
-        text: NodeId<BucketStr>, // 8B
+        text: NodeId<PoolStr>, // 8B
     },
     /// string literals of length up to 30B
     SmallStr(ArrayString<U30>), // 31B
-    /// string literals of length up to 4094B
-    MedStr {
-        str: NodeId<BucketStr>, // 8B
-    },
-    /// string literals of length over 4094B, but requires calling malloc/free
-    BigStr {
-        pointer: *const u8, // 8B
-        len: u32, // 4B, meaning maximum string literal size of 4GB. Could theoretically fit 7B here, which would get closer to the full isize::MAX
-    },
+    /// string literals of length 31B or more
+    Str(NodeId<PoolStr>), // 8B
 
     // Lookups
     Var(Symbol), // 8B
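
The SmallStr/Str split above is a small-string optimization: a literal of at most 30 bytes lives inline in the variant (30 bytes of payload plus a length byte is 31B), while anything longer costs only an 8B NodeId into the pool. A minimal standalone sketch of the same layout trade-off, using only std types (the names are illustrative, not from this commit):

    use std::mem::size_of;

    // Stand-ins for the real variants: an inline 31B payload vs. an 8B
    // pool handle (like NodeId<PoolStr>).
    enum StrLit {
        Small { len: u8, bytes: [u8; 30] }, // stored inline, 31B
        Big(*const u8),                     // stored out-of-line, 8B
    }

    fn main() {
        // Either way, tag included, the value fits the editor's 32B node budget.
        assert!(size_of::<StrLit>() <= 32);
    }
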
@@ -73,96 +65,96 @@ pub enum Expr2 {
         elem_var: Variable, // 4B
     },
     List {
         list_var: Variable, // 4B - required for uniqueness of the list
         elem_var: Variable, // 4B
-        elems: BucketList<Expr2>, // 9B
+        first_elem: PoolVec<Expr2>, // 16B
     },
     If {
         cond_var: Variable, // 4B
         expr_var: Variable, // 4B
-        branches: BucketList<(Expr2, Expr2)>, // 9B
+        branches: PoolVec<(Expr2, Expr2)>, // 16B
         final_else: NodeId<Expr2>, // 8B
     },
-    When {
-        cond_var: Variable, // 4B
-        expr_var: Variable, // 4B
-        branches: BucketList<WhenBranch>, // 9B
-        cond: NodeId<Expr2>, // 8B
-    },
-    LetRec {
-        // TODO need to make this Alias type here bucket-friendly, which will be hard!
-        aliases: BucketList<(Symbol, Alias)>, // 9B
-        defs: BucketList<Def>, // 9B
-        body_var: Variable, // 4B
-        body_id: NodeId<Expr2>, // 8B
-    },
-    LetNonRec {
-        // TODO need to make this Alias type here bucket-friendly, which will be hard!
-        aliases: BucketList<(Symbol, Alias)>, // 9B
-        def_id: NodeId<Def>, // 8B
-        body_id: NodeId<Expr2>, // 8B
-        body_var: Variable, // 4B
-    },
-    Call {
-        /// NOTE: the first elem in this list is the expression and its variable.
-        /// The others are arguments. This is because we didn't have room for
-        /// both the expr and its variable otherwise.
-        expr_and_args: BucketList<(Variable, NodeId<Expr2>)>, // 9B
-        fn_var: Variable, // 4B
-        closure_var: Variable, // 4B
-        /// Cached outside expr_and_args so we don't have to potentially
-        /// traverse that whole linked list chain to count all the args.
-        arity: usize, // 8B - could make this smaller if need be
-        called_via: CalledVia, // 2B
-    },
-    RunLowLevel {
-        op: LowLevel, // 1B
-        args: BucketList<(Variable, NodeId<Expr2>)>, // 9B
-        ret_var: Variable, // 4B
-    },
-    Closure {
-        captured_symbols: BucketList<(Symbol, Variable)>, // 9B
-        args: BucketList<(Variable, NodeId<Pat2>)>, // 9B
-        recursive: Recursive, // 1B
-        extra: NodeId<ClosureExtra>, // 8B
-    },
+    // When {
+    //     cond_var: Variable, // 4B
+    //     expr_var: Variable, // 4B
+    //     branches: PoolVec<WhenBranch>, // 9B
+    //     cond: NodeId<Expr2>, // 8B
+    // },
+    // LetRec {
+    //     // TODO need to make this Alias type here page-friendly, which will be hard!
+    //     aliases: PoolVec<(Symbol, Alias)>, // 9B
+    //     defs: PoolVec<Def>, // 9B
+    //     body_var: Variable, // 4B
+    //     body_id: NodeId<Expr2>, // 8B
+    // },
+    // LetNonRec {
+    //     // TODO need to make this Alias type here page-friendly, which will be hard!
+    //     aliases: PoolVec<(Symbol, Alias)>, // 9B
+    //     def_id: NodeId<Def>, // 8B
+    //     body_id: NodeId<Expr2>, // 8B
+    //     body_var: Variable, // 4B
+    // },
+    // Call {
+    //     /// NOTE: the first elem in this list is the expression and its variable.
+    //     /// The others are arguments. This is because we didn't have room for
+    //     /// both the expr and its variable otherwise.
+    //     expr_and_args: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
+    //     fn_var: Variable, // 4B
+    //     closure_var: Variable, // 4B
+    //     /// Cached outside expr_and_args so we don't have to potentially
+    //     /// traverse that whole linked list chain to count all the args.
+    //     arity: usize, // 8B - could make this smaller if need be
+    //     called_via: CalledVia, // 2B
+    // },
+    // RunLowLevel {
+    //     op: LowLevel, // 1B
+    //     args: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
+    //     ret_var: Variable, // 4B
+    // },
+    // Closure {
+    //     captured_symbols: PoolVec<(Symbol, Variable)>, // 9B
+    //     args: PoolVec<(Variable, NodeId<Pat2>)>, // 9B
+    //     recursive: Recursive, // 1B
+    //     extra: NodeId<ClosureExtra>, // 8B
+    // },
 
     // Product Types
-    Record {
-        record_var: Variable, // 4B
-        fields: BucketList<(BucketStr, Variable, NodeId<Expr2>)>, // 9B
-    },
+    // Record {
+    //     record_var: Variable, // 4B
+    //     fields: PoolVec<(PoolStr, Variable, NodeId<Expr2>)>, // 9B
+    // },
     /// Empty record constant
-    EmptyRecord,
-    /// Look up exactly one field on a record, e.g. (expr).foo.
-    Access {
-        field: NodeId<BucketStr>, // 8B
-        expr: NodeId<Expr2>, // 8B
-        vars: NodeId<AccessVars>, // 8B
-    },
-    /// field accessor as a function, e.g. (.foo) expr
-    Accessor {
-        record_vars_id: NodeId<RecordVars>, // 8B
-        function_var: Variable, // 4B
-        closure_var: Variable, // 4B
-        field_id: NodeId<BucketStr>, // 8B
-    },
-    Update {
-        symbol: Symbol, // 8B
-        updates: BucketList<(Lowercase, Field)>, // 9B
-        vars_id: NodeId<UpdateVars>, // 8B
-    },
+    // EmptyRecord,
+    // /// Look up exactly one field on a record, e.g. (expr).foo.
+    // Access {
+    //     field: NodeId<PoolStr>, // 8B
+    //     expr: NodeId<Expr2>, // 8B
+    //     vars: NodeId<AccessVars>, // 8B
+    // },
+    // /// field accessor as a function, e.g. (.foo) expr
+    // Accessor {
+    //     record_vars_id: NodeId<RecordVars>, // 8B
+    //     function_var: Variable, // 4B
+    //     closure_var: Variable, // 4B
+    //     field_id: NodeId<PoolStr>, // 8B
+    // },
+    // Update {
+    //     symbol: Symbol, // 8B
+    //     updates: PoolVec<(Lowercase, Field)>, // 9B
+    //     vars_id: NodeId<UpdateVars>, // 8B
+    // },
 
     // Sum Types
-    Tag {
-        // NOTE: A BucketStr node is a 2B length and then 14B bytes,
-        // plus more bytes in adjacent nodes if necessary. Thus we have
-        // a hard cap of 4094 bytes as the maximum length of tags and fields.
-        name_id: NodeId<BucketStr>, // 8B
-        variant_var: Variable, // 4B
-        ext_var: Variable, // 4B
-        arguments: BucketList<(Variable, NodeId<Expr2>)>, // 9B
-    },
+    // Tag {
+    //     // NOTE: A PoolStr node is a 2B length and then 14B bytes,
+    //     // plus more bytes in adjacent nodes if necessary. Thus we have
+    //     // a hard cap of 4094 bytes as the maximum length of tags and fields.
+    //     name_id: NodeId<PoolStr>, // 8B
+    //     variant_var: Variable, // 4B
+    //     ext_var: Variable, // 4B
+    //     arguments: PoolVec<(Variable, NodeId<Expr2>)>, // 9B
+    // },
 
     // Compiles, but will crash if reached
     RuntimeError(/* TODO make a version of RuntimeError that fits in 15B */),
@@ -206,8 +198,8 @@ pub struct Def {
     pub pattern: NodeId<Pat2>, // 3B
     pub expr: NodeId<Expr2>, // 3B
     // TODO maybe need to combine these vars behind a pointer?
     pub expr_var: Variable, // 4B
-    pub pattern_vars: BucketList<(Symbol, Variable)>, // 4B
+    pub pattern_vars: PoolVec<(Symbol, Variable)>, // 4B
     // TODO how big is an annotation? What about an Option<Annotation>?
     pub annotation: Option<Annotation>, // ???
 }
@@ -239,7 +231,7 @@ pub struct AccessVars {
 }
 
 /// This is 32B, so it fits in a Node slot.
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[derive(Debug)]
 pub struct ClosureExtra {
     name: Symbol, // 8B
     body: NodeId<Expr2>, // 8B
@@ -251,110 +243,38 @@ pub struct ClosureExtra {
 #[derive(Debug)]
 pub struct WhenBranch {
-    pub patterns: BucketList<Pat2>, // 4B
+    pub patterns: PoolVec<Pat2>, // 4B
     pub body: NodeId<Expr2>, // 3B
     pub guard: Option<NodeId<Expr2>>, // 4B
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub struct PatternId {
-    /// TODO: PatternBucketId
-    bucket_id: ExprBucketId,
-    /// TODO: PatternBucketSlot
-    slot: ExprBucketSlot,
-}
-
-// Each bucket has metadata and slots.
-// The metadata determines things like which slots are free.
-#[derive(Debug)]
-pub struct ExprBucket {
-    // We can store this as a u8 because whenever we create a bucket, we
-    // always fill at least one slot. So there will never be 256 unused slots
-    // remaining; the most there will ever be will be 255.
-    //
-    // Note that there can be "holes" in this as we remove nodes; those
-    // are recorded in the containing struct, not here.
-    //
-    // Also note that we can derive from this the next unused slot.
-    unused_slots_remaining: u8,
-    slots: Box<ExprBucketSlots>,
-}
-
-pub struct Exprs {
-    // Whenever we free a slot of a particular size, we make a note of it
-    // here, so we can reuse it later. This can lead to poor data locality
-    // over time, but the alternative is memory fragmentation and ever-growing
-    // memory usage. We could in theory go up to free_128node_slots, but in
-    // practice it seems unlikely that it would be worth the bookkeeping
-    // effort to go that high.
-    //
-    // TODO: this could be refactored Into `free_slots: [5; Vec<ExprId>]`
-    // where (2 ^ index) is the size node in that slot. It's less
-    // self-documenting but might allow for better code reuse.
-    pub free_1node_slots: Vec<ExprId>,
-    pub free_2node_slots: Vec<ExprId>,
-    pub free_4node_slots: Vec<ExprId>,
-    pub free_8node_slots: Vec<ExprId>,
-    pub free_16node_slots: Vec<ExprId>,
-    // Note that empty_buckets is equivalent to free_256node_slots - it means
-    // the entire bucket is empty, at which point we can fill it with
-    // whatever we please.
-    pub empty_buckets: Vec<ExprBucketId>,
-    pub buckets: Vec<ExprBucket>,
-}
-
-// Each bucket has 128 slots. Each slot holds one 32B node
-// This means each bucket is 4096B, which is the size of a memory page
-// on typical systems where the compiler will be run.
-//
-// Nice things about this system include:
-// * Allocating a new bucket is as simple as asking the OS for a memory page.
-// * Since each node is 32B, each node's memory address will be a multiple of 16.
-// * Thanks to the free lists and our consistent chunk sizes, we should
-//   end up with very little fragmentation.
-// * Finding a slot for a given node should be very fast: see if the relevant
-//   free list has any openings; if not, try the next size up.
-//
-// Less nice things include:
-// * This system makes it very hard to ever give a page back to the OS.
-//   We could try doing the Mesh Allocator strategy: whenever we allocate
-//   something, assign it to a random slot in the bucket, and then periodically
-//   try to merge two pages into one (by locking and remapping them in the OS)
-//   and then returning the redundant physical page back to the OS. This should
-//   work in theory, but is pretty complicated, and we'd need to schedule it.
-//   Keep in mind that we can't use the Mesh Allocator itself because it returns
-//   usize pointers, which would be too big for us to have 16B nodes.
-//   On the plus side, we could be okay with higher memory usage early on,
-//   and then later use the Mesh strategy to reduce long-running memory usage.
-type ExprBucketSlots = [Expr2; 128];
-
-#[test]
-fn size_of_expr_bucket() {
-    assert_eq!(
-        std::mem::size_of::<ExprBucketSlots>(),
-        crate::bucket::BUCKET_BYTES
-    );
-}
+    /// TODO: PatternPoolId
+    page_id: ExprPoolId,
+    /// TODO: PatternPoolSlot
+    slot: ExprPoolSlot,
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub struct PatId {
-    bucket_id: ExprBucketId, // TODO PatBucketId
-    slot: ExprBucketSlot,    // TODO PatBucketSlot
+    page_id: ExprPoolId, // TODO PatPoolId
+    slot: ExprPoolSlot,  // TODO PatPoolSlot
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub struct ExprId {
-    bucket_id: ExprBucketId,
-    slot: ExprBucketSlot,
+    page_id: ExprPoolId,
+    slot: ExprPoolSlot,
 }
 
-// We have a maximum of 65K buckets.
+// We have a maximum of 65K pages.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub struct ExprBucketId(u16);
+pub struct ExprPoolId(u16);
 
-/// Each of these is the index of one 16B node inside a bucket's 4096B
+/// Each of these is the index of one 16B node inside a page's 4096B
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub struct ExprBucketSlot(u8);
+pub struct ExprPoolSlot(u8);
 
 #[test]
 fn size_of_expr() {

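For reference, the // 16B comments on the new PoolVec fields above follow from PoolVec's layout later in this commit: an 8B first_node_id pointer plus a 4B len plus a 4B cap, which is also what the pool_vec_size test checks (size_of::<usize>() + 8 on 64-bit targets). A quick sketch of that arithmetic (the struct name is illustrative):

    use std::mem::size_of;

    // Mirrors PoolVec's layout: pointer + len + cap.
    struct VecLayout {
        first_node_id: *const u8, // 8B on 64-bit targets
        len: u32,                 // 4B
        cap: u32,                 // 4B
    }

    fn main() {
        assert_eq!(size_of::<VecLayout>(), 16); // hence the // 16B comments
    }
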

@@ -27,7 +27,7 @@ use winit::event::{Event, ModifiersState};
 use winit::event_loop::ControlFlow;
 
 pub mod ast;
-pub mod bucket;
+pub mod pool;
 mod buffer;
 pub mod file;
 mod keyboard_input;


@@ -10,7 +10,7 @@
 ///
 /// Pages also use the node value 0 (all 0 bits) to mark nodes as unoccupied.
 /// This is important for performance.
-use libc::{c_void, calloc, free, mmap, munmap, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
+use libc::{c_void, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
 use std::mem::size_of;
 use std::ptr::null;
@@ -18,7 +18,31 @@ pub const NODE_SIZE: usize = 32;
 
 // Pages are an internal concept which never leave this module.
 const PAGE_BYTES: usize = 4096;
-const NODES_PER_PAGE: usize = PAGE_BYTES / NODE_SIZE;
+const NODES_PER_PAGE: u8 = (PAGE_BYTES / NODE_SIZE) as u8;
+
+// Each page has 128 slots. Each slot holds one 32B node
+// This means each page is 4096B, which is the size of a memory page
+// on typical systems where the compiler will be run.
+//
+// Nice things about this system include:
+// * Allocating a new page is as simple as asking the OS for a memory page.
+// * Since each node is 32B, each node's memory address will be a multiple of 16.
+// * Thanks to the free lists and our consistent chunk sizes, we should
+//   end up with very little fragmentation.
+// * Finding a slot for a given node should be very fast: see if the relevant
+//   free list has any openings; if not, try the next size up.
+//
+// Less nice things include:
+// * This system makes it very hard to ever give a page back to the OS.
+//   We could try doing the Mesh Allocator strategy: whenever we allocate
+//   something, assign it to a random slot in the page, and then periodically
+//   try to merge two pages into one (by locking and remapping them in the OS)
+//   and then returning the redundant physical page back to the OS. This should
+//   work in theory, but is pretty complicated, and we'd need to schedule it.
+//   Keep in mind that we can't use the Mesh Allocator itself because it returns
+//   usize pointers, which would be too big for us to have 16B nodes.
+//   On the plus side, we could be okay with higher memory usage early on,
+//   and then later use the Mesh strategy to reduce long-running memory usage.
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct NodeId<T: Sized>(*const T);
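
The page arithmetic above is why NODES_PER_PAGE can now be a u8: a 4096B page divided into 32B nodes yields 128 slots, well within a single byte. As a checkable sketch of the constants just shown:

    const PAGE_BYTES: usize = 4096;
    const NODE_SIZE: usize = 32;
    const NODES_PER_PAGE: u8 = (PAGE_BYTES / NODE_SIZE) as u8;

    fn main() {
        // 4096 / 32 = 128, so a page's node count fits comfortably in a u8.
        assert_eq!(NODES_PER_PAGE, 128);
    }
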
@@ -29,114 +53,67 @@ pub struct Pool {
 }
 
 impl Pool {
+    /// Returns a pool with a capacity equal to the given number of 4096-byte pages.
+    // pub fn with_pages(pages: usize) {
+    //     todo!();
+    // }
+
     // fn find_space_for(&mut self, nodes: u8) -> Result<PageId<T>, ()> {}
 
-    pub fn add<T: Sized>(&mut self) -> Result<NodeId<T>, ()> {
-        let num_pages = self.buckets.len();
-
-        match self.pages.last() {}
-
-        if self.next_unused_node.offset_from(self.first_node) < NODES_PER_PAGE {
-            let bucket = Page::default();
-            self.buckets.push(bucket);
-            Ok(NodeId(bucket.first_node as *const T))
-        } else {
-            Err(())
-        }
-    }
-
-    fn get_unchecked<'a, T: Sized>(&'a self, node_id: NodeId<T>) -> &'a T {
-        unsafe {
-            self.buckets
-                .get(node_id.bucket_id.value as usize)
-                .unwrap()
-                .get_unchecked(node_id.node.value)
-        }
-    }
-
-    pub fn get<'a, T: Sized>(&'a self, node_id: NodeId<T>) -> Option<&'a T> {
-        self.buckets
-            .get(node_id.bucket_id.value as usize)
-            .and_then(|bucket| bucket.get(node_id.node))
-    }
+    pub fn add<T: Sized>(&mut self, node: T) -> NodeId<T> {
+        // It's only safe to store this as a *mut T if T is the size of a node.
+        debug_assert_eq!(size_of::<T>(), NODE_SIZE);
+
+        match self.pages.last_mut() {
+            Some(page) if page.node_count < NODES_PER_PAGE => Pool::add_to_page(page, node),
+            _ => {
+                // This page is either full or doesn't exist, so create a new one.
+                let mut page = Page::default();
+                let node_id = Pool::add_to_page(&mut page, node);
+
+                self.pages.push(page);
+
+                node_id
+            }
+        }
+    }
+
+    /// Reserves the given number of contiguous node slots, and returns
+    /// the NodeId of the first one. We only allow reserving 2^32 in a row.
+    fn reserve<T: Sized>(&mut self, _nodes: u32) -> NodeId<T> {
+        todo!("Implement Pool::reserve");
+    }
+
+    fn add_to_page<T: Sized>(page: &mut Page, node: T) -> NodeId<T> {
+        unsafe {
+            let node_ptr = (page.first_node as *const T).offset(page.node_count as isize) as *mut T;
+
+            *node_ptr = node;
+            page.node_count += 1;
+
+            NodeId(node_ptr)
+        }
+    }
+
+    pub fn get<'a, T: Sized>(&'a self, node_id: NodeId<T>) -> &'a T {
+        unsafe { &*node_id.0 }
+    }
+
+    // A node is available iff its bytes are all zeroes
+    #[allow(dead_code)]
+    unsafe fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
+        debug_assert_eq!(size_of::<T>(), NODE_SIZE);
+
+        let ptr = node_id.0 as *const [u8; NODE_SIZE];
+
+        *ptr == [0; NODE_SIZE]
+    }
 }
 struct Page {
-    #[allow(dead_code)]
-    next_unused_node: *const [u8; NODE_SIZE],
-    first_node: *mut [u8; NODE_SIZE],
-}
-
-impl Page {
-    /// If there's room left in the bucket, adds the item and returns
-    /// the node where it was put. If there was no room left, returns Err(()).
-    #[allow(dead_code)]
-    pub fn add<T: Sized>(&mut self, node: T) -> Result<NodeId<T>, ()> {
-        // It's only safe to store this as a *const T if T is the size of a node.
-        debug_assert_eq!(size_of::<T>(), NODE_SIZE);
-
-        // Once next_unused_node exceeds NODES_PER_PAGE, we have no room left.
-        if self.next_unused_node <= NODES_PER_PAGE {
-            let chosen_node = self.next_unused_node;
-
-            unsafe { *chosen_node = node };
-
-            self.next_unused_node = self.next_unused_node.add(1);
-
-            Ok(NodeId(chosen_node))
-        } else {
-            // No room left!
-            Err(())
-        }
-    }
-
-    /// If the given node is available, inserts the given node into it.
-    /// Otherwise, returns the node that was in the already-occupied node.
-    #[allow(dead_code)]
-    pub fn insert<T: Sized>(&mut self, node: T, node: NodeId<T>) -> Result<(), &T> {
-        // It's only safe to store this as a *const T if T is the size of a node.
-        debug_assert_eq!(size_of::<T>(), NODE_SIZE);
-
-        let node = node.0;
-
-        unsafe {
-            if self.is_available(node) {
-                self.put_unchecked(node, node);
-                Ok(())
-            } else {
-                Err(self.get_unchecked(node))
-            }
-        }
-    }
-
-    pub fn get<'a, T: Sized>(&'a self, node: NodeId<T>) -> Option<&'a T> {
-        // It's only safe to store this as a *const T if T is the size of a node.
-        debug_assert_eq!(size_of::<T>(), NODE_SIZE);
-
-        unsafe {
-            let node_ptr = self.first_node.offset(node.value as isize) as *const T;
-            let value: &[u8; NODE_SIZE] = &*(node_ptr as *const [u8; NODE_SIZE]);
-
-            if *value != [0; NODE_SIZE] {
-                Some(&*(value as *const [u8; NODE_SIZE] as *const T))
-            } else {
-                None
-            }
-        }
-    }
-
-    unsafe fn get_unchecked<T>(&self, node: u8) -> &T {
-        &*(self.first_node.offset(node as isize) as *const T)
-    }
-
-    // A node is available iff its bytes are all zeroes
-    unsafe fn is_available<T>(&self, node_id: NodeId<T>) -> bool {
-        debug_assert_eq!(size_of::<T>(), NODE_SIZE);
-
-        *node_id.0 == [0; NODE_SIZE]
-    }
+    first_node: *const [u8; NODE_SIZE],
+    node_count: u8,
 }
 
 impl Default for Page {
@@ -144,7 +121,7 @@ impl Default for Page {
         let first_node = if page_size::get() == 4096 {
             unsafe {
                 // mmap exactly one memory page (4096 bytes)
-                mmap(
+                libc::mmap(
                     null::<c_void>() as *mut c_void,
                     PAGE_BYTES,
                     PROT_READ | PROT_WRITE,
@@ -157,12 +134,12 @@ impl Default for Page {
             // Somehow the page size is not 4096 bytes, so fall back on calloc.
             // (We use calloc over malloc because we rely on the bytes having
             // been zeroed to tell which nodes are available.)
-            unsafe { calloc(1, PAGE_BYTES) }
+            unsafe { libc::calloc(1, PAGE_BYTES) }
         } as *mut [u8; NODE_SIZE];
 
         Page {
-            next_unused_node: first_node,
             first_node,
+            node_count: 0,
         }
     }
 }
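
Both allocation paths above hand back zeroed memory (MAP_ANONYMOUS mappings and calloc are both zero-filled), which is the invariant that lets Pool::is_available treat an all-zero 32B node as unoccupied. A standalone sketch of that invariant (illustrative, not part of the commit):

    use libc::{c_void, calloc, free};

    const NODE_SIZE: usize = 32;
    const PAGE_BYTES: usize = 4096;

    fn main() {
        unsafe {
            // calloc, like an anonymous mmap, returns zero-filled bytes...
            let page = calloc(1, PAGE_BYTES) as *const [u8; NODE_SIZE];

            // ...so a freshly allocated node reads as "available".
            assert_eq!(*page, [0u8; NODE_SIZE]);

            free(page as *mut c_void);
        }
    }
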
@@ -171,163 +148,185 @@ impl Drop for Page {
     fn drop(&mut self) {
         if page_size::get() == 4096 {
             unsafe {
-                munmap(self.first_node as *mut c_void, PAGE_BYTES);
+                libc::munmap(self.first_node as *mut c_void, PAGE_BYTES);
             }
         } else {
             unsafe {
-                free(self.first_node as *mut c_void);
+                libc::free(self.first_node as *mut c_void);
             }
         }
     }
 }
 
+/// A string of at most 2^32 bytes, allocated in a pool if it fits in a Page,
+/// or using malloc as a fallback if not. Like std::string::String, this has
+/// both a length and capacity.
 #[derive(Debug)]
-pub struct PageStr {
+pub struct PoolStr {
     first_node_id: NodeId<()>,
-    first_segment_len: u8,
+    len: u32,
+    cap: u32,
 }
 
 #[test]
-fn size_of_bucket_str() {
-    assert_eq!(std::mem::size_of::<PageList<()>>(), 4);
+fn pool_str_size() {
+    assert_eq!(size_of::<PoolStr>(), size_of::<usize>() + 8);
 }
 
-/// A non-empty list inside a bucket. It takes 4B of memory.
-///
-/// This is internally represented as an array of at most 255 nodes, which
-/// can grow to 256+ nodes by having the last nodeent be a linked list Cons
-/// cell which points to another such backing array which has more nodes.
-///
-/// In practice, these will almost be far below 256 nodes, but in theory
-/// they can be enormous in length thanks to the linked list fallback.
-///
-/// Since these are non-empty lists, we need separate variants for collections
-/// that can be empty, e.g. EmptyRecord and EmptyList. In contrast, we don't
-/// need an EmptyList or EmptyWhen, since although those use PageList
-/// to store their branches, having zero branches is syntactically invalid.
-/// Same with Call and Closure, since all functions must have 1+ arguments.
+/// An array of at most 2^32 elements, allocated in a pool if it fits in a Page,
+/// or using malloc as a fallback if not. Like std::vec::Vec, this has both
+/// a length and capacity.
 #[derive(Debug)]
-pub struct PageList<T: Sized> {
+pub struct PoolVec<T: Sized> {
     first_node_id: NodeId<T>,
-    first_segment_len: u8,
+    len: u32,
+    cap: u32,
 }
 
 #[test]
-fn size_of_bucket_list() {
-    assert_eq!(std::mem::size_of::<PageList<()>>(), 4);
+fn pool_vec_size() {
    assert_eq!(size_of::<PoolVec<()>>(), size_of::<usize>() + 8);
 }
 
-impl<'a, T: 'a + Sized> PageList<T> {
-    /// If given a first_segment_len of 0, that means this is a PageList
-    /// consisting of 256+ nodes. The first 255 are stored in the usual
-    /// array, and then there's one more nodeent at the end which continues
-    /// the list with a new length and NodeId value. PageList iterators
-    /// automatically do these jumps behind the scenes when necessary.
-    pub fn new(first_node_id: NodeId<T>, first_segment_len: u8) -> Self {
-        PageList {
-            first_segment_len,
-            first_node_id: first_node_id.bucket_id,
-            first_node_sl: first_node_id.node,
-        }
-    }
-
-    pub fn into_iter(self, buckets: &'a Pages) -> impl Iterator<Item = &'a T> {
-        self.bucket_list_iter(buckets)
-    }
+impl<'a, T: 'a + Sized> PoolVec<T> {
+    /// If given a slice of length > 128, the first 128 nodes will be stored in
+    /// the usual array, and then there's one more node at the end which
+    /// continues the list with a new length and NodeId value. PoolVec
+    /// iterators automatically do these jumps behind the scenes when necessary.
+    pub fn new<I: ExactSizeIterator<Item = T>>(nodes: I, pool: &mut Pool) -> Self {
+        debug_assert!(nodes.len() <= u32::MAX as usize);
+        debug_assert!(size_of::<T>() <= NODE_SIZE);
+
+        let len = nodes.len() as u32;
+
+        if len > 0 {
+            if len <= NODES_PER_PAGE as u32 {
+                let first_node_id = pool.reserve(len);
+                let mut next_node_ptr = first_node_id.0 as *mut T;
+
+                for node in nodes {
+                    unsafe {
+                        *next_node_ptr = node;
+                        next_node_ptr = next_node_ptr.offset(1);
+                    }
+                }
+
+                PoolVec {
+                    first_node_id,
+                    len,
+                    cap: len,
+                }
+            } else {
+                let first_node_ptr = unsafe {
+                    // mmap enough memory to hold it
+                    libc::mmap(
+                        null::<c_void>() as *mut c_void,
+                        len as usize,
+                        PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANONYMOUS,
+                        0,
+                        0,
+                    )
+                };
+
+                PoolVec {
+                    first_node_id: NodeId(first_node_ptr as *const T),
+                    len,
+                    cap: len,
+                }
+            }
+        } else {
+            PoolVec {
+                first_node_id: NodeId(std::ptr::null()),
+                len: 0,
+                cap: 0,
+            }
+        }
+    }
+
+    pub fn iter(self, pool: &'a Pool) -> impl ExactSizeIterator<Item = &'a T> {
+        self.pool_list_iter(pool)
+    }
 
     /// Private version of into_iter which exposes the implementation detail
-    /// of PageListIter. We don't want that struct to be public, but we
+    /// of PoolVecIter. We don't want that struct to be public, but we
     /// actually do want to have this separate function for code reuse
     /// in the iterator's next() method.
-    fn bucket_list_iter(&self, buckets: &'a Pages) -> PageListIter<'a, T> {
-        let first_segment_len = self.first_segment_len;
-        let continues_with_cons = first_segment_len == 0;
-        let len_remaining = if continues_with_cons {
-            // We have 255 nodes followed by a Cons cell continuing the list.
-            u8::MAX
-        } else {
-            first_segment_len
-        };
-
-        PageListIter {
-            continues_with_cons,
-            len_remaining,
-            bucket_id: self.first_node_id,
-            node: self.first_node_sl,
-            buckets,
-        }
-    }
+    #[inline(always)]
+    fn pool_list_iter(&self, pool: &'a Pool) -> PoolVecIter<'a, T> {
+        PoolVecIter {
+            _pool: pool,
+            current_node_id: NodeId(self.first_node_id.0),
+            len_remaining: self.len,
+        }
+    }
+
+    pub fn free(self) {
+        if self.len <= NODES_PER_PAGE as u32 {
+            // If this was small enough to fit in a Page, then zero it out.
+            unsafe {
+                libc::memset(
+                    self.first_node_id.0 as *mut c_void,
+                    0,
+                    self.len as usize * NODE_SIZE,
+                );
+            }
+
+            // TODO insert it into the pool's free list
+        } else {
+            // This was bigger than a Page, so we mmap'd it. Now we free it!
+            unsafe {
+                libc::munmap(self.first_node_id.0 as *mut c_void, self.len as usize);
+            }
+        }
+    }
 }
 
-struct PageListIter<'a, T: Sized> {
+struct PoolVecIter<'a, T: Sized> {
+    /// This iterator returns elements which have the same lifetime as the pool
+    _pool: &'a Pool,
     current_node_id: NodeId<T>,
-    len_remaining: u8,
-    continues_with_cons: bool,
-    buckets: &'a Pages,
+    len_remaining: u32,
 }
 
-impl<'a, T: Sized> Iterator for PageListIter<'a, T>
+impl<'a, T: Sized> ExactSizeIterator for PoolVecIter<'a, T>
+where
+    T: 'a,
+{
+    fn len(&self) -> usize {
+        self.len_remaining as usize
+    }
+}
+
+impl<'a, T: Sized> Iterator for PoolVecIter<'a, T>
 where
     T: 'a,
 {
     type Item = &'a T;
 
     fn next(&mut self) -> Option<Self::Item> {
-        match self.len_remaining {
-            0 => match self.continues_with_cons {
-                // We're done! This is by far the most common case, so we put
-                // it first to avoid branch mispredictions.
-                false => None,
-                // We need to continue with a Cons cell.
-                true => {
-                    let node_id = NodeId {
-                        bucket_id: self.bucket_id,
-                        node: self.node,
-                    }
-                    .next_node();
-
-                    // Since we have continues_with_cons set, the next node
-                    // will definitely be occupied with a PageList struct.
-                    let node = self.buckets.get_unchecked(node_id);
-                    let next_list = unsafe { &*(node as *const T as *const PageList<T>) };
-
-                    // Replace the current iterator with an iterator into that
-                    // list, and then continue with next() on that iterator.
-                    let next_iter = next_list.bucket_list_iter(self.buckets);
-
-                    self.bucket_id = next_iter.bucket_id;
-                    self.node = next_iter.node;
-                    self.len_remaining = next_iter.len_remaining;
-                    self.continues_with_cons = next_iter.continues_with_cons;
-
-                    self.next()
-                }
-            },
-            1 => {
-                self.len_remaining = 0;
-
-                // Don't advance the node pointer's node, because that might
-                // advance past the end of the bucket!
-
-                Some(self.buckets.get_unchecked(NodeId {
-                    bucket_id: self.bucket_id,
-                    node: self.node,
-                }))
-            }
-            len_remaining => {
-                // Get the current node
-                let node_id = NodeId {
-                    bucket_id: self.bucket_id,
-                    node: self.node,
-                };
-                let node = self.buckets.get_unchecked(node_id);
-
-                // Advance the node pointer to the next node in the current bucket
-                self.node = self.node.increment();
-                self.len_remaining = len_remaining - 1;
-
-                Some(node)
-            }
-        }
+        let len_remaining = self.len_remaining;
+
+        if len_remaining > 1 {
+            // Get the current node
+            let node_ptr = self.current_node_id.0;
+
+            // Advance the node pointer to the next node in the current page
+            self.current_node_id = NodeId(unsafe { node_ptr.offset(1) });
+            self.len_remaining = len_remaining - 1;
+
+            Some(unsafe { &*node_ptr })
+        } else if len_remaining == 1 {
+            self.len_remaining = 0;
+
+            // Don't advance the node pointer's node, because that might
+            // advance past the end of the page!
+
+            Some(unsafe { &*self.current_node_id.0 })
+        } else {
+            // len_remaining was 0
+            None
+        }
     }
 }
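
Taken together, the new API is meant to be used roughly as follows. This is a hedged sketch, not code from the commit: it assumes some way to construct a Pool (no constructor appears in this diff), and PoolVec::new's pooled path cannot actually run yet because Pool::reserve is still todo!() above.

    // Node types must be exactly NODE_SIZE (32) bytes; Pool::add
    // debug_asserts this.
    struct ExampleNode([u64; 4]); // exactly 32 bytes

    fn demo(pool: &mut Pool) {
        // Store a node in the pool and get a NodeId handle back.
        let id: NodeId<ExampleNode> = pool.add(ExampleNode([1, 2, 3, 4]));

        // Dereference the handle through the pool.
        let node: &ExampleNode = pool.get(id);
        assert_eq!(node.0, [1, 2, 3, 4]);

        // PoolVec::new(nodes, pool) would lay out up to 128 elements
        // contiguously in a page (mmap-ing larger runs instead), and
        // pool_vec.iter(pool) walks them as an ExactSizeIterator, but
        // the pooled path depends on Pool::reserve, which is still todo!().
    }
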