mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-29 23:04:49 +00:00
Revise a bunch of bucket stuff
This commit is contained in:
parent
bf7f1d49e2
commit
b43ff799ff
3 changed files with 379 additions and 79 deletions
|
@ -169,9 +169,9 @@ pub struct Field {
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub enum Recursive {
|
pub enum Recursive {
|
||||||
Recursive,
|
NotRecursive = 0,
|
||||||
TailRecursive,
|
Recursive = 1,
|
||||||
NotRecursive,
|
TailRecursive = 2,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
|
|
@ -243,75 +243,209 @@ pub enum IntStyle {
|
||||||
Binary,
|
Binary,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Experimental idea for an Expr that fits in 16B
|
/// Experimental idea for an Expr that fits in 16B.
|
||||||
|
/// It has a 1B discriminant and variants which hold payloads of at most 15B.
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub enum Expr2 {
|
pub enum Expr2 {
|
||||||
/// An integer literal (without a dot)
|
/// A number literal (without a dot) containing no underscores
|
||||||
Num {
|
Num {
|
||||||
number: i64,
|
number: i64, // 8B
|
||||||
var: Variable,
|
var: Variable, // 4B
|
||||||
style: IntStyle,
|
style: IntStyle, // 1B
|
||||||
},
|
},
|
||||||
/// A floating-point literal (with a dot)
|
/// A floating-point literal (with a dot) containing no underscores
|
||||||
Float {
|
Float {
|
||||||
number: f64,
|
number: f64, // 8B
|
||||||
var: Variable,
|
var: Variable, // 4B
|
||||||
},
|
},
|
||||||
/// A formatted integer literal (containing underscores)
|
/// A number literal (without a dot) containing underscores
|
||||||
FormattedInt {
|
NumWithUnderscores {
|
||||||
text_bytes: *const u8,
|
number: i64, // 8B
|
||||||
text_len: u8, // numeric literals can be at most 255 chars long
|
var: Variable, // 4B
|
||||||
var: Variable,
|
text: NodeId<BucketStr>, // 3B
|
||||||
style: IntStyle,
|
|
||||||
},
|
},
|
||||||
/// A formatted float literal (containing underscores)
|
/// A float literal (with a dot) containing underscores
|
||||||
FormattedFloat {
|
FloatWithUnderscores {
|
||||||
text_bytes: *const u8,
|
number: f64, // 8B
|
||||||
text_len: u8, // numeric literals can be at most 255 chars long
|
var: Variable, // 4B
|
||||||
var: Variable,
|
text: NodeId<BucketStr>, // 3B
|
||||||
},
|
},
|
||||||
SmallStr(ArrayString<U14>),
|
/// string literals of length up to 14B
|
||||||
|
SmallStr(ArrayString<U14>), // 15B
|
||||||
|
/// string literals of length up to 4094B
|
||||||
|
MedStr(NodeId<BucketStr>), // 4B
|
||||||
|
/// string literals of length over 4094B, but requires calling malloc/free
|
||||||
BigStr {
|
BigStr {
|
||||||
bytes: *const u8,
|
pointer: *const u8, // 8B on 64-bit systems
|
||||||
len: u32, // string literals can be at most 2^32 (~4 billion) bytes long
|
len: u32, // 4B, meaning maximum string literal size of 4GB. Could theoretically fit 7B here, which would go up to the full isize::MAX
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Lookups
|
||||||
|
Var(Symbol), // 8B
|
||||||
|
|
||||||
|
/// Separate from List because BucketList must be non-empty, and in this case
|
||||||
|
/// the list literal has no elements
|
||||||
|
EmptyList {
|
||||||
|
list_var: Variable, // 4B - required for uniqueness of the list
|
||||||
|
elem_var: Variable, // 4B
|
||||||
|
},
|
||||||
|
|
||||||
|
List {
|
||||||
|
list_var: Variable, // 4B - required for uniqueness of the list
|
||||||
|
elem_var: Variable, // 4B
|
||||||
|
elems: BucketList<Expr2>, // 4B
|
||||||
|
},
|
||||||
|
|
||||||
If {
|
If {
|
||||||
cond_var: Variable,
|
cond_var: Variable, // 4B
|
||||||
expr_var: Variable,
|
expr_var: Variable, // 4B
|
||||||
// Each branch is an (Expr, Expr) tuple.
|
branches: BucketList<(Expr2, Expr2)>, // 4B
|
||||||
// Make sure to put them in the bucket contiguously.
|
final_else: NodeId<Expr2>, // 3B
|
||||||
first_branch: ExprId,
|
|
||||||
num_branches: u8,
|
|
||||||
final_else: ExprId,
|
|
||||||
},
|
},
|
||||||
When {
|
When {
|
||||||
cond_var: Variable,
|
cond_var: Variable, // 4B
|
||||||
expr_var: Variable,
|
expr_var: Variable, // 4B
|
||||||
cond: ExprId,
|
branches: BucketList<WhenBranch>, // 4B
|
||||||
// Make sure to put these branches
|
cond: ExprId, // 3B
|
||||||
// in the bucket contiguously.
|
|
||||||
first_branch: WhenBranchId,
|
|
||||||
num_branches: u8,
|
|
||||||
},
|
},
|
||||||
|
LetRec {
|
||||||
|
// TODO need to make this Alias type here bucket-friendly, which will be hard!
|
||||||
|
aliases: BucketList<(Symbol, Alias)>, // 4B
|
||||||
|
defs: BucketList<Def>, // 4B
|
||||||
|
body_var: Variable, // 4B
|
||||||
|
body: NodeId<Expr2>, // 3B
|
||||||
|
},
|
||||||
|
LetNonRec {
|
||||||
|
// TODO need to make this Alias type here bucket-friendly, which will be hard!
|
||||||
|
aliases: BucketList<(Symbol, Alias)>, // 4B
|
||||||
|
def: NodeId<Def>, // 3B
|
||||||
|
body: NodeId<Expr2>, // 3B
|
||||||
|
body_var: Variable, // 4B
|
||||||
|
},
|
||||||
|
Call {
|
||||||
|
/// NOTE: the first elem in this list is the expression and its variable.
|
||||||
|
/// The others are arguments. This is because we didn't have room for
|
||||||
|
/// both the expr and its variable otherwise.
|
||||||
|
expr_and_args: BucketList<(Variable, NodeId<Expr2>)>, // 4B
|
||||||
|
fn_var: Variable, // 4B
|
||||||
|
closure_var: Variable, // 4B
|
||||||
|
called_via: CalledVia, // 1B
|
||||||
|
/// Cached outside expr_and_args so we don't have to potentially
|
||||||
|
/// traverse that whole linked list chain to count all the args.
|
||||||
|
arity: u16, // 2B
|
||||||
|
},
|
||||||
|
RunLowLevel {
|
||||||
|
op: LowLevel, // 1B
|
||||||
|
args: BucketList<(Variable, NodeId<Expr2>)>, // 4B
|
||||||
|
ret_var: Variable, // 4B
|
||||||
|
},
|
||||||
|
Closure {
|
||||||
|
/// NOTE: the first elem in this list is the function's name Symbol, plus Variable::NONE
|
||||||
|
///
|
||||||
|
/// This is not ideal, but there's no room for an 8-byte Symbol
|
||||||
|
/// in a 16B node that already needs to hold this much other data.
|
||||||
|
captured_symbols: BucketList<(Symbol, Variable)>, // 4B
|
||||||
|
args: BucketList<(Variable, NodeId<Pat2>)>, // 4B
|
||||||
|
body: NodeId<Expr2>, // 3B
|
||||||
|
recursive: Recursive, // 1B
|
||||||
|
vars: NodeId<ClosureVars>, // 3B
|
||||||
|
},
|
||||||
|
|
||||||
|
// Product Types
|
||||||
|
Record {
|
||||||
|
record_var: Variable, // 4B
|
||||||
|
fields: BucketList<(BucketStr, Variable, NodeId<Expr2>)>, // 4B
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Empty record constant
|
||||||
|
EmptyRecord,
|
||||||
|
|
||||||
|
/// Look up exactly one field on a record, e.g. (expr).foo.
|
||||||
|
Access {
|
||||||
|
field: NodeId<BucketStr>, // 3B
|
||||||
|
expr: NodeId<Expr2>, // 3B
|
||||||
|
vars: NodeId<AccessVars>, // 3B
|
||||||
|
},
|
||||||
|
|
||||||
|
/// field accessor as a function, e.g. (.foo) expr
|
||||||
|
Accessor {
|
||||||
|
record_vars: NodeId<RecordVars>, // 3B
|
||||||
|
function_var: Variable, // 4B
|
||||||
|
closure_var: Variable, // 4B
|
||||||
|
field: NodeId<BucketStr>, // 3B
|
||||||
|
},
|
||||||
|
Update {
|
||||||
|
symbol: Symbol, // 8B
|
||||||
|
updates: BucketList<(Lowercase, Field)>, // 4B
|
||||||
|
vars: NodeId<UpdateVars>, // 3B
|
||||||
|
},
|
||||||
|
|
||||||
|
// Sum Types
|
||||||
|
Tag {
|
||||||
|
// NOTE: A BucketStr node is a 2B length and then 14B bytes,
|
||||||
|
// plus more bytes in adjacent nodes if necessary. Thus we have
|
||||||
|
// a hard cap of 4094 bytes as the maximum length of tags and fields.
|
||||||
|
name: NodeId<BucketStr>, // 3B
|
||||||
|
variant_var: Variable, // 4B
|
||||||
|
ext_var: Variable, // 4B
|
||||||
|
arguments: BucketList<(Variable, NodeId<Expr2>)>, // 4B
|
||||||
|
},
|
||||||
|
|
||||||
|
// Compiles, but will crash if reached
|
||||||
|
RuntimeError(RuntimeError),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub struct Def {
|
||||||
|
pub pattern: NodeId<Pat2>, // 3B
|
||||||
|
pub expr: NodeId<Expr2>, // 3B
|
||||||
|
// TODO maybe need to combine these vars behind a pointer?
|
||||||
|
pub expr_var: Variable, // 4B
|
||||||
|
pub pattern_vars: BucketList<(Symbol, Variable)>, // 4B
|
||||||
|
// TODO how big is an annotation? What about an Option<Annotation>?
|
||||||
|
pub annotation: Option<Annotation>, // ???
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
pub struct WhenBranchId {
|
enum Pat2 {
|
||||||
/// TODO: WhenBranchBucketId
|
Todo,
|
||||||
bucket_id: ExprBucketId,
|
}
|
||||||
/// TODO: WhenBranchBucketSlot
|
|
||||||
slot: ExprBucketSlot,
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
|
struct UpdateVars {
|
||||||
|
record_var: Variable, // 4B
|
||||||
|
ext_var: Variable, // 4B
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
|
struct RecordVars {
|
||||||
|
record_var: Variable, // 4B
|
||||||
|
ext_var: Variable, // 4B
|
||||||
|
field_var: Variable, // 4B
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is 12B (three 4B Variables), so it fits in a Node slot.
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
|
struct AccessVars {
|
||||||
|
record_var: Variable, // 4B
|
||||||
|
ext_var: Variable, // 4B
|
||||||
|
field_var: Variable, // 4B
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is 16B, so it fits in a Node slot.
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
|
struct ClosureVars {
|
||||||
|
function_type: Variable,
|
||||||
|
closure_type: Variable,
|
||||||
|
closure_ext_var: Variable,
|
||||||
|
return_type: Variable,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub struct WhenBranch {
|
pub struct WhenBranch {
|
||||||
/// TODO: what if each branch had exactly 1 pattern?
|
pub patterns: BucketList<Pat2>, // 4B
|
||||||
/// That would save us 1B from storing the length.
|
pub body: NodeId<Expr2>, // 3B
|
||||||
pub first_pattern: PatternId,
|
pub guard: Option<NodeId<Expr2>>, // 4B
|
||||||
pub num_patterns: u8,
|
|
||||||
pub body: ExprId,
|
|
||||||
/// TODO: should we have an ExprId::NULL for this?
|
|
||||||
pub guard: Option<ExprId>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
|
@ -397,6 +531,12 @@ fn size_of_expr_bucket() {
|
||||||
assert_eq!(std::mem::size_of::<ExprBucketSlots>(), 4096);
|
assert_eq!(std::mem::size_of::<ExprBucketSlots>(), 4096);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
|
pub struct PatId {
|
||||||
|
bucket_id: ExprBucketId, // TODO PatBucketId
|
||||||
|
slot: ExprBucketSlot, // TODO PatBucketSlot
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
pub struct ExprId {
|
pub struct ExprId {
|
||||||
bucket_id: ExprBucketId,
|
bucket_id: ExprBucketId,
|
||||||
|
|
|
@ -1,23 +1,40 @@
|
||||||
/// A bucket
|
/// A bucket of 16-byte nodes. The node value 0 is reserved for the bucket's
|
||||||
|
/// use, and valid nodes may never have that value.
|
||||||
|
///
|
||||||
|
/// By design, each bucket is 4096 bytes large. When you make a bucket, it
|
||||||
|
/// uses mmap to reserve one anonymous memory page in which to store nodes.
|
||||||
|
/// Since nodes are 16 bytes, one bucket can store 256 nodes; you can access
|
||||||
|
/// a particular node by its BucketSlot, which is an opaque wrapper around a u8.
|
||||||
|
///
|
||||||
|
/// Buckets also use the node value 0 (all 0 bits) to mark slots as unoccupied.
|
||||||
|
/// This is important for performance.
|
||||||
use libc::{c_void, calloc, free, mmap, munmap, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
|
use libc::{c_void, calloc, free, mmap, munmap, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE};
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::mem::{self, size_of};
|
use std::mem::{self, size_of};
|
||||||
use std::ptr::null;
|
use std::ptr::null;
|
||||||
use std::{u16, u8};
|
|
||||||
|
|
||||||
const BUCKET_BYTES: usize = 4096;
|
const BUCKET_BYTES: usize = 4096;
|
||||||
|
|
||||||
pub struct NodeId<T> {
|
pub struct NodeId<T: Sized> {
|
||||||
pub bucket_id: BucketId<T>,
|
pub bucket_id: BucketId<T>,
|
||||||
pub slot: BucketSlot<T>,
|
pub slot: BucketSlot<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct BucketId<T> {
|
impl<T: Sized> NodeId<T> {
|
||||||
|
fn next_slot(&self) -> Self {
|
||||||
|
NodeId {
|
||||||
|
bucket_id: self.bucket_id,
|
||||||
|
slot: self.slot.increment(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct BucketId<T: Sized> {
|
||||||
value: u16,
|
value: u16,
|
||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> BucketId<T> {
|
impl<T: Sized> BucketId<T> {
|
||||||
fn from_u16(value: u16) -> Self {
|
fn from_u16(value: u16) -> Self {
|
||||||
BucketId {
|
BucketId {
|
||||||
value,
|
value,
|
||||||
|
@ -26,26 +43,36 @@ impl<T> BucketId<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct BucketSlot<T> {
|
pub struct BucketSlot<T: Sized> {
|
||||||
value: u8,
|
value: u8,
|
||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> BucketSlot<T> {
|
impl<T: Sized> BucketSlot<T> {
|
||||||
fn from_u8(value: u8) -> Self {
|
fn from_u8(value: u8) -> Self {
|
||||||
BucketSlot {
|
BucketSlot {
|
||||||
value,
|
value,
|
||||||
_phantom: PhantomData::default(),
|
_phantom: PhantomData::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn increment(&self) -> Self {
|
||||||
|
BucketSlot {
|
||||||
|
value: self.value + 1,
|
||||||
|
_phantom: PhantomData::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Buckets<T> {
|
pub struct Buckets {
|
||||||
buckets: Vec<Bucket<T>>,
|
buckets: Vec<Bucket>,
|
||||||
|
// free_1node_slots: Vec<NodeId<T>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> Buckets<T> {
|
impl Buckets {
|
||||||
pub fn add(&mut self) -> Result<BucketId<T>, ()> {
|
// fn find_space_for(&mut self, nodes: u8) -> Result<BucketId<T>, ()> {}
|
||||||
|
|
||||||
|
pub fn add<T: Sized>(&mut self, node: T) -> Result<BucketId<T>, ()> {
|
||||||
let num_buckets = self.buckets.len();
|
let num_buckets = self.buckets.len();
|
||||||
|
|
||||||
if num_buckets <= u16::MAX as usize {
|
if num_buckets <= u16::MAX as usize {
|
||||||
|
@ -59,23 +86,33 @@ impl<T> Buckets<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get<'a>(&'a self, node_id: NodeId<T>) -> Option<&'a T> {
|
fn get_unchecked<'a, T: Sized>(&'a self, node_id: NodeId<T>) -> &'a T {
|
||||||
|
self.buckets
|
||||||
|
.get(node_id.bucket_id.value as usize)
|
||||||
|
.unwrap()
|
||||||
|
.get_unchecked(node_id.slot.value)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get<'a, T: Sized>(&'a self, node_id: NodeId<T>) -> Option<&'a T> {
|
||||||
self.buckets
|
self.buckets
|
||||||
.get(node_id.bucket_id.value as usize)
|
.get(node_id.bucket_id.value as usize)
|
||||||
.and_then(|bucket| bucket.get(node_id.slot))
|
.and_then(|bucket| bucket.get(node_id.slot))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Bucket<T> {
|
struct Bucket {
|
||||||
next_unused_slot: u16,
|
next_unused_slot: u16,
|
||||||
first_slot: *mut T,
|
first_slot: *mut [u8; 16],
|
||||||
_phantom: PhantomData<T>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> Bucket<T> {
|
impl Bucket {
|
||||||
/// If there's room left in the bucket, adds the item and returns
|
/// If there's room left in the bucket, adds the item and returns
|
||||||
/// the slot where it was put. If there was no room left, returns Err(()).
|
/// the slot where it was put. If there was no room left, returns Err(()).
|
||||||
pub fn add(&mut self, node: T) -> Result<BucketSlot<T>, ()> {
|
pub fn add<T: Sized>(&mut self, node: T) -> Result<BucketSlot<T>, ()> {
|
||||||
|
// It's only safe to store this as a *const T if T is 16 bytes.
|
||||||
|
// This is designed to be used exclusively with 16-byte nodes!
|
||||||
|
debug_assert_eq!(size_of::<T>(), 16);
|
||||||
|
|
||||||
// Once next_unused_slot exceeds u8::MAX, we have no room left.
|
// Once next_unused_slot exceeds u8::MAX, we have no room left.
|
||||||
if self.next_unused_slot <= u8::MAX as u16 {
|
if self.next_unused_slot <= u8::MAX as u16 {
|
||||||
let chosen_slot = self.next_unused_slot as u8;
|
let chosen_slot = self.next_unused_slot as u8;
|
||||||
|
@ -92,7 +129,11 @@ impl<T> Bucket<T> {
|
||||||
|
|
||||||
/// If the given slot is available, inserts the given node into it.
|
/// If the given slot is available, inserts the given node into it.
|
||||||
/// Otherwise, returns the node that was in the already-occupied slot.
|
/// Otherwise, returns the node that was in the already-occupied slot.
|
||||||
pub fn insert(&mut self, node: T, slot: BucketSlot<T>) -> Result<(), &T> {
|
pub fn insert<T: Sized>(&mut self, node: T, slot: BucketSlot<T>) -> Result<(), &T> {
|
||||||
|
// It's only safe to use this if T is 16 bytes.
|
||||||
|
// This is designed to be used exclusively with 16-byte nodes!
|
||||||
|
debug_assert_eq!(size_of::<T>(), 16);
|
||||||
|
|
||||||
let slot = slot.value;
|
let slot = slot.value;
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
|
@ -106,7 +147,11 @@ impl<T> Bucket<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get<'a>(&'a self, slot: BucketSlot<T>) -> Option<&'a T> {
|
pub fn get<'a, T: Sized>(&'a self, slot: BucketSlot<T>) -> Option<&'a T> {
|
||||||
|
// It's only safe to store this as a *const T if T is 16 bytes.
|
||||||
|
// This is designed to be used exclusively with 16-byte nodes!
|
||||||
|
debug_assert_eq!(size_of::<T>(), 16);
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
let slot_ptr = self.first_slot.offset(slot.value as isize) as *const T;
|
let slot_ptr = self.first_slot.offset(slot.value as isize) as *const T;
|
||||||
let value = &*slot_ptr;
|
let value = &*slot_ptr;
|
||||||
|
@ -119,13 +164,17 @@ impl<T> Bucket<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn put_unchecked(&mut self, node: T, slot: u8) {
|
unsafe fn put_unchecked<T: Sized>(&mut self, node: T, slot: u8) {
|
||||||
|
// It's only safe to store this as a *const T if T is 16 bytes.
|
||||||
|
// This is designed to be used exclusively with 16-byte nodes!
|
||||||
|
debug_assert_eq!(size_of::<T>(), 16);
|
||||||
|
|
||||||
let slot_ptr = self.first_slot.offset(slot as isize);
|
let slot_ptr = self.first_slot.offset(slot as isize);
|
||||||
|
|
||||||
*slot_ptr = node;
|
*slot_ptr = node;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn get_unchecked<'a>(&'a self, slot: u8) -> &'a T {
|
unsafe fn get_unchecked<'a, T>(&'a self, slot: u8) -> &'a T {
|
||||||
&*self.first_slot.offset(slot as isize)
|
&*self.first_slot.offset(slot as isize)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -137,12 +186,8 @@ impl<T> Bucket<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> Default for Bucket<T> {
|
impl Default for Bucket {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
// It's only safe to store this as a *const T if T is 16 bytes.
|
|
||||||
// This is designed to be used exclusively with 16-byte nodes!
|
|
||||||
debug_assert_eq!(size_of::<T>(), 16);
|
|
||||||
|
|
||||||
let first_slot = if page_size::get() == 4096 {
|
let first_slot = if page_size::get() == 4096 {
|
||||||
unsafe {
|
unsafe {
|
||||||
// mmap exactly one memory page (4096 bytes)
|
// mmap exactly one memory page (4096 bytes)
|
||||||
|
@ -160,7 +205,7 @@ impl<T> Default for Bucket<T> {
|
||||||
// (We use calloc over malloc because we rely on the bytes having
|
// (We use calloc over malloc because we rely on the bytes having
|
||||||
// been zeroed to tell which slots are available.)
|
// been zeroed to tell which slots are available.)
|
||||||
unsafe { calloc(1, BUCKET_BYTES) }
|
unsafe { calloc(1, BUCKET_BYTES) }
|
||||||
} as *mut T;
|
} as *mut [u8; 16];
|
||||||
|
|
||||||
Bucket {
|
Bucket {
|
||||||
next_unused_slot: 0,
|
next_unused_slot: 0,
|
||||||
|
@ -170,7 +215,7 @@ impl<T> Default for Bucket<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> Drop for Bucket<T> {
|
impl Drop for Bucket {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if page_size::get() == 4096 {
|
if page_size::get() == 4096 {
|
||||||
unsafe {
|
unsafe {
|
||||||
|
@ -183,3 +228,118 @@ impl<T> Drop for Bucket<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A non-empty list inside a bucket. It takes 4B of memory.
|
||||||
|
///
|
||||||
|
/// This is internally represented as an array of at most 255 nodes, which
|
||||||
|
/// can grow to 256+ nodes by having the last node be a linked list Cons
|
||||||
|
/// cell which points to another such backing array which has more nodes.
|
||||||
|
///
|
||||||
|
/// In practice, these will almost always be far below 256 nodes, but in theory
|
||||||
|
/// they can be enormous in length thanks to the linked list fallback.
|
||||||
|
///
|
||||||
|
/// Since these are non-empty lists, we need separate variants for collections
|
||||||
|
/// that can be empty, e.g. EmptyRecord and EmptyList. In contrast, we don't
|
||||||
|
/// need an EmptyIf or EmptyWhen, since although those use BucketList
|
||||||
|
/// to store their branches, having zero branches is syntactically invalid.
|
||||||
|
/// Same with Call and Closure, since all functions must have 1+ arguments.
|
||||||
|
pub struct BucketList<T: Sized> {
|
||||||
|
first_node_id: NodeId<T>,
|
||||||
|
first_segment_len: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Sized> BucketList<T> {
|
||||||
|
/// If given a first_segment_len of 0, that means this is a BucketList
|
||||||
|
/// consisting of 256+ nodes. The first 255 are stored in the usual
|
||||||
|
/// array, and then there's one more node at the end which continues
|
||||||
|
/// the list with a new length and NodeId value. BucketList iterators
|
||||||
|
/// automatically do these jumps behind the scenes when necessary.
|
||||||
|
pub fn new(first_node_id: NodeId<T>, first_segment_len: u8) -> Self {
|
||||||
|
BucketList {
|
||||||
|
first_segment_len,
|
||||||
|
first_node_id,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_iter<'a>(self, buckets: &'a Buckets) -> impl Iterator<Item = &'a T> {
|
||||||
|
self.into_bucket_list_iter(buckets)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Private version of into_iter which exposes the implementation detail
|
||||||
|
/// of BucketListIter. We don't want that struct to be public, but we
|
||||||
|
/// actually do want to have this separate function for code reuse
|
||||||
|
/// in the iterator's next() method.
|
||||||
|
fn into_bucket_list_iter<'a>(self, buckets: &'a Buckets) -> BucketListIter<'a, T> {
|
||||||
|
let first_segment_len = self.first_segment_len;
|
||||||
|
let continues_with_cons = first_segment_len == 0;
|
||||||
|
let len_remaining = if continues_with_cons {
|
||||||
|
// We have 255 nodes followed by a Cons cell continuing the list.
|
||||||
|
u8::MAX
|
||||||
|
} else {
|
||||||
|
first_segment_len
|
||||||
|
};
|
||||||
|
|
||||||
|
BucketListIter {
|
||||||
|
continues_with_cons,
|
||||||
|
len_remaining,
|
||||||
|
node_id: self.first_node_id,
|
||||||
|
buckets,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BucketListIter<'a, T: Sized> {
|
||||||
|
node_id: NodeId<T>,
|
||||||
|
len_remaining: u8,
|
||||||
|
continues_with_cons: bool,
|
||||||
|
buckets: &'a Buckets,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T: Sized> Iterator for BucketListIter<'a, T> {
|
||||||
|
type Item = &'a T;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self.len_remaining {
|
||||||
|
0 => match self.continues_with_cons {
|
||||||
|
// We're done! This is by far the most common case, so we put
|
||||||
|
// it first to avoid branch mispredictions.
|
||||||
|
False => None,
|
||||||
|
// We need to continue with a Cons cell.
|
||||||
|
True => {
|
||||||
|
// Since we have continues_with_cons set, the next slot
|
||||||
|
// will definitely be occupied with a BucketList struct.
|
||||||
|
let node = self.buckets.get_unchecked(self.node_id.next_slot());
|
||||||
|
let next_list = unsafe { mem::transmute::<&T, &BucketList<T>>(node) };
|
||||||
|
|
||||||
|
// Replace the current iterator with an iterator into that
|
||||||
|
// list, and then continue with next() on that iterator.
|
||||||
|
let next_iter = next_list.into_bucket_list_iter(self.buckets);
|
||||||
|
|
||||||
|
self.node_id = next_iter.node_id;
|
||||||
|
self.len_remaining = next_iter.len_remaining;
|
||||||
|
self.continues_with_cons = next_iter.continues_with_cons;
|
||||||
|
|
||||||
|
self.next()
|
||||||
|
}
|
||||||
|
},
|
||||||
|
1 => {
|
||||||
|
self.len_remaining = 0;
|
||||||
|
|
||||||
|
// Don't advance the node pointer's slot, because that might
|
||||||
|
// advance past the end of the bucket!
|
||||||
|
|
||||||
|
Some(self.buckets.get_unchecked(self.node_id))
|
||||||
|
}
|
||||||
|
len_remaining => {
|
||||||
|
// Get the current node
|
||||||
|
let node = self.buckets.get_unchecked(self.node_id);
|
||||||
|
|
||||||
|
// Advance the node pointer to the next slot in the current bucket
|
||||||
|
self.node_id = self.node_id.next_slot();
|
||||||
|
self.len_remaining = len_remaining - 1;
|
||||||
|
|
||||||
|
Some(node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue