Add support for interning normalized recursive layouts

Ayaz Hafiz 2023-01-20 16:45:29 -06:00
parent 8750127111
commit 7169d0974d


@@ -5,6 +5,7 @@ use std::{
sync::Arc,
};
use bumpalo::Bump;
use parking_lot::{Mutex, RwLock};
use roc_builtins::bitcode::{FloatWidth, IntWidth};
use roc_collections::{default_hasher, BumpMap};
@@ -139,6 +140,11 @@ pub trait LayoutInterner<'a>: Sized {
representation: InLayout<'a>,
) -> LambdaSet<'a>;
/// Inserts a recursive layout into the interner.
/// Takes a normalized recursive layout with the recursion pointer set to [Layout::VOID].
/// Will update the RecursivePointer as appropriate during insertion.
fn insert_recursive(&mut self, arena: &'a Bump, normalized_layout: Layout<'a>) -> InLayout<'a>;
/// Retrieves a value from the interner.
fn get(&self, key: InLayout<'a>) -> Layout<'a>;
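// A minimal sketch of the insert_recursive contract (hypothetical; compare
// the insert_then_reintern test at the bottom of this commit). The caller
// normalizes every recursion pointer to Layout::VOID, here via the
// pre-interned Layout::RECURSIVE_PTR, and the interner patches the pointers
// to point at the freshly reserved slot:
//
//     let elem = interner.insert(Layout::Builtin(Builtin::List(Layout::RECURSIVE_PTR)));
//     let normalized = Layout::Union(UnionLayout::NonNullableUnwrapped(&*arena.alloc([elem])));
//     let slot = interner.insert_recursive(arena, normalized);
//     // The stored layout now names `slot` in its RecursivePointer, so it
//     // no longer compares equal to the normalized input.
//     assert_ne!(interner.get(slot), normalized);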
@@ -301,6 +307,14 @@ pub struct STLayoutInterner<'a> {
target_info: TargetInfo,
}
/// Interner constructed with an exclusive lock over [GlobalLayoutInterner]
struct LockedGlobalInterner<'a, 'r> {
map: &'r mut BumpMap<Layout<'a>, InLayout<'a>>,
normalized_lambda_set_map: &'r mut BumpMap<LambdaSet<'a>, LambdaSet<'a>>,
vec: &'r mut Vec<Layout<'a>>,
target_info: TargetInfo,
}
/// Generic hasher for a value, to be used by all interners.
///
/// This uses the [default_hasher], so interner maps should also rely on [default_hasher].
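// The helper's body sits outside the hunks of this diff; a plausible shape,
// shown only for orientation (the callers below rely only on it agreeing
// with the maps' default_hasher):
fn hash<V: std::hash::Hash>(val: V) -> u64 {
    use std::hash::{BuildHasher, Hash, Hasher};
    let mut state = default_hasher().build_hasher();
    val.hash(&mut state);
    state.finish()
}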
@@ -426,6 +440,52 @@ impl<'a> GlobalLayoutInterner<'a> {
}
}
fn get_or_insert_hashed_normalized_recursive(
&self,
arena: &'a Bump,
normalized: Layout<'a>,
normalized_hash: u64,
) -> WrittenGlobalRecursive<'a> {
let mut map = self.0.map.lock();
if let Some((_, &interned)) = map
.raw_entry()
.from_key_hashed_nocheck(normalized_hash, &normalized)
{
let full_layout = self.0.vec.read()[interned.0];
return WrittenGlobalRecursive {
interned_layout: interned,
full_layout,
};
}
let mut vec = self.0.vec.write();
let mut normalized_lambda_set_map = self.0.normalized_lambda_set_map.lock();
let slot = unsafe { InLayout::from_index(vec.len()) };
vec.push(Layout::VOID_NAKED);
let mut interner = LockedGlobalInterner {
map: &mut map,
normalized_lambda_set_map: &mut normalized_lambda_set_map,
vec: &mut vec,
target_info: self.0.target_info,
};
let full_layout = reify::reify_recursive_layout(arena, &mut interner, slot, normalized);
vec[slot.0] = full_layout;
let _old = map.insert(normalized, slot);
debug_assert!(_old.is_none());
let _old_full_layout = map.insert(full_layout, slot);
debug_assert!(_old_full_layout.is_none());
WrittenGlobalRecursive {
interned_layout: slot,
full_layout,
}
}
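// The method above follows a reserve-then-fill discipline: push VOID_NAKED
// as a placeholder so the slot index exists, reify against that index, then
// publish the result under both the normalized and the full key so either
// form hits the same slot later. The same idea in miniature (a generic,
// hypothetical sketch, not from this commit):
fn reserve_then_fill(vec: &mut Vec<String>, build: impl FnOnce(usize) -> String) -> usize {
    let slot = vec.len();
    vec.push(String::new()); // reserve a placeholder, like Layout::VOID_NAKED
    let full = build(slot);  // the built value may embed `slot`, closing the cycle
    vec[slot] = full;        // publish the completed value
    slot
}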
fn get(&self, interned: InLayout<'a>) -> Layout<'a> {
let InLayout(index, _) = interned;
self.0.vec.read()[index]
@@ -441,6 +501,11 @@ struct WrittenGlobalLambdaSet<'a> {
full_layout: Layout<'a>,
}
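/// Slot and filled-in layout produced when a recursive layout is written into
/// the global interner; forks record both keys in their thread-local caches.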
struct WrittenGlobalRecursive<'a> {
interned_layout: InLayout<'a>,
full_layout: Layout<'a>,
}
impl<'a> TLLayoutInterner<'a> {
/// Records an interned value in thread-specific storage, for faster access on lookups.
fn record(&self, key: Layout<'a>, interned: InLayout<'a>) {
@@ -514,6 +579,42 @@ impl<'a> LayoutInterner<'a> for TLLayoutInterner<'a> {
full_lambda_set
}
fn insert_recursive(&mut self, arena: &'a Bump, normalized_layout: Layout<'a>) -> InLayout<'a> {
// - Check if the normalized layout already has an interned slot. If it does we're done, since no
// recursive layout would ever have VOID as the recursion pointer.
// - If not, allocate a slot and compute the recursive layout with the recursion pointer
// resolving to the new slot.
// - Point the resolved and normalized layout to the new slot.
let global = &self.parent;
let normalized_hash = hash(normalized_layout);
let mut new_interned_full_layout = None;
let (&mut normalized_layout, &mut interned) = self
.map
.raw_entry_mut()
.from_key_hashed_nocheck(normalized_hash, &normalized_layout)
.or_insert_with(|| {
let WrittenGlobalRecursive {
interned_layout,
full_layout,
} = global.get_or_insert_hashed_normalized_recursive(
arena,
normalized_layout,
normalized_hash,
);
// The new filled-in layout isn't present in our thread; make sure it is for future
// reference.
new_interned_full_layout = Some(full_layout);
(normalized_layout, interned_layout)
});
self.record(normalized_layout, interned);
if let Some(full_layout) = new_interned_full_layout {
self.record(full_layout, interned);
}
interned
}
fn get(&self, key: InLayout<'a>) -> Layout<'a> {
if let Some(Some(value)) = self.vec.borrow().get(key.0) {
return *value;
@@ -565,63 +666,252 @@ impl<'a> STLayoutInterner<'a> {
}
}
impl<'a> LayoutInterner<'a> for STLayoutInterner<'a> {
fn insert(&mut self, value: Layout<'a>) -> InLayout<'a> {
let hash = hash(value);
let (_, interned) = self
.map
.raw_entry_mut()
.from_key_hashed_nocheck(hash, &value)
.or_insert_with(|| {
let interned = InLayout(self.vec.len(), Default::default());
self.vec.push(value);
(value, interned)
});
*interned
macro_rules! st_impl {
($($lt:lifetime)? $interner:ident) => {
impl<'a$(, $lt)?> LayoutInterner<'a> for $interner<'a$(, $lt)?> {
fn insert(&mut self, value: Layout<'a>) -> InLayout<'a> {
let hash = hash(value);
let (_, interned) = self
.map
.raw_entry_mut()
.from_key_hashed_nocheck(hash, &value)
.or_insert_with(|| {
let interned = InLayout(self.vec.len(), Default::default());
self.vec.push(value);
(value, interned)
});
*interned
}
fn insert_lambda_set(
&mut self,
args: &'a &'a [InLayout<'a>],
ret: InLayout<'a>,
set: &'a &'a [(Symbol, &'a [InLayout<'a>])],
representation: InLayout<'a>,
) -> LambdaSet<'a> {
// IDEA:
// - check if the "normalized" lambda set (with a void full_layout slot) maps to an
// inserted lambda set
// - if so, use that one immediately
// - otherwise, allocate a new slot, intern the lambda set, and then fill the slot in
let normalized_lambda_set =
make_normalized_lamdba_set(args, ret, set, representation);
if let Some(lambda_set) = self.normalized_lambda_set_map.get(&normalized_lambda_set)
{
return *lambda_set;
}
// This lambda set must be new to the interner, reserve a slot and fill it in.
let slot = unsafe { InLayout::from_index(self.vec.len()) };
let lambda_set = LambdaSet {
args,
ret,
set,
representation,
full_layout: slot,
};
let filled_slot = self.insert(Layout::LambdaSet(lambda_set));
assert_eq!(slot, filled_slot);
self.normalized_lambda_set_map
.insert(normalized_lambda_set, lambda_set);
lambda_set
}
fn insert_recursive(
&mut self,
arena: &'a Bump,
normalized_layout: Layout<'a>,
) -> InLayout<'a> {
// IDEA:
// - check if the normalized layout (with a void recursion pointer) maps to an
// inserted recursive layout
// - if so, use that one immediately
// - otherwise, allocate a new slot, update the recursive layout, and intern
if let Some(in_layout) = self.map.get(&normalized_layout) {
return *in_layout;
}
// This recursive layout must be new to the interner, reserve a slot and fill it in.
let slot = unsafe { InLayout::from_index(self.vec.len()) };
self.vec.push(Layout::VOID_NAKED);
let full_layout =
reify::reify_recursive_layout(arena, self, slot, normalized_layout);
self.vec[slot.0] = full_layout;
self.map.insert(normalized_layout, slot);
self.map.insert(full_layout, slot);
slot
}
fn get(&self, key: InLayout<'a>) -> Layout<'a> {
let InLayout(index, _) = key;
self.vec[index]
}
fn target_info(&self) -> TargetInfo {
self.target_info
}
}
};
}
st_impl!(STLayoutInterner);
st_impl!('r LockedGlobalInterner);
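// st_impl! stamps out the same single-threaded LayoutInterner impl for both
// STLayoutInterner<'a> and LockedGlobalInterner<'a, 'r>; the optional
// $lt:lifetime fragment is what lets one body cover types of different
// lifetime arity. The trick in isolation (a hypothetical miniature, not from
// this commit):
struct One<'a>(&'a str);
struct Two<'a, 'r>(&'a str, &'r str);
trait First<'a> {
    fn first(&self) -> &'a str;
}
macro_rules! first_impl {
    ($($lt:lifetime)? $ty:ident) => {
        impl<'a$(, $lt)?> First<'a> for $ty<'a$(, $lt)?> {
            fn first(&self) -> &'a str {
                self.0
            }
        }
    };
}
first_impl!(One);     // expands with just 'a
first_impl!('r Two);  // expands with 'a and 'r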
mod reify {
use bumpalo::{collections::Vec, Bump};
use crate::layout::{Builtin, LambdaSet, Layout, UnionLayout};
use super::{InLayout, LayoutInterner};
// TODO: if recursion becomes a problem we could make this iterative
pub fn reify_recursive_layout<'a>(
arena: &'a Bump,
interner: &mut impl LayoutInterner<'a>,
slot: InLayout<'a>,
normalized_layout: Layout<'a>,
) -> Layout<'a> {
match normalized_layout {
Layout::Builtin(builtin) => {
Layout::Builtin(reify_builtin(arena, interner, slot, builtin))
}
Layout::Struct {
field_order_hash,
field_layouts,
} => Layout::Struct {
field_order_hash,
field_layouts: reify_layout_slice(arena, interner, slot, field_layouts),
},
Layout::Boxed(lay) => Layout::Boxed(reify_layout(arena, interner, slot, lay)),
Layout::Union(un) => Layout::Union(reify_union(arena, interner, slot, un)),
Layout::LambdaSet(ls) => Layout::LambdaSet(reify_lambda_set(arena, interner, slot, ls)),
Layout::RecursivePointer(l) => {
debug_assert_eq!(
l,
Layout::VOID,
"normalized layouts must always have VOID as the recursive pointer!"
);
Layout::RecursivePointer(slot)
}
}
}
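// Concretely, reification leaves every constructor intact and only repoints
// RecursivePointers from VOID to the reserved slot. A contrived sketch of
// the observable effect (hypothetical; compare the tests added below):
//
//     let normalized = Layout::Boxed(Layout::RECURSIVE_PTR);
//     let slot = interner.insert_recursive(arena, normalized);
//     match interner.get(slot) {
//         Layout::Boxed(inner) => match interner.get(inner) {
//             // the pointer now names the slot itself, closing the cycle
//             Layout::RecursivePointer(ptr) => assert_eq!(ptr, slot),
//             _ => unreachable!(),
//         },
//         _ => unreachable!(),
//     }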
fn insert_lambda_set(
&mut self,
args: &'a &'a [InLayout<'a>],
ret: InLayout<'a>,
set: &'a &'a [(Symbol, &'a [InLayout<'a>])],
representation: InLayout<'a>,
) -> LambdaSet<'a> {
// IDEA:
// - check if the "normalized" lambda set (with a void full_layout slot) maps to an
// inserted lambda set
// - if so, use that one immediately
// - otherwise, allocate a new slot, intern the lambda set, and then fill the slot in
let normalized_lambda_set = make_normalized_lamdba_set(args, ret, set, representation);
if let Some(lambda_set) = self.normalized_lambda_set_map.get(&normalized_lambda_set) {
return *lambda_set;
}
fn reify_layout<'a>(
arena: &'a Bump,
interner: &mut impl LayoutInterner<'a>,
slot: InLayout<'a>,
layout: InLayout<'a>,
) -> InLayout<'a> {
let layout = reify_recursive_layout(arena, interner, slot, interner.get(layout));
interner.insert(layout)
}
// This lambda set must be new to the interner, reserve a slot and fill it in.
let slot = unsafe { InLayout::from_index(self.vec.len()) };
let lambda_set = LambdaSet {
fn reify_layout_slice<'a>(
arena: &'a Bump,
interner: &mut impl LayoutInterner<'a>,
slot: InLayout<'a>,
layouts: &[InLayout<'a>],
) -> &'a [InLayout<'a>] {
let mut slice = Vec::with_capacity_in(layouts.len(), arena);
for &layout in layouts {
slice.push(reify_layout(arena, interner, slot, layout));
}
slice.into_bump_slice()
}
fn reify_layout_slice_slice<'a>(
arena: &'a Bump,
interner: &mut impl LayoutInterner<'a>,
slot: InLayout<'a>,
layouts: &[&[InLayout<'a>]],
) -> &'a [&'a [InLayout<'a>]] {
let mut slice = Vec::with_capacity_in(layouts.len(), arena);
for &layouts in layouts {
slice.push(reify_layout_slice(arena, interner, slot, layouts));
}
slice.into_bump_slice()
}
fn reify_builtin<'a>(
arena: &'a Bump,
interner: &mut impl LayoutInterner<'a>,
slot: InLayout<'a>,
builtin: Builtin<'a>,
) -> Builtin<'a> {
match builtin {
Builtin::Int(_)
| Builtin::Float(_)
| Builtin::Bool
| Builtin::Decimal
| Builtin::Str => builtin,
Builtin::List(elem) => Builtin::List(reify_layout(arena, interner, slot, elem)),
}
}
fn reify_union<'a>(
arena: &'a Bump,
interner: &mut impl LayoutInterner<'a>,
slot: InLayout<'a>,
union: UnionLayout<'a>,
) -> UnionLayout<'a> {
match union {
UnionLayout::NonRecursive(tags) => {
UnionLayout::NonRecursive(reify_layout_slice_slice(arena, interner, slot, tags))
}
UnionLayout::Recursive(tags) => {
UnionLayout::Recursive(reify_layout_slice_slice(arena, interner, slot, tags))
}
UnionLayout::NonNullableUnwrapped(fields) => {
UnionLayout::NonNullableUnwrapped(reify_layout_slice(arena, interner, slot, fields))
}
UnionLayout::NullableWrapped {
nullable_id,
other_tags,
} => UnionLayout::NullableWrapped {
nullable_id,
other_tags: reify_layout_slice_slice(arena, interner, slot, other_tags),
},
UnionLayout::NullableUnwrapped {
nullable_id,
other_fields,
} => UnionLayout::NullableUnwrapped {
nullable_id,
other_fields: reify_layout_slice(arena, interner, slot, other_fields),
},
}
}
fn reify_lambda_set<'a>(
arena: &'a Bump,
interner: &mut impl LayoutInterner<'a>,
slot: InLayout<'a>,
lambda_set: LambdaSet<'a>,
) -> LambdaSet<'a> {
let LambdaSet {
args,
ret,
set,
representation,
full_layout: slot,
full_layout: _,
} = lambda_set;
let args = reify_layout_slice(arena, interner, slot, args);
let ret = reify_layout(arena, interner, slot, ret);
let set = {
let mut new_set = Vec::with_capacity_in(set.len(), arena);
for (lambda, captures) in set.iter() {
new_set.push((*lambda, reify_layout_slice(arena, interner, slot, captures)));
}
new_set.into_bump_slice()
};
let filled_slot = self.insert(Layout::LambdaSet(lambda_set));
assert_eq!(slot, filled_slot);
let representation = reify_layout(arena, interner, slot, representation);
self.normalized_lambda_set_map
.insert(normalized_lambda_set, lambda_set);
lambda_set
}
fn get(&self, key: InLayout<'a>) -> Layout<'a> {
let InLayout(index, _) = key;
self.vec[index]
}
fn target_info(&self) -> TargetInfo {
self.target_info
interner.insert_lambda_set(arena.alloc(args), ret, arena.alloc(set), representation)
}
}
@@ -630,7 +920,7 @@ mod insert_lambda_set {
use roc_module::symbol::Symbol;
use roc_target::TargetInfo;
use crate::layout::Layout;
use crate::layout::{LambdaSet, Layout};
use super::{GlobalLayoutInterner, InLayout, LayoutInterner};
@@ -652,9 +942,9 @@ mod insert_lambda_set {
let mut interner = global.fork();
handles.push(std::thread::spawn(move || {
interner.insert_lambda_set(TEST_ARGS, TEST_RET, set, repr)
}));
}))
}
let ins: Vec<_> = handles.into_iter().map(|t| t.join().unwrap()).collect();
let ins: Vec<LambdaSet> = handles.into_iter().map(|t| t.join().unwrap()).collect();
let interned = ins[0];
assert!(ins.iter().all(|in2| interned == *in2));
}
@@ -707,3 +997,148 @@ mod insert_lambda_set {
assert_eq!(in1, in2);
}
}
#[cfg(test)]
mod insert_recursive_layout {
use bumpalo::Bump;
use roc_target::TargetInfo;
use crate::layout::{Builtin, InLayout, Layout, UnionLayout};
use super::{GlobalLayoutInterner, LayoutInterner, TLLayoutInterner};
const TARGET_INFO: TargetInfo = TargetInfo::default_x86_64();
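/// Builds a normalized two-tag recursive union, one tag holding `List rec` and
/// the other a one-field struct `{rec}`, with every recursion pointer still
/// Layout::VOID (via the pre-interned Layout::RECURSIVE_PTR).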
fn make_layout<'a>(arena: &'a Bump, interner: &mut impl LayoutInterner<'a>) -> Layout<'a> {
Layout::Union(UnionLayout::Recursive(&*arena.alloc([
&*arena.alloc([interner.insert(Layout::Builtin(Builtin::List(Layout::RECURSIVE_PTR)))]),
&*arena.alloc_slice_fill_iter([interner.insert(Layout::struct_no_name_order(
&*arena.alloc([Layout::RECURSIVE_PTR]),
))]),
])))
}
fn get_rec_ptr_index<'a>(interner: &TLLayoutInterner<'a>, layout: InLayout<'a>) -> usize {
match interner.get(layout) {
Layout::Union(UnionLayout::Recursive(&[&[l1], &[l2]])) => {
match (interner.get(l1), interner.get(l2)) {
(
Layout::Builtin(Builtin::List(l1)),
Layout::Struct {
field_order_hash: _,
field_layouts: &[l2],
},
) => match (interner.get(l1), interner.get(l2)) {
(Layout::RecursivePointer(i1), Layout::RecursivePointer(i2)) => {
assert_eq!(i1, i2);
assert_ne!(i1, Layout::VOID);
i1.0
}
_ => unreachable!(),
},
_ => unreachable!(),
}
}
_ => unreachable!(),
}
}
#[test]
fn write_two_threads() {
let arena = &Bump::new();
let global = GlobalLayoutInterner::with_capacity(2, TARGET_INFO);
let layout = {
let mut interner = global.fork();
make_layout(arena, &mut interner)
};
let in1 = {
let mut interner = global.fork();
interner.insert_recursive(arena, layout)
};
let in2 = {
let mut interner = global.fork();
interner.insert_recursive(arena, layout)
};
assert_eq!(in1, in2);
}
#[test]
fn many_threads_read_write() {
for _ in 0..100 {
let mut arenas: Vec<_> = std::iter::repeat_with(|| Bump::new()).take(10).collect();
let global = GlobalLayoutInterner::with_capacity(2, TARGET_INFO);
std::thread::scope(|s| {
let mut handles = Vec::with_capacity(10);
for arena in arenas.iter_mut() {
let mut interner = global.fork();
let handle = s.spawn(move || {
let layout = make_layout(arena, &mut interner);
let in_layout = interner.insert_recursive(arena, layout);
(in_layout, get_rec_ptr_index(&interner, in_layout))
});
handles.push(handle);
}
let ins: Vec<(InLayout, usize)> =
handles.into_iter().map(|t| t.join().unwrap()).collect();
let interned = ins[0];
assert!(ins.iter().all(|in2| interned == *in2));
});
}
}
#[test]
fn insert_then_reintern() {
let arena = &Bump::new();
let global = GlobalLayoutInterner::with_capacity(2, TARGET_INFO);
let mut interner = global.fork();
let layout = make_layout(arena, &mut interner);
let interned_layout = interner.insert_recursive(arena, layout);
let full_layout = interner.get(interned_layout);
assert_ne!(layout, full_layout);
assert_eq!(interner.insert(full_layout), interned_layout);
}
#[test]
fn write_global_then_single_threaded() {
let arena = &Bump::new();
let global = GlobalLayoutInterner::with_capacity(2, TARGET_INFO);
let layout = {
let mut interner = global.fork();
make_layout(arena, &mut interner)
};
let in1 = {
let mut interner = global.fork();
interner.insert_recursive(arena, layout)
};
let in2 = {
let mut st_interner = global.unwrap().unwrap();
st_interner.insert_recursive(arena, layout)
};
assert_eq!(in1, in2);
}
#[test]
fn write_single_threaded_then_global() {
let arena = &Bump::new();
let global = GlobalLayoutInterner::with_capacity(2, TARGET_INFO);
let mut st_interner = global.unwrap().unwrap();
let layout = make_layout(arena, &mut st_interner);
let in1 = st_interner.insert_recursive(arena, layout);
let global = st_interner.into_global();
let mut interner = global.fork();
let in2 = interner.insert_recursive(arena, layout);
assert_eq!(in1, in2);
}
}