misc cleanup + change refcount pointer to avoid branching

This commit is contained in:
Brendan Hansknecht 2023-03-12 10:29:53 -07:00
parent 216fd3f9f6
commit dfb748fb03
No known key found for this signature in database
GPG key ID: 0EA784685083E75B
11 changed files with 108 additions and 210 deletions

View file

@ -15,9 +15,14 @@ const IncN = fn (?[*]u8, usize) callconv(.C) void;
const Dec = fn (?[*]u8) callconv(.C) void;
const HasTagId = fn (u16, ?[*]u8) callconv(.C) extern struct { matched: bool, data: ?[*]u8 };
const SEAMLESS_SLICE_BIT: usize =
@bitCast(usize, @as(isize, std.math.minInt(isize)));
pub const RocList = extern struct {
bytes: ?[*]u8,
length: usize,
// This technically points to just after the refcount.
// This is an optimization that enables using one code path for regular lists and seamless slices when getting the refcount ptr.
capacity_or_ref_ptr: usize,
pub inline fn len(self: RocList) usize {
@ -25,16 +30,24 @@ pub const RocList = extern struct {
}
pub fn getCapacity(self: RocList) usize {
if (!self.isSeamlessSlice()) {
return self.capacity_or_ref_ptr;
}
return self.length;
const list_capacity = self.capacity_or_ref_ptr;
const slice_capacity = self.length;
const slice_mask = self.seamlessSliceMask();
const capacity = (list_capacity & ~slice_mask) | (slice_capacity & slice_mask);
return capacity;
}
pub fn isSeamlessSlice(self: RocList) bool {
return @bitCast(isize, self.capacity_or_ref_ptr) < 0;
}
// This returns all ones if the list is a seamless slice.
// Otherwise, it returns all zeros.
// This is done without branching for optimization purposes.
pub fn seamlessSliceMask(self: RocList) usize {
return @bitCast(usize, @bitCast(isize, self.capacity_or_ref_ptr) >> (@bitSizeOf(isize) - 1));
}
pub fn isEmpty(self: RocList) bool {
return self.len() == 0;
}
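
Side note on the mask trick above: arithmetically shifting the sign bit of capacity_or_ref_ptr across the whole word yields all ones for a seamless slice (high bit set) and all zeros for a regular list, so either value can be chosen with plain AND/OR instead of a branch. A minimal standalone Rust sketch of the same idea, using hypothetical helper names that are not part of this commit:

fn seamless_slice_mask(capacity_or_ref_ptr: usize) -> usize {
    // Arithmetic right shift copies the sign bit into every position.
    ((capacity_or_ref_ptr as isize) >> (usize::BITS - 1)) as usize
}

fn branchless_select(capacity_or_ref_ptr: usize, list_value: usize, slice_value: usize) -> usize {
    let mask = seamless_slice_mask(capacity_or_ref_ptr);
    (list_value & !mask) | (slice_value & mask)
}

fn main() {
    const HIGH_BIT: usize = 1 << (usize::BITS - 1);
    // Regular list: mask is all zeros, so the list value wins.
    assert_eq!(branchless_select(42, 7, 9), 7);
    // Seamless slice: mask is all ones, so the slice value wins.
    assert_eq!(branchless_select(HIGH_BIT | 42, 7, 9), 9);
}

This is the same select that getCapacity and getRefcountPtr now perform.
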
@ -86,16 +99,20 @@ pub const RocList = extern struct {
return list;
}
// returns a pointer to just after the refcount.
// It is just after the refcount as an optimization for other shared code paths.
// For regular lists, it just returns their bytes pointer.
// For seamless slices, it returns the pointer stored in capacity_or_ref_ptr.
pub fn getRefcountPtr(self: RocList) ?[*]u8 {
const list_ref_ptr = @ptrToInt(self.bytes);
const slice_ref_ptr = self.capacity_or_ref_ptr << 1;
const slice_mask = self.seamlessSliceMask();
const ref_ptr = (list_ref_ptr & ~slice_mask) | (slice_ref_ptr & slice_mask);
return @intToPtr(?[*]u8, ref_ptr);
}
pub fn decref(self: RocList, alignment: u32) void {
// TODO: I am pretty sure there is a way to do this without a branch.
// Bit manipulation should be able to conditionally select the correct pointer.
// If this is updated, we should also update decref in build_list.rs and modify_refcount_list from refcounting.rs
if (self.isSeamlessSlice()) {
const ref_ptr = @intToPtr([*]isize, self.capacity_or_ref_ptr << 1);
utils.decref_ptr_to_refcount(ref_ptr, alignment);
} else {
utils.decref(self.bytes, self.getCapacity(), alignment);
}
utils.decref(self.getRefcountPtr(), self.getCapacity(), alignment);
}
pub fn elements(self: RocList, comptime T: type) ?[*]T {
@ -178,7 +195,7 @@ pub const RocList = extern struct {
) RocList {
if (self.bytes) |source_ptr| {
if (self.isUnique() and !self.isSeamlessSlice()) {
const capacity = self.getCapacity();
const capacity = self.capacity_or_ref_ptr;
if (capacity >= new_length) {
return RocList{ .bytes = self.bytes, .length = new_length, .capacity_or_ref_ptr = capacity };
} else {
@ -579,27 +596,19 @@ pub fn listSublist(
dec(element);
}
if (list.isUnique() and start == 0) {
if (start == 0 and list.isUnique()) {
var output = list;
output.length = keep_len;
return output;
} else {
if (list.isSeamlessSlice()) {
return RocList{
.bytes = source_ptr + start * element_width,
.length = keep_len,
.capacity_or_ref_ptr = list.capacity_or_ref_ptr,
};
}
const ref_ptr = @ptrCast([*]isize, @alignCast(@alignOf(isize), source_ptr)) - 1;
// This should be a usize with only the highest bit set.
const seamless_slice_bit =
@bitCast(usize, @as(isize, std.math.minInt(isize)));
const list_ref_ptr = (@ptrToInt(source_ptr) >> 1) | SEAMLESS_SLICE_BIT;
const slice_ref_ptr = list.capacity_or_ref_ptr;
const slice_mask = list.seamlessSliceMask();
const ref_ptr = (list_ref_ptr & ~slice_mask) | (slice_ref_ptr & slice_mask);
return RocList{
.bytes = source_ptr + start * element_width,
.length = keep_len,
.capacity_or_ref_ptr = (@ptrToInt(ref_ptr) >> 1) | seamless_slice_bit,
.capacity_or_ref_ptr = ref_ptr,
};
}
}
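
For reference, the encoding built in listSublist above stores the refcount pointer shifted right by one with the high bit used as the seamless-slice tag, and getRefcountPtr undoes it with a left shift; since the pointer is at least 2-byte aligned, the dropped low bit is always zero. A hedged Rust sketch of that round trip, with illustrative names only:

const SEAMLESS_SLICE_BIT: usize = 1 << (usize::BITS - 1);

// Encode: drop the always-zero low bit of the aligned pointer and set the tag bit.
fn encode_slice_ref_ptr(ref_ptr: usize) -> usize {
    debug_assert_eq!(ref_ptr & 1, 0, "refcount pointers are at least 2-byte aligned");
    (ref_ptr >> 1) | SEAMLESS_SLICE_BIT
}

// Decode: shift left by one; the tag bit falls off the top and the low bit returns as zero.
fn decode_slice_ref_ptr(capacity_or_ref_ptr: usize) -> usize {
    capacity_or_ref_ptr << 1
}

fn main() {
    let ref_ptr = 0x1008usize; // made-up, even address
    let encoded = encode_slice_ref_ptr(ref_ptr);
    assert!(encoded & SEAMLESS_SLICE_BIT != 0); // the high bit marks a seamless slice
    assert_eq!(decode_slice_ref_ptr(encoded), ref_ptr);
}
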
@ -900,6 +909,18 @@ pub fn listIsUnique(
return list.isEmpty() or list.isUnique();
}
pub fn listCapacity(
list: RocList,
) callconv(.C) usize {
return list.getCapacity();
}
pub fn listRefcountPtr(
list: RocList,
) callconv(.C) ?[*]u8 {
return list.getRefcountPtr();
}
test "listConcat: non-unique with unique overlapping" {
var nonUnique = RocList.fromSlice(u8, ([_]u8{1})[0..]);
var bytes: [*]u8 = @ptrCast([*]u8, nonUnique.bytes);

View file

@ -54,6 +54,8 @@ comptime {
exportListFn(list.listReplaceInPlace, "replace_in_place");
exportListFn(list.listSwap, "swap");
exportListFn(list.listIsUnique, "is_unique");
exportListFn(list.listCapacity, "capacity");
exportListFn(list.listRefcountPtr, "refcount_ptr");
}
// Num Module

View file

@ -58,6 +58,7 @@ pub const RocStr = extern struct {
}
pub fn fromByteList(list: RocList) RocStr {
// TODO: upon adding string seamless slices, I believe this branch can be changed to bit manipulation.
if (list.isSeamlessSlice()) {
// Str doesn't have seamless slices yet.
// Need to copy.

View file

@ -204,7 +204,7 @@ pub fn decref(
decref_ptr_to_refcount(isizes - 1, alignment);
}
pub inline fn decref_ptr_to_refcount(
inline fn decref_ptr_to_refcount(
refcount_ptr: [*]isize,
alignment: u32,
) void {
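
For context on the "just after the refcount" convention used throughout this commit: the refcount (a single isize) is stored immediately before the element data, so decref above can step back one isize from the data pointer to reach it (the `isizes - 1`). A toy Rust model of that layout, assuming no extra padding between the refcount and the data (the real allocation may pad the front up to the allocation alignment):

#[repr(C)]
struct ToyAllocation {
    refcount: isize, // stored directly before the element data
    data: [u64; 4],
}

// Equivalent of `isizes - 1` in decref above: from a pointer just after the
// refcount (the data pointer), step back one isize to reach the refcount.
fn refcount_ptr_from_data(data_ptr: *const u64) -> *const isize {
    unsafe { (data_ptr as *const isize).sub(1) }
}

fn main() {
    let alloc = ToyAllocation { refcount: 1, data: [0; 4] };
    let rc_ptr = refcount_ptr_from_data(alloc.data.as_ptr());
    assert_eq!(unsafe { *rc_ptr }, alloc.refcount);
}
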

View file

@ -350,6 +350,8 @@ pub const LIST_IS_UNIQUE: &str = "roc_builtins.list.is_unique";
pub const LIST_PREPEND: &str = "roc_builtins.list.prepend";
pub const LIST_APPEND_UNSAFE: &str = "roc_builtins.list.append_unsafe";
pub const LIST_RESERVE: &str = "roc_builtins.list.reserve";
pub const LIST_CAPACITY: &str = "roc_builtins.list.capacity";
pub const LIST_REFCOUNT_PTR: &str = "roc_builtins.list.refcount_ptr";
pub const DEC_FROM_STR: &str = "roc_builtins.dec.from_str";
pub const DEC_TO_STR: &str = "roc_builtins.dec.to_str";

View file

@ -2789,7 +2789,7 @@ pub fn build_exp_stmt<'a, 'ctx, 'env>(
let alignment =
element_layout.alignment_bytes(layout_interner, env.target_info);
build_list::decref(env, value.into_struct_value(), alignment, parent);
build_list::decref(env, value.into_struct_value(), alignment);
}
lay if lay.is_refcounted() => {

View file

@ -394,15 +394,34 @@ pub(crate) fn list_len<'ctx>(
}
/// List.capacity : List * -> Nat
pub(crate) fn list_capacity<'ctx>(
builder: &Builder<'ctx>,
pub(crate) fn list_capacity<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
wrapper_struct: StructValue<'ctx>,
) -> IntValue<'ctx> {
// TODO: This won't work on seamless slices. Needs to return the len if the capacity is negative.
builder
.build_extract_value(wrapper_struct, Builtin::WRAPPER_CAPACITY, "list_capacity")
.unwrap()
.into_int_value()
call_list_bitcode_fn(
env,
&[wrapper_struct],
&[],
BitcodeReturns::Basic,
bitcode::LIST_CAPACITY,
)
.into_int_value()
}
// Gets a pointer to just after the refcount for a list or seamless slice.
// The value is just after the refcount so that normal lists and seamless slices can share code paths easily.
pub(crate) fn list_refcount_ptr<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
wrapper_struct: StructValue<'ctx>,
) -> PointerValue<'ctx> {
call_list_bitcode_fn(
env,
&[wrapper_struct],
&[],
BitcodeReturns::Basic,
bitcode::LIST_REFCOUNT_PTR,
)
.into_pointer_value()
}
pub(crate) fn destructure<'ctx>(
@ -801,56 +820,8 @@ pub(crate) fn decref<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
wrapper_struct: StructValue<'ctx>,
alignment: u32,
parent: FunctionValue<'ctx>,
) {
let builder = env.builder;
let ctx = env.context;
let refcount_ptr = list_refcount_ptr(env, wrapper_struct);
let capacity = list_capacity(builder, wrapper_struct);
let is_regular_list = builder.build_int_compare(
IntPredicate::SGE,
capacity,
env.ptr_int().const_zero(),
"cap >= 0",
);
let get_list_ptr_block = ctx.append_basic_block(parent, "get_list_ptr");
let get_slice_ptr_block = ctx.append_basic_block(parent, "get_slice_ptr");
let decref_block = ctx.append_basic_block(parent, "decref");
builder.build_conditional_branch(is_regular_list, get_list_ptr_block, get_slice_ptr_block);
builder.position_at_end(get_list_ptr_block);
let ptr_type = env.context.i8_type().ptr_type(AddressSpace::default());
let (_, list_ptr) = load_list(builder, wrapper_struct, ptr_type);
builder.build_unconditional_branch(decref_block);
builder.position_at_end(get_slice_ptr_block);
let refcount_ptr_int = builder.build_left_shift(
capacity,
env.ptr_int().const_int(1, false),
"extract_refcount_from_capacity",
);
// TODO: adding one here feels silly.
// We should probably expose a zig builtin to directly deal with the refcount pointer.
// Instead we add one and then zig subtracts one.
let slice_ptr_int = builder.build_int_add(
refcount_ptr_int,
env.ptr_int().const_int(1, false),
"inc_refcount_ptr",
);
let slice_ptr = builder.build_int_to_ptr(slice_ptr_int, ptr_type, "to_slice_ptr");
builder.build_unconditional_branch(decref_block);
builder.position_at_end(decref_block);
let result = builder.build_phi(ptr_type, "branch");
result.add_incoming(&[
(&list_ptr, get_list_ptr_block),
(&slice_ptr, get_slice_ptr_block),
]);
crate::llvm::refcounting::decref_pointer_check_null(
env,
result.as_basic_value().into_pointer_value(),
alignment,
);
crate::llvm::refcounting::decref_pointer_check_null(env, refcount_ptr, alignment);
}

View file

@ -28,8 +28,8 @@ use crate::llvm::{
load_roc_value, roc_function_call, BuilderExt, RocReturn,
},
build_list::{
list_append_unsafe, list_capacity, list_concat, list_drop_at, list_get_unsafe, list_len,
list_map, list_map2, list_map3, list_map4, list_prepend, list_replace_unsafe, list_reserve,
list_append_unsafe, list_concat, list_drop_at, list_get_unsafe, list_len, list_map,
list_map2, list_map3, list_map4, list_prepend, list_replace_unsafe, list_reserve,
list_sort_with, list_sublist, list_swap, list_symbol_to_c_abi, list_with_capacity,
pass_update_mode,
},
@ -632,10 +632,16 @@ pub(crate) fn run_low_level<'a, 'ctx, 'env>(
list_len(env.builder, list.into_struct_value()).into()
}
ListGetCapacity => {
// List.capacity : List * -> Nat
// List.capacity: List a -> Nat
arguments!(list);
list_capacity(env.builder, list.into_struct_value()).into()
call_list_bitcode_fn(
env,
&[list.into_struct_value()],
&[],
BitcodeReturns::Basic,
bitcode::LIST_CAPACITY,
)
}
ListWithCapacity => {
// List.withCapacity : Nat -> List a

View file

@ -5,7 +5,9 @@ use crate::llvm::build::{
add_func, cast_basic_basic, get_tag_id, tag_pointer_clear_tag_id, use_roc_value, Env,
FAST_CALL_CONV,
};
use crate::llvm::build_list::{incrementing_elem_loop, list_capacity, load_list};
use crate::llvm::build_list::{
incrementing_elem_loop, list_capacity, list_refcount_ptr, load_list,
};
use crate::llvm::convert::{basic_type_from_layout, zig_str_type, RocUnion};
use bumpalo::collections::Vec;
use inkwell::basic_block::BasicBlock;
@ -45,21 +47,6 @@ impl<'ctx> PointerToRefcount<'ctx> {
Self { value }
}
/// # Safety
///
/// the invariant is that the given pointer really points to the refcount,
/// not the data, and only is the start of the allocated buffer if the
/// alignment works out that way.
pub unsafe fn from_ptr_int<'a, 'env>(env: &Env<'a, 'ctx, 'env>, int: IntValue<'ctx>) -> Self {
let refcount_type = env.ptr_int();
let refcount_ptr_type = refcount_type.ptr_type(AddressSpace::default());
let value = env
.builder
.build_int_to_ptr(int, refcount_ptr_type, "to_refcount_ptr");
Self { value }
}
pub fn from_ptr_to_data<'a, 'env>(
env: &Env<'a, 'ctx, 'env>,
data_ptr: PointerValue<'ctx>,
@ -706,10 +693,10 @@ fn modify_refcount_list_help<'a, 'ctx, 'env>(
let parent = fn_val;
let original_wrapper = arg_val.into_struct_value();
let capacity = list_capacity(builder, original_wrapper);
let capacity = list_capacity(env, original_wrapper);
let is_non_empty = builder.build_int_compare(
IntPredicate::SGT,
IntPredicate::UGT,
capacity,
env.ptr_int().const_zero(),
"cap > 0",
@ -717,11 +704,9 @@ fn modify_refcount_list_help<'a, 'ctx, 'env>(
// build blocks
let modification_list_block = ctx.append_basic_block(parent, "modification_list_block");
let check_slice_block = ctx.append_basic_block(parent, "check_slice_block");
let modification_slice_block = ctx.append_basic_block(parent, "modification_slice_block");
let cont_block = ctx.append_basic_block(parent, "modify_rc_list_cont");
builder.build_conditional_branch(is_non_empty, modification_list_block, check_slice_block);
builder.build_conditional_branch(is_non_empty, modification_list_block, cont_block);
builder.position_at_end(modification_list_block);
@ -754,60 +739,8 @@ fn modify_refcount_list_help<'a, 'ctx, 'env>(
);
}
let refcount_ptr = PointerToRefcount::from_list_wrapper(env, original_wrapper);
let call_mode = mode_to_call_mode(fn_val, mode);
refcount_ptr.modify(call_mode, layout, env, layout_interner);
builder.build_unconditional_branch(cont_block);
builder.position_at_end(check_slice_block);
let is_seamless_slice = builder.build_int_compare(
IntPredicate::SLT,
capacity,
env.ptr_int().const_zero(),
"cap < 0",
);
builder.build_conditional_branch(is_seamless_slice, modification_slice_block, cont_block);
builder.position_at_end(modification_slice_block);
if layout_interner.contains_refcounted(element_layout) {
let ptr_type = basic_type_from_layout(env, layout_interner, element_layout)
.ptr_type(AddressSpace::default());
let (len, ptr) = load_list(env.builder, original_wrapper, ptr_type);
let loop_fn = |layout_interner, _index, element| {
modify_refcount_layout_help(
env,
layout_interner,
layout_ids,
mode.to_call_mode(fn_val),
element,
element_layout,
);
};
incrementing_elem_loop(
env,
layout_interner,
parent,
element_layout,
ptr,
len,
"modify_rc_index",
loop_fn,
);
}
// a slice's refcount is `capacity << 1`
let refcount_ptr_int = builder.build_left_shift(
capacity,
env.ptr_int().const_int(1, false),
"extract_refcount_from_capacity",
);
let refcount_ptr = unsafe { PointerToRefcount::from_ptr_int(env, refcount_ptr_int) };
let refcount_ptr =
PointerToRefcount::from_ptr_to_data(env, list_refcount_ptr(env, original_wrapper));
let call_mode = mode_to_call_mode(fn_val, mode);
refcount_ptr.modify(call_mode, layout, env, layout_interner);

View file

@ -318,44 +318,7 @@ impl<'a> LowLevelCall<'a> {
_ => internal_error!("invalid storage for List"),
},
ListGetCapacity => match backend.storage.get(&self.arguments[0]) {
StoredValue::StackMemory { location, .. } => {
let (local_id, offset) =
location.local_and_offset(backend.storage.stack_frame_pointer);
backend.code_builder.get_local(local_id);
// List is stored as (pointer, length, capacity),
// with each of those fields being 4 bytes on wasm.
// So the capacity is 8 bytes after the start of the struct.
//
// WRAPPER_CAPACITY represents the index of the capacity field
// (which is 2 as of the writing of this comment). If the field order
// ever changes, WRAPPER_CAPACITY should be updated and this logic should
// continue to work even though this comment may become inaccurate.
// On top of this, if the capacity is less than zero, the list is a seamless slice.
// We need to return the length in that case.
let code_builder = &mut backend.code_builder;
let tmp = backend.storage.create_anonymous_local(ValueType::I32);
code_builder.i32_load(Align::Bytes4, offset + (4 * Builtin::WRAPPER_CAPACITY));
code_builder.i64_const(0);
code_builder.i32_lt_s(); // capacity < 0
code_builder.if_();
{
code_builder.i32_load(Align::Bytes4, offset + (4 * Builtin::WRAPPER_LEN));
code_builder.set_local(tmp);
}
code_builder.else_();
{
code_builder
.i32_load(Align::Bytes4, offset + (4 * Builtin::WRAPPER_CAPACITY));
code_builder.set_local(tmp);
}
code_builder.end();
code_builder.get_local(tmp);
}
_ => internal_error!("invalid storage for List"),
},
ListGetCapacity => self.load_args_and_call_zig(backend, bitcode::LIST_CAPACITY),
ListIsUnique => self.load_args_and_call_zig(backend, bitcode::LIST_IS_UNIQUE),

View file

@ -28,6 +28,8 @@ use serde::{
pub struct RocList<T> {
elements: Option<NonNull<ManuallyDrop<T>>>,
length: usize,
// This technically points to just after the refcount.
// This is an optimization that enables using one code path for regular lists and slices when getting the refcount ptr.
capacity_or_ref_ptr: usize,
}
@ -181,14 +183,11 @@ impl<T> RocList<T> {
/// Useful for doing memcpy on the underlying allocation. Returns NULL if list is empty.
pub(crate) unsafe fn ptr_to_allocation(&self) -> *mut c_void {
let alignment = Self::alloc_alignment() as usize;
if self.is_seamless_slice() {
(self.capacity_or_ref_ptr << 1) as *mut _
((self.capacity_or_ref_ptr << 1) - alignment) as *mut _
} else {
unsafe {
self.ptr_to_first_elem()
.cast::<u8>()
.sub(Self::alloc_alignment() as usize) as *mut _
}
unsafe { self.ptr_to_first_elem().cast::<u8>().sub(alignment) as *mut _ }
}
}
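
A worked sketch of the ptr_to_allocation arithmetic above, under the assumption that the allocation keeps alloc_alignment bytes of header (padding plus the refcount) directly before the first element, so both branches reduce to "pointer just after the refcount, minus the alignment"; the slice branch now subtracts the alignment as well. Addresses below are made up:

const HIGH_BIT: usize = 1 << (usize::BITS - 1);

// Regular list: the element pointer already sits just after the refcount.
fn allocation_from_list(first_elem: usize, alignment: usize) -> usize {
    first_elem - alignment
}

// Seamless slice: capacity_or_ref_ptr holds the tagged, right-shifted pointer to just
// after the refcount; shifting left recovers it (dropping the tag bit), then the
// `alignment`-byte header is subtracted.
fn allocation_from_slice(capacity_or_ref_ptr: usize, alignment: usize) -> usize {
    (capacity_or_ref_ptr << 1) - alignment
}

fn main() {
    let alignment = 8usize;
    let allocation = 0x2000usize;      // made-up start of the heap allocation
    let data = allocation + alignment; // elements begin right after the refcount
    assert_eq!(allocation_from_list(data, alignment), allocation);

    let encoded = (data >> 1) | HIGH_BIT; // how a slice into this list stores its ref ptr
    assert_eq!(allocation_from_slice(encoded, alignment), allocation);
}
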