From dfb748fb030767bb352aa1dd166a206aeab75a6e Mon Sep 17 00:00:00 2001
From: Brendan Hansknecht
Date: Sun, 12 Mar 2023 10:29:53 -0700
Subject: [PATCH] misc cleanup + change refcount pointer to avoid branching

---
 crates/compiler/builtins/bitcode/src/list.zig |  77 +++++++++++------
 crates/compiler/builtins/bitcode/src/main.zig |   2 +
 crates/compiler/builtins/bitcode/src/str.zig  |   1 +
 .../compiler/builtins/bitcode/src/utils.zig   |   2 +-
 crates/compiler/builtins/src/bitcode.rs       |   2 +
 crates/compiler/gen_llvm/src/llvm/build.rs    |   2 +-
 .../compiler/gen_llvm/src/llvm/build_list.rs  |  85 ++++++------------
 crates/compiler/gen_llvm/src/llvm/lowlevel.rs |  14 ++-
 .../compiler/gen_llvm/src/llvm/refcounting.rs |  83 ++----------------
 crates/compiler/gen_wasm/src/low_level.rs     |  39 +--------
 crates/roc_std/src/roc_list.rs                |  11 ++-
 11 files changed, 108 insertions(+), 210 deletions(-)

diff --git a/crates/compiler/builtins/bitcode/src/list.zig b/crates/compiler/builtins/bitcode/src/list.zig
index 0c07870d0d..2d16ecfade 100644
--- a/crates/compiler/builtins/bitcode/src/list.zig
+++ b/crates/compiler/builtins/bitcode/src/list.zig
@@ -15,9 +15,14 @@ const IncN = fn (?[*]u8, usize) callconv(.C) void;
 const Dec = fn (?[*]u8) callconv(.C) void;
 const HasTagId = fn (u16, ?[*]u8) callconv(.C) extern struct { matched: bool, data: ?[*]u8 };
 
+const SEAMLESS_SLICE_BIT: usize =
+    @bitCast(usize, @as(isize, std.math.minInt(isize)));
+
 pub const RocList = extern struct {
     bytes: ?[*]u8,
     length: usize,
+    // This technically points to just after the refcount.
+    // This is an optimization that enables using one code path to get the refcount pointer for both regular lists and seamless slices.
     capacity_or_ref_ptr: usize,
 
     pub inline fn len(self: RocList) usize {
@@ -25,16 +30,24 @@ pub const RocList = extern struct {
     }
 
     pub fn getCapacity(self: RocList) usize {
-        if (!self.isSeamlessSlice()) {
-            return self.capacity_or_ref_ptr;
-        }
-        return self.length;
+        const list_capacity = self.capacity_or_ref_ptr;
+        const slice_capacity = self.length;
+        const slice_mask = self.seamlessSliceMask();
+        const capacity = (list_capacity & ~slice_mask) | (slice_capacity & slice_mask);
+        return capacity;
     }
 
     pub fn isSeamlessSlice(self: RocList) bool {
         return @bitCast(isize, self.capacity_or_ref_ptr) < 0;
     }
 
+    // This returns all ones if the list is a seamless slice.
+    // Otherwise, it returns all zeros.
+    // This is done without branching for optimization purposes.
+    pub fn seamlessSliceMask(self: RocList) usize {
+        return @bitCast(usize, @bitCast(isize, self.capacity_or_ref_ptr) >> (@bitSizeOf(isize) - 1));
+    }
+
     pub fn isEmpty(self: RocList) bool {
         return self.len() == 0;
     }
@@ -86,16 +99,20 @@ pub const RocList = extern struct {
         return list;
     }
 
+    // Returns a pointer to just after the refcount.
+    // It points just after the refcount as an optimization for other shared code paths.
+    // For regular lists, it just returns their bytes pointer.
+    // For seamless slices, it returns the pointer stored in capacity_or_ref_ptr.
+    pub fn getRefcountPtr(self: RocList) ?[*]u8 {
+        const list_ref_ptr = @ptrToInt(self.bytes);
+        const slice_ref_ptr = self.capacity_or_ref_ptr << 1;
+        const slice_mask = self.seamlessSliceMask();
+        const ref_ptr = (list_ref_ptr & ~slice_mask) | (slice_ref_ptr & slice_mask);
+        return @intToPtr(?[*]u8, ref_ptr);
+    }
+
     pub fn decref(self: RocList, alignment: u32) void {
-        // TODO: I am pretty sure there is a way to do this without a branch.
-        // Bit manipulation should be able to conditionally select the correct pointer.
- // If this is updated, we should also update decref in build_list.rs and modify_refcount_list from refcounting.rs - if (self.isSeamlessSlice()) { - const ref_ptr = @intToPtr([*]isize, self.capacity_or_ref_ptr << 1); - utils.decref_ptr_to_refcount(ref_ptr, alignment); - } else { - utils.decref(self.bytes, self.getCapacity(), alignment); - } + utils.decref(self.getRefcountPtr(), self.getCapacity(), alignment); } pub fn elements(self: RocList, comptime T: type) ?[*]T { @@ -178,7 +195,7 @@ pub const RocList = extern struct { ) RocList { if (self.bytes) |source_ptr| { if (self.isUnique() and !self.isSeamlessSlice()) { - const capacity = self.getCapacity(); + const capacity = self.capacity_or_ref_ptr; if (capacity >= new_length) { return RocList{ .bytes = self.bytes, .length = new_length, .capacity_or_ref_ptr = capacity }; } else { @@ -579,27 +596,19 @@ pub fn listSublist( dec(element); } - if (list.isUnique() and start == 0) { + if (start == 0 and list.isUnique()) { var output = list; output.length = keep_len; return output; } else { - if (list.isSeamlessSlice()) { - return RocList{ - .bytes = source_ptr + start * element_width, - .length = keep_len, - .capacity_or_ref_ptr = list.capacity_or_ref_ptr, - }; - } - - const ref_ptr = @ptrCast([*]isize, @alignCast(@alignOf(isize), source_ptr)) - 1; - // This should be a usize with only the highest bit set. - const seamless_slice_bit = - @bitCast(usize, @as(isize, std.math.minInt(isize))); + const list_ref_ptr = (@ptrToInt(source_ptr) >> 1) | SEAMLESS_SLICE_BIT; + const slice_ref_ptr = list.capacity_or_ref_ptr; + const slice_mask = list.seamlessSliceMask(); + const ref_ptr = (list_ref_ptr & ~slice_mask) | (slice_ref_ptr & slice_mask); return RocList{ .bytes = source_ptr + start * element_width, .length = keep_len, - .capacity_or_ref_ptr = (@ptrToInt(ref_ptr) >> 1) | seamless_slice_bit, + .capacity_or_ref_ptr = ref_ptr, }; } } @@ -900,6 +909,18 @@ pub fn listIsUnique( return list.isEmpty() or list.isUnique(); } +pub fn listCapacity( + list: RocList, +) callconv(.C) usize { + return list.getCapacity(); +} + +pub fn listRefcountPtr( + list: RocList, +) callconv(.C) ?[*]u8 { + return list.getRefcountPtr(); +} + test "listConcat: non-unique with unique overlapping" { var nonUnique = RocList.fromSlice(u8, ([_]u8{1})[0..]); var bytes: [*]u8 = @ptrCast([*]u8, nonUnique.bytes); diff --git a/crates/compiler/builtins/bitcode/src/main.zig b/crates/compiler/builtins/bitcode/src/main.zig index b6d0ce66b5..e0c14e1431 100644 --- a/crates/compiler/builtins/bitcode/src/main.zig +++ b/crates/compiler/builtins/bitcode/src/main.zig @@ -54,6 +54,8 @@ comptime { exportListFn(list.listReplaceInPlace, "replace_in_place"); exportListFn(list.listSwap, "swap"); exportListFn(list.listIsUnique, "is_unique"); + exportListFn(list.listCapacity, "capacity"); + exportListFn(list.listRefcountPtr, "refcount_ptr"); } // Num Module diff --git a/crates/compiler/builtins/bitcode/src/str.zig b/crates/compiler/builtins/bitcode/src/str.zig index aa91260f42..8c967c7510 100644 --- a/crates/compiler/builtins/bitcode/src/str.zig +++ b/crates/compiler/builtins/bitcode/src/str.zig @@ -58,6 +58,7 @@ pub const RocStr = extern struct { } pub fn fromByteList(list: RocList) RocStr { + // TODO: upon adding string seamless slices, I believe this branch can be changed to bit manipulation. if (list.isSeamlessSlice()) { // Str doesn't have seamless slices yet. // Need to copy. 
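Reviewer note, not part of the patch: the branchless select that `seamlessSliceMask`, `getCapacity`, and `getRefcountPtr` rely on is easy to check in isolation. Below is a minimal standalone Rust sketch of the same technique; the names are illustrative only, not APIs from this codebase.

```rust
// The high bit of capacity_or_ref_ptr tags a seamless slice.
const SEAMLESS_SLICE_BIT: usize = isize::MIN as usize;

// An arithmetic right shift smears the sign bit across the whole word:
// all ones for a seamless slice, all zeros for a regular list.
fn seamless_slice_mask(capacity_or_ref_ptr: usize) -> usize {
    ((capacity_or_ref_ptr as isize) >> (usize::BITS - 1)) as usize
}

// (a & !mask) | (b & mask) yields `a` when the mask is all zeros
// and `b` when the mask is all ones -- no branch required.
fn get_capacity(length: usize, capacity_or_ref_ptr: usize) -> usize {
    let mask = seamless_slice_mask(capacity_or_ref_ptr);
    (capacity_or_ref_ptr & !mask) | (length & mask)
}

fn main() {
    // Regular list: the field holds the capacity directly.
    assert_eq!(get_capacity(10, 16), 16);
    // Seamless slice: the high bit is set, so capacity == length.
    assert_eq!(get_capacity(10, SEAMLESS_SLICE_BIT | 0x1234), 10);
}
```

The same mask-and-blend is what `listSublist` uses to build the new `capacity_or_ref_ptr`, so slicing a slice keeps pointing at the original allocation's refcount.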
diff --git a/crates/compiler/builtins/bitcode/src/utils.zig b/crates/compiler/builtins/bitcode/src/utils.zig index d9fb095914..841b143863 100644 --- a/crates/compiler/builtins/bitcode/src/utils.zig +++ b/crates/compiler/builtins/bitcode/src/utils.zig @@ -204,7 +204,7 @@ pub fn decref( decref_ptr_to_refcount(isizes - 1, alignment); } -pub inline fn decref_ptr_to_refcount( +inline fn decref_ptr_to_refcount( refcount_ptr: [*]isize, alignment: u32, ) void { diff --git a/crates/compiler/builtins/src/bitcode.rs b/crates/compiler/builtins/src/bitcode.rs index 69782f2afa..5d3ddad35c 100644 --- a/crates/compiler/builtins/src/bitcode.rs +++ b/crates/compiler/builtins/src/bitcode.rs @@ -350,6 +350,8 @@ pub const LIST_IS_UNIQUE: &str = "roc_builtins.list.is_unique"; pub const LIST_PREPEND: &str = "roc_builtins.list.prepend"; pub const LIST_APPEND_UNSAFE: &str = "roc_builtins.list.append_unsafe"; pub const LIST_RESERVE: &str = "roc_builtins.list.reserve"; +pub const LIST_CAPACITY: &str = "roc_builtins.list.capacity"; +pub const LIST_REFCOUNT_PTR: &str = "roc_builtins.list.refcount_ptr"; pub const DEC_FROM_STR: &str = "roc_builtins.dec.from_str"; pub const DEC_TO_STR: &str = "roc_builtins.dec.to_str"; diff --git a/crates/compiler/gen_llvm/src/llvm/build.rs b/crates/compiler/gen_llvm/src/llvm/build.rs index 6427197bc3..66cfaebb6b 100644 --- a/crates/compiler/gen_llvm/src/llvm/build.rs +++ b/crates/compiler/gen_llvm/src/llvm/build.rs @@ -2789,7 +2789,7 @@ pub fn build_exp_stmt<'a, 'ctx, 'env>( let alignment = element_layout.alignment_bytes(layout_interner, env.target_info); - build_list::decref(env, value.into_struct_value(), alignment, parent); + build_list::decref(env, value.into_struct_value(), alignment); } lay if lay.is_refcounted() => { diff --git a/crates/compiler/gen_llvm/src/llvm/build_list.rs b/crates/compiler/gen_llvm/src/llvm/build_list.rs index c0fe38a5ca..7e38259800 100644 --- a/crates/compiler/gen_llvm/src/llvm/build_list.rs +++ b/crates/compiler/gen_llvm/src/llvm/build_list.rs @@ -394,15 +394,34 @@ pub(crate) fn list_len<'ctx>( } /// List.capacity : List * -> Nat -pub(crate) fn list_capacity<'ctx>( - builder: &Builder<'ctx>, +pub(crate) fn list_capacity<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, wrapper_struct: StructValue<'ctx>, ) -> IntValue<'ctx> { - // TODO: This won't work on seemless slices. Needs to return the len if the capacity is negative. - builder - .build_extract_value(wrapper_struct, Builtin::WRAPPER_CAPACITY, "list_capacity") - .unwrap() - .into_int_value() + call_list_bitcode_fn( + env, + &[wrapper_struct], + &[], + BitcodeReturns::Basic, + bitcode::LIST_CAPACITY, + ) + .into_int_value() +} + +// Gets a pointer to just after the refcount for a list or seamless slice. +// The value is just after the refcount so that normal lists and seamless slices can share code paths easily. 
+pub(crate) fn list_refcount_ptr<'a, 'ctx, 'env>(
+    env: &Env<'a, 'ctx, 'env>,
+    wrapper_struct: StructValue<'ctx>,
+) -> PointerValue<'ctx> {
+    call_list_bitcode_fn(
+        env,
+        &[wrapper_struct],
+        &[],
+        BitcodeReturns::Basic,
+        bitcode::LIST_REFCOUNT_PTR,
+    )
+    .into_pointer_value()
 }
 
 pub(crate) fn destructure<'ctx>(
@@ -801,56 +820,8 @@ pub(crate) fn decref<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
     wrapper_struct: StructValue<'ctx>,
     alignment: u32,
-    parent: FunctionValue<'ctx>,
 ) {
-    let builder = env.builder;
-    let ctx = env.context;
+    let refcount_ptr = list_refcount_ptr(env, wrapper_struct);
 
-    let capacity = list_capacity(builder, wrapper_struct);
-    let is_regular_list = builder.build_int_compare(
-        IntPredicate::SGE,
-        capacity,
-        env.ptr_int().const_zero(),
-        "cap >= 0",
-    );
-
-    let get_list_ptr_block = ctx.append_basic_block(parent, "get_list_ptr");
-    let get_slice_ptr_block = ctx.append_basic_block(parent, "get_slice_ptr");
-    let decref_block = ctx.append_basic_block(parent, "decref");
-    builder.build_conditional_branch(is_regular_list, get_list_ptr_block, get_slice_ptr_block);
-
-    builder.position_at_end(get_list_ptr_block);
-    let ptr_type = env.context.i8_type().ptr_type(AddressSpace::default());
-    let (_, list_ptr) = load_list(builder, wrapper_struct, ptr_type);
-    builder.build_unconditional_branch(decref_block);
-
-    builder.position_at_end(get_slice_ptr_block);
-    let refcount_ptr_int = builder.build_left_shift(
-        capacity,
-        env.ptr_int().const_int(1, false),
-        "extract_refcount_from_capacity",
-    );
-    // TODO: adding one here feels silly.
-    // We should probably expose a zig builtin to directly deal with the refcount pointer.
-    // Instead we add one and then zig subtracts one.
-    let slice_ptr_int = builder.build_int_add(
-        refcount_ptr_int,
-        env.ptr_int().const_int(1, false),
-        "inc_refcount_ptr",
-    );
-    let slice_ptr = builder.build_int_to_ptr(slice_ptr_int, ptr_type, "to_slice_ptr");
-    builder.build_unconditional_branch(decref_block);
-
-    builder.position_at_end(decref_block);
-    let result = builder.build_phi(ptr_type, "branch");
-    result.add_incoming(&[
-        (&list_ptr, get_list_ptr_block),
-        (&slice_ptr, get_slice_ptr_block),
-    ]);
-
-    crate::llvm::refcounting::decref_pointer_check_null(
-        env,
-        result.as_basic_value().into_pointer_value(),
-        alignment,
-    );
+    crate::llvm::refcounting::decref_pointer_check_null(env, refcount_ptr, alignment);
 }
diff --git a/crates/compiler/gen_llvm/src/llvm/lowlevel.rs b/crates/compiler/gen_llvm/src/llvm/lowlevel.rs
index 4a1d09cc27..7424c0bd7e 100644
--- a/crates/compiler/gen_llvm/src/llvm/lowlevel.rs
+++ b/crates/compiler/gen_llvm/src/llvm/lowlevel.rs
@@ -28,8 +28,8 @@ use crate::llvm::{
         load_roc_value, roc_function_call, BuilderExt, RocReturn,
     },
     build_list::{
-        list_append_unsafe, list_capacity, list_concat, list_drop_at, list_get_unsafe, list_len,
-        list_map, list_map2, list_map3, list_map4, list_prepend, list_replace_unsafe, list_reserve,
+        list_append_unsafe, list_concat, list_drop_at, list_get_unsafe, list_len, list_map,
+        list_map2, list_map3, list_map4, list_prepend, list_replace_unsafe, list_reserve,
         list_sort_with, list_sublist, list_swap, list_symbol_to_c_abi, list_with_capacity,
         pass_update_mode,
    },
@@ -632,10 +632,16 @@ pub(crate) fn run_low_level<'a, 'ctx, 'env>(
             list_len(env.builder, list.into_struct_value()).into()
         }
         ListGetCapacity => {
-            // List.capacity : List * -> Nat
+            // List.capacity : List a -> Nat
             arguments!(list);
 
-            list_capacity(env.builder, list.into_struct_value()).into()
+            call_list_bitcode_fn(
+                env,
&[list.into_struct_value()], + &[], + BitcodeReturns::Basic, + bitcode::LIST_CAPACITY, + ) } ListWithCapacity => { // List.withCapacity : Nat -> List a diff --git a/crates/compiler/gen_llvm/src/llvm/refcounting.rs b/crates/compiler/gen_llvm/src/llvm/refcounting.rs index ea592a3b44..4f4875b7b1 100644 --- a/crates/compiler/gen_llvm/src/llvm/refcounting.rs +++ b/crates/compiler/gen_llvm/src/llvm/refcounting.rs @@ -5,7 +5,9 @@ use crate::llvm::build::{ add_func, cast_basic_basic, get_tag_id, tag_pointer_clear_tag_id, use_roc_value, Env, FAST_CALL_CONV, }; -use crate::llvm::build_list::{incrementing_elem_loop, list_capacity, load_list}; +use crate::llvm::build_list::{ + incrementing_elem_loop, list_capacity, list_refcount_ptr, load_list, +}; use crate::llvm::convert::{basic_type_from_layout, zig_str_type, RocUnion}; use bumpalo::collections::Vec; use inkwell::basic_block::BasicBlock; @@ -45,21 +47,6 @@ impl<'ctx> PointerToRefcount<'ctx> { Self { value } } - /// # Safety - /// - /// the invariant is that the given pointer really points to the refcount, - /// not the data, and only is the start of the allocated buffer if the - /// alignment works out that way. - pub unsafe fn from_ptr_int<'a, 'env>(env: &Env<'a, 'ctx, 'env>, int: IntValue<'ctx>) -> Self { - let refcount_type = env.ptr_int(); - let refcount_ptr_type = refcount_type.ptr_type(AddressSpace::default()); - let value = env - .builder - .build_int_to_ptr(int, refcount_ptr_type, "to_refcount_ptr"); - - Self { value } - } - pub fn from_ptr_to_data<'a, 'env>( env: &Env<'a, 'ctx, 'env>, data_ptr: PointerValue<'ctx>, @@ -706,10 +693,10 @@ fn modify_refcount_list_help<'a, 'ctx, 'env>( let parent = fn_val; let original_wrapper = arg_val.into_struct_value(); - let capacity = list_capacity(builder, original_wrapper); + let capacity = list_capacity(env, original_wrapper); let is_non_empty = builder.build_int_compare( - IntPredicate::SGT, + IntPredicate::UGT, capacity, env.ptr_int().const_zero(), "cap > 0", @@ -717,11 +704,9 @@ fn modify_refcount_list_help<'a, 'ctx, 'env>( // build blocks let modification_list_block = ctx.append_basic_block(parent, "modification_list_block"); - let check_slice_block = ctx.append_basic_block(parent, "check_slice_block"); - let modification_slice_block = ctx.append_basic_block(parent, "modification_slice_block"); let cont_block = ctx.append_basic_block(parent, "modify_rc_list_cont"); - builder.build_conditional_branch(is_non_empty, modification_list_block, check_slice_block); + builder.build_conditional_branch(is_non_empty, modification_list_block, cont_block); builder.position_at_end(modification_list_block); @@ -754,60 +739,8 @@ fn modify_refcount_list_help<'a, 'ctx, 'env>( ); } - let refcount_ptr = PointerToRefcount::from_list_wrapper(env, original_wrapper); - let call_mode = mode_to_call_mode(fn_val, mode); - refcount_ptr.modify(call_mode, layout, env, layout_interner); - - builder.build_unconditional_branch(cont_block); - - builder.position_at_end(check_slice_block); - - let is_seamless_slice = builder.build_int_compare( - IntPredicate::SLT, - capacity, - env.ptr_int().const_zero(), - "cap < 0", - ); - builder.build_conditional_branch(is_seamless_slice, modification_slice_block, cont_block); - - builder.position_at_end(modification_slice_block); - - if layout_interner.contains_refcounted(element_layout) { - let ptr_type = basic_type_from_layout(env, layout_interner, element_layout) - .ptr_type(AddressSpace::default()); - - let (len, ptr) = load_list(env.builder, original_wrapper, ptr_type); - - let loop_fn = 
-            |layout_interner, _index, element| {
-                modify_refcount_layout_help(
-                    env,
-                    layout_interner,
-                    layout_ids,
-                    mode.to_call_mode(fn_val),
-                    element,
-                    element_layout,
-                );
-            };
-
-        incrementing_elem_loop(
-            env,
-            layout_interner,
-            parent,
-            element_layout,
-            ptr,
-            len,
-            "modify_rc_index",
-            loop_fn,
-        );
-    }
-
-    // a slices refcount is `capacity << 1`
-    let refcount_ptr_int = builder.build_left_shift(
-        capacity,
-        env.ptr_int().const_int(1, false),
-        "extract_refcount_from_capacity",
-    );
-    let refcount_ptr = unsafe { PointerToRefcount::from_ptr_int(env, refcount_ptr_int) };
+    let refcount_ptr =
+        PointerToRefcount::from_ptr_to_data(env, list_refcount_ptr(env, original_wrapper));
     let call_mode = mode_to_call_mode(fn_val, mode);
     refcount_ptr.modify(call_mode, layout, env, layout_interner);
 
diff --git a/crates/compiler/gen_wasm/src/low_level.rs b/crates/compiler/gen_wasm/src/low_level.rs
index c16bdf8ae8..fc4ca6acb9 100644
--- a/crates/compiler/gen_wasm/src/low_level.rs
+++ b/crates/compiler/gen_wasm/src/low_level.rs
@@ -318,44 +318,7 @@ impl<'a> LowLevelCall<'a> {
                 _ => internal_error!("invalid storage for List"),
             },
 
-            ListGetCapacity => match backend.storage.get(&self.arguments[0]) {
-                StoredValue::StackMemory { location, .. } => {
-                    let (local_id, offset) =
-                        location.local_and_offset(backend.storage.stack_frame_pointer);
-                    backend.code_builder.get_local(local_id);
-                    // List is stored as (pointer, length, capacity),
-                    // with each of those fields being 4 bytes on wasm.
-                    // So the capacity is 8 bytes after the start of the struct.
-                    //
-                    // WRAPPER_CAPACITY represents the index of the capacity field
-                    // (which is 2 as of the writing of this comment). If the field order
-                    // ever changes, WRAPPER_CAPACITY should be updated and this logic should
-                    // continue to work even though this comment may become inaccurate.
-
-                    // On top of this, if the capacity is less than zero, the list is a seamless slice.
-                    // We need to return the length in that case.
-                    let code_builder = &mut backend.code_builder;
-                    let tmp = backend.storage.create_anonymous_local(ValueType::I32);
-                    code_builder.i32_load(Align::Bytes4, offset + (4 * Builtin::WRAPPER_CAPACITY));
-                    code_builder.i64_const(0);
-                    code_builder.i32_lt_s(); // capacity < 0
-
-                    code_builder.if_();
-                    {
-                        code_builder.i32_load(Align::Bytes4, offset + (4 * Builtin::WRAPPER_LEN));
-                        code_builder.set_local(tmp);
-                    }
-                    code_builder.else_();
-                    {
-                        code_builder
-                            .i32_load(Align::Bytes4, offset + (4 * Builtin::WRAPPER_CAPACITY));
-                        code_builder.set_local(tmp);
-                    }
-                    code_builder.end();
-                    code_builder.get_local(tmp);
-                }
-                _ => internal_error!("invalid storage for List"),
-            },
+            ListGetCapacity => self.load_args_and_call_zig(backend, bitcode::LIST_CAPACITY),
 
             ListIsUnique => self.load_args_and_call_zig(backend, bitcode::LIST_IS_UNIQUE),
 
diff --git a/crates/roc_std/src/roc_list.rs b/crates/roc_std/src/roc_list.rs
index 6ea17b0e11..60b6e890aa 100644
--- a/crates/roc_std/src/roc_list.rs
+++ b/crates/roc_std/src/roc_list.rs
@@ -28,6 +28,8 @@ use serde::{
 pub struct RocList<T> {
     elements: Option<NonNull<ManuallyDrop<T>>>,
     length: usize,
+    // This technically points to just after the refcount.
+    // This is an optimization that enables using one code path to get the refcount pointer for both regular lists and seamless slices.
     capacity_or_ref_ptr: usize,
 }
 
@@ -181,14 +183,11 @@ impl<T> RocList<T> {
 
     /// Useful for doing memcpy on the underlying allocation. Returns NULL if list is empty.
    pub(crate) unsafe fn ptr_to_allocation(&self) -> *mut c_void {
+        let alignment = Self::alloc_alignment() as usize;
         if self.is_seamless_slice() {
-            (self.capacity_or_ref_ptr << 1) as *mut _
+            ((self.capacity_or_ref_ptr << 1) - alignment) as *mut _
         } else {
-            unsafe {
-                self.ptr_to_first_elem()
-                    .cast::<u8>()
-                    .sub(Self::alloc_alignment() as usize) as *mut _
-            }
+            unsafe { self.ptr_to_first_elem().cast::<u8>().sub(alignment) as *mut _ }
         }
     }
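Reviewer note, not part of the patch: the `- alignment` added to `ptr_to_allocation` works because, for a seamless slice, `capacity_or_ref_ptr` stores the refcount-adjacent pointer shifted right by one with the tag bit on top. A minimal Rust sketch of the round trip, using a made-up address and alignment:

```rust
const SEAMLESS_SLICE_BIT: usize = isize::MIN as usize;

// Store (as in listSublist): the pointer just after the refcount is at least
// 2-byte aligned, so bit 0 is zero and survives the >>1/<<1 round trip;
// the vacated high bit tags the value as a seamless slice.
fn encode_slice_ref_ptr(ref_ptr: usize) -> usize {
    debug_assert_eq!(ref_ptr & 1, 0);
    (ref_ptr >> 1) | SEAMLESS_SLICE_BIT
}

// Load (as in getRefcountPtr): shifting left by one drops the tag bit
// and restores the original address.
fn decode_slice_ref_ptr(capacity_or_ref_ptr: usize) -> usize {
    capacity_or_ref_ptr << 1
}

fn main() {
    let ref_ptr: usize = 0x7f00_0000_1008; // hypothetical 8-byte-aligned address
    let stored = encode_slice_ref_ptr(ref_ptr);
    assert_ne!(stored & SEAMLESS_SLICE_BIT, 0); // tagged as a slice
    assert_eq!(decode_slice_ref_ptr(stored), ref_ptr);

    // ptr_to_allocation: the stored pointer sits `alignment` bytes past the
    // allocation start (the refcount lives directly before it), so
    // subtracting the alignment recovers the start of the allocation.
    let alignment: usize = 8;
    assert_eq!(decode_slice_ref_ptr(stored) - alignment, 0x7f00_0000_1000);
}
```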