switch to passing the copy function into zig (implement llvm)

This commit is contained in:
Brendan Hansknecht 2024-07-21 19:04:57 -07:00
parent d028aa3c74
commit 82764dc697
No known key found for this signature in database
GPG key ID: 0EA784685083E75B
5 changed files with 173 additions and 102 deletions

View file

@ -11,7 +11,7 @@ const expectEqual = std.testing.expectEqual;
const Opaque = ?[*]u8;
const EqFn = *const fn (Opaque, Opaque) callconv(.C) bool;
const CompareFn = *const fn (Opaque, Opaque, Opaque) callconv(.C) u8;
const CopyFn = *const fn (Opaque, Opaque, usize) void;
const CopyFn = *const fn (Opaque, Opaque) callconv(.C) void;
const Inc = *const fn (?[*]u8) callconv(.C) void;
const IncN = *const fn (?[*]u8, usize) callconv(.C) void;
@ -427,6 +427,7 @@ pub fn listAppendUnsafe(
list: RocList,
element: Opaque,
element_width: usize,
copy: CopyFn,
) callconv(.C) RocList {
const old_length = list.len();
var output = list;
@ -435,7 +436,7 @@ pub fn listAppendUnsafe(
if (output.bytes) |bytes| {
if (element) |source| {
const target = bytes + old_length * element_width;
copy_element_fn(element_width)(target, source, element_width);
copy(target, source);
}
}
@ -450,9 +451,10 @@ fn listAppend(
elements_refcounted: bool,
inc: Inc,
update_mode: UpdateMode,
copy: CopyFn,
) callconv(.C) RocList {
const with_capacity = listReserve(list, alignment, 1, element_width, elements_refcounted, inc, update_mode);
return listAppendUnsafe(with_capacity, element, element_width);
return listAppendUnsafe(with_capacity, element, element_width, copy);
}
pub fn listPrepend(
@ -462,6 +464,7 @@ pub fn listPrepend(
element_width: usize,
elements_refcounted: bool,
inc: Inc,
copy: CopyFn,
) callconv(.C) RocList {
const old_length = list.len();
// TODO: properly wire in update mode.
@ -477,7 +480,7 @@ pub fn listPrepend(
// finally copy in the new first element
if (element) |source| {
copy_element_fn(element_width)(target, source, element_width);
copy(target, source);
}
}
@ -494,6 +497,7 @@ pub fn listSwap(
inc: Inc,
dec: Dec,
update_mode: UpdateMode,
copy: CopyFn,
) callconv(.C) RocList {
// Early exit to avoid swapping the same element.
if (index_1 == index_2)
@ -515,11 +519,10 @@ pub fn listSwap(
const source_ptr = @as([*]u8, @ptrCast(newList.bytes));
const copy_fn_ptr = copy_element_fn(element_width);
swapElements(copy_fn_ptr, source_ptr, element_width, @as(usize,
swapElements(source_ptr, element_width, @as(usize,
// We already verified that both indices are less than the stored list length,
// which is usize, so casting them to usize will definitely be lossless.
@intCast(index_1)), @as(usize, @intCast(index_2)));
@intCast(index_1)), @as(usize, @intCast(index_2)), copy);
return newList;
}
@ -687,7 +690,15 @@ pub fn listDropAt(
}
}
fn partition(copy_fn_ptr: CopyFn, source_ptr: [*]u8, transform: Opaque, wrapper: CompareFn, element_width: usize, low: isize, high: isize) isize {
fn partition(
source_ptr: [*]u8,
transform: Opaque,
wrapper: CompareFn,
element_width: usize,
low: isize,
high: isize,
copy: CopyFn,
) isize {
const pivot = source_ptr + (@as(usize, @intCast(high)) * element_width);
var i = (low - 1); // Index of smaller element and indicates the right position of pivot found so far
var j = low;
@ -702,22 +713,30 @@ fn partition(copy_fn_ptr: CopyFn, source_ptr: [*]u8, transform: Opaque, wrapper:
utils.Ordering.LT => {
// the current element is smaller than the pivot; swap it
i += 1;
swapElements(copy_fn_ptr, source_ptr, element_width, @as(usize, @intCast(i)), @as(usize, @intCast(j)));
swapElements(source_ptr, element_width, @as(usize, @intCast(i)), @as(usize, @intCast(j)), copy);
},
utils.Ordering.EQ, utils.Ordering.GT => {},
}
}
swapElements(copy_fn_ptr, source_ptr, element_width, @as(usize, @intCast(i + 1)), @as(usize, @intCast(high)));
swapElements(source_ptr, element_width, @as(usize, @intCast(i + 1)), @as(usize, @intCast(high)), copy);
return (i + 1);
}
fn quicksort(copy_fn_ptr: CopyFn, source_ptr: [*]u8, transform: Opaque, wrapper: CompareFn, element_width: usize, low: isize, high: isize) void {
fn quicksort(
source_ptr: [*]u8,
transform: Opaque,
wrapper: CompareFn,
element_width: usize,
low: isize,
high: isize,
copy: CopyFn,
) void {
if (low < high) {
// partition index
const pi = partition(copy_fn_ptr, source_ptr, transform, wrapper, element_width, low, high);
const pi = partition(source_ptr, transform, wrapper, element_width, low, high, copy);
_ = quicksort(copy_fn_ptr, source_ptr, transform, wrapper, element_width, low, pi - 1); // before pi
_ = quicksort(copy_fn_ptr, source_ptr, transform, wrapper, element_width, pi + 1, high); // after pi
_ = quicksort(source_ptr, transform, wrapper, element_width, low, pi - 1, copy); // before pi
_ = quicksort(source_ptr, transform, wrapper, element_width, pi + 1, high, copy); // after pi
}
}
@ -732,6 +751,7 @@ pub fn listSortWith(
elements_refcounted: bool,
inc: Inc,
dec: Dec,
copy: CopyFn,
) callconv(.C) RocList {
var list = input.makeUnique(alignment, element_width, elements_refcounted, inc, dec);
@ -742,7 +762,7 @@ pub fn listSortWith(
if (list.bytes) |source_ptr| {
const low = 0;
const high: isize = @as(isize, @intCast(list.len())) - 1;
quicksort(copy_element_fn(element_width), source_ptr, data, caller, element_width, low, high);
quicksort(source_ptr, data, caller, element_width, low, high, copy);
}
return list;
@ -750,16 +770,21 @@ pub fn listSortWith(
// SWAP ELEMENTS
fn swap(copy_fn_ptr: CopyFn, element_width: usize, p1: [*]u8, p2: [*]u8) void {
const threshold: usize = @sizeOf(u256);
fn swap(
element_width: usize,
p1: [*]u8,
p2: [*]u8,
copy: CopyFn,
) void {
const threshold: usize = 64;
var buffer_actual: [threshold]u8 = undefined;
const buffer: [*]u8 = buffer_actual[0..];
if (element_width <= threshold) {
copy_fn_ptr(buffer, p1, element_width);
copy_fn_ptr(p1, p2, element_width);
copy_fn_ptr(p2, buffer, element_width);
copy(buffer, p1);
copy(p1, p2);
copy(p2, buffer);
return;
}
@ -767,8 +792,6 @@ fn swap(copy_fn_ptr: CopyFn, element_width: usize, p1: [*]u8, p2: [*]u8) void {
var ptr1 = p1;
var ptr2 = p2;
const copy_buffer_ptr = comptime copy_element_fn(threshold);
while (true) {
if (width < threshold) {
@memcpy(buffer[0..width], ptr1[0..width]);
@ -776,9 +799,9 @@ fn swap(copy_fn_ptr: CopyFn, element_width: usize, p1: [*]u8, p2: [*]u8) void {
@memcpy(ptr2[0..width], buffer[0..width]);
return;
} else {
copy_buffer_ptr(buffer, ptr1, threshold);
copy_buffer_ptr(ptr1, ptr2, threshold);
copy_buffer_ptr(ptr2, buffer, threshold);
@memcpy(buffer[0..threshold], ptr1[0..threshold]);
@memcpy(ptr1[0..threshold], ptr2[0..threshold]);
@memcpy(ptr2[0..threshold], buffer[0..threshold]);
ptr1 += threshold;
ptr2 += threshold;
@ -788,11 +811,17 @@ fn swap(copy_fn_ptr: CopyFn, element_width: usize, p1: [*]u8, p2: [*]u8) void {
}
}
fn swapElements(copy_fn_ptr: CopyFn, source_ptr: [*]u8, element_width: usize, index_1: usize, index_2: usize) void {
fn swapElements(
source_ptr: [*]u8,
element_width: usize,
index_1: usize,
index_2: usize,
copy: CopyFn,
) void {
const element_at_i = source_ptr + (index_1 * element_width);
const element_at_j = source_ptr + (index_2 * element_width);
return swap(copy_fn_ptr, element_width, element_at_i, element_at_j);
return swap(element_width, element_at_i, element_at_j, copy);
}
pub fn listConcat(
@ -909,6 +938,7 @@ pub fn listReplaceInPlace(
element: Opaque,
element_width: usize,
out_element: ?[*]u8,
copy: CopyFn,
) callconv(.C) RocList {
// INVARIANT: bounds checking happens on the roc side
//
@ -917,7 +947,7 @@ pub fn listReplaceInPlace(
// so we don't do a bounds check here. Hence, the list is also non-empty,
// because inserting into an empty list is always out of bounds,
// and it's always safe to cast index to usize.
return listReplaceInPlaceHelp(list, @as(usize, @intCast(index)), element, element_width, out_element);
return listReplaceInPlaceHelp(list, @as(usize, @intCast(index)), element, element_width, out_element, copy);
}
pub fn listReplace(
@ -930,6 +960,7 @@ pub fn listReplace(
inc: Inc,
dec: Dec,
out_element: ?[*]u8,
copy: CopyFn,
) callconv(.C) RocList {
// INVARIANT: bounds checking happens on the roc side
//
@ -939,7 +970,7 @@ pub fn listReplace(
// because inserting into an empty list is always out of bounds,
// and it's always safe to cast index to usize.
// because inserting into an empty list is always out of bounds
return listReplaceInPlaceHelp(list.makeUnique(alignment, element_width, elements_refcounted, inc, dec), @as(usize, @intCast(index)), element, element_width, out_element);
return listReplaceInPlaceHelp(list.makeUnique(alignment, element_width, elements_refcounted, inc, dec), @as(usize, @intCast(index)), element, element_width, out_element, copy);
}
inline fn listReplaceInPlaceHelp(
@ -948,16 +979,16 @@ inline fn listReplaceInPlaceHelp(
element: Opaque,
element_width: usize,
out_element: ?[*]u8,
copy: CopyFn,
) RocList {
// the element we will replace
var element_at_index = (list.bytes orelse unreachable) + (index * element_width);
const copy_fn_ptr = copy_element_fn(element_width);
// copy out the old element
copy_fn_ptr((out_element orelse unreachable), element_at_index, element_width);
copy((out_element orelse unreachable), element_at_index);
// copy in the new element
copy_fn_ptr(element_at_index, (element orelse unreachable), element_width);
copy(element_at_index, (element orelse unreachable));
return list;
}
@ -1030,59 +1061,6 @@ pub fn listConcatUtf8(
}
}
fn copy_element_fn(element_width: usize) CopyFn {
const max_inline = @sizeOf(u256);
switch (element_width) {
inline 0...max_inline => |i| return memcpy_sized(i),
else => return &memcpy_opaque,
}
}
fn memcpy_opaque(dst: Opaque, src: Opaque, element_width: usize) void {
@memcpy(@as([*]u8, @ptrCast(dst))[0..element_width], @as([*]u8, @ptrCast(src))[0..element_width]);
}
fn memcpy_sized(comptime size: usize) CopyFn {
return &(struct {
pub fn memcpy(dst: Opaque, src: Opaque, _: usize) void {
// due to the memcpy size being known at compile time to zig, llvm can optimize it to not actually be an memcpy call.
// Instead it can inline to direct memory moves.
@memcpy(@as([*]u8, @ptrCast(dst))[0..size], @as([*]u8, @ptrCast(src))[0..size]);
}
}.memcpy);
}
test "gen memcpy fn" {
{
const element_width = @sizeOf(u8);
const copy_fn_ptr = copy_element_fn(element_width);
var x: u8 = 7;
var y: u8 = 12;
copy_fn_ptr(@ptrCast(&x), @ptrCast(&y), element_width);
try expectEqual(y, x);
}
{
const element_width = @sizeOf(u32);
const copy_fn_ptr = copy_element_fn(element_width);
var x: u32 = 7;
var y: u32 = 0xDEAD_BEEF;
copy_fn_ptr(@ptrCast(&x), @ptrCast(&y), element_width);
try expectEqual(y, x);
}
{
const element_width = @sizeOf(u512);
const copy_fn_ptr = copy_element_fn(element_width);
var x: u512 = 7;
var y: u512 = 1 << 500;
copy_fn_ptr(@ptrCast(&x), @ptrCast(&y), element_width);
try expectEqual(y, x);
}
}
test "listConcatUtf8" {
const list = RocList.fromSlice(u8, &[_]u8{ 1, 2, 3, 4 }, false);
defer list.decref(1, 1, false, &rcNone);