Improve roc atomic refcounting by using first bit to indicate threadlocal
By avoiding atomic refcounting for any threadlocal variable, this greatly improves performance: threadlocal refcounting now runs only ~4% behind non-atomic refcounting, whereas the old atomic refcounting could be as much as ~50% behind (though typically 10-20%).

This commit does not enable anything atomic-related by itself. If we make all forms of data loading (List.get, Box.unbox, loading a recursive tag) propagate the atomic flag, this should be a reasonable default implementation for roc: with propagation, a platform would just mark a list as atomic, and on load its individual elements would be marked atomic as well. We should probably also revert back to threadlocal if an atomic refcount drops to 1 (a sketch of that idea follows the diff).
parent f662f2910f
commit f901946455

1 changed file with 26 additions and 7 deletions
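The change hinges on one bit-level invariant: a threadlocal refcount starts at std.math.minInt(isize), so its top bit (the sign bit) is always set, while an atomic refcount starts at 1 and stays a small positive value, so its top bit is always clear. Below is a minimal standalone sketch of that invariant, using the constants introduced in the diff; the test harness itself is illustrative and not part of the commit.

const std = @import("std");

// Constants as defined in the diff below.
const REFCOUNT_ONE_ISIZE: isize = std.math.minInt(isize);
const REFCOUNT_IS_NOT_ATOMIC_MASK: isize = REFCOUNT_ONE_ISIZE; // only the top bit set
const REFCOUNT_ONE_ATOMIC_ISIZE: isize = 1;

test "top bit distinguishes threadlocal from atomic refcounts" {
    // A fresh threadlocal object: count == minInt(isize), top bit set.
    try std.testing.expect(REFCOUNT_ONE_ISIZE & REFCOUNT_IS_NOT_ATOMIC_MASK != 0);
    // A fresh atomic object: count == 1, top bit clear.
    try std.testing.expect(REFCOUNT_ONE_ATOMIC_ISIZE & REFCOUNT_IS_NOT_ATOMIC_MASK == 0);
    // Incrementing a threadlocal count keeps it in [minInt, minInt + k],
    // so the top bit survives and incref/decref keep taking the fast path.
    try std.testing.expect((REFCOUNT_ONE_ISIZE + 5) & REFCOUNT_IS_NOT_ATOMIC_MASK != 0);
}

The full diff follows.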
@@ -177,6 +177,9 @@ pub const Dec = fn (?[*]u8) callconv(.C) void;
 const REFCOUNT_MAX_ISIZE: isize = 0;
 pub const REFCOUNT_ONE_ISIZE: isize = std.math.minInt(isize);
 pub const REFCOUNT_ONE: usize = @as(usize, @bitCast(REFCOUNT_ONE_ISIZE));
+// Only top bit set.
+pub const REFCOUNT_IS_NOT_ATOMIC_MASK: isize = REFCOUNT_ONE_ISIZE;
+pub const REFCOUNT_ONE_ATOMIC_ISIZE: isize = 1;
 
 pub const IntWidth = enum(u8) {
     U8 = 0,
@@ -207,13 +210,14 @@ pub fn increfRcPtrC(ptr_to_refcount: *isize, amount: isize) callconv(.C) void {
     }
 
     // Ensure that the refcount is not whole program lifetime.
-    if (ptr_to_refcount.* != REFCOUNT_MAX_ISIZE) {
+    const refcount: isize = ptr_to_refcount.*;
+    if (refcount != REFCOUNT_MAX_ISIZE) {
         // Note: we assume that a refcount will never overflow.
         // As such, we do not need to cap incrementing.
         switch (RC_TYPE) {
             .normal => {
                 if (DEBUG_INCDEC and builtin.target.cpu.arch != .wasm32) {
-                    const old = @as(usize, @bitCast(ptr_to_refcount.*));
+                    const old = @as(usize, @bitCast(refcount));
                     const new = old + @as(usize, @intCast(amount));
 
                     const oldH = old - REFCOUNT_ONE + 1;
@@ -222,10 +226,16 @@ pub fn increfRcPtrC(ptr_to_refcount: *isize, amount: isize) callconv(.C) void {
                     std.debug.print("{} + {} = {}!\n", .{ oldH, amount, newH });
                 }
 
-                ptr_to_refcount.* += amount;
+                ptr_to_refcount.* = refcount + amount;
             },
             .atomic => {
-                _ = @atomicRmw(isize, ptr_to_refcount, .Add, amount, .monotonic);
+                // If the first bit of the refcount is set, this variable is threadlocal.
+                // Use normal refcounting instead of atomic.
+                if (refcount & REFCOUNT_IS_NOT_ATOMIC_MASK != 0) {
+                    ptr_to_refcount.* = refcount + amount;
+                } else {
+                    _ = @atomicRmw(isize, ptr_to_refcount, .Add, amount, .monotonic);
+                }
             },
             .none => unreachable,
         }
@@ -387,9 +397,18 @@ inline fn decref_ptr_to_refcount(
                 }
             },
             .atomic => {
-                const last = @atomicRmw(isize, &refcount_ptr[0], .Sub, 1, .monotonic);
-                if (last == REFCOUNT_ONE_ISIZE) {
-                    free_ptr_to_refcount(refcount_ptr, alignment, elements_refcounted);
+                // If the first bit of the refcount is set, this variable is threadlocal.
+                // Use normal refcounting instead of atomic.
+                if (refcount & REFCOUNT_IS_NOT_ATOMIC_MASK != 0) {
+                    refcount_ptr[0] = refcount -% 1;
+                    if (refcount == REFCOUNT_ONE_ISIZE) {
+                        free_ptr_to_refcount(refcount_ptr, alignment, elements_refcounted);
+                    }
+                } else {
+                    const last = @atomicRmw(isize, &refcount_ptr[0], .Sub, 1, .monotonic);
+                    if (last == REFCOUNT_ONE_ATOMIC_ISIZE) {
+                        free_ptr_to_refcount(refcount_ptr, alignment, elements_refcounted);
+                    }
                 }
             },
             .none => unreachable,
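The last idea in the commit message, reverting to threadlocal once an atomic refcount drops to 1, is not implemented by this diff. One hedged sketch of how it could look, assuming the same constants: if a thread that still holds a reference observes an atomic count of exactly 1, that reference must be its own, so no other thread can reach the object and a plain store is race-free. The helper name and call site are hypothetical, not part of the commit.

const std = @import("std");

const REFCOUNT_ONE_ISIZE: isize = std.math.minInt(isize);
const REFCOUNT_ONE_ATOMIC_ISIZE: isize = 1;

/// Hypothetical follow-up helper, not part of this commit. Only sound when
/// the caller itself holds a reference: an atomic count of exactly 1 then
/// means the caller holds the sole reference, so the non-atomic store that
/// flips the object back to threadlocal counting cannot race.
fn tryDemoteToThreadlocal(refcount_ptr: *isize) void {
    if (@atomicLoad(isize, refcount_ptr, .monotonic) == REFCOUNT_ONE_ATOMIC_ISIZE) {
        refcount_ptr.* = REFCOUNT_ONE_ISIZE;
    }
}

test "a uniquely-owned atomic refcount can be demoted" {
    var rc: isize = REFCOUNT_ONE_ATOMIC_ISIZE;
    tryDemoteToThreadlocal(&rc);
    try std.testing.expectEqual(REFCOUNT_ONE_ISIZE, rc);
}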