mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-02 03:12:20 +00:00
Add capacity growth to RocStr
Also, cleans up the alloc and realloc api for both list and str. Updates Str.reserve to match List.reserve
This commit is contained in:
parent
cc90709879
commit
96b5d365b2
5 changed files with 98 additions and 145 deletions
|
@ -125,53 +125,6 @@ pub const RocList = extern struct {
|
|||
return new_list;
|
||||
}
|
||||
|
||||
// We follow roughly the [fbvector](https://github.com/facebook/folly/blob/main/folly/docs/FBVector.md) when it comes to growing a RocList.
|
||||
// Here is [their growth strategy](https://github.com/facebook/folly/blob/3e0525988fd444201b19b76b390a5927c15cb697/folly/FBVector.h#L1128) for push_back:
|
||||
//
|
||||
// (1) initial size
|
||||
// Instead of growing to size 1 from empty, fbvector allocates at least
|
||||
// 64 bytes. You may still use reserve to reserve a lesser amount of
|
||||
// memory.
|
||||
// (2) 1.5x
|
||||
// For medium-sized vectors, the growth strategy is 1.5x. See the docs
|
||||
// for details.
|
||||
// This does not apply to very small or very large fbvectors. This is a
|
||||
// heuristic.
|
||||
//
|
||||
// In our case, we exposed allocate and reallocate, which will use a smart growth stategy.
|
||||
// We also expose allocateExact and reallocateExact for case where a specific number of elements is requested.
|
||||
|
||||
// calculateCapacity should only be called in cases the list will be growing.
|
||||
// requested_length should always be greater than old_capacity.
|
||||
inline fn calculateCapacity(
|
||||
old_capacity: usize,
|
||||
requested_length: usize,
|
||||
element_width: usize,
|
||||
) usize {
|
||||
// TODO: there are two adjustments that would likely lead to better results for Roc.
|
||||
// 1. Deal with the fact we allocate an extra u64 for refcount.
|
||||
// This may lead to allocating page size + 8 bytes.
|
||||
// That could mean allocating an entire page for 8 bytes of data which isn't great.
|
||||
// 2. Deal with the fact that we can request more than 1 element at a time.
|
||||
// fbvector assumes just appending 1 element at a time when using this algorithm.
|
||||
// As such, they will generally grow in a way that should better match certain memory multiple.
|
||||
// This is also the normal case for roc, but we could also grow by a much larger amount.
|
||||
// We may want to round to multiples of 2 or something similar.
|
||||
var new_capacity: usize = 0;
|
||||
if (element_width == 0) {
|
||||
return requested_length;
|
||||
} else if (old_capacity == 0) {
|
||||
new_capacity = 64 / element_width;
|
||||
} else if (old_capacity < 4096 / element_width) {
|
||||
new_capacity = old_capacity * 2;
|
||||
} else if (old_capacity > 4096 * 32 / element_width) {
|
||||
new_capacity = old_capacity * 2;
|
||||
} else {
|
||||
new_capacity = (old_capacity * 3 + 1) / 2;
|
||||
}
|
||||
return @maximum(new_capacity, requested_length);
|
||||
}
|
||||
|
||||
pub fn allocate(
|
||||
alignment: u32,
|
||||
length: usize,
|
||||
|
@ -181,7 +134,7 @@ pub const RocList = extern struct {
|
|||
return empty();
|
||||
}
|
||||
|
||||
const capacity = calculateCapacity(0, length, element_width);
|
||||
const capacity = utils.calculateCapacity(0, length, element_width);
|
||||
const data_bytes = capacity * element_width;
|
||||
return RocList{
|
||||
.bytes = utils.allocateWithRefcount(data_bytes, alignment),
|
||||
|
@ -190,23 +143,6 @@ pub const RocList = extern struct {
|
|||
};
|
||||
}
|
||||
|
||||
pub fn allocateExact(
|
||||
alignment: u32,
|
||||
length: usize,
|
||||
element_width: usize,
|
||||
) RocList {
|
||||
if (length == 0) {
|
||||
return empty();
|
||||
}
|
||||
|
||||
const data_bytes = length * element_width;
|
||||
return RocList{
|
||||
.bytes = utils.allocateWithRefcount(data_bytes, alignment),
|
||||
.length = length,
|
||||
.capacity = length,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn reallocate(
|
||||
self: RocList,
|
||||
alignment: u32,
|
||||
|
@ -218,67 +154,36 @@ pub const RocList = extern struct {
|
|||
if (self.capacity >= new_length) {
|
||||
return RocList{ .bytes = self.bytes, .length = new_length, .capacity = self.capacity };
|
||||
} else {
|
||||
const new_capacity = calculateCapacity(self.capacity, new_length, element_width);
|
||||
const new_capacity = utils.calculateCapacity(self.capacity, new_length, element_width);
|
||||
const new_source = utils.unsafeReallocate(source_ptr, alignment, self.len(), new_capacity, element_width);
|
||||
return RocList{ .bytes = new_source, .length = new_length, .capacity = new_capacity };
|
||||
}
|
||||
}
|
||||
// TODO: Investigate the performance of this.
|
||||
// Maybe we should just always reallocate to the new_length instead of expanding capacity?
|
||||
const new_capacity = if (self.capacity >= new_length) self.capacity else calculateCapacity(self.capacity, new_length, element_width);
|
||||
return self.reallocateFresh(alignment, new_length, new_capacity, element_width);
|
||||
return self.reallocateFresh(alignment, new_length, element_width);
|
||||
}
|
||||
return RocList.allocate(alignment, new_length, element_width);
|
||||
}
|
||||
|
||||
pub fn reallocateExact(
|
||||
self: RocList,
|
||||
alignment: u32,
|
||||
new_length: usize,
|
||||
element_width: usize,
|
||||
) RocList {
|
||||
if (self.bytes) |source_ptr| {
|
||||
if (self.isUnique()) {
|
||||
if (self.capacity >= new_length) {
|
||||
return RocList{ .bytes = self.bytes, .length = new_length, .capacity = self.capacity };
|
||||
} else {
|
||||
const new_source = utils.unsafeReallocate(source_ptr, alignment, self.len(), new_length, element_width);
|
||||
return RocList{ .bytes = new_source, .length = new_length, .capacity = new_length };
|
||||
}
|
||||
}
|
||||
return self.reallocateFresh(alignment, new_length, new_length, element_width);
|
||||
}
|
||||
return RocList.allocateExact(alignment, new_length, element_width);
|
||||
}
|
||||
|
||||
/// reallocate by explicitly making a new allocation and copying elements over
|
||||
fn reallocateFresh(
|
||||
self: RocList,
|
||||
alignment: u32,
|
||||
new_length: usize,
|
||||
new_capacity: usize,
|
||||
element_width: usize,
|
||||
) RocList {
|
||||
const old_length = self.length;
|
||||
const delta_length = new_length - old_length;
|
||||
|
||||
const data_bytes = new_capacity * element_width;
|
||||
const first_slot = utils.allocateWithRefcount(data_bytes, alignment);
|
||||
const result = RocList.allocate(alignment, new_length, element_width);
|
||||
|
||||
// transfer the memory
|
||||
if (self.bytes) |source_ptr| {
|
||||
const dest_ptr = first_slot;
|
||||
const dest_ptr = result.bytes orelse unreachable;
|
||||
|
||||
@memcpy(dest_ptr, source_ptr, old_length * element_width);
|
||||
@memset(dest_ptr + old_length * element_width, 0, delta_length * element_width);
|
||||
}
|
||||
|
||||
const result = RocList{
|
||||
.bytes = first_slot,
|
||||
.length = new_length,
|
||||
.capacity = new_capacity,
|
||||
};
|
||||
|
||||
utils.decref(self.bytes, old_length * element_width, alignment);
|
||||
|
||||
return result;
|
||||
|
@ -503,7 +408,7 @@ pub fn listWithCapacity(
|
|||
alignment: u32,
|
||||
element_width: usize,
|
||||
) callconv(.C) RocList {
|
||||
var output = RocList.allocateExact(alignment, capacity, element_width);
|
||||
var output = RocList.allocate(alignment, capacity, element_width);
|
||||
output.length = 0;
|
||||
return output;
|
||||
}
|
||||
|
|
|
@ -50,7 +50,7 @@ pub const RocStr = extern struct {
|
|||
// This clones the pointed-to bytes if they won't fit in a
|
||||
// small string, and returns a (pointer, len) tuple which points to them.
|
||||
pub fn init(bytes_ptr: [*]const u8, length: usize) RocStr {
|
||||
var result = RocStr.allocate(length, length);
|
||||
var result = RocStr.allocate(length);
|
||||
@memcpy(result.asU8ptr(), bytes_ptr, length);
|
||||
|
||||
return result;
|
||||
|
@ -70,11 +70,14 @@ pub const RocStr = extern struct {
|
|||
};
|
||||
}
|
||||
|
||||
// allocate space for a (big or small) RocStr, but put nothing in it yet
|
||||
pub fn allocate(length: usize, capacity: usize) RocStr {
|
||||
const result_is_big = capacity >= SMALL_STRING_SIZE;
|
||||
// allocate space for a (big or small) RocStr, but put nothing in it yet.
|
||||
// May have a larger capacity than the length.
|
||||
pub fn allocate(length: usize) RocStr {
|
||||
const element_width = 1;
|
||||
const result_is_big = length >= SMALL_STRING_SIZE;
|
||||
|
||||
if (result_is_big) {
|
||||
const capacity = utils.calculateCapacity(0, length, element_width);
|
||||
return RocStr.allocateBig(length, capacity);
|
||||
} else {
|
||||
var string = RocStr.empty();
|
||||
|
@ -91,25 +94,6 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
}
|
||||
|
||||
// This takes ownership of the pointed-to bytes if they won't fit in a
|
||||
// small string, and returns a (pointer, len) tuple which points to them.
|
||||
pub fn withCapacity(length: usize) RocStr {
|
||||
const roc_str_size = @sizeOf(RocStr);
|
||||
|
||||
if (length < roc_str_size) {
|
||||
return RocStr.empty();
|
||||
} else {
|
||||
var new_bytes = utils.alloc(length, RocStr.alignment) catch unreachable;
|
||||
|
||||
var new_bytes_ptr: [*]u8 = @ptrCast([*]u8, &new_bytes);
|
||||
|
||||
return RocStr{
|
||||
.str_bytes = new_bytes_ptr,
|
||||
.str_len = length,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
pub fn eq(self: RocStr, other: RocStr) bool {
|
||||
// If they are byte-for-byte equal, they're definitely equal!
|
||||
if (self.str_bytes == other.str_bytes and self.str_len == other.str_len and self.str_capacity == other.str_capacity) {
|
||||
|
@ -169,13 +153,19 @@ pub const RocStr = extern struct {
|
|||
pub fn reallocate(
|
||||
self: RocStr,
|
||||
new_length: usize,
|
||||
new_capacity: usize,
|
||||
) RocStr {
|
||||
const element_width = 1;
|
||||
const old_capacity = self.getCapacity();
|
||||
|
||||
if (self.str_bytes) |source_ptr| {
|
||||
if (self.isUnique() and !self.isSmallStr()) {
|
||||
const unique = self.isUnique();
|
||||
if (unique and old_capacity > new_length) {
|
||||
var output = self;
|
||||
output.setLen(new_length);
|
||||
return output;
|
||||
}
|
||||
if (unique and !self.isSmallStr()) {
|
||||
const new_capacity = utils.calculateCapacity(old_capacity, new_length, element_width);
|
||||
const new_source = utils.unsafeReallocate(
|
||||
source_ptr,
|
||||
RocStr.alignment,
|
||||
|
@ -188,19 +178,18 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
}
|
||||
|
||||
return self.reallocateFresh(new_length, new_capacity);
|
||||
return self.reallocateFresh(new_length);
|
||||
}
|
||||
|
||||
/// reallocate by explicitly making a new allocation and copying elements over
|
||||
pub fn reallocateFresh(
|
||||
fn reallocateFresh(
|
||||
self: RocStr,
|
||||
new_length: usize,
|
||||
new_capacity: usize,
|
||||
) RocStr {
|
||||
const old_length = self.len();
|
||||
const delta_length = new_length - old_length;
|
||||
|
||||
const result = RocStr.allocate(new_length, new_capacity);
|
||||
const result = RocStr.allocate(new_length);
|
||||
|
||||
// transfer the memory
|
||||
|
||||
|
@ -238,6 +227,14 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn setLen(self: *RocStr, length: usize) void {
|
||||
if (self.isSmallStr()) {
|
||||
self.asU8ptr()[@sizeOf(RocStr) - 1] = @intCast(u8, length) | 0b1000_0000;
|
||||
} else {
|
||||
self.str_len = length;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn getCapacity(self: RocStr) usize {
|
||||
if (self.isSmallStr()) {
|
||||
return SMALL_STR_MAX_LENGTH;
|
||||
|
@ -1387,7 +1384,7 @@ pub fn repeat(string: RocStr, count: usize) callconv(.C) RocStr {
|
|||
const bytes_len = string.len();
|
||||
const bytes_ptr = string.asU8ptr();
|
||||
|
||||
var ret_string = RocStr.allocate(count * bytes_len, count * bytes_len);
|
||||
var ret_string = RocStr.allocate(count * bytes_len);
|
||||
var ret_string_ptr = ret_string.asU8ptr();
|
||||
|
||||
var i: usize = 0;
|
||||
|
@ -1528,7 +1525,7 @@ fn strConcat(arg1: RocStr, arg2: RocStr) RocStr {
|
|||
} else {
|
||||
const combined_length = arg1.len() + arg2.len();
|
||||
|
||||
const result = arg1.reallocate(combined_length, combined_length);
|
||||
const result = arg1.reallocate(combined_length);
|
||||
|
||||
@memcpy(result.asU8ptr() + arg1.len(), arg2.asU8ptr(), arg2.len());
|
||||
|
||||
|
@ -1600,7 +1597,7 @@ fn strJoinWith(list: RocListStr, separator: RocStr) RocStr {
|
|||
// include size of the separator
|
||||
total_size += separator.len() * (len - 1);
|
||||
|
||||
var result = RocStr.allocate(total_size, total_size);
|
||||
var result = RocStr.allocate(total_size);
|
||||
var result_ptr = result.asU8ptr();
|
||||
|
||||
var offset: usize = 0;
|
||||
|
@ -2512,14 +2509,14 @@ test "capacity: big string" {
|
|||
var data = RocStr.init(data_bytes, data_bytes.len);
|
||||
defer data.deinit();
|
||||
|
||||
try expectEqual(data.getCapacity(), data_bytes.len);
|
||||
try expect(data.getCapacity() >= data_bytes.len);
|
||||
}
|
||||
|
||||
pub fn appendScalar(string: RocStr, scalar_u32: u32) callconv(.C) RocStr {
|
||||
const scalar = @intCast(u21, scalar_u32);
|
||||
const width = std.unicode.utf8CodepointSequenceLength(scalar) catch unreachable;
|
||||
|
||||
var output = string.reallocate(string.len() + width, string.len() + width);
|
||||
var output = string.reallocate(string.len() + width);
|
||||
var slice = output.asSliceWithCapacity();
|
||||
|
||||
_ = std.unicode.utf8Encode(scalar, slice[string.len() .. string.len() + width]) catch unreachable;
|
||||
|
@ -2587,17 +2584,21 @@ test "appendScalar: big 😀" {
|
|||
try expect(actual.eq(expected));
|
||||
}
|
||||
|
||||
pub fn reserve(string: RocStr, capacity: usize) callconv(.C) RocStr {
|
||||
if (capacity > string.getCapacity()) {
|
||||
// expand allocation but keep string length the same
|
||||
return string.reallocate(string.len(), capacity);
|
||||
} else {
|
||||
pub fn reserve(string: RocStr, spare: usize) callconv(.C) RocStr {
|
||||
const old_length = string.len();
|
||||
if (string.getCapacity() >= old_length + spare) {
|
||||
return string;
|
||||
} else {
|
||||
var output = string.reallocate(old_length + spare);
|
||||
output.setLen(old_length);
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn withCapacity(capacity: usize) callconv(.C) RocStr {
|
||||
return RocStr.allocate(0, capacity);
|
||||
var str = RocStr.allocate(capacity);
|
||||
str.setLen(0);
|
||||
return str;
|
||||
}
|
||||
|
||||
pub fn getScalarUnsafe(string: RocStr, index: usize) callconv(.C) extern struct { bytesParsed: usize, scalar: u32 } {
|
||||
|
|
|
@ -213,6 +213,53 @@ inline fn decref_ptr_to_refcount(
|
|||
}
|
||||
}
|
||||
|
||||
// We follow roughly the [fbvector](https://github.com/facebook/folly/blob/main/folly/docs/FBVector.md) when it comes to growing a RocList.
|
||||
// Here is [their growth strategy](https://github.com/facebook/folly/blob/3e0525988fd444201b19b76b390a5927c15cb697/folly/FBVector.h#L1128) for push_back:
|
||||
//
|
||||
// (1) initial size
|
||||
// Instead of growing to size 1 from empty, fbvector allocates at least
|
||||
// 64 bytes. You may still use reserve to reserve a lesser amount of
|
||||
// memory.
|
||||
// (2) 1.5x
|
||||
// For medium-sized vectors, the growth strategy is 1.5x. See the docs
|
||||
// for details.
|
||||
// This does not apply to very small or very large fbvectors. This is a
|
||||
// heuristic.
|
||||
//
|
||||
// In our case, we exposed allocate and reallocate, which will use a smart growth stategy.
|
||||
// We also expose allocateExact and reallocateExact for case where a specific number of elements is requested.
|
||||
|
||||
// calculateCapacity should only be called in cases the list will be growing.
|
||||
// requested_length should always be greater than old_capacity.
|
||||
pub inline fn calculateCapacity(
|
||||
old_capacity: usize,
|
||||
requested_length: usize,
|
||||
element_width: usize,
|
||||
) usize {
|
||||
// TODO: there are two adjustments that would likely lead to better results for Roc.
|
||||
// 1. Deal with the fact we allocate an extra u64 for refcount.
|
||||
// This may lead to allocating page size + 8 bytes.
|
||||
// That could mean allocating an entire page for 8 bytes of data which isn't great.
|
||||
// 2. Deal with the fact that we can request more than 1 element at a time.
|
||||
// fbvector assumes just appending 1 element at a time when using this algorithm.
|
||||
// As such, they will generally grow in a way that should better match certain memory multiple.
|
||||
// This is also the normal case for roc, but we could also grow by a much larger amount.
|
||||
// We may want to round to multiples of 2 or something similar.
|
||||
var new_capacity: usize = 0;
|
||||
if (element_width == 0) {
|
||||
return requested_length;
|
||||
} else if (old_capacity == 0) {
|
||||
new_capacity = 64 / element_width;
|
||||
} else if (old_capacity < 4096 / element_width) {
|
||||
new_capacity = old_capacity * 2;
|
||||
} else if (old_capacity > 4096 * 32 / element_width) {
|
||||
new_capacity = old_capacity * 2;
|
||||
} else {
|
||||
new_capacity = (old_capacity * 3 + 1) / 2;
|
||||
}
|
||||
return @maximum(new_capacity, requested_length);
|
||||
}
|
||||
|
||||
pub fn allocateWithRefcountC(
|
||||
data_bytes: usize,
|
||||
element_alignment: u32,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue