From 96b5d365b2f0276c25f6e5359bd65478709b5b0f Mon Sep 17 00:00:00 2001 From: Brendan Hansknecht Date: Mon, 10 Oct 2022 22:24:55 -0700 Subject: [PATCH] Add capacity growth to RocStr Also, cleans up the alloc and realloc api for both list and str. Updates Str.reserve to match List.reserve --- crates/compiler/builtins/bitcode/src/list.zig | 107 +----------------- crates/compiler/builtins/bitcode/src/str.zig | 81 ++++++------- .../compiler/builtins/bitcode/src/utils.zig | 47 ++++++++ crates/compiler/builtins/roc/Str.roc | 4 +- crates/roc_std/tests/test_roc_std.rs | 4 +- 5 files changed, 98 insertions(+), 145 deletions(-) diff --git a/crates/compiler/builtins/bitcode/src/list.zig b/crates/compiler/builtins/bitcode/src/list.zig index a906179130..df89083b43 100644 --- a/crates/compiler/builtins/bitcode/src/list.zig +++ b/crates/compiler/builtins/bitcode/src/list.zig @@ -125,53 +125,6 @@ pub const RocList = extern struct { return new_list; } - // We follow roughly the [fbvector](https://github.com/facebook/folly/blob/main/folly/docs/FBVector.md) when it comes to growing a RocList. - // Here is [their growth strategy](https://github.com/facebook/folly/blob/3e0525988fd444201b19b76b390a5927c15cb697/folly/FBVector.h#L1128) for push_back: - // - // (1) initial size - // Instead of growing to size 1 from empty, fbvector allocates at least - // 64 bytes. You may still use reserve to reserve a lesser amount of - // memory. - // (2) 1.5x - // For medium-sized vectors, the growth strategy is 1.5x. See the docs - // for details. - // This does not apply to very small or very large fbvectors. This is a - // heuristic. - // - // In our case, we exposed allocate and reallocate, which will use a smart growth stategy. - // We also expose allocateExact and reallocateExact for case where a specific number of elements is requested. - - // calculateCapacity should only be called in cases the list will be growing. - // requested_length should always be greater than old_capacity. - inline fn calculateCapacity( - old_capacity: usize, - requested_length: usize, - element_width: usize, - ) usize { - // TODO: there are two adjustments that would likely lead to better results for Roc. - // 1. Deal with the fact we allocate an extra u64 for refcount. - // This may lead to allocating page size + 8 bytes. - // That could mean allocating an entire page for 8 bytes of data which isn't great. - // 2. Deal with the fact that we can request more than 1 element at a time. - // fbvector assumes just appending 1 element at a time when using this algorithm. - // As such, they will generally grow in a way that should better match certain memory multiple. - // This is also the normal case for roc, but we could also grow by a much larger amount. - // We may want to round to multiples of 2 or something similar. - var new_capacity: usize = 0; - if (element_width == 0) { - return requested_length; - } else if (old_capacity == 0) { - new_capacity = 64 / element_width; - } else if (old_capacity < 4096 / element_width) { - new_capacity = old_capacity * 2; - } else if (old_capacity > 4096 * 32 / element_width) { - new_capacity = old_capacity * 2; - } else { - new_capacity = (old_capacity * 3 + 1) / 2; - } - return @maximum(new_capacity, requested_length); - } - pub fn allocate( alignment: u32, length: usize, @@ -181,7 +134,7 @@ pub const RocList = extern struct { return empty(); } - const capacity = calculateCapacity(0, length, element_width); + const capacity = utils.calculateCapacity(0, length, element_width); const data_bytes = capacity * element_width; return RocList{ .bytes = utils.allocateWithRefcount(data_bytes, alignment), @@ -190,23 +143,6 @@ pub const RocList = extern struct { }; } - pub fn allocateExact( - alignment: u32, - length: usize, - element_width: usize, - ) RocList { - if (length == 0) { - return empty(); - } - - const data_bytes = length * element_width; - return RocList{ - .bytes = utils.allocateWithRefcount(data_bytes, alignment), - .length = length, - .capacity = length, - }; - } - pub fn reallocate( self: RocList, alignment: u32, @@ -218,67 +154,36 @@ pub const RocList = extern struct { if (self.capacity >= new_length) { return RocList{ .bytes = self.bytes, .length = new_length, .capacity = self.capacity }; } else { - const new_capacity = calculateCapacity(self.capacity, new_length, element_width); + const new_capacity = utils.calculateCapacity(self.capacity, new_length, element_width); const new_source = utils.unsafeReallocate(source_ptr, alignment, self.len(), new_capacity, element_width); return RocList{ .bytes = new_source, .length = new_length, .capacity = new_capacity }; } } - // TODO: Investigate the performance of this. - // Maybe we should just always reallocate to the new_length instead of expanding capacity? - const new_capacity = if (self.capacity >= new_length) self.capacity else calculateCapacity(self.capacity, new_length, element_width); - return self.reallocateFresh(alignment, new_length, new_capacity, element_width); + return self.reallocateFresh(alignment, new_length, element_width); } return RocList.allocate(alignment, new_length, element_width); } - pub fn reallocateExact( - self: RocList, - alignment: u32, - new_length: usize, - element_width: usize, - ) RocList { - if (self.bytes) |source_ptr| { - if (self.isUnique()) { - if (self.capacity >= new_length) { - return RocList{ .bytes = self.bytes, .length = new_length, .capacity = self.capacity }; - } else { - const new_source = utils.unsafeReallocate(source_ptr, alignment, self.len(), new_length, element_width); - return RocList{ .bytes = new_source, .length = new_length, .capacity = new_length }; - } - } - return self.reallocateFresh(alignment, new_length, new_length, element_width); - } - return RocList.allocateExact(alignment, new_length, element_width); - } - /// reallocate by explicitly making a new allocation and copying elements over fn reallocateFresh( self: RocList, alignment: u32, new_length: usize, - new_capacity: usize, element_width: usize, ) RocList { const old_length = self.length; const delta_length = new_length - old_length; - const data_bytes = new_capacity * element_width; - const first_slot = utils.allocateWithRefcount(data_bytes, alignment); + const result = RocList.allocate(alignment, new_length, element_width); // transfer the memory if (self.bytes) |source_ptr| { - const dest_ptr = first_slot; + const dest_ptr = result.bytes orelse unreachable; @memcpy(dest_ptr, source_ptr, old_length * element_width); @memset(dest_ptr + old_length * element_width, 0, delta_length * element_width); } - const result = RocList{ - .bytes = first_slot, - .length = new_length, - .capacity = new_capacity, - }; - utils.decref(self.bytes, old_length * element_width, alignment); return result; @@ -503,7 +408,7 @@ pub fn listWithCapacity( alignment: u32, element_width: usize, ) callconv(.C) RocList { - var output = RocList.allocateExact(alignment, capacity, element_width); + var output = RocList.allocate(alignment, capacity, element_width); output.length = 0; return output; } diff --git a/crates/compiler/builtins/bitcode/src/str.zig b/crates/compiler/builtins/bitcode/src/str.zig index 6feacda78e..3b3463f5f4 100644 --- a/crates/compiler/builtins/bitcode/src/str.zig +++ b/crates/compiler/builtins/bitcode/src/str.zig @@ -50,7 +50,7 @@ pub const RocStr = extern struct { // This clones the pointed-to bytes if they won't fit in a // small string, and returns a (pointer, len) tuple which points to them. pub fn init(bytes_ptr: [*]const u8, length: usize) RocStr { - var result = RocStr.allocate(length, length); + var result = RocStr.allocate(length); @memcpy(result.asU8ptr(), bytes_ptr, length); return result; @@ -70,11 +70,14 @@ pub const RocStr = extern struct { }; } - // allocate space for a (big or small) RocStr, but put nothing in it yet - pub fn allocate(length: usize, capacity: usize) RocStr { - const result_is_big = capacity >= SMALL_STRING_SIZE; + // allocate space for a (big or small) RocStr, but put nothing in it yet. + // May have a larger capacity than the length. + pub fn allocate(length: usize) RocStr { + const element_width = 1; + const result_is_big = length >= SMALL_STRING_SIZE; if (result_is_big) { + const capacity = utils.calculateCapacity(0, length, element_width); return RocStr.allocateBig(length, capacity); } else { var string = RocStr.empty(); @@ -91,25 +94,6 @@ pub const RocStr = extern struct { } } - // This takes ownership of the pointed-to bytes if they won't fit in a - // small string, and returns a (pointer, len) tuple which points to them. - pub fn withCapacity(length: usize) RocStr { - const roc_str_size = @sizeOf(RocStr); - - if (length < roc_str_size) { - return RocStr.empty(); - } else { - var new_bytes = utils.alloc(length, RocStr.alignment) catch unreachable; - - var new_bytes_ptr: [*]u8 = @ptrCast([*]u8, &new_bytes); - - return RocStr{ - .str_bytes = new_bytes_ptr, - .str_len = length, - }; - } - } - pub fn eq(self: RocStr, other: RocStr) bool { // If they are byte-for-byte equal, they're definitely equal! if (self.str_bytes == other.str_bytes and self.str_len == other.str_len and self.str_capacity == other.str_capacity) { @@ -169,13 +153,19 @@ pub const RocStr = extern struct { pub fn reallocate( self: RocStr, new_length: usize, - new_capacity: usize, ) RocStr { const element_width = 1; const old_capacity = self.getCapacity(); if (self.str_bytes) |source_ptr| { - if (self.isUnique() and !self.isSmallStr()) { + const unique = self.isUnique(); + if (unique and old_capacity > new_length) { + var output = self; + output.setLen(new_length); + return output; + } + if (unique and !self.isSmallStr()) { + const new_capacity = utils.calculateCapacity(old_capacity, new_length, element_width); const new_source = utils.unsafeReallocate( source_ptr, RocStr.alignment, @@ -188,19 +178,18 @@ pub const RocStr = extern struct { } } - return self.reallocateFresh(new_length, new_capacity); + return self.reallocateFresh(new_length); } /// reallocate by explicitly making a new allocation and copying elements over - pub fn reallocateFresh( + fn reallocateFresh( self: RocStr, new_length: usize, - new_capacity: usize, ) RocStr { const old_length = self.len(); const delta_length = new_length - old_length; - const result = RocStr.allocate(new_length, new_capacity); + const result = RocStr.allocate(new_length); // transfer the memory @@ -238,6 +227,14 @@ pub const RocStr = extern struct { } } + pub fn setLen(self: *RocStr, length: usize) void { + if (self.isSmallStr()) { + self.asU8ptr()[@sizeOf(RocStr) - 1] = @intCast(u8, length) | 0b1000_0000; + } else { + self.str_len = length; + } + } + pub fn getCapacity(self: RocStr) usize { if (self.isSmallStr()) { return SMALL_STR_MAX_LENGTH; @@ -1387,7 +1384,7 @@ pub fn repeat(string: RocStr, count: usize) callconv(.C) RocStr { const bytes_len = string.len(); const bytes_ptr = string.asU8ptr(); - var ret_string = RocStr.allocate(count * bytes_len, count * bytes_len); + var ret_string = RocStr.allocate(count * bytes_len); var ret_string_ptr = ret_string.asU8ptr(); var i: usize = 0; @@ -1528,7 +1525,7 @@ fn strConcat(arg1: RocStr, arg2: RocStr) RocStr { } else { const combined_length = arg1.len() + arg2.len(); - const result = arg1.reallocate(combined_length, combined_length); + const result = arg1.reallocate(combined_length); @memcpy(result.asU8ptr() + arg1.len(), arg2.asU8ptr(), arg2.len()); @@ -1600,7 +1597,7 @@ fn strJoinWith(list: RocListStr, separator: RocStr) RocStr { // include size of the separator total_size += separator.len() * (len - 1); - var result = RocStr.allocate(total_size, total_size); + var result = RocStr.allocate(total_size); var result_ptr = result.asU8ptr(); var offset: usize = 0; @@ -2512,14 +2509,14 @@ test "capacity: big string" { var data = RocStr.init(data_bytes, data_bytes.len); defer data.deinit(); - try expectEqual(data.getCapacity(), data_bytes.len); + try expect(data.getCapacity() >= data_bytes.len); } pub fn appendScalar(string: RocStr, scalar_u32: u32) callconv(.C) RocStr { const scalar = @intCast(u21, scalar_u32); const width = std.unicode.utf8CodepointSequenceLength(scalar) catch unreachable; - var output = string.reallocate(string.len() + width, string.len() + width); + var output = string.reallocate(string.len() + width); var slice = output.asSliceWithCapacity(); _ = std.unicode.utf8Encode(scalar, slice[string.len() .. string.len() + width]) catch unreachable; @@ -2587,17 +2584,21 @@ test "appendScalar: big 😀" { try expect(actual.eq(expected)); } -pub fn reserve(string: RocStr, capacity: usize) callconv(.C) RocStr { - if (capacity > string.getCapacity()) { - // expand allocation but keep string length the same - return string.reallocate(string.len(), capacity); - } else { +pub fn reserve(string: RocStr, spare: usize) callconv(.C) RocStr { + const old_length = string.len(); + if (string.getCapacity() >= old_length + spare) { return string; + } else { + var output = string.reallocate(old_length + spare); + output.setLen(old_length); + return output; } } pub fn withCapacity(capacity: usize) callconv(.C) RocStr { - return RocStr.allocate(0, capacity); + var str = RocStr.allocate(capacity); + str.setLen(0); + return str; } pub fn getScalarUnsafe(string: RocStr, index: usize) callconv(.C) extern struct { bytesParsed: usize, scalar: u32 } { diff --git a/crates/compiler/builtins/bitcode/src/utils.zig b/crates/compiler/builtins/bitcode/src/utils.zig index 99c739977f..b481205f75 100644 --- a/crates/compiler/builtins/bitcode/src/utils.zig +++ b/crates/compiler/builtins/bitcode/src/utils.zig @@ -213,6 +213,53 @@ inline fn decref_ptr_to_refcount( } } +// We follow roughly the [fbvector](https://github.com/facebook/folly/blob/main/folly/docs/FBVector.md) when it comes to growing a RocList. +// Here is [their growth strategy](https://github.com/facebook/folly/blob/3e0525988fd444201b19b76b390a5927c15cb697/folly/FBVector.h#L1128) for push_back: +// +// (1) initial size +// Instead of growing to size 1 from empty, fbvector allocates at least +// 64 bytes. You may still use reserve to reserve a lesser amount of +// memory. +// (2) 1.5x +// For medium-sized vectors, the growth strategy is 1.5x. See the docs +// for details. +// This does not apply to very small or very large fbvectors. This is a +// heuristic. +// +// In our case, we exposed allocate and reallocate, which will use a smart growth stategy. +// We also expose allocateExact and reallocateExact for case where a specific number of elements is requested. + +// calculateCapacity should only be called in cases the list will be growing. +// requested_length should always be greater than old_capacity. +pub inline fn calculateCapacity( + old_capacity: usize, + requested_length: usize, + element_width: usize, +) usize { + // TODO: there are two adjustments that would likely lead to better results for Roc. + // 1. Deal with the fact we allocate an extra u64 for refcount. + // This may lead to allocating page size + 8 bytes. + // That could mean allocating an entire page for 8 bytes of data which isn't great. + // 2. Deal with the fact that we can request more than 1 element at a time. + // fbvector assumes just appending 1 element at a time when using this algorithm. + // As such, they will generally grow in a way that should better match certain memory multiple. + // This is also the normal case for roc, but we could also grow by a much larger amount. + // We may want to round to multiples of 2 or something similar. + var new_capacity: usize = 0; + if (element_width == 0) { + return requested_length; + } else if (old_capacity == 0) { + new_capacity = 64 / element_width; + } else if (old_capacity < 4096 / element_width) { + new_capacity = old_capacity * 2; + } else if (old_capacity > 4096 * 32 / element_width) { + new_capacity = old_capacity * 2; + } else { + new_capacity = (old_capacity * 3 + 1) / 2; + } + return @maximum(new_capacity, requested_length); +} + pub fn allocateWithRefcountC( data_bytes: usize, element_alignment: u32, diff --git a/crates/compiler/builtins/roc/Str.roc b/crates/compiler/builtins/roc/Str.roc index f79973f922..3e10116b05 100644 --- a/crates/compiler/builtins/roc/Str.roc +++ b/crates/compiler/builtins/roc/Str.roc @@ -299,7 +299,7 @@ replaceEach = \haystack, needle, flower -> Ok { before, after } -> # We found at least one needle, so start the buffer off with # `before` followed by the first replacement flower. - Str.reserve "" (Str.countUtf8Bytes haystack) + Str.withCapacity (Str.countUtf8Bytes haystack) |> Str.concat before |> Str.concat flower |> replaceEachHelp after needle flower @@ -504,7 +504,7 @@ walkUtf8WithIndexHelp = \string, state, step, index, length -> else state -## Make sure at least some number of bytes fit in this string without reallocating +## Enlarge the Str for at least capacity additional bytes reserve : Str, Nat -> Str ## is UB when the scalar is invalid diff --git a/crates/roc_std/tests/test_roc_std.rs b/crates/roc_std/tests/test_roc_std.rs index 9c5d9c1824..d81b93d896 100644 --- a/crates/roc_std/tests/test_roc_std.rs +++ b/crates/roc_std/tests/test_roc_std.rs @@ -126,7 +126,7 @@ mod test_roc_std { roc_str.reserve(42); - assert_eq!(roc_str.capacity(), 42); + assert_gte!(roc_str.capacity(), 42); } #[test] @@ -135,7 +135,7 @@ mod test_roc_std { roc_str.reserve(5000); - assert_eq!(roc_str.capacity(), 5000); + assert_gte!(roc_str.capacity(), 5000); } #[test]