mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-03 19:58:18 +00:00
add seamless slices for str
This commit is contained in:
parent
24c403eba0
commit
3978059aa2
5 changed files with 122 additions and 35 deletions
|
@ -155,6 +155,7 @@ comptime {
|
|||
exportStrFn(str.strCloneTo, "clone_to");
|
||||
exportStrFn(str.withCapacity, "with_capacity");
|
||||
exportStrFn(str.strGraphemes, "graphemes");
|
||||
exportStrFn(str.strRefcountPtr, "refcount_ptr");
|
||||
|
||||
inline for (INTEGERS) |T| {
|
||||
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");
|
||||
|
|
|
@ -18,6 +18,7 @@ const InPlace = enum(u8) {
|
|||
|
||||
const MASK_ISIZE: isize = std.math.minInt(isize);
|
||||
const MASK: usize = @bitCast(usize, MASK_ISIZE);
|
||||
// Top bit of `str_len`: set on a non-small string to mark it as a seamless slice.
const SEAMLESS_SLICE_BIT: usize = MASK;
|
||||
|
||||
const SMALL_STR_MAX_LENGTH = SMALL_STRING_SIZE - 1;
|
||||
const SMALL_STRING_SIZE = @sizeOf(RocStr);
|
||||
|
@ -58,11 +59,12 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
|
||||
pub fn fromByteList(list: RocList) RocStr {
|
||||
// TODO: upon adding string seamless slices, I believe this branch can be changed to bit manipulation.
|
||||
if (list.isSeamlessSlice()) {
|
||||
// Str doesn't have seamless slices yet.
|
||||
// Need to copy.
|
||||
return RocStr.init(@ptrCast([*]const u8, list.bytes), list.length);
|
||||
return RocStr{
|
||||
.str_bytes = list.bytes,
|
||||
.str_len = list.length | SEAMLESS_SLICE_BIT,
|
||||
.str_capacity = list.capacity_or_ref_ptr & (~SEAMLESS_SLICE_BIT),
|
||||
};
|
||||
}
|
||||
return RocStr{
|
||||
.str_bytes = list.bytes,
|
||||
|
@ -71,6 +73,10 @@ pub const RocStr = extern struct {
|
|||
};
|
||||
}
|
||||
|
||||
// Reports whether this string is a seamless slice: it must not be a small
// string, and the top bit of `str_len` must be set.
pub fn isSeamlessSlice(self: RocStr) bool {
    if (self.isSmallStr()) {
        return false;
    }
    // Viewed as a signed integer, a length with its top bit set is negative.
    const len_as_signed = @bitCast(isize, self.str_len);
    return len_as_signed < 0;
}
|
||||
|
||||
// Builds a RocStr from `slice` by passing its pointer and length to `RocStr.init`.
pub fn fromSlice(slice: []const u8) RocStr {
    const slice_ptr = slice.ptr;
    const slice_len = slice.len;
    return RocStr.init(slice_ptr, slice_len);
}
|
||||
|
@ -103,9 +109,39 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
}
|
||||
|
||||
// Produces a word-sized mask from the top bit of `str_len`:
// all ones if the string is a seamless slice, all zeros otherwise.
// This is done without branching for optimization purposes.
// NOTE(review): unlike `isSeamlessSlice`, this does not check `isSmallStr` first;
// presumably callers must rule out small strings themselves. Confirm.
pub fn seamlessSliceMask(self: RocStr) usize {
    const sign_shift = @bitSizeOf(isize) - 1;
    const len_as_signed = @bitCast(isize, self.str_len);
    // Right-shifting a signed value replicates its sign bit across the whole word.
    return @bitCast(usize, len_as_signed >> sign_shift);
}
|
||||
|
||||
// Returns a pointer to just after the refcount.
// Pointing just past the refcount is an optimization for other shared code paths.
// For regular strings, this is simply their bytes pointer.
// For seamless slices, this is the value stored in `str_capacity`, shifted left by one bit.
// The result is not a valid value if the input is a small string.
pub fn getRefcountPtr(self: RocStr) ?[*]u8 {
    // All ones for a seamless slice, all zeros otherwise; lets us pick a
    // candidate below without branching.
    const select_slice = self.seamlessSliceMask();
    const select_str = ~select_slice;

    const from_str = @ptrToInt(self.str_bytes) & select_str;
    const from_slice = (self.str_capacity << 1) & select_slice;

    return @intToPtr(?[*]u8, from_str | from_slice);
}
|
||||
|
||||
// Hands the refcount slot of this string's allocation to `utils.increfC`
// together with `n`. Small strings, and strings whose refcount pointer is
// null, are left untouched.
pub fn incref(self: RocStr, n: usize) void {
    if (self.isSmallStr()) {
        return;
    }

    const ref_ptr = self.getRefcountPtr();
    if (ref_ptr == null) {
        return;
    }

    // getRefcountPtr points just after the refcount, so the refcount itself
    // is the isize immediately before it.
    const after_refcount = @ptrCast([*]isize, @alignCast(@alignOf(isize), ref_ptr));
    const refcount_slot = @ptrCast(*isize, after_refcount - 1);
    utils.increfC(refcount_slot, @intCast(isize, n));
}
|
||||
|
||||
// Calls `utils.decref` exactly once for this string's allocation.
// Small strings are skipped.
pub fn decref(self: RocStr) void {
    if (self.isSmallStr()) {
        return;
    }
    // Always locate the refcount through getRefcountPtr. For a seamless slice,
    // `str_bytes` may point into the middle of another string's allocation, so
    // it must not be passed here, and decrementing through both pointers would
    // release the allocation twice.
    utils.decref(self.getRefcountPtr(), self.str_capacity, RocStr.alignment);
}
|
||||
|
||||
|
@ -129,17 +165,18 @@ pub const RocStr = extern struct {
|
|||
|
||||
// It's faster to compare pointer-sized words rather than bytes, as far as possible
|
||||
// The bytes are always pointer-size aligned due to the refcount
|
||||
const self_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), self_bytes));
|
||||
const other_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), other_bytes));
|
||||
var w: usize = 0;
|
||||
while (w < self_len / @sizeOf(usize)) : (w += 1) {
|
||||
if (self_words[w] != other_words[w]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// const self_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), self_bytes));
|
||||
// const other_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), other_bytes));
|
||||
// var w: usize = 0;
|
||||
// while (w < self_len / @sizeOf(usize)) : (w += 1) {
|
||||
// if (self_words[w] != other_words[w]) {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
|
||||
// Compare the leftover bytes
|
||||
var b = w * @sizeOf(usize);
|
||||
// var b = w * @sizeOf(usize);
|
||||
var b: usize = 0;
|
||||
while (b < self_len) : (b += 1) {
|
||||
if (self_bytes[b] != other_bytes[b]) {
|
||||
return false;
|
||||
|
@ -238,7 +275,7 @@ pub const RocStr = extern struct {
|
|||
if (self.isSmallStr()) {
|
||||
return self.asArray()[@sizeOf(RocStr) - 1] ^ 0b1000_0000;
|
||||
} else {
|
||||
return self.str_len;
|
||||
return self.str_len & (~SEAMLESS_SLICE_BIT);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -246,13 +283,15 @@ pub const RocStr = extern struct {
|
|||
if (self.isSmallStr()) {
|
||||
self.asU8ptrMut()[@sizeOf(RocStr) - 1] = @intCast(u8, length) | 0b1000_0000;
|
||||
} else {
|
||||
self.str_len = length;
|
||||
self.str_len = length | (SEAMLESS_SLICE_BIT & self.str_len);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn getCapacity(self: RocStr) usize {
|
||||
if (self.isSmallStr()) {
|
||||
return SMALL_STR_MAX_LENGTH;
|
||||
} else if (self.isSeamlessSlice()) {
|
||||
return self.str_len & (~SEAMLESS_SLICE_BIT);
|
||||
} else {
|
||||
return self.str_capacity;
|
||||
}
|
||||
|
@ -299,6 +338,9 @@ pub const RocStr = extern struct {
|
|||
// then the next byte is off the end of the struct;
|
||||
// in that case, we are also not null-terminated!
|
||||
return length != 0 and length != longest_small_str;
|
||||
} else if (self.isSeamlessSlice()) {
|
||||
// Seamless slices cannot use the byte past their end, even if it is null.
|
||||
return false;
|
||||
} else {
|
||||
// This is a big string, and it's not empty, so we can safely
|
||||
// dereference the pointer.
|
||||
|
@ -334,7 +376,7 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
|
||||
fn refcountMachine(self: RocStr) usize {
|
||||
if (self.getCapacity() == 0 or self.isSmallStr()) {
|
||||
if ((self.getCapacity() == 0 and !self.isSeamlessSlice()) or self.isSmallStr()) {
|
||||
return utils.REFCOUNT_ONE;
|
||||
}
|
||||
|
||||
|
@ -811,13 +853,34 @@ pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
|
|||
return list;
|
||||
}
|
||||
|
||||
const Init = fn (bytes: [*]u8, offset: usize, len: usize, ref_ptr: usize) RocStr;
|
||||
// `Init`-shaped constructor for segments of a small string: builds a RocStr
// via `RocStr.init` from `len` bytes starting at `bytes + offset`.
// The final (ref_ptr) argument is ignored.
fn initFromSmallStr(bytes: [*]u8, offset: usize, len: usize, _: usize) RocStr {
    const segment_start = bytes + offset;
    return RocStr.init(segment_start, len);
}
|
||||
|
||||
// Builds a seamless slice over `len` bytes starting at `bytes + offset`,
// instead of copying them into a new small str.
// `ref_ptr` must already be shifted so that it is ready to be stored in a seamless slice.
fn initFromBigStr(bytes: [*]u8, offset: usize, len: usize, ref_ptr: usize) RocStr {
    const segment_start = bytes + offset;
    // Setting SEAMLESS_SLICE_BIT in the length marks the result as a slice.
    const tagged_len = len | SEAMLESS_SLICE_BIT;
    return RocStr{
        .str_bytes = segment_start,
        .str_len = tagged_len,
        .str_capacity = ref_ptr,
    };
}
|
||||
|
||||
fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||
var ret_array_index: usize = 0;
|
||||
var slice_start_index: usize = 0;
|
||||
var str_index: usize = 0;
|
||||
|
||||
const str_bytes = string.asU8ptr();
|
||||
var mut_str = string;
|
||||
const str_bytes = mut_str.asU8ptrMut();
|
||||
const str_len = string.len();
|
||||
const ref_ptr = @ptrToInt(string.getRefcountPtr()) >> 1;
|
||||
const init_fn = if (string.isSmallStr())
|
||||
initFromSmallStr
|
||||
else
|
||||
initFromBigStr;
|
||||
|
||||
const delimiter_bytes_ptrs = delimiter.asU8ptr();
|
||||
const delimiter_len = delimiter.len();
|
||||
|
@ -849,7 +912,7 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
|||
if (matches_delimiter) {
|
||||
const segment_len: usize = str_index - slice_start_index;
|
||||
|
||||
array[ret_array_index] = RocStr.init(str_bytes + slice_start_index, segment_len);
|
||||
array[ret_array_index] = init_fn(str_bytes, slice_start_index, segment_len, ref_ptr);
|
||||
slice_start_index = str_index + delimiter_len;
|
||||
ret_array_index += 1;
|
||||
str_index += delimiter_len;
|
||||
|
@ -859,7 +922,12 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
|||
}
|
||||
}
|
||||
|
||||
array[ret_array_index] = RocStr.init(str_bytes + slice_start_index, str_len - slice_start_index);
|
||||
array[ret_array_index] = init_fn(str_bytes, slice_start_index, str_len - slice_start_index, ref_ptr);
|
||||
|
||||
if (!string.isSmallStr()) {
|
||||
// Correct refcount for all of the splits made.
|
||||
mut_str.incref(ret_array_index + 1);
|
||||
}
|
||||
}
|
||||
|
||||
test "strSplitHelp: empty delimiter" {
|
||||
|
@ -2800,3 +2868,9 @@ pub fn strCloneTo(
|
|||
return extra_offset + slice.len;
|
||||
}
|
||||
}
|
||||
|
||||
// C-calling-convention wrapper that forwards to `RocStr.getRefcountPtr` for `string`.
pub fn strRefcountPtr(string: RocStr) callconv(.C) ?[*]u8 {
    const refcount_ptr = string.getRefcountPtr();
    return refcount_ptr;
}
|
||||
|
|
|
@ -333,6 +333,7 @@ pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
|
|||
pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
|
||||
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
|
||||
pub const STR_GRAPHEMES: &str = "roc_builtins.str.graphemes";
|
||||
pub const STR_REFCOUNT_PTR: &str = "roc_builtins.str.refcount_ptr";
|
||||
|
||||
pub const LIST_MAP: &str = "roc_builtins.list.map";
|
||||
pub const LIST_MAP2: &str = "roc_builtins.list.map2";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue