mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-26 13:29:12 +00:00
add seamless slices for str
This commit is contained in:
parent
24c403eba0
commit
3978059aa2
5 changed files with 122 additions and 35 deletions
|
@ -155,6 +155,7 @@ comptime {
|
|||
exportStrFn(str.strCloneTo, "clone_to");
|
||||
exportStrFn(str.withCapacity, "with_capacity");
|
||||
exportStrFn(str.strGraphemes, "graphemes");
|
||||
exportStrFn(str.strRefcountPtr, "refcount_ptr");
|
||||
|
||||
inline for (INTEGERS) |T| {
|
||||
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");
|
||||
|
|
|
@ -18,6 +18,7 @@ const InPlace = enum(u8) {
|
|||
|
||||
const MASK_ISIZE: isize = std.math.minInt(isize);
|
||||
const MASK: usize = @bitCast(usize, MASK_ISIZE);
|
||||
const SEAMLESS_SLICE_BIT: usize = MASK;
|
||||
|
||||
const SMALL_STR_MAX_LENGTH = SMALL_STRING_SIZE - 1;
|
||||
const SMALL_STRING_SIZE = @sizeOf(RocStr);
|
||||
|
@ -58,11 +59,12 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
|
||||
pub fn fromByteList(list: RocList) RocStr {
|
||||
// TODO: upon adding string seamless slices, I believe this branch can be changed to bit manipulation.
|
||||
if (list.isSeamlessSlice()) {
|
||||
// Str doesn't have seamless slices yet.
|
||||
// Need to copy.
|
||||
return RocStr.init(@ptrCast([*]const u8, list.bytes), list.length);
|
||||
return RocStr{
|
||||
.str_bytes = list.bytes,
|
||||
.str_len = list.length | SEAMLESS_SLICE_BIT,
|
||||
.str_capacity = list.capacity_or_ref_ptr & (~SEAMLESS_SLICE_BIT),
|
||||
};
|
||||
}
|
||||
return RocStr{
|
||||
.str_bytes = list.bytes,
|
||||
|
@ -71,6 +73,10 @@ pub const RocStr = extern struct {
|
|||
};
|
||||
}
|
||||
|
||||
/// Reports whether this string is a seamless slice.
/// Small strings are never seamless slices; for any other string the answer
/// is whether the top bit of `str_len` (SEAMLESS_SLICE_BIT) is set.
pub fn isSeamlessSlice(self: RocStr) bool {
    if (self.isSmallStr()) {
        return false;
    }
    // A set top bit is exactly what makes the length negative when viewed as an isize.
    return (self.str_len & SEAMLESS_SLICE_BIT) != 0;
}
|
||||
|
||||
/// Builds a RocStr from a byte slice by handing the slice's pointer and
/// length to `RocStr.init`.
pub fn fromSlice(slice: []const u8) RocStr {
    const bytes = slice.ptr;
    const length = slice.len;
    return RocStr.init(bytes, length);
}
|
||||
|
@ -103,9 +109,39 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns all ones if the top bit of `str_len` (the seamless slice bit) is set.
// Otherwise, returns all zeros.
// Computed without branching for optimization purposes: the length is
// reinterpreted as signed and its sign bit is smeared across the whole word
// with an arithmetic right shift.
pub fn seamlessSliceMask(self: RocStr) usize {
    const signed_len = @bitCast(isize, self.str_len);
    const sign_shift = @bitSizeOf(isize) - 1;
    return @bitCast(usize, signed_len >> sign_shift);
}
|
||||
|
||||
// Returns a pointer to just after the refcount.
// It is just after the refcount as an optimization for other shared code paths.
// For a regular string, this is simply its bytes pointer.
// For a seamless slice, this is the value stored in `str_capacity`, shifted
// left by one bit to undo the shift applied when it was stored.
// This does not return a valid value if the input is a small string.
pub fn getRefcountPtr(self: RocStr) ?[*]u8 {
    // All ones for a seamless slice, all zeros otherwise; used to pick one of
    // the two candidate addresses without branching.
    const mask = self.seamlessSliceMask();
    const from_bytes = @ptrToInt(self.str_bytes) & ~mask;
    const from_slice = (self.str_capacity << 1) & mask;
    return @intToPtr(?[*]u8, from_bytes | from_slice);
}
|
||||
|
||||
/// Increments the reference count of this string's allocation by `n`.
/// Does nothing for small strings or when `getRefcountPtr` yields null.
pub fn incref(self: RocStr, n: usize) void {
    if (self.isSmallStr()) return;

    const data_ptr = self.getRefcountPtr() orelse return;

    // `data_ptr` points just past the refcount, so the refcount lives in the
    // isize-sized word immediately before it.
    const words = @ptrCast([*]isize, @alignCast(@alignOf(isize), data_ptr));
    utils.increfC(@ptrCast(*isize, words - 1), @intCast(isize, n));
}
|
||||
|
||||
/// Decrements the reference count of this string's allocation.
/// Does nothing for small strings.
pub fn decref(self: RocStr) void {
    if (!self.isSmallStr()) {
        // Decrement exactly once, and always through getRefcountPtr():
        // a seamless slice's `str_bytes` is an offset into another string's
        // bytes, so it cannot be used to locate the refcount.
        utils.decref(self.getRefcountPtr(), self.str_capacity, RocStr.alignment);
    }
}
|
||||
|
||||
|
@ -129,17 +165,18 @@ pub const RocStr = extern struct {
|
|||
|
||||
// It's faster to compare pointer-sized words rather than bytes, as far as possible
|
||||
// The bytes are always pointer-size aligned due to the refcount
|
||||
const self_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), self_bytes));
|
||||
const other_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), other_bytes));
|
||||
var w: usize = 0;
|
||||
while (w < self_len / @sizeOf(usize)) : (w += 1) {
|
||||
if (self_words[w] != other_words[w]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// const self_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), self_bytes));
|
||||
// const other_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), other_bytes));
|
||||
// var w: usize = 0;
|
||||
// while (w < self_len / @sizeOf(usize)) : (w += 1) {
|
||||
// if (self_words[w] != other_words[w]) {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
|
||||
// Compare the leftover bytes
|
||||
var b = w * @sizeOf(usize);
|
||||
// var b = w * @sizeOf(usize);
|
||||
var b: usize = 0;
|
||||
while (b < self_len) : (b += 1) {
|
||||
if (self_bytes[b] != other_bytes[b]) {
|
||||
return false;
|
||||
|
@ -238,7 +275,7 @@ pub const RocStr = extern struct {
|
|||
if (self.isSmallStr()) {
|
||||
return self.asArray()[@sizeOf(RocStr) - 1] ^ 0b1000_0000;
|
||||
} else {
|
||||
return self.str_len;
|
||||
return self.str_len & (~SEAMLESS_SLICE_BIT);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -246,13 +283,15 @@ pub const RocStr = extern struct {
|
|||
if (self.isSmallStr()) {
|
||||
self.asU8ptrMut()[@sizeOf(RocStr) - 1] = @intCast(u8, length) | 0b1000_0000;
|
||||
} else {
|
||||
self.str_len = length;
|
||||
self.str_len = length | (SEAMLESS_SLICE_BIT & self.str_len);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn getCapacity(self: RocStr) usize {
|
||||
if (self.isSmallStr()) {
|
||||
return SMALL_STR_MAX_LENGTH;
|
||||
} else if (self.isSeamlessSlice()) {
|
||||
return self.str_len & (~SEAMLESS_SLICE_BIT);
|
||||
} else {
|
||||
return self.str_capacity;
|
||||
}
|
||||
|
@ -299,6 +338,9 @@ pub const RocStr = extern struct {
|
|||
// then the next byte is off the end of the struct;
|
||||
// in that case, we are also not null-terminated!
|
||||
return length != 0 and length != longest_small_str;
|
||||
} else if (self.isSeamlessSlice()) {
|
||||
// Seamless slices can not use the character past the end even if it is null.
|
||||
return false;
|
||||
} else {
|
||||
// This is a big string, and it's not empty, so we can safely
|
||||
// dereference the pointer.
|
||||
|
@ -334,7 +376,7 @@ pub const RocStr = extern struct {
|
|||
}
|
||||
|
||||
fn refcountMachine(self: RocStr) usize {
|
||||
if (self.getCapacity() == 0 or self.isSmallStr()) {
|
||||
if ((self.getCapacity() == 0 and !self.isSeamlessSlice()) or self.isSmallStr()) {
|
||||
return utils.REFCOUNT_ONE;
|
||||
}
|
||||
|
||||
|
@ -811,13 +853,34 @@ pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
|
|||
return list;
|
||||
}
|
||||
|
||||
const Init = fn (bytes: [*]u8, offset: usize, len: usize, ref_ptr: usize) RocStr;
|
||||
// Builds a segment by copying `len` bytes starting at `bytes + offset` into a
// fresh RocStr via `RocStr.init`. The final (ref_ptr) argument is ignored.
fn initFromSmallStr(bytes: [*]u8, offset: usize, len: usize, _: usize) RocStr {
    const segment_start = bytes + offset;
    return RocStr.init(segment_start, len);
}
|
||||
|
||||
// Builds a segment as a seamless slice: it points at `bytes + offset`,
// carries `len` tagged with the seamless slice bit, and stores `ref_ptr` in
// the capacity field. No bytes are copied.
// The ref_ptr must already be shifted to be ready for storing in a seamless slice.
fn initFromBigStr(bytes: [*]u8, offset: usize, len: usize, ref_ptr: usize) RocStr {
    const segment_start = bytes + offset;
    const tagged_len = len | SEAMLESS_SLICE_BIT;
    return RocStr{
        .str_bytes = segment_start,
        .str_len = tagged_len,
        .str_capacity = ref_ptr,
    };
}
|
||||
|
||||
fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||
var ret_array_index: usize = 0;
|
||||
var slice_start_index: usize = 0;
|
||||
var str_index: usize = 0;
|
||||
|
||||
const str_bytes = string.asU8ptr();
|
||||
var mut_str = string;
|
||||
const str_bytes = mut_str.asU8ptrMut();
|
||||
const str_len = string.len();
|
||||
const ref_ptr = @ptrToInt(string.getRefcountPtr()) >> 1;
|
||||
const init_fn = if (string.isSmallStr())
|
||||
initFromSmallStr
|
||||
else
|
||||
initFromBigStr;
|
||||
|
||||
const delimiter_bytes_ptrs = delimiter.asU8ptr();
|
||||
const delimiter_len = delimiter.len();
|
||||
|
@ -849,7 +912,7 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
|||
if (matches_delimiter) {
|
||||
const segment_len: usize = str_index - slice_start_index;
|
||||
|
||||
array[ret_array_index] = RocStr.init(str_bytes + slice_start_index, segment_len);
|
||||
array[ret_array_index] = init_fn(str_bytes, slice_start_index, segment_len, ref_ptr);
|
||||
slice_start_index = str_index + delimiter_len;
|
||||
ret_array_index += 1;
|
||||
str_index += delimiter_len;
|
||||
|
@ -859,7 +922,12 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
|||
}
|
||||
}
|
||||
|
||||
array[ret_array_index] = RocStr.init(str_bytes + slice_start_index, str_len - slice_start_index);
|
||||
array[ret_array_index] = init_fn(str_bytes, slice_start_index, str_len - slice_start_index, ref_ptr);
|
||||
|
||||
if (!string.isSmallStr()) {
|
||||
// Correct refcount for all of the splits made.
|
||||
mut_str.incref(ret_array_index + 1);
|
||||
}
|
||||
}
|
||||
|
||||
test "strSplitHelp: empty delimiter" {
|
||||
|
@ -2800,3 +2868,9 @@ pub fn strCloneTo(
|
|||
return extra_offset + slice.len;
|
||||
}
|
||||
}
|
||||
|
||||
/// C-ABI wrapper that returns `string.getRefcountPtr()`.
pub fn strRefcountPtr(
    string: RocStr,
) callconv(.C) ?[*]u8 {
    const refcount_ptr = string.getRefcountPtr();
    return refcount_ptr;
}
|
||||
|
|
|
@ -333,6 +333,7 @@ pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
|
|||
pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
|
||||
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
|
||||
pub const STR_GRAPHEMES: &str = "roc_builtins.str.graphemes";
|
||||
pub const STR_REFCOUNT_PTR: &str = "roc_builtins.str.refcount_ptr";
|
||||
|
||||
pub const LIST_MAP: &str = "roc_builtins.list.map";
|
||||
pub const LIST_MAP2: &str = "roc_builtins.list.map2";
|
||||
|
|
|
@ -63,3 +63,19 @@ pub(crate) fn str_equal<'a, 'ctx, 'env>(
|
|||
bitcode::STR_EQUAL,
|
||||
)
|
||||
}
|
||||
|
||||
// Gets a pointer to just after the refcount for a list or seamless slice.
|
||||
// The value is just after the refcount so that normal lists and seamless slices can share code paths easily.
|
||||
pub(crate) fn str_refcount_ptr<'a, 'ctx, 'env>(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
value: BasicValueEnum<'ctx>,
|
||||
) -> PointerValue<'ctx> {
|
||||
call_str_bitcode_fn(
|
||||
env,
|
||||
&[value],
|
||||
&[],
|
||||
BitcodeReturns::Basic,
|
||||
bitcode::STR_REFCOUNT_PTR,
|
||||
)
|
||||
.into_pointer_value()
|
||||
}
|
||||
|
|
|
@ -2,18 +2,19 @@ use crate::debug_info_init;
|
|||
use crate::llvm::bitcode::call_void_bitcode_fn;
|
||||
use crate::llvm::build::BuilderExt;
|
||||
use crate::llvm::build::{
|
||||
add_func, cast_basic_basic, get_tag_id, tag_pointer_clear_tag_id, use_roc_value, Env,
|
||||
FAST_CALL_CONV,
|
||||
add_func, cast_basic_basic, create_entry_block_alloca, get_tag_id, tag_pointer_clear_tag_id,
|
||||
use_roc_value, Env, FAST_CALL_CONV,
|
||||
};
|
||||
use crate::llvm::build_list::{
|
||||
incrementing_elem_loop, list_capacity_or_ref_ptr, list_refcount_ptr, load_list,
|
||||
};
|
||||
use crate::llvm::build_str::str_refcount_ptr;
|
||||
use crate::llvm::convert::{basic_type_from_layout, zig_str_type, RocUnion};
|
||||
use bumpalo::collections::Vec;
|
||||
use inkwell::basic_block::BasicBlock;
|
||||
use inkwell::module::Linkage;
|
||||
use inkwell::types::{AnyTypeEnum, BasicMetadataTypeEnum, BasicType, BasicTypeEnum};
|
||||
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
|
||||
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue};
|
||||
use inkwell::{AddressSpace, IntPredicate};
|
||||
use roc_module::symbol::Interns;
|
||||
use roc_module::symbol::Symbol;
|
||||
|
@ -75,16 +76,6 @@ impl<'ctx> PointerToRefcount<'ctx> {
|
|||
}
|
||||
}
|
||||
|
||||
fn from_list_wrapper(env: &Env<'_, 'ctx, '_>, list_wrapper: StructValue<'ctx>) -> Self {
|
||||
let data_ptr = env
|
||||
.builder
|
||||
.build_extract_value(list_wrapper, Builtin::WRAPPER_PTR, "read_list_ptr")
|
||||
.unwrap()
|
||||
.into_pointer_value();
|
||||
|
||||
Self::from_ptr_to_data(env, data_ptr)
|
||||
}
|
||||
|
||||
pub fn is_1<'a, 'env>(&self, env: &Env<'a, 'ctx, 'env>) -> IntValue<'ctx> {
|
||||
let current = self.get_refcount(env);
|
||||
let one = match env.target_info.ptr_width() {
|
||||
|
@ -815,9 +806,9 @@ fn modify_refcount_str_help<'a, 'ctx, 'env>(
|
|||
|
||||
let parent = fn_val;
|
||||
|
||||
let str_type = zig_str_type(env);
|
||||
let arg_val =
|
||||
if Layout::Builtin(Builtin::Str).is_passed_by_reference(layout_interner, env.target_info) {
|
||||
let str_type = zig_str_type(env);
|
||||
env.builder
|
||||
.new_build_load(str_type, arg_val.into_pointer_value(), "load_str_to_stack")
|
||||
} else {
|
||||
|
@ -848,7 +839,11 @@ fn modify_refcount_str_help<'a, 'ctx, 'env>(
|
|||
builder.build_conditional_branch(is_big_and_non_empty, modification_block, cont_block);
|
||||
builder.position_at_end(modification_block);
|
||||
|
||||
let refcount_ptr = PointerToRefcount::from_list_wrapper(env, str_wrapper);
|
||||
let str_alloca = create_entry_block_alloca(env, parent, str_type.into(), "str_alloca");
|
||||
env.builder.build_store(str_alloca, str_wrapper);
|
||||
|
||||
let refcount_ptr =
|
||||
PointerToRefcount::from_ptr_to_data(env, str_refcount_ptr(env, str_alloca.into()));
|
||||
let call_mode = mode_to_call_mode(fn_val, mode);
|
||||
refcount_ptr.modify(call_mode, layout, env, layout_interner);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue