mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-26 13:29:12 +00:00
add seamless slices for str
This commit is contained in:
parent
24c403eba0
commit
3978059aa2
5 changed files with 122 additions and 35 deletions
|
@ -155,6 +155,7 @@ comptime {
|
||||||
exportStrFn(str.strCloneTo, "clone_to");
|
exportStrFn(str.strCloneTo, "clone_to");
|
||||||
exportStrFn(str.withCapacity, "with_capacity");
|
exportStrFn(str.withCapacity, "with_capacity");
|
||||||
exportStrFn(str.strGraphemes, "graphemes");
|
exportStrFn(str.strGraphemes, "graphemes");
|
||||||
|
exportStrFn(str.strRefcountPtr, "refcount_ptr");
|
||||||
|
|
||||||
inline for (INTEGERS) |T| {
|
inline for (INTEGERS) |T| {
|
||||||
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");
|
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");
|
||||||
|
|
|
@ -18,6 +18,7 @@ const InPlace = enum(u8) {
|
||||||
|
|
||||||
const MASK_ISIZE: isize = std.math.minInt(isize);
|
const MASK_ISIZE: isize = std.math.minInt(isize);
|
||||||
const MASK: usize = @bitCast(usize, MASK_ISIZE);
|
const MASK: usize = @bitCast(usize, MASK_ISIZE);
|
||||||
|
const SEAMLESS_SLICE_BIT: usize = MASK;
|
||||||
|
|
||||||
const SMALL_STR_MAX_LENGTH = SMALL_STRING_SIZE - 1;
|
const SMALL_STR_MAX_LENGTH = SMALL_STRING_SIZE - 1;
|
||||||
const SMALL_STRING_SIZE = @sizeOf(RocStr);
|
const SMALL_STRING_SIZE = @sizeOf(RocStr);
|
||||||
|
@ -58,11 +59,12 @@ pub const RocStr = extern struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn fromByteList(list: RocList) RocStr {
|
pub fn fromByteList(list: RocList) RocStr {
|
||||||
// TODO: upon adding string seamless slices, I believe this branch can be changed to bit manipulation.
|
|
||||||
if (list.isSeamlessSlice()) {
|
if (list.isSeamlessSlice()) {
|
||||||
// Str doesn't have seamless slices yet.
|
return RocStr{
|
||||||
// Need to copy.
|
.str_bytes = list.bytes,
|
||||||
return RocStr.init(@ptrCast([*]const u8, list.bytes), list.length);
|
.str_len = list.length | SEAMLESS_SLICE_BIT,
|
||||||
|
.str_capacity = list.capacity_or_ref_ptr & (~SEAMLESS_SLICE_BIT),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
return RocStr{
|
return RocStr{
|
||||||
.str_bytes = list.bytes,
|
.str_bytes = list.bytes,
|
||||||
|
@ -71,6 +73,10 @@ pub const RocStr = extern struct {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether this string is a seamless slice.
// A seamless slice is never a small string, and it is marked by the top bit of
// str_len being set (i.e. str_len is negative when reinterpreted as isize).
pub fn isSeamlessSlice(self: RocStr) bool {
|
||||||
|
return !self.isSmallStr() and @bitCast(isize, self.str_len) < 0;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn fromSlice(slice: []const u8) RocStr {
|
pub fn fromSlice(slice: []const u8) RocStr {
|
||||||
return RocStr.init(slice.ptr, slice.len);
|
return RocStr.init(slice.ptr, slice.len);
|
||||||
}
|
}
|
||||||
|
@ -103,9 +109,39 @@ pub const RocStr = extern struct {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This returns all ones if the top bit of str_len is set, which is how a seamless slice string is marked.
|
||||||
|
// Otherwise, it returns all zeros.
|
||||||
|
// This is done without branching for optimization purposes.
// Only str_len is inspected here; unlike isSeamlessSlice there is no isSmallStr() check,
// so the result is presumably only meaningful for strings already known not to be small.
|
||||||
|
pub fn seamlessSliceMask(self: RocStr) usize {
|
||||||
|
// Arithmetic right shift of the signed length by (bit width - 1) copies the sign bit into every bit.
return @bitCast(usize, @bitCast(isize, self.str_len) >> (@bitSizeOf(isize) - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a pointer to just after the refcount.
|
||||||
|
// It is just after the refcount as an optimization for other shared code paths.
|
||||||
|
// For regular (non-slice) strings, it just returns their str_bytes pointer.
|
||||||
|
// For seamless slices, it returns the value stored in str_capacity shifted left by one bit.
|
||||||
|
// This does not return a valid value if the input is a small string.
|
||||||
|
pub fn getRefcountPtr(self: RocStr) ?[*]u8 {
|
||||||
|
// Candidate for a regular string: the address of the bytes themselves.
const str_ref_ptr = @ptrToInt(self.str_bytes);
|
||||||
|
// Candidate for a seamless slice: str_capacity holds the pointer shifted right by one, so undo the shift.
const slice_ref_ptr = self.str_capacity << 1;
|
||||||
|
const slice_mask = self.seamlessSliceMask();
|
||||||
|
// Branchless select: the mask is all ones or all zeros, so exactly one candidate survives the OR.
const ref_ptr = (str_ref_ptr & ~slice_mask) | (slice_ref_ptr & slice_mask);
|
||||||
|
// The return type is an optional pointer, so address zero comes back as null.
return @intToPtr(?[*]u8, ref_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increments the refcount backing this string by `n` via utils.increfC.
// Does nothing for small strings, or when getRefcountPtr() returns null.
pub fn incref(self: RocStr, n: usize) void {
|
||||||
|
if (!self.isSmallStr()) {
|
||||||
|
const ref_ptr = self.getRefcountPtr();
|
||||||
|
if (ref_ptr != null) {
|
||||||
|
// getRefcountPtr() points just after the refcount; view it as isize words so we can step back one.
const isizes: [*]isize = @ptrCast([*]isize, @alignCast(@alignOf(isize), ref_ptr));
|
||||||
|
// `isizes - 1` is the refcount word itself. @intCast assumes `n` fits in an isize.
utils.increfC(@ptrCast(*isize, isizes - 1), @intCast(isize, n));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn decref(self: RocStr) void {
|
pub fn decref(self: RocStr) void {
|
||||||
if (!self.isSmallStr()) {
|
if (!self.isSmallStr()) {
|
||||||
utils.decref(self.str_bytes, self.str_capacity, RocStr.alignment);
|
utils.decref(self.getRefcountPtr(), self.str_capacity, RocStr.alignment);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,17 +165,18 @@ pub const RocStr = extern struct {
|
||||||
|
|
||||||
// It's faster to compare pointer-sized words rather than bytes, as far as possible
|
// It's faster to compare pointer-sized words rather than bytes, as far as possible
|
||||||
// The bytes are always pointer-size aligned due to the refcount
|
// The bytes are always pointer-size aligned due to the refcount
|
||||||
const self_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), self_bytes));
|
// const self_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), self_bytes));
|
||||||
const other_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), other_bytes));
|
// const other_words = @ptrCast([*]const usize, @alignCast(@alignOf(usize), other_bytes));
|
||||||
var w: usize = 0;
|
// var w: usize = 0;
|
||||||
while (w < self_len / @sizeOf(usize)) : (w += 1) {
|
// while (w < self_len / @sizeOf(usize)) : (w += 1) {
|
||||||
if (self_words[w] != other_words[w]) {
|
// if (self_words[w] != other_words[w]) {
|
||||||
return false;
|
// return false;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Compare the leftover bytes
|
// Compare the leftover bytes
|
||||||
var b = w * @sizeOf(usize);
|
// var b = w * @sizeOf(usize);
|
||||||
|
var b: usize = 0;
|
||||||
while (b < self_len) : (b += 1) {
|
while (b < self_len) : (b += 1) {
|
||||||
if (self_bytes[b] != other_bytes[b]) {
|
if (self_bytes[b] != other_bytes[b]) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -238,7 +275,7 @@ pub const RocStr = extern struct {
|
||||||
if (self.isSmallStr()) {
|
if (self.isSmallStr()) {
|
||||||
return self.asArray()[@sizeOf(RocStr) - 1] ^ 0b1000_0000;
|
return self.asArray()[@sizeOf(RocStr) - 1] ^ 0b1000_0000;
|
||||||
} else {
|
} else {
|
||||||
return self.str_len;
|
return self.str_len & (~SEAMLESS_SLICE_BIT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,13 +283,15 @@ pub const RocStr = extern struct {
|
||||||
if (self.isSmallStr()) {
|
if (self.isSmallStr()) {
|
||||||
self.asU8ptrMut()[@sizeOf(RocStr) - 1] = @intCast(u8, length) | 0b1000_0000;
|
self.asU8ptrMut()[@sizeOf(RocStr) - 1] = @intCast(u8, length) | 0b1000_0000;
|
||||||
} else {
|
} else {
|
||||||
self.str_len = length;
|
self.str_len = length | (SEAMLESS_SLICE_BIT & self.str_len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn getCapacity(self: RocStr) usize {
|
pub fn getCapacity(self: RocStr) usize {
|
||||||
if (self.isSmallStr()) {
|
if (self.isSmallStr()) {
|
||||||
return SMALL_STR_MAX_LENGTH;
|
return SMALL_STR_MAX_LENGTH;
|
||||||
|
} else if (self.isSeamlessSlice()) {
|
||||||
|
return self.str_len & (~SEAMLESS_SLICE_BIT);
|
||||||
} else {
|
} else {
|
||||||
return self.str_capacity;
|
return self.str_capacity;
|
||||||
}
|
}
|
||||||
|
@ -299,6 +338,9 @@ pub const RocStr = extern struct {
|
||||||
// then the next byte is off the end of the struct;
|
// then the next byte is off the end of the struct;
|
||||||
// in that case, we are also not null-terminated!
|
// in that case, we are also not null-terminated!
|
||||||
return length != 0 and length != longest_small_str;
|
return length != 0 and length != longest_small_str;
|
||||||
|
} else if (self.isSeamlessSlice()) {
|
||||||
|
// Seamless slices can not use the character past the end even if it is null.
|
||||||
|
return false;
|
||||||
} else {
|
} else {
|
||||||
// This is a big string, and it's not empty, so we can safely
|
// This is a big string, and it's not empty, so we can safely
|
||||||
// dereference the pointer.
|
// dereference the pointer.
|
||||||
|
@ -334,7 +376,7 @@ pub const RocStr = extern struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn refcountMachine(self: RocStr) usize {
|
fn refcountMachine(self: RocStr) usize {
|
||||||
if (self.getCapacity() == 0 or self.isSmallStr()) {
|
if ((self.getCapacity() == 0 and !self.isSeamlessSlice()) or self.isSmallStr()) {
|
||||||
return utils.REFCOUNT_ONE;
|
return utils.REFCOUNT_ONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -811,13 +853,34 @@ pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Function type matching the signatures of initFromSmallStr and initFromBigStr:
// (source bytes, start offset, segment length, pre-shifted refcount pointer) -> RocStr.
const Init = fn (bytes: [*]u8, offset: usize, len: usize, ref_ptr: usize) RocStr;
|
||||||
|
// Builds a segment of a small source string by calling RocStr.init on the
// `len` bytes starting at `bytes + offset`. The ref_ptr argument is ignored;
// it exists only so the signature matches initFromBigStr.
fn initFromSmallStr(bytes: [*]u8, offset: usize, len: usize, _: usize) RocStr {
|
||||||
|
return RocStr.init(bytes + offset, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds a segment of a big source string as a seamless slice that points into the source bytes.
// The ref_ptr must already be shifted to be ready for storing in a seamless slice
// (strSplitHelp passes the refcount pointer shifted right by one bit).
// This function does not modify any refcount itself.
|
||||||
|
||||||
|
fn initFromBigStr(bytes: [*]u8, offset: usize, len: usize, ref_ptr: usize) RocStr {
|
||||||
|
// Here we can make a seamless slice instead of going through RocStr.init for a new string.
|
||||||
|
return RocStr{
|
||||||
|
// The slice starts `offset` bytes into the source data.
.str_bytes = bytes + offset,
|
||||||
|
// Setting the top bit of the length marks this string as a seamless slice.
.str_len = len | SEAMLESS_SLICE_BIT,
|
||||||
|
// For seamless slices this field holds the shifted refcount pointer rather than a capacity.
.str_capacity = ref_ptr,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||||
var ret_array_index: usize = 0;
|
var ret_array_index: usize = 0;
|
||||||
var slice_start_index: usize = 0;
|
var slice_start_index: usize = 0;
|
||||||
var str_index: usize = 0;
|
var str_index: usize = 0;
|
||||||
|
|
||||||
const str_bytes = string.asU8ptr();
|
var mut_str = string;
|
||||||
|
const str_bytes = mut_str.asU8ptrMut();
|
||||||
const str_len = string.len();
|
const str_len = string.len();
|
||||||
|
const ref_ptr = @ptrToInt(string.getRefcountPtr()) >> 1;
|
||||||
|
const init_fn = if (string.isSmallStr())
|
||||||
|
initFromSmallStr
|
||||||
|
else
|
||||||
|
initFromBigStr;
|
||||||
|
|
||||||
const delimiter_bytes_ptrs = delimiter.asU8ptr();
|
const delimiter_bytes_ptrs = delimiter.asU8ptr();
|
||||||
const delimiter_len = delimiter.len();
|
const delimiter_len = delimiter.len();
|
||||||
|
@ -849,7 +912,7 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||||
if (matches_delimiter) {
|
if (matches_delimiter) {
|
||||||
const segment_len: usize = str_index - slice_start_index;
|
const segment_len: usize = str_index - slice_start_index;
|
||||||
|
|
||||||
array[ret_array_index] = RocStr.init(str_bytes + slice_start_index, segment_len);
|
array[ret_array_index] = init_fn(str_bytes, slice_start_index, segment_len, ref_ptr);
|
||||||
slice_start_index = str_index + delimiter_len;
|
slice_start_index = str_index + delimiter_len;
|
||||||
ret_array_index += 1;
|
ret_array_index += 1;
|
||||||
str_index += delimiter_len;
|
str_index += delimiter_len;
|
||||||
|
@ -859,7 +922,12 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
array[ret_array_index] = RocStr.init(str_bytes + slice_start_index, str_len - slice_start_index);
|
array[ret_array_index] = init_fn(str_bytes, slice_start_index, str_len - slice_start_index, ref_ptr);
|
||||||
|
|
||||||
|
if (!string.isSmallStr()) {
|
||||||
|
// Correct refcount for all of the splits made.
|
||||||
|
mut_str.incref(ret_array_index + 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test "strSplitHelp: empty delimiter" {
|
test "strSplitHelp: empty delimiter" {
|
||||||
|
@ -2800,3 +2868,9 @@ pub fn strCloneTo(
|
||||||
return extra_offset + slice.len;
|
return extra_offset + slice.len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C calling convention wrapper that forwards to RocStr.getRefcountPtr().
// The result is the pointer just after the refcount (or null); per getRefcountPtr's
// own comment, it is not valid when `string` is a small string.
pub fn strRefcountPtr(
|
||||||
|
string: RocStr,
|
||||||
|
) callconv(.C) ?[*]u8 {
|
||||||
|
return string.getRefcountPtr();
|
||||||
|
}
|
||||||
|
|
|
@ -333,6 +333,7 @@ pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
|
||||||
pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
|
pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
|
||||||
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
|
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
|
||||||
pub const STR_GRAPHEMES: &str = "roc_builtins.str.graphemes";
|
pub const STR_GRAPHEMES: &str = "roc_builtins.str.graphemes";
|
||||||
|
pub const STR_REFCOUNT_PTR: &str = "roc_builtins.str.refcount_ptr";
|
||||||
|
|
||||||
pub const LIST_MAP: &str = "roc_builtins.list.map";
|
pub const LIST_MAP: &str = "roc_builtins.list.map";
|
||||||
pub const LIST_MAP2: &str = "roc_builtins.list.map2";
|
pub const LIST_MAP2: &str = "roc_builtins.list.map2";
|
||||||
|
|
|
@ -63,3 +63,19 @@ pub(crate) fn str_equal<'a, 'ctx, 'env>(
|
||||||
bitcode::STR_EQUAL,
|
bitcode::STR_EQUAL,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gets a pointer to just after the refcount for a string or seamless slice string.
|
||||||
|
// The value is just after the refcount so that normal strings and seamless slices can share code paths easily.
// This emits a call to the bitcode function named by bitcode::STR_REFCOUNT_PTR and
// converts the returned value to a pointer value.
|
||||||
|
pub(crate) fn str_refcount_ptr<'a, 'ctx, 'env>(
|
||||||
|
env: &Env<'a, 'ctx, 'env>,
|
||||||
|
// NOTE(review): the caller visible in this change passes a pointer to a stack copy of the string — confirm for other callers.
value: BasicValueEnum<'ctx>,
|
||||||
|
) -> PointerValue<'ctx> {
|
||||||
|
call_str_bitcode_fn(
|
||||||
|
env,
|
||||||
|
&[value],
|
||||||
|
&[],
|
||||||
|
BitcodeReturns::Basic,
|
||||||
|
bitcode::STR_REFCOUNT_PTR,
|
||||||
|
)
|
||||||
|
.into_pointer_value()
|
||||||
|
}
|
||||||
|
|
|
@ -2,18 +2,19 @@ use crate::debug_info_init;
|
||||||
use crate::llvm::bitcode::call_void_bitcode_fn;
|
use crate::llvm::bitcode::call_void_bitcode_fn;
|
||||||
use crate::llvm::build::BuilderExt;
|
use crate::llvm::build::BuilderExt;
|
||||||
use crate::llvm::build::{
|
use crate::llvm::build::{
|
||||||
add_func, cast_basic_basic, get_tag_id, tag_pointer_clear_tag_id, use_roc_value, Env,
|
add_func, cast_basic_basic, create_entry_block_alloca, get_tag_id, tag_pointer_clear_tag_id,
|
||||||
FAST_CALL_CONV,
|
use_roc_value, Env, FAST_CALL_CONV,
|
||||||
};
|
};
|
||||||
use crate::llvm::build_list::{
|
use crate::llvm::build_list::{
|
||||||
incrementing_elem_loop, list_capacity_or_ref_ptr, list_refcount_ptr, load_list,
|
incrementing_elem_loop, list_capacity_or_ref_ptr, list_refcount_ptr, load_list,
|
||||||
};
|
};
|
||||||
|
use crate::llvm::build_str::str_refcount_ptr;
|
||||||
use crate::llvm::convert::{basic_type_from_layout, zig_str_type, RocUnion};
|
use crate::llvm::convert::{basic_type_from_layout, zig_str_type, RocUnion};
|
||||||
use bumpalo::collections::Vec;
|
use bumpalo::collections::Vec;
|
||||||
use inkwell::basic_block::BasicBlock;
|
use inkwell::basic_block::BasicBlock;
|
||||||
use inkwell::module::Linkage;
|
use inkwell::module::Linkage;
|
||||||
use inkwell::types::{AnyTypeEnum, BasicMetadataTypeEnum, BasicType, BasicTypeEnum};
|
use inkwell::types::{AnyTypeEnum, BasicMetadataTypeEnum, BasicType, BasicTypeEnum};
|
||||||
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
|
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue};
|
||||||
use inkwell::{AddressSpace, IntPredicate};
|
use inkwell::{AddressSpace, IntPredicate};
|
||||||
use roc_module::symbol::Interns;
|
use roc_module::symbol::Interns;
|
||||||
use roc_module::symbol::Symbol;
|
use roc_module::symbol::Symbol;
|
||||||
|
@ -75,16 +76,6 @@ impl<'ctx> PointerToRefcount<'ctx> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_list_wrapper(env: &Env<'_, 'ctx, '_>, list_wrapper: StructValue<'ctx>) -> Self {
|
|
||||||
let data_ptr = env
|
|
||||||
.builder
|
|
||||||
.build_extract_value(list_wrapper, Builtin::WRAPPER_PTR, "read_list_ptr")
|
|
||||||
.unwrap()
|
|
||||||
.into_pointer_value();
|
|
||||||
|
|
||||||
Self::from_ptr_to_data(env, data_ptr)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_1<'a, 'env>(&self, env: &Env<'a, 'ctx, 'env>) -> IntValue<'ctx> {
|
pub fn is_1<'a, 'env>(&self, env: &Env<'a, 'ctx, 'env>) -> IntValue<'ctx> {
|
||||||
let current = self.get_refcount(env);
|
let current = self.get_refcount(env);
|
||||||
let one = match env.target_info.ptr_width() {
|
let one = match env.target_info.ptr_width() {
|
||||||
|
@ -815,9 +806,9 @@ fn modify_refcount_str_help<'a, 'ctx, 'env>(
|
||||||
|
|
||||||
let parent = fn_val;
|
let parent = fn_val;
|
||||||
|
|
||||||
|
let str_type = zig_str_type(env);
|
||||||
let arg_val =
|
let arg_val =
|
||||||
if Layout::Builtin(Builtin::Str).is_passed_by_reference(layout_interner, env.target_info) {
|
if Layout::Builtin(Builtin::Str).is_passed_by_reference(layout_interner, env.target_info) {
|
||||||
let str_type = zig_str_type(env);
|
|
||||||
env.builder
|
env.builder
|
||||||
.new_build_load(str_type, arg_val.into_pointer_value(), "load_str_to_stack")
|
.new_build_load(str_type, arg_val.into_pointer_value(), "load_str_to_stack")
|
||||||
} else {
|
} else {
|
||||||
|
@ -848,7 +839,11 @@ fn modify_refcount_str_help<'a, 'ctx, 'env>(
|
||||||
builder.build_conditional_branch(is_big_and_non_empty, modification_block, cont_block);
|
builder.build_conditional_branch(is_big_and_non_empty, modification_block, cont_block);
|
||||||
builder.position_at_end(modification_block);
|
builder.position_at_end(modification_block);
|
||||||
|
|
||||||
let refcount_ptr = PointerToRefcount::from_list_wrapper(env, str_wrapper);
|
let str_alloca = create_entry_block_alloca(env, parent, str_type.into(), "str_alloca");
|
||||||
|
env.builder.build_store(str_alloca, str_wrapper);
|
||||||
|
|
||||||
|
let refcount_ptr =
|
||||||
|
PointerToRefcount::from_ptr_to_data(env, str_refcount_ptr(env, str_alloca.into()));
|
||||||
let call_mode = mode_to_call_mode(fn_val, mode);
|
let call_mode = mode_to_call_mode(fn_val, mode);
|
||||||
refcount_ptr.modify(call_mode, layout, env, layout_interner);
|
refcount_ptr.modify(call_mode, layout, env, layout_interner);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue