mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-30 23:31:12 +00:00
use zig-defined Str.concat
This commit is contained in:
parent
1a71b8bac7
commit
63f4fbf1ef
3 changed files with 124 additions and 408 deletions
|
@ -1,9 +1,7 @@
|
|||
use crate::llvm::build::{
|
||||
call_bitcode_fn, call_void_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope,
|
||||
};
|
||||
use crate::llvm::build_list::{
|
||||
allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, load_list_ptr, store_list,
|
||||
};
|
||||
use crate::llvm::build_list::{allocate_list, build_basic_phi2, load_list_ptr, store_list};
|
||||
use crate::llvm::convert::collection;
|
||||
use inkwell::builder::Builder;
|
||||
use inkwell::types::BasicTypeEnum;
|
||||
|
@ -90,333 +88,116 @@ pub fn str_split<'a, 'ctx, 'env>(
|
|||
)
|
||||
}
|
||||
|
||||
/*
|
||||
fn cast_to_zig_str(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
str_as_struct: StructValue<'ctx>,
|
||||
) -> BasicValueEnum<'ctx> {
|
||||
// get the RocStr type defined by zig
|
||||
let roc_str_type = env.module.get_struct_type("str.RocStr").unwrap();
|
||||
|
||||
// convert `{ *mut u8, i64 }` to `RocStr`
|
||||
builder.build_bitcast(str_as_struct, roc_str_type, "convert_to_zig_rocstr");
|
||||
}
|
||||
|
||||
fn cast_from_zig_str(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
str_as_struct: StructValue<'ctx>,
|
||||
) -> BasicValueEnum<'ctx> {
|
||||
let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
|
||||
|
||||
// convert `RocStr` to `{ *mut u8, i64 }`
|
||||
builder.build_bitcast(str_as_struct, ret_type, "convert_from_zig_rocstr");
|
||||
}
|
||||
*/
|
||||
|
||||
fn str_symbol_to_i128<'a, 'ctx, 'env>(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
scope: &Scope<'a, 'ctx>,
|
||||
symbol: Symbol,
|
||||
) -> IntValue<'ctx> {
|
||||
let str_ptr = ptr_from_symbol(scope, symbol);
|
||||
|
||||
let i128_ptr = env
|
||||
.builder
|
||||
.build_bitcast(
|
||||
*str_ptr,
|
||||
env.context.i128_type().ptr_type(AddressSpace::Generic),
|
||||
"cast",
|
||||
)
|
||||
.into_pointer_value();
|
||||
|
||||
env.builder
|
||||
.build_load(i128_ptr, "load_as_i128")
|
||||
.into_int_value()
|
||||
}
|
||||
|
||||
fn zig_str_to_struct<'a, 'ctx, 'env>(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
zig_str: StructValue<'ctx>,
|
||||
) -> StructValue<'ctx> {
|
||||
let builder = env.builder;
|
||||
|
||||
// get the RocStr type defined by zig
|
||||
let zig_str_type = env.module.get_struct_type("str.RocStr").unwrap();
|
||||
|
||||
let ret_type = BasicTypeEnum::StructType(collection(env.context, env.ptr_bytes));
|
||||
|
||||
let foo = builder.build_alloca(zig_str_type, "zig_result");
|
||||
|
||||
builder.build_store(foo, zig_str);
|
||||
|
||||
let ptr3 = builder
|
||||
.build_bitcast(
|
||||
foo,
|
||||
env.context.i128_type().ptr_type(AddressSpace::Generic),
|
||||
"cast",
|
||||
)
|
||||
.into_pointer_value();
|
||||
|
||||
let ptr4 = builder
|
||||
.build_bitcast(
|
||||
ptr3,
|
||||
ret_type.into_struct_type().ptr_type(AddressSpace::Generic),
|
||||
"cast",
|
||||
)
|
||||
.into_pointer_value();
|
||||
|
||||
builder.build_load(ptr4, "load").into_struct_value()
|
||||
}
|
||||
|
||||
/// Str.concat : Str, Str -> Str
|
||||
pub fn str_concat<'a, 'ctx, 'env>(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
inplace: InPlace,
|
||||
scope: &Scope<'a, 'ctx>,
|
||||
parent: FunctionValue<'ctx>,
|
||||
first_str_symbol: Symbol,
|
||||
second_str_symbol: Symbol,
|
||||
_parent: FunctionValue<'ctx>,
|
||||
str1_symbol: Symbol,
|
||||
str2_symbol: Symbol,
|
||||
) -> BasicValueEnum<'ctx> {
|
||||
let builder = env.builder;
|
||||
let ctx = env.context;
|
||||
// swap the arguments; second argument comes before the second in the output string
|
||||
let str1_i128 = str_symbol_to_i128(env, scope, str1_symbol);
|
||||
let str2_i128 = str_symbol_to_i128(env, scope, str2_symbol);
|
||||
|
||||
let second_str_ptr = ptr_from_symbol(scope, second_str_symbol);
|
||||
let first_str_ptr = ptr_from_symbol(scope, first_str_symbol);
|
||||
|
||||
let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
|
||||
|
||||
load_str(
|
||||
let zig_result = call_bitcode_fn(
|
||||
env,
|
||||
parent,
|
||||
*second_str_ptr,
|
||||
ret_type,
|
||||
|second_str_ptr, second_str_len, second_str_smallness| {
|
||||
load_str(
|
||||
env,
|
||||
parent,
|
||||
*first_str_ptr,
|
||||
ret_type,
|
||||
|first_str_ptr, first_str_len, first_str_smallness| {
|
||||
// first_str_len > 0
|
||||
// We do this check to avoid allocating memory. If the first input
|
||||
// str is empty, then we can just return the second str cloned
|
||||
let first_str_length_comparison = str_is_not_empty(env, first_str_len);
|
||||
|
||||
let if_first_str_is_empty = || {
|
||||
// second_str_len > 0
|
||||
// We do this check to avoid allocating memory. If the second input
|
||||
// str is empty, then we can just return an empty str
|
||||
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
|
||||
|
||||
let if_second_str_is_nonempty = || {
|
||||
let (new_wrapper, _) = clone_nonempty_str(
|
||||
env,
|
||||
inplace,
|
||||
second_str_smallness,
|
||||
second_str_len,
|
||||
second_str_ptr,
|
||||
);
|
||||
|
||||
BasicValueEnum::StructValue(new_wrapper)
|
||||
};
|
||||
|
||||
let if_second_str_is_empty = || empty_list(env);
|
||||
|
||||
build_basic_phi2(
|
||||
env,
|
||||
parent,
|
||||
second_str_length_comparison,
|
||||
if_second_str_is_nonempty,
|
||||
if_second_str_is_empty,
|
||||
ret_type,
|
||||
)
|
||||
};
|
||||
|
||||
let if_first_str_is_not_empty = || {
|
||||
let if_second_str_is_empty = || {
|
||||
let (new_wrapper, _) = clone_nonempty_str(
|
||||
env,
|
||||
inplace,
|
||||
first_str_smallness,
|
||||
first_str_len,
|
||||
first_str_ptr,
|
||||
);
|
||||
|
||||
BasicValueEnum::StructValue(new_wrapper)
|
||||
};
|
||||
|
||||
// second_str_len > 0
|
||||
// We do this check to avoid allocating memory. If the second input
|
||||
// str is empty, then we can just return the first str cloned
|
||||
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
|
||||
|
||||
let if_second_str_is_not_empty = || {
|
||||
let combined_str_len = builder.build_int_add(
|
||||
first_str_len,
|
||||
second_str_len,
|
||||
"add_list_lengths",
|
||||
);
|
||||
|
||||
// The combined string is big iff its length is
|
||||
// greater than or equal to the size in memory
|
||||
// of a small str (e.g. len >= 16 on 64-bit targets)
|
||||
let is_big = env.builder.build_int_compare(
|
||||
IntPredicate::UGE,
|
||||
combined_str_len,
|
||||
env.ptr_int().const_int(env.small_str_bytes() as u64, false),
|
||||
"str_is_big",
|
||||
);
|
||||
|
||||
let if_big = || {
|
||||
let combined_str_ptr =
|
||||
allocate_list(env, inplace, &CHAR_LAYOUT, combined_str_len);
|
||||
|
||||
// TODO replace FIRST_LOOP with a memcpy!
|
||||
// FIRST LOOP
|
||||
let first_loop = |first_index, first_str_elem| {
|
||||
// The pointer to the element in the combined list
|
||||
let combined_str_elem_ptr = unsafe {
|
||||
builder.build_in_bounds_gep(
|
||||
combined_str_ptr,
|
||||
&[first_index],
|
||||
"load_index_combined_list",
|
||||
)
|
||||
};
|
||||
|
||||
// Mutate the new array in-place to change the element.
|
||||
builder.build_store(combined_str_elem_ptr, first_str_elem);
|
||||
};
|
||||
|
||||
let index_name = "#index";
|
||||
|
||||
let index_alloca = incrementing_elem_loop(
|
||||
builder,
|
||||
ctx,
|
||||
parent,
|
||||
first_str_ptr,
|
||||
first_str_len,
|
||||
index_name,
|
||||
first_loop,
|
||||
);
|
||||
|
||||
// Reset the index variable to 0
|
||||
builder
|
||||
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
|
||||
|
||||
// TODO replace SECOND_LOOP with a memcpy!
|
||||
// SECOND LOOP
|
||||
let second_loop = |second_index, second_str_elem| {
|
||||
// The pointer to the element in the combined str.
|
||||
// Note that the pointer does not start at the index
|
||||
// 0, it starts at the index of first_str_len. In that
|
||||
// sense it is "offset".
|
||||
let offset_combined_str_char_ptr = unsafe {
|
||||
builder.build_in_bounds_gep(
|
||||
combined_str_ptr,
|
||||
&[first_str_len],
|
||||
"elem",
|
||||
)
|
||||
};
|
||||
|
||||
// The pointer to the char from the second str
|
||||
// in the combined list
|
||||
let combined_str_char_ptr = unsafe {
|
||||
builder.build_in_bounds_gep(
|
||||
offset_combined_str_char_ptr,
|
||||
&[second_index],
|
||||
"load_index_combined_list",
|
||||
)
|
||||
};
|
||||
|
||||
// Mutate the new array in-place to change the element.
|
||||
builder.build_store(combined_str_char_ptr, second_str_elem);
|
||||
};
|
||||
|
||||
incrementing_elem_loop(
|
||||
builder,
|
||||
ctx,
|
||||
parent,
|
||||
second_str_ptr,
|
||||
second_str_len,
|
||||
index_name,
|
||||
second_loop,
|
||||
);
|
||||
|
||||
store_list(env, combined_str_ptr, combined_str_len)
|
||||
};
|
||||
|
||||
let if_small = || {
|
||||
let combined_str_ptr = builder.build_array_alloca(
|
||||
ctx.i8_type(),
|
||||
ctx.i8_type().const_int(env.small_str_bytes() as u64, false),
|
||||
"alloca_small_str",
|
||||
);
|
||||
|
||||
// TODO replace FIRST_LOOP with a memcpy!
|
||||
// FIRST LOOP
|
||||
let first_loop = |first_index, first_str_elem| {
|
||||
// The pointer to the element in the combined list
|
||||
let combined_str_elem_ptr = unsafe {
|
||||
builder.build_in_bounds_gep(
|
||||
combined_str_ptr,
|
||||
&[first_index],
|
||||
"load_index_combined_list",
|
||||
)
|
||||
};
|
||||
|
||||
// Mutate the new array in-place to change the element.
|
||||
builder.build_store(combined_str_elem_ptr, first_str_elem);
|
||||
};
|
||||
|
||||
let index_name = "#index";
|
||||
|
||||
let index_alloca = incrementing_elem_loop(
|
||||
builder,
|
||||
ctx,
|
||||
parent,
|
||||
first_str_ptr,
|
||||
first_str_len,
|
||||
index_name,
|
||||
first_loop,
|
||||
);
|
||||
|
||||
// Reset the index variable to 0
|
||||
builder
|
||||
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
|
||||
|
||||
// TODO replace SECOND_LOOP with a memcpy!
|
||||
// SECOND LOOP
|
||||
let second_loop = |second_index, second_str_elem| {
|
||||
// The pointer to the element in the combined str.
|
||||
// Note that the pointer does not start at the index
|
||||
// 0, it starts at the index of first_str_len. In that
|
||||
// sense it is "offset".
|
||||
let offset_combined_str_char_ptr = unsafe {
|
||||
builder.build_in_bounds_gep(
|
||||
combined_str_ptr,
|
||||
&[first_str_len],
|
||||
"elem",
|
||||
)
|
||||
};
|
||||
|
||||
// The pointer to the char from the second str
|
||||
// in the combined list
|
||||
let combined_str_char_ptr = unsafe {
|
||||
builder.build_in_bounds_gep(
|
||||
offset_combined_str_char_ptr,
|
||||
&[second_index],
|
||||
"load_index_combined_list",
|
||||
)
|
||||
};
|
||||
|
||||
// Mutate the new array in-place to change the element.
|
||||
builder.build_store(combined_str_char_ptr, second_str_elem);
|
||||
};
|
||||
|
||||
incrementing_elem_loop(
|
||||
builder,
|
||||
ctx,
|
||||
parent,
|
||||
second_str_ptr,
|
||||
second_str_len,
|
||||
index_name,
|
||||
second_loop,
|
||||
);
|
||||
|
||||
let final_byte = builder.build_int_cast(
|
||||
combined_str_len,
|
||||
ctx.i8_type(),
|
||||
"str_len_to_i8",
|
||||
);
|
||||
|
||||
let final_byte = builder.build_or(
|
||||
final_byte,
|
||||
ctx.i8_type().const_int(0b1000_0000, false),
|
||||
"str_len_set_discriminant",
|
||||
);
|
||||
|
||||
let final_byte_ptr = unsafe {
|
||||
builder.build_in_bounds_gep(
|
||||
combined_str_ptr,
|
||||
&[ctx
|
||||
.i8_type()
|
||||
.const_int(env.small_str_bytes() as u64 - 1, false)],
|
||||
"str_literal_final_byte",
|
||||
)
|
||||
};
|
||||
|
||||
builder.build_store(final_byte_ptr, final_byte);
|
||||
|
||||
builder.build_load(
|
||||
builder
|
||||
.build_bitcast(
|
||||
combined_str_ptr,
|
||||
collection(ctx, env.ptr_bytes)
|
||||
.ptr_type(AddressSpace::Generic),
|
||||
"cast_collection",
|
||||
)
|
||||
.into_pointer_value(),
|
||||
"small_str_array",
|
||||
)
|
||||
};
|
||||
|
||||
// If the combined length fits in a small string,
|
||||
// write into a small string!
|
||||
build_basic_phi2(
|
||||
env,
|
||||
parent,
|
||||
is_big,
|
||||
// the result of a Str.concat is most likely big
|
||||
if_big,
|
||||
if_small,
|
||||
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
|
||||
)
|
||||
};
|
||||
|
||||
build_basic_phi2(
|
||||
env,
|
||||
parent,
|
||||
second_str_length_comparison,
|
||||
if_second_str_is_not_empty,
|
||||
if_second_str_is_empty,
|
||||
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
|
||||
)
|
||||
};
|
||||
|
||||
build_basic_phi2(
|
||||
env,
|
||||
parent,
|
||||
first_str_length_comparison,
|
||||
if_first_str_is_not_empty,
|
||||
if_first_str_is_empty,
|
||||
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
|
||||
)
|
||||
},
|
||||
)
|
||||
},
|
||||
&[
|
||||
env.context
|
||||
.i32_type()
|
||||
.const_int(env.ptr_bytes as u64, false)
|
||||
.into(),
|
||||
env.context
|
||||
.i8_type()
|
||||
.const_int(inplace as u64, false)
|
||||
.into(),
|
||||
str1_i128.into(),
|
||||
str2_i128.into(),
|
||||
],
|
||||
&bitcode::STR_CONCAT,
|
||||
)
|
||||
.into_struct_value();
|
||||
|
||||
zig_str_to_struct(env, zig_result).into()
|
||||
}
|
||||
|
||||
/// Obtain the string's length, cast from i8 to usize
|
||||
|
@ -511,82 +292,6 @@ enum Smallness {
|
|||
Big,
|
||||
}
|
||||
|
||||
fn clone_nonempty_str<'a, 'ctx, 'env>(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
inplace: InPlace,
|
||||
smallness: Smallness,
|
||||
len: IntValue<'ctx>,
|
||||
bytes_ptr: PointerValue<'ctx>,
|
||||
) -> (StructValue<'ctx>, PointerValue<'ctx>) {
|
||||
let builder = env.builder;
|
||||
let ctx = env.context;
|
||||
let ptr_bytes = env.ptr_bytes;
|
||||
|
||||
// Allocate space for the new str that we'll copy into.
|
||||
match smallness {
|
||||
Smallness::Small => {
|
||||
let wrapper_struct_ptr = cast_str_bytes_to_wrapper(env, bytes_ptr);
|
||||
let wrapper_struct = builder.build_load(wrapper_struct_ptr, "str_wrapper");
|
||||
let alloca = builder.build_alloca(collection(ctx, ptr_bytes), "small_str_clone");
|
||||
|
||||
builder.build_store(alloca, wrapper_struct);
|
||||
|
||||
(wrapper_struct.into_struct_value(), alloca)
|
||||
}
|
||||
Smallness::Big => {
|
||||
let clone_ptr = allocate_list(env, inplace, &CHAR_LAYOUT, len);
|
||||
|
||||
// TODO check if malloc returned null; if so, runtime error for OOM!
|
||||
|
||||
// Copy the bytes from the original array into the new
|
||||
// one we just malloc'd.
|
||||
builder
|
||||
.build_memcpy(clone_ptr, ptr_bytes, bytes_ptr, ptr_bytes, len)
|
||||
.unwrap();
|
||||
|
||||
// Create a fresh wrapper struct for the newly populated array
|
||||
let struct_type = collection(ctx, env.ptr_bytes);
|
||||
let mut struct_val;
|
||||
|
||||
// Store the pointer
|
||||
struct_val = builder
|
||||
.build_insert_value(
|
||||
struct_type.get_undef(),
|
||||
clone_ptr,
|
||||
Builtin::WRAPPER_PTR,
|
||||
"insert_ptr",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Store the length
|
||||
struct_val = builder
|
||||
.build_insert_value(struct_val, len, Builtin::WRAPPER_LEN, "insert_len")
|
||||
.unwrap();
|
||||
|
||||
let answer = builder
|
||||
.build_bitcast(
|
||||
struct_val.into_struct_value(),
|
||||
collection(ctx, ptr_bytes),
|
||||
"cast_collection",
|
||||
)
|
||||
.into_struct_value();
|
||||
|
||||
(answer, clone_ptr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn cast_str_bytes_to_wrapper<'a, 'ctx, 'env>(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
bytes_ptr: PointerValue<'ctx>,
|
||||
) -> PointerValue<'ctx> {
|
||||
let struct_ptr_type = collection(env.context, env.ptr_bytes).ptr_type(AddressSpace::Generic);
|
||||
|
||||
env.builder
|
||||
.build_bitcast(bytes_ptr, struct_ptr_type, "str_as_struct_ptr")
|
||||
.into_pointer_value()
|
||||
}
|
||||
|
||||
fn cast_str_wrapper_to_array<'a, 'ctx, 'env>(
|
||||
env: &Env<'a, 'ctx, 'env>,
|
||||
wrapper_ptr: PointerValue<'ctx>,
|
||||
|
@ -661,6 +366,7 @@ fn big_str_len<'ctx>(builder: &Builder<'ctx>, wrapper_struct: StructValue<'ctx>)
|
|||
.into_int_value()
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntValue<'ctx> {
|
||||
env.builder.build_int_compare(
|
||||
IntPredicate::UGT,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue