use zig-defined Str.concat

This commit is contained in:
Folkert 2020-12-01 22:03:13 +01:00
parent 1a71b8bac7
commit 63f4fbf1ef
3 changed files with 124 additions and 408 deletions

View file

@ -1,9 +1,7 @@
use crate::llvm::build::{
call_bitcode_fn, call_void_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope,
};
use crate::llvm::build_list::{
allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, load_list_ptr, store_list,
};
use crate::llvm::build_list::{allocate_list, build_basic_phi2, load_list_ptr, store_list};
use crate::llvm::convert::collection;
use inkwell::builder::Builder;
use inkwell::types::BasicTypeEnum;
@ -90,333 +88,116 @@ pub fn str_split<'a, 'ctx, 'env>(
)
}
/*
fn cast_to_zig_str(
env: &Env<'a, 'ctx, 'env>,
str_as_struct: StructValue<'ctx>,
) -> BasicValueEnum<'ctx> {
// get the RocStr type defined by zig
let roc_str_type = env.module.get_struct_type("str.RocStr").unwrap();
// convert `{ *mut u8, i64 }` to `RocStr`
builder.build_bitcast(str_as_struct, roc_str_type, "convert_to_zig_rocstr");
}
fn cast_from_zig_str(
env: &Env<'a, 'ctx, 'env>,
str_as_struct: StructValue<'ctx>,
) -> BasicValueEnum<'ctx> {
let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
// convert `RocStr` to `{ *mut u8, i64 }`
builder.build_bitcast(str_as_struct, ret_type, "convert_from_zig_rocstr");
}
*/
fn str_symbol_to_i128<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
symbol: Symbol,
) -> IntValue<'ctx> {
let str_ptr = ptr_from_symbol(scope, symbol);
let i128_ptr = env
.builder
.build_bitcast(
*str_ptr,
env.context.i128_type().ptr_type(AddressSpace::Generic),
"cast",
)
.into_pointer_value();
env.builder
.build_load(i128_ptr, "load_as_i128")
.into_int_value()
}
fn zig_str_to_struct<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
zig_str: StructValue<'ctx>,
) -> StructValue<'ctx> {
let builder = env.builder;
// get the RocStr type defined by zig
let zig_str_type = env.module.get_struct_type("str.RocStr").unwrap();
let ret_type = BasicTypeEnum::StructType(collection(env.context, env.ptr_bytes));
let foo = builder.build_alloca(zig_str_type, "zig_result");
builder.build_store(foo, zig_str);
let ptr3 = builder
.build_bitcast(
foo,
env.context.i128_type().ptr_type(AddressSpace::Generic),
"cast",
)
.into_pointer_value();
let ptr4 = builder
.build_bitcast(
ptr3,
ret_type.into_struct_type().ptr_type(AddressSpace::Generic),
"cast",
)
.into_pointer_value();
builder.build_load(ptr4, "load").into_struct_value()
}
/// Str.concat : Str, Str -> Str
pub fn str_concat<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
inplace: InPlace,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
first_str_symbol: Symbol,
second_str_symbol: Symbol,
_parent: FunctionValue<'ctx>,
str1_symbol: Symbol,
str2_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let builder = env.builder;
let ctx = env.context;
// swap the arguments; second argument comes before the second in the output string
let str1_i128 = str_symbol_to_i128(env, scope, str1_symbol);
let str2_i128 = str_symbol_to_i128(env, scope, str2_symbol);
let second_str_ptr = ptr_from_symbol(scope, second_str_symbol);
let first_str_ptr = ptr_from_symbol(scope, first_str_symbol);
let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
load_str(
let zig_result = call_bitcode_fn(
env,
parent,
*second_str_ptr,
ret_type,
|second_str_ptr, second_str_len, second_str_smallness| {
load_str(
env,
parent,
*first_str_ptr,
ret_type,
|first_str_ptr, first_str_len, first_str_smallness| {
// first_str_len > 0
// We do this check to avoid allocating memory. If the first input
// str is empty, then we can just return the second str cloned
let first_str_length_comparison = str_is_not_empty(env, first_str_len);
let if_first_str_is_empty = || {
// second_str_len > 0
// We do this check to avoid allocating memory. If the second input
// str is empty, then we can just return an empty str
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
let if_second_str_is_nonempty = || {
let (new_wrapper, _) = clone_nonempty_str(
env,
inplace,
second_str_smallness,
second_str_len,
second_str_ptr,
);
BasicValueEnum::StructValue(new_wrapper)
};
let if_second_str_is_empty = || empty_list(env);
build_basic_phi2(
env,
parent,
second_str_length_comparison,
if_second_str_is_nonempty,
if_second_str_is_empty,
ret_type,
)
};
let if_first_str_is_not_empty = || {
let if_second_str_is_empty = || {
let (new_wrapper, _) = clone_nonempty_str(
env,
inplace,
first_str_smallness,
first_str_len,
first_str_ptr,
);
BasicValueEnum::StructValue(new_wrapper)
};
// second_str_len > 0
// We do this check to avoid allocating memory. If the second input
// str is empty, then we can just return the first str cloned
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
let if_second_str_is_not_empty = || {
let combined_str_len = builder.build_int_add(
first_str_len,
second_str_len,
"add_list_lengths",
);
// The combined string is big iff its length is
// greater than or equal to the size in memory
// of a small str (e.g. len >= 16 on 64-bit targets)
let is_big = env.builder.build_int_compare(
IntPredicate::UGE,
combined_str_len,
env.ptr_int().const_int(env.small_str_bytes() as u64, false),
"str_is_big",
);
let if_big = || {
let combined_str_ptr =
allocate_list(env, inplace, &CHAR_LAYOUT, combined_str_len);
// TODO replace FIRST_LOOP with a memcpy!
// FIRST LOOP
let first_loop = |first_index, first_str_elem| {
// The pointer to the element in the combined list
let combined_str_elem_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_elem_ptr, first_str_elem);
};
let index_name = "#index";
let index_alloca = incrementing_elem_loop(
builder,
ctx,
parent,
first_str_ptr,
first_str_len,
index_name,
first_loop,
);
// Reset the index variable to 0
builder
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
// TODO replace SECOND_LOOP with a memcpy!
// SECOND LOOP
let second_loop = |second_index, second_str_elem| {
// The pointer to the element in the combined str.
// Note that the pointer does not start at the index
// 0, it starts at the index of first_str_len. In that
// sense it is "offset".
let offset_combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_str_len],
"elem",
)
};
// The pointer to the char from the second str
// in the combined list
let combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
offset_combined_str_char_ptr,
&[second_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_char_ptr, second_str_elem);
};
incrementing_elem_loop(
builder,
ctx,
parent,
second_str_ptr,
second_str_len,
index_name,
second_loop,
);
store_list(env, combined_str_ptr, combined_str_len)
};
let if_small = || {
let combined_str_ptr = builder.build_array_alloca(
ctx.i8_type(),
ctx.i8_type().const_int(env.small_str_bytes() as u64, false),
"alloca_small_str",
);
// TODO replace FIRST_LOOP with a memcpy!
// FIRST LOOP
let first_loop = |first_index, first_str_elem| {
// The pointer to the element in the combined list
let combined_str_elem_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_elem_ptr, first_str_elem);
};
let index_name = "#index";
let index_alloca = incrementing_elem_loop(
builder,
ctx,
parent,
first_str_ptr,
first_str_len,
index_name,
first_loop,
);
// Reset the index variable to 0
builder
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
// TODO replace SECOND_LOOP with a memcpy!
// SECOND LOOP
let second_loop = |second_index, second_str_elem| {
// The pointer to the element in the combined str.
// Note that the pointer does not start at the index
// 0, it starts at the index of first_str_len. In that
// sense it is "offset".
let offset_combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_str_len],
"elem",
)
};
// The pointer to the char from the second str
// in the combined list
let combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
offset_combined_str_char_ptr,
&[second_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_char_ptr, second_str_elem);
};
incrementing_elem_loop(
builder,
ctx,
parent,
second_str_ptr,
second_str_len,
index_name,
second_loop,
);
let final_byte = builder.build_int_cast(
combined_str_len,
ctx.i8_type(),
"str_len_to_i8",
);
let final_byte = builder.build_or(
final_byte,
ctx.i8_type().const_int(0b1000_0000, false),
"str_len_set_discriminant",
);
let final_byte_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[ctx
.i8_type()
.const_int(env.small_str_bytes() as u64 - 1, false)],
"str_literal_final_byte",
)
};
builder.build_store(final_byte_ptr, final_byte);
builder.build_load(
builder
.build_bitcast(
combined_str_ptr,
collection(ctx, env.ptr_bytes)
.ptr_type(AddressSpace::Generic),
"cast_collection",
)
.into_pointer_value(),
"small_str_array",
)
};
// If the combined length fits in a small string,
// write into a small string!
build_basic_phi2(
env,
parent,
is_big,
// the result of a Str.concat is most likely big
if_big,
if_small,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
};
build_basic_phi2(
env,
parent,
second_str_length_comparison,
if_second_str_is_not_empty,
if_second_str_is_empty,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
};
build_basic_phi2(
env,
parent,
first_str_length_comparison,
if_first_str_is_not_empty,
if_first_str_is_empty,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
},
)
},
&[
env.context
.i32_type()
.const_int(env.ptr_bytes as u64, false)
.into(),
env.context
.i8_type()
.const_int(inplace as u64, false)
.into(),
str1_i128.into(),
str2_i128.into(),
],
&bitcode::STR_CONCAT,
)
.into_struct_value();
zig_str_to_struct(env, zig_result).into()
}
/// Obtain the string's length, cast from i8 to usize
@ -511,82 +292,6 @@ enum Smallness {
Big,
}
fn clone_nonempty_str<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
inplace: InPlace,
smallness: Smallness,
len: IntValue<'ctx>,
bytes_ptr: PointerValue<'ctx>,
) -> (StructValue<'ctx>, PointerValue<'ctx>) {
let builder = env.builder;
let ctx = env.context;
let ptr_bytes = env.ptr_bytes;
// Allocate space for the new str that we'll copy into.
match smallness {
Smallness::Small => {
let wrapper_struct_ptr = cast_str_bytes_to_wrapper(env, bytes_ptr);
let wrapper_struct = builder.build_load(wrapper_struct_ptr, "str_wrapper");
let alloca = builder.build_alloca(collection(ctx, ptr_bytes), "small_str_clone");
builder.build_store(alloca, wrapper_struct);
(wrapper_struct.into_struct_value(), alloca)
}
Smallness::Big => {
let clone_ptr = allocate_list(env, inplace, &CHAR_LAYOUT, len);
// TODO check if malloc returned null; if so, runtime error for OOM!
// Copy the bytes from the original array into the new
// one we just malloc'd.
builder
.build_memcpy(clone_ptr, ptr_bytes, bytes_ptr, ptr_bytes, len)
.unwrap();
// Create a fresh wrapper struct for the newly populated array
let struct_type = collection(ctx, env.ptr_bytes);
let mut struct_val;
// Store the pointer
struct_val = builder
.build_insert_value(
struct_type.get_undef(),
clone_ptr,
Builtin::WRAPPER_PTR,
"insert_ptr",
)
.unwrap();
// Store the length
struct_val = builder
.build_insert_value(struct_val, len, Builtin::WRAPPER_LEN, "insert_len")
.unwrap();
let answer = builder
.build_bitcast(
struct_val.into_struct_value(),
collection(ctx, ptr_bytes),
"cast_collection",
)
.into_struct_value();
(answer, clone_ptr)
}
}
}
fn cast_str_bytes_to_wrapper<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
bytes_ptr: PointerValue<'ctx>,
) -> PointerValue<'ctx> {
let struct_ptr_type = collection(env.context, env.ptr_bytes).ptr_type(AddressSpace::Generic);
env.builder
.build_bitcast(bytes_ptr, struct_ptr_type, "str_as_struct_ptr")
.into_pointer_value()
}
fn cast_str_wrapper_to_array<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
wrapper_ptr: PointerValue<'ctx>,
@ -661,6 +366,7 @@ fn big_str_len<'ctx>(builder: &Builder<'ctx>, wrapper_struct: StructValue<'ctx>)
.into_int_value()
}
#[allow(dead_code)]
fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntValue<'ctx> {
env.builder.build_int_compare(
IntPredicate::UGT,