use zig-defined Str.concat

This commit is contained in:
Folkert 2020-12-01 22:03:13 +01:00
parent 1a71b8bac7
commit 63f4fbf1ef
3 changed files with 124 additions and 408 deletions

View file

@ -620,19 +620,19 @@ test "RocStr.concat: small concat small" {
result.drop(); result.drop();
} }
pub fn strConcat(ptr_size: u32, arg1: RocStr, arg2: RocStr) callconv(.C) RocStr { pub fn strConcat(ptr_size: u32, result_in_place: InPlace, arg1: RocStr, arg2: RocStr) callconv(.C) RocStr {
return switch (ptr_size) { return switch (ptr_size) {
4 => strConcatHelp(i32, arg1, arg2), 4 => strConcatHelp(i32, result_in_place, arg1, arg2),
8 => strConcatHelp(i64, arg1, arg2), 8 => strConcatHelp(i64, result_in_place, arg1, arg2),
else => unreachable, else => unreachable,
}; };
} }
fn strConcatHelp(comptime T: type, arg1: RocStr, arg2: RocStr) RocStr { fn strConcatHelp(comptime T: type, result_in_place: InPlace, arg1: RocStr, arg2: RocStr) RocStr {
if (arg1.is_empty()) { if (arg1.is_empty()) {
return cloneNonemptyStr(T, arg2); return cloneNonemptyStr(T, result_in_place, arg2);
} else if (arg2.is_empty()) { } else if (arg2.is_empty()) {
return cloneNonemptyStr(T, arg1); return cloneNonemptyStr(T, result_in_place, arg1);
} else { } else {
const combined_length = arg1.len() + arg2.len(); const combined_length = arg1.len() + arg2.len();
@ -640,7 +640,7 @@ fn strConcatHelp(comptime T: type, arg1: RocStr, arg2: RocStr) RocStr {
const result_is_big = combined_length >= small_str_bytes; const result_is_big = combined_length >= small_str_bytes;
if (result_is_big) { if (result_is_big) {
var result = allocate_str(T, combined_length); var result = allocate_str(T, result_in_place, combined_length);
{ {
const old_if_small = &@bitCast([16]u8, arg1); const old_if_small = &@bitCast([16]u8, arg1);
@ -694,12 +694,17 @@ fn strConcatHelp(comptime T: type, arg1: RocStr, arg2: RocStr) RocStr {
} }
} }
fn cloneNonemptyStr(comptime T: type, str: RocStr) RocStr { const InPlace = packed enum(u8) {
InPlace,
Clone,
};
fn cloneNonemptyStr(comptime T: type, in_place: InPlace, str: RocStr) RocStr {
if (str.is_small_str() or str.is_empty()) { if (str.is_small_str() or str.is_empty()) {
// just return the bytes // just return the bytes
return str; return str;
} else { } else {
var new_str = allocate_str(T, str.str_len); var new_str = allocate_str(T, in_place, str.str_len);
var old_bytes: [*]u8 = @ptrCast([*]u8, str.str_bytes); var old_bytes: [*]u8 = @ptrCast([*]u8, str.str_bytes);
var new_bytes: [*]u8 = @ptrCast([*]u8, new_str.str_bytes); var new_bytes: [*]u8 = @ptrCast([*]u8, new_str.str_bytes);
@ -710,11 +715,15 @@ fn cloneNonemptyStr(comptime T: type, str: RocStr) RocStr {
} }
} }
fn allocate_str(comptime T: type, number_of_chars: u64) RocStr { fn allocate_str(comptime T: type, in_place: InPlace, number_of_chars: u64) RocStr {
const length = @sizeOf(T) + number_of_chars; const length = @sizeOf(T) + number_of_chars;
var new_bytes: [*]T = @ptrCast([*]T, @alignCast(@alignOf(T), malloc(length))); var new_bytes: [*]T = @ptrCast([*]T, @alignCast(@alignOf(T), malloc(length)));
new_bytes[0] = std.math.maxInt(T); if (in_place == InPlace.InPlace) {
new_bytes[0] = @intCast(T, number_of_chars);
} else {
new_bytes[0] = std.math.minInt(T);
}
var first_element = @ptrCast([*]align(@alignOf(T)) u8, new_bytes); var first_element = @ptrCast([*]align(@alignOf(T)) u8, new_bytes);
first_element += 8; first_element += 8;

View file

@ -2383,6 +2383,7 @@ fn call_with_args<'a, 'ctx, 'env>(
} }
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
#[repr(u8)]
pub enum InPlace { pub enum InPlace {
InPlace, InPlace,
Clone, Clone,

View file

@ -1,9 +1,7 @@
use crate::llvm::build::{ use crate::llvm::build::{
call_bitcode_fn, call_void_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope, call_bitcode_fn, call_void_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope,
}; };
use crate::llvm::build_list::{ use crate::llvm::build_list::{allocate_list, build_basic_phi2, load_list_ptr, store_list};
allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, load_list_ptr, store_list,
};
use crate::llvm::convert::collection; use crate::llvm::convert::collection;
use inkwell::builder::Builder; use inkwell::builder::Builder;
use inkwell::types::BasicTypeEnum; use inkwell::types::BasicTypeEnum;
@ -90,333 +88,116 @@ pub fn str_split<'a, 'ctx, 'env>(
) )
} }
/*
fn cast_to_zig_str(
env: &Env<'a, 'ctx, 'env>,
str_as_struct: StructValue<'ctx>,
) -> BasicValueEnum<'ctx> {
// get the RocStr type defined by zig
let roc_str_type = env.module.get_struct_type("str.RocStr").unwrap();
// convert `{ *mut u8, i64 }` to `RocStr`
builder.build_bitcast(str_as_struct, roc_str_type, "convert_to_zig_rocstr");
}
fn cast_from_zig_str(
env: &Env<'a, 'ctx, 'env>,
str_as_struct: StructValue<'ctx>,
) -> BasicValueEnum<'ctx> {
let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
// convert `RocStr` to `{ *mut u8, i64 }`
builder.build_bitcast(str_as_struct, ret_type, "convert_from_zig_rocstr");
}
*/
/// Loads the value that `symbol` points at in `scope` as one 128-bit integer.
///
/// The pointer returned by `ptr_from_symbol` is bitcast to `i128*` and then
/// dereferenced, so the entire value is read in a single load.
///
/// NOTE(review): this always reads 128 bits, independent of `env.ptr_bytes`;
/// confirm that is intended for targets where the str wrapper is smaller.
fn str_symbol_to_i128<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    scope: &Scope<'a, 'ctx>,
    symbol: Symbol,
) -> IntValue<'ctx> {
    let symbol_ptr = ptr_from_symbol(scope, symbol);
    let i128_ptr_type = env.context.i128_type().ptr_type(AddressSpace::Generic);

    // Reinterpret the symbol's pointer as a pointer to an i128.
    let as_i128_ptr = env
        .builder
        .build_bitcast(*symbol_ptr, i128_ptr_type, "cast")
        .into_pointer_value();

    let loaded = env.builder.build_load(as_i128_ptr, "load_as_i128");

    loaded.into_int_value()
}
/// Reinterprets a value of the zig-defined `str.RocStr` struct type as the
/// struct type produced by `collection(env.context, env.ptr_bytes)`.
///
/// The value is written to a fresh stack slot, the slot's pointer is bitcast
/// (first to `i128*`, then to a pointer to the collection struct), and the
/// result is loaded back through that pointer.
///
/// Panics if the module has no struct type named `str.RocStr`.
fn zig_str_to_struct<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    zig_str: StructValue<'ctx>,
) -> StructValue<'ctx> {
    let builder = env.builder;

    // The RocStr type as defined on the zig side.
    let zig_str_type = env.module.get_struct_type("str.RocStr").unwrap();
    let ret_type = BasicTypeEnum::StructType(collection(env.context, env.ptr_bytes));

    // Spill the zig value to the stack so it can be re-read under another type.
    let zig_result_slot = builder.build_alloca(zig_str_type, "zig_result");
    builder.build_store(zig_result_slot, zig_str);

    let i128_ptr_type = env.context.i128_type().ptr_type(AddressSpace::Generic);
    let slot_as_i128_ptr = builder
        .build_bitcast(zig_result_slot, i128_ptr_type, "cast")
        .into_pointer_value();

    let collection_ptr_type = ret_type
        .into_struct_type()
        .ptr_type(AddressSpace::Generic);
    let slot_as_collection_ptr = builder
        .build_bitcast(slot_as_i128_ptr, collection_ptr_type, "cast")
        .into_pointer_value();

    let reloaded = builder.build_load(slot_as_collection_ptr, "load");

    reloaded.into_struct_value()
}
/// Str.concat : Str, Str -> Str /// Str.concat : Str, Str -> Str
pub fn str_concat<'a, 'ctx, 'env>( pub fn str_concat<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>, env: &Env<'a, 'ctx, 'env>,
inplace: InPlace, inplace: InPlace,
scope: &Scope<'a, 'ctx>, scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>, _parent: FunctionValue<'ctx>,
first_str_symbol: Symbol, str1_symbol: Symbol,
second_str_symbol: Symbol, str2_symbol: Symbol,
) -> BasicValueEnum<'ctx> { ) -> BasicValueEnum<'ctx> {
let builder = env.builder; // swap the arguments; second argument comes before the second in the output string
let ctx = env.context; let str1_i128 = str_symbol_to_i128(env, scope, str1_symbol);
let str2_i128 = str_symbol_to_i128(env, scope, str2_symbol);
let second_str_ptr = ptr_from_symbol(scope, second_str_symbol); let zig_result = call_bitcode_fn(
let first_str_ptr = ptr_from_symbol(scope, first_str_symbol);
let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
load_str(
env, env,
parent, &[
*second_str_ptr, env.context
ret_type, .i32_type()
|second_str_ptr, second_str_len, second_str_smallness| { .const_int(env.ptr_bytes as u64, false)
load_str( .into(),
env, env.context
parent, .i8_type()
*first_str_ptr, .const_int(inplace as u64, false)
ret_type, .into(),
|first_str_ptr, first_str_len, first_str_smallness| { str1_i128.into(),
// first_str_len > 0 str2_i128.into(),
// We do this check to avoid allocating memory. If the first input ],
// str is empty, then we can just return the second str cloned &bitcode::STR_CONCAT,
let first_str_length_comparison = str_is_not_empty(env, first_str_len);
let if_first_str_is_empty = || {
// second_str_len > 0
// We do this check to avoid allocating memory. If the second input
// str is empty, then we can just return an empty str
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
let if_second_str_is_nonempty = || {
let (new_wrapper, _) = clone_nonempty_str(
env,
inplace,
second_str_smallness,
second_str_len,
second_str_ptr,
);
BasicValueEnum::StructValue(new_wrapper)
};
let if_second_str_is_empty = || empty_list(env);
build_basic_phi2(
env,
parent,
second_str_length_comparison,
if_second_str_is_nonempty,
if_second_str_is_empty,
ret_type,
)
};
let if_first_str_is_not_empty = || {
let if_second_str_is_empty = || {
let (new_wrapper, _) = clone_nonempty_str(
env,
inplace,
first_str_smallness,
first_str_len,
first_str_ptr,
);
BasicValueEnum::StructValue(new_wrapper)
};
// second_str_len > 0
// We do this check to avoid allocating memory. If the second input
// str is empty, then we can just return the first str cloned
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
let if_second_str_is_not_empty = || {
let combined_str_len = builder.build_int_add(
first_str_len,
second_str_len,
"add_list_lengths",
);
// The combined string is big iff its length is
// greater than or equal to the size in memory
// of a small str (e.g. len >= 16 on 64-bit targets)
let is_big = env.builder.build_int_compare(
IntPredicate::UGE,
combined_str_len,
env.ptr_int().const_int(env.small_str_bytes() as u64, false),
"str_is_big",
);
let if_big = || {
let combined_str_ptr =
allocate_list(env, inplace, &CHAR_LAYOUT, combined_str_len);
// TODO replace FIRST_LOOP with a memcpy!
// FIRST LOOP
let first_loop = |first_index, first_str_elem| {
// The pointer to the element in the combined list
let combined_str_elem_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_elem_ptr, first_str_elem);
};
let index_name = "#index";
let index_alloca = incrementing_elem_loop(
builder,
ctx,
parent,
first_str_ptr,
first_str_len,
index_name,
first_loop,
);
// Reset the index variable to 0
builder
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
// TODO replace SECOND_LOOP with a memcpy!
// SECOND LOOP
let second_loop = |second_index, second_str_elem| {
// The pointer to the element in the combined str.
// Note that the pointer does not start at the index
// 0, it starts at the index of first_str_len. In that
// sense it is "offset".
let offset_combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_str_len],
"elem",
)
};
// The pointer to the char from the second str
// in the combined list
let combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
offset_combined_str_char_ptr,
&[second_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_char_ptr, second_str_elem);
};
incrementing_elem_loop(
builder,
ctx,
parent,
second_str_ptr,
second_str_len,
index_name,
second_loop,
);
store_list(env, combined_str_ptr, combined_str_len)
};
let if_small = || {
let combined_str_ptr = builder.build_array_alloca(
ctx.i8_type(),
ctx.i8_type().const_int(env.small_str_bytes() as u64, false),
"alloca_small_str",
);
// TODO replace FIRST_LOOP with a memcpy!
// FIRST LOOP
let first_loop = |first_index, first_str_elem| {
// The pointer to the element in the combined list
let combined_str_elem_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_elem_ptr, first_str_elem);
};
let index_name = "#index";
let index_alloca = incrementing_elem_loop(
builder,
ctx,
parent,
first_str_ptr,
first_str_len,
index_name,
first_loop,
);
// Reset the index variable to 0
builder
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
// TODO replace SECOND_LOOP with a memcpy!
// SECOND LOOP
let second_loop = |second_index, second_str_elem| {
// The pointer to the element in the combined str.
// Note that the pointer does not start at the index
// 0, it starts at the index of first_str_len. In that
// sense it is "offset".
let offset_combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_str_len],
"elem",
)
};
// The pointer to the char from the second str
// in the combined list
let combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
offset_combined_str_char_ptr,
&[second_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_char_ptr, second_str_elem);
};
incrementing_elem_loop(
builder,
ctx,
parent,
second_str_ptr,
second_str_len,
index_name,
second_loop,
);
let final_byte = builder.build_int_cast(
combined_str_len,
ctx.i8_type(),
"str_len_to_i8",
);
let final_byte = builder.build_or(
final_byte,
ctx.i8_type().const_int(0b1000_0000, false),
"str_len_set_discriminant",
);
let final_byte_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[ctx
.i8_type()
.const_int(env.small_str_bytes() as u64 - 1, false)],
"str_literal_final_byte",
)
};
builder.build_store(final_byte_ptr, final_byte);
builder.build_load(
builder
.build_bitcast(
combined_str_ptr,
collection(ctx, env.ptr_bytes)
.ptr_type(AddressSpace::Generic),
"cast_collection",
)
.into_pointer_value(),
"small_str_array",
)
};
// If the combined length fits in a small string,
// write into a small string!
build_basic_phi2(
env,
parent,
is_big,
// the result of a Str.concat is most likely big
if_big,
if_small,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
};
build_basic_phi2(
env,
parent,
second_str_length_comparison,
if_second_str_is_not_empty,
if_second_str_is_empty,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
};
build_basic_phi2(
env,
parent,
first_str_length_comparison,
if_first_str_is_not_empty,
if_first_str_is_empty,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
},
)
},
) )
.into_struct_value();
zig_str_to_struct(env, zig_result).into()
} }
/// Obtain the string's length, cast from i8 to usize /// Obtain the string's length, cast from i8 to usize
@ -511,82 +292,6 @@ enum Smallness {
Big, Big,
} }
/// Produces a copy of a string and returns `(wrapper_struct, pointer)`.
///
/// * `Smallness::Small`: the wrapper struct is loaded through `bytes_ptr`
///   (cast via `cast_str_bytes_to_wrapper`) and stored into a new stack slot;
///   the loaded struct and that slot's pointer are returned.
/// * `Smallness::Big`: a new buffer of `len` elements is obtained from
///   `allocate_list`, `len` bytes are copied from `bytes_ptr` with a memcpy,
///   and a wrapper struct holding the new pointer and `len` is built; the
///   wrapper and the new buffer pointer are returned.
fn clone_nonempty_str<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    inplace: InPlace,
    smallness: Smallness,
    len: IntValue<'ctx>,
    bytes_ptr: PointerValue<'ctx>,
) -> (StructValue<'ctx>, PointerValue<'ctx>) {
    let builder = env.builder;
    let ctx = env.context;
    let ptr_bytes = env.ptr_bytes;

    match smallness {
        Smallness::Small => {
            // A small string lives entirely inside its wrapper, so copying the
            // wrapper struct copies the string.
            let source_wrapper_ptr = cast_str_bytes_to_wrapper(env, bytes_ptr);
            let wrapper = builder.build_load(source_wrapper_ptr, "str_wrapper");

            let clone_slot = builder.build_alloca(collection(ctx, ptr_bytes), "small_str_clone");
            builder.build_store(clone_slot, wrapper);

            (wrapper.into_struct_value(), clone_slot)
        }
        Smallness::Big => {
            // Allocate space for the new str that we'll copy into.
            let clone_ptr = allocate_list(env, inplace, &CHAR_LAYOUT, len);

            // TODO check if malloc returned null; if so, runtime error for OOM!

            // Copy the original bytes into the freshly allocated buffer;
            // `ptr_bytes` is passed as the alignment of both pointers.
            builder
                .build_memcpy(clone_ptr, ptr_bytes, bytes_ptr, ptr_bytes, len)
                .unwrap();

            // Build a fresh wrapper struct around the new buffer: pointer
            // first, then length.
            let wrapper_type = collection(ctx, env.ptr_bytes);

            let with_ptr = builder
                .build_insert_value(
                    wrapper_type.get_undef(),
                    clone_ptr,
                    Builtin::WRAPPER_PTR,
                    "insert_ptr",
                )
                .unwrap();

            let with_len = builder
                .build_insert_value(with_ptr, len, Builtin::WRAPPER_LEN, "insert_len")
                .unwrap();

            let cloned_wrapper = builder
                .build_bitcast(
                    with_len.into_struct_value(),
                    collection(ctx, ptr_bytes),
                    "cast_collection",
                )
                .into_struct_value();

            (cloned_wrapper, clone_ptr)
        }
    }
}
/// Bitcasts `bytes_ptr` to a pointer to the struct type returned by
/// `collection(env.context, env.ptr_bytes)`.
fn cast_str_bytes_to_wrapper<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    bytes_ptr: PointerValue<'ctx>,
) -> PointerValue<'ctx> {
    let wrapper_type = collection(env.context, env.ptr_bytes);
    let wrapper_ptr_type = wrapper_type.ptr_type(AddressSpace::Generic);

    let as_wrapper_ptr = env
        .builder
        .build_bitcast(bytes_ptr, wrapper_ptr_type, "str_as_struct_ptr");

    as_wrapper_ptr.into_pointer_value()
}
fn cast_str_wrapper_to_array<'a, 'ctx, 'env>( fn cast_str_wrapper_to_array<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>, env: &Env<'a, 'ctx, 'env>,
wrapper_ptr: PointerValue<'ctx>, wrapper_ptr: PointerValue<'ctx>,
@ -661,6 +366,7 @@ fn big_str_len<'ctx>(builder: &Builder<'ctx>, wrapper_struct: StructValue<'ctx>)
.into_int_value() .into_int_value()
} }
#[allow(dead_code)]
fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntValue<'ctx> { fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntValue<'ctx> {
env.builder.build_int_compare( env.builder.build_int_compare(
IntPredicate::UGT, IntPredicate::UGT,