roc/compiler/gen/src/llvm/build_str.rs
2020-10-20 20:40:37 -04:00

643 lines
25 KiB
Rust

use crate::llvm::build::{
call_bitcode_fn, load_symbol, load_symbol_and_layout, ptr_from_symbol, Env, InPlace, Scope,
};
use crate::llvm::build_list::{
allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, incrementing_index_loop,
list_single, load_list_ptr, store_list,
};
use crate::llvm::convert::{collection, ptr_int};
use inkwell::builder::Builder;
use inkwell::types::BasicTypeEnum;
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
use inkwell::{AddressSpace, IntPredicate};
use roc_module::symbol::Symbol;
use roc_mono::layout::{Builtin, Layout};
/// Layout of a single string byte (an 8-bit integer builtin). This module
/// passes it to `allocate_list` as the element layout whenever it allocates
/// a buffer.
pub static CHAR_LAYOUT: Layout = Layout::Builtin(Builtin::Int8);
/// Str.split : Str, Str -> List Str
///
/// Emits IR that asks the `count_segments_` bitcode routine how many pieces
/// the string splits into, allocates a list of that length, and then passes
/// that list together with both strings to the `str_split_` bitcode routine.
/// The value produced by that final call is what this function returns.
///
/// Both strings are run through `load_str` first, so the calls are emitted
/// inside the small/big branches that `load_str` creates; the pointer, length
/// and smallness it supplies are not used here.
pub fn str_split<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    scope: &Scope<'a, 'ctx>,
    parent: FunctionValue<'ctx>,
    inplace: InPlace,
    str_symbol: Symbol,
    delimiter_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
    let builder = env.builder;

    let haystack_ptr = ptr_from_symbol(scope, str_symbol);
    let needle_ptr = ptr_from_symbol(scope, delimiter_symbol);

    let wrapper_type = BasicTypeEnum::StructType(collection(env.context, env.ptr_bytes));

    load_str(env, parent, *haystack_ptr, wrapper_type, |_, _, _| {
        load_str(env, parent, *needle_ptr, wrapper_type, |_, _, _| {
            // Both wrappers are passed to the bitcode routines by value.
            let haystack = builder.build_load(*haystack_ptr, "get_str");
            let needle = builder.build_load(*needle_ptr, "get_delimiter");

            let counted = call_bitcode_fn(env, &[haystack, needle], "count_segments_");
            let segment_count = counted.into_int_value();

            // NOTE(review): the result is a List Str, yet the buffer is
            // allocated with the one-byte CHAR_LAYOUT element layout —
            // confirm this is the element size `str_split_` expects.
            let result_ptr = allocate_list(env, inplace, &CHAR_LAYOUT, segment_count);
            let result = builder.build_load(result_ptr, "get_str_split_ret_list");

            call_bitcode_fn(env, &[result, haystack, needle], "str_split_")
        })
    })
}
/// Str.concat : Str, Str -> Str
pub fn str_concat<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
inplace: InPlace,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
first_str_symbol: Symbol,
second_str_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let builder = env.builder;
let ctx = env.context;
let second_str_ptr = ptr_from_symbol(scope, second_str_symbol);
let first_str_ptr = ptr_from_symbol(scope, first_str_symbol);
let str_wrapper_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
load_str(
env,
parent,
*second_str_ptr,
str_wrapper_type,
|second_str_ptr, second_str_len, second_str_smallness| {
load_str(
env,
parent,
*first_str_ptr,
str_wrapper_type,
|first_str_ptr, first_str_len, first_str_smallness| {
// first_str_len > 0
// We do this check to avoid allocating memory. If the first input
// str is empty, then we can just return the second str cloned
let first_str_length_comparison = str_is_not_empty(env, first_str_len);
let if_first_str_is_empty = || {
// second_str_len > 0
// We do this check to avoid allocating memory. If the second input
// str is empty, then we can just return an empty str
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
let if_second_str_is_nonempty = || {
let (new_wrapper, _) = clone_nonempty_str(
env,
inplace,
second_str_smallness,
second_str_len,
second_str_ptr,
);
BasicValueEnum::StructValue(new_wrapper)
};
let if_second_str_is_empty = || empty_list(env);
build_basic_phi2(
env,
parent,
second_str_length_comparison,
if_second_str_is_nonempty,
if_second_str_is_empty,
str_wrapper_type,
)
};
let if_first_str_is_not_empty = || {
let if_second_str_is_empty = || {
let (new_wrapper, _) = clone_nonempty_str(
env,
inplace,
first_str_smallness,
first_str_len,
first_str_ptr,
);
BasicValueEnum::StructValue(new_wrapper)
};
// second_str_len > 0
// We do this check to avoid allocating memory. If the second input
// str is empty, then we can just return the first str cloned
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
let if_second_str_is_not_empty = || {
let combined_str_len = builder.build_int_add(
first_str_len,
second_str_len,
"add_list_lengths",
);
// The combined string is big iff its length is
// greater than or equal to the size in memory
// of a small str (e.g. len >= 16 on 64-bit targets)
let is_big = env.builder.build_int_compare(
IntPredicate::UGE,
combined_str_len,
env.ptr_int().const_int(env.small_str_bytes() as u64, false),
"str_is_big",
);
let if_big = || {
let combined_str_ptr =
allocate_list(env, inplace, &CHAR_LAYOUT, combined_str_len);
// TODO replace FIRST_LOOP with a memcpy!
// FIRST LOOP
let first_loop = |first_index, first_str_elem| {
// The pointer to the element in the combined list
let combined_str_elem_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_elem_ptr, first_str_elem);
};
let index_name = "#index";
let index_alloca = incrementing_elem_loop(
builder,
ctx,
parent,
first_str_ptr,
first_str_len,
index_name,
first_loop,
);
// Reset the index variable to 0
builder
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
// TODO replace SECOND_LOOP with a memcpy!
// SECOND LOOP
let second_loop = |second_index, second_str_elem| {
// The pointer to the element in the combined str.
// Note that the pointer does not start at the index
// 0, it starts at the index of first_str_len. In that
// sense it is "offset".
let offset_combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_str_len],
"elem",
)
};
// The pointer to the char from the second str
// in the combined list
let combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
offset_combined_str_char_ptr,
&[second_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_char_ptr, second_str_elem);
};
incrementing_elem_loop(
builder,
ctx,
parent,
second_str_ptr,
second_str_len,
index_name,
second_loop,
);
store_list(env, combined_str_ptr, combined_str_len)
};
let if_small = || {
let combined_str_ptr = builder.build_array_alloca(
ctx.i8_type(),
ctx.i8_type().const_int(env.small_str_bytes() as u64, false),
"alloca_small_str",
);
// TODO replace FIRST_LOOP with a memcpy!
// FIRST LOOP
let first_loop = |first_index, first_str_elem| {
// The pointer to the element in the combined list
let combined_str_elem_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_elem_ptr, first_str_elem);
};
let index_name = "#index";
let index_alloca = incrementing_elem_loop(
builder,
ctx,
parent,
first_str_ptr,
first_str_len,
index_name,
first_loop,
);
// Reset the index variable to 0
builder
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
// TODO replace SECOND_LOOP with a memcpy!
// SECOND LOOP
let second_loop = |second_index, second_str_elem| {
// The pointer to the element in the combined str.
// Note that the pointer does not start at the index
// 0, it starts at the index of first_str_len. In that
// sense it is "offset".
let offset_combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_str_len],
"elem",
)
};
// The pointer to the char from the second str
// in the combined list
let combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
offset_combined_str_char_ptr,
&[second_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_char_ptr, second_str_elem);
};
incrementing_elem_loop(
builder,
ctx,
parent,
second_str_ptr,
second_str_len,
index_name,
second_loop,
);
let final_byte = builder.build_int_cast(
combined_str_len,
ctx.i8_type(),
"str_len_to_i8",
);
let final_byte = builder.build_or(
final_byte,
ctx.i8_type().const_int(0b1000_0000, false),
"str_len_set_discriminant",
);
let final_byte_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[ctx
.i8_type()
.const_int(env.small_str_bytes() as u64 - 1, false)],
"str_literal_final_byte",
)
};
builder.build_store(final_byte_ptr, final_byte);
builder.build_load(
builder
.build_bitcast(
combined_str_ptr,
collection(ctx, env.ptr_bytes)
.ptr_type(AddressSpace::Generic),
"cast_collection",
)
.into_pointer_value(),
"small_str_array",
)
};
// If the combined length fits in a small string,
// write into a small string!
build_basic_phi2(
env,
parent,
is_big,
// the result of a Str.concat is most likely big
if_big,
if_small,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
};
build_basic_phi2(
env,
parent,
second_str_length_comparison,
if_second_str_is_not_empty,
if_second_str_is_empty,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
};
build_basic_phi2(
env,
parent,
first_str_length_comparison,
if_first_str_is_not_empty,
if_first_str_is_empty,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
},
)
},
)
}
/// Extract a small string's length from its final byte.
///
/// The top bit of `final_byte` is masked away and the remaining seven bits
/// are cast from i8 to the target's pointer-sized integer type.
fn str_len_from_final_byte<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    final_byte: IntValue<'ctx>,
) -> IntValue<'ctx> {
    let i8_type = env.context.i8_type();

    // Everything except the top bit.
    let length_mask = i8_type.const_int(0b0111_1111, false);
    let masked_len = env
        .builder
        .build_and(final_byte, length_mask, "small_str_length");

    env.builder
        .build_int_cast(masked_len, env.ptr_int(), "len_as_usize")
}
/// Used by LowLevel::StrIsEmpty
///
/// Emits IR that yields the length of the string behind `wrapper_ptr` as a
/// pointer-sized integer. A small string's length is decoded from its final
/// byte; a big string's length is read from the wrapper struct.
pub fn str_len<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    parent: FunctionValue<'ctx>,
    wrapper_ptr: PointerValue<'ctx>,
) -> IntValue<'ctx> {
    let builder = env.builder;
    let len_type = BasicTypeEnum::IntType(env.ptr_int());

    let length = if_small_str(
        env,
        parent,
        wrapper_ptr,
        |final_byte| BasicValueEnum::IntValue(str_len_from_final_byte(env, final_byte)),
        |_| {
            // The wrapper is loaded again here rather than using the struct
            // value passed to this closure.
            let wrapper = builder
                .build_load(wrapper_ptr, "big_str")
                .into_struct_value();

            BasicValueEnum::IntValue(big_str_len(builder, wrapper))
        },
        len_type,
    );

    length.into_int_value()
}
/// Emits a small/big branch on the string behind `wrapper_ptr` and calls
/// `cb` once in each branch with a pointer to the string's bytes, its length,
/// and which representation that branch handles.
///
/// For a small string the bytes pointer is the wrapper pointer itself cast
/// to a byte pointer; for a big string it is the pointer loaded out of the
/// wrapper struct. The values `cb` returns must have type `ret_type`; the
/// value selected at runtime is returned.
fn load_str<'a, 'ctx, 'env, Callback>(
    env: &Env<'a, 'ctx, 'env>,
    parent: FunctionValue<'ctx>,
    wrapper_ptr: PointerValue<'ctx>,
    ret_type: BasicTypeEnum<'ctx>,
    cb: Callback,
) -> BasicValueEnum<'ctx>
where
    Callback: Fn(PointerValue<'ctx>, IntValue<'ctx>, Smallness) -> BasicValueEnum<'ctx>,
{
    let builder = env.builder;

    if_small_str(
        env,
        parent,
        wrapper_ptr,
        |final_byte| {
            let bytes_ptr = cast_str_wrapper_to_array(env, wrapper_ptr);
            let len = str_len_from_final_byte(env, final_byte);

            cb(bytes_ptr, len, Smallness::Small)
        },
        |wrapper_struct| {
            let byte_ptr_type = env.context.i8_type().ptr_type(AddressSpace::Generic);
            let bytes_ptr = load_list_ptr(builder, wrapper_struct, byte_ptr_type);
            let len = big_str_len(builder, wrapper_struct);

            cb(bytes_ptr, len, Smallness::Big)
        },
        ret_type,
    )
}
/// Which of the two string representations a string uses; `load_str` passes
/// this to its callback and `clone_nonempty_str` branches on it.
#[derive(Debug, Copy, Clone)]
enum Smallness {
/// The bytes are stored inside the wrapper itself; the top bit of the
/// wrapper's final byte is set and its low seven bits hold the length.
Small,
/// The wrapper holds a pointer to the bytes and a separate length field.
Big,
}
/// Emits IR that duplicates a string the caller knows to be non-empty.
///
/// Returns the wrapper struct of the copy together with a pointer. For a
/// small string the pointer is the stack slot the copied wrapper was stored
/// into; for a big string it is the freshly allocated byte buffer.
///
/// `bytes_ptr` points at the source bytes and `len` is their count;
/// `inplace` is forwarded to the allocation made for big strings.
fn clone_nonempty_str<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    inplace: InPlace,
    smallness: Smallness,
    len: IntValue<'ctx>,
    bytes_ptr: PointerValue<'ctx>,
) -> (StructValue<'ctx>, PointerValue<'ctx>) {
    let builder = env.builder;
    let ctx = env.context;
    let ptr_bytes = env.ptr_bytes;

    match smallness {
        Smallness::Small => {
            // A small string is its wrapper, so copying the wrapper struct
            // into a new stack slot copies the whole string.
            let source_wrapper_ptr = cast_str_bytes_to_wrapper(env, bytes_ptr);
            let wrapper = builder.build_load(source_wrapper_ptr, "str_wrapper");

            let slot = builder.build_alloca(collection(ctx, ptr_bytes), "small_str_clone");
            builder.build_store(slot, wrapper);

            (wrapper.into_struct_value(), slot)
        }
        Smallness::Big => {
            // Allocate space for the new str that we'll copy into.
            let dest_ptr = allocate_list(env, inplace, &CHAR_LAYOUT, len);

            let usize_type = ptr_int(ctx, ptr_bytes);
            let dest_as_int = builder.build_ptr_to_int(dest_ptr, usize_type, "list_cast_ptr");

            // TODO check if malloc returned null; if so, runtime error for OOM!

            // Copy the bytes from the original array into the new
            // one we just malloc'd.
            builder.build_memcpy(dest_ptr, ptr_bytes, bytes_ptr, ptr_bytes, len);

            // Build a fresh wrapper struct for the newly populated array:
            // first the pointer field, then the length field.
            let wrapper_type = collection(ctx, env.ptr_bytes);

            let with_ptr = builder
                .build_insert_value(
                    wrapper_type.get_undef(),
                    dest_as_int,
                    Builtin::WRAPPER_PTR,
                    "insert_ptr",
                )
                .unwrap();

            let with_len = builder
                .build_insert_value(with_ptr, len, Builtin::WRAPPER_LEN, "insert_len")
                .unwrap();

            let cloned_wrapper = builder
                .build_bitcast(
                    with_len.into_struct_value(),
                    collection(ctx, ptr_bytes),
                    "cast_collection",
                )
                .into_struct_value();

            (cloned_wrapper, dest_ptr)
        }
    }
}
/// Emits a bitcast that reinterprets a pointer to string bytes as a pointer
/// to the string wrapper struct.
fn cast_str_bytes_to_wrapper<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    bytes_ptr: PointerValue<'ctx>,
) -> PointerValue<'ctx> {
    let wrapper_type = collection(env.context, env.ptr_bytes);
    let wrapper_ptr_type = wrapper_type.ptr_type(AddressSpace::Generic);

    let as_wrapper = env
        .builder
        .build_bitcast(bytes_ptr, wrapper_ptr_type, "str_as_struct_ptr");

    as_wrapper.into_pointer_value()
}
/// Emits a bitcast that reinterprets a pointer to a string wrapper struct
/// as a pointer to bytes (`i8*`).
fn cast_str_wrapper_to_array<'a, 'ctx, 'env>(
    env: &Env<'a, 'ctx, 'env>,
    wrapper_ptr: PointerValue<'ctx>,
) -> PointerValue<'ctx> {
    let byte_type = env.context.i8_type();
    let byte_ptr_type = byte_type.ptr_type(AddressSpace::Generic);

    let as_bytes = env
        .builder
        .build_bitcast(wrapper_ptr, byte_ptr_type, "str_as_array_ptr");

    as_bytes.into_pointer_value()
}
/// Emits a runtime branch on whether the string behind `wrapper_ptr` is
/// small or big, and merges the two branch results into one value of type
/// `ret_type`.
///
/// The string counts as small when the top bit of the wrapper's final byte
/// (at byte index `env.small_str_bytes() - 1`) is set. `if_small` receives
/// that final byte; `if_big` receives the wrapper struct, which is loaded
/// inside the big branch.
fn if_small_str<'a, 'ctx, 'env, IfSmallFn, IfBigFn>(
    env: &Env<'a, 'ctx, 'env>,
    parent: FunctionValue<'ctx>,
    wrapper_ptr: PointerValue<'ctx>,
    mut if_small: IfSmallFn,
    mut if_big: IfBigFn,
    ret_type: BasicTypeEnum<'ctx>,
) -> BasicValueEnum<'ctx>
where
    IfSmallFn: FnMut(IntValue<'ctx>) -> BasicValueEnum<'ctx>,
    IfBigFn: FnMut(StructValue<'ctx>) -> BasicValueEnum<'ctx>,
{
    let builder = env.builder;
    let i8_type = env.context.i8_type();

    // View the wrapper as raw bytes so its last byte can be addressed.
    let wrapper_bytes = cast_str_wrapper_to_array(env, wrapper_ptr);
    let last_index = i8_type.const_int(env.small_str_bytes() as u64 - 1, false);

    // SAFETY: the index is small_str_bytes - 1; this assumes the wrapper
    // occupies small_str_bytes bytes — TODO confirm.
    let final_byte_ptr =
        unsafe { builder.build_in_bounds_gep(wrapper_bytes, &[last_index], "final_byte_ptr") };

    let final_byte = builder
        .build_load(final_byte_ptr, "load_final_byte")
        .into_int_value();

    // Isolate the top bit; a non-zero result marks a small string.
    let small_flag_mask = i8_type.const_int(0b1000_0000, false);
    let small_flag = builder.build_and(final_byte, small_flag_mask, "is_small");

    let flag_is_set = builder.build_int_compare(
        IntPredicate::NE,
        i8_type.const_zero(),
        small_flag,
        "is_small_comparison",
    );
    let is_small =
        builder.build_int_cast(flag_is_set, env.context.bool_type(), "is_small_as_bool");

    build_basic_phi2(
        env,
        parent,
        is_small,
        || if_small(final_byte),
        || {
            let wrapper_struct = builder
                .build_load(wrapper_ptr, "load_wrapper_struct")
                .into_struct_value();

            if_big(wrapper_struct)
        },
        ret_type,
    )
}
/// Emits an extract of the length field (`Builtin::WRAPPER_LEN`) from a big
/// string's wrapper struct.
///
/// Panics if the field cannot be extracted from `wrapper_struct`.
fn big_str_len<'ctx>(builder: &Builder<'ctx>, wrapper_struct: StructValue<'ctx>) -> IntValue<'ctx> {
    let len_field =
        builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "big_str_len");

    len_field.unwrap().into_int_value()
}
/// Emits an unsigned `len > 0` comparison against a pointer-sized zero and
/// returns its result.
fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntValue<'ctx> {
    let zero = env.ptr_int().const_zero();

    env.builder
        .build_int_compare(IntPredicate::UGT, len, zero, "str_len_is_nonzero")
}