LLVM backend: set small string flag on empty string

This commit is contained in:
Brian Carroll 2021-12-16 14:47:01 +00:00
parent a481e34f31
commit c5d0a42c20
2 changed files with 94 additions and 112 deletions

View file

@ -15,8 +15,8 @@ use crate::llvm::build_list::{
list_single, list_sort_with, list_sublist, list_swap, list_single, list_sort_with, list_sublist, list_swap,
}; };
use crate::llvm::build_str::{ use crate::llvm::build_str::{
empty_str, str_concat, str_count_graphemes, str_ends_with, str_from_float, str_from_int, str_concat, str_count_graphemes, str_ends_with, str_from_float, str_from_int, str_from_utf8,
str_from_utf8, str_from_utf8_range, str_join_with, str_number_of_bytes, str_repeat, str_split, str_from_utf8_range, str_join_with, str_number_of_bytes, str_repeat, str_split,
str_starts_with, str_starts_with_code_point, str_to_utf8, str_trim, str_trim_left, str_starts_with, str_starts_with_code_point, str_to_utf8, str_trim, str_trim_left,
str_trim_right, str_trim_right,
}; };
@ -779,118 +779,109 @@ pub fn build_exp_literal<'a, 'ctx, 'env>(
Bool(b) => env.context.bool_type().const_int(*b as u64, false).into(), Bool(b) => env.context.bool_type().const_int(*b as u64, false).into(),
Byte(b) => env.context.i8_type().const_int(*b as u64, false).into(), Byte(b) => env.context.i8_type().const_int(*b as u64, false).into(),
Str(str_literal) => { Str(str_literal) => {
if str_literal.is_empty() { let ctx = env.context;
empty_str(env) let builder = env.builder;
} else { let number_of_chars = str_literal.len() as u64;
let ctx = env.context;
let builder = env.builder;
let number_of_chars = str_literal.len() as u64;
let str_type = super::convert::zig_str_type(env); let str_type = super::convert::zig_str_type(env);
if str_literal.len() < env.small_str_bytes() as usize { if str_literal.len() < env.small_str_bytes() as usize {
// TODO support big endian systems // TODO support big endian systems
let array_alloca = builder.build_array_alloca( let array_alloca = builder.build_array_alloca(
ctx.i8_type(), ctx.i8_type(),
ctx.i8_type().const_int(env.small_str_bytes() as u64, false), ctx.i8_type().const_int(env.small_str_bytes() as u64, false),
"alloca_small_str", "alloca_small_str",
); );
// Zero out all the bytes. If we don't do this, then // Zero out all the bytes. If we don't do this, then
// small strings would have uninitialized bytes, which could // small strings would have uninitialized bytes, which could
// cause string equality checks to fail randomly. // cause string equality checks to fail randomly.
// //
// We're running memset over *all* the bytes, even though // We're running memset over *all* the bytes, even though
// the final one is about to be manually overridden, on // the final one is about to be manually overridden, on
// the theory that LLVM will optimize the memset call // the theory that LLVM will optimize the memset call
// into two instructions to move appropriately-sized // into two instructions to move appropriately-sized
// zero integers into the appropriate locations instead // zero integers into the appropriate locations instead
// of doing any iteration. // of doing any iteration.
// //
// TODO: look at the compiled output to verify this theory! // TODO: look at the compiled output to verify this theory!
env.call_memset( env.call_memset(
array_alloca,
ctx.i8_type().const_zero(),
env.ptr_int().const_int(env.small_str_bytes() as u64, false),
);
let final_byte = (str_literal.len() as u8) | 0b1000_0000;
let final_byte_ptr = unsafe {
builder.build_in_bounds_gep(
array_alloca, array_alloca,
ctx.i8_type().const_zero(), &[ctx
env.ptr_int().const_int(env.small_str_bytes() as u64, false),
);
let final_byte = (str_literal.len() as u8) | 0b1000_0000;
let final_byte_ptr = unsafe {
builder.build_in_bounds_gep(
array_alloca,
&[ctx
.i8_type()
.const_int(env.small_str_bytes() as u64 - 1, false)],
"str_literal_final_byte",
)
};
builder.build_store(
final_byte_ptr,
ctx.i8_type().const_int(final_byte as u64, false),
);
// Copy the elements from the list literal into the array
for (index, character) in str_literal.as_bytes().iter().enumerate() {
let val = env
.context
.i8_type() .i8_type()
.const_int(*character as u64, false) .const_int(env.small_str_bytes() as u64 - 1, false)],
.as_basic_value_enum(); "str_literal_final_byte",
let index_val = ctx.i64_type().const_int(index as u64, false);
let elem_ptr = unsafe {
builder.build_in_bounds_gep(array_alloca, &[index_val], "index")
};
builder.build_store(elem_ptr, val);
}
builder.build_load(
builder
.build_bitcast(
array_alloca,
str_type.ptr_type(AddressSpace::Generic),
"cast_collection",
)
.into_pointer_value(),
"small_str_array",
) )
} else { };
let ptr = define_global_str_literal_ptr(env, *str_literal);
let number_of_elements = env.ptr_int().const_int(number_of_chars, false);
let struct_type = str_type; builder.build_store(
final_byte_ptr,
ctx.i8_type().const_int(final_byte as u64, false),
);
let mut struct_val; // Copy the elements from the list literal into the array
for (index, character) in str_literal.as_bytes().iter().enumerate() {
let val = env
.context
.i8_type()
.const_int(*character as u64, false)
.as_basic_value_enum();
let index_val = ctx.i64_type().const_int(index as u64, false);
let elem_ptr =
unsafe { builder.build_in_bounds_gep(array_alloca, &[index_val], "index") };
// Store the pointer builder.build_store(elem_ptr, val);
struct_val = builder
.build_insert_value(
struct_type.get_undef(),
ptr,
Builtin::WRAPPER_PTR,
"insert_ptr_str_literal",
)
.unwrap();
// Store the length
struct_val = builder
.build_insert_value(
struct_val,
number_of_elements,
Builtin::WRAPPER_LEN,
"insert_len",
)
.unwrap();
builder.build_bitcast(
struct_val.into_struct_value(),
str_type,
"cast_collection",
)
} }
builder.build_load(
builder
.build_bitcast(
array_alloca,
str_type.ptr_type(AddressSpace::Generic),
"cast_collection",
)
.into_pointer_value(),
"small_str_array",
)
} else {
let ptr = define_global_str_literal_ptr(env, *str_literal);
let number_of_elements = env.ptr_int().const_int(number_of_chars, false);
let struct_type = str_type;
let mut struct_val;
// Store the pointer
struct_val = builder
.build_insert_value(
struct_type.get_undef(),
ptr,
Builtin::WRAPPER_PTR,
"insert_ptr_str_literal",
)
.unwrap();
// Store the length
struct_val = builder
.build_insert_value(
struct_val,
number_of_elements,
Builtin::WRAPPER_LEN,
"insert_len",
)
.unwrap();
builder.build_bitcast(struct_val.into_struct_value(), str_type, "cast_collection")
} }
} }
} }

View file

@ -432,12 +432,3 @@ pub fn str_equal<'a, 'ctx, 'env>(
bitcode::STR_EQUAL, bitcode::STR_EQUAL,
) )
} }
// TODO investigate: does this cause problems when the layout is known? this value is now not refcounted!
pub fn empty_str<'a, 'ctx, 'env>(env: &Env<'a, 'ctx, 'env>) -> BasicValueEnum<'ctx> {
let struct_type = super::convert::zig_str_type(env);
// The pointer should be null (aka zero) and the length should be zero,
// so the whole struct should be a const_zero
BasicValueEnum::StructValue(struct_type.const_zero())
}