Implement string literals as lists of bytes instead of NUL-terminated C strings

This commit is contained in:
Chad Stearns 2020-08-16 04:38:24 -04:00
parent 27b2d10b2a
commit 4ded732a09
3 changed files with 58 additions and 42 deletions

View file

@ -15,7 +15,7 @@ use inkwell::module::{Linkage, Module};
use inkwell::passes::{PassManager, PassManagerBuilder}; use inkwell::passes::{PassManager, PassManagerBuilder};
use inkwell::types::{BasicTypeEnum, FunctionType, IntType, StructType}; use inkwell::types::{BasicTypeEnum, FunctionType, IntType, StructType};
use inkwell::values::BasicValueEnum::{self, *}; use inkwell::values::BasicValueEnum::{self, *};
use inkwell::values::{FloatValue, FunctionValue, IntValue, PointerValue, StructValue}; use inkwell::values::{BasicValue, FloatValue, FunctionValue, IntValue, PointerValue, StructValue};
use inkwell::AddressSpace; use inkwell::AddressSpace;
use inkwell::{IntPredicate, OptimizationLevel}; use inkwell::{IntPredicate, OptimizationLevel};
use roc_collections::all::{ImMap, MutSet}; use roc_collections::all::{ImMap, MutSet};
@ -217,43 +217,65 @@ pub fn build_exp_literal<'a, 'ctx, 'env>(
Bool(b) => env.context.bool_type().const_int(*b as u64, false).into(), Bool(b) => env.context.bool_type().const_int(*b as u64, false).into(),
Byte(b) => env.context.i8_type().const_int(*b as u64, false).into(), Byte(b) => env.context.i8_type().const_int(*b as u64, false).into(),
Str(str_literal) => { Str(str_literal) => {
if str_literal.is_empty() {
panic!("TODO build an empty string in LLVM");
} else {
let ctx = env.context; let ctx = env.context;
let builder = env.builder; let builder = env.builder;
let str_len = str_literal.len() + 1/* TODO drop the +1 when we have structs and this is no longer a NUL-terminated CString.*/;
let byte_type = ctx.i8_type(); let len_u64 = str_literal.len() as u64;
let nul_terminator = byte_type.const_zero(); let elem_layout = Layout::Builtin(Builtin::Int8);
let len_val = ctx.i64_type().const_int(str_len as u64, false);
let ptr = env let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64;
.builder
.build_array_malloc(ctx.i8_type(), len_val, "str_ptr") let ptr = {
.unwrap(); let bytes_len = elem_bytes * len_u64;
let len_type = env.ptr_int();
let len = len_type.const_int(bytes_len, false);
allocate_list(env, &elem_layout, len)
// TODO check if malloc returned null; if so, runtime error for OOM! // TODO check if malloc returned null; if so, runtime error for OOM!
};
// Copy the bytes from the string literal into the array // Copy the elements from the list literal into the array
for (index, byte) in str_literal.bytes().enumerate() { for (index, char) in str_literal.as_bytes().iter().enumerate() {
let val = env
.context
.i8_type()
.const_int(*char as u64, false)
.as_basic_value_enum();
let index_val = ctx.i64_type().const_int(index as u64, false); let index_val = ctx.i64_type().const_int(index as u64, false);
let elem_ptr = let elem_ptr = unsafe { builder.build_in_bounds_gep(ptr, &[index_val], "index") };
unsafe { builder.build_in_bounds_gep(ptr, &[index_val], "byte") };
builder.build_store(elem_ptr, byte_type.const_int(byte as u64, false)); builder.build_store(elem_ptr, val);
} }
// Add a NUL terminator at the end. let ptr_bytes = env.ptr_bytes;
// TODO: Instead of NUL-terminating, return a struct let int_type = ptr_int(ctx, ptr_bytes);
// with the pointer and also the length and capacity. let ptr_as_int = builder.build_ptr_to_int(ptr, int_type, "list_cast_ptr");
let index_val = ctx.i64_type().const_int(str_len as u64 - 1, false); let struct_type = collection(ctx, ptr_bytes);
let elem_ptr = let len = BasicValueEnum::IntValue(env.ptr_int().const_int(len_u64, false));
unsafe { builder.build_in_bounds_gep(ptr, &[index_val], "nul_terminator") }; let mut struct_val;
builder.build_store(elem_ptr, nul_terminator); // Store the pointer
struct_val = builder
.build_insert_value(
struct_type.get_undef(),
ptr_as_int,
Builtin::WRAPPER_PTR,
"insert_ptr",
)
.unwrap();
BasicValueEnum::PointerValue(ptr) // Store the length
} struct_val = builder
.build_insert_value(struct_val, len, Builtin::WRAPPER_LEN, "insert_len")
.unwrap();
// Bitcast to an array of raw bytes
builder.build_bitcast(
struct_val.into_struct_value(),
collection(ctx, ptr_bytes),
"cast_collection",
)
} }
} }
} }

View file

@ -13,17 +13,11 @@ mod helpers;
#[cfg(test)] #[cfg(test)]
mod gen_primitives { mod gen_primitives {
use std::ffi::{CStr, CString};
use std::os::raw::c_char;
#[test] #[test]
fn basic_str() { fn basic_str() {
assert_evals_to!( assert_evals_to!("\"\"", "", &'static str);
"\"shirt and hat\"", // assert_evals_to!("\"shirt and hat\"", "shirt and hat", &'static str);
CString::new("shirt and hat").unwrap().as_c_str(),
*const c_char,
CStr::from_ptr
);
} }
#[test] #[test]

View file

@ -181,7 +181,7 @@ pub fn helper_without_uniqueness<'a>(
); );
// Uncomment this to see the module's un-optimized LLVM instruction output: // Uncomment this to see the module's un-optimized LLVM instruction output:
// env.module.print_to_stderr(); env.module.print_to_stderr();
if main_fn.verify(true) { if main_fn.verify(true) {
function_pass.run_on(&main_fn); function_pass.run_on(&main_fn);
@ -197,7 +197,7 @@ pub fn helper_without_uniqueness<'a>(
} }
// Uncomment this to see the module's optimized LLVM instruction output: // Uncomment this to see the module's optimized LLVM instruction output:
// env.module.print_to_stderr(); env.module.print_to_stderr();
(main_fn_name, execution_engine.clone()) (main_fn_name, execution_engine.clone())
} }