diff --git a/Cargo.lock b/Cargo.lock index 0fc3211e9b..a537330d71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,9 +2,9 @@ # It is not intended for manual editing. [[package]] name = "aho-corasick" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5e63fd144e18ba274ae7095c0197a870a7b9468abc801dd62f190d80817d2ec" +checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" dependencies = [ "memchr", ] @@ -337,7 +337,7 @@ dependencies = [ [[package]] name = "inkwell" version = "0.1.0" -source = "git+https://github.com/rtfeldman/inkwell?tag=llvm8-0.release1#647a7cbfc2a6be3b41b8de3b1c63ec1fe4e79267" +source = "git+https://github.com/rtfeldman/inkwell?tag=llvm8-0.release2#afdfd85c1183869e5d17b930c798c3087ff1c737" dependencies = [ "either", "inkwell_internals", @@ -351,7 +351,7 @@ dependencies = [ [[package]] name = "inkwell_internals" version = "0.1.0" -source = "git+https://github.com/rtfeldman/inkwell?tag=llvm8-0.release1#647a7cbfc2a6be3b41b8de3b1c63ec1fe4e79267" +source = "git+https://github.com/rtfeldman/inkwell?tag=llvm8-0.release2#afdfd85c1183869e5d17b930c798c3087ff1c737" dependencies = [ "proc-macro2 0.4.30", "quote 0.6.13", @@ -372,9 +372,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.67" +version = "0.2.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb147597cdf94ed43ab7a9038716637d2d1bf2bc571da995d0028dec06bd3018" +checksum = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0" [[package]] name = "llvm-sys" @@ -486,9 +486,9 @@ dependencies = [ [[package]] name = "proc-macro-hack" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd45702f76d6d3c75a80564378ae228a85f0b59d2f3ed43c91b4a69eb2ebfc5" +checksum = "f918f2b601f93baa836c1c2945faef682ba5b6d4828ecb45eeb7cc3c71b811b4" dependencies = [ "proc-macro2 1.0.9", "quote 1.0.3", @@ -703,9 +703,9 @@ checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" [[package]] name = "regex" -version = "1.3.4" +version = "1.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" +checksum = "8900ebc1363efa7ea1c399ccc32daed870b4002651e0bed86e72d501ebbe0048" dependencies = [ "aho-corasick", "memchr", @@ -715,9 +715,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.16" +version = "0.6.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1132f845907680735a84409c3bebc64d1364a5683ffbce899550cd09d5eaefc1" +checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae" [[package]] name = "region" @@ -833,6 +833,7 @@ dependencies = [ "indoc", "inkwell", "inlinable_string", + "libc", "maplit", "pretty_assertions", "quickcheck", @@ -1085,9 +1086,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "sized-chunks" -version = "0.5.1" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f59f81ec9833a580d2448e958d16bd872637798f3ab300b693c48f136fb76ff" +checksum = "d59044ea371ad781ff976f7b06480b9f0180e834eda94114f2afb4afc12b7718" dependencies = [ "bitmaps", "typenum", diff --git a/compiler/gen/Cargo.toml b/compiler/gen/Cargo.toml index 20838969b7..c6fc276305 100644 --- a/compiler/gen/Cargo.toml +++ b/compiler/gen/Cargo.toml @@ -37,7 +37,7 @@ inlinable_string = "0.1.0" # commit of TheDan64/inkwell, push a new tag which points to the latest commit, # change the tag value in this Cargo.toml to point to that tag, and `cargo update`. # This way, GitHub Actions works and nobody's builds get broken. -inkwell = { git = "https://github.com/rtfeldman/inkwell", tag = "llvm8-0.release1" } +inkwell = { git = "https://github.com/rtfeldman/inkwell", tag = "llvm8-0.release2" } target-lexicon = "0.10" # NOTE: we must use the same version of target-lexicon as cranelift! cranelift = "0.59" # All cranelift crates should have the same version! cranelift-simplejit = "0.59" # All cranelift crates should have the same version! @@ -54,3 +54,4 @@ quickcheck = "0.8" quickcheck_macros = "0.8" tokio = { version = "0.2", features = ["blocking", "fs", "sync", "rt-threaded"] } bumpalo = { version = "3.2", features = ["collections"] } +libc = "0.2" diff --git a/compiler/gen/src/crane/build.rs b/compiler/gen/src/crane/build.rs index 4727f9e65e..91baa2ad4f 100644 --- a/compiler/gen/src/crane/build.rs +++ b/compiler/gen/src/crane/build.rs @@ -351,6 +351,7 @@ pub fn build_expr<'a, B: Backend>( let bytes_len = str_literal.len() + 1/* TODO drop the +1 when we have structs and this is no longer a NUL-terminated CString.*/; let size = builder.ins().iconst(types::I64, bytes_len as i64); let ptr = call_malloc(env, module, builder, size); + // TODO check if malloc returned null; if so, runtime error for OOM! let mem_flags = MemFlags::new(); // Store the bytes from the string literal in the array @@ -389,6 +390,8 @@ pub fn build_expr<'a, B: Backend>( let bytes_len = elem_bytes as usize * elems.len(); let size = builder.ins().iconst(types::I64, bytes_len as i64); let elems_ptr = call_malloc(env, module, builder, size); + + // TODO check if malloc returned null; if so, runtime error for OOM! let mem_flags = MemFlags::new(); // Copy the elements from the literal into the array @@ -1044,6 +1047,8 @@ fn clone_list( // Allocate space for the new array that we'll copy into. let new_elems_ptr = call_malloc(env, module, builder, size); + // TODO check if malloc returned null; if so, runtime error for OOM! + // Either memcpy or deep clone the array elements if elem_layout.safe_to_memcpy() { // Copy the bytes from the original array into the new diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 1d1e015355..7ace324eef 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -6,10 +6,10 @@ use inkwell::module::{Linkage, Module}; use inkwell::types::{BasicTypeEnum, IntType, StructType}; use inkwell::values::BasicValueEnum::{self, *}; use inkwell::values::{FunctionValue, IntValue, PointerValue, StructValue}; -use inkwell::{AddressSpace, FloatPredicate, IntPredicate}; +use inkwell::{FloatPredicate, IntPredicate}; use crate::llvm::convert::{ - basic_type_from_layout, collection_wrapper, get_array_type, get_fn_type, ptr_int, + basic_type_from_layout, collection_wrapper, empty_collection, get_fn_type, ptr_int, }; use roc_collections::all::ImMap; use roc_module::symbol::{Interns, Symbol}; @@ -160,7 +160,7 @@ pub fn build_expr<'a, 'ctx, 'env>( arg_tuples.push((build_expr(env, scope, parent, arg, procs), layout)); } - call_with_args(*symbol, arg_tuples.into_bump_slice(), env) + call_with_args(*symbol, parent, arg_tuples.into_bump_slice(), env) } }, FunctionPointer(symbol) => { @@ -208,20 +208,23 @@ pub fn build_expr<'a, 'ctx, 'env>( } else { let ctx = env.context; let builder = env.builder; - let bytes_len = str_literal.len() + 1/* TODO drop the +1 when we have structs and this is no longer a NUL-terminated CString.*/; + let str_len = str_literal.len() + 1/* TODO drop the +1 when we have structs and this is no longer a NUL-terminated CString.*/; let byte_type = ctx.i8_type(); let nul_terminator = byte_type.const_zero(); - let len = ctx.i32_type().const_int(bytes_len as u64, false); + let len_val = ctx.i32_type().const_int(str_len as u64, false); let ptr = env .builder - .build_array_malloc(ctx.i8_type(), len, "str_ptr") + .build_array_malloc(ctx.i8_type(), len_val, "str_ptr") .unwrap(); + // TODO check if malloc returned null; if so, runtime error for OOM! + // Copy the bytes from the string literal into the array for (index, byte) in str_literal.bytes().enumerate() { - let index = ctx.i32_type().const_int(index as u64, false); - let elem_ptr = unsafe { builder.build_gep(ptr, &[index], "byte") }; + let index_val = ctx.i32_type().const_int(index as u64, false); + let elem_ptr = + unsafe { builder.build_in_bounds_gep(ptr, &[index_val], "byte") }; builder.build_store(elem_ptr, byte_type.const_int(byte as u64, false)); } @@ -229,8 +232,9 @@ pub fn build_expr<'a, 'ctx, 'env>( // Add a NUL terminator at the end. // TODO: Instead of NUL-terminating, return a struct // with the pointer and also the length and capacity. - let index = ctx.i32_type().const_int(bytes_len as u64 - 1, false); - let elem_ptr = unsafe { builder.build_gep(ptr, &[index], "nul_terminator") }; + let index_val = ctx.i32_type().const_int(str_len as u64 - 1, false); + let elem_ptr = + unsafe { builder.build_in_bounds_gep(ptr, &[index_val], "nul_terminator") }; builder.build_store(elem_ptr, nul_terminator); @@ -243,21 +247,11 @@ pub fn build_expr<'a, 'ctx, 'env>( let builder = env.builder; if elems.is_empty() { - let array_type = get_array_type(&elem_type, 0); - let ptr_type = array_type.ptr_type(AddressSpace::Generic); - let struct_type = collection_wrapper(ctx, ptr_type, env.ptr_bytes); + let struct_type = empty_collection(ctx, env.ptr_bytes); - // The first field in the struct should be the pointer. - let struct_val = builder - .build_insert_value( - struct_type.const_zero(), - BasicValueEnum::PointerValue(ptr_type.const_null()), - Builtin::WRAPPER_PTR, - "insert_ptr", - ) - .unwrap(); - - BasicValueEnum::StructValue(struct_val.into_struct_value()) + // THe pointer should be null (aka zero) and the length should be zero, + // so the whole struct should be a const_zero + BasicValueEnum::StructValue(struct_type.const_zero()) } else { let len_u64 = elems.len() as u64; let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64; @@ -270,13 +264,15 @@ pub fn build_expr<'a, 'ctx, 'env>( env.builder .build_array_malloc(elem_type, len, "create_list_ptr") .unwrap() + + // TODO check if malloc returned null; if so, runtime error for OOM! }; // Copy the elements from the list literal into the array for (index, elem) in elems.iter().enumerate() { - let offset = ctx.i32_type().const_int(elem_bytes * index as u64, false); - let elem_ptr = unsafe { builder.build_gep(ptr, &[offset], "elem") }; - + let index_val = ctx.i32_type().const_int(index as u64, false); + let elem_ptr = + unsafe { builder.build_in_bounds_gep(ptr, &[index_val], "index") }; let val = build_expr(env, &scope, parent, &elem, procs); builder.build_store(elem_ptr, val); @@ -287,17 +283,17 @@ pub fn build_expr<'a, 'ctx, 'env>( let len = BasicValueEnum::IntValue(env.ptr_int().const_int(len_u64, false)); let mut struct_val; - // Field 0: pointer + // Store the pointer struct_val = builder .build_insert_value( - struct_type.const_zero(), + struct_type.get_undef(), ptr_val, Builtin::WRAPPER_PTR, "insert_ptr", ) .unwrap(); - // Field 1: length + // Store the length struct_val = builder .build_insert_value(struct_val, len, Builtin::WRAPPER_LEN, "insert_len") .unwrap(); @@ -926,6 +922,7 @@ pub fn verify_fn(fn_val: FunctionValue<'_>) { #[allow(clippy::cognitive_complexity)] fn call_with_args<'a, 'ctx, 'env>( symbol: Symbol, + parent: FunctionValue<'ctx>, args: &[(BasicValueEnum<'ctx>, &'a Layout<'a>)], env: &Env<'a, 'ctx, 'env>, ) -> BasicValueEnum<'ctx> { @@ -997,18 +994,14 @@ fn call_with_args<'a, 'ctx, 'env>( Symbol::LIST_LEN => { debug_assert!(args.len() == 1); - let wrapper_struct = args[0].0.into_struct_value(); - let builder = env.builder; - - // Get the usize int length - builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "unwrapped_list_len").unwrap().into_int_value().into() + BasicValueEnum::IntValue(load_list_len(env.builder, args[0].0.into_struct_value())) } Symbol::LIST_IS_EMPTY => { debug_assert!(args.len() == 1); let list_struct = args[0].0.into_struct_value(); let builder = env.builder; - let list_len = builder.build_extract_value(list_struct, 1, "unwrapped_list_len").unwrap().into_int_value(); + let list_len = load_list_len(builder, list_struct); let zero = env.ptr_int().const_zero(); let answer = builder.build_int_compare(IntPredicate::EQ, list_len, zero, "is_zero"); @@ -1073,24 +1066,20 @@ fn call_with_args<'a, 'ctx, 'env>( let wrapper_struct = args[0].0.into_struct_value(); let elem_index = args[1].0.into_int_value(); - // Get the length from the wrapper struct - let _list_len = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "unwrapped_list_len").unwrap().into_int_value(); + // Get the usize length from the wrapper struct + let _list_len = load_list_len(builder, wrapper_struct); // TODO here, check to see if the requested index exceeds the length of the array. match list_layout { - Layout::Builtin(Builtin::List(elem_layout)) => { - // Get the pointer to the array data - let array_data_ptr = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_PTR, "unwrapped_list_ptr").unwrap().into_pointer_value(); - - let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64; - let elem_size = env.context.i64_type().const_int(elem_bytes, false); - - // Calculate the offset at runtime by multiplying the index by the size of an element. - let offset_bytes = builder.build_int_mul(elem_index, elem_size, "mul_offset"); + Layout::Builtin(Builtin::List(_)) => { + // Load the pointer to the array data + let array_data_ptr = load_list_ptr(builder, wrapper_struct); // We already checked the bounds earlier. - let elem_ptr = unsafe { builder.build_in_bounds_gep(array_data_ptr, &[offset_bytes], "elem") }; + let elem_ptr = unsafe { + builder.build_in_bounds_gep(array_data_ptr, &[elem_index], "elem") + }; builder.build_load(elem_ptr, "List.get") } @@ -1099,38 +1088,88 @@ fn call_with_args<'a, 'ctx, 'env>( } } } - Symbol::LIST_SET /* TODO clone first for LIST_SET! */ | Symbol::LIST_SET_IN_PLACE => { + Symbol::LIST_SET => { + // List.set : List elem, Int, elem -> List elem let builder = env.builder; debug_assert!(args.len() == 3); - let wrapper_struct = args[0].0.into_struct_value(); + let original_wrapper = args[0].0.into_struct_value(); let elem_index = args[1].0.into_int_value(); - let (elem, elem_layout) = args[2]; - // Slot 1 in the wrapper struct is the length - let _list_len = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "unwrapped_list_len").unwrap().into_int_value(); + // Load the usize length from the wrapper. We need it for bounds checking. + let list_len = load_list_len(builder, original_wrapper); - // TODO here, check to see if the requested index exceeds the length of the array. - // If so, bail out and return the list unaltered. + // Bounds check: only proceed if index < length. + // Otherwise, return the list unaltered. + let comparison = bounds_check_comparison(builder, elem_index, list_len); - // Slot 0 in the wrapper struct is the pointer to the array data - let array_data_ptr = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_PTR, "unwrapped_list_ptr").unwrap().into_pointer_value(); + // If the index is in bounds, clone and mutate in place. + let then_val = { + let (elem, elem_layout) = args[2]; + let (cloned_wrapper, array_data_ptr) = { + clone_list( + env, + list_len, + load_list_ptr(builder, original_wrapper), + elem_layout, + ) + }; - let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64; - let elem_size = env.context.i64_type().const_int(elem_bytes, false); + // If we got here, we passed the bounds check, so this is an in-bounds GEP + let elem_ptr = unsafe { + builder.build_in_bounds_gep(array_data_ptr, &[elem_index], "load_index") + }; - // Calculate the offset at runtime by multiplying the index by the size of an element. - let offset_bytes = builder.build_int_mul(elem_index, elem_size, "mul_offset"); + // Mutate the new array in-place to change the element. + builder.build_store(elem_ptr, elem); - // We already checked the bounds earlier. - let elem_ptr = unsafe { builder.build_in_bounds_gep(array_data_ptr, &[offset_bytes], "elem") }; + BasicValueEnum::StructValue(cloned_wrapper) + }; - // Mutate the array in-place. - builder.build_store(elem_ptr, elem); + // If the index was out of bounds, return the original list unaltered. + let else_val = BasicValueEnum::StructValue(original_wrapper); + let ret_type = original_wrapper.get_type(); - // Return the wrapper unchanged, since pointer, length and capacity are all unchanged - wrapper_struct.into() + build_basic_phi2(env, parent, comparison, then_val, else_val, ret_type.into()) + } + Symbol::LIST_SET_IN_PLACE => { + let builder = env.builder; + + debug_assert!(args.len() == 3); + + let original_wrapper = args[0].0.into_struct_value(); + let elem_index = args[1].0.into_int_value(); + let (elem, _elem_layout) = args[2]; + + // Load the usize length + let list_len = load_list_len(builder, original_wrapper); + + // Bounds check: only proceed if index < length. + // Otherwise, return the list unaltered. + let comparison = bounds_check_comparison(builder, elem_index, list_len); + + // If the index is in bounds, clone and mutate in place. + let then_val = { + // Load the pointer to the elements + let array_data_ptr = load_list_ptr(builder, original_wrapper); + + // We already checked the bounds earlier. + let elem_ptr = + unsafe { builder.build_in_bounds_gep(array_data_ptr, &[elem_index], "elem") }; + + // Mutate the array in-place. + builder.build_store(elem_ptr, elem); + + // Return the wrapper unchanged, since pointer and length are unchanged + BasicValueEnum::StructValue(original_wrapper) + }; + + // If the index was out of bounds, return the original list unaltered. + let else_val = BasicValueEnum::StructValue(original_wrapper); + let ret_type = original_wrapper.get_type(); + + build_basic_phi2(env, parent, comparison, then_val, else_val, ret_type.into()) } _ => { let fn_val = env @@ -1144,7 +1183,9 @@ fn call_with_args<'a, 'ctx, 'env>( arg_vals.push(*arg); } - let call = env.builder.build_call(fn_val, arg_vals.into_bump_slice(), "call"); + let call = env + .builder + .build_call(fn_val, arg_vals.into_bump_slice(), "call"); call.try_as_basic_value() .left() @@ -1152,3 +1193,99 @@ fn call_with_args<'a, 'ctx, 'env>( } } } + +fn load_list_len<'ctx>( + builder: &Builder<'ctx>, + wrapper_struct: StructValue<'ctx>, +) -> IntValue<'ctx> { + builder + .build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "list_len") + .unwrap() + .into_int_value() +} + +fn load_list_ptr<'ctx>( + builder: &Builder<'ctx>, + wrapper_struct: StructValue<'ctx>, +) -> PointerValue<'ctx> { + builder + .build_extract_value(wrapper_struct, Builtin::WRAPPER_PTR, "list_ptr") + .unwrap() + .into_pointer_value() +} + +fn clone_list<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + list_len: IntValue<'ctx>, + elems_ptr: PointerValue<'ctx>, + elem_layout: &Layout<'_>, +) -> (StructValue<'ctx>, PointerValue<'ctx>) { + let builder = env.builder; + let ctx = env.context; + let ptr_bytes = env.ptr_bytes; + + // Calculate the number of bytes we'll need to allocate. + let elem_bytes = env + .ptr_int() + .const_int(elem_layout.stack_size(env.ptr_bytes) as u64, false); + let size = env + .builder + .build_int_mul(elem_bytes, list_len, "mul_len_by_elem_bytes"); + + // Allocate space for the new array that we'll copy into. + let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes); + let clone_ptr = builder + .build_array_malloc(elem_type, list_len, "list_ptr") + .unwrap(); + + // TODO check if malloc returned null; if so, runtime error for OOM! + + // Either memcpy or deep clone the array elements + if elem_layout.safe_to_memcpy() { + // Copy the bytes from the original array into the new + // one we just malloc'd. + // + // TODO how do we decide when to do the small memcpy vs the normal one? + builder + .build_memcpy(clone_ptr, ptr_bytes, elems_ptr, ptr_bytes, size) + .unwrap_or_else(|err| { + panic!("Error while attempting LLVM memcpy: {:?}", err); + }); + } else { + panic!("TODO Cranelift currently only knows how to clone list elements that are Copy."); + } + + // Create a fresh wrapper struct for the newly populated array + let struct_type = collection_wrapper(ctx, clone_ptr.get_type(), env.ptr_bytes); + let mut struct_val; + + // Store the pointer + struct_val = builder + .build_insert_value( + struct_type.get_undef(), + clone_ptr, + Builtin::WRAPPER_PTR, + "insert_ptr", + ) + .unwrap(); + + // Store the length + struct_val = builder + .build_insert_value(struct_val, list_len, Builtin::WRAPPER_LEN, "insert_len") + .unwrap(); + + (struct_val.into_struct_value(), clone_ptr) +} + +fn bounds_check_comparison<'ctx>( + builder: &Builder<'ctx>, + elem_index: IntValue<'ctx>, + len: IntValue<'ctx>, +) -> IntValue<'ctx> { + // + // Note: Check for index < length as the "true" condition, + // to avoid misprediction. (In practice this should usually pass, + // and CPUs generally default to predicting that a forward jump + // shouldn't be taken; that is, they predict "else" won't be taken.) + builder.build_int_compare(IntPredicate::ULT, elem_index, len, "bounds_check") +} diff --git a/compiler/gen/src/llvm/convert.rs b/compiler/gen/src/llvm/convert.rs index 515910d4c8..74956ce45c 100644 --- a/compiler/gen/src/llvm/convert.rs +++ b/compiler/gen/src/llvm/convert.rs @@ -5,7 +5,7 @@ use inkwell::types::BasicTypeEnum::{self, *}; use inkwell::types::{ArrayType, BasicType, FunctionType, IntType, PointerType, StructType}; use inkwell::AddressSpace; -use roc_mono::layout::Layout; +use roc_mono::layout::{Builtin, Layout}; /// TODO could this be added to Inkwell itself as a method on BasicValueEnum? pub fn get_fn_type<'ctx>( @@ -123,18 +123,26 @@ pub fn basic_type_from_layout<'ctx>( collection_wrapper(context, ptr_type, ptr_bytes).into() } - EmptyList => { - let array_type = - get_array_type(&context.opaque_struct_type("empty_list_elem").into(), 0); - let ptr_type = array_type.ptr_type(AddressSpace::Generic); - - collection_wrapper(context, ptr_type, ptr_bytes).into() - } + EmptyList => BasicTypeEnum::StructType(empty_collection(context, ptr_bytes)), }, } } -/// (pointer: usize, length: u32, capacity: u32) +/// A length usize and a pointer to some elements. +/// Could be a wrapper for a List or a Str. +/// +/// The order of these doesn't matter, since they should be initialized +/// to zero anyway for an empty collection; as such, we return a +/// (usize, usize) struct layout no matter what. +pub fn empty_collection(ctx: &Context, ptr_bytes: u32) -> StructType<'_> { + let usize_type = BasicTypeEnum::IntType(ptr_int(ctx, ptr_bytes)); + + ctx.struct_type(&[usize_type, usize_type], false) +} + +/// A length usize and a pointer to some elements. +/// +/// Could be a wrapper for a List or a Str. pub fn collection_wrapper<'ctx>( ctx: &'ctx Context, ptr_type: PointerType<'ctx>, @@ -143,7 +151,14 @@ pub fn collection_wrapper<'ctx>( let ptr_type_enum = BasicTypeEnum::PointerType(ptr_type); let len_type = BasicTypeEnum::IntType(ptr_int(ctx, ptr_bytes)); - ctx.struct_type(&[ptr_type_enum, len_type], false) + // This conditional is based on a constant, so the branch should be optimized away. + // The reason for keeping the conditional here is so we can flip the order + // of the fields (by changing the constants) without breaking this code. + if Builtin::WRAPPER_PTR == 0 { + ctx.struct_type(&[ptr_type_enum, len_type], false) + } else { + ctx.struct_type(&[len_type, ptr_type_enum], false) + } } pub fn ptr_int(ctx: &Context, ptr_bytes: u32) -> IntType<'_> { diff --git a/compiler/gen/tests/test_gen.rs b/compiler/gen/tests/test_gen.rs index 42a5716fdd..c709489e17 100644 --- a/compiler/gen/tests/test_gen.rs +++ b/compiler/gen/tests/test_gen.rs @@ -5,6 +5,7 @@ extern crate indoc; extern crate bumpalo; extern crate inkwell; +extern crate libc; extern crate roc_gen; mod helpers; @@ -535,6 +536,16 @@ mod test_gen { // fn int_list_is_empty() { // assert_evals_to!("List.isEmpty [ 12, 9, 6, 3 ]", 0, u8, |x| x); // } + // + #[test] + fn empty_list_literal() { + assert_llvm_evals_to!("[]", &[], &'static [i64], |x| x); + } + + #[test] + fn int_list_literal() { + assert_llvm_evals_to!("[ 12, 9, 6, 3 ]", &[12, 9, 6, 3], &'static [i64], |x| x); + } #[test] fn head_int_list() { @@ -543,29 +554,68 @@ mod test_gen { #[test] fn get_int_list() { - assert_evals_to!("List.getUnsafe [ 12, 9, 6, 3 ] 1", 9, i64); + assert_evals_to!("List.getUnsafe [ 12, 9, 6 ] 1", 9, i64); } #[test] - fn set_unique_int_list() { + fn get_set_unique_int_list() { assert_evals_to!("List.getUnsafe (List.set [ 12, 9, 7, 3 ] 1 42) 1", 42, i64); } + #[test] + fn set_unique_int_list() { + assert_opt_evals_to!( + "List.set [ 12, 9, 7, 1, 5 ] 2 33", + &[12, 9, 33, 1, 5], + &'static [i64], + |x| x + ); + } + + #[test] + fn set_unique_list_oob() { + assert_opt_evals_to!( + "List.set [ 3, 17, 4.1 ] 1337 9.25", + &[3.0, 17.0, 4.1], + &'static [f64], + |x| x + ); + } + #[test] fn set_shared_int_list() { - assert_crane_evals_to!( + assert_opt_evals_to!( + indoc!( + r#" + shared = [ 2.1, 4.3 ] + + # This should not mutate the original + x = List.getUnsafe (List.set shared 1 7.7) 1 + + { x, y: List.getUnsafe shared 1 } + "# + ), + (7.7, 4.3), + (f64, f64), + |x| x + ); + } + + #[test] + fn set_shared_list_oob() { + assert_opt_evals_to!( indoc!( r#" shared = [ 2, 4 ] - # This should not mutate the original - x = List.set shared 1 77 + # This List.set is out of bounds, and should have no effect + x = List.getUnsafe (List.set shared 422 0) 1 - List.getUnsafe shared 1 + { x, y: List.getUnsafe shared 1 } "# ), - 4, - i64, + (4, 4), + (i64, i64), |x| x ); } diff --git a/compiler/mono/src/layout.rs b/compiler/mono/src/layout.rs index 939062c832..e6e312cb76 100644 --- a/compiler/mono/src/layout.rs +++ b/compiler/mono/src/layout.rs @@ -143,10 +143,12 @@ impl<'a> Builtin<'a> { pub const SET_WORDS: u32 = Builtin::MAP_WORDS; // Set is an alias for Map with {} for value pub const LIST_WORDS: u32 = 2; - /// Layout of collection wrapper - a struct of (pointer, length, capacity) + /// Layout of collection wrapper for List and Str - a struct of (pointre, length). + /// + /// We choose this layout (with pointer first) because it's how + /// Rust slices are laid out, meaning we can cast to/from them for free. pub const WRAPPER_PTR: u32 = 0; pub const WRAPPER_LEN: u32 = 1; - pub const WRAPPER_CAPACITY: u32 = 2; pub fn stack_size(&self, pointer_size: u32) -> u32 { use Builtin::*; diff --git a/compiler/mono/tests/test_opt.rs b/compiler/mono/tests/test_opt.rs index 7acdd776cb..324df1f3a9 100644 --- a/compiler/mono/tests/test_opt.rs +++ b/compiler/mono/tests/test_opt.rs @@ -20,6 +20,183 @@ mod test_opt { // HELPERS + #[derive(Debug, Default, PartialEq, Eq)] + struct CallProblems { + missing: Vec, + unexpected: Vec, + } + + fn contains_named_calls(src: &str, mut calls: Vec) { + let arena = Bump::new(); + let (loc_expr, _, _problems, subs, var, constraint, home, mut interns) = uniq_expr(src); + + let mut unify_problems = Vec::new(); + let (_content, mut subs) = infer_expr(subs, &mut unify_problems, &constraint, var); + + // Compile and add all the Procs before adding main + let mut procs = Procs::default(); + let mut ident_ids = interns.all_ident_ids.remove(&home).unwrap(); + + // assume 64-bit pointers + let pointer_size = std::mem::size_of::() as u32; + + // Populate Procs and Subs, and get the low-level Expr from the canonical Expr + let mono_expr = Expr::new( + &arena, + &mut subs, + loc_expr.value, + &mut procs, + home, + &mut ident_ids, + pointer_size, + ); + + let unexpected_calls = extract_named_calls(&mono_expr, &mut calls); + let expected = CallProblems::default(); + let actual = CallProblems { + missing: calls, + unexpected: unexpected_calls, + }; + + assert_eq!(expected, actual); + } + + fn extract_named_calls(expr: &Expr<'_>, calls: &mut Vec) -> Vec { + let mut unexpected_calls = Vec::new(); + + // The calls must be sorted so we can binary_search them for matches. + calls.sort(); + + extract_named_calls_help(expr, calls, &mut unexpected_calls); + + unexpected_calls + } + fn extract_named_calls_help( + expr: &Expr<'_>, + calls: &mut Vec, + unexpected_calls: &mut Vec, + ) { + match expr { + Int(_) | Float(_) | Str(_) | Bool(_) | Byte(_) | Load(_) | FunctionPointer(_) + | Jump(_) | RuntimeError(_) => (), + + Store(paths, sub_expr) => { + for (_, _, path_expr) in paths.iter() { + extract_named_calls_help(path_expr, calls, unexpected_calls); + } + + extract_named_calls_help(sub_expr, calls, unexpected_calls); + } + + CallByPointer(sub_expr, args, _) => { + extract_named_calls_help(sub_expr, calls, unexpected_calls); + + for arg in args.iter() { + extract_named_calls_help(arg, calls, unexpected_calls); + } + } + + CallByName(symbol, args) => { + // Search for the symbol. If we found it, check it off the list. + // If we didn't find it, add it to the list of unexpected calls. + match calls.binary_search(symbol) { + Ok(index) => { + calls.remove(index); + } + Err(_) => { + unexpected_calls.push(*symbol); + } + } + + for (arg, _) in args.iter() { + extract_named_calls_help(arg, calls, unexpected_calls); + } + } + + Cond { + cond, + cond_layout: _, + pass, + fail, + ret_layout: _, + } => { + extract_named_calls_help(cond, calls, unexpected_calls); + extract_named_calls_help(pass, calls, unexpected_calls); + extract_named_calls_help(fail, calls, unexpected_calls); + } + Branches { + cond, + branches, + default, + ret_layout: _, + } => { + extract_named_calls_help(cond, calls, unexpected_calls); + extract_named_calls_help(default, calls, unexpected_calls); + + for (a, b, c) in branches.iter() { + extract_named_calls_help(a, calls, unexpected_calls); + extract_named_calls_help(b, calls, unexpected_calls); + extract_named_calls_help(c, calls, unexpected_calls); + } + } + Switch { + cond, + cond_layout: _, + branches, + default_branch, + ret_layout: _, + } => { + extract_named_calls_help(cond, calls, unexpected_calls); + extract_named_calls_help(default_branch, calls, unexpected_calls); + + for (_, branch_expr) in branches.iter() { + extract_named_calls_help(branch_expr, calls, unexpected_calls); + } + } + + Tag { + tag_layout: _, + tag_name: _, + tag_id: _, + union_size: _, + arguments, + } => { + for (tag_expr, _) in arguments.iter() { + extract_named_calls_help(tag_expr, calls, unexpected_calls); + } + } + Struct(fields) => { + for (field, _) in fields.iter() { + extract_named_calls_help(field, calls, unexpected_calls); + } + } + Access { + label: _, + field_layout: _, + struct_layout: _, + record: sub_expr, + } + | AccessAtIndex { + index: _, + field_layouts: _, + expr: sub_expr, + is_unwrapped: _, + } + | Label(_, sub_expr) => { + extract_named_calls_help(sub_expr, calls, unexpected_calls); + } + + Array { + elem_layout: _, + elems, + } => { + for elem in elems.iter() { + extract_named_calls_help(elem, calls, unexpected_calls); + } + } + } + } + fn compiles_to(src: &str, expected: Expr<'_>) { let arena = Bump::new(); let (loc_expr, _, _problems, subs, var, constraint, home, mut interns) = uniq_expr(src); @@ -90,4 +267,20 @@ mod test_opt { ), ); } + + #[test] + fn set_shared_int_list() { + // This should *NOT* optimize List.set to List.set_in_place + contains_named_calls( + r#" + shared = [ 2, 4 ] + + # This should not mutate the original + x = List.set shared 1 0 + + { x, y: List.getUnsafe shared 1 } + "#, + vec![Symbol::LIST_SET, Symbol::LIST_GET_UNSAFE], + ); + } } diff --git a/compiler/types/src/boolean_algebra.rs b/compiler/types/src/boolean_algebra.rs index bcaa337cb8..1e2b24914e 100644 --- a/compiler/types/src/boolean_algebra.rs +++ b/compiler/types/src/boolean_algebra.rs @@ -94,34 +94,44 @@ impl Bool { Atom::Zero => Err(Atom::Zero), Atom::One => Err(Atom::One), Atom::Variable(var) => { - let mut result = Vec::new(); - result.push(var); + // The var may still point to Zero or One! + match subs.get_without_compacting(var).content { + Content::Structure(FlatType::Boolean(nested)) => nested.simplify(subs), + _ => { + let mut result = Vec::new(); + result.push(var); - for atom in &self.1 { - match atom { - Atom::Zero => {} - Atom::One => return Err(Atom::One), - Atom::Variable(v) => match subs.get_without_compacting(*v).content { - Content::Structure(FlatType::Boolean(nested)) => { - match nested.simplify(subs) { - Ok(variables) => { - for var in variables { - result.push(var); + for atom in &self.1 { + match atom { + Atom::Zero => {} + Atom::One => return Err(Atom::One), + Atom::Variable(v) => { + match subs.get_without_compacting(*v).content { + Content::Structure(FlatType::Boolean(nested)) => { + match nested.simplify(subs) { + Ok(variables) => { + for var in variables { + result.push(var); + } + } + Err(Atom::Zero) => {} + Err(Atom::One) => return Err(Atom::One), + Err(Atom::Variable(_)) => { + panic!("TODO nested variable") + } + } + } + _ => { + result.push(*v); } } - Err(Atom::Zero) => {} - Err(Atom::One) => return Err(Atom::One), - Err(Atom::Variable(_)) => panic!("TODO nested variable"), } } - _ => { - result.push(*v); - } - }, + } + + Ok(result) } } - - Ok(result) } } }