diff --git a/compiler/builtins/docs/Int.roc b/compiler/builtins/docs/Int.roc index e6e235db2b..2e0d54b87d 100644 --- a/compiler/builtins/docs/Int.roc +++ b/compiler/builtins/docs/Int.roc @@ -215,7 +215,7 @@ asc : Int a, Int a -> [ Eq, Lt, Gt ] ## desc : Int a, Int a -> [ Eq, Lt, Gt ] -## TODO should we offer hash32 etc even if it has to do a hash64 and truncate? +## TODO should we offer hash32 etc even if someday it has to do a hash64 and truncate? ## ## CAUTION: This function may give different answers in future releases of Roc, ## so be aware that if you rely on the exact answer this gives today, your @@ -224,16 +224,16 @@ hash64 : a -> U64 ## Limits -## The highest number that can be stored in an #Int without overflowing its +## The highest number that can be stored in an #I32 without overflowing its ## available memory and crashing. ## -## Note that this is smaller than the positive version of #Int.lowest, -## which means if you call #Num.abs on #Int.lowest, it will overflow and crash! -highest : Int * +## Note that this is smaller than the positive version of #Int.lowestI32 +## which means if you call #Num.abs on #Int.lowestI32, it will overflow and crash! +highestI32 : I32 -## The lowest number that can be stored in an #Int without overflowing its +## The lowest number that can be stored in an #I32 without overflowing its ## available memory and crashing. ## ## Note that the positive version of this number is this is larger than -## #Int.highest, which means if you call #Num.abs on #Int.lowest, it will overflow and crash! -lowest : Int * +## #Int.highestI32, which means if you call #Num.abs on #Int.lowestI32, it will overflow and crash! 
+lowest : I32 diff --git a/compiler/builtins/docs/Str.roc b/compiler/builtins/docs/Str.roc index 450189640c..52460d485a 100644 --- a/compiler/builtins/docs/Str.roc +++ b/compiler/builtins/docs/Str.roc @@ -2,12 +2,58 @@ api Str provides Str, isEmpty, join ## Types -## A sequence of [UTF-8](https://en.wikipedia.org/wiki/UTF-8) text characters. +## A [Unicode](https://unicode.org) text value. ## -## One #Str can be up to 2 gigabytes in size. If you need to store larger -## strings than that, you can split them into smaller chunks and operate -## on those instead of on one large #Str. This often runs faster in practice, -## even for strings much smaller than 2 gigabytes. +## Dealing with text is a deep topic, so by design, Roc's `Str` module sticks +## to the basics. For more advanced use cases like working with raw [code points](https://unicode.org/glossary/#code_point), +## see the [roc/unicode](roc/unicode) package, and for locale-specific text +## functions (including capitalization, as capitalization rules vary by locale) +## see the [roc/locale](roc/locale) package. +## +## ### Unicode +## +## Unicode can represent text values which span multiple languages, symbols, and emoji. +## Here are some valid Roc strings: +## +## * "Roc" +## * "鹏" +## * "🐦" +## +## Every Unicode string is a sequence of [grapheme clusters](https://unicode.org/glossary/#grapheme_cluster). +## A grapheme cluster corresponds to what a person reading a string might call +## a "character", but because the term "character" is used to mean many different +## concepts across different programming languages, we intentionally avoid it in Roc. +## Instead, we use the term "clusters" as a shorthand for "grapheme clusters." 
+## +## You can get the number of grapheme clusters in a string by calling `Str.countClusters` on it: +## +## >>> Str.countClusters "Roc" +## +## >>> Str.countClusters "音乐" +## +## >>> Str.countClusters "👍" +## +## > The `countClusters` function traverses the entire string to calculate its answer, +## > so it's much better for performance to use `Str.isEmpty` instead of +## > calling `Str.countClusters` and checking whether the count was `0`. +## +## ### Escape characters +## +## ### String interpolation +## +## ### Encoding +## +## Roc strings are not coupled to any particular +## [encoding](https://en.wikipedia.org/wiki/Character_encoding). As it happens, +## they are currently encoded in UTF-8, but this module is intentionally designed +## not to rely on that implementation detail so that a future release of Roc can +## potentially change it without breaking existing Roc applications. +## +## This module has functions that can convert a #Str to a #List of raw [code unit](https://unicode.org/glossary/#code_unit) +## integers (not to be confused with the [code points](https://unicode.org/glossary/#code_point) +## mentioned earlier) in a particular encoding. If you need encoding-specific functions, +## you should take a look at the [roc/unicode](roc/unicode) package. +## It has many more tools than this module does! Str : [ @Str ] ## Convert @@ -21,10 +67,22 @@ Str : [ @Str ] ## but it's recommended to pass much smaller numbers instead. ## ## Passing a negative number for decimal places is equivalent to passing 0. -decimal : Int, Float -> Str +decimal : Float *, ULen -> Str ## Convert an #Int to a string. -int : Float -> Str +int : Int * -> Str + +## Split a string around a separator. +## +## >>> Str.split "1,2,3" "," +## +## Passing `""` for the separator is not useful; it returns the original string +## wrapped in a list. 
+## +## >>> Str.split "1,2,3" "" +## +## To split a string into its grapheme clusters, use #Str.clusters +split : Str, Str -> List Str ## Check @@ -58,4 +116,196 @@ padStart : Str, Int, Str -> Str padEnd : Str, Int, Str -> Str +## Grapheme Clusters + +## Split a string into its grapheme clusters. +## +## >>> Str.clusters "1,2,3" +## +## >>> Str.clusters "👍👍👍" +## +clusters : Str -> List Str + +reverseClusters : Str -> Str + +foldClusters : Str, { start: state, step: (state, Str -> state) } -> state + +## Returns #True if the string begins with a capital letter, and #False otherwise. +## +## >>> Str.isCapitalized "hi" +## +## >>> Str.isCapitalized "Hi" +## +## >>> Str.isCapitalized " Hi" +## +## >>> Str.isCapitalized "Česká" +## +## >>> Str.isCapitalized "Э" +## +## >>> Str.isCapitalized "東京" +## +## >>> Str.isCapitalized "🐦" +## +## >>> Str.isCapitalized "" +## +## Since the rules for how to capitalize an uncapitalized string vary by locale, +## see the [roc/locale](roc/locale) package for functions which do that. +isCapitalized : Str -> Bool + +## ## Code Units +## +## Besides grapheme clusters, another way to break down strings is into +## raw code unit integers. +## +## Code units are no substitute for grapheme clusters! +## These functions exist to support advanced use cases like those found in +## [roc/unicode](roc/unicode), and using code units when grapheme clusters would +## be more appropriate can very easily lead to bugs. +## +## For example, `Str.countClusters "👩‍👩‍👦‍👦"` returns `1`, +## whereas `Str.toUtf8 "👩‍👩‍👦‍👦"` returns a list with a length of 25, +## `Str.toUtf16 "👩‍👩‍👦‍👦"` returns a list with a length of 11, +## and `Str.toUtf32 "👩‍👩‍👦‍👦"` returns a list with a length of 7. + +## Return a #List of the string's #U8 UTF-8 [code units](https://unicode.org/glossary/#code_unit). 
+## (To split the string into a #List of smaller #Str values instead of #U8 values, +## see #Str.split and #Str.clusters.) +## +## >>> Str.toUtf8 "👩‍👩‍👦‍👦" +## +## >>> Str.toUtf8 "Roc" +## +## >>> Str.toUtf8 "鹏" +## +## >>> Str.toUtf8 "🐦" +## +## For a more flexible function that walks through each of these #U8 code units +## without creating a #List, see #Str.foldUtf8 and #Str.foldRevUtf8. +toUtf8 : Str -> List U8 + +## Return a #List of the string's #U16 UTF-16 [code units](https://unicode.org/glossary/#code_unit). +## (To split the string into a #List of smaller #Str values instead of #U16 values, +## see #Str.split and #Str.clusters.) +## +## >>> Str.toUtf16 "👩‍👩‍👦‍👦" +## +## >>> Str.toUtf16 "Roc" +## +## >>> Str.toUtf16 "鹏" +## +## >>> Str.toUtf16 "🐦" +## +## For a more flexible function that walks through each of these #U16 code units +## without creating a #List, see #Str.foldUtf16 and #Str.foldRevUtf16. +toUtf16 : Str -> List U16 + +## Return a #List of the string's #U32 UTF-32 [code units](https://unicode.org/glossary/#code_unit). +## (To split the string into a #List of smaller #Str values instead of #U32 values, +## see #Str.split and #Str.clusters.) +## +## >>> Str.toUtf32 "👩‍👩‍👦‍👦" +## +## >>> Str.toUtf32 "Roc" +## +## >>> Str.toUtf32 "鹏" +## +## >>> Str.toUtf32 "🐦" +## +## For a more flexible function that walks through each of these #U32 code units +## without creating a #List, see #Str.foldUtf32 and #Str.foldRevUtf32. +toUtf32 : Str -> List U32 + + +## Walk through the string's #U8 UTF-8 [code units](https://unicode.org/glossary/#code_unit) +## to build up a state. +## (If you want a `step` function which receives a #Str instead of an #U8, see #Str.foldClusters.) 
+## +## Here are the #U8 values that will be passed to `step` when this function is +## called on various strings: +## +## * `"👩‍👩‍👦‍👦"` passes 240, 159, 145, 169, 226, 128, 141, 240, 159, 145, 169, 226, 128, 141, 240, 159, 145, 166, 226, 128, 141, 240, 159, 145, 166 +## * `"Roc"` passes 82, 111, 99 +## * `"鹏"` passes 233, 185, 143 +## * `"🐦"` passes 240, 159, 144, 166 +## +## To convert a #Str into a plain `List U8` of UTF-8 code units, see #Str.toUtf8. +foldUtf8 : Str, { start: state, step: (state, U8 -> state) } -> state + +## Walk through the string's #U16 UTF-16 [code units](https://unicode.org/glossary/#code_unit) +## to build up a state. +## (If you want a `step` function which receives a #Str instead of an #U16, see #Str.foldClusters.) +## +## Here are the #U16 values that will be passed to `step` when this function is +## called on various strings: +## +## * `"👩‍👩‍👦‍👦"` passes 55357, 56425, 8205, 55357, 56425, 8205, 55357, 56422, 8205, 55357, 56422 +## * `"Roc"` passes 82, 111, 99 +## * `"鹏"` passes 40527 +## * `"🐦"` passes 55357, 56358 +## +## To convert a #Str into a plain `List U16` of UTF-16 code units, see #Str.toUtf16. +foldUtf16 : Str, { start: state, step: (state, U16 -> state) } -> state + +## Walk through the string's #U32 UTF-32 [code units](https://unicode.org/glossary/#code_unit) +## to build up a state. +## (If you want a `step` function which receives a #Str instead of an #U32, see #Str.foldClusters.) +## +## Here are the #U32 values that will be passed to `step` when this function is +## called on various strings: +## +## * `"👩‍👩‍👦‍👦"` passes 128105, 8205, 128105, 8205, 128102, 8205, 128102 +## * `"Roc"` passes 82, 111, 99 +## * `"鹏"` passes 40527 +## * `"🐦"` passes 128038 +## +## To convert a #Str into a plain `List U32` of UTF-32 code units, see #Str.toUtf32. 
+foldUtf32 : Str, { start: state, step: (state, U32 -> state) } -> state + + +## Walk backwards through the string's #U8 UTF-8 [code units](https://unicode.org/glossary/#code_unit) +## to build up a state. +## (If you want a `step` function which receives a #Str instead of an #U8, see #Str.foldClusters.) +## +## Here are the #U8 values that will be passed to `step` when this function is +## called on various strings: +## +## * `"👩‍👩‍👦‍👦"` passes 166, 145, 159, 240, 141, 128, 226, 166, 145, 159, 240, 141, 128, 226, 169, 145, 159, 240, 141, 128, 226, 169, 145, 159, 240 +## * `"Roc"` passes 99, 111, 82 +## * `"鹏"` passes 143, 185, 233 +## * `"🐦"` passes 166, 144, 159, 240 +## +## To convert a #Str into a plain `List U8` of UTF-8 code units, see #Str.toUtf8. +foldRevUtf8 : Str, { start: state, step: (state, U8 -> state) } -> state + +## Walk backwards through the string's #U16 UTF-16 [code units](https://unicode.org/glossary/#code_unit) +## to build up a state. +## (If you want a `step` function which receives a #Str instead of an #U16, see #Str.foldClusters.) +## +## Here are the #U16 values that will be passed to `step` when this function is +## called on various strings: +## +## * `"👩‍👩‍👦‍👦"` passes 56422, 55357, 8205, 56422, 55357, 8205, 56425, 55357, 8205, 56425, 55357 +## * `"Roc"` passes 99, 111, 82 +## * `"鹏"` passes 40527 +## * `"🐦"` passes 56358, 55357 +## +## To convert a #Str into a plain `List U16` of UTF-16 code units, see #Str.toUtf16. +foldRevUtf16 : Str, { start: state, step: (state, U16 -> state) } -> state + +## Walk backwards through the string's #U32 UTF-32 [code units](https://unicode.org/glossary/#code_unit) +## to build up a state. +## (If you want a `step` function which receives a #Str instead of an #U32, see #Str.foldClusters.) 
+## +## Here are the #U32 values that will be passed to `step` when this function is +## called on various strings: +## +## * `"👩‍👩‍👦‍👦"` passes 128102, 8205, 128102, 8205, 128105, 8205, 128105 +## * `"Roc"` passes 99, 111, 82 +## * `"鹏"` passes 40527 +## * `"🐦"` passes 128038 +## +## To convert a #Str into a plain `List U32` of UTF-32 code units, see #Str.toUtf32. +foldRevUtf32 : Str, { start: state, step: (state, U32 -> state) } -> state + + diff --git a/compiler/gen/src/crane/build.rs b/compiler/gen/src/crane/build.rs index 1a81e886fb..a15f9fb88d 100644 --- a/compiler/gen/src/crane/build.rs +++ b/compiler/gen/src/crane/build.rs @@ -55,6 +55,8 @@ pub fn build_expr<'a, B: Backend>( ) -> Value { use roc_mono::expr::Expr::*; + let ptr_bytes = env.cfg.pointer_bytes() as u32; + match expr { Int(num) => builder.ins().iconst(types::I64, *num), Float(num) => builder.ins().f64const(*num), @@ -173,7 +175,6 @@ pub fn build_expr<'a, B: Backend>( }, Struct(sorted_fields) => { let cfg = env.cfg; - let ptr_bytes = cfg.pointer_bytes() as u32; // The slot size will be the sum of all the fields' sizes let mut slot_size = 0; @@ -189,6 +190,7 @@ pub fn build_expr<'a, B: Backend>( )); // Create instructions for storing each field's expression + // NOTE assumes that all fields have the same width! for (index, (field_expr, field_layout)) in sorted_fields.iter().enumerate() { let val = build_expr(env, &scope, module, builder, field_expr, procs); @@ -203,6 +205,50 @@ pub fn build_expr<'a, B: Backend>( .ins() .stack_addr(cfg.pointer_type(), slot, Offset32::new(0)) } + Tag { tag_layout, arguments , tag_id, union_size, .. } => { + let cfg = env.cfg; + let ptr_bytes = cfg.pointer_bytes() as u32; + + // NOTE: all variants of a tag union must have the same size, so (among other things) + // it's easy to quickly index them in arrays. 
Therefore the size of this tag doesn't + // depend on the tag arguments, but solely on the layout of the whole tag union + let slot_size = tag_layout.stack_size(ptr_bytes); + + // Create a slot + let slot = builder.create_stack_slot(StackSlotData::new( + StackSlotKind::ExplicitSlot, + slot_size + )); + + // Create instructions for storing each field's expression + let mut offset = 0; + + // still need to insert the tag discriminator for non-single unions + // when there are no arguments, e.g. `Nothing : Maybe a` + if *union_size > 1 { + let val = builder.ins().iconst(types::I64, *tag_id as i64); + builder.ins().stack_store(val, slot, Offset32::new(0)); + offset += ptr_bytes; + } + + for (field_expr, field_layout) in arguments.iter() { + let val = build_expr(env, &scope, module, builder, field_expr, procs); + + let field_size = field_layout.stack_size(ptr_bytes); + let field_offset = i32::try_from(offset) + .expect("TODO handle field size conversion to i32"); + + builder.ins().stack_store(val, slot, Offset32::new(field_offset)); + + offset += field_size; + } + + + builder + .ins() + .stack_addr(cfg.pointer_type(), slot, Offset32::new(0)) + } + Access { label, field_layout, @@ -252,15 +298,49 @@ pub fn build_expr<'a, B: Backend>( .ins() .load(cfg.pointer_type(), mem_flags, record, Offset32::new(offset)) } + AccessAtIndex { + index, + field_layouts, + expr, + .. 
+ } => { + let cfg = env.cfg; + let mut offset = 0; + + + + for (field_index, field_layout) in field_layouts.iter().enumerate() { + if *index == field_index as u64 { + let offset = i32::try_from(offset) + .expect("TODO gracefully handle usize -> i32 conversion in struct access"); + + let mem_flags = MemFlags::new(); + let expr = build_expr(env, scope, module, builder, expr, procs); + + let ret_type = layout_to_type(&field_layout, cfg.pointer_type()); + + return builder + .ins() + .load(ret_type, mem_flags, expr, Offset32::new(offset)); + } + + offset += field_layout.stack_size(ptr_bytes); + } + + panic!("field access out of bounds: index {:?} in layouts {:?}", index, field_layouts) + + } + Str(str_literal) => { if str_literal.is_empty() { panic!("TODO build an empty string in Crane"); } else { let bytes_len = str_literal.len() + 1/* TODO drop the +1 when we have structs and this is no longer a NUL-terminated CString.*/; - let ptr = call_malloc(env, module, builder, bytes_len); + let size = builder.ins().iconst(types::I64, bytes_len as i64); + let ptr = call_malloc(env, module, builder, size); let mem_flags = MemFlags::new(); - // Copy the bytes from the string literal into the array + // Store the bytes from the string literal in the array for (index, byte) in str_literal.bytes().enumerate() { let val = builder.ins().iconst(types::I8, byte as i64); let offset = Offset32::new(index as i32); @@ -294,7 +374,8 @@ pub fn build_expr<'a, B: Backend>( } else { let elem_bytes = elem_layout.stack_size(ptr_bytes as u32); let bytes_len = elem_bytes as usize * elems.len(); - let elems_ptr = call_malloc(env, module, builder, bytes_len); + let size = builder.ins().iconst(types::I64, bytes_len as i64); + let elems_ptr = call_malloc(env, module, builder, size); let mem_flags = MemFlags::new(); // Copy the elements from the literal into the array @@ -308,18 +389,19 @@ pub fn build_expr<'a, B: Backend>( elems_ptr }; - // Store the pointer in slot 0 - builder - .ins() - 
.stack_store(elems_ptr, slot, Offset32::new(0)); + // Store the pointer + { + let offset = Offset32::new((Builtin::WRAPPER_PTR * ptr_bytes) as i32); - // Store the length in slot 1 + builder.ins().stack_store(elems_ptr, slot, offset); + } + + // Store the length { let length = builder.ins().iconst(env.ptr_sized_int(), elems.len() as i64); + let offset = Offset32::new((Builtin::WRAPPER_LEN * ptr_bytes) as i32); - builder - .ins() - .stack_store(length, slot, Offset32::new(ptr_bytes as i32)); + builder.ins().stack_store(length, slot, offset); } // Return the pointer to the wrapper @@ -331,6 +413,21 @@ pub fn build_expr<'a, B: Backend>( } } +fn layout_to_type<'a>(layout: &Layout<'a>, _pointer_type: Type) -> Type { + use roc_mono::layout::Builtin::*; + + match layout { + Layout::Builtin(builtin) => match builtin { + Int64 => cranelift::prelude::types::I64, + Byte => cranelift::prelude::types::I8, + Bool => cranelift::prelude::types::B1, + Float64 => cranelift::prelude::types::F64, + other => panic!("I don't yet know how to make a type from {:?}", other), + }, + other => panic!("I don't yet know how to make a type from {:?}", other), + } +} + struct Branch2<'a> { cond: &'a Expr<'a>, cond_layout: &'a Layout<'a>, @@ -363,7 +460,7 @@ fn build_branch2<'a, B: Backend>( let fail_block = builder.create_block(); match branch.cond_layout { - Layout::Builtin(Builtin::Bool(_, _)) => { + Layout::Builtin(Builtin::Bool) => { builder.ins().brnz(cond, pass_block, &[]); } other => panic!("I don't know how to build a conditional for {:?}", other), @@ -645,14 +742,13 @@ fn call_by_name<'a, B: Backend>( debug_assert!(args.len() == 1); let list_ptr = build_arg(&args[0], env, scope, module, builder, procs); + let ptr_bytes = env.cfg.pointer_bytes() as u32; + let offset = Offset32::new((Builtin::WRAPPER_LEN * ptr_bytes) as i32); - // Get the usize int length - builder.ins().load( - env.ptr_sized_int(), - MemFlags::new(), - list_ptr, - Offset32::new(env.cfg.pointer_bytes() as i32), - ) + // 
Get the usize list length + builder + .ins() + .load(env.ptr_sized_int(), MemFlags::new(), list_ptr, offset) } Symbol::INT_EQ_I64 | Symbol::INT_EQ_I8 | Symbol::INT_EQ_I1 => { debug_assert!(args.len() == 2); @@ -674,68 +770,68 @@ fn call_by_name<'a, B: Backend>( let wrapper_ptr = build_arg(&args[0], env, scope, module, builder, procs); let elem_index = build_arg(&args[1], env, scope, module, builder, procs); - let elem_type = Type::int(64).unwrap(); // TODO Look this up instead of hardcoding it! - let elem_bytes = 8; // TODO Look this up instead of hardcoding it! - let elem_size = builder.ins().iconst(types::I64, elem_bytes); - - // Load the pointer we got to the wrapper struct - let elems_ptr = builder.ins().load( - env.cfg.pointer_type(), - MemFlags::new(), - wrapper_ptr, - Offset32::new(0), - ); - - // Multiply the requested index by the size of each element. - let offset = builder.ins().imul(elem_index, elem_size); - - // Follow the pointer in the wrapper struct to the actual elements - builder.ins().load_complex( - elem_type, - MemFlags::new(), - &[elems_ptr, offset], - Offset32::new(0), - ) - } - Symbol::LIST_SET => { let (_list_expr, list_layout) = &args[0]; + // Get the usize list length + let ptr_bytes = env.cfg.pointer_bytes() as u32; + let offset = Offset32::new((Builtin::WRAPPER_LEN * ptr_bytes) as i32); + let _list_len = + builder + .ins() + .load(env.ptr_sized_int(), MemFlags::new(), wrapper_ptr, offset); + + // TODO compare elem_index to _list_len to do array bounds checking. 
+ match list_layout { Layout::Builtin(Builtin::List(elem_layout)) => { - // TODO try memcpy for shallow clones; it's probably faster - // let list_val = build_expr(env, scope, module, builder, list_expr, procs); + let cfg = env.cfg; + let elem_type = type_from_layout(cfg, elem_layout); + let elem_bytes = elem_layout.stack_size(cfg.pointer_bytes() as u32); + let elem_size = builder.ins().iconst(types::I64, elem_bytes as i64); - let num_elems = 10; // TODO FIXME read from List.len - let elem_bytes = - elem_layout.stack_size(env.cfg.pointer_bytes() as u32) as usize; - let bytes_len = (elem_bytes * num_elems) + 1/* TODO drop the +1 when we have structs and this is no longer NUL-terminated. */; - let wrapper_ptr = call_malloc(env, module, builder, bytes_len); - // let mem_flags = MemFlags::new(); - - // Copy the elements from the literal into the array - // for (index, elem) in elems.iter().enumerate() { - // let offset = Offset32::new(elem_bytes as i32 * index as i32); - // let val = build_expr(env, scope, module, builder, elem, procs); - - // builder.ins().store(mem_flags, val, ptr, offset); - // } - - // Add a NUL terminator at the end. - // TODO: Instead of NUL-terminating, return a struct - // with the pointer and also the length and capacity. - // let nul_terminator = builder.ins().iconst(types::I8, 0); - // let index = bytes_len as i32 - 1; - // let offset = Offset32::new(index); - - // builder.ins().store(mem_flags, nul_terminator, ptr, offset); // Load the pointer we got to the wrapper struct - let _elems_ptr = builder.ins().load( + let elems_ptr = builder.ins().load( env.cfg.pointer_type(), MemFlags::new(), wrapper_ptr, Offset32::new(0), ); + // Multiply the requested index by the size of each element. 
+ let offset = builder.ins().imul(elem_index, elem_size); + + // Follow the pointer in the wrapper struct to the actual elements + builder.ins().load_complex( + elem_type, + MemFlags::new(), + &[elems_ptr, offset], + Offset32::new(0), + ) + } + _ => { + unreachable!("Invalid List layout for List.get: {:?}", list_layout); + } + } + } + Symbol::LIST_SET => { + // set : List elem, Int, elem -> List elem + let wrapper_ptr = build_arg(&args[0], env, scope, module, builder, procs); + let (_list_expr, list_layout) = &args[0]; + + // Get the usize list length + let ptr_bytes = env.cfg.pointer_bytes() as u32; + let offset = Offset32::new((Builtin::WRAPPER_LEN * ptr_bytes) as i32); + let _list_len = + builder + .ins() + .load(env.ptr_sized_int(), MemFlags::new(), wrapper_ptr, offset); + + // TODO do array bounds checking, and early return the original List if out of bounds + + match list_layout { + Layout::Builtin(Builtin::List(elem_layout)) => { + let wrapper_ptr = clone_list(env, builder, module, wrapper_ptr, elem_layout); + list_set_in_place( env, wrapper_ptr, @@ -756,6 +852,17 @@ fn call_by_name<'a, B: Backend>( // set : List elem, Int, elem -> List elem debug_assert!(args.len() == 3); + // Get the usize list length + let wrapper_ptr = build_arg(&args[0], env, scope, module, builder, procs); + let ptr_bytes = env.cfg.pointer_bytes() as u32; + let offset = Offset32::new((Builtin::WRAPPER_LEN * ptr_bytes) as i32); + let _list_len = + builder + .ins() + .load(env.ptr_sized_int(), MemFlags::new(), wrapper_ptr, offset); + + // TODO do array bounds checking, and early return the original List if out of bounds + let (list_expr, list_layout) = &args[0]; let list_val = build_expr(env, scope, module, builder, list_expr, procs); @@ -802,17 +909,13 @@ fn call_malloc( env: &Env<'_>, module: &mut Module, builder: &mut FunctionBuilder, - size: usize, + size: Value, ) -> Value { // Declare malloc inside this function let local_func = module.declare_func_in_func(env.malloc, &mut 
builder.func); - // Convert the size argument to a Value - let ptr_size_type = module.target_config().pointer_type(); - let size_arg = builder.ins().iconst(ptr_size_type, size as i64); - // Call malloc and return the resulting pointer - let call = builder.ins().call(local_func, &[size_arg]); + let call = builder.ins().call(local_func, &[size]); let results = builder.inst_results(call); debug_assert!(results.len() == 1); @@ -850,3 +953,77 @@ fn list_set_in_place<'a>( wrapper_ptr } + +fn clone_list( + env: &Env<'_>, + builder: &mut FunctionBuilder, + module: &mut Module, + src_wrapper_ptr: Value, + elem_layout: &Layout<'_>, +) -> Value { + let cfg = env.cfg; + let ptr_bytes = env.cfg.pointer_bytes() as u32; + + // Load the pointer we got to the wrapper struct + let elems_ptr = { + let offset = Offset32::new((Builtin::WRAPPER_PTR * ptr_bytes) as i32); + + builder + .ins() + .load(cfg.pointer_type(), MemFlags::new(), src_wrapper_ptr, offset) + }; + + // Get the usize list length + let list_len = { + let offset = Offset32::new((Builtin::WRAPPER_LEN * ptr_bytes) as i32); + + builder.ins().load( + env.ptr_sized_int(), + MemFlags::new(), + src_wrapper_ptr, + offset, + ) + }; + + // Calculate the number of bytes we'll need to allocate. + let elem_bytes = builder.ins().iconst( + env.ptr_sized_int(), + elem_layout.stack_size(cfg.pointer_bytes() as u32) as i64, + ); + let size = builder.ins().imul(elem_bytes, list_len); + + // Allocate space for the new array that we'll copy into. + let new_elems_ptr = call_malloc(env, module, builder, size); + + // Either memcpy or deep clone the array elements + if elem_layout.safe_to_memcpy() { + // Copy the bytes from the original array into the new + // one we just malloc'd. + // + // TODO how do we decide when to do the small memcpy vs the normal one? 
+ builder.call_memcpy(env.cfg, new_elems_ptr, elems_ptr, size); + } else { + panic!("TODO Cranelift currently only knows how to clone list elements that are Copy."); + } + + // Create a fresh wrapper struct for the newly populated array + let ptr_bytes = cfg.pointer_bytes() as u32; + let slot = builder.create_stack_slot(StackSlotData::new( + StackSlotKind::ExplicitSlot, + ptr_bytes * Builtin::LIST_WORDS, + )); + + // Store the new pointer in slot 0 of the wrapper + builder + .ins() + .stack_store(new_elems_ptr, slot, Offset32::new(0)); + + // Store the length in slot 1 of the wrapper + builder + .ins() + .stack_store(list_len, slot, Offset32::new(ptr_bytes as i32)); + + builder + .ins() + .stack_addr(cfg.pointer_type(), slot, Offset32::new(0)) +} diff --git a/compiler/gen/src/crane/convert.rs b/compiler/gen/src/crane/convert.rs index 857cbdf8f9..36c94b80d8 100644 --- a/compiler/gen/src/crane/convert.rs +++ b/compiler/gen/src/crane/convert.rs @@ -10,12 +10,12 @@ pub fn type_from_layout(cfg: TargetFrontendConfig, layout: &Layout<'_>) -> Type use roc_mono::layout::Layout::*; match layout { - Pointer(_) | FunctionPointer(_, _) | Struct(_) | Tag(_) => cfg.pointer_type(), + FunctionPointer(_, _) | Struct(_) | Union(_) => cfg.pointer_type(), Builtin(builtin) => match builtin { Int64 => types::I64, Float64 => types::F64, - Bool(_, _) => types::B1, - Byte(_) => types::I8, + Bool => types::B1, + Byte => types::I8, Str | EmptyStr | Map(_, _) | EmptyMap | Set(_) | EmptySet | List(_) | EmptyList => { cfg.pointer_type() } diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index ecc8563073..921a19c74f 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -3,13 +3,13 @@ use bumpalo::Bump; use inkwell::builder::Builder; use inkwell::context::Context; use inkwell::module::{Linkage, Module}; -use inkwell::types::BasicTypeEnum; +use inkwell::types::{BasicTypeEnum, IntType}; use inkwell::values::BasicValueEnum::{self, *}; use 
inkwell::values::{FunctionValue, IntValue, PointerValue}; use inkwell::{AddressSpace, FloatPredicate, IntPredicate}; use crate::llvm::convert::{ - basic_type_from_layout, collection_wrapper, get_array_type, get_fn_type, + basic_type_from_layout, collection_wrapper, get_array_type, get_fn_type, ptr_int, }; use roc_collections::all::ImMap; use roc_module::symbol::{Interns, Symbol}; @@ -32,7 +32,13 @@ pub struct Env<'a, 'ctx, 'env> { pub builder: &'env Builder<'ctx>, pub module: &'ctx Module<'ctx>, pub interns: Interns, - pub pointer_bytes: u32, + pub ptr_bytes: u32, +} + +impl<'a, 'ctx, 'env> Env<'a, 'ctx, 'env> { + pub fn ptr_int(&self) -> IntType<'ctx> { + ptr_int(self.context, self.ptr_bytes) + } } pub fn build_expr<'a, 'ctx, 'env>( @@ -75,7 +81,8 @@ pub fn build_expr<'a, 'ctx, 'env>( ret_layout, cond_layout, } => { - let ret_type = basic_type_from_layout(env.arena, env.context, &ret_layout); + let ret_type = + basic_type_from_layout(env.arena, env.context, &ret_layout, env.ptr_bytes); let switch_args = SwitchArgs { cond_layout: cond_layout.clone(), cond_expr: cond, @@ -92,7 +99,7 @@ pub fn build_expr<'a, 'ctx, 'env>( for (symbol, layout, expr) in stores.iter() { let val = build_expr(env, &scope, parent, &expr, procs); - let expr_bt = basic_type_from_layout(env.arena, context, &layout); + let expr_bt = basic_type_from_layout(env.arena, context, &layout, env.ptr_bytes); let alloca = create_entry_block_alloca( env, parent, @@ -122,14 +129,14 @@ pub fn build_expr<'a, 'ctx, 'env>( panic!("TODO create a phi node for &&"); } _ => { - let mut arg_vals: Vec = + let mut arg_tuples: Vec<(BasicValueEnum, &'a Layout<'a>)> = Vec::with_capacity_in(args.len(), env.arena); - for (arg, _layout) in args.iter() { - arg_vals.push(build_expr(env, scope, parent, arg, procs)); + for (arg, layout) in args.iter() { + arg_tuples.push((build_expr(env, scope, parent, arg, procs), layout)); } - call_with_args(*symbol, arg_vals.into_bump_slice(), env) + call_with_args(*symbol, 
arg_tuples.into_bump_slice(), env) } }, FunctionPointer(symbol) => { @@ -165,7 +172,6 @@ pub fn build_expr<'a, 'ctx, 'env>( .left() .unwrap_or_else(|| panic!("LLVM error: Invalid call by pointer.")) } - Load(symbol) => match scope.get(symbol) { Some((_, ptr)) => env .builder @@ -209,13 +215,13 @@ pub fn build_expr<'a, 'ctx, 'env>( } Array { elem_layout, elems } => { let ctx = env.context; - let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout); + let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes); let builder = env.builder; if elems.is_empty() { let array_type = get_array_type(&elem_type, 0); let ptr_type = array_type.ptr_type(AddressSpace::Generic); - let struct_type = collection_wrapper(ctx, ptr_type); + let struct_type = collection_wrapper(ctx, ptr_type, env.ptr_bytes); // The first field in the struct should be the pointer. let struct_val = builder @@ -230,11 +236,12 @@ pub fn build_expr<'a, 'ctx, 'env>( BasicValueEnum::StructValue(struct_val.into_struct_value()) } else { let len_u64 = elems.len() as u64; - let elem_bytes = elem_layout.stack_size(env.pointer_bytes) as u64; + let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64; let ptr = { let bytes_len = elem_bytes * len_u64; - let len = ctx.i32_type().const_int(bytes_len, false); + let len_type = env.ptr_int(); + let len = len_type.const_int(bytes_len, false); env.builder .build_array_malloc(elem_type, len, "create_list_ptr") @@ -252,8 +259,8 @@ pub fn build_expr<'a, 'ctx, 'env>( } let ptr_val = BasicValueEnum::PointerValue(ptr); - let struct_type = collection_wrapper(ctx, ptr.get_type()); - let len = BasicValueEnum::IntValue(ctx.i32_type().const_int(len_u64, false)); + let struct_type = collection_wrapper(ctx, ptr.get_type(), env.ptr_bytes); + let len = BasicValueEnum::IntValue(env.ptr_int().const_int(len_u64, false)); let mut struct_val; // Field 0: pointer @@ -271,16 +278,6 @@ pub fn build_expr<'a, 'ctx, 'env>( .build_insert_value(struct_val, len, 
Builtin::WRAPPER_LEN, "insert_len") .unwrap(); - // Field 2: capacity (initially set to length) - struct_val = builder - .build_insert_value( - struct_val, - len, - Builtin::WRAPPER_CAPACITY, - "insert_capacity", - ) - .unwrap(); - BasicValueEnum::StructValue(struct_val.into_struct_value()) } } @@ -296,7 +293,8 @@ pub fn build_expr<'a, 'ctx, 'env>( for (field_expr, field_layout) in sorted_fields.iter() { let val = build_expr(env, &scope, parent, field_expr, procs); - let field_type = basic_type_from_layout(env.arena, env.context, &field_layout); + let field_type = + basic_type_from_layout(env.arena, env.context, &field_layout, env.ptr_bytes); field_types.push(field_type); field_vals.push(val); @@ -315,6 +313,157 @@ pub fn build_expr<'a, 'ctx, 'env>( BasicValueEnum::StructValue(struct_val.into_struct_value()) } + Tag { + union_size, + arguments, + .. + } if *union_size == 1 => { + let it = arguments.iter(); + + let ctx = env.context; + let builder = env.builder; + + // Determine types + let num_fields = arguments.len() + 1; + let mut field_types = Vec::with_capacity_in(num_fields, env.arena); + let mut field_vals = Vec::with_capacity_in(num_fields, env.arena); + + for (field_expr, field_layout) in it { + let val = build_expr(env, &scope, parent, field_expr, procs); + let field_type = + basic_type_from_layout(env.arena, env.context, &field_layout, env.ptr_bytes); + + field_types.push(field_type); + field_vals.push(val); + } + + // Create the struct_type + let struct_type = ctx.struct_type(field_types.into_bump_slice(), false); + let mut struct_val = struct_type.const_zero().into(); + + // Insert field exprs into struct_val + for (index, field_val) in field_vals.into_iter().enumerate() { + struct_val = builder + .build_insert_value(struct_val, field_val, index as u32, "insert_field") + .unwrap(); + } + + BasicValueEnum::StructValue(struct_val.into_struct_value()) + } + Tag { + arguments, + tag_layout, + union_size, + tag_id, + .. 
+ } => { + let ptr_size = env.ptr_bytes; + + let whole_size = tag_layout.stack_size(ptr_size); + let mut filler = tag_layout.stack_size(ptr_size); + + let ctx = env.context; + let builder = env.builder; + + // Determine types + let num_fields = arguments.len() + 1; + let mut field_types = Vec::with_capacity_in(num_fields, env.arena); + let mut field_vals = Vec::with_capacity_in(num_fields, env.arena); + + // insert the discriminant value + if *union_size > 1 { + let val = env + .context + .i64_type() + .const_int(*tag_id as u64, true) + .into(); + + let field_type = env.context.i64_type().into(); + + field_types.push(field_type); + field_vals.push(val); + + let field_size = ptr_size; + filler -= field_size; + } + + for (field_expr, field_layout) in arguments.iter() { + let val = build_expr(env, &scope, parent, field_expr, procs); + let field_type = + basic_type_from_layout(env.arena, env.context, &field_layout, ptr_size); + + field_types.push(field_type); + field_vals.push(val); + + let field_size = field_layout.stack_size(ptr_size); + filler -= field_size; + } + + // TODO verify that this is required (better safe than sorry) + if filler > 0 { + field_types.push(env.context.i8_type().array_type(filler).into()); + } + + // Create the struct_type + let struct_type = ctx.struct_type(field_types.into_bump_slice(), false); + let mut struct_val = struct_type.const_zero().into(); + + // Insert field exprs into struct_val + for (index, field_val) in field_vals.into_iter().enumerate() { + struct_val = builder + .build_insert_value(struct_val, field_val, index as u32, "insert_field") + .unwrap(); + } + + // How we create tag values + // + // The memory layout of tags can be different. e.g. in + // + // [ Ok Int, Err Str ] + // + // the `Ok` tag stores a 64-bit integer, the `Err` tag stores a struct. + // All tags of a union must have the same length, for easy addressing (e.g. array lookups). 
+ // So we need to ask for the maximum of all tags' sizes, even if most tags won't use + // all that memory, and certainly won't use it in the same way (the tags have fields of + // different types/sizes) + // + // In llvm, we must be explicit about the type of value we're creating: we can't just + // make an unspecified block of memory. So what we do is create a byte array of the + // desired size. Then when we know which tag we have (which is here, in this function), + // we need to cast that down to the array of bytes that llvm expects + // + // There is the bitcast instruction, but it doesn't work for arrays. So we need to jump + // through some hoops using store and load to get this to work: the array is put into a + // one-element struct, which can be cast to the desired type. + // + // This trick comes from + // https://github.com/raviqqe/ssf/blob/bc32aae68940d5bddf5984128e85af75ca4f4686/ssf-llvm/src/expression_compiler.rs#L116 + + let array_type = ctx.i8_type().array_type(whole_size); + let struct_pointer = builder.build_alloca(array_type, "struct_poitner"); + + builder.build_store( + builder + .build_bitcast( + struct_pointer, + struct_type.ptr_type(inkwell::AddressSpace::Generic), + "", + ) + .into_pointer_value(), + struct_val, + ); + + let result = builder.build_load(struct_pointer, ""); + + // For unclear reasons, we can't cast an array to a struct on the other side. + // the solution is to wrap the array in a struct (yea...) + let wrapper_type = ctx.struct_type(&[array_type.into()], false); + let mut wrapper_val = wrapper_type.const_zero().into(); + wrapper_val = builder + .build_insert_value(wrapper_val, result, 0, "insert_field") + .unwrap(); + wrapper_val.into_struct_value().into() + } Access { label, field_layout, @@ -348,6 +497,73 @@ pub fn build_expr<'a, 'ctx, 'env>( .build_extract_value(struct_val, index, "field_access") .unwrap() } + AccessAtIndex { + index, + expr, + is_unwrapped, + ..
+ } if *is_unwrapped => { + let builder = env.builder; + + // Get Struct val + // Since this is a one-element tag union, we get the correct struct immediately + let argument = build_expr(env, &scope, parent, expr, procs).into_struct_value(); + + builder + .build_extract_value( + argument, + *index as u32, + env.arena.alloc(format!("tag_field_access_{}_", index)), + ) + .unwrap() + } + + AccessAtIndex { + index, + expr, + field_layouts, + .. + } => { + let builder = env.builder; + + // Determine types, assumes the discriminant is in the field layouts + let num_fields = field_layouts.len(); + let mut field_types = Vec::with_capacity_in(num_fields, env.arena); + let ptr_bytes = env.ptr_bytes; + + for field_layout in field_layouts.iter() { + let field_type = + basic_type_from_layout(env.arena, env.context, &field_layout, ptr_bytes); + field_types.push(field_type); + } + + // Create the struct_type + let struct_type = env + .context + .struct_type(field_types.into_bump_slice(), false); + + // cast the argument bytes into the desired shape for this tag + let argument = build_expr(env, &scope, parent, expr, procs).into_struct_value(); + let argument_pointer = builder.build_alloca(argument.get_type(), ""); + builder.build_store(argument_pointer, argument); + + let argument = builder + .build_load( + builder + .build_bitcast( + argument_pointer, + struct_type.ptr_type(inkwell::AddressSpace::Generic), + "", + ) + .into_pointer_value(), + "", + ) + .into_struct_value(); + + builder + .build_extract_value(argument, *index as u32, "") + .expect("desired field did not decode") + } _ => { panic!("I don't yet know how to LLVM build {:?}", expr); } @@ -369,7 +585,7 @@ fn build_branch2<'a, 'ctx, 'env>( procs: &Procs<'a>, ) -> BasicValueEnum<'ctx> { let ret_layout = cond.ret_layout; - let ret_type = basic_type_from_layout(env.arena, env.context, &ret_layout); + let ret_type = basic_type_from_layout(env.arena, env.context, &ret_layout, env.ptr_bytes); let cond_expr = build_expr(env,
scope, parent, cond.cond, procs); @@ -433,10 +649,8 @@ fn build_switch<'a, 'ctx, 'env>( // they either need to all be i8, or i64 let int_val = match cond_layout { Layout::Builtin(Builtin::Int64) => context.i64_type().const_int(*int as u64, false), - Layout::Builtin(Builtin::Bool(_, _)) => { - context.bool_type().const_int(*int as u64, false) - } - Layout::Builtin(Builtin::Byte(_)) => context.i8_type().const_int(*int as u64, false), + Layout::Builtin(Builtin::Bool) => context.bool_type().const_int(*int as u64, false), + Layout::Builtin(Builtin::Byte) => context.i8_type().const_int(*int as u64, false), _ => panic!("Can't cast to cond_layout = {:?}", cond_layout), }; let block = context.append_basic_block(parent, format!("branch{}", int).as_str()); @@ -566,12 +780,12 @@ pub fn build_proc_header<'a, 'ctx, 'env>( let args = proc.args; let arena = env.arena; let context = &env.context; - let ret_type = basic_type_from_layout(arena, context, &proc.ret_layout); + let ret_type = basic_type_from_layout(arena, context, &proc.ret_layout, env.ptr_bytes); let mut arg_basic_types = Vec::with_capacity_in(args.len(), arena); let mut arg_symbols = Vec::new_in(arena); for (layout, arg_symbol) in args.iter() { - let arg_type = basic_type_from_layout(arena, env.context, &layout); + let arg_type = basic_type_from_layout(arena, env.context, &layout, env.ptr_bytes); arg_basic_types.push(arg_type); arg_symbols.push(arg_symbol); @@ -639,7 +853,7 @@ pub fn verify_fn(fn_val: FunctionValue<'_>) { #[allow(clippy::cognitive_complexity)] fn call_with_args<'a, 'ctx, 'env>( symbol: Symbol, - args: &[BasicValueEnum<'ctx>], + args: &[(BasicValueEnum<'ctx>, &'a Layout<'a>)], env: &Env<'a, 'ctx, 'env>, ) -> BasicValueEnum<'ctx> { match symbol { @@ -647,8 +861,8 @@ fn call_with_args<'a, 'ctx, 'env>( debug_assert!(args.len() == 2); let int_val = env.builder.build_int_add( - args[0].into_int_value(), - args[1].into_int_value(), + args[0].0.into_int_value(), + args[1].0.into_int_value(), "add_i64", ); @@ 
-658,8 +872,8 @@ fn call_with_args<'a, 'ctx, 'env>( debug_assert!(args.len() == 2); let float_val = env.builder.build_float_add( - args[0].into_float_value(), - args[1].into_float_value(), + args[0].0.into_float_value(), + args[1].0.into_float_value(), "add_f64", ); @@ -669,8 +883,8 @@ fn call_with_args<'a, 'ctx, 'env>( debug_assert!(args.len() == 2); let int_val = env.builder.build_int_sub( - args[0].into_int_value(), - args[1].into_int_value(), + args[0].0.into_int_value(), + args[1].0.into_int_value(), "sub_I64", ); @@ -680,8 +894,8 @@ fn call_with_args<'a, 'ctx, 'env>( debug_assert!(args.len() == 2); let float_val = env.builder.build_float_sub( - args[0].into_float_value(), - args[1].into_float_value(), + args[0].0.into_float_value(), + args[1].0.into_float_value(), "sub_f64", ); @@ -691,8 +905,8 @@ fn call_with_args<'a, 'ctx, 'env>( debug_assert!(args.len() == 2); let int_val = env.builder.build_int_mul( - args[0].into_int_value(), - args[1].into_int_value(), + args[0].0.into_int_value(), + args[1].0.into_int_value(), "mul_i64", ); @@ -703,29 +917,26 @@ fn call_with_args<'a, 'ctx, 'env>( let int_val = env .builder - .build_int_neg(args[0].into_int_value(), "negate_i64"); + .build_int_neg(args[0].0.into_int_value(), "negate_i64"); BasicValueEnum::IntValue(int_val) } Symbol::LIST_LEN => { debug_assert!(args.len() == 1); - let wrapper_struct = args[0].into_struct_value(); + let wrapper_struct = args[0].0.into_struct_value(); let builder = env.builder; - // Get the 32-bit int length - let i32_val = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "unwrapped_list_len").unwrap().into_int_value(); - - // cast the 32-bit length to a 64-bit int - BasicValueEnum::IntValue(builder.build_int_cast(i32_val, env.context.i64_type(), "i32_to_i64")) + // Get the usize int length + builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "unwrapped_list_len").unwrap().into_int_value().into() } Symbol::LIST_IS_EMPTY => { debug_assert!(args.len() == 1); - 
let list_struct = args[0].into_struct_value(); + let list_struct = args[0].0.into_struct_value(); let builder = env.builder; let list_len = builder.build_extract_value(list_struct, 1, "unwrapped_list_len").unwrap().into_int_value(); - let zero = env.context.i32_type().const_zero(); + let zero = env.ptr_int().const_zero(); let answer = builder.build_int_compare(IntPredicate::EQ, list_len, zero, "is_zero"); BasicValueEnum::IntValue(answer) @@ -735,8 +946,8 @@ fn call_with_args<'a, 'ctx, 'env>( let int_val = env.builder.build_int_compare( IntPredicate::EQ, - args[0].into_int_value(), - args[1].into_int_value(), + args[0].0.into_int_value(), + args[1].0.into_int_value(), "cmp_i64", ); @@ -747,8 +958,8 @@ fn call_with_args<'a, 'ctx, 'env>( let int_val = env.builder.build_int_compare( IntPredicate::EQ, - args[0].into_int_value(), - args[1].into_int_value(), + args[0].0.into_int_value(), + args[1].0.into_int_value(), "cmp_i1", ); @@ -759,8 +970,8 @@ fn call_with_args<'a, 'ctx, 'env>( let int_val = env.builder.build_int_compare( IntPredicate::EQ, - args[0].into_int_value(), - args[1].into_int_value(), + args[0].0.into_int_value(), + args[1].0.into_int_value(), "cmp_i8", ); @@ -771,8 +982,8 @@ fn call_with_args<'a, 'ctx, 'env>( let int_val = env.builder.build_float_compare( FloatPredicate::OEQ, - args[0].into_float_value(), - args[1].into_float_value(), + args[0].0.into_float_value(), + args[1].0.into_float_value(), "cmp_f64", ); @@ -784,36 +995,45 @@ fn call_with_args<'a, 'ctx, 'env>( // List.get : List elem, Int -> Result elem [ OutOfBounds ]* debug_assert!(args.len() == 2); - let wrapper_struct = args[0].into_struct_value(); - let elem_index = args[1].into_int_value(); + let (_list_expr, list_layout) = &args[0]; - // Slot 1 in the wrapper struct is the length + let wrapper_struct = args[0].0.into_struct_value(); + let elem_index = args[1].0.into_int_value(); + + // Get the length from the wrapper struct let _list_len = builder.build_extract_value(wrapper_struct, 
Builtin::WRAPPER_LEN, "unwrapped_list_len").unwrap().into_int_value(); // TODO here, check to see if the requested index exceeds the length of the array. - // Slot 0 in the wrapper struct is the pointer to the array data - let array_data_ptr = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_PTR, "unwrapped_list_ptr").unwrap().into_pointer_value(); + match list_layout { + Layout::Builtin(Builtin::List(elem_layout)) => { + // Get the pointer to the array data + let array_data_ptr = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_PTR, "unwrapped_list_ptr").unwrap().into_pointer_value(); - let elem_bytes = 8; // TODO Look this size up instead of hardcoding it! - let elem_size = env.context.i64_type().const_int(elem_bytes, false); + let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64; + let elem_size = env.context.i64_type().const_int(elem_bytes, false); - // Calculate the offset at runtime by multiplying the index by the size of an element. - let offset_bytes = builder.build_int_mul(elem_index, elem_size, "mul_offset"); + // Calculate the offset at runtime by multiplying the index by the size of an element. + let offset_bytes = builder.build_int_mul(elem_index, elem_size, "mul_offset"); - // We already checked the bounds earlier. - let elem_ptr = unsafe { builder.build_in_bounds_gep(array_data_ptr, &[offset_bytes], "elem") }; + // We already checked the bounds earlier. + let elem_ptr = unsafe { builder.build_in_bounds_gep(array_data_ptr, &[offset_bytes], "elem") }; - builder.build_load(elem_ptr, "List.get") + builder.build_load(elem_ptr, "List.get") + } + _ => { + unreachable!("Invalid List layout for List.get: {:?}", list_layout); + } + } } Symbol::LIST_SET /* TODO clone first for LIST_SET! 
*/ | Symbol::LIST_SET_IN_PLACE => { let builder = env.builder; debug_assert!(args.len() == 3); - let wrapper_struct = args[0].into_struct_value(); - let elem_index = args[1].into_int_value(); - let elem = args[2]; + let wrapper_struct = args[0].0.into_struct_value(); + let elem_index = args[1].0.into_int_value(); + let (elem, elem_layout) = args[2]; // Slot 1 in the wrapper struct is the length let _list_len = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "unwrapped_list_len").unwrap().into_int_value(); @@ -824,7 +1044,7 @@ fn call_with_args<'a, 'ctx, 'env>( // Slot 0 in the wrapper struct is the pointer to the array data let array_data_ptr = builder.build_extract_value(wrapper_struct, Builtin::WRAPPER_PTR, "unwrapped_list_ptr").unwrap().into_pointer_value(); - let elem_bytes = 8; // TODO Look this size up instead of hardcoding it! + let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64; let elem_size = env.context.i64_type().const_int(elem_bytes, false); // Calculate the offset at runtime by multiplying the index by the size of an element. 
@@ -845,7 +1065,13 @@ fn call_with_args<'a, 'ctx, 'env>( .get_function(symbol.ident_string(&env.interns)) .unwrap_or_else(|| panic!("Unrecognized function: {:?}", symbol)); - let call = env.builder.build_call(fn_val, args, "tmp"); + let mut arg_vals: Vec = Vec::with_capacity_in(args.len(), env.arena); + + for (arg, _layout) in args.iter() { + arg_vals.push(*arg); + } + + let call = env.builder.build_call(fn_val, arg_vals.into_bump_slice(), "call"); call.try_as_basic_value() .left() diff --git a/compiler/gen/src/llvm/convert.rs b/compiler/gen/src/llvm/convert.rs index 2c8c16dd6e..0d6b558b14 100644 --- a/compiler/gen/src/llvm/convert.rs +++ b/compiler/gen/src/llvm/convert.rs @@ -2,7 +2,7 @@ use bumpalo::collections::Vec; use bumpalo::Bump; use inkwell::context::Context; use inkwell::types::BasicTypeEnum::{self, *}; -use inkwell::types::{ArrayType, BasicType, FunctionType, PointerType, StructType}; +use inkwell::types::{ArrayType, BasicType, FunctionType, IntType, PointerType, StructType}; use inkwell::AddressSpace; use roc_mono::layout::Layout; @@ -38,17 +38,20 @@ pub fn basic_type_from_layout<'ctx>( arena: &Bump, context: &'ctx Context, layout: &Layout<'_>, + ptr_bytes: u32, ) -> BasicTypeEnum<'ctx> { use roc_mono::layout::Builtin::*; use roc_mono::layout::Layout::*; match layout { FunctionPointer(args, ret_layout) => { - let ret_type = basic_type_from_layout(arena, context, &ret_layout); + let ret_type = basic_type_from_layout(arena, context, &ret_layout, ptr_bytes); let mut arg_basic_types = Vec::with_capacity_in(args.len(), arena); for arg_layout in args.iter() { - arg_basic_types.push(basic_type_from_layout(arena, context, arg_layout)); + arg_basic_types.push(basic_type_from_layout( + arena, context, arg_layout, ptr_bytes, + )); } let fn_type = get_fn_type(&ret_type, arg_basic_types.into_bump_slice()); @@ -61,24 +64,50 @@ pub fn basic_type_from_layout<'ctx>( let mut field_types = Vec::with_capacity_in(sorted_fields.len(), arena); for (_, field_layout) in 
sorted_fields.iter() { - field_types.push(basic_type_from_layout(arena, context, field_layout)); + field_types.push(basic_type_from_layout( + arena, + context, + field_layout, + ptr_bytes, + )); } context .struct_type(field_types.into_bump_slice(), false) .as_basic_type_enum() } - Tag(_fields) => { - panic!("TODO layout_to_basic_type for Tag"); + Union(tags) if tags.len() == 1 => { + let layouts = tags.iter().next().unwrap(); + + // Determine types + let mut field_types = Vec::with_capacity_in(layouts.len(), arena); + + for layout in layouts.iter() { + field_types.push(basic_type_from_layout(arena, context, layout, ptr_bytes)); + } + + context + .struct_type(field_types.into_bump_slice(), false) + .as_basic_type_enum() } - Pointer(_layout) => { - panic!("TODO layout_to_basic_type for Pointer"); + Union(_) => { + // TODO make this dynamic + let ptr_size = std::mem::size_of::(); + let union_size = layout.stack_size(ptr_size as u32); + + let array_type = context + .i8_type() + .array_type(union_size) + .as_basic_type_enum(); + + context.struct_type(&[array_type], false).into() } + Builtin(builtin) => match builtin { Int64 => context.i64_type().as_basic_type_enum(), Float64 => context.f64_type().as_basic_type_enum(), - Bool(_, _) => context.bool_type().as_basic_type_enum(), - Byte(_) => context.i8_type().as_basic_type_enum(), + Bool => context.bool_type().as_basic_type_enum(), + Byte => context.i8_type().as_basic_type_enum(), Str | EmptyStr => context .i8_type() .ptr_type(AddressSpace::Generic) @@ -86,17 +115,17 @@ pub fn basic_type_from_layout<'ctx>( Map(_, _) | EmptyMap => panic!("TODO layout_to_basic_type for Builtin::Map"), Set(_) | EmptySet => panic!("TODO layout_to_basic_type for Builtin::Set"), List(elem_layout) => { - let ptr_type = basic_type_from_layout(arena, context, elem_layout) + let ptr_type = basic_type_from_layout(arena, context, elem_layout, ptr_bytes) .ptr_type(AddressSpace::Generic); - collection_wrapper(context, ptr_type).into() + 
collection_wrapper(context, ptr_type, ptr_bytes).into() } EmptyList => { let array_type = get_array_type(&context.opaque_struct_type("empty_list_elem").into(), 0); let ptr_type = array_type.ptr_type(AddressSpace::Generic); - collection_wrapper(context, ptr_type).into() + collection_wrapper(context, ptr_type, ptr_bytes).into() } }, } @@ -106,9 +135,24 @@ pub fn basic_type_from_layout<'ctx>( pub fn collection_wrapper<'ctx>( ctx: &'ctx Context, ptr_type: PointerType<'ctx>, + ptr_bytes: u32, ) -> StructType<'ctx> { let ptr_type_enum = BasicTypeEnum::PointerType(ptr_type); - let u32_type = BasicTypeEnum::IntType(ctx.i32_type()); + let len_type = BasicTypeEnum::IntType(ptr_int(ctx, ptr_bytes)); - ctx.struct_type(&[ptr_type_enum, u32_type, u32_type], false) + ctx.struct_type(&[ptr_type_enum, len_type], false) +} + +pub fn ptr_int(ctx: &Context, ptr_bytes: u32) -> IntType<'_> { + match ptr_bytes { + 1 => ctx.i8_type(), + 2 => ctx.i16_type(), + 4 => ctx.i32_type(), + 8 => ctx.i64_type(), + 16 => ctx.i128_type(), + _ => panic!( + "Invalid target: Roc does't support compiling to {}-bit systems.", + ptr_bytes * 8 + ), + } } diff --git a/compiler/gen/tests/test_gen.rs b/compiler/gen/tests/test_gen.rs index 944a780945..6636d4d572 100644 --- a/compiler/gen/tests/test_gen.rs +++ b/compiler/gen/tests/test_gen.rs @@ -77,6 +77,7 @@ mod test_gen { // Populate Procs and Subs, and get the low-level Expr from the canonical Expr let mono_expr = Expr::new(&arena, &mut subs, loc_expr.value, &mut procs, home, &mut ident_ids, POINTER_SIZE); + // Put this module's ident_ids back in the interns env.interns.all_ident_ids.insert(home, ident_ids); @@ -147,7 +148,7 @@ mod test_gen { builder.finalize(); } - module.define_function(main_fn, &mut ctx).expect("declare main"); + module.define_function(main_fn, &mut ctx).expect("crane declare main"); module.clear_context(&mut ctx); // Perform linking @@ -200,16 +201,16 @@ mod test_gen { // Compute main_fn_type before moving subs to Env let layout = 
Layout::from_content(&arena, content, &subs, POINTER_SIZE) .unwrap_or_else(|err| panic!("Code gen error in test: could not convert to layout. Err was {:?} and Subs were {:?}", err, subs)); - let main_fn_type = basic_type_from_layout(&arena, &context, &layout) - .fn_type(&[], false); - let main_fn_name = "$Test.main"; - let execution_engine = module .create_jit_execution_engine(OptimizationLevel::None) .expect("Error creating JIT execution engine for test"); - let pointer_bytes = execution_engine.get_target_data().get_pointer_byte_size(None); + let ptr_bytes = execution_engine.get_target_data().get_pointer_byte_size(None); + + let main_fn_type = basic_type_from_layout(&arena, &context, &layout, ptr_bytes) + .fn_type(&[], false); + let main_fn_name = "$Test.main"; // Compile and add all the Procs before adding main let mut env = roc_gen::llvm::build::Env { @@ -218,7 +219,7 @@ mod test_gen { context: &context, interns, module: arena.alloc(module), - pointer_bytes + ptr_bytes }; let mut procs = Procs::default(); let mut ident_ids = env.interns.all_ident_ids.remove(&home).unwrap(); @@ -226,6 +227,8 @@ mod test_gen { // Populate Procs and get the low-level Expr from the canonical Expr let main_body = Expr::new(&arena, &mut subs, loc_expr.value, &mut procs, home, &mut ident_ids, POINTER_SIZE); + dbg!(&main_body); + // Put this module's ident_ids back in the interns, so we can use them in Env. env.interns.all_ident_ids.insert(home, ident_ids); @@ -335,16 +338,16 @@ mod test_gen { // Compute main_fn_type before moving subs to Env let layout = Layout::from_content(&arena, content, &subs, POINTER_SIZE) .unwrap_or_else(|err| panic!("Code gen error in test: could not convert to layout. 
Err was {:?} and Subs were {:?}", err, subs)); - let main_fn_type = basic_type_from_layout(&arena, &context, &layout) - .fn_type(&[], false); - let main_fn_name = "$Test.main"; let execution_engine = module .create_jit_execution_engine(OptimizationLevel::None) .expect("Error creating JIT execution engine for test"); - let pointer_bytes = execution_engine.get_target_data().get_pointer_byte_size(None); + let ptr_bytes = execution_engine.get_target_data().get_pointer_byte_size(None); + let main_fn_type = basic_type_from_layout(&arena, &context, &layout, ptr_bytes) + .fn_type(&[], false); + let main_fn_name = "$Test.main"; // Compile and add all the Procs before adding main let mut env = roc_gen::llvm::build::Env { @@ -353,7 +356,7 @@ mod test_gen { context: &context, interns, module: arena.alloc(module), - pointer_bytes + ptr_bytes }; let mut procs = Procs::default(); let mut ident_ids = env.interns.all_ident_ids.remove(&home).unwrap(); @@ -550,7 +553,26 @@ mod test_gen { } #[test] - fn get_shared_int_list() { + fn set_shared_int_list() { + assert_crane_evals_to!( + indoc!( + r#" + shared = [ 2, 4 ] + + # This should not mutate the original + x = List.set shared 1 77 + + List.getUnsafe shared 1 + "# + ), + 4, + i64, + |x| x + ); + } + + #[test] + fn get_unique_int_list() { assert_evals_to!( indoc!( r#" @@ -594,6 +616,23 @@ mod test_gen { ); } + // doesn't work yet. 
The condition must be cast to an integer to use a jump table + // #[test] + // fn branch_third_float() { + // assert_evals_to!( + // indoc!( + // r#" + // when 10.0 is + // 1.0 -> 63 + // 2 -> 48 + // _ -> 112 + // "# + // ), + // 112.0, + // f64 + // ); + // } + #[test] fn branch_first_int() { assert_evals_to!( @@ -624,6 +663,85 @@ mod test_gen { ); } + #[test] + fn branch_third_int() { + assert_evals_to!( + indoc!( + r#" + when 10 is + 1 -> 63 + 2 -> 48 + _ -> 112 + "# + ), + 112, + i64 + ); + } + + #[test] + fn branch_store_variable() { + assert_evals_to!( + indoc!( + r#" + when 0 is + 1 -> 12 + a -> a + "# + ), + 0, + i64 + ); + } + + #[test] + fn one_element_tag() { + assert_evals_to!( + indoc!( + r#" + x : [ Pair Int ] + x = Pair 2 + + 0x3 + "# + ), + 3, + i64 + ); + } + + #[test] + fn when_one_element_tag() { + assert_evals_to!( + indoc!( + r#" + x : [ Pair Int Int ] + x = Pair 0x2 0x3 + + when x is + Pair l r -> l + r + "# + ), + 5, + i64 + ); + } + + #[test] + fn twice_record_access() { + assert_evals_to!( + indoc!( + r#" + x = {a: 0x2, b: 0x3 } + + x.a + x.b + "# + ), + 5, + i64 + ); + } + #[test] fn gen_when_one_branch() { assert_evals_to!( @@ -1065,6 +1183,136 @@ mod test_gen { ); } + #[test] + fn applied_tag_nothing() { + assert_evals_to!( + indoc!( + r#" + Maybe a : [ Just a, Nothing ] + + x : Maybe Int + x = Nothing + + 0x1 + "# + ), + 1, + i64 + ); + } + #[test] + fn applied_tag_just() { + assert_evals_to!( + indoc!( + r#" + Maybe a : [ Just a, Nothing ] + + y : Maybe Int + y = Just 0x4 + + 0x1 + "# + ), + 1, + i64 + ); + } + + // + // #[test] + // fn applied_tag_just_unit() { + // assert_evals_to!( + // indoc!( + // r#" + // Fruit : [ Orange, Apple, Banana ] + // Maybe a : [ Just a, Nothing ] + // + // orange : Fruit + // orange = Orange + // + // y : Maybe Fruit + // y = Just orange + // + // 0x1 + // "# + // ), + // 1, + // i64 + // ); + // } + + #[test] + fn when_on_nothing() { + assert_evals_to!( + indoc!( + r#" + x : [ Nothing, Just Int ] + x 
= Nothing + + when x is + Nothing -> 0x2 + Just _ -> 0x1 + "# + ), + 2, + i64 + ); + } + + // #[test] + // fn when_on_just() { + // assert_evals_to!( + // indoc!( + // r#" + // x : [ Nothing, Just Int ] + // x = Just 41 + // + // case x of + // Just v -> v + 0x1 + // Nothing -> 0x1 + // "# + // ), + // 42, + // i64 + // ); + // } + + #[test] + fn when_on_result() { + assert_evals_to!( + indoc!( + r#" + x : Result Int Int + x = Err 41 + + when x is + Err v -> v + 1 + Ok _ -> 1 + "# + ), + 42, + i64 + ); + } + + #[test] + fn when_on_these() { + assert_evals_to!( + indoc!( + r#" + x : [ This Int, These Int Int ] + x = These 0x3 0x2 + + when x is + These a b -> a + b + This v -> v + "# + ), + 5, + i64 + ); + } + #[test] fn basic_record() { assert_evals_to!( @@ -1174,5 +1422,17 @@ mod test_gen { 19, i64 ); + + assert_evals_to!( + indoc!( + r#" + rec = { x: 15, y: 17, z: 19 } + + rec.z + rec.x + "# + ), + 34, + i64 + ); } } diff --git a/compiler/mono/src/decision_tree.rs b/compiler/mono/src/decision_tree.rs new file mode 100644 index 0000000000..f85df484e4 --- /dev/null +++ b/compiler/mono/src/decision_tree.rs @@ -0,0 +1,1137 @@ +use crate::expr::Env; +use crate::expr::Expr; +use crate::expr::Pattern; +use roc_collections::all::{MutMap, MutSet}; +use roc_module::ident::TagName; +use roc_module::symbol::Symbol; + +use crate::layout::Builtin; +use crate::layout::Layout; + +/// COMPILE CASES + +type Label = u64; + +/// Users of this module will mainly interact with this function. It takes +/// some normal branches and gives out a decision tree that has "labels" at all +/// the leafs and a dictionary that maps these "labels" to the code that should +/// run. 
+pub fn compile(raw_branches: Vec<(Pattern<'_>, u64)>) -> DecisionTree { + let formatted = raw_branches + .into_iter() + .map(|(pattern, index)| Branch { + goal: index, + patterns: vec![(Path::Empty, pattern)], + }) + .collect(); + + to_decision_tree(formatted) +} + +#[derive(Clone, Debug, PartialEq)] +pub enum DecisionTree<'a> { + Match(Label), + Decision { + path: Path, + edges: Vec<(Test<'a>, DecisionTree<'a>)>, + default: Option>>, + }, +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum Test<'a> { + IsCtor { + tag_id: u8, + tag_name: TagName, + union: crate::pattern::Union, + arguments: Vec<(Pattern<'a>, Layout<'a>)>, + }, + IsInt(i64), + // float patterns are stored as u64 so they are comparable/hashable + IsFloat(u64), + IsStr(Box), + IsBit(bool), + IsByte { + tag_id: u8, + num_alts: usize, + }, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Path { + Index { index: u64, path: Box }, + Unbox(Box), + Empty, +} + +// ACTUALLY BUILD DECISION TREES + +#[derive(Clone, Debug, PartialEq)] +struct Branch<'a> { + goal: Label, + patterns: Vec<(Path, Pattern<'a>)>, +} + +fn to_decision_tree(raw_branches: Vec) -> DecisionTree { + let branches: Vec<_> = raw_branches.into_iter().map(flatten_patterns).collect(); + + match check_for_match(&branches) { + Some(goal) => DecisionTree::Match(goal), + None => { + // TODO remove clone + let path = pick_path(branches.clone()); + + let (edges, fallback) = gather_edges(branches, &path); + + let mut decision_edges: Vec<_> = edges + .into_iter() + .map(|(a, b)| (a, to_decision_tree(b))) + .collect(); + + match (decision_edges.split_last_mut(), fallback.split_last()) { + (Some(((_tag, decision_tree), rest)), None) if rest.is_empty() => { + // TODO remove clone + decision_tree.clone() + } + (_, None) => DecisionTree::Decision { + path, + edges: decision_edges, + default: None, + }, + (None, Some(_)) => to_decision_tree(fallback), + _ => DecisionTree::Decision { + path, + edges: decision_edges, + default: 
Some(Box::new(to_decision_tree(fallback))), + }, + } + } + } +} + +fn is_complete(tests: &[Test]) -> bool { + let length = tests.len(); + debug_assert!(length > 0); + match tests.get(length - 1) { + None => unreachable!("should never happen"), + Some(v) => match v { + Test::IsCtor { union, .. } => length == union.alternatives.len(), + Test::IsByte { num_alts, .. } => length == *num_alts, + Test::IsBit(_) => length == 2, + Test::IsInt(_) => false, + Test::IsFloat(_) => false, + Test::IsStr(_) => false, + }, + } +} + +fn flatten_patterns(branch: Branch) -> Branch { + let mut result = Vec::with_capacity(branch.patterns.len()); + + for path_pattern in branch.patterns { + flatten(path_pattern, &mut result); + } + Branch { + goal: branch.goal, + patterns: result, + } +} + +fn flatten<'a>(path_pattern: (Path, Pattern<'a>), path_patterns: &mut Vec<(Path, Pattern<'a>)>) { + match &path_pattern.1 { + Pattern::AppliedTag { union, .. } => { + if union.alternatives.len() == 1 { + // case map dearg ctorArgs of + // [arg] -> + // flatten (Unbox path, arg) otherPathPatterns + // + // args -> + // foldr flatten otherPathPatterns (subPositions path args) + // subPositions :: Path -> [Can.Pattern] -> [(Path, Can.Pattern)] + // subPositions path patterns = + // Index.indexedMap (\index pattern -> (Index index path, pattern)) patterns + // + // + // dearg :: Can.PatternCtorArg -> Can.Pattern + // dearg (Can.PatternCtorArg _ _ pattern) = + // pattern + + todo!() + } else { + path_patterns.push(path_pattern); + } + } + + _ => { + path_patterns.push(path_pattern); + } + } +} + +/// SUCCESSFULLY MATCH + +/// If the first branch has no more "decision points" we can finally take that +/// path. If that is the case we give the resulting label and a mapping from free +/// variables to "how to get their value". So a pattern like (Just (x,_)) will give +/// us something like ("x" => value.0.0) +fn check_for_match(branches: &Vec) -> Option