From e8530eaca515cc2362cd6b744d814220f3f7606c Mon Sep 17 00:00:00 2001 From: Folkert Date: Fri, 8 Jul 2022 22:13:13 +0200 Subject: [PATCH] improve lowlevel unwrapping --- crates/compiler/alias_analysis/src/lib.rs | 2 +- crates/compiler/builtins/bitcode/src/str.zig | 39 ++- crates/compiler/builtins/roc/List.roc | 4 + crates/compiler/builtins/roc/Str.roc | 8 +- crates/compiler/builtins/src/bitcode.rs | 1 - crates/compiler/can/src/builtins.rs | 5 +- crates/compiler/gen_llvm/src/llvm/build.rs | 54 ++-- .../compiler/gen_llvm/src/llvm/build_str.rs | 68 +--- crates/compiler/gen_wasm/src/low_level.rs | 10 +- crates/compiler/module/src/low_level.rs | 298 ++++++++++-------- crates/compiler/module/src/symbol.rs | 1 + crates/compiler/mono/src/borrow.rs | 5 +- 12 files changed, 249 insertions(+), 246 deletions(-) diff --git a/crates/compiler/alias_analysis/src/lib.rs b/crates/compiler/alias_analysis/src/lib.rs index 2fffcf4654..0994e70701 100644 --- a/crates/compiler/alias_analysis/src/lib.rs +++ b/crates/compiler/alias_analysis/src/lib.rs @@ -1075,7 +1075,7 @@ fn lowlevel_spec( builder.add_make_tuple(block, &[cell, bag]) } - StrFromUtf8 => { + StrFromUtf8Range => { let list = env.symbols[&arguments[0]]; let cell = builder.add_get_tuple_field(block, list, LIST_CELL_INDEX)?; diff --git a/crates/compiler/builtins/bitcode/src/str.zig b/crates/compiler/builtins/bitcode/src/str.zig index d9e81a1161..13f015e31d 100644 --- a/crates/compiler/builtins/bitcode/src/str.zig +++ b/crates/compiler/builtins/bitcode/src/str.zig @@ -1638,18 +1638,45 @@ inline fn fromUtf8(arg: RocList, update_mode: UpdateMode) FromUtf8Result { } } -pub fn fromUtf8RangeC(output: *FromUtf8Result, arg: RocList, countAndStart: CountAndStart) callconv(.C) void { - output.* = @call(.{ .modifier = always_inline }, fromUtf8Range, .{ arg, countAndStart }); +pub fn fromUtf8RangeC( + output: *FromUtf8Result, + list: RocList, + start: usize, + count: usize, + update_mode: UpdateMode, +) callconv(.C) void { + output.* = @call(.{ .modifier = always_inline }, fromUtf8Range, .{ list, start, count, update_mode }); } -fn fromUtf8Range(arg: RocList, countAndStart: CountAndStart) FromUtf8Result { - const bytes = @ptrCast([*]const u8, arg.bytes)[countAndStart.start..countAndStart.count]; +pub fn fromUtf8Range(arg: RocList, start: usize, count: usize, update_mode: UpdateMode) FromUtf8Result { + const bytes = @ptrCast([*]const u8, arg.bytes)[start..count]; if (unicode.utf8ValidateSlice(bytes)) { // the output will be correct. Now we need to clone the input - const string = RocStr.init(@ptrCast([*]const u8, bytes), countAndStart.count); - return FromUtf8Result{ .is_ok = true, .string = string, .byte_index = 0, .problem_code = Utf8ByteProblem.InvalidStartByte }; + if (count == arg.len() and count > SMALL_STR_MAX_LENGTH) { + const byte_list = arg.makeUniqueExtra(RocStr.alignment, @sizeOf(u8), update_mode); + + const string = RocStr{ + .str_bytes = byte_list.bytes, + .str_len = byte_list.length, + .str_capacity = byte_list.capacity, + }; + + return FromUtf8Result{ + .is_ok = true, + .string = string, + .byte_index = 0, + .problem_code = Utf8ByteProblem.InvalidStartByte, + }; + } else { + return FromUtf8Result{ + .is_ok = true, + .string = RocStr.init(@ptrCast([*]const u8, bytes), count), + .byte_index = 0, + .problem_code = Utf8ByteProblem.InvalidStartByte, + }; + } } else { const temp = errorToProblem(@ptrCast([*]u8, arg.bytes), arg.length); return FromUtf8Result{ .is_ok = false, .string = RocStr.empty(), .byte_index = temp.index, .problem_code = temp.problem }; diff --git a/crates/compiler/builtins/roc/List.roc b/crates/compiler/builtins/roc/List.roc index 789e1e5c2b..b822169c98 100644 --- a/crates/compiler/builtins/roc/List.roc +++ b/crates/compiler/builtins/roc/List.roc @@ -816,6 +816,10 @@ findIndex = \list, matcher -> ## ## Some languages have a function called **`slice`** which works similarly to this. sublist : List elem, { start : Nat, len : Nat } -> List elem +sublist = \list, config -> + sublistLowlevel list config.start config.len + +sublistLowlevel : List elem, Nat, Nat -> List elem ## Intersperses `sep` between the elements of `list` ## >>> List.intersperse 9 [1, 2, 3] # [1, 9, 2, 9, 3] diff --git a/crates/compiler/builtins/roc/Str.roc b/crates/compiler/builtins/roc/Str.roc index 43fc817742..8650dc6d16 100644 --- a/crates/compiler/builtins/roc/Str.roc +++ b/crates/compiler/builtins/roc/Str.roc @@ -199,11 +199,9 @@ toScalars : Str -> List U32 ## >>> Str.toUtf8 "🐦" toUtf8 : Str -> List U8 -# fromUtf8 : List U8 -> Result Str [BadUtf8 Utf8Problem]* -# fromUtf8Range : List U8 -> Result Str [BadUtf8 Utf8Problem Nat, OutOfBounds]* fromUtf8 : List U8 -> Result Str [BadUtf8 Utf8ByteProblem Nat]* fromUtf8 = \bytes -> - result = fromUtf8RangeLowlevel bytes { start: 0, count: List.len bytes } + result = fromUtf8RangeLowlevel bytes 0 (List.len bytes) if result.cIsOk then Ok result.bString @@ -213,7 +211,7 @@ fromUtf8 = \bytes -> fromUtf8Range : List U8, { start : Nat, count : Nat } -> Result Str [BadUtf8 Utf8ByteProblem Nat, OutOfBounds]* fromUtf8Range = \bytes, config -> if config.start + config.count <= List.len bytes then - result = fromUtf8RangeLowlevel bytes config + result = fromUtf8RangeLowlevel bytes config.start config.count if result.cIsOk then Ok result.bString @@ -229,7 +227,7 @@ FromUtf8Result : { dProblemCode : Utf8ByteProblem, } -fromUtf8RangeLowlevel : List U8, { start : Nat, count : Nat } -> FromUtf8Result +fromUtf8RangeLowlevel : List U8, Nat, Nat -> FromUtf8Result startsWith : Str, Str -> Bool endsWith : Str, Str -> Bool diff --git a/crates/compiler/builtins/src/bitcode.rs b/crates/compiler/builtins/src/bitcode.rs index f044bd8bfc..07bea0a5a6 100644 --- a/crates/compiler/builtins/src/bitcode.rs +++ b/crates/compiler/builtins/src/bitcode.rs @@ -326,7 +326,6 @@ pub const STR_TO_DECIMAL: &str = "roc_builtins.str.to_decimal"; pub const STR_EQUAL: &str = "roc_builtins.str.equal"; pub const STR_SUBSTRING_UNSAFE: &str = "roc_builtins.str.substring_unsafe"; pub const STR_TO_UTF8: &str = "roc_builtins.str.to_utf8"; -pub const STR_FROM_UTF8: &str = "roc_builtins.str.from_utf8"; pub const STR_FROM_UTF8_RANGE: &str = "roc_builtins.str.from_utf8_range"; pub const STR_REPEAT: &str = "roc_builtins.str.repeat"; pub const STR_TRIM: &str = "roc_builtins.str.trim"; diff --git a/crates/compiler/can/src/builtins.rs b/crates/compiler/can/src/builtins.rs index 33101269af..67c442da65 100644 --- a/crates/compiler/can/src/builtins.rs +++ b/crates/compiler/can/src/builtins.rs @@ -128,8 +128,7 @@ more_macro_magic! { StrSplit; STR_SPLIT; 2, StrCountGraphemes; STR_COUNT_GRAPHEMES; 1, StrCountUtf8Bytes; STR_COUNT_UTF8_BYTES; 1, - StrFromUtf8; STR_FROM_UTF8; 1, - StrFromUtf8Range; STR_FROM_UTF8_RANGE_LOWLEVEL; 2, + StrFromUtf8Range; STR_FROM_UTF8_RANGE_LOWLEVEL; 3, StrToUtf8; STR_TO_UTF8; 1, StrRepeat; STR_REPEAT; 2, StrTrim; STR_TRIM; 1, @@ -156,7 +155,7 @@ more_macro_magic! { ListMap3; LIST_MAP3; 4, ListMap4; LIST_MAP4; 5, ListSortWith; LIST_SORT_WITH; 2, - ListSublist; LIST_SUBLIST; 2, + ListSublist; LIST_SUBLIST_LOWLEVEL; 3, ListDropAt; LIST_DROP_AT; 2, ListSwap; LIST_SWAP; 3, DictSize; DICT_LEN; 1, diff --git a/crates/compiler/gen_llvm/src/llvm/build.rs b/crates/compiler/gen_llvm/src/llvm/build.rs index 9133f77806..e9987f670b 100644 --- a/crates/compiler/gen_llvm/src/llvm/build.rs +++ b/crates/compiler/gen_llvm/src/llvm/build.rs @@ -11,9 +11,9 @@ use crate::llvm::build_list::{ self, allocate_list, empty_polymorphic_list, list_append_unsafe, list_concat, list_drop_at, list_get_unsafe, list_len, list_map, list_map2, list_map3, list_map4, list_prepend, list_replace_unsafe, list_reserve, list_sort_with, list_sublist, list_swap, - list_symbol_to_c_abi, list_to_c_abi, list_with_capacity, + list_symbol_to_c_abi, list_to_c_abi, list_with_capacity, pass_update_mode, }; -use crate::llvm::build_str::{str_from_float, str_from_int, str_from_utf8, str_from_utf8_range}; +use crate::llvm::build_str::{str_from_float, str_from_int}; use crate::llvm::compare::{generic_eq, generic_neq}; use crate::llvm::convert::{ self, argument_type_from_layout, basic_type_from_builtin, basic_type_from_layout, @@ -5354,18 +5354,31 @@ fn run_low_level<'a, 'ctx, 'env>( str_from_float(env, scope, args[0]) } - StrFromUtf8 => { - // Str.fromUtf8 : List U8 -> Result Str Utf8Problem - debug_assert_eq!(args.len(), 1); - - str_from_utf8(env, scope, args[0], update_mode) - } StrFromUtf8Range => { - debug_assert_eq!(args.len(), 2); + debug_assert_eq!(args.len(), 3); - let count_and_start = load_symbol(scope, &args[1]).into_struct_value(); + let list = args[0]; + let start = load_symbol(scope, &args[1]); + let count = load_symbol(scope, &args[2]); - str_from_utf8_range(env, scope, args[0], count_and_start) + let result_type = env.module.get_struct_type("str.FromUtf8Result").unwrap(); + let result_ptr = env + .builder + .build_alloca(result_type, "alloca_utf8_validate_bytes_result"); + + call_void_bitcode_fn( + env, + &[ + result_ptr.into(), + list_symbol_to_c_abi(env, scope, list).into(), + start, + count, + pass_update_mode(env, update_mode), + ], + bitcode::STR_FROM_UTF8_RANGE, + ); + + crate::llvm::build_str::decode_from_utf8_result(env, result_ptr).into() } StrToUtf8 => { // Str.fromInt : Str -> List U8 @@ -5549,26 +5562,13 @@ fn run_low_level<'a, 'ctx, 'env>( ) } ListSublist => { - // List.sublist : List elem, { start : Nat, len : Nat } -> List elem - // - // As a low-level, record is destructed - // List.sublist : List elem, start : Nat, len : Nat -> List elem - debug_assert_eq!(args.len(), 2); + debug_assert_eq!(args.len(), 3); let (list, list_layout) = load_symbol_and_layout(scope, &args[0]); let original_wrapper = list.into_struct_value(); - let record = load_symbol(scope, &args[1]).into_struct_value(); - - let len = env - .builder - .build_extract_value(record, 0, "get_len") - .unwrap(); - - let start = env - .builder - .build_extract_value(record, 1, "get_start") - .unwrap(); + let start = load_symbol(scope, &args[1]); + let len = load_symbol(scope, &args[2]); let element_layout = list_element_layout!(list_layout); list_sublist( diff --git a/crates/compiler/gen_llvm/src/llvm/build_str.rs b/crates/compiler/gen_llvm/src/llvm/build_str.rs index 3b837ca907..acd4d759f9 100644 --- a/crates/compiler/gen_llvm/src/llvm/build_str.rs +++ b/crates/compiler/gen_llvm/src/llvm/build_str.rs @@ -1,17 +1,14 @@ -use crate::llvm::bitcode::{call_bitcode_fn, call_str_bitcode_fn, call_void_bitcode_fn}; +use crate::llvm::bitcode::{call_bitcode_fn, call_str_bitcode_fn}; use crate::llvm::build::{Env, Scope}; -use crate::llvm::build_list::pass_update_mode; use inkwell::builder::Builder; use inkwell::values::{BasicValueEnum, IntValue, PointerValue, StructValue}; use inkwell::AddressSpace; -use morphic_lib::UpdateMode; use roc_builtins::bitcode::{self, IntWidth}; use roc_module::symbol::Symbol; use roc_mono::layout::{Builtin, Layout}; use roc_target::PtrWidth; use super::build::{create_entry_block_alloca, load_symbol}; -use super::build_list::list_symbol_to_c_abi; pub static CHAR_LAYOUT: Layout = Layout::u8(); @@ -70,7 +67,7 @@ pub fn str_from_int<'a, 'ctx, 'env>( call_str_bitcode_fn(env, &[value.into()], &bitcode::STR_FROM_INT[int_width]) } -fn decode_from_utf8_result<'a, 'ctx, 'env>( +pub fn decode_from_utf8_result<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, pointer: PointerValue<'ctx>, ) -> StructValue<'ctx> { @@ -106,67 +103,6 @@ fn decode_from_utf8_result<'a, 'ctx, 'env>( } } -/// Str.fromUtf8 : List U8, { count : Nat, start : Nat } -> { a : Bool, b : Str, c : Nat, d : I8 } -pub fn str_from_utf8_range<'a, 'ctx, 'env>( - env: &Env<'a, 'ctx, 'env>, - scope: &Scope<'a, 'ctx>, - list: Symbol, - count_and_start: StructValue<'ctx>, -) -> BasicValueEnum<'ctx> { - let builder = env.builder; - - let result_type = env.module.get_struct_type("str.FromUtf8Result").unwrap(); - let result_ptr = builder.build_alloca(result_type, "alloca_utf8_validate_bytes_result"); - - let count = env - .builder - .build_extract_value(count_and_start, 0, "get_count") - .unwrap(); - - let start = env - .builder - .build_extract_value(count_and_start, 1, "get_start") - .unwrap(); - - call_void_bitcode_fn( - env, - &[ - result_ptr.into(), - list_symbol_to_c_abi(env, scope, list).into(), - count, - start, - ], - bitcode::STR_FROM_UTF8_RANGE, - ); - - decode_from_utf8_result(env, result_ptr).into() -} - -/// Str.fromUtf8 : List U8 -> { a : Bool, b : Str, c : Nat, d : I8 } -pub fn str_from_utf8<'a, 'ctx, 'env>( - env: &Env<'a, 'ctx, 'env>, - scope: &Scope<'a, 'ctx>, - list: Symbol, - update_mode: UpdateMode, -) -> BasicValueEnum<'ctx> { - let builder = env.builder; - - let result_type = env.module.get_struct_type("str.FromUtf8Result").unwrap(); - let result_ptr = builder.build_alloca(result_type, "alloca_utf8_validate_bytes_result"); - - call_void_bitcode_fn( - env, - &[ - result_ptr.into(), - list_symbol_to_c_abi(env, scope, list).into(), - pass_update_mode(env, update_mode), - ], - bitcode::STR_FROM_UTF8, - ); - - decode_from_utf8_result(env, result_ptr).into() -} - /// Str.fromFloat : Int -> Str pub fn str_from_float<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, diff --git a/crates/compiler/gen_wasm/src/low_level.rs b/crates/compiler/gen_wasm/src/low_level.rs index eeb3311f5f..570cae4f6d 100644 --- a/crates/compiler/gen_wasm/src/low_level.rs +++ b/crates/compiler/gen_wasm/src/low_level.rs @@ -264,16 +264,19 @@ impl<'a> LowLevelCall<'a> { } StrFromInt => self.num_to_str(backend), StrFromFloat => self.num_to_str(backend), - StrFromUtf8 => { + StrFromUtf8Range => { /* Low-level op returns a struct with all the data for both Ok and Err. Roc AST wrapper converts this to a tag union, with app-dependent tag IDs. - fromUtf8C(output: *FromUtf8Result, arg: RocList, update_mode: UpdateMode) callconv(.C) void output: *FromUtf8Result i32 arg: RocList i64, i32 + start i32 + count i32 update_mode: UpdateMode i32 */ + + // loads arg, start, count backend.storage.load_symbols_for_call( backend.env.arena, &mut backend.code_builder, @@ -283,9 +286,8 @@ impl<'a> LowLevelCall<'a> { CallConv::Zig, ); backend.code_builder.i32_const(UPDATE_MODE_IMMUTABLE); - backend.call_host_fn_after_loading_args(bitcode::STR_FROM_UTF8, 4, false); + backend.call_host_fn_after_loading_args(bitcode::STR_FROM_UTF8_RANGE, 5, false); } - StrFromUtf8Range => self.load_args_and_call_zig(backend, bitcode::STR_FROM_UTF8_RANGE), StrTrimLeft => self.load_args_and_call_zig(backend, bitcode::STR_TRIM_LEFT), StrTrimRight => self.load_args_and_call_zig(backend, bitcode::STR_TRIM_RIGHT), StrToUtf8 => self.load_args_and_call_zig(backend, bitcode::STR_TO_UTF8), diff --git a/crates/compiler/module/src/low_level.rs b/crates/compiler/module/src/low_level.rs index 70d468c156..62343512fc 100644 --- a/crates/compiler/module/src/low_level.rs +++ b/crates/compiler/module/src/low_level.rs @@ -15,7 +15,6 @@ pub enum LowLevel { StrCountGraphemes, StrCountUtf8Bytes, StrFromInt, - StrFromUtf8, StrFromUtf8Range, StrToUtf8, StrRepeat, @@ -168,134 +167,173 @@ pub enum LowLevelWrapperType { impl LowLevelWrapperType { pub fn from_symbol(symbol: Symbol) -> LowLevelWrapperType { - use LowLevel::*; - use LowLevelWrapperType::*; - - match symbol { - Symbol::STR_CONCAT => CanBeReplacedBy(StrConcat), - Symbol::STR_GET_UNSAFE => CanBeReplacedBy(StrGetUnsafe), - Symbol::STR_TO_SCALARS => CanBeReplacedBy(StrToScalars), - Symbol::STR_JOIN_WITH => CanBeReplacedBy(StrJoinWith), - Symbol::STR_IS_EMPTY => CanBeReplacedBy(StrIsEmpty), - Symbol::STR_STARTS_WITH => CanBeReplacedBy(StrStartsWith), - Symbol::STR_STARTS_WITH_SCALAR => CanBeReplacedBy(StrStartsWithScalar), - Symbol::STR_ENDS_WITH => CanBeReplacedBy(StrEndsWith), - Symbol::STR_SPLIT => CanBeReplacedBy(StrSplit), - Symbol::STR_COUNT_GRAPHEMES => CanBeReplacedBy(StrCountGraphemes), - Symbol::STR_COUNT_UTF8_BYTES => CanBeReplacedBy(StrCountUtf8Bytes), - Symbol::STR_FROM_UTF8 => WrapperIsRequired, - Symbol::STR_FROM_UTF8_RANGE => WrapperIsRequired, - Symbol::STR_TO_UTF8 => CanBeReplacedBy(StrToUtf8), - Symbol::STR_REPEAT => CanBeReplacedBy(StrRepeat), - Symbol::STR_RESERVE => CanBeReplacedBy(StrReserve), - Symbol::STR_APPEND_SCALAR_UNSAFE => CanBeReplacedBy(StrAppendScalar), - Symbol::STR_TRIM => CanBeReplacedBy(StrTrim), - Symbol::STR_TRIM_LEFT => CanBeReplacedBy(StrTrimLeft), - Symbol::STR_TRIM_RIGHT => CanBeReplacedBy(StrTrimRight), - Symbol::STR_TO_DEC => WrapperIsRequired, - Symbol::STR_TO_F64 => WrapperIsRequired, - Symbol::STR_TO_F32 => WrapperIsRequired, - Symbol::STR_TO_NAT => WrapperIsRequired, - Symbol::STR_TO_U128 => WrapperIsRequired, - Symbol::STR_TO_I128 => WrapperIsRequired, - Symbol::STR_TO_U64 => WrapperIsRequired, - Symbol::STR_TO_I64 => WrapperIsRequired, - Symbol::STR_TO_U32 => WrapperIsRequired, - Symbol::STR_TO_I32 => WrapperIsRequired, - Symbol::STR_TO_U16 => WrapperIsRequired, - Symbol::STR_TO_I16 => WrapperIsRequired, - Symbol::STR_TO_U8 => WrapperIsRequired, - Symbol::STR_TO_I8 => WrapperIsRequired, - Symbol::LIST_LEN => CanBeReplacedBy(ListLen), - Symbol::LIST_GET => WrapperIsRequired, - Symbol::LIST_REPLACE => WrapperIsRequired, - Symbol::LIST_CONCAT => CanBeReplacedBy(ListConcat), - Symbol::LIST_APPEND_UNSAFE => CanBeReplacedBy(ListAppendUnsafe), - Symbol::LIST_PREPEND => CanBeReplacedBy(ListPrepend), - Symbol::LIST_MAP => WrapperIsRequired, - Symbol::LIST_MAP2 => WrapperIsRequired, - Symbol::LIST_MAP3 => WrapperIsRequired, - Symbol::LIST_MAP4 => WrapperIsRequired, - Symbol::LIST_SORT_WITH => WrapperIsRequired, - Symbol::LIST_SUBLIST => WrapperIsRequired, - Symbol::LIST_DROP_AT => CanBeReplacedBy(ListDropAt), - Symbol::LIST_SWAP => CanBeReplacedBy(ListSwap), - Symbol::LIST_ANY => WrapperIsRequired, - Symbol::LIST_ALL => WrapperIsRequired, - Symbol::LIST_FIND => WrapperIsRequired, - Symbol::DICT_LEN => CanBeReplacedBy(DictSize), - Symbol::DICT_EMPTY => CanBeReplacedBy(DictEmpty), - Symbol::DICT_INSERT => CanBeReplacedBy(DictInsert), - Symbol::DICT_REMOVE => CanBeReplacedBy(DictRemove), - Symbol::DICT_CONTAINS => CanBeReplacedBy(DictContains), - Symbol::DICT_GET => WrapperIsRequired, - Symbol::DICT_KEYS => CanBeReplacedBy(DictKeys), - Symbol::DICT_VALUES => CanBeReplacedBy(DictValues), - Symbol::DICT_UNION => CanBeReplacedBy(DictUnion), - Symbol::DICT_INTERSECTION => CanBeReplacedBy(DictIntersection), - Symbol::DICT_DIFFERENCE => CanBeReplacedBy(DictDifference), - Symbol::DICT_WALK => WrapperIsRequired, - Symbol::SET_FROM_LIST => CanBeReplacedBy(SetFromList), - Symbol::NUM_ADD => CanBeReplacedBy(NumAdd), - Symbol::NUM_ADD_WRAP => CanBeReplacedBy(NumAddWrap), - Symbol::NUM_ADD_CHECKED => WrapperIsRequired, - Symbol::NUM_ADD_SATURATED => CanBeReplacedBy(NumAddSaturated), - Symbol::NUM_SUB => CanBeReplacedBy(NumSub), - Symbol::NUM_SUB_WRAP => CanBeReplacedBy(NumSubWrap), - Symbol::NUM_SUB_CHECKED => WrapperIsRequired, - Symbol::NUM_SUB_SATURATED => CanBeReplacedBy(NumSubSaturated), - Symbol::NUM_MUL => CanBeReplacedBy(NumMul), - Symbol::NUM_MUL_WRAP => CanBeReplacedBy(NumMulWrap), - Symbol::NUM_MUL_SATURATED => CanBeReplacedBy(NumMulSaturated), - Symbol::NUM_MUL_CHECKED => WrapperIsRequired, - Symbol::NUM_GT => CanBeReplacedBy(NumGt), - Symbol::NUM_GTE => CanBeReplacedBy(NumGte), - Symbol::NUM_LT => CanBeReplacedBy(NumLt), - Symbol::NUM_LTE => CanBeReplacedBy(NumLte), - Symbol::NUM_COMPARE => CanBeReplacedBy(NumCompare), - Symbol::NUM_DIV_FRAC => CanBeReplacedBy(NumDivUnchecked), - Symbol::NUM_DIV_FRAC_CHECKED => WrapperIsRequired, - Symbol::NUM_DIV_CEIL => CanBeReplacedBy(NumDivCeilUnchecked), - Symbol::NUM_DIV_CEIL_CHECKED => WrapperIsRequired, - Symbol::NUM_REM => CanBeReplacedBy(NumRemUnchecked), - Symbol::NUM_REM_CHECKED => WrapperIsRequired, - Symbol::NUM_IS_MULTIPLE_OF => CanBeReplacedBy(NumIsMultipleOf), - Symbol::NUM_ABS => CanBeReplacedBy(NumAbs), - Symbol::NUM_NEG => CanBeReplacedBy(NumNeg), - Symbol::NUM_SIN => CanBeReplacedBy(NumSin), - Symbol::NUM_COS => CanBeReplacedBy(NumCos), - Symbol::NUM_SQRT => CanBeReplacedBy(NumSqrtUnchecked), - Symbol::NUM_SQRT_CHECKED => WrapperIsRequired, - Symbol::NUM_LOG => CanBeReplacedBy(NumLogUnchecked), - Symbol::NUM_LOG_CHECKED => WrapperIsRequired, - Symbol::NUM_ROUND => CanBeReplacedBy(NumRound), - Symbol::NUM_TO_FRAC => CanBeReplacedBy(NumToFrac), - Symbol::NUM_POW => CanBeReplacedBy(NumPow), - Symbol::NUM_CEILING => CanBeReplacedBy(NumCeiling), - Symbol::NUM_POW_INT => CanBeReplacedBy(NumPowInt), - Symbol::NUM_FLOOR => CanBeReplacedBy(NumFloor), - Symbol::NUM_TO_STR => CanBeReplacedBy(NumToStr), - // => CanBeReplacedBy(NumIsFinite), - Symbol::NUM_ATAN => CanBeReplacedBy(NumAtan), - Symbol::NUM_ACOS => CanBeReplacedBy(NumAcos), - Symbol::NUM_ASIN => CanBeReplacedBy(NumAsin), - Symbol::NUM_BYTES_TO_U16 => WrapperIsRequired, - Symbol::NUM_BYTES_TO_U32 => WrapperIsRequired, - Symbol::NUM_BITWISE_AND => CanBeReplacedBy(NumBitwiseAnd), - Symbol::NUM_BITWISE_XOR => CanBeReplacedBy(NumBitwiseXor), - Symbol::NUM_BITWISE_OR => CanBeReplacedBy(NumBitwiseOr), - Symbol::NUM_SHIFT_LEFT => CanBeReplacedBy(NumShiftLeftBy), - Symbol::NUM_SHIFT_RIGHT => CanBeReplacedBy(NumShiftRightBy), - Symbol::NUM_SHIFT_RIGHT_ZERO_FILL => CanBeReplacedBy(NumShiftRightZfBy), - Symbol::NUM_INT_CAST => CanBeReplacedBy(NumIntCast), - Symbol::BOOL_EQ => CanBeReplacedBy(Eq), - Symbol::BOOL_NEQ => CanBeReplacedBy(NotEq), - Symbol::BOOL_AND => CanBeReplacedBy(And), - Symbol::BOOL_OR => CanBeReplacedBy(Or), - Symbol::BOOL_NOT => CanBeReplacedBy(Not), - // => CanBeReplacedBy(Hash), - // => CanBeReplacedBy(ExpectTrue), - _ => NotALowLevelWrapper, - } + for_symbol_help(symbol) } } + +macro_rules! more_macro_magic { + ($($lowlevel:ident <= $symbol:ident)+) => { more_macro_magic!{$($lowlevel <= $symbol),+} }; + ($($lowlevel:ident <= $symbol:ident),*) => { + + fn for_symbol_help(symbol: Symbol) -> LowLevelWrapperType { + use $crate::low_level::LowLevelWrapperType::*; + + match symbol { + $( + Symbol::$symbol => CanBeReplacedBy(LowLevel::$lowlevel), + )* + + Symbol::LIST_MAP => WrapperIsRequired, + Symbol::LIST_MAP2 => WrapperIsRequired, + Symbol::LIST_MAP3 => WrapperIsRequired, + Symbol::LIST_MAP4 => WrapperIsRequired, + Symbol::DICT_WALK => WrapperIsRequired, + + Symbol::LIST_SORT_WITH => WrapperIsRequired, + Symbol::DICT_GET => WrapperIsRequired, + + _ => NotALowLevelWrapper, + } + } + + fn _enforce_exhaustiveness(lowlevel: LowLevel) -> Symbol { + match lowlevel { + $( + LowLevel::$lowlevel => Symbol::$symbol, + )* + + // these are higher-order lowlevels, hence need the surrounding function to provide + // enough type information for code generation + LowLevel::ListMap => unreachable!(), + LowLevel::ListMap2 => unreachable!(), + LowLevel::ListMap3 => unreachable!(), + LowLevel::ListMap4 => unreachable!(), + LowLevel::ListSortWith => unreachable!(), + LowLevel::DictWalk => unreachable!(), + + // The BoxExpr operation is turned into mono Expr + LowLevel::BoxExpr => unreachable!(), + LowLevel::UnboxExpr => unreachable!(), + + // these are implemented explicitly in for_symbol because they are polymorphic + LowLevel::NumIntCast => unreachable!(), + LowLevel::NumToFloatCast => unreachable!(), + LowLevel::NumToIntChecked => unreachable!(), + LowLevel::NumToFloatChecked => unreachable!(), + LowLevel::NumDivUnchecked => unreachable!(), + LowLevel::DictEmpty => unreachable!(), + + // these are used internally and not tied to a symbol + LowLevel::Hash => unimplemented!(), + LowLevel::PtrCast => unimplemented!(), + LowLevel::RefCountInc => unimplemented!(), + LowLevel::RefCountDec => unimplemented!(), + + // these are not implemented, not sure why + LowLevel::StrFromInt => unimplemented!(), + LowLevel::StrFromFloat => unimplemented!(), + LowLevel::NumIsFinite => unimplemented!(), + + } + } + }; +} + +more_macro_magic! { + StrConcat <= STR_CONCAT, + StrJoinWith <= STR_JOIN_WITH, + StrIsEmpty <= STR_IS_EMPTY, + StrStartsWith <= STR_STARTS_WITH, + StrStartsWithScalar <= STR_STARTS_WITH_SCALAR, + StrEndsWith <= STR_ENDS_WITH, + StrSplit <= STR_SPLIT, + StrCountGraphemes <= STR_COUNT_GRAPHEMES, + StrCountUtf8Bytes <= STR_COUNT_UTF8_BYTES, + StrFromUtf8Range <= STR_FROM_UTF8_RANGE_LOWLEVEL, + StrToUtf8 <= STR_TO_UTF8, + StrRepeat <= STR_REPEAT, + StrTrim <= STR_TRIM, + StrTrimLeft <= STR_TRIM_LEFT, + StrTrimRight <= STR_TRIM_RIGHT, + StrToScalars <= STR_TO_SCALARS, + StrGetUnsafe <= STR_GET_UNSAFE, + StrSubstringUnsafe <= STR_SUBSTRING_UNSAFE, + StrReserve <= STR_RESERVE, + StrAppendScalar <= STR_APPEND_SCALAR_UNSAFE, + StrGetScalarUnsafe <= STR_GET_SCALAR_UNSAFE, + StrToNum <= STR_TO_NUM, + ListLen <= LIST_LEN, + ListWithCapacity <= LIST_WITH_CAPACITY, + ListReserve <= LIST_RESERVE, + ListIsUnique <= LIST_IS_UNIQUE, + ListAppendUnsafe <= LIST_APPEND_UNSAFE, + ListPrepend <= LIST_PREPEND, + ListGetUnsafe <= LIST_GET_UNSAFE, + ListReplaceUnsafe <= LIST_REPLACE_UNSAFE, + ListConcat <= LIST_CONCAT, + ListSublist <= LIST_SUBLIST_LOWLEVEL, + ListDropAt <= LIST_DROP_AT, + ListSwap <= LIST_SWAP, + DictSize <= DICT_LEN, + DictInsert <= DICT_INSERT, + DictRemove <= DICT_REMOVE, + DictContains <= DICT_CONTAINS, + DictGetUnsafe <= DICT_GET_LOWLEVEL, + DictKeys <= DICT_KEYS, + DictValues <= DICT_VALUES, + DictUnion <= DICT_UNION, + DictIntersection <= DICT_INTERSECTION, + DictDifference <= DICT_DIFFERENCE, + SetFromList <= SET_FROM_LIST, + SetToDict <= SET_TO_DICT, + NumAdd <= NUM_ADD, + NumAddWrap <= NUM_ADD_WRAP, + NumAddChecked <= NUM_ADD_CHECKED_LOWLEVEL, + NumAddSaturated <= NUM_ADD_SATURATED, + NumSub <= NUM_SUB, + NumSubWrap <= NUM_SUB_WRAP, + NumSubChecked <= NUM_SUB_CHECKED_LOWLEVEL, + NumSubSaturated <= NUM_SUB_SATURATED, + NumMul <= NUM_MUL, + NumMulWrap <= NUM_MUL_WRAP, + NumMulSaturated <= NUM_MUL_SATURATED, + NumMulChecked <= NUM_MUL_CHECKED_LOWLEVEL, + NumGt <= NUM_GT, + NumGte <= NUM_GTE, + NumLt <= NUM_LT, + NumLte <= NUM_LTE, + NumCompare <= NUM_COMPARE, + NumDivCeilUnchecked <= NUM_DIV_CEIL, + NumRemUnchecked <= NUM_REM, + NumIsMultipleOf <= NUM_IS_MULTIPLE_OF, + NumAbs <= NUM_ABS, + NumNeg <= NUM_NEG, + NumSin <= NUM_SIN, + NumCos <= NUM_COS, + NumSqrtUnchecked <= NUM_SQRT, + NumLogUnchecked <= NUM_LOG, + NumRound <= NUM_ROUND, + NumToFrac <= NUM_TO_FRAC, + NumPow <= NUM_POW, + NumCeiling <= NUM_CEILING, + NumPowInt <= NUM_POW_INT, + NumFloor <= NUM_FLOOR, + NumAtan <= NUM_ATAN, + NumAcos <= NUM_ACOS, + NumAsin <= NUM_ASIN, + NumBytesToU16 <= NUM_BYTES_TO_U16_LOWLEVEL, + NumBytesToU32 <= NUM_BYTES_TO_U32_LOWLEVEL, + NumBitwiseAnd <= NUM_BITWISE_AND, + NumBitwiseXor <= NUM_BITWISE_XOR, + NumBitwiseOr <= NUM_BITWISE_OR, + NumShiftLeftBy <= NUM_SHIFT_LEFT, + NumShiftRightBy <= NUM_SHIFT_RIGHT, + NumShiftRightZfBy <= NUM_SHIFT_RIGHT_ZERO_FILL, + NumToStr <= NUM_TO_STR, + Eq <= BOOL_EQ, + NotEq <= BOOL_NEQ, + And <= BOOL_AND, + Or <= BOOL_OR, + Not <= BOOL_NOT, + Unreachable <= LIST_UNREACHABLE +} diff --git a/crates/compiler/module/src/symbol.rs b/crates/compiler/module/src/symbol.rs index cb1dce8b53..6f7852dfaf 100644 --- a/crates/compiler/module/src/symbol.rs +++ b/crates/compiler/module/src/symbol.rs @@ -1283,6 +1283,7 @@ define_builtins! { 64 LIST_UNREACHABLE: "unreachable" 65 LIST_RESERVE: "reserve" 66 LIST_APPEND_UNSAFE: "appendUnsafe" + 67 LIST_SUBLIST_LOWLEVEL: "sublistLowlevel" } 6 RESULT: "Result" => { 0 RESULT_RESULT: "Result" // the Result.Result type alias diff --git a/crates/compiler/mono/src/borrow.rs b/crates/compiler/mono/src/borrow.rs index a68fb083fd..0b60915f5f 100644 --- a/crates/compiler/mono/src/borrow.rs +++ b/crates/compiler/mono/src/borrow.rs @@ -912,7 +912,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { ListAppendUnsafe => arena.alloc_slice_copy(&[owned, owned]), ListReserve => arena.alloc_slice_copy(&[owned, irrelevant]), - ListSublist => arena.alloc_slice_copy(&[owned, irrelevant]), + ListSublist => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]), ListDropAt => arena.alloc_slice_copy(&[owned, irrelevant]), ListSwap => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]), @@ -934,8 +934,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { NumBytesToU32 => arena.alloc_slice_copy(&[borrowed, irrelevant]), StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[borrowed, borrowed]), StrStartsWithScalar => arena.alloc_slice_copy(&[borrowed, irrelevant]), - StrFromUtf8 => arena.alloc_slice_copy(&[owned]), - StrFromUtf8Range => arena.alloc_slice_copy(&[borrowed, irrelevant]), + StrFromUtf8Range => arena.alloc_slice_copy(&[borrowed, irrelevant, irrelevant]), StrToUtf8 => arena.alloc_slice_copy(&[owned]), StrRepeat => arena.alloc_slice_copy(&[borrowed, irrelevant]), StrFromInt | StrFromFloat => arena.alloc_slice_copy(&[irrelevant]),