diff --git a/crates/compiler/builtins/bitcode/src/main.zig b/crates/compiler/builtins/bitcode/src/main.zig
index 97051c6d6f..417e0005fe 100644
--- a/crates/compiler/builtins/bitcode/src/main.zig
+++ b/crates/compiler/builtins/bitcode/src/main.zig
@@ -158,6 +158,7 @@ comptime {
     exportStrFn(str.substringUnsafe, "substring_unsafe");
     exportStrFn(str.getUnsafe, "get_unsafe");
     exportStrFn(str.reserve, "reserve");
+    exportStrFn(str.appendScalar, "append_scalar");
     exportStrFn(str.strToUtf8C, "to_utf8");
     exportStrFn(str.fromUtf8C, "from_utf8");
     exportStrFn(str.fromUtf8RangeC, "from_utf8_range");
diff --git a/crates/compiler/builtins/roc/Str.roc b/crates/compiler/builtins/roc/Str.roc
index e5418bd5bc..aa99322774 100644
--- a/crates/compiler/builtins/roc/Str.roc
+++ b/crates/compiler/builtins/roc/Str.roc
@@ -37,7 +37,8 @@ interface Str
         splitFirst,
         splitLast,
         walkUtf8WithIndex,
-        reserve
+        reserve,
+        appendScalar,
     ]
     imports [Bool.{ Bool }, Result.{ Result }]
 
@@ -345,3 +346,21 @@ walkUtf8WithIndexHelp = \string, state, step, index, length ->
 
 ## Make sure at least some number of bytes fit in this string without reallocating
 reserve : Str, Nat -> Str
+
+## Append a scalar to the end of a string without checking that the scalar is valid.
+## Behavior is undefined when the scalar is invalid; prefer `appendScalar`.
+appendScalarUnsafe : Str, U32 -> Str
+
+## Append a scalar to the end of a string.
+## Returns `Err InvalidScalar` when the scalar fails the `isValidScalar` check.
+appendScalar : Str, U32 -> Result Str [InvalidScalar]*
+appendScalar = \string, scalar ->
+    if isValidScalar scalar then
+        Ok (appendScalarUnsafe string scalar)
+    else
+        Err InvalidScalar
+
+## A scalar is valid when it is at most 0x10FFFF and not in the range 0xD800-0xDFFF.
+isValidScalar : U32 -> Bool
+isValidScalar = \scalar ->
+    scalar <= 0xD7FF || (scalar >= 0xE000 && scalar <= 0x10FFFF)
diff --git a/crates/compiler/builtins/src/bitcode.rs b/crates/compiler/builtins/src/bitcode.rs
index 10218002d8..3680c2fb59 100644
--- a/crates/compiler/builtins/src/bitcode.rs
+++ b/crates/compiler/builtins/src/bitcode.rs
@@ -334,6 +334,7 @@ pub const STR_TRIM_LEFT: &str = "roc_builtins.str.trim_left";
 pub const STR_TRIM_RIGHT: &str = "roc_builtins.str.trim_right";
 pub const STR_GET_UNSAFE: &str = "roc_builtins.str.get_unsafe";
 pub const STR_RESERVE: &str = "roc_builtins.str.reserve";
+pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar";
 
 pub const DICT_HASH: &str = "roc_builtins.dict.hash";
 pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";
diff --git a/crates/compiler/can/src/builtins.rs b/crates/compiler/can/src/builtins.rs
index c729f950cf..170387ec1f 100644
--- a/crates/compiler/can/src/builtins.rs
+++ b/crates/compiler/can/src/builtins.rs
@@ -84,6 +84,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option<Def>
         STR_COUNT_UTF8_BYTES => str_count_bytes,
         STR_SUBSTRING_UNSAFE => str_substring_unsafe,
         STR_RESERVE => str_reserve,
+        STR_APPEND_SCALAR_UNSAFE => str_append_scalar_unsafe,
         STR_FROM_UTF8 => str_from_utf8,
         STR_FROM_UTF8_RANGE => str_from_utf8_range,
         STR_TO_UTF8 => str_to_utf8,
@@ -1743,6 +1744,11 @@ fn str_reserve(symbol: Symbol, var_store: &mut VarStore) -> Def {
     lowlevel_2(symbol, LowLevel::StrReserve, var_store)
 }
 
+/// Str.appendScalarUnsafe : Str, U32 -> Str
+fn str_append_scalar_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_2(symbol, LowLevel::StrAppendScalar, var_store)
+}
+
 /// Str.fromUtf8 : List U8 -> Result Str [BadUtf8 { byteIndex : Nat, problem : Utf8Problem } }]*
 fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let bytes_var = var_store.fresh();
diff --git a/crates/compiler/gen_llvm/src/llvm/build.rs b/crates/compiler/gen_llvm/src/llvm/build.rs
index 38ad8b1e32..76def28c92 100644
--- a/crates/compiler/gen_llvm/src/llvm/build.rs
+++ b/crates/compiler/gen_llvm/src/llvm/build.rs
@@ -5364,6 +5364,14 @@ fn run_low_level<'a, 'ctx, 'env>(
             let capacity = load_symbol(scope, &args[1]);
             call_str_bitcode_fn(env, &[string, capacity], bitcode::STR_RESERVE)
         }
+        StrAppendScalar => {
+            // Str.appendScalarUnsafe : Str, U32 -> Str
+            debug_assert_eq!(args.len(), 2);
+
+            let string = load_symbol(scope, &args[0]);
+            let scalar = load_symbol(scope, &args[1]);
+            call_str_bitcode_fn(env, &[string, scalar], bitcode::STR_APPEND_SCALAR)
+        }
         StrTrim => {
             // Str.trim : Str -> Str
             debug_assert_eq!(args.len(), 1);
diff --git a/crates/compiler/gen_wasm/src/low_level.rs b/crates/compiler/gen_wasm/src/low_level.rs
index 25ff769f0c..639faa0c6d 100644
--- a/crates/compiler/gen_wasm/src/low_level.rs
+++ b/crates/compiler/gen_wasm/src/low_level.rs
@@ -279,6 +279,7 @@ impl<'a> LowLevelCall<'a> {
             StrToUtf8 => self.load_args_and_call_zig(backend, bitcode::STR_TO_UTF8),
             StrReserve => self.load_args_and_call_zig(backend, bitcode::STR_RESERVE),
             StrRepeat => self.load_args_and_call_zig(backend, bitcode::STR_REPEAT),
+            StrAppendScalar => self.load_args_and_call_zig(backend, bitcode::STR_APPEND_SCALAR),
             StrTrim => self.load_args_and_call_zig(backend, bitcode::STR_TRIM),
             StrSubstringUnsafe => {
                 self.load_args_and_call_zig(backend, bitcode::STR_SUBSTRING_UNSAFE)
diff --git a/crates/compiler/module/src/low_level.rs b/crates/compiler/module/src/low_level.rs
index b33273137d..ec1776366d 100644
--- a/crates/compiler/module/src/low_level.rs
+++ b/crates/compiler/module/src/low_level.rs
@@ -28,6 +28,7 @@ pub enum LowLevel {
     StrGetUnsafe,
     StrSubstringUnsafe,
     StrReserve,
+    StrAppendScalar,
     ListLen,
     ListWithCapacity,
     ListGetUnsafe,
@@ -185,6 +186,8 @@ impl LowLevelWrapperType {
             Symbol::STR_FROM_UTF8_RANGE => WrapperIsRequired,
             Symbol::STR_TO_UTF8 => CanBeReplacedBy(StrToUtf8),
             Symbol::STR_REPEAT => CanBeReplacedBy(StrRepeat),
+            Symbol::STR_RESERVE => CanBeReplacedBy(StrReserve),
+            Symbol::STR_APPEND_SCALAR_UNSAFE => CanBeReplacedBy(StrAppendScalar),
             Symbol::STR_TRIM => CanBeReplacedBy(StrTrim),
             Symbol::STR_TRIM_LEFT => CanBeReplacedBy(StrTrimLeft),
             Symbol::STR_TRIM_RIGHT => CanBeReplacedBy(StrTrimRight),
diff --git a/crates/compiler/module/src/symbol.rs b/crates/compiler/module/src/symbol.rs
index 0cace557c3..9d8e693b4a 100644
--- a/crates/compiler/module/src/symbol.rs
+++ b/crates/compiler/module/src/symbol.rs
@@ -1197,6 +1197,8 @@ define_builtins! {
         39 STR_SPLIT_LAST: "splitLast"
         40 STR_WALK_UTF8_WITH_INDEX: "walkUtf8WithIndex"
         41 STR_RESERVE: "reserve"
+        42 STR_APPEND_SCALAR_UNSAFE: "appendScalarUnsafe"
+        43 STR_APPEND_SCALAR: "appendScalar"
     }
     5 LIST: "List" => {
         0 LIST_LIST: "List" imported // the List.List type alias
diff --git a/crates/compiler/mono/src/borrow.rs b/crates/compiler/mono/src/borrow.rs
index 63a7462bfd..5e85d14ced 100644
--- a/crates/compiler/mono/src/borrow.rs
+++ b/crates/compiler/mono/src/borrow.rs
@@ -897,6 +897,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         StrConcat => arena.alloc_slice_copy(&[owned, borrowed]),
         StrSubstringUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
         StrReserve => arena.alloc_slice_copy(&[owned, irrelevant]),
+        StrAppendScalar => arena.alloc_slice_copy(&[owned, irrelevant]),
         StrTrim => arena.alloc_slice_copy(&[owned]),
         StrTrimLeft => arena.alloc_slice_copy(&[owned]),
         StrTrimRight => arena.alloc_slice_copy(&[owned]),
diff --git a/crates/compiler/test_gen/src/gen_str.rs b/crates/compiler/test_gen/src/gen_str.rs
index 94f68c19a9..a8b78b0e58 100644
--- a/crates/compiler/test_gen/src/gen_str.rs
+++ b/crates/compiler/test_gen/src/gen_str.rs
@@ -1744,3 +1744,18 @@ fn str_walk_utf8_with_index() {
         RocList<(u64, u8)>
     );
 }
+
+#[test]
+#[cfg(any(feature = "gen-llvm"))]
+fn str_append_scalar() {
+    assert_evals_to!(
+        indoc!(
+            r#"
+            Str.appendScalar "abcd" 'A'
+                |> Result.withDefault ""
+            "#
+        ),
+        RocStr::from("abcdA"),
+        RocStr
+    );
+}