diff --git a/crates/compiler/builtins/bitcode/src/main.zig b/crates/compiler/builtins/bitcode/src/main.zig
index 9006e083d6..a408b1b7e1 100644
--- a/crates/compiler/builtins/bitcode/src/main.zig
+++ b/crates/compiler/builtins/bitcode/src/main.zig
@@ -159,6 +159,7 @@ comptime {
     exportStrFn(str.substringUnsafe, "substring_unsafe");
     exportStrFn(str.getUnsafe, "get_unsafe");
     exportStrFn(str.reserve, "reserve");
+    exportStrFn(str.getScalarUnsafe, "get_scalar_unsafe");
     exportStrFn(str.appendScalar, "append_scalar");
     exportStrFn(str.strToUtf8C, "to_utf8");
     exportStrFn(str.fromUtf8C, "from_utf8");
diff --git a/crates/compiler/builtins/bitcode/src/str.zig b/crates/compiler/builtins/bitcode/src/str.zig
index beba67172c..9bae884346 100644
--- a/crates/compiler/builtins/bitcode/src/str.zig
+++ b/crates/compiler/builtins/bitcode/src/str.zig
@@ -2469,3 +2469,25 @@ pub fn reserve(string: RocStr, capacity: usize) callconv(.C) RocStr {
         return string;
     }
 }
+
+// Decodes the scalar whose UTF-8 encoding starts at byte `index` and reports how many bytes it spans.
+// "Unsafe": an out-of-range index or malformed sequence is asserted unreachable, not reported to the caller.
+pub fn getScalarUnsafe(string: RocStr, index: usize) callconv(.C) extern struct { bytesParsed: usize, scalar: u32 } {
+    const slice = string.asSlice();
+    const bytesParsed = @intCast(usize, std.unicode.utf8ByteSequenceLength(slice[index]) catch unreachable);
+    const scalar = std.unicode.utf8Decode(slice[index .. index + bytesParsed]) catch unreachable;
+
+    return .{ .bytesParsed = bytesParsed, .scalar = @intCast(u32, scalar) };
+}
+
+test "getScalarUnsafe" {
+    const data_bytes = "A";
+    var data = RocStr.init(data_bytes, data_bytes.len);
+
+    const result = getScalarUnsafe(data, 0);
+
+    const expected = try std.unicode.utf8Decode("A");
+
+    try expectEqual(result.scalar, @intCast(u32, expected));
+    try expectEqual(result.bytesParsed, 1);
+}
diff --git a/crates/compiler/builtins/roc/Str.roc b/crates/compiler/builtins/roc/Str.roc
index aa99322774..2a6dfc4428 100644
--- a/crates/compiler/builtins/roc/Str.roc
+++ b/crates/compiler/builtins/roc/Str.roc
@@ -39,6 +39,8 @@ interface Str
         walkUtf8WithIndex,
         reserve,
         appendScalar,
+        walkScalars,
+        walkScalarsUntil,
     ]
     imports [Bool.{ Bool }, Result.{ Result }]
 
@@ -360,3 +362,38 @@ appendScalar = \string, scalar ->
 isValidScalar : U32 -> Bool
 isValidScalar = \scalar ->
     scalar <= 0xD7FF || (scalar >= 0xE000 && scalar <= 0x10FFFF)
+
+getScalarUnsafe : Str, Nat -> { scalar : U32, bytesParsed : Nat }
+
+## Walks over the string's scalars (as U32 values) from first to last, threading a state through `step`.
+walkScalars : Str, state, (state, U32 -> state) -> state
+walkScalars = \string, init, step ->
+    walkScalarsHelp string init step 0 (Str.countUtf8Bytes string)
+
+walkScalarsHelp : Str, state, (state, U32 -> state), Nat, Nat -> state
+walkScalarsHelp = \string, state, step, index, length ->
+    if index < length then
+        { scalar, bytesParsed } = getScalarUnsafe string index
+        newState = step state scalar
+
+        walkScalarsHelp string newState step (index + bytesParsed) length
+    else
+        state
+
+## Like `walkScalars`, but `step` may return `Break` to stop the walk early with the given state.
+walkScalarsUntil : Str, state, (state, U32 -> [Break state, Continue state]) -> state
+walkScalarsUntil = \string, init, step ->
+    walkScalarsUntilHelp string init step 0 (Str.countUtf8Bytes string)
+
+walkScalarsUntilHelp : Str, state, (state, U32 -> [Break state, Continue state]), Nat, Nat -> state
+walkScalarsUntilHelp = \string, state, step, index, length ->
+    if index < length then
+        { scalar, bytesParsed } = getScalarUnsafe string index
+
+        when step state scalar is
+            Continue newState ->
+                walkScalarsUntilHelp string newState step (index + bytesParsed) length
+            Break newState ->
+                newState
+    else
+        state
diff --git a/crates/compiler/builtins/src/bitcode.rs b/crates/compiler/builtins/src/bitcode.rs
index 7173e94c5f..a79ba3b7b1 100644
--- a/crates/compiler/builtins/src/bitcode.rs
+++ b/crates/compiler/builtins/src/bitcode.rs
@@ -336,6 +336,7 @@ pub const STR_TRIM_RIGHT: &str = "roc_builtins.str.trim_right";
 pub const STR_GET_UNSAFE: &str = "roc_builtins.str.get_unsafe";
 pub const STR_RESERVE: &str = "roc_builtins.str.reserve";
 pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar";
+pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe";
 
 pub const DICT_HASH: &str = "roc_builtins.dict.hash";
 pub const DICT_HASH_STR: &str = "roc_builtins.dict.hash_str";
diff --git a/crates/compiler/can/src/builtins.rs b/crates/compiler/can/src/builtins.rs
index 71875966b3..c2c1acd1bb 100644
--- a/crates/compiler/can/src/builtins.rs
+++ b/crates/compiler/can/src/builtins.rs
@@ -85,6 +85,7 @@ pub fn builtin_defs_map(symbol: Symbol, var_store: &mut VarStore) -> Option
         STR_SUBSTRING_UNSAFE => str_substring_unsafe,
         STR_RESERVE => str_reserve,
         STR_APPEND_SCALAR_UNSAFE => str_append_scalar_unsafe,
+        STR_GET_SCALAR_UNSAFE => str_get_scalar_unsafe,
         STR_FROM_UTF8 => str_from_utf8,
         STR_FROM_UTF8_RANGE => str_from_utf8_range,
         STR_TO_UTF8 => str_to_utf8,
@@ -1749,6 +1750,11 @@ fn str_append_scalar_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
     lowlevel_2(symbol, LowLevel::StrAppendScalar, var_store)
 }
 
+/// Str.getScalarUnsafe : Str, Nat -> { scalar : U32, bytesParsed : Nat }
+fn str_get_scalar_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
+    lowlevel_2(symbol, LowLevel::StrGetScalarUnsafe, var_store)
+}
+
 /// Str.fromUtf8 : List U8 -> Result Str [BadUtf8 { byteIndex : Nat, problem : Utf8Problem } }]*
 fn str_from_utf8(symbol: Symbol, var_store: &mut VarStore) -> Def {
     let bytes_var = var_store.fresh();
diff --git a/crates/compiler/gen_llvm/src/llvm/build.rs b/crates/compiler/gen_llvm/src/llvm/build.rs
index 5d53a2a84c..97edb5d6c8 100644
--- a/crates/compiler/gen_llvm/src/llvm/build.rs
+++ b/crates/compiler/gen_llvm/src/llvm/build.rs
@@ -5371,6 +5371,14 @@ fn run_low_level<'a, 'ctx, 'env>(
             let string = load_symbol(scope, &args[0]);
             call_bitcode_fn(env, &[string], bitcode::STR_COUNT_GRAPEHEME_CLUSTERS)
         }
+        StrGetScalarUnsafe => {
+            // Str.getScalarUnsafe : Str, Nat -> { bytesParsed : Nat, scalar : U32 }
+            debug_assert_eq!(args.len(), 2);
+
+            let string = load_symbol(scope, &args[0]);
+            let index = load_symbol(scope, &args[1]);
+            call_bitcode_fn(env, &[string, index], bitcode::STR_GET_SCALAR_UNSAFE)
+        }
         StrCountUtf8Bytes => {
             // Str.countGraphemes : Str -> Nat
             debug_assert_eq!(args.len(), 1);
diff --git a/crates/compiler/gen_wasm/src/low_level.rs b/crates/compiler/gen_wasm/src/low_level.rs
index cb7fbab7f8..a8f72562ee 100644
--- a/crates/compiler/gen_wasm/src/low_level.rs
+++ b/crates/compiler/gen_wasm/src/low_level.rs
@@ -293,6 +293,9 @@ impl<'a> LowLevelCall<'a> {
             StrRepeat => self.load_args_and_call_zig(backend, bitcode::STR_REPEAT),
             StrAppendScalar => self.load_args_and_call_zig(backend, bitcode::STR_APPEND_SCALAR),
             StrTrim => self.load_args_and_call_zig(backend, bitcode::STR_TRIM),
+            StrGetScalarUnsafe => {
+                self.load_args_and_call_zig(backend, bitcode::STR_GET_SCALAR_UNSAFE)
+            }
             StrSubstringUnsafe => {
                 self.load_args_and_call_zig(backend, bitcode::STR_SUBSTRING_UNSAFE)
             }
diff --git a/crates/compiler/gen_wasm/src/wasm32_result.rs b/crates/compiler/gen_wasm/src/wasm32_result.rs
index 1863bff7e6..9c5ae09f77 100644
--- a/crates/compiler/gen_wasm/src/wasm32_result.rs
+++ b/crates/compiler/gen_wasm/src/wasm32_result.rs
@@ -176,6 +176,7 @@ wasm_result_primitive!(u16, i32_store16, Align::Bytes2);
 wasm_result_primitive!(i16, i32_store16, Align::Bytes2);
 wasm_result_primitive!(u32, i32_store, Align::Bytes4);
 wasm_result_primitive!(i32, i32_store, Align::Bytes4);
+wasm_result_primitive!(char, i32_store, Align::Bytes4);
 wasm_result_primitive!(u64, i64_store, Align::Bytes8);
 wasm_result_primitive!(i64, i64_store, Align::Bytes8);
 wasm_result_primitive!(usize, i32_store, Align::Bytes4);
diff --git a/crates/compiler/gen_wasm/src/wasm32_sized.rs b/crates/compiler/gen_wasm/src/wasm32_sized.rs
index 358190d152..3ba889c022 100644
--- a/crates/compiler/gen_wasm/src/wasm32_sized.rs
+++ b/crates/compiler/gen_wasm/src/wasm32_sized.rs
@@ -23,7 +23,7 @@ macro_rules! wasm32_sized_primitive {
 }
 
 wasm32_sized_primitive!(
-    u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
+    u8, i8, u16, i16, u32, i32, char, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
 );
 
 impl Wasm32Sized for () {
diff --git a/crates/compiler/module/src/low_level.rs b/crates/compiler/module/src/low_level.rs
index ec1776366d..1c35e14b01 100644
--- a/crates/compiler/module/src/low_level.rs
+++ b/crates/compiler/module/src/low_level.rs
@@ -29,6 +29,7 @@ pub enum LowLevel {
     StrSubstringUnsafe,
     StrReserve,
     StrAppendScalar,
+    StrGetScalarUnsafe,
     ListLen,
     ListWithCapacity,
     ListGetUnsafe,
diff --git a/crates/compiler/module/src/symbol.rs b/crates/compiler/module/src/symbol.rs
index 9d8e693b4a..da431383e7 100644
--- a/crates/compiler/module/src/symbol.rs
+++ b/crates/compiler/module/src/symbol.rs
@@ -1199,6 +1199,9 @@ define_builtins! {
         41 STR_RESERVE: "reserve"
         42 STR_APPEND_SCALAR_UNSAFE: "appendScalarUnsafe"
         43 STR_APPEND_SCALAR: "appendScalar"
+        44 STR_GET_SCALAR_UNSAFE: "getScalarUnsafe"
+        45 STR_WALK_SCALARS: "walkScalars"
+        46 STR_WALK_SCALARS_UNTIL: "walkScalarsUntil"
     }
     5 LIST: "List" => {
         0 LIST_LIST: "List" imported // the List.List type alias
diff --git a/crates/compiler/mono/src/borrow.rs b/crates/compiler/mono/src/borrow.rs
index d4c39724a1..64f86c55bd 100644
--- a/crates/compiler/mono/src/borrow.rs
+++ b/crates/compiler/mono/src/borrow.rs
@@ -902,6 +902,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
         StrSubstringUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
         StrReserve => arena.alloc_slice_copy(&[owned, irrelevant]),
         StrAppendScalar => arena.alloc_slice_copy(&[owned, irrelevant]),
+        StrGetScalarUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
         StrTrim => arena.alloc_slice_copy(&[owned]),
         StrTrimLeft => arena.alloc_slice_copy(&[owned]),
         StrTrimRight => arena.alloc_slice_copy(&[owned]),
diff --git a/crates/compiler/test_gen/src/gen_str.rs b/crates/compiler/test_gen/src/gen_str.rs
index 90f7223932..3ee7db96c1 100644
--- a/crates/compiler/test_gen/src/gen_str.rs
+++ b/crates/compiler/test_gen/src/gen_str.rs
@@ -1758,3 +1758,17 @@ fn str_append_scalar() {
         RocStr
     );
 }
+
+#[test]
+#[cfg(any(feature = "gen-llvm"))]
+fn str_walk_scalars() {
+    assert_evals_to!(
+        indoc!(
+            r#"
+            Str.walkScalars "abcd" [] List.append
+            "#
+        ),
+        RocList::from_slice(&['a', 'b', 'c', 'd']),
+        RocList<char>
+    );
+}
diff --git a/crates/compiler/test_gen/src/helpers/from_wasmer_memory.rs b/crates/compiler/test_gen/src/helpers/from_wasmer_memory.rs
index e8f5a3da5f..d8e149959f 100644
--- a/crates/compiler/test_gen/src/helpers/from_wasmer_memory.rs
+++ b/crates/compiler/test_gen/src/helpers/from_wasmer_memory.rs
@@ -40,7 +40,7 @@ macro_rules! from_wasm_memory_primitive {
 }
 
 from_wasm_memory_primitive!(
-    u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
+    u8, i8, u16, i16, u32, i32, char, u64, i64, u128, i128, f32, f64, bool, RocDec, RocOrder,
 );
 
 impl FromWasmerMemory for () {
diff --git a/crates/compiler/test_gen/src/wasm_str.rs b/crates/compiler/test_gen/src/wasm_str.rs
index e2458572ac..02e5645cc2 100644
--- a/crates/compiler/test_gen/src/wasm_str.rs
+++ b/crates/compiler/test_gen/src/wasm_str.rs
@@ -1314,3 +1314,16 @@ fn str_to_dec() {
         RocDec
     );
 }
+
+#[test]
+fn str_walk_scalars() {
+    assert_evals_to!(
+        indoc!(
+            r#"
+            Str.walkScalars "abcd" [] List.append
+            "#
+        ),
+        RocList::from_slice(&['a', 'b', 'c', 'd']),
+        RocList<char>
+    );
+}