Add Str.walkUtf8

2025-09-27 05:49:08 +00:00 · 2023-03-29 15:52:08 -04:00 · 2023-03-29 15:52:08 -04:00 · c3c1b8d083
commit c3c1b8d083
parent f7e96ecf82
3 changed files with 56 additions and 0 deletions
--- a/crates/compiler/builtins/roc/Str.roc
+++ b/crates/compiler/builtins/roc/Str.roc
@ -107,6 +107,7 @@ interface Str
        replaceLast,
        splitFirst,
        splitLast,
        walkUtf8,
        walkUtf8WithIndex,
        reserve,
        releaseExcessCapacity,
@ -841,6 +842,33 @@ walkUtf8WithIndexHelp = \string, state, step, index, length ->
    else
        state
 ## Walks over the `UTF-8` bytes of the given [Str] and calls a function to update
 ## state for each byte.
 ##
 ## The step function receives the current state and the next byte, starting with
 ## the initial state and the first byte of the string. The state returned for the
 ## last byte is the result; an empty string returns the initial state unchanged.
 ##
 ## ```
 ## bytes = Str.walkUtf8 "ABC" [] List.append
 ## expect bytes == [65, 66, 67]
 ## ```
 walkUtf8 : Str, state, (state, U8 -> state) -> state
 walkUtf8 = \str, initial, step ->
    walkUtf8Help str initial step 0 (Str.countUtf8Bytes str)
 # Recursive worker for `walkUtf8`: applies `step` to the byte at `cursor`, then
 # moves on to `cursor + 1`, stopping once `cursor` reaches `total`.
 walkUtf8Help : Str, state, (state, U8 -> state), Nat, Nat -> state
 walkUtf8Help = \str, acc, step, cursor, total ->
    if cursor >= total then
        acc
    else
        # `cursor < total` holds here, which is what guards the unchecked read.
        walkUtf8Help str (step acc (Str.getUnsafe str cursor)) step (cursor + 1) total
 # Test walkUtf8 with a simple ASCII string: each character is a single byte,
 # so the walk yields one value per character ("A" = 65, "B" = 66, "C" = 67).
 expect (walkUtf8 "ABC" [] List.append) == [65, 66, 67]
 # Test walkUtf8 with a multi-byte string: the single character "鹏" is visited
 # as its three UTF-8 bytes (0xE9, 0xB9, 0x8F), not as one code point.
 expect (walkUtf8 "鹏" [] List.append) == [233, 185, 143]
 ## Shrink the memory footprint of a str such that its capacity and length are equal.
 ## Note: This will also convert seamless slices to regular lists.
 releaseExcessCapacity : Str -> Str
--- a/crates/compiler/module/src/symbol.rs
+++ b/crates/compiler/module/src/symbol.rs
@ -1328,6 +1328,7 @@ define_builtins! {
        55 STR_GRAPHEMES: "graphemes"
        56 STR_IS_VALID_SCALAR: "isValidScalar"
        57 STR_RELEASE_EXCESS_CAPACITY: "releaseExcessCapacity"
        58 STR_WALK_UTF8: "walkUtf8"
    }
    6 LIST: "List" => {
        0 LIST_LIST: "List" exposed_apply_type=true // the List.List type alias
--- a/crates/compiler/test_gen/src/gen_str.rs
+++ b/crates/compiler/test_gen/src/gen_str.rs
@ -1822,6 +1822,33 @@ fn str_split_overlapping_substring_2() {
    );
 }
 /// Checks that `Str.walkUtf8` hands the bytes of the string to the step function
 /// in order: prepending each byte to the accumulator must yield the bytes reversed.
 #[test]
 #[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
 fn str_walk_utf8() {
    // A `List U8` result has the same expected value on every backend, so no
    // `gen-llvm-wasm`-specific variant of this assertion is needed.
    assert_evals_to!(
        // Reverse the bytes
        indoc!(
            r#"
            Str.walkUtf8 "abcd" [] (\list, byte -> List.prepend list byte)
            "#
        ),
        RocList::from_slice(&[b'd', b'c', b'b', b'a']),
        RocList<u8>
    );
 }
 #[test]
 #[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
 fn str_walk_utf8_with_index() {