Add Str.walkUtf8

This commit is contained in:
Richard Feldman 2023-03-29 15:52:08 -04:00
parent f7e96ecf82
commit c3c1b8d083
No known key found for this signature in database
GPG key ID: F1F21AA5B1D9E43B
3 changed files with 56 additions and 0 deletions

View file

@ -107,6 +107,7 @@ interface Str
replaceLast,
splitFirst,
splitLast,
walkUtf8,
walkUtf8WithIndex,
reserve,
releaseExcessCapacity,
@ -841,6 +842,33 @@ walkUtf8WithIndexHelp = \string, state, step, index, length ->
else
state
## Walks over the `UTF-8` bytes of the given [Str] and calls a function to update
## state for each byte.
##
## The bytes are visited in order, from the first to the last. The step function
## receives the state so far together with the next byte and returns the new
## state; the state left after the final byte is returned directly (it is not
## wrapped in a `Result`).
##
## ```
## bytes = Str.walkUtf8 "ABC" [] List.append
## expect bytes == [65, 66, 67]
## ```
walkUtf8 : Str, state, (state, U8 -> state) -> state
walkUtf8 = \str, initial, step ->
    walkUtf8Help str initial step 0 (Str.countUtf8Bytes str)
# Recursive worker for `walkUtf8`: passes the byte at `pos` to `update`, then
# moves on to the next position. Once `pos` is no longer below `limit`, the
# accumulated value is returned unchanged.
walkUtf8Help : Str, state, (state, U8 -> state), Nat, Nat -> state
walkUtf8Help = \str, acc, update, pos, limit ->
    if pos >= limit then
        acc
    else
        # Only reached while `pos < limit`, before the byte is fetched.
        nextAcc = update acc (Str.getUnsafe str pos)

        walkUtf8Help str nextAcc update (pos + 1) limit
# ASCII input: collecting the walked bytes yields one byte per character
expect
    bytes = walkUtf8 "ABC" [] List.append

    bytes == [65, 66, 67]

# Multi-byte input: the single character "鹏" is walked as three UTF-8 bytes
expect
    bytes = walkUtf8 "鹏" [] List.append

    bytes == [233, 185, 143]
## Shrink the memory footprint of a str such that its capacity and length are equal.
## Note: This will also convert seamless slices to regular lists.
releaseExcessCapacity : Str -> Str

View file

@ -1328,6 +1328,7 @@ define_builtins! {
55 STR_GRAPHEMES: "graphemes"
56 STR_IS_VALID_SCALAR: "isValidScalar"
57 STR_RELEASE_EXCESS_CAPACITY: "releaseExcessCapacity"
58 STR_WALK_UTF8: "walkUtf8"
}
6 LIST: "List" => {
0 LIST_LIST: "List" exposed_apply_type=true // the List.List type alias

View file

@ -1822,6 +1822,33 @@ fn str_split_overlapping_substring_2() {
);
}
/// Checks that `Str.walkUtf8` passes each UTF-8 byte of the string to the step
/// function in order: prepending every byte of "abcd" to an empty list must
/// produce the bytes reversed.
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_walk_utf8() {
    // A single assertion is used for every enabled backend: the expected value
    // is a plain list of `u8`, so it contains no target-sized integers that
    // would call for a separate expectation on another target.
    // NOTE(review): confirm this passes with the `gen-llvm-wasm` feature enabled.
    assert_evals_to!(
        // Reverse the bytes
        indoc!(
            r#"
            Str.walkUtf8 "abcd" [] (\list, byte -> List.prepend list byte)
            "#
        ),
        RocList::from_slice(&[b'd', b'c', b'b', b'a']),
        RocList<u8>
    );
}
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_walk_utf8_with_index() {