mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-03 03:42:17 +00:00
Add Str.walkUtf8
This commit is contained in:
parent
f7e96ecf82
commit
c3c1b8d083
3 changed files with 56 additions and 0 deletions
|
@ -107,6 +107,7 @@ interface Str
|
|||
replaceLast,
|
||||
splitFirst,
|
||||
splitLast,
|
||||
walkUtf8,
|
||||
walkUtf8WithIndex,
|
||||
reserve,
|
||||
releaseExcessCapacity,
|
||||
|
@ -841,6 +842,33 @@ walkUtf8WithIndexHelp = \string, state, step, index, length ->
|
|||
else
|
||||
state
|
||||
|
||||
## Walks over the `UTF-8` bytes of the given [Str] and calls a function to update
## state for each byte.
##
## The bytes are visited from the first to the last, and the final state is
## returned as-is (it is not wrapped in a `Result`).
##
## ```
## result = Str.walkUtf8 "ABC" [] List.append
## expect result == [65, 66, 67]
## ```
walkUtf8 : Str, state, (state, U8 -> state) -> state
walkUtf8 = \str, initial, step ->
    # Start at byte index 0 and stop once the string's UTF-8 byte count is reached.
    walkUtf8Help str initial step 0 (Str.countUtf8Bytes str)
|
||||
|
||||
# Recursive worker for `walkUtf8`.
#
# `index` is the position of the next byte to visit and `length` is the bound
# at which the walk stops; the accumulated `state` is returned once
# `index` reaches `length`.
walkUtf8Help : Str, state, (state, U8 -> state), Nat, Nat -> state
walkUtf8Help = \str, state, step, index, length ->
    if index >= length then
        # Every byte below `length` has been visited.
        state
    else
        # Feed the byte at `index` to `step`, then continue with the next index.
        nextState = step state (Str.getUnsafe str index)

        walkUtf8Help str nextState step (index + 1) length
|
||||
|
||||
# walkUtf8 over an ASCII string collects one byte per character.
expect
    asciiBytes = walkUtf8 "ABC" [] List.append
    asciiBytes == [65, 66, 67]

# walkUtf8 over a multi-byte character collects each of its UTF-8 bytes.
expect
    multiByteBytes = walkUtf8 "鹏" [] List.append
    multiByteBytes == [233, 185, 143]
|
||||
|
||||
## Shrink the memory footprint of a str such that its capacity and length are equal.
|
||||
## Note: This will also convert seamless slices to regular lists.
|
||||
releaseExcessCapacity : Str -> Str
|
||||
|
|
|
@ -1328,6 +1328,7 @@ define_builtins! {
|
|||
55 STR_GRAPHEMES: "graphemes"
|
||||
56 STR_IS_VALID_SCALAR: "isValidScalar"
|
||||
57 STR_RELEASE_EXCESS_CAPACITY: "releaseExcessCapacity"
|
||||
58 STR_WALK_UTF8: "walkUtf8"
|
||||
}
|
||||
6 LIST: "List" => {
|
||||
0 LIST_LIST: "List" exposed_apply_type=true // the List.List type alias
|
||||
|
|
|
@ -1822,6 +1822,33 @@ fn str_split_overlapping_substring_2() {
|
|||
);
|
||||
}
|
||||
|
||||
// Evaluates `Str.walkUtf8` over "abcd" with a step function that prepends each
// byte, and checks that the resulting list holds the bytes in reverse order.
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_walk_utf8() {
    // A single expectation is used for every enabled backend: the expected
    // value is a `RocList<u8>` with no index component, so it does not need a
    // separate `gen-llvm-wasm` variant (and that variant must exercise
    // `Str.walkUtf8`, not `Str.walkUtf8WithIndex`).
    assert_evals_to!(
        // Reverse the bytes
        indoc!(
            r#"
            Str.walkUtf8 "abcd" [] (\list, byte -> List.prepend list byte)
            "#
        ),
        RocList::from_slice(&[b'd', b'c', b'b', b'a']),
        RocList<u8>
    );
}
|
||||
|
||||
#[test]
|
||||
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
|
||||
fn str_walk_utf8_with_index() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue