mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-27 05:49:08 +00:00
Add Str.walkUtf8
This commit is contained in:
parent
f7e96ecf82
commit
c3c1b8d083
3 changed files with 56 additions and 0 deletions
|
@ -107,6 +107,7 @@ interface Str
|
||||||
replaceLast,
|
replaceLast,
|
||||||
splitFirst,
|
splitFirst,
|
||||||
splitLast,
|
splitLast,
|
||||||
|
walkUtf8,
|
||||||
walkUtf8WithIndex,
|
walkUtf8WithIndex,
|
||||||
reserve,
|
reserve,
|
||||||
releaseExcessCapacity,
|
releaseExcessCapacity,
|
||||||
|
@ -841,6 +842,33 @@ walkUtf8WithIndexHelp = \string, state, step, index, length ->
|
||||||
else
|
else
|
||||||
state
|
state
|
||||||
|
|
||||||
|
## Walks over the `UTF-8` bytes of the given [Str] and calls a function to update
## state for each byte.
##
## The bytes are visited in order, from the first byte to the last. The `step`
## function receives the state accumulated so far together with the next byte,
## and returns the new state. The state produced by the final step is returned.
##
## ```
## bytes = Str.walkUtf8 "ABC" [] List.append
## expect bytes == [65, 66, 67]
## ```
walkUtf8 : Str, state, (state, U8 -> state) -> state
walkUtf8 = \str, initial, step ->
    walkUtf8Help str initial step 0 (Str.countUtf8Bytes str)
|
||||||
|
|
||||||
|
# Recursive worker for walkUtf8.
#
# `index` is the position of the next byte to visit and `length` is the
# exclusive upper bound for `index`. Once `index` reaches `length`, the
# accumulated state is returned; otherwise the byte at `index` is passed to
# `step` and the walk continues at `index + 1`.
walkUtf8Help : Str, state, (state, U8 -> state), Nat, Nat -> state
walkUtf8Help = \str, state, step, index, length ->
    if index >= length then
        state
    else
        nextState = step state (Str.getUnsafe str index)

        walkUtf8Help str nextState step (index + 1) length
|
||||||
|
|
||||||
|
# walkUtf8 over an ASCII string yields one byte per character
expect
    asciiBytes = walkUtf8 "ABC" [] List.append

    asciiBytes == [65, 66, 67]

# walkUtf8 over a multi-byte character yields each of its UTF-8 bytes
expect
    multiByteBytes = walkUtf8 "鹏" [] List.append

    multiByteBytes == [233, 185, 143]
|
||||||
|
|
||||||
## Shrink the memory footprint of a str such that its capacity and length are equal.
|
## Shrink the memory footprint of a str such that its capacity and length are equal.
|
||||||
## Note: This will also convert seamless slices to regular lists.
|
## Note: This will also convert seamless slices to regular lists.
|
||||||
releaseExcessCapacity : Str -> Str
|
releaseExcessCapacity : Str -> Str
|
||||||
|
|
|
@ -1328,6 +1328,7 @@ define_builtins! {
|
||||||
55 STR_GRAPHEMES: "graphemes"
|
55 STR_GRAPHEMES: "graphemes"
|
||||||
56 STR_IS_VALID_SCALAR: "isValidScalar"
|
56 STR_IS_VALID_SCALAR: "isValidScalar"
|
||||||
57 STR_RELEASE_EXCESS_CAPACITY: "releaseExcessCapacity"
|
57 STR_RELEASE_EXCESS_CAPACITY: "releaseExcessCapacity"
|
||||||
|
58 STR_WALK_UTF8: "walkUtf8"
|
||||||
}
|
}
|
||||||
6 LIST: "List" => {
|
6 LIST: "List" => {
|
||||||
0 LIST_LIST: "List" exposed_apply_type=true // the List.List type alias
|
0 LIST_LIST: "List" exposed_apply_type=true // the List.List type alias
|
||||||
|
|
|
@ -1822,6 +1822,33 @@ fn str_split_overlapping_substring_2() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that `Str.walkUtf8` hands each UTF-8 byte of the string to the step
// function in order: prepending every byte must produce the bytes reversed.
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
fn str_walk_utf8() {
    // The expected value is a list of `u8`, which carries no pointer-sized
    // element, so one assertion serves every backend and no separate
    // `gen-llvm-wasm` variant is needed.
    assert_evals_to!(
        // Reverse the bytes
        indoc!(
            r#"
            Str.walkUtf8 "abcd" [] (\list, byte -> List.prepend list byte)
            "#
        ),
        RocList::from_slice(&[b'd', b'c', b'b', b'a']),
        RocList<u8>
    );
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
|
#[cfg(any(feature = "gen-llvm", feature = "gen-dev"))]
|
||||||
fn str_walk_utf8_with_index() {
|
fn str_walk_utf8_with_index() {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue