Str.splitFirst and Str.splitLast

This commit is contained in:
Folkert 2022-07-03 19:47:06 +02:00
parent be3800d7fa
commit eeb271d07f
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
7 changed files with 135 additions and 3 deletions

View file

@ -156,6 +156,7 @@ comptime {
exportStrFn(str.strFromFloatC, "from_float");
exportStrFn(str.strEqual, "equal");
exportStrFn(str.substringUnsafe, "substring_unsafe");
exportStrFn(str.getUnsafe, "get_unsafe");
exportStrFn(str.strToUtf8C, "to_utf8");
exportStrFn(str.fromUtf8C, "from_utf8");
exportStrFn(str.fromUtf8RangeC, "from_utf8_range");

View file

@ -1194,6 +1194,10 @@ pub fn substringUnsafe(string: RocStr, start: usize, length: usize) callconv(.C)
return RocStr.fromSlice(slice);
}
/// Returns the byte stored at position `index` of `string`.
/// This function performs no bounds check of its own; the lookup is
/// forwarded directly to `RocStr.getUnchecked`.
pub fn getUnsafe(string: RocStr, index: usize) callconv(.C) u8 {
    const byte: u8 = string.getUnchecked(index);
    return byte;
}
test "substringUnsafe: start" {
const str = RocStr.fromSlice("abcdef");
defer str.deinit();

View file

@ -33,6 +33,8 @@ interface Str
toU8,
toI8,
toScalars,
splitFirst,
splitLast,
]
imports [Bool.{ Bool }, Result.{ Result }]
@ -230,3 +232,94 @@ countBytes : Str -> Nat
## string slice that does not do bounds checking or utf-8 verification
substringUnsafe : Str, Nat, Nat -> Str
## Splits a string around the first occurrence of a delimiter.
##
## On success the result holds the text before the delimiter and the text
## after it; the delimiter itself is dropped. If the delimiter does not occur
## in the string, returns `Err NotFound`.
##
##     Str.splitFirst "foo/bar/baz" "/" == Ok { before: "foo", after: "bar/baz" }
splitFirst : Str, Str -> Result { before : Str, after : Str } [NotFound]*
splitFirst = \haystack, needle ->
    when firstMatch haystack needle is
        None ->
            Err NotFound

        Some matchStart ->
            needleBytes = Str.countBytes needle
            # Number of bytes that follow the matched delimiter
            afterLength = Str.countBytes haystack - needleBytes - matchStart

            Ok {
                before: Str.substringUnsafe haystack 0 matchStart,
                after: Str.substringUnsafe haystack (matchStart + needleBytes) afterLength,
            }
# Finds the byte index of the first occurrence of `needle` in `haystack`.
#
# Returns `None` when the needle does not occur. A needle that is longer than
# the haystack can never occur, so that case is rejected up front instead of
# probing index 0.
firstMatch : Str, Str -> [Some Nat, None]
firstMatch = \haystack, needle ->
    haystackLength = Str.countBytes haystack
    needleLength = Str.countBytes needle

    if needleLength > haystackLength then
        None
    else
        # Last start index at which the whole needle still fits in the haystack
        lastPossible = haystackLength - needleLength

        firstMatchHelp haystack needle 0 lastPossible

# Scans forward from `index`, trying every start position up to and
# INCLUDING `lastPossible`; returns the first position where the needle matches.
firstMatchHelp : Str, Str, Nat, Nat -> [Some Nat, None]
firstMatchHelp = \haystack, needle, index, lastPossible ->
    # `lastPossible` is itself a valid start position (a needle sitting at the
    # very end of the haystack), so the bound must be inclusive.
    if index <= lastPossible then
        if matchesAt haystack index needle then
            Some index
        else
            firstMatchHelp haystack needle (index + 1) lastPossible
    else
        None
## Splits a string around the last occurrence of a delimiter.
##
## On success the result holds the text before the delimiter and the text
## after it; the delimiter itself is dropped. If the delimiter does not occur
## in the string, returns `Err NotFound`.
##
##     Str.splitLast "foo/bar/baz" "/" == Ok { before: "foo/bar", after: "baz" }
splitLast : Str, Str -> Result { before : Str, after : Str } [NotFound]*
splitLast = \haystack, needle ->
    when lastMatch haystack needle is
        None ->
            Err NotFound

        Some matchStart ->
            needleBytes = Str.countBytes needle
            # Number of bytes that follow the matched delimiter
            afterLength = Str.countBytes haystack - needleBytes - matchStart

            Ok {
                before: Str.substringUnsafe haystack 0 matchStart,
                after: Str.substringUnsafe haystack (matchStart + needleBytes) afterLength,
            }
# Finds the byte index of the last occurrence of `needle` in `haystack`.
#
# Returns `None` when the needle does not occur. A needle that is longer than
# the haystack can never occur, so that case is rejected up front instead of
# probing index 0.
lastMatch : Str, Str -> [Some Nat, None]
lastMatch = \haystack, needle ->
    haystackLength = Str.countBytes haystack
    needleLength = Str.countBytes needle

    if needleLength > haystackLength then
        None
    else
        # Last start index at which the whole needle still fits in the
        # haystack; the backwards scan has to begin exactly here, otherwise a
        # match that ends at the end of the haystack is skipped.
        lastPossibleIndex = haystackLength - needleLength

        lastMatchHelp haystack needle lastPossibleIndex

# Scans backwards from `index` down to 0 and returns the first position (seen
# from the end) where the needle matches.
lastMatchHelp : Str, Str, Nat -> [Some Nat, None]
lastMatchHelp = \haystack, needle, index ->
    if matchesAt haystack index needle then
        Some index
    else
        # subChecked fails once index is 0, which ends the scan
        when Num.subChecked index 1 is
            Ok nextIndex ->
                lastMatchHelp haystack needle nextIndex

            Err _ ->
                None
# Returns the smaller of the two arguments; when neither compares as smaller,
# the second one is returned.
min = \x, y ->
    if x < y then
        x
    else
        y
# Reports whether `needle` occurs in `haystack` starting exactly at byte
# index `haystackIndex`.
#
# The whole needle must fit between `haystackIndex` and the end of the
# haystack; a needle that would run past the end is never a match, even if
# the bytes that do overlap are equal.
matchesAt : Str, Nat, Str -> Bool
matchesAt = \haystack, haystackIndex, needle ->
    haystackLength = Str.countBytes haystack
    needleLength = Str.countBytes needle
    windowEnd = haystackIndex + needleLength
    endIndex = min windowEnd haystackLength

    # If the window had to be clamped, the needle overruns the haystack here
    if endIndex < windowEnd then
        False
    else
        matchesAtHelp haystack haystackIndex needle 0 endIndex
# Compares the haystack bytes from `haystackIndex` up to (not including)
# `endIndex` with the needle bytes starting at `needleIndex`, advancing both
# cursors in lockstep. Yields True when the end is reached without hitting a
# differing byte, False at the first difference.
matchesAtHelp : Str, Nat, Str, Nat, Nat -> Bool
matchesAtHelp = \haystack, haystackIndex, needle, needleIndex, endIndex ->
    if haystackIndex >= endIndex then
        True
    else if Str.getUnsafe haystack haystackIndex != Str.getUnsafe needle needleIndex then
        False
    else
        matchesAtHelp haystack (haystackIndex + 1) needle (needleIndex + 1) endIndex

View file

@ -1734,7 +1734,7 @@ fn str_count_bytes(symbol: Symbol, var_store: &mut VarStore) -> Def {
/// Str.substringUnsafe : Str, Nat, Nat -> Str
///
/// Built from `LowLevel::StrSubstringUnsafe`.
fn str_substring_unsafe(symbol: Symbol, var_store: &mut VarStore) -> Def {
lowlevel_1(symbol, LowLevel::StrSubstringUnsafe, var_store)
lowlevel_3(symbol, LowLevel::StrSubstringUnsafe, var_store)
}
/// Str.fromUtf8 : List U8 -> Result Str [BadUtf8 { byteIndex : Nat, problem : Utf8Problem }]*

View file

@ -1193,6 +1193,8 @@ define_builtins! {
35 STR_GET_UNSAFE: "getUnsafe"
36 STR_COUNT_BYTES: "countBytes"
37 STR_SUBSTRING_UNSAFE: "substringUnsafe"
38 STR_SPLIT_FIRST: "splitFirst"
39 STR_SPLIT_LAST: "splitLast"
}
5 LIST: "List" => {
0 LIST_LIST: "List" imported // the List.List type alias

View file

@ -895,7 +895,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
StrGetUnsafe | ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
ListConcat => arena.alloc_slice_copy(&[owned, owned]),
StrConcat => arena.alloc_slice_copy(&[owned, borrowed]),
StrSubstringUnsafe => arena.alloc_slice_copy(&[owned]),
StrSubstringUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
StrTrim => arena.alloc_slice_copy(&[owned]),
StrTrimLeft => arena.alloc_slice_copy(&[owned]),
StrTrimRight => arena.alloc_slice_copy(&[owned]),

View file

@ -1677,7 +1677,7 @@ fn to_scalar_3_byte() {
}
#[test]
#[cfg(any(feature = "gen-llvm"))]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
fn to_scalar_4_byte() {
// from https://design215.com/toolbox/utf8-4byte-characters.php
assert_evals_to!(
@ -1700,3 +1700,35 @@ fn to_scalar_4_byte() {
RocList<u32>
);
}
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
fn str_split_first() {
// Str.splitFirst "foo/bar/baz" "/" == Ok { before: "foo", after: "bar/baz" }
//
// The expected host-side tuple below lists the `after` value first and the
// `before` value second (presumably because record fields are laid out in
// alphabetical order; verify against the layout code).
assert_evals_to!(
indoc!(
r#"
Str.splitFirst "foo/bar/baz" "/"
"#
),
RocResult::ok((RocStr::from("bar/baz"), RocStr::from("foo"))),
RocResult<(RocStr, RocStr), ()>
);
}
#[test]
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
fn str_split_last() {
    // Str.splitLast "foo/bar/baz" "/" == Ok { before: "foo/bar", after: "baz" }
    //
    // The expected host-side tuple below lists the `after` value first and the
    // `before` value second (presumably because record fields are laid out in
    // alphabetical order; verify against the layout code).
    assert_evals_to!(
        indoc!(
            r#"
            Str.splitLast "foo/bar/baz" "/"
            "#
        ),
        RocResult::ok((RocStr::from("baz"), RocStr::from("foo/bar"))),
        RocResult<(RocStr, RocStr), ()>
    );
}