wasm: Implement Str.split

This commit is contained in:
Brian Carroll 2022-07-04 09:29:36 +01:00
parent d2dbb0001a
commit 02ec30425c
No known key found for this signature in database
GPG key ID: 9CF4E3BF9C4722C7
7 changed files with 267 additions and 259 deletions

View file

@ -143,6 +143,7 @@ const str = @import("str.zig");
comptime {
exportStrFn(str.init, "init");
exportStrFn(str.strToScalarsC, "to_scalars");
exportStrFn(str.strSplit, "str_split");
exportStrFn(str.strSplitInPlaceC, "str_split_in_place");
exportStrFn(str.countSegments, "count_segments");
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");

View file

@ -744,6 +744,21 @@ fn strFromFloatHelp(comptime T: type, float: T) RocStr {
}
// Str.split
// For dev backends
// Splits `string` on `delimiter` and returns the segments as a freshly
// allocated RocList of RocStr. Exported as "str_split" so a backend can get
// the whole list from a single call instead of counting segments and
// allocating the list itself.
pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
    // The segment count determines how many RocStr slots the list needs.
    const num_segments = countSegments(string, delimiter);
    const result = RocList.allocate(@alignOf(RocStr), num_segments, @sizeOf(RocStr));

    // No backing bytes means there is nothing to fill in; hand the list back untouched.
    const raw_bytes = result.bytes orelse return result;

    // Reinterpret the raw element storage as an array of RocStr and populate it.
    const segments = @ptrCast([*]RocStr, @alignCast(@alignOf(RocStr), raw_bytes));
    strSplitInPlace(segments, string, delimiter);
    return result;
}
// For LLVM backend
pub fn strSplitInPlaceC(opt_array: ?[*]RocStr, string: RocStr, delimiter: RocStr) callconv(.C) void {
if (opt_array) |array| {
return @call(.{ .modifier = always_inline }, strSplitInPlace, .{ array, string, delimiter });

View file

@ -310,6 +310,7 @@ pub const STR_INIT: &str = "roc_builtins.str.init";
// Symbol names of the Zig string builtins, as referenced from the Rust backends.
// NOTE(review): presumably the last path segment must match the name given to
// `exportStrFn` on the Zig side (e.g. "str_split") — confirm when adding entries.
pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
pub const STR_CONCAT: &str = "roc_builtins.str.concat";
pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
// Whole-list variant of Str.split; the Wasm backend's `StrSplit` lowering calls this one.
pub const STR_STR_SPLIT: &str = "roc_builtins.str.str_split";
// In-place variant of Str.split: the caller counts segments and allocates the list first.
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
pub const STR_TO_SCALARS: &str = "roc_builtins.str.to_scalars";
// NOTE(review): "GRAPEHEME" is a misspelling of "GRAPHEME"; the identifier is used by
// other code, so fixing it means renaming every reference in the same change.
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";

View file

@ -184,7 +184,8 @@ pub fn build_app_module<'a>(
}
let (module, called_preload_fns) = backend.finalize();
let main_function_index = maybe_main_fn_index.unwrap();
let main_function_index =
maybe_main_fn_index.expect("The app must expose at least one value to the host");
(module, called_preload_fns, main_function_index)
}

View file

@ -235,15 +235,7 @@ impl<'a> LowLevelCall<'a> {
self.load_args_and_call_zig(backend, bitcode::STR_STARTS_WITH_SCALAR)
}
StrEndsWith => self.load_args_and_call_zig(backend, bitcode::STR_ENDS_WITH),
StrSplit => {
// LLVM implementation (build_str.rs) does the following
// 1. Call bitcode::STR_COUNT_SEGMENTS
// 2. Allocate a `List Str`
// 3. Call bitcode::STR_STR_SPLIT_IN_PLACE
// 4. Write the elements and length of the List
// To do this here, we need full access to WasmBackend, or we could make a Zig wrapper
todo!("{:?}", self.lowlevel);
}
StrSplit => self.load_args_and_call_zig(backend, bitcode::STR_STR_SPLIT),
StrCountGraphemes => {
self.load_args_and_call_zig(backend, bitcode::STR_COUNT_GRAPEHEME_CLUSTERS)
}

View file

@ -25,7 +25,7 @@ fn str_split_empty_delimiter() {
"#
),
1,
i64
usize
);
assert_evals_to!(
@ -41,7 +41,7 @@ fn str_split_empty_delimiter() {
"#
),
3,
i64
usize
);
}
@ -55,7 +55,7 @@ fn str_split_bigger_delimiter_small_str() {
"#
),
1,
i64
usize
);
assert_evals_to!(
@ -71,7 +71,7 @@ fn str_split_bigger_delimiter_small_str() {
"#
),
3,
i64
usize
);
}
@ -210,7 +210,7 @@ fn str_split_small_str_big_delimiter() {
"#
),
3,
i64
usize
);
assert_evals_to!(

View file

@ -14,257 +14,255 @@ use crate::helpers::wasm::assert_evals_to;
use indoc::indoc;
use roc_std::{RocList, RocStr};
// #[test]
// fn str_split_empty_delimiter() {
// assert_evals_to!(
// indoc!(
// r#"
// List.len (Str.split "hello" "")
// "#
// ),
// 1,
// i64
// );
// Splitting "hello" on an empty delimiter must produce a list with exactly one element.
// The length is checked as `usize`, the type `List.len` is read back as here.
#[test]
fn str_split_empty_delimiter() {
assert_evals_to!(
indoc!(
r#"
List.len (Str.split "hello" "")
"#
),
1,
usize
);
}
// assert_evals_to!(
// indoc!(
// r#"
// when List.first (Str.split "JJJ" "") is
// Ok str ->
// Str.countGraphemes str
// This test produces an app that exposes nothing to the host!
#[test]
#[ignore]
fn str_split_empty_delimiter_broken() {
assert_evals_to!(
indoc!(
r#"
when List.first (Str.split "JJJ" "") is
Ok str ->
Str.countGraphemes str
// _ ->
// -1
_ ->
-1
// "#
// ),
// 3,
// i64
// );
// }
"#
),
3,
usize
);
}
// #[test]
// fn str_split_bigger_delimiter_small_str() {
// assert_evals_to!(
// indoc!(
// r#"
// List.len (Str.split "hello" "JJJJ there")
// "#
// ),
// 1,
// i64
// );
// A delimiter longer than the input ("JJJJ there" vs "hello") can never match,
// so the result must be a single-element list.
#[test]
fn str_split_bigger_delimiter_small_str() {
assert_evals_to!(
indoc!(
r#"
List.len (Str.split "hello" "JJJJ there")
"#
),
1,
usize
);
}
// assert_evals_to!(
// indoc!(
// r#"
// when List.first (Str.split "JJJ" "JJJJ there") is
// Ok str ->
// Str.countGraphemes str
// This test produces an app that exposes nothing to the host!
#[test]
#[ignore]
fn str_split_bigger_delimiter_small_str_broken() {
assert_evals_to!(
indoc!(
r#"
when List.first (Str.split "JJJ" "JJJJ there") is
Ok str ->
Str.countGraphemes str
// _ ->
// -1
_ ->
-1
// "#
// ),
// 3,
// i64
// );
// }
"#
),
3,
usize
);
}
// #[test]
// fn str_split_str_concat_repeated() {
// assert_evals_to!(
// indoc!(
// r#"
// when List.first (Str.split "JJJJJ" "JJJJ there") is
// Ok str ->
// str
// |> Str.concat str
// |> Str.concat str
// |> Str.concat str
// |> Str.concat str
#[test]
fn str_split_str_concat_repeated() {
assert_evals_to!(
indoc!(
r#"
when List.first (Str.split "JJJJJ" "JJJJ there") is
Ok str ->
str
|> Str.concat str
|> Str.concat str
|> Str.concat str
|> Str.concat str
// _ ->
// "Not Str!"
_ ->
"Not Str!"
// "#
// ),
// RocStr::from_slice_unchecked(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"),
// RocStr
// );
// }
"#
),
RocStr::from("JJJJJJJJJJJJJJJJJJJJJJJJJ"),
RocStr
);
}
// #[test]
// fn str_split_small_str_bigger_delimiter() {
// assert_evals_to!(
// indoc!(
// r#"
// when
// List.first
// (Str.split "JJJ" "0123456789abcdefghi")
// is
// Ok str -> str
// _ -> ""
// "#
// ),
// RocStr::from_slice_unchecked(b"JJJ"),
// RocStr
// );
// }
// When the delimiter is longer than the input, the first (and only) segment
// must be the input string itself, returned here as a RocStr.
#[test]
fn str_split_small_str_bigger_delimiter() {
assert_evals_to!(
indoc!(
r#"
when
List.first
(Str.split "JJJ" "0123456789abcdefghi")
is
Ok str -> str
_ -> ""
"#
),
RocStr::from("JJJ"),
RocStr
);
}
// #[test]
// fn str_split_big_str_small_delimiter() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split "01234567789abcdefghi?01234567789abcdefghi" "?"
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice_unchecked(b"01234567789abcdefghi"),
// RocStr::from_slice_unchecked(b"01234567789abcdefghi")
// ]),
// RocList<RocStr>
// );
// Splits a long string on short delimiters and checks the full list of segments.
#[test]
fn str_split_big_str_small_delimiter() {
// Case 1: a single-character delimiter ("?") in the middle yields two equal halves.
assert_evals_to!(
indoc!(
r#"
Str.split "01234567789abcdefghi?01234567789abcdefghi" "?"
"#
),
RocList::from_slice(&[
RocStr::from("01234567789abcdefghi"),
RocStr::from("01234567789abcdefghi")
]),
RocList<RocStr>
);
// assert_evals_to!(
// indoc!(
// r#"
// Str.split "01234567789abcdefghi 3ch 01234567789abcdefghi" "3ch"
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice_unchecked(b"01234567789abcdefghi "),
// RocStr::from_slice_unchecked(b" 01234567789abcdefghi")
// ]),
// RocList<RocStr>
// );
// }
// Case 2: a three-character delimiter ("3ch"); the spaces around it must stay
// attached to the neighbouring segments.
assert_evals_to!(
indoc!(
r#"
Str.split "01234567789abcdefghi 3ch 01234567789abcdefghi" "3ch"
"#
),
RocList::from_slice(&[
RocStr::from("01234567789abcdefghi "),
RocStr::from(" 01234567789abcdefghi")
]),
RocList<RocStr>
);
}
// #[test]
// fn str_split_small_str_small_delimiter() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split "J!J!J" "!"
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice_unchecked(b"J"),
// RocStr::from_slice_unchecked(b"J"),
// RocStr::from_slice_unchecked(b"J")
// ]),
// RocList<RocStr>
// );
// }
// Two occurrences of the delimiter "!" in "J!J!J" must yield three "J" segments.
#[test]
fn str_split_small_str_small_delimiter() {
assert_evals_to!(
indoc!(
r#"
Str.split "J!J!J" "!"
"#
),
RocList::from_slice(&[RocStr::from("J"), RocStr::from("J"), RocStr::from("J")]),
RocList<RocStr>
);
}
// #[test]
// fn str_split_bigger_delimiter_big_strs() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split
// "string to split is shorter"
// "than the delimiter which happens to be very very long"
// "#
// ),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"string to split is shorter")]),
// RocList<RocStr>
// );
// }
// Both strings are long, but the delimiter is longer than the input, so the
// result must be a one-element list holding the unmodified input.
#[test]
fn str_split_bigger_delimiter_big_strs() {
assert_evals_to!(
indoc!(
r#"
Str.split
"string to split is shorter"
"than the delimiter which happens to be very very long"
"#
),
RocList::from_slice(&[RocStr::from("string to split is shorter")]),
RocList<RocStr>
);
}
// #[test]
// fn str_split_empty_strs() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split "" ""
// "#
// ),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"")]),
// RocList<RocStr>
// );
// }
// Edge case: empty input and empty delimiter. The expected result is a list
// containing one empty string, not an empty list.
#[test]
fn str_split_empty_strs() {
assert_evals_to!(
indoc!(
r#"
Str.split "" ""
"#
),
RocList::from_slice(&[RocStr::from("")]),
RocList<RocStr>
);
}
// #[test]
// fn str_split_minimal_example() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split "a," ","
// "#
// ),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"a"), RocStr::from_slice_unchecked(b"")]),
// RocList<RocStr>
// )
// }
// A trailing delimiter must produce a trailing empty segment: "a," -> ["a", ""].
#[test]
fn str_split_minimal_example() {
assert_evals_to!(
indoc!(
r#"
Str.split "a," ","
"#
),
RocList::from_slice(&[RocStr::from("a"), RocStr::from("")]),
RocList<RocStr>
)
}
// #[test]
// fn str_split_small_str_big_delimiter() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split
// "1---- ---- ---- ---- ----2---- ---- ---- ---- ----"
// "---- ---- ---- ---- ----"
// |> List.len
// "#
// ),
// 3,
// i64
// );
// Splits on a long multi-character delimiter that occurs twice, the second
// time at the very end of the input.
#[test]
fn str_split_small_str_big_delimiter() {
// First check only the number of segments (read back as `usize`).
assert_evals_to!(
indoc!(
r#"
Str.split
"1---- ---- ---- ---- ----2---- ---- ---- ---- ----"
"---- ---- ---- ---- ----"
|> List.len
"#
),
3,
usize
);
// assert_evals_to!(
// indoc!(
// r#"
// Str.split
// "1---- ---- ---- ---- ----2---- ---- ---- ---- ----"
// "---- ---- ---- ---- ----"
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice_unchecked(b"1"),
// RocStr::from_slice_unchecked(b"2"),
// RocStr::from_slice_unchecked(b"")
// ]),
// RocList<RocStr>
// );
// }
// Then check the segments themselves; the trailing delimiter leaves an empty last segment.
assert_evals_to!(
indoc!(
r#"
Str.split
"1---- ---- ---- ---- ----2---- ---- ---- ---- ----"
"---- ---- ---- ---- ----"
"#
),
RocList::from_slice(&[RocStr::from("1"), RocStr::from("2"), RocStr::from("")]),
RocList<RocStr>
);
}
// #[test]
// fn str_split_small_str_20_char_delimiter() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.split
// "3|-- -- -- -- -- -- |4|-- -- -- -- -- -- |"
// "|-- -- -- -- -- -- |"
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice_unchecked(b"3"),
// RocStr::from_slice_unchecked(b"4"),
// RocStr::from_slice_unchecked(b"")
// ]),
// RocList<RocStr>
// );
// }
// Uses a 20-character delimiter that occurs twice, the second time at the end
// of the input, so the expected segments are "3", "4" and a trailing "".
#[test]
fn str_split_small_str_20_char_delimiter() {
assert_evals_to!(
indoc!(
r#"
Str.split
"3|-- -- -- -- -- -- |4|-- -- -- -- -- -- |"
"|-- -- -- -- -- -- |"
"#
),
RocList::from_slice(&[RocStr::from("3"), RocStr::from("4"), RocStr::from("")]),
RocList<RocStr>
);
}
// #[test]
// fn str_concat_big_to_big() {
// assert_evals_to!(
// indoc!(
// r#"
// Str.concat
// "First string that is fairly long. Longer strings make for different errors. "
// "Second string that is also fairly long. Two long strings test things that might not appear with short strings."
// "#
// ),
// RocStr::from_slice_unchecked(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."),
// RocStr
// );
// }
// Concatenates two long string literals and checks that the result is the
// first followed directly by the second, with nothing inserted between them.
#[test]
fn str_concat_big_to_big() {
assert_evals_to!(
indoc!(
r#"
Str.concat
"First string that is fairly long. Longer strings make for different errors. "
"Second string that is also fairly long. Two long strings test things that might not appear with short strings."
"#
),
RocStr::from("First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."),
RocStr
);
}
#[test]
#[cfg(any(feature = "gen-wasm"))]
@ -498,7 +496,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
@ -513,7 +511,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("abc~".as_bytes()),
// roc_std::RocStr::from("abc~"),
// roc_std::RocStr
// );
// }
@ -528,7 +526,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("∆".as_bytes()),
// roc_std::RocStr::from("∆"),
// roc_std::RocStr
// );
// }
@ -543,7 +541,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("∆œ¬".as_bytes()),
// roc_std::RocStr::from("∆œ¬"),
// roc_std::RocStr
// );
// }
@ -558,7 +556,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("💖".as_bytes()),
// roc_std::RocStr::from("💖"),
// roc_std::RocStr
// );
// }
@ -573,7 +571,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("💖🤠🚀".as_bytes()),
// roc_std::RocStr::from("💖🤠🚀"),
// roc_std::RocStr
// );
// }
@ -588,7 +586,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("💖b∆".as_bytes()),
// roc_std::RocStr::from("💖b∆"),
// roc_std::RocStr
// );
// }
@ -607,7 +605,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
@ -626,7 +624,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
@ -645,7 +643,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
@ -664,7 +662,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
@ -683,7 +681,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
@ -702,7 +700,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr::from("a"),
// roc_std::RocStr
// );
// }
@ -744,7 +742,7 @@ fn str_equality() {
// printExpr expr
// "#
// ),
// RocStr::from_slice_unchecked(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"),
// RocStr::from("Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"),
// RocStr
// );
// }