wasm: Implement Str.split

This commit is contained in:
Brian Carroll 2022-07-04 09:29:36 +01:00
parent d2dbb0001a
commit 02ec30425c
No known key found for this signature in database
GPG key ID: 9CF4E3BF9C4722C7
7 changed files with 267 additions and 259 deletions

View file

@ -143,6 +143,7 @@ const str = @import("str.zig");
comptime { comptime {
exportStrFn(str.init, "init"); exportStrFn(str.init, "init");
exportStrFn(str.strToScalarsC, "to_scalars"); exportStrFn(str.strToScalarsC, "to_scalars");
exportStrFn(str.strSplit, "str_split");
exportStrFn(str.strSplitInPlaceC, "str_split_in_place"); exportStrFn(str.strSplitInPlaceC, "str_split_in_place");
exportStrFn(str.countSegments, "count_segments"); exportStrFn(str.countSegments, "count_segments");
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters"); exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");

View file

@ -744,6 +744,21 @@ fn strFromFloatHelp(comptime T: type, float: T) RocStr {
} }
// Str.split // Str.split
// For dev backends
pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
const segment_count = countSegments(string, delimiter);
const list = RocList.allocate(@alignOf(RocStr), segment_count, @sizeOf(RocStr));
if (list.bytes) |bytes| {
const strings = @ptrCast([*]RocStr, @alignCast(@alignOf(RocStr), bytes));
strSplitInPlace(strings, string, delimiter);
}
return list;
}
// For LLVM backend
pub fn strSplitInPlaceC(opt_array: ?[*]RocStr, string: RocStr, delimiter: RocStr) callconv(.C) void { pub fn strSplitInPlaceC(opt_array: ?[*]RocStr, string: RocStr, delimiter: RocStr) callconv(.C) void {
if (opt_array) |array| { if (opt_array) |array| {
return @call(.{ .modifier = always_inline }, strSplitInPlace, .{ array, string, delimiter }); return @call(.{ .modifier = always_inline }, strSplitInPlace, .{ array, string, delimiter });

View file

@ -310,6 +310,7 @@ pub const STR_INIT: &str = "roc_builtins.str.init";
pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments"; pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
pub const STR_CONCAT: &str = "roc_builtins.str.concat"; pub const STR_CONCAT: &str = "roc_builtins.str.concat";
pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith"; pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
pub const STR_STR_SPLIT: &str = "roc_builtins.str.str_split";
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place"; pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
pub const STR_TO_SCALARS: &str = "roc_builtins.str.to_scalars"; pub const STR_TO_SCALARS: &str = "roc_builtins.str.to_scalars";
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters"; pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";

View file

@ -184,7 +184,8 @@ pub fn build_app_module<'a>(
} }
let (module, called_preload_fns) = backend.finalize(); let (module, called_preload_fns) = backend.finalize();
let main_function_index = maybe_main_fn_index.unwrap(); let main_function_index =
maybe_main_fn_index.expect("The app must expose at least one value to the host");
(module, called_preload_fns, main_function_index) (module, called_preload_fns, main_function_index)
} }

View file

@ -235,15 +235,7 @@ impl<'a> LowLevelCall<'a> {
self.load_args_and_call_zig(backend, bitcode::STR_STARTS_WITH_SCALAR) self.load_args_and_call_zig(backend, bitcode::STR_STARTS_WITH_SCALAR)
} }
StrEndsWith => self.load_args_and_call_zig(backend, bitcode::STR_ENDS_WITH), StrEndsWith => self.load_args_and_call_zig(backend, bitcode::STR_ENDS_WITH),
StrSplit => { StrSplit => self.load_args_and_call_zig(backend, bitcode::STR_STR_SPLIT),
// LLVM implementation (build_str.rs) does the following
// 1. Call bitcode::STR_COUNT_SEGMENTS
// 2. Allocate a `List Str`
// 3. Call bitcode::STR_STR_SPLIT_IN_PLACE
// 4. Write the elements and length of the List
// To do this here, we need full access to WasmBackend, or we could make a Zig wrapper
todo!("{:?}", self.lowlevel);
}
StrCountGraphemes => { StrCountGraphemes => {
self.load_args_and_call_zig(backend, bitcode::STR_COUNT_GRAPEHEME_CLUSTERS) self.load_args_and_call_zig(backend, bitcode::STR_COUNT_GRAPEHEME_CLUSTERS)
} }

View file

@ -25,7 +25,7 @@ fn str_split_empty_delimiter() {
"# "#
), ),
1, 1,
i64 usize
); );
assert_evals_to!( assert_evals_to!(
@ -41,7 +41,7 @@ fn str_split_empty_delimiter() {
"# "#
), ),
3, 3,
i64 usize
); );
} }
@ -55,7 +55,7 @@ fn str_split_bigger_delimiter_small_str() {
"# "#
), ),
1, 1,
i64 usize
); );
assert_evals_to!( assert_evals_to!(
@ -71,7 +71,7 @@ fn str_split_bigger_delimiter_small_str() {
"# "#
), ),
3, 3,
i64 usize
); );
} }
@ -210,7 +210,7 @@ fn str_split_small_str_big_delimiter() {
"# "#
), ),
3, 3,
i64 usize
); );
assert_evals_to!( assert_evals_to!(

View file

@ -14,257 +14,255 @@ use crate::helpers::wasm::assert_evals_to;
use indoc::indoc; use indoc::indoc;
use roc_std::{RocList, RocStr}; use roc_std::{RocList, RocStr};
// #[test] #[test]
// fn str_split_empty_delimiter() { fn str_split_empty_delimiter() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// List.len (Str.split "hello" "") List.len (Str.split "hello" "")
// "# "#
// ), ),
// 1, 1,
// i64 usize
// ); );
}
// assert_evals_to!( // This test produces an app that exposes nothing to the host!
// indoc!( #[test]
// r#" #[ignore]
// when List.first (Str.split "JJJ" "") is fn str_split_empty_delimiter_broken() {
// Ok str -> assert_evals_to!(
// Str.countGraphemes str indoc!(
r#"
when List.first (Str.split "JJJ" "") is
Ok str ->
Str.countGraphemes str
// _ -> _ ->
// -1 -1
// "# "#
// ), ),
// 3, 3,
// i64 usize
// ); );
// } }
// #[test] #[test]
// fn str_split_bigger_delimiter_small_str() { fn str_split_bigger_delimiter_small_str() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// List.len (Str.split "hello" "JJJJ there") List.len (Str.split "hello" "JJJJ there")
// "# "#
// ), ),
// 1, 1,
// i64 usize
// ); );
}
// assert_evals_to!( // This test produces an app that exposes nothing to the host!
// indoc!( #[test]
// r#" #[ignore]
// when List.first (Str.split "JJJ" "JJJJ there") is fn str_split_bigger_delimiter_small_str_broken() {
// Ok str -> assert_evals_to!(
// Str.countGraphemes str indoc!(
r#"
when List.first (Str.split "JJJ" "JJJJ there") is
Ok str ->
Str.countGraphemes str
// _ -> _ ->
// -1 -1
// "# "#
// ), ),
// 3, 3,
// i64 usize
// ); );
// } }
// #[test] #[test]
// fn str_split_str_concat_repeated() { fn str_split_str_concat_repeated() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// when List.first (Str.split "JJJJJ" "JJJJ there") is when List.first (Str.split "JJJJJ" "JJJJ there") is
// Ok str -> Ok str ->
// str str
// |> Str.concat str |> Str.concat str
// |> Str.concat str |> Str.concat str
// |> Str.concat str |> Str.concat str
// |> Str.concat str |> Str.concat str
// _ -> _ ->
// "Not Str!" "Not Str!"
// "# "#
// ), ),
// RocStr::from_slice_unchecked(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"), RocStr::from("JJJJJJJJJJJJJJJJJJJJJJJJJ"),
// RocStr RocStr
// ); );
// } }
// #[test] #[test]
// fn str_split_small_str_bigger_delimiter() { fn str_split_small_str_bigger_delimiter() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// when when
// List.first List.first
// (Str.split "JJJ" "0123456789abcdefghi") (Str.split "JJJ" "0123456789abcdefghi")
// is is
// Ok str -> str Ok str -> str
// _ -> "" _ -> ""
// "# "#
// ), ),
// RocStr::from_slice_unchecked(b"JJJ"), RocStr::from("JJJ"),
// RocStr RocStr
// ); );
// } }
// #[test] #[test]
// fn str_split_big_str_small_delimiter() { fn str_split_big_str_small_delimiter() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split "01234567789abcdefghi?01234567789abcdefghi" "?" Str.split "01234567789abcdefghi?01234567789abcdefghi" "?"
// "# "#
// ), ),
// RocList::from_slice(&[ RocList::from_slice(&[
// RocStr::from_slice_unchecked(b"01234567789abcdefghi"), RocStr::from("01234567789abcdefghi"),
// RocStr::from_slice_unchecked(b"01234567789abcdefghi") RocStr::from("01234567789abcdefghi")
// ]), ]),
// RocList<RocStr> RocList<RocStr>
// ); );
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split "01234567789abcdefghi 3ch 01234567789abcdefghi" "3ch" Str.split "01234567789abcdefghi 3ch 01234567789abcdefghi" "3ch"
// "# "#
// ), ),
// RocList::from_slice(&[ RocList::from_slice(&[
// RocStr::from_slice_unchecked(b"01234567789abcdefghi "), RocStr::from("01234567789abcdefghi "),
// RocStr::from_slice_unchecked(b" 01234567789abcdefghi") RocStr::from(" 01234567789abcdefghi")
// ]), ]),
// RocList<RocStr> RocList<RocStr>
// ); );
// } }
// #[test] #[test]
// fn str_split_small_str_small_delimiter() { fn str_split_small_str_small_delimiter() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split "J!J!J" "!" Str.split "J!J!J" "!"
// "# "#
// ), ),
// RocList::from_slice(&[ RocList::from_slice(&[RocStr::from("J"), RocStr::from("J"), RocStr::from("J")]),
// RocStr::from_slice_unchecked(b"J"), RocList<RocStr>
// RocStr::from_slice_unchecked(b"J"), );
// RocStr::from_slice_unchecked(b"J") }
// ]),
// RocList<RocStr>
// );
// }
// #[test] #[test]
// fn str_split_bigger_delimiter_big_strs() { fn str_split_bigger_delimiter_big_strs() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split Str.split
// "string to split is shorter" "string to split is shorter"
// "than the delimiter which happens to be very very long" "than the delimiter which happens to be very very long"
// "# "#
// ), ),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"string to split is shorter")]), RocList::from_slice(&[RocStr::from("string to split is shorter")]),
// RocList<RocStr> RocList<RocStr>
// ); );
// } }
// #[test] #[test]
// fn str_split_empty_strs() { fn str_split_empty_strs() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split "" "" Str.split "" ""
// "# "#
// ), ),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"")]), RocList::from_slice(&[RocStr::from("")]),
// RocList<RocStr> RocList<RocStr>
// ); );
// } }
// #[test] #[test]
// fn str_split_minimal_example() { fn str_split_minimal_example() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split "a," "," Str.split "a," ","
// "# "#
// ), ),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"a"), RocStr::from_slice_unchecked(b"")]), RocList::from_slice(&[RocStr::from("a"), RocStr::from("")]),
// RocList<RocStr> RocList<RocStr>
// ) )
// } }
// #[test] #[test]
// fn str_split_small_str_big_delimiter() { fn str_split_small_str_big_delimiter() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split Str.split
// "1---- ---- ---- ---- ----2---- ---- ---- ---- ----" "1---- ---- ---- ---- ----2---- ---- ---- ---- ----"
// "---- ---- ---- ---- ----" "---- ---- ---- ---- ----"
// |> List.len |> List.len
// "# "#
// ), ),
// 3, 3,
// i64 usize
// ); );
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split Str.split
// "1---- ---- ---- ---- ----2---- ---- ---- ---- ----" "1---- ---- ---- ---- ----2---- ---- ---- ---- ----"
// "---- ---- ---- ---- ----" "---- ---- ---- ---- ----"
// "# "#
// ), ),
// RocList::from_slice(&[ RocList::from_slice(&[RocStr::from("1"), RocStr::from("2"), RocStr::from("")]),
// RocStr::from_slice_unchecked(b"1"), RocList<RocStr>
// RocStr::from_slice_unchecked(b"2"), );
// RocStr::from_slice_unchecked(b"") }
// ]),
// RocList<RocStr>
// );
// }
// #[test] #[test]
// fn str_split_small_str_20_char_delimiter() { fn str_split_small_str_20_char_delimiter() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.split Str.split
// "3|-- -- -- -- -- -- |4|-- -- -- -- -- -- |" "3|-- -- -- -- -- -- |4|-- -- -- -- -- -- |"
// "|-- -- -- -- -- -- |" "|-- -- -- -- -- -- |"
// "# "#
// ), ),
// RocList::from_slice(&[ RocList::from_slice(&[RocStr::from("3"), RocStr::from("4"), RocStr::from("")]),
// RocStr::from_slice_unchecked(b"3"), RocList<RocStr>
// RocStr::from_slice_unchecked(b"4"), );
// RocStr::from_slice_unchecked(b"") }
// ]),
// RocList<RocStr>
// );
// }
// #[test] #[test]
// fn str_concat_big_to_big() { fn str_concat_big_to_big() {
// assert_evals_to!( assert_evals_to!(
// indoc!( indoc!(
// r#" r#"
// Str.concat Str.concat
// "First string that is fairly long. Longer strings make for different errors. " "First string that is fairly long. Longer strings make for different errors. "
// "Second string that is also fairly long. Two long strings test things that might not appear with short strings." "Second string that is also fairly long. Two long strings test things that might not appear with short strings."
// "# "#
// ), ),
// RocStr::from_slice_unchecked(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."), RocStr::from("First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."),
// RocStr RocStr
// ); );
// } }
#[test] #[test]
#[cfg(any(feature = "gen-wasm"))] #[cfg(any(feature = "gen-wasm"))]
@ -498,7 +496,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> "" // Err _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr::from("a"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -513,7 +511,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> "" // Err _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("abc~".as_bytes()), // roc_std::RocStr::from("abc~"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -528,7 +526,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> "" // Err _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("∆".as_bytes()), // roc_std::RocStr::from("∆"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -543,7 +541,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> "" // Err _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("∆œ¬".as_bytes()), // roc_std::RocStr::from("∆œ¬"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -558,7 +556,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> "" // Err _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("💖".as_bytes()), // roc_std::RocStr::from("💖"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -573,7 +571,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> "" // Err _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("💖🤠🚀".as_bytes()), // roc_std::RocStr::from("💖🤠🚀"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -588,7 +586,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> "" // Err _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("💖b∆".as_bytes()), // roc_std::RocStr::from("💖b∆"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -607,7 +605,7 @@ fn str_starts_with_false_small_str() {
// _ -> "" // _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr::from("a"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -626,7 +624,7 @@ fn str_starts_with_false_small_str() {
// _ -> "" // _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr::from("a"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -645,7 +643,7 @@ fn str_starts_with_false_small_str() {
// _ -> "" // _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr::from("a"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -664,7 +662,7 @@ fn str_starts_with_false_small_str() {
// _ -> "" // _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr::from("a"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -683,7 +681,7 @@ fn str_starts_with_false_small_str() {
// _ -> "" // _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr::from("a"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -702,7 +700,7 @@ fn str_starts_with_false_small_str() {
// _ -> "" // _ -> ""
// "# // "#
// ), // ),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr::from("a"),
// roc_std::RocStr // roc_std::RocStr
// ); // );
// } // }
@ -744,7 +742,7 @@ fn str_equality() {
// printExpr expr // printExpr expr
// "# // "#
// ), // ),
// RocStr::from_slice_unchecked(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"), // RocStr::from("Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"),
// RocStr // RocStr
// ); // );
// } // }