mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-04 12:18:19 +00:00
rename Str.split
This commit is contained in:
parent
d0735f2661
commit
d99c347998
66 changed files with 1560 additions and 1560 deletions
|
@ -10,7 +10,7 @@ Next, look towards the bottom of the `compiler/module/src/symbol.rs` file. Insid
|
|||
|
||||
For each of the builtin modules, there is a file in `compiler/test_gen/src/` like `gen_num.rs`, `gen_str.rs` etc. Add new tests for the module you are changing to the appropriate file here. You can look at the existing test cases for examples and inspiration.
|
||||
|
||||
You can run your new tests locally using `cargo test-gen-llvm`. You can add a filter like `cargo test-gen-llvm gen_str` (to only run tests defined in `gen_str.rs`) or `cargo test-gen-llvm gen_str::str_split` (to only run tests defined in `gen_str` whose names start with `str_split`). More details can be found in the README in the `compiler/test_gen` directory.
|
||||
You can run your new tests locally using `cargo test-gen-llvm`. You can add a filter like `cargo test-gen-llvm gen_str` (to only run tests defined in `gen_str.rs`) or `cargo test-gen-llvm gen_str::str_split_on` (to only run tests defined in `gen_str` whose names start with `str_split`). More details can be found in the README in the `compiler/test_gen` directory.
|
||||
|
||||
## A builtin implemented directly as LLVM
|
||||
|
||||
|
@ -22,8 +22,8 @@ Some of these have `#` inside their name (`first#list`, `#lt` ..). This is a tri
|
|||
|
||||
But we can use these values and some of these are necessary for implementing builtins. For example, `List.get` returns tags, and it is not easy for us to create tags when composing LLVM. What is easier however, is:
|
||||
|
||||
- ..writing `List.#getUnsafe` that has the dangerous signature of `List elem, U64 -> elem` in LLVM
|
||||
- ..writing `List elem, U64 -> Result elem [OutOfBounds]*` in a type safe way that uses `getUnsafe` internally, only after it checks if the `elem` at `U64` index exists.
|
||||
- ..writing `List.#getUnsafe` that has the dangerous signature of `List elem, U64 -> elem` in LLVM
|
||||
- ..writing `List elem, U64 -> Result elem [OutOfBounds]*` in a type safe way that uses `getUnsafe` internally, only after it checks if the `elem` at `U64` index exists.
|
||||
|
||||
### can/src/builtins.rs
|
||||
|
||||
|
@ -123,5 +123,5 @@ But replace `Num.atan`, the return value, and the return type with your new buil
|
|||
|
||||
When implementing a new builtin, it is often easy to copy and paste the implementation for an existing builtin. This can take you quite far since many builtins are very similar, but it also risks forgetting to change one small part of what you copy and pasted and losing a lot of time later on when you cant figure out why things don't work. So, speaking from experience, even if you are copying an existing builtin, try and implement it manually without copying and pasting. Two recent instances of this (as of September 7th, 2020):
|
||||
|
||||
- `List.keepIf` did not work for a long time because in builtins its `LowLevel` was `ListMap`. This was because I copy and pasted the `List.map` implementation in `builtins.rs
|
||||
- `List.walkBackwards` had mysterious memory bugs for a little while because in `unique.rs` its return type was `list_type(flex(b))` instead of `flex(b)` since it was copy and pasted from `List.keepIf`.
|
||||
- `List.keepIf` did not work for a long time because in builtins its `LowLevel` was `ListMap`. This was because I copy and pasted the `List.map` implementation in `builtins.rs
|
||||
- `List.walkBackwards` had mysterious memory bugs for a little while because in `unique.rs` its return type was `list_type(flex(b))` instead of `flex(b)` since it was copy and pasted from `List.keepIf`.
|
||||
|
|
|
@ -185,7 +185,7 @@ comptime {
|
|||
const str = @import("str.zig");
|
||||
comptime {
|
||||
exportStrFn(str.init, "init");
|
||||
exportStrFn(str.strSplit, "str_split");
|
||||
exportStrFn(str.strSplitOn, "str_split_on");
|
||||
exportStrFn(str.countSegments, "count_segments");
|
||||
exportStrFn(str.countUtf8Bytes, "count_utf8_bytes");
|
||||
exportStrFn(str.isEmpty, "is_empty");
|
||||
|
|
|
@ -598,14 +598,14 @@ fn strFromFloatHelp(comptime T: type, float: T) RocStr {
|
|||
return RocStr.init(&buf, result.len);
|
||||
}
|
||||
|
||||
// Str.split
|
||||
pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
|
||||
// Str.splitOn
|
||||
pub fn strSplitOn(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
|
||||
const segment_count = countSegments(string, delimiter);
|
||||
const list = RocList.allocate(@alignOf(RocStr), segment_count, @sizeOf(RocStr), true);
|
||||
|
||||
if (list.bytes) |bytes| {
|
||||
const strings = @as([*]RocStr, @ptrCast(@alignCast(bytes)));
|
||||
strSplitHelp(strings, string, delimiter);
|
||||
strSplitOnHelp(strings, string, delimiter);
|
||||
}
|
||||
|
||||
return list;
|
||||
|
@ -625,7 +625,7 @@ fn initFromBigStr(slice_bytes: [*]u8, len: usize, alloc_ptr: usize) RocStr {
|
|||
};
|
||||
}
|
||||
|
||||
fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||
fn strSplitOnHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||
if (delimiter.len() == 0) {
|
||||
string.incref(1);
|
||||
array[0] = string;
|
||||
|
@ -650,7 +650,7 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
|||
}
|
||||
|
||||
test "strSplitHelp: empty delimiter" {
|
||||
// Str.split "abc" "" == ["abc"]
|
||||
// Str.splitOn "abc" "" == ["abc"]
|
||||
const str_arr = "abc";
|
||||
const str = RocStr.init(str_arr, str_arr.len);
|
||||
|
||||
|
@ -660,7 +660,7 @@ test "strSplitHelp: empty delimiter" {
|
|||
var array: [1]RocStr = undefined;
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
|
||||
strSplitHelp(array_ptr, str, delimiter);
|
||||
strSplitOnHelp(array_ptr, str, delimiter);
|
||||
|
||||
var expected = [1]RocStr{
|
||||
str,
|
||||
|
@ -684,7 +684,7 @@ test "strSplitHelp: empty delimiter" {
|
|||
}
|
||||
|
||||
test "strSplitHelp: no delimiter" {
|
||||
// Str.split "abc" "!" == ["abc"]
|
||||
// Str.splitOn "abc" "!" == ["abc"]
|
||||
const str_arr = "abc";
|
||||
const str = RocStr.init(str_arr, str_arr.len);
|
||||
|
||||
|
@ -694,7 +694,7 @@ test "strSplitHelp: no delimiter" {
|
|||
var array: [1]RocStr = undefined;
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
|
||||
strSplitHelp(array_ptr, str, delimiter);
|
||||
strSplitOnHelp(array_ptr, str, delimiter);
|
||||
|
||||
var expected = [1]RocStr{
|
||||
str,
|
||||
|
@ -731,7 +731,7 @@ test "strSplitHelp: empty start" {
|
|||
};
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
|
||||
strSplitHelp(array_ptr, str, delimiter);
|
||||
strSplitOnHelp(array_ptr, str, delimiter);
|
||||
|
||||
const one = RocStr.init("a", 1);
|
||||
|
||||
|
@ -772,7 +772,7 @@ test "strSplitHelp: empty end" {
|
|||
};
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
|
||||
strSplitHelp(array_ptr, str, delimiter);
|
||||
strSplitOnHelp(array_ptr, str, delimiter);
|
||||
|
||||
const one = RocStr.init("1", 1);
|
||||
const two = RocStr.init("2", 1);
|
||||
|
@ -811,7 +811,7 @@ test "strSplitHelp: string equals delimiter" {
|
|||
};
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
|
||||
strSplitHelp(array_ptr, str_delimiter, str_delimiter);
|
||||
strSplitOnHelp(array_ptr, str_delimiter, str_delimiter);
|
||||
|
||||
var expected = [2]RocStr{ RocStr.empty(), RocStr.empty() };
|
||||
|
||||
|
@ -846,7 +846,7 @@ test "strSplitHelp: delimiter on sides" {
|
|||
undefined,
|
||||
};
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
strSplitHelp(array_ptr, str, delimiter);
|
||||
strSplitOnHelp(array_ptr, str, delimiter);
|
||||
|
||||
const ghi_arr = "ghi";
|
||||
const ghi = RocStr.init(ghi_arr, ghi_arr.len);
|
||||
|
@ -875,7 +875,7 @@ test "strSplitHelp: delimiter on sides" {
|
|||
}
|
||||
|
||||
test "strSplitHelp: three pieces" {
|
||||
// Str.split "a!b!c" "!" == ["a", "b", "c"]
|
||||
// Str.splitOn "a!b!c" "!" == ["a", "b", "c"]
|
||||
const str_arr = "a!b!c";
|
||||
const str = RocStr.init(str_arr, str_arr.len);
|
||||
|
||||
|
@ -886,7 +886,7 @@ test "strSplitHelp: three pieces" {
|
|||
var array: [array_len]RocStr = undefined;
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
|
||||
strSplitHelp(array_ptr, str, delimiter);
|
||||
strSplitOnHelp(array_ptr, str, delimiter);
|
||||
|
||||
const a = RocStr.init("a", 1);
|
||||
const b = RocStr.init("b", 1);
|
||||
|
@ -916,7 +916,7 @@ test "strSplitHelp: three pieces" {
|
|||
}
|
||||
|
||||
test "strSplitHelp: overlapping delimiter 1" {
|
||||
// Str.split "aaa" "aa" == ["", "a"]
|
||||
// Str.splitOn "aaa" "aa" == ["", "a"]
|
||||
const str_arr = "aaa";
|
||||
const str = RocStr.init(str_arr, str_arr.len);
|
||||
|
||||
|
@ -926,7 +926,7 @@ test "strSplitHelp: overlapping delimiter 1" {
|
|||
var array: [2]RocStr = undefined;
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
|
||||
strSplitHelp(array_ptr, str, delimiter);
|
||||
strSplitOnHelp(array_ptr, str, delimiter);
|
||||
|
||||
var expected = [2]RocStr{
|
||||
RocStr.empty(),
|
||||
|
@ -941,7 +941,7 @@ test "strSplitHelp: overlapping delimiter 1" {
|
|||
}
|
||||
|
||||
test "strSplitHelp: overlapping delimiter 2" {
|
||||
// Str.split "aaa" "aa" == ["", "a"]
|
||||
// Str.splitOn "aaa" "aa" == ["", "a"]
|
||||
const str_arr = "aaaa";
|
||||
const str = RocStr.init(str_arr, str_arr.len);
|
||||
|
||||
|
@ -951,7 +951,7 @@ test "strSplitHelp: overlapping delimiter 2" {
|
|||
var array: [3]RocStr = undefined;
|
||||
const array_ptr: [*]RocStr = &array;
|
||||
|
||||
strSplitHelp(array_ptr, str, delimiter);
|
||||
strSplitOnHelp(array_ptr, str, delimiter);
|
||||
|
||||
var expected = [3]RocStr{
|
||||
RocStr.empty(),
|
||||
|
@ -967,7 +967,7 @@ test "strSplitHelp: overlapping delimiter 2" {
|
|||
try expect(array[2].eq(expected[2]));
|
||||
}
|
||||
|
||||
// This is used for `Str.split : Str, Str -> Array Str
|
||||
// This is used for `Str.splitOn : Str, Str -> List Str
|
||||
// It is used to count how many segments the input `_str`
|
||||
// needs to be broken into, so that we can allocate a array
|
||||
// of that size. It always returns at least 1.
|
||||
|
@ -985,7 +985,7 @@ pub fn countSegments(string: RocStr, delimiter: RocStr) callconv(.C) usize {
|
|||
}
|
||||
|
||||
test "countSegments: long delimiter" {
|
||||
// Str.split "str" "delimiter" == ["str"]
|
||||
// Str.splitOn "str" "delimiter" == ["str"]
|
||||
// 1 segment
|
||||
const str_arr = "str";
|
||||
const str = RocStr.init(str_arr, str_arr.len);
|
||||
|
@ -1003,7 +1003,7 @@ test "countSegments: long delimiter" {
|
|||
}
|
||||
|
||||
test "countSegments: delimiter at start" {
|
||||
// Str.split "hello there" "hello" == ["", " there"]
|
||||
// Str.splitOn "hello there" "hello" == ["", " there"]
|
||||
// 2 segments
|
||||
const str_arr = "hello there";
|
||||
const str = RocStr.init(str_arr, str_arr.len);
|
||||
|
@ -1022,7 +1022,7 @@ test "countSegments: delimiter at start" {
|
|||
}
|
||||
|
||||
test "countSegments: delimiter interspered" {
|
||||
// Str.split "a!b!c" "!" == ["a", "b", "c"]
|
||||
// Str.splitOn "a!b!c" "!" == ["a", "b", "c"]
|
||||
// 3 segments
|
||||
const str_arr = "a!b!c";
|
||||
const str = RocStr.init(str_arr, str_arr.len);
|
||||
|
@ -1041,7 +1041,7 @@ test "countSegments: delimiter interspered" {
|
|||
}
|
||||
|
||||
test "countSegments: string equals delimiter" {
|
||||
// Str.split "/" "/" == ["", ""]
|
||||
// Str.splitOn "/" "/" == ["", ""]
|
||||
// 2 segments
|
||||
const str_delimiter_arr = "/";
|
||||
const str_delimiter = RocStr.init(str_delimiter_arr, str_delimiter_arr.len);
|
||||
|
@ -1056,14 +1056,14 @@ test "countSegments: string equals delimiter" {
|
|||
}
|
||||
|
||||
test "countSegments: overlapping delimiter 1" {
|
||||
// Str.split "aaa" "aa" == ["", "a"]
|
||||
// Str.splitOn "aaa" "aa" == ["", "a"]
|
||||
const segments_count = countSegments(RocStr.init("aaa", 3), RocStr.init("aa", 2));
|
||||
|
||||
try expectEqual(segments_count, 2);
|
||||
}
|
||||
|
||||
test "countSegments: overlapping delimiter 2" {
|
||||
// Str.split "aaa" "aa" == ["", "a"]
|
||||
// Str.splitOn "aaa" "aa" == ["", "a"]
|
||||
const segments_count = countSegments(RocStr.init("aaaa", 4), RocStr.init("aa", 2));
|
||||
|
||||
try expectEqual(segments_count, 3);
|
||||
|
|
|
@ -253,7 +253,7 @@
|
|||
##
|
||||
## The way Roc organizes the `Str` module and supporting packages is designed to help answer this question. Every situation is different, but the following rules of thumb are typical:
|
||||
##
|
||||
## * Most often, using `Str` values along with helper functions like [`split`](https://www.roc-lang.org/builtins/Str#split), [`joinWith`](https://www.roc-lang.org/builtins/Str#joinWith), and so on, is the best option.
|
||||
## * Most often, using `Str` values along with helper functions like [`splitOn`](https://www.roc-lang.org/builtins/Str#splitOn), [`joinWith`](https://www.roc-lang.org/builtins/Str#joinWith), and so on, is the best option.
|
||||
## * If you are specifically implementing a parser, working in UTF-8 bytes is usually the best option. So functions like [`walkUtf8`](https://www.roc-lang.org/builtins/Str#walkUtf8), [toUtf8](https://www.roc-lang.org/builtins/Str#toUtf8), and so on. (Note that single-quote literals produce number literals, so ASCII-range literals like `'a'` gives an integer literal that works with a UTF-8 `U8`.)
|
||||
## * If you are implementing a Unicode library like [roc-lang/unicode](https://github.com/roc-lang/unicode), working in terms of code points will be unavoidable. Aside from basic readability considerations like `\u(...)` in string literals, if you have the option to avoid working in terms of code points, it is almost always correct to avoid them.
|
||||
## * If it seems like a good idea to split a string into "characters" (graphemes), you should definitely stop and reconsider whether this is really the best design. Almost always, doing this is some combination of more error-prone or slower (usually both) than doing something else that does not require taking graphemes into consideration.
|
||||
|
@ -294,7 +294,7 @@
|
|||
## Try putting this into `roc repl`:
|
||||
##
|
||||
## ```
|
||||
## » "foo/bar/baz" |> Str.split "/"
|
||||
## » "foo/bar/baz" |> Str.splitOn "/"
|
||||
##
|
||||
## ["foo", "bar", "baz"] : List Str
|
||||
## ```
|
||||
|
@ -304,7 +304,7 @@
|
|||
## Now let's suppose they were long enough that this optimization no longer applied:
|
||||
##
|
||||
## ```
|
||||
## » "a much, much, much, much/longer/string compared to the last one!" |> Str.split "/"
|
||||
## » "a much, much, much, much/longer/string compared to the last one!" |> Str.splitOn "/"
|
||||
##
|
||||
## ["a much, much, much, much", "longer", "string compared to the last one!"] : List Str
|
||||
## ```
|
||||
|
@ -332,7 +332,7 @@ module [
|
|||
concat,
|
||||
isEmpty,
|
||||
joinWith,
|
||||
split,
|
||||
splitOn,
|
||||
repeat,
|
||||
countUtf8Bytes,
|
||||
toUtf8,
|
||||
|
@ -499,10 +499,10 @@ joinWith : List Str, Str -> Str
|
|||
## Passing `""` for the separator is not useful;
|
||||
## it returns the original string wrapped in a [List].
|
||||
## ```roc
|
||||
## expect Str.split "1,2,3" "," == ["1","2","3"]
|
||||
## expect Str.split "1,2,3" "" == ["1,2,3"]
|
||||
## expect Str.splitOn "1,2,3" "," == ["1","2","3"]
|
||||
## expect Str.splitOn "1,2,3" "" == ["1,2,3"]
|
||||
## ```
|
||||
split : Str, Str -> List Str
|
||||
splitOn : Str, Str -> List Str
|
||||
|
||||
## Repeats a string the given number of times.
|
||||
## ```roc
|
||||
|
@ -518,7 +518,7 @@ repeat : Str, U64 -> Str
|
|||
|
||||
## Returns a [List] of the string's [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit).
|
||||
## (To split the string into a [List] of smaller [Str] values instead of [U8] values,
|
||||
## see [Str.split].)
|
||||
## see [Str.splitOn].)
|
||||
## ```roc
|
||||
## expect Str.toUtf8 "Roc" == [82, 111, 99]
|
||||
## expect Str.toUtf8 "鹏" == [233, 185, 143]
|
||||
|
|
|
@ -343,7 +343,7 @@ pub const STR_INIT: &str = "roc_builtins.str.init";
|
|||
pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
|
||||
pub const STR_CONCAT: &str = "roc_builtins.str.concat";
|
||||
pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
|
||||
pub const STR_SPLIT: &str = "roc_builtins.str.str_split";
|
||||
pub const STR_SPLIT_ON: &str = "roc_builtins.str.str_split_on";
|
||||
pub const STR_COUNT_UTF8_BYTES: &str = "roc_builtins.str.count_utf8_bytes";
|
||||
pub const STR_IS_EMPTY: &str = "roc_builtins.str.is_empty";
|
||||
pub const STR_CAPACITY: &str = "roc_builtins.str.capacity";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue