Merge remote-tracking branch 'remote/main' into upgrade-llvm-zig

2025-08-03 19:58:18 +00:00 · 2024-11-29 08:58:47 +11:00 · 2024-11-29 08:58:47 +11:00 · 2feb5d3c2e
commit 2feb5d3c2e
parent f8304477f6 46067cf28c
426 changed files with 8889 additions and 4190 deletions
--- a/crates/compiler/builtins/README.md
+++ b/crates/compiler/builtins/README.md
@ -10,7 +10,7 @@ Next, look towards the bottom of the `compiler/module/src/symbol.rs` file. Insid

 For each of the builtin modules, there is a file in `compiler/test_gen/src/` like `gen_num.rs`, `gen_str.rs` etc. Add new tests for the module you are changing to the appropriate file here. You can look at the existing test cases for examples and inspiration.

-You can run your new tests locally using `cargo test-gen-llvm`. You can add a filter like `cargo test-gen-llvm gen_str` (to only run tests defined in `gen_str.rs`) or `cargo test-gen-llvm gen_str::str_split` (to only run tests defined in `gen_str` whose names start with `str_split`). More details can be found in the README in the `compiler/test_gen` directory.
+You can run your new tests locally using `cargo test-gen-llvm`. You can add a filter like `cargo test-gen-llvm gen_str` (to only run tests defined in `gen_str.rs`) or `cargo test-gen-llvm gen_str::str_split_on` (to only run tests defined in `gen_str` whose names start with `str_split_on`). More details can be found in the README in the `compiler/test_gen` directory.

 ## A builtin implemented directly as LLVM

@ -22,8 +22,8 @@ Some of these have `#` inside their name (`first#list`, `#lt` ..). This is a tri

 But we can use these values and some of these are necessary for implementing builtins. For example, `List.get` returns tags, and it is not easy for us to create tags when composing LLVM. What is easier however, is:

-   ..writing `List.#getUnsafe` that has the dangerous signature of `List elem, U64 -> elem` in LLVM
-   ..writing `List elem, U64 -> Result elem [OutOfBounds]*` in a type safe way that uses `getUnsafe` internally, only after it checks if the `elem` at `U64` index exists.
+- ..writing `List.#getUnsafe` that has the dangerous signature of `List elem, U64 -> elem` in LLVM
+- ..writing `List elem, U64 -> Result elem [OutOfBounds]*` in a type safe way that uses `getUnsafe` internally, only after it checks if the `elem` at `U64` index exists.

 ### can/src/builtins.rs

@ -123,5 +123,5 @@ But replace `Num.atan`, the return value, and the return type with your new buil

 When implementing a new builtin, it is often easy to copy and paste the implementation for an existing builtin. This can take you quite far since many builtins are very similar, but it also risks forgetting to change one small part of what you copy and pasted and losing a lot of time later on when you cant figure out why things don't work. So, speaking from experience, even if you are copying an existing builtin, try and implement it manually without copying and pasting. Two recent instances of this (as of September 7th, 2020):

-   `List.keepIf` did not work for a long time because in builtins its `LowLevel` was `ListMap`. This was because I copy and pasted the `List.map` implementation in `builtins.rs
-   `List.walkBackwards` had mysterious memory bugs for a little while because in `unique.rs` its return type was `list_type(flex(b))` instead of `flex(b)` since it was copy and pasted from `List.keepIf`.
+- `List.keepIf` did not work for a long time because in builtins its `LowLevel` was `ListMap`. This was because I copy and pasted the `List.map` implementation in `builtins.rs`.
+- `List.walkBackwards` had mysterious memory bugs for a little while because in `unique.rs` its return type was `list_type(flex(b))` instead of `flex(b)` since it was copy and pasted from `List.keepIf`.
--- a/crates/compiler/builtins/bitcode/src/dec.zig
+++ b/crates/compiler/builtins/bitcode/src/dec.zig
@ -442,16 +442,16 @@ pub const RocDec = extern struct {
        const numerator_i128 = self.num;
        const denominator_i128 = other.num;

-        // (0 / n) is always 0
-        if (numerator_i128 == 0) {
-            return RocDec{ .num = 0 };
-        }
-
        // (n / 0) is an error
        if (denominator_i128 == 0) {
            roc_panic("Decimal division by 0!", 0);
        }

+        // (0 / n) is always 0
+        if (numerator_i128 == 0) {
+            return RocDec{ .num = 0 };
+        }
+
        // If they're both negative, or if neither is negative, the final answer
        // is positive or zero. If one is negative and the denominator isn't, the
        // final answer is negative (or zero, in which case final sign won't matter).
--- a/crates/compiler/builtins/bitcode/src/main.zig
+++ b/crates/compiler/builtins/bitcode/src/main.zig
@ -185,7 +185,7 @@ comptime {
 const str = @import("str.zig");
 comptime {
    exportStrFn(str.init, "init");
-    exportStrFn(str.strSplit, "str_split");
+    exportStrFn(str.strSplitOn, "str_split_on");
    exportStrFn(str.countSegments, "count_segments");
    exportStrFn(str.countUtf8Bytes, "count_utf8_bytes");
    exportStrFn(str.isEmpty, "is_empty");
--- a/crates/compiler/builtins/bitcode/src/str.zig
+++ b/crates/compiler/builtins/bitcode/src/str.zig
@ -598,14 +598,14 @@ fn strFromFloatHelp(comptime T: type, float: T) RocStr {
    return RocStr.init(&buf, result.len);
 }

-// Str.split
-pub fn strSplit(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
+// Str.splitOn
+pub fn strSplitOn(string: RocStr, delimiter: RocStr) callconv(.C) RocList {
    const segment_count = countSegments(string, delimiter);
    const list = RocList.allocate(@alignOf(RocStr), segment_count, @sizeOf(RocStr), true);

    if (list.bytes) |bytes| {
        const strings = @as([*]RocStr, @ptrCast(@alignCast(bytes)));
-        strSplitHelp(strings, string, delimiter);
+        strSplitOnHelp(strings, string, delimiter);
    }

    return list;
@ -625,7 +625,7 @@ fn initFromBigStr(slice_bytes: [*]u8, len: usize, alloc_ptr: usize) RocStr {
    };
 }

-fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
+fn strSplitOnHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
    if (delimiter.len() == 0) {
        string.incref(1);
        array[0] = string;
@ -650,7 +650,7 @@ fn strSplitHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
 }

 test "strSplitHelp: empty delimiter" {
-    // Str.split "abc" "" == ["abc"]
+    // Str.splitOn "abc" "" == ["abc"]
    const str_arr = "abc";
    const str = RocStr.init(str_arr, str_arr.len);

@ -660,7 +660,7 @@ test "strSplitHelp: empty delimiter" {
    var array: [1]RocStr = undefined;
    const array_ptr: [*]RocStr = &array;

-    strSplitHelp(array_ptr, str, delimiter);
+    strSplitOnHelp(array_ptr, str, delimiter);

    const expected = [1]RocStr{
        str,
@ -684,7 +684,7 @@ test "strSplitHelp: empty delimiter" {
 }

 test "strSplitHelp: no delimiter" {
-    // Str.split "abc" "!" == ["abc"]
+    // Str.splitOn "abc" "!" == ["abc"]
    const str_arr = "abc";
    const str = RocStr.init(str_arr, str_arr.len);

@ -694,7 +694,7 @@ test "strSplitHelp: no delimiter" {
    var array: [1]RocStr = undefined;
    const array_ptr: [*]RocStr = &array;

-    strSplitHelp(array_ptr, str, delimiter);
+    strSplitOnHelp(array_ptr, str, delimiter);

    const expected = [1]RocStr{
        str,
@ -731,7 +731,7 @@ test "strSplitHelp: empty start" {
    };
    const array_ptr: [*]RocStr = &array;

-    strSplitHelp(array_ptr, str, delimiter);
+    strSplitOnHelp(array_ptr, str, delimiter);

    const one = RocStr.init("a", 1);

@ -772,7 +772,7 @@ test "strSplitHelp: empty end" {
    };
    const array_ptr: [*]RocStr = &array;

-    strSplitHelp(array_ptr, str, delimiter);
+    strSplitOnHelp(array_ptr, str, delimiter);

    const one = RocStr.init("1", 1);
    const two = RocStr.init("2", 1);
@ -811,7 +811,7 @@ test "strSplitHelp: string equals delimiter" {
    };
    const array_ptr: [*]RocStr = &array;

-    strSplitHelp(array_ptr, str_delimiter, str_delimiter);
+    strSplitOnHelp(array_ptr, str_delimiter, str_delimiter);

    const expected = [2]RocStr{ RocStr.empty(), RocStr.empty() };

@ -846,7 +846,7 @@ test "strSplitHelp: delimiter on sides" {
        undefined,
    };
    const array_ptr: [*]RocStr = &array;
-    strSplitHelp(array_ptr, str, delimiter);
+    strSplitOnHelp(array_ptr, str, delimiter);

    const ghi_arr = "ghi";
    const ghi = RocStr.init(ghi_arr, ghi_arr.len);
@ -875,7 +875,7 @@ test "strSplitHelp: delimiter on sides" {
 }

 test "strSplitHelp: three pieces" {
-    // Str.split "a!b!c" "!" == ["a", "b", "c"]
+    // Str.splitOn "a!b!c" "!" == ["a", "b", "c"]
    const str_arr = "a!b!c";
    const str = RocStr.init(str_arr, str_arr.len);

@ -886,7 +886,7 @@ test "strSplitHelp: three pieces" {
    var array: [array_len]RocStr = undefined;
    const array_ptr: [*]RocStr = &array;

-    strSplitHelp(array_ptr, str, delimiter);
+    strSplitOnHelp(array_ptr, str, delimiter);

    const a = RocStr.init("a", 1);
    const b = RocStr.init("b", 1);
@ -916,7 +916,7 @@ test "strSplitHelp: three pieces" {
 }

 test "strSplitHelp: overlapping delimiter 1" {
-    // Str.split "aaa" "aa" == ["", "a"]
+    // Str.splitOn "aaa" "aa" == ["", "a"]
    const str_arr = "aaa";
    const str = RocStr.init(str_arr, str_arr.len);

@ -926,7 +926,7 @@ test "strSplitHelp: overlapping delimiter 1" {
    var array: [2]RocStr = undefined;
    const array_ptr: [*]RocStr = &array;

-    strSplitHelp(array_ptr, str, delimiter);
+    strSplitOnHelp(array_ptr, str, delimiter);

    const expected = [2]RocStr{
        RocStr.empty(),
@ -941,7 +941,7 @@ test "strSplitHelp: overlapping delimiter 1" {
 }

 test "strSplitHelp: overlapping delimiter 2" {
-    // Str.split "aaa" "aa" == ["", "a"]
+    // Str.splitOn "aaaa" "aa" == ["", "", ""]
    const str_arr = "aaaa";
    const str = RocStr.init(str_arr, str_arr.len);

@ -951,7 +951,7 @@ test "strSplitHelp: overlapping delimiter 2" {
    var array: [3]RocStr = undefined;
    const array_ptr: [*]RocStr = &array;

-    strSplitHelp(array_ptr, str, delimiter);
+    strSplitOnHelp(array_ptr, str, delimiter);

    const expected = [3]RocStr{
        RocStr.empty(),
@ -967,7 +967,7 @@ test "strSplitHelp: overlapping delimiter 2" {
    try expect(array[2].eq(expected[2]));
 }

-// This is used for `Str.split : Str, Str -> Array Str
+// This is used for `Str.splitOn : Str, Str -> List Str`
 // It is used to count how many segments the input `_str`
 // needs to be broken into, so that we can allocate a array
 // of that size. It always returns at least 1.
@ -985,7 +985,7 @@ pub fn countSegments(string: RocStr, delimiter: RocStr) callconv(.C) usize {
 }

 test "countSegments: long delimiter" {
-    // Str.split "str" "delimiter" == ["str"]
+    // Str.splitOn "str" "delimiter" == ["str"]
    // 1 segment
    const str_arr = "str";
    const str = RocStr.init(str_arr, str_arr.len);
@ -1003,7 +1003,7 @@ test "countSegments: long delimiter" {
 }

 test "countSegments: delimiter at start" {
-    // Str.split "hello there" "hello" == ["", " there"]
+    // Str.splitOn "hello there" "hello" == ["", " there"]
    // 2 segments
    const str_arr = "hello there";
    const str = RocStr.init(str_arr, str_arr.len);
@ -1022,7 +1022,7 @@ test "countSegments: delimiter at start" {
 }

 test "countSegments: delimiter interspered" {
-    // Str.split "a!b!c" "!" == ["a", "b", "c"]
+    // Str.splitOn "a!b!c" "!" == ["a", "b", "c"]
    // 3 segments
    const str_arr = "a!b!c";
    const str = RocStr.init(str_arr, str_arr.len);
@ -1041,7 +1041,7 @@ test "countSegments: delimiter interspered" {
 }

 test "countSegments: string equals delimiter" {
-    // Str.split "/" "/" == ["", ""]
+    // Str.splitOn "/" "/" == ["", ""]
    // 2 segments
    const str_delimiter_arr = "/";
    const str_delimiter = RocStr.init(str_delimiter_arr, str_delimiter_arr.len);
@ -1056,14 +1056,14 @@ test "countSegments: string equals delimiter" {
 }

 test "countSegments: overlapping delimiter 1" {
-    // Str.split "aaa" "aa" == ["", "a"]
+    // Str.splitOn "aaa" "aa" == ["", "a"]
    const segments_count = countSegments(RocStr.init("aaa", 3), RocStr.init("aa", 2));

    try expectEqual(segments_count, 2);
 }

 test "countSegments: overlapping delimiter 2" {
-    // Str.split "aaa" "aa" == ["", "a"]
+    // Str.splitOn "aaaa" "aa" == ["", "", ""]
    const segments_count = countSegments(RocStr.init("aaaa", 4), RocStr.init("aa", 2));

    try expectEqual(segments_count, 3);
--- a/crates/compiler/builtins/roc/List.roc
+++ b/crates/compiler/builtins/roc/List.roc
@ -55,7 +55,9 @@ module [
    findLastIndex,
    sublist,
    intersperse,
-    split,
+    splitAt,
+    splitOn,
+    splitOnList,
    splitFirst,
    splitLast,
    startsWith,
@ -70,6 +72,8 @@ module [
    countIf,
    chunksOf,
    concatUtf8,
+    forEach!,
+    forEachTry!,
 ]

 import Bool exposing [Bool, Eq]
@ -1026,7 +1030,7 @@ first = \list ->
 ## To remove elements from both the beginning and end of the list,
 ## use `List.sublist`.
 ##
-## To split the list into two lists, use `List.split`.
+## To split the list into two lists, use `List.splitAt`.
 ##
 takeFirst : List elem, U64 -> List elem
 takeFirst = \list, outputLength ->
@ -1046,7 +1050,7 @@ takeFirst = \list, outputLength ->
 ## To remove elements from both the beginning and end of the list,
 ## use `List.sublist`.
 ##
-## To split the list into two lists, use `List.split`.
+## To split the list into two lists, use `List.splitAt`.
 ##
 takeLast : List elem, U64 -> List elem
 takeLast = \list, outputLength ->
@ -1247,8 +1251,8 @@ endsWith = \list, suffix ->
 ## than the given index, # and the `others` list will be all the others. (This
 ## means if you give an index of 0, the `before` list will be empty and the
 ## `others` list will have the same elements as the original list.)
-split : List elem, U64 -> { before : List elem, others : List elem }
-split = \elements, userSplitIndex ->
+splitAt : List elem, U64 -> { before : List elem, others : List elem }
+splitAt = \elements, userSplitIndex ->
    length = List.len elements
    splitIndex = if length > userSplitIndex then userSplitIndex else length
    before = List.sublist elements { start: 0, len: splitIndex }
@ -1256,6 +1260,44 @@ split = \elements, userSplitIndex ->

    { before, others }

+## Splits the input list on the delimiter element.
+##
+## ```roc
+## List.splitOn [1, 2, 3] 2 == [[1], [3]]
+## ```
+splitOn : List a, a -> List (List a) where a implements Eq
+splitOn = \elements, delimiter ->
+    help = \remaining, chunks, currentChunk ->
+        when remaining is
+            [] -> List.append chunks currentChunk
+            [x, .. as rest] if x == delimiter ->
+                help rest (List.append chunks currentChunk) []
+
+            [x, .. as rest] ->
+                help rest chunks (List.append currentChunk x)
+    help elements [] []
+
+## Splits the input list on the delimiter list.
+##
+## ```roc
+## List.splitOnList [1, 2, 3] [1, 2] == [[], [3]]
+## ```
+splitOnList : List a, List a -> List (List a) where a implements Eq
+splitOnList = \elements, delimiter ->
+    help = \remaining, chunks, currentChunk ->
+        when remaining is
+            [] -> List.append chunks currentChunk
+            [x, .. as rest] ->
+                if List.startsWith remaining delimiter then
+                    help (List.dropFirst remaining (List.len delimiter)) (List.append chunks currentChunk) []
+                else
+                    help rest chunks (List.append currentChunk x)
+
+    if delimiter == [] then
+        [elements]
+    else
+        help elements [] []
+
 ## Returns the elements before the first occurrence of a delimiter, as well as the
 ## remaining elements after that occurrence. If the delimiter is not found, returns `Err`.
 ## ```roc
@ -1305,7 +1347,7 @@ chunksOfHelp = \listRest, chunkSize, chunks ->
    if List.isEmpty listRest then
        chunks
    else
-        { before, others } = List.split listRest chunkSize
+        { before, others } = List.splitAt listRest chunkSize
        chunksOfHelp others chunkSize (List.append chunks before)

 ## Like [List.map], except the transformation function returns a [Result].
@ -1383,3 +1425,44 @@ concatUtf8 : List U8, Str -> List U8

 expect (List.concatUtf8 [1, 2, 3, 4] "🐦") == [1, 2, 3, 4, 240, 159, 144, 166]

+## Run an effectful function for each element on the list.
+##
+## ```roc
+## List.forEach! ["Alice", "Bob", "Charlie"] \name ->
+##     createAccount! name
+##     log! "Account created"
+## ```
+##
+## If the function might fail or you need to return early, use [forEachTry!].
+forEach! : List a, (a => {}) => {}
+forEach! = \list, func! ->
+    when list is
+        [] ->
+            {}
+
+        [elem, .. as rest] ->
+            func! elem
+            forEach! rest func!
+
+## Run an effectful function that might fail for each element on the list.
+##
+## If the function returns `Err`, the iteration stops and the error is returned.
+##
+## ```roc
+## List.forEachTry! filesToDelete \path ->
+##     try File.delete! path
+##     Stdout.line! "$(path) deleted"
+## ```
+forEachTry! : List a, (a => Result {} err) => Result {} err
+forEachTry! = \list, func! ->
+    when list is
+        [] ->
+            Ok {}
+
+        [elem, .. as rest] ->
+            when func! elem is
+                Ok {} ->
+                    forEachTry! rest func!
+
+                Err err ->
+                    Err err
--- a/crates/compiler/builtins/roc/Result.roc
+++ b/crates/compiler/builtins/roc/Result.roc
@ -8,6 +8,7 @@ module [
    map2,
    try,
    onErr,
+    onErr!,
    withDefault,
 ]

@ -119,3 +120,16 @@ onErr = \result, transform ->
    when result is
        Ok v -> Ok v
        Err e -> transform e
+
+## Like [onErr], but it allows the transformation function to produce effects.
+##
+## ```roc
+## Result.onErr! (Err "missing user") \msg ->
+##     try Stdout.line! "ERROR: $(msg)"
+##     Err msg
+## ```
+onErr! : Result a err, (err => Result a otherErr) => Result a otherErr
+onErr! = \result, transform! ->
+    when result is
+        Ok v -> Ok v
+        Err e -> transform! e
--- a/crates/compiler/builtins/roc/Str.roc
+++ b/crates/compiler/builtins/roc/Str.roc
@ -253,7 +253,7 @@
 ##
 ## The way Roc organizes the `Str` module and supporting packages is designed to help answer this question. Every situation is different, but the following rules of thumb are typical:
 ##
-## * Most often, using `Str` values along with helper functions like [`split`](https://www.roc-lang.org/builtins/Str#split), [`joinWith`](https://www.roc-lang.org/builtins/Str#joinWith), and so on, is the best option.
+## * Most often, using `Str` values along with helper functions like [`splitOn`](https://www.roc-lang.org/builtins/Str#splitOn), [`joinWith`](https://www.roc-lang.org/builtins/Str#joinWith), and so on, is the best option.
 ## * If you are specifically implementing a parser, working in UTF-8 bytes is usually the best option. So functions like [`walkUtf8`](https://www.roc-lang.org/builtins/Str#walkUtf8), [toUtf8](https://www.roc-lang.org/builtins/Str#toUtf8), and so on. (Note that single-quote literals produce number literals, so ASCII-range literals like `'a'` gives an integer literal that works with a UTF-8 `U8`.)
 ## * If you are implementing a Unicode library like [roc-lang/unicode](https://github.com/roc-lang/unicode), working in terms of code points will be unavoidable. Aside from basic readability considerations like `\u(...)` in string literals, if you have the option to avoid working in terms of code points, it is almost always correct to avoid them.
 ## * If it seems like a good idea to split a string into "characters" (graphemes), you should definitely stop and reconsider whether this is really the best design. Almost always, doing this is some combination of more error-prone or slower (usually both) than doing something else that does not require taking graphemes into consideration.
@ -294,7 +294,7 @@
 ## Try putting this into `roc repl`:
 ##
 ## ```
-## » "foo/bar/baz" |> Str.split "/"
+## » "foo/bar/baz" |> Str.splitOn "/"
 ##
 ## ["foo", "bar", "baz"] : List Str
 ## ```
@ -304,7 +304,7 @@
 ## Now let's suppose they were long enough that this optimization no longer applied:
 ##
 ## ```
-## » "a much, much, much, much/longer/string compared to the last one!" |> Str.split "/"
+## » "a much, much, much, much/longer/string compared to the last one!" |> Str.splitOn "/"
 ##
 ## ["a much, much, much, much", "longer", "string compared to the last one!"] : List Str
 ## ```
@ -332,7 +332,7 @@ module [
    concat,
    isEmpty,
    joinWith,
-    split,
+    splitOn,
    repeat,
    countUtf8Bytes,
    toUtf8,
@ -499,10 +499,10 @@ joinWith : List Str, Str -> Str
 ## Passing `""` for the separator is not useful;
 ## it returns the original string wrapped in a [List].
 ## ```roc
-## expect Str.split "1,2,3" "," == ["1","2","3"]
-## expect Str.split "1,2,3" "" == ["1,2,3"]
+## expect Str.splitOn "1,2,3" "," == ["1","2","3"]
+## expect Str.splitOn "1,2,3" "" == ["1,2,3"]
 ## ```
-split : Str, Str -> List Str
+splitOn : Str, Str -> List Str

 ## Repeats a string the given number of times.
 ## ```roc
@ -518,7 +518,7 @@ repeat : Str, U64 -> Str

 ## Returns a [List] of the string's [U8] UTF-8 [code units](https://unicode.org/glossary/#code_unit).
 ## (To split the string into a [List] of smaller [Str] values instead of [U8] values,
-## see [Str.split].)
+## see [Str.splitOn].)
 ## ```roc
 ## expect Str.toUtf8 "Roc" == [82, 111, 99]
 ## expect Str.toUtf8 "鹏" == [233, 185, 143]
--- a/crates/compiler/builtins/src/bitcode.rs
+++ b/crates/compiler/builtins/src/bitcode.rs
@ -343,7 +343,7 @@ pub const STR_INIT: &str = "roc_builtins.str.init";
 pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
 pub const STR_CONCAT: &str = "roc_builtins.str.concat";
 pub const STR_JOIN_WITH: &str = "roc_builtins.str.joinWith";
-pub const STR_SPLIT: &str = "roc_builtins.str.str_split";
+pub const STR_SPLIT_ON: &str = "roc_builtins.str.str_split_on";
 pub const STR_COUNT_UTF8_BYTES: &str = "roc_builtins.str.count_utf8_bytes";
 pub const STR_IS_EMPTY: &str = "roc_builtins.str.is_empty";
 pub const STR_CAPACITY: &str = "roc_builtins.str.capacity";