Merge branch 'trunk' of https://github.com/rtfeldman/roc into add-dec-types

Jared Ramirez 2021-07-08 16:47:42 -07:00
commit 67eef2c97f
82 changed files with 3791 additions and 2228 deletions


@ -8,16 +8,16 @@ const RocStr = str.RocStr;
pub const RocDec = extern struct {
num: i128,
pub const decimal_places: comptime u5 = 18;
pub const whole_number_places: comptime u5 = 21;
const max_digits: comptime u6 = 39;
const leading_zeros: comptime [17]u8 = "00000000000000000".*;
pub const decimal_places: u5 = 18;
pub const whole_number_places: u5 = 21;
const max_digits: u6 = 39;
const leading_zeros: [17]u8 = "00000000000000000".*;
pub const min: comptime RocDec = .{ .num = math.minInt(i128) };
pub const max: comptime RocDec = .{ .num = math.maxInt(i128) };
pub const min: RocDec = .{ .num = math.minInt(i128) };
pub const max: RocDec = .{ .num = math.maxInt(i128) };
pub const one_point_zero_i128: comptime i128 = comptime math.pow(i128, 10, RocDec.decimal_places);
pub const one_point_zero: comptime RocDec = .{ .num = one_point_zero_i128 };
pub const one_point_zero_i128: i128 = math.pow(i128, 10, RocDec.decimal_places);
pub const one_point_zero: RocDec = .{ .num = one_point_zero_i128 };
pub fn fromU64(num: u64) RocDec {
return .{ .num = num * one_point_zero_i128 };
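For context (not part of this commit): a RocDec is a fixed-point decimal, an i128 scaled by 10^decimal_places = 10^18, so one_point_zero_i128 is the representation of 1.0 and fromU64 only needs a multiply. A minimal standalone sketch of that scaling, with the constant written out instead of computed via math.pow:

// Sketch only: 1.0 as a RocDec-style scaled integer (18 fractional digits).
const one_point_zero_i128: i128 = 1_000_000_000_000_000_000; // 10^18

fn decFromU64(num: u64) i128 {
    // decFromU64(3) == 3_000_000_000_000_000_000, i.e. the RocDec value 3.0
    return @as(i128, num) * one_point_zero_i128;
}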
@ -388,7 +388,7 @@ fn mul_and_decimalize(a: u128, b: u128) i128 {
const lk = mul_u128(lhs_hi, rhs_hi);
const e = ea.hi;
const _a = ea.lo;
// const _a = ea.lo;
const g = gf.hi;
const f = gf.lo;


@ -568,14 +568,6 @@ pub fn dictKeys(dict: RocDict, alignment: Alignment, key_width: usize, value_wid
const data_bytes = length * key_width;
var ptr = allocateWithRefcount(data_bytes, alignment);
var offset = blk: {
if (alignment.keyFirst()) {
break :blk 0;
} else {
break :blk (dict.capacity() * value_width);
}
};
i = 0;
var copied: usize = 0;
while (i < size) : (i += 1) {
@ -617,14 +609,6 @@ pub fn dictValues(dict: RocDict, alignment: Alignment, key_width: usize, value_w
const data_bytes = length * value_width;
var ptr = allocateWithRefcount(data_bytes, alignment);
var offset = blk: {
if (alignment.keyFirst()) {
break :blk (dict.capacity() * key_width);
} else {
break :blk 0;
}
};
i = 0;
var copied: usize = 0;
while (i < size) : (i += 1) {
@ -644,7 +628,7 @@ pub fn dictValues(dict: RocDict, alignment: Alignment, key_width: usize, value_w
output.* = RocList{ .bytes = ptr, .length = length };
}
fn doNothing(ptr: Opaque) callconv(.C) void {
fn doNothing(_: Opaque) callconv(.C) void {
return;
}
@ -764,8 +748,6 @@ pub fn dictWalk(
key_width: usize,
value_width: usize,
accum_width: usize,
inc_key: Inc,
inc_value: Inc,
output: Opaque,
) callconv(.C) void {
const alignment_u32 = alignment.toU32();


@ -8,10 +8,9 @@ const str = @import("str.zig");
const mem = std.mem;
pub fn wyhash(seed: u64, bytes: ?[*]const u8, length: usize) callconv(.C) u64 {
const stdout = std.io.getStdOut().writer();
if (bytes) |nonnull| {
return wyhash_hash(seed, nonnull[0..length]);
const slice = nonnull[0..length];
return wyhash_hash(seed, slice);
} else {
return 42;
}


@ -12,6 +12,7 @@ const Opaque = ?[*]u8;
const Inc = fn (?[*]u8) callconv(.C) void;
const IncN = fn (?[*]u8, usize) callconv(.C) void;
const Dec = fn (?[*]u8) callconv(.C) void;
const HasTagId = fn (u16, ?[*]u8) callconv(.C) extern struct { matched: bool, data: ?[*]u8 };
pub const RocList = extern struct {
bytes: ?[*]u8,
@ -405,11 +406,14 @@ pub fn listKeepOks(
before_width: usize,
result_width: usize,
after_width: usize,
has_tag_id: HasTagId,
dec_result: Dec,
) callconv(.C) RocList {
const good_constructor: u16 = 1;
return listKeepResult(
list,
RocResult.isOk,
good_constructor,
caller,
data,
inc_n_data,
@ -418,6 +422,7 @@ pub fn listKeepOks(
before_width,
result_width,
after_width,
has_tag_id,
dec_result,
);
}
@ -432,11 +437,14 @@ pub fn listKeepErrs(
before_width: usize,
result_width: usize,
after_width: usize,
has_tag_id: HasTagId,
dec_result: Dec,
) callconv(.C) RocList {
const good_constructor: u16 = 0;
return listKeepResult(
list,
RocResult.isErr,
good_constructor,
caller,
data,
inc_n_data,
@ -445,13 +453,14 @@ pub fn listKeepErrs(
before_width,
result_width,
after_width,
has_tag_id,
dec_result,
);
}
pub fn listKeepResult(
list: RocList,
is_good_constructor: fn (RocResult) bool,
good_constructor: u16,
caller: Caller1,
data: Opaque,
inc_n_data: IncN,
@ -460,6 +469,7 @@ pub fn listKeepResult(
before_width: usize,
result_width: usize,
after_width: usize,
has_tag_id: HasTagId,
dec_result: Dec,
) RocList {
if (list.bytes) |source_ptr| {
@ -479,11 +489,14 @@ pub fn listKeepResult(
const before_element = source_ptr + (i * before_width);
caller(data, before_element, temporary);
const result = utils.RocResult{ .bytes = temporary };
const after_element = temporary + @sizeOf(i64);
if (is_good_constructor(result)) {
@memcpy(target_ptr + (kept * after_width), after_element, after_width);
// a record { matched: bool, data: ?[*]u8 }
// for now, that data pointer is just the input `temporary` pointer
// this will change in the future to only return a pointer to the
// payload of the tag
const answer = has_tag_id(good_constructor, temporary);
if (answer.matched) {
const contents = (answer.data orelse unreachable);
@memcpy(target_ptr + (kept * after_width), contents, after_width);
kept += 1;
} else {
dec_result(temporary);
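The new has_tag_id parameter replaces the old RocResult.isOk/isErr checks: the compiler now passes a callback that reports whether the value written into `temporary` carries the requested tag id (good_constructor is 1 for Ok in listKeepOks and 0 for Err in listKeepErrs) and hands back a pointer to its data. A hypothetical sketch of such a callback, assuming the discriminant sits in the value's first machine word; the real callback is generated by the Roc compiler and its layout may differ:

const MatchedData = extern struct { matched: bool, data: ?[*]u8 };

fn resultHasTagId(tag_id: u16, bytes: ?[*]u8) callconv(.C) MatchedData {
    const ptr = bytes orelse return .{ .matched = false, .data = null };
    // assumption: the tag discriminant is stored in the first usize of the value
    const stored = @ptrCast([*]const usize, @alignCast(8, ptr))[0];
    // per the comment above, `data` is currently the whole input pointer,
    // not yet a pointer to just the tag's payload
    return .{ .matched = stored == tag_id, .data = ptr };
}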
@ -606,12 +619,13 @@ pub fn listWalkUntil(
accum: Opaque,
alignment: u32,
element_width: usize,
continue_stop_width: usize,
accum_width: usize,
has_tag_id: HasTagId,
dec: Dec,
output: Opaque,
) callconv(.C) void {
// [ Continue a, Stop a ]
const CONTINUE: usize = 0;
if (accum_width == 0) {
return;
@ -622,9 +636,10 @@ pub fn listWalkUntil(
return;
}
const bytes_ptr: [*]u8 = utils.alloc(TAG_WIDTH + accum_width, alignment);
const bytes_ptr: [*]u8 = utils.alloc(continue_stop_width, alignment);
@memcpy(bytes_ptr + TAG_WIDTH, accum orelse unreachable, accum_width);
// NOTE: assumes data bytes are the first bytes in a tag
@memcpy(bytes_ptr, accum orelse unreachable, accum_width);
if (list.bytes) |source_ptr| {
var i: usize = 0;
@ -636,10 +651,12 @@ pub fn listWalkUntil(
inc_n_data(data, 1);
}
caller(data, element, bytes_ptr + TAG_WIDTH, bytes_ptr);
caller(data, element, bytes_ptr, bytes_ptr);
const usizes: [*]usize = @ptrCast([*]usize, @alignCast(8, bytes_ptr));
if (usizes[0] != 0) {
// [ Continue ..., Stop ]
const tag_id = has_tag_id(0, bytes_ptr);
if (!tag_id.matched) {
// decrement refcount of the remaining items
i += 1;
while (i < size) : (i += 1) {
@ -650,7 +667,7 @@ pub fn listWalkUntil(
}
}
@memcpy(output orelse unreachable, bytes_ptr + TAG_WIDTH, accum_width);
@memcpy(output orelse unreachable, bytes_ptr, accum_width);
utils.dealloc(bytes_ptr, alignment);
}
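With the TAG_WIDTH offsets gone, listWalkUntil relies on the [ Continue accum, Stop accum ] union keeping the accumulator payload at offset 0 (the NOTE above), so the accumulator is copied straight to and from bytes_ptr, and has_tag_id(0, bytes_ptr) asks whether the step is still Continue (tag id 0, matching the old CONTINUE constant). A hypothetical layout sketch for an i64 accumulator; the real field order and tag width come from the Roc compiler:

const WalkStep = extern struct {
    accum: i64, // "data bytes are the first bytes in a tag"
    tag_id: u8, // assumed width; 0 = Continue, 1 = Stop
};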
@ -904,7 +921,7 @@ inline fn swapHelp(width: usize, temporary: [*]u8, ptr1: [*]u8, ptr2: [*]u8) voi
}
fn swap(width_initial: usize, p1: [*]u8, p2: [*]u8) void {
const threshold: comptime usize = 64;
const threshold: usize = 64;
var width = width_initial;
@ -930,11 +947,6 @@ fn swap(width_initial: usize, p1: [*]u8, p2: [*]u8) void {
}
fn swapElements(source_ptr: [*]u8, element_width: usize, index_1: usize, index_2: usize) void {
const threshold: comptime usize = 64;
var buffer_actual: [threshold]u8 = undefined;
var buffer: [*]u8 = buffer_actual[0..];
var element_at_i = source_ptr + (index_1 * element_width);
var element_at_j = source_ptr + (index_2 * element_width);
@ -1009,7 +1021,23 @@ pub fn listConcat(list_a: RocList, list_b: RocList, alignment: u32, element_widt
return output;
}
// input: RocList,
pub fn listSetInPlace(
bytes: ?[*]u8,
index: usize,
element: Opaque,
element_width: usize,
dec: Dec,
) callconv(.C) ?[*]u8 {
// INVARIANT: bounds checking happens on the roc side
//
// at the time of writing, the function is implemented roughly as
// `if inBounds then LowLevelListGet input index item else input`
// so we don't do a bounds check here. Hence, the list is also non-empty,
// because inserting into an empty list is always out of bounds
return listSetInPlaceHelp(bytes, index, element, element_width, dec);
}
pub fn listSet(
bytes: ?[*]u8,
length: usize,
@ -1028,23 +1056,32 @@ pub fn listSet(
const ptr: [*]usize = @ptrCast([*]usize, @alignCast(8, bytes));
if ((ptr - 1)[0] == utils.REFCOUNT_ONE) {
// the element we will replace
var element_at_index = (bytes orelse undefined) + (index * element_width);
// decrement its refcount
dec(element_at_index);
// copy in the new element
@memcpy(element_at_index, element orelse undefined, element_width);
return bytes;
return listSetInPlaceHelp(bytes, index, element, element_width, dec);
} else {
return listSetClone(bytes, length, alignment, index, element, element_width, dec);
return listSetImmutable(bytes, length, alignment, index, element, element_width, dec);
}
}
inline fn listSetClone(
inline fn listSetInPlaceHelp(
bytes: ?[*]u8,
index: usize,
element: Opaque,
element_width: usize,
dec: Dec,
) ?[*]u8 {
// the element we will replace
var element_at_index = (bytes orelse undefined) + (index * element_width);
// decrement its refcount
dec(element_at_index);
// copy in the new element
@memcpy(element_at_index, element orelse undefined, element_width);
return bytes;
}
inline fn listSetImmutable(
old_bytes: ?[*]u8,
length: usize,
alignment: u32,
@ -1053,8 +1090,6 @@ inline fn listSetClone(
element_width: usize,
dec: Dec,
) ?[*]u8 {
@setCold(true);
const data_bytes = length * element_width;
var new_bytes = utils.allocateWithRefcount(data_bytes, alignment);
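The refactor above lets listSet choose between the in-place and cloning paths purely from the refcount word stored directly before the element data; REFCOUNT_ONE (defined later in this diff as the bit pattern of minInt(isize)) marks a list with exactly one reference, which is safe to mutate in place. A minimal sketch of that uniqueness check, not taken from this commit:

const std = @import("std");

// mirrors utils.REFCOUNT_ONE
const REFCOUNT_ONE: usize = @bitCast(usize, @as(isize, std.math.minInt(isize)));

fn isUniquelyReferenced(bytes: [*]u8) bool {
    // the refcount lives in the usize immediately before the first element
    const words = @ptrCast([*]const usize, @alignCast(8, bytes));
    return (words - 1)[0] == REFCOUNT_ONE;
}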


@ -41,6 +41,7 @@ comptime {
exportListFn(list.listConcat, "concat");
exportListFn(list.listDrop, "drop");
exportListFn(list.listSet, "set");
exportListFn(list.listSetInPlace, "set_in_place");
exportListFn(list.listSwap, "swap");
}


@ -17,7 +17,7 @@ const InPlace = packed enum(u8) {
const SMALL_STR_MAX_LENGTH = small_string_size - 1;
const small_string_size = 2 * @sizeOf(usize);
const blank_small_string: [16]u8 = init_blank_small_string(small_string_size);
const blank_small_string: [@sizeOf(RocStr)]u8 = init_blank_small_string(small_string_size);
fn init_blank_small_string(comptime n: usize) [n]u8 {
var prime_list: [n]u8 = undefined;
@ -85,12 +85,6 @@ pub const RocStr = extern struct {
}
}
pub fn toSlice(self: RocStr) []u8 {
const str_bytes_ptr: [*]u8 = self.str_bytes orelse unreachable;
const str_bytes: []u8 = str_bytes_ptr[0..self.str_len];
return str_bytes;
}
// This takes ownership of the pointed-to bytes if they won't fit in a
// small string, and returns a (pointer, len) tuple which points to them.
pub fn withCapacity(length: usize) RocStr {
@ -203,8 +197,8 @@ pub const RocStr = extern struct {
return result;
}
// NOTE: returns false for empty string!
pub fn isSmallStr(self: RocStr) bool {
// NOTE: returns False for empty string!
return @bitCast(isize, self.str_len) < 0;
}
@ -223,6 +217,82 @@ pub const RocStr = extern struct {
return self.len() == 0;
}
// If a string happens to be null-terminated already, then we can pass its
// bytes directly to functions (e.g. for opening files) that require
// null-terminated strings. Otherwise, we need to allocate and copy a new
// null-terminated string, which has a much higher performance cost!
fn isNullTerminated(self: RocStr) bool {
const len = self.len();
const longest_small_str = @sizeOf(RocStr) - 1;
// NOTE: We want to compare length here, *NOT* check for is_small_str!
// This is because we explicitly want the empty string to be handled in
// this branch, even though the empty string is not a small string.
//
// (The other branch dereferences the bytes pointer, which is not safe
// to do for the empty string.)
if (len <= longest_small_str) {
// If we're a small string, then usually the next byte after the
// end of the string will be zero. (Small strings set all their
// unused bytes to 0, so that comparison for equality can be fast.)
//
// However, empty strings are *not* null terminated, so if this is
// empty, it should return false.
//
// Also, if we are exactly a maximum-length small string,
// then the next byte is off the end of the struct;
// in that case, we are also not null-terminated!
return len != 0 and len != longest_small_str;
} else {
// This is a big string, and it's not empty, so we can safely
// dereference the pointer.
const ptr: [*]usize = @ptrCast([*]usize, @alignCast(8, self.str_bytes));
const capacity_or_refcount: isize = (ptr - 1)[0];
// If capacity_or_refcount is positive, then it's a capacity value.
//
// If we have excess capacity, then we can safely read the next
// byte after the end of the string. Maybe it happens to be zero!
if (capacity_or_refcount > @intCast(isize, len)) {
return self.str_bytes[len] == 0;
} else {
// This string was refcounted or immortal; we can't safely read
// the next byte, so assume the string is not null-terminated.
return false;
}
}
}
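A worked example of the small-string branch above (not from this commit), assuming the usual RocStr small-string encoding in which the struct's final byte holds the length with the high bit set and the unused bytes are zeroed:

// A 16-byte RocStr holding the small string "hello" (len 5):
//   'h' 'e' 'l' 'l' 'o' 0 0 0 0 0 0 0 0 0 0 0x85
//                       ^ byte[len] == 0, so the bytes already form a valid C string
//   (0x85 = length 5 with the small-string bit set)
// A 15-byte small string is not null-terminated because byte[15] is the length
// marker, and the empty string is not because it has no buffer to read at all.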
// Returns (@sizeOf(RocStr) - 1) for small strings and the empty string.
// Returns 0 for refcounted strings and immortal strings.
// Returns the stored capacity value for all other strings.
pub fn capacity(self: RocStr) usize {
const len = self.len();
const longest_small_str = @sizeOf(RocStr) - 1;
if (len <= longest_small_str) {
// Note that although empty strings technically have the full
// capacity of a small string available, they aren't marked as small
// strings, so if you want to make use of that capacity, you need
// to first change its flag to mark it as a small string!
return longest_small_str;
} else {
const ptr: [*]usize = @ptrCast([*]usize, @alignCast(8, self.str_bytes));
const capacity_or_refcount: isize = (ptr - 1)[0];
if (capacity_or_refcount > 0) {
// If capacity_or_refcount is positive, that means it's a
// capacity value.
return capacity_or_refcount;
} else {
// This is either a refcount or else this big string is stored
// in a readonly section; either way, it has no capacity,
// because we cannot mutate it in-place!
return 0;
}
}
}
pub fn isUnique(self: RocStr) bool {
// the empty string is unique (in the sense that copying it will not leak memory)
if (self.isEmpty()) {
@ -240,15 +310,13 @@ pub const RocStr = extern struct {
}
pub fn asSlice(self: RocStr) []u8 {
// Since this conditional would be prone to branch misprediction,
// make sure it will compile to a cmov.
return self.asU8ptr()[0..self.len()];
}
pub fn asU8ptr(self: RocStr) [*]u8 {
// Since this conditional would be prone to branch misprediction,
// make sure it will compile to a cmov.
return if (self.isSmallStr() or self.isEmpty()) (&@bitCast([16]u8, self)) else (@ptrCast([*]u8, self.str_bytes));
return if (self.isSmallStr() or self.isEmpty()) (&@bitCast([@sizeOf(RocStr)]u8, self)) else (@ptrCast([*]u8, self.str_bytes));
}
// Given a pointer to some bytes, write the first (len) bytes of this
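Both isNullTerminated and capacity above read the word just before the string data: for non-small strings, str_bytes is assumed to point immediately past a usize header holding either a positive capacity or a non-positive refcount/immortal marker, which is what the (ptr - 1)[0] reads fetch. A sketch of that assumed layout and read, not taken from this commit:

// Assumed big-string layout:
//   [ capacity (> 0) or refcount/immortal (<= 0) : usize ][ utf-8 bytes ... ]
//                                                          ^ str_bytes points here
fn headerWord(str_bytes: [*]u8) isize {
    const words = @ptrCast([*]const isize, @alignCast(8, str_bytes));
    return (words - 1)[0];
}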


@ -20,18 +20,18 @@ comptime {
}
}
fn testing_roc_alloc(size: usize, alignment: u32) callconv(.C) ?*c_void {
fn testing_roc_alloc(size: usize, _: u32) callconv(.C) ?*c_void {
return @ptrCast(?*c_void, std.testing.allocator.alloc(u8, size) catch unreachable);
}
fn testing_roc_realloc(c_ptr: *c_void, new_size: usize, old_size: usize, alignment: u32) callconv(.C) ?*c_void {
fn testing_roc_realloc(c_ptr: *c_void, new_size: usize, old_size: usize, _: u32) callconv(.C) ?*c_void {
const ptr = @ptrCast([*]u8, @alignCast(16, c_ptr));
const slice = ptr[0..old_size];
return @ptrCast(?*c_void, std.testing.allocator.realloc(slice, new_size) catch unreachable);
}
fn testing_roc_dealloc(c_ptr: *c_void, alignment: u32) callconv(.C) void {
fn testing_roc_dealloc(c_ptr: *c_void, _: u32) callconv(.C) void {
const ptr = @ptrCast([*]u8, @alignCast(16, c_ptr));
std.testing.allocator.destroy(ptr);
@ -53,8 +53,8 @@ pub const Inc = fn (?[*]u8) callconv(.C) void;
pub const IncN = fn (?[*]u8, u64) callconv(.C) void;
pub const Dec = fn (?[*]u8) callconv(.C) void;
const REFCOUNT_MAX_ISIZE: comptime isize = 0;
pub const REFCOUNT_ONE_ISIZE: comptime isize = std.math.minInt(isize);
const REFCOUNT_MAX_ISIZE: isize = 0;
pub const REFCOUNT_ONE_ISIZE: isize = std.math.minInt(isize);
pub const REFCOUNT_ONE: usize = @bitCast(usize, REFCOUNT_ONE_ISIZE);
pub const IntWidth = enum(u8) {
@ -110,7 +110,7 @@ pub fn allocateWithRefcount(
data_bytes: usize,
alignment: u32,
) [*]u8 {
comptime const result_in_place = false;
const result_in_place = false;
switch (alignment) {
16 => {