Merge remote-tracking branch 'origin/main' into wip-website-updates

2025-09-29 14:54:47 +00:00 · 2023-10-21 08:59:48 -04:00 · 2023-10-21 08:59:48 -04:00 · 8232a06c86
commit 8232a06c86
parent 55f1e82dbb a0bd943671
155 changed files with 5002 additions and 3754 deletions
--- a/crates/compiler/builtins/bitcode/benchmark/dec.zig
+++ b/crates/compiler/builtins/bitcode/benchmark/dec.zig
@ -24,19 +24,25 @@ pub fn main() !void {
    try stdout.print("Warning: Timer seems to step in units of 41ns\n\n", .{});
    timer = try Timer.start();

-    const n = 10000;
+    const n = 1000;
+
+    // Add/Sub are too fast and need a higher n.
+    const add_sub_n = 10000;

    // This number are very close to 1 to avoid over and underflow.
-    const str1 = "1.00123";
    const f1 = 1.00123;
-    const dec1 = RocDec.fromStr(RocStr.init(str1, 3)).?;
+    const dec1 = RocDec.fromF64(f1).?;
+
+    // `asin` and `acos` have a limited range, so they will use this value.
+    const f2 = 0.00130000847;
+    const dec2 = RocDec.fromF64(f2).?;

    try stdout.print("Dec:\n", .{});
-    try stdout.print("{} additions took ", .{n});
-    const decAdd = try avg_runs(RocDec, n, RocDec.add, dec1);
+    try stdout.print("{} additions took ", .{add_sub_n});
+    const decAdd = try avg_runs(RocDec, add_sub_n, RocDec.add, dec1);

-    try stdout.print("{} subtractions took ", .{n});
-    const decSub = try avg_runs(RocDec, n, RocDec.sub, dec1);
+    try stdout.print("{} subtractions took ", .{add_sub_n});
+    const decSub = try avg_runs(RocDec, add_sub_n, RocDec.sub, dec1);

    try stdout.print("{} multiplications took ", .{n});
    const decMul = try avg_runs(RocDec, n, RocDec.mul, dec1);
@ -47,15 +53,27 @@ pub fn main() !void {
    try stdout.print("{} sin took ", .{n});
    const decSin = try avg_runs(RocDec, n, sinDec, dec1);

+    try stdout.print("{} cos took ", .{n});
+    const decCos = try avg_runs(RocDec, n, cosDec, dec1);
+
+    try stdout.print("{} tan took ", .{n});
+    const decTan = try avg_runs(RocDec, n, tanDec, dec1);
+
    try stdout.print("{} asin took ", .{n});
-    const decAsin = try avg_runs(RocDec, n, asinDec, dec1);
+    const decAsin = try avg_runs(RocDec, n, asinDec, dec2);
+
+    try stdout.print("{} acos took ", .{n});
+    const decAcos = try avg_runs(RocDec, n, acosDec, dec2);
+
+    try stdout.print("{} atan took ", .{n});
+    const decAtan = try avg_runs(RocDec, n, atanDec, dec1);

    try stdout.print("\n\nF64:\n", .{});
-    try stdout.print("{} additions took ", .{n});
-    const f64Add = try avg_runs(f64, n, addF64, f1);
+    try stdout.print("{} additions took ", .{add_sub_n});
+    const f64Add = try avg_runs(f64, add_sub_n, addF64, f1);

-    try stdout.print("{} subtractions took ", .{n});
-    const f64Sub = try avg_runs(f64, n, subF64, f1);
+    try stdout.print("{} subtractions took ", .{add_sub_n});
+    const f64Sub = try avg_runs(f64, add_sub_n, subF64, f1);

    try stdout.print("{} multiplications took ", .{n});
    const f64Mul = try avg_runs(f64, n, mulF64, f1);
@ -66,8 +84,20 @@ pub fn main() !void {
    try stdout.print("{} sin took ", .{n});
    const f64Sin = try avg_runs(f64, n, sinF64, f1);

+    try stdout.print("{} cos took ", .{n});
+    const f64Cos = try avg_runs(f64, n, cosF64, f1);
+
+    try stdout.print("{} tan took ", .{n});
+    const f64Tan = try avg_runs(f64, n, tanF64, f1);
+
    try stdout.print("{} asin took ", .{n});
-    const f64Asin = try avg_runs(f64, n, asinF64, f1);
+    const f64Asin = try avg_runs(f64, n, asinF64, f2);
+
+    try stdout.print("{} acos took ", .{n});
+    const f64Acos = try avg_runs(f64, n, acosF64, f2);
+
+    try stdout.print("{} atan took ", .{n});
+    const f64Atan = try avg_runs(f64, n, atanF64, f1);

    try stdout.print("\n\nDec/F64:\n", .{});
    try stdout.print("addition:       {d:0.2}\n", .{@intToFloat(f64, decAdd) / @intToFloat(f64, f64Add)});
@ -75,67 +105,72 @@ pub fn main() !void {
    try stdout.print("multiplication: {d:0.2}\n", .{@intToFloat(f64, decMul) / @intToFloat(f64, f64Mul)});
    try stdout.print("division:       {d:0.2}\n", .{@intToFloat(f64, decDiv) / @intToFloat(f64, f64Div)});
    try stdout.print("sin:            {d:0.2}\n", .{@intToFloat(f64, decSin) / @intToFloat(f64, f64Sin)});
+    try stdout.print("cos:            {d:0.2}\n", .{@intToFloat(f64, decCos) / @intToFloat(f64, f64Cos)});
+    try stdout.print("tan:            {d:0.2}\n", .{@intToFloat(f64, decTan) / @intToFloat(f64, f64Tan)});
    try stdout.print("asin:           {d:0.2}\n", .{@intToFloat(f64, decAsin) / @intToFloat(f64, f64Asin)});
+    try stdout.print("acos:           {d:0.2}\n", .{@intToFloat(f64, decAcos) / @intToFloat(f64, f64Acos)});
+    try stdout.print("atan:           {d:0.2}\n", .{@intToFloat(f64, decAtan) / @intToFloat(f64, f64Atan)});
 }

-fn avg_runs(comptime T: type, comptime n: usize, op: fn (T, T) T, v: T) !u64 {
+fn avg_runs(comptime T: type, comptime n: usize, comptime op: fn (T, T) T, v: T) !u64 {
    const stdout = std.io.getStdOut().writer();

-    const repeats = 1000;
-    var runs = [_]u64{0} ** repeats;
+    const warmups = 10000;
+    const repeats = 10000;
+    var runs = [_]u64{0} ** (warmups + repeats);

    var i: usize = 0;
-    while (i < repeats) : (i += 1) {
-        runs[i] = run(T, n, op, v);
+    while (i < warmups + repeats) : (i += 1) {
+        // Never inline run to ensure it doesn't optimize for the value of `v`.
+        runs[i] = callWrapper(u64, .never_inline, run, .{ T, n, op, v });
    }

-    std.sort.sort(u64, &runs, {}, comptime std.sort.asc(u64));
+    var real_runs = runs[warmups..runs.len];
+    std.sort.sort(u64, real_runs, {}, comptime std.sort.asc(u64));

-    const median = runs[runs.len / 2];
-    const highest = runs[runs.len - 1];
-    const lowest = runs[0];
+    const median = real_runs[real_runs.len / 2];
+    const highest = real_runs[real_runs.len - 1];
+    const lowest = real_runs[0];

    try stdout.print("{}ns (lowest: {}ns, highest: {}ns)\n", .{ median, lowest, highest });
    return median;
 }

-fn run(comptime T: type, comptime n: usize, op: fn (T, T) T, v: T) u64 {
+fn run(comptime T: type, comptime n: usize, comptime op: fn (T, T) T, v: T) u64 {
    var a = v;
    timer.reset();

    // Split into outer and inner loop to avoid breaking comptime.
-    comptime var outer = n / 500;
-    comptime var inner = std.math.min(n, 500);
+    const max_inline = 100;
+    comptime var outer = n / max_inline;
+    comptime var inner = std.math.min(n, max_inline);
    var i: usize = 0;
    while (i < outer) : (i += 1) {
        comptime var j = 0;
        inline while (j < inner) : (j += 1) {
-            a = op(a, v);
-
-            // Clobber a to avoid optimizations and removal of dead code.
-            asm volatile (""
-                :
-                : [a] "r,m" (&a),
-                : "memory"
-            );
+            a = callWrapper(T, .always_inline, op, .{ a, v });
        }
    }
-    comptime var rem = n % 500;
-    i = 0;
-    inline while (i < rem) : (i += 1) {
-        a = op(a, v);
-
-        // Clobber a to avoid optimizations and removal of dead code.
-        asm volatile (""
-            :
-            : [a] "r,m" (&a),
-            : "memory"
-        );
+    const rem = n % max_inline;
+    comptime var j = 0;
+    inline while (j < rem) : (j += 1) {
+        a = callWrapper(T, .always_inline, op, .{ a, v });
    }

+    // Clobber `a` to avoid removal as dead code.
+    asm volatile (""
+        :
+        : [a] "r,m" (&a),
+        : "memory"
+    );
    return timer.read();
 }

+// This is needed to work around a bug with using `@call` in loops.
+inline fn callWrapper(comptime T: type, call_modifier: anytype, comptime func: anytype, params: anytype) T {
+    return @call(.{ .modifier = call_modifier }, func, params);
+}
+
 fn addF64(x: f64, y: f64) f64 {
    return x + y;
 }
@ -151,13 +186,43 @@ fn divF64(x: f64, y: f64) f64 {
 fn sinF64(x: f64, _: f64) f64 {
    return std.math.sin(x);
 }
+fn cosF64(x: f64, _: f64) f64 {
+    return std.math.cos(x);
+}
+fn tanF64(x: f64, _: f64) f64 {
+    return std.math.tan(x);
+}
 fn asinF64(x: f64, _: f64) f64 {
    return std.math.asin(x);
 }
+const pi_over_2 = std.math.pi / 2.0;
+fn acosF64(x: f64, _: f64) f64 {
+    // acos is only stable if we subtract pi/2.
+    // The perf should be essentially the same because subtraction is much faster than acos.
+    return std.math.acos(x) - pi_over_2;
+}
+fn atanF64(x: f64, _: f64) f64 {
+    return std.math.atan(x);
+}

 fn sinDec(x: RocDec, _: RocDec) RocDec {
    return x.sin();
 }
+fn cosDec(x: RocDec, _: RocDec) RocDec {
+    return x.cos();
+}
+fn tanDec(x: RocDec, _: RocDec) RocDec {
+    return x.tan();
+}
 fn asinDec(x: RocDec, _: RocDec) RocDec {
    return x.asin();
 }
+const pi_over_2_dec = RocDec.fromF64(pi_over_2).?;
+fn acosDec(x: RocDec, _: RocDec) RocDec {
+    // acos is only stable if we subtract pi/2.
+    // The perf should be essentially the same because subtraction is much faster than acos.
+    return x.acos().sub(pi_over_2_dec);
+}
+fn atanDec(x: RocDec, _: RocDec) RocDec {
+    return x.atan();
+}
--- a/crates/compiler/builtins/bitcode/src/dec.zig
+++ b/crates/compiler/builtins/bitcode/src/dec.zig
@ -235,6 +235,11 @@ pub const RocDec = extern struct {
        return if (negated) |n| .{ .num = n } else null;
    }

+    pub fn abs(self: RocDec) !RocDec {
+        const absolute = try math.absInt(self.num);
+        return RocDec{ .num = absolute };
+    }
+
    pub fn addWithOverflow(self: RocDec, other: RocDec) WithOverflow(RocDec) {
        var answer: i128 = undefined;
        const overflowed = @addWithOverflow(i128, self.num, other.num, &answer);
@ -1244,6 +1249,11 @@ pub fn negateC(arg: RocDec) callconv(.C) i128 {
    return if (@call(.{ .modifier = always_inline }, RocDec.negate, .{arg})) |dec| dec.num else @panic("TODO overflow for negating RocDec");
 }

+pub fn absC(arg: RocDec) callconv(.C) i128 {
+    const result = @call(.{ .modifier = always_inline }, RocDec.abs, .{arg}) catch @panic("TODO overflow for calling absolute value on RocDec");
+    return result.num;
+}
+
 pub fn addC(arg1: RocDec, arg2: RocDec) callconv(.C) WithOverflow(RocDec) {
    return @call(.{ .modifier = always_inline }, RocDec.addWithOverflow, .{ arg1, arg2 });
 }
--- a/crates/compiler/builtins/bitcode/src/hash.zig
+++ b/crates/compiler/builtins/bitcode/src/hash.zig
@ -1,254 +0,0 @@
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2015-2021 Zig Contributors
-// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
-// The MIT license requires this copyright notice to be included in all copies
-// and substantial portions of the software.
-const std = @import("std");
-const str = @import("str.zig");
-const mem = std.mem;
-
-pub fn wyhash(seed: u64, bytes: ?[*]const u8, length: usize) callconv(.C) u64 {
-    if (bytes) |nonnull| {
-        const slice = nonnull[0..length];
-        return wyhash_hash(seed, slice);
-    } else {
-        return 42;
-    }
-}
-
-pub fn wyhash_rocstr(seed: u64, input: str.RocStr) callconv(.C) u64 {
-    return wyhash_hash(seed, input.asSlice());
-}
-
-const primes = [_]u64{
-    0xa0761d6478bd642f,
-    0xe7037ed1a0b428db,
-    0x8ebc6af09c88c6e3,
-    0x589965cc75374cc3,
-    0x1d8e4e27c47d124f,
-};
-
-fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
-    const T = std.meta.Int(.unsigned, 8 * bytes);
-    return mem.readIntLittle(T, data[0..bytes]);
-}
-
-fn read_8bytes_swapped(data: []const u8) u64 {
-    return (read_bytes(4, data) << 32 | read_bytes(4, data[4..]));
-}
-
-fn mum(a: u64, b: u64) u64 {
-    var r = std.math.mulWide(u64, a, b);
-    r = (r >> 64) ^ r;
-    return @truncate(u64, r);
-}
-
-fn mix0(a: u64, b: u64, seed: u64) u64 {
-    return mum(a ^ seed ^ primes[0], b ^ seed ^ primes[1]);
-}
-
-fn mix1(a: u64, b: u64, seed: u64) u64 {
-    return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]);
-}
-
-// Wyhash version which does not store internal state for handling partial buffers.
-// This is needed so that we can maximize the speed for the short key case, which will
-// use the non-iterative api which the public Wyhash exposes.
-const WyhashStateless = struct {
-    seed: u64,
-    msg_len: usize,
-
-    pub fn init(seed: u64) WyhashStateless {
-        return WyhashStateless{
-            .seed = seed,
-            .msg_len = 0,
-        };
-    }
-
-    fn round(self: *WyhashStateless, b: []const u8) void {
-        std.debug.assert(b.len == 32);
-
-        self.seed = mix0(
-            read_bytes(8, b[0..]),
-            read_bytes(8, b[8..]),
-            self.seed,
-        ) ^ mix1(
-            read_bytes(8, b[16..]),
-            read_bytes(8, b[24..]),
-            self.seed,
-        );
-    }
-
-    pub fn update(self: *WyhashStateless, b: []const u8) void {
-        std.debug.assert(b.len % 32 == 0);
-
-        var off: usize = 0;
-        while (off < b.len) : (off += 32) {
-            @call(.{ .modifier = .always_inline }, self.round, .{b[off .. off + 32]});
-        }
-
-        self.msg_len += b.len;
-    }
-
-    pub fn final(self: *WyhashStateless, b: []const u8) u64 {
-        std.debug.assert(b.len < 32);
-
-        const seed = self.seed;
-        const rem_len = @intCast(u5, b.len);
-        const rem_key = b[0..rem_len];
-
-        self.seed = switch (rem_len) {
-            0 => seed,
-            1 => mix0(read_bytes(1, rem_key), primes[4], seed),
-            2 => mix0(read_bytes(2, rem_key), primes[4], seed),
-            3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed),
-            4 => mix0(read_bytes(4, rem_key), primes[4], seed),
-            5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed),
-            6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed),
-            7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed),
-            8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed),
-            9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed),
-            10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed),
-            11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed),
-            12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed),
-            13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed),
-            14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed),
-            15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed),
-            16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed),
-            17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed),
-            18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed),
-            19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed),
-            20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed),
-            21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed),
-            22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed),
-            23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed),
-            24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed),
-            25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed),
-            26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed),
-            27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed),
-            28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed),
-            29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed),
-            30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
-            31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
-        };
-
-        self.msg_len += b.len;
-        return mum(self.seed ^ self.msg_len, primes[4]);
-    }
-
-    pub fn hash(seed: u64, input: []const u8) u64 {
-        const aligned_len = input.len - (input.len % 32);
-
-        var c = WyhashStateless.init(seed);
-        @call(.{ .modifier = .always_inline }, c.update, .{input[0..aligned_len]});
-        return @call(.{ .modifier = .always_inline }, c.final, .{input[aligned_len..]});
-    }
-};
-
-/// Fast non-cryptographic 64bit hash function.
-/// See https://github.com/wangyi-fudan/wyhash
-pub const Wyhash = struct {
-    state: WyhashStateless,
-
-    buf: [32]u8,
-    buf_len: usize,
-
-    pub fn init(seed: u64) Wyhash {
-        return Wyhash{
-            .state = WyhashStateless.init(seed),
-            .buf = undefined,
-            .buf_len = 0,
-        };
-    }
-
-    pub fn update(self: *Wyhash, b: []const u8) void {
-        var off: usize = 0;
-
-        if (self.buf_len != 0 and self.buf_len + b.len >= 32) {
-            off += 32 - self.buf_len;
-            mem.copy(u8, self.buf[self.buf_len..], b[0..off]);
-            self.state.update(self.buf[0..]);
-            self.buf_len = 0;
-        }
-
-        const remain_len = b.len - off;
-        const aligned_len = remain_len - (remain_len % 32);
-        self.state.update(b[off .. off + aligned_len]);
-
-        mem.copy(u8, self.buf[self.buf_len..], b[off + aligned_len ..]);
-        self.buf_len += @intCast(u8, b[off + aligned_len ..].len);
-    }
-
-    pub fn final(self: *Wyhash) u64 {
-        // const seed = self.state.seed;
-        // const rem_len = @intCast(u5, self.buf_len);
-        const rem_key = self.buf[0..self.buf_len];
-
-        return self.state.final(rem_key);
-    }
-
-    pub fn hash(seed: u64, input: []const u8) u64 {
-        return WyhashStateless.hash(seed, input);
-    }
-};
-
-fn wyhash_hash(seed: u64, input: []const u8) u64 {
-    return Wyhash.hash(seed, input);
-}
-
-const expectEqual = std.testing.expectEqual;
-
-test "test vectors" {
-    const hash = Wyhash.hash;
-
-    try expectEqual(hash(0, ""), 0x0);
-    try expectEqual(hash(1, "a"), 0xbed235177f41d328);
-    try expectEqual(hash(2, "abc"), 0xbe348debe59b27c3);
-    try expectEqual(hash(3, "message digest"), 0x37320f657213a290);
-    try expectEqual(hash(4, "abcdefghijklmnopqrstuvwxyz"), 0xd0b270e1d8a7019c);
-    try expectEqual(hash(5, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x602a1894d3bbfe7f);
-    try expectEqual(hash(6, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x829e9c148b75970e);
-}
-
-test "test vectors streaming" {
-    var wh = Wyhash.init(5);
-    for ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") |e| {
-        wh.update(mem.asBytes(&e));
-    }
-    try expectEqual(wh.final(), 0x602a1894d3bbfe7f);
-
-    const pattern = "1234567890";
-    const count = 8;
-    const result = 0x829e9c148b75970e;
-    try expectEqual(Wyhash.hash(6, pattern ** 8), result);
-
-    wh = Wyhash.init(6);
-    var i: u32 = 0;
-    while (i < count) : (i += 1) {
-        wh.update(pattern);
-    }
-    try expectEqual(wh.final(), result);
-}
-
-test "iterative non-divisible update" {
-    var buf: [8192]u8 = undefined;
-    for (buf) |*e, i| {
-        e.* = @truncate(u8, i);
-    }
-
-    const seed = 0x128dad08f;
-
-    var end: usize = 32;
-    while (end < buf.len) : (end += 32) {
-        const non_iterative_hash = Wyhash.hash(seed, buf[0..end]);
-
-        var wy = Wyhash.init(seed);
-        var i: usize = 0;
-        while (i < end) : (i += 33) {
-            wy.update(buf[i..std.math.min(i + 33, end)]);
-        }
-        const iterative_hash = wy.final();
-
-        try std.testing.expectEqual(iterative_hash, non_iterative_hash);
-    }
-}
--- a/crates/compiler/builtins/bitcode/src/main.zig
+++ b/crates/compiler/builtins/bitcode/src/main.zig
@ -18,40 +18,37 @@ const STR = "str";
 const dec = @import("dec.zig");

 comptime {
-    exportDecFn(dec.fromStr, "from_str");
-    exportDecFn(dec.toStr, "to_str");
-    exportDecFn(dec.fromU64C, "from_u64");
-    exportDecFn(dec.toI128, "to_i128");
-    exportDecFn(dec.toF64, "to_f64");
-    exportDecFn(dec.eqC, "eq");
-    exportDecFn(dec.neqC, "neq");
-    exportDecFn(dec.negateC, "negate");
-    exportDecFn(dec.divC, "div");
-    exportDecFn(dec.sinC, "sin");
-    exportDecFn(dec.cosC, "cos");
-    exportDecFn(dec.tanC, "tan");
-    exportDecFn(dec.asinC, "asin");
+    exportDecFn(dec.absC, "abs");
    exportDecFn(dec.acosC, "acos");
-    exportDecFn(dec.atanC, "atan");
-
    exportDecFn(dec.addC, "add_with_overflow");
    exportDecFn(dec.addOrPanicC, "add_or_panic");
    exportDecFn(dec.addSaturatedC, "add_saturated");
-
-    exportDecFn(dec.subC, "sub_with_overflow");
-    exportDecFn(dec.subOrPanicC, "sub_or_panic");
-    exportDecFn(dec.subSaturatedC, "sub_saturated");
-
+    exportDecFn(dec.asinC, "asin");
+    exportDecFn(dec.atanC, "atan");
+    exportDecFn(dec.cosC, "cos");
+    exportDecFn(dec.divC, "div");
+    exportDecFn(dec.eqC, "eq");
+    exportDecFn(dec.fromF32C, "from_float.f32");
+    exportDecFn(dec.fromF64C, "from_float.f64");
+    exportDecFn(dec.fromStr, "from_str");
+    exportDecFn(dec.fromU64C, "from_u64");
    exportDecFn(dec.mulC, "mul_with_overflow");
    exportDecFn(dec.mulOrPanicC, "mul_or_panic");
    exportDecFn(dec.mulSaturatedC, "mul_saturated");
+    exportDecFn(dec.negateC, "negate");
+    exportDecFn(dec.neqC, "neq");
+    exportDecFn(dec.sinC, "sin");
+    exportDecFn(dec.subC, "sub_with_overflow");
+    exportDecFn(dec.subOrPanicC, "sub_or_panic");
+    exportDecFn(dec.subSaturatedC, "sub_saturated");
+    exportDecFn(dec.tanC, "tan");
+    exportDecFn(dec.toF64, "to_f64");
+    exportDecFn(dec.toI128, "to_i128");
+    exportDecFn(dec.toStr, "to_str");

    inline for (INTEGERS) |T| {
        dec.exportFromInt(T, ROC_BUILTINS ++ ".dec.from_int.");
    }
-
-    exportDecFn(dec.fromF32C, "from_float.f32");
-    exportDecFn(dec.fromF64C, "from_float.f64");
 }

 // List Module
--- a/crates/compiler/builtins/bitcode/src/str.zig
+++ b/crates/compiler/builtins/bitcode/src/str.zig
@ -259,19 +259,41 @@ pub const RocStr = extern struct {
        const old_length = self.len();
        const delta_length = new_length - old_length;

-        var result = RocStr.allocate(new_length);
+        const element_width = 1;
+        const result_is_big = new_length >= SMALL_STRING_SIZE;

-        // transfer the memory
+        if (result_is_big) {
+            const capacity = utils.calculateCapacity(0, new_length, element_width);
+            var result = RocStr.allocateBig(new_length, capacity);

-        const source_ptr = self.asU8ptr();
-        const dest_ptr = result.asU8ptrMut();
+            // transfer the memory

-        @memcpy(dest_ptr, source_ptr, old_length);
-        @memset(dest_ptr + old_length, 0, delta_length);
+            const source_ptr = self.asU8ptr();
+            const dest_ptr = result.asU8ptrMut();

-        self.decref();
+            std.mem.copy(u8, dest_ptr[0..old_length], source_ptr[0..old_length]);
+            std.mem.set(u8, dest_ptr[old_length .. old_length + delta_length], 0);

-        return result;
+            self.decref();
+
+            return result;
+        } else {
+            var string = RocStr.empty();
+
+            // I believe taking this reference on the stack here is important for correctness.
+            // Doing it via a method call seemed to cause issues
+            const dest_ptr = @ptrCast([*]u8, &string);
+            dest_ptr[@sizeOf(RocStr) - 1] = @intCast(u8, new_length) | 0b1000_0000;
+
+            const source_ptr = self.asU8ptr();
+
+            std.mem.copy(u8, dest_ptr[0..old_length], source_ptr[0..old_length]);
+            std.mem.set(u8, dest_ptr[old_length .. old_length + delta_length], 0);
+
+            self.decref();
+
+            return string;
+        }
    }

    pub fn isSmallStr(self: RocStr) bool {
@ -1857,13 +1879,12 @@ const CountAndStart = extern struct {
 };

 pub fn fromUtf8RangeC(
-    output: *FromUtf8Result,
    list: RocList,
    start: usize,
    count: usize,
    update_mode: UpdateMode,
-) callconv(.C) void {
-    output.* = @call(.{ .modifier = always_inline }, fromUtf8Range, .{ list, start, count, update_mode });
+) callconv(.C) FromUtf8Result {
+    return fromUtf8Range(list, start, count, update_mode);
 }

 pub fn fromUtf8Range(arg: RocList, start: usize, count: usize, update_mode: UpdateMode) FromUtf8Result {