mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-03 03:42:17 +00:00
remove all compiler-rt and libc code ports from zig bitcode
This commit is contained in:
parent be06599bb6
commit 797ba64003
15 changed files with 0 additions and 1577 deletions
@ -1,478 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const math = std.math;

// Eventually, we need to statically ingest compiler-rt and get it working with the surgical linker; then these should not be needed anymore.
// Until then, we are manually ingesting the used parts of compiler-rt here.
//
// Taken from
// https://github.com/ziglang/zig/tree/4976b58ab16069f8d3267b69ed030f29685c1abe/lib/compiler_rt/
// Thank you Zig Contributors!

// Libcalls that involve u128 on Windows x86-64 are expected by LLVM to use the
// calling convention of @Vector(2, u64), rather than what's standard.
pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and @import("builtin").object_format != .c;

const v2u64 = @Vector(2, u64);

// Export these as weak in case they are already linked in by something else.
comptime {
    if (!want_windows_v2u64_abi) {
        @export(__muloti4, .{ .name = "__muloti4", .linkage = .Weak });
        @export(__lshrti3, .{ .name = "__lshrti3", .linkage = .Weak });
        @export(__divti3, .{ .name = "__divti3", .linkage = .Weak });
        @export(__modti3, .{ .name = "__modti3", .linkage = .Weak });
        @export(__umodti3, .{ .name = "__umodti3", .linkage = .Weak });
        @export(__udivti3, .{ .name = "__udivti3", .linkage = .Weak });
        @export(__fixdfti, .{ .name = "__fixdfti", .linkage = .Weak });
        @export(__fixsfti, .{ .name = "__fixsfti", .linkage = .Weak });
        @export(__fixunsdfti, .{ .name = "__fixunsdfti", .linkage = .Weak });
        @export(__fixunssfti, .{ .name = "__fixunssfti", .linkage = .Weak });
    }
}

pub fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    if (2 * @bitSizeOf(i128) <= @bitSizeOf(usize)) {
        return muloXi4_genericFast(i128, a, b, overflow);
    } else {
        return muloXi4_genericSmall(i128, a, b, overflow);
    }
}

pub fn __divti3(a: i128, b: i128) callconv(.C) i128 {
    return div(a, b);
}

fn __divti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    return @as(v2u64, @bitCast(div(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
}

inline fn div(a: i128, b: i128) i128 {
    const s_a = a >> (128 - 1);
    const s_b = b >> (128 - 1);

    const an = (a ^ s_a) -% s_a;
    const bn = (b ^ s_b) -% s_b;

    const r = udivmod(u128, @as(u128, @bitCast(an)), @as(u128, @bitCast(bn)), null);
    const s = s_a ^ s_b;
    return (@as(i128, @bitCast(r)) ^ s) -% s;
}

pub fn __udivti3(a: u128, b: u128) callconv(.C) u128 {
    return udivmod(u128, a, b, null);
}

fn __udivti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    return @as(v2u64, @bitCast(udivmod(u128, @as(u128, @bitCast(a)), @as(u128, @bitCast(b)), null)));
}

pub fn __umodti3(a: u128, b: u128) callconv(.C) u128 {
    var r: u128 = undefined;
    _ = udivmod(u128, a, b, &r);
    return r;
}

fn __umodti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    var r: u128 = undefined;
    _ = udivmod(u128, @as(u128, @bitCast(a)), @as(u128, @bitCast(b)), &r);
    return @as(v2u64, @bitCast(r));
}

pub fn __modti3(a: i128, b: i128) callconv(.C) i128 {
    return mod(a, b);
}

fn __modti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    return @as(v2u64, @bitCast(mod(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
}

inline fn mod(a: i128, b: i128) i128 {
    const s_a = a >> (128 - 1); // s = a < 0 ? -1 : 0
    const s_b = b >> (128 - 1); // s = b < 0 ? -1 : 0

    const an = (a ^ s_a) -% s_a; // negate if s == -1
    const bn = (b ^ s_b) -% s_b; // negate if s == -1

    var r: u128 = undefined;
    _ = udivmod(u128, @as(u128, @bitCast(an)), @as(u128, @bitCast(bn)), &r);
    return (@as(i128, @bitCast(r)) ^ s_a) -% s_a; // negate if s == -1
}

pub fn __fixdfti(a: f64) callconv(.C) i128 {
    return floatToInt(i128, a);
}

fn __fixdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
    return @as(v2u64, @bitCast(floatToInt(i128, a)));
}

pub fn __fixsfti(a: f32) callconv(.C) i128 {
    return floatToInt(i128, a);
}

fn __fixsfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
    return @as(v2u64, @bitCast(floatToInt(i128, a)));
}

pub fn __fixunsdfti(a: f64) callconv(.C) u128 {
    return floatToInt(u128, a);
}

fn __fixunsdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
    return @as(v2u64, @bitCast(floatToInt(u128, a)));
}

pub fn __fixunssfti(a: f32) callconv(.C) u128 {
    return floatToInt(u128, a);
}

fn __fixunssfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
    return @as(v2u64, @bitCast(floatToInt(u128, a)));
}

// mulo - multiplication with overflow detection
// * returns a *% b
// * sets overflow to 1 if a * b overflows, else 0
// - muloXi4_genericSmall as the default
// - muloXi4_genericFast when 2 * bitsize <= usize

inline fn muloXi4_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    overflow.* = 0;
    const min = math.minInt(ST);
    var res: ST = a *% b;
    // Hacker's Delight, section "Overflow", subsection "Multiplication":
    // the case a = -2^{31}, b = -1 is a problem, because on some machines
    // a*b = -2^{31} with overflow. Then -2^{31}/-1 overflows and any result
    // is possible.
    // => check with a < 0 and b == -2^{31}
    if ((a < 0 and b == min) or (a != 0 and @divTrunc(res, a) != b))
        overflow.* = 1;
    return res;
}

inline fn muloXi4_genericFast(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    overflow.* = 0;
    const EST = switch (ST) {
        i32 => i64,
        i64 => i128,
        i128 => i256,
        else => unreachable,
    };
    const min = math.minInt(ST);
    const max = math.maxInt(ST);
    var res: EST = @as(EST, a) * @as(EST, b);
    // invariant: -2^{bitwidth(EST)} < res < 2^{bitwidth(EST)-1}
    if (res < min or max < res)
        overflow.* = 1;
    return @as(ST, @truncate(res));
}
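As a quick illustration of the `__muloti4` contract described above (wrapped product in the return value, overflow reported through the out-parameter), a minimal test sketch — not part of the deleted file — could look like this:

test "__muloti4 reports overflow through the out-parameter" {
    var overflow: c_int = undefined;
    // 10 * 12 fits in i128: the exact product is returned and the flag is 0.
    try std.testing.expectEqual(@as(i128, 120), __muloti4(10, 12, &overflow));
    try std.testing.expectEqual(@as(c_int, 0), overflow);
    // maxInt(i128) * 2 overflows: the wrapped product is returned and the flag is 1.
    _ = __muloti4(math.maxInt(i128), 2, &overflow);
    try std.testing.expectEqual(@as(c_int, 1), overflow);
}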

const native_endian = builtin.cpu.arch.endian();
const low = switch (native_endian) {
    .Big => 1,
    .Little => 0,
};
const high = 1 - low;

pub fn udivmod(comptime DoubleInt: type, a: DoubleInt, b: DoubleInt, maybe_rem: ?*DoubleInt) DoubleInt {
    // @setRuntimeSafety(builtin.is_test);

    const double_int_bits = @typeInfo(DoubleInt).Int.bits;
    const single_int_bits = @divExact(double_int_bits, 2);
    const SingleInt = std.meta.Int(.unsigned, single_int_bits);
    const SignedDoubleInt = std.meta.Int(.signed, double_int_bits);
    const Log2SingleInt = std.math.Log2Int(SingleInt);

    const n = @as([2]SingleInt, @bitCast(a));
    const d = @as([2]SingleInt, @bitCast(b));
    var q: [2]SingleInt = undefined;
    var r: [2]SingleInt = undefined;
    var sr: c_uint = undefined;
    // special cases, X is unknown, K != 0
    if (n[high] == 0) {
        if (d[high] == 0) {
            // 0 X
            // ---
            // 0 X
            if (maybe_rem) |rem| {
                rem.* = n[low] % d[low];
            }
            return n[low] / d[low];
        }
        // 0 X
        // ---
        // K X
        if (maybe_rem) |rem| {
            rem.* = n[low];
        }
        return 0;
    }
    // n[high] != 0
    if (d[low] == 0) {
        if (d[high] == 0) {
            // K X
            // ---
            // 0 0
            if (maybe_rem) |rem| {
                rem.* = n[high] % d[low];
            }
            return n[high] / d[low];
        }
        // d[high] != 0
        if (n[low] == 0) {
            // K 0
            // ---
            // K 0
            if (maybe_rem) |rem| {
                r[high] = n[high] % d[high];
                r[low] = 0;
                rem.* = @as(DoubleInt, @bitCast(r));
            }
            return n[high] / d[high];
        }
        // K K
        // ---
        // K 0
        if ((d[high] & (d[high] - 1)) == 0) {
            // d is a power of 2
            if (maybe_rem) |rem| {
                r[low] = n[low];
                r[high] = n[high] & (d[high] - 1);
                rem.* = @as(DoubleInt, @bitCast(r));
            }
            return n[high] >> @as(Log2SingleInt, @intCast(@ctz(d[high])));
        }
        // K K
        // ---
        // K 0
        sr = @as(c_uint, @bitCast(@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))));
        // 0 <= sr <= single_int_bits - 2 or sr large
        if (sr > single_int_bits - 2) {
            if (maybe_rem) |rem| {
                rem.* = a;
            }
            return 0;
        }
        sr += 1;
        // 1 <= sr <= single_int_bits - 1
        // q.all = a << (double_int_bits - sr);
        q[low] = 0;
        q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
        // r.all = a >> sr;
        r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
        r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
    } else {
        // d[low] != 0
        if (d[high] == 0) {
            // K X
            // ---
            // 0 K
            if ((d[low] & (d[low] - 1)) == 0) {
                // d is a power of 2
                if (maybe_rem) |rem| {
                    rem.* = n[low] & (d[low] - 1);
                }
                if (d[low] == 1) {
                    return a;
                }
                sr = @ctz(d[low]);
                q[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
                q[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
                return @as(DoubleInt, @bitCast(q));
            }
            // K X
            // ---
            // 0 K
            sr = 1 + single_int_bits + @as(c_uint, @clz(d[low])) - @as(c_uint, @clz(n[high]));
            // 2 <= sr <= double_int_bits - 1
            // q.all = a << (double_int_bits - sr);
            // r.all = a >> sr;
            if (sr == single_int_bits) {
                q[low] = 0;
                q[high] = n[low];
                r[high] = 0;
                r[low] = n[high];
            } else if (sr < single_int_bits) {
                // 2 <= sr <= single_int_bits - 1
                q[low] = 0;
                q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
                r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
                r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
            } else {
                // single_int_bits + 1 <= sr <= double_int_bits - 1
                q[low] = n[low] << @as(Log2SingleInt, @intCast(double_int_bits - sr));
                q[high] = (n[high] << @as(Log2SingleInt, @intCast(double_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr - single_int_bits)));
                r[high] = 0;
                r[low] = n[high] >> @as(Log2SingleInt, @intCast(sr - single_int_bits));
            }
        } else {
            // K X
            // ---
            // K K
            sr = @as(c_uint, @bitCast(@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))));
            // 0 <= sr <= single_int_bits - 1 or sr large
            if (sr > single_int_bits - 1) {
                if (maybe_rem) |rem| {
                    rem.* = a;
                }
                return 0;
            }
            sr += 1;
            // 1 <= sr <= single_int_bits
            // q.all = a << (double_int_bits - sr);
            // r.all = a >> sr;
            q[low] = 0;
            if (sr == single_int_bits) {
                q[high] = n[low];
                r[high] = 0;
                r[low] = n[high];
            } else {
                r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
                r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
                q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
            }
        }
    }
    // Not a special case
    // q and r are initialized with:
    // q.all = a << (double_int_bits - sr);
    // r.all = a >> sr;
    // 1 <= sr <= double_int_bits - 1
    var carry: u32 = 0;
    var r_all: DoubleInt = undefined;
    while (sr > 0) : (sr -= 1) {
        // r:q = ((r:q) << 1) | carry
        r[high] = (r[high] << 1) | (r[low] >> (single_int_bits - 1));
        r[low] = (r[low] << 1) | (q[high] >> (single_int_bits - 1));
        q[high] = (q[high] << 1) | (q[low] >> (single_int_bits - 1));
        q[low] = (q[low] << 1) | carry;
        // carry = 0;
        // if (r.all >= b) {
        //     r.all -= b;
        //     carry = 1;
        // }
        r_all = @as(DoubleInt, @bitCast(r));
        const s: SignedDoubleInt = @as(SignedDoubleInt, @bitCast(b -% r_all -% 1)) >> (double_int_bits - 1);
        carry = @as(u32, @intCast(s & 1));
        r_all -= b & @as(DoubleInt, @bitCast(s));
        r = @as([2]SingleInt, @bitCast(r_all));
    }
    const q_all = (@as(DoubleInt, @bitCast(q)) << 1) | carry;
    if (maybe_rem) |rem| {
        rem.* = r_all;
    }
    return q_all;
}
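Since `udivmod` produces both quotient and remainder from the shift-and-subtract loop above, a simple sanity check (a sketch, not from the deleted file) is the division identity a == q*b + r with r < b:

test "udivmod satisfies the division identity" {
    const a: u128 = 0xDEAD_BEEF_0123_4567_89AB_CDEF_FEDC_BA98;
    const b: u128 = 0x1_0000_0001;
    var r: u128 = undefined;
    const q = udivmod(u128, a, b, &r);
    try std.testing.expect(r < b);
    try std.testing.expectEqual(a, q *% b +% r);
}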

pub inline fn floatToInt(comptime I: type, a: anytype) I {
    const Log2Int = math.Log2Int;
    const Int = @import("std").meta.Int;
    const F = @TypeOf(a);
    const float_bits = @typeInfo(F).Float.bits;
    const int_bits = @typeInfo(I).Int.bits;
    const rep_t = Int(.unsigned, float_bits);
    const sig_bits = math.floatMantissaBits(F);
    const exp_bits = math.floatExponentBits(F);
    const fractional_bits = floatFractionalBits(F);

    // const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0;
    const implicit_bit = @as(rep_t, 1) << sig_bits;
    const max_exp = (1 << (exp_bits - 1));
    const exp_bias = max_exp - 1;
    const sig_mask = (@as(rep_t, 1) << sig_bits) - 1;

    // Break a into sign, exponent, significand
    const a_rep: rep_t = @as(rep_t, @bitCast(a));
    const negative = (a_rep >> (float_bits - 1)) != 0;
    const exponent = @as(i32, @intCast((a_rep << 1) >> (sig_bits + 1))) - exp_bias;
    const significand: rep_t = (a_rep & sig_mask) | implicit_bit;

    // If the exponent is negative, the result rounds to zero.
    if (exponent < 0) return 0;

    // If the value is too large for the integer type, saturate.
    switch (@typeInfo(I).Int.signedness) {
        .unsigned => {
            if (negative) return 0;
            if (@as(c_uint, @intCast(exponent)) >= @min(int_bits, max_exp)) return math.maxInt(I);
        },
        .signed => if (@as(c_uint, @intCast(exponent)) >= @min(int_bits - 1, max_exp)) {
            return if (negative) math.minInt(I) else math.maxInt(I);
        },
    }

    // If 0 <= exponent < sig_bits, right shift to get the result.
    // Otherwise, shift left.
    var result: I = undefined;
    if (exponent < fractional_bits) {
        result = @as(I, @intCast(significand >> @as(Log2Int(rep_t), @intCast(fractional_bits - exponent))));
    } else {
        result = @as(I, @intCast(significand)) << @as(Log2Int(I), @intCast(exponent - fractional_bits));
    }

    if ((@typeInfo(I).Int.signedness == .signed) and negative)
        return ~result +% 1;
    return result;
}
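The saturating behavior the comments above describe is easy to exercise through the exported wrappers; a small test sketch (assumed, not part of the original file):

test "floatToInt saturates instead of invoking UB" {
    // Negative inputs clamp to zero for unsigned targets.
    try std.testing.expectEqual(@as(u128, 0), __fixunssfti(-123.0));
    // Values beyond the integer range clamp to the extremes (1e40 > maxInt(i128) ~ 1.7e38).
    try std.testing.expectEqual(@as(i128, math.maxInt(i128)), __fixdfti(1.0e40));
    try std.testing.expectEqual(@as(i128, math.minInt(i128)), __fixdfti(-1.0e40));
}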

/// Returns the number of fractional bits in the mantissa of floating point type T.
pub inline fn floatFractionalBits(comptime T: type) comptime_int {
    comptime std.debug.assert(@typeInfo(T) == .Float);

    // Standard IEEE floats have an implicit 0.m or 1.m integer part.
    // f80 is special and has an explicitly stored bit in the MSB.
    // This function corresponds to `MANT_DIG - 1` from C.
    return switch (@typeInfo(T).Float.bits) {
        16 => 10,
        32 => 23,
        64 => 52,
        80 => 63,
        128 => 112,
        else => @compileError("unknown floating point type " ++ @typeName(T)),
    };
}

pub fn __lshrti3(a: i128, b: i32) callconv(.C) i128 {
    return lshrXi3(i128, a, b);
}

// Logical shift right: shift in 0 from left to right
// Precondition: 0 <= b < T.bit_count
inline fn lshrXi3(comptime T: type, a: T, b: i32) T {
    const word_t = HalveInt(T, false);
    const S = std.math.Log2Int(word_t.HalfT);

    const input = word_t{ .all = a };
    var output: word_t = undefined;

    if (b >= word_t.bits) {
        output.s.high = 0;
        output.s.low = input.s.high >> @as(S, @intCast(b - word_t.bits));
    } else if (b == 0) {
        return a;
    } else {
        output.s.high = input.s.high >> @as(S, @intCast(b));
        output.s.low = input.s.high << @as(S, @intCast(word_t.bits - b));
        output.s.low |= input.s.low >> @as(S, @intCast(b));
    }

    return output.all;
}

/// Gives access to the underlying bits as two equally sized lower and higher
/// signed or unsigned integers.
fn HalveInt(comptime T: type, comptime signed_half: bool) type {
    return extern union {
        pub const bits = @divExact(@typeInfo(T).Int.bits, 2);
        pub const HalfTU = std.meta.Int(.unsigned, bits);
        pub const HalfTS = std.meta.Int(.signed, bits);
        pub const HalfT = if (signed_half) HalfTS else HalfTU;

        all: T,
        s: if (native_endian == .Little)
            extern struct { low: HalfT, high: HalfT }
        else
            extern struct { high: HalfT, low: HalfT },
    };
}
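A quick check of the logical-shift semantics (zeros shifted in from the left even for a negative i128) — a test sketch, not part of the deleted file:

test "__lshrti3 shifts zeros in from the left" {
    const x: i128 = -1; // all 128 bits set
    // A logical (not arithmetic) shift by 127 leaves only the lowest bit.
    try std.testing.expectEqual(@as(i128, 1), __lshrti3(x, 127));
}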

@ -1,87 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const musl = @import("libc/musl.zig");
const folly = @import("libc/folly.zig");
const cpuid = @import("libc/cpuid.zig");

comptime {
    // TODO: remove this workaround.
    // Our wasm LLVM pipeline always links in memcpy,
    // so our implementation would conflict with it.
    if (builtin.is_test) {
        // We don't need memcpy for tests because the tests are built with -lc.
    } else if (arch != .wasm32) {
        @export(memcpy, .{ .name = "memcpy", .linkage = .Strong });
    }
}

const Memcpy = *const fn (noalias [*]u8, noalias [*]const u8, len: usize) callconv(.C) [*]u8;

pub var memcpy_target: Memcpy = switch (arch) {
    .x86_64 => dispatch_memcpy,
    else => unreachable,
};

pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    switch (builtin.os.tag) {
        .windows => {
            return musl.memcpy(dest, src, len);
        },
        else => switch (arch) {
            // x86_64 has a special optimized memcpy that can use avx2.
            .x86_64 => {
                return memcpy_target(dest, src, len);
            },
            else => {
                return musl.memcpy(dest, src, len);
            },
        },
    }
}

const MemcpyDecision = enum {
    uninitialized,
    folly_prefetchw,
    folly_prefetcht0,
    musl,
};

var memcpy_decision: MemcpyDecision = .uninitialized;

fn dispatch_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    switch (arch) {
        .x86_64 => {
            // TODO: Switch this to overwrite the memcpy_target pointer once the surgical linker can support it.
            // Then dispatch will just happen on the first call instead of every call.
            // if (cpuid.supports_avx2()) {
            //     if (cpuid.supports_prefetchw()) {
            //         memcpy_target = folly.memcpy_prefetchw;
            //     } else {
            //         memcpy_target = folly.memcpy_prefetcht0;
            //     }
            // } else {
            //     memcpy_target = musl.memcpy;
            // }
            // return memcpy_target(dest, src, len);
            switch (memcpy_decision) {
                .uninitialized => {
                    if (cpuid.supports_avx2()) {
                        if (cpuid.supports_prefetchw()) {
                            memcpy_decision = .folly_prefetchw;
                        } else {
                            memcpy_decision = .folly_prefetcht0;
                        }
                    } else {
                        memcpy_decision = .musl;
                    }
                    return dispatch_memcpy(dest, src, len);
                },
                .folly_prefetchw => return folly.memcpy_prefetchw(dest, src, len),
                .folly_prefetcht0 => return folly.memcpy_prefetcht0(dest, src, len),
                .musl => return musl.memcpy(dest, src, len),
            }
        },
        else => unreachable,
    }
}
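The TODO above describes replacing the per-call `switch` with one-time self-patching of `memcpy_target`. Assuming the surgical linker learned to relocate writable function pointers, the intended shape would be roughly the following sketch (with a hypothetical `pick_memcpy` helper):

fn pick_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    // Runs only on the first call: probe the CPU once, overwrite the global
    // pointer, then delegate. Later calls through memcpy_target jump straight
    // to the chosen implementation with no branch.
    // (A cast may be needed where .SysV and .C callconv spellings differ,
    // even though they are the same ABI on x86_64 SysV targets.)
    if (cpuid.supports_avx2()) {
        memcpy_target = if (cpuid.supports_prefetchw()) folly.memcpy_prefetchw else folly.memcpy_prefetcht0;
    } else {
        memcpy_target = musl.memcpy;
    }
    return memcpy_target(dest, src, len);
}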

@ -1,7 +0,0 @@
const builtin = @import("builtin");
const os = builtin.os;

pub const function_prefix = switch (os.tag) {
    .macos => "_",
    else => "",
};

@ -1,53 +0,0 @@
// Check if AVX2 is supported.
// Returns 1 if AVX2 is supported, 0 otherwise.
.global {[function_prefix]s}supports_avx2;
{[function_prefix]s}supports_avx2:
    // Save the EBX register.
    push %rbx

    // Call the CPUID instruction with the EAX register set to 7 and ECX set to 0.
    // This will get the CPUID information for the current CPU.
    mov $7, %eax
    mov $0, %ecx
    cpuid

    // The AVX2 feature flag is located in the EBX register at bit 5.
    bt $5, %ebx
    jc .avx2_supported

    // AVX2 is not supported.
    pop %rbx
    mov $0, %eax
    ret

.avx2_supported:
    pop %rbx
    mov $1, %eax
    ret

// Check if prefetchw is supported.
// Returns 1 if the prefetchw instruction is supported, 0 otherwise.
.global {[function_prefix]s}supports_prefetchw;
{[function_prefix]s}supports_prefetchw:
    // Save the EBX register.
    push %rbx

    // Call the CPUID instruction with the EAX register set to 0x80000001 and ECX set to 0.
    // This will get the CPUID information for the current CPU.
    mov $0x80000001, %eax
    mov $0, %ecx
    cpuid

    // The prefetchw feature flag is located in the ECX register at bit 8.
    bt $8, %ecx
    jc .prefetchw_supported

    // prefetchw is not supported.
    pop %rbx
    mov $0, %eax
    ret

.prefetchw_supported:
    pop %rbx
    mov $1, %eax
    ret
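The two routines above are thin CPUID wrappers: load a leaf (and subleaf) into EAX/ECX, execute `cpuid`, and test one feature bit. The Zig wrapper that follows explains why inline assembly was avoided (EBX is the PIC register on some targets, which makes it awkward in position-independent code); for comparison, a generic inline-assembly helper would look roughly like this — a sketch, not code from the repo:

const CpuidRegs = struct { eax: u32, ebx: u32, ecx: u32, edx: u32 };

fn cpuidLeaf(leaf: u32, subleaf: u32) CpuidRegs {
    var eax: u32 = undefined;
    var ebx: u32 = undefined;
    var ecx: u32 = undefined;
    var edx: u32 = undefined;
    asm volatile ("cpuid"
        : [eax] "={eax}" (eax),
          [ebx] "={ebx}" (ebx),
          [ecx] "={ecx}" (ecx),
          [edx] "={edx}" (edx),
        : [leaf] "{eax}" (leaf),
          [subleaf] "{ecx}" (subleaf),
    );
    return .{ .eax = eax, .ebx = ebx, .ecx = ecx, .edx = edx };
}

fn supportsAvx2() bool {
    // Leaf 7, subleaf 0: AVX2 is EBX bit 5 (prefetchw is leaf 0x80000001, ECX bit 8).
    return (cpuidLeaf(7, 0).ebx >> 5) & 1 == 1;
}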

@ -1,18 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("assembly_util.zig").function_prefix;

// I couldn't manage to define this in a PIE-friendly way with inline assembly.
// Instead, I am defining these as global assembly functions.
comptime {
    switch (arch) {
        .x86_64 => {
            asm (std.fmt.comptimePrint(@embedFile("cpuid.S"), .{ .function_prefix = function_prefix }));
        },
        else => unreachable,
    }
}

pub extern fn supports_avx2() bool;
pub extern fn supports_prefetchw() bool;
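The runtime probe above is only needed when the binary must run on unknown CPUs. When the target CPU is fixed at build time, the check can be resolved at compile time instead; a sketch using the standard library (assuming Zig's `std.Target.x86.featureSetHas`, which this file does not use):

const std = @import("std");
const builtin = @import("builtin");

// Resolved at compile time from the build target's CPU feature set.
pub const avx2_known_at_comptime = std.Target.x86.featureSetHas(builtin.cpu.features, .avx2);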

@ -1,2 +0,0 @@
pub const memcpy_prefetchw = @import("folly/memcpy.zig").__folly_memcpy_prefetchw;
pub const memcpy_prefetcht0 = @import("folly/memcpy.zig").__folly_memcpy_prefetcht0;
@ -1,437 +0,0 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * __folly_memcpy: An optimized memcpy implementation that uses prefetch and
 * AVX2 instructions.
 *
 * This implementation of memcpy acts as a memmove: while overlapping copies
 * are undefined in memcpy, in some implementations they're the same function and
 * legacy programs rely on this behavior.
 *
 * This implementation uses prefetch to avoid dtlb misses. This can
 * substantially reduce dtlb store misses in cases where the destination
 * location is absent from L1 cache and where the copy size is small enough
 * that the hardware prefetcher doesn't have a large impact.
 *
 * The number of branches is limited by the use of overlapping loads & stores.
 * This helps with copies where the source and destination cache lines are already
 * present in L1 because there are fewer instructions to execute and fewer
 * branches to potentially mispredict.
 *   e.g. to copy the last 4 <= n <= 7 bytes: copy the first & last 4 bytes (overlapped):
 *       movl        (%rsi), %r8d
 *       movl        -4(%rsi,%rdx), %r9d
 *       movl        %r8d, (%rdi)
 *       movl        %r9d, -4(%rdi,%rdx)
 *
 *
 * For sizes up to 256 all source data is first read into registers and then written:
 * - n <= 16: overlapping movs
 * - n <= 32: overlapping unaligned 16-byte SSE XMM load/stores
 * - n <= 256: overlapping unaligned 32-byte AVX YMM load/stores
 *
 * Large copies (> 256 bytes) use unaligned loads + aligned stores.
 * This is observed to always be faster than rep movsb, so the rep movsb
 * instruction is not used.
 * - The head & tail may be unaligned => they're always written using unaligned stores.
 *
 * If the copy size is humongous (> 32 KiB) and the source and destination are both
 * aligned, this memcpy will use non-temporal operations (AVX2). This can have
 * a substantial speedup for copies where data is absent from L1, but it
 * is significantly slower if the source and destination data were already
 * in L1. The use of non-temporal operations also has the effect that after
 * the copy is complete, the data will be moved out of L1, even if the data was
 * present before the copy started.
 *
 * For n > 256 and overlapping src & dst buffers (memmove):
 * - use unaligned loads + aligned stores, but not non-temporal stores
 * - for dst < src forward copy in 128 byte batches:
 *   - unaligned load the first 32 bytes & last 4 x 32 bytes
 *   - forward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
 *   - unaligned store the first 32 bytes & last 4 x 32 bytes
 * - for dst > src backward copy in 128 byte batches:
 *   - unaligned load the first 4 x 32 bytes & last 32 bytes
 *   - backward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
 *   - unaligned store the first 4 x 32 bytes & last 32 bytes
 *
 * @author Logan Evans <lpe@fb.com>
 */
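The overlapping load/store trick from the header comment translates directly to portable code. A minimal Zig sketch of the 4-to-7-byte case (a hypothetical helper, not part of the deleted file):

fn copyFourToSeven(dest: [*]u8, src: [*]const u8, n: usize) void {
    std.debug.assert(n >= 4 and n <= 7);
    var head: [4]u8 = undefined;
    var tail: [4]u8 = undefined;
    // Read everything first: the two 4-byte windows overlap when n < 8,
    // covering all n bytes with exactly two loads and two stores, no branches.
    @memcpy(&head, src[0..4]);
    @memcpy(&tail, src[n - 4 .. n]);
    @memcpy(dest[0..4], &head);
    @memcpy(dest[n - 4 .. n], &tail);
}

Because all reads happen before any writes, this also behaves as a memmove for overlapping buffers — exactly the property the comment claims for all copies up to 256 bytes.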

// .type {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, @function not supported by windows
{[function_prefix]s}__folly_memcpy_short_{[prefetch]s}:
    .cfi_startproc

.L_GE1_LE7_{[prefetch]s}:
    cmp $1, %rdx
    je .L_EQ1_{[prefetch]s}

    cmp $4, %rdx
    jae .L_GE4_LE7_{[prefetch]s}

.L_GE2_LE3_{[prefetch]s}:
    movw (%rsi), %r8w
    movw -2(%rsi,%rdx), %r9w
    movw %r8w, (%rdi)
    movw %r9w, -2(%rdi,%rdx)
    ret

    .balign 2
.L_EQ1_{[prefetch]s}:
    movb (%rsi), %r8b
    movb %r8b, (%rdi)
    ret

    // Aligning the target of a jump to an even address has a measurable
    // speedup in microbenchmarks.
    .balign 2
.L_GE4_LE7_{[prefetch]s}:
    movl (%rsi), %r8d
    movl -4(%rsi,%rdx), %r9d
    movl %r8d, (%rdi)
    movl %r9d, -4(%rdi,%rdx)
    ret

    .cfi_endproc
    // .size {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_short_{[prefetch]s} not supported by windows

// memcpy is an alternative entrypoint into the function named __folly_memcpy.
// The compiler is able to call memcpy since the name is global while
// stacktraces will show __folly_memcpy since that is the name of the function.
// This is intended to aid in debugging by making it obvious which version of
// memcpy is being used.
    .balign 64
    .globl {[function_prefix]s}__folly_memcpy_{[prefetch]s}
    // .type {[function_prefix]s}__folly_memcpy_{[prefetch]s}, @function not supported by windows

{[function_prefix]s}__folly_memcpy_{[prefetch]s}:
    .cfi_startproc

    mov %rdi, %rax // return: $rdi

    test %rdx, %rdx
    je .L_EQ0_{[prefetch]s}

    {[prefetch]s} (%rdi)
    {[prefetch]s} -1(%rdi,%rdx)

    cmp $8, %rdx
    jb .L_GE1_LE7_{[prefetch]s}

.L_GE8_{[prefetch]s}:
    cmp $32, %rdx
    ja .L_GE33_{[prefetch]s}

.L_GE8_LE32_{[prefetch]s}:
    cmp $16, %rdx
    ja .L_GE17_LE32_{[prefetch]s}

.L_GE8_LE16_{[prefetch]s}:
    mov (%rsi), %r8
    mov -8(%rsi,%rdx), %r9
    mov %r8, (%rdi)
    mov %r9, -8(%rdi,%rdx)
.L_EQ0_{[prefetch]s}:
    ret

    .balign 2
.L_GE17_LE32_{[prefetch]s}:
    movdqu (%rsi), %xmm0
    movdqu -16(%rsi,%rdx), %xmm1
    movdqu %xmm0, (%rdi)
    movdqu %xmm1, -16(%rdi,%rdx)
    ret

    .balign 2
.L_GE193_LE256_{[prefetch]s}:
    vmovdqu %ymm3, 96(%rdi)
    vmovdqu %ymm4, -128(%rdi,%rdx)

.L_GE129_LE192_{[prefetch]s}:
    vmovdqu %ymm2, 64(%rdi)
    vmovdqu %ymm5, -96(%rdi,%rdx)

.L_GE65_LE128_{[prefetch]s}:
    vmovdqu %ymm1, 32(%rdi)
    vmovdqu %ymm6, -64(%rdi,%rdx)

.L_GE33_LE64_{[prefetch]s}:
    vmovdqu %ymm0, (%rdi)
    vmovdqu %ymm7, -32(%rdi,%rdx)

    vzeroupper
    ret

    .balign 2
.L_GE33_{[prefetch]s}:
    vmovdqu (%rsi), %ymm0
    vmovdqu -32(%rsi,%rdx), %ymm7

    cmp $64, %rdx
    jbe .L_GE33_LE64_{[prefetch]s}

    {[prefetch]s} 64(%rdi)

    vmovdqu 32(%rsi), %ymm1
    vmovdqu -64(%rsi,%rdx), %ymm6

    cmp $128, %rdx
    jbe .L_GE65_LE128_{[prefetch]s}

    {[prefetch]s} 128(%rdi)

    vmovdqu 64(%rsi), %ymm2
    vmovdqu -96(%rsi,%rdx), %ymm5

    cmp $192, %rdx
    jbe .L_GE129_LE192_{[prefetch]s}

    {[prefetch]s} 192(%rdi)

    vmovdqu 96(%rsi), %ymm3
    vmovdqu -128(%rsi,%rdx), %ymm4

    cmp $256, %rdx
    jbe .L_GE193_LE256_{[prefetch]s}

.L_GE257_{[prefetch]s}:
    {[prefetch]s} 256(%rdi)

    // Check if there is an overlap. If there is an overlap then the caller
    // has a bug since this is undefined behavior. However, for legacy
    // reasons this behavior is expected by some callers.
    //
    // All copies through 256 bytes will operate as a memmove since for
    // those sizes all reads are performed before any writes.
    //
    // This check uses the idea that there is an overlap if
    // (%rdi < (%rsi + %rdx)) && (%rsi < (%rdi + %rdx)),
    // or equivalently, there is no overlap if
    // ((%rsi + %rdx) <= %rdi) || ((%rdi + %rdx) <= %rsi).
    //
    // %r9 will be used after .L_ALIGNED_DST_LOOP to calculate how many
    // bytes remain to be copied.

    // (%rsi + %rdx <= %rdi) => no overlap
    lea (%rsi,%rdx), %r9
    cmp %rdi, %r9
    jbe .L_NO_OVERLAP_{[prefetch]s}

    // (%rdi + %rdx <= %rsi) => no overlap
    lea (%rdi,%rdx), %r8
    cmp %rsi, %r8
    // If no info is available in the branch predictor's cache, Intel CPUs assume
    // forward jumps are not taken. Use a forward jump as overlapping buffers
    // are unlikely.
    ja .L_OVERLAP_{[prefetch]s}

    .balign 2
.L_NO_OVERLAP_{[prefetch]s}:
    vmovdqu %ymm0, (%rdi)
    vmovdqu %ymm1, 32(%rdi)
    vmovdqu %ymm2, 64(%rdi)
    vmovdqu %ymm3, 96(%rdi)

    // Align %rdi to a 32 byte boundary.
    // %rcx = 128 - (%rdi & 31)
    mov $128, %rcx
    and $31, %rdi
    sub %rdi, %rcx

    lea (%rsi,%rcx), %rsi
    lea (%rax,%rcx), %rdi
    sub %rcx, %rdx

    // %r8 is the end condition for the loop.
    lea -128(%rsi,%rdx), %r8

    // This threshold is half of L1 cache on a Skylake machine, which means that
    // potentially all of L1 will be populated by this copy once it is executed
    // (dst and src are cached for temporal copies).
    // NON_TEMPORAL_STORE_THRESHOLD = $32768
    // cmp NON_TEMPORAL_STORE_THRESHOLD, %rdx
    cmp $32768, %rdx
    jae .L_NON_TEMPORAL_LOOP_{[prefetch]s}

    .balign 2
.L_ALIGNED_DST_LOOP_{[prefetch]s}:
    {[prefetch]s} 128(%rdi)
    {[prefetch]s} 192(%rdi)

    vmovdqu (%rsi), %ymm0
    vmovdqu 32(%rsi), %ymm1
    vmovdqu 64(%rsi), %ymm2
    vmovdqu 96(%rsi), %ymm3
    add $128, %rsi

    vmovdqa %ymm0, (%rdi)
    vmovdqa %ymm1, 32(%rdi)
    vmovdqa %ymm2, 64(%rdi)
    vmovdqa %ymm3, 96(%rdi)
    add $128, %rdi

    cmp %r8, %rsi
    jb .L_ALIGNED_DST_LOOP_{[prefetch]s}

.L_ALIGNED_DST_LOOP_END_{[prefetch]s}:
    sub %rsi, %r9
    mov %r9, %rdx

    vmovdqu %ymm4, -128(%rdi,%rdx)
    vmovdqu %ymm5, -96(%rdi,%rdx)
    vmovdqu %ymm6, -64(%rdi,%rdx)
    vmovdqu %ymm7, -32(%rdi,%rdx)

    vzeroupper
    ret

    .balign 2
.L_NON_TEMPORAL_LOOP_{[prefetch]s}:
    testb $31, %sil
    jne .L_ALIGNED_DST_LOOP_{[prefetch]s}
    // This is prefetching the source data unlike ALIGNED_DST_LOOP which
    // prefetches the destination data. This choice is again informed by
    // benchmarks. With a non-temporal store the entirety of the cache line
    // is being written so the previous data can be discarded without being
    // fetched.
    prefetchnta 128(%rsi)
    prefetchnta 196(%rsi)

    vmovntdqa (%rsi), %ymm0
    vmovntdqa 32(%rsi), %ymm1
    vmovntdqa 64(%rsi), %ymm2
    vmovntdqa 96(%rsi), %ymm3
    add $128, %rsi

    vmovntdq %ymm0, (%rdi)
    vmovntdq %ymm1, 32(%rdi)
    vmovntdq %ymm2, 64(%rdi)
    vmovntdq %ymm3, 96(%rdi)
    add $128, %rdi

    cmp %r8, %rsi
    jb .L_NON_TEMPORAL_LOOP_{[prefetch]s}

    sfence
    jmp .L_ALIGNED_DST_LOOP_END_{[prefetch]s}


.L_OVERLAP_{[prefetch]s}:
    .balign 2
    cmp %rdi, %rsi
    jb .L_OVERLAP_BWD_{[prefetch]s} // %rsi < %rdi => backward-copy
    je .L_RET_{[prefetch]s} // %rsi == %rdi => return, nothing to copy

    // Source & destination buffers overlap. Forward copy.

    vmovdqu (%rsi), %ymm8

    // Align %rdi to a 32 byte boundary.
    // %rcx = 32 - (%rdi & 31)
    mov $32, %rcx
    and $31, %rdi
    sub %rdi, %rcx

    lea (%rsi,%rcx), %rsi
    lea (%rax,%rcx), %rdi
    sub %rcx, %rdx

    // %r8 is the end condition for the loop.
    lea -128(%rsi,%rdx), %r8


.L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}:
    {[prefetch]s} 128(%rdi)
    {[prefetch]s} 192(%rdi)

    vmovdqu (%rsi), %ymm0
    vmovdqu 32(%rsi), %ymm1
    vmovdqu 64(%rsi), %ymm2
    vmovdqu 96(%rsi), %ymm3
    add $128, %rsi

    vmovdqa %ymm0, (%rdi)
    vmovdqa %ymm1, 32(%rdi)
    vmovdqa %ymm2, 64(%rdi)
    vmovdqa %ymm3, 96(%rdi)
    add $128, %rdi

    cmp %r8, %rsi
    jb .L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}

    sub %rsi, %r9
    mov %r9, %rdx

    vmovdqu %ymm4, -128(%rdi,%rdx)
    vmovdqu %ymm5, -96(%rdi,%rdx)
    vmovdqu %ymm6, -64(%rdi,%rdx)
    vmovdqu %ymm7, -32(%rdi,%rdx)
    vmovdqu %ymm8, (%rax) // %rax == the original (unaligned) %rdi

    vzeroupper

.L_RET_{[prefetch]s}:
    ret

.L_OVERLAP_BWD_{[prefetch]s}:
    // Save last 32 bytes.
    vmovdqu -32(%rsi, %rdx), %ymm8
    lea -32(%rdi, %rdx), %r9


    // %r8 is the end condition for the loop.
    lea 128(%rsi), %r8

    // Align %rdi + %rdx (destination end) to a 32 byte boundary.
    // %rcx = (%rdi + %rdx - 32) & 31
    mov %r9, %rcx
    and $31, %rcx
    // Set %rsi & %rdi to the end of the 32 byte aligned range.
    sub %rcx, %rdx
    add %rdx, %rsi
    add %rdx, %rdi


.L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}:
    {[prefetch]s} -128(%rdi)
    {[prefetch]s} -192(%rdi)

    vmovdqu -32(%rsi), %ymm4
    vmovdqu -64(%rsi), %ymm5
    vmovdqu -96(%rsi), %ymm6
    vmovdqu -128(%rsi), %ymm7
    sub $128, %rsi

    vmovdqa %ymm4, -32(%rdi)
    vmovdqa %ymm5, -64(%rdi)
    vmovdqa %ymm6, -96(%rdi)
    vmovdqa %ymm7, -128(%rdi)
    sub $128, %rdi

    cmp %r8, %rsi
    ja .L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}

    vmovdqu %ymm0, (%rax) // %rax == the original (unaligned) %rdi
    vmovdqu %ymm1, 32(%rax)
    vmovdqu %ymm2, 64(%rax)
    vmovdqu %ymm3, 96(%rax)
    vmovdqu %ymm8, (%r9)

    vzeroupper
    ret

    .cfi_endproc
    // .size {[function_prefix]s}__folly_memcpy_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_{[prefetch]s} not supported by windows
@ -1,18 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("../assembly_util.zig").function_prefix;

comptime {
    switch (arch) {
        .x86_64 => {
            inline for ([_][]const u8{ "prefetchw", "prefetcht0" }) |prefetch| {
                asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .prefetch = prefetch, .function_prefix = function_prefix }));
            }
        },
        else => unreachable,
    }
}

pub extern fn __folly_memcpy_prefetchw(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;
pub extern fn __folly_memcpy_prefetcht0(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;
@ -1 +0,0 @@
pub const memcpy = @import("musl/memcpy.zig").memcpy;
@ -1,193 +0,0 @@
musl as a whole is licensed under the following standard MIT license:

----------------------------------------------------------------------
Copyright © 2005-2020 Rich Felker, et al.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------

Authors/contributors include:

A. Wilcox
Ada Worcester
Alex Dowad
Alex Suykov
Alexander Monakov
Andre McCurdy
Andrew Kelley
Anthony G. Basile
Aric Belsito
Arvid Picciani
Bartosz Brachaczek
Benjamin Peterson
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Daniel Sabogal
Daurnimator
David Carlier
David Edelsohn
Denys Vlasenko
Dmitry Ivanov
Dmitry V. Levin
Drew DeVault
Emil Renner Berthing
Fangrui Song
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
He X
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Julien Ramseier
Justin Cormack
Kaarle Ritvanen
Khem Raj
Kylie McClain
Leah Neukirchen
Luca Barbato
Luka Perkov
M Farkas-Dyck (Strake)
Mahesh Bodapati
Markus Wichmann
Masanori Ogino
Michael Clark
Michael Forney
Mikhail Kremnyov
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Patrick Oppenlander
Petr Hosek
Petr Skocik
Pierre Carrier
Reini Urban
Rich Felker
Richard Pennington
Ryan Fairfax
Samuel Holland
Segev Finer
Shiz
sin
Solar Designer
Stefan Kristiansson
Stefan O'Rear
Szabolcs Nagy
Timo Teräs
Trutz Behn
Valentin Ochs
Will Dietz
William Haddon
William Pitcock

Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:

The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.

Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier or
Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.

The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.

The AArch64 memcpy and memset code (src/string/aarch64/*) are
Copyright © 1999-2019, Arm Limited.

The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.

The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.

The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Valentin Ochs and is licensed under an MIT-style license.

The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.

The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.

The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.

The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.

All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contributors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.

In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:

Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy

all of whom have explicitly granted such permission.

This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.
@ -1,2 +0,0 @@
These files all come from [musl libc](https://musl.libc.org/).
Roc directly uses a few of them instead of depending on musl libc in full.
@ -1,30 +0,0 @@
.global {[function_prefix]s}musl_memcpy
// Windows does not support the type directive.
// .type {[function_prefix]s}musl_memcpy,@function
{[function_prefix]s}musl_memcpy:
    push %esi
    push %edi
    mov 12(%esp),%edi
    mov 16(%esp),%esi
    mov 20(%esp),%ecx
    mov %edi,%eax
    cmp $4,%ecx
    jc 1f
    test $3,%edi
    jz 1f
2:  movsb
    dec %ecx
    test $3,%edi
    jnz 2b
1:  mov %ecx,%edx
    shr $2,%ecx
    rep
    movsl
    and $3,%edx
    jz 1f
2:  movsb
    dec %edx
    jnz 2b
1:  pop %edi
    pop %esi
    ret
@ -1,23 +0,0 @@
.global {[function_prefix]s}musl_memcpy
// Windows does not support the type directive.
// .type {[function_prefix]s}musl_memcpy,@function
{[function_prefix]s}musl_memcpy:
    mov %rdi,%rax
    cmp $8,%rdx
    jc 1f
    test $7,%edi
    jz 1f
2:  movsb
    dec %rdx
    test $7,%edi
    jnz 2b
1:  mov %rdx,%rcx
    shr $3,%rcx
    rep
    movsq
    and $7,%edx
    jz 1f
2:  movsb
    dec %edx
    jnz 2b
1:  ret
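Read as pseudocode, the x86_64 routine above copies single bytes until the destination is 8-byte aligned, moves the bulk with `rep movsq`, and finishes byte by byte. A rough Zig equivalent (a sketch with a hypothetical name, not code from the repo):

fn muslMemcpySketch(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) [*]u8 {
    var d = dest;
    var s = src;
    var n = len;
    // Byte-copy until the destination is 8-byte aligned (skipped when n < 8).
    while (n >= 8 and @intFromPtr(d) % 8 != 0) : (n -= 1) {
        d[0] = s[0];
        d += 1;
        s += 1;
    }
    // Bulk copy in 8-byte words, like `rep movsq`.
    var i: usize = 0;
    while (i + 8 <= n) : (i += 8) {
        @memcpy(d[i .. i + 8], s[i .. i + 8]);
    }
    // Copy the remaining 0-7 tail bytes.
    while (i < n) : (i += 1) d[i] = s[i];
    return dest;
}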
@ -1,223 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("../assembly_util.zig").function_prefix;

comptime {
    switch (arch) {
        .x86_64 => {
            asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .function_prefix = function_prefix }));
        },
        .x86 => {
            asm (std.fmt.comptimePrint(@embedFile("memcpy-x86.S"), .{ .function_prefix = function_prefix }));
        },
        // TODO: add assembly implementations for other platforms.
        else => {},
    }
}

pub const memcpy =
    switch (builtin.os.tag) {
        .windows => fallback_memcpy,
        else => switch (arch) {
            .x86_64, .x86 => musl_memcpy,
            else => fallback_memcpy,
        },
    };

pub extern fn musl_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8;

// Note: this is written to only support little-endian targets.
// To support big-endian, `<<` and `>>` would need to be swapped.
pub fn fallback_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    var d = dest;
    var s = src;
    var n = len;
    switch (@min(n, @intFromPtr(s) % 4)) {
        1 => {
            d[0] = s[0];
            d += 1;
            s += 1;
            n -= 1;
        },
        2 => {
            d[0] = s[0];
            d[1] = s[1];
            d += 2;
            s += 2;
            n -= 2;
        },
        3 => {
            d[0] = s[0];
            d[1] = s[1];
            d[2] = s[2];
            d += 3;
            s += 3;
            n -= 3;
        },
        else => {},
    }

    if (@intFromPtr(d) % 4 == 0) {
        var d4 = @as([*]align(4) u8, @alignCast(d));
        var s4 = @as([*]align(4) const u8, @alignCast(s));
        while (n >= 16) : (n -= 16) {
            var d_u32 = @as([*]u32, @ptrCast(d4));
            var s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];
            d_u32[1] = s_u32[1];
            d_u32[2] = s_u32[2];
            d_u32[3] = s_u32[3];

            d4 += 16;
            s4 += 16;
        }
        if (n & 8 != 0) {
            var d_u32 = @as([*]u32, @ptrCast(d4));
            var s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];
            d_u32[1] = s_u32[1];

            d4 += 8;
            s4 += 8;
        }
        if (n & 4 != 0) {
            var d_u32 = @as([*]u32, @ptrCast(d4));
            var s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];

            d4 += 4;
            s4 += 4;
        }
        d = d4;
        s = s4;
        if (n & 2 != 0) {
            d[0] = s[0];
            d += 1;
            s += 1;
            d[0] = s[0];
            d += 1;
            s += 1;
        }
        if (n & 1 != 0) {
            d[0] = s[0];
        }
        return dest;
    }
    if (n >= 32) {
        switch (@intFromPtr(d) % 4) {
            1 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 3;
                while (n >= 17) : (n -= 16) {
                    var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 1)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 24) | (x << 8);
                    w = s_u32[1];
                    d_u32[1] = (x >> 24) | (w << 8);
                    x = s_u32[2];
                    d_u32[2] = (w >> 24) | (x << 8);
                    w = s_u32[3];
                    d_u32[3] = (x >> 24) | (w << 8);

                    d += 16;
                    s += 16;
                }
            },
            2 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 2;
                while (n >= 18) : (n -= 16) {
                    var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 2)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 16) | (x << 16);
                    w = s_u32[1];
                    d_u32[1] = (x >> 16) | (w << 16);
                    x = s_u32[2];
                    d_u32[2] = (w >> 16) | (x << 16);
                    w = s_u32[3];
                    d_u32[3] = (x >> 16) | (w << 16);

                    d += 16;
                    s += 16;
                }
            },
            3 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 1;
                while (n >= 19) : (n -= 16) {
                    var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 3)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 8) | (x << 24);
                    w = s_u32[1];
                    d_u32[1] = (x >> 8) | (w << 24);
                    x = s_u32[2];
                    d_u32[2] = (w >> 8) | (x << 24);
                    w = s_u32[3];
                    d_u32[3] = (x >> 8) | (w << 24);

                    d += 16;
                    s += 16;
                }
            },
            else => unreachable,
        }
    }
    if (n & 16 != 0) {
        comptime var i = 0;
        inline while (i < 16) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 8 != 0) {
        comptime var i = 0;
        inline while (i < 8) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 4 != 0) {
        comptime var i = 0;
        inline while (i < 4) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 2 != 0) {
        d[0] = s[0];
        d += 1;
        s += 1;
        d[0] = s[0];
        d += 1;
        s += 1;
    }
    if (n & 1 != 0) {
        d[0] = s[0];
    }
    return dest;
}
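A small exercise of `fallback_memcpy` on deliberately misaligned pointers — a test sketch, not part of the deleted file:

test "fallback_memcpy handles misaligned source and destination" {
    var src: [64]u8 = undefined;
    for (&src, 0..) |*byte, i| byte.* = @intCast(i);
    var dst = [_]u8{0} ** 64;
    // Offsets 1 and 3 exercise the head/tail handling and, depending on the
    // arrays' addresses, the shifted word-at-a-time path described above.
    _ = fallback_memcpy(@as([*]u8, &dst) + 1, @as([*]const u8, &src) + 3, 40);
    try std.testing.expectEqualSlices(u8, src[3..43], dst[1..41]);
}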

@ -6,11 +6,6 @@ const expect = @import("expect.zig");
const panic_utils = @import("panic.zig");
const dbg_utils = @import("dbg.zig");

// comptime {
//     _ = @import("compiler_rt.zig");
//     _ = @import("libc.zig");
// }

const ROC_BUILTINS = "roc_builtins";
const NUM = "num";
const STR = "str";