const std = @import("std"); const builtin = @import("builtin"); const math = std.math; // Eventually, we need to statically ingest compiler-rt and get it working with the surgical linker, then these should not be needed anymore. // Until then, we are manually ingesting used parts of compiler-rt here. // // Taken from // https://github.com/ziglang/zig/tree/4976b58ab16069f8d3267b69ed030f29685c1abe/lib/compiler_rt/ // Thank you Zig Contributors! // Libcalls that involve u128 on Windows x86-64 are expected by LLVM to use the // calling convention of @Vector(2, u64), rather than what's standard. pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and @import("builtin").object_format != .c; const v2u64 = @Vector(2, u64); // Export it as weak incase it is already linked in by something else. comptime { @export(__muloti4, .{ .name = "__muloti4", .linkage = .Weak }); if (want_windows_v2u64_abi) { @export(__divti3_windows_x86_64, .{ .name = "__divti3", .linkage = .Weak }); @export(__modti3_windows_x86_64, .{ .name = "__modti3", .linkage = .Weak }); @export(__umodti3_windows_x86_64, .{ .name = "__umodti3", .linkage = .Weak }); @export(__udivti3_windows_x86_64, .{ .name = "__udivti3", .linkage = .Weak }); @export(__fixdfti_windows_x86_64, .{ .name = "__fixdfti", .linkage = .Weak }); @export(__fixsfti_windows_x86_64, .{ .name = "__fixsfti", .linkage = .Weak }); @export(__fixunsdfti_windows_x86_64, .{ .name = "__fixunsdfti", .linkage = .Weak }); @export(__fixunssfti_windows_x86_64, .{ .name = "__fixunssfti", .linkage = .Weak }); } else { @export(__divti3, .{ .name = "__divti3", .linkage = .Weak }); @export(__modti3, .{ .name = "__modti3", .linkage = .Weak }); @export(__umodti3, .{ .name = "__umodti3", .linkage = .Weak }); @export(__udivti3, .{ .name = "__udivti3", .linkage = .Weak }); @export(__fixdfti, .{ .name = "__fixdfti", .linkage = .Weak }); @export(__fixsfti, .{ .name = "__fixsfti", .linkage = .Weak }); @export(__fixunsdfti, .{ .name = "__fixunsdfti", .linkage = .Weak }); @export(__fixunssfti, .{ .name = "__fixunssfti", .linkage = .Weak }); } } pub fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 { if (2 * @bitSizeOf(i128) <= @bitSizeOf(usize)) { return muloXi4_genericFast(i128, a, b, overflow); } else { return muloXi4_genericSmall(i128, a, b, overflow); } } pub fn __divti3(a: i128, b: i128) callconv(.C) i128 { return div(a, b); } fn __divti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 { return @bitCast(v2u64, div(@bitCast(i128, a), @bitCast(i128, b))); } inline fn div(a: i128, b: i128) i128 { const s_a = a >> (128 - 1); const s_b = b >> (128 - 1); const an = (a ^ s_a) -% s_a; const bn = (b ^ s_b) -% s_b; const r = udivmod(u128, @bitCast(u128, an), @bitCast(u128, bn), null); const s = s_a ^ s_b; return (@bitCast(i128, r) ^ s) -% s; } pub fn __udivti3(a: u128, b: u128) callconv(.C) u128 { return udivmod(u128, a, b, null); } fn __udivti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 { return @bitCast(v2u64, udivmod(u128, @bitCast(u128, a), @bitCast(u128, b), null)); } pub fn __umodti3(a: u128, b: u128) callconv(.C) u128 { var r: u128 = undefined; _ = udivmod(u128, a, b, &r); return r; } fn __umodti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 { var r: u128 = undefined; _ = udivmod(u128, @bitCast(u128, a), @bitCast(u128, b), &r); return @bitCast(v2u64, r); } pub fn __modti3(a: i128, b: i128) callconv(.C) i128 { return mod(a, b); } fn __modti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 { return @bitCast(v2u64, mod(@bitCast(i128, a), @bitCast(i128, b))); } inline fn mod(a: i128, b: i128) i128 { const s_a = a >> (128 - 1); // s = a < 0 ? -1 : 0 const s_b = b >> (128 - 1); // s = b < 0 ? -1 : 0 const an = (a ^ s_a) -% s_a; // negate if s == -1 const bn = (b ^ s_b) -% s_b; // negate if s == -1 var r: u128 = undefined; _ = udivmod(u128, @bitCast(u128, an), @bitCast(u128, bn), &r); return (@bitCast(i128, r) ^ s_a) -% s_a; // negate if s == -1 } pub fn __fixdfti(a: f64) callconv(.C) i128 { return floatToInt(i128, a); } fn __fixdfti_windows_x86_64(a: f64) callconv(.C) v2u64 { return @bitCast(v2u64, floatToInt(i128, a)); } pub fn __fixsfti(a: f32) callconv(.C) i128 { return floatToInt(i128, a); } fn __fixsfti_windows_x86_64(a: f32) callconv(.C) v2u64 { return @bitCast(v2u64, floatToInt(i128, a)); } pub fn __fixunsdfti(a: f64) callconv(.C) u128 { return floatToInt(u128, a); } fn __fixunsdfti_windows_x86_64(a: f64) callconv(.C) v2u64 { return @bitCast(v2u64, floatToInt(u128, a)); } pub fn __fixunssfti(a: f32) callconv(.C) u128 { return floatToInt(u128, a); } fn __fixunssfti_windows_x86_64(a: f32) callconv(.C) v2u64 { return @bitCast(v2u64, floatToInt(u128, a)); } // mulo - multiplication overflow // * return a*%b. // * return if a*b overflows => 1 else => 0 // - muloXi4_genericSmall as default // - muloXi4_genericFast for 2*bitsize <= usize inline fn muloXi4_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { overflow.* = 0; const min = math.minInt(ST); var res: ST = a *% b; // Hacker's Delight section Overflow subsection Multiplication // case a=-2^{31}, b=-1 problem, because // on some machines a*b = -2^{31} with overflow // Then -2^{31}/-1 overflows and any result is possible. // => check with a<0 and b=-2^{31} if ((a < 0 and b == min) or (a != 0 and @divTrunc(res, a) != b)) overflow.* = 1; return res; } inline fn muloXi4_genericFast(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { overflow.* = 0; const EST = switch (ST) { i32 => i64, i64 => i128, i128 => i256, else => unreachable, }; const min = math.minInt(ST); const max = math.maxInt(ST); var res: EST = @as(EST, a) * @as(EST, b); //invariant: -2^{bitwidth(EST)} < res < 2^{bitwidth(EST)-1} if (res < min or max < res) overflow.* = 1; return @truncate(ST, res); } const native_endian = builtin.cpu.arch.endian(); const low = switch (native_endian) { .Big => 1, .Little => 0, }; const high = 1 - low; pub fn udivmod(comptime DoubleInt: type, a: DoubleInt, b: DoubleInt, maybe_rem: ?*DoubleInt) DoubleInt { // @setRuntimeSafety(builtin.is_test); const double_int_bits = @typeInfo(DoubleInt).Int.bits; const single_int_bits = @divExact(double_int_bits, 2); const SingleInt = std.meta.Int(.unsigned, single_int_bits); const SignedDoubleInt = std.meta.Int(.signed, double_int_bits); const Log2SingleInt = std.math.Log2Int(SingleInt); const n = @bitCast([2]SingleInt, a); const d = @bitCast([2]SingleInt, b); var q: [2]SingleInt = undefined; var r: [2]SingleInt = undefined; var sr: c_uint = undefined; // special cases, X is unknown, K != 0 if (n[high] == 0) { if (d[high] == 0) { // 0 X // --- // 0 X if (maybe_rem) |rem| { rem.* = n[low] % d[low]; } return n[low] / d[low]; } // 0 X // --- // K X if (maybe_rem) |rem| { rem.* = n[low]; } return 0; } // n[high] != 0 if (d[low] == 0) { if (d[high] == 0) { // K X // --- // 0 0 if (maybe_rem) |rem| { rem.* = n[high] % d[low]; } return n[high] / d[low]; } // d[high] != 0 if (n[low] == 0) { // K 0 // --- // K 0 if (maybe_rem) |rem| { r[high] = n[high] % d[high]; r[low] = 0; rem.* = @bitCast(DoubleInt, r); } return n[high] / d[high]; } // K K // --- // K 0 if ((d[high] & (d[high] - 1)) == 0) { // d is a power of 2 if (maybe_rem) |rem| { r[low] = n[low]; r[high] = n[high] & (d[high] - 1); rem.* = @bitCast(DoubleInt, r); } return n[high] >> @intCast(Log2SingleInt, @ctz(SingleInt, d[high])); } // K K // --- // K 0 sr = @bitCast(c_uint, @as(c_int, @clz(SingleInt, d[high])) - @as(c_int, @clz(SingleInt, n[high]))); // 0 <= sr <= single_int_bits - 2 or sr large if (sr > single_int_bits - 2) { if (maybe_rem) |rem| { rem.* = a; } return 0; } sr += 1; // 1 <= sr <= single_int_bits - 1 // q.all = a << (double_int_bits - sr); q[low] = 0; q[high] = n[low] << @intCast(Log2SingleInt, single_int_bits - sr); // r.all = a >> sr; r[high] = n[high] >> @intCast(Log2SingleInt, sr); r[low] = (n[high] << @intCast(Log2SingleInt, single_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr)); } else { // d[low] != 0 if (d[high] == 0) { // K X // --- // 0 K if ((d[low] & (d[low] - 1)) == 0) { // d is a power of 2 if (maybe_rem) |rem| { rem.* = n[low] & (d[low] - 1); } if (d[low] == 1) { return a; } sr = @ctz(SingleInt, d[low]); q[high] = n[high] >> @intCast(Log2SingleInt, sr); q[low] = (n[high] << @intCast(Log2SingleInt, single_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr)); return @bitCast(DoubleInt, q); } // K X // --- // 0 K sr = 1 + single_int_bits + @as(c_uint, @clz(SingleInt, d[low])) - @as(c_uint, @clz(SingleInt, n[high])); // 2 <= sr <= double_int_bits - 1 // q.all = a << (double_int_bits - sr); // r.all = a >> sr; if (sr == single_int_bits) { q[low] = 0; q[high] = n[low]; r[high] = 0; r[low] = n[high]; } else if (sr < single_int_bits) { // 2 <= sr <= single_int_bits - 1 q[low] = 0; q[high] = n[low] << @intCast(Log2SingleInt, single_int_bits - sr); r[high] = n[high] >> @intCast(Log2SingleInt, sr); r[low] = (n[high] << @intCast(Log2SingleInt, single_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr)); } else { // single_int_bits + 1 <= sr <= double_int_bits - 1 q[low] = n[low] << @intCast(Log2SingleInt, double_int_bits - sr); q[high] = (n[high] << @intCast(Log2SingleInt, double_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr - single_int_bits)); r[high] = 0; r[low] = n[high] >> @intCast(Log2SingleInt, sr - single_int_bits); } } else { // K X // --- // K K sr = @bitCast(c_uint, @as(c_int, @clz(SingleInt, d[high])) - @as(c_int, @clz(SingleInt, n[high]))); // 0 <= sr <= single_int_bits - 1 or sr large if (sr > single_int_bits - 1) { if (maybe_rem) |rem| { rem.* = a; } return 0; } sr += 1; // 1 <= sr <= single_int_bits // q.all = a << (double_int_bits - sr); // r.all = a >> sr; q[low] = 0; if (sr == single_int_bits) { q[high] = n[low]; r[high] = 0; r[low] = n[high]; } else { r[high] = n[high] >> @intCast(Log2SingleInt, sr); r[low] = (n[high] << @intCast(Log2SingleInt, single_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr)); q[high] = n[low] << @intCast(Log2SingleInt, single_int_bits - sr); } } } // Not a special case // q and r are initialized with: // q.all = a << (double_int_bits - sr); // r.all = a >> sr; // 1 <= sr <= double_int_bits - 1 var carry: u32 = 0; var r_all: DoubleInt = undefined; while (sr > 0) : (sr -= 1) { // r:q = ((r:q) << 1) | carry r[high] = (r[high] << 1) | (r[low] >> (single_int_bits - 1)); r[low] = (r[low] << 1) | (q[high] >> (single_int_bits - 1)); q[high] = (q[high] << 1) | (q[low] >> (single_int_bits - 1)); q[low] = (q[low] << 1) | carry; // carry = 0; // if (r.all >= b) // { // r.all -= b; // carry = 1; // } r_all = @bitCast(DoubleInt, r); const s: SignedDoubleInt = @bitCast(SignedDoubleInt, b -% r_all -% 1) >> (double_int_bits - 1); carry = @intCast(u32, s & 1); r_all -= b & @bitCast(DoubleInt, s); r = @bitCast([2]SingleInt, r_all); } const q_all = (@bitCast(DoubleInt, q) << 1) | carry; if (maybe_rem) |rem| { rem.* = r_all; } return q_all; } pub inline fn floatToInt(comptime I: type, a: anytype) I { const Log2Int = math.Log2Int; const Int = @import("std").meta.Int; const F = @TypeOf(a); const float_bits = @typeInfo(F).Float.bits; const int_bits = @typeInfo(I).Int.bits; const rep_t = Int(.unsigned, float_bits); const sig_bits = math.floatMantissaBits(F); const exp_bits = math.floatExponentBits(F); const fractional_bits = floatFractionalBits(F); // const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0; const implicit_bit = @as(rep_t, 1) << sig_bits; const max_exp = (1 << (exp_bits - 1)); const exp_bias = max_exp - 1; const sig_mask = (@as(rep_t, 1) << sig_bits) - 1; // Break a into sign, exponent, significand const a_rep: rep_t = @bitCast(rep_t, a); const negative = (a_rep >> (float_bits - 1)) != 0; const exponent = @intCast(i32, (a_rep << 1) >> (sig_bits + 1)) - exp_bias; const significand: rep_t = (a_rep & sig_mask) | implicit_bit; // If the exponent is negative, the result rounds to zero. if (exponent < 0) return 0; // If the value is too large for the integer type, saturate. switch (@typeInfo(I).Int.signedness) { .unsigned => { if (negative) return 0; if (@intCast(c_uint, exponent) >= @minimum(int_bits, max_exp)) return math.maxInt(I); }, .signed => if (@intCast(c_uint, exponent) >= @minimum(int_bits - 1, max_exp)) { return if (negative) math.minInt(I) else math.maxInt(I); }, } // If 0 <= exponent < sig_bits, right shift to get the result. // Otherwise, shift left. var result: I = undefined; if (exponent < fractional_bits) { result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_bits - exponent)); } else { result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_bits); } if ((@typeInfo(I).Int.signedness == .signed) and negative) return ~result +% 1; return result; } /// Returns the number of fractional bits in the mantissa of floating point type T. pub inline fn floatFractionalBits(comptime T: type) comptime_int { comptime std.debug.assert(@typeInfo(T) == .Float); // standard IEEE floats have an implicit 0.m or 1.m integer part // f80 is special and has an explicitly stored bit in the MSB // this function corresponds to `MANT_DIG - 1' from C return switch (@typeInfo(T).Float.bits) { 16 => 10, 32 => 23, 64 => 52, 80 => 63, 128 => 112, else => @compileError("unknown floating point type " ++ @typeName(T)), }; }