Merge pull request #1307 from rtfeldman/dec

Beginnings of RocDec in Zig
2025-09-28 06:14:46 +00:00 · 2021-05-25 20:04:50 -04:00 · 2021-05-25 20:04:50 -04:00 · e8e83513e9
commit e8e83513e9
parent 326fb98d7f 42d583cc88
3 changed files with 328 additions and 14 deletions
--- a/compiler/builtins/bitcode/src/dec.zig
+++ b/compiler/builtins/bitcode/src/dec.zig
@ -0,0 +1,233 @@
+const std = @import("std");
+
+const math = std.math;
+
+/// A fixed-point decimal number backed by a single i128.
+/// The raw value of 1.0 is 10 raised to `decimal_places` (see `one_point_zero`).
+pub const RocDec = struct {
+    num: i128,
+
+    pub const decimal_places: u32 = 18;
+
+    pub const min: RocDec = .{ .num = math.minInt(i128) };
+    pub const max: RocDec = .{ .num = math.maxInt(i128) };
+
+    pub const one_point_zero: RocDec = .{ .num = comptime math.pow(i128, 10, RocDec.decimal_places) };
+
+    /// Adds the raw values of `self` and `other`.
+    /// Panics when the i128 addition overflows.
+    pub fn add(self: RocDec, other: RocDec) RocDec {
+        var sum: i128 = undefined;
+        if (@addWithOverflow(i128, self.num, other.num, &sum)) {
+            std.debug.panic("TODO runtime exception for overflow!", .{});
+        }
+
+        return RocDec{ .num = sum };
+    }
+
+    /// Multiplies `self` by `other`.
+    ///
+    /// Both operands are reduced to unsigned magnitudes, multiplied and rescaled
+    /// by `mul_and_decimalize`, and the sign is reapplied at the end. When taking
+    /// the absolute value of an operand fails, only multiplying it by 0 or by
+    /// exactly 1.0 yields a result; any other partner panics.
+    pub fn mul(self: RocDec, other: RocDec) RocDec {
+        const lhs_magnitude = @intCast(u128, math.absInt(self.num) catch {
+            if (other.num == 0) return .{ .num = 0 };
+            if (other.num == RocDec.one_point_zero.num) return self;
+            std.debug.panic("TODO runtime exception for overflow!", .{});
+        });
+
+        const rhs_magnitude = @intCast(u128, math.absInt(other.num) catch {
+            if (self.num == 0) return .{ .num = 0 };
+            if (self.num == RocDec.one_point_zero.num) return other;
+            std.debug.panic("TODO runtime exception for overflow!", .{});
+        });
+
+        // The product is negative exactly when the operand signs differ.
+        const negate = (self.num < 0) != (other.num < 0);
+
+        const magnitude: i128 = mul_and_decimalize(lhs_magnitude, rhs_magnitude);
+
+        return .{ .num = if (negate) -magnitude else magnitude };
+    }
+};
+
+const U256 = struct { // a 256-bit unsigned value held as two u128 halves
+    hi: u128, // upper 128 bits
+    lo: u128, // lower 128 bits
+};
+
+/// Multiplies two unsigned magnitudes and rescales the 256-bit product back to
+/// a single fixed-point value by dividing it by 10^18.
+///
+/// The division is carried out without a wide divide: add 1 to the product,
+/// multiply by floor(2^315 / 10^18), then shift the 512-bit result right by
+/// 315 bits. Panics if the rescaled value does not fit in an i128.
+fn mul_and_decimalize(a: u128, b: u128) i128 {
+    const answer_u256 = mul_u128(a, b);
+
+    var lhs_hi = answer_u256.hi;
+    var lhs_lo = answer_u256.lo;
+
+    // Divide - or just add 1, multiply by floor(2^315/10^18), then right shift 315 times.
+    // floor(2^315/10^18) is 66749594872528440074844428317798503581334516323645399060845050244444366430645
+
+    // Add 1.
+    // This can't overflow because the initial numbers are only 127bit due to removing the sign bit.
+    var overflowed = @addWithOverflow(u128, lhs_lo, 1, &lhs_lo);
+    if (overflowed) {
+        lhs_hi += 1;
+    }
+
+    // This needs to do multiplication in a way that expands,
+    // since we throw away 315 bits we care only about the higher end, not lower.
+    // So like need to do high low mult with 2 U256's and then bitshift.
+    // I bet this has a lot of room for multiplication optimization.
+    const rhs_hi: u128 = 0x9392ee8e921d5d073aff322e62439fcf;
+    const rhs_lo: u128 = 0x32d7f344649470f90cac0c573bf9e1b5;
+
+    // Four 128x128 partial products. Naming the 128-bit limbs of the 512-bit
+    // result (d, c, b, a) from most to least significant:
+    //   ea contributes to limbs b (hi) and a (lo)
+    //   gf and jh contribute to limbs c (hi) and b (lo)
+    //   lk contributes to limbs d (hi) and c (lo)
+    const ea = mul_u128(lhs_lo, rhs_lo);
+    const gf = mul_u128(lhs_hi, rhs_lo);
+    const jh = mul_u128(lhs_lo, rhs_hi);
+    const lk = mul_u128(lhs_hi, rhs_hi);
+
+    // ea.lo is the lowest limb; it is shifted out entirely, so it is not needed.
+    const e = ea.hi;
+
+    const g = gf.hi;
+    const f = gf.lo;
+
+    const j = jh.hi;
+    const h = jh.lo;
+
+    const l = lk.hi;
+    const k = lk.lo;
+
+    // Limb b = e + f + h. Only its carries matter, because limb b itself is
+    // shifted out along with limb a.
+    var e_plus_f: u128 = undefined;
+    overflowed = @addWithOverflow(u128, e, f, &e_plus_f);
+    const b_carry1: u128 = if (overflowed) 1 else 0;
+
+    var e_plus_f_plus_h: u128 = undefined;
+    overflowed = @addWithOverflow(u128, e_plus_f, h, &e_plus_f_plus_h);
+    const b_carry2: u128 = if (overflowed) 1 else 0;
+
+    // Limb c = g + j + k + carries out of limb b.
+    var g_plus_j: u128 = undefined;
+    overflowed = @addWithOverflow(u128, g, j, &g_plus_j);
+    const c_carry1: u128 = if (overflowed) 1 else 0;
+
+    var g_plus_j_plus_k: u128 = undefined;
+    overflowed = @addWithOverflow(u128, g_plus_j, k, &g_plus_j_plus_k);
+    const c_carry2: u128 = if (overflowed) 1 else 0;
+
+    var c_without_bcarry2: u128 = undefined;
+    overflowed = @addWithOverflow(u128, g_plus_j_plus_k, b_carry1, &c_without_bcarry2);
+    const c_carry3: u128 = if (overflowed) 1 else 0;
+
+    var c: u128 = undefined;
+    overflowed = @addWithOverflow(u128, c_without_bcarry2, b_carry2, &c);
+    const c_carry4: u128 = if (overflowed) 1 else 0;
+
+    // Limb d = l + carries out of limb c.
+    var d: u128 = undefined;
+    overflowed = @addWithOverflow(u128, l, c_carry1, &d);
+    overflowed = overflowed or @addWithOverflow(u128, d, c_carry2, &d);
+    overflowed = overflowed or @addWithOverflow(u128, d, c_carry3, &d);
+    overflowed = overflowed or @addWithOverflow(u128, d, c_carry4, &d);
+
+    if (overflowed) {
+        std.debug.panic("TODO runtime exception for overflow!", .{});
+    }
+
+    // Final 512bit value is d, c, b, a.
+    // It needs to be right shifted 315 times. Dropping limbs b and a accounts
+    // for 256 of those; 315 - 256 is 59, so right shift the pair (d, c) 59 times.
+    //
+    // Only the low 59 bits of d can land inside a 128-bit result. Anything
+    // above them would be silently discarded by the left shift below, so it
+    // has to be reported as overflow instead.
+    if ((d >> 59) != 0) {
+        std.debug.panic("TODO runtime exception for overflow!", .{});
+    }
+
+    const shifted: u128 = (c >> 59) | (d << (128 - 59));
+
+    // The magnitude must also fit in the non-negative range of i128.
+    if (shifted > std.math.maxInt(i128)) {
+        std.debug.panic("TODO runtime exception for overflow!", .{});
+    }
+
+    return @intCast(i128, shifted);
+}
+
+/// Computes the full 256-bit product of two u128 values.
+///
+/// Each operand is split into 64-bit halves and the four partial products are
+/// combined schoolbook-style, carrying between columns, so no intermediate
+/// value needs more than 128 bits.
+fn mul_u128(a: u128, b: u128) U256 {
+    const half_width: u32 = 64;
+    const low_half: u128 = math.maxInt(u128) >> half_width;
+
+    const a_lo = a & low_half;
+    const a_hi = a >> half_width;
+    const b_lo = b & low_half;
+    const b_hi = b >> half_width;
+
+    // Lowest column: a_lo * b_lo.
+    const low_product = a_lo * b_lo;
+
+    // First cross term, plus the carry out of the lowest column.
+    const cross_one = (low_product >> half_width) + a_hi * b_lo;
+
+    // Second cross term, plus the low half left over from the first one.
+    const cross_two = (cross_one & low_half) + b_hi * a_lo;
+
+    const lo = (low_product & low_half) + ((cross_two & low_half) << half_width);
+    const hi = (cross_one >> half_width) + (cross_two >> half_width) + a_hi * b_hi;
+
+    return .{ .hi = hi, .lo = lo };
+}
+
+const one_e20: i256 = 100000000000000000000; // presumably 10^20 (per the name); NOTE(review): not referenced by any other visible line of this file - confirm whether it is still needed
+
+const expectEqual = std.testing.expectEqual;
+
+// Adding zero to zero must give zero.
+test "add" {
+    const zero: RocDec = .{ .num = 0 };
+    const sum = zero.add(.{ .num = 0 });
+
+    expectEqual(RocDec{ .num = 0 }, sum);
+}
+
+// Multiplying zero by zero must give zero.
+test "mul" {
+    const zero: RocDec = .{ .num = 0 };
+    const product = zero.mul(.{ .num = 0 });
+
+    expectEqual(RocDec{ .num = 0 }, product);
+}
--- a/compiler/builtins/bitcode/src/main.zig
+++ b/compiler/builtins/bitcode/src/main.zig
@ -2,6 +2,9 @@ const builtin = @import("builtin");
 const std = @import("std");
 const testing = std.testing;

+// Dec Module
+const dec = @import("dec.zig");
+
 // List Module
 const list = @import("list.zig");

--- a/compiler/builtins/docs/Num.roc
+++ b/compiler/builtins/docs/Num.roc
@ -4,7 +4,7 @@ interface Num2

 ## Types

-## Represents a number that could be either an #Int or a #Float.
+## Represents a number that could be either an [Int] or a [Frac].
 ##
 ## This is useful for functions that can work on either, for example #Num.add, whose type is:
 ##
@ -53,6 +53,85 @@ interface Num2
 ## number literals without any suffix.
 Num range : [ @Num range ]

+## A decimal number.
+##
+## [Dec] is the best default choice for representing base-10 decimal numbers
+## like currency, because it is base-10 under the hood. In contrast,
+## [F64] and [F32] are base-2 under the hood, which can lead to decimal
+## precision loss even when doing addition and subtraction. For example, when
+## using [F64], running 0.1 + 0.2 returns 0.3000000000000000444089209850062616169452667236328125,
+## whereas when using [Dec], 0.1 + 0.2 returns 0.3.
+##
+## Under the hood, a [Dec] is an [I128], and operations on it perform
+## [base-10 fixed-point arithmetic](https://en.wikipedia.org/wiki/Fixed-point_arithmetic)
+## with 18 decimal places of precision.
+##
+## This means a [Dec] can represent whole numbers up to slightly over 170
+## quintillion, along with 18 decimal places. (To be precise, it can store
+## numbers between `-170_141_183_460_469_231_731.687303715884105728`
+## and `170_141_183_460_469_231_731.687303715884105727`.) Why 18
+## decimal places? It's the highest number of decimal places where you can still
+## convert any [U64] to a [Dec] without losing information.
+##
+## There are some use cases where [F64] and [F32] can be better choices than [Dec]
+## despite their precision issues. For example, in graphical applications they
+## can be a better choice for representing coordinates because they take up
+## less memory, certain relevant calculations run faster (see performance
+## details, below), and decimal precision loss isn't as big a concern when
+## dealing with screen coordinates as it is when dealing with currency.
+##
+## ## Performance
+##
+## [Dec] typically takes slightly less time than [F64] to perform addition and
+## subtraction, but 10-20 times longer to perform multiplication and division.
+## [sqrt] and trigonometry are massively slower with [Dec] than with [F64].
+Dec : Frac [ @Decimal128 ]
+
+## A fixed-size number with a fractional component.
+##
+## Roc fractions come in two flavors: fixed-point base-10 and floating-point base-2.
+##
+## * [Dec] is a 128-bit [fixed-point](https://en.wikipedia.org/wiki/Fixed-point_arithmetic) base-10 number. It's a great default choice, especially when precision is important - for example when representing currency. With [Dec], 0.1 + 0.2 returns 0.3.
+## * [F64] and [F32] are [floating-point](https://en.wikipedia.org/wiki/Floating-point_arithmetic) base-2 numbers. They sacrifice precision for lower memory usage and improved performance on some operations. This makes them a good fit for representing graphical coordinates. With [F64], 0.1 + 0.2 returns 0.3000000000000000444089209850062616169452667236328125.
+##
+## If you don't specify a type, Roc will default to using [Dec] because it's
+## the least error-prone overall. For example, suppose you write this:
+##
+##     wasItPrecise = 0.1 + 0.2 == 0.3
+##
+## The value of `wasItPrecise` here will be `True`, because Roc uses [Dec]
+## by default when there are no types specified.
+##
+## In contrast, suppose we use `f32` or `f64` for one of these numbers:
+##
+##     wasItPrecise = 0.1f64 + 0.2 == 0.3
+##
+## Here, `wasItPrecise` will be `False` because the entire calculation will have
+## been done in a base-2 floating point calculation, which causes noticeable
+## precision loss in this case.
+##
+## ## Performance Notes
+##
+## On typical modern CPUs, performance is similar between [Dec], [F64], and [F32]
+## for addition and subtraction. For example, [F32] and [F64] do addition using
+## a single CPU floating-point addition instruction, which typically takes a
+## few clock cycles to complete. In contrast, [Dec] does addition using a few
+## CPU integer arithmetic instructions, each of which typically takes only one
+## clock cycle to complete. Exact numbers will vary by CPU, but they should be
+## similar overall.
+##
+## [Dec] is significantly slower for multiplication and division. It not only
+## needs to do more arithmetic instructions than [F32] and [F64] do, but also
+## those instructions typically take more clock cycles to complete.
+##
+## With [Num.sqrt] and trigonometry functions like [Num.cos], there is
+## an even bigger performance difference. [F32] and [F64] can do these in a
+## single instruction, whereas [Dec] needs entire custom procedures - which use
+## loops and conditionals. If you need to do performance-critical trigonometry
+## or square roots, either [F32] or [F64] is probably a better choice than the
+## usual default choice of [Dec], despite the precision problems they bring.
+Frac a : Num [ @Fraction a ]
+
 ## A fixed-size integer - that is, a number with no fractional component.
 ##
 ## Integers come in two flavors: signed and unsigned. Signed integers can be
@ -102,21 +181,20 @@ Num range : [ @Num range ]
 ## * Start by deciding if this integer should allow negative numbers, and choose signed or unsigned accordingly.
 ## * Next, think about the range of numbers you expect this number to hold. Choose the smallest size you will never expect to overflow, no matter the inputs your program receives. (Validating inputs for size, and presenting the user with an error if they are too big, can help guard against overflow.)
 ## * Finally, if a particular numeric calculation is running too slowly, you can try experimenting with other number sizes. This rarely makes a meaningful difference, but some processors can operate on different number sizes at different speeds.
-Int size : Num [ @Int size ]
+Int size : Num [ @Integer size ]

 ## A signed 8-bit integer, ranging from -128 to 127
-I8 : Int [ @I8 ]
-U8 : Int [ @U8 ]
-U16 : Int [ @U16 ]
-I16 : Int [ @I16 ]
-U32 : Int [ @U32 ]
-I32 : Int [ @I32 ]
-I64 : Int [ @I64 ]
-U64 : Int [ @U64 ]
-I128 : Int [ @I128 ]
-U128 : Int [ @U128 ]
-Ilen : Int [ @Ilen ]
-Nat : Int [ @Nat ]
+I8 : Int [ @Signed8 ]
+U8 : Int [ @Unsigned8 ]
+I16 : Int [ @Signed16 ]
+U16 : Int [ @Unsigned16 ]
+I32 : Int [ @Signed32 ]
+U32 : Int [ @Unsigned32 ]
+I64 : Int [ @Signed64 ]
+U64 : Int [ @Unsigned64 ]
+I128 : Int [ @Signed128 ]
+U128 : Int [ @Unsigned128 ]
+Nat : Int [ @Natural ]

 ## A 64-bit signed integer. All number literals without decimal points are compatible with #Int values.
 ##