Merge pull request #3870 from roc-lang/dev-backend-div

add int/float division to the dev backend
2025-07-24 06:55:15 +00:00 · 2022-08-25 09:03:19 +02:00 · 2022-08-25 09:03:19 +02:00 · 51d5aa789a
commit 51d5aa789a
parent 1dc765ba29 2d366acc6e
10 changed files with 384 additions and 40 deletions
--- a/crates/compiler/gen_dev/src/generic64/aarch64.rs
+++ b/crates/compiler/gen_dev/src/generic64/aarch64.rs
@ -456,6 +456,32 @@ impl Assembler<AArch64GeneralReg, AArch64FloatReg> for AArch64Assembler {
        todo!("register unsigned multiplication for AArch64");
    }

+    /// Signed 64-bit integer division for AArch64.
+    ///
+    /// Not implemented yet: every call panics through `todo!`. All parameters are
+    /// unused, which is why they carry a leading underscore.
+    fn idiv_reg64_reg64_reg64<'a, ASM, CC>(
+        _buf: &mut Vec<'a, u8>,
+        _storage_manager: &mut StorageManager<'a, AArch64GeneralReg, AArch64FloatReg, ASM, CC>,
+        _dst: AArch64GeneralReg,
+        _src1: AArch64GeneralReg,
+        _src2: AArch64GeneralReg,
+    ) where
+        ASM: Assembler<AArch64GeneralReg, AArch64FloatReg>,
+        CC: CallConv<AArch64GeneralReg, AArch64FloatReg, ASM>,
+    {
+        todo!("register signed division for AArch64");
+    }
+
+    /// Unsigned 64-bit integer division for AArch64.
+    ///
+    /// Not implemented yet: every call panics through `todo!`. All parameters are
+    /// unused, which is why they carry a leading underscore.
+    fn udiv_reg64_reg64_reg64<'a, ASM, CC>(
+        _buf: &mut Vec<'a, u8>,
+        _storage_manager: &mut StorageManager<'a, AArch64GeneralReg, AArch64FloatReg, ASM, CC>,
+        _dst: AArch64GeneralReg,
+        _src1: AArch64GeneralReg,
+        _src2: AArch64GeneralReg,
+    ) where
+        ASM: Assembler<AArch64GeneralReg, AArch64FloatReg>,
+        CC: CallConv<AArch64GeneralReg, AArch64FloatReg, ASM>,
+    {
+        todo!("register unsigned division for AArch64");
+    }
+
    #[inline(always)]
    fn mul_freg32_freg32_freg32(
        _buf: &mut Vec<'_, u8>,
@ -475,6 +501,25 @@ impl Assembler<AArch64GeneralReg, AArch64FloatReg> for AArch64Assembler {
        todo!("multiplication for floats for AArch64");
    }

+    /// 32-bit float division for AArch64.
+    ///
+    /// Not implemented yet: every call panics through `todo!`.
+    #[inline(always)]
+    fn div_freg32_freg32_freg32(
+        _buf: &mut Vec<'_, u8>,
+        _dst: AArch64FloatReg,
+        _src1: AArch64FloatReg,
+        _src2: AArch64FloatReg,
+    ) {
+        todo!("division for floats for AArch64");
+    }
+    /// 64-bit float division for AArch64.
+    ///
+    /// Not implemented yet: every call panics through `todo!`.
+    #[inline(always)]
+    fn div_freg64_freg64_freg64(
+        _buf: &mut Vec<'_, u8>,
+        _dst: AArch64FloatReg,
+        _src1: AArch64FloatReg,
+        _src2: AArch64FloatReg,
+    ) {
+        todo!("division for floats for AArch64");
+    }
+
    #[inline(always)]
    fn jmp_imm32(_buf: &mut Vec<'_, u8>, _offset: i32) -> usize {
        todo!("jump instructions for AArch64");
--- a/crates/compiler/gen_dev/src/generic64/mod.rs
+++ b/crates/compiler/gen_dev/src/generic64/mod.rs
@ -222,6 +222,18 @@ pub trait Assembler<GeneralReg: RegTrait, FloatReg: RegTrait>: Sized + Copy {
        src1: FloatReg,
        src2: FloatReg,
    );
+    /// Appends code to `buf` that divides the 32-bit float in `src1` by the one in `src2`
+    /// and leaves the quotient in `dst`.
+    fn div_freg32_freg32_freg32(
+        buf: &mut Vec<'_, u8>,
+        dst: FloatReg,
+        src1: FloatReg,
+        src2: FloatReg,
+    );
+    /// Appends code to `buf` that divides the 64-bit float in `src1` by the one in `src2`
+    /// and leaves the quotient in `dst`.
+    fn div_freg64_freg64_freg64(
+        buf: &mut Vec<'_, u8>,
+        dst: FloatReg,
+        src1: FloatReg,
+        src2: FloatReg,
+    );
    fn imul_reg64_reg64_reg64(
        buf: &mut Vec<'_, u8>,
        dst: GeneralReg,
@ -238,6 +250,25 @@ pub trait Assembler<GeneralReg: RegTrait, FloatReg: RegTrait>: Sized + Copy {
        ASM: Assembler<GeneralReg, FloatReg>,
        CC: CallConv<GeneralReg, FloatReg, ASM>;

+    /// Appends code to `buf` for the signed 64-bit division `dst = src1 / src2`.
+    ///
+    /// The storage manager is passed in so that an implementation can release registers the
+    /// divide instruction uses implicitly (the x86-64 implementation does this for RAX and RDX).
+    fn idiv_reg64_reg64_reg64<'a, ASM, CC>(
+        buf: &mut Vec<'a, u8>,
+        storage_manager: &mut StorageManager<'a, GeneralReg, FloatReg, ASM, CC>,
+        dst: GeneralReg,
+        src1: GeneralReg,
+        src2: GeneralReg,
+    ) where
+        ASM: Assembler<GeneralReg, FloatReg>,
+        CC: CallConv<GeneralReg, FloatReg, ASM>;
+    /// Appends code to `buf` for the unsigned 64-bit division `dst = src1 / src2`.
+    ///
+    /// The storage manager is passed in so that an implementation can release registers the
+    /// divide instruction uses implicitly (the x86-64 implementation does this for RAX and RDX).
+    fn udiv_reg64_reg64_reg64<'a, ASM, CC>(
+        buf: &mut Vec<'a, u8>,
+        storage_manager: &mut StorageManager<'a, GeneralReg, FloatReg, ASM, CC>,
+        dst: GeneralReg,
+        src1: GeneralReg,
+        src2: GeneralReg,
+    ) where
+        ASM: Assembler<GeneralReg, FloatReg>,
+        CC: CallConv<GeneralReg, FloatReg, ASM>;
+
    fn sub_reg64_reg64_imm32(buf: &mut Vec<'_, u8>, dst: GeneralReg, src1: GeneralReg, imm32: i32);
    fn sub_reg64_reg64_reg64(
        buf: &mut Vec<'_, u8>,
@ -808,6 +839,62 @@ impl<
        }
    }

+    /// Emits code computing `src1 / src2` for the numeric `layout` and binds the result to `dst`.
+    ///
+    /// `F32`/`F64` use the matching float divide; 8- to 64-bit integers use the 64-bit
+    /// unsigned or signed divide. Any other layout ends in `todo!`.
+    fn build_num_div(&mut self, dst: &Symbol, src1: &Symbol, src2: &Symbol, layout: &Layout<'a>) {
+        match layout {
+            Layout::Builtin(Builtin::Float(FloatWidth::F32)) => {
+                let quotient = self.storage_manager.claim_float_reg(&mut self.buf, dst);
+                let dividend = self.storage_manager.load_to_float_reg(&mut self.buf, src1);
+                let divisor = self.storage_manager.load_to_float_reg(&mut self.buf, src2);
+                ASM::div_freg32_freg32_freg32(&mut self.buf, quotient, dividend, divisor);
+            }
+            Layout::Builtin(Builtin::Float(FloatWidth::F64)) => {
+                let quotient = self.storage_manager.claim_float_reg(&mut self.buf, dst);
+                let dividend = self.storage_manager.load_to_float_reg(&mut self.buf, src1);
+                let divisor = self.storage_manager.load_to_float_reg(&mut self.buf, src2);
+                ASM::div_freg64_freg64_freg64(&mut self.buf, quotient, dividend, divisor);
+            }
+            Layout::Builtin(Builtin::Int(
+                IntWidth::U64 | IntWidth::U32 | IntWidth::U16 | IntWidth::U8,
+            )) => {
+                // The destination is claimed before the operands are loaded.
+                let quotient = self.storage_manager.claim_general_reg(&mut self.buf, dst);
+                let dividend = self
+                    .storage_manager
+                    .load_to_general_reg(&mut self.buf, src1);
+                let divisor = self
+                    .storage_manager
+                    .load_to_general_reg(&mut self.buf, src2);
+
+                ASM::udiv_reg64_reg64_reg64(
+                    &mut self.buf,
+                    &mut self.storage_manager,
+                    quotient,
+                    dividend,
+                    divisor,
+                );
+            }
+            Layout::Builtin(Builtin::Int(
+                IntWidth::I64 | IntWidth::I32 | IntWidth::I16 | IntWidth::I8,
+            )) => {
+                // The destination is claimed before the operands are loaded.
+                let quotient = self.storage_manager.claim_general_reg(&mut self.buf, dst);
+                let dividend = self
+                    .storage_manager
+                    .load_to_general_reg(&mut self.buf, src1);
+                let divisor = self
+                    .storage_manager
+                    .load_to_general_reg(&mut self.buf, src2);
+
+                ASM::idiv_reg64_reg64_reg64(
+                    &mut self.buf,
+                    &mut self.storage_manager,
+                    quotient,
+                    dividend,
+                    divisor,
+                );
+            }
+            x => todo!("NumDiv: layout, {:?}", x),
+        }
+    }
+
    fn build_num_neg(&mut self, dst: &Symbol, src: &Symbol, layout: &Layout<'a>) {
        match layout {
            Layout::Builtin(Builtin::Int(IntWidth::I64 | IntWidth::U64)) => {
--- a/crates/compiler/gen_dev/src/generic64/x86_64.rs
+++ b/crates/compiler/gen_dev/src/generic64/x86_64.rs
@ -1061,6 +1061,78 @@ impl Assembler<X86_64GeneralReg, X86_64FloatReg> for X86_64Assembler {
        }
    }

+    /// Emits `dst = src1 / src2` for 32-bit floats.
+    ///
+    /// `DIVSS` is a two-operand instruction (`dst = dst / src`), so the dividend has to be
+    /// in `dst` before the divide is emitted.
+    fn div_freg32_freg32_freg32(
+        buf: &mut Vec<'_, u8>,
+        dst: X86_64FloatReg,
+        src1: X86_64FloatReg,
+        src2: X86_64FloatReg,
+    ) {
+        if dst == src1 {
+            // The dividend already lives in dst (this also covers dst == src1 == src2).
+            divss_freg32_freg32(buf, dst, src2);
+        } else if dst == src2 {
+            // Division is not commutative: dividing dst by src1 here would produce
+            // src2 / src1. Copying src1 into dst first would destroy the divisor, so this
+            // case needs a scratch register, which this function does not have.
+            // Fail loudly instead of emitting code that computes the wrong quotient.
+            todo!("float division where dst aliases the divisor (needs a scratch register)");
+        } else {
+            movsd_freg64_freg64(buf, dst, src1);
+            divss_freg32_freg32(buf, dst, src2);
+        }
+    }
+
+    /// Emits `dst = src1 / src2` for 64-bit floats.
+    ///
+    /// `DIVSD` is a two-operand instruction (`dst = dst / src`), so the dividend has to be
+    /// in `dst` before the divide is emitted.
+    fn div_freg64_freg64_freg64(
+        buf: &mut Vec<'_, u8>,
+        dst: X86_64FloatReg,
+        src1: X86_64FloatReg,
+        src2: X86_64FloatReg,
+    ) {
+        if dst == src1 {
+            // The dividend already lives in dst (this also covers dst == src1 == src2).
+            divsd_freg64_freg64(buf, dst, src2);
+        } else if dst == src2 {
+            // Division is not commutative: dividing dst by src1 here would produce
+            // src2 / src1. Copying src1 into dst first would destroy the divisor, so this
+            // case needs a scratch register, which this function does not have.
+            // Fail loudly instead of emitting code that computes the wrong quotient.
+            todo!("float division where dst aliases the divisor (needs a scratch register)");
+        } else {
+            movsd_freg64_freg64(buf, dst, src1);
+            divsd_freg64_freg64(buf, dst, src2);
+        }
+    }
+
+    /// Signed 64-bit division: emits code that leaves `src1 / src2` in `dst`.
+    ///
+    /// The dividend is moved into RAX, `idiv_reg64_reg64` emits `CQO` followed by
+    /// `IDIV src2`, and the quotient is then copied from RAX into `dst`.
+    fn idiv_reg64_reg64_reg64<'a, ASM, CC>(
+        buf: &mut Vec<'a, u8>,
+        storage_manager: &mut StorageManager<'a, X86_64GeneralReg, X86_64FloatReg, ASM, CC>,
+        dst: X86_64GeneralReg,
+        src1: X86_64GeneralReg,
+        src2: X86_64GeneralReg,
+    ) where
+        ASM: Assembler<X86_64GeneralReg, X86_64FloatReg>,
+        CC: CallConv<X86_64GeneralReg, X86_64FloatReg, ASM>,
+    {
+        use crate::generic64::RegStorage;
+
+        // IDIV implicitly reads and writes RAX and RDX, so both are released first.
+        // NOTE(review): presumably `ensure_reg_free` relocates whatever symbol lives in the
+        // register. If `src2` was already loaded into RAX or RDX, the `mov`/`CQO` below would
+        // overwrite the divisor - confirm the storage manager rules this out.
+        storage_manager.ensure_reg_free(buf, RegStorage::General(X86_64GeneralReg::RAX));
+        storage_manager.ensure_reg_free(buf, RegStorage::General(X86_64GeneralReg::RDX));
+
+        mov_reg64_reg64(buf, X86_64GeneralReg::RAX, src1);
+        idiv_reg64_reg64(buf, src2);
+        mov_reg64_reg64(buf, dst, X86_64GeneralReg::RAX);
+    }
+
+    /// Unsigned 64-bit division: emits code that leaves `src1 / src2` in `dst`.
+    ///
+    /// The dividend is moved into RAX, `udiv_reg64_reg64` emits the divide by `src2`
+    /// (it also overwrites RDX beforehand), and the quotient is copied from RAX into `dst`.
+    fn udiv_reg64_reg64_reg64<'a, ASM, CC>(
+        buf: &mut Vec<'a, u8>,
+        storage_manager: &mut StorageManager<'a, X86_64GeneralReg, X86_64FloatReg, ASM, CC>,
+        dst: X86_64GeneralReg,
+        src1: X86_64GeneralReg,
+        src2: X86_64GeneralReg,
+    ) where
+        ASM: Assembler<X86_64GeneralReg, X86_64FloatReg>,
+        CC: CallConv<X86_64GeneralReg, X86_64FloatReg, ASM>,
+    {
+        use crate::generic64::RegStorage;
+
+        // DIV implicitly reads and writes RAX and RDX, so both are released first.
+        // NOTE(review): presumably `ensure_reg_free` relocates whatever symbol lives in the
+        // register. If `src2` was already loaded into RAX or RDX, the code below would
+        // overwrite the divisor - confirm the storage manager rules this out.
+        storage_manager.ensure_reg_free(buf, RegStorage::General(X86_64GeneralReg::RAX));
+        storage_manager.ensure_reg_free(buf, RegStorage::General(X86_64GeneralReg::RDX));
+
+        mov_reg64_reg64(buf, X86_64GeneralReg::RAX, src1);
+        udiv_reg64_reg64(buf, src2);
+        mov_reg64_reg64(buf, dst, X86_64GeneralReg::RAX);
+    }
+
    #[inline(always)]
    fn jmp_imm32(buf: &mut Vec<'_, u8>, offset: i32) -> usize {
        jmp_imm32(buf, offset);
@ -1481,6 +1553,46 @@ fn mulsd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64Fl
    }
 }

+/// `DIVSS xmm1,xmm2/m32` -> Divide the low single-precision floating-point value in xmm1 by the one in xmm2/m32 and store the result in xmm1.
+///
+/// Encoded as `F3 [REX] 0F 5E /r`; the REX byte is only emitted when either register is xmm8 or above.
+#[inline(always)]
+fn divss_freg32_freg32(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
+    // Register-direct ModRM: dst goes in the reg field, src in the rm field.
+    let modrm = 0xC0 | ((dst as u8 % 8) << 3) | (src as u8 % 8);
+    let needs_rex_r = dst as u8 > 7;
+    let needs_rex_b = src as u8 > 7;
+    match (needs_rex_r, needs_rex_b) {
+        (false, false) => buf.extend(&[0xF3, 0x0F, 0x5E, modrm]),
+        _ => {
+            let rex = 0x40 | ((needs_rex_r as u8) << 2) | (needs_rex_b as u8);
+            buf.extend(&[0xF3, rex, 0x0F, 0x5E, modrm])
+        }
+    }
+}
+
+/// `DIVSD xmm1,xmm2/m64` -> Divide the low double-precision floating-point value in xmm1 by the one in xmm2/m64 and store the result in xmm1.
+///
+/// Encoded as `F2 [REX] 0F 5E /r`; the REX byte is only emitted when either register is xmm8 or above.
+#[inline(always)]
+fn divsd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
+    // Register-direct ModRM: dst goes in the reg field, src in the rm field.
+    let modrm = 0xC0 | ((dst as u8 % 8) << 3) | (src as u8 % 8);
+    let needs_rex_r = dst as u8 > 7;
+    let needs_rex_b = src as u8 > 7;
+    match (needs_rex_r, needs_rex_b) {
+        (false, false) => buf.extend(&[0xF2, 0x0F, 0x5E, modrm]),
+        _ => {
+            let rex = 0x40 | ((needs_rex_r as u8) << 2) | (needs_rex_b as u8);
+            buf.extend(&[0xF2, rex, 0x0F, 0x5E, modrm])
+        }
+    }
+}
+
 /// `ADDSS xmm1,xmm2/m64` -> Add the low single-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1.
 #[inline(always)]
 fn mulss_freg32_freg32(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
@ -1583,6 +1695,47 @@ fn mul_reg64_reg64(buf: &mut Vec<'_, u8>, src: X86_64GeneralReg) {
    buf.extend(&[rex, 0xF7, 0b1110_0000 | (src as u8 % 8)]);
 }

+/// `IDIV r/m64` -> Signed divide RDX:RAX by r/m64, with result stored in RAX ← Quotient, RDX ← Remainder.
+///
+/// A `CQO` is emitted first, so the caller only has to place the 64-bit dividend in RAX.
+#[inline(always)]
+fn idiv_reg64_reg64(buf: &mut Vec<'_, u8>, src: X86_64GeneralReg) {
+    let base_rex = add_reg_extension(src, REX_W);
+    let rex = if src.value() > 7 {
+        base_rex | REX_PREFIX_B
+    } else {
+        base_rex
+    };
+    // Register-direct ModRM with opcode extension /7 (IDIV); rm holds the low three bits of src.
+    let modrm = 0b1111_1000 | (src as u8 % 8);
+
+    // CQO (64-bit mode only) copies the sign bit (bit 63) of RAX into every bit of RDX,
+    // turning the quadword in RAX into the double-quadword dividend RDX:RAX that IDIV expects.
+    buf.extend(&[0x48, 0x99]);
+
+    buf.extend(&[rex, 0xF7, modrm]);
+}
+
+/// `DIV r/m64` -> Unsigned divide RDX:RAX by r/m64, with result stored in RAX ← Quotient, RDX ← Remainder.
+///
+/// RDX is zeroed first, so the caller only has to place the 64-bit dividend in RAX.
+#[inline(always)]
+fn udiv_reg64_reg64(buf: &mut Vec<'_, u8>, src: X86_64GeneralReg) {
+    let mut rex = REX_W;
+    rex = add_reg_extension(src, rex);
+
+    if src.value() > 7 {
+        rex |= REX_PREFIX_B;
+    }
+
+    // DIV treats RDX:RAX as one unsigned 128-bit dividend, so the upper half must be zero
+    // for a 64-bit dividend. Sign-extending with CQO is wrong here: any dividend with
+    // bit 63 set would fill RDX with ones and the quotient would no longer fit in RAX,
+    // which raises a divide error (#DE).
+    //
+    // `XOR edx, edx` (31 D2): a 32-bit register write zero-extends, clearing all of RDX.
+    buf.extend(&[0x31, 0xD2]);
+
+    // Register-direct ModRM with opcode extension /6 (DIV); rm holds the low three bits of src.
+    buf.extend(&[rex, 0xF7, 0b1111_0000 | (src as u8 % 8)]);
+}

 /// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64-bits.
 #[inline(always)]
 fn jmp_imm32(buf: &mut Vec<'_, u8>, imm: i32) {
@ -2233,6 +2386,44 @@ mod tests {
        );
    }

+    #[test]
+    fn test_idiv_reg64_reg64() {
+        disassembler_test!(
+            idiv_reg64_reg64,
+            |reg| format!("cqo\nidiv {}", reg),
+            ALL_GENERAL_REGS
+        );
+    }
+
+    #[test]
+    fn test_div_reg64_reg64() {
+        // The unsigned divide is preceded by a zeroing of RDX rather than a CQO.
+        disassembler_test!(
+            udiv_reg64_reg64,
+            |reg| format!("xor edx, edx\ndiv {}", reg),
+            ALL_GENERAL_REGS
+        );
+    }
+
+    // Runs `disassembler_test!` on `divsd_freg64_freg64` for every pair drawn from
+    // `ALL_FLOAT_REGS`, expecting the text `divsd <reg1>, <reg2>`.
+    #[test]
+    fn test_divsd_freg64_freg64() {
+        disassembler_test!(
+            divsd_freg64_freg64,
+            |reg1, reg2| format!("divsd {}, {}", reg1, reg2),
+            ALL_FLOAT_REGS,
+            ALL_FLOAT_REGS
+        );
+    }
+
+    // Runs `disassembler_test!` on `divss_freg32_freg32` for every pair drawn from
+    // `ALL_FLOAT_REGS`, expecting the text `divss <reg1>, <reg2>`.
+    #[test]
+    fn test_divss_freg32_freg32() {
+        disassembler_test!(
+            divss_freg32_freg32,
+            |reg1, reg2| format!("divss {}, {}", reg1, reg2),
+            ALL_FLOAT_REGS,
+            ALL_FLOAT_REGS
+        );
+    }
+
    #[test]
    fn test_jmp_imm32() {
        const INST_SIZE: i32 = 5;
--- a/crates/compiler/gen_dev/src/lib.rs
+++ b/crates/compiler/gen_dev/src/lib.rs
@ -442,6 +442,22 @@ trait Backend<'a> {
                );
                self.build_num_mul(sym, &args[0], &args[1], ret_layout)
            }
+            LowLevel::NumDivTruncUnchecked | LowLevel::NumDivFrac => {
+                debug_assert_eq!(
+                    2,
+                    args.len(),
+                    "NumDiv: expected to have exactly two argument"
+                );
+                debug_assert_eq!(
+                    arg_layouts[0], arg_layouts[1],
+                    "NumDiv: expected all arguments of to have the same layout"
+                );
+                debug_assert_eq!(
+                    arg_layouts[0], *ret_layout,
+                    "NumDiv: expected to have the same argument and return layout"
+                );
+                self.build_num_div(sym, &args[0], &args[1], ret_layout)
+            }
            LowLevel::NumNeg => {
                debug_assert_eq!(
                    1,
@ -702,6 +718,9 @@ trait Backend<'a> {
    /// build_num_mul stores `src1 * src2` into dst.
    fn build_num_mul(&mut self, dst: &Symbol, src1: &Symbol, src2: &Symbol, layout: &Layout<'a>);

+    /// build_num_div stores `src1 / src2` into dst.
+    fn build_num_div(&mut self, dst: &Symbol, src1: &Symbol, src2: &Symbol, layout: &Layout<'a>);
+
    /// build_num_neg stores the negated value of src into dst.
    fn build_num_neg(&mut self, dst: &Symbol, src: &Symbol, layout: &Layout<'a>);