Merge remote-tracking branch 'origin/trunk' into morphic-lib

Folkert 2021-05-17 19:32:58 +02:00
commit c5bd06db66
19 changed files with 1487 additions and 993 deletions

View file

@ -250,6 +250,16 @@ impl Assembler<AArch64GeneralReg, AArch64FloatReg> for AArch64Assembler {
unimplemented!("abs_reg64_reg64 is not yet implement for AArch64");
}
#[inline(always)]
fn abs_freg64_freg64(
_buf: &mut Vec<'_, u8>,
_relocs: &mut Vec<'_, Relocation>,
_dst: AArch64FloatReg,
_src: AArch64FloatReg,
) {
unimplemented!("abs_reg64_reg64 is not yet implement for AArch64");
}
#[inline(always)]
fn add_reg64_reg64_imm32(
buf: &mut Vec<'_, u8>,
@ -291,6 +301,16 @@ impl Assembler<AArch64GeneralReg, AArch64FloatReg> for AArch64Assembler {
unimplemented!("calling functions literal not yet implemented for AArch64");
}
#[inline(always)]
fn imul_reg64_reg64_reg64(
_buf: &mut Vec<'_, u8>,
_dst: AArch64GeneralReg,
_src1: AArch64GeneralReg,
_src2: AArch64GeneralReg,
) {
unimplemented!("register multiplication not implemented yet for AArch64");
}
#[inline(always)]
fn jmp_imm32(_buf: &mut Vec<'_, u8>, _offset: i32) -> usize {
unimplemented!("jump instructions not yet implemented for AArch64");

View file

@ -71,6 +71,12 @@ pub trait CallConv<GeneralReg: RegTrait, FloatReg: RegTrait> {
/// dst should always come before sources.
pub trait Assembler<GeneralReg: RegTrait, FloatReg: RegTrait> {
fn abs_reg64_reg64(buf: &mut Vec<'_, u8>, dst: GeneralReg, src: GeneralReg);
fn abs_freg64_freg64(
buf: &mut Vec<'_, u8>,
relocs: &mut Vec<'_, Relocation>,
dst: FloatReg,
src: FloatReg,
);
fn add_reg64_reg64_imm32(buf: &mut Vec<'_, u8>, dst: GeneralReg, src1: GeneralReg, imm32: i32);
fn add_freg64_freg64_freg64(
@ -124,6 +130,13 @@ pub trait Assembler<GeneralReg: RegTrait, FloatReg: RegTrait> {
fn mov_stack32_freg64(buf: &mut Vec<'_, u8>, offset: i32, src: FloatReg);
fn mov_stack32_reg64(buf: &mut Vec<'_, u8>, offset: i32, src: GeneralReg);
fn imul_reg64_reg64_reg64(
buf: &mut Vec<'_, u8>,
dst: GeneralReg,
src1: GeneralReg,
src2: GeneralReg,
);
fn sub_reg64_reg64_imm32(buf: &mut Vec<'_, u8>, dst: GeneralReg, src1: GeneralReg, imm32: i32);
fn sub_reg64_reg64_reg64(
buf: &mut Vec<'_, u8>,
@ -468,6 +481,15 @@ impl<
Ok(())
}
fn build_num_abs_f64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String> {
let dst_reg = self.claim_float_reg(dst)?;
let src_reg = self.load_to_float_reg(src)?;
ASM::abs_freg64_freg64(&mut self.buf, &mut self.relocs, dst_reg, src_reg);
Ok(())
}
fn build_num_add_i64(
&mut self,
dst: &Symbol,
@ -494,6 +516,19 @@ impl<
Ok(())
}
fn build_num_mul_i64(
&mut self,
dst: &Symbol,
src1: &Symbol,
src2: &Symbol,
) -> Result<(), String> {
let dst_reg = self.claim_general_reg(dst)?;
let src1_reg = self.load_to_general_reg(src1)?;
let src2_reg = self.load_to_general_reg(src2)?;
ASM::imul_reg64_reg64_reg64(&mut self.buf, dst_reg, src1_reg, src2_reg);
Ok(())
}
fn build_num_sub_i64(
&mut self,
dst: &Symbol,

View file

@ -740,6 +740,24 @@ impl Assembler<X86_64GeneralReg, X86_64FloatReg> for X86_64Assembler {
cmovl_reg64_reg64(buf, dst, src);
}
#[inline(always)]
fn abs_freg64_freg64(
buf: &mut Vec<'_, u8>,
relocs: &mut Vec<'_, Relocation>,
dst: X86_64FloatReg,
src: X86_64FloatReg,
) {
// Load the sign-clearing mask into dst from a RIP-relative constant, then
// AND src into it; clearing bit 63 of an f64 yields its absolute value.
movsd_freg64_rip_offset32(buf, dst, 0);
// TODO: make sure this constant is emitted only once instead of once per call to abs.
relocs.push(Relocation::LocalData {
offset: buf.len() as u64 - 4,
data: 0x7fffffffffffffffu64.to_le_bytes().to_vec(),
});
andpd_freg64_freg64(buf, dst, src);
}
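Aside: the implementation above relies on the standard f64 sign-bit trick. A minimal plain-Rust sketch (illustrative only, not part of the backend) of what the emitted `movsd` + `andpd` pair computes:

```rust
// An f64's sign is bit 63; ANDing the raw bits with the same
// 0x7fff_ffff_ffff_ffff mask the relocation above emits clears the
// sign and leaves the exponent and mantissa untouched.
fn abs_via_mask(x: f64) -> f64 {
    f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff)
}

fn main() {
    assert_eq!(abs_via_mask(-3.5), 3.5);
    assert_eq!(abs_via_mask(3.5), 3.5);
    assert_eq!(abs_via_mask(-0.0).to_bits(), 0); // -0.0 normalizes to +0.0
}
```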
#[inline(always)]
fn add_reg64_reg64_imm32(
buf: &mut Vec<'_, u8>,
@ -796,6 +814,21 @@ impl Assembler<X86_64GeneralReg, X86_64FloatReg> for X86_64Assembler {
});
}
#[inline(always)]
fn imul_reg64_reg64_reg64(
buf: &mut Vec<'_, u8>,
dst: X86_64GeneralReg,
src1: X86_64GeneralReg,
src2: X86_64GeneralReg,
) {
if dst == src1 {
imul_reg64_reg64(buf, dst, src2);
} else if dst == src2 {
// Multiplication is commutative, so fold src1 into dst directly; going
// through the mov below would clobber src2 before the multiply.
imul_reg64_reg64(buf, dst, src1);
} else {
mov_reg64_reg64(buf, dst, src1);
imul_reg64_reg64(buf, dst, src2);
}
}
#[inline(always)]
fn jmp_imm32(buf: &mut Vec<'_, u8>, offset: i32) -> usize {
jmp_imm32(buf, offset);
@ -976,6 +1009,21 @@ fn binop_reg64_reg64(
buf.extend(&[rex, op_code, 0xC0 + dst_mod + src_mod]);
}
/// Like `binop_reg64_reg64`, but for instructions with a two-byte opcode
/// (an escape byte such as 0x0F followed by the actual opcode).
#[inline(always)]
fn extended_binop_reg64_reg64(
op_code1: u8,
op_code2: u8,
buf: &mut Vec<'_, u8>,
dst: X86_64GeneralReg,
src: X86_64GeneralReg,
) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, op_code1, op_code2, 0xC0 + dst_mod + src_mod]);
}
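Aside: both binop helpers share the same ModRM byte math. Register-direct operands encode as `0b11_rrr_mmm`: 0xC0, plus the reg-field register's low three bits shifted left by three, plus the r/m register's low three bits, while each register's fourth bit moves into the REX prefix. A standalone sketch (the helper name is hypothetical, for illustration only):

```rust
// Hypothetical helper mirroring the ModRM math in the two functions above.
fn modrm_register_direct(reg_field: u8, rm_field: u8) -> u8 {
    0xC0 + ((reg_field % 8) << 3) + (rm_field % 8)
}

fn main() {
    // Matches the `imul RAX, R15` test vector below (0x49, 0x0F, 0xAF, 0xC7):
    // IMUL keeps its destination in the reg field, so reg = RAX (0), rm = R15 (15).
    assert_eq!(modrm_register_direct(0, 15), 0xC7);
}
```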
// Below here are the functions for all of the assembly instructions.
// Their names are based on the instruction and operators combined.
// You should call `buf.reserve()` if you push or extend more than once.
@ -1018,6 +1066,26 @@ fn addsd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64Fl
}
}
/// `ANDPD xmm1,xmm2/m128` -> Bitwise Logical AND of Packed Double-Precision Floating-Point Values.
#[inline(always)]
fn andpd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
let dst_high = dst as u8 > 7;
let dst_mod = dst as u8 % 8;
let src_high = src as u8 > 7;
let src_mod = src as u8 % 8;
if dst_high || src_high {
// Operands in XMM8..XMM15 need a REX prefix: 0x40, plus the R bit
// (bit 2, extending dst in the reg field) and the B bit (bit 0, extending src).
buf.extend(&[
0x66,
0x40 + ((dst_high as u8) << 2) + (src_high as u8),
0x0F,
0x54,
0xC0 + (dst_mod << 3) + (src_mod),
])
} else {
buf.extend(&[0x66, 0x0F, 0x54, 0xC0 + (dst_mod << 3) + (src_mod)])
}
}
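Aside: as a cross-check of the prefix logic above, this standalone sketch (illustrative only) rebuilds the `ANDPD XMM15, XMM0` encoding by hand and compares it against the corresponding test vector below.

```rust
fn main() {
    let (dst, src) = (15u8, 0u8); // XMM15, XMM0
    // REX prefix: 0x40, plus the R bit (4) for a high dst and the B bit (1) for a high src.
    let rex = 0x40 + ((dst > 7) as u8) * 4 + ((src > 7) as u8);
    // Register-direct ModRM with dst in the reg field and src in r/m.
    let modrm = 0xC0 + ((dst % 8) << 3) + (src % 8);
    assert_eq!([0x66, rex, 0x0F, 0x54, modrm], [0x66, 0x44, 0x0F, 0x54, 0xF8]);
}
```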
/// r/m64 AND imm8 (sign-extended).
#[inline(always)]
fn and_reg64_imm8(buf: &mut Vec<'_, u8>, dst: X86_64GeneralReg, imm: i8) {
@ -1052,6 +1120,14 @@ fn cmp_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GeneralReg, src: X86_64Gene
binop_reg64_reg64(0x39, buf, dst, src);
}
/// `IMUL r64,r/m64` -> Signed Multiply r/m64 to r64.
#[inline(always)]
fn imul_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GeneralReg, src: X86_64GeneralReg) {
// IMUL is unusual: the destination goes in the ModRM reg field, the reverse
// of most other binary ops here, so the operands are passed as (src, dst).
extended_binop_reg64_reg64(0x0F, 0xAF, buf, src, dst);
}
/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64-bits.
#[inline(always)]
fn jmp_imm32(buf: &mut Vec<'_, u8>, imm: i32) {
@ -1389,6 +1465,35 @@ mod tests {
}
}
#[test]
fn test_andpd_freg64_freg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
(
(X86_64FloatReg::XMM0, X86_64FloatReg::XMM0),
vec![0x66, 0x0F, 0x54, 0xC0],
),
(
(X86_64FloatReg::XMM0, X86_64FloatReg::XMM15),
vec![0x66, 0x41, 0x0F, 0x54, 0xC7],
),
(
(X86_64FloatReg::XMM15, X86_64FloatReg::XMM0),
vec![0x66, 0x44, 0x0F, 0x54, 0xF8],
),
(
(X86_64FloatReg::XMM15, X86_64FloatReg::XMM15),
vec![0x66, 0x45, 0x0F, 0x54, 0xFF],
),
] {
buf.clear();
andpd_freg64_freg64(&mut buf, *dst, *src);
assert_eq!(&expected[..], &buf[..]);
}
}
#[test]
fn test_xor_reg64_reg64() {
let arena = bumpalo::Bump::new();
@ -1460,6 +1565,34 @@ mod tests {
}
}
#[test]
fn test_imul_reg64_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
(
(X86_64GeneralReg::RAX, X86_64GeneralReg::RAX),
[0x48, 0x0F, 0xAF, 0xC0],
),
(
(X86_64GeneralReg::RAX, X86_64GeneralReg::R15),
[0x49, 0x0F, 0xAF, 0xC7],
),
(
(X86_64GeneralReg::R15, X86_64GeneralReg::RAX),
[0x4C, 0x0F, 0xAF, 0xF8],
),
(
(X86_64GeneralReg::R15, X86_64GeneralReg::R15),
[0x4D, 0x0F, 0xAF, 0xFF],
),
] {
buf.clear();
imul_reg64_reg64(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_jmp_imm32() {
let arena = bumpalo::Bump::new();

View file

@ -184,6 +184,9 @@ where
Symbol::NUM_ATAN => {
self.build_run_low_level(sym, &LowLevel::NumAtan, arguments, layout)
}
Symbol::NUM_MUL => {
self.build_run_low_level(sym, &LowLevel::NumMul, arguments, layout)
}
Symbol::NUM_POW_INT => self.build_run_low_level(
sym,
&LowLevel::NumPowInt,
@ -237,6 +240,7 @@ where
// TODO: when this is expanded to floats, deal with typecasting here, and then call the correct low-level method.
match layout {
Layout::Builtin(Builtin::Int64) => self.build_num_abs_i64(sym, &args[0]),
Layout::Builtin(Builtin::Float64) => self.build_num_abs_f64(sym, &args[0]),
x => Err(format!("layout, {:?}, not implemented yet", x)),
}
}
@ -261,6 +265,15 @@ where
LowLevel::NumAtan => {
self.build_fn_call(sym, bitcode::NUM_ATAN.to_string(), args, &[*layout], layout)
}
LowLevel::NumMul => {
// TODO: when this is expanded to floats, deal with typecasting here, and then call the correct low-level method.
match layout {
Layout::Builtin(Builtin::Int64) => {
self.build_num_mul_i64(sym, &args[0], &args[1])
}
x => Err(format!("layout, {:?}, not implemented yet", x)),
}
}
LowLevel::NumPowInt => self.build_fn_call(
sym,
bitcode::NUM_POW_INT.to_string(),
@ -302,6 +315,10 @@ where
/// It only deals with inputs and outputs of i64 type.
fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String>;
/// build_num_abs_f64 stores the absolute value of src into dst.
/// It only deals with inputs and outputs of f64 type.
fn build_num_abs_f64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String>;
/// build_num_add_i64 stores the sum of src1 and src2 into dst.
/// It only deals with inputs and outputs of i64 type.
fn build_num_add_i64(
@ -320,6 +337,15 @@ where
src2: &Symbol,
) -> Result<(), String>;
/// build_num_mul_i64 stores `src1 * src2` into dst.
/// It only deals with inputs and outputs of i64 type.
fn build_num_mul_i64(
&mut self,
dst: &Symbol,
src1: &Symbol,
src2: &Symbol,
) -> Result<(), String>;
/// build_num_sub_i64 stores the `src1 - src2` difference into dst.
/// It only deals with inputs and outputs of i64 type.
fn build_num_sub_i64(