diff --git a/crates/compiler/gen_dev/src/generic64/aarch64.rs b/crates/compiler/gen_dev/src/generic64/aarch64.rs
index 095ccb82b4..d5fba65acf 100644
--- a/crates/compiler/gen_dev/src/generic64/aarch64.rs
+++ b/crates/compiler/gen_dev/src/generic64/aarch64.rs
@@ -410,6 +410,15 @@ impl Assembler for AArch64Assembler {
         add_reg64_reg64_reg64(buf, dst, src1, src2);
     }
     #[inline(always)]
+    fn add_freg32_freg32_freg32(
+        _buf: &mut Vec<'_, u8>,
+        _dst: AArch64FloatReg,
+        _src1: AArch64FloatReg,
+        _src2: AArch64FloatReg,
+    ) {
+        todo!("adding floats for AArch64");
+    }
+    #[inline(always)]
     fn add_freg64_freg64_freg64(
         _buf: &mut Vec<'_, u8>,
         _dst: AArch64FloatReg,
diff --git a/crates/compiler/gen_dev/src/generic64/mod.rs b/crates/compiler/gen_dev/src/generic64/mod.rs
index bb9e13be36..a27f116908 100644
--- a/crates/compiler/gen_dev/src/generic64/mod.rs
+++ b/crates/compiler/gen_dev/src/generic64/mod.rs
@@ -124,6 +124,12 @@ pub trait Assembler: Sized + Copy {
     );
     fn add_reg64_reg64_imm32(buf: &mut Vec<'_, u8>, dst: GeneralReg, src1: GeneralReg, imm32: i32);
 
+    fn add_freg32_freg32_freg32(
+        buf: &mut Vec<'_, u8>,
+        dst: FloatReg,
+        src1: FloatReg,
+        src2: FloatReg,
+    );
     fn add_freg64_freg64_freg64(
         buf: &mut Vec<'_, u8>,
         dst: FloatReg,
@@ -716,6 +722,12 @@ impl<
                 let src2_reg = self.storage_manager.load_to_float_reg(&mut self.buf, src2);
                 ASM::add_freg64_freg64_freg64(&mut self.buf, dst_reg, src1_reg, src2_reg);
             }
+            Layout::Builtin(Builtin::Float(FloatWidth::F32)) => {
+                let dst_reg = self.storage_manager.claim_float_reg(&mut self.buf, dst);
+                let src1_reg = self.storage_manager.load_to_float_reg(&mut self.buf, src1);
+                let src2_reg = self.storage_manager.load_to_float_reg(&mut self.buf, src2);
+                ASM::add_freg32_freg32_freg32(&mut self.buf, dst_reg, src1_reg, src2_reg);
+            }
             x => todo!("NumAdd: layout, {:?}", x),
         }
     }
diff --git a/crates/compiler/gen_dev/src/generic64/x86_64.rs b/crates/compiler/gen_dev/src/generic64/x86_64.rs
index 666ce48be4..cb3e2e085c 100644
--- a/crates/compiler/gen_dev/src/generic64/x86_64.rs
+++ b/crates/compiler/gen_dev/src/generic64/x86_64.rs
@@ -957,6 +957,22 @@ impl Assembler for X86_64Assembler {
         }
     }
     #[inline(always)]
+    fn add_freg32_freg32_freg32(
+        buf: &mut Vec<'_, u8>,
+        dst: X86_64FloatReg,
+        src1: X86_64FloatReg,
+        src2: X86_64FloatReg,
+    ) {
+        if dst == src1 {
+            addss_freg32_freg32(buf, dst, src2);
+        } else if dst == src2 {
+            addss_freg32_freg32(buf, dst, src1);
+        } else {
+            movss_freg32_freg32(buf, dst, src1);
+            addss_freg32_freg32(buf, dst, src2);
+        }
+    }
+    #[inline(always)]
     fn add_freg64_freg64_freg64(
         buf: &mut Vec<'_, u8>,
         dst: X86_64FloatReg,
@@ -1360,6 +1376,26 @@ fn addsd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64Fl
     }
 }
 
+/// `ADDSS xmm1,xmm2/m32` -> Add the low single-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1.
+#[inline(always)]
+fn addss_freg32_freg32(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
+    let dst_high = dst as u8 > 7;
+    let dst_mod = dst as u8 % 8;
+    let src_high = src as u8 > 7;
+    let src_mod = src as u8 % 8;
+    if dst_high || src_high {
+        buf.extend(&[
+            0xF3,
+            0x40 | ((dst_high as u8) << 2) | (src_high as u8),
+            0x0F,
+            0x58,
+            0xC0 | (dst_mod << 3) | (src_mod),
+        ])
+    } else {
+        buf.extend(&[0xF3, 0x0F, 0x58, 0xC0 | (dst_mod << 3) | (src_mod)])
+    }
+}
+
 #[inline(always)]
 fn andpd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
     let dst_high = dst as u8 > 7;
@@ -1581,6 +1617,36 @@ fn raw_movsd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_
     }
 }
 
+/// `MOVSS xmm1,xmm2` -> Move scalar low single-precision floating-point value from xmm2 to xmm1 register.
+/// This will not generate anything if dst and src are the same.
+#[inline(always)]
+fn movss_freg32_freg32(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
+    if dst != src {
+        raw_movss_freg32_freg32(buf, dst, src);
+    }
+}
+
+/// `MOVSS xmm1,xmm2` -> Move scalar low single-precision floating-point value from xmm2 to xmm1 register.
+/// This will always generate the move. It is used for verification.
+#[inline(always)]
+fn raw_movss_freg32_freg32(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
+    let dst_high = dst as u8 > 7;
+    let dst_mod = dst as u8 % 8;
+    let src_high = src as u8 > 7;
+    let src_mod = src as u8 % 8;
+    if dst_high || src_high {
+        buf.extend(&[
+            0xF3,
+            0x40 | ((dst_high as u8) << 2) | (src_high as u8),
+            0x0F,
+            0x10,
+            0xC0 | (dst_mod << 3) | (src_mod),
+        ])
+    } else {
+        buf.extend(&[0xF3, 0x0F, 0x10, 0xC0 | (dst_mod << 3) | (src_mod)])
+    }
+}
+
 // `MOVSS xmm, m32` -> Load scalar single-precision floating-point value from m32 to xmm register.
 #[inline(always)]
 fn movss_freg32_rip_offset32(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, offset: u32) {
@@ -1960,6 +2026,16 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_addss_freg32_freg32() {
+        disassembler_test!(
+            addss_freg32_freg32,
+            |reg1, reg2| format!("addss {}, {}", reg1, reg2),
+            ALL_FLOAT_REGS,
+            ALL_FLOAT_REGS
+        );
+    }
+
     #[test]
     fn test_andpd_freg64_freg64() {
         disassembler_test!(
@@ -2131,6 +2207,16 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_movss_freg32_freg32() {
+        disassembler_test!(
+            raw_movss_freg32_freg32,
+            |reg1, reg2| format!("movss {}, {}", reg1, reg2),
+            ALL_FLOAT_REGS,
+            ALL_FLOAT_REGS
+        );
+    }
+
     #[test]
     fn test_movss_freg32_rip_offset32() {
         disassembler_test!(
diff --git a/crates/compiler/test_gen/src/gen_num.rs b/crates/compiler/test_gen/src/gen_num.rs
index c92dbf924c..485551df9f 100644
--- a/crates/compiler/test_gen/src/gen_num.rs
+++ b/crates/compiler/test_gen/src/gen_num.rs
@@ -693,6 +693,19 @@ fn gen_add_dec() {
 }
 #[test]
 #[cfg(any(feature = "gen-llvm", feature = "gen-dev", feature = "gen-wasm"))]
+fn gen_add_f32() {
+    assert_evals_to!(
+        indoc!(
+            r#"
+                1.1f32 + 2.4f32 + 3
+            "#
+        ),
+        6.5,
+        f32
+    );
+}
+#[test]
+#[cfg(any(feature = "gen-llvm", feature = "gen-dev", feature = "gen-wasm"))]
 fn gen_add_f64() {
     assert_evals_to!(
         indoc!(
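A note on the new x86-64 lowering, since the patch body is dense: `add_freg32_freg32_freg32` bridges the trait's three-operand form onto SSE's destructive two-operand `addss` (which computes `dst <- dst + src`). The three branches avoid a redundant `movss` when `dst` aliases either source, and the `dst == src2` branch is only correct because f32 addition commutes. A standalone sketch of that branch selection (the `lower_add_f32` helper and its textual output are illustrative only, not part of the patch):

```rust
// Hypothetical helper mirroring the branch selection in
// add_freg32_freg32_freg32, emitting assembly text instead of machine code.
fn lower_add_f32(dst: u8, src1: u8, src2: u8) -> Vec<String> {
    if dst == src1 {
        // dst already holds src1; accumulate src2 in place.
        vec![format!("addss xmm{dst}, xmm{src2}")]
    } else if dst == src2 {
        // Valid because f32 addition commutes: src2 + src1 == src1 + src2.
        vec![format!("addss xmm{dst}, xmm{src1}")]
    } else {
        // Disjoint registers: copy src1 into dst, then add src2.
        vec![
            format!("movss xmm{dst}, xmm{src1}"),
            format!("addss xmm{dst}, xmm{src2}"),
        ]
    }
}

fn main() {
    assert_eq!(lower_add_f32(0, 0, 1), ["addss xmm0, xmm1"]);
    assert_eq!(lower_add_f32(1, 0, 1), ["addss xmm1, xmm0"]);
    assert_eq!(
        lower_add_f32(2, 0, 1),
        ["movss xmm2, xmm0", "addss xmm2, xmm1"]
    );
}
```

The `movss_freg32_freg32`/`raw_movss_freg32_freg32` split follows the same economy one level down: the wrapper emits nothing at all when `dst == src`, which is why the disassembler test exercises the raw variant.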
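The two new byte emitters share one encoding shape: the mandatory `0xF3` prefix selecting the scalar single-precision form, an optional REX byte needed only when either register is XMM8..XMM15, the two-byte opcode (`0F 58` for ADDSS, `0F 10` for register-to-register MOVSS), and a register-direct ModRM byte. A minimal sketch with two spot-checked encodings, assuming plain `u8` register numbers in place of `X86_64FloatReg` (the `encode_addss` helper is hypothetical and only mirrors the bit arithmetic in the patch):

```rust
// Hypothetical stand-in for the patch's byte-emission logic, spot-checking
// the ADDSS encoding: F3 [REX] 0F 58 ModRM.
fn encode_addss(dst: u8, src: u8) -> Vec<u8> {
    let mut buf = vec![0xF3]; // mandatory prefix: scalar single-precision
    if dst > 7 || src > 7 {
        // REX: bit 2 (R) extends ModRM.reg (dst), bit 0 (B) extends ModRM.rm (src).
        buf.push(0x40 | (((dst > 7) as u8) << 2) | ((src > 7) as u8));
    }
    buf.extend([0x0F, 0x58]); // ADDSS opcode (0x10 here would give MOVSS instead)
    // ModRM with mod = 0b11 (register-direct): reg = dst, rm = src.
    buf.push(0xC0 | ((dst % 8) << 3) | (src % 8));
    buf
}

fn main() {
    // addss xmm1, xmm2 -> low registers, no REX byte needed.
    assert_eq!(encode_addss(1, 2), [0xF3, 0x0F, 0x58, 0xCA]);
    // addss xmm9, xmm10 -> both extended, so REX.RB (0x45) is emitted after F3.
    assert_eq!(encode_addss(9, 10), [0xF3, 0x45, 0x0F, 0x58, 0xCA]);
}
```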