mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-04 04:08:19 +00:00
implement float multiplication
This commit is contained in:
parent
038ba47409
commit
d39de07b5a
4 changed files with 127 additions and 0 deletions
|
@ -443,6 +443,25 @@ impl Assembler<AArch64GeneralReg, AArch64FloatReg> for AArch64Assembler {
|
|||
todo!("register multiplication for AArch64");
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn mul_freg32_freg32_freg32(
|
||||
_buf: &mut Vec<'_, u8>,
|
||||
_dst: AArch64FloatReg,
|
||||
_src1: AArch64FloatReg,
|
||||
_src2: AArch64FloatReg,
|
||||
) {
|
||||
todo!("multiplication for floats for AArch64");
|
||||
}
|
||||
#[inline(always)]
|
||||
fn mul_freg64_freg64_freg64(
|
||||
_buf: &mut Vec<'_, u8>,
|
||||
_dst: AArch64FloatReg,
|
||||
_src1: AArch64FloatReg,
|
||||
_src2: AArch64FloatReg,
|
||||
) {
|
||||
todo!("multiplication for floats for AArch64");
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn jmp_imm32(_buf: &mut Vec<'_, u8>, _offset: i32) -> usize {
|
||||
todo!("jump instructions for AArch64");
|
||||
|
|
|
@ -210,6 +210,18 @@ pub trait Assembler<GeneralReg: RegTrait, FloatReg: RegTrait>: Sized + Copy {
|
|||
fn mov_stack32_reg64(buf: &mut Vec<'_, u8>, offset: i32, src: GeneralReg);
|
||||
|
||||
fn neg_reg64_reg64(buf: &mut Vec<'_, u8>, dst: GeneralReg, src: GeneralReg);
|
||||
fn mul_freg32_freg32_freg32(
|
||||
buf: &mut Vec<'_, u8>,
|
||||
dst: FloatReg,
|
||||
src1: FloatReg,
|
||||
src2: FloatReg,
|
||||
);
|
||||
fn mul_freg64_freg64_freg64(
|
||||
buf: &mut Vec<'_, u8>,
|
||||
dst: FloatReg,
|
||||
src1: FloatReg,
|
||||
src2: FloatReg,
|
||||
);
|
||||
fn imul_reg64_reg64_reg64(
|
||||
buf: &mut Vec<'_, u8>,
|
||||
dst: GeneralReg,
|
||||
|
@ -750,6 +762,18 @@ impl<
|
|||
.load_to_general_reg(&mut self.buf, src2);
|
||||
ASM::imul_reg64_reg64_reg64(&mut self.buf, dst_reg, src1_reg, src2_reg);
|
||||
}
|
||||
Layout::Builtin(Builtin::Float(FloatWidth::F64)) => {
|
||||
let dst_reg = self.storage_manager.claim_float_reg(&mut self.buf, dst);
|
||||
let src1_reg = self.storage_manager.load_to_float_reg(&mut self.buf, src1);
|
||||
let src2_reg = self.storage_manager.load_to_float_reg(&mut self.buf, src2);
|
||||
ASM::mul_freg64_freg64_freg64(&mut self.buf, dst_reg, src1_reg, src2_reg);
|
||||
}
|
||||
Layout::Builtin(Builtin::Float(FloatWidth::F32)) => {
|
||||
let dst_reg = self.storage_manager.claim_float_reg(&mut self.buf, dst);
|
||||
let src1_reg = self.storage_manager.load_to_float_reg(&mut self.buf, src1);
|
||||
let src2_reg = self.storage_manager.load_to_float_reg(&mut self.buf, src2);
|
||||
ASM::mul_freg32_freg32_freg32(&mut self.buf, dst_reg, src1_reg, src2_reg);
|
||||
}
|
||||
x => todo!("NumMul: layout, {:?}", x),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1009,6 +1009,38 @@ impl Assembler<X86_64GeneralReg, X86_64FloatReg> for X86_64Assembler {
|
|||
imul_reg64_reg64(buf, dst, src2);
|
||||
}
|
||||
|
||||
fn mul_freg32_freg32_freg32(
|
||||
buf: &mut Vec<'_, u8>,
|
||||
dst: X86_64FloatReg,
|
||||
src1: X86_64FloatReg,
|
||||
src2: X86_64FloatReg,
|
||||
) {
|
||||
if dst == src1 {
|
||||
mulss_freg32_freg32(buf, dst, src2);
|
||||
} else if dst == src2 {
|
||||
mulss_freg32_freg32(buf, dst, src1);
|
||||
} else {
|
||||
movss_freg32_freg32(buf, dst, src1);
|
||||
mulss_freg32_freg32(buf, dst, src2);
|
||||
}
|
||||
}
|
||||
#[inline(always)]
|
||||
fn mul_freg64_freg64_freg64(
|
||||
buf: &mut Vec<'_, u8>,
|
||||
dst: X86_64FloatReg,
|
||||
src1: X86_64FloatReg,
|
||||
src2: X86_64FloatReg,
|
||||
) {
|
||||
if dst == src1 {
|
||||
mulsd_freg64_freg64(buf, dst, src2);
|
||||
} else if dst == src2 {
|
||||
mulsd_freg64_freg64(buf, dst, src1);
|
||||
} else {
|
||||
movsd_freg64_freg64(buf, dst, src1);
|
||||
mulsd_freg64_freg64(buf, dst, src2);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn jmp_imm32(buf: &mut Vec<'_, u8>, offset: i32) -> usize {
|
||||
jmp_imm32(buf, offset);
|
||||
|
@ -1396,6 +1428,46 @@ fn addss_freg32_freg32(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64Fl
|
|||
}
|
||||
}
|
||||
|
||||
/// `MULSD xmm1,xmm2/m64` -> Multiply the low double-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1.
|
||||
#[inline(always)]
|
||||
fn mulsd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
|
||||
let dst_high = dst as u8 > 7;
|
||||
let dst_mod = dst as u8 % 8;
|
||||
let src_high = src as u8 > 7;
|
||||
let src_mod = src as u8 % 8;
|
||||
if dst_high || src_high {
|
||||
buf.extend(&[
|
||||
0xF2,
|
||||
0x40 | ((dst_high as u8) << 2) | (src_high as u8),
|
||||
0x0F,
|
||||
0x59,
|
||||
0xC0 | (dst_mod << 3) | (src_mod),
|
||||
])
|
||||
} else {
|
||||
buf.extend(&[0xF2, 0x0F, 0x59, 0xC0 | (dst_mod << 3) | (src_mod)])
|
||||
}
|
||||
}
|
||||
|
||||
/// `ADDSS xmm1,xmm2/m64` -> Add the low single-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1.
|
||||
#[inline(always)]
|
||||
fn mulss_freg32_freg32(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
|
||||
let dst_high = dst as u8 > 7;
|
||||
let dst_mod = dst as u8 % 8;
|
||||
let src_high = src as u8 > 7;
|
||||
let src_mod = src as u8 % 8;
|
||||
if dst_high || src_high {
|
||||
buf.extend(&[
|
||||
0xF3,
|
||||
0x40 | ((dst_high as u8) << 2) | (src_high as u8),
|
||||
0x0F,
|
||||
0x59,
|
||||
0xC0 | (dst_mod << 3) | (src_mod),
|
||||
])
|
||||
} else {
|
||||
buf.extend(&[0xF3, 0x0F, 0x59, 0xC0 | (dst_mod << 3) | (src_mod)])
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn andpd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FloatReg, src: X86_64FloatReg) {
|
||||
let dst_high = dst as u8 > 7;
|
||||
|
|
|
@ -1129,6 +1129,18 @@ fn gen_mul_quadword_and_lower() {
|
|||
assert_evals_to!("2u8 * 4 * 6", 48, u8);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(any(feature = "gen-llvm", feature = "gen-dev", feature = "gen-wasm"))]
|
||||
fn gen_mul_f64() {
|
||||
assert_evals_to!("2f64 * 4 * 6", 48.0, f64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(any(feature = "gen-llvm", feature = "gen-dev", feature = "gen-wasm"))]
|
||||
fn gen_mul_f32() {
|
||||
assert_evals_to!("2f32 * 4 * 6", 48.0, f32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
|
||||
fn gen_div_i64() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue