diff --git a/compiler/gen_dev/src/generic64/aarch64.rs b/compiler/gen_dev/src/generic64/aarch64.rs
index 8f18fe0041..df58823d12 100644
--- a/compiler/gen_dev/src/generic64/aarch64.rs
+++ b/compiler/gen_dev/src/generic64/aarch64.rs
@@ -147,12 +147,11 @@ impl CallConv<AArch64GPReg> for AArch64Call {
             requested_stack_size.checked_add(8 * saved_regs.len() as i32 + offset as i32)
         {
             if aligned_stack_size > 0 {
-                // TODO deal with sizes over imm12.
-                sub_reg64_reg64_imm12(
+                AArch64Assembler::sub_reg64_reg64_imm32(
                     buf,
                     AArch64GPReg::ZRSP,
                     AArch64GPReg::ZRSP,
-                    aligned_stack_size as u16,
+                    aligned_stack_size,
                 );
 
                 // All the following stores could be optimized by using `STP` to store pairs.
@@ -196,12 +195,11 @@ impl CallConv<AArch64GPReg> for AArch64Call {
                 offset -= 8;
                 AArch64Assembler::mov_reg64_stack32(buf, *reg, offset);
             }
-            // TODO deal with sizes over imm12.
-            add_reg64_reg64_imm12(
+            AArch64Assembler::add_reg64_reg64_imm32(
                 buf,
                 AArch64GPReg::ZRSP,
                 AArch64GPReg::ZRSP,
-                aligned_stack_size as u16,
+                aligned_stack_size,
             );
         }
         Ok(())
@@ -209,6 +207,29 @@
 }
 
 impl Assembler<AArch64GPReg> for AArch64Assembler {
+    #[inline(always)]
+    fn abs_reg64_reg64<'a>(_buf: &mut Vec<'a, u8>, _dst: AArch64GPReg, _src: AArch64GPReg) {
+        unimplemented!("abs_reg64_reg64 is not yet implemented for AArch64");
+    }
+
+    #[inline(always)]
+    fn add_reg64_reg64_imm32<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: AArch64GPReg,
+        src: AArch64GPReg,
+        imm32: i32,
+    ) {
+        if imm32 < 0 {
+            unimplemented!("immediate additions with values less than 0 are not yet implemented");
+        } else if imm32 <= 0xFFF {
+            add_reg64_reg64_imm12(buf, dst, src, imm32 as u16);
+        } else {
+            unimplemented!(
+                "immediate additions with values that do not fit in 12 bits are not yet implemented"
+            );
+        }
+    }
+
     #[inline(always)]
     fn add_reg64_reg64_reg64<'a>(
         buf: &mut Vec<'a, u8>,
@@ -267,8 +288,23 @@ impl Assembler<AArch64GPReg> for AArch64Assembler {
     }
 
     #[inline(always)]
-    fn abs_reg64_reg64<'a>(_buf: &mut Vec<'a, u8>, _dst: AArch64GPReg, _src: AArch64GPReg) {
-        unimplemented!("abs_reg64_reg64 is not yet implement for AArch64");
+    fn sub_reg64_reg64_imm32<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: AArch64GPReg,
+        src: AArch64GPReg,
+        imm32: i32,
+    ) {
+        if imm32 < 0 {
+            unimplemented!(
+                "immediate subtractions with values less than 0 are not yet implemented"
+            );
+        } else if imm32 <= 0xFFF {
+            sub_reg64_reg64_imm12(buf, dst, src, imm32 as u16);
+        } else {
+            unimplemented!(
+                "immediate subtractions with values that do not fit in 12 bits are not yet implemented"
+            );
+        }
     }
 
     #[inline(always)]
@@ -277,6 +313,8 @@
     }
 }
 
+impl AArch64Assembler {}
+
 /// AArch64Instruction, maps all instructions to an enum.
 /// Decoding the function should be cheap because we will always inline.
 /// All of the operations should resolved by constants, leave just some bit manipulation.
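// --- Note: illustrative sketch, not part of the patch above. ---------------
// The new AArch64 wrappers only accept immediates that fit the 12-bit field of
// ADD/SUB (immediate); negative or wider values still hit `unimplemented!`.
// The standalone functions below sketch the instruction words that an imm12
// helper like `add_reg64_reg64_imm12`/`sub_reg64_reg64_imm12` is expected to
// produce. They assume raw register numbers 0-31 and a shift of 0, and are not
// taken from the compiler source.

/// Encode `ADD Xd, Xn, #imm12` (64-bit, no shift).
fn encode_add_imm12(rd: u32, rn: u32, imm12: u32) -> u32 {
    assert!(imm12 <= 0xFFF, "immediate must fit in 12 bits");
    assert!(rd <= 31 && rn <= 31, "register numbers are 5 bits");
    0x9100_0000 | (imm12 << 10) | (rn << 5) | rd
}

/// Encode `SUB Xd, Xn, #imm12` (64-bit, no shift); only the top opcode bits differ from ADD.
fn encode_sub_imm12(rd: u32, rn: u32, imm12: u32) -> u32 {
    assert!(imm12 <= 0xFFF, "immediate must fit in 12 bits");
    assert!(rd <= 31 && rn <= 31, "register numbers are 5 bits");
    0xD100_0000 | (imm12 << 10) | (rn << 5) | rd
}

fn main() {
    // SP/ZR is register number 31 in this encoding class.
    println!("{:#010x}", encode_sub_imm12(31, 31, 16)); // sub sp, sp, #0x10 -> 0xd10043ff
    println!("{:#010x}", encode_add_imm12(31, 31, 16)); // add sp, sp, #0x10 -> 0x910043ff
}
// ---------------------------------------------------------------------------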
diff --git a/compiler/gen_dev/src/generic64/mod.rs b/compiler/gen_dev/src/generic64/mod.rs
index 1b2516cc8e..fdc1519bb9 100644
--- a/compiler/gen_dev/src/generic64/mod.rs
+++ b/compiler/gen_dev/src/generic64/mod.rs
@@ -43,12 +43,14 @@ pub trait CallConv<GPReg: GPRegTrait> {
 /// Generally, I prefer explicit sources, as opposed to dst being one of the sources. Ex: `x = x + y` would be `add x, x, y` instead of `add x, y`.
 /// dst should always come before sources.
 pub trait Assembler<GPReg: GPRegTrait> {
+    fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+    fn add_reg64_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, imm32: i32);
     fn add_reg64_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, src2: GPReg);
     fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64);
     fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
     fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32);
     fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg);
-    fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+    fn sub_reg64_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, imm32: i32);
     fn ret<'a>(buf: &mut Vec<'a, u8>);
 }
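// --- Note: illustrative sketch, not part of the patch above. ---------------
// With `add_reg64_reg64_imm32` and `sub_reg64_reg64_imm32` on the generic
// `Assembler` trait, shared code such as stack setup/cleanup can adjust the
// stack pointer without knowing which backend it drives. The toy trait and
// backend below (`MiniAssembler`, `FakeBackend`, a `Vec<String>` buffer in
// place of the real bumpalo byte buffer) are hypothetical stand-ins that only
// demonstrate the calling pattern.

trait MiniAssembler<GPReg: Copy> {
    fn add_reg64_reg64_imm32(buf: &mut Vec<String>, dst: GPReg, src1: GPReg, imm32: i32);
    fn sub_reg64_reg64_imm32(buf: &mut Vec<String>, dst: GPReg, src1: GPReg, imm32: i32);
}

struct FakeBackend;

impl MiniAssembler<&'static str> for FakeBackend {
    fn add_reg64_reg64_imm32(buf: &mut Vec<String>, dst: &'static str, src1: &'static str, imm32: i32) {
        buf.push(format!("add {}, {}, #{}", dst, src1, imm32));
    }
    fn sub_reg64_reg64_imm32(buf: &mut Vec<String>, dst: &'static str, src1: &'static str, imm32: i32) {
        buf.push(format!("sub {}, {}, #{}", dst, src1, imm32));
    }
}

/// Generic caller: reserve and later release an aligned frame through the trait,
/// the way the per-architecture stack helpers now do.
fn with_frame<GPReg: Copy, ASM: MiniAssembler<GPReg>>(buf: &mut Vec<String>, sp: GPReg, size: i32) {
    ASM::sub_reg64_reg64_imm32(buf, sp, sp, size); // prologue
    ASM::add_reg64_reg64_imm32(buf, sp, sp, size); // epilogue
}

fn main() {
    let mut buf = Vec::new();
    with_frame::<&'static str, FakeBackend>(&mut buf, "sp", 32);
    for line in &buf {
        println!("{}", line);
    }
}
// ---------------------------------------------------------------------------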
diff --git a/compiler/gen_dev/src/generic64/x86_64.rs b/compiler/gen_dev/src/generic64/x86_64.rs
index 344a3cf192..98fa94afa6 100644
--- a/compiler/gen_dev/src/generic64/x86_64.rs
+++ b/compiler/gen_dev/src/generic64/x86_64.rs
@@ -179,11 +179,11 @@ fn x86_64_generic_setup_stack<'a>(
     requested_stack_size: i32,
 ) -> Result<i32, String> {
     if !leaf_function {
-        push_reg64(buf, X86_64GPReg::RBP);
+        X86_64Assembler::push_reg64(buf, X86_64GPReg::RBP);
         X86_64Assembler::mov_reg64_reg64(buf, X86_64GPReg::RBP, X86_64GPReg::RSP);
     }
     for reg in saved_regs {
-        push_reg64(buf, *reg);
+        X86_64Assembler::push_reg64(buf, *reg);
     }
 
     // full size is upcast to i64 to make sure we don't overflow here.
@@ -200,8 +200,12 @@ fn x86_64_generic_setup_stack<'a>(
     };
     if let Some(aligned_stack_size) = requested_stack_size.checked_add(offset as i32) {
         if aligned_stack_size > 0 {
-            // TODO: move this call to one using X86_64Assembler.
-            sub_reg64_imm32(buf, X86_64GPReg::RSP, aligned_stack_size);
+            X86_64Assembler::sub_reg64_reg64_imm32(
+                buf,
+                X86_64GPReg::RSP,
+                X86_64GPReg::RSP,
+                aligned_stack_size,
+            );
             Ok(aligned_stack_size)
         } else {
             Ok(0)
@@ -219,15 +223,19 @@ fn x86_64_generic_cleanup_stack<'a>(
     aligned_stack_size: i32,
 ) -> Result<(), String> {
     if aligned_stack_size > 0 {
-        // TODO: move this call to one using X86_64Assembler.
-        add_reg64_imm32(buf, X86_64GPReg::RSP, aligned_stack_size);
+        X86_64Assembler::add_reg64_reg64_imm32(
+            buf,
+            X86_64GPReg::RSP,
+            X86_64GPReg::RSP,
+            aligned_stack_size,
+        );
     }
     for reg in saved_regs.iter().rev() {
-        pop_reg64(buf, *reg);
+        X86_64Assembler::pop_reg64(buf, *reg);
     }
     if !leaf_function {
         X86_64Assembler::mov_reg64_reg64(buf, X86_64GPReg::RSP, X86_64GPReg::RBP);
-        pop_reg64(buf, X86_64GPReg::RBP);
+        X86_64Assembler::pop_reg64(buf, X86_64GPReg::RBP);
     }
     Ok(())
 }
@@ -236,6 +244,26 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
     // These functions should map to the raw assembly functions below.
     // In some cases, that means you can just directly call one of the direct assembly functions.
     #[inline(always)]
+    fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
+        mov_reg64_reg64(buf, dst, src);
+        neg_reg64(buf, dst);
+        cmovl_reg64_reg64(buf, dst, src);
+    }
+    #[inline(always)]
+    fn add_reg64_reg64_imm32<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: X86_64GPReg,
+        src1: X86_64GPReg,
+        imm32: i32,
+    ) {
+        if dst == src1 {
+            add_reg64_imm32(buf, dst, imm32);
+        } else {
+            mov_reg64_reg64(buf, dst, src1);
+            add_reg64_imm32(buf, dst, imm32);
+        }
+    }
+    #[inline(always)]
     fn add_reg64_reg64_reg64<'a>(
         buf: &mut Vec<'a, u8>,
         dst: X86_64GPReg,
@@ -268,10 +296,18 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
         mov_stack32_reg64(buf, offset, src);
     }
     #[inline(always)]
-    fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
-        mov_reg64_reg64(buf, dst, src);
-        neg_reg64(buf, dst);
-        cmovl_reg64_reg64(buf, dst, src);
+    fn sub_reg64_reg64_imm32<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: X86_64GPReg,
+        src1: X86_64GPReg,
+        imm32: i32,
+    ) {
+        if dst == src1 {
+            sub_reg64_imm32(buf, dst, imm32);
+        } else {
+            mov_reg64_reg64(buf, dst, src1);
+            sub_reg64_imm32(buf, dst, imm32);
+        }
     }
     #[inline(always)]
     fn ret<'a>(buf: &mut Vec<'a, u8>) {
@@ -279,6 +315,17 @@
     }
 }
 
+impl X86_64Assembler {
+    #[inline(always)]
+    fn pop_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+        pop_reg64(buf, reg);
+    }
+
+    #[inline(always)]
+    fn push_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+        push_reg64(buf, reg);
+    }
+}
 const REX: u8 = 0x40;
 const REX_W: u8 = REX + 0x8;
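// --- Note: illustrative sketch, not part of the patch above. ---------------
// The x86_64 impl maps the three-operand trait calls onto x86's two-operand
// ADD/SUB by copying `src1` into `dst` first (and skipping the copy when they
// are the same register), and builds `abs` from MOV + NEG + CMOVL: NEG leaves
// the flags of the negated value, and CMOVL (signed "less") copies the original
// back exactly when the input was positive. The plain-Rust model below only
// checks that register-level logic; it is not the emitted machine code.

/// Model of `mov dst, src; neg dst; cmovl dst, src` on an i64 "register".
fn abs_via_neg_cmovl(src: i64) -> i64 {
    let mut dst = src;        // mov dst, src
    dst = dst.wrapping_neg(); // neg dst (flags are set from this result)
    if dst < 0 {
        // cmovl dst, src -- modeled with `< 0`; at i64::MIN the real cmovl does
        // not fire (overflow flag), but both paths leave i64::MIN in dst anyway.
        dst = src;
    }
    dst
}

fn main() {
    assert_eq!(abs_via_neg_cmovl(7), 7);
    assert_eq!(abs_via_neg_cmovl(-7), 7);
    assert_eq!(abs_via_neg_cmovl(0), 0);
    // i64::MIN has no positive counterpart; like the hardware sequence, it wraps to itself.
    assert_eq!(abs_via_neg_cmovl(i64::MIN), i64::MIN);
    println!("abs model behaves as expected");
}
// ---------------------------------------------------------------------------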