Merge branch 'trunk' into dev-backend-num-is-zero

satotake 2021-11-24 10:59:42 +00:00 committed by GitHub
commit 10afadd810
21 changed files with 726 additions and 506 deletions


@ -1,6 +1,7 @@
use crate::generic64::{Assembler, CallConv, RegTrait, SymbolStorage};
use crate::Relocation;
use bumpalo::collections::Vec;
use packed_struct::prelude::*;
use roc_collections::all::MutMap;
use roc_module::symbol::Symbol;
use roc_mono::layout::Layout;
@ -42,8 +43,16 @@ pub enum AArch64GeneralReg {
/// This can mean Zero or Stack Pointer depending on the context.
ZRSP = 31,
}
impl RegTrait for AArch64GeneralReg {}
impl AArch64GeneralReg {
#[inline(always)]
fn id(&self) -> u8 {
*self as u8
}
}
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
#[allow(dead_code)]
pub enum AArch64FloatReg {}
@ -518,90 +527,165 @@ impl Assembler<AArch64GeneralReg, AArch64FloatReg> for AArch64Assembler {
impl AArch64Assembler {}
/// AArch64Instruction, maps all instructions to an enum.
/// Decoding the function should be cheap because we will always inline.
/// All of the operations should be resolved by constants, leaving just some bit manipulation.
/// Enums may not be complete since we will only add what we need.
#[derive(Debug)]
enum AArch64Instruction {
_Reserved,
_SVE,
DPImm(DPImmGroup),
Branch(BranchGroup),
LdStr(LdStrGroup),
DPReg(DPRegGroup),
_DPFloat,
}
// Instructions
// ARM manual section C3
// https://developer.arm.com/documentation/ddi0487/ga
// Map all instructions to a packed struct.
trait Aarch64Bytes: PackedStruct {
#[inline(always)]
fn bytes(&self) -> [u8; 4] {
let mut bytes: [u8; 4] = [0, 0, 0, 0];
self.pack_to_slice(&mut bytes).unwrap();
bytes.reverse();
bytes
}
}
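packed_struct packs these definitions MSB-first, so pack_to_slice yields the instruction word's bytes in big-endian order; the reverse() flips them into the little-endian byte order the AArch64 instruction stream expects. A minimal sanity check of that round trip, as a sketch that assumes the MoveWideImmediate struct defined below plus an X0 register variant that this diff elides:

#[test]
fn movz_encodes_little_endian() {
    // MOVZ X0, #1 encodes as 0xD2800020, stored little-endian in the stream.
    let inst = MoveWideImmediate::new(0b10, AArch64GeneralReg::X0, 1, 0, true);
    assert_eq!(inst.bytes(), [0x20, 0x00, 0x80, 0xD2]);
}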
#[derive(Debug)]
enum BranchGroup {
UnconditionBranchReg {
opc: u8,
op2: u8,
op3: u8,
reg_n: AArch64GeneralReg,
op4: u8,
},
}
#[derive(PackedStruct, Debug)]
#[packed_struct(endian = "msb")]
pub struct MoveWideImmediate {
sf: bool,
opc: Integer<u8, packed_bits::Bits<2>>,
fixed: Integer<u8, packed_bits::Bits<6>>, // = 0b100101,
hw: Integer<u8, packed_bits::Bits<2>>,
imm16: u16,
reg_d: Integer<u8, packed_bits::Bits<5>>, // AArch64GeneralReg
}
#[derive(Debug)]
enum DPRegGroup {
AddSubShifted {
sf: bool,
subtract: bool,
set_flags: bool,
shift: u8,
reg_m: AArch64GeneralReg,
imm6: u8,
reg_n: AArch64GeneralReg,
reg_d: AArch64GeneralReg,
},
Logical {
sf: bool,
op: DPRegLogicalOp,
shift: u8,
reg_m: AArch64GeneralReg,
imm6: u8,
reg_n: AArch64GeneralReg,
reg_d: AArch64GeneralReg,
},
}
impl Aarch64Bytes for MoveWideImmediate {}
impl MoveWideImmediate {
#[inline(always)]
fn new(opc: u8, rd: AArch64GeneralReg, imm16: u16, hw: u8, sf: bool) -> Self {
// TODO: revisit this if we change where we want to check the shift
// currently this is done in the assembler above
// assert!(shift % 16 == 0 && shift <= 48);
debug_assert!(hw <= 0b11);
debug_assert!(opc <= 0b11);
Self {
reg_d: rd.id().into(),
imm16,
hw: hw.into(),
opc: opc.into(),
sf,
fixed: 0b100101.into(),
}
}
}
#[derive(Debug)]
enum DPImmGroup {
AddSubImm {
sf: bool,
subtract: bool,
set_flags: bool,
shift: bool,
imm12: u16,
reg_n: AArch64GeneralReg,
reg_d: AArch64GeneralReg,
},
MoveWide {
sf: bool,
opc: u8,
hw: u8,
imm16: u16,
reg_d: AArch64GeneralReg,
},
}
#[derive(PackedStruct, Debug)]
#[packed_struct(endian = "msb")]
pub struct ArithmeticImmediate {
sf: bool,
op: bool, // add or subtract
s: bool,
fixed: Integer<u8, packed_bits::Bits<6>>, // = 0b100010,
sh: bool, // shift
imm12: Integer<u16, packed_bits::Bits<12>>,
reg_n: Integer<u8, packed_bits::Bits<5>>,
reg_d: Integer<u8, packed_bits::Bits<5>>,
}
impl Aarch64Bytes for ArithmeticImmediate {}
impl ArithmeticImmediate {
#[inline(always)]
fn new(
op: bool,
s: bool,
rd: AArch64GeneralReg,
rn: AArch64GeneralReg,
imm12: u16,
sh: bool,
) -> Self {
debug_assert!(imm12 <= 0xFFF);
Self {
reg_d: rd.id().into(),
reg_n: rn.id().into(),
imm12: imm12.into(),
sh,
s,
op,
// true for 64 bit addition
// false for 32 bit addition
sf: true,
fixed: 0b100010.into(),
}
}
}
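For a concrete reading of the field layout, `ADD X0, X1, #42` sets op = false (add), s = false (no flags), sh = false, imm12 = 42. A sketch, assuming X0/X1 register variants that this diff elides:

let inst = ArithmeticImmediate::new(
    false, false, AArch64GeneralReg::X0, AArch64GeneralReg::X1, 42, false,
);
// sf=1, op=0, s=0, fixed=0b100010, sh=0, imm12=42, rn=1, rd=0 packs to 0x9100A820.
assert_eq!(inst.bytes(), [0x20, 0xA8, 0x00, 0x91]);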
#[derive(Debug)]
enum LdStrGroup {
UnsignedImm {
size: u8,
v: bool,
opc: u8,
imm12: u16,
reg_n: AArch64GeneralReg,
reg_t: AArch64GeneralReg,
},
}
#[derive(Clone, Copy)]
#[allow(dead_code)]
enum ShiftType {
LSL = 0,
LSR = 1,
ASR = 2,
ROR = 3,
}
impl ShiftType {
#[inline(always)]
fn id(&self) -> u8 {
*self as u8
}
}
#[derive(PackedStruct)]
#[packed_struct(endian = "msb")]
pub struct ArithmeticShifted {
sf: bool,
op: bool, // add or subtract
s: bool,
fixed: Integer<u8, packed_bits::Bits<5>>, // = 0b01011,
shift: Integer<u8, packed_bits::Bits<2>>, // shift
fixed2: bool, // = 0b0,
reg_m: Integer<u8, packed_bits::Bits<5>>,
imm6: Integer<u8, packed_bits::Bits<6>>,
reg_n: Integer<u8, packed_bits::Bits<5>>,
reg_d: Integer<u8, packed_bits::Bits<5>>,
}
impl Aarch64Bytes for ArithmeticShifted {}
impl ArithmeticShifted {
#[inline(always)]
fn new(
op: bool,
s: bool,
shift: ShiftType,
imm6: u8,
rm: AArch64GeneralReg,
rn: AArch64GeneralReg,
rd: AArch64GeneralReg,
) -> Self {
debug_assert!(imm6 <= 0b111111);
Self {
reg_d: rd.id().into(),
reg_n: rn.id().into(),
imm6: imm6.into(),
reg_m: rm.id().into(),
fixed2: false,
shift: shift.id().into(),
fixed: 0b01011.into(),
s,
op,
// true for 64 bit addition
// false for 32 bit addition
sf: true,
}
}
}
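Register-register adds go through this struct; `ADD X0, X1, X2` is op = false, s = false, with no shift applied. A sketch, again with register variants this diff elides:

let inst = ArithmeticShifted::new(
    false, false, ShiftType::LSL, 0,
    AArch64GeneralReg::X2, AArch64GeneralReg::X1, AArch64GeneralReg::X0,
);
// sf=1, op=0, s=0, fixed=0b01011, shift=0, rm=2, imm6=0, rn=1, rd=0 packs to 0x8B020020.
assert_eq!(inst.bytes(), [0x20, 0x00, 0x02, 0x8B]);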
#[derive(Debug)]
#[allow(dead_code)]
enum DPRegLogicalOp {
enum LogicalOp {
AND,
BIC,
ORR,
@ -612,155 +696,142 @@ enum DPRegLogicalOp {
BICS,
}
#[inline(always)]
fn build_instruction(inst: AArch64Instruction) -> [u8; 4] {
let mut out: u32 = 0;
match inst {
AArch64Instruction::Branch(branch) => {
out |= 0b101 << 26;
match branch {
BranchGroup::UnconditionBranchReg {
opc,
op2,
op3,
reg_n,
op4,
} => {
debug_assert!(opc <= 0b1111);
debug_assert!(op2 <= 0b11111);
debug_assert!(op3 <= 0b111111);
debug_assert!(op4 <= 0b1111);
out |= 0b1101011 << 25;
out |= (opc as u32) << 21;
out |= (op2 as u32) << 16;
out |= (op3 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= op4 as u32;
}
}
}
#[derive(PackedStruct)]
#[packed_struct(endian = "msb")]
pub struct LogicalShiftedRegister {
sf: bool,
op: Integer<u8, packed_bits::Bits<2>>,
fixed: Integer<u8, packed_bits::Bits<5>>, // = 0b01010,
shift: Integer<u8, packed_bits::Bits<2>>, // shift
n: bool,
reg_m: Integer<u8, packed_bits::Bits<5>>,
imm6: Integer<u8, packed_bits::Bits<6>>,
reg_n: Integer<u8, packed_bits::Bits<5>>,
reg_d: Integer<u8, packed_bits::Bits<5>>,
}
impl Aarch64Bytes for LogicalShiftedRegister {}
impl LogicalShiftedRegister {
#[inline(always)]
fn new(
op: LogicalOp,
shift: ShiftType,
imm6: u8,
rm: AArch64GeneralReg,
rn: AArch64GeneralReg,
rd: AArch64GeneralReg,
) -> Self {
debug_assert!(imm6 <= 0b111111);
let (op, n) = match op {
LogicalOp::AND => (0b00, false),
LogicalOp::BIC => (0b00, true),
LogicalOp::ORR => (0b01, false),
LogicalOp::ORN => (0b01, true),
LogicalOp::EOR => (0b10, false),
LogicalOp::EON => (0b10, true),
LogicalOp::ANDS => (0b11, false),
LogicalOp::BICS => (0b11, true),
};
Self {
reg_d: rd.id().into(),
reg_n: rn.id().into(),
imm6: imm6.into(),
reg_m: rm.id().into(),
n,
shift: shift.id().into(),
fixed: 0b01010.into(),
op: op.into(),
// true for 64 bit operation
// false for 32 bit operation
sf: true,
}
}
}
AArch64Instruction::DPImm(dpimm) => {
out |= 0b100 << 26;
match dpimm {
DPImmGroup::MoveWide {
sf,
opc,
hw,
imm16,
reg_d,
} => {
out |= (sf as u32) << 31;
out |= (opc as u32) << 29;
out |= 0b101 << 23;
out |= (hw as u32) << 21;
out |= (imm16 as u32) << 5;
out |= reg_d as u32;
}
DPImmGroup::AddSubImm {
sf,
subtract,
set_flags,
shift,
imm12,
reg_n,
reg_d,
} => {
debug_assert!(imm12 <= 0xFFF);
out |= (sf as u32) << 31;
out |= (subtract as u32) << 30;
out |= (set_flags as u32) << 29;
out |= 0b010 << 23;
out |= (shift as u32) << 22;
out |= (imm12 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= reg_d as u32;
}
}
}
AArch64Instruction::DPReg(dpreg) => {
out |= 0b101 << 25;
match dpreg {
DPRegGroup::Logical {
sf,
op,
shift,
reg_m,
imm6,
reg_n,
reg_d,
} => {
debug_assert!(shift <= 0b11);
debug_assert!(imm6 <= 0b111111);
let (opc, n) = match op {
DPRegLogicalOp::AND => (0b00, 0),
DPRegLogicalOp::BIC => (0b00, 1),
DPRegLogicalOp::ORR => (0b01, 0),
DPRegLogicalOp::ORN => (0b01, 1),
DPRegLogicalOp::EOR => (0b10, 0),
DPRegLogicalOp::EON => (0b10, 1),
DPRegLogicalOp::ANDS => (0b11, 0),
DPRegLogicalOp::BICS => (0b11, 1),
};
out |= (sf as u32) << 31;
out |= opc << 29;
out |= (shift as u32) << 22;
out |= n << 21;
out |= (reg_m as u32) << 16;
out |= (imm6 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= reg_d as u32;
}
DPRegGroup::AddSubShifted {
sf,
subtract,
set_flags,
shift,
reg_m,
imm6,
reg_n,
reg_d,
} => {
debug_assert!(shift <= 0b11);
debug_assert!(imm6 <= 0b111111);
out |= (sf as u32) << 31;
out |= (subtract as u32) << 30;
out |= (set_flags as u32) << 29;
out |= 0b1 << 24;
out |= (shift as u32) << 22;
out |= (reg_m as u32) << 16;
out |= (imm6 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= reg_d as u32;
}
}
}
AArch64Instruction::LdStr(ldstr) => {
out |= 0b1 << 27;
match ldstr {
LdStrGroup::UnsignedImm {
size,
v,
opc,
imm12,
reg_n,
reg_t,
} => {
debug_assert!(size <= 0b11);
debug_assert!(imm12 <= 0xFFF);
out |= (size as u32) << 30;
out |= 0b11 << 28;
out |= (v as u32) << 26;
out |= 0b1 << 24;
out |= (opc as u32) << 22;
out |= (imm12 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= reg_t as u32;
}
}
}
x => unimplemented!("The instruction, {:?}, has not been implemented yet", x),
}
out.to_le_bytes()
}
#[derive(PackedStruct)]
pub struct UnconditionalBranchRegister {
fixed: Integer<u8, packed_bits::Bits<7>>,
z: bool,
fixed2: bool,
op: Integer<u8, packed_bits::Bits<2>>,
fixed3: Integer<u8, packed_bits::Bits<5>>,
fixed4: Integer<u8, packed_bits::Bits<4>>,
a: bool,
m: bool,
rn: Integer<u8, packed_bits::Bits<5>>,
fixed5: Integer<u8, packed_bits::Bits<5>>,
}
impl Aarch64Bytes for UnconditionalBranchRegister {}
impl UnconditionalBranchRegister {
#[inline(always)]
fn new(op: u8, rn: AArch64GeneralReg) -> Self {
debug_assert!(op <= 0b11);
Self {
fixed5: 0b00000.into(),
rn: rn.id().into(),
m: false,
a: false,
fixed4: 0b0000.into(),
fixed3: 0b11111.into(),
op: op.into(),
fixed2: false,
z: false,
fixed: 0b1101011.into(),
}
}
}
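With op = 0b10 this encodes RET; the remaining fixed fields match the C3 unconditional-branch-register layout. For the link register (X30, assuming an LR variant that this diff elides), the result is the canonical ret word:

let inst = UnconditionalBranchRegister::new(0b10, AArch64GeneralReg::LR);
// fixed=1101011, z=0, op=10, fixed3=11111, rn=30 packs to 0xD65F03C0, i.e. `RET X30`.
assert_eq!(inst.bytes(), [0xC0, 0x03, 0x5F, 0xD6]);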
// Uses unsigned Offset
// opc = 0b01 means load
// opc = 0b00 means store
#[derive(PackedStruct, Debug)]
#[packed_struct(endian = "msb")]
pub struct LoadStoreRegisterImmediate {
size: Integer<u8, packed_bits::Bits<2>>,
fixed: Integer<u8, packed_bits::Bits<3>>, // = 0b111,
fixed2: bool,
fixed3: Integer<u8, packed_bits::Bits<2>>,
opc: Integer<u8, packed_bits::Bits<2>>,
imm12: Integer<u16, packed_bits::Bits<12>>,
rn: Integer<u8, packed_bits::Bits<5>>,
rt: Integer<u8, packed_bits::Bits<5>>,
}
impl Aarch64Bytes for LoadStoreRegisterImmediate {}
impl LoadStoreRegisterImmediate {
#[inline(always)]
fn new(size: u8, opc: u8, imm12: u16, rn: AArch64GeneralReg, rt: AArch64GeneralReg) -> Self {
debug_assert!(size <= 0b11);
debug_assert!(imm12 <= 0xFFF);
Self {
rt: rt.id().into(),
rn: rn.id().into(),
imm12: imm12.into(),
opc: opc.into(),
fixed3: 0b01.into(),
fixed2: false,
fixed: 0b111.into(),
size: size.into(),
}
}
#[inline(always)]
fn new_load(size: u8, imm12: u16, rn: AArch64GeneralReg, rt: AArch64GeneralReg) -> Self {
Self::new(size, 0b01, imm12, rn, rt)
}
#[inline(always)]
fn new_store(size: u8, imm12: u16, rn: AArch64GeneralReg, rt: AArch64GeneralReg) -> Self {
Self::new(size, 0b00, imm12, rn, rt)
}
}
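One subtlety worth noting: in this unsigned-offset form the hardware scales imm12 by the access size, so for the 64-bit loads and stores below an imm12 of 1 addresses [Xn, #8], not [Xn, #1]. A sketch, with register variants elided from this diff:

let inst = LoadStoreRegisterImmediate::new_load(
    0b11, 1, AArch64GeneralReg::X1, AArch64GeneralReg::X0,
);
// size=11, opc=01, imm12=1 packs to 0xF9400420, i.e. `LDR X0, [X1, #8]`.
assert_eq!(inst.bytes(), [0x20, 0x04, 0x40, 0xF9]);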
// Below here are the functions for all of the assembly instructions.
@ -777,17 +848,9 @@ fn add_reg64_reg64_imm12(
src: AArch64GeneralReg,
imm12: u16,
) {
buf.extend(&build_instruction(AArch64Instruction::DPImm(
DPImmGroup::AddSubImm {
sf: true,
subtract: false,
set_flags: false,
shift: false,
imm12,
reg_n: src,
reg_d: dst,
},
)));
let inst = ArithmeticImmediate::new(false, false, dst, src, imm12, false);
buf.extend(inst.bytes());
}
/// `ADD Xd, Xm, Xn` -> Add Xm and Xn and place the result into Xd.
@ -798,18 +861,9 @@ fn add_reg64_reg64_reg64(
src1: AArch64GeneralReg,
src2: AArch64GeneralReg,
) {
buf.extend(&build_instruction(AArch64Instruction::DPReg(
DPRegGroup::AddSubShifted {
sf: true,
subtract: false,
set_flags: false,
shift: 0,
reg_m: src1,
imm6: 0,
reg_n: src2,
reg_d: dst,
},
)));
let inst = ArithmeticShifted::new(false, false, ShiftType::LSL, 0, src1, src2, dst);
buf.extend(inst.bytes());
}
/// `LDR Xt, [Xn, #offset]` -> Load Xn + Offset into Xt. ZRSP is SP.
@ -821,66 +875,41 @@ fn ldr_reg64_imm12(
base: AArch64GeneralReg,
imm12: u16,
) {
debug_assert!(imm12 <= 0xFFF);
buf.extend(&build_instruction(AArch64Instruction::LdStr(
LdStrGroup::UnsignedImm {
size: 0b11,
v: false,
opc: 0b01,
imm12,
reg_n: base,
reg_t: dst,
},
)));
let inst = LoadStoreRegisterImmediate::new_load(0b11, imm12, base, dst);
buf.extend(inst.bytes());
}
/// `MOV Xd, Xm` -> Move Xm to Xd.
#[inline(always)]
fn mov_reg64_reg64(buf: &mut Vec<'_, u8>, dst: AArch64GeneralReg, src: AArch64GeneralReg) {
// MOV is equivalent to `ORR Xd, XZR, Xm` in AArch64.
buf.extend(&build_instruction(AArch64Instruction::DPReg(
DPRegGroup::Logical {
sf: true,
op: DPRegLogicalOp::ORR,
shift: 0,
reg_m: src,
imm6: 0,
reg_n: AArch64GeneralReg::ZRSP,
reg_d: dst,
},
)));
let inst = LogicalShiftedRegister::new(
LogicalOp::ORR,
ShiftType::LSL,
0,
src,
AArch64GeneralReg::ZRSP,
dst,
);
buf.extend(inst.bytes());
}
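Encoding-wise this emits ORR with the zero register as Rn, so `mov x0, x1` comes out as the standard alias. A sketch, assuming X0/X1 variants:

let inst = LogicalShiftedRegister::new(
    LogicalOp::ORR, ShiftType::LSL, 0,
    AArch64GeneralReg::X1, AArch64GeneralReg::ZRSP, AArch64GeneralReg::X0,
);
// sf=1, opc=01, fixed=0b01010, n=0, rm=1, rn=31, rd=0 packs to 0xAA0103E0.
assert_eq!(inst.bytes(), [0xE0, 0x03, 0x01, 0xAA]);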
/// `MOVK Xd, imm16` -> Keeps Xd and moves an optionally shifted imm16 to Xd.
#[inline(always)]
fn movk_reg64_imm16(buf: &mut Vec<'_, u8>, dst: AArch64GeneralReg, imm16: u16, hw: u8) {
debug_assert!(hw <= 0b11);
buf.extend(&build_instruction(AArch64Instruction::DPImm(
DPImmGroup::MoveWide {
sf: true,
opc: 0b11,
hw,
imm16,
reg_d: dst,
},
)));
let inst = MoveWideImmediate::new(0b11, dst, imm16, hw, true);
buf.extend(inst.bytes());
}
/// `MOVZ Xd, imm16` -> Zeros Xd and moves an optionally shifted imm16 to Xd.
#[inline(always)]
fn movz_reg64_imm16(buf: &mut Vec<'_, u8>, dst: AArch64GeneralReg, imm16: u16, hw: u8) {
debug_assert!(hw <= 0b11);
buf.extend(&build_instruction(AArch64Instruction::DPImm(
DPImmGroup::MoveWide {
sf: true,
opc: 0b10,
hw,
imm16,
reg_d: dst,
},
)));
let inst = MoveWideImmediate::new(0b10, dst, imm16, hw, true);
buf.extend(inst.bytes());
}
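Together these two helpers are enough to materialize any 64-bit constant: MOVZ installs the low chunk and zeroes the rest, then MOVK patches each remaining non-zero 16-bit chunk in place. A hypothetical helper (not part of this diff) sketching that pattern:

fn mov_reg64_imm64_sketch(buf: &mut Vec<'_, u8>, dst: AArch64GeneralReg, imm: u64) {
    // MOVZ zeroes dst and installs bits 0..16.
    movz_reg64_imm16(buf, dst, imm as u16, 0);
    // MOVK keeps existing bits, so only non-zero chunks need patching.
    for hw in 1..4u8 {
        let chunk = (imm >> (16 * hw as u32)) as u16;
        if chunk != 0 {
            movk_reg64_imm16(buf, dst, chunk, hw);
        }
    }
}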
/// `STR Xt, [Xn, #offset]` -> Store Xt to Xn + Offset. ZRSP is SP.
@ -892,17 +921,9 @@ fn str_reg64_imm12(
base: AArch64GeneralReg,
imm12: u16,
) {
debug_assert!(imm12 <= 0xFFF);
buf.extend(&build_instruction(AArch64Instruction::LdStr(
LdStrGroup::UnsignedImm {
size: 0b11,
v: false,
opc: 0b00,
imm12,
reg_n: base,
reg_t: src,
},
)));
let inst = LoadStoreRegisterImmediate::new_store(0b11, imm12, base, src);
buf.extend(inst.bytes());
}
/// `SUB Xd, Xn, imm12` -> Subtract imm12 from Xn and place the result into Xd.
@ -913,31 +934,17 @@ fn sub_reg64_reg64_imm12(
src: AArch64GeneralReg,
imm12: u16,
) {
buf.extend(&build_instruction(AArch64Instruction::DPImm(
DPImmGroup::AddSubImm {
sf: true,
subtract: true,
set_flags: false,
shift: false,
imm12,
reg_n: src,
reg_d: dst,
},
)));
let inst = ArithmeticImmediate::new(true, false, dst, src, imm12, false);
buf.extend(inst.bytes());
}
/// `RET Xn` -> Return to the address stored in Xn.
#[inline(always)]
fn ret_reg64(buf: &mut Vec<'_, u8>, xn: AArch64GeneralReg) {
buf.extend(&build_instruction(AArch64Instruction::Branch(
BranchGroup::UnconditionBranchReg {
opc: 0b0010,
op2: 0b11111,
op3: 0b000000,
reg_n: xn,
op4: 0b000,
},
)));
let inst = UnconditionalBranchRegister::new(0b10, xn);
buf.extend(inst.bytes());
}
#[cfg(test)]


@ -82,6 +82,28 @@ pub fn build_module<'a>(
Object::new(BinaryFormat::Elf, Architecture::Aarch64, Endianness::Little),
)
}
Triple {
architecture: TargetArch::Aarch64(_),
binary_format: TargetBF::Macho,
..
} if cfg!(feature = "target-aarch64") => {
let backend: Backend64Bit<
aarch64::AArch64GeneralReg,
aarch64::AArch64FloatReg,
aarch64::AArch64Assembler,
aarch64::AArch64Call,
> = Backend::new(env)?;
build_object(
env,
procedures,
backend,
Object::new(
BinaryFormat::MachO,
Architecture::Aarch64,
Endianness::Little,
),
)
}
x => Err(format! {
"the target, {:?}, is not yet implemented",
x}),