Major refactor for generic 64 bit with traits

This commit is contained in:
Brendan Hansknecht 2020-11-23 00:16:42 -08:00
parent c8dbcdcf64
commit 3430a08d3d
6 changed files with 848 additions and 723 deletions
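The gist of the refactor: the monolithic X86_64Backend is split into a generic Backend64Bit parameterized by three traits, so a 64-bit target is fully described by a register set, an Assembler, and a CallConv. A hedged sketch of how the pieces compose after this commit (the types come from object_builder.rs below; the comments are editorial):

    let backend: Backend64Bit<
        x86_64::X86_64GPReg,     // the register set
        x86_64::X86_64Assembler, // emits the raw instruction bytes
        x86_64::X86_64SystemV,   // answers calling-convention questions
    > = Backend::new(env, target)?;

Supporting Windows then presumably only means swapping X86_64SystemV for the X86_64WindowsFastcall implementation also added below; the register set and assembler are reused.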

View file

@@ -1,142 +0,0 @@
use crate::x86_64::X86_64Backend;
use crate::{Backend, Env, Relocation, INLINED_SYMBOLS};
use bumpalo::collections::Vec;
use object::write;
use object::write::{Object, StandardSection, Symbol, SymbolSection};
use object::{
Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationKind, SectionKind,
SymbolFlags, SymbolKind, SymbolScope,
};
use roc_collections::all::MutMap;
use roc_module::symbol;
use roc_mono::ir::Proc;
use roc_mono::layout::Layout;
use target_lexicon::Triple;
const VERSION: &str = env!("CARGO_PKG_VERSION");
pub fn build_module<'a>(
env: &'a Env,
target: &Triple,
procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
) -> Result<Object, String> {
match target.architecture {
target_lexicon::Architecture::X86_64 => {
let mut output =
Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little);
let text = output.section_id(StandardSection::Text);
let data_section = output.section_id(StandardSection::Data);
let comment = output.add_section(vec![], b"comment".to_vec(), SectionKind::OtherString);
output.append_section_data(
comment,
format!("\0roc dev backend version {} \0", VERSION).as_bytes(),
1,
);
// Set up layout_ids for procedure calls.
let mut layout_ids = roc_mono::layout::LayoutIds::default();
let mut procs = Vec::with_capacity_in(procedures.len(), env.arena);
for ((sym, layout), proc) in procedures {
// This is temporary until we support passing args to functions.
if INLINED_SYMBOLS.contains(&sym) {
continue;
}
let fn_name = layout_ids
.get(sym, &layout)
.to_symbol_string(sym, &env.interns);
let proc_symbol = Symbol {
name: fn_name.as_bytes().to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Text,
// TODO: Depending on whether we are building a static or dynamic lib, this should change.
// We should use Dynamic -> anyone, Linkage -> static link, Compilation -> this module only.
scope: if env.exposed_to_host.contains(&sym) {
SymbolScope::Dynamic
} else {
SymbolScope::Linkage
},
weak: false,
section: SymbolSection::Section(text),
flags: SymbolFlags::None,
};
let proc_id = output.add_symbol(proc_symbol);
procs.push((fn_name, proc_id, proc));
}
// Build procedures.
let mut backend: X86_64Backend = Backend::new(env, target)?;
for (fn_name, proc_id, proc) in procs {
let mut local_data_index = 0;
let (proc_data, relocations) = backend.build_proc(proc)?;
let proc_offset = output.add_symbol_data(proc_id, text, proc_data, 16);
for reloc in relocations {
let elfreloc = match reloc {
Relocation::LocalData { offset, data } => {
let data_symbol = write::Symbol {
name: format!("{}.data{}", fn_name, local_data_index)
.as_bytes()
.to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Data,
scope: SymbolScope::Compilation,
weak: false,
section: write::SymbolSection::Section(data_section),
flags: SymbolFlags::None,
};
local_data_index += 1;
let data_id = output.add_symbol(data_symbol);
output.add_symbol_data(data_id, data_section, data, 4);
write::Relocation {
offset: *offset + proc_offset,
size: 32,
kind: RelocationKind::Relative,
encoding: RelocationEncoding::Generic,
symbol: data_id,
addend: -4,
}
}
Relocation::LinkedData { offset, name } => {
if let Some(sym_id) = output.symbol_id(name.as_bytes()) {
write::Relocation {
offset: *offset + proc_offset,
size: 32,
kind: RelocationKind::GotRelative,
encoding: RelocationEncoding::Generic,
symbol: sym_id,
addend: -4,
}
} else {
return Err(format!("failed to find symbol for {:?}", name));
}
}
Relocation::LinkedFunction { offset, name } => {
if let Some(sym_id) = output.symbol_id(name.as_bytes()) {
write::Relocation {
offset: *offset + proc_offset,
size: 32,
kind: RelocationKind::PltRelative,
encoding: RelocationEncoding::Generic,
symbol: sym_id,
addend: -4,
}
} else {
return Err(format!("failed to find symbol for {:?}", name));
}
}
};
output
.add_relocation(text, elfreloc)
.map_err(|e| format!("{:?}", e))?;
}
}
Ok(output)
}
x => Err(format! {
"the architecture, {:?}, is not yet implemented for elf",
x}),
}
}

View file

@@ -3,21 +3,59 @@ use bumpalo::collections::Vec;
use roc_collections::all::{ImSet, MutMap, MutSet};
use roc_module::symbol::Symbol;
use roc_mono::ir::{Literal, Stmt};
+use std::marker::PhantomData;
use target_lexicon::{CallingConvention, Triple};
-mod asm;
-use asm::GPReg;
-#[derive(Clone, Debug, PartialEq)]
-enum SymbolStorage {
-// These may need layout, but I am not sure.
-// I think whenever a symbol would be used, we specify layout anyways.
-GPReg(GPReg),
-Stack(i32),
-StackAndGPReg(GPReg, i32),
-}
-pub struct X86_64Backend<'a> {
+pub mod x86_64;
+pub trait CallConv<GPReg> {
+fn gp_param_regs() -> &'static [GPReg];
+fn gp_return_regs() -> &'static [GPReg];
+fn gp_default_free_regs() -> &'static [GPReg];
+// A linear scan of an array may be faster than a set technically.
+// That being said, fastest would likely be a trait based on calling convention/register.
+fn caller_saved_regs() -> ImSet<GPReg>;
+fn callee_saved_regs() -> ImSet<GPReg>;
+fn stack_pointer() -> GPReg;
+fn frame_pointer() -> GPReg;
+fn shadow_space_size() -> u8;
+// It may be worth ignoring the red zone and keeping things simpler.
+fn red_zone_size() -> u8;
+}
+pub trait Assembler<GPReg> {
+fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
+fn add_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+fn cmovl_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
+fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64);
+fn mov_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+fn mov_register64bit_stackoffset32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32);
+fn mov_stackoffset32bit_register64bit<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg);
+fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
+fn ret_near<'a>(buf: &mut Vec<'a, u8>);
+fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
+fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
+fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
+}
+#[derive(Clone, Debug, PartialEq)]
+enum SymbolStorage<GPReg> {
+// These may need layout, but I am not sure.
+// I think whenever a symbol would be used, we specify layout anyways.
+GPReg(GPReg),
+Stack(i32),
+StackAndGPReg(GPReg, i32),
+}
+pub trait GPRegTrait: Copy + Eq + std::hash::Hash + std::fmt::Debug + 'static {}
+pub struct Backend64Bit<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> {
+phantom_asm: PhantomData<ASM>,
+phantom_cc: PhantomData<CC>,
env: &'a Env<'a>,
buf: Vec<'a, u8>,
@@ -27,15 +65,11 @@ pub struct X86_64Backend<'a> {
last_seen_map: MutMap<Symbol, *const Stmt<'a>>,
free_map: MutMap<*const Stmt<'a>, Vec<'a, Symbol>>,
-symbols_map: MutMap<Symbol, SymbolStorage>,
+symbols_map: MutMap<Symbol, SymbolStorage<GPReg>>,
literal_map: MutMap<Symbol, Literal<'a>>,
-gp_param_regs: &'static [GPReg],
-gp_return_regs: &'static [GPReg],
// This should probably be smarter than a vec.
// There are certain registers we should always use first. With pushing and popping, this could get mixed.
-gp_default_free_regs: &'static [GPReg],
gp_free_regs: Vec<'a, GPReg>,
// The last major thing we need is a way to decide what reg to free when all of them are full.
@@ -44,22 +78,18 @@ pub struct X86_64Backend<'a> {
gp_used_regs: Vec<'a, (GPReg, Symbol)>,
stack_size: i32,
-shadow_space_size: u8,
-red_zone_size: u8,
-// A linear scan of an array may be faster than a set technically.
-// That being said, fastest would likely be a trait based on calling convention/register.
-caller_saved_regs: ImSet<GPReg>,
-callee_saved_regs: ImSet<GPReg>,
// used callee saved regs must be tracked for pushing and popping at the beginning/end of the function.
used_callee_saved_regs: MutSet<GPReg>,
}
-impl<'a> Backend<'a> for X86_64Backend<'a> {
-fn new(env: &'a Env, target: &Triple) -> Result<Self, String> {
-match target.default_calling_convention() {
-Ok(CallingConvention::SystemV) => Ok(X86_64Backend {
+impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<'a>
+for Backend64Bit<'a, GPReg, ASM, CC>
+{
+fn new(env: &'a Env, _target: &Triple) -> Result<Self, String> {
+Ok(Backend64Bit {
+phantom_asm: PhantomData,
+phantom_cc: PhantomData,
env,
leaf_function: true,
buf: bumpalo::vec!(in env.arena),
@@ -67,126 +97,11 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
free_map: MutMap::default(),
symbols_map: MutMap::default(),
literal_map: MutMap::default(),
-gp_param_regs: &[
-GPReg::RDI,
-GPReg::RSI,
-GPReg::RDX,
-GPReg::RCX,
-GPReg::R8,
-GPReg::R9,
-],
-gp_return_regs: &[GPReg::RAX, GPReg::RDX],
-gp_default_free_regs: &[
-// The regs we want to use first should be at the end of this vec.
-// We will use pop to get which reg to use next
-// Use callee saved regs last.
-GPReg::RBX,
-// Don't use frame pointer: GPReg::RBP,
-GPReg::R12,
-GPReg::R13,
-GPReg::R14,
-GPReg::R15,
-// Use caller saved regs first.
-GPReg::RAX,
-GPReg::RCX,
-GPReg::RDX,
-// Don't use stack pointer: GPReg::RSP,
-GPReg::RSI,
-GPReg::RDI,
-GPReg::R8,
-GPReg::R9,
-GPReg::R10,
-GPReg::R11,
-],
gp_free_regs: bumpalo::vec![in env.arena],
gp_used_regs: bumpalo::vec![in env.arena],
stack_size: 0,
-shadow_space_size: 0,
-red_zone_size: 128,
-// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
-caller_saved_regs: ImSet::from(vec![
-GPReg::RAX,
-GPReg::RCX,
-GPReg::RDX,
-GPReg::RSP,
-GPReg::RSI,
-GPReg::RDI,
-GPReg::R8,
-GPReg::R9,
-GPReg::R10,
-GPReg::R11,
-]),
-callee_saved_regs: ImSet::from(vec![
-GPReg::RBX,
-GPReg::RBP,
-GPReg::R12,
-GPReg::R13,
-GPReg::R14,
-GPReg::R15,
-]),
used_callee_saved_regs: MutSet::default(),
-}),
+})
-Ok(CallingConvention::WindowsFastcall) => Ok(X86_64Backend {
-env,
-leaf_function: true,
-buf: bumpalo::vec!(in env.arena),
-last_seen_map: MutMap::default(),
-free_map: MutMap::default(),
-symbols_map: MutMap::default(),
-literal_map: MutMap::default(),
-gp_param_regs: &[GPReg::RCX, GPReg::RDX, GPReg::R8, GPReg::R9],
-gp_return_regs: &[GPReg::RAX],
-gp_default_free_regs: &[
-// The regs we want to use first should be at the end of this vec.
-// We will use pop to get which reg to use next
-// Use callee saved regs last.
-GPReg::RBX,
-// Don't use frame pointer: GPReg::RBP,
-GPReg::RSI,
-// Don't use stack pointer: GPReg::RSP,
-GPReg::RDI,
-GPReg::R12,
-GPReg::R13,
-GPReg::R14,
-GPReg::R15,
-// Use caller saved regs first.
-GPReg::RAX,
-GPReg::RCX,
-GPReg::RDX,
-GPReg::R8,
-GPReg::R9,
-GPReg::R10,
-GPReg::R11,
-],
-gp_free_regs: bumpalo::vec![in env.arena],
-gp_used_regs: bumpalo::vec![in env.arena],
-stack_size: 0,
-shadow_space_size: 32,
-red_zone_size: 0,
-caller_saved_regs: ImSet::from(vec![
-GPReg::RAX,
-GPReg::RCX,
-GPReg::RDX,
-GPReg::R8,
-GPReg::R9,
-GPReg::R10,
-GPReg::R11,
-]),
-callee_saved_regs: ImSet::from(vec![
-GPReg::RBX,
-GPReg::RBP,
-GPReg::RSI,
-GPReg::RSP,
-GPReg::RDI,
-GPReg::R12,
-GPReg::R13,
-GPReg::R14,
-GPReg::R15,
-]),
-used_callee_saved_regs: MutSet::default(),
-}),
-x => Err(format!("unsupported backend: {:?}", x)),
-}
}
fn env(&self) -> &'a Env<'a> {
@@ -194,7 +109,7 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
}
fn reset(&mut self) {
-self.stack_size = -(self.red_zone_size as i32);
+self.stack_size = -(CC::red_zone_size() as i32);
self.leaf_function = true;
self.last_seen_map.clear();
self.free_map.clear();
@@ -204,13 +119,13 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
self.gp_free_regs.clear();
self.gp_used_regs.clear();
self.gp_free_regs
-.extend_from_slice(self.gp_default_free_regs);
+.extend_from_slice(CC::gp_default_free_regs());
}
fn set_not_leaf_function(&mut self) {
self.leaf_function = false;
// If this is not a leaf function, it can't use the shadow space.
-self.stack_size = self.shadow_space_size as i32 - self.red_zone_size as i32;
+self.stack_size = CC::shadow_space_size() as i32 - CC::red_zone_size() as i32;
}
fn literal_map(&mut self) -> &mut MutMap<Symbol, Literal<'a>> {
@@ -233,33 +148,38 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
let mut out = bumpalo::vec![in self.env.arena];
if !self.leaf_function {
-asm::push_register64bit(&mut out, GPReg::RBP);
-asm::mov_register64bit_register64bit(&mut out, GPReg::RBP, GPReg::RSP);
+// I believe that this will have to move away from push and to mov to be generic across backends.
+ASM::push_register64bit(&mut out, CC::frame_pointer());
+ASM::mov_register64bit_register64bit(
+&mut out,
+CC::frame_pointer(),
+CC::stack_pointer(),
+);
}
// Save data in all callee saved regs.
let mut pop_order = bumpalo::vec![in self.env.arena];
for reg in &self.used_callee_saved_regs {
-asm::push_register64bit(&mut out, *reg);
+ASM::push_register64bit(&mut out, *reg);
pop_order.push(*reg);
}
if self.stack_size > 0 {
-asm::sub_register64bit_immediate32bit(&mut out, GPReg::RSP, self.stack_size);
+ASM::sub_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size);
}
// Add function body.
out.extend(&self.buf);
if self.stack_size > 0 {
-asm::add_register64bit_immediate32bit(&mut out, GPReg::RSP, self.stack_size);
+ASM::add_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size);
}
// Restore data in callee saved regs.
while let Some(reg) = pop_order.pop() {
-asm::pop_register64bit(&mut out, reg);
+ASM::pop_register64bit(&mut out, reg);
}
if !self.leaf_function {
-asm::pop_register64bit(&mut out, GPReg::RBP);
+ASM::pop_register64bit(&mut out, CC::frame_pointer());
}
-asm::ret_near(&mut out);
+ASM::ret_near(&mut out);
Ok((out.into_bump_slice(), &[]))
}
@@ -267,9 +187,9 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String> {
let dst_reg = self.claim_gp_reg(dst)?;
let src_reg = self.load_to_reg(src)?;
-asm::mov_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
-asm::neg_register64bit(&mut self.buf, dst_reg);
-asm::cmovl_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
+ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
+ASM::neg_register64bit(&mut self.buf, dst_reg);
+ASM::cmovl_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
Ok(())
}
@@ -281,9 +201,9 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
) -> Result<(), String> {
let dst_reg = self.claim_gp_reg(dst)?;
let src1_reg = self.load_to_reg(src1)?;
-asm::mov_register64bit_register64bit(&mut self.buf, dst_reg, src1_reg);
+ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src1_reg);
let src2_reg = self.load_to_reg(src2)?;
-asm::add_register64bit_register64bit(&mut self.buf, dst_reg, src2_reg);
+ASM::add_register64bit_register64bit(&mut self.buf, dst_reg, src2_reg);
Ok(())
}
@@ -292,7 +212,7 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
Literal::Int(x) => {
let reg = self.claim_gp_reg(sym)?;
let val = *x;
-asm::mov_register64bit_immediate64bit(&mut self.buf, reg, val);
+ASM::mov_register64bit_immediate64bit(&mut self.buf, reg, val);
Ok(())
}
x => Err(format!("loading literal, {:?}, is not yet implemented", x)),
@@ -314,11 +234,11 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
fn return_symbol(&mut self, sym: &Symbol) -> Result<(), String> {
let val = self.symbols_map.get(sym);
match val {
-Some(SymbolStorage::GPReg(reg)) if *reg == self.gp_return_regs[0] => Ok(()),
+Some(SymbolStorage::GPReg(reg)) if *reg == CC::gp_return_regs()[0] => Ok(()),
Some(SymbolStorage::GPReg(reg)) => {
// If it fits in a general purpose register, just copy it over.
// Technically this can be optimized to produce shorter instructions if less than 64bits.
-asm::mov_register64bit_register64bit(&mut self.buf, self.gp_return_regs[0], *reg);
+ASM::mov_register64bit_register64bit(&mut self.buf, CC::gp_return_regs()[0], *reg);
Ok(())
}
Some(x) => Err(format!(
@@ -332,11 +252,13 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
/// This impl block is for IR-related instructions that need backend-specific information.
/// For example, loading a symbol for doing a computation.
-impl<'a> X86_64Backend<'a> {
+impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
+Backend64Bit<'a, GPReg, ASM, CC>
+{
fn claim_gp_reg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
let reg = if !self.gp_free_regs.is_empty() {
let free_reg = self.gp_free_regs.pop().unwrap();
-if self.callee_saved_regs.contains(&free_reg) {
+if CC::callee_saved_regs().contains(&free_reg) {
self.used_callee_saved_regs.insert(free_reg);
}
Ok(free_reg)
@@ -349,27 +271,27 @@ impl<'a> X86_64Backend<'a> {
}?;
self.gp_used_regs.push((reg, *sym));
self.symbols_map.insert(*sym, SymbolStorage::GPReg(reg));
Ok(reg)
}
fn load_to_reg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
let val = self.symbols_map.remove(sym);
match val {
Some(SymbolStorage::GPReg(reg)) => {
self.symbols_map.insert(*sym, SymbolStorage::GPReg(reg));
Ok(reg)
}
Some(SymbolStorage::StackAndGPReg(reg, offset)) => {
self.symbols_map
.insert(*sym, SymbolStorage::StackAndGPReg(reg, offset));
Ok(reg)
}
Some(SymbolStorage::Stack(offset)) => {
let reg = self.claim_gp_reg(sym)?;
self.symbols_map
.insert(*sym, SymbolStorage::StackAndGPReg(reg, offset));
-asm::mov_register64bit_stackoffset32bit(&mut self.buf, reg, offset as i32);
+ASM::mov_register64bit_stackoffset32bit(&mut self.buf, reg, offset as i32);
Ok(reg)
}
None => Err(format!("Unknown symbol: {}", sym)),
@@ -379,7 +301,7 @@ impl<'a> X86_64Backend<'a> {
fn free_to_stack(&mut self, sym: &Symbol) -> Result<(), String> {
let val = self.symbols_map.remove(sym);
match val {
Some(SymbolStorage::GPReg(reg)) => {
let offset = self.stack_size;
self.stack_size += 8;
if let Some(size) = self.stack_size.checked_add(8) {
@@ -390,12 +312,12 @@ impl<'a> X86_64Backend<'a> {
sym
));
}
-asm::mov_stackoffset32bit_register64bit(&mut self.buf, offset as i32, reg);
+ASM::mov_stackoffset32bit_register64bit(&mut self.buf, offset as i32, reg);
self.symbols_map
.insert(*sym, SymbolStorage::Stack(offset as i32));
Ok(())
}
Some(SymbolStorage::StackAndGPReg(_, offset)) => {
self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
Ok(())
}
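With every calling-convention question routed through CC, the backend body above stays target-agnostic. A minimal sketch of code written against the CallConv seam, mirroring what reset() does (hypothetical helper, not part of the commit):

    // Leaf functions may reuse the red zone, so the initial stack size is
    // "negative" by red_zone_size() bytes, exactly as in reset() above.
    fn initial_stack_size<GPReg, CC: CallConv<GPReg>>() -> i32 {
        -(CC::red_zone_size() as i32)
    }
    // e.g. initial_stack_size::<X86_64GPReg, X86_64SystemV>() == -128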

View file

@@ -0,0 +1,582 @@
use crate::generic64::{Assembler, CallConv, GPRegTrait};
use bumpalo::collections::Vec;
use roc_collections::all::ImSet;
// Not sure exactly how I want to represent registers.
// If we want max speed, we would likely make them structs that impl the same trait to avoid ifs.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub enum X86_64GPReg {
RAX = 0,
RCX = 1,
RDX = 2,
RBX = 3,
RSP = 4,
RBP = 5,
RSI = 6,
RDI = 7,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
}
impl GPRegTrait for X86_64GPReg {}
const REX: u8 = 0x40;
const REX_W: u8 = REX + 0x8;
fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 1
} else {
byte
}
}
fn add_opcode_extension(reg: X86_64GPReg, byte: u8) -> u8 {
add_rm_extension(reg, byte)
}
fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 4
} else {
byte
}
}
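// Worked example (hedged; it matches the `add` unit tests below): encoding
// `add r15, imm32`. R15 is register number 15, so it needs the REX.B bit:
// add_rm_extension turns REX_W (0x48) into 0x49, and the ModR/M byte is
// 0xC0 + (15 % 8) = 0xC7, giving the bytes 0x49 0x81 0xC7 followed by imm32.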
pub struct X86_64Assembler {}
pub struct X86_64WindowsFastcall {}
pub struct X86_64SystemV {}
impl CallConv<X86_64GPReg> for X86_64SystemV {
fn gp_param_regs() -> &'static [X86_64GPReg] {
&[
X86_64GPReg::RDI,
X86_64GPReg::RSI,
X86_64GPReg::RDX,
X86_64GPReg::RCX,
X86_64GPReg::R8,
X86_64GPReg::R9,
]
}
fn gp_return_regs() -> &'static [X86_64GPReg] {
&[X86_64GPReg::RAX, X86_64GPReg::RDX]
}
fn gp_default_free_regs() -> &'static [X86_64GPReg] {
&[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Use callee saved regs last.
X86_64GPReg::RBX,
// Don't use frame pointer: X86_64GPReg::RBP,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
// Use caller saved regs first.
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
// Don't use stack pointer: X86_64GPReg::RSP,
X86_64GPReg::RSI,
X86_64GPReg::RDI,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
]
}
fn caller_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::RSP,
X86_64GPReg::RSI,
X86_64GPReg::RDI,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
])
}
fn callee_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RBX,
X86_64GPReg::RBP,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
])
}
fn stack_pointer() -> X86_64GPReg {
X86_64GPReg::RSP
}
fn frame_pointer() -> X86_64GPReg {
X86_64GPReg::RBP
}
fn shadow_space_size() -> u8 {
0
}
fn red_zone_size() -> u8 {
128
}
}
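// Note on the two sizes above versus WindowsFastcall below: System V gives
// leaf functions a 128-byte red zone below the stack pointer and no shadow
// space, while the Windows x64 convention is the mirror image: a 32-byte
// caller-allocated shadow space and no red zone.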
impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
fn gp_param_regs() -> &'static [X86_64GPReg] {
&[
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
]
}
fn gp_return_regs() -> &'static [X86_64GPReg] {
&[X86_64GPReg::RAX]
}
fn gp_default_free_regs() -> &'static [X86_64GPReg] {
&[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Use callee saved regs last.
X86_64GPReg::RBX,
// Don't use frame pointer: X86_64GPReg::RBP,
X86_64GPReg::RSI,
// Don't use stack pointer: X86_64GPReg::RSP,
X86_64GPReg::RDI,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
// Use caller saved regs first.
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
]
}
fn caller_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
])
}
fn callee_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RBX,
X86_64GPReg::RBP,
X86_64GPReg::RSI,
X86_64GPReg::RSP,
X86_64GPReg::RDI,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
])
}
fn stack_pointer() -> X86_64GPReg {
X86_64GPReg::RSP
}
fn frame_pointer() -> X86_64GPReg {
X86_64GPReg::RBP
}
fn shadow_space_size() -> u8 {
32
}
fn red_zone_size() -> u8 {
0
}
}
impl Assembler<X86_64GPReg> for X86_64Assembler {
// Below here are the functions for all of the assembly instructions.
// Their names are based on the instruction and operators combined.
// You should call `buf.reserve()` if you push or extend more than once.
// Unit tests are added at the bottom of the file to ensure correct asm generation.
// Please keep these in alphanumeric order.
/// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits to r/m64.
fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `ADD r/m64,r64` -> Add r64 to r/m64.
fn add_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]);
}
/// `CMOVL r64,r/m64` -> Move if less (SF ≠ OF).
fn cmovl_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_reg_extension(dst, REX_W);
let rex = add_rm_extension(src, rex);
let dst_mod = (dst as u8 % 8) << 3;
let src_mod = src as u8 % 8;
buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r/m64, imm32` -> Move imm32 sign-extended to 64-bits to r/m64.
fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `MOV r64, imm64` -> Move imm64 to r64.
fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) {
if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 {
Self::mov_register64bit_immediate32bit(buf, dst, imm as i32)
} else {
let rex = add_opcode_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(10);
buf.extend(&[rex, 0xB8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
}
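// Size note on the branch above: the sign-extended imm32 form is 7 bytes
// (REX + 0xC7 + ModR/M + imm32) while the full imm64 form is 10 bytes
// (REX + 0xB8+rd + imm64), which is where the buf.reserve() counts come from.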
/// `MOV r/m64,r64` -> Move r64 to r/m64.
fn mov_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r64,r/m64` -> Move r/m64 to r64.
fn mov_register64bit_stackoffset32bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
offset: i32,
) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset; I didn't feel like figuring it out right now.
// Also, this may technically make generation faster since stack operations should be so common.
let rex = add_reg_extension(dst, REX_W);
let dst_mod = (dst as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
fn mov_stackoffset32bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
offset: i32,
src: X86_64GPReg,
) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset; I didn't feel like figuring it out right now.
// Also, this may technically make generation faster since stack operations should be so common.
let rex = add_reg_extension(src, REX_W);
let src_mod = (src as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `NEG r/m64` -> Two's complement negate r/m64.
fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let rex = add_rm_extension(reg, REX_W);
let reg_mod = reg as u8 % 8;
buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
}
/// `RET` -> Near return to calling procedure.
fn ret_near<'a>(buf: &mut Vec<'a, u8>) {
buf.push(0xC3);
}
/// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64.
fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xE8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size.
fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x58 + reg_mod]);
} else {
buf.push(0x58 + reg_mod);
}
}
/// `PUSH r64` -> Push r64.
fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x50 + reg_mod]);
} else {
buf.push(0x50 + reg_mod);
}
}
}
// When writing tests, it is a good idea to test both a numbered and an unnumbered register.
// This is because R8-R15 often have special instruction prefixes.
#[cfg(test)]
mod tests {
use super::*;
const TEST_I32: i32 = 0x12345678;
const TEST_I64: i64 = 0x12345678_9ABCDEF0;
#[test]
fn test_add_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0x81, 0xC0]),
(X86_64GPReg::R15, [0x49, 0x81, 0xC7]),
] {
buf.clear();
X86_64Assembler::add_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_add_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((X86_64GPReg::RAX, X86_64GPReg::RAX), [0x48, 0x01, 0xC0]),
((X86_64GPReg::RAX, X86_64GPReg::R15), [0x4C, 0x01, 0xF8]),
((X86_64GPReg::R15, X86_64GPReg::RAX), [0x49, 0x01, 0xC7]),
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x01, 0xFF]),
] {
buf.clear();
X86_64Assembler::add_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_cmovl_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
(
(X86_64GPReg::RAX, X86_64GPReg::RAX),
[0x48, 0x0F, 0x4C, 0xC0],
),
(
(X86_64GPReg::RAX, X86_64GPReg::R15),
[0x49, 0x0F, 0x4C, 0xC7],
),
(
(X86_64GPReg::R15, X86_64GPReg::RAX),
[0x4C, 0x0F, 0x4C, 0xF8],
),
(
(X86_64GPReg::R15, X86_64GPReg::R15),
[0x4D, 0x0F, 0x4C, 0xFF],
),
] {
buf.clear();
X86_64Assembler::cmovl_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0xC7, 0xC0]),
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_immediate64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0xB8]),
(X86_64GPReg::R15, [0x49, 0xBF]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I64);
assert_eq!(expected, &buf[..2]);
assert_eq!(TEST_I64.to_le_bytes(), &buf[2..]);
}
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0xC7, 0xC0]),
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I32 as i64);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((X86_64GPReg::RAX, X86_64GPReg::RAX), [0x48, 0x89, 0xC0]),
((X86_64GPReg::RAX, X86_64GPReg::R15), [0x4C, 0x89, 0xF8]),
((X86_64GPReg::R15, X86_64GPReg::RAX), [0x49, 0x89, 0xC7]),
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x89, 0xFF]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_stackoffset32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, offset), expected) in &[
((X86_64GPReg::RAX, TEST_I32), [0x48, 0x8B, 0x84, 0x24]),
((X86_64GPReg::R15, TEST_I32), [0x4C, 0x8B, 0xBC, 0x24]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_stackoffset32bit(&mut buf, *dst, *offset);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_mov_stackoffset32bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((offset, src), expected) in &[
((TEST_I32, X86_64GPReg::RAX), [0x48, 0x89, 0x84, 0x24]),
((TEST_I32, X86_64GPReg::R15), [0x4C, 0x89, 0xBC, 0x24]),
] {
buf.clear();
X86_64Assembler::mov_stackoffset32bit_register64bit(&mut buf, *offset, *src);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_neg_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (reg, expected) in &[
(X86_64GPReg::RAX, [0x48, 0xF7, 0xD8]),
(X86_64GPReg::R15, [0x49, 0xF7, 0xDF]),
] {
buf.clear();
X86_64Assembler::neg_register64bit(&mut buf, *reg);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_ret_near() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
X86_64Assembler::ret_near(&mut buf);
assert_eq!(&[0xC3], &buf[..]);
}
#[test]
fn test_sub_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0x81, 0xE8]),
(X86_64GPReg::R15, [0x49, 0x81, 0xEF]),
] {
buf.clear();
X86_64Assembler::sub_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_pop_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, vec![0x58]),
(X86_64GPReg::R15, vec![0x41, 0x5F]),
] {
buf.clear();
X86_64Assembler::pop_register64bit(&mut buf, *dst);
assert_eq!(&expected[..], &buf[..]);
}
}
#[test]
fn test_push_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (src, expected) in &[
(X86_64GPReg::RAX, vec![0x50]),
(X86_64GPReg::R15, vec![0x41, 0x57]),
] {
buf.clear();
X86_64Assembler::push_register64bit(&mut buf, *src);
assert_eq!(&expected[..], &buf[..]);
}
}
}
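To get a feel for the Assembler trait in use, here is a hedged sketch (a hypothetical helper, not part of the commit) that emits `mov rax, 42; ret`, the same calls build_proc makes for a return value and epilogue:

    // Hypothetical example, reusing only items defined in this file.
    fn emit_return_42(arena: &bumpalo::Bump) -> bumpalo::collections::Vec<'_, u8> {
        let mut buf = bumpalo::vec![in arena];
        // RAX is the first System V general-purpose return register.
        X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, X86_64GPReg::RAX, 42);
        X86_64Assembler::ret_near(&mut buf);
        buf
    }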

View file

@@ -19,11 +19,12 @@ use roc_module::low_level::LowLevel;
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{CallType, Expr, JoinPointId, Literal, Proc, Stmt};
use roc_mono::layout::{Builtin, Layout};
-use target_lexicon::{BinaryFormat, Triple};
+use target_lexicon::Triple;
-pub mod elf;
-pub mod run_roc;
-pub mod x86_64;
+mod generic64;
+mod object_builder;
+pub use object_builder::build_module;
+mod run_roc;
pub struct Env<'a> {
pub arena: &'a Bump,
@@ -35,21 +36,6 @@ pub struct Env<'a> {
// INLINED_SYMBOLS is a set of all of the functions we automatically inline if seen.
const INLINED_SYMBOLS: [Symbol; 2] = [Symbol::NUM_ABS, Symbol::NUM_ADD];
-/// build_module is the high level builder/delegator.
-/// It takes the request to build a module and output the object file for the module.
-pub fn build_module<'a>(
-env: &'a Env,
-target: &Triple,
-procedures: MutMap<(Symbol, Layout<'a>), Proc<'a>>,
-) -> Result<Object, String> {
-match target.binary_format {
-BinaryFormat::Elf => elf::build_module(env, target, procedures),
-x => Err(format! {
-"the binary format, {:?}, is not yet implemented",
-x}),
-}
-}
// These relocations likely will need a length.
// They may even need more definition, but this should be at least good enough for how we will use elf.
enum Relocation<'a> {

View file

@@ -0,0 +1,154 @@
use crate::generic64::{x86_64, Backend64Bit};
use crate::{Backend, Env, Relocation, INLINED_SYMBOLS};
use bumpalo::collections::Vec;
use object::write;
use object::write::{Object, StandardSection, Symbol, SymbolSection};
use object::{
Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationKind, SectionKind,
SymbolFlags, SymbolKind, SymbolScope,
};
use roc_collections::all::MutMap;
use roc_module::symbol;
use roc_mono::ir::Proc;
use roc_mono::layout::Layout;
use target_lexicon::{Architecture as TargetArch, BinaryFormat as TargetBF, Triple};
const VERSION: &str = env!("CARGO_PKG_VERSION");
/// build_module is the high level builder/delegator.
/// It takes the request to build a module and output the object file for the module.
pub fn build_module<'a>(
env: &'a Env,
target: &Triple,
procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
) -> Result<Object, String> {
let (mut output, mut backend) = match target {
Triple {
architecture: TargetArch::X86_64,
binary_format: TargetBF::Elf,
..
} => {
let backend: Backend64Bit<
x86_64::X86_64GPReg,
x86_64::X86_64Assembler,
x86_64::X86_64SystemV,
> = Backend::new(env, target)?;
Ok((
Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little),
backend,
))
}
x => Err(format! {
"the target, {:?}, is not yet implemented",
x}),
}?;
let text = output.section_id(StandardSection::Text);
let data_section = output.section_id(StandardSection::Data);
let comment = output.add_section(vec![], b"comment".to_vec(), SectionKind::OtherString);
output.append_section_data(
comment,
format!("\0roc dev backend version {} \0", VERSION).as_bytes(),
1,
);
// Set up layout_ids for procedure calls.
let mut layout_ids = roc_mono::layout::LayoutIds::default();
let mut procs = Vec::with_capacity_in(procedures.len(), env.arena);
for ((sym, layout), proc) in procedures {
// This is temporary until we support passing args to functions.
if INLINED_SYMBOLS.contains(&sym) {
continue;
}
let fn_name = layout_ids
.get(sym, &layout)
.to_symbol_string(sym, &env.interns);
let proc_symbol = Symbol {
name: fn_name.as_bytes().to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Text,
// TODO: Depending on whether we are building a static or dynamic lib, this should change.
// We should use Dynamic -> anyone, Linkage -> static link, Compilation -> this module only.
scope: if env.exposed_to_host.contains(&sym) {
SymbolScope::Dynamic
} else {
SymbolScope::Linkage
},
weak: false,
section: SymbolSection::Section(text),
flags: SymbolFlags::None,
};
let proc_id = output.add_symbol(proc_symbol);
procs.push((fn_name, proc_id, proc));
}
// Build procedures.
for (fn_name, proc_id, proc) in procs {
let mut local_data_index = 0;
let (proc_data, relocations) = backend.build_proc(proc)?;
let proc_offset = output.add_symbol_data(proc_id, text, proc_data, 16);
for reloc in relocations {
let elfreloc = match reloc {
Relocation::LocalData { offset, data } => {
let data_symbol = write::Symbol {
name: format!("{}.data{}", fn_name, local_data_index)
.as_bytes()
.to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Data,
scope: SymbolScope::Compilation,
weak: false,
section: write::SymbolSection::Section(data_section),
flags: SymbolFlags::None,
};
local_data_index += 1;
let data_id = output.add_symbol(data_symbol);
output.add_symbol_data(data_id, data_section, data, 4);
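// Note: size 32 with addend -4 is the usual x86-64 PC-relative fixup;
// RIP-relative offsets are measured from the end of the instruction, so the
// linker compensates for the 4-byte displacement field itself.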
write::Relocation {
offset: offset + proc_offset,
size: 32,
kind: RelocationKind::Relative,
encoding: RelocationEncoding::Generic,
symbol: data_id,
addend: -4,
}
}
Relocation::LinkedData { offset, name } => {
if let Some(sym_id) = output.symbol_id(name.as_bytes()) {
write::Relocation {
offset: offset + proc_offset,
size: 32,
kind: RelocationKind::GotRelative,
encoding: RelocationEncoding::Generic,
symbol: sym_id,
addend: -4,
}
} else {
return Err(format!("failed to find symbol for {:?}", name));
}
}
Relocation::LinkedFunction { offset, name } => {
if let Some(sym_id) = output.symbol_id(name.as_bytes()) {
write::Relocation {
offset: offset + proc_offset,
size: 32,
kind: RelocationKind::PltRelative,
encoding: RelocationEncoding::Generic,
symbol: sym_id,
addend: -4,
}
} else {
return Err(format!("failed to find symbol for {:?}", name));
}
}
};
output
.add_relocation(text, elfreloc)
.map_err(|e| format!("{:?}", e))?;
}
}
Ok(output)
}

View file

@@ -1,377 +0,0 @@
use bumpalo::collections::Vec;
// Not sure exactly how I want to represent registers.
// If we want max speed, we would likely make them structs that impl the same trait to avoid ifs.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub enum GPReg {
RAX = 0,
RCX = 1,
RDX = 2,
RBX = 3,
RSP = 4,
RBP = 5,
RSI = 6,
RDI = 7,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
}
const REX: u8 = 0x40;
const REX_W: u8 = REX + 0x8;
fn add_rm_extension(reg: GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 1
} else {
byte
}
}
fn add_opcode_extension(reg: GPReg, byte: u8) -> u8 {
add_rm_extension(reg, byte)
}
fn add_reg_extension(reg: GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 4
} else {
byte
}
}
// Below here are the functions for all of the assembly instructions.
// Their names are based on the instruction and operators combined.
// You should call `buf.reserve()` if you push or extend more than once.
// Unit tests are added at the bottom of the file to ensure correct asm generation.
// Please keep these in alphanumeric order.
/// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits to r/m64.
pub fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `ADD r/m64,r64` -> Add r64 to r/m64.
pub fn add_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]);
}
/// `CMOVL r64,r/m64` -> Move if less (SF ≠ OF).
pub fn cmovl_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg) {
let rex = add_reg_extension(dst, REX_W);
let rex = add_rm_extension(src, rex);
let dst_mod = (dst as u8 % 8) << 3;
let src_mod = src as u8 % 8;
buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r/m64, imm32` -> Move imm32 sign-extended to 64-bits to r/m64.
pub fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32) {
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `MOV r64, imm64` -> Move imm64 to r64.
pub fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64) {
if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 {
mov_register64bit_immediate32bit(buf, dst, imm as i32)
} else {
let rex = add_opcode_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(10);
buf.extend(&[rex, 0xB8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
pub fn mov_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r64,r/m64` -> Move r/m64 to r64.
pub fn mov_register64bit_stackoffset32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset; I didn't feel like figuring it out right now.
// Also, this may technically make generation faster since stack operations should be so common.
let rex = add_reg_extension(dst, REX_W);
let dst_mod = (dst as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
pub fn mov_stackoffset32bit_register64bit<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset; I didn't feel like figuring it out right now.
// Also, this may technically make generation faster since stack operations should be so common.
let rex = add_reg_extension(src, REX_W);
let src_mod = (src as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `NEG r/m64` -> Two's complement negate r/m64.
pub fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg) {
let rex = add_rm_extension(reg, REX_W);
let reg_mod = reg as u8 % 8;
buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
}
/// `RET` -> Near return to calling procedure.
pub fn ret_near<'a>(buf: &mut Vec<'a, u8>) {
buf.push(0xC3);
}
/// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64.
pub fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xE8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size.
pub fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x58 + reg_mod]);
} else {
buf.push(0x58 + reg_mod);
}
}
/// `PUSH r64` -> Push r64.
pub fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x50 + reg_mod]);
} else {
buf.push(0x50 + reg_mod);
}
}
// When writing tests, it is a good idea to test both a numbered and an unnumbered register.
// This is because R8-R15 often have special instruction prefixes.
#[cfg(test)]
mod tests {
use super::*;
const TEST_I32: i32 = 0x12345678;
const TEST_I64: i64 = 0x12345678_9ABCDEF0;
#[test]
fn test_add_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(GPReg::RAX, [0x48, 0x81, 0xC0]),
(GPReg::R15, [0x49, 0x81, 0xC7]),
] {
buf.clear();
add_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_add_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((GPReg::RAX, GPReg::RAX), [0x48, 0x01, 0xC0]),
((GPReg::RAX, GPReg::R15), [0x4C, 0x01, 0xF8]),
((GPReg::R15, GPReg::RAX), [0x49, 0x01, 0xC7]),
((GPReg::R15, GPReg::R15), [0x4D, 0x01, 0xFF]),
] {
buf.clear();
add_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_cmovl_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((GPReg::RAX, GPReg::RAX), [0x48, 0x0F, 0x4C, 0xC0]),
((GPReg::RAX, GPReg::R15), [0x49, 0x0F, 0x4C, 0xC7]),
((GPReg::R15, GPReg::RAX), [0x4C, 0x0F, 0x4C, 0xF8]),
((GPReg::R15, GPReg::R15), [0x4D, 0x0F, 0x4C, 0xFF]),
] {
buf.clear();
cmovl_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(GPReg::RAX, [0x48, 0xC7, 0xC0]),
(GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
mov_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_immediate64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[(GPReg::RAX, [0x48, 0xB8]), (GPReg::R15, [0x49, 0xBF])] {
buf.clear();
mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I64);
assert_eq!(expected, &buf[..2]);
assert_eq!(TEST_I64.to_le_bytes(), &buf[2..]);
}
for (dst, expected) in &[
(GPReg::RAX, [0x48, 0xC7, 0xC0]),
(GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I32 as i64);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((GPReg::RAX, GPReg::RAX), [0x48, 0x89, 0xC0]),
((GPReg::RAX, GPReg::R15), [0x4C, 0x89, 0xF8]),
((GPReg::R15, GPReg::RAX), [0x49, 0x89, 0xC7]),
((GPReg::R15, GPReg::R15), [0x4D, 0x89, 0xFF]),
] {
buf.clear();
mov_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_stackoffset32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, offset), expected) in &[
((GPReg::RAX, TEST_I32), [0x48, 0x8B, 0x84, 0x24]),
((GPReg::R15, TEST_I32), [0x4C, 0x8B, 0xBC, 0x24]),
] {
buf.clear();
mov_register64bit_stackoffset32bit(&mut buf, *dst, *offset);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_mov_stackoffset32bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((offset, src), expected) in &[
((TEST_I32, GPReg::RAX), [0x48, 0x89, 0x84, 0x24]),
((TEST_I32, GPReg::R15), [0x4C, 0x89, 0xBC, 0x24]),
] {
buf.clear();
mov_stackoffset32bit_register64bit(&mut buf, *offset, *src);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_neg_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (reg, expected) in &[
(GPReg::RAX, [0x48, 0xF7, 0xD8]),
(GPReg::R15, [0x49, 0xF7, 0xDF]),
] {
buf.clear();
neg_register64bit(&mut buf, *reg);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_ret_near() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
ret_near(&mut buf);
assert_eq!(&[0xC3], &buf[..]);
}
#[test]
fn test_sub_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(GPReg::RAX, [0x48, 0x81, 0xE8]),
(GPReg::R15, [0x49, 0x81, 0xEF]),
] {
buf.clear();
sub_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_pop_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[(GPReg::RAX, vec![0x58]), (GPReg::R15, vec![0x41, 0x5F])] {
buf.clear();
pop_register64bit(&mut buf, *dst);
assert_eq!(&expected[..], &buf[..]);
}
}
#[test]
fn test_push_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (src, expected) in &[(GPReg::RAX, vec![0x50]), (GPReg::R15, vec![0x41, 0x57])] {
buf.clear();
push_register64bit(&mut buf, *src);
assert_eq!(&expected[..], &buf[..]);
}
}
}