Major refactor for generic 64 bit with traits

This commit is contained in:
Brendan Hansknecht 2020-11-23 00:16:42 -08:00
parent c8dbcdcf64
commit 3430a08d3d
6 changed files with 848 additions and 723 deletions
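The gist of the refactor: the monolithic X86_64Backend is split into a generic Backend64Bit parameterized by three traits, so a 64-bit target is fully described by a register set, an Assembler, and a CallConv. A hedged sketch of how the pieces compose after this commit (the types come from object_builder.rs below; the comments are editorial):

    let backend: Backend64Bit<
        x86_64::X86_64GPReg,     // the register set
        x86_64::X86_64Assembler, // emits the raw instruction bytes
        x86_64::X86_64SystemV,   // answers calling-convention questions
    > = Backend::new(env, target)?;

Supporting Windows then presumably only means swapping X86_64SystemV for the X86_64WindowsFastcall implementation also added below; the register set and assembler are reused.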

View file

@@ -1,142 +0,0 @@
use crate::x86_64::X86_64Backend;
use crate::{Backend, Env, Relocation, INLINED_SYMBOLS};
use bumpalo::collections::Vec;
use object::write;
use object::write::{Object, StandardSection, Symbol, SymbolSection};
use object::{
Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationKind, SectionKind,
SymbolFlags, SymbolKind, SymbolScope,
};
use roc_collections::all::MutMap;
use roc_module::symbol;
use roc_mono::ir::Proc;
use roc_mono::layout::Layout;
use target_lexicon::Triple;
const VERSION: &str = env!("CARGO_PKG_VERSION");
pub fn build_module<'a>(
env: &'a Env,
target: &Triple,
procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
) -> Result<Object, String> {
match target.architecture {
target_lexicon::Architecture::X86_64 => {
let mut output =
Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little);
let text = output.section_id(StandardSection::Text);
let data_section = output.section_id(StandardSection::Data);
let comment = output.add_section(vec![], b"comment".to_vec(), SectionKind::OtherString);
output.append_section_data(
comment,
format!("\0roc dev backend version {} \0", VERSION).as_bytes(),
1,
);
// Set up layout_ids for procedure calls.
let mut layout_ids = roc_mono::layout::LayoutIds::default();
let mut procs = Vec::with_capacity_in(procedures.len(), env.arena);
for ((sym, layout), proc) in procedures {
// This is temporary until we support passing args to functions.
if INLINED_SYMBOLS.contains(&sym) {
continue;
}
let fn_name = layout_ids
.get(sym, &layout)
.to_symbol_string(sym, &env.interns);
let proc_symbol = Symbol {
name: fn_name.as_bytes().to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Text,
// TODO: Depending on whether we are building a static or dynamic lib, this should change.
// We should use Dynamic -> anyone, Linkage -> static link, Compilation -> this module only.
scope: if env.exposed_to_host.contains(&sym) {
SymbolScope::Dynamic
} else {
SymbolScope::Linkage
},
weak: false,
section: SymbolSection::Section(text),
flags: SymbolFlags::None,
};
let proc_id = output.add_symbol(proc_symbol);
procs.push((fn_name, proc_id, proc));
}
// Build procedures.
let mut backend: X86_64Backend = Backend::new(env, target)?;
for (fn_name, proc_id, proc) in procs {
let mut local_data_index = 0;
let (proc_data, relocations) = backend.build_proc(proc)?;
let proc_offset = output.add_symbol_data(proc_id, text, proc_data, 16);
for reloc in relocations {
let elfreloc = match reloc {
Relocation::LocalData { offset, data } => {
let data_symbol = write::Symbol {
name: format!("{}.data{}", fn_name, local_data_index)
.as_bytes()
.to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Data,
scope: SymbolScope::Compilation,
weak: false,
section: write::SymbolSection::Section(data_section),
flags: SymbolFlags::None,
};
local_data_index += 1;
let data_id = output.add_symbol(data_symbol);
output.add_symbol_data(data_id, data_section, data, 4);
write::Relocation {
offset: *offset + proc_offset,
size: 32,
kind: RelocationKind::Relative,
encoding: RelocationEncoding::Generic,
symbol: data_id,
addend: -4,
}
}
Relocation::LinkedData { offset, name } => {
if let Some(sym_id) = output.symbol_id(name.as_bytes()) {
write::Relocation {
offset: *offset + proc_offset,
size: 32,
kind: RelocationKind::GotRelative,
encoding: RelocationEncoding::Generic,
symbol: sym_id,
addend: -4,
}
} else {
return Err(format!("failed to find symbol for {:?}", name));
}
}
Relocation::LinkedFunction { offset, name } => {
if let Some(sym_id) = output.symbol_id(name.as_bytes()) {
write::Relocation {
offset: *offset + proc_offset,
size: 32,
kind: RelocationKind::PltRelative,
encoding: RelocationEncoding::Generic,
symbol: sym_id,
addend: -4,
}
} else {
return Err(format!("failed to find symbol for {:?}", name));
}
}
};
output
.add_relocation(text, elfreloc)
.map_err(|e| format!("{:?}", e))?;
}
}
Ok(output)
}
x => Err(format! {
"the architecture, {:?}, is not yet implemented for elf",
x}),
}
}

View file

@@ -3,21 +3,59 @@ use bumpalo::collections::Vec;
use roc_collections::all::{ImSet, MutMap, MutSet};
use roc_module::symbol::Symbol;
use roc_mono::ir::{Literal, Stmt};
+use std::marker::PhantomData;
use target_lexicon::{CallingConvention, Triple};
-mod asm;
-use asm::GPReg;
-#[derive(Clone, Debug, PartialEq)]
-enum SymbolStorage {
-// These may need layout, but I am not sure.
-// I think whenever a symbol would be used, we specify layout anyways.
-GPReg(GPReg),
-Stack(i32),
-StackAndGPReg(GPReg, i32),
-}
-pub struct X86_64Backend<'a> {
+pub mod x86_64;
+pub trait CallConv<GPReg> {
+fn gp_param_regs() -> &'static [GPReg];
+fn gp_return_regs() -> &'static [GPReg];
+fn gp_default_free_regs() -> &'static [GPReg];
+// A linear scan of an array may be faster than a set technically.
+// That being said, fastest would likely be a trait based on calling convention/register.
+fn caller_saved_regs() -> ImSet<GPReg>;
+fn callee_saved_regs() -> ImSet<GPReg>;
+fn stack_pointer() -> GPReg;
+fn frame_pointer() -> GPReg;
+fn shadow_space_size() -> u8;
+// It may be worth ignoring the red zone and keeping things simpler.
+fn red_zone_size() -> u8;
+}
+pub trait Assembler<GPReg> {
+fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
+fn add_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+fn cmovl_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
+fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64);
+fn mov_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+fn mov_register64bit_stackoffset32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32);
+fn mov_stackoffset32bit_register64bit<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg);
+fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
+fn ret_near<'a>(buf: &mut Vec<'a, u8>);
+fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
+fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
+fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
+}
+#[derive(Clone, Debug, PartialEq)]
+enum SymbolStorage<GPReg> {
+// These may need layout, but I am not sure.
+// I think whenever a symbol would be used, we specify layout anyways.
+GPReg(GPReg),
+Stack(i32),
+StackAndGPReg(GPReg, i32),
+}
+pub trait GPRegTrait: Copy + Eq + std::hash::Hash + std::fmt::Debug + 'static {}
+pub struct Backend64Bit<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> {
+phantom_asm: PhantomData<ASM>,
+phantom_cc: PhantomData<CC>,
env: &'a Env<'a>,
buf: Vec<'a, u8>,
@@ -27,15 +65,11 @@ pub struct X86_64Backend<'a> {
last_seen_map: MutMap<Symbol, *const Stmt<'a>>,
free_map: MutMap<*const Stmt<'a>, Vec<'a, Symbol>>,
-symbols_map: MutMap<Symbol, SymbolStorage>,
+symbols_map: MutMap<Symbol, SymbolStorage<GPReg>>,
literal_map: MutMap<Symbol, Literal<'a>>,
-gp_param_regs: &'static [GPReg],
-gp_return_regs: &'static [GPReg],
// This should probably be smarter than a vec.
// There are certain registers we should always use first. With pushing and popping, this could get mixed.
-gp_default_free_regs: &'static [GPReg],
gp_free_regs: Vec<'a, GPReg>,
// The last major thing we need is a way to decide what reg to free when all of them are full.
@@ -44,22 +78,18 @@ pub struct X86_64Backend<'a> {
gp_used_regs: Vec<'a, (GPReg, Symbol)>,
stack_size: i32,
-shadow_space_size: u8,
-red_zone_size: u8,
-// A linear scan of an array may be faster than a set technically.
-// That being said, fastest would likely be a trait based on calling convention/register.
-caller_saved_regs: ImSet<GPReg>,
-callee_saved_regs: ImSet<GPReg>,
// used callee saved regs must be tracked for pushing and popping at the beginning/end of the function.
used_callee_saved_regs: MutSet<GPReg>,
}
-impl<'a> Backend<'a> for X86_64Backend<'a> {
-fn new(env: &'a Env, target: &Triple) -> Result<Self, String> {
-match target.default_calling_convention() {
-Ok(CallingConvention::SystemV) => Ok(X86_64Backend {
+impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<'a>
+for Backend64Bit<'a, GPReg, ASM, CC>
+{
+fn new(env: &'a Env, _target: &Triple) -> Result<Self, String> {
+Ok(Backend64Bit {
+phantom_asm: PhantomData,
+phantom_cc: PhantomData,
env,
leaf_function: true,
buf: bumpalo::vec!(in env.arena),
@@ -67,126 +97,11 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
free_map: MutMap::default(),
symbols_map: MutMap::default(),
literal_map: MutMap::default(),
-gp_param_regs: &[
-GPReg::RDI,
-GPReg::RSI,
-GPReg::RDX,
-GPReg::RCX,
-GPReg::R8,
-GPReg::R9,
-],
-gp_return_regs: &[GPReg::RAX, GPReg::RDX],
-gp_default_free_regs: &[
-// The regs we want to use first should be at the end of this vec.
-// We will use pop to get which reg to use next
-// Use callee saved regs last.
-GPReg::RBX,
-// Don't use frame pointer: GPReg::RBP,
-GPReg::R12,
-GPReg::R13,
-GPReg::R14,
-GPReg::R15,
-// Use caller saved regs first.
-GPReg::RAX,
-GPReg::RCX,
-GPReg::RDX,
-// Don't use stack pointer: GPReg::RSP,
-GPReg::RSI,
-GPReg::RDI,
-GPReg::R8,
-GPReg::R9,
-GPReg::R10,
-GPReg::R11,
-],
gp_free_regs: bumpalo::vec![in env.arena],
gp_used_regs: bumpalo::vec![in env.arena],
stack_size: 0,
-shadow_space_size: 0,
-red_zone_size: 128,
-// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
-caller_saved_regs: ImSet::from(vec![
-GPReg::RAX,
-GPReg::RCX,
-GPReg::RDX,
-GPReg::RSP,
-GPReg::RSI,
-GPReg::RDI,
-GPReg::R8,
-GPReg::R9,
-GPReg::R10,
-GPReg::R11,
-]),
-callee_saved_regs: ImSet::from(vec![
-GPReg::RBX,
-GPReg::RBP,
-GPReg::R12,
-GPReg::R13,
-GPReg::R14,
-GPReg::R15,
-]),
used_callee_saved_regs: MutSet::default(),
-}),
+})
-Ok(CallingConvention::WindowsFastcall) => Ok(X86_64Backend {
-env,
-leaf_function: true,
-buf: bumpalo::vec!(in env.arena),
-last_seen_map: MutMap::default(),
-free_map: MutMap::default(),
-symbols_map: MutMap::default(),
-literal_map: MutMap::default(),
-gp_param_regs: &[GPReg::RCX, GPReg::RDX, GPReg::R8, GPReg::R9],
-gp_return_regs: &[GPReg::RAX],
-gp_default_free_regs: &[
-// The regs we want to use first should be at the end of this vec.
-// We will use pop to get which reg to use next
-// Use callee saved regs last.
-GPReg::RBX,
-// Don't use frame pointer: GPReg::RBP,
-GPReg::RSI,
-// Don't use stack pointer: GPReg::RSP,
-GPReg::RDI,
-GPReg::R12,
-GPReg::R13,
-GPReg::R14,
-GPReg::R15,
-// Use caller saved regs first.
-GPReg::RAX,
-GPReg::RCX,
-GPReg::RDX,
-GPReg::R8,
-GPReg::R9,
-GPReg::R10,
-GPReg::R11,
-],
-gp_free_regs: bumpalo::vec![in env.arena],
-gp_used_regs: bumpalo::vec![in env.arena],
-stack_size: 0,
-shadow_space_size: 32,
-red_zone_size: 0,
-caller_saved_regs: ImSet::from(vec![
-GPReg::RAX,
-GPReg::RCX,
-GPReg::RDX,
-GPReg::R8,
-GPReg::R9,
-GPReg::R10,
-GPReg::R11,
-]),
-callee_saved_regs: ImSet::from(vec![
-GPReg::RBX,
-GPReg::RBP,
-GPReg::RSI,
-GPReg::RSP,
-GPReg::RDI,
-GPReg::R12,
-GPReg::R13,
-GPReg::R14,
-GPReg::R15,
-]),
-used_callee_saved_regs: MutSet::default(),
-}),
-x => Err(format!("unsupported backend: {:?}", x)),
-}
}
fn env(&self) -> &'a Env<'a> {
@@ -194,7 +109,7 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
}
fn reset(&mut self) {
-self.stack_size = -(self.red_zone_size as i32);
+self.stack_size = -(CC::red_zone_size() as i32);
self.leaf_function = true;
self.last_seen_map.clear();
self.free_map.clear();
@@ -204,13 +119,13 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
self.gp_free_regs.clear();
self.gp_used_regs.clear();
self.gp_free_regs
-.extend_from_slice(self.gp_default_free_regs);
+.extend_from_slice(CC::gp_default_free_regs());
}
fn set_not_leaf_function(&mut self) {
self.leaf_function = false;
// If this is not a leaf function, it can't use the shadow space.
-self.stack_size = self.shadow_space_size as i32 - self.red_zone_size as i32;
+self.stack_size = CC::shadow_space_size() as i32 - CC::red_zone_size() as i32;
}
fn literal_map(&mut self) -> &mut MutMap<Symbol, Literal<'a>> {
@@ -233,33 +148,38 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
let mut out = bumpalo::vec![in self.env.arena];
if !self.leaf_function {
-asm::push_register64bit(&mut out, GPReg::RBP);
-asm::mov_register64bit_register64bit(&mut out, GPReg::RBP, GPReg::RSP);
+// I believe that this will have to move away from push and to mov to be generic across backends.
+ASM::push_register64bit(&mut out, CC::frame_pointer());
+ASM::mov_register64bit_register64bit(
+&mut out,
+CC::frame_pointer(),
+CC::stack_pointer(),
+);
}
// Save data in all callee saved regs.
let mut pop_order = bumpalo::vec![in self.env.arena];
for reg in &self.used_callee_saved_regs {
-asm::push_register64bit(&mut out, *reg);
+ASM::push_register64bit(&mut out, *reg);
pop_order.push(*reg);
}
if self.stack_size > 0 {
-asm::sub_register64bit_immediate32bit(&mut out, GPReg::RSP, self.stack_size);
+ASM::sub_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size);
}
// Add function body.
out.extend(&self.buf);
if self.stack_size > 0 {
-asm::add_register64bit_immediate32bit(&mut out, GPReg::RSP, self.stack_size);
+ASM::add_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size);
}
// Restore data in callee saved regs.
while let Some(reg) = pop_order.pop() {
-asm::pop_register64bit(&mut out, reg);
+ASM::pop_register64bit(&mut out, reg);
}
if !self.leaf_function {
-asm::pop_register64bit(&mut out, GPReg::RBP);
+ASM::pop_register64bit(&mut out, CC::frame_pointer());
}
-asm::ret_near(&mut out);
+ASM::ret_near(&mut out);
Ok((out.into_bump_slice(), &[]))
}
@@ -267,9 +187,9 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String> {
let dst_reg = self.claim_gp_reg(dst)?;
let src_reg = self.load_to_reg(src)?;
-asm::mov_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
-asm::neg_register64bit(&mut self.buf, dst_reg);
-asm::cmovl_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
+ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
+ASM::neg_register64bit(&mut self.buf, dst_reg);
+ASM::cmovl_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
Ok(())
}
@@ -281,9 +201,9 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
) -> Result<(), String> {
let dst_reg = self.claim_gp_reg(dst)?;
let src1_reg = self.load_to_reg(src1)?;
-asm::mov_register64bit_register64bit(&mut self.buf, dst_reg, src1_reg);
+ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src1_reg);
let src2_reg = self.load_to_reg(src2)?;
-asm::add_register64bit_register64bit(&mut self.buf, dst_reg, src2_reg);
+ASM::add_register64bit_register64bit(&mut self.buf, dst_reg, src2_reg);
Ok(())
}
@@ -292,7 +212,7 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
Literal::Int(x) => {
let reg = self.claim_gp_reg(sym)?;
let val = *x;
-asm::mov_register64bit_immediate64bit(&mut self.buf, reg, val);
+ASM::mov_register64bit_immediate64bit(&mut self.buf, reg, val);
Ok(())
}
x => Err(format!("loading literal, {:?}, is not yet implemented", x)),
@@ -314,11 +234,11 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
fn return_symbol(&mut self, sym: &Symbol) -> Result<(), String> {
let val = self.symbols_map.get(sym);
match val {
-Some(SymbolStorage::GPReg(reg)) if *reg == self.gp_return_regs[0] => Ok(()),
+Some(SymbolStorage::GPReg(reg)) if *reg == CC::gp_return_regs()[0] => Ok(()),
Some(SymbolStorage::GPReg(reg)) => {
// If it fits in a general purpose register, just copy it over.
// Technically this can be optimized to produce shorter instructions if less than 64bits.
-asm::mov_register64bit_register64bit(&mut self.buf, self.gp_return_regs[0], *reg);
+ASM::mov_register64bit_register64bit(&mut self.buf, CC::gp_return_regs()[0], *reg);
Ok(())
}
Some(x) => Err(format!(
@@ -332,11 +252,13 @@ impl<'a> Backend<'a> for X86_64Backend<'a> {
/// This impl block is for IR-related instructions that need backend-specific information.
/// For example, loading a symbol for doing a computation.
-impl<'a> X86_64Backend<'a> {
+impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
+Backend64Bit<'a, GPReg, ASM, CC>
+{
fn claim_gp_reg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
let reg = if !self.gp_free_regs.is_empty() {
let free_reg = self.gp_free_regs.pop().unwrap();
-if self.callee_saved_regs.contains(&free_reg) {
+if CC::callee_saved_regs().contains(&free_reg) {
self.used_callee_saved_regs.insert(free_reg);
}
Ok(free_reg)
@@ -349,27 +271,27 @@ impl<'a> X86_64Backend<'a> {
}?;
self.gp_used_regs.push((reg, *sym));
self.symbols_map.insert(*sym, SymbolStorage::GPReg(reg));
Ok(reg)
}
fn load_to_reg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
let val = self.symbols_map.remove(sym);
match val {
Some(SymbolStorage::GPReg(reg)) => {
self.symbols_map.insert(*sym, SymbolStorage::GPReg(reg));
Ok(reg)
}
Some(SymbolStorage::StackAndGPReg(reg, offset)) => {
self.symbols_map
.insert(*sym, SymbolStorage::StackAndGPReg(reg, offset));
Ok(reg)
}
Some(SymbolStorage::Stack(offset)) => {
let reg = self.claim_gp_reg(sym)?;
self.symbols_map
.insert(*sym, SymbolStorage::StackAndGPReg(reg, offset));
-asm::mov_register64bit_stackoffset32bit(&mut self.buf, reg, offset as i32);
+ASM::mov_register64bit_stackoffset32bit(&mut self.buf, reg, offset as i32);
Ok(reg)
}
None => Err(format!("Unknown symbol: {}", sym)),
@@ -379,7 +301,7 @@ impl<'a> X86_64Backend<'a> {
fn free_to_stack(&mut self, sym: &Symbol) -> Result<(), String> {
let val = self.symbols_map.remove(sym);
match val {
Some(SymbolStorage::GPReg(reg)) => {
let offset = self.stack_size;
self.stack_size += 8;
if let Some(size) = self.stack_size.checked_add(8) {
@@ -390,12 +312,12 @@ impl<'a> X86_64Backend<'a> {
sym
));
}
-asm::mov_stackoffset32bit_register64bit(&mut self.buf, offset as i32, reg);
+ASM::mov_stackoffset32bit_register64bit(&mut self.buf, offset as i32, reg);
self.symbols_map
.insert(*sym, SymbolStorage::Stack(offset as i32));
Ok(())
}
Some(SymbolStorage::StackAndGPReg(_, offset)) => {
self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
Ok(())
}
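With every calling-convention question routed through CC, the backend body above stays target-agnostic. A minimal sketch of code written against the CallConv seam, mirroring what reset() does (hypothetical helper, not part of the commit):

    // Leaf functions may reuse the red zone, so the initial stack size is
    // "negative" by red_zone_size() bytes, exactly as in reset() above.
    fn initial_stack_size<GPReg, CC: CallConv<GPReg>>() -> i32 {
        -(CC::red_zone_size() as i32)
    }
    // e.g. initial_stack_size::<X86_64GPReg, X86_64SystemV>() == -128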

View file

@@ -0,0 +1,582 @@
use crate::generic64::{Assembler, CallConv, GPRegTrait};
use bumpalo::collections::Vec;
use roc_collections::all::ImSet;
// Not sure exactly how I want to represent registers.
// If we want max speed, we would likely make them structs that impl the same trait to avoid ifs.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub enum X86_64GPReg {
RAX = 0,
RCX = 1,
RDX = 2,
RBX = 3,
RSP = 4,
RBP = 5,
RSI = 6,
RDI = 7,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
}
impl GPRegTrait for X86_64GPReg {}
const REX: u8 = 0x40;
const REX_W: u8 = REX + 0x8;
fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 1
} else {
byte
}
}
fn add_opcode_extension(reg: X86_64GPReg, byte: u8) -> u8 {
add_rm_extension(reg, byte)
}
fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 4
} else {
byte
}
}
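// Worked example (hedged; it matches the `add` unit tests below): encoding
// `add r15, imm32`. R15 is register number 15, so it needs the REX.B bit:
// add_rm_extension turns REX_W (0x48) into 0x49, and the ModR/M byte is
// 0xC0 + (15 % 8) = 0xC7, giving the bytes 0x49 0x81 0xC7 followed by imm32.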
pub struct X86_64Assembler {}
pub struct X86_64WindowsFastcall {}
pub struct X86_64SystemV {}
impl CallConv<X86_64GPReg> for X86_64SystemV {
fn gp_param_regs() -> &'static [X86_64GPReg] {
&[
X86_64GPReg::RDI,
X86_64GPReg::RSI,
X86_64GPReg::RDX,
X86_64GPReg::RCX,
X86_64GPReg::R8,
X86_64GPReg::R9,
]
}
fn gp_return_regs() -> &'static [X86_64GPReg] {
&[X86_64GPReg::RAX, X86_64GPReg::RDX]
}
fn gp_default_free_regs() -> &'static [X86_64GPReg] {
&[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Use callee saved regs last.
X86_64GPReg::RBX,
// Don't use frame pointer: X86_64GPReg::RBP,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
// Use caller saved regs first.
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
// Don't use stack pointer: X86_64GPReg::RSP,
X86_64GPReg::RSI,
X86_64GPReg::RDI,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
]
}
fn caller_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::RSP,
X86_64GPReg::RSI,
X86_64GPReg::RDI,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
])
}
fn callee_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RBX,
X86_64GPReg::RBP,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
])
}
fn stack_pointer() -> X86_64GPReg {
X86_64GPReg::RSP
}
fn frame_pointer() -> X86_64GPReg {
X86_64GPReg::RBP
}
fn shadow_space_size() -> u8 {
0
}
fn red_zone_size() -> u8 {
128
}
}
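// Note on the two sizes above versus WindowsFastcall below: System V gives
// leaf functions a 128-byte red zone below the stack pointer and no shadow
// space, while the Windows x64 convention is the mirror image: a 32-byte
// caller-allocated shadow space and no red zone.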
impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
fn gp_param_regs() -> &'static [X86_64GPReg] {
&[
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
]
}
fn gp_return_regs() -> &'static [X86_64GPReg] {
&[X86_64GPReg::RAX]
}
fn gp_default_free_regs() -> &'static [X86_64GPReg] {
&[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Use callee saved regs last.
X86_64GPReg::RBX,
// Don't use frame pointer: X86_64GPReg::RBP,
X86_64GPReg::RSI,
// Don't use stack pointer: X86_64GPReg::RSP,
X86_64GPReg::RDI,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
// Use caller saved regs first.
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
]
}
fn caller_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
])
}
fn callee_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RBX,
X86_64GPReg::RBP,
X86_64GPReg::RSI,
X86_64GPReg::RSP,
X86_64GPReg::RDI,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
])
}
fn stack_pointer() -> X86_64GPReg {
X86_64GPReg::RSP
}
fn frame_pointer() -> X86_64GPReg {
X86_64GPReg::RBP
}
fn shadow_space_size() -> u8 {
32
}
fn red_zone_size() -> u8 {
0
}
}
impl Assembler<X86_64GPReg> for X86_64Assembler {
// Below here are the functions for all of the assembly instructions.
// Their names are based on the instruction and operators combined.
// You should call `buf.reserve()` if you push or extend more than once.
// Unit tests are added at the bottom of the file to ensure correct asm generation.
// Please keep these in alphanumeric order.
/// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits to r/m64.
fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `ADD r/m64,r64` -> Add r64 to r/m64.
fn add_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]);
}
/// `CMOVL r64,r/m64` -> Move if less (SF ≠ OF).
fn cmovl_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_reg_extension(dst, REX_W);
let rex = add_rm_extension(src, rex);
let dst_mod = (dst as u8 % 8) << 3;
let src_mod = src as u8 % 8;
buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r/m64, imm32` -> Move imm32 sign-extended to 64-bits to r/m64.
fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `MOV r64, imm64` -> Move imm64 to r64.
fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) {
if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 {
Self::mov_register64bit_immediate32bit(buf, dst, imm as i32)
} else {
let rex = add_opcode_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(10);
buf.extend(&[rex, 0xB8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
}
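// Size note on the branch above: the sign-extended imm32 form is 7 bytes
// (REX + 0xC7 + ModR/M + imm32) while the full imm64 form is 10 bytes
// (REX + 0xB8+rd + imm64), which is where the buf.reserve() counts come from.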
/// `MOV r/m64,r64` -> Move r64 to r/m64.
fn mov_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r64,r/m64` -> Move r/m64 to r64.
fn mov_register64bit_stackoffset32bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
offset: i32,
) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset; I didn't feel like figuring it out right now.
// Also, this may technically make generation faster since stack operations should be so common.
let rex = add_reg_extension(dst, REX_W);
let dst_mod = (dst as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
fn mov_stackoffset32bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
offset: i32,
src: X86_64GPReg,
) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset; I didn't feel like figuring it out right now.
// Also, this may technically make generation faster since stack operations should be so common.
let rex = add_reg_extension(src, REX_W);
let src_mod = (src as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `NEG r/m64` -> Two's complement negate r/m64.
fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let rex = add_rm_extension(reg, REX_W);
let reg_mod = reg as u8 % 8;
buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
}
/// `RET` -> Near return to calling procedure.
fn ret_near<'a>(buf: &mut Vec<'a, u8>) {
buf.push(0xC3);
}
/// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64.
fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xE8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size.
fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x58 + reg_mod]);
} else {
buf.push(0x58 + reg_mod);
}
}
/// `PUSH r64` -> Push r64.
fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x50 + reg_mod]);
} else {
buf.push(0x50 + reg_mod);
}
}
}
// When writing tests, it is a good idea to test both a numbered and an unnumbered register.
// This is because R8-R15 often have special instruction prefixes.
#[cfg(test)]
mod tests {
use super::*;
const TEST_I32: i32 = 0x12345678;
const TEST_I64: i64 = 0x12345678_9ABCDEF0;
#[test]
fn test_add_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0x81, 0xC0]),
(X86_64GPReg::R15, [0x49, 0x81, 0xC7]),
] {
buf.clear();
X86_64Assembler::add_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_add_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((X86_64GPReg::RAX, X86_64GPReg::RAX), [0x48, 0x01, 0xC0]),
((X86_64GPReg::RAX, X86_64GPReg::R15), [0x4C, 0x01, 0xF8]),
((X86_64GPReg::R15, X86_64GPReg::RAX), [0x49, 0x01, 0xC7]),
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x01, 0xFF]),
] {
buf.clear();
X86_64Assembler::add_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_cmovl_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
(
(X86_64GPReg::RAX, X86_64GPReg::RAX),
[0x48, 0x0F, 0x4C, 0xC0],
),
(
(X86_64GPReg::RAX, X86_64GPReg::R15),
[0x49, 0x0F, 0x4C, 0xC7],
),
(
(X86_64GPReg::R15, X86_64GPReg::RAX),
[0x4C, 0x0F, 0x4C, 0xF8],
),
(
(X86_64GPReg::R15, X86_64GPReg::R15),
[0x4D, 0x0F, 0x4C, 0xFF],
),
] {
buf.clear();
X86_64Assembler::cmovl_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0xC7, 0xC0]),
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_immediate64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0xB8]),
(X86_64GPReg::R15, [0x49, 0xBF]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I64);
assert_eq!(expected, &buf[..2]);
assert_eq!(TEST_I64.to_le_bytes(), &buf[2..]);
}
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0xC7, 0xC0]),
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I32 as i64);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((X86_64GPReg::RAX, X86_64GPReg::RAX), [0x48, 0x89, 0xC0]),
((X86_64GPReg::RAX, X86_64GPReg::R15), [0x4C, 0x89, 0xF8]),
((X86_64GPReg::R15, X86_64GPReg::RAX), [0x49, 0x89, 0xC7]),
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x89, 0xFF]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_stackoffset32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, offset), expected) in &[
((X86_64GPReg::RAX, TEST_I32), [0x48, 0x8B, 0x84, 0x24]),
((X86_64GPReg::R15, TEST_I32), [0x4C, 0x8B, 0xBC, 0x24]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_stackoffset32bit(&mut buf, *dst, *offset);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_mov_stackoffset32bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((offset, src), expected) in &[
((TEST_I32, X86_64GPReg::RAX), [0x48, 0x89, 0x84, 0x24]),
((TEST_I32, X86_64GPReg::R15), [0x4C, 0x89, 0xBC, 0x24]),
] {
buf.clear();
X86_64Assembler::mov_stackoffset32bit_register64bit(&mut buf, *offset, *src);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_neg_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (reg, expected) in &[
(X86_64GPReg::RAX, [0x48, 0xF7, 0xD8]),
(X86_64GPReg::R15, [0x49, 0xF7, 0xDF]),
] {
buf.clear();
X86_64Assembler::neg_register64bit(&mut buf, *reg);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_ret_near() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
X86_64Assembler::ret_near(&mut buf);
assert_eq!(&[0xC3], &buf[..]);
}
#[test]
fn test_sub_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, [0x48, 0x81, 0xE8]),
(X86_64GPReg::R15, [0x49, 0x81, 0xEF]),
] {
buf.clear();
X86_64Assembler::sub_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_pop_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(X86_64GPReg::RAX, vec![0x58]),
(X86_64GPReg::R15, vec![0x41, 0x5F]),
] {
buf.clear();
X86_64Assembler::pop_register64bit(&mut buf, *dst);
assert_eq!(&expected[..], &buf[..]);
}
}
#[test]
fn test_push_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (src, expected) in &[
(X86_64GPReg::RAX, vec![0x50]),
(X86_64GPReg::R15, vec![0x41, 0x57]),
] {
buf.clear();
X86_64Assembler::push_register64bit(&mut buf, *src);
assert_eq!(&expected[..], &buf[..]);
}
}
}
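To get a feel for the Assembler trait in use, here is a hedged sketch (a hypothetical helper, not part of the commit) that emits `mov rax, 42; ret`, the same calls build_proc makes for a return value and epilogue:

    // Hypothetical example, reusing only items defined in this file.
    fn emit_return_42(arena: &bumpalo::Bump) -> bumpalo::collections::Vec<'_, u8> {
        let mut buf = bumpalo::vec![in arena];
        // RAX is the first System V general-purpose return register.
        X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, X86_64GPReg::RAX, 42);
        X86_64Assembler::ret_near(&mut buf);
        buf
    }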

View file

@@ -19,11 +19,12 @@ use roc_module::low_level::LowLevel;
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{CallType, Expr, JoinPointId, Literal, Proc, Stmt};
use roc_mono::layout::{Builtin, Layout};
-use target_lexicon::{BinaryFormat, Triple};
+use target_lexicon::Triple;
-pub mod elf;
-pub mod run_roc;
-pub mod x86_64;
+mod generic64;
+mod object_builder;
+pub use object_builder::build_module;
+mod run_roc;
pub struct Env<'a> {
pub arena: &'a Bump,
@@ -35,21 +36,6 @@ pub struct Env<'a> {
// INLINED_SYMBOLS is a set of all of the functions we automatically inline if seen.
const INLINED_SYMBOLS: [Symbol; 2] = [Symbol::NUM_ABS, Symbol::NUM_ADD];
-/// build_module is the high level builder/delegator.
-/// It takes the request to build a module and output the object file for the module.
-pub fn build_module<'a>(
-env: &'a Env,
-target: &Triple,
-procedures: MutMap<(Symbol, Layout<'a>), Proc<'a>>,
-) -> Result<Object, String> {
-match target.binary_format {
-BinaryFormat::Elf => elf::build_module(env, target, procedures),
-x => Err(format! {
-"the binary format, {:?}, is not yet implemented",
-x}),
-}
-}
// These relocations likely will need a length.
// They may even need more definition, but this should be at least good enough for how we will use elf.
enum Relocation<'a> {

View file

@@ -0,0 +1,154 @@
use crate::generic64::{x86_64, Backend64Bit};
use crate::{Backend, Env, Relocation, INLINED_SYMBOLS};
use bumpalo::collections::Vec;
use object::write;
use object::write::{Object, StandardSection, Symbol, SymbolSection};
use object::{
Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationKind, SectionKind,
SymbolFlags, SymbolKind, SymbolScope,
};
use roc_collections::all::MutMap;
use roc_module::symbol;
use roc_mono::ir::Proc;
use roc_mono::layout::Layout;
use target_lexicon::{Architecture as TargetArch, BinaryFormat as TargetBF, Triple};
const VERSION: &str = env!("CARGO_PKG_VERSION");
/// build_module is the high level builder/delegator.
/// It takes the request to build a module and output the object file for the module.
pub fn build_module<'a>(
env: &'a Env,
target: &Triple,
procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
) -> Result<Object, String> {
let (mut output, mut backend) = match target {
Triple {
architecture: TargetArch::X86_64,
binary_format: TargetBF::Elf,
..
} => {
let backend: Backend64Bit<
x86_64::X86_64GPReg,
x86_64::X86_64Assembler,
x86_64::X86_64SystemV,
> = Backend::new(env, target)?;
Ok((
Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little),
backend,
))
}
x => Err(format! {
"the target, {:?}, is not yet implemented",
x}),
}?;
let text = output.section_id(StandardSection::Text);
let data_section = output.section_id(StandardSection::Data);
let comment = output.add_section(vec![], b"comment".to_vec(), SectionKind::OtherString);
output.append_section_data(
comment,
format!("\0roc dev backend version {} \0", VERSION).as_bytes(),
1,
);
// Set up layout_ids for procedure calls.
let mut layout_ids = roc_mono::layout::LayoutIds::default();
let mut procs = Vec::with_capacity_in(procedures.len(), env.arena);
for ((sym, layout), proc) in procedures {
// This is temporary until we support passing args to functions.
if INLINED_SYMBOLS.contains(&sym) {
continue;
}
let fn_name = layout_ids
.get(sym, &layout)
.to_symbol_string(sym, &env.interns);
let proc_symbol = Symbol {
name: fn_name.as_bytes().to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Text,
// TODO: Depending on whether we are building a static or dynamic lib, this should change.
// We should use Dynamic -> anyone, Linkage -> static link, Compilation -> this module only.
scope: if env.exposed_to_host.contains(&sym) {
SymbolScope::Dynamic
} else {
SymbolScope::Linkage
},
weak: false,
section: SymbolSection::Section(text),
flags: SymbolFlags::None,
};
let proc_id = output.add_symbol(proc_symbol);
procs.push((fn_name, proc_id, proc));
}
// Build procedures.
for (fn_name, proc_id, proc) in procs {
let mut local_data_index = 0;
let (proc_data, relocations) = backend.build_proc(proc)?;
let proc_offset = output.add_symbol_data(proc_id, text, proc_data, 16);
for reloc in relocations {
let elfreloc = match reloc {
Relocation::LocalData { offset, data } => {
let data_symbol = write::Symbol {
name: format!("{}.data{}", fn_name, local_data_index)
.as_bytes()
.to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Data,
scope: SymbolScope::Compilation,
weak: false,
section: write::SymbolSection::Section(data_section),
flags: SymbolFlags::None,
};
local_data_index += 1;
let data_id = output.add_symbol(data_symbol);
output.add_symbol_data(data_id, data_section, data, 4);
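// Note: size 32 with addend -4 is the usual x86-64 PC-relative fixup;
// RIP-relative offsets are measured from the end of the instruction, so the
// linker compensates for the 4-byte displacement field itself.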
write::Relocation {
offset: offset + proc_offset,
size: 32,
kind: RelocationKind::Relative,
encoding: RelocationEncoding::Generic,
symbol: data_id,
addend: -4,
}
}
Relocation::LinkedData { offset, name } => {
if let Some(sym_id) = output.symbol_id(name.as_bytes()) {
write::Relocation {
offset: offset + proc_offset,
size: 32,
kind: RelocationKind::GotRelative,
encoding: RelocationEncoding::Generic,
symbol: sym_id,
addend: -4,
}
} else {
return Err(format!("failed to find symbol for {:?}", name));
}
}
Relocation::LinkedFunction { offset, name } => {
if let Some(sym_id) = output.symbol_id(name.as_bytes()) {
write::Relocation {
offset: offset + proc_offset,
size: 32,
kind: RelocationKind::PltRelative,
encoding: RelocationEncoding::Generic,
symbol: sym_id,
addend: -4,
}
} else {
return Err(format!("failed to find symbol for {:?}", name));
}
}
};
output
.add_relocation(text, elfreloc)
.map_err(|e| format!("{:?}", e))?;
}
}
Ok(output)
}

View file

@@ -1,377 +0,0 @@
use bumpalo::collections::Vec;
// Not sure exactly how I want to represent registers.
// If we want max speed, we would likely make them structs that impl the same trait to avoid ifs.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub enum GPReg {
RAX = 0,
RCX = 1,
RDX = 2,
RBX = 3,
RSP = 4,
RBP = 5,
RSI = 6,
RDI = 7,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
}
const REX: u8 = 0x40;
const REX_W: u8 = REX + 0x8;
fn add_rm_extension(reg: GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 1
} else {
byte
}
}
fn add_opcode_extension(reg: GPReg, byte: u8) -> u8 {
add_rm_extension(reg, byte)
}
fn add_reg_extension(reg: GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 4
} else {
byte
}
}
// Below here are the functions for all of the assembly instructions.
// Their names are based on the instruction and operators combined.
// You should call `buf.reserve()` if you push or extend more than once.
// Unit tests are added at the bottom of the file to ensure correct asm generation.
// Please keep these in alphanumeric order.
/// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits to r/m64.
pub fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `ADD r/m64,r64` -> Add r64 to r/m64.
pub fn add_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]);
}
/// `CMOVL r64,r/m64` -> Move if less (SF ≠ OF).
pub fn cmovl_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg) {
let rex = add_reg_extension(dst, REX_W);
let rex = add_rm_extension(src, rex);
let dst_mod = (dst as u8 % 8) << 3;
let src_mod = src as u8 % 8;
buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r/m64, imm32` -> Move imm32 sign-extended to 64-bits to r/m64.
pub fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32) {
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `MOV r64, imm64` -> Move imm64 to r64.
pub fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64) {
if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 {
mov_register64bit_immediate32bit(buf, dst, imm as i32)
} else {
let rex = add_opcode_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(10);
buf.extend(&[rex, 0xB8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
pub fn mov_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r64,r/m64` -> Move r/m64 to r64.
pub fn mov_register64bit_stackoffset32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset; I didn't feel like figuring it out right now.
// Also, this may technically make generation faster since stack operations should be so common.
let rex = add_reg_extension(dst, REX_W);
let dst_mod = (dst as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
pub fn mov_stackoffset32bit_register64bit<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset; I didn't feel like figuring it out right now.
// Also, this may technically make generation faster since stack operations should be so common.
let rex = add_reg_extension(src, REX_W);
let src_mod = (src as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `NEG r/m64` -> Two's complement negate r/m64.
pub fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg) {
let rex = add_rm_extension(reg, REX_W);
let reg_mod = reg as u8 % 8;
buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
}
/// `RET` -> Near return to calling procedure.
pub fn ret_near<'a>(buf: &mut Vec<'a, u8>) {
buf.push(0xC3);
}
/// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64.
pub fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xE8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size.
pub fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x58 + reg_mod]);
} else {
buf.push(0x58 + reg_mod);
}
}
/// `PUSH r64` -> Push r64.
pub fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x50 + reg_mod]);
} else {
buf.push(0x50 + reg_mod);
}
}
// When writing tests, it is a good idea to test both a numbered and an unnumbered register.
// This is because R8-R15 often have special instruction prefixes.
#[cfg(test)]
mod tests {
use super::*;
const TEST_I32: i32 = 0x12345678;
const TEST_I64: i64 = 0x12345678_9ABCDEF0;
#[test]
fn test_add_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(GPReg::RAX, [0x48, 0x81, 0xC0]),
(GPReg::R15, [0x49, 0x81, 0xC7]),
] {
buf.clear();
add_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_add_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((GPReg::RAX, GPReg::RAX), [0x48, 0x01, 0xC0]),
((GPReg::RAX, GPReg::R15), [0x4C, 0x01, 0xF8]),
((GPReg::R15, GPReg::RAX), [0x49, 0x01, 0xC7]),
((GPReg::R15, GPReg::R15), [0x4D, 0x01, 0xFF]),
] {
buf.clear();
add_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_cmovl_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((GPReg::RAX, GPReg::RAX), [0x48, 0x0F, 0x4C, 0xC0]),
((GPReg::RAX, GPReg::R15), [0x49, 0x0F, 0x4C, 0xC7]),
((GPReg::R15, GPReg::RAX), [0x4C, 0x0F, 0x4C, 0xF8]),
((GPReg::R15, GPReg::R15), [0x4D, 0x0F, 0x4C, 0xFF]),
] {
buf.clear();
cmovl_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(GPReg::RAX, [0x48, 0xC7, 0xC0]),
(GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
mov_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_immediate64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[(GPReg::RAX, [0x48, 0xB8]), (GPReg::R15, [0x49, 0xBF])] {
buf.clear();
mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I64);
assert_eq!(expected, &buf[..2]);
assert_eq!(TEST_I64.to_le_bytes(), &buf[2..]);
}
for (dst, expected) in &[
(GPReg::RAX, [0x48, 0xC7, 0xC0]),
(GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I32 as i64);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
((GPReg::RAX, GPReg::RAX), [0x48, 0x89, 0xC0]),
((GPReg::RAX, GPReg::R15), [0x4C, 0x89, 0xF8]),
((GPReg::R15, GPReg::RAX), [0x49, 0x89, 0xC7]),
((GPReg::R15, GPReg::R15), [0x4D, 0x89, 0xFF]),
] {
buf.clear();
mov_register64bit_register64bit(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_stackoffset32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, offset), expected) in &[
((GPReg::RAX, TEST_I32), [0x48, 0x8B, 0x84, 0x24]),
((GPReg::R15, TEST_I32), [0x4C, 0x8B, 0xBC, 0x24]),
] {
buf.clear();
mov_register64bit_stackoffset32bit(&mut buf, *dst, *offset);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_mov_stackoffset32bit_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((offset, src), expected) in &[
((TEST_I32, GPReg::RAX), [0x48, 0x89, 0x84, 0x24]),
((TEST_I32, GPReg::R15), [0x4C, 0x89, 0xBC, 0x24]),
] {
buf.clear();
mov_stackoffset32bit_register64bit(&mut buf, *offset, *src);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_neg_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (reg, expected) in &[
(GPReg::RAX, [0x48, 0xF7, 0xD8]),
(GPReg::R15, [0x49, 0xF7, 0xDF]),
] {
buf.clear();
neg_register64bit(&mut buf, *reg);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_ret_near() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
ret_near(&mut buf);
assert_eq!(&[0xC3], &buf[..]);
}
#[test]
fn test_sub_register64bit_immediate32bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
(GPReg::RAX, [0x48, 0x81, 0xE8]),
(GPReg::R15, [0x49, 0x81, 0xEF]),
] {
buf.clear();
sub_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_pop_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[(GPReg::RAX, vec![0x58]), (GPReg::R15, vec![0x41, 0x5F])] {
buf.clear();
pop_register64bit(&mut buf, *dst);
assert_eq!(&expected[..], &buf[..]);
}
}
#[test]
fn test_push_register64bit() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (src, expected) in &[(GPReg::RAX, vec![0x50]), (GPReg::R15, vec![0x41, 0x57])] {
buf.clear();
push_register64bit(&mut buf, *src);
assert_eq!(&expected[..], &buf[..]);
}
}
}