Add floats to the dev backend

2025-10-03 00:24:34 +00:00 · 2021-01-18 12:07:47 -08:00 · 2021-01-18 12:07:47 -08:00 · 7f8511532a
commit 7f8511532a
parent eecdc7ff85
5 changed files with 406 additions and 61 deletions
--- a/compiler/gen_dev/src/generic64/aarch64.rs
+++ b/compiler/gen_dev/src/generic64/aarch64.rs
@ -1,4 +1,5 @@
-use crate::generic64::{Assembler, CallConv, GPRegTrait};
+use crate::generic64::{Assembler, CallConv, RegTrait};
+use crate::Relocation;
 use bumpalo::collections::Vec;

 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
@ -38,8 +39,12 @@ pub enum AArch64GPReg {
    /// This can mean Zero or Stack Pointer depending on the context.
    ZRSP = 31,
 }
+impl RegTrait for AArch64GPReg {}

-impl GPRegTrait for AArch64GPReg {}
+/// Floating point registers for AArch64.
+///
+/// This is currently a placeholder with no variants: the AArch64 floating point
+/// operations in this file are still `unimplemented!` stubs.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+// The enum has no variants yet, so nothing can construct or match on it.
+#[allow(dead_code)]
+pub enum AArch64FPReg {}
+impl RegTrait for AArch64FPReg {}

 pub struct AArch64Assembler {}

@ -49,7 +54,7 @@ pub struct AArch64Call {}

 const STACK_ALIGNMENT: u8 = 16;

-impl CallConv<AArch64GPReg> for AArch64Call {
+impl CallConv<AArch64GPReg, AArch64FPReg> for AArch64Call {
    const GP_PARAM_REGS: &'static [AArch64GPReg] = &[
        AArch64GPReg::X0,
        AArch64GPReg::X1,
@ -101,11 +106,14 @@ impl CallConv<AArch64GPReg> for AArch64Call {
        AArch64GPReg::IP0,
        AArch64GPReg::IP1,
    ];
+    const FP_PARAM_REGS: &'static [AArch64FPReg] = &[];
+    const FP_RETURN_REGS: &'static [AArch64FPReg] = Self::FP_PARAM_REGS;
+    const FP_DEFAULT_FREE_REGS: &'static [AArch64FPReg] = &[];

    const SHADOW_SPACE_SIZE: u8 = 0;

    #[inline(always)]
-    fn callee_saved(reg: &AArch64GPReg) -> bool {
+    fn gp_callee_saved(reg: &AArch64GPReg) -> bool {
        matches!(
            reg,
            AArch64GPReg::X19
@ -120,6 +128,10 @@ impl CallConv<AArch64GPReg> for AArch64Call {
                | AArch64GPReg::X28
        )
    }
+    /// Reports whether a floating point register is callee saved.
+    /// Not implemented for AArch64 yet; calling this panics via `unimplemented!`.
+    #[inline(always)]
+    fn fp_callee_saved(_reg: &AArch64FPReg) -> bool {
+        unimplemented!("AArch64 FPRegs not implemented yet");
+    }

    #[inline(always)]
    fn setup_stack(
@ -206,7 +218,7 @@ impl CallConv<AArch64GPReg> for AArch64Call {
    }
 }

-impl Assembler<AArch64GPReg> for AArch64Assembler {
+impl Assembler<AArch64GPReg, AArch64FPReg> for AArch64Assembler {
    #[inline(always)]
    fn abs_reg64_reg64(_buf: &mut Vec<'_, u8>, _dst: AArch64GPReg, _src: AArch64GPReg) {
        unimplemented!("abs_reg64_reg64 is not yet implement for AArch64");
@ -240,6 +252,16 @@ impl Assembler<AArch64GPReg> for AArch64Assembler {
        add_reg64_reg64_reg64(buf, dst, src1, src2);
    }

+    /// Loads a float literal into a floating point register.
+    /// Stub: not implemented for AArch64 yet, so every argument is ignored and the call panics.
+    #[inline(always)]
+    fn mov_freg64_imm64(
+        _buf: &mut Vec<'_, u8>,
+        _relocs: &mut Vec<'_, Relocation>,
+        _dst: AArch64FPReg,
+        _imm: f64,
+    ) {
+        unimplemented!("loading float literal not yet implemented for AArch64");
+    }
+
    #[inline(always)]
    fn mov_reg64_imm64(buf: &mut Vec<'_, u8>, dst: AArch64GPReg, imm: i64) {
        let mut remaining = imm as u64;
@ -258,6 +280,11 @@ impl Assembler<AArch64GPReg> for AArch64Assembler {
        }
    }

+    /// Moves a value from one floating point register to another.
+    /// Stub: not implemented for AArch64 yet, so the call panics.
+    #[inline(always)]
+    fn mov_freg64_freg64(_buf: &mut Vec<'_, u8>, _dst: AArch64FPReg, _src: AArch64FPReg) {
+        unimplemented!("moving data between float registers not yet implemented for AArch64");
+    }
+
    #[inline(always)]
    fn mov_reg64_reg64(buf: &mut Vec<'_, u8>, dst: AArch64GPReg, src: AArch64GPReg) {
        mov_reg64_reg64(buf, dst, src);
@ -275,6 +302,11 @@ impl Assembler<AArch64GPReg> for AArch64Assembler {
        }
    }

+    /// Stores a floating point register to the stack at `_offset`.
+    /// Stub: not implemented for AArch64 yet, so the call panics.
+    #[inline(always)]
+    fn mov_stack32_freg64(_buf: &mut Vec<'_, u8>, _offset: i32, _src: AArch64FPReg) {
+        unimplemented!("saving floating point reg to stack not yet implemented for AArch64");
+    }
+
    #[inline(always)]
    fn mov_stack32_reg64(buf: &mut Vec<'_, u8>, offset: i32, src: AArch64GPReg) {
        if offset < 0 {
--- a/compiler/gen_dev/src/generic64/mod.rs
+++ b/compiler/gen_dev/src/generic64/mod.rs
@ -9,29 +9,38 @@ use target_lexicon::Triple;
 pub mod aarch64;
 pub mod x86_64;

-pub trait CallConv<GPReg: GPRegTrait> {
+pub trait CallConv<GPReg: RegTrait, FPReg: RegTrait> {
    const GP_PARAM_REGS: &'static [GPReg];
    const GP_RETURN_REGS: &'static [GPReg];
    const GP_DEFAULT_FREE_REGS: &'static [GPReg];

+    const FP_PARAM_REGS: &'static [FPReg];
+    const FP_RETURN_REGS: &'static [FPReg];
+    const FP_DEFAULT_FREE_REGS: &'static [FPReg];
+
    const SHADOW_SPACE_SIZE: u8;

-    fn callee_saved(reg: &GPReg) -> bool;
+    fn gp_callee_saved(reg: &GPReg) -> bool;
    #[inline(always)]
-    fn caller_saved_regs(reg: &GPReg) -> bool {
-        !Self::callee_saved(reg)
+    fn gp_caller_saved(reg: &GPReg) -> bool {
+        !Self::gp_callee_saved(reg)
+    }
+    fn fp_callee_saved(reg: &FPReg) -> bool;
+    #[inline(always)]
+    fn fp_caller_saved(reg: &FPReg) -> bool {
+        !Self::fp_callee_saved(reg)
    }

    fn setup_stack<'a>(
        buf: &mut Vec<'a, u8>,
        leaf_function: bool,
-        saved_regs: &[GPReg],
+        gp_saved_regs: &[GPReg],
        requested_stack_size: i32,
    ) -> Result<i32, String>;
    fn cleanup_stack<'a>(
        buf: &mut Vec<'a, u8>,
        leaf_function: bool,
-        saved_regs: &[GPReg],
+        gp_saved_regs: &[GPReg],
        aligned_stack_size: i32,
    ) -> Result<(), String>;
 }
@ -42,13 +51,22 @@ pub trait CallConv<GPReg: GPRegTrait> {
 /// Thus, some backends will need to use multiple instructions to perform a single one of these calls.
 /// Generally, I prefer explicit sources, as opposed to dst being one of the sources. Ex: `x = x + y` would be `add x, x, y` instead of `add x, y`.
 /// dst should always come before sources.
-pub trait Assembler<GPReg: GPRegTrait> {
+pub trait Assembler<GPReg: RegTrait, FPReg: RegTrait> {
    fn abs_reg64_reg64(buf: &mut Vec<'_, u8>, dst: GPReg, src: GPReg);
    fn add_reg64_reg64_imm32(buf: &mut Vec<'_, u8>, dst: GPReg, src1: GPReg, imm32: i32);
    fn add_reg64_reg64_reg64(buf: &mut Vec<'_, u8>, dst: GPReg, src1: GPReg, src2: GPReg);
+    fn mov_freg64_imm64(
+        buf: &mut Vec<'_, u8>,
+        relocs: &mut Vec<'_, Relocation>,
+        dst: FPReg,
+        imm: f64,
+    );
    fn mov_reg64_imm64(buf: &mut Vec<'_, u8>, dst: GPReg, imm: i64);
+    fn mov_freg64_freg64(buf: &mut Vec<'_, u8>, dst: FPReg, src: FPReg);
    fn mov_reg64_reg64(buf: &mut Vec<'_, u8>, dst: GPReg, src: GPReg);
+    // fn mov_freg64_stack32(buf: &mut Vec<'_, u8>, dst: FPReg, offset: i32);
    fn mov_reg64_stack32(buf: &mut Vec<'_, u8>, dst: GPReg, offset: i32);
+    fn mov_stack32_freg64(buf: &mut Vec<'_, u8>, offset: i32, src: FPReg);
    fn mov_stack32_reg64(buf: &mut Vec<'_, u8>, offset: i32, src: GPReg);
    fn sub_reg64_reg64_imm32(buf: &mut Vec<'_, u8>, dst: GPReg, src1: GPReg, imm32: i32);
    fn sub_reg64_reg64_reg64(buf: &mut Vec<'_, u8>, dst: GPReg, src1: GPReg, src2: GPReg);
@ -56,21 +74,31 @@ pub trait Assembler<GPReg: GPRegTrait> {
 }

 #[derive(Clone, Debug, PartialEq)]
-enum SymbolStorage<GPReg: GPRegTrait> {
+#[allow(dead_code)]
+enum SymbolStorage<GPReg: RegTrait, FPReg: RegTrait> {
    // These may need layout, but I am not sure.
    // I think whenever a symbol would be used, we specify layout anyways.
    GPReg(GPReg),
+    FPReg(FPReg),
    Stack(i32),
    StackAndGPReg(GPReg, i32),
+    StackAndFPReg(FPReg, i32),
 }

-pub trait GPRegTrait: Copy + Eq + std::hash::Hash + std::fmt::Debug + 'static {}
+pub trait RegTrait: Copy + Eq + std::hash::Hash + std::fmt::Debug + 'static {}

-pub struct Backend64Bit<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> {
+pub struct Backend64Bit<
+    'a,
+    GPReg: RegTrait,
+    FPReg: RegTrait,
+    ASM: Assembler<GPReg, FPReg>,
+    CC: CallConv<GPReg, FPReg>,
+> {
    phantom_asm: PhantomData<ASM>,
    phantom_cc: PhantomData<CC>,
    env: &'a Env<'a>,
    buf: Vec<'a, u8>,
+    relocs: Vec<'a, Relocation<'a>>,

    /// leaf_function is true if the only calls this function makes are tail calls.
    /// If that is the case, we can skip emitting the frame pointer and updating the stack.
@ -78,26 +106,34 @@ pub struct Backend64Bit<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallCo

    last_seen_map: MutMap<Symbol, *const Stmt<'a>>,
    free_map: MutMap<*const Stmt<'a>, Vec<'a, Symbol>>,
-    symbols_map: MutMap<Symbol, SymbolStorage<GPReg>>,
+    symbols_map: MutMap<Symbol, SymbolStorage<GPReg, FPReg>>,
    literal_map: MutMap<Symbol, Literal<'a>>,

    // This should probably be smarter than a vec.
    // There are certain registers we should always use first. With pushing and popping, this could get mixed.
    gp_free_regs: Vec<'a, GPReg>,
+    fp_free_regs: Vec<'a, FPReg>,

    // The last major thing we need is a way to decide what reg to free when all of them are full.
    // Theoretically we want a basic lru cache for the currently loaded symbols.
    // For now just a vec of used registers and the symbols they contain.
    gp_used_regs: Vec<'a, (GPReg, Symbol)>,
-
-    stack_size: i32,
+    fp_used_regs: Vec<'a, (FPReg, Symbol)>,

    // used callee saved regs must be tracked for pushing and popping at the beginning/end of the function.
-    used_callee_saved_regs: MutSet<GPReg>,
+    gp_used_callee_saved_regs: MutSet<GPReg>,
+    fp_used_callee_saved_regs: MutSet<FPReg>,
+
+    stack_size: i32,
 }

-impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<'a>
-    for Backend64Bit<'a, GPReg, ASM, CC>
+impl<
+        'a,
+        GPReg: RegTrait,
+        FPReg: RegTrait,
+        ASM: Assembler<GPReg, FPReg>,
+        CC: CallConv<GPReg, FPReg>,
+    > Backend<'a> for Backend64Bit<'a, GPReg, FPReg, ASM, CC>
 {
    fn new(env: &'a Env, _target: &Triple) -> Result<Self, String> {
        Ok(Backend64Bit {
@ -106,14 +142,18 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
            env,
            leaf_function: true,
            buf: bumpalo::vec!(in env.arena),
+            relocs: bumpalo::vec!(in env.arena),
            last_seen_map: MutMap::default(),
            free_map: MutMap::default(),
            symbols_map: MutMap::default(),
            literal_map: MutMap::default(),
            gp_free_regs: bumpalo::vec![in env.arena],
            gp_used_regs: bumpalo::vec![in env.arena],
+            gp_used_callee_saved_regs: MutSet::default(),
+            fp_free_regs: bumpalo::vec![in env.arena],
+            fp_used_regs: bumpalo::vec![in env.arena],
+            fp_used_callee_saved_regs: MutSet::default(),
            stack_size: 0,
-            used_callee_saved_regs: MutSet::default(),
        })
    }

@ -128,11 +168,16 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
        self.free_map.clear();
        self.symbols_map.clear();
        self.buf.clear();
-        self.used_callee_saved_regs.clear();
+        self.gp_used_callee_saved_regs.clear();
        self.gp_free_regs.clear();
        self.gp_used_regs.clear();
        self.gp_free_regs
            .extend_from_slice(CC::GP_DEFAULT_FREE_REGS);
+        self.fp_used_callee_saved_regs.clear();
+        self.fp_free_regs.clear();
+        self.fp_used_regs.clear();
+        self.fp_free_regs
+            .extend_from_slice(CC::FP_DEFAULT_FREE_REGS);
    }

    fn set_not_leaf_function(&mut self) {
@ -156,12 +201,12 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
        &mut self.free_map
    }

-    fn finalize(&mut self) -> Result<(&'a [u8], &[Relocation]), String> {
+    fn finalize(&mut self) -> Result<(&'a [u8], &[&Relocation]), String> {
        let mut out = bumpalo::vec![in self.env.arena];

        // Setup stack.
        let mut used_regs = bumpalo::vec![in self.env.arena];
-        used_regs.extend(&self.used_callee_saved_regs);
+        used_regs.extend(&self.gp_used_callee_saved_regs);
        let aligned_stack_size =
            CC::setup_stack(&mut out, self.leaf_function, &used_regs, self.stack_size)?;

@ -172,12 +217,14 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
        CC::cleanup_stack(&mut out, self.leaf_function, &used_regs, aligned_stack_size)?;
        ASM::ret(&mut out);

-        Ok((out.into_bump_slice(), &[]))
+        let mut out_relocs = bumpalo::vec![in self.env.arena];
+        out_relocs.extend(&self.relocs);
+        Ok((out.into_bump_slice(), out_relocs.into_bump_slice()))
    }

    fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String> {
-        let dst_reg = self.claim_gp_reg(dst)?;
-        let src_reg = self.load_to_reg(src)?;
+        let dst_reg = self.claim_gpreg(dst)?;
+        let src_reg = self.load_to_gpreg(src)?;
        ASM::abs_reg64_reg64(&mut self.buf, dst_reg, src_reg);
        Ok(())
    }
@ -188,9 +235,9 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
        src1: &Symbol,
        src2: &Symbol,
    ) -> Result<(), String> {
-        let dst_reg = self.claim_gp_reg(dst)?;
-        let src1_reg = self.load_to_reg(src1)?;
-        let src2_reg = self.load_to_reg(src2)?;
+        let dst_reg = self.claim_gpreg(dst)?;
+        let src1_reg = self.load_to_gpreg(src1)?;
+        let src2_reg = self.load_to_gpreg(src2)?;
        ASM::add_reg64_reg64_reg64(&mut self.buf, dst_reg, src1_reg, src2_reg);
        Ok(())
    }
@ -201,9 +248,9 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
        src1: &Symbol,
        src2: &Symbol,
    ) -> Result<(), String> {
-        let dst_reg = self.claim_gp_reg(dst)?;
-        let src1_reg = self.load_to_reg(src1)?;
-        let src2_reg = self.load_to_reg(src2)?;
+        let dst_reg = self.claim_gpreg(dst)?;
+        let src1_reg = self.load_to_gpreg(src1)?;
+        let src2_reg = self.load_to_gpreg(src2)?;
        ASM::sub_reg64_reg64_reg64(&mut self.buf, dst_reg, src1_reg, src2_reg);
        Ok(())
    }
@ -211,11 +258,17 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
    fn load_literal(&mut self, sym: &Symbol, lit: &Literal<'a>) -> Result<(), String> {
        match lit {
            Literal::Int(x) => {
-                let reg = self.claim_gp_reg(sym)?;
+                let reg = self.claim_gpreg(sym)?;
                let val = *x;
                ASM::mov_reg64_imm64(&mut self.buf, reg, val);
                Ok(())
            }
+            Literal::Float(x) => {
+                let reg = self.claim_fpreg(sym)?;
+                let val = *x;
+                ASM::mov_freg64_imm64(&mut self.buf, &mut self.relocs, reg, val);
+                Ok(())
+            }
            x => Err(format!("loading literal, {:?}, is not yet implemented", x)),
        }
    }
@ -242,6 +295,11 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
                ASM::mov_reg64_reg64(&mut self.buf, CC::GP_RETURN_REGS[0], *reg);
                Ok(())
            }
+            Some(SymbolStorage::FPReg(reg)) if *reg == CC::FP_RETURN_REGS[0] => Ok(()),
+            Some(SymbolStorage::FPReg(reg)) => {
+                ASM::mov_freg64_freg64(&mut self.buf, CC::FP_RETURN_REGS[0], *reg);
+                Ok(())
+            }
            Some(x) => Err(format!(
                "returning symbol storage, {:?}, is not yet implemented",
                x
@ -253,14 +311,19 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<

 /// This impl block is for ir related instructions that need backend specific information.
 /// For example, loading a symbol for doing a computation.
-impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
-    Backend64Bit<'a, GPReg, ASM, CC>
+impl<
+        'a,
+        FPReg: RegTrait,
+        GPReg: RegTrait,
+        ASM: Assembler<GPReg, FPReg>,
+        CC: CallConv<GPReg, FPReg>,
+    > Backend64Bit<'a, GPReg, FPReg, ASM, CC>
 {
-    fn claim_gp_reg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
+    fn claim_gpreg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
        let reg = if !self.gp_free_regs.is_empty() {
            let free_reg = self.gp_free_regs.pop().unwrap();
-            if CC::callee_saved(&free_reg) {
-                self.used_callee_saved_regs.insert(free_reg);
+            if CC::gp_callee_saved(&free_reg) {
+                self.gp_used_callee_saved_regs.insert(free_reg);
            }
            Ok(free_reg)
        } else if !self.gp_used_regs.is_empty() {
@ -268,7 +331,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
            self.free_to_stack(&sym)?;
            Ok(reg)
        } else {
-            Err("completely out of registers".to_string())
+            Err("completely out of general purpose registers".to_string())
        }?;

        self.gp_used_regs.push((reg, *sym));
@ -276,20 +339,46 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
        Ok(reg)
    }

-    fn load_to_reg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
+    /// Assigns a floating point register to `sym` and records the assignment.
+    ///
+    /// A register is taken from the free list when one is available. Otherwise the oldest
+    /// entry in `fp_used_regs` is evicted to the stack and its register is reused.
+    /// Returns an error when there is neither a free nor a used register to take.
+    fn claim_fpreg(&mut self, sym: &Symbol) -> Result<FPReg, String> {
+        let reg = if let Some(free_reg) = self.fp_free_regs.pop() {
+            // Remember callee saved registers that get touched by this function.
+            if CC::fp_callee_saved(&free_reg) {
+                self.fp_used_callee_saved_regs.insert(free_reg);
+            }
+            free_reg
+        } else if self.fp_used_regs.is_empty() {
+            return Err("completely out of floating point registers".to_string());
+        } else {
+            // Evict the symbol that has held its register the longest.
+            let (evicted_reg, evicted_sym) = self.fp_used_regs.remove(0);
+            self.free_to_stack(&evicted_sym)?;
+            evicted_reg
+        };
+
+        self.fp_used_regs.push((reg, *sym));
+        self.symbols_map.insert(*sym, SymbolStorage::FPReg(reg));
+        Ok(reg)
+    }
+
+    fn load_to_gpreg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
        let val = self.symbols_map.remove(sym);
        match val {
            Some(SymbolStorage::GPReg(reg)) => {
                self.symbols_map.insert(*sym, SymbolStorage::GPReg(reg));
                Ok(reg)
            }
+            Some(SymbolStorage::FPReg(_reg)) => {
+                Err("Cannot load floating point symbol into GPReg".to_string())
+            }
            Some(SymbolStorage::StackAndGPReg(reg, offset)) => {
                self.symbols_map
                    .insert(*sym, SymbolStorage::StackAndGPReg(reg, offset));
                Ok(reg)
            }
+            Some(SymbolStorage::StackAndFPReg(_reg, _offset)) => {
+                Err("Cannot load floating point symbol into GPReg".to_string())
+            }
            Some(SymbolStorage::Stack(offset)) => {
-                let reg = self.claim_gp_reg(sym)?;
+                let reg = self.claim_gpreg(sym)?;
                self.symbols_map
                    .insert(*sym, SymbolStorage::StackAndGPReg(reg, offset));
                ASM::mov_reg64_stack32(&mut self.buf, reg, offset as i32);
@ -308,10 +397,20 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
                self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
                Ok(())
            }
+            Some(SymbolStorage::FPReg(reg)) => {
+                let offset = self.increase_stack_size(8)?;
+                ASM::mov_stack32_freg64(&mut self.buf, offset as i32, reg);
+                self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
+                Ok(())
+            }
            Some(SymbolStorage::StackAndGPReg(_, offset)) => {
                self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
                Ok(())
            }
+            Some(SymbolStorage::StackAndFPReg(_, offset)) => {
+                self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
+                Ok(())
+            }
            Some(SymbolStorage::Stack(offset)) => {
                self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
                Ok(())
--- a/compiler/gen_dev/src/generic64/x86_64.rs
+++ b/compiler/gen_dev/src/generic64/x86_64.rs
@ -1,4 +1,5 @@
-use crate::generic64::{Assembler, CallConv, GPRegTrait};
+use crate::generic64::{Assembler, CallConv, RegTrait};
+use crate::Relocation;
 use bumpalo::collections::Vec;

 // Not sure exactly how I want to represent registers.
@ -22,8 +23,28 @@ pub enum X86_64GPReg {
    R14 = 14,
    R15 = 15,
 }
+impl RegTrait for X86_64GPReg {}

-impl GPRegTrait for X86_64GPReg {}
+/// The x86_64 floating point (XMM) registers.
+///
+/// Each discriminant is the register number; the instruction encoders cast a
+/// register with `as u8` and split it into a "high" bit (> 7) and the low three bits.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub enum X86_64FPReg {
+    XMM0 = 0,
+    XMM1 = 1,
+    XMM2 = 2,
+    XMM3 = 3,
+    XMM4 = 4,
+    XMM5 = 5,
+    XMM6 = 6,
+    XMM7 = 7,
+    XMM8 = 8,
+    XMM9 = 9,
+    XMM10 = 10,
+    XMM11 = 11,
+    XMM12 = 12,
+    XMM13 = 13,
+    XMM14 = 14,
+    XMM15 = 15,
+}
+impl RegTrait for X86_64FPReg {}

 pub struct X86_64Assembler {}
 pub struct X86_64WindowsFastcall {}
@ -31,7 +52,7 @@ pub struct X86_64SystemV {}

 const STACK_ALIGNMENT: u8 = 16;

-impl CallConv<X86_64GPReg> for X86_64SystemV {
+impl CallConv<X86_64GPReg, X86_64FPReg> for X86_64SystemV {
    const GP_PARAM_REGS: &'static [X86_64GPReg] = &[
        X86_64GPReg::RDI,
        X86_64GPReg::RSI,
@ -41,7 +62,6 @@ impl CallConv<X86_64GPReg> for X86_64SystemV {
        X86_64GPReg::R9,
    ];
    const GP_RETURN_REGS: &'static [X86_64GPReg] = &[X86_64GPReg::RAX, X86_64GPReg::RDX];
-
    const GP_DEFAULT_FREE_REGS: &'static [X86_64GPReg] = &[
        // The regs we want to use first should be at the end of this vec.
        // We will use pop to get which reg to use next
@ -64,10 +84,44 @@ impl CallConv<X86_64GPReg> for X86_64SystemV {
        X86_64GPReg::R10,
        X86_64GPReg::R11,
    ];
+
+    /// Floating point registers that carry parameters under this calling convention.
+    const FP_PARAM_REGS: &'static [X86_64FPReg] = &[
+        X86_64FPReg::XMM0,
+        X86_64FPReg::XMM1,
+        X86_64FPReg::XMM2,
+        X86_64FPReg::XMM3,
+        X86_64FPReg::XMM4,
+        X86_64FPReg::XMM5,
+        X86_64FPReg::XMM6,
+        X86_64FPReg::XMM7,
+    ];
+    /// Floating point return registers. The backend moves a returned float into the first entry.
+    const FP_RETURN_REGS: &'static [X86_64FPReg] = &[X86_64FPReg::XMM0, X86_64FPReg::XMM1];
+    /// Initial contents of the backend's free floating point register list (copied in on reset).
+    const FP_DEFAULT_FREE_REGS: &'static [X86_64FPReg] = &[
+        // The regs we want to use first should be at the end of this vec.
+        // We will use pop to get which reg to use next
+        // No callee saved regs.
+        // Use caller saved regs first.
+        X86_64FPReg::XMM15,
+        X86_64FPReg::XMM14,
+        X86_64FPReg::XMM13,
+        X86_64FPReg::XMM12,
+        X86_64FPReg::XMM11,
+        X86_64FPReg::XMM10,
+        X86_64FPReg::XMM9,
+        X86_64FPReg::XMM8,
+        X86_64FPReg::XMM7,
+        X86_64FPReg::XMM6,
+        X86_64FPReg::XMM5,
+        X86_64FPReg::XMM4,
+        X86_64FPReg::XMM3,
+        X86_64FPReg::XMM2,
+        X86_64FPReg::XMM1,
+        X86_64FPReg::XMM0,
+    ];
    const SHADOW_SPACE_SIZE: u8 = 0;

    #[inline(always)]
-    fn callee_saved(reg: &X86_64GPReg) -> bool {
+    fn gp_callee_saved(reg: &X86_64GPReg) -> bool {
        matches!(
            reg,
            X86_64GPReg::RBX
@ -79,28 +133,33 @@ impl CallConv<X86_64GPReg> for X86_64SystemV {
        )
    }

+    /// No floating point register is treated as callee saved under this calling convention,
+    /// so this returns `false` for every register.
+    #[inline(always)]
+    fn fp_callee_saved(_reg: &X86_64FPReg) -> bool {
+        false
+    }
+
    #[inline(always)]
    fn setup_stack<'a>(
        buf: &mut Vec<'a, u8>,
        leaf_function: bool,
-        saved_regs: &[X86_64GPReg],
+        gp_saved_regs: &[X86_64GPReg],
        requested_stack_size: i32,
    ) -> Result<i32, String> {
-        x86_64_generic_setup_stack(buf, leaf_function, saved_regs, requested_stack_size)
+        x86_64_generic_setup_stack(buf, leaf_function, gp_saved_regs, requested_stack_size)
    }

    #[inline(always)]
    fn cleanup_stack<'a>(
        buf: &mut Vec<'a, u8>,
        leaf_function: bool,
-        saved_regs: &[X86_64GPReg],
+        gp_saved_regs: &[X86_64GPReg],
        aligned_stack_size: i32,
    ) -> Result<(), String> {
-        x86_64_generic_cleanup_stack(buf, leaf_function, saved_regs, aligned_stack_size)
+        x86_64_generic_cleanup_stack(buf, leaf_function, gp_saved_regs, aligned_stack_size)
    }
 }

-impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
+impl CallConv<X86_64GPReg, X86_64FPReg> for X86_64WindowsFastcall {
    const GP_PARAM_REGS: &'static [X86_64GPReg] = &[
        X86_64GPReg::RCX,
        X86_64GPReg::RDX,
@ -132,10 +191,39 @@ impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
        X86_64GPReg::R10,
        X86_64GPReg::R11,
    ];
+    /// Floating point registers that carry parameters under this calling convention.
+    const FP_PARAM_REGS: &'static [X86_64FPReg] = &[
+        X86_64FPReg::XMM0,
+        X86_64FPReg::XMM1,
+        X86_64FPReg::XMM2,
+        X86_64FPReg::XMM3,
+    ];
+    /// Floating point return register. The backend moves a returned float into the first entry.
+    const FP_RETURN_REGS: &'static [X86_64FPReg] = &[X86_64FPReg::XMM0];
+    /// Initial contents of the backend's free floating point register list (copied in on reset).
+    /// Every register must appear exactly once: a duplicate could be handed out to two
+    /// symbols at the same time, and a missing register would never be used.
+    const FP_DEFAULT_FREE_REGS: &'static [X86_64FPReg] = &[
+        // The regs we want to use first should be at the end of this vec.
+        // We will use pop to get which reg to use next
+        // Use callee saved regs last.
+        X86_64FPReg::XMM15,
+        X86_64FPReg::XMM14,
+        X86_64FPReg::XMM13,
+        X86_64FPReg::XMM12,
+        X86_64FPReg::XMM11,
+        X86_64FPReg::XMM10,
+        X86_64FPReg::XMM9,
+        X86_64FPReg::XMM8,
+        X86_64FPReg::XMM7,
+        X86_64FPReg::XMM6,
+        // Use caller saved regs first.
+        X86_64FPReg::XMM5,
+        X86_64FPReg::XMM4,
+        X86_64FPReg::XMM3,
+        X86_64FPReg::XMM2,
+        X86_64FPReg::XMM1,
+        X86_64FPReg::XMM0,
+    ];
    const SHADOW_SPACE_SIZE: u8 = 32;

    #[inline(always)]
-    fn callee_saved(reg: &X86_64GPReg) -> bool {
+    fn gp_callee_saved(reg: &X86_64GPReg) -> bool {
        matches!(
            reg,
            X86_64GPReg::RBX
@ -150,6 +238,19 @@ impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
        )
    }

+    /// Returns true for the floating point registers that a callee must preserve.
+    ///
+    /// Under the Windows x64 calling convention XMM6-XMM15 are nonvolatile (callee saved),
+    /// while XMM0-XMM5 are volatile and may be clobbered freely.
+    #[inline(always)]
+    fn fp_callee_saved(reg: &X86_64FPReg) -> bool {
+        matches!(
+            reg,
+            X86_64FPReg::XMM6
+                | X86_64FPReg::XMM7
+                | X86_64FPReg::XMM8
+                | X86_64FPReg::XMM9
+                | X86_64FPReg::XMM10
+                | X86_64FPReg::XMM11
+                | X86_64FPReg::XMM12
+                | X86_64FPReg::XMM13
+                | X86_64FPReg::XMM14
+                | X86_64FPReg::XMM15
+        )
+    }
+
    #[inline(always)]
    fn setup_stack<'a>(
        buf: &mut Vec<'a, u8>,
@ -240,7 +341,7 @@ fn x86_64_generic_cleanup_stack<'a>(
    Ok(())
 }

-impl Assembler<X86_64GPReg> for X86_64Assembler {
+impl Assembler<X86_64GPReg, X86_64FPReg> for X86_64Assembler {
    // These functions should map to the raw assembly functions below.
    // In some cases, that means you can just directly call one of the direct assembly functions.
    #[inline(always)]
@ -280,10 +381,27 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
        }
    }
    #[inline(always)]
+    /// Loads the float literal `imm` into `dst`.
+    ///
+    /// The value is not encoded in the instruction itself. A RIP-relative `MOVSD` with a
+    /// placeholder displacement of 0 is emitted, and a `Relocation::LocalData` holding the
+    /// little-endian bytes of `imm` is pushed onto `relocs` for that displacement
+    /// (presumably patched when the object file is built; that step is not visible here).
+    fn mov_freg64_imm64(
+        buf: &mut Vec<'_, u8>,
+        relocs: &mut Vec<'_, Relocation>,
+        dst: X86_64FPReg,
+        imm: f64,
+    ) {
+        movsd_freg64_rip_offset32(buf, dst, 0);
+        relocs.push(Relocation::LocalData {
+            // The 4 byte displacement is the last thing written, so it starts at `len - 4`.
+            offset: buf.len() as u64 - 4,
+            data: imm.to_le_bytes().to_vec(),
+        });
+    }
+    #[inline(always)]
    fn mov_reg64_imm64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, imm: i64) {
        mov_reg64_imm64(buf, dst, imm);
    }
    #[inline(always)]
+    /// Copies the float in `src` into `dst` by emitting a register to register `MOVSD`.
+    fn mov_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FPReg, src: X86_64FPReg) {
+        movsd_freg64_freg64(buf, dst, src);
+    }
+    #[inline(always)]
    fn mov_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
        mov_reg64_reg64(buf, dst, src);
    }
@ -292,6 +410,10 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
        mov_reg64_stack32(buf, dst, offset);
    }
    #[inline(always)]
+    /// Stores a floating point register to the stack at `_offset`.
+    /// Stub: not implemented for x86_64 yet, so the call panics.
+    fn mov_stack32_freg64(_buf: &mut Vec<'_, u8>, _offset: i32, _src: X86_64FPReg) {
+        unimplemented!("saving floating point reg to stack not yet implemented for X86_64");
+    }
+    #[inline(always)]
    fn mov_stack32_reg64(buf: &mut Vec<'_, u8>, offset: i32, src: X86_64GPReg) {
        mov_stack32_reg64(buf, offset, src);
    }
@ -473,6 +595,39 @@ fn mov_stack32_reg64(buf: &mut Vec<'_, u8>, offset: i32, src: X86_64GPReg) {
    buf.extend(&offset.to_le_bytes());
 }

+/// `MOVSD xmm1,xmm2` -> Move scalar double-precision floating-point value from xmm2 to xmm1 register.
+#[inline(always)]
+fn movsd_freg64_freg64(buf: &mut Vec<'_, u8>, dst: X86_64FPReg, src: X86_64FPReg) {
+    let (dst_num, src_num) = (dst as u8, src as u8);
+    let dst_is_high = dst_num > 7;
+    let src_is_high = src_num > 7;
+    // Final byte: 0xC0 plus the low three bits of dst (shifted) and of src.
+    let modrm = 0xC0 + ((dst_num % 8) << 3) + (src_num % 8);
+    if dst_is_high || src_is_high {
+        // XMM8-XMM15 need an extra prefix byte carrying the high bit of each register.
+        let prefix = 0x40 + ((dst_is_high as u8) << 2) + (src_is_high as u8);
+        buf.extend(&[0xF2, prefix, 0x0F, 0x10, modrm])
+    } else {
+        buf.extend(&[0xF2, 0x0F, 0x10, modrm])
+    }
+}
+
+/// `MOVSD xmm, m64` -> Load scalar double-precision floating-point value from m64 to xmm register.
+/// The memory operand is RIP-relative; `offset` is appended as a 4 byte little-endian displacement.
+fn movsd_freg64_rip_offset32(buf: &mut Vec<'_, u8>, dst: X86_64FPReg, offset: u32) {
+    let dst_mod = dst as u8 % 8;
+    if dst as u8 > 7 {
+        // XMM8-XMM15 need an extra prefix byte (0x44), making the instruction 9 bytes long.
+        buf.reserve(9);
+        buf.extend(&[0xF2, 0x44, 0x0F, 0x10, 0x05 + (dst_mod << 3)]);
+    } else {
+        buf.reserve(8);
+        buf.extend(&[0xF2, 0x0F, 0x10, 0x05 + (dst_mod << 3)]);
+    }
+    // The displacement always comes last, so callers can find it at `buf.len() - 4`.
+    buf.extend(&offset.to_le_bytes());
+}
+
 /// `NEG r/m64` -> Two's complement negate r/m64.
 #[inline(always)]
 fn neg_reg64(buf: &mut Vec<'_, u8>, reg: X86_64GPReg) {
@ -675,6 +830,52 @@ mod tests {
        }
    }

+    // Checks the encoding of register to register MOVSD for all four combinations of a
+    // low register (XMM0) and a high register (XMM15) as destination and source.
+    #[test]
+    fn test_movsd_freg64_freg64() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        for ((dst, src), expected) in &[
+            (
+                // Both registers low: no extra prefix byte.
+                (X86_64FPReg::XMM0, X86_64FPReg::XMM0),
+                vec![0xF2, 0x0F, 0x10, 0xC0],
+            ),
+            (
+                // High source only.
+                (X86_64FPReg::XMM0, X86_64FPReg::XMM15),
+                vec![0xF2, 0x41, 0x0F, 0x10, 0xC7],
+            ),
+            (
+                // High destination only.
+                (X86_64FPReg::XMM15, X86_64FPReg::XMM0),
+                vec![0xF2, 0x44, 0x0F, 0x10, 0xF8],
+            ),
+            (
+                // Both registers high.
+                (X86_64FPReg::XMM15, X86_64FPReg::XMM15),
+                vec![0xF2, 0x45, 0x0F, 0x10, 0xFF],
+            ),
+        ] {
+            buf.clear();
+            movsd_freg64_freg64(&mut buf, *dst, *src);
+            assert_eq!(&expected[..], &buf[..]);
+        }
+    }
+
+    // Checks the encoding of the RIP-relative MOVSD load for a low (XMM0) and a high (XMM15)
+    // destination register, and that the displacement is emitted as the last four bytes.
+    #[test]
+    fn test_movsd_freg64_rip_offset32() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        for ((dst, offset), expected) in &[
+            ((X86_64FPReg::XMM0, TEST_I32), vec![0xF2, 0x0F, 0x10, 0x05]),
+            (
+                (X86_64FPReg::XMM15, TEST_I32),
+                vec![0xF2, 0x44, 0x0F, 0x10, 0x3D],
+            ),
+        ] {
+            buf.clear();
+            movsd_freg64_rip_offset32(&mut buf, *dst, *offset as u32);
+            // Everything before the trailing displacement is the instruction encoding.
+            assert_eq!(&expected[..], &buf[..(buf.len() - 4)]);
+            // The trailing four bytes are the little-endian displacement.
+            assert_eq!(TEST_I32.to_le_bytes(), &buf[(buf.len() - 4)..]);
+        }
+    }
+
    #[test]
    fn test_neg_reg64() {
        let arena = bumpalo::Bump::new();
--- a/compiler/gen_dev/src/lib.rs
+++ b/compiler/gen_dev/src/lib.rs
@ -29,10 +29,21 @@ const INLINED_SYMBOLS: [Symbol; 3] = [Symbol::NUM_ABS, Symbol::NUM_ADD, Symbol::
 // These relocations likely will need a length.
 // They may even need more definition, but this should be at least good enough for how we will use elf.
 #[allow(dead_code)]
-enum Relocation<'a> {
-    LocalData { offset: u64, data: &'a [u8] },
-    LinkedFunction { offset: u64, name: &'a str },
-    LinkedData { offset: u64, name: &'a str },
+pub enum Relocation<'a> {
+    LocalData {
+        offset: u64,
+        // This should probably technically be a bumpalo::Vec.
+        // The problem is that it currently is built in a place that can't access the arena.
+        data: std::vec::Vec<u8>,
+    },
+    LinkedFunction {
+        offset: u64,
+        name: &'a str,
+    },
+    LinkedData {
+        offset: u64,
+        name: &'a str,
+    },
 }

 trait Backend<'a>
@ -51,10 +62,10 @@ where
    /// finalize does setup because things like stack size and jump locations are not known until the function is written.
    /// For example, this can store the frame pointer and set up stack space.
    /// finalize is run at the end of build_proc when all internal code is finalized.
-    fn finalize(&mut self) -> Result<(&'a [u8], &[Relocation]), String>;
+    fn finalize(&mut self) -> Result<(&'a [u8], &[&Relocation]), String>;

    /// build_proc creates a procedure and outputs it to the wrapped object writer.
-    fn build_proc(&mut self, proc: Proc<'a>) -> Result<(&'a [u8], &[Relocation]), String> {
+    fn build_proc(&mut self, proc: Proc<'a>) -> Result<(&'a [u8], &[&Relocation]), String> {
        self.reset();
        // TODO: let the backend know of all the arguments.
        // let start = std::time::Instant::now();
--- a/compiler/gen_dev/src/object_builder.rs
+++ b/compiler/gen_dev/src/object_builder.rs
@ -30,6 +30,7 @@ pub fn build_module<'a>(
        } => {
            let backend: Backend64Bit<
                x86_64::X86_64GPReg,
+                x86_64::X86_64FPReg,
                x86_64::X86_64Assembler,
                x86_64::X86_64SystemV,
            > = Backend::new(env, target)?;
@ -47,6 +48,7 @@ pub fn build_module<'a>(
        } => {
            let backend: Backend64Bit<
                aarch64::AArch64GPReg,
+                aarch64::AArch64FPReg,
                aarch64::AArch64Assembler,
                aarch64::AArch64Call,
            > = Backend::new(env, target)?;