Beat the borrow-checker by following gen_dev structure

This commit is contained in:
Brian Carroll 2021-09-01 18:20:54 +01:00
parent e67efaba4d
commit 3bf94e020c
3 changed files with 128 additions and 137 deletions

View file

@ -1,4 +1,6 @@
use parity_wasm::elements::{Instruction, Instruction::*, Local, ValueType}; use parity_wasm::builder;
use parity_wasm::builder::{CodeLocation, ModuleBuilder};
use parity_wasm::elements::{Instruction, Instruction::*, Instructions, Local, ValueType};
use roc_collections::all::MutMap; use roc_collections::all::MutMap;
use roc_module::low_level::LowLevel; use roc_module::low_level::LowLevel;
@ -6,7 +8,9 @@ use roc_module::symbol::Symbol;
use roc_mono::ir::{CallType, Expr, Literal, Proc, Stmt}; use roc_mono::ir::{CallType, Expr, Literal, Proc, Stmt};
use roc_mono::layout::{Builtin, Layout}; use roc_mono::layout::{Builtin, Layout};
use crate::module::ModuleState; // Don't allocate any constant data at address zero or near it. Would be valid, but bug-prone.
// Follow Emscripten's example by using 1kB (4 bytes would probably do)
const UNUSED_DATA_SECTION_BYTES: u32 = 1024;
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
struct LocalId(u32); struct LocalId(u32);
@ -14,13 +18,13 @@ struct LocalId(u32);
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
struct LabelId(u32); struct LabelId(u32);
struct SymbolStorage(LocalId, WasmLayout);
struct WasmLayout { struct WasmLayout {
value_type: ValueType, value_type: ValueType,
stack_memory: u32, stack_memory: u32,
} }
struct SymbolStorage(LocalId, WasmLayout);
impl WasmLayout { impl WasmLayout {
fn new(layout: &Layout) -> Result<Self, String> { fn new(layout: &Layout) -> Result<Self, String> {
match layout { match layout {
@ -33,32 +37,52 @@ impl WasmLayout {
} }
} }
pub struct FunctionGenerator<'a> { pub struct WasmBackend<'a> {
pub instructions: std::vec::Vec<Instruction>, // Module: Wasm AST
pub ret_type: ValueType, pub builder: ModuleBuilder,
pub arg_types: std::vec::Vec<ValueType>,
pub locals: std::vec::Vec<Local>, // Module: internal state & IR mappings
module_state: &'a mut ModuleState, _data_offset_map: MutMap<Literal<'a>, u32>,
// joinpoint_label_map: MutMap<JoinPointId, LabelId>, _data_offset_next: u32,
symbol_storage_map: MutMap<Symbol, SymbolStorage>, proc_symbol_map: MutMap<Symbol, CodeLocation>,
// Functions: Wasm AST
instructions: std::vec::Vec<Instruction>,
ret_type: ValueType,
arg_types: std::vec::Vec<ValueType>,
locals: std::vec::Vec<Local>,
// Functions: internal state & IR mappings
stack_memory: u32, stack_memory: u32,
symbol_storage_map: MutMap<Symbol, SymbolStorage>,
// joinpoint_label_map: MutMap<JoinPointId, LabelId>,
} }
impl<'a> FunctionGenerator<'a> { impl<'a> WasmBackend<'a> {
pub fn new(module_state: &'a mut ModuleState) -> Self { pub fn new() -> Self {
FunctionGenerator { WasmBackend {
// Module: Wasm AST
builder: builder::module(),
// Module: internal state & IR mappings
_data_offset_map: MutMap::default(),
_data_offset_next: UNUSED_DATA_SECTION_BYTES,
proc_symbol_map: MutMap::default(),
// Functions: Wasm AST
instructions: std::vec::Vec::new(), instructions: std::vec::Vec::new(),
ret_type: ValueType::I32, ret_type: ValueType::I32,
arg_types: std::vec::Vec::new(), arg_types: std::vec::Vec::new(),
locals: std::vec::Vec::new(), locals: std::vec::Vec::new(),
module_state: module_state,
// joinpoint_label_map: MutMap::default(), // Functions: internal state & IR mappings
symbol_storage_map: MutMap::default(),
stack_memory: 0, stack_memory: 0,
symbol_storage_map: MutMap::default(),
// joinpoint_label_map: MutMap::default(),
} }
} }
pub fn build(&mut self, proc: Proc<'a>) -> Result<(), String> { pub fn build_proc(&mut self, proc: Proc<'a>, sym: Symbol) -> Result<u32, String> {
let ret_layout = WasmLayout::new(&proc.ret_layout)?; let ret_layout = WasmLayout::new(&proc.ret_layout)?;
if ret_layout.stack_memory > 0 { if ret_layout.stack_memory > 0 {
// TODO: if returning a struct by value, add an extra argument for a pointer to callee's stack memory // TODO: if returning a struct by value, add an extra argument for a pointer to callee's stack memory
@ -78,7 +102,25 @@ impl<'a> FunctionGenerator<'a> {
} }
self.build_stmt(&proc.body, &proc.ret_layout)?; self.build_stmt(&proc.body, &proc.ret_layout)?;
Ok(())
let signature = builder::signature()
.with_params(self.arg_types.clone()) // requires std::Vec, not Bumpalo
.with_result(self.ret_type.clone())
.build_sig();
let function_def = builder::function()
.with_signature(signature)
.body()
.with_locals(self.locals.clone())
.with_instructions(Instructions::new(self.instructions.clone()))
.build() // body
.build(); // function
let location = self.builder.push_function(function_def);
let function_index = location.body;
self.proc_symbol_map.insert(sym, location);
Ok(function_index)
} }
fn insert_local(&mut self, layout: WasmLayout, symbol: Symbol) -> LocalId { fn insert_local(&mut self, layout: WasmLayout, symbol: Symbol) -> LocalId {
@ -158,14 +200,10 @@ impl<'a> FunctionGenerator<'a> {
for arg in *arguments { for arg in *arguments {
self.load_from_symbol(arg)?; self.load_from_symbol(arg)?;
} }
let function_location = let function_location = self.proc_symbol_map.get(func_sym).ok_or(format!(
self.module_state "Cannot find function {:?} called from {:?}",
.proc_symbol_map func_sym, sym
.get(func_sym) ))?;
.ok_or(format!(
"Cannot find function {:?} called from {:?}",
func_sym, sym
))?;
self.instructions.push(Call(function_location.body)); self.instructions.push(Call(function_location.body));
self.store_to_symbol(sym)?; self.store_to_symbol(sym)?;
Ok(()) Ok(())

View file

@ -1,2 +1,62 @@
pub mod module; mod backend;
mod function;
use bumpalo::Bump;
use parity_wasm::builder;
use parity_wasm::elements::Internal;
use roc_collections::all::{MutMap, MutSet};
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{CallType, Expr, Proc, ProcLayout, Stmt};
use roc_mono::layout::LayoutIds;
use crate::backend::WasmBackend;
/// Inputs shared by the whole module build.
pub struct Env<'a> {
    /// Bump allocator handed in by the caller.
    pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot
    /// Passed to `to_symbol_string` when `build_module` names an exported function.
    pub interns: Interns,
    /// Symbols for which `build_module` creates a Wasm export entry.
    pub exposed_to_host: MutSet<Symbol>,
}
/// Compile every procedure into a single Wasm module and serialise it to bytes.
///
/// Each procedure is handed to `WasmBackend::build_proc`. Procedures whose symbol
/// is contained in `env.exposed_to_host` are additionally exported from the module
/// under the name produced by `LayoutIds::get_toplevel(..).to_symbol_string(..)`.
///
/// # Errors
///
/// Returns the error string from `build_proc` if a procedure cannot be built, or
/// `"Error serialising Wasm module ..."` if the finished module cannot be
/// converted to bytes.
pub fn build_module<'a>(
    env: &'a Env,
    procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<std::vec::Vec<u8>, String> {
    let mut backend = WasmBackend::new();
    let mut layout_ids = LayoutIds::default();

    for ((sym, layout), proc) in procedures {
        let function_index = backend.build_proc(proc, sym)?;

        if env.exposed_to_host.contains(&sym) {
            let fn_name = layout_ids
                .get_toplevel(sym, &layout)
                .to_symbol_string(sym, &env.interns);

            let export = builder::export()
                .field(fn_name.as_str())
                .with_internal(Internal::Function(function_index))
                .build();

            // Register the export on the module builder itself. Collecting it into
            // a local Vec that is never read would leave the built module without
            // any exports.
            backend.builder.push_export(export);
        }
    }

    let module = backend.builder.build();
    module
        .to_bytes()
        .map_err(|e| -> String { format!("Error serialising Wasm module {:?}", e) })
}
// TODO: use something like this for very simple inlining
// Create a HashMap of inlined Procs, generate each call with different Symbol arguments
/// Returns `true` when the body of `proc` is exactly a `Let` that binds the result
/// of a low-level call and is immediately followed by a `Ret`.
fn _is_lowlevel_wrapper<'a>(proc: Proc<'a>) -> bool {
    matches!(
        proc.body,
        Stmt::Let(
            _,
            Expr::Call(roc_mono::ir::Call {
                call_type: CallType::LowLevel { .. },
                ..
            }),
            _,
            Stmt::Ret(..)
        )
    )
}

View file

@ -1,107 +0,0 @@
use bumpalo::Bump;
use parity_wasm::builder::{CodeLocation, ModuleBuilder};
use parity_wasm::elements::{Instructions, Internal};
use parity_wasm::{builder, elements};
use roc_collections::all::{MutMap, MutSet};
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{CallType, Expr, Literal, Proc, ProcLayout, Stmt};
use roc_mono::layout::LayoutIds;
use crate::function::FunctionGenerator;
/// Inputs shared by the whole module build.
pub struct Env<'a> {
    /// Bump allocator handed in by the caller.
    pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot
    /// Passed to `to_symbol_string` when `build_module` names an exported function.
    pub interns: Interns,
    /// Symbols for which `build_module` creates a Wasm export entry.
    pub exposed_to_host: MutSet<Symbol>,
}
// Don't allocate any constant data at the address zero or anywhere near it.
// These addresses are not special in Wasm, but putting something there seems bug-prone.
// `ModuleState::new` uses this value as the initial `_data_offset_next`.
const UNUSED_DATA_SECTION_BYTES: u32 = 1024;
/// Build a Wasm module containing one function per entry of `procedures`.
///
/// Every procedure is compiled with `build_proc` and its code location is recorded
/// in `proc_symbol_map`. Procedures whose symbol is in `env.exposed_to_host` are
/// also exported, named via `LayoutIds`.
///
/// # Errors
///
/// Propagates the error string returned by `build_proc`.
pub fn build_module<'a>(
    env: &'a Env,
    procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<elements::Module, String> {
    let mut state = ModuleState::new(env);
    let mut layout_ids = LayoutIds::default();

    for ((proc_symbol, proc_layout), proc) in procedures {
        let code_location = build_proc(&mut state, proc)?;

        let is_exposed = env.exposed_to_host.contains(&proc_symbol);
        if is_exposed {
            let toplevel_id = layout_ids.get_toplevel(proc_symbol, &proc_layout);
            let export_name = toplevel_id.to_symbol_string(proc_symbol, &env.interns);

            let export_entry = builder::export()
                .field(export_name.as_str())
                .with_internal(Internal::Function(code_location.body))
                .build();
            state.module_builder.push_export(export_entry);
        }

        // Recorded only after the procedure itself has been generated.
        state.proc_symbol_map.insert(proc_symbol, code_location);
    }

    let finished_module = state.module_builder.build();
    Ok(finished_module)
}
/// Generate one Wasm function from `proc` and push it onto the module builder.
///
/// Returns the `CodeLocation` reported by `push_function`.
///
/// # Errors
///
/// Propagates the error string returned by `FunctionGenerator::build`.
fn build_proc<'a>(module_state: &mut ModuleState, proc: Proc<'a>) -> Result<CodeLocation, String> {
    // TODO: see if we can reuse the same memory each time and reset it?
    // Can't convince the borrow-checker to let me do that, as things get moved into the function builder.
    let mut func_gen = FunctionGenerator::new(module_state);
    func_gen.build(proc)?;

    // The generator's buffers are moved (not copied) into the parity_wasm builders.
    let param_types = func_gen.arg_types; // requires std::Vec, not Bumpalo
    let result_type = func_gen.ret_type;
    let signature = builder::signature()
        .with_params(param_types)
        .with_result(result_type)
        .build_sig();

    let body_instructions = Instructions::new(func_gen.instructions);
    let function_def = builder::function()
        .with_signature(signature)
        .body()
        .with_locals(func_gen.locals)
        .with_instructions(body_instructions)
        .build() // body
        .build(); // function

    Ok(module_state.module_builder.push_function(function_def))
}
/// Module-wide state that is shared between procedures while a Wasm module is built.
pub struct ModuleState<'a> {
    // Build environment; stored by `new` but not read anywhere in the visible code.
    _env: &'a Env<'a>,
    // Receives the generated functions and exports; `build_module` finally calls `build()` on it.
    module_builder: ModuleBuilder,
    /// Code location of each procedure generated so far, keyed by the procedure's symbol.
    pub proc_symbol_map: MutMap<Symbol, CodeLocation>,
    /// NOTE(review): presumably maps each literal to its offset in the data section;
    /// not used in the visible code, confirm before relying on it.
    pub _data_offset_map: MutMap<Literal<'a>, u32>,
    /// Initialised to `UNUSED_DATA_SECTION_BYTES`.
    /// NOTE(review): presumably the next free data offset, confirm against its users.
    pub _data_offset_next: u32,
}
impl<'a> ModuleState<'a> {
    /// Create the state for a fresh module: a new module builder, empty maps, and
    /// `_data_offset_next` starting at `UNUSED_DATA_SECTION_BYTES`.
    fn new(_env: &'a Env) -> Self {
        let module_builder = builder::module();

        Self {
            _env,
            module_builder,
            proc_symbol_map: MutMap::default(),
            _data_offset_map: MutMap::default(),
            _data_offset_next: UNUSED_DATA_SECTION_BYTES,
        }
    }
}
// TODO: use something like this for very simple inlining
// Create a HashMap of inlined Procs, generate each call with different Symbol arguments
/// Returns `true` when the body of `proc` is exactly a `Let` that binds the result
/// of a low-level call and is immediately followed by a `Ret`.
fn _is_lowlevel_wrapper<'a>(proc: Proc<'a>) -> bool {
    matches!(
        proc.body,
        Stmt::Let(
            _,
            Expr::Call(roc_mono::ir::Call {
                call_type: CallType::LowLevel { .. },
                ..
            }),
            _,
            Stmt::Ret(..)
        )
    )
}