diff --git a/compiler/gen_wasm/src/function.rs b/compiler/gen_wasm/src/backend.rs similarity index 76% rename from compiler/gen_wasm/src/function.rs rename to compiler/gen_wasm/src/backend.rs index 07de4b12d1..2dd9cedcf8 100644 --- a/compiler/gen_wasm/src/function.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -1,4 +1,6 @@ -use parity_wasm::elements::{Instruction, Instruction::*, Local, ValueType}; +use parity_wasm::builder; +use parity_wasm::builder::{CodeLocation, ModuleBuilder}; +use parity_wasm::elements::{Instruction, Instruction::*, Instructions, Local, ValueType}; use roc_collections::all::MutMap; use roc_module::low_level::LowLevel; @@ -6,7 +8,9 @@ use roc_module::symbol::Symbol; use roc_mono::ir::{CallType, Expr, Literal, Proc, Stmt}; use roc_mono::layout::{Builtin, Layout}; -use crate::module::ModuleState; +// Don't allocate any constant data at address zero or near it. Would be valid, but bug-prone. +// Follow Emscripten's example by using 1kB (4 bytes would probably do) +const UNUSED_DATA_SECTION_BYTES: u32 = 1024; #[derive(Clone, Copy)] struct LocalId(u32); @@ -14,13 +18,13 @@ struct LocalId(u32); #[derive(Clone, Copy)] struct LabelId(u32); +struct SymbolStorage(LocalId, WasmLayout); + struct WasmLayout { value_type: ValueType, stack_memory: u32, } -struct SymbolStorage(LocalId, WasmLayout); - impl WasmLayout { fn new(layout: &Layout) -> Result { match layout { @@ -33,32 +37,52 @@ impl WasmLayout { } } -pub struct FunctionGenerator<'a> { - pub instructions: std::vec::Vec, - pub ret_type: ValueType, - pub arg_types: std::vec::Vec, - pub locals: std::vec::Vec, - module_state: &'a mut ModuleState, - // joinpoint_label_map: MutMap, - symbol_storage_map: MutMap, +pub struct WasmBackend<'a> { + // Module: Wasm AST + pub builder: ModuleBuilder, + + // Module: internal state & IR mappings + _data_offset_map: MutMap, u32>, + _data_offset_next: u32, + proc_symbol_map: MutMap, + + // Functions: Wasm AST + instructions: std::vec::Vec, + ret_type: ValueType, + arg_types: std::vec::Vec, + locals: std::vec::Vec, + + // Functions: internal state & IR mappings stack_memory: u32, + symbol_storage_map: MutMap, + // joinpoint_label_map: MutMap, } -impl<'a> FunctionGenerator<'a> { - pub fn new(module_state: &'a mut ModuleState) -> Self { - FunctionGenerator { +impl<'a> WasmBackend<'a> { + pub fn new() -> Self { + WasmBackend { + // Module: Wasm AST + builder: builder::module(), + + // Module: internal state & IR mappings + _data_offset_map: MutMap::default(), + _data_offset_next: UNUSED_DATA_SECTION_BYTES, + proc_symbol_map: MutMap::default(), + + // Functions: Wasm AST instructions: std::vec::Vec::new(), ret_type: ValueType::I32, arg_types: std::vec::Vec::new(), locals: std::vec::Vec::new(), - module_state: module_state, - // joinpoint_label_map: MutMap::default(), - symbol_storage_map: MutMap::default(), + + // Functions: internal state & IR mappings stack_memory: 0, + symbol_storage_map: MutMap::default(), + // joinpoint_label_map: MutMap::default(), } } - pub fn build(&mut self, proc: Proc<'a>) -> Result<(), String> { + pub fn build_proc(&mut self, proc: Proc<'a>, sym: Symbol) -> Result { let ret_layout = WasmLayout::new(&proc.ret_layout)?; if ret_layout.stack_memory > 0 { // TODO: if returning a struct by value, add an extra argument for a pointer to callee's stack memory @@ -78,7 +102,25 @@ impl<'a> FunctionGenerator<'a> { } self.build_stmt(&proc.body, &proc.ret_layout)?; - Ok(()) + + let signature = builder::signature() + .with_params(self.arg_types.clone()) // requires std::Vec, not Bumpalo + .with_result(self.ret_type.clone()) + .build_sig(); + + let function_def = builder::function() + .with_signature(signature) + .body() + .with_locals(self.locals.clone()) + .with_instructions(Instructions::new(self.instructions.clone())) + .build() // body + .build(); // function + + let location = self.builder.push_function(function_def); + let function_index = location.body; + self.proc_symbol_map.insert(sym, location); + + Ok(function_index) } fn insert_local(&mut self, layout: WasmLayout, symbol: Symbol) -> LocalId { @@ -158,14 +200,10 @@ impl<'a> FunctionGenerator<'a> { for arg in *arguments { self.load_from_symbol(arg)?; } - let function_location = - self.module_state - .proc_symbol_map - .get(func_sym) - .ok_or(format!( - "Cannot find function {:?} called from {:?}", - func_sym, sym - ))?; + let function_location = self.proc_symbol_map.get(func_sym).ok_or(format!( + "Cannot find function {:?} called from {:?}", + func_sym, sym + ))?; self.instructions.push(Call(function_location.body)); self.store_to_symbol(sym)?; Ok(()) diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index d020707d3f..53c544e3fb 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -1,2 +1,62 @@ -pub mod module; -mod function; +mod backend; + +use bumpalo::Bump; +use parity_wasm::builder; +use parity_wasm::elements::Internal; + +use roc_collections::all::{MutMap, MutSet}; +use roc_module::symbol::{Interns, Symbol}; +use roc_mono::ir::{CallType, Expr, Proc, ProcLayout, Stmt}; +use roc_mono::layout::LayoutIds; + +use crate::backend::WasmBackend; + +pub struct Env<'a> { + pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot + pub interns: Interns, + pub exposed_to_host: MutSet, +} + +pub fn build_module<'a>( + env: &'a Env, + procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, +) -> Result, String> { + let mut backend = WasmBackend::new(); + let mut layout_ids = LayoutIds::default(); + + let mut exports = std::vec::Vec::new(); + for ((sym, layout), proc) in procedures { + let function_index = backend.build_proc(proc, sym)?; + if env.exposed_to_host.contains(&sym) { + let fn_name = layout_ids + .get_toplevel(sym, &layout) + .to_symbol_string(sym, &env.interns); + + let export = builder::export() + .field(fn_name.as_str()) + .with_internal(Internal::Function(function_index)) + .build(); + + exports.push(export); + } + } + let module = backend.builder.build(); + module + .to_bytes() + .map_err(|e| -> String { format!("Error serialising Wasm module {:?}", e) }) +} + +// TODO: use something like this for very simple inlining +// Create a HashMap of inlined Procs, generate each call with different Symbol arguments +fn _is_lowlevel_wrapper<'a>(proc: Proc<'a>) -> bool { + match proc.body { + Stmt::Let(_, expr, _, Stmt::Ret(..)) => match expr { + Expr::Call(roc_mono::ir::Call { call_type, .. }) => match call_type { + CallType::LowLevel { .. } => true, + _ => false, + }, + _ => false, + }, + _ => false, + } +} diff --git a/compiler/gen_wasm/src/module.rs b/compiler/gen_wasm/src/module.rs deleted file mode 100644 index 408ea3b4fd..0000000000 --- a/compiler/gen_wasm/src/module.rs +++ /dev/null @@ -1,107 +0,0 @@ -use bumpalo::Bump; -use parity_wasm::builder::{CodeLocation, ModuleBuilder}; -use parity_wasm::elements::{Instructions, Internal}; -use parity_wasm::{builder, elements}; - -use roc_collections::all::{MutMap, MutSet}; -use roc_module::symbol::{Interns, Symbol}; -use roc_mono::ir::{CallType, Expr, Literal, Proc, ProcLayout, Stmt}; -use roc_mono::layout::LayoutIds; - -use crate::function::FunctionGenerator; - -pub struct Env<'a> { - pub arena: &'a Bump, // not really using this much, parity_wasm works with std::vec a lot - pub interns: Interns, - pub exposed_to_host: MutSet, -} - -// Don't allocate any constant data at the address zero or anywhere near it. -// These addresses are not special in Wasm, but putting something there seems bug-prone. -const UNUSED_DATA_SECTION_BYTES: u32 = 1024; - -pub fn build_module<'a>( - env: &'a Env, - procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, -) -> Result { - let mut module_state = ModuleState::new(env); - let mut layout_ids = LayoutIds::default(); - - for ((sym, layout), proc) in procedures { - let location = build_proc(&mut module_state, proc)?; - - if env.exposed_to_host.contains(&sym) { - let fn_name = layout_ids - .get_toplevel(sym, &layout) - .to_symbol_string(sym, &env.interns); - - let export = builder::export() - .field(fn_name.as_str()) - .with_internal(Internal::Function(location.body)) - .build(); - - module_state.module_builder.push_export(export); - } - module_state.proc_symbol_map.insert(sym, location); - } - - Ok(module_state.module_builder.build()) -} - -fn build_proc<'a>(module_state: &mut ModuleState, proc: Proc<'a>) -> Result { - // TODO: see if we can reuse the same memory each time and reset it? - // Can't convince the borrow-checker to let me do that, as things get moved into the function builder. - let mut func_gen = FunctionGenerator::new(module_state); - func_gen.build(proc)?; - - let signature = builder::signature() - .with_params(func_gen.arg_types) // requires std::Vec, not Bumpalo - .with_result(func_gen.ret_type) - .build_sig(); - - let function_def = builder::function() - .with_signature(signature) - .body() - .with_locals(func_gen.locals) - .with_instructions(Instructions::new(func_gen.instructions)) - .build() // body - .build(); // function - - let location = module_state.module_builder.push_function(function_def); - Ok(location) -} - -pub struct ModuleState<'a> { - _env: &'a Env<'a>, - module_builder: ModuleBuilder, - pub proc_symbol_map: MutMap, - pub _data_offset_map: MutMap, u32>, - pub _data_offset_next: u32, -} - -impl<'a> ModuleState<'a> { - fn new(_env: &'a Env) -> Self { - ModuleState { - _env, - module_builder: builder::module(), - proc_symbol_map: MutMap::default(), - _data_offset_map: MutMap::default(), - _data_offset_next: UNUSED_DATA_SECTION_BYTES, - } - } -} - -// TODO: use something like this for very simple inlining -// Create a HashMap of inlined Procs, generate each call with different Symbol arguments -fn _is_lowlevel_wrapper<'a>(proc: Proc<'a>) -> bool { - match proc.body { - Stmt::Let(_, expr, _, Stmt::Ret(..)) => match expr { - Expr::Call(roc_mono::ir::Call { call_type, .. }) => match call_type { - CallType::LowLevel { .. } => true, - _ => false, - }, - _ => false, - }, - _ => false, - } -}