From 13577aa9ecd4f9fc1cb44dbece16e7379a02fcda Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Sat, 23 Oct 2021 17:10:00 +0200 Subject: [PATCH] Combine our handmade Code section with other sections from parity_wasm (tests compile but fail) --- compiler/gen_wasm/src/backend.rs | 40 +++++++---- compiler/gen_wasm/src/code_builder.rs | 22 ++++++ compiler/gen_wasm/src/lib.rs | 69 +++++++++++++----- compiler/gen_wasm/tests/helpers/eval.rs | 13 +++- .../tests/helpers/wasm32_test_result.rs | 70 +++++++++++-------- 5 files changed, 149 insertions(+), 65 deletions(-) diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index ffce729bc8..8c617f040a 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -11,7 +11,7 @@ use roc_mono::layout::{Builtin, Layout}; use crate::code_builder::{BlockType, CodeBuilder, ValueType}; use crate::layout::WasmLayout; use crate::storage::{Storage, StoredValue, StoredValueKind}; -use crate::{copy_memory, CopyMemoryConfig, Env, LocalId, PTR_TYPE}; +use crate::{copy_memory, encode_u32_padded, CopyMemoryConfig, Env, LocalId, PTR_TYPE}; // Don't allocate any constant data at address zero or near it. Would be valid, but bug-prone. // Follow Emscripten's example by using 1kB (4 bytes would probably do) @@ -20,18 +20,17 @@ const UNUSED_DATA_SECTION_BYTES: u32 = 1024; #[derive(Clone, Copy, Debug)] struct LabelId(u32); -// TODO: use Bumpalo Vec once parity_wasm supports general iterators (>=0.43) pub struct WasmBackend<'a> { - // Module level: Wasm AST - pub module_builder: ModuleBuilder, env: &'a Env<'a>, - // Module level: internal state & IR mappings + // Module-level data + pub module_builder: ModuleBuilder, + pub code_section_bytes: std::vec::Vec, _data_offset_map: MutMap, u32>, _data_offset_next: u32, proc_symbol_map: MutMap, - // Function level + // Function-level data code_builder: CodeBuilder<'a>, storage: Storage<'a>, @@ -41,21 +40,29 @@ pub struct WasmBackend<'a> { } impl<'a> WasmBackend<'a> { - pub fn new(env: &'a Env<'a>) -> Self { + pub fn new(env: &'a Env<'a>, num_procs: usize) -> Self { + // Code section is prefixed with the number of Wasm functions + // For now, this is the same as the number of IR procedures (until we start inlining!) + let mut code_section_bytes = std::vec::Vec::with_capacity(4096); + + // Reserve space for code section header: inner byte length and number of functions + // Padded to the maximum 5 bytes each, so we can update later without moving everything + code_section_bytes.resize(10, 0); + encode_u32_padded(&mut code_section_bytes[5..10], num_procs as u32); + WasmBackend { - // Module: Wasm AST - module_builder: builder::module(), env, - // Module: internal state & IR mappings + // Module-level data + module_builder: builder::module(), + code_section_bytes, _data_offset_map: MutMap::default(), _data_offset_next: UNUSED_DATA_SECTION_BYTES, proc_symbol_map: MutMap::default(), + // Function-level data block_depth: 0, joinpoint_label_map: MutMap::default(), - - // Functions code_builder: CodeBuilder::new(env.arena), storage: Storage::new(env.arena), } @@ -86,7 +93,7 @@ impl<'a> WasmBackend<'a> { self.build_stmt(&proc.body, &proc.ret_layout)?; - self.finalize_proc(); + self.finalize_proc()?; self.reset(); // println!("\nfinished generating {:?}\n", sym); @@ -123,7 +130,7 @@ impl<'a> WasmBackend<'a> { builder::function().with_signature(signature).build() } - fn finalize_proc(&mut self) { + fn finalize_proc(&mut self) -> Result<(), String> { // end the block from start_proc, to ensure all paths pop stack memory (if any) self.end_block(); @@ -133,6 +140,11 @@ impl<'a> WasmBackend<'a> { self.storage.stack_frame_size, self.storage.stack_frame_pointer, ); + + self.code_builder + .serialize(&mut self.code_section_bytes) + .map_err(|e| format!("{:?}", e))?; + Ok(()) } /********************************************************** diff --git a/compiler/gen_wasm/src/code_builder.rs b/compiler/gen_wasm/src/code_builder.rs index 6aae708807..eed1f43e56 100644 --- a/compiler/gen_wasm/src/code_builder.rs +++ b/compiler/gen_wasm/src/code_builder.rs @@ -60,6 +60,21 @@ pub enum Align { // ... we can add more if we need them ... } +impl From for Align { + fn from(x: u32) -> Align { + match x { + 1 => Align::Bytes1, + 2 => Align::Bytes2, + 4 => Align::Bytes4, + 8 => Align::Bytes8, + 16 => Align::Bytes16, + 32 => Align::Bytes32, + 64 => Align::Bytes64, + _ => panic!("{:?}-byte alignment not supported", x), + } + } +} + #[derive(Debug, Clone, PartialEq, Copy)] pub enum VirtualMachineSymbolState { /// Value doesn't exist yet @@ -99,6 +114,13 @@ macro_rules! instruction_memargs { }; } +/// Finalize the code section bytes by writing its inner length at the start. +/// Assumes 5 bytes have been reserved for it (maximally-padded LEB-128) +pub fn finalize_code_section(code_section_bytes: &mut std::vec::Vec) { + let inner_len = (code_section_bytes.len() - 5) as u32; + encode_u32_padded(code_section_bytes[0..5], inner_len); +} + #[derive(Debug)] pub struct CodeBuilder<'a> { /// The main container for the instructions diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index 656eb70868..c85d4266f6 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -13,14 +13,14 @@ use bumpalo::collections::Vec; use bumpalo::Bump; use parity_wasm::builder; -use parity_wasm::elements::Internal; +use parity_wasm::elements::{Instruction, Internal, Module, Section}; use roc_collections::all::{MutMap, MutSet}; use roc_module::symbol::{Interns, Symbol}; use roc_mono::ir::{Proc, ProcLayout}; use roc_mono::layout::LayoutIds; use crate::backend::WasmBackend; -use crate::code_builder::{Align, CodeBuilder, ValueType}; +use crate::code_builder::{finalize_code_section, Align, CodeBuilder, ValueType}; const PTR_SIZE: u32 = 4; const PTR_TYPE: ValueType = ValueType::I32; @@ -28,6 +28,10 @@ const PTR_TYPE: ValueType = ValueType::I32; pub const STACK_POINTER_GLOBAL_ID: u32 = 0; pub const FRAME_ALIGNMENT_BYTES: i32 = 16; +/// Code section ID from spec +/// https://webassembly.github.io/spec/core/binary/modules.html#sections +pub const CODE_SECTION_ID: u8 = 10; + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct LocalId(pub u32); @@ -41,8 +45,10 @@ pub fn build_module<'a>( env: &'a Env, procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, ) -> Result, String> { - let (builder, _) = build_module_help(env, procedures)?; - let module = builder.build(); + let (builder, code_section_bytes, _) = build_module_help(env, procedures)?; + let mut module = builder.build(); + replace_code_section(&mut module, code_section_bytes); + module .into_bytes() .map_err(|e| -> String { format!("Error serialising Wasm module {:?}", e) }) @@ -51,8 +57,8 @@ pub fn build_module<'a>( pub fn build_module_help<'a>( env: &'a Env, procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, -) -> Result<(builder::ModuleBuilder, u32), String> { - let mut backend = WasmBackend::new(env); +) -> Result<(builder::ModuleBuilder, std::vec::Vec, u32), String> { + let mut backend = WasmBackend::new(env, procedures.len()); let mut layout_ids = LayoutIds::default(); // Sort procedures by occurrence order @@ -85,6 +91,8 @@ pub fn build_module_help<'a>( } } + finalize_code_section(&mut backend.code_section_bytes); + // Because of the sorting above, we know the last function in the `for` is the main function. // Here we grab its index and return it, so that the test_wrapper is able to call it. // This is a workaround until we implement object files with symbols and relocations. @@ -106,23 +114,30 @@ pub fn build_module_help<'a>( let stack_pointer_global = builder::global() .with_type(parity_wasm::elements::ValueType::I32) .mutable() - .init_expr(parity_wasm::elements::Instruction::I32Const( - (MIN_MEMORY_SIZE_KB * 1024) as i32, - )) + .init_expr(Instruction::I32Const((MIN_MEMORY_SIZE_KB * 1024) as i32)) .build(); backend.module_builder.push_global(stack_pointer_global); - Ok((backend.module_builder, main_function_index)) + Ok(( + backend.module_builder, + backend.code_section_bytes, + main_function_index, + )) } -fn encode_alignment(bytes: u32) -> Align { - match bytes { - 1 => Align::Bytes1, - 2 => Align::Bytes2, - 4 => Align::Bytes4, - 8 => Align::Bytes8, - _ => panic!("{:?}-byte alignment is not supported", bytes), +/// Replace parity-wasm's code section with our own handmade one +pub fn replace_code_section(module: &mut Module, code_section_bytes: std::vec::Vec) { + let sections = module.sections_mut(); + let mut code_section_index = usize::MAX; + for (i, s) in sections.iter().enumerate() { + if let Section::Code(_) = s { + code_section_index = i; + } } + sections[code_section_index] = Section::Unparsed { + id: CODE_SECTION_ID, + payload: code_section_bytes, + }; } pub struct CopyMemoryConfig { @@ -139,7 +154,7 @@ pub fn copy_memory(code_builder: &mut CodeBuilder, config: CopyMemoryConfig) { return; } - let alignment = encode_alignment(config.alignment_bytes); + let alignment = Align::from(config.alignment_bytes); let mut i = 0; while config.size - i >= 8 { code_builder.get_local(config.to_ptr); @@ -196,7 +211,7 @@ pub fn encode_u32<'a>(buffer: &mut [u8], mut value: u32) -> usize { /// /// All integers in Wasm are variable-length encoded, which saves space for small values. /// The most significant bit indicates "more bytes are coming", and the other 7 are payload. -pub fn encode_u64<'a>(buffer: &mut Vec<'a, u8>, mut value: u64) -> usize { +pub fn encode_u64<'a>(buffer: &mut [u8], mut value: u64) -> usize { let mut count = 0; while value >= 0x80 { buffer[count] = 0x80 | ((value & 0x7f) as u8); @@ -206,3 +221,19 @@ pub fn encode_u64<'a>(buffer: &mut Vec<'a, u8>, mut value: u64) -> usize { buffer[count] = value as u8; count + 1 } + +/// Write a u32 value as LEB-128 encoded bytes, but padded to maximum byte length (5) +/// +/// Sometimes we want a number to have fixed length, so we can update it later (e.g. relocations) +/// without moving all the following bytes. For those cases we pad it to maximum length. +/// For example, 3 is encoded as 0x83 0x80 0x80 0x80 0x00. +/// +/// https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#relocation-sections +pub fn encode_u32_padded<'a>(buffer: &mut [u8], mut value: u32) -> usize { + for i in 0..4 { + buffer[i] = 0x80 | ((value & 0x7f) as u8); + value >>= 7; + } + buffer[4] = value as u8; + 5 +} diff --git a/compiler/gen_wasm/tests/helpers/eval.rs b/compiler/gen_wasm/tests/helpers/eval.rs index 45d21e8962..615aa4034b 100644 --- a/compiler/gen_wasm/tests/helpers/eval.rs +++ b/compiler/gen_wasm/tests/helpers/eval.rs @@ -102,14 +102,21 @@ pub fn helper_wasm<'a, T: Wasm32TestResult>( exposed_to_host, }; - let (mut builder, main_function_index) = + let (mut builder, mut code_section_bytes, main_function_index) = roc_gen_wasm::build_module_help(&env, procedures).unwrap(); - T::insert_test_wrapper(&mut builder, TEST_WRAPPER_NAME, main_function_index); + + T::insert_test_wrapper( + arena, + &mut builder, + &mut code_section_bytes, + TEST_WRAPPER_NAME, + main_function_index, + ); let module_bytes = builder.build().into_bytes().unwrap(); // for debugging (e.g. with wasm2wat) - if false { + if true { use std::io::Write; let mut hash_state = DefaultHasher::new(); diff --git a/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs b/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs index 81a1d62827..af8176cdbd 100644 --- a/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs +++ b/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs @@ -1,18 +1,22 @@ use parity_wasm::builder; -use parity_wasm::builder::ModuleBuilder; +use parity_wasm::elements::Internal; -use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; use roc_gen_wasm::code_builder::{Align, CodeBuilder, ValueType}; +use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; +use roc_gen_wasm::{encode_u32_padded, LocalId}; use roc_std::{RocDec, RocList, RocOrder, RocStr}; pub trait Wasm32TestResult { - fn insert_test_wrapper( - module_builder: &mut ModuleBuilder, - code_builder: &mut CodeBuilder, + fn insert_test_wrapper<'a>( + arena: &'a bumpalo::Bump, + module_builder: &mut builder::ModuleBuilder, + code_section_bytes: &mut std::vec::Vec, wrapper_name: &str, main_function_index: u32, ) { - let signature = builder::signature().with_result(ValueType::I32).build_sig(); + let signature = builder::signature() + .with_result(parity_wasm::elements::ValueType::I32) + .build_sig(); // parity-wasm FunctionDefinition with no instructions let empty_fn_def = builder::function().with_signature(signature).build(); @@ -23,7 +27,11 @@ pub trait Wasm32TestResult { .build(); module_builder.push_export(export); - self.build_wrapper_body(code_builder, main_function_index); + let mut code_builder = CodeBuilder::new(arena); + Self::build_wrapper_body(&mut code_builder, main_function_index); + + code_builder.serialize(code_section_bytes).unwrap(); + finalize_code_section(&mut code_section_bytes); } fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32); @@ -31,10 +39,7 @@ pub trait Wasm32TestResult { macro_rules! build_wrapper_body_primitive { ($store_instruction: ident, $align: expr) => { - fn build_wrapper_body( - code_builder: &mut FunctionBuilder, - main_function_index: u32, - ) -> Vec { + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { let frame_pointer_id = LocalId(0); let frame_pointer = Some(frame_pointer_id); let local_types = &[ValueType::I32]; @@ -62,7 +67,7 @@ fn build_wrapper_body_stack_memory( code_builder: &mut CodeBuilder, main_function_index: u32, size: usize, -) -> Vec { +) { let local_id = LocalId(0); let local_types = &[ValueType::I32]; let frame_pointer = Some(local_id); @@ -70,16 +75,13 @@ fn build_wrapper_body_stack_memory( code_builder.get_local(local_id); code_builder.call(main_function_index, 0, true); code_builder.get_local(local_id); - code_builder.finalize(local_types, size, frame_pointer); + code_builder.finalize(local_types, size as i32, frame_pointer); } macro_rules! wasm_test_result_stack_memory { ($type_name: ident) => { impl Wasm32TestResult for $type_name { - fn build_wrapper_body( - code_builder: &mut FunctionBuilder, - main_function_index: u32, - ) -> Vec { + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { build_wrapper_body_stack_memory( code_builder, main_function_index, @@ -111,8 +113,8 @@ wasm_test_result_stack_memory!(RocDec); wasm_test_result_stack_memory!(RocStr); impl Wasm32TestResult for RocList { - fn build_wrapper_body(main_function_index: u32) -> Vec { - build_wrapper_body_stack_memory(main_function_index, 12) + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { + build_wrapper_body_stack_memory(code_builder, main_function_index, 12) } } @@ -124,8 +126,8 @@ impl Wasm32TestResult for [T; N] where T: Wasm32TestResult + FromWasm32Memory, { - fn build_wrapper_body(main_function_index: u32) -> Vec { - build_wrapper_body_stack_memory(main_function_index, N * T::ACTUAL_WIDTH) + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { + build_wrapper_body_stack_memory(code_builder, main_function_index, N * T::ACTUAL_WIDTH) } } @@ -134,8 +136,12 @@ where T: Wasm32TestResult + FromWasm32Memory, U: Wasm32TestResult + FromWasm32Memory, { - fn build_wrapper_body(main_function_index: u32) -> Vec { - build_wrapper_body_stack_memory(main_function_index, T::ACTUAL_WIDTH + U::ACTUAL_WIDTH) + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { + build_wrapper_body_stack_memory( + code_builder, + main_function_index, + T::ACTUAL_WIDTH + U::ACTUAL_WIDTH, + ) } } @@ -145,8 +151,9 @@ where U: Wasm32TestResult + FromWasm32Memory, V: Wasm32TestResult + FromWasm32Memory, { - fn build_wrapper_body(main_function_index: u32) -> Vec { + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { build_wrapper_body_stack_memory( + code_builder, main_function_index, T::ACTUAL_WIDTH + U::ACTUAL_WIDTH + V::ACTUAL_WIDTH, ) @@ -160,8 +167,9 @@ where V: Wasm32TestResult + FromWasm32Memory, W: Wasm32TestResult + FromWasm32Memory, { - fn build_wrapper_body(main_function_index: u32) -> Vec { + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { build_wrapper_body_stack_memory( + code_builder, main_function_index, T::ACTUAL_WIDTH + U::ACTUAL_WIDTH + V::ACTUAL_WIDTH + W::ACTUAL_WIDTH, ) @@ -176,8 +184,9 @@ where W: Wasm32TestResult + FromWasm32Memory, X: Wasm32TestResult + FromWasm32Memory, { - fn build_wrapper_body(main_function_index: u32) -> Vec { + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { build_wrapper_body_stack_memory( + code_builder, main_function_index, T::ACTUAL_WIDTH + U::ACTUAL_WIDTH + V::ACTUAL_WIDTH + W::ACTUAL_WIDTH + X::ACTUAL_WIDTH, ) @@ -193,8 +202,9 @@ where X: Wasm32TestResult + FromWasm32Memory, Y: Wasm32TestResult + FromWasm32Memory, { - fn build_wrapper_body(main_function_index: u32) -> Vec { + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { build_wrapper_body_stack_memory( + code_builder, main_function_index, T::ACTUAL_WIDTH + U::ACTUAL_WIDTH @@ -216,8 +226,9 @@ where Y: Wasm32TestResult + FromWasm32Memory, Z: Wasm32TestResult + FromWasm32Memory, { - fn build_wrapper_body(main_function_index: u32) -> Vec { + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { build_wrapper_body_stack_memory( + code_builder, main_function_index, T::ACTUAL_WIDTH + U::ACTUAL_WIDTH @@ -241,8 +252,9 @@ where Z: Wasm32TestResult + FromWasm32Memory, A: Wasm32TestResult + FromWasm32Memory, { - fn build_wrapper_body(main_function_index: u32) -> Vec { + fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32) { build_wrapper_body_stack_memory( + code_builder, main_function_index, T::ACTUAL_WIDTH + U::ACTUAL_WIDTH