From c29e8bc452c87f27f57c5d7e31dec2fc6a6eb356 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Sun, 31 Oct 2021 08:53:06 +0000 Subject: [PATCH 01/20] comment formatting --- compiler/gen_wasm/src/module_builder.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index a0b085343b..1480a85182 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -248,11 +248,9 @@ impl Serialize for LinkingInitFunc { } } -//---------------- -// +//------------------------------------------------ // Common data -// -//---------------- +//------------------------------------------------ #[repr(u8)] #[derive(PartialEq, Eq, Clone, Copy, Debug)] @@ -291,11 +289,9 @@ impl<'a> Serialize for LinkingComdat<'a> { } } -//---------------- -// +//------------------------------------------------ // Symbol table -// -//---------------- +//------------------------------------------------ /// Indicating that this is a weak symbol. When /// linking multiple modules defining the same symbol, all weak definitions are @@ -442,11 +438,9 @@ impl Serialize for SymInfo { } } -//-------------------------------- -// +//---------------------------------------------------------------- // Linking subsections -// -//-------------------------------- +//---------------------------------------------------------------- pub enum LinkingSubSection<'a> { /// Extra metadata about the data segments. @@ -482,6 +476,10 @@ impl<'a> Serialize for LinkingSubSection<'a> { } } +//---------------------------------------------------------------- +// Linking metadata section +//---------------------------------------------------------------- + const LINKING_VERSION: u8 = 2; pub struct LinkingSection<'a> { From 6ab00d164cf2157b506e456fdad4332e213ec3ab Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Sun, 31 Oct 2021 13:29:09 +0000 Subject: [PATCH 02/20] Create WasmModule struct --- compiler/gen_wasm/src/backend.rs | 8 +- compiler/gen_wasm/src/lib.rs | 17 +-- compiler/gen_wasm/src/module_builder.rs | 142 +++++++++++++++++++++++- 3 files changed, 145 insertions(+), 22 deletions(-) diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index 963d0a5585..54ba8d8491 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -10,7 +10,7 @@ use roc_mono::layout::{Builtin, Layout}; use crate::code_builder::{BlockType, CodeBuilder, ValueType}; use crate::layout::WasmLayout; -use crate::module_builder::RelocationEntry; +use crate::module_builder::WasmModule; use crate::serialize::SerialBuffer; use crate::storage::{Storage, StoredValue, StoredValueKind}; use crate::{copy_memory, CopyMemoryConfig, Env, LocalId, PTR_TYPE}; @@ -26,9 +26,9 @@ pub struct WasmBackend<'a> { env: &'a Env<'a>, // Module-level data + pub module: WasmModule<'a>, pub module_builder: ModuleBuilder, pub code_section_bytes: std::vec::Vec, - pub code_relocations: Vec<'a, RelocationEntry>, _data_offset_map: MutMap, u32>, _data_offset_next: u32, proc_symbols: &'a [Symbol], @@ -54,12 +54,12 @@ impl<'a> WasmBackend<'a> { env, // Module-level data + module: WasmModule::new(env.arena), module_builder: builder::module(), code_section_bytes, _data_offset_map: MutMap::default(), _data_offset_next: UNUSED_DATA_SECTION_BYTES, proc_symbols, - code_relocations: Vec::with_capacity_in(256, env.arena), // Function-level data block_depth: 0, @@ -142,7 +142,7 @@ impl<'a> WasmBackend<'a> { ); let relocs = self.code_builder.serialize(&mut self.code_section_bytes); - self.code_relocations.extend(relocs); + self.module.reloc_code.entries.extend(relocs); Ok(()) } diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index 1ac84e25b5..30f647a148 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -18,9 +18,7 @@ use roc_mono::layout::LayoutIds; use crate::backend::WasmBackend; use crate::code_builder::{Align, CodeBuilder, ValueType}; -use crate::module_builder::{ - LinkingSection, LinkingSubSection, RelocationSection, SectionId, SymInfo, -}; +use crate::module_builder::{LinkingSection, LinkingSubSection, SectionId, SymInfo}; use crate::serialize::{SerialBuffer, Serialize}; const PTR_SIZE: u32 = 4; @@ -105,19 +103,10 @@ pub fn build_module_help<'a>( payload: linking_section_bytes, }); - // We always output the code section at the same index relative to other sections, and we need that for relocations. - // TODO: If there's a data section, this will be 6 so we'll need logic for that - // TODO: Build a cleaner solution after we replace parity-wasm with our own module_builder const CODE_SECTION_INDEX: u32 = 5; - - let code_reloc_section = RelocationSection { - name: "reloc.CODE", - target_section_index: CODE_SECTION_INDEX, - entries: &backend.code_relocations, - }; - + backend.module.reloc_code.target_section_index = Some(CODE_SECTION_INDEX); let mut code_reloc_section_bytes = std::vec::Vec::with_capacity(256); - code_reloc_section.serialize(&mut code_reloc_section_bytes); + backend.module.reloc_code.serialize(&mut code_reloc_section_bytes); // Must come after linking section backend.module_builder = backend.module_builder.with_section(Section::Unparsed { diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index 1480a85182..0c4c5480ad 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -1,8 +1,15 @@ use bumpalo::collections::vec::Vec; +use bumpalo::Bump; use crate::code_builder::Align; use crate::serialize::{SerialBuffer, Serialize}; +/******************************************************************* + * + * Helper functions + * + *******************************************************************/ + #[repr(u8)] #[derive(PartialEq, Eq, Clone, Copy, Debug)] pub enum SectionId { @@ -204,15 +211,25 @@ impl Serialize for RelocationEntry { pub struct RelocationSection<'a> { pub name: &'a str, /// The *index* (not ID!) of the target section in the module - pub target_section_index: u32, - pub entries: &'a Vec<'a, RelocationEntry>, + pub target_section_index: Option, + pub entries: Vec<'a, RelocationEntry>, +} + +impl<'a> RelocationSection<'a> { + fn new(arena: &'a Bump, name: &'a str) -> Self { + RelocationSection { + name, + target_section_index: None, + entries: Vec::with_capacity_in(64, arena), + } + } } impl<'a> Serialize for RelocationSection<'a> { fn serialize(&self, buffer: &mut T) { let header_indices = write_custom_section_header(buffer, self.name); - buffer.encode_u32(self.target_section_index); - serialize_vector_with_count(buffer, self.entries); + buffer.encode_u32(self.target_section_index.unwrap()); + serialize_vector_with_count(buffer, &self.entries); update_section_size(buffer, header_indices); } } @@ -485,6 +502,15 @@ const LINKING_VERSION: u8 = 2; pub struct LinkingSection<'a> { pub subsections: Vec<'a, LinkingSubSection<'a>>, } + +impl<'a> LinkingSection<'a> { + fn new(arena: &'a Bump) -> Self { + LinkingSection { + subsections: Vec::with_capacity_in(1, arena), + } + } +} + impl<'a> Serialize for LinkingSection<'a> { fn serialize(&self, buffer: &mut T) { let header_indices = write_custom_section_header(buffer, "linking"); @@ -495,3 +521,111 @@ impl<'a> Serialize for LinkingSection<'a> { update_section_size(buffer, header_indices); } } + +/******************************************************************* + * + * Module + * + * https://webassembly.github.io/spec/core/binary/modules.html + * + *******************************************************************/ + +const WASM_VERSION: u32 = 1; + +pub struct WasmModule<'a> { + pub types: &'a str, // TODO + pub import: &'a str, // TODO + pub function: &'a str, // TODO + pub table: &'a str, // TODO + pub memory: &'a str, // TODO + pub global: &'a str, // TODO + pub export: &'a str, // TODO + pub start: &'a str, // TODO + pub element: &'a str, // TODO + pub data_count: &'a str, // TODO + pub code: &'a str, // TODO + pub data: &'a str, // TODO + pub linking: LinkingSection<'a>, + pub reloc_code: RelocationSection<'a>, + pub reloc_data: RelocationSection<'a>, +} + +fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) { + if size > *prev_size { + *index += 1; + *prev_size = size; + } +} + +impl<'a> WasmModule<'a> { + pub fn new(arena: &'a Bump) -> Self { + WasmModule { + types: "", + import: "", + function: "", + table: "", + memory: "", + global: "", + export: "", + start: "", + element: "", + data_count: "", + code: "", + data: "", + linking: LinkingSection::new(arena), + reloc_code: RelocationSection::new(arena, "reloc.CODE"), + reloc_data: RelocationSection::new(arena, "reloc.DATA"), + } + } + + #[allow(dead_code)] + fn serialize(&mut self, buffer: &mut T) { + buffer.append_byte(0); + buffer.append_slice("asm".as_bytes()); + buffer.write_unencoded_u32(WASM_VERSION); + + let mut index: u32 = 0; + let mut prev_size = buffer.size(); + + self.types.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.import.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.function.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.table.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.memory.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.global.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.export.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.start.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.element.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.data_count.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.code.serialize(buffer); + self.reloc_code.target_section_index = Some(index); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + self.data.serialize(buffer); + self.reloc_data.target_section_index = Some(index); + + self.linking.serialize(buffer); + self.reloc_code.serialize(buffer); + self.reloc_data.serialize(buffer); + } +} From 7ad452d78a359dfdf0636d4d2d204bda6ef3e554 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Mon, 1 Nov 2021 08:34:34 +0000 Subject: [PATCH 03/20] Types section --- compiler/gen_wasm/src/code_builder.rs | 40 +--- compiler/gen_wasm/src/module_builder.rs | 276 +++++++++++++++++++++++- compiler/gen_wasm/src/serialize.rs | 19 ++ compiler/gen_wasm/tests/helpers/eval.rs | 2 +- 4 files changed, 293 insertions(+), 44 deletions(-) diff --git a/compiler/gen_wasm/src/code_builder.rs b/compiler/gen_wasm/src/code_builder.rs index fe9319172f..efc25828dd 100644 --- a/compiler/gen_wasm/src/code_builder.rs +++ b/compiler/gen_wasm/src/code_builder.rs @@ -7,7 +7,7 @@ use roc_module::symbol::Symbol; use crate::module_builder::{IndexRelocType, RelocationEntry}; use crate::opcodes::*; -use crate::serialize::SerialBuffer; +use crate::serialize::{SerialBuffer, Serialize}; use crate::{round_up_to_alignment, LocalId, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID}; /// Wasm value type. (Rust representation matches Wasm encoding) @@ -300,43 +300,7 @@ impl<'a> CodeBuilder<'a> { /// Generate bytes to declare the function's local variables fn build_local_declarations(&mut self, local_types: &[ValueType]) { - // reserve one byte for num_batches - self.preamble.push(0); - - if local_types.is_empty() { - return; - } - - // Write declarations in batches of the same ValueType - let mut num_batches: u32 = 0; - let mut batch_type = local_types[0]; - let mut batch_size = 0; - for t in local_types { - if *t == batch_type { - batch_size += 1; - } else { - self.preamble.encode_u32(batch_size); - self.preamble.push(batch_type as u8); - batch_type = *t; - batch_size = 1; - num_batches += 1; - } - } - self.preamble.encode_u32(batch_size); - self.preamble.push(batch_type as u8); - num_batches += 1; - - // Go back and write the number of batches at the start - if num_batches < 128 { - self.preamble[0] = num_batches as u8; - } else { - // We need more than 1 byte to encode num_batches! - // This is a ridiculous edge case, so just pad to 5 bytes for simplicity - let old_len = self.preamble.len(); - self.preamble.resize(old_len + 4, 0); - self.preamble.copy_within(1..old_len, 5); - self.preamble.overwrite_padded_u32(0, num_batches); - } + local_types.serialize(&mut self.preamble); } /// Generate instruction bytes to grab a frame of stack memory on entering the function diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index 0c4c5480ad..a98eed83d8 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -1,7 +1,7 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; -use crate::code_builder::Align; +use crate::code_builder::{Align, ValueType}; use crate::serialize::{SerialBuffer, Serialize}; /******************************************************************* @@ -34,7 +34,7 @@ struct SectionHeaderIndices { } /// Write a section header, returning the position of the encoded length -fn _write_section_header(buffer: &mut T, id: SectionId) -> SectionHeaderIndices { +fn write_section_header(buffer: &mut T, id: SectionId) -> SectionHeaderIndices { buffer.append_byte(id as u8); let size_index = buffer.reserve_padded_u32(); let body_index = buffer.size(); @@ -49,7 +49,7 @@ fn write_custom_section_header( buffer: &mut T, name: &str, ) -> SectionHeaderIndices { - // buffer.append_byte(SectionId::Custom as u8); // TODO: uncomment when we get rid of parity_wasm + buffer.append_byte(SectionId::Custom as u8); let size_index = buffer.reserve_padded_u32(); let body_index = buffer.size(); name.serialize(buffer); @@ -76,6 +76,262 @@ where } } +/******************************************************************* + * + * Type section (function signature definitions) + * + *******************************************************************/ + +impl<'a> Serialize for [ValueType] { + fn serialize(&self, buffer: &mut T) { + // reserve one byte for num_batches + let start = buffer.size(); + buffer.append_byte(0); // mut + + if self.is_empty() { + return; + } + + // Write declarations in batches of the same ValueType + let mut num_batches: u32 = 0; + let mut batch_type = self[0]; + let mut batch_size = 0; + for t in self { + if *t == batch_type { + batch_size += 1; + } else { + buffer.encode_u32(batch_size); + buffer.append_byte(batch_type as u8); + batch_type = *t; + batch_size = 1; + num_batches += 1; + } + } + buffer.encode_u32(batch_size); + buffer.append_byte(batch_type as u8); + num_batches += 1; + + // Go back and write the number of batches at the start + if num_batches < 128 { + buffer.set_byte(start, num_batches as u8); + } else { + // We need more than 1 byte to encode num_batches! + // This is a ridiculous edge case, so just pad to 5 bytes for simplicity + buffer.insert_space_at(1, 4); + buffer.overwrite_padded_u32(0, num_batches); + } + } +} + +struct Signature<'a> { + param_types: Vec<'a, ValueType>, + ret_type: Option, +} + +impl<'a> Serialize for Signature<'a> { + fn serialize(&self, buffer: &mut T) { + buffer.append_byte(0x60); + self.param_types.serialize(buffer); + match self.ret_type { + Some(t) => [t].serialize(buffer), + None => buffer.append_byte(0), // vector of length zero + } + } +} + +pub struct TypeSection<'a> { + signatures: Vec<'a, Signature<'a>>, +} + +impl<'a> TypeSection<'a> { + pub fn new(arena: &'a Bump) -> Self { + TypeSection { + signatures: Vec::with_capacity_in(8, arena), + } + } +} + +impl<'a> Serialize for TypeSection<'a> { + fn serialize(&self, buffer: &mut T) { + let header_indices = write_section_header(buffer, SectionId::Type); + serialize_vector_with_count(buffer, self.signatures); + update_section_size(buffer, header_indices); + } +} + +/******************************************************************* + * + * Import section + * + *******************************************************************/ + +pub struct ImportSection<'a> { + todo: &'a str, +} + +impl<'a> ImportSection<'a> { + pub fn new(arena: &'a Bump) -> Self { + ImportSection { todo: "" } + } +} + +impl<'a> Serialize for ImportSection<'a> { + fn serialize(&self, _buffer: &mut T) {} +} + +/******************************************************************* + * + * Function section (map function index to signature index) + * + *******************************************************************/ + +pub struct FunctionSection<'a> { + pub signature_indices: Vec<'a, u32>, +} + +impl<'a> FunctionSection<'a> { + pub fn new(arena: &'a Bump) -> Self { + FunctionSection { + signature_indices: Vec::with_capacity_in(8, arena), + } + } +} + +impl<'a> Serialize for FunctionSection<'a> { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +/******************************************************************* + * + * Memory section + * + *******************************************************************/ + +enum Limits { + Min(u32), + MinMax(u32, u32), +} + +pub struct MemorySection { + /// number of 64kB pages + num_pages: Limits, +} + +impl MemorySection { + const PAGE_SIZE_KB: u32 = 64; + + pub fn new(kb: u32) -> Self { + MemorySection { + num_pages: Limits::Min(kb / Self::PAGE_SIZE_KB), + } + } +} + +impl Serialize for MemorySection { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +/******************************************************************* + * + * Global section + * + *******************************************************************/ + +enum InitValue { + I32(i32), + I64(i64), + F32(f32), + F64(f64), +} + +struct Global { + init_value: InitValue, + is_mutable: bool, +} + +impl Serialize for Global { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +pub struct GlobalSection<'a>(Vec<'a, Global>); + +impl<'a> GlobalSection<'a> { + pub fn new(arena: &'a Bump) -> Self { + GlobalSection(Vec::with_capacity_in(1, arena)) + } +} + +impl<'a> Serialize for GlobalSection<'a> { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +/******************************************************************* + * + * Export section + * + *******************************************************************/ + +#[repr(u8)] +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +enum ExportType { + Func = 0, + Table = 1, + Mem = 2, + Global = 3, +} + +struct Export { + name: String, + ty: ExportType, + index: u32, +} + +pub struct ExportSection<'a> { + todo: &'a str, +} + +impl<'a> ExportSection<'a> { + pub fn new(arena: &'a Bump) {} +} + +impl<'a> Serialize for ExportSection<'a> { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +/******************************************************************* + * + * Code section + * + *******************************************************************/ + +pub struct CodeSection<'a> { + bytes: Vec<'a, u8>, +} + +impl<'a> CodeSection<'a> { + pub fn new(arena: &'a Bump) -> Self { + CodeSection { + bytes: Vec::with_capacity_in(4096, arena), + } + } +} + +impl<'a> Serialize for CodeSection<'a> { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + /******************************************************************* * * Relocation sections @@ -248,6 +504,7 @@ pub struct LinkingSegment { pub alignment: Align, pub flags: u32, } + impl Serialize for LinkingSegment { fn serialize(&self, _buffer: &mut T) { todo!(); @@ -259,6 +516,7 @@ pub struct LinkingInitFunc { pub priority: u32, pub symbol_index: u32, // index in the symbol table, not the function index } + impl Serialize for LinkingInitFunc { fn serialize(&self, _buffer: &mut T) { todo!(); @@ -284,6 +542,7 @@ pub struct ComdatSym { pub kind: ComdatSymKind, pub index: u32, } + impl Serialize for ComdatSym { fn serialize(&self, _buffer: &mut T) { todo!(); @@ -300,6 +559,7 @@ pub struct LinkingComdat<'a> { flags: u32, syms: Vec<'a, ComdatSym>, } + impl<'a> Serialize for LinkingComdat<'a> { fn serialize(&self, _buffer: &mut T) { todo!(); @@ -352,6 +612,7 @@ pub enum WasmObjectSymbol { Defined { index: u32, name: String }, Imported { index: u32 }, } + impl Serialize for WasmObjectSymbol { fn serialize(&self, buffer: &mut T) { match self { @@ -378,6 +639,7 @@ pub enum DataSymbol { name: String, }, } + impl Serialize for DataSymbol { fn serialize(&self, buffer: &mut T) { match self { @@ -418,6 +680,7 @@ pub struct SymInfo { flags: u32, info: SymInfoFields, } + impl SymInfo { pub fn for_function(wasm_function_index: u32, name: String) -> Self { let linking_symbol = WasmObjectSymbol::Defined { @@ -470,6 +733,7 @@ pub enum LinkingSubSection<'a> { /// Specifies extra information about the symbols present in the module. SymbolTable(Vec<'a, SymInfo>), } + impl<'a> Serialize for LinkingSubSection<'a> { fn serialize(&self, buffer: &mut T) { buffer.append_byte(match self { @@ -530,7 +794,7 @@ impl<'a> Serialize for LinkingSection<'a> { * *******************************************************************/ -const WASM_VERSION: u32 = 1; +type UnusedSection<'a> = &'a str; pub struct WasmModule<'a> { pub types: &'a str, // TODO @@ -558,6 +822,8 @@ fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) } impl<'a> WasmModule<'a> { + const WASM_VERSION: u32 = 1; + pub fn new(arena: &'a Bump) -> Self { WasmModule { types: "", @@ -582,7 +848,7 @@ impl<'a> WasmModule<'a> { fn serialize(&mut self, buffer: &mut T) { buffer.append_byte(0); buffer.append_slice("asm".as_bytes()); - buffer.write_unencoded_u32(WASM_VERSION); + buffer.write_unencoded_u32(Self::WASM_VERSION); let mut index: u32 = 0; let mut prev_size = buffer.size(); diff --git a/compiler/gen_wasm/src/serialize.rs b/compiler/gen_wasm/src/serialize.rs index 15e0fab285..084d8131bc 100644 --- a/compiler/gen_wasm/src/serialize.rs +++ b/compiler/gen_wasm/src/serialize.rs @@ -71,6 +71,7 @@ macro_rules! encode_padded_sleb128 { pub trait SerialBuffer { fn append_byte(&mut self, b: u8); + fn set_byte(&mut self, index: usize, b: u8); fn append_slice(&mut self, b: &[u8]); fn size(&self) -> usize; @@ -79,6 +80,8 @@ pub trait SerialBuffer { encode_sleb128!(encode_i32, i32); encode_sleb128!(encode_i64, i64); + /// Inserts extra entries at the given index by copying the following entries to higher indices + fn insert_space_at(&mut self, index: usize, size: usize); fn reserve_padded_u32(&mut self) -> usize; fn encode_padded_u32(&mut self, value: u32) -> usize; fn overwrite_padded_u32(&mut self, index: usize, value: u32); @@ -122,12 +125,20 @@ impl SerialBuffer for std::vec::Vec { fn append_byte(&mut self, b: u8) { self.push(b); } + fn set_byte(&mut self, index: usize, b: u8) { + self[index] = b; + } fn append_slice(&mut self, b: &[u8]) { self.extend_from_slice(b); } fn size(&self) -> usize { self.len() } + fn insert_space_at(&mut self, index: usize, size: usize) { + let old_len = self.len(); + self.resize(old_len + size, 0); + self.copy_within(index..old_len, index + size); + } fn reserve_padded_u32(&mut self) -> usize { let index = self.len(); self.resize(index + 5, 0xff); @@ -149,12 +160,20 @@ impl<'a> SerialBuffer for Vec<'a, u8> { fn append_byte(&mut self, b: u8) { self.push(b); } + fn set_byte(&mut self, index: usize, b: u8) { + self[index] = b; + } fn append_slice(&mut self, b: &[u8]) { self.extend_from_slice(b); } fn size(&self) -> usize { self.len() } + fn insert_space_at(&mut self, index: usize, size: usize) { + let old_len = self.len(); + self.resize(old_len + size, 0); + self.copy_within(index..old_len, index + size); + } fn reserve_padded_u32(&mut self) -> usize { let index = self.len(); self.resize(index + 5, 0xff); diff --git a/compiler/gen_wasm/tests/helpers/eval.rs b/compiler/gen_wasm/tests/helpers/eval.rs index 0ee845f592..c8ac350858 100644 --- a/compiler/gen_wasm/tests/helpers/eval.rs +++ b/compiler/gen_wasm/tests/helpers/eval.rs @@ -120,7 +120,7 @@ pub fn helper_wasm<'a, T: Wasm32TestResult>( let module_bytes = parity_module.into_bytes().unwrap(); // for debugging (e.g. with wasm2wat or wasm-objdump) - if false { + if true { use std::io::Write; let mut hash_state = DefaultHasher::new(); From d25b12328d177a0fbb179b49ce6debf99d36fe6f Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Mon, 1 Nov 2021 08:42:02 +0000 Subject: [PATCH 04/20] Rename some SerialBuffer methods --- compiler/gen_wasm/src/module_builder.rs | 28 ++++++++++++------------- compiler/gen_wasm/src/serialize.rs | 28 +++++++++++++------------ 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index a98eed83d8..9a1a573c69 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -35,7 +35,7 @@ struct SectionHeaderIndices { /// Write a section header, returning the position of the encoded length fn write_section_header(buffer: &mut T, id: SectionId) -> SectionHeaderIndices { - buffer.append_byte(id as u8); + buffer.append_u8(id as u8); let size_index = buffer.reserve_padded_u32(); let body_index = buffer.size(); SectionHeaderIndices { @@ -49,7 +49,7 @@ fn write_custom_section_header( buffer: &mut T, name: &str, ) -> SectionHeaderIndices { - buffer.append_byte(SectionId::Custom as u8); + buffer.append_u8(SectionId::Custom as u8); let size_index = buffer.reserve_padded_u32(); let body_index = buffer.size(); name.serialize(buffer); @@ -86,7 +86,7 @@ impl<'a> Serialize for [ValueType] { fn serialize(&self, buffer: &mut T) { // reserve one byte for num_batches let start = buffer.size(); - buffer.append_byte(0); // mut + buffer.append_u8(0); if self.is_empty() { return; @@ -101,19 +101,19 @@ impl<'a> Serialize for [ValueType] { batch_size += 1; } else { buffer.encode_u32(batch_size); - buffer.append_byte(batch_type as u8); + buffer.append_u8(batch_type as u8); batch_type = *t; batch_size = 1; num_batches += 1; } } buffer.encode_u32(batch_size); - buffer.append_byte(batch_type as u8); + buffer.append_u8(batch_type as u8); num_batches += 1; // Go back and write the number of batches at the start if num_batches < 128 { - buffer.set_byte(start, num_batches as u8); + buffer.overwrite_u8(start, num_batches as u8); } else { // We need more than 1 byte to encode num_batches! // This is a ridiculous edge case, so just pad to 5 bytes for simplicity @@ -130,11 +130,11 @@ struct Signature<'a> { impl<'a> Serialize for Signature<'a> { fn serialize(&self, buffer: &mut T) { - buffer.append_byte(0x60); + buffer.append_u8(0x60); self.param_types.serialize(buffer); match self.ret_type { Some(t) => [t].serialize(buffer), - None => buffer.append_byte(0), // vector of length zero + None => buffer.append_u8(0), // vector of length zero } } } @@ -444,7 +444,7 @@ impl Serialize for RelocationEntry { offset, symbol_index, } => { - buffer.append_byte(*type_id as u8); + buffer.append_u8(*type_id as u8); buffer.encode_u32(*offset); buffer.encode_u32(*symbol_index); } @@ -454,7 +454,7 @@ impl Serialize for RelocationEntry { symbol_index, addend, } => { - buffer.append_byte(*type_id as u8); + buffer.append_u8(*type_id as u8); buffer.encode_u32(*offset); buffer.encode_u32(*symbol_index); buffer.encode_i32(*addend); @@ -696,7 +696,7 @@ impl SymInfo { impl Serialize for SymInfo { fn serialize(&self, buffer: &mut T) { - buffer.append_byte(match self.info { + buffer.append_u8(match self.info { SymInfoFields::Function(_) => 0, SymInfoFields::Data(_) => 1, SymInfoFields::Global(_) => 2, @@ -736,7 +736,7 @@ pub enum LinkingSubSection<'a> { impl<'a> Serialize for LinkingSubSection<'a> { fn serialize(&self, buffer: &mut T) { - buffer.append_byte(match self { + buffer.append_u8(match self { Self::SegmentInfo(_) => 5, Self::InitFuncs(_) => 6, Self::ComdatInfo(_) => 7, @@ -778,7 +778,7 @@ impl<'a> LinkingSection<'a> { impl<'a> Serialize for LinkingSection<'a> { fn serialize(&self, buffer: &mut T) { let header_indices = write_custom_section_header(buffer, "linking"); - buffer.append_byte(LINKING_VERSION); + buffer.append_u8(LINKING_VERSION); for subsection in self.subsections.iter() { subsection.serialize(buffer); } @@ -846,7 +846,7 @@ impl<'a> WasmModule<'a> { #[allow(dead_code)] fn serialize(&mut self, buffer: &mut T) { - buffer.append_byte(0); + buffer.append_u8(0); buffer.append_slice("asm".as_bytes()); buffer.write_unencoded_u32(Self::WASM_VERSION); diff --git a/compiler/gen_wasm/src/serialize.rs b/compiler/gen_wasm/src/serialize.rs index 084d8131bc..a261256a4a 100644 --- a/compiler/gen_wasm/src/serialize.rs +++ b/compiler/gen_wasm/src/serialize.rs @@ -10,10 +10,10 @@ macro_rules! encode_uleb128 { let mut x = value; let start_len = self.size(); while x >= 0x80 { - self.append_byte(0x80 | ((x & 0x7f) as u8)); + self.append_u8(0x80 | ((x & 0x7f) as u8)); x >>= 7; } - self.append_byte(x as u8); + self.append_u8(x as u8); self.size() - start_len } }; @@ -30,10 +30,10 @@ macro_rules! encode_sleb128 { x >>= 7; let byte_is_negative = (byte & 0x40) != 0; if ((x == 0 && !byte_is_negative) || (x == -1 && byte_is_negative)) { - self.append_byte(byte); + self.append_u8(byte); break; } - self.append_byte(byte | 0x80); + self.append_u8(byte | 0x80); } self.size() - start_len } @@ -47,7 +47,7 @@ macro_rules! write_unencoded { let mut x = value; let size = std::mem::size_of::<$ty>(); for _ in 0..size { - self.append_byte((x & 0xff) as u8); + self.append_u8((x & 0xff) as u8); x >>= 8; } } @@ -61,18 +61,19 @@ macro_rules! encode_padded_sleb128 { let mut x = value; let size = (std::mem::size_of::<$ty>() / 4) * 5; for _ in 0..(size - 1) { - self.append_byte(0x80 | (x & 0x7f) as u8); + self.append_u8(0x80 | (x & 0x7f) as u8); x >>= 7; } - self.append_byte((x & 0x7f) as u8); + self.append_u8((x & 0x7f) as u8); } }; } pub trait SerialBuffer { - fn append_byte(&mut self, b: u8); - fn set_byte(&mut self, index: usize, b: u8); + fn append_u8(&mut self, b: u8); + fn overwrite_u8(&mut self, index: usize, b: u8); fn append_slice(&mut self, b: &[u8]); + fn size(&self) -> usize; encode_uleb128!(encode_u32, u32); @@ -82,6 +83,7 @@ pub trait SerialBuffer { /// Inserts extra entries at the given index by copying the following entries to higher indices fn insert_space_at(&mut self, index: usize, size: usize); + fn reserve_padded_u32(&mut self) -> usize; fn encode_padded_u32(&mut self, value: u32) -> usize; fn overwrite_padded_u32(&mut self, index: usize, value: u32); @@ -122,10 +124,10 @@ fn overwrite_padded_u32_help(buffer: &mut [u8], value: u32) { } impl SerialBuffer for std::vec::Vec { - fn append_byte(&mut self, b: u8) { + fn append_u8(&mut self, b: u8) { self.push(b); } - fn set_byte(&mut self, index: usize, b: u8) { + fn overwrite_u8(&mut self, index: usize, b: u8) { self[index] = b; } fn append_slice(&mut self, b: &[u8]) { @@ -157,10 +159,10 @@ impl SerialBuffer for std::vec::Vec { } impl<'a> SerialBuffer for Vec<'a, u8> { - fn append_byte(&mut self, b: u8) { + fn append_u8(&mut self, b: u8) { self.push(b); } - fn set_byte(&mut self, index: usize, b: u8) { + fn overwrite_u8(&mut self, index: usize, b: u8) { self[index] = b; } fn append_slice(&mut self, b: &[u8]) { From eb9705e2e99e9449e5cf408bdb7b12f3a1380b76 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Mon, 1 Nov 2021 09:48:29 +0000 Subject: [PATCH 05/20] import section --- compiler/gen_wasm/src/module_builder.rs | 140 +++++++++++++++++++----- 1 file changed, 110 insertions(+), 30 deletions(-) diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index 9a1a573c69..cfa2de5f05 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -6,7 +6,7 @@ use crate::serialize::{SerialBuffer, Serialize}; /******************************************************************* * - * Helper functions + * Helpers * *******************************************************************/ @@ -28,6 +28,15 @@ pub enum SectionId { DataCount = 12, } +#[repr(u8)] +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +enum ImportExportType { + Func = 0, + Table = 1, + Mem = 2, + Global = 3, +} + struct SectionHeaderIndices { size_index: usize, body_index: usize, @@ -65,7 +74,7 @@ fn update_section_size(buffer: &mut T, header_indices: SectionH buffer.overwrite_padded_u32(header_indices.size_index, size as u32); } -fn serialize_vector_with_count<'a, SB, S>(buffer: &mut SB, items: &Vec<'a, S>) +fn serialize_vector_with_count<'a, SB, S>(buffer: &mut SB, items: &[S]) where SB: SerialBuffer, S: Serialize, @@ -154,7 +163,7 @@ impl<'a> TypeSection<'a> { impl<'a> Serialize for TypeSection<'a> { fn serialize(&self, buffer: &mut T) { let header_indices = write_section_header(buffer, SectionId::Type); - serialize_vector_with_count(buffer, self.signatures); + serialize_vector_with_count(buffer, &self.signatures); update_section_size(buffer, header_indices); } } @@ -165,18 +174,80 @@ impl<'a> Serialize for TypeSection<'a> { * *******************************************************************/ -pub struct ImportSection<'a> { - todo: &'a str, +#[repr(u8)] +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +enum RefType { + Func = 0x70, + Extern = 0x6f, } +struct TableType { + ref_type: RefType, + limits: Limits, +} + +impl Serialize for TableType { + fn serialize(&self, buffer: &mut T) { + buffer.append_u8(self.ref_type as u8); + self.limits.serialize(buffer); + } +} + +enum ImportDesc { + Func { signature_index: u32 }, + Table { ty: TableType }, + Mem { limits: Limits }, + Global { ty: GlobalType }, +} + +struct Import { + module: String, + name: String, + description: ImportDesc, +} + +impl Serialize for Import { + fn serialize(&self, buffer: &mut T) { + self.module.serialize(buffer); + self.name.serialize(buffer); + match &self.description { + ImportDesc::Func { signature_index } => { + buffer.append_u8(0); + buffer.encode_u32(*signature_index); + } + ImportDesc::Table { ty } => { + buffer.append_u8(1); + ty.serialize(buffer); + } + ImportDesc::Mem { limits } => { + buffer.append_u8(2); + limits.serialize(buffer); + } + ImportDesc::Global { ty } => { + buffer.append_u8(3); + ty.serialize(buffer); + } + } + } +} + +pub struct ImportSection<'a>(Vec<'a, Import>); + impl<'a> ImportSection<'a> { pub fn new(arena: &'a Bump) -> Self { - ImportSection { todo: "" } + ImportSection(bumpalo::vec![in arena]) } } impl<'a> Serialize for ImportSection<'a> { - fn serialize(&self, _buffer: &mut T) {} + fn serialize(&self, buffer: &mut T) { + if self.0.is_empty() { + return; + } + let header_indices = write_section_header(buffer, SectionId::Import); + serialize_vector_with_count(buffer, &self.0); + update_section_size(buffer, header_indices); + } } /******************************************************************* @@ -198,7 +269,7 @@ impl<'a> FunctionSection<'a> { } impl<'a> Serialize for FunctionSection<'a> { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -214,6 +285,12 @@ enum Limits { MinMax(u32, u32), } +impl Serialize for Limits { + fn serialize(&self, buffer: &mut T) { + todo!(); + } +} + pub struct MemorySection { /// number of 64kB pages num_pages: Limits, @@ -230,7 +307,7 @@ impl MemorySection { } impl Serialize for MemorySection { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -241,7 +318,19 @@ impl Serialize for MemorySection { * *******************************************************************/ -enum InitValue { +struct GlobalType { + value_type: ValueType, + is_mutable: bool, +} + +impl Serialize for GlobalType { + fn serialize(&self, buffer: &mut T) { + buffer.append_u8(self.value_type as u8); + buffer.append_u8(self.is_mutable as u8); + } +} + +enum GlobalInitValue { I32(i32), I64(i64), F32(f32), @@ -249,12 +338,12 @@ enum InitValue { } struct Global { - init_value: InitValue, - is_mutable: bool, + ty: GlobalType, + init_value: GlobalInitValue, } impl Serialize for Global { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -268,7 +357,7 @@ impl<'a> GlobalSection<'a> { } impl<'a> Serialize for GlobalSection<'a> { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -279,18 +368,9 @@ impl<'a> Serialize for GlobalSection<'a> { * *******************************************************************/ -#[repr(u8)] -#[derive(PartialEq, Eq, Clone, Copy, Debug)] -enum ExportType { - Func = 0, - Table = 1, - Mem = 2, - Global = 3, -} - struct Export { name: String, - ty: ExportType, + ty: ImportExportType, index: u32, } @@ -303,7 +383,7 @@ impl<'a> ExportSection<'a> { } impl<'a> Serialize for ExportSection<'a> { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -327,7 +407,7 @@ impl<'a> CodeSection<'a> { } impl<'a> Serialize for CodeSection<'a> { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -506,7 +586,7 @@ pub struct LinkingSegment { } impl Serialize for LinkingSegment { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -518,7 +598,7 @@ pub struct LinkingInitFunc { } impl Serialize for LinkingInitFunc { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -544,7 +624,7 @@ pub struct ComdatSym { } impl Serialize for ComdatSym { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } @@ -561,7 +641,7 @@ pub struct LinkingComdat<'a> { } impl<'a> Serialize for LinkingComdat<'a> { - fn serialize(&self, _buffer: &mut T) { + fn serialize(&self, buffer: &mut T) { todo!(); } } From e05ef73f6c72475305211e70a9158aabe3ff3ae8 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Mon, 1 Nov 2021 09:54:09 +0000 Subject: [PATCH 06/20] function section --- compiler/gen_wasm/src/module_builder.rs | 4 +++- compiler/gen_wasm/src/serialize.rs | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index cfa2de5f05..8051298300 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -270,7 +270,9 @@ impl<'a> FunctionSection<'a> { impl<'a> Serialize for FunctionSection<'a> { fn serialize(&self, buffer: &mut T) { - todo!(); + let header_indices = write_section_header(buffer, SectionId::Function); + serialize_vector_with_count(buffer, &self.signature_indices); + update_section_size(buffer, header_indices); } } diff --git a/compiler/gen_wasm/src/serialize.rs b/compiler/gen_wasm/src/serialize.rs index a261256a4a..2e8777b1b8 100644 --- a/compiler/gen_wasm/src/serialize.rs +++ b/compiler/gen_wasm/src/serialize.rs @@ -114,6 +114,12 @@ impl Serialize for str { } } +impl Serialize for u32 { + fn serialize(&self, buffer: &mut T) { + buffer.encode_u32(*self); + } +} + fn overwrite_padded_u32_help(buffer: &mut [u8], value: u32) { let mut x = value; for byte in buffer.iter_mut().take(4) { From 2a0dbb39fb53b38912c62ddef8a2960769bebdda Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Mon, 1 Nov 2021 12:30:44 +0000 Subject: [PATCH 07/20] Memory and Global sections --- compiler/gen_wasm/src/module_builder.rs | 58 ++++++++++++++++++++----- compiler/gen_wasm/src/serialize.rs | 24 ++++++++++ 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index 8051298300..5b892d4466 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -2,6 +2,7 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; use crate::code_builder::{Align, ValueType}; +use crate::opcodes; use crate::serialize::{SerialBuffer, Serialize}; /******************************************************************* @@ -58,7 +59,7 @@ fn write_custom_section_header( buffer: &mut T, name: &str, ) -> SectionHeaderIndices { - buffer.append_u8(SectionId::Custom as u8); + // buffer.append_u8(SectionId::Custom as u8); let size_index = buffer.reserve_padded_u32(); let body_index = buffer.size(); name.serialize(buffer); @@ -289,28 +290,43 @@ enum Limits { impl Serialize for Limits { fn serialize(&self, buffer: &mut T) { - todo!(); + match self { + Self::Min(min) => { + buffer.append_u8(0); + buffer.encode_u32(*min); + } + Self::MinMax(min, max) => { + buffer.append_u8(1); + buffer.encode_u32(*min); + buffer.encode_u32(*max); + } + } } } -pub struct MemorySection { - /// number of 64kB pages - num_pages: Limits, -} +pub struct MemorySection(Option); impl MemorySection { - const PAGE_SIZE_KB: u32 = 64; + pub const PAGE_SIZE: u32 = 64 * 1024; - pub fn new(kb: u32) -> Self { - MemorySection { - num_pages: Limits::Min(kb / Self::PAGE_SIZE_KB), + pub fn new(bytes: u32) -> Self { + if bytes == 0 { + MemorySection(None) + } else { + let pages = (bytes + Self::PAGE_SIZE - 1) / Self::PAGE_SIZE; + MemorySection(Some(Limits::Min(pages))) } } } impl Serialize for MemorySection { fn serialize(&self, buffer: &mut T) { - todo!(); + if let Some(limits) = &self.0 { + let header_indices = write_section_header(buffer, SectionId::Memory); + buffer.append_u8(1); + limits.serialize(buffer); + update_section_size(buffer, header_indices); + } } } @@ -346,7 +362,25 @@ struct Global { impl Serialize for Global { fn serialize(&self, buffer: &mut T) { - todo!(); + self.ty.serialize(buffer); + match self.init_value { + GlobalInitValue::I32(x) => { + buffer.append_u8(opcodes::I32CONST); + buffer.encode_i32(x); + } + GlobalInitValue::I64(x) => { + buffer.append_u8(opcodes::I64CONST); + buffer.encode_i64(x); + } + GlobalInitValue::F32(x) => { + buffer.append_u8(opcodes::F32CONST); + buffer.encode_f32(x); + } + GlobalInitValue::F64(x) => { + buffer.append_u8(opcodes::F64CONST); + buffer.encode_f64(x); + } + } } } diff --git a/compiler/gen_wasm/src/serialize.rs b/compiler/gen_wasm/src/serialize.rs index 2e8777b1b8..f4430332e8 100644 --- a/compiler/gen_wasm/src/serialize.rs +++ b/compiler/gen_wasm/src/serialize.rs @@ -120,6 +120,30 @@ impl Serialize for u32 { } } +impl Serialize for [S] { + fn serialize(&self, buffer: &mut T) { + buffer.encode_u32(self.len() as u32); + for item in self.iter() { + item.serialize(buffer); + } + } +} + +impl Serialize for Option { + /// serialize Option as a vector of length 1 or 0 + fn serialize(&self, buffer: &mut T) { + match self { + Some(x) => { + buffer.append_u8(1); + x.serialize(buffer); + } + None => { + buffer.append_u8(0); + } + } + } +} + fn overwrite_padded_u32_help(buffer: &mut [u8], value: u32) { let mut x = value; for byte in buffer.iter_mut().take(4) { From 4f464e485a416894783e65bd2e79ecf5b692f3ef Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Mon, 1 Nov 2021 14:06:22 +0000 Subject: [PATCH 08/20] Export and code sections --- compiler/gen_wasm/src/module_builder.rs | 156 +++++++++++++----------- 1 file changed, 82 insertions(+), 74 deletions(-) diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index 5b892d4466..1aa2f6c9b7 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -29,15 +29,6 @@ pub enum SectionId { DataCount = 12, } -#[repr(u8)] -#[derive(PartialEq, Eq, Clone, Copy, Debug)] -enum ImportExportType { - Func = 0, - Table = 1, - Mem = 2, - Global = 3, -} - struct SectionHeaderIndices { size_index: usize, body_index: usize, @@ -75,20 +66,23 @@ fn update_section_size(buffer: &mut T, header_indices: SectionH buffer.overwrite_padded_u32(header_indices.size_index, size as u32); } -fn serialize_vector_with_count<'a, SB, S>(buffer: &mut SB, items: &[S]) -where - SB: SerialBuffer, - S: Serialize, -{ - buffer.encode_u32(items.len() as u32); - for item in items.iter() { - item.serialize(buffer); +/// Serialize a section that is just a vector of some struct +fn serialize_vector_section( + buffer: &mut B, + section_id: SectionId, + subsections: &[T], +) { + if !subsections.is_empty() { + let header_indices = write_section_header(buffer, section_id); + subsections.serialize(buffer); + update_section_size(buffer, header_indices); } } /******************************************************************* * - * Type section (function signature definitions) + * Type section + * Deduplicated list of function type signatures * *******************************************************************/ @@ -163,9 +157,7 @@ impl<'a> TypeSection<'a> { impl<'a> Serialize for TypeSection<'a> { fn serialize(&self, buffer: &mut T) { - let header_indices = write_section_header(buffer, SectionId::Type); - serialize_vector_with_count(buffer, &self.signatures); - update_section_size(buffer, header_indices); + serialize_vector_section(buffer, SectionId::Type, &self.signatures); } } @@ -242,18 +234,14 @@ impl<'a> ImportSection<'a> { impl<'a> Serialize for ImportSection<'a> { fn serialize(&self, buffer: &mut T) { - if self.0.is_empty() { - return; - } - let header_indices = write_section_header(buffer, SectionId::Import); - serialize_vector_with_count(buffer, &self.0); - update_section_size(buffer, header_indices); + serialize_vector_section(buffer, SectionId::Import, &self.0); } } /******************************************************************* * - * Function section (map function index to signature index) + * Function section + * Maps function indices (Code section) to signature indices (Type section) * *******************************************************************/ @@ -271,9 +259,7 @@ impl<'a> FunctionSection<'a> { impl<'a> Serialize for FunctionSection<'a> { fn serialize(&self, buffer: &mut T) { - let header_indices = write_section_header(buffer, SectionId::Function); - serialize_vector_with_count(buffer, &self.signature_indices); - update_section_size(buffer, header_indices); + serialize_vector_section(buffer, SectionId::Function, &self.signature_indices); } } @@ -394,7 +380,7 @@ impl<'a> GlobalSection<'a> { impl<'a> Serialize for GlobalSection<'a> { fn serialize(&self, buffer: &mut T) { - todo!(); + serialize_vector_section(buffer, SectionId::Global, &self.0); } } @@ -404,29 +390,45 @@ impl<'a> Serialize for GlobalSection<'a> { * *******************************************************************/ +#[repr(u8)] +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +enum ExportType { + Func = 0, + Table = 1, + Mem = 2, + Global = 3, +} + struct Export { name: String, - ty: ImportExportType, + ty: ExportType, index: u32, } - -pub struct ExportSection<'a> { - todo: &'a str, +impl Serialize for Export { + fn serialize(&self, buffer: &mut T) { + self.name.serialize(buffer); + buffer.append_u8(self.ty as u8); + buffer.encode_u32(self.index); + } } +pub struct ExportSection<'a>(Vec<'a, Export>); + impl<'a> ExportSection<'a> { - pub fn new(arena: &'a Bump) {} + pub fn new(arena: &'a Bump) -> Self { + ExportSection(bumpalo::vec![in arena]) + } } impl<'a> Serialize for ExportSection<'a> { fn serialize(&self, buffer: &mut T) { - todo!(); + serialize_vector_section(buffer, SectionId::Export, &self.0); } } /******************************************************************* * - * Code section + * Code section (see also code_builder.rs) * *******************************************************************/ @@ -444,7 +446,13 @@ impl<'a> CodeSection<'a> { impl<'a> Serialize for CodeSection<'a> { fn serialize(&self, buffer: &mut T) { - todo!(); + buffer.append_u8(SectionId::Code as u8); + + // TODO + // We've copied each function into self.bytes, now we're copying again. + // Can eliminate one of those copies by refactoring to a vector of CodeBuilders + + buffer.append_slice(&self.bytes); } } @@ -601,7 +609,7 @@ impl<'a> Serialize for RelocationSection<'a> { fn serialize(&self, buffer: &mut T) { let header_indices = write_custom_section_header(buffer, self.name); buffer.encode_u32(self.target_section_index.unwrap()); - serialize_vector_with_count(buffer, &self.entries); + self.entries.serialize(buffer); update_section_size(buffer, header_indices); } } @@ -622,7 +630,7 @@ pub struct LinkingSegment { } impl Serialize for LinkingSegment { - fn serialize(&self, buffer: &mut T) { + fn serialize(&self, _buffer: &mut T) { todo!(); } } @@ -634,7 +642,7 @@ pub struct LinkingInitFunc { } impl Serialize for LinkingInitFunc { - fn serialize(&self, buffer: &mut T) { + fn serialize(&self, _buffer: &mut T) { todo!(); } } @@ -660,7 +668,7 @@ pub struct ComdatSym { } impl Serialize for ComdatSym { - fn serialize(&self, buffer: &mut T) { + fn serialize(&self, _buffer: &mut T) { todo!(); } } @@ -677,7 +685,7 @@ pub struct LinkingComdat<'a> { } impl<'a> Serialize for LinkingComdat<'a> { - fn serialize(&self, buffer: &mut T) { + fn serialize(&self, _buffer: &mut T) { todo!(); } } @@ -861,10 +869,10 @@ impl<'a> Serialize for LinkingSubSection<'a> { let payload_len_index = buffer.reserve_padded_u32(); let payload_start_index = buffer.size(); match self { - Self::SegmentInfo(items) => serialize_vector_with_count(buffer, items), - Self::InitFuncs(items) => serialize_vector_with_count(buffer, items), - Self::ComdatInfo(items) => serialize_vector_with_count(buffer, items), - Self::SymbolTable(items) => serialize_vector_with_count(buffer, items), + Self::SegmentInfo(items) => items.serialize(buffer), + Self::InitFuncs(items) => items.serialize(buffer), + Self::ComdatInfo(items) => items.serialize(buffer), + Self::SymbolTable(items) => items.serialize(buffer), } buffer.overwrite_padded_u32( payload_len_index, @@ -910,21 +918,21 @@ impl<'a> Serialize for LinkingSection<'a> { * *******************************************************************/ -type UnusedSection<'a> = &'a str; +type RocUnusedSection<'a> = &'a str; pub struct WasmModule<'a> { - pub types: &'a str, // TODO - pub import: &'a str, // TODO - pub function: &'a str, // TODO - pub table: &'a str, // TODO - pub memory: &'a str, // TODO - pub global: &'a str, // TODO - pub export: &'a str, // TODO - pub start: &'a str, // TODO - pub element: &'a str, // TODO - pub data_count: &'a str, // TODO - pub code: &'a str, // TODO - pub data: &'a str, // TODO + pub types: TypeSection<'a>, + pub import: ImportSection<'a>, + pub function: FunctionSection<'a>, + pub table: RocUnusedSection<'a>, + pub memory: MemorySection, + pub global: GlobalSection<'a>, + pub export: ExportSection<'a>, + pub start: RocUnusedSection<'a>, + pub element: RocUnusedSection<'a>, + pub data_count: RocUnusedSection<'a>, + pub code: CodeSection<'a>, + pub data: RocUnusedSection<'a>, pub linking: LinkingSection<'a>, pub reloc_code: RelocationSection<'a>, pub reloc_data: RelocationSection<'a>, @@ -942,18 +950,18 @@ impl<'a> WasmModule<'a> { pub fn new(arena: &'a Bump) -> Self { WasmModule { - types: "", - import: "", - function: "", - table: "", - memory: "", - global: "", - export: "", - start: "", - element: "", - data_count: "", - code: "", - data: "", + types: TypeSection::new(arena), + import: ImportSection::new(arena), + function: FunctionSection::new(arena), + table: RocUnusedSection::default(), + memory: MemorySection::new(1024 * 1024), + global: GlobalSection::new(arena), + export: ExportSection::new(arena), + start: RocUnusedSection::default(), + element: RocUnusedSection::default(), + data_count: RocUnusedSection::default(), + code: CodeSection::new(arena), + data: RocUnusedSection::default(), linking: LinkingSection::new(arena), reloc_code: RelocationSection::new(arena, "reloc.CODE"), reloc_data: RelocationSection::new(arena, "reloc.DATA"), From edadd4ce13ec926333da1ac39aafa01454448898 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Tue, 2 Nov 2021 11:48:31 +0000 Subject: [PATCH 09/20] Temporary setup to gradually migrate sections from parity_wasm to our own module_builder Previously we just had our own code section. Now we also have export section, and a way of switching them over one by one, running tests in between. --- compiler/gen_wasm/src/backend.rs | 23 ++- compiler/gen_wasm/src/lib.rs | 160 +++++++++++++----- compiler/gen_wasm/src/module_builder.rs | 73 +++++--- compiler/gen_wasm/src/serialize.rs | 9 +- compiler/gen_wasm/tests/helpers/eval.rs | 13 +- .../tests/helpers/wasm32_test_result.rs | 26 +-- 6 files changed, 203 insertions(+), 101 deletions(-) diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index 54ba8d8491..647b728fc6 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -27,11 +27,10 @@ pub struct WasmBackend<'a> { // Module-level data pub module: WasmModule<'a>, - pub module_builder: ModuleBuilder, - pub code_section_bytes: std::vec::Vec, + pub parity_builder: ModuleBuilder, _data_offset_map: MutMap, u32>, _data_offset_next: u32, - proc_symbols: &'a [Symbol], + proc_symbols: Vec<'a, Symbol>, // Function-level data code_builder: CodeBuilder<'a>, @@ -43,20 +42,20 @@ pub struct WasmBackend<'a> { } impl<'a> WasmBackend<'a> { - pub fn new(env: &'a Env<'a>, proc_symbols: &'a [Symbol]) -> Self { - let mut code_section_bytes = std::vec::Vec::with_capacity(4096); + pub fn new(env: &'a Env<'a>, proc_symbols: Vec<'a, Symbol>) -> Self { + let mut module = WasmModule::new(env.arena); // Code section header - code_section_bytes.reserve_padded_u32(); // byte length, to be written at the end - code_section_bytes.encode_padded_u32(proc_symbols.len() as u32); // modified later in unit tests + module.code.bytes.reserve_padded_u32(); // byte length, to be written at the end + let num_procs = proc_symbols.len() as u32; + module.code.bytes.encode_padded_u32(num_procs); // modified later in unit tests WasmBackend { env, // Module-level data - module: WasmModule::new(env.arena), - module_builder: builder::module(), - code_section_bytes, + module, + parity_builder: builder::module(), _data_offset_map: MutMap::default(), _data_offset_next: UNUSED_DATA_SECTION_BYTES, proc_symbols, @@ -88,7 +87,7 @@ impl<'a> WasmBackend<'a> { // Use parity-wasm to add the signature in "types" and "functions" sections // but no instructions, since we are building our own code section let empty_function_def = self.start_proc(&proc); - let location = self.module_builder.push_function(empty_function_def); + let location = self.parity_builder.push_function(empty_function_def); let function_index = location.body; self.build_stmt(&proc.body, &proc.ret_layout)?; @@ -141,7 +140,7 @@ impl<'a> WasmBackend<'a> { self.storage.stack_frame_pointer, ); - let relocs = self.code_builder.serialize(&mut self.code_section_bytes); + let relocs = self.code_builder.serialize(&mut self.module.code.bytes); self.module.reloc_code.entries.extend(relocs); Ok(()) } diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index 30f647a148..afdd19abbc 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -8,9 +8,9 @@ pub mod serialize; mod storage; use bumpalo::{self, collections::Vec, Bump}; -use parity_wasm::builder; +use parity_wasm::builder::{self, ModuleBuilder}; -use parity_wasm::elements::{Instruction, Internal, Module, Section}; +use parity_wasm::elements::{Instruction, Section, Serialize as ParitySerialize}; use roc_collections::all::{MutMap, MutSet}; use roc_module::symbol::{Interns, Symbol}; use roc_mono::ir::{Proc, ProcLayout}; @@ -18,7 +18,9 @@ use roc_mono::layout::LayoutIds; use crate::backend::WasmBackend; use crate::code_builder::{Align, CodeBuilder, ValueType}; -use crate::module_builder::{LinkingSection, LinkingSubSection, SectionId, SymInfo}; +use crate::module_builder::{ + Export, ExportType, LinkingSection, LinkingSubSection, SectionId, SymInfo, WasmModule, +}; use crate::serialize::{SerialBuffer, Serialize}; const PTR_SIZE: u32 = 4; @@ -44,50 +46,51 @@ pub fn build_module<'a>( env: &'a Env, procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, ) -> Result, String> { - let (builder, code_section_bytes) = build_module_help(env, procedures)?; - let mut module = builder.build(); - replace_code_section(&mut module, code_section_bytes); - - module - .into_bytes() - .map_err(|e| -> String { format!("Error serialising Wasm module {:?}", e) }) + let (parity_builder, mut wasm_module) = build_module_help(env, procedures)?; + let mut buffer = std::vec::Vec::with_capacity(4096); + combine_and_serialize(&mut buffer, parity_builder, &mut wasm_module); + Ok(buffer) } pub fn build_module_help<'a>( env: &'a Env, procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, -) -> Result<(builder::ModuleBuilder, std::vec::Vec), String> { +) -> Result<(ModuleBuilder, WasmModule<'a>), String> { let proc_symbols = Vec::from_iter_in(procedures.keys().map(|(sym, _)| *sym), env.arena); - let mut backend = WasmBackend::new(env, &proc_symbols); + let mut backend = WasmBackend::new(env, proc_symbols); + let mut layout_ids = LayoutIds::default(); let mut symbol_table_entries = Vec::with_capacity_in(procedures.len(), env.arena); + + for (i, ((sym, layout), proc)) in procedures.into_iter().enumerate() { let proc_name = layout_ids .get(proc.name, &proc.ret_layout) .to_symbol_string(proc.name, &env.interns); symbol_table_entries.push(SymInfo::for_function(i as u32, proc_name)); - let function_index = backend.build_proc(proc, sym)?; + backend.build_proc(proc, sym)?; if env.exposed_to_host.contains(&sym) { let fn_name = layout_ids .get_toplevel(sym, &layout) .to_symbol_string(sym, &env.interns); - let export = builder::export() - .field(fn_name.as_str()) - .with_internal(Internal::Function(function_index)) - .build(); - - backend.module_builder.push_export(export); + backend.module.export.entries.push(Export { + name: fn_name, + ty: ExportType::Func, + index: i as u32, + }); } } // Update code section length - let inner_length = (backend.code_section_bytes.len() - 5) as u32; + let inner_length = (backend.module.code.bytes.len() - 5) as u32; backend - .code_section_bytes + .module + .code + .bytes .overwrite_padded_u32(0, inner_length); // linking metadata section @@ -98,7 +101,7 @@ pub fn build_module_help<'a>( ], }; linking_section.serialize(&mut linking_section_bytes); - backend.module_builder = backend.module_builder.with_section(Section::Unparsed { + backend.parity_builder = backend.parity_builder.with_section(Section::Unparsed { id: SectionId::Custom as u8, payload: linking_section_bytes, }); @@ -106,10 +109,13 @@ pub fn build_module_help<'a>( const CODE_SECTION_INDEX: u32 = 5; backend.module.reloc_code.target_section_index = Some(CODE_SECTION_INDEX); let mut code_reloc_section_bytes = std::vec::Vec::with_capacity(256); - backend.module.reloc_code.serialize(&mut code_reloc_section_bytes); + backend + .module + .reloc_code + .serialize(&mut code_reloc_section_bytes); // Must come after linking section - backend.module_builder = backend.module_builder.with_section(Section::Unparsed { + backend.parity_builder = backend.parity_builder.with_section(Section::Unparsed { id: SectionId::Custom as u8, payload: code_reloc_section_bytes, }); @@ -120,36 +126,106 @@ pub fn build_module_help<'a>( let memory = builder::MemoryBuilder::new() .with_min(MIN_MEMORY_SIZE_KB / PAGE_SIZE_KB) .build(); - backend.module_builder.push_memory(memory); - let memory_export = builder::export() - .field("memory") - .with_internal(Internal::Memory(0)) - .build(); - backend.module_builder.push_export(memory_export); + backend.parity_builder.push_memory(memory); + + backend.module.export.entries.push(Export { + name: "memory".to_string(), + ty: ExportType::Mem, + index: 0, + }); let stack_pointer_global = builder::global() .with_type(parity_wasm::elements::ValueType::I32) .mutable() .init_expr(Instruction::I32Const((MIN_MEMORY_SIZE_KB * 1024) as i32)) .build(); - backend.module_builder.push_global(stack_pointer_global); + backend.parity_builder.push_global(stack_pointer_global); - Ok((backend.module_builder, backend.code_section_bytes)) + Ok((backend.parity_builder, backend.module)) +} + +fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) { + if size > *prev_size { + *index += 1; + *prev_size = size; + } +} + +macro_rules! serialize_parity { + ($buffer: expr, $sections: expr, $lambda: expr) => { + $sections + .remove($sections.iter().position($lambda).unwrap()) + .serialize($buffer) + .unwrap(); + }; } /// Replace parity-wasm's code section with our own handmade one -pub fn replace_code_section(module: &mut Module, code_section_bytes: std::vec::Vec) { - let sections = module.sections_mut(); +pub fn combine_and_serialize<'a>( + buffer: &mut std::vec::Vec, + parity_builder: ModuleBuilder, + wasm_module: &mut WasmModule<'a>, // backend: &mut WasmBackend<'a> +) { + buffer.append_u8(0); + buffer.append_slice("asm".as_bytes()); + buffer.write_unencoded_u32(WasmModule::WASM_VERSION); - let code_section_index = sections - .iter() - .position(|s| matches!(s, Section::Code(_))) - .unwrap(); + let mut index: u32 = 0; + let mut prev_size = buffer.size(); - sections[code_section_index] = Section::Unparsed { - id: SectionId::Code as u8, - payload: code_section_bytes, - }; + let mut parity_module = parity_builder.build(); + let sections = parity_module.sections_mut(); + + // wasm_module.types.serialize(buffer); + serialize_parity!(buffer, sections, |s| matches!(s, Section::Type(_))); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + // wasm_module.import.serialize(buffer); + // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + // wasm_module.function.serialize(buffer); + serialize_parity!(buffer, sections, |s| matches!(s, Section::Function(_))); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + // wasm_module.table.serialize(buffer); + // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + // wasm_module.memory.serialize(buffer); + serialize_parity!(buffer, sections, |s| matches!(s, Section::Memory(_))); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + // wasm_module.global.serialize(buffer); + serialize_parity!(buffer, sections, |s| matches!(s, Section::Global(_))); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + wasm_module.export.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + // wasm_module.start.serialize(buffer); + // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + // wasm_module.element.serialize(buffer); + // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + // wasm_module.data_count.serialize(buffer); + // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + wasm_module.code.serialize(buffer); + wasm_module.reloc_code.target_section_index = Some(index); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + wasm_module.data.serialize(buffer); + wasm_module.reloc_data.target_section_index = Some(index); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + wasm_module.linking.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + wasm_module.reloc_code.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + + wasm_module.reloc_data.serialize(buffer); + maybe_increment_section(buffer.size(), &mut prev_size, &mut index); } pub struct CopyMemoryConfig { diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index 1aa2f6c9b7..f93276203d 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -50,7 +50,7 @@ fn write_custom_section_header( buffer: &mut T, name: &str, ) -> SectionHeaderIndices { - // buffer.append_u8(SectionId::Custom as u8); + buffer.append_u8(SectionId::Custom as u8); let size_index = buffer.reserve_padded_u32(); let body_index = buffer.size(); name.serialize(buffer); @@ -392,17 +392,17 @@ impl<'a> Serialize for GlobalSection<'a> { #[repr(u8)] #[derive(PartialEq, Eq, Clone, Copy, Debug)] -enum ExportType { +pub enum ExportType { Func = 0, Table = 1, Mem = 2, Global = 3, } -struct Export { - name: String, - ty: ExportType, - index: u32, +pub struct Export { + pub name: String, + pub ty: ExportType, + pub index: u32, } impl Serialize for Export { fn serialize(&self, buffer: &mut T) { @@ -412,17 +412,21 @@ impl Serialize for Export { } } -pub struct ExportSection<'a>(Vec<'a, Export>); +pub struct ExportSection<'a> { + pub entries: Vec<'a, Export>, +} impl<'a> ExportSection<'a> { pub fn new(arena: &'a Bump) -> Self { - ExportSection(bumpalo::vec![in arena]) + ExportSection { + entries: bumpalo::vec![in arena], + } } } impl<'a> Serialize for ExportSection<'a> { fn serialize(&self, buffer: &mut T) { - serialize_vector_section(buffer, SectionId::Export, &self.0); + serialize_vector_section(buffer, SectionId::Export, &self.entries); } } @@ -432,8 +436,9 @@ impl<'a> Serialize for ExportSection<'a> { * *******************************************************************/ +#[derive(Debug)] pub struct CodeSection<'a> { - bytes: Vec<'a, u8>, + pub bytes: Vec<'a, u8>, } impl<'a> CodeSection<'a> { @@ -607,10 +612,12 @@ impl<'a> RelocationSection<'a> { impl<'a> Serialize for RelocationSection<'a> { fn serialize(&self, buffer: &mut T) { - let header_indices = write_custom_section_header(buffer, self.name); - buffer.encode_u32(self.target_section_index.unwrap()); - self.entries.serialize(buffer); - update_section_size(buffer, header_indices); + if !self.entries.is_empty() { + let header_indices = write_custom_section_header(buffer, self.name); + buffer.encode_u32(self.target_section_index.unwrap()); + self.entries.serialize(buffer); + update_section_size(buffer, header_indices); + } } } @@ -917,22 +924,34 @@ impl<'a> Serialize for LinkingSection<'a> { * https://webassembly.github.io/spec/core/binary/modules.html * *******************************************************************/ - -type RocUnusedSection<'a> = &'a str; +pub struct RocUnusedSection {} +impl Serialize for RocUnusedSection { + fn serialize(&self, _buffer: &mut T) {} +} +impl Default for RocUnusedSection { + fn default() -> Self { + RocUnusedSection {} + } +} pub struct WasmModule<'a> { pub types: TypeSection<'a>, pub import: ImportSection<'a>, pub function: FunctionSection<'a>, - pub table: RocUnusedSection<'a>, + /// Dummy placeholder for tables (used for function pointers and host references) + pub table: (), pub memory: MemorySection, pub global: GlobalSection<'a>, pub export: ExportSection<'a>, - pub start: RocUnusedSection<'a>, - pub element: RocUnusedSection<'a>, - pub data_count: RocUnusedSection<'a>, + /// Dummy placeholder for start function. In Roc, this would be part of the platform. + pub start: (), + /// Dummy placeholder for table elements. Roc does not use tables. + pub element: (), + /// Dummy placeholder for data count section, not yet implemented + pub data_count: (), pub code: CodeSection<'a>, - pub data: RocUnusedSection<'a>, + /// Dummy placeholder for data section, not yet implemented + pub data: (), pub linking: LinkingSection<'a>, pub reloc_code: RelocationSection<'a>, pub reloc_data: RelocationSection<'a>, @@ -946,22 +965,22 @@ fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) } impl<'a> WasmModule<'a> { - const WASM_VERSION: u32 = 1; + pub const WASM_VERSION: u32 = 1; pub fn new(arena: &'a Bump) -> Self { WasmModule { types: TypeSection::new(arena), import: ImportSection::new(arena), function: FunctionSection::new(arena), - table: RocUnusedSection::default(), + table: (), memory: MemorySection::new(1024 * 1024), global: GlobalSection::new(arena), export: ExportSection::new(arena), - start: RocUnusedSection::default(), - element: RocUnusedSection::default(), - data_count: RocUnusedSection::default(), + start: (), + element: (), + data_count: (), code: CodeSection::new(arena), - data: RocUnusedSection::default(), + data: (), linking: LinkingSection::new(arena), reloc_code: RelocationSection::new(arena, "reloc.CODE"), reloc_data: RelocationSection::new(arena, "reloc.DATA"), diff --git a/compiler/gen_wasm/src/serialize.rs b/compiler/gen_wasm/src/serialize.rs index f4430332e8..2b88c4159c 100644 --- a/compiler/gen_wasm/src/serialize.rs +++ b/compiler/gen_wasm/src/serialize.rs @@ -1,3 +1,5 @@ +use std::fmt::Debug; + use bumpalo::collections::vec::Vec; /// Write an unsigned integer into the provided buffer in LEB-128 format, returning byte length @@ -69,7 +71,7 @@ macro_rules! encode_padded_sleb128 { }; } -pub trait SerialBuffer { +pub trait SerialBuffer: Debug { fn append_u8(&mut self, b: u8); fn overwrite_u8(&mut self, index: usize, b: u8); fn append_slice(&mut self, b: &[u8]); @@ -120,6 +122,11 @@ impl Serialize for u32 { } } +// Unit is used as a placeholder in parts of the Wasm spec we don't use yet +impl Serialize for () { + fn serialize(&self, _buffer: &mut T) {} +} + impl Serialize for [S] { fn serialize(&self, buffer: &mut T) { buffer.encode_u32(self.len() as u32); diff --git a/compiler/gen_wasm/tests/helpers/eval.rs b/compiler/gen_wasm/tests/helpers/eval.rs index c8ac350858..9aaaa2ba61 100644 --- a/compiler/gen_wasm/tests/helpers/eval.rs +++ b/compiler/gen_wasm/tests/helpers/eval.rs @@ -4,7 +4,7 @@ use std::hash::{Hash, Hasher}; use roc_can::builtins::builtin_defs_map; use roc_collections::all::{MutMap, MutSet}; -use roc_gen_wasm::replace_code_section; +use roc_gen_wasm::combine_and_serialize; // use roc_std::{RocDec, RocList, RocOrder, RocStr}; use crate::helpers::wasm32_test_result::Wasm32TestResult; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; @@ -104,20 +104,19 @@ pub fn helper_wasm<'a, T: Wasm32TestResult>( exposed_to_host, }; - let (mut builder, mut code_section_bytes) = + let (mut parity_builder, mut wasm_module) = roc_gen_wasm::build_module_help(&env, procedures).unwrap(); T::insert_test_wrapper( arena, - &mut builder, - &mut code_section_bytes, + &mut parity_builder, + &mut wasm_module, TEST_WRAPPER_NAME, main_fn_index as u32, ); - let mut parity_module = builder.build(); - replace_code_section(&mut parity_module, code_section_bytes); - let module_bytes = parity_module.into_bytes().unwrap(); + let mut module_bytes = std::vec::Vec::with_capacity(4096); + combine_and_serialize(&mut module_bytes, parity_builder, &mut wasm_module); // for debugging (e.g. with wasm2wat or wasm-objdump) if true { diff --git a/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs b/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs index dfd78b88fb..cdfaecee7a 100644 --- a/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs +++ b/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs @@ -1,8 +1,8 @@ use parity_wasm::builder; -use parity_wasm::elements::Internal; use roc_gen_wasm::code_builder::{Align, CodeBuilder, ValueType}; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; +use roc_gen_wasm::module_builder::{Export, ExportType, WasmModule}; use roc_gen_wasm::{serialize::SerialBuffer, LocalId}; use roc_std::{RocDec, RocList, RocOrder, RocStr}; @@ -10,7 +10,7 @@ pub trait Wasm32TestResult { fn insert_test_wrapper<'a>( arena: &'a bumpalo::Bump, module_builder: &mut builder::ModuleBuilder, - code_section_bytes: &mut std::vec::Vec, + wasm_module: &mut WasmModule<'a>, wrapper_name: &str, main_function_index: u32, ) { @@ -21,24 +21,26 @@ pub trait Wasm32TestResult { // parity-wasm FunctionDefinition with no instructions let empty_fn_def = builder::function().with_signature(signature).build(); let location = module_builder.push_function(empty_fn_def); - let export = builder::export() - .field(wrapper_name) - .with_internal(Internal::Function(location.body)) - .build(); - module_builder.push_export(export); + + wasm_module.export.entries.push(Export { + name: wrapper_name.to_string(), + ty: ExportType::Func, + index: location.body, + }); + let mut code_builder = CodeBuilder::new(arena); Self::build_wrapper_body(&mut code_builder, main_function_index); - code_builder.serialize(code_section_bytes); + code_builder.serialize(&mut wasm_module.code.bytes); let mut num_procs = 0; - for (i, byte) in code_section_bytes[5..10].iter().enumerate() { + for (i, byte) in wasm_module.code.bytes[5..10].iter().enumerate() { num_procs += ((byte & 0x7f) as u32) << (i * 7); } - let inner_length = (code_section_bytes.len() - 5) as u32; - code_section_bytes.overwrite_padded_u32(0, inner_length); - code_section_bytes.overwrite_padded_u32(5, num_procs + 1); + let inner_length = (wasm_module.code.bytes.len() - 5) as u32; + wasm_module.code.bytes.overwrite_padded_u32(0, inner_length); + wasm_module.code.bytes.overwrite_padded_u32(5, num_procs + 1); } fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32); From e720873c05ca0e3396492a85e866ba92a1d38ff4 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Tue, 2 Nov 2021 13:32:54 +0000 Subject: [PATCH 10/20] Migrate to own Global and Memory sections --- compiler/gen_wasm/src/lib.rs | 39 ++++++++---------- compiler/gen_wasm/src/module_builder.rs | 53 ++++++++++++++----------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index afdd19abbc..e950f1de92 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -8,9 +8,9 @@ pub mod serialize; mod storage; use bumpalo::{self, collections::Vec, Bump}; -use parity_wasm::builder::{self, ModuleBuilder}; +use parity_wasm::builder::{ModuleBuilder}; -use parity_wasm::elements::{Instruction, Section, Serialize as ParitySerialize}; +use parity_wasm::elements::{Section, Serialize as ParitySerialize}; use roc_collections::all::{MutMap, MutSet}; use roc_module::symbol::{Interns, Symbol}; use roc_mono::ir::{Proc, ProcLayout}; @@ -19,7 +19,8 @@ use roc_mono::layout::LayoutIds; use crate::backend::WasmBackend; use crate::code_builder::{Align, CodeBuilder, ValueType}; use crate::module_builder::{ - Export, ExportType, LinkingSection, LinkingSubSection, SectionId, SymInfo, WasmModule, + Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSection, LinkingSubSection, + SectionId, SymInfo, WasmModule, }; use crate::serialize::{SerialBuffer, Serialize}; @@ -62,8 +63,6 @@ pub fn build_module_help<'a>( let mut layout_ids = LayoutIds::default(); let mut symbol_table_entries = Vec::with_capacity_in(procedures.len(), env.arena); - - for (i, ((sym, layout), proc)) in procedures.into_iter().enumerate() { let proc_name = layout_ids .get(proc.name, &proc.ret_layout) @@ -120,13 +119,7 @@ pub fn build_module_help<'a>( payload: code_reloc_section_bytes, }); - const MIN_MEMORY_SIZE_KB: u32 = 1024; - const PAGE_SIZE_KB: u32 = 64; - - let memory = builder::MemoryBuilder::new() - .with_min(MIN_MEMORY_SIZE_KB / PAGE_SIZE_KB) - .build(); - backend.parity_builder.push_memory(memory); + const MIN_MEMORY_SIZE_KB: i32 = 1024; backend.module.export.entries.push(Export { name: "memory".to_string(), @@ -134,12 +127,14 @@ pub fn build_module_help<'a>( index: 0, }); - let stack_pointer_global = builder::global() - .with_type(parity_wasm::elements::ValueType::I32) - .mutable() - .init_expr(Instruction::I32Const((MIN_MEMORY_SIZE_KB * 1024) as i32)) - .build(); - backend.parity_builder.push_global(stack_pointer_global); + let stack_pointer_global = Global { + ty: GlobalType { + value_type: ValueType::I32, + is_mutable: true, + }, + init_value: GlobalInitValue::I32(MIN_MEMORY_SIZE_KB * 1024), + }; + backend.module.global.entries.push(stack_pointer_global); Ok((backend.parity_builder, backend.module)) } @@ -190,12 +185,12 @@ pub fn combine_and_serialize<'a>( // wasm_module.table.serialize(buffer); // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - // wasm_module.memory.serialize(buffer); - serialize_parity!(buffer, sections, |s| matches!(s, Section::Memory(_))); + wasm_module.memory.serialize(buffer); + // serialize_parity!(buffer, sections, |s| matches!(s, Section::Memory(_))); maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - // wasm_module.global.serialize(buffer); - serialize_parity!(buffer, sections, |s| matches!(s, Section::Global(_))); + wasm_module.global.serialize(buffer); + // serialize_parity!(buffer, sections, |s| matches!(s, Section::Global(_))); maybe_increment_section(buffer.size(), &mut prev_size, &mut index); wasm_module.export.serialize(buffer); diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index f93276203d..aef9497592 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -127,7 +127,7 @@ impl<'a> Serialize for [ValueType] { } } -struct Signature<'a> { +pub struct Signature<'a> { param_types: Vec<'a, ValueType>, ret_type: Option, } @@ -169,14 +169,14 @@ impl<'a> Serialize for TypeSection<'a> { #[repr(u8)] #[derive(PartialEq, Eq, Clone, Copy, Debug)] -enum RefType { +pub enum RefType { Func = 0x70, Extern = 0x6f, } -struct TableType { - ref_type: RefType, - limits: Limits, +pub struct TableType { + pub ref_type: RefType, + pub limits: Limits, } impl Serialize for TableType { @@ -186,17 +186,17 @@ impl Serialize for TableType { } } -enum ImportDesc { +pub enum ImportDesc { Func { signature_index: u32 }, Table { ty: TableType }, Mem { limits: Limits }, Global { ty: GlobalType }, } -struct Import { - module: String, - name: String, - description: ImportDesc, +pub struct Import { + pub module: String, + pub name: String, + pub description: ImportDesc, } impl Serialize for Import { @@ -224,17 +224,17 @@ impl Serialize for Import { } } -pub struct ImportSection<'a>(Vec<'a, Import>); +pub struct ImportSection<'a> { entries: Vec<'a, Import> } impl<'a> ImportSection<'a> { pub fn new(arena: &'a Bump) -> Self { - ImportSection(bumpalo::vec![in arena]) + ImportSection { entries: bumpalo::vec![in arena] } } } impl<'a> Serialize for ImportSection<'a> { fn serialize(&self, buffer: &mut T) { - serialize_vector_section(buffer, SectionId::Import, &self.0); + serialize_vector_section(buffer, SectionId::Import, &self.entries); } } @@ -269,7 +269,7 @@ impl<'a> Serialize for FunctionSection<'a> { * *******************************************************************/ -enum Limits { +pub enum Limits { Min(u32), MinMax(u32, u32), } @@ -322,9 +322,9 @@ impl Serialize for MemorySection { * *******************************************************************/ -struct GlobalType { - value_type: ValueType, - is_mutable: bool, +pub struct GlobalType { + pub value_type: ValueType, + pub is_mutable: bool, } impl Serialize for GlobalType { @@ -334,16 +334,16 @@ impl Serialize for GlobalType { } } -enum GlobalInitValue { +pub enum GlobalInitValue { I32(i32), I64(i64), F32(f32), F64(f64), } -struct Global { - ty: GlobalType, - init_value: GlobalInitValue, +pub struct Global { + pub ty: GlobalType, + pub init_value: GlobalInitValue, } impl Serialize for Global { @@ -367,20 +367,25 @@ impl Serialize for Global { buffer.encode_f64(x); } } + buffer.append_u8(opcodes::END); } } -pub struct GlobalSection<'a>(Vec<'a, Global>); +pub struct GlobalSection<'a> { + pub entries: Vec<'a, Global>, +} impl<'a> GlobalSection<'a> { pub fn new(arena: &'a Bump) -> Self { - GlobalSection(Vec::with_capacity_in(1, arena)) + GlobalSection { + entries: Vec::with_capacity_in(1, arena), + } } } impl<'a> Serialize for GlobalSection<'a> { fn serialize(&self, buffer: &mut T) { - serialize_vector_section(buffer, SectionId::Global, &self.0); + serialize_vector_section(buffer, SectionId::Global, &self.entries); } } From baf896f6f9a21b1d039f2b1b3940a20ac1850c34 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Tue, 2 Nov 2021 13:55:14 +0000 Subject: [PATCH 11/20] Migrate linking and relocation sections --- compiler/gen_wasm/src/lib.rs | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index e950f1de92..7e4d00ef06 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -8,7 +8,7 @@ pub mod serialize; mod storage; use bumpalo::{self, collections::Vec, Bump}; -use parity_wasm::builder::{ModuleBuilder}; +use parity_wasm::builder::ModuleBuilder; use parity_wasm::elements::{Section, Serialize as ParitySerialize}; use roc_collections::all::{MutMap, MutSet}; @@ -92,32 +92,8 @@ pub fn build_module_help<'a>( .bytes .overwrite_padded_u32(0, inner_length); - // linking metadata section - let mut linking_section_bytes = std::vec::Vec::with_capacity(symbol_table_entries.len() * 20); - let linking_section = LinkingSection { - subsections: bumpalo::vec![in env.arena; - LinkingSubSection::SymbolTable(symbol_table_entries) - ], - }; - linking_section.serialize(&mut linking_section_bytes); - backend.parity_builder = backend.parity_builder.with_section(Section::Unparsed { - id: SectionId::Custom as u8, - payload: linking_section_bytes, - }); - - const CODE_SECTION_INDEX: u32 = 5; - backend.module.reloc_code.target_section_index = Some(CODE_SECTION_INDEX); - let mut code_reloc_section_bytes = std::vec::Vec::with_capacity(256); - backend - .module - .reloc_code - .serialize(&mut code_reloc_section_bytes); - - // Must come after linking section - backend.parity_builder = backend.parity_builder.with_section(Section::Unparsed { - id: SectionId::Custom as u8, - payload: code_reloc_section_bytes, - }); + let symbol_table = LinkingSubSection::SymbolTable(symbol_table_entries); + backend.module.linking.subsections.push(symbol_table); const MIN_MEMORY_SIZE_KB: i32 = 1024; From d8c1017aece6c9e3b825ccdbd800d452e54b2e6f Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Tue, 2 Nov 2021 14:00:22 +0000 Subject: [PATCH 12/20] refactor stack pointer initialisation --- compiler/gen_wasm/src/lib.rs | 13 +++++-------- compiler/gen_wasm/src/module_builder.rs | 17 +++++++++++++++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index 7e4d00ef06..7d2ffd7ce7 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -19,8 +19,7 @@ use roc_mono::layout::LayoutIds; use crate::backend::WasmBackend; use crate::code_builder::{Align, CodeBuilder, ValueType}; use crate::module_builder::{ - Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSection, LinkingSubSection, - SectionId, SymInfo, WasmModule, + Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSubSection, SymInfo, WasmModule, }; use crate::serialize::{SerialBuffer, Serialize}; @@ -95,22 +94,20 @@ pub fn build_module_help<'a>( let symbol_table = LinkingSubSection::SymbolTable(symbol_table_entries); backend.module.linking.subsections.push(symbol_table); - const MIN_MEMORY_SIZE_KB: i32 = 1024; - backend.module.export.entries.push(Export { name: "memory".to_string(), ty: ExportType::Mem, index: 0, }); - let stack_pointer_global = Global { + let stack_pointer_init = backend.module.memory.min_size().unwrap() as i32; + backend.module.global.entries.push(Global { ty: GlobalType { value_type: ValueType::I32, is_mutable: true, }, - init_value: GlobalInitValue::I32(MIN_MEMORY_SIZE_KB * 1024), - }; - backend.module.global.entries.push(stack_pointer_global); + init_value: GlobalInitValue::I32(stack_pointer_init), + }); Ok((backend.parity_builder, backend.module)) } diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index aef9497592..9a651cd326 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -224,11 +224,15 @@ impl Serialize for Import { } } -pub struct ImportSection<'a> { entries: Vec<'a, Import> } +pub struct ImportSection<'a> { + entries: Vec<'a, Import>, +} impl<'a> ImportSection<'a> { pub fn new(arena: &'a Bump) -> Self { - ImportSection { entries: bumpalo::vec![in arena] } + ImportSection { + entries: bumpalo::vec![in arena], + } } } @@ -303,6 +307,15 @@ impl MemorySection { MemorySection(Some(Limits::Min(pages))) } } + + pub fn min_size(&self) -> Option { + match self { + MemorySection(Some(Limits::Min(min))) | MemorySection(Some(Limits::MinMax(min, _))) => { + Some(min * Self::PAGE_SIZE) + } + MemorySection(None) => None, + } + } } impl Serialize for MemorySection { From b0aeafc066ed83cd0b12b8a23c3d5e8ce3a941a8 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Tue, 2 Nov 2021 20:34:57 +0000 Subject: [PATCH 13/20] Refactor code section to get rid of a copy --- compiler/gen_wasm/src/backend.rs | 19 ++-- compiler/gen_wasm/src/code_builder.rs | 91 +++++++++---------- compiler/gen_wasm/src/lib.rs | 10 +- compiler/gen_wasm/src/module_builder.rs | 31 ++++--- .../tests/helpers/wasm32_test_result.rs | 14 +-- 5 files changed, 71 insertions(+), 94 deletions(-) diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index 647b728fc6..c7bfeb0195 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -11,7 +11,6 @@ use roc_mono::layout::{Builtin, Layout}; use crate::code_builder::{BlockType, CodeBuilder, ValueType}; use crate::layout::WasmLayout; use crate::module_builder::WasmModule; -use crate::serialize::SerialBuffer; use crate::storage::{Storage, StoredValue, StoredValueKind}; use crate::{copy_memory, CopyMemoryConfig, Env, LocalId, PTR_TYPE}; @@ -43,18 +42,11 @@ pub struct WasmBackend<'a> { impl<'a> WasmBackend<'a> { pub fn new(env: &'a Env<'a>, proc_symbols: Vec<'a, Symbol>) -> Self { - let mut module = WasmModule::new(env.arena); - - // Code section header - module.code.bytes.reserve_padded_u32(); // byte length, to be written at the end - let num_procs = proc_symbols.len() as u32; - module.code.bytes.encode_padded_u32(num_procs); // modified later in unit tests - WasmBackend { env, // Module-level data - module, + module: WasmModule::new(env.arena), parity_builder: builder::module(), _data_offset_map: MutMap::default(), _data_offset_next: UNUSED_DATA_SECTION_BYTES, @@ -68,8 +60,13 @@ impl<'a> WasmBackend<'a> { } } + /// Reset function-level data fn reset(&mut self) { - self.code_builder.clear(); + // Push the completed CodeBuilder into the module and swap it for a new empty one + let mut swap_code_builder = CodeBuilder::new(self.env.arena); + std::mem::swap(&mut swap_code_builder, &mut self.code_builder); + self.module.code.code_builders.push(swap_code_builder); + self.storage.clear(); self.joinpoint_label_map.clear(); assert_eq!(self.block_depth, 0); @@ -140,8 +137,6 @@ impl<'a> WasmBackend<'a> { self.storage.stack_frame_pointer, ); - let relocs = self.code_builder.serialize(&mut self.module.code.bytes); - self.module.reloc_code.entries.extend(relocs); Ok(()) } diff --git a/compiler/gen_wasm/src/code_builder.rs b/compiler/gen_wasm/src/code_builder.rs index efc25828dd..2a540ad439 100644 --- a/compiler/gen_wasm/src/code_builder.rs +++ b/compiler/gen_wasm/src/code_builder.rs @@ -1,4 +1,4 @@ -use bumpalo::collections::vec::{Drain, Vec}; +use bumpalo::collections::vec::Vec; use bumpalo::Bump; use core::panic; use std::fmt::Debug; @@ -92,8 +92,8 @@ pub enum VirtualMachineSymbolState { // An instruction (local.set or local.tee) to be inserted into the function code #[derive(Debug)] -struct InsertLocation { - insert_at: usize, +struct Insertion { + at: usize, start: usize, end: usize, } @@ -124,7 +124,7 @@ pub struct CodeBuilder<'a> { insert_bytes: Vec<'a, u8>, /// Code locations where the insert_bytes should go - insert_locations: Vec<'a, InsertLocation>, + insertions: Vec<'a, Insertion>, /// Bytes for local variable declarations and stack-frame setup code. /// We can't write this until we've finished the main code. But it goes @@ -151,7 +151,7 @@ impl<'a> CodeBuilder<'a> { pub fn new(arena: &'a Bump) -> Self { CodeBuilder { code: Vec::with_capacity_in(1024, arena), - insert_locations: Vec::with_capacity_in(32, arena), + insertions: Vec::with_capacity_in(32, arena), insert_bytes: Vec::with_capacity_in(64, arena), preamble: Vec::with_capacity_in(32, arena), inner_length: Vec::with_capacity_in(5, arena), @@ -160,15 +160,6 @@ impl<'a> CodeBuilder<'a> { } } - pub fn clear(&mut self) { - self.code.clear(); - self.insert_locations.clear(); - self.insert_bytes.clear(); - self.preamble.clear(); - self.inner_length.clear(); - self.vm_stack.clear(); - } - /********************************************************** SYMBOLS @@ -220,8 +211,8 @@ impl<'a> CodeBuilder<'a> { self.insert_bytes.push(opcode); self.insert_bytes.encode_u32(immediate); - self.insert_locations.push(InsertLocation { - insert_at, + self.insertions.push(Insertion { + at: insert_at, start, end: self.insert_bytes.len(), }); @@ -348,52 +339,56 @@ impl<'a> CodeBuilder<'a> { let inner_len = self.preamble.len() + self.code.len() + self.insert_bytes.len(); self.inner_length.encode_u32(inner_len as u32); - } - - /// Write out all the bytes in the right order - pub fn serialize( - &mut self, - code_section_buf: &mut T, - ) -> Drain { - code_section_buf.append_slice(&self.inner_length); - code_section_buf.append_slice(&self.preamble); // Sort insertions. They are not created in order of assignment, but in order of *second* usage. - self.insert_locations.sort_by_key(|loc| loc.insert_at); + self.insertions.sort_by_key(|ins| ins.at); + } + + /// Serialize all byte vectors in the right order + /// Also update relocation offsets relative to the provided base offset in the buffer + pub fn serialize_with_relocs( + &self, + buffer: &mut T, + final_relocs: &mut Vec<'a, RelocationEntry>, + reloc_base_offset: usize, + ) { + buffer.append_slice(&self.inner_length); + buffer.append_slice(&self.preamble); // Do the insertions & update relocation offsets - const CODE_SECTION_BODY_OFFSET: usize = 5; let mut reloc_index = 0; - let mut code_pos: usize = 0; - for location in self.insert_locations.iter() { + let mut code_pos = 0; + let mut insert_iter = self.insertions.iter(); + + loop { + let next_insert = insert_iter.next(); + let next_pos = next_insert.map(|i| i.at).unwrap_or(self.code.len()); + // Relocation offset needs to be an index into the body of the code section, but // at this point it is an index into self.code. Need to adjust for all previous functions // in the code section, and for insertions in the current function. - let section_body_pos = code_section_buf.size() - CODE_SECTION_BODY_OFFSET; + let section_body_pos = buffer.size() - reloc_base_offset; while reloc_index < self.relocations.len() - && self.relocations[reloc_index].offset() < location.insert_at as u32 + && self.relocations[reloc_index].offset() < next_pos as u32 { - let offset_ref = self.relocations[reloc_index].offset_mut(); - *offset_ref += (section_body_pos - code_pos) as u32; + let mut reloc_clone = self.relocations[reloc_index].clone(); + *reloc_clone.offset_mut() += (section_body_pos - code_pos) as u32; + final_relocs.push(reloc_clone); reloc_index += 1; } - code_section_buf.append_slice(&self.code[code_pos..location.insert_at]); - code_section_buf.append_slice(&self.insert_bytes[location.start..location.end]); - code_pos = location.insert_at; + buffer.append_slice(&self.code[code_pos..next_pos]); + + match next_insert { + Some(Insertion { at, start, end }) => { + buffer.append_slice(&self.insert_bytes[*start..*end]); + code_pos = *at; + } + None => { + break; + } + } } - - let section_body_pos = code_section_buf.size() - CODE_SECTION_BODY_OFFSET; - while reloc_index < self.relocations.len() { - let offset_ref = self.relocations[reloc_index].offset_mut(); - *offset_ref += (section_body_pos - code_pos) as u32; - reloc_index += 1; - } - - let len = self.code.len(); - code_section_buf.append_slice(&self.code[code_pos..len]); - - self.relocations.drain(0..) } /********************************************************** diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index 7d2ffd7ce7..eb93e0fc3c 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -83,14 +83,6 @@ pub fn build_module_help<'a>( } } - // Update code section length - let inner_length = (backend.module.code.bytes.len() - 5) as u32; - backend - .module - .code - .bytes - .overwrite_padded_u32(0, inner_length); - let symbol_table = LinkingSubSection::SymbolTable(symbol_table_entries); backend.module.linking.subsections.push(symbol_table); @@ -178,8 +170,8 @@ pub fn combine_and_serialize<'a>( // wasm_module.data_count.serialize(buffer); // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - wasm_module.code.serialize(buffer); wasm_module.reloc_code.target_section_index = Some(index); + wasm_module.code.serialize_mut(buffer, &mut wasm_module.reloc_code.entries); maybe_increment_section(buffer.size(), &mut prev_size, &mut index); wasm_module.data.serialize(buffer); diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index 9a651cd326..8312679c50 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -1,7 +1,7 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; -use crate::code_builder::{Align, ValueType}; +use crate::code_builder::{Align, CodeBuilder, ValueType}; use crate::opcodes; use crate::serialize::{SerialBuffer, Serialize}; @@ -456,26 +456,30 @@ impl<'a> Serialize for ExportSection<'a> { #[derive(Debug)] pub struct CodeSection<'a> { - pub bytes: Vec<'a, u8>, + pub code_builders: Vec<'a, CodeBuilder<'a>>, } impl<'a> CodeSection<'a> { pub fn new(arena: &'a Bump) -> Self { CodeSection { - bytes: Vec::with_capacity_in(4096, arena), + code_builders: Vec::with_capacity_in(8, arena), } } -} -impl<'a> Serialize for CodeSection<'a> { - fn serialize(&self, buffer: &mut T) { - buffer.append_u8(SectionId::Code as u8); + /// Serialize the code builders for all functions, and get code relocations with final offsets + pub fn serialize_mut( + &mut self, + buffer: &mut T, + relocations: &mut Vec<'a, RelocationEntry>, + ) { + let header_indices = write_section_header(buffer, SectionId::Code); + buffer.encode_u32(self.code_builders.len() as u32); - // TODO - // We've copied each function into self.bytes, now we're copying again. - // Can eliminate one of those copies by refactoring to a vector of CodeBuilders + for code_builder in self.code_builders.iter_mut() { + code_builder.serialize_with_relocs(buffer, relocations, header_indices.body_index); + } - buffer.append_slice(&self.bytes); + update_section_size(buffer, header_indices); } } @@ -542,7 +546,7 @@ pub enum OffsetRelocType { MemoryAddrI64 = 16, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum RelocationEntry { Index { type_id: IndexRelocType, @@ -1044,8 +1048,9 @@ impl<'a> WasmModule<'a> { self.data_count.serialize(buffer); maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - self.code.serialize(buffer); self.reloc_code.target_section_index = Some(index); + self.code + .serialize_mut(buffer, &mut self.reloc_code.entries); maybe_increment_section(buffer.size(), &mut prev_size, &mut index); self.data.serialize(buffer); diff --git a/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs b/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs index cdfaecee7a..00ce3714b0 100644 --- a/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs +++ b/compiler/gen_wasm/tests/helpers/wasm32_test_result.rs @@ -3,7 +3,7 @@ use parity_wasm::builder; use roc_gen_wasm::code_builder::{Align, CodeBuilder, ValueType}; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; use roc_gen_wasm::module_builder::{Export, ExportType, WasmModule}; -use roc_gen_wasm::{serialize::SerialBuffer, LocalId}; +use roc_gen_wasm::LocalId; use roc_std::{RocDec, RocList, RocOrder, RocStr}; pub trait Wasm32TestResult { @@ -28,19 +28,9 @@ pub trait Wasm32TestResult { index: location.body, }); - let mut code_builder = CodeBuilder::new(arena); Self::build_wrapper_body(&mut code_builder, main_function_index); - - code_builder.serialize(&mut wasm_module.code.bytes); - - let mut num_procs = 0; - for (i, byte) in wasm_module.code.bytes[5..10].iter().enumerate() { - num_procs += ((byte & 0x7f) as u32) << (i * 7); - } - let inner_length = (wasm_module.code.bytes.len() - 5) as u32; - wasm_module.code.bytes.overwrite_padded_u32(0, inner_length); - wasm_module.code.bytes.overwrite_padded_u32(5, num_procs + 1); + wasm_module.code.code_builders.push(code_builder); } fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32); From 7a1b5ecc99f357e44e9cd40b21ff3b74842f00ed Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Tue, 2 Nov 2021 20:35:29 +0000 Subject: [PATCH 14/20] Slightly reorganise serialize code --- compiler/gen_wasm/src/serialize.rs | 92 +++++++++++++++--------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/compiler/gen_wasm/src/serialize.rs b/compiler/gen_wasm/src/serialize.rs index 2b88c4159c..e40d34667e 100644 --- a/compiler/gen_wasm/src/serialize.rs +++ b/compiler/gen_wasm/src/serialize.rs @@ -2,6 +2,52 @@ use std::fmt::Debug; use bumpalo::collections::vec::Vec; +pub trait Serialize { + fn serialize(&self, buffer: &mut T); +} + +impl Serialize for str { + fn serialize(&self, buffer: &mut T) { + buffer.encode_u32(self.len() as u32); + buffer.append_slice(self.as_bytes()); + } +} + +impl Serialize for u32 { + fn serialize(&self, buffer: &mut T) { + buffer.encode_u32(*self); + } +} + +// Unit is used as a placeholder in parts of the Wasm spec we don't use yet +impl Serialize for () { + fn serialize(&self, _buffer: &mut T) {} +} + +impl Serialize for [S] { + fn serialize(&self, buffer: &mut T) { + buffer.encode_u32(self.len() as u32); + for item in self.iter() { + item.serialize(buffer); + } + } +} + +impl Serialize for Option { + /// serialize Option as a vector of length 1 or 0 + fn serialize(&self, buffer: &mut T) { + match self { + Some(x) => { + buffer.append_u8(1); + x.serialize(buffer); + } + None => { + buffer.append_u8(0); + } + } + } +} + /// Write an unsigned integer into the provided buffer in LEB-128 format, returning byte length /// /// All integers in Wasm are variable-length encoded, which saves space for small values. @@ -105,52 +151,6 @@ pub trait SerialBuffer: Debug { encode_padded_sleb128!(encode_padded_i64, i64); } -pub trait Serialize { - fn serialize(&self, buffer: &mut T); -} - -impl Serialize for str { - fn serialize(&self, buffer: &mut T) { - buffer.encode_u32(self.len() as u32); - buffer.append_slice(self.as_bytes()); - } -} - -impl Serialize for u32 { - fn serialize(&self, buffer: &mut T) { - buffer.encode_u32(*self); - } -} - -// Unit is used as a placeholder in parts of the Wasm spec we don't use yet -impl Serialize for () { - fn serialize(&self, _buffer: &mut T) {} -} - -impl Serialize for [S] { - fn serialize(&self, buffer: &mut T) { - buffer.encode_u32(self.len() as u32); - for item in self.iter() { - item.serialize(buffer); - } - } -} - -impl Serialize for Option { - /// serialize Option as a vector of length 1 or 0 - fn serialize(&self, buffer: &mut T) { - match self { - Some(x) => { - buffer.append_u8(1); - x.serialize(buffer); - } - None => { - buffer.append_u8(0); - } - } - } -} - fn overwrite_padded_u32_help(buffer: &mut [u8], value: u32) { let mut x = value; for byte in buffer.iter_mut().take(4) { From 59757d638ae152d6d1c2e9f56fedbfca1fd850bc Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Wed, 3 Nov 2021 10:24:03 +0000 Subject: [PATCH 15/20] Migrate the last sections: Type and Function --- compiler/gen_wasm/src/backend.rs | 47 +++++------ compiler/gen_wasm/src/code_builder.rs | 50 +++++++++--- compiler/gen_wasm/src/layout.rs | 8 +- compiler/gen_wasm/src/lib.rs | 8 +- compiler/gen_wasm/src/module_builder.rs | 79 +++++++------------ compiler/gen_wasm/src/serialize.rs | 13 --- compiler/test_wasm/src/helpers/eval.rs | 2 +- .../src/helpers/wasm32_test_result.rs | 16 ++-- 8 files changed, 102 insertions(+), 121 deletions(-) diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index c7bfeb0195..a80239be37 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -10,7 +10,7 @@ use roc_mono::layout::{Builtin, Layout}; use crate::code_builder::{BlockType, CodeBuilder, ValueType}; use crate::layout::WasmLayout; -use crate::module_builder::WasmModule; +use crate::module_builder::{Signature, WasmModule}; use crate::storage::{Storage, StoredValue, StoredValueKind}; use crate::{copy_memory, CopyMemoryConfig, Env, LocalId, PTR_TYPE}; @@ -78,52 +78,43 @@ impl<'a> WasmBackend<'a> { ***********************************************************/ - pub fn build_proc(&mut self, proc: Proc<'a>, _sym: Symbol) -> Result { - // println!("\ngenerating procedure {:?}\n", sym); + pub fn build_proc(&mut self, proc: Proc<'a>, _sym: Symbol) -> Result<(), String> { + // println!("\ngenerating procedure {:?}\n", _sym); - // Use parity-wasm to add the signature in "types" and "functions" sections - // but no instructions, since we are building our own code section - let empty_function_def = self.start_proc(&proc); - let location = self.parity_builder.push_function(empty_function_def); - let function_index = location.body; + self.start_proc(&proc); self.build_stmt(&proc.body, &proc.ret_layout)?; self.finalize_proc()?; self.reset(); - // println!("\nfinished generating {:?}\n", sym); + // println!("\nfinished generating {:?}\n", _sym); - Ok(function_index) + Ok(()) } - fn start_proc(&mut self, proc: &Proc<'a>) -> FunctionDefinition { + fn start_proc(&mut self, proc: &Proc<'a>) { let ret_layout = WasmLayout::new(&proc.ret_layout); - - let signature_builder = if let WasmLayout::StackMemory { .. } = ret_layout { + let ret_type = if ret_layout.is_stack_memory() { self.storage.arg_types.push(PTR_TYPE); self.start_block(BlockType::NoResult); // block to ensure all paths pop stack memory (if any) - builder::signature() + None } else { - let ret_type = ret_layout.value_type(); - self.start_block(BlockType::Value(ret_type)); // block to ensure all paths pop stack memory (if any) - builder::signature().with_result(ret_type.to_parity_wasm()) + let ty = ret_layout.value_type(); + self.start_block(BlockType::Value(ty)); // block to ensure all paths pop stack memory (if any) + Some(ty) }; for (layout, symbol) in proc.args { - self.storage.allocate( - &WasmLayout::new(layout), - *symbol, - StoredValueKind::Parameter, - ); + let arg_layout = WasmLayout::new(layout); + self.storage + .allocate(&arg_layout, *symbol, StoredValueKind::Parameter); } - let parity_params = self.storage.arg_types.iter().map(|t| t.to_parity_wasm()); - - let signature = signature_builder.with_params(parity_params).build_sig(); - - // parity-wasm FunctionDefinition with no instructions - builder::function().with_signature(signature).build() + self.module.add_function_signature(Signature { + param_types: self.storage.arg_types.clone(), + ret_type, + }); } fn finalize_proc(&mut self) -> Result<(), String> { diff --git a/compiler/gen_wasm/src/code_builder.rs b/compiler/gen_wasm/src/code_builder.rs index 2a540ad439..d15b2b4ed2 100644 --- a/compiler/gen_wasm/src/code_builder.rs +++ b/compiler/gen_wasm/src/code_builder.rs @@ -20,15 +20,9 @@ pub enum ValueType { F64 = 0x7c, } -// This is a bit unfortunate. Will go away if we generate our own Types section. -impl ValueType { - pub fn to_parity_wasm(&self) -> parity_wasm::elements::ValueType { - match self { - Self::I32 => parity_wasm::elements::ValueType::I32, - Self::I64 => parity_wasm::elements::ValueType::I64, - Self::F32 => parity_wasm::elements::ValueType::F32, - Self::F64 => parity_wasm::elements::ValueType::F64, - } +impl Serialize for ValueType { + fn serialize(&self, buffer: &mut T) { + buffer.append_u8(*self as u8); } } @@ -291,7 +285,43 @@ impl<'a> CodeBuilder<'a> { /// Generate bytes to declare the function's local variables fn build_local_declarations(&mut self, local_types: &[ValueType]) { - local_types.serialize(&mut self.preamble); + // reserve one byte for num_batches + self.preamble.push(0); + + if local_types.is_empty() { + return; + } + + // Write declarations in batches of the same ValueType + let mut num_batches: u32 = 0; + let mut batch_type = local_types[0]; + let mut batch_size = 0; + for t in local_types { + if *t == batch_type { + batch_size += 1; + } else { + self.preamble.encode_u32(batch_size); + self.preamble.push(batch_type as u8); + batch_type = *t; + batch_size = 1; + num_batches += 1; + } + } + self.preamble.encode_u32(batch_size); + self.preamble.push(batch_type as u8); + num_batches += 1; + + // Go back and write the number of batches at the start + if num_batches < 128 { + self.preamble[0] = num_batches as u8; + } else { + // We need more than 1 byte to encode num_batches! + // This is a ridiculous edge case, so just pad to 5 bytes for simplicity + let old_len = self.preamble.len(); + self.preamble.resize(old_len + 4, 0); + self.preamble.copy_within(1..old_len, 5); + self.preamble.overwrite_padded_u32(0, num_batches); + } } /// Generate instruction bytes to grab a frame of stack memory on entering the function diff --git a/compiler/gen_wasm/src/layout.rs b/compiler/gen_wasm/src/layout.rs index 66aba67d04..43bc52fcd8 100644 --- a/compiler/gen_wasm/src/layout.rs +++ b/compiler/gen_wasm/src/layout.rs @@ -71,11 +71,7 @@ impl WasmLayout { } } - #[allow(dead_code)] - pub fn stack_memory(&self) -> u32 { - match self { - Self::StackMemory { size, .. } => *size, - _ => 0, - } + pub fn is_stack_memory(&self) -> bool { + matches!(self, Self::StackMemory { .. }) } } diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index eb93e0fc3c..d049613188 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -136,15 +136,15 @@ pub fn combine_and_serialize<'a>( let mut parity_module = parity_builder.build(); let sections = parity_module.sections_mut(); - // wasm_module.types.serialize(buffer); - serialize_parity!(buffer, sections, |s| matches!(s, Section::Type(_))); + wasm_module.types.serialize(buffer); + // serialize_parity!(buffer, sections, |s| matches!(s, Section::Type(_))); maybe_increment_section(buffer.size(), &mut prev_size, &mut index); // wasm_module.import.serialize(buffer); // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - // wasm_module.function.serialize(buffer); - serialize_parity!(buffer, sections, |s| matches!(s, Section::Function(_))); + wasm_module.function.serialize(buffer); + // serialize_parity!(buffer, sections, |s| matches!(s, Section::Function(_))); maybe_increment_section(buffer.size(), &mut prev_size, &mut index); // wasm_module.table.serialize(buffer); diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/module_builder.rs index 8312679c50..f59082eb68 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/module_builder.rs @@ -86,64 +86,22 @@ fn serialize_vector_section( * *******************************************************************/ -impl<'a> Serialize for [ValueType] { - fn serialize(&self, buffer: &mut T) { - // reserve one byte for num_batches - let start = buffer.size(); - buffer.append_u8(0); - - if self.is_empty() { - return; - } - - // Write declarations in batches of the same ValueType - let mut num_batches: u32 = 0; - let mut batch_type = self[0]; - let mut batch_size = 0; - for t in self { - if *t == batch_type { - batch_size += 1; - } else { - buffer.encode_u32(batch_size); - buffer.append_u8(batch_type as u8); - batch_type = *t; - batch_size = 1; - num_batches += 1; - } - } - buffer.encode_u32(batch_size); - buffer.append_u8(batch_type as u8); - num_batches += 1; - - // Go back and write the number of batches at the start - if num_batches < 128 { - buffer.overwrite_u8(start, num_batches as u8); - } else { - // We need more than 1 byte to encode num_batches! - // This is a ridiculous edge case, so just pad to 5 bytes for simplicity - buffer.insert_space_at(1, 4); - buffer.overwrite_padded_u32(0, num_batches); - } - } -} - +#[derive(PartialEq, Eq)] pub struct Signature<'a> { - param_types: Vec<'a, ValueType>, - ret_type: Option, + pub param_types: Vec<'a, ValueType>, + pub ret_type: Option, } impl<'a> Serialize for Signature<'a> { fn serialize(&self, buffer: &mut T) { buffer.append_u8(0x60); self.param_types.serialize(buffer); - match self.ret_type { - Some(t) => [t].serialize(buffer), - None => buffer.append_u8(0), // vector of length zero - } + self.ret_type.serialize(buffer); } } pub struct TypeSection<'a> { + /// Private. See WasmModule::add_function_signature signatures: Vec<'a, Signature<'a>>, } @@ -153,6 +111,21 @@ impl<'a> TypeSection<'a> { signatures: Vec::with_capacity_in(8, arena), } } + + /// Find a matching signature or insert a new one. Return the index. + fn insert(&mut self, signature: Signature<'a>) -> u32 { + // Using linear search because we need to preserve indices stored in + // the Function section. (Also for practical sizes it's fast) + let maybe_index = self.signatures.iter().position(|s| *s == signature); + match maybe_index { + Some(index) => index as u32, + None => { + let index = self.signatures.len(); + self.signatures.push(signature); + index as u32 + } + } + } } impl<'a> Serialize for TypeSection<'a> { @@ -250,7 +223,8 @@ impl<'a> Serialize for ImportSection<'a> { *******************************************************************/ pub struct FunctionSection<'a> { - pub signature_indices: Vec<'a, u32>, + /// Private. See WasmModule::add_function_signature + signature_indices: Vec<'a, u32>, } impl<'a> FunctionSection<'a> { @@ -1009,8 +983,13 @@ impl<'a> WasmModule<'a> { } } - #[allow(dead_code)] - fn serialize(&mut self, buffer: &mut T) { + /// Create entries in the Type and Function sections for a function signature + pub fn add_function_signature(&mut self, signature: Signature<'a>) { + let index = self.types.insert(signature); + self.function.signature_indices.push(index); + } + + pub fn serialize(&mut self, buffer: &mut T) { buffer.append_u8(0); buffer.append_slice("asm".as_bytes()); buffer.write_unencoded_u32(Self::WASM_VERSION); diff --git a/compiler/gen_wasm/src/serialize.rs b/compiler/gen_wasm/src/serialize.rs index e40d34667e..a462a96981 100644 --- a/compiler/gen_wasm/src/serialize.rs +++ b/compiler/gen_wasm/src/serialize.rs @@ -129,9 +129,6 @@ pub trait SerialBuffer: Debug { encode_sleb128!(encode_i32, i32); encode_sleb128!(encode_i64, i64); - /// Inserts extra entries at the given index by copying the following entries to higher indices - fn insert_space_at(&mut self, index: usize, size: usize); - fn reserve_padded_u32(&mut self) -> usize; fn encode_padded_u32(&mut self, value: u32) -> usize; fn overwrite_padded_u32(&mut self, index: usize, value: u32); @@ -173,11 +170,6 @@ impl SerialBuffer for std::vec::Vec { fn size(&self) -> usize { self.len() } - fn insert_space_at(&mut self, index: usize, size: usize) { - let old_len = self.len(); - self.resize(old_len + size, 0); - self.copy_within(index..old_len, index + size); - } fn reserve_padded_u32(&mut self) -> usize { let index = self.len(); self.resize(index + 5, 0xff); @@ -208,11 +200,6 @@ impl<'a> SerialBuffer for Vec<'a, u8> { fn size(&self) -> usize { self.len() } - fn insert_space_at(&mut self, index: usize, size: usize) { - let old_len = self.len(); - self.resize(old_len + size, 0); - self.copy_within(index..old_len, index + size); - } fn reserve_padded_u32(&mut self) -> usize { let index = self.len(); self.resize(index + 5, 0xff); diff --git a/compiler/test_wasm/src/helpers/eval.rs b/compiler/test_wasm/src/helpers/eval.rs index 9aaaa2ba61..9fd0c2dc2f 100644 --- a/compiler/test_wasm/src/helpers/eval.rs +++ b/compiler/test_wasm/src/helpers/eval.rs @@ -119,7 +119,7 @@ pub fn helper_wasm<'a, T: Wasm32TestResult>( combine_and_serialize(&mut module_bytes, parity_builder, &mut wasm_module); // for debugging (e.g. with wasm2wat or wasm-objdump) - if true { + if false { use std::io::Write; let mut hash_state = DefaultHasher::new(); diff --git a/compiler/test_wasm/src/helpers/wasm32_test_result.rs b/compiler/test_wasm/src/helpers/wasm32_test_result.rs index 00ce3714b0..5fc78cc320 100644 --- a/compiler/test_wasm/src/helpers/wasm32_test_result.rs +++ b/compiler/test_wasm/src/helpers/wasm32_test_result.rs @@ -1,8 +1,9 @@ +use bumpalo::collections::Vec; use parity_wasm::builder; use roc_gen_wasm::code_builder::{Align, CodeBuilder, ValueType}; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; -use roc_gen_wasm::module_builder::{Export, ExportType, WasmModule}; +use roc_gen_wasm::module_builder::{Export, ExportType, Signature, WasmModule}; use roc_gen_wasm::LocalId; use roc_std::{RocDec, RocList, RocOrder, RocStr}; @@ -14,18 +15,15 @@ pub trait Wasm32TestResult { wrapper_name: &str, main_function_index: u32, ) { - let signature = builder::signature() - .with_result(parity_wasm::elements::ValueType::I32) - .build_sig(); - - // parity-wasm FunctionDefinition with no instructions - let empty_fn_def = builder::function().with_signature(signature).build(); - let location = module_builder.push_function(empty_fn_def); + wasm_module.add_function_signature(Signature { + param_types: Vec::with_capacity_in(0, arena), + ret_type: Some(ValueType::I32), + }); wasm_module.export.entries.push(Export { name: wrapper_name.to_string(), ty: ExportType::Func, - index: location.body, + index: wasm_module.code.code_builders.len() as u32, }); let mut code_builder = CodeBuilder::new(arena); From 87f624685176035e45382ae84d681dbd15d7caa1 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Wed, 3 Nov 2021 10:36:08 +0000 Subject: [PATCH 16/20] Remove parity-wasm! --- Cargo.lock | 7 -- compiler/gen_wasm/Cargo.toml | 2 - compiler/gen_wasm/README.md | 4 +- compiler/gen_wasm/src/backend.rs | 4 - compiler/gen_wasm/src/lib.rs | 95 +------------------ compiler/test_wasm/Cargo.toml | 3 - compiler/test_wasm/src/helpers/eval.rs | 7 +- .../src/helpers/wasm32_test_result.rs | 2 - 8 files changed, 7 insertions(+), 117 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4b9721e18..d9a94f95fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2781,11 +2781,6 @@ dependencies = [ "syn 1.0.76", ] -[[package]] -name = "parity-wasm" -version = "0.44.0" -source = "git+https://github.com/brian-carroll/parity-wasm?branch=master#373f655f64d2260a2e9665811f7b6ed17f9db705" - [[package]] name = "parking_lot" version = "0.11.2" @@ -3916,7 +3911,6 @@ dependencies = [ "bumpalo", "indoc 0.3.6", "libc", - "parity-wasm", "pretty_assertions 0.5.1", "roc_builtins", "roc_can", @@ -4755,7 +4749,6 @@ dependencies = [ "bumpalo", "indoc 0.3.6", "libc", - "parity-wasm", "pretty_assertions 0.5.1", "roc_builtins", "roc_can", diff --git a/compiler/gen_wasm/Cargo.toml b/compiler/gen_wasm/Cargo.toml index 93b4414d54..ce9ec1480e 100644 --- a/compiler/gen_wasm/Cargo.toml +++ b/compiler/gen_wasm/Cargo.toml @@ -10,8 +10,6 @@ roc_collections = { path = "../collections" } roc_module = { path = "../module" } roc_mono = { path = "../mono" } bumpalo = { version = "3.6.1", features = ["collections"] } -# TODO: switch to parity-wasm 0.44 once it's out (allows bumpalo vectors in some places) -parity-wasm = { git = "https://github.com/brian-carroll/parity-wasm", branch = "master" } roc_std = { path = "../../roc_std" } wasmer = "2.0.0" diff --git a/compiler/gen_wasm/README.md b/compiler/gen_wasm/README.md index e27efd677c..4a9469c445 100644 --- a/compiler/gen_wasm/README.md +++ b/compiler/gen_wasm/README.md @@ -225,6 +225,4 @@ The Module is a _specification_ for how to create an Instance of the program. Th A WebAssembly module is equivalent to an executable file. It doesn't normally need relocations since at the WebAssembly layer, there is no Address Space Layout Randomisation. If it has relocations then it's an object file. -The [official spec](https://webassembly.github.io/spec/core/binary/modules.html#sections) lists the sections that are part of the final module. It doesn't mention any sections for relocations or symbol names, but it has room for "custom sections" that in practice seem to be used for that. - -The WebAssembly `tool-conventions` repo has a document on [linking](https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md), and the `parity_wasm` crate supports "name" and "relocation" [sections](https://docs.rs/parity-wasm/0.42.2/parity_wasm/elements/enum.Section.html). +The [official spec](https://webassembly.github.io/spec/core/binary/modules.html#sections) lists the sections that are part of the final module. It doesn't mention any sections for relocations or symbol names, but it does support "custom" sections. Conventions to use those for linking are documented in the WebAssembly `tool-conventions` repo [here](https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md) and it mentions that LLVM is using those conventions. diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index a80239be37..b023313210 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -1,6 +1,4 @@ use bumpalo::collections::Vec; -use parity_wasm::builder; -use parity_wasm::builder::{FunctionDefinition, ModuleBuilder}; use roc_collections::all::MutMap; use roc_module::low_level::LowLevel; @@ -26,7 +24,6 @@ pub struct WasmBackend<'a> { // Module-level data pub module: WasmModule<'a>, - pub parity_builder: ModuleBuilder, _data_offset_map: MutMap, u32>, _data_offset_next: u32, proc_symbols: Vec<'a, Symbol>, @@ -47,7 +44,6 @@ impl<'a> WasmBackend<'a> { // Module-level data module: WasmModule::new(env.arena), - parity_builder: builder::module(), _data_offset_map: MutMap::default(), _data_offset_next: UNUSED_DATA_SECTION_BYTES, proc_symbols, diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index d049613188..a610fd1162 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -8,9 +8,7 @@ pub mod serialize; mod storage; use bumpalo::{self, collections::Vec, Bump}; -use parity_wasm::builder::ModuleBuilder; -use parity_wasm::elements::{Section, Serialize as ParitySerialize}; use roc_collections::all::{MutMap, MutSet}; use roc_module::symbol::{Interns, Symbol}; use roc_mono::ir::{Proc, ProcLayout}; @@ -21,7 +19,6 @@ use crate::code_builder::{Align, CodeBuilder, ValueType}; use crate::module_builder::{ Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSubSection, SymInfo, WasmModule, }; -use crate::serialize::{SerialBuffer, Serialize}; const PTR_SIZE: u32 = 4; const PTR_TYPE: ValueType = ValueType::I32; @@ -46,16 +43,16 @@ pub fn build_module<'a>( env: &'a Env, procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, ) -> Result, String> { - let (parity_builder, mut wasm_module) = build_module_help(env, procedures)?; + let mut wasm_module = build_module_help(env, procedures)?; let mut buffer = std::vec::Vec::with_capacity(4096); - combine_and_serialize(&mut buffer, parity_builder, &mut wasm_module); + wasm_module.serialize(&mut buffer); Ok(buffer) } pub fn build_module_help<'a>( env: &'a Env, procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, -) -> Result<(ModuleBuilder, WasmModule<'a>), String> { +) -> Result, String> { let proc_symbols = Vec::from_iter_in(procedures.keys().map(|(sym, _)| *sym), env.arena); let mut backend = WasmBackend::new(env, proc_symbols); @@ -101,91 +98,7 @@ pub fn build_module_help<'a>( init_value: GlobalInitValue::I32(stack_pointer_init), }); - Ok((backend.parity_builder, backend.module)) -} - -fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) { - if size > *prev_size { - *index += 1; - *prev_size = size; - } -} - -macro_rules! serialize_parity { - ($buffer: expr, $sections: expr, $lambda: expr) => { - $sections - .remove($sections.iter().position($lambda).unwrap()) - .serialize($buffer) - .unwrap(); - }; -} - -/// Replace parity-wasm's code section with our own handmade one -pub fn combine_and_serialize<'a>( - buffer: &mut std::vec::Vec, - parity_builder: ModuleBuilder, - wasm_module: &mut WasmModule<'a>, // backend: &mut WasmBackend<'a> -) { - buffer.append_u8(0); - buffer.append_slice("asm".as_bytes()); - buffer.write_unencoded_u32(WasmModule::WASM_VERSION); - - let mut index: u32 = 0; - let mut prev_size = buffer.size(); - - let mut parity_module = parity_builder.build(); - let sections = parity_module.sections_mut(); - - wasm_module.types.serialize(buffer); - // serialize_parity!(buffer, sections, |s| matches!(s, Section::Type(_))); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - // wasm_module.import.serialize(buffer); - // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.function.serialize(buffer); - // serialize_parity!(buffer, sections, |s| matches!(s, Section::Function(_))); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - // wasm_module.table.serialize(buffer); - // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.memory.serialize(buffer); - // serialize_parity!(buffer, sections, |s| matches!(s, Section::Memory(_))); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.global.serialize(buffer); - // serialize_parity!(buffer, sections, |s| matches!(s, Section::Global(_))); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.export.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - // wasm_module.start.serialize(buffer); - // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - // wasm_module.element.serialize(buffer); - // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - // wasm_module.data_count.serialize(buffer); - // maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.reloc_code.target_section_index = Some(index); - wasm_module.code.serialize_mut(buffer, &mut wasm_module.reloc_code.entries); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.data.serialize(buffer); - wasm_module.reloc_data.target_section_index = Some(index); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.linking.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.reloc_code.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); - - wasm_module.reloc_data.serialize(buffer); - maybe_increment_section(buffer.size(), &mut prev_size, &mut index); + Ok(backend.module) } pub struct CopyMemoryConfig { diff --git a/compiler/test_wasm/Cargo.toml b/compiler/test_wasm/Cargo.toml index e8370bc960..77b83cf21a 100644 --- a/compiler/test_wasm/Cargo.toml +++ b/compiler/test_wasm/Cargo.toml @@ -9,9 +9,6 @@ edition = "2018" # roc_module = { path = "../module" } # roc_mono = { path = "../mono" } -# # TODO: switch to parity-wasm 0.44 once it's out (allows bumpalo vectors in some places) -parity-wasm = { git = "https://github.com/brian-carroll/parity-wasm", branch = "master" } - wasmer = "2.0.0" wasmer-wasi = "2.0.0" diff --git a/compiler/test_wasm/src/helpers/eval.rs b/compiler/test_wasm/src/helpers/eval.rs index 9fd0c2dc2f..a628656573 100644 --- a/compiler/test_wasm/src/helpers/eval.rs +++ b/compiler/test_wasm/src/helpers/eval.rs @@ -4,8 +4,6 @@ use std::hash::{Hash, Hasher}; use roc_can::builtins::builtin_defs_map; use roc_collections::all::{MutMap, MutSet}; -use roc_gen_wasm::combine_and_serialize; -// use roc_std::{RocDec, RocList, RocOrder, RocStr}; use crate::helpers::wasm32_test_result::Wasm32TestResult; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; @@ -104,19 +102,18 @@ pub fn helper_wasm<'a, T: Wasm32TestResult>( exposed_to_host, }; - let (mut parity_builder, mut wasm_module) = + let mut wasm_module = roc_gen_wasm::build_module_help(&env, procedures).unwrap(); T::insert_test_wrapper( arena, - &mut parity_builder, &mut wasm_module, TEST_WRAPPER_NAME, main_fn_index as u32, ); let mut module_bytes = std::vec::Vec::with_capacity(4096); - combine_and_serialize(&mut module_bytes, parity_builder, &mut wasm_module); + wasm_module.serialize(&mut module_bytes); // for debugging (e.g. with wasm2wat or wasm-objdump) if false { diff --git a/compiler/test_wasm/src/helpers/wasm32_test_result.rs b/compiler/test_wasm/src/helpers/wasm32_test_result.rs index 5fc78cc320..fe338e90d4 100644 --- a/compiler/test_wasm/src/helpers/wasm32_test_result.rs +++ b/compiler/test_wasm/src/helpers/wasm32_test_result.rs @@ -1,5 +1,4 @@ use bumpalo::collections::Vec; -use parity_wasm::builder; use roc_gen_wasm::code_builder::{Align, CodeBuilder, ValueType}; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; @@ -10,7 +9,6 @@ use roc_std::{RocDec, RocList, RocOrder, RocStr}; pub trait Wasm32TestResult { fn insert_test_wrapper<'a>( arena: &'a bumpalo::Bump, - module_builder: &mut builder::ModuleBuilder, wasm_module: &mut WasmModule<'a>, wrapper_name: &str, main_function_index: u32, From ad9b761fce45917338dcf27e848c385238c2d148 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Wed, 3 Nov 2021 11:20:16 +0000 Subject: [PATCH 17/20] Move wasm file format code into a submodule --- compiler/gen_wasm/src/backend.rs | 5 ++-- compiler/gen_wasm/src/layout.rs | 2 +- compiler/gen_wasm/src/lib.rs | 18 +++-------- compiler/gen_wasm/src/storage.rs | 6 ++-- .../src/{ => wasm_module}/code_builder.rs | 11 ++++--- compiler/gen_wasm/src/wasm_module/mod.rs | 12 ++++++++ .../gen_wasm/src/{ => wasm_module}/opcodes.rs | 0 .../sections.rs} | 30 +++++++------------ .../src/{ => wasm_module}/serialize.rs | 0 .../src/helpers/wasm32_test_result.rs | 11 +++---- 10 files changed, 45 insertions(+), 50 deletions(-) rename compiler/gen_wasm/src/{ => wasm_module}/code_builder.rs (99%) create mode 100644 compiler/gen_wasm/src/wasm_module/mod.rs rename compiler/gen_wasm/src/{ => wasm_module}/opcodes.rs (100%) rename compiler/gen_wasm/src/{module_builder.rs => wasm_module/sections.rs} (98%) rename compiler/gen_wasm/src/{ => wasm_module}/serialize.rs (100%) diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index b023313210..6d22ada08f 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -6,11 +6,10 @@ use roc_module::symbol::Symbol; use roc_mono::ir::{CallType, Expr, JoinPointId, Literal, Proc, Stmt}; use roc_mono::layout::{Builtin, Layout}; -use crate::code_builder::{BlockType, CodeBuilder, ValueType}; use crate::layout::WasmLayout; -use crate::module_builder::{Signature, WasmModule}; use crate::storage::{Storage, StoredValue, StoredValueKind}; -use crate::{copy_memory, CopyMemoryConfig, Env, LocalId, PTR_TYPE}; +use crate::wasm_module::{BlockType, CodeBuilder, LocalId, Signature, ValueType, WasmModule}; +use crate::{copy_memory, CopyMemoryConfig, Env, PTR_TYPE}; // Don't allocate any constant data at address zero or near it. Would be valid, but bug-prone. // Follow Emscripten's example by using 1kB (4 bytes would probably do) diff --git a/compiler/gen_wasm/src/layout.rs b/compiler/gen_wasm/src/layout.rs index 43bc52fcd8..49b2b6be93 100644 --- a/compiler/gen_wasm/src/layout.rs +++ b/compiler/gen_wasm/src/layout.rs @@ -1,6 +1,6 @@ use roc_mono::layout::{Layout, UnionLayout}; -use crate::{code_builder::ValueType, PTR_SIZE, PTR_TYPE}; +use crate::{wasm_module::ValueType, PTR_SIZE, PTR_TYPE}; // See README for background information on Wasm locals, memory and function calls #[derive(Debug, Clone)] diff --git a/compiler/gen_wasm/src/lib.rs b/compiler/gen_wasm/src/lib.rs index a610fd1162..d5957574c9 100644 --- a/compiler/gen_wasm/src/lib.rs +++ b/compiler/gen_wasm/src/lib.rs @@ -1,11 +1,8 @@ mod backend; -pub mod code_builder; pub mod from_wasm32_memory; mod layout; -pub mod module_builder; -pub mod opcodes; -pub mod serialize; mod storage; +pub mod wasm_module; use bumpalo::{self, collections::Vec, Bump}; @@ -15,9 +12,9 @@ use roc_mono::ir::{Proc, ProcLayout}; use roc_mono::layout::LayoutIds; use crate::backend::WasmBackend; -use crate::code_builder::{Align, CodeBuilder, ValueType}; -use crate::module_builder::{ - Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSubSection, SymInfo, WasmModule, +use crate::wasm_module::{ + Align, CodeBuilder, Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSubSection, + LocalId, SymInfo, ValueType, WasmModule, }; const PTR_SIZE: u32 = 4; @@ -26,13 +23,6 @@ const PTR_TYPE: ValueType = ValueType::I32; pub const STACK_POINTER_GLOBAL_ID: u32 = 0; pub const FRAME_ALIGNMENT_BYTES: i32 = 16; -/// Code section ID from spec -/// https://webassembly.github.io/spec/core/binary/modules.html#sections -pub const CODE_SECTION_ID: u8 = 10; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct LocalId(pub u32); - pub struct Env<'a> { pub arena: &'a Bump, pub interns: Interns, diff --git a/compiler/gen_wasm/src/storage.rs b/compiler/gen_wasm/src/storage.rs index 04982846bf..61ecd1347c 100644 --- a/compiler/gen_wasm/src/storage.rs +++ b/compiler/gen_wasm/src/storage.rs @@ -4,9 +4,9 @@ use bumpalo::Bump; use roc_collections::all::MutMap; use roc_module::symbol::Symbol; -use crate::code_builder::{CodeBuilder, ValueType, VirtualMachineSymbolState}; use crate::layout::WasmLayout; -use crate::{copy_memory, round_up_to_alignment, CopyMemoryConfig, LocalId, PTR_SIZE, PTR_TYPE}; +use crate::wasm_module::{CodeBuilder, LocalId, ValueType, VirtualMachineSymbolState}; +use crate::{copy_memory, round_up_to_alignment, CopyMemoryConfig, PTR_SIZE, PTR_TYPE}; pub enum StoredValueKind { Parameter, @@ -291,7 +291,7 @@ impl<'a> Storage<'a> { | StoredValue::Local { value_type, size, .. } => { - use crate::code_builder::Align::*; + use crate::wasm_module::Align::*; code_builder.get_local(to_ptr); self.load_symbols(code_builder, &[from_symbol]); match (value_type, size) { diff --git a/compiler/gen_wasm/src/code_builder.rs b/compiler/gen_wasm/src/wasm_module/code_builder.rs similarity index 99% rename from compiler/gen_wasm/src/code_builder.rs rename to compiler/gen_wasm/src/wasm_module/code_builder.rs index d15b2b4ed2..b6344bfeb8 100644 --- a/compiler/gen_wasm/src/code_builder.rs +++ b/compiler/gen_wasm/src/wasm_module/code_builder.rs @@ -5,10 +5,13 @@ use std::fmt::Debug; use roc_module::symbol::Symbol; -use crate::module_builder::{IndexRelocType, RelocationEntry}; -use crate::opcodes::*; -use crate::serialize::{SerialBuffer, Serialize}; -use crate::{round_up_to_alignment, LocalId, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID}; +use super::opcodes::*; +use super::sections::{IndexRelocType, RelocationEntry}; +use super::serialize::{SerialBuffer, Serialize}; +use crate::{round_up_to_alignment, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct LocalId(pub u32); /// Wasm value type. (Rust representation matches Wasm encoding) #[repr(u8)] diff --git a/compiler/gen_wasm/src/wasm_module/mod.rs b/compiler/gen_wasm/src/wasm_module/mod.rs new file mode 100644 index 0000000000..ec397c281e --- /dev/null +++ b/compiler/gen_wasm/src/wasm_module/mod.rs @@ -0,0 +1,12 @@ +pub mod code_builder; +pub mod opcodes; +pub mod sections; +pub mod serialize; + +pub use code_builder::{ + Align, BlockType, CodeBuilder, LocalId, ValueType, VirtualMachineSymbolState, +}; +pub use sections::{ + Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSubSection, Signature, SymInfo, + WasmModule, +}; diff --git a/compiler/gen_wasm/src/opcodes.rs b/compiler/gen_wasm/src/wasm_module/opcodes.rs similarity index 100% rename from compiler/gen_wasm/src/opcodes.rs rename to compiler/gen_wasm/src/wasm_module/opcodes.rs diff --git a/compiler/gen_wasm/src/module_builder.rs b/compiler/gen_wasm/src/wasm_module/sections.rs similarity index 98% rename from compiler/gen_wasm/src/module_builder.rs rename to compiler/gen_wasm/src/wasm_module/sections.rs index f59082eb68..6e91503b66 100644 --- a/compiler/gen_wasm/src/module_builder.rs +++ b/compiler/gen_wasm/src/wasm_module/sections.rs @@ -1,9 +1,9 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; -use crate::code_builder::{Align, CodeBuilder, ValueType}; -use crate::opcodes; -use crate::serialize::{SerialBuffer, Serialize}; +use super::opcodes; +use super::serialize::{SerialBuffer, Serialize}; +use super::{Align, CodeBuilder, ValueType}; /******************************************************************* * @@ -920,16 +920,6 @@ impl<'a> Serialize for LinkingSection<'a> { * https://webassembly.github.io/spec/core/binary/modules.html * *******************************************************************/ -pub struct RocUnusedSection {} -impl Serialize for RocUnusedSection { - fn serialize(&self, _buffer: &mut T) {} -} -impl Default for RocUnusedSection { - fn default() -> Self { - RocUnusedSection {} - } -} - pub struct WasmModule<'a> { pub types: TypeSection<'a>, pub import: ImportSection<'a>, @@ -953,13 +943,6 @@ pub struct WasmModule<'a> { pub reloc_data: RelocationSection<'a>, } -fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) { - if size > *prev_size { - *index += 1; - *prev_size = size; - } -} - impl<'a> WasmModule<'a> { pub const WASM_VERSION: u32 = 1; @@ -1040,3 +1023,10 @@ impl<'a> WasmModule<'a> { self.reloc_data.serialize(buffer); } } + +fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) { + if size > *prev_size { + *index += 1; + *prev_size = size; + } +} diff --git a/compiler/gen_wasm/src/serialize.rs b/compiler/gen_wasm/src/wasm_module/serialize.rs similarity index 100% rename from compiler/gen_wasm/src/serialize.rs rename to compiler/gen_wasm/src/wasm_module/serialize.rs diff --git a/compiler/test_wasm/src/helpers/wasm32_test_result.rs b/compiler/test_wasm/src/helpers/wasm32_test_result.rs index fe338e90d4..627e4ad0a8 100644 --- a/compiler/test_wasm/src/helpers/wasm32_test_result.rs +++ b/compiler/test_wasm/src/helpers/wasm32_test_result.rs @@ -1,9 +1,10 @@ use bumpalo::collections::Vec; -use roc_gen_wasm::code_builder::{Align, CodeBuilder, ValueType}; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; -use roc_gen_wasm::module_builder::{Export, ExportType, Signature, WasmModule}; -use roc_gen_wasm::LocalId; +use roc_gen_wasm::wasm_module::opcodes; +use roc_gen_wasm::wasm_module::{ + Align, CodeBuilder, Export, ExportType, LocalId, Signature, ValueType, WasmModule, +}; use roc_std::{RocDec, RocList, RocOrder, RocStr}; pub trait Wasm32TestResult { @@ -42,7 +43,7 @@ macro_rules! build_wrapper_body_primitive { code_builder.get_local(frame_pointer_id); // Raw "call" instruction. Don't bother with symbol & relocation since we're not going to link. - code_builder.inst_imm32(roc_gen_wasm::opcodes::CALL, 0, true, main_function_index); + code_builder.inst_imm32(opcodes::CALL, 0, true, main_function_index); code_builder.$store_instruction($align, 0); code_builder.get_local(frame_pointer_id); @@ -70,7 +71,7 @@ fn build_wrapper_body_stack_memory( code_builder.get_local(local_id); // Raw "call" instruction. Don't bother with symbol & relocation since we're not going to link. - code_builder.inst_imm32(roc_gen_wasm::opcodes::CALL, 0, true, main_function_index); + code_builder.inst_imm32(opcodes::CALL, 0, true, main_function_index); code_builder.get_local(local_id); code_builder.finalize(local_types, size as i32, frame_pointer); } From d87db2047808d1167cd1f53db8535c8ab1cfd94d Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Wed, 3 Nov 2021 11:31:12 +0000 Subject: [PATCH 18/20] clippy --- compiler/gen_wasm/src/wasm_module/code_builder.rs | 2 +- compiler/gen_wasm/src/wasm_module/sections.rs | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/compiler/gen_wasm/src/wasm_module/code_builder.rs b/compiler/gen_wasm/src/wasm_module/code_builder.rs index b6344bfeb8..12fbbe3324 100644 --- a/compiler/gen_wasm/src/wasm_module/code_builder.rs +++ b/compiler/gen_wasm/src/wasm_module/code_builder.rs @@ -395,7 +395,7 @@ impl<'a> CodeBuilder<'a> { loop { let next_insert = insert_iter.next(); - let next_pos = next_insert.map(|i| i.at).unwrap_or(self.code.len()); + let next_pos = next_insert.map(|i| i.at).unwrap_or_else(|| self.code.len()); // Relocation offset needs to be an index into the body of the code section, but // at this point it is an index into self.code. Need to adjust for all previous functions diff --git a/compiler/gen_wasm/src/wasm_module/sections.rs b/compiler/gen_wasm/src/wasm_module/sections.rs index 6e91503b66..98e493d104 100644 --- a/compiler/gen_wasm/src/wasm_module/sections.rs +++ b/compiler/gen_wasm/src/wasm_module/sections.rs @@ -951,15 +951,15 @@ impl<'a> WasmModule<'a> { types: TypeSection::new(arena), import: ImportSection::new(arena), function: FunctionSection::new(arena), - table: (), + table: (), // Unused in Roc (mainly for function pointers) memory: MemorySection::new(1024 * 1024), global: GlobalSection::new(arena), export: ExportSection::new(arena), - start: (), - element: (), - data_count: (), + start: (), // Entry function. In Roc this would be part of the platform. + element: (), // Unused in Roc (related to table section) + data_count: (), // TODO, related to data section code: CodeSection::new(arena), - data: (), + data: (), // TODO: program constants (e.g. string literals) linking: LinkingSection::new(arena), reloc_code: RelocationSection::new(arena, "reloc.CODE"), reloc_data: RelocationSection::new(arena, "reloc.DATA"), @@ -972,6 +972,7 @@ impl<'a> WasmModule<'a> { self.function.signature_indices.push(index); } + #[allow(clippy::unit_arg)] pub fn serialize(&mut self, buffer: &mut T) { buffer.append_u8(0); buffer.append_slice("asm".as_bytes()); From b432d923b16bd2187d8e843c4c12e2767318f9ed Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Wed, 3 Nov 2021 11:45:54 +0000 Subject: [PATCH 19/20] Create a linking module --- .../gen_wasm/src/wasm_module/code_builder.rs | 2 +- compiler/gen_wasm/src/wasm_module/linking.rs | 462 +++++++++++++++++ compiler/gen_wasm/src/wasm_module/mod.rs | 5 +- compiler/gen_wasm/src/wasm_module/sections.rs | 465 +----------------- 4 files changed, 471 insertions(+), 463 deletions(-) create mode 100644 compiler/gen_wasm/src/wasm_module/linking.rs diff --git a/compiler/gen_wasm/src/wasm_module/code_builder.rs b/compiler/gen_wasm/src/wasm_module/code_builder.rs index 12fbbe3324..c798213ebb 100644 --- a/compiler/gen_wasm/src/wasm_module/code_builder.rs +++ b/compiler/gen_wasm/src/wasm_module/code_builder.rs @@ -6,7 +6,7 @@ use std::fmt::Debug; use roc_module::symbol::Symbol; use super::opcodes::*; -use super::sections::{IndexRelocType, RelocationEntry}; +use super::linking::{IndexRelocType, RelocationEntry}; use super::serialize::{SerialBuffer, Serialize}; use crate::{round_up_to_alignment, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID}; diff --git a/compiler/gen_wasm/src/wasm_module/linking.rs b/compiler/gen_wasm/src/wasm_module/linking.rs new file mode 100644 index 0000000000..bf2875b770 --- /dev/null +++ b/compiler/gen_wasm/src/wasm_module/linking.rs @@ -0,0 +1,462 @@ +use bumpalo::collections::vec::Vec; +use bumpalo::Bump; + +use super::sections::{update_section_size, write_custom_section_header}; +use super::serialize::{SerialBuffer, Serialize}; +use super::Align; + +/******************************************************************* + * + * Relocation sections + * + * https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#relocation-sections + * + *******************************************************************/ + +#[repr(u8)] +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +pub enum IndexRelocType { + /// a function index encoded as a 5-byte [varuint32]. Used for the immediate argument of a `call` instruction. + FunctionIndexLeb = 0, + /// a function table index encoded as a 5-byte [varint32]. + /// Used to refer to the immediate argument of a `i32.const` instruction, e.g. taking the address of a function. + TableIndexSleb = 1, + /// a function table index encoded as a [uint32], e.g. taking the address of a function in a static data initializer. + TableIndexI32 = 2, + /// a type index encoded as a 5-byte [varuint32], e.g. the type immediate in a `call_indirect`. + TypeIndexLeb = 6, + /// a global index encoded as a 5-byte [varuint32], e.g. the index immediate in a `get_global`. + GlobalIndexLeb = 7, + /// an event index encoded as a 5-byte [varuint32]. Used for the immediate argument of a `throw` and `if_except` instruction. + EventIndexLeb = 10, + /// a global index encoded as [uint32]. + GlobalIndexI32 = 13, + /// the 64-bit counterpart of `R_WASM_TABLE_INDEX_SLEB`. A function table index encoded as a 10-byte [varint64]. + /// Used to refer to the immediate argument of a `i64.const` instruction, e.g. taking the address of a function in Wasm64. + TableIndexSleb64 = 18, + /// the 64-bit counterpart of `R_WASM_TABLE_INDEX_I32`. + /// A function table index encoded as a [uint64], e.g. taking the address of a function in a static data initializer. + TableIndexI64 = 19, + /// a table number encoded as a 5-byte [varuint32]. Used for the table immediate argument in the table.* instructions. + TableNumberLeb = 20, +} + +#[repr(u8)] +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +pub enum OffsetRelocType { + /// a linear memory index encoded as a 5-byte [varuint32]. + /// Used for the immediate argument of a `load` or `store` instruction, e.g. directly loading from or storing to a C++ global. + MemoryAddrLeb = 3, + /// a linear memory index encoded as a 5-byte [varint32]. + /// Used for the immediate argument of a `i32.const` instruction, e.g. taking the address of a C++ global. + MemoryAddrSleb = 4, + /// a linear memory index encoded as a [uint32], e.g. taking the address of a C++ global in a static data initializer. + MemoryAddrI32 = 5, + /// a byte offset within code section for the specific function encoded as a [uint32]. + /// The offsets start at the actual function code excluding its size field. + FunctionOffsetI32 = 8, + /// a byte offset from start of the specified section encoded as a [uint32]. + SectionOffsetI32 = 9, + /// the 64-bit counterpart of `R_WASM_MEMORY_ADDR_LEB`. A 64-bit linear memory index encoded as a 10-byte [varuint64], + /// Used for the immediate argument of a `load` or `store` instruction on a 64-bit linear memory array. + MemoryAddrLeb64 = 14, + /// the 64-bit counterpart of `R_WASM_MEMORY_ADDR_SLEB`. A 64-bit linear memory index encoded as a 10-byte [varint64]. + /// Used for the immediate argument of a `i64.const` instruction. + MemoryAddrSleb64 = 15, + /// the 64-bit counterpart of `R_WASM_MEMORY_ADDR`. A 64-bit linear memory index encoded as a [uint64], + /// e.g. taking the 64-bit address of a C++ global in a static data initializer. + MemoryAddrI64 = 16, +} + +#[derive(Debug, Clone)] +pub enum RelocationEntry { + Index { + type_id: IndexRelocType, + offset: u32, // offset 0 means the next byte after section id and size + symbol_index: u32, // index in symbol table + }, + Offset { + type_id: OffsetRelocType, + offset: u32, // offset 0 means the next byte after section id and size + symbol_index: u32, // index in symbol table + addend: i32, // addend to add to the address + }, +} + +impl RelocationEntry { + pub fn offset(&self) -> u32 { + match self { + Self::Index { offset, .. } => *offset, + Self::Offset { offset, .. } => *offset, + } + } + + pub fn offset_mut(&mut self) -> &mut u32 { + match self { + Self::Index { offset, .. } => offset, + Self::Offset { offset, .. } => offset, + } + } +} + +impl RelocationEntry { + pub fn for_function_call(offset: u32, symbol_index: u32) -> Self { + RelocationEntry::Index { + type_id: IndexRelocType::FunctionIndexLeb, + offset, + symbol_index, + } + } +} + +impl Serialize for RelocationEntry { + fn serialize(&self, buffer: &mut T) { + match self { + Self::Index { + type_id, + offset, + symbol_index, + } => { + buffer.append_u8(*type_id as u8); + buffer.encode_u32(*offset); + buffer.encode_u32(*symbol_index); + } + Self::Offset { + type_id, + offset, + symbol_index, + addend, + } => { + buffer.append_u8(*type_id as u8); + buffer.encode_u32(*offset); + buffer.encode_u32(*symbol_index); + buffer.encode_i32(*addend); + } + } + } +} + +#[derive(Debug)] +pub struct RelocationSection<'a> { + pub name: &'a str, + /// The *index* (not ID!) of the target section in the module + pub target_section_index: Option, + pub entries: Vec<'a, RelocationEntry>, +} + +impl<'a> RelocationSection<'a> { + pub fn new(arena: &'a Bump, name: &'a str) -> Self { + RelocationSection { + name, + target_section_index: None, + entries: Vec::with_capacity_in(64, arena), + } + } +} + +impl<'a> Serialize for RelocationSection<'a> { + fn serialize(&self, buffer: &mut T) { + if !self.entries.is_empty() { + let header_indices = write_custom_section_header(buffer, self.name); + buffer.encode_u32(self.target_section_index.unwrap()); + self.entries.serialize(buffer); + update_section_size(buffer, header_indices); + } + } +} + +/******************************************************************* + * + * Linking section + * + * https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#linking-metadata-section + * + *******************************************************************/ + +/// Linking metadata for data segments +pub struct LinkingSegment { + pub name: String, + pub alignment: Align, + pub flags: u32, +} + +impl Serialize for LinkingSegment { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +/// Linking metadata for init (start) functions +pub struct LinkingInitFunc { + pub priority: u32, + pub symbol_index: u32, // index in the symbol table, not the function index +} + +impl Serialize for LinkingInitFunc { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +//------------------------------------------------ +// Common data +//------------------------------------------------ + +#[repr(u8)] +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +pub enum ComdatSymKind { + Data = 0, + Function = 1, + Global = 2, + Event = 3, + Table = 4, + Section = 5, +} + +pub struct ComdatSym { + pub kind: ComdatSymKind, + pub index: u32, +} + +impl Serialize for ComdatSym { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +/// Linking metadata for common data +/// A COMDAT group may contain one or more functions, data segments, and/or custom sections. +/// The linker will include all of these elements with a given group name from one object file, +/// and will exclude any element with this group name from all other object files. +#[allow(dead_code)] +pub struct LinkingComdat<'a> { + name: String, + flags: u32, + syms: Vec<'a, ComdatSym>, +} + +impl<'a> Serialize for LinkingComdat<'a> { + fn serialize(&self, _buffer: &mut T) { + todo!(); + } +} + +//------------------------------------------------ +// Symbol table +//------------------------------------------------ + +/// Indicating that this is a weak symbol. When +/// linking multiple modules defining the same symbol, all weak definitions are +/// discarded if any strong definitions exist; then if multiple weak definitions +/// exist all but one (unspecified) are discarded; and finally it is an error if +/// more than one definition remains. +pub const WASM_SYM_BINDING_WEAK: u32 = 1; + +/// Indicating that this is a local symbol (this is exclusive with `WASM_SYM_BINDING_WEAK`). +/// Local symbols are not to be exported, or linked to other modules/sections. +/// The names of all non-local symbols must be unique, but the names of local symbols +/// are not considered for uniqueness. A local function or global symbol cannot reference an import. +pub const WASM_SYM_BINDING_LOCAL: u32 = 2; + +/// Indicating that this is a hidden symbol. +/// Hidden symbols are not to be exported when performing the final link, but +/// may be linked to other modules. +pub const WASM_SYM_VISIBILITY_HIDDEN: u32 = 4; + +/// Indicating that this symbol is not defined. +/// For non-data symbols, this must match whether the symbol is an import +/// or is defined; for data symbols, determines whether a segment is specified. +pub const WASM_SYM_UNDEFINED: u32 = 0x10; // required if the symbol refers to an import + +/// The symbol is intended to be exported from the +/// wasm module to the host environment. This differs from the visibility flags +/// in that it effects the static linker. +pub const WASM_SYM_EXPORTED: u32 = 0x20; + +/// The symbol uses an explicit symbol name, +/// rather than reusing the name from a wasm import. This allows it to remap +/// imports from foreign WebAssembly modules into local symbols with different +/// names. +pub const WASM_SYM_EXPLICIT_NAME: u32 = 0x40; // use the name from the symbol table, not from the import + +/// The symbol is intended to be included in the +/// linker output, regardless of whether it is used by the program. +pub const WASM_SYM_NO_STRIP: u32 = 0x80; + +pub enum WasmObjectSymbol { + Defined { index: u32, name: String }, + Imported { index: u32 }, +} + +impl Serialize for WasmObjectSymbol { + fn serialize(&self, buffer: &mut T) { + match self { + Self::Defined { index, name } => { + buffer.encode_u32(*index); + buffer.encode_u32(name.len() as u32); + buffer.append_slice(name.as_bytes()); + } + Self::Imported { index } => { + buffer.encode_u32(*index); + } + } + } +} + +pub enum DataSymbol { + Defined { + name: String, + index: u32, + offset: u32, + size: u32, + }, + Imported { + name: String, + }, +} + +impl Serialize for DataSymbol { + fn serialize(&self, buffer: &mut T) { + match self { + Self::Defined { + name, + index, + offset, + size, + } => { + buffer.encode_u32(name.len() as u32); + buffer.append_slice(name.as_bytes()); + buffer.encode_u32(*index); + buffer.encode_u32(*offset); + buffer.encode_u32(*size); + } + Self::Imported { name } => { + buffer.encode_u32(name.len() as u32); + buffer.append_slice(name.as_bytes()); + } + } + } +} + +/// section index (not section id!) +#[derive(Clone, Copy, Debug)] +pub struct SectionIndex(u32); + +pub enum SymInfoFields { + Function(WasmObjectSymbol), + Data(DataSymbol), + Global(WasmObjectSymbol), + Section(SectionIndex), + Event(WasmObjectSymbol), + Table(WasmObjectSymbol), +} + +pub struct SymInfo { + flags: u32, + info: SymInfoFields, +} + +impl SymInfo { + pub fn for_function(wasm_function_index: u32, name: String) -> Self { + let linking_symbol = WasmObjectSymbol::Defined { + index: wasm_function_index, + name, + }; + SymInfo { + flags: 0, + info: SymInfoFields::Function(linking_symbol), + } + } +} + +impl Serialize for SymInfo { + fn serialize(&self, buffer: &mut T) { + buffer.append_u8(match self.info { + SymInfoFields::Function(_) => 0, + SymInfoFields::Data(_) => 1, + SymInfoFields::Global(_) => 2, + SymInfoFields::Section(_) => 3, + SymInfoFields::Event(_) => 4, + SymInfoFields::Table(_) => 5, + }); + buffer.encode_u32(self.flags); + match &self.info { + SymInfoFields::Function(x) => x.serialize(buffer), + SymInfoFields::Data(x) => x.serialize(buffer), + SymInfoFields::Global(x) => x.serialize(buffer), + SymInfoFields::Section(SectionIndex(x)) => { + buffer.encode_u32(*x); + } + SymInfoFields::Event(x) => x.serialize(buffer), + SymInfoFields::Table(x) => x.serialize(buffer), + }; + } +} + +//---------------------------------------------------------------- +// Linking subsections +//---------------------------------------------------------------- + +pub enum LinkingSubSection<'a> { + /// Extra metadata about the data segments. + SegmentInfo(Vec<'a, LinkingSegment>), + /// Specifies a list of constructor functions to be called at startup. + /// These constructors will be called in priority order after memory has been initialized. + InitFuncs(Vec<'a, LinkingInitFunc>), + /// Specifies the COMDAT groups of associated linking objects, which are linked only once and all together. + ComdatInfo(Vec<'a, LinkingComdat<'a>>), + /// Specifies extra information about the symbols present in the module. + SymbolTable(Vec<'a, SymInfo>), +} + +impl<'a> Serialize for LinkingSubSection<'a> { + fn serialize(&self, buffer: &mut T) { + buffer.append_u8(match self { + Self::SegmentInfo(_) => 5, + Self::InitFuncs(_) => 6, + Self::ComdatInfo(_) => 7, + Self::SymbolTable(_) => 8, + }); + let payload_len_index = buffer.reserve_padded_u32(); + let payload_start_index = buffer.size(); + match self { + Self::SegmentInfo(items) => items.serialize(buffer), + Self::InitFuncs(items) => items.serialize(buffer), + Self::ComdatInfo(items) => items.serialize(buffer), + Self::SymbolTable(items) => items.serialize(buffer), + } + buffer.overwrite_padded_u32( + payload_len_index, + (buffer.size() - payload_start_index) as u32, + ); + } +} + +//---------------------------------------------------------------- +// Linking metadata section +//---------------------------------------------------------------- + +const LINKING_VERSION: u8 = 2; + +pub struct LinkingSection<'a> { + pub subsections: Vec<'a, LinkingSubSection<'a>>, +} + +impl<'a> LinkingSection<'a> { + pub fn new(arena: &'a Bump) -> Self { + LinkingSection { + subsections: Vec::with_capacity_in(1, arena), + } + } +} + +impl<'a> Serialize for LinkingSection<'a> { + fn serialize(&self, buffer: &mut T) { + let header_indices = write_custom_section_header(buffer, "linking"); + buffer.append_u8(LINKING_VERSION); + for subsection in self.subsections.iter() { + subsection.serialize(buffer); + } + update_section_size(buffer, header_indices); + } +} diff --git a/compiler/gen_wasm/src/wasm_module/mod.rs b/compiler/gen_wasm/src/wasm_module/mod.rs index ec397c281e..d4d649e6c0 100644 --- a/compiler/gen_wasm/src/wasm_module/mod.rs +++ b/compiler/gen_wasm/src/wasm_module/mod.rs @@ -1,4 +1,5 @@ pub mod code_builder; +pub mod linking; pub mod opcodes; pub mod sections; pub mod serialize; @@ -6,7 +7,7 @@ pub mod serialize; pub use code_builder::{ Align, BlockType, CodeBuilder, LocalId, ValueType, VirtualMachineSymbolState, }; +pub use linking::{LinkingSubSection, SymInfo}; pub use sections::{ - Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSubSection, Signature, SymInfo, - WasmModule, + Export, ExportType, Global, GlobalInitValue, GlobalType, Signature, WasmModule, }; diff --git a/compiler/gen_wasm/src/wasm_module/sections.rs b/compiler/gen_wasm/src/wasm_module/sections.rs index 98e493d104..a82a544390 100644 --- a/compiler/gen_wasm/src/wasm_module/sections.rs +++ b/compiler/gen_wasm/src/wasm_module/sections.rs @@ -1,9 +1,10 @@ use bumpalo::collections::vec::Vec; use bumpalo::Bump; +use super::linking::{LinkingSection, RelocationEntry, RelocationSection}; use super::opcodes; use super::serialize::{SerialBuffer, Serialize}; -use super::{Align, CodeBuilder, ValueType}; +use super::{CodeBuilder, ValueType}; /******************************************************************* * @@ -29,7 +30,7 @@ pub enum SectionId { DataCount = 12, } -struct SectionHeaderIndices { +pub struct SectionHeaderIndices { size_index: usize, body_index: usize, } @@ -46,7 +47,7 @@ fn write_section_header(buffer: &mut T, id: SectionId) -> Secti } /// Write a custom section header, returning the position of the encoded length -fn write_custom_section_header( +pub fn write_custom_section_header( buffer: &mut T, name: &str, ) -> SectionHeaderIndices { @@ -61,7 +62,7 @@ fn write_custom_section_header( } /// Update a section header with its final size, after writing the bytes -fn update_section_size(buffer: &mut T, header_indices: SectionHeaderIndices) { +pub fn update_section_size(buffer: &mut T, header_indices: SectionHeaderIndices) { let size = buffer.size() - header_indices.body_index; buffer.overwrite_padded_u32(header_indices.size_index, size as u32); } @@ -457,462 +458,6 @@ impl<'a> CodeSection<'a> { } } -/******************************************************************* - * - * Relocation sections - * - * https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#relocation-sections - * - *******************************************************************/ - -#[repr(u8)] -#[derive(PartialEq, Eq, Clone, Copy, Debug)] -pub enum IndexRelocType { - /// a function index encoded as a 5-byte [varuint32]. Used for the immediate argument of a `call` instruction. - FunctionIndexLeb = 0, - /// a function table index encoded as a 5-byte [varint32]. - /// Used to refer to the immediate argument of a `i32.const` instruction, e.g. taking the address of a function. - TableIndexSleb = 1, - /// a function table index encoded as a [uint32], e.g. taking the address of a function in a static data initializer. - TableIndexI32 = 2, - /// a type index encoded as a 5-byte [varuint32], e.g. the type immediate in a `call_indirect`. - TypeIndexLeb = 6, - /// a global index encoded as a 5-byte [varuint32], e.g. the index immediate in a `get_global`. - GlobalIndexLeb = 7, - /// an event index encoded as a 5-byte [varuint32]. Used for the immediate argument of a `throw` and `if_except` instruction. - EventIndexLeb = 10, - /// a global index encoded as [uint32]. - GlobalIndexI32 = 13, - /// the 64-bit counterpart of `R_WASM_TABLE_INDEX_SLEB`. A function table index encoded as a 10-byte [varint64]. - /// Used to refer to the immediate argument of a `i64.const` instruction, e.g. taking the address of a function in Wasm64. - TableIndexSleb64 = 18, - /// the 64-bit counterpart of `R_WASM_TABLE_INDEX_I32`. - /// A function table index encoded as a [uint64], e.g. taking the address of a function in a static data initializer. - TableIndexI64 = 19, - /// a table number encoded as a 5-byte [varuint32]. Used for the table immediate argument in the table.* instructions. - TableNumberLeb = 20, -} - -#[repr(u8)] -#[derive(PartialEq, Eq, Clone, Copy, Debug)] -pub enum OffsetRelocType { - /// a linear memory index encoded as a 5-byte [varuint32]. - /// Used for the immediate argument of a `load` or `store` instruction, e.g. directly loading from or storing to a C++ global. - MemoryAddrLeb = 3, - /// a linear memory index encoded as a 5-byte [varint32]. - /// Used for the immediate argument of a `i32.const` instruction, e.g. taking the address of a C++ global. - MemoryAddrSleb = 4, - /// a linear memory index encoded as a [uint32], e.g. taking the address of a C++ global in a static data initializer. - MemoryAddrI32 = 5, - /// a byte offset within code section for the specific function encoded as a [uint32]. - /// The offsets start at the actual function code excluding its size field. - FunctionOffsetI32 = 8, - /// a byte offset from start of the specified section encoded as a [uint32]. - SectionOffsetI32 = 9, - /// the 64-bit counterpart of `R_WASM_MEMORY_ADDR_LEB`. A 64-bit linear memory index encoded as a 10-byte [varuint64], - /// Used for the immediate argument of a `load` or `store` instruction on a 64-bit linear memory array. - MemoryAddrLeb64 = 14, - /// the 64-bit counterpart of `R_WASM_MEMORY_ADDR_SLEB`. A 64-bit linear memory index encoded as a 10-byte [varint64]. - /// Used for the immediate argument of a `i64.const` instruction. - MemoryAddrSleb64 = 15, - /// the 64-bit counterpart of `R_WASM_MEMORY_ADDR`. A 64-bit linear memory index encoded as a [uint64], - /// e.g. taking the 64-bit address of a C++ global in a static data initializer. - MemoryAddrI64 = 16, -} - -#[derive(Debug, Clone)] -pub enum RelocationEntry { - Index { - type_id: IndexRelocType, - offset: u32, // offset 0 means the next byte after section id and size - symbol_index: u32, // index in symbol table - }, - Offset { - type_id: OffsetRelocType, - offset: u32, // offset 0 means the next byte after section id and size - symbol_index: u32, // index in symbol table - addend: i32, // addend to add to the address - }, -} - -impl RelocationEntry { - pub fn offset(&self) -> u32 { - match self { - Self::Index { offset, .. } => *offset, - Self::Offset { offset, .. } => *offset, - } - } - - pub fn offset_mut(&mut self) -> &mut u32 { - match self { - Self::Index { offset, .. } => offset, - Self::Offset { offset, .. } => offset, - } - } -} - -impl RelocationEntry { - pub fn for_function_call(offset: u32, symbol_index: u32) -> Self { - RelocationEntry::Index { - type_id: IndexRelocType::FunctionIndexLeb, - offset, - symbol_index, - } - } -} - -impl Serialize for RelocationEntry { - fn serialize(&self, buffer: &mut T) { - match self { - Self::Index { - type_id, - offset, - symbol_index, - } => { - buffer.append_u8(*type_id as u8); - buffer.encode_u32(*offset); - buffer.encode_u32(*symbol_index); - } - Self::Offset { - type_id, - offset, - symbol_index, - addend, - } => { - buffer.append_u8(*type_id as u8); - buffer.encode_u32(*offset); - buffer.encode_u32(*symbol_index); - buffer.encode_i32(*addend); - } - } - } -} - -#[derive(Debug)] -pub struct RelocationSection<'a> { - pub name: &'a str, - /// The *index* (not ID!) of the target section in the module - pub target_section_index: Option, - pub entries: Vec<'a, RelocationEntry>, -} - -impl<'a> RelocationSection<'a> { - fn new(arena: &'a Bump, name: &'a str) -> Self { - RelocationSection { - name, - target_section_index: None, - entries: Vec::with_capacity_in(64, arena), - } - } -} - -impl<'a> Serialize for RelocationSection<'a> { - fn serialize(&self, buffer: &mut T) { - if !self.entries.is_empty() { - let header_indices = write_custom_section_header(buffer, self.name); - buffer.encode_u32(self.target_section_index.unwrap()); - self.entries.serialize(buffer); - update_section_size(buffer, header_indices); - } - } -} - -/******************************************************************* - * - * Linking section - * - * https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#linking-metadata-section - * - *******************************************************************/ - -/// Linking metadata for data segments -pub struct LinkingSegment { - pub name: String, - pub alignment: Align, - pub flags: u32, -} - -impl Serialize for LinkingSegment { - fn serialize(&self, _buffer: &mut T) { - todo!(); - } -} - -/// Linking metadata for init (start) functions -pub struct LinkingInitFunc { - pub priority: u32, - pub symbol_index: u32, // index in the symbol table, not the function index -} - -impl Serialize for LinkingInitFunc { - fn serialize(&self, _buffer: &mut T) { - todo!(); - } -} - -//------------------------------------------------ -// Common data -//------------------------------------------------ - -#[repr(u8)] -#[derive(PartialEq, Eq, Clone, Copy, Debug)] -pub enum ComdatSymKind { - Data = 0, - Function = 1, - Global = 2, - Event = 3, - Table = 4, - Section = 5, -} - -pub struct ComdatSym { - pub kind: ComdatSymKind, - pub index: u32, -} - -impl Serialize for ComdatSym { - fn serialize(&self, _buffer: &mut T) { - todo!(); - } -} - -/// Linking metadata for common data -/// A COMDAT group may contain one or more functions, data segments, and/or custom sections. -/// The linker will include all of these elements with a given group name from one object file, -/// and will exclude any element with this group name from all other object files. -#[allow(dead_code)] -pub struct LinkingComdat<'a> { - name: String, - flags: u32, - syms: Vec<'a, ComdatSym>, -} - -impl<'a> Serialize for LinkingComdat<'a> { - fn serialize(&self, _buffer: &mut T) { - todo!(); - } -} - -//------------------------------------------------ -// Symbol table -//------------------------------------------------ - -/// Indicating that this is a weak symbol. When -/// linking multiple modules defining the same symbol, all weak definitions are -/// discarded if any strong definitions exist; then if multiple weak definitions -/// exist all but one (unspecified) are discarded; and finally it is an error if -/// more than one definition remains. -pub const WASM_SYM_BINDING_WEAK: u32 = 1; - -/// Indicating that this is a local symbol (this is exclusive with `WASM_SYM_BINDING_WEAK`). -/// Local symbols are not to be exported, or linked to other modules/sections. -/// The names of all non-local symbols must be unique, but the names of local symbols -/// are not considered for uniqueness. A local function or global symbol cannot reference an import. -pub const WASM_SYM_BINDING_LOCAL: u32 = 2; - -/// Indicating that this is a hidden symbol. -/// Hidden symbols are not to be exported when performing the final link, but -/// may be linked to other modules. -pub const WASM_SYM_VISIBILITY_HIDDEN: u32 = 4; - -/// Indicating that this symbol is not defined. -/// For non-data symbols, this must match whether the symbol is an import -/// or is defined; for data symbols, determines whether a segment is specified. -pub const WASM_SYM_UNDEFINED: u32 = 0x10; // required if the symbol refers to an import - -/// The symbol is intended to be exported from the -/// wasm module to the host environment. This differs from the visibility flags -/// in that it effects the static linker. -pub const WASM_SYM_EXPORTED: u32 = 0x20; - -/// The symbol uses an explicit symbol name, -/// rather than reusing the name from a wasm import. This allows it to remap -/// imports from foreign WebAssembly modules into local symbols with different -/// names. -pub const WASM_SYM_EXPLICIT_NAME: u32 = 0x40; // use the name from the symbol table, not from the import - -/// The symbol is intended to be included in the -/// linker output, regardless of whether it is used by the program. -pub const WASM_SYM_NO_STRIP: u32 = 0x80; - -pub enum WasmObjectSymbol { - Defined { index: u32, name: String }, - Imported { index: u32 }, -} - -impl Serialize for WasmObjectSymbol { - fn serialize(&self, buffer: &mut T) { - match self { - Self::Defined { index, name } => { - buffer.encode_u32(*index); - buffer.encode_u32(name.len() as u32); - buffer.append_slice(name.as_bytes()); - } - Self::Imported { index } => { - buffer.encode_u32(*index); - } - } - } -} - -pub enum DataSymbol { - Defined { - name: String, - index: u32, - offset: u32, - size: u32, - }, - Imported { - name: String, - }, -} - -impl Serialize for DataSymbol { - fn serialize(&self, buffer: &mut T) { - match self { - Self::Defined { - name, - index, - offset, - size, - } => { - buffer.encode_u32(name.len() as u32); - buffer.append_slice(name.as_bytes()); - buffer.encode_u32(*index); - buffer.encode_u32(*offset); - buffer.encode_u32(*size); - } - Self::Imported { name } => { - buffer.encode_u32(name.len() as u32); - buffer.append_slice(name.as_bytes()); - } - } - } -} - -/// section index (not section id!) -#[derive(Clone, Copy, Debug)] -pub struct SectionIndex(u32); - -pub enum SymInfoFields { - Function(WasmObjectSymbol), - Data(DataSymbol), - Global(WasmObjectSymbol), - Section(SectionIndex), - Event(WasmObjectSymbol), - Table(WasmObjectSymbol), -} - -pub struct SymInfo { - flags: u32, - info: SymInfoFields, -} - -impl SymInfo { - pub fn for_function(wasm_function_index: u32, name: String) -> Self { - let linking_symbol = WasmObjectSymbol::Defined { - index: wasm_function_index, - name, - }; - SymInfo { - flags: 0, - info: SymInfoFields::Function(linking_symbol), - } - } -} - -impl Serialize for SymInfo { - fn serialize(&self, buffer: &mut T) { - buffer.append_u8(match self.info { - SymInfoFields::Function(_) => 0, - SymInfoFields::Data(_) => 1, - SymInfoFields::Global(_) => 2, - SymInfoFields::Section(_) => 3, - SymInfoFields::Event(_) => 4, - SymInfoFields::Table(_) => 5, - }); - buffer.encode_u32(self.flags); - match &self.info { - SymInfoFields::Function(x) => x.serialize(buffer), - SymInfoFields::Data(x) => x.serialize(buffer), - SymInfoFields::Global(x) => x.serialize(buffer), - SymInfoFields::Section(SectionIndex(x)) => { - buffer.encode_u32(*x); - } - SymInfoFields::Event(x) => x.serialize(buffer), - SymInfoFields::Table(x) => x.serialize(buffer), - }; - } -} - -//---------------------------------------------------------------- -// Linking subsections -//---------------------------------------------------------------- - -pub enum LinkingSubSection<'a> { - /// Extra metadata about the data segments. - SegmentInfo(Vec<'a, LinkingSegment>), - /// Specifies a list of constructor functions to be called at startup. - /// These constructors will be called in priority order after memory has been initialized. - InitFuncs(Vec<'a, LinkingInitFunc>), - /// Specifies the COMDAT groups of associated linking objects, which are linked only once and all together. - ComdatInfo(Vec<'a, LinkingComdat<'a>>), - /// Specifies extra information about the symbols present in the module. - SymbolTable(Vec<'a, SymInfo>), -} - -impl<'a> Serialize for LinkingSubSection<'a> { - fn serialize(&self, buffer: &mut T) { - buffer.append_u8(match self { - Self::SegmentInfo(_) => 5, - Self::InitFuncs(_) => 6, - Self::ComdatInfo(_) => 7, - Self::SymbolTable(_) => 8, - }); - let payload_len_index = buffer.reserve_padded_u32(); - let payload_start_index = buffer.size(); - match self { - Self::SegmentInfo(items) => items.serialize(buffer), - Self::InitFuncs(items) => items.serialize(buffer), - Self::ComdatInfo(items) => items.serialize(buffer), - Self::SymbolTable(items) => items.serialize(buffer), - } - buffer.overwrite_padded_u32( - payload_len_index, - (buffer.size() - payload_start_index) as u32, - ); - } -} - -//---------------------------------------------------------------- -// Linking metadata section -//---------------------------------------------------------------- - -const LINKING_VERSION: u8 = 2; - -pub struct LinkingSection<'a> { - pub subsections: Vec<'a, LinkingSubSection<'a>>, -} - -impl<'a> LinkingSection<'a> { - fn new(arena: &'a Bump) -> Self { - LinkingSection { - subsections: Vec::with_capacity_in(1, arena), - } - } -} - -impl<'a> Serialize for LinkingSection<'a> { - fn serialize(&self, buffer: &mut T) { - let header_indices = write_custom_section_header(buffer, "linking"); - buffer.append_u8(LINKING_VERSION); - for subsection in self.subsections.iter() { - subsection.serialize(buffer); - } - update_section_size(buffer, header_indices); - } -} - /******************************************************************* * * Module From 7fdd6cf9526657a7462ed2613149cdd91296a0f4 Mon Sep 17 00:00:00 2001 From: Brian Carroll Date: Wed, 3 Nov 2021 11:59:49 +0000 Subject: [PATCH 20/20] rust-fmt --- compiler/gen_wasm/src/wasm_module/code_builder.rs | 2 +- compiler/test_wasm/src/helpers/eval.rs | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/compiler/gen_wasm/src/wasm_module/code_builder.rs b/compiler/gen_wasm/src/wasm_module/code_builder.rs index c798213ebb..38561a986e 100644 --- a/compiler/gen_wasm/src/wasm_module/code_builder.rs +++ b/compiler/gen_wasm/src/wasm_module/code_builder.rs @@ -5,8 +5,8 @@ use std::fmt::Debug; use roc_module::symbol::Symbol; -use super::opcodes::*; use super::linking::{IndexRelocType, RelocationEntry}; +use super::opcodes::*; use super::serialize::{SerialBuffer, Serialize}; use crate::{round_up_to_alignment, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID}; diff --git a/compiler/test_wasm/src/helpers/eval.rs b/compiler/test_wasm/src/helpers/eval.rs index a628656573..e9d509257a 100644 --- a/compiler/test_wasm/src/helpers/eval.rs +++ b/compiler/test_wasm/src/helpers/eval.rs @@ -2,9 +2,9 @@ use std::cell::Cell; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; +use crate::helpers::wasm32_test_result::Wasm32TestResult; use roc_can::builtins::builtin_defs_map; use roc_collections::all::{MutMap, MutSet}; -use crate::helpers::wasm32_test_result::Wasm32TestResult; use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory; const TEST_WRAPPER_NAME: &str = "test_wrapper"; @@ -102,8 +102,7 @@ pub fn helper_wasm<'a, T: Wasm32TestResult>( exposed_to_host, }; - let mut wasm_module = - roc_gen_wasm::build_module_help(&env, procedures).unwrap(); + let mut wasm_module = roc_gen_wasm::build_module_help(&env, procedures).unwrap(); T::insert_test_wrapper( arena,