use bumpalo::collections::vec::Vec; use bumpalo::Bump; use super::linking::{LinkingSection, RelocationEntry, RelocationSection}; use super::opcodes::OpCode; use super::serialize::{decode_u32_or_panic, SerialBuffer, Serialize}; use super::{CodeBuilder, ValueType}; /******************************************************************* * * Helpers * *******************************************************************/ #[repr(u8)] #[derive(PartialEq, Eq, Clone, Copy, Debug)] pub enum SectionId { Custom = 0, Type = 1, Import = 2, Function = 3, Table = 4, Memory = 5, Global = 6, Export = 7, Start = 8, Element = 9, Code = 10, Data = 11, /// DataCount section is unused. Only needed for single-pass validation of /// memory.init and data.drop, which we don't use DataCount = 12, } pub struct SectionHeaderIndices { size_index: usize, body_index: usize, } /// Write a section header, returning the position of the encoded length fn write_section_header(buffer: &mut T, id: SectionId) -> SectionHeaderIndices { buffer.append_u8(id as u8); let size_index = buffer.reserve_padded_u32(); let body_index = buffer.size(); SectionHeaderIndices { size_index, body_index, } } /// Write a custom section header, returning the position of the encoded length pub fn write_custom_section_header( buffer: &mut T, name: &str, ) -> SectionHeaderIndices { buffer.append_u8(SectionId::Custom as u8); let size_index = buffer.reserve_padded_u32(); let body_index = buffer.size(); name.serialize(buffer); SectionHeaderIndices { size_index, body_index, } } /// Update a section header with its final size, after writing the bytes pub fn update_section_size(buffer: &mut T, header_indices: SectionHeaderIndices) { let size = buffer.size() - header_indices.body_index; buffer.overwrite_padded_u32(header_indices.size_index, size as u32); } /// Serialize a section that is just a vector of some struct fn serialize_vector_section( buffer: &mut B, section_id: SectionId, subsections: &[T], ) { if !subsections.is_empty() { let header_indices = write_section_header(buffer, section_id); subsections.serialize(buffer); update_section_size(buffer, header_indices); } } /// Serialize a section that is stored as bytes and a count fn serialize_bytes_section( buffer: &mut B, section_id: SectionId, count: u32, bytes: &[u8], ) { if !bytes.is_empty() { let header_indices = write_section_header(buffer, section_id); buffer.encode_u32(count); buffer.append_slice(bytes); update_section_size(buffer, header_indices); } } /******************************************************************* * * Type section * Deduplicated list of function type signatures * *******************************************************************/ #[derive(PartialEq, Eq, Debug)] pub struct Signature<'a> { pub param_types: Vec<'a, ValueType>, pub ret_type: Option, } impl Signature<'_> { pub const SEPARATOR: u8 = 0x60; } impl<'a> Serialize for Signature<'a> { fn serialize(&self, buffer: &mut T) { buffer.append_u8(Self::SEPARATOR); self.param_types.serialize(buffer); self.ret_type.serialize(buffer); } } #[derive(Debug)] pub struct TypeSection<'a> { /// Private. See WasmModule::add_function_signature arena: &'a Bump, bytes: Vec<'a, u8>, offsets: Vec<'a, usize>, } impl<'a> TypeSection<'a> { pub fn new(arena: &'a Bump, capacity: usize) -> Self { TypeSection { arena, bytes: Vec::with_capacity_in(capacity * 4, arena), offsets: Vec::with_capacity_in(capacity, arena), } } /// Find a matching signature or insert a new one. Return the index. pub fn insert(&mut self, signature: Signature<'a>) -> u32 { let mut sig_bytes = Vec::with_capacity_in(signature.param_types.len() + 4, self.arena); signature.serialize(&mut sig_bytes); let sig_len = sig_bytes.len(); let bytes_len = self.bytes.len(); for (i, offset) in self.offsets.iter().enumerate() { let end = offset + sig_len; if end > bytes_len { break; } if &self.bytes[*offset..end] == sig_bytes.as_slice() { return i as u32; } } let sig_id = self.offsets.len(); self.offsets.push(bytes_len); self.bytes.extend_from_slice(&sig_bytes); sig_id as u32 } pub fn preload(arena: &'a Bump, section_body: &[u8]) -> Self { if section_body.is_empty() { return TypeSection { arena, bytes: Vec::new_in(arena), offsets: Vec::new_in(arena), }; } let (count, content_offset) = decode_u32_or_panic(section_body); let mut bytes = Vec::with_capacity_in(section_body.len() * 2, arena); bytes.extend_from_slice(§ion_body[content_offset..]); let mut offsets = Vec::with_capacity_in((count * 2) as usize, arena); let mut i = 0; while i < bytes.len() { offsets.push(i); let sep = bytes[i]; debug_assert!(sep == Signature::SEPARATOR); i += 1; let (n_params, n_params_size) = decode_u32_or_panic(&bytes[i..]); i += n_params_size; // skip over the array length that we just decoded i += n_params as usize; // skip over one byte per param type let n_return_values = bytes[i]; i += 1 + n_return_values as usize; } TypeSection { arena, bytes, offsets, } } } impl<'a> Serialize for TypeSection<'a> { fn serialize(&self, buffer: &mut T) { serialize_bytes_section( buffer, SectionId::Type, self.offsets.len() as u32, &self.bytes, ); } } /******************************************************************* * * Import section * *******************************************************************/ #[repr(u8)] #[derive(PartialEq, Eq, Clone, Copy, Debug)] pub enum RefType { Func = 0x70, Extern = 0x6f, } #[derive(Debug)] pub struct TableType { pub ref_type: RefType, pub limits: Limits, } impl Serialize for TableType { fn serialize(&self, buffer: &mut T) { buffer.append_u8(self.ref_type as u8); self.limits.serialize(buffer); } } #[derive(Debug)] pub enum ImportDesc { Func { signature_index: u32 }, Table { ty: TableType }, Mem { limits: Limits }, Global { ty: GlobalType }, } #[derive(Debug)] pub struct Import { pub module: &'static str, pub name: String, pub description: ImportDesc, } impl Serialize for Import { fn serialize(&self, buffer: &mut T) { self.module.serialize(buffer); self.name.serialize(buffer); match &self.description { ImportDesc::Func { signature_index } => { buffer.append_u8(0); buffer.encode_u32(*signature_index); } ImportDesc::Table { ty } => { buffer.append_u8(1); ty.serialize(buffer); } ImportDesc::Mem { limits } => { buffer.append_u8(2); limits.serialize(buffer); } ImportDesc::Global { ty } => { buffer.append_u8(3); ty.serialize(buffer); } } } } #[derive(Debug)] pub struct ImportSection<'a> { pub entries: Vec<'a, Import>, } impl<'a> ImportSection<'a> { pub fn new(arena: &'a Bump) -> Self { ImportSection { entries: bumpalo::vec![in arena], } } pub fn function_count(&self) -> usize { self.entries .iter() .filter(|import| matches!(import.description, ImportDesc::Func { .. })) .count() } } impl<'a> Serialize for ImportSection<'a> { fn serialize(&self, buffer: &mut T) { serialize_vector_section(buffer, SectionId::Import, &self.entries); } } /******************************************************************* * * Function section * Maps function indices (Code section) to signature indices (Type section) * *******************************************************************/ #[derive(Debug)] pub struct FunctionSection<'a> { pub count: u32, pub bytes: Vec<'a, u8>, } impl<'a> FunctionSection<'a> { pub fn new(arena: &'a Bump, capacity: usize) -> Self { FunctionSection { count: 0, bytes: Vec::with_capacity_in(capacity, arena), } } fn add_sig(&mut self, sig_id: u32) { self.bytes.encode_u32(sig_id); self.count += 1; } } impl<'a> Serialize for FunctionSection<'a> { fn serialize(&self, buffer: &mut T) { serialize_bytes_section(buffer, SectionId::Function, self.count, &self.bytes); } } /******************************************************************* * * Memory section * *******************************************************************/ #[derive(Debug)] pub enum Limits { Min(u32), MinMax(u32, u32), } impl Serialize for Limits { fn serialize(&self, buffer: &mut T) { match self { Self::Min(min) => { buffer.append_u8(0); buffer.encode_u32(*min); } Self::MinMax(min, max) => { buffer.append_u8(1); buffer.encode_u32(*min); buffer.encode_u32(*max); } } } } #[derive(Debug)] pub struct MemorySection(Option); impl MemorySection { pub const PAGE_SIZE: u32 = 64 * 1024; pub fn new(bytes: u32) -> Self { if bytes == 0 { MemorySection(None) } else { let pages = (bytes + Self::PAGE_SIZE - 1) / Self::PAGE_SIZE; MemorySection(Some(Limits::Min(pages))) } } pub fn min_size(&self) -> Option { match self { MemorySection(Some(Limits::Min(min))) | MemorySection(Some(Limits::MinMax(min, _))) => { Some(min * Self::PAGE_SIZE) } MemorySection(None) => None, } } } impl Serialize for MemorySection { fn serialize(&self, buffer: &mut T) { if let Some(limits) = &self.0 { let header_indices = write_section_header(buffer, SectionId::Memory); buffer.append_u8(1); limits.serialize(buffer); update_section_size(buffer, header_indices); } } } /******************************************************************* * * Global section * *******************************************************************/ #[derive(Debug)] pub struct GlobalType { pub value_type: ValueType, pub is_mutable: bool, } impl Serialize for GlobalType { fn serialize(&self, buffer: &mut T) { buffer.append_u8(self.value_type as u8); buffer.append_u8(self.is_mutable as u8); } } /// Constant expression for initialising globals or data segments /// Note: This is restricted for simplicity, but the spec allows arbitrary constant expressions #[derive(Debug)] pub enum ConstExpr { I32(i32), I64(i64), F32(f32), F64(f64), } impl Serialize for ConstExpr { fn serialize(&self, buffer: &mut T) { match self { ConstExpr::I32(x) => { buffer.append_u8(OpCode::I32CONST as u8); buffer.encode_i32(*x); } ConstExpr::I64(x) => { buffer.append_u8(OpCode::I64CONST as u8); buffer.encode_i64(*x); } ConstExpr::F32(x) => { buffer.append_u8(OpCode::F32CONST as u8); buffer.encode_f32(*x); } ConstExpr::F64(x) => { buffer.append_u8(OpCode::F64CONST as u8); buffer.encode_f64(*x); } } buffer.append_u8(OpCode::END as u8); } } #[derive(Debug)] pub struct Global { /// Type and mutability of the global pub ty: GlobalType, /// Initial value of the global. pub init: ConstExpr, } impl Serialize for Global { fn serialize(&self, buffer: &mut T) { self.ty.serialize(buffer); self.init.serialize(buffer); } } #[derive(Debug)] pub struct GlobalSection<'a> { pub entries: Vec<'a, Global>, } impl<'a> Serialize for GlobalSection<'a> { fn serialize(&self, buffer: &mut T) { serialize_vector_section(buffer, SectionId::Global, &self.entries); } } /******************************************************************* * * Export section * *******************************************************************/ #[repr(u8)] #[derive(PartialEq, Eq, Clone, Copy, Debug)] pub enum ExportType { Func = 0, Table = 1, Mem = 2, Global = 3, } #[derive(Debug)] pub struct Export { pub name: String, pub ty: ExportType, pub index: u32, } impl Serialize for Export { fn serialize(&self, buffer: &mut T) { self.name.serialize(buffer); buffer.append_u8(self.ty as u8); buffer.encode_u32(self.index); } } #[derive(Debug)] pub struct ExportSection<'a> { pub entries: Vec<'a, Export>, } impl<'a> Serialize for ExportSection<'a> { fn serialize(&self, buffer: &mut T) { serialize_vector_section(buffer, SectionId::Export, &self.entries); } } /******************************************************************* * * Code section (see also code_builder.rs) * *******************************************************************/ #[derive(Debug)] pub struct CodeSection<'a> { pub preloaded_count: u32, pub preloaded_bytes: Vec<'a, u8>, pub code_builders: Vec<'a, CodeBuilder<'a>>, } impl<'a> CodeSection<'a> { /// Serialize the code builders for all functions, and get code relocations with final offsets pub fn serialize_with_relocs( &self, buffer: &mut T, relocations: &mut Vec<'a, RelocationEntry>, ) -> usize { let header_indices = write_section_header(buffer, SectionId::Code); buffer.encode_u32(self.preloaded_count + self.code_builders.len() as u32); buffer.append_slice(&self.preloaded_bytes); for code_builder in self.code_builders.iter() { code_builder.serialize_with_relocs(buffer, relocations, header_indices.body_index); } let code_section_body_index = header_indices.body_index; update_section_size(buffer, header_indices); code_section_body_index } } /******************************************************************* * * Data section * *******************************************************************/ #[derive(Debug)] pub enum DataMode { /// A data segment that auto-loads into memory on instantiation Active { offset: ConstExpr }, /// A data segment that can be loaded with the `memory.init` instruction Passive, } impl DataMode { pub fn active_at(offset: u32) -> Self { DataMode::Active { offset: ConstExpr::I32(offset as i32), } } } #[derive(Debug)] pub struct DataSegment<'a> { pub mode: DataMode, pub init: Vec<'a, u8>, } impl Serialize for DataSegment<'_> { fn serialize(&self, buffer: &mut T) { match &self.mode { DataMode::Active { offset } => { buffer.append_u8(0); offset.serialize(buffer); } DataMode::Passive => { buffer.append_u8(1); } } self.init.serialize(buffer); } } #[derive(Debug)] pub struct DataSection<'a> { segment_count: u32, bytes: Vec<'a, u8>, } impl<'a> DataSection<'a> { pub fn new(arena: &'a Bump) -> Self { DataSection { segment_count: 0, bytes: bumpalo::vec![in arena], } } pub fn append_segment(&mut self, segment: DataSegment<'a>) -> u32 { let index = self.segment_count; self.segment_count += 1; segment.serialize(&mut self.bytes); index } } impl Serialize for DataSection<'_> { fn serialize(&self, buffer: &mut T) { if !self.bytes.is_empty() { serialize_bytes_section(buffer, SectionId::Data, self.segment_count, &self.bytes); } } } /******************************************************************* * * Module * * https://webassembly.github.io/spec/core/binary/modules.html * *******************************************************************/ /// Helper struct to count non-empty sections. /// Needed to generate linking data, which refers to target sections by index. struct SectionCounter { buffer_size: usize, section_index: u32, } impl SectionCounter { /// Update the section counter if buffer size increased since last call #[inline] fn update(&mut self, buffer: &mut SB) { let new_size = buffer.size(); if new_size > self.buffer_size { self.section_index += 1; self.buffer_size = new_size; } } #[inline] fn serialize_and_count( &mut self, buffer: &mut SB, section: &S, ) { section.serialize(buffer); self.update(buffer); } } /// A Wasm module section that we don't use for Roc code, /// but may be present in a preloaded binary #[derive(Debug, Default)] pub struct OpaqueSection<'a> { bytes: &'a [u8], } impl<'a> OpaqueSection<'a> { pub fn new(bytes: &'a [u8]) -> Self { Self { bytes } } } impl Serialize for OpaqueSection<'_> { fn serialize(&self, buffer: &mut T) { if !self.bytes.is_empty() { buffer.append_slice(self.bytes); } } } #[derive(Debug)] pub struct WasmModule<'a> { pub types: TypeSection<'a>, pub import: ImportSection<'a>, pub function: FunctionSection<'a>, pub table: OpaqueSection<'a>, pub memory: MemorySection, pub global: GlobalSection<'a>, pub export: ExportSection<'a>, pub start: OpaqueSection<'a>, pub element: OpaqueSection<'a>, pub code: CodeSection<'a>, pub data: DataSection<'a>, pub linking: LinkingSection<'a>, pub relocations: RelocationSection<'a>, } impl<'a> WasmModule<'a> { pub const WASM_VERSION: u32 = 1; /// Create entries in the Type and Function sections for a function signature pub fn add_function_signature(&mut self, signature: Signature<'a>) { let index = self.types.insert(signature); self.function.add_sig(index); } /// Serialize the module to bytes /// (Mutates some data related to linking) pub fn serialize_mut(&mut self, buffer: &mut T) { buffer.append_u8(0); buffer.append_slice("asm".as_bytes()); buffer.write_unencoded_u32(Self::WASM_VERSION); // Keep track of (non-empty) section indices for linking let mut counter = SectionCounter { buffer_size: buffer.size(), section_index: 0, }; counter.serialize_and_count(buffer, &self.types); counter.serialize_and_count(buffer, &self.import); counter.serialize_and_count(buffer, &self.function); counter.serialize_and_count(buffer, &self.table); counter.serialize_and_count(buffer, &self.memory); counter.serialize_and_count(buffer, &self.global); counter.serialize_and_count(buffer, &self.export); counter.serialize_and_count(buffer, &self.start); counter.serialize_and_count(buffer, &self.element); // Code section is the only one with relocations so we can stop counting let code_section_index = counter.section_index; self.code .serialize_with_relocs(buffer, &mut self.relocations.entries); self.data.serialize(buffer); self.linking.serialize(buffer); self.relocations.target_section_index = Some(code_section_index); self.relocations.serialize(buffer); } } #[cfg(test)] mod tests { use super::*; use bumpalo::{self, collections::Vec, Bump}; fn test_assert_types_preload<'a>(arena: &'a Bump, original: &TypeSection<'a>) { // Serialize the Type section that we built from Roc code let mut original_serialized = Vec::with_capacity_in(6 + original.bytes.len(), arena); original.serialize(&mut original_serialized); debug_assert!(original_serialized[0] == SectionId::Type as u8); // Reconstruct a new TypeSection by "pre-loading" the bytes of the original! let body = &original_serialized[6..]; let preloaded = TypeSection::preload(arena, body); debug_assert_eq!(original.offsets, preloaded.offsets); debug_assert_eq!(original.bytes, preloaded.bytes); } #[test] fn test_type_section() { use ValueType::*; let arena = &Bump::new(); let signatures = [ Signature { param_types: bumpalo::vec![in arena], ret_type: None, }, Signature { param_types: bumpalo::vec![in arena; I32, I64, F32, F64], ret_type: None, }, Signature { param_types: bumpalo::vec![in arena; I32, I32, I32], ret_type: Some(I32), }, ]; let mut section = TypeSection::new(arena, signatures.len()); for sig in signatures { section.insert(sig); } test_assert_types_preload(arena, §ion); } }