diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index b0901f8c46..f7c4fe232d 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -29,11 +29,6 @@ use crate::{ PTR_SIZE, PTR_TYPE, STACK_POINTER_GLOBAL_ID, STACK_POINTER_NAME, TARGET_INFO, }; -/// The memory address where the constants data will be loaded during module instantiation. -/// We avoid address zero and anywhere near it. They're valid addresses but maybe bug-prone. -/// Follow Emscripten's example by leaving 1kB unused (though 4 bytes would probably do!) -const CONST_SEGMENT_BASE_ADDR: u32 = 1024; - pub struct WasmBackend<'a> { pub env: &'a Env<'a>, interns: &'a mut Interns, @@ -57,7 +52,6 @@ pub struct WasmBackend<'a> { } impl<'a> WasmBackend<'a> { - #[allow(clippy::too_many_arguments)] pub fn new( env: &'a Env<'a>, interns: &'a mut Interns, @@ -78,8 +72,10 @@ impl<'a> WasmBackend<'a> { index: STACK_POINTER_GLOBAL_ID, }); - // TODO: Examine the actual address ranges of every preloaded data segment in case they are not simply sequential - let next_constant_addr = CONST_SEGMENT_BASE_ADDR + module.data.bytes.len() as u32; + // The preloaded binary has a global to tell us where its data section ends + // Note: We need this to account for zero data (.bss), which doesn't have an explicit DataSegment! + let data_end_idx = module.export.globals_lookup["__data_end".as_bytes()]; + let next_constant_addr = module.global.parse_u32_at_index(data_end_idx); WasmBackend { env, diff --git a/compiler/gen_wasm/src/wasm_module/sections.rs b/compiler/gen_wasm/src/wasm_module/sections.rs index a87cfa7865..f64db3591b 100644 --- a/compiler/gen_wasm/src/wasm_module/sections.rs +++ b/compiler/gen_wasm/src/wasm_module/sections.rs @@ -12,7 +12,8 @@ use super::dead_code::{ use super::linking::RelocationEntry; use super::opcodes::OpCode; use super::serialize::{ - parse_u32_or_panic, SerialBuffer, Serialize, SkipBytes, MAX_SIZE_ENCODED_U32, + parse_string_bytes, parse_u32_or_panic, SerialBuffer, Serialize, SkipBytes, + MAX_SIZE_ENCODED_U32, }; use super::{CodeBuilder, ValueType}; @@ -565,6 +566,26 @@ pub enum ConstExpr { F64(f64), } +impl ConstExpr { + fn parse_u32(bytes: &[u8], cursor: &mut usize) -> u32 { + let err = || internal_error!("Invalid ConstExpr. Expected i32."); + + if bytes[*cursor] != OpCode::I32CONST as u8 { + err(); + } + *cursor += 1; + + let value = parse_u32_or_panic(bytes, cursor); + + if bytes[*cursor] != OpCode::END as u8 { + err(); + } + *cursor += 1; + + value + } +} + impl Serialize for ConstExpr { fn serialize(&self, buffer: &mut T) { match self { @@ -589,6 +610,15 @@ impl Serialize for ConstExpr { } } +impl SkipBytes for ConstExpr { + fn skip_bytes(bytes: &[u8], cursor: &mut usize) { + while bytes[*cursor] != OpCode::END as u8 { + OpCode::skip_bytes(bytes, cursor); + } + *cursor += 1; + } +} + #[derive(Debug)] pub struct Global { /// Type and mutability of the global @@ -611,16 +641,14 @@ pub struct GlobalSection<'a> { } impl<'a> GlobalSection<'a> { - pub fn new(arena: &'a Bump, globals: &[Global]) -> Self { - let capacity = 13 * globals.len(); - let mut bytes = Vec::with_capacity_in(capacity, arena); - for global in globals { - global.serialize(&mut bytes); - } - GlobalSection { - count: globals.len() as u32, - bytes, + pub fn parse_u32_at_index(&self, index: u32) -> u32 { + let mut cursor = 0; + for _ in 0..index { + GlobalType::skip_bytes(&self.bytes, &mut cursor); + ConstExpr::skip_bytes(&self.bytes, &mut cursor); } + GlobalType::skip_bytes(&self.bytes, &mut cursor); + ConstExpr::parse_u32(&self.bytes, &mut cursor) } pub fn append(&mut self, global: Global) { @@ -666,15 +694,15 @@ pub struct Export<'a> { } impl<'a> Export<'a> { - fn parse_type(bytes: &[u8], cursor: &mut usize) -> ExportType { - String::skip_bytes(bytes, cursor); // name + fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Self { + let name = parse_string_bytes(arena, bytes, cursor); let ty = ExportType::from(bytes[*cursor]); *cursor += 1; - u32::skip_bytes(bytes, cursor); // index + let index = parse_u32_or_panic(bytes, cursor); - ty + Export { name, ty, index } } } @@ -690,7 +718,10 @@ impl Serialize for Export<'_> { pub struct ExportSection<'a> { pub count: u32, pub bytes: Vec<'a, u8>, + /// List of exported functions to keep during dead-code-elimination pub function_indices: Vec<'a, u32>, + /// name -> index + pub globals_lookup: MutMap<&'a [u8], u32>, } impl<'a> ExportSection<'a> { @@ -713,6 +744,7 @@ impl<'a> ExportSection<'a> { count: 0, bytes: Vec::with_capacity_in(256, arena), function_indices: Vec::with_capacity_in(4, arena), + globals_lookup: MutMap::default(), } } @@ -725,11 +757,14 @@ impl<'a> ExportSection<'a> { let mut body_cursor = 0; for _ in 0..num_exports { let export_start = body_cursor; - let export_type = Export::parse_type(body_bytes, &mut body_cursor); - if matches!(export_type, ExportType::Global) { + let export = Export::parse(arena, body_bytes, &mut body_cursor); + if matches!(export.ty, ExportType::Global) { let global_bytes = &body_bytes[export_start..body_cursor]; export_section.bytes.extend_from_slice(global_bytes); export_section.count += 1; + export_section + .globals_lookup + .insert(export.name, export.index); } } @@ -1146,10 +1181,7 @@ impl<'a> NameSection<'a> { cursor: &mut usize, section_end: usize, ) { - // Custom section name - let section_name_len = parse_u32_or_panic(module_bytes, cursor); - let section_name_end = *cursor + section_name_len as usize; - let section_name = &module_bytes[*cursor..section_name_end]; + let section_name = parse_string_bytes(arena, module_bytes, cursor); if section_name != Self::NAME.as_bytes() { internal_error!( "Expected Custom section {:?}, found {:?}", @@ -1157,7 +1189,6 @@ impl<'a> NameSection<'a> { std::str::from_utf8(section_name) ); } - *cursor = section_name_end; // Find function names subsection let mut found_function_names = false; @@ -1182,10 +1213,7 @@ impl<'a> NameSection<'a> { let num_entries = parse_u32_or_panic(module_bytes, cursor) as usize; for _ in 0..num_entries { let fn_index = parse_u32_or_panic(module_bytes, cursor); - let name_len = parse_u32_or_panic(module_bytes, cursor); - let name_end = *cursor + name_len as usize; - let name_bytes: &[u8] = &module_bytes[*cursor..name_end]; - *cursor = name_end; + let name_bytes = parse_string_bytes(arena, module_bytes, cursor); self.functions .insert(arena.alloc_slice_copy(name_bytes), fn_index); diff --git a/compiler/gen_wasm/src/wasm_module/serialize.rs b/compiler/gen_wasm/src/wasm_module/serialize.rs index 99561309c5..99abd64018 100644 --- a/compiler/gen_wasm/src/wasm_module/serialize.rs +++ b/compiler/gen_wasm/src/wasm_module/serialize.rs @@ -1,6 +1,6 @@ use std::{fmt::Debug, iter::FromIterator}; -use bumpalo::collections::vec::Vec; +use bumpalo::{collections::vec::Vec, Bump}; use roc_error_macros::internal_error; /// In the WebAssembly binary format, all integers are variable-length encoded (using LEB-128) @@ -262,6 +262,15 @@ pub fn parse_u32_or_panic(bytes: &[u8], cursor: &mut usize) -> u32 { value } +pub fn parse_string_bytes<'a>(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> &'a [u8] { + let len = parse_u32_or_panic(bytes, cursor); + let end = *cursor + len as usize; + let bytes: &[u8] = &bytes[*cursor..end]; + let copy = arena.alloc_slice_copy(bytes); + *cursor = end; + copy +} + /// Skip over serialized bytes for a type /// This may, or may not, require looking at the byte values pub trait SkipBytes {