wasm: use __data_end to account for all constant data including zero (bss) data

This commit is contained in:
Brian Carroll 2022-03-08 08:56:34 +00:00
parent 4d14fbf562
commit 9ae26c5aac
3 changed files with 67 additions and 34 deletions

View file

@ -29,11 +29,6 @@ use crate::{
PTR_SIZE, PTR_TYPE, STACK_POINTER_GLOBAL_ID, STACK_POINTER_NAME, TARGET_INFO,
};
/// The memory address where the constants data will be loaded during module instantiation.
/// We avoid address zero and anywhere near it. They're valid addresses but maybe bug-prone.
/// Follow Emscripten's example by leaving 1kB unused (though 4 bytes would probably do!)
const CONST_SEGMENT_BASE_ADDR: u32 = 1024;
pub struct WasmBackend<'a> {
pub env: &'a Env<'a>,
interns: &'a mut Interns,
@ -57,7 +52,6 @@ pub struct WasmBackend<'a> {
}
impl<'a> WasmBackend<'a> {
#[allow(clippy::too_many_arguments)]
pub fn new(
env: &'a Env<'a>,
interns: &'a mut Interns,
@ -78,8 +72,10 @@ impl<'a> WasmBackend<'a> {
index: STACK_POINTER_GLOBAL_ID,
});
// TODO: Examine the actual address ranges of every preloaded data segment in case they are not simply sequential
let next_constant_addr = CONST_SEGMENT_BASE_ADDR + module.data.bytes.len() as u32;
// The preloaded binary has a global to tell us where its data section ends
// Note: We need this to account for zero data (.bss), which doesn't have an explicit DataSegment!
let data_end_idx = module.export.globals_lookup["__data_end".as_bytes()];
let next_constant_addr = module.global.parse_u32_at_index(data_end_idx);
WasmBackend {
env,

View file

@ -12,7 +12,8 @@ use super::dead_code::{
use super::linking::RelocationEntry;
use super::opcodes::OpCode;
use super::serialize::{
parse_u32_or_panic, SerialBuffer, Serialize, SkipBytes, MAX_SIZE_ENCODED_U32,
parse_string_bytes, parse_u32_or_panic, SerialBuffer, Serialize, SkipBytes,
MAX_SIZE_ENCODED_U32,
};
use super::{CodeBuilder, ValueType};
@ -565,6 +566,26 @@ pub enum ConstExpr {
F64(f64),
}
impl ConstExpr {
fn parse_u32(bytes: &[u8], cursor: &mut usize) -> u32 {
let err = || internal_error!("Invalid ConstExpr. Expected i32.");
if bytes[*cursor] != OpCode::I32CONST as u8 {
err();
}
*cursor += 1;
let value = parse_u32_or_panic(bytes, cursor);
if bytes[*cursor] != OpCode::END as u8 {
err();
}
*cursor += 1;
value
}
}
impl Serialize for ConstExpr {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
match self {
@ -589,6 +610,15 @@ impl Serialize for ConstExpr {
}
}
impl SkipBytes for ConstExpr {
    /// Advance `cursor` past a serialized constant expression without decoding it.
    ///
    /// Instructions are skipped one at a time via `OpCode::skip_bytes` until the byte under
    /// the cursor is the `END` opcode; the cursor is then moved one byte past that `END`.
    ///
    /// NOTE(review): this stops at the first `END` found at an instruction boundary, so it
    /// presumably relies on constant expressions never containing nested blocks. Verify.
    fn skip_bytes(bytes: &[u8], cursor: &mut usize) {
        let end_marker = OpCode::END as u8;
        loop {
            if bytes[*cursor] == end_marker {
                break;
            }
            OpCode::skip_bytes(bytes, cursor);
        }
        // Step over the END opcode itself.
        *cursor += 1;
    }
}
#[derive(Debug)]
pub struct Global {
/// Type and mutability of the global
@ -611,16 +641,14 @@ pub struct GlobalSection<'a> {
}
impl<'a> GlobalSection<'a> {
pub fn new(arena: &'a Bump, globals: &[Global]) -> Self {
let capacity = 13 * globals.len();
let mut bytes = Vec::with_capacity_in(capacity, arena);
for global in globals {
global.serialize(&mut bytes);
}
GlobalSection {
count: globals.len() as u32,
bytes,
/// Decode the `u32` initializer of the global stored at position `index` in this section.
///
/// The section's bytes are walked from the start: for each of the `index` preceding
/// entries, its `GlobalType` and its `ConstExpr` are skipped. The target entry's
/// `GlobalType` is then skipped and its initializer is read with `ConstExpr::parse_u32`,
/// which raises an internal error if the initializer is not an i32 constant.
pub fn parse_u32_at_index(&self, index: u32) -> u32 {
    let section_bytes: &[u8] = &self.bytes;
    let mut offset = 0;

    // Step over every global that comes before the requested one.
    (0..index).for_each(|_| {
        GlobalType::skip_bytes(section_bytes, &mut offset);
        ConstExpr::skip_bytes(section_bytes, &mut offset);
    });

    // Now positioned at the requested global: skip its type, then decode its initializer.
    GlobalType::skip_bytes(section_bytes, &mut offset);
    ConstExpr::parse_u32(section_bytes, &mut offset)
}
pub fn append(&mut self, global: Global) {
@ -666,15 +694,15 @@ pub struct Export<'a> {
}
impl<'a> Export<'a> {
fn parse_type(bytes: &[u8], cursor: &mut usize) -> ExportType {
String::skip_bytes(bytes, cursor); // name
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Self {
let name = parse_string_bytes(arena, bytes, cursor);
let ty = ExportType::from(bytes[*cursor]);
*cursor += 1;
u32::skip_bytes(bytes, cursor); // index
let index = parse_u32_or_panic(bytes, cursor);
ty
Export { name, ty, index }
}
}
@ -690,7 +718,10 @@ impl Serialize for Export<'_> {
/// The export section of a Wasm module, kept as serialized bytes plus lookup data.
pub struct ExportSection<'a> {
/// Number of export entries (presumably the entries serialized in `bytes` — TODO confirm)
pub count: u32,
/// Serialized export entries
pub bytes: Vec<'a, u8>,
/// List of exported functions to keep during dead-code-elimination
pub function_indices: Vec<'a, u32>,
/// Exported global name (as raw bytes) -> the index recorded in that global's export entry
pub globals_lookup: MutMap<&'a [u8], u32>,
}
impl<'a> ExportSection<'a> {
@ -713,6 +744,7 @@ impl<'a> ExportSection<'a> {
count: 0,
bytes: Vec::with_capacity_in(256, arena),
function_indices: Vec::with_capacity_in(4, arena),
globals_lookup: MutMap::default(),
}
}
@ -725,11 +757,14 @@ impl<'a> ExportSection<'a> {
let mut body_cursor = 0;
for _ in 0..num_exports {
let export_start = body_cursor;
let export_type = Export::parse_type(body_bytes, &mut body_cursor);
if matches!(export_type, ExportType::Global) {
let export = Export::parse(arena, body_bytes, &mut body_cursor);
if matches!(export.ty, ExportType::Global) {
let global_bytes = &body_bytes[export_start..body_cursor];
export_section.bytes.extend_from_slice(global_bytes);
export_section.count += 1;
export_section
.globals_lookup
.insert(export.name, export.index);
}
}
@ -1146,10 +1181,7 @@ impl<'a> NameSection<'a> {
cursor: &mut usize,
section_end: usize,
) {
// Custom section name
let section_name_len = parse_u32_or_panic(module_bytes, cursor);
let section_name_end = *cursor + section_name_len as usize;
let section_name = &module_bytes[*cursor..section_name_end];
let section_name = parse_string_bytes(arena, module_bytes, cursor);
if section_name != Self::NAME.as_bytes() {
internal_error!(
"Expected Custom section {:?}, found {:?}",
@ -1157,7 +1189,6 @@ impl<'a> NameSection<'a> {
std::str::from_utf8(section_name)
);
}
*cursor = section_name_end;
// Find function names subsection
let mut found_function_names = false;
@ -1182,10 +1213,7 @@ impl<'a> NameSection<'a> {
let num_entries = parse_u32_or_panic(module_bytes, cursor) as usize;
for _ in 0..num_entries {
let fn_index = parse_u32_or_panic(module_bytes, cursor);
let name_len = parse_u32_or_panic(module_bytes, cursor);
let name_end = *cursor + name_len as usize;
let name_bytes: &[u8] = &module_bytes[*cursor..name_end];
*cursor = name_end;
let name_bytes = parse_string_bytes(arena, module_bytes, cursor);
self.functions
.insert(arena.alloc_slice_copy(name_bytes), fn_index);

View file

@ -1,6 +1,6 @@
use std::{fmt::Debug, iter::FromIterator};
use bumpalo::collections::vec::Vec;
use bumpalo::{collections::vec::Vec, Bump};
use roc_error_macros::internal_error;
/// In the WebAssembly binary format, all integers are variable-length encoded (using LEB-128)
@ -262,6 +262,15 @@ pub fn parse_u32_or_panic(bytes: &[u8], cursor: &mut usize) -> u32 {
value
}
/// Read a length-prefixed byte string and return a copy of it allocated in `arena`.
///
/// The length prefix is read with `parse_u32_or_panic`; that many bytes following it are
/// copied into the arena. On return, `cursor` points just past the last byte of the string.
/// Panics if the declared length runs past the end of `bytes`.
pub fn parse_string_bytes<'a>(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> &'a [u8] {
    let byte_count = parse_u32_or_panic(bytes, cursor) as usize;

    // The cursor now sits on the first byte of the string contents.
    let start = *cursor;
    let stop = start + byte_count;

    let owned = arena.alloc_slice_copy(&bytes[start..stop]);
    *cursor = stop;
    owned
}
/// Skip over serialized bytes for a type
/// This may, or may not, require looking at the byte values
pub trait SkipBytes {