wasm: implement relocations to set memory layout

Brian Carroll 2022-05-29 12:17:13 +02:00
parent 73f0a7c96c
commit 9b47dada6a
5 changed files with 127 additions and 80 deletions

View file

@@ -17,8 +17,8 @@ use roc_std::RocDec;
 use crate::layout::{CallConv, ReturnMethod, WasmLayout};
 use crate::low_level::{call_higher_order_lowlevel, LowLevelCall};
 use crate::storage::{Storage, StoredValue, StoredValueKind};
-use crate::wasm_module::linking::{DataSymbol, LinkingSegment, WasmObjectSymbol};
-use crate::wasm_module::sections::{DataMode, DataSegment, Limits};
+use crate::wasm_module::linking::{DataSymbol, LinkingSegment, SymType, WasmObjectSymbol};
+use crate::wasm_module::sections::{ConstExpr, DataMode, DataSegment, Global, GlobalType, Limits};
 use crate::wasm_module::{
     code_builder, CodeBuilder, ExportType, LocalId, Signature, SymInfo, ValueType, WasmModule,
 };
@@ -86,13 +86,9 @@ impl<'a> WasmBackend<'a> {
             }
         }
-        module.global.append(Global {
-            ty: GlobalType {
-                value_type: ValueType::I32,
-                is_mutable: true,
-            },
-            init: ConstExpr::I32(1234), // TODO: come up with a value!
-        });
+        // TODO: get this from a CLI parameter with some default
+        const STACK_SIZE: u32 = 1024 * 1024;
+        Self::set_memory_layout(&mut module, STACK_SIZE);
         module.export.exports = app_exports;
         module.code.code_builders.reserve(proc_lookup.len());
@@ -118,6 +114,29 @@
         }
     }
+    /// A Wasm module's memory is all in one contiguous block, unlike native executables.
+    /// The standard layout is: constant data, then stack, then heap.
+    /// Since they're all in one block, they can't grow independently. Only the highest one can grow.
+    /// Also, there's no "invalid region" below the stack, so stack overflow will overwrite constants!
+    /// TODO: Detect stack overflow in function prologue... at least in Roc code...
+    fn set_memory_layout(module: &mut WasmModule<'a>, stack_size: u32) {
+        let stack_heap_boundary = module.data.end_addr + stack_size;
+        // Create a mutable global for __stack_pointer
+        // We assume __stack_pointer is Global #0 in the host object file
+        // TODO: make this a bit more robust (though it's always valid in practice)
+        debug_assert!(module.global.count == 0);
+        module.global.append(Global {
+            ty: GlobalType {
+                value_type: ValueType::I32,
+                is_mutable: true,
+            },
+            init: ConstExpr::I32(stack_heap_boundary as i32),
+        });
+        module.relocate_preloaded_code("__heap_base", SymType::Data, stack_heap_boundary);
+    }
     pub fn get_helpers(&mut self) -> Vec<'a, Proc<'a>> {
         self.helper_proc_gen.take_procs()
     }
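For reference, here is a minimal standalone sketch (not part of the commit) of the address arithmetic that set_memory_layout performs, using a hypothetical data-section end address and the 1 MiB default stack size:

    fn sketch_memory_layout() {
        let data_end: u32 = 4096;          // stand-in for module.data.end_addr
        let stack_size: u32 = 1024 * 1024; // the STACK_SIZE default above
        let stack_heap_boundary = data_end + stack_size;
        // Constants occupy [0, data_end); the stack occupies
        // [data_end, stack_heap_boundary) and grows downward, so
        // __stack_pointer is initialised to the boundary itself.
        // The heap starts at the boundary, so __heap_base is relocated to it.
        assert_eq!(stack_heap_boundary, 1_052_672);
    }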

View file

@@ -3,7 +3,7 @@ use bumpalo::Bump;
 use super::parse::parse_fixed_size_items;
 use super::sections::{update_section_size, write_custom_section_header, SectionId};
-use super::serialize::{SerialBuffer, Serialize};
+use super::serialize::{overwrite_padded_i32, SerialBuffer, Serialize};
 use crate::wasm_module::parse::{Parse, ParseError, SkipBytes};
 /*******************************************************************
@@ -182,6 +182,35 @@
     pub entries: Vec<'a, RelocationEntry>,
 }
+impl<'a> RelocationSection<'a> {
+    pub fn apply_relocs_u32(&self, section_bytes: &mut [u8], sym_index: u32, value: u32) {
+        for entry in self.entries.iter() {
+            match entry {
+                RelocationEntry::Index { symbol_index, .. } if *symbol_index == sym_index => {
+                    todo!("Linking RelocationEntry {:?}", entry)
+                }
+                RelocationEntry::Offset {
+                    type_id,
+                    offset,
+                    symbol_index,
+                    addend,
+                } if *symbol_index == sym_index => {
+                    use OffsetRelocType::*;
+                    match type_id {
+                        MemoryAddrSleb => overwrite_padded_i32(
+                            section_bytes,
+                            *offset as usize,
+                            value as i32 + *addend,
+                        ),
+                        _ => todo!("Linking relocation type {:?}", type_id),
+                    }
+                }
+                _ => {}
+            }
+        }
+    }
+}
 impl<'a> Serialize for RelocationSection<'a> {
     fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
         if !self.entries.is_empty() {
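To make the patching step concrete: for a MemoryAddrSleb relocation, the five maximally padded SLEB128 bytes at the entry's offset are overwritten with the symbol's value plus the addend. Below is a standalone sketch (not part of the commit) with hypothetical numbers; the local helper mirrors the overwrite_padded_i32 added in the serialize module further down:

    // mirrors overwrite_padded_i32 from the serialize changes below (offset fixed at 0)
    fn write_padded_sleb32(buf: &mut [u8; 5], value: i32) {
        let mut x = value;
        for byte in buf.iter_mut().take(4) {
            *byte = 0x80 | ((x & 0x7f) as u8);
            x >>= 7;
        }
        buf[4] = (x & 0x7f) as u8;
    }

    fn sketch_memory_addr_sleb_patch() {
        // the placeholder bytes found at the relocation's offset (padded SLEB128 of 0)
        let mut imm = [0x80, 0x80, 0x80, 0x80, 0x00];
        let heap_base: i32 = 0x11_0000; // hypothetical address of __heap_base
        let addend: i32 = 8;            // hypothetical addend from the entry
        write_padded_sleb32(&mut imm, heap_base + addend);
        assert_eq!(imm, [0x88, 0x80, 0xc4, 0x80, 0x00]); // padded SLEB128 of 0x110008
    }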
@@ -514,7 +543,8 @@ pub enum SymInfo<'a> {
 }
 #[repr(u8)]
-enum SymType {
+#[derive(Debug)]
+pub enum SymType {
     Function = 0,
     Data = 1,
     Global = 2,
@@ -668,6 +698,29 @@
             comdat_info: Vec::with_capacity_in(0, arena),
         }
     }
+    pub fn find_symbol_by_name(&self, sym_name: &str, sym_type: SymType) -> Option<u32> {
+        let found = match sym_type {
+            SymType::Data => self
+                .symbol_table
+                .iter()
+                .position(|sym_info| match sym_info {
+                    SymInfo::Data(DataSymbol::Imported { name, .. })
+                    | SymInfo::Data(DataSymbol::Defined { name, .. }) => *name == sym_name,
+                    _ => false,
+                }),
+            SymType::Function => self
+                .symbol_table
+                .iter()
+                .position(|sym_info| match sym_info {
+                    SymInfo::Function(WasmObjectSymbol::Defined { name, .. }) => *name == sym_name,
+                    _ => false,
+                }),
+            _ => unimplemented!("Finding {:?} symbols by name", sym_type),
+        };
+        found.map(|i| i as u32)
+    }
 }
 impl<'a> Serialize for LinkingSection<'a> {

View file

@@ -8,7 +8,7 @@ pub mod serialize;
 use bumpalo::{collections::Vec, Bump};
 pub use code_builder::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
-pub use linking::SymInfo;
+pub use linking::{OffsetRelocType, RelocationEntry, SymInfo, SymType};
 pub use sections::{ConstExpr, Export, ExportType, Global, GlobalType, Signature};
 use self::linking::{LinkingSection, RelocationSection};
@@ -182,4 +182,14 @@
             .find(|ex| ex.name == name)
             .and_then(|ex| self.global.parse_u32_at_index(ex.index).ok())
     }
+    pub fn relocate_preloaded_code(&mut self, sym_name: &str, sym_type: SymType, value: u32) {
+        let sym_index = self
+            .linking
+            .find_symbol_by_name(sym_name, sym_type)
+            .unwrap_or_else(|| panic!("Linking failed! Can't find symbol `{}`", sym_name));
+        self.reloc_code
+            .apply_relocs_u32(&mut self.code.preloaded_bytes, sym_index, value);
+    }
 }

View file

@@ -1105,7 +1105,7 @@ impl<'a> Serialize for ElementSection<'a> {
 #[derive(Debug)]
 pub struct CodeSection<'a> {
     pub preloaded_count: u32,
-    pub preloaded_bytes: &'a [u8],
+    pub preloaded_bytes: Vec<'a, u8>,
     pub code_builders: Vec<'a, CodeBuilder<'a>>,
     dead_code_metadata: PreloadsCallGraph<'a>,
 }
@@ -1145,11 +1145,13 @@
     ) -> Result<Self, ParseError> {
         let (preloaded_count, range) = parse_section(SectionId::Code, module_bytes, cursor)?;
         *cursor = range.end;
-        let preloaded_bytes = arena.alloc_slice_copy(&module_bytes[range]);
+        let mut preloaded_bytes = Vec::with_capacity_in(range.len(), arena);
+        preloaded_bytes.extend_from_slice(&module_bytes[range]);
         let dead_code_metadata = parse_preloads_call_graph(
             arena,
-            preloaded_bytes,
+            &preloaded_bytes,
             import_signatures,
             function_signatures,
             indirect_callees,
@@ -1183,12 +1185,12 @@
             arena,
             &mut buffer,
             &self.dead_code_metadata,
-            self.preloaded_bytes,
+            &self.preloaded_bytes,
             import_fn_count,
             live_ext_fn_indices,
         );
-        self.preloaded_bytes = buffer.into_bump_slice();
+        self.preloaded_bytes = buffer;
     }
 }
@@ -1197,7 +1199,7 @@
         let header_indices = write_section_header(buffer, SectionId::Code);
         buffer.encode_u32(self.preloaded_count + self.code_builders.len() as u32);
-        buffer.append_slice(self.preloaded_bytes);
+        buffer.append_slice(&self.preloaded_bytes);
         for code_builder in self.code_builders.iter() {
             code_builder.serialize(buffer);

View file

@@ -121,19 +121,23 @@ macro_rules! write_unencoded {
     };
 }
-macro_rules! encode_padded_sleb128 {
-    ($name: ident, $ty: ty) => {
-        /// write a maximally-padded SLEB128 integer (only used in relocations)
-        fn $name(&mut self, value: $ty) {
+/// For relocations
+pub fn overwrite_padded_i32(buffer: &mut [u8], offset: usize, value: i32) {
     let mut x = value;
-    let size = (std::mem::size_of::<$ty>() / 4) * MAX_SIZE_ENCODED_U32;
-    for _ in 0..(size - 1) {
-        self.append_u8(0x80 | (x & 0x7f) as u8);
+    for byte in buffer.iter_mut().skip(offset).take(4) {
+        *byte = 0x80 | ((x & 0x7f) as u8);
         x >>= 7;
     }
-    self.append_u8((x & 0x7f) as u8);
+    buffer[4] = (x & 0x7f) as u8;
 }
-    };
+fn overwrite_padded_u32_help(buffer: &mut [u8], value: u32) {
+    let mut x = value;
+    for byte in buffer.iter_mut().take(4) {
+        *byte = 0x80 | ((x & 0x7f) as u8);
+        x >>= 7;
+    }
+    buffer[4] = x as u8;
+}
 pub trait SerialBuffer: Debug {
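A small usage sketch (not from the commit, assuming the overwrite_padded_i32 added above is in scope) showing why the immediates are kept maximally padded: the encoded value always occupies exactly five bytes, so a relocation can overwrite it in place without shifting any surrounding code:

    let mut buf = [0u8; 5];
    overwrite_padded_i32(&mut buf, 0, 1234);
    // the minimal LEB128 encoding of 1234 would be just [0xd2, 0x09];
    // the padded form always fills the whole five-byte slot:
    assert_eq!(buf, [0xd2, 0x89, 0x80, 0x80, 0x00]);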
@@ -163,17 +167,6 @@ pub trait SerialBuffer: Debug {
     // methods for relocations
     write_unencoded!(write_unencoded_u32, u32);
     write_unencoded!(write_unencoded_u64, u64);
-    encode_padded_sleb128!(encode_padded_i32, i32);
-    encode_padded_sleb128!(encode_padded_i64, i64);
 }
-fn overwrite_padded_u32_help(buffer: &mut [u8], value: u32) {
-    let mut x = value;
-    for byte in buffer.iter_mut().take(4) {
-        *byte = 0x80 | ((x & 0x7f) as u8);
-        x >>= 7;
-    }
-    buffer[4] = x as u8;
-}
 impl SerialBuffer for std::vec::Vec<u8> {
@@ -367,48 +360,18 @@
         assert_eq!(buffer, &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
     }
-    fn help_pad_i32(val: i32) -> std::vec::Vec<u8> {
-        let mut buffer = std::vec::Vec::with_capacity(MAX_SIZE_ENCODED_U32);
-        buffer.encode_padded_i32(val);
+    fn help_pad_i32(val: i32) -> [u8; MAX_SIZE_ENCODED_U32] {
+        let mut buffer = [0; MAX_SIZE_ENCODED_U32];
+        overwrite_padded_i32(&mut buffer, 0, val);
         buffer
     }
     #[test]
     fn test_encode_padded_i32() {
-        assert_eq!(help_pad_i32(0), &[0x80, 0x80, 0x80, 0x80, 0x00]);
-        assert_eq!(help_pad_i32(1), &[0x81, 0x80, 0x80, 0x80, 0x00]);
-        assert_eq!(help_pad_i32(-1), &[0xff, 0xff, 0xff, 0xff, 0x7f]);
-        assert_eq!(help_pad_i32(i32::MAX), &[0xff, 0xff, 0xff, 0xff, 0x07]);
-        assert_eq!(help_pad_i32(i32::MIN), &[0x80, 0x80, 0x80, 0x80, 0x78]);
-    }
-    fn help_pad_i64(val: i64) -> std::vec::Vec<u8> {
-        let mut buffer = std::vec::Vec::with_capacity(10);
-        buffer.encode_padded_i64(val);
-        buffer
-    }
-    #[test]
-    fn test_encode_padded_i64() {
-        assert_eq!(
-            help_pad_i64(0),
-            &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00]
-        );
-        assert_eq!(
-            help_pad_i64(1),
-            &[0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00]
-        );
-        assert_eq!(
-            help_pad_i64(-1),
-            &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f]
-        );
-        assert_eq!(
-            help_pad_i64(i64::MAX),
-            &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00],
-        );
-        assert_eq!(
-            help_pad_i64(i64::MIN),
-            &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7f],
-        );
+        assert_eq!(help_pad_i32(0), [0x80, 0x80, 0x80, 0x80, 0x00]);
+        assert_eq!(help_pad_i32(1), [0x81, 0x80, 0x80, 0x80, 0x00]);
+        assert_eq!(help_pad_i32(-1), [0xff, 0xff, 0xff, 0xff, 0x7f]);
+        assert_eq!(help_pad_i32(i32::MAX), [0xff, 0xff, 0xff, 0xff, 0x07]);
+        assert_eq!(help_pad_i32(i32::MIN), [0x80, 0x80, 0x80, 0x80, 0x78]);
     }
 }