move wasm_module out of gen_wasm

Brian Carroll 2022-11-13 09:01:50 +00:00
parent b907f01f1f
commit 63d9187343
17 changed files with 43 additions and 21 deletions


@@ -17,3 +17,4 @@ roc_mono = { path = "../mono" }
 roc_target = { path = "../roc_target" }
 roc_std = { path = "../../roc_std" }
 roc_error_macros = { path = "../../error_macros" }
+roc_wasm_module = { path = "../../wasm_module" }


@@ -18,18 +18,18 @@ use roc_std::RocDec;
 use crate::layout::{CallConv, ReturnMethod, WasmLayout};
 use crate::low_level::{call_higher_order_lowlevel, LowLevelCall};
 use crate::storage::{AddressValue, Storage, StoredValue, StoredVarKind};
-use crate::wasm_module::linking::{DataSymbol, WasmObjectSymbol};
-use crate::wasm_module::sections::{
-    ConstExpr, DataMode, DataSegment, Export, Global, GlobalType, Import, ImportDesc, Limits,
-    MemorySection, NameSection,
-};
-use crate::wasm_module::{
-    code_builder, CodeBuilder, ExportType, LocalId, Signature, SymInfo, ValueType, WasmModule,
-};
 use crate::{
     copy_memory, round_up_to_alignment, CopyMemoryConfig, Env, DEBUG_SETTINGS, MEMORY_NAME,
     PTR_SIZE, PTR_TYPE, TARGET_INFO,
 };
+use roc_wasm_module::linking::{DataSymbol, WasmObjectSymbol};
+use roc_wasm_module::sections::{
+    ConstExpr, DataMode, DataSegment, Export, Global, GlobalType, Import, ImportDesc, Limits,
+    MemorySection, NameSection,
+};
+use roc_wasm_module::{
+    code_builder, CodeBuilder, ExportType, LocalId, Signature, SymInfo, ValueType, WasmModule,
+};
 
 #[derive(Clone, Copy, Debug)]
 pub enum ProcSource {


@@ -1,8 +1,8 @@
 use roc_builtins::bitcode::{FloatWidth, IntWidth};
 use roc_mono::layout::{Layout, STLayoutInterner, UnionLayout};
 
-use crate::wasm_module::ValueType;
 use crate::{PTR_SIZE, PTR_TYPE, TARGET_INFO};
+use roc_wasm_module::ValueType;
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum ReturnMethod {


@@ -3,7 +3,6 @@ mod backend;
 mod layout;
 mod low_level;
 mod storage;
-pub mod wasm_module;
 
 // Helpers for interfacing to a Wasm module from outside
 pub mod wasm32_result;
@@ -19,10 +18,10 @@ use roc_mono::code_gen_help::CodeGenHelp;
 use roc_mono::ir::{Proc, ProcLayout};
 use roc_mono::layout::{LayoutIds, STLayoutInterner};
 use roc_target::TargetInfo;
-use wasm_module::parse::ParseError;
+use roc_wasm_module::parse::ParseError;
 
 use crate::backend::{ProcLookupData, ProcSource, WasmBackend};
-use crate::wasm_module::{Align, CodeBuilder, LocalId, ValueType, WasmModule};
+use roc_wasm_module::{Align, CodeBuilder, LocalId, ValueType, WasmModule};
 
 const TARGET_INFO: TargetInfo = TargetInfo::default_wasm32();
 const PTR_SIZE: u32 = {


@@ -12,8 +12,8 @@ use roc_mono::low_level::HigherOrder;
 use crate::backend::{ProcLookupData, ProcSource, WasmBackend};
 use crate::layout::{CallConv, StackMemoryFormat, WasmLayout};
 use crate::storage::{AddressValue, StackMemoryLocation, StoredValue};
-use crate::wasm_module::{Align, LocalId, ValueType};
 use crate::{PTR_TYPE, TARGET_INFO};
+use roc_wasm_module::{Align, LocalId, ValueType};
 
 /// Number types used for Wasm code gen
 /// Unlike other enums, this contains no details about layout or storage.


@@ -7,8 +7,8 @@ use roc_module::symbol::Symbol;
 use roc_mono::layout::{Layout, STLayoutInterner};
 
 use crate::layout::{CallConv, ReturnMethod, StackMemoryFormat, WasmLayout};
-use crate::wasm_module::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
 use crate::{copy_memory, round_up_to_alignment, CopyMemoryConfig, PTR_TYPE};
+use roc_wasm_module::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
 
 pub enum StoredVarKind {
     Variable,
@@ -592,7 +592,7 @@
             | StoredValue::Local {
                 value_type, size, ..
             } => {
-                use crate::wasm_module::Align::*;
+                use roc_wasm_module::Align::*;
                 code_builder.get_local(to_ptr);
                 self.load_symbols(code_builder, &[from_symbol]);
                 match (value_type, size) {
@@ -666,7 +666,7 @@
             | StoredValue::Local {
                 value_type, size, ..
             } => {
-                use crate::wasm_module::Align::*;
+                use roc_wasm_module::Align::*;
                 if let AddressValue::NotLoaded(from_ptr) = from_addr {
                     code_builder.get_local(from_ptr);


@@ -11,11 +11,11 @@ use roc_mono::layout::{Builtin, Layout, UnionLayout};
 use roc_target::TargetInfo;
 
 use crate::wasm32_sized::Wasm32Sized;
-use crate::wasm_module::{
+use roc_std::{RocDec, RocList, RocOrder, RocResult, RocStr, I128, U128};
+use roc_wasm_module::{
     linking::SymInfo, linking::WasmObjectSymbol, Align, CodeBuilder, Export, ExportType, LocalId,
     Signature, ValueType, WasmModule,
 };
-use roc_std::{RocDec, RocList, RocOrder, RocResult, RocStr, I128, U128};
 
 /// Type-driven wrapper generation
 pub trait Wasm32Result {


@@ -1,940 +0,0 @@
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use core::panic;
use roc_error_macros::internal_error;
use roc_module::symbol::Symbol;
use super::opcodes::{OpCode, OpCode::*};
use super::serialize::{SerialBuffer, Serialize};
use crate::{
round_up_to_alignment, DEBUG_SETTINGS, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID,
};
macro_rules! log_instruction {
($($x: expr),+) => {
if DEBUG_SETTINGS.instructions { println!($($x,)*); }
};
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LocalId(pub u32);
/// Wasm value type. (Rust representation matches Wasm encoding)
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum ValueType {
I32 = 0x7f,
I64 = 0x7e,
F32 = 0x7d,
F64 = 0x7c,
}
impl Serialize for ValueType {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_u8(*self as u8);
}
}
impl From<u8> for ValueType {
fn from(x: u8) -> Self {
match x {
0x7f => Self::I32,
0x7e => Self::I64,
0x7d => Self::F32,
0x7c => Self::F64,
_ => internal_error!("Invalid ValueType 0x{:02x}", x),
}
}
}
const BLOCK_NO_RESULT: u8 = 0x40;
/// A control block in our model of the VM
/// Child blocks cannot "see" values from their parent block
struct VmBlock<'a> {
/// opcode indicating what kind of block this is
opcode: OpCode,
/// the stack of values for this block
value_stack: Vec<'a, Symbol>,
}
impl std::fmt::Debug for VmBlock<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!("{:?} {:?}", self.opcode, self.value_stack))
}
}
/// Wasm memory alignment for load/store instructions.
/// Rust representation matches Wasm encoding.
/// It's an error to specify alignment higher than the "natural" alignment of the instruction
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
pub enum Align {
Bytes1 = 0,
Bytes2 = 1,
Bytes4 = 2,
Bytes8 = 3,
}
impl Align {
/// Calculate the largest possible alignment for a load/store at a given stack frame offset
/// Assumes the stack frame is aligned to at least 8 bytes
pub fn from_stack_offset(max_align: Align, offset: u32) -> Align {
if (max_align == Align::Bytes8) && (offset & 7 == 0) {
return Align::Bytes8;
}
if (max_align >= Align::Bytes4) && (offset & 3 == 0) {
return Align::Bytes4;
}
if (max_align >= Align::Bytes2) && (offset & 1 == 0) {
return Align::Bytes2;
}
Align::Bytes1
}
}
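// Worked example (added for illustration, not in the original source): a store
// whose natural alignment is 8 bytes, placed at stack-frame offset 12, can only
// be 4-byte aligned, because 12 is a multiple of 4 but not of 8:
//
//     assert_eq!(Align::from_stack_offset(Align::Bytes8, 12), Align::Bytes4);
//     assert_eq!(Align::from_stack_offset(Align::Bytes8, 16), Align::Bytes8);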
impl From<u32> for Align {
fn from(x: u32) -> Align {
match x {
1 => Align::Bytes1,
2 => Align::Bytes2,
4 => Align::Bytes4,
_ => {
if x.count_ones() == 1 {
Align::Bytes8 // Max value supported by any Wasm instruction
} else {
internal_error!("Cannot align to {} bytes", x);
}
}
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum VmSymbolState {
/// Value doesn't exist yet
NotYetPushed,
/// Value has been pushed onto the VM stack but not yet popped
/// Remember where it was pushed, in case we need to insert another instruction there later
Pushed { pushed_at: usize },
/// Value has been pushed and popped, so it's not on the VM stack any more.
/// If we want to use it again later, we will have to create a local for it,
/// by going back to insert a local.tee instruction at pushed_at
Popped { pushed_at: usize },
}
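// Lifecycle sketch (added for illustration): a Symbol that is used twice moves
// through all three states. It starts NotYetPushed, becomes Pushed { pushed_at }
// when its instruction is emitted, and Popped { pushed_at } after the first use
// consumes it. The second use can't reach it on the VM stack any more, so
// load_symbol (below) inserts a local.tee back at `pushed_at` and the value
// lives in a local from then on.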
// An instruction (local.set or local.tee) to be inserted into the function code
#[derive(Debug)]
struct Insertion {
at: usize,
start: usize,
end: usize,
}
macro_rules! instruction_no_args {
($method_name: ident, $opcode: expr, $pops: expr, $push: expr) => {
pub fn $method_name(&mut self) {
self.inst($opcode, $pops, $push);
}
};
}
macro_rules! instruction_memargs {
($method_name: ident, $opcode: expr, $pops: expr, $push: expr) => {
pub fn $method_name(&mut self, align: Align, offset: u32) {
self.inst_mem($opcode, $pops, $push, align, offset);
}
};
}
#[derive(Debug)]
pub struct CodeBuilder<'a> {
pub arena: &'a Bump,
/// The main container for the instructions
code: Vec<'a, u8>,
/// Instruction bytes to be inserted into the code when finalizing the function
/// (Used for setting locals when we realise they are used multiple times)
insert_bytes: Vec<'a, u8>,
/// Code locations where the insert_bytes should go
insertions: Vec<'a, Insertion>,
/// Bytes for local variable declarations and stack-frame setup code.
/// We can't write this until we've finished the main code. But it goes
/// before it in the final output, so we need a separate vector.
preamble: Vec<'a, u8>,
/// Encoded bytes for the inner length of the function, locals + code.
/// ("inner" because it doesn't include its own length!)
/// Again, we can't write this until we've finished the code and preamble,
/// but it goes before them in the binary, so it's a separate vector.
inner_length: Vec<'a, u8>,
/// Our simulation model of the Wasm stack machine
/// Nested blocks of instructions. A child block can't "see" the stack of its parent block
vm_block_stack: Vec<'a, VmBlock<'a>>,
/// Relocations for calls to JS imports
/// When we remove unused imports, the live ones are re-indexed
import_relocations: Vec<'a, (usize, u32)>,
}
impl<'a> Serialize for CodeBuilder<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
self.serialize_without_relocs(buffer);
}
}
#[allow(clippy::new_without_default)]
impl<'a> CodeBuilder<'a> {
pub fn new(arena: &'a Bump) -> Self {
let mut vm_block_stack = Vec::with_capacity_in(8, arena);
let function_block = VmBlock {
opcode: BLOCK,
value_stack: Vec::with_capacity_in(8, arena),
};
vm_block_stack.push(function_block);
CodeBuilder {
arena,
code: Vec::with_capacity_in(1024, arena),
insertions: Vec::with_capacity_in(32, arena),
insert_bytes: Vec::with_capacity_in(64, arena),
preamble: Vec::with_capacity_in(32, arena),
inner_length: Vec::with_capacity_in(5, arena),
vm_block_stack,
import_relocations: Vec::with_capacity_in(0, arena),
}
}
/**********************************************************
LINKING
***********************************************************/
/// Build a dummy function with just a single `unreachable` instruction
pub fn dummy(arena: &'a Bump) -> Self {
let mut builder = Self::new(arena);
builder.unreachable_();
builder.build_fn_header_and_footer(&[], 0, None);
builder
}
pub fn apply_import_relocs(&mut self, live_import_fns: &[usize]) {
for (code_index, fn_index) in self.import_relocations.iter() {
for (new_index, old_index) in live_import_fns.iter().enumerate() {
if *fn_index as usize == *old_index {
self.code
.overwrite_padded_u32(*code_index, new_index as u32);
}
}
}
}
/**********************************************************
SYMBOLS
The Wasm VM stores temporary values in its stack machine.
We track which stack positions correspond to IR Symbols,
because it helps to generate more efficient code.
***********************************************************/
fn current_stack(&self) -> &Vec<'a, Symbol> {
let block = self.vm_block_stack.last().unwrap();
&block.value_stack
}
fn current_stack_mut(&mut self) -> &mut Vec<'a, Symbol> {
let block = self.vm_block_stack.last_mut().unwrap();
&mut block.value_stack
}
/// Set the Symbol that is at the top of the VM stack right now
/// We will use this later when we need to load the Symbol
pub fn set_top_symbol(&mut self, sym: Symbol) -> VmSymbolState {
let current_stack = &mut self.vm_block_stack.last_mut().unwrap().value_stack;
let pushed_at = self.code.len();
let top_symbol: &mut Symbol = current_stack
.last_mut()
.unwrap_or_else(|| internal_error!("Empty stack when trying to set Symbol {:?}", sym));
*top_symbol = sym;
VmSymbolState::Pushed { pushed_at }
}
/// Verify if a sequence of symbols is at the top of the stack
pub fn verify_stack_match(&self, symbols: &[Symbol]) -> bool {
let current_stack = self.current_stack();
let n_symbols = symbols.len();
let stack_depth = current_stack.len();
if n_symbols > stack_depth {
return false;
}
let offset = stack_depth - n_symbols;
for (i, sym) in symbols.iter().enumerate() {
if current_stack[offset + i] != *sym {
return false;
}
}
true
}
fn add_insertion(&mut self, insert_at: usize, opcode: OpCode, immediate: u32) {
let start = self.insert_bytes.len();
self.insert_bytes.push(opcode as u8);
self.insert_bytes.encode_u32(immediate);
self.insertions.push(Insertion {
at: insert_at,
start,
end: self.insert_bytes.len(),
});
log_instruction!(
"**insert {:?} {} at byte offset {}**",
opcode,
immediate,
insert_at
);
}
/// Load a Symbol that is stored in the VM stack
/// If it's already at the top of the stack, no code will be generated.
/// Otherwise, local.set and local.get instructions will be inserted, using the LocalId provided.
///
/// If the return value is `Some(s)`, `s` should be stored by the caller, and provided in the next call.
/// If the return value is `None`, the Symbol is no longer stored in the VM stack, but in a local.
/// (In this case, the caller must remember to declare the local in the function header.)
pub fn load_symbol(
&mut self,
symbol: Symbol,
vm_state: VmSymbolState,
next_local_id: LocalId,
) -> Option<VmSymbolState> {
use VmSymbolState::*;
match vm_state {
NotYetPushed => {
internal_error!("Symbol {:?} has no value yet. Nothing to load.", symbol)
}
Pushed { pushed_at } => {
match self.current_stack().last() {
Some(top_symbol) if *top_symbol == symbol => {
// We're lucky, the symbol is already on top of the current block's stack.
// No code to generate! (This reduces code size by up to 25% in tests.)
// Just let the caller know what happened
Some(Popped { pushed_at })
}
_ => {
// Symbol is not on top of the stack.
// We should have saved it to a local, so go back and do that now.
self.store_pushed_symbol_to_local(
symbol,
vm_state,
pushed_at,
next_local_id,
);
// Recover the value again at the current position
self.get_local(next_local_id);
self.set_top_symbol(symbol);
// This Symbol is no longer stored in the VM stack, but in a local
None
}
}
}
Popped { pushed_at } => {
// This Symbol is being used for a second time
// Insert a local.tee where it was pushed, so we don't interfere with the first usage
self.add_insertion(pushed_at, TEELOCAL, next_local_id.0);
// Insert a local.get at the current position
self.get_local(next_local_id);
self.set_top_symbol(symbol);
// This symbol has been promoted to a Local
// Tell the caller it no longer has a VmSymbolState
None
}
}
}
/// Go back and store a Symbol in a local variable, without loading it at the current position
pub fn store_symbol_to_local(
&mut self,
symbol: Symbol,
vm_state: VmSymbolState,
next_local_id: LocalId,
) {
use VmSymbolState::*;
match vm_state {
NotYetPushed => {
// Nothing to do
}
Pushed { pushed_at } => {
self.store_pushed_symbol_to_local(symbol, vm_state, pushed_at, next_local_id)
}
Popped { pushed_at } => {
self.add_insertion(pushed_at, TEELOCAL, next_local_id.0);
}
}
}
fn store_pushed_symbol_to_local(
&mut self,
symbol: Symbol,
vm_state: VmSymbolState,
pushed_at: usize,
local_id: LocalId,
) {
debug_assert!(matches!(vm_state, VmSymbolState::Pushed { .. }));
// Update our stack model at the position where we're going to set the SETLOCAL
let mut found = false;
for block in self.vm_block_stack.iter_mut() {
if let Some(found_index) = block.value_stack.iter().position(|&s| s == symbol) {
block.value_stack.remove(found_index);
found = true;
}
}
// Go back to the code position where it was pushed, and save it to a local
if found {
self.add_insertion(pushed_at, SETLOCAL, local_id.0);
} else {
if DEBUG_SETTINGS.instructions {
println!(
"{:?} has been popped implicitly. Leaving it on the stack.",
symbol
);
}
self.add_insertion(pushed_at, TEELOCAL, local_id.0);
}
}
/**********************************************************
FUNCTION HEADER
***********************************************************/
/// Generate bytes to declare the function's local variables
fn build_local_declarations(&mut self, local_types: &[ValueType]) {
// reserve one byte for num_batches
self.preamble.push(0);
if local_types.is_empty() {
return;
}
// Write declarations in batches of the same ValueType
let mut num_batches: u32 = 0;
let mut batch_type = local_types[0];
let mut batch_size = 0;
for t in local_types {
if *t == batch_type {
batch_size += 1;
} else {
self.preamble.encode_u32(batch_size);
self.preamble.push(batch_type as u8);
batch_type = *t;
batch_size = 1;
num_batches += 1;
}
}
self.preamble.encode_u32(batch_size);
self.preamble.push(batch_type as u8);
num_batches += 1;
// Go back and write the number of batches at the start
if num_batches < 128 {
self.preamble[0] = num_batches as u8;
} else {
// We need more than 1 byte to encode num_batches!
// This is a ridiculous edge case, so just pad to 5 bytes for simplicity
let old_len = self.preamble.len();
self.preamble.resize(old_len + 4, 0);
self.preamble.copy_within(1..old_len, 5);
self.preamble.overwrite_padded_u32(0, num_batches);
}
}
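// Encoding example (hypothetical input, for illustration): local_types
// [I32, I32, I64] produces two batches, so with num_batches < 128 the
// preamble holds:
//
//     0x02,        // num_batches
//     0x02, 0x7f,  // batch 1: two locals of type I32
//     0x01, 0x7e,  // batch 2: one local of type I64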
/// Generate instruction bytes to grab a frame of stack memory on entering the function
fn build_stack_frame_push(&mut self, frame_size: i32, frame_pointer: LocalId) {
// Can't use the usual instruction methods because they push to self.code.
// This is the only case where we push instructions somewhere different.
self.preamble.push(GETGLOBAL as u8);
self.preamble.encode_u32(STACK_POINTER_GLOBAL_ID);
self.preamble.push(I32CONST as u8);
self.preamble.encode_i32(frame_size);
self.preamble.push(I32SUB as u8);
self.preamble.push(TEELOCAL as u8);
self.preamble.encode_u32(frame_pointer.0);
self.preamble.push(SETGLOBAL as u8);
self.preamble.encode_u32(STACK_POINTER_GLOBAL_ID);
}
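// The preamble bytes pushed above correspond to this Wasm text
// (with the frame size and frame pointer filled in):
//
//     global.get $stack_pointer
//     i32.const  <frame_size>
//     i32.sub
//     local.tee  $frame_pointer
//     global.set $stack_pointer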
/// Generate instruction bytes to release a frame of stack memory on leaving the function
fn build_stack_frame_pop(&mut self, frame_size: i32, frame_pointer: LocalId) {
self.get_local(frame_pointer);
self.i32_const(frame_size);
self.i32_add();
self.set_global(STACK_POINTER_GLOBAL_ID);
}
/// Build the function header: local declarations, stack frame push/pop code, and function length
/// After this, all bytes have been generated (but not yet serialized) and we know the final size.
pub fn build_fn_header_and_footer(
&mut self,
local_types: &[ValueType],
frame_size: i32,
frame_pointer: Option<LocalId>,
) {
self.build_local_declarations(local_types);
if frame_size != 0 {
if let Some(frame_ptr_id) = frame_pointer {
let aligned_size = round_up_to_alignment!(frame_size, FRAME_ALIGNMENT_BYTES);
self.build_stack_frame_push(aligned_size, frame_ptr_id);
self.build_stack_frame_pop(aligned_size, frame_ptr_id); // footer
}
}
self.code.push(END as u8);
let inner_len = self.preamble.len() + self.code.len() + self.insert_bytes.len();
self.inner_length.encode_u32(inner_len as u32);
// Sort insertions. They are not created in order of assignment, but in order of *second* usage.
self.insertions.sort_by_key(|ins| ins.at);
}
/**********************************************************
SERIALIZE
***********************************************************/
pub fn size(&self) -> usize {
self.inner_length.len() + self.preamble.len() + self.code.len() + self.insert_bytes.len()
}
/// Serialize all byte vectors in the right order
/// Also update relocation offsets relative to the base offset (code section body start)
pub fn serialize_without_relocs<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_slice(&self.inner_length);
buffer.append_slice(&self.preamble);
let mut code_pos = 0;
for Insertion { at, start, end } in self.insertions.iter() {
buffer.append_slice(&self.code[code_pos..(*at)]);
buffer.append_slice(&self.insert_bytes[*start..*end]);
code_pos = *at;
}
buffer.append_slice(&self.code[code_pos..self.code.len()]);
}
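// Splicing sketch (hypothetical values): with code = [A, B, C, D] and one
// insertion { at: 2, start: 0, end: 2 } over insert_bytes = [X, Y], the loop
// above emits [A, B], then [X, Y], then the remainder [C, D], giving
// [A, B, X, Y, C, D] without ever shifting bytes inside `code`.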
/**********************************************************
INSTRUCTION HELPER METHODS
***********************************************************/
/// Base method for generating instructions
/// Emits the opcode and simulates VM stack push/pop
fn inst_base(&mut self, opcode: OpCode, pops: usize, push: bool) {
let current_stack = self.current_stack_mut();
let stack_size = current_stack.len();
debug_assert!(
stack_size >= pops,
"Wasm value stack underflow. Tried to pop {} but only {} available",
pops,
stack_size
);
let new_len = stack_size - pops as usize;
current_stack.truncate(new_len);
if push {
current_stack.push(Symbol::WASM_TMP);
}
self.code.push(opcode as u8);
}
/// Plain instruction without any immediates
fn inst(&mut self, opcode: OpCode, pops: usize, push: bool) {
self.inst_base(opcode, pops, push);
log_instruction!(
"{:10}\t\t{:?}",
format!("{:?}", opcode),
self.vm_block_stack
);
}
/// Block instruction
fn inst_block(&mut self, opcode: OpCode, pops: usize) {
self.inst_base(opcode, pops, false);
// We don't support block result types. Too hard to track types through arbitrary control flow.
self.code.push(BLOCK_NO_RESULT);
// Start a new block with a fresh value stack
self.vm_block_stack.push(VmBlock {
opcode,
value_stack: Vec::with_capacity_in(8, self.arena),
});
log_instruction!("{:10}\t{:?}", format!("{:?}", opcode), &self.vm_block_stack);
}
fn inst_imm32(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u32) {
self.inst_base(opcode, pops, push);
self.code.encode_u32(immediate);
log_instruction!(
"{:10}\t{}\t{:?}",
format!("{:?}", opcode),
immediate,
self.vm_block_stack
);
}
fn inst_mem(&mut self, opcode: OpCode, pops: usize, push: bool, align: Align, offset: u32) {
self.inst_base(opcode, pops, push);
self.code.push(align as u8);
self.code.encode_u32(offset);
log_instruction!(
"{:10} {:?} {}\t{:?}",
format!("{:?}", opcode),
align,
offset,
self.vm_block_stack
);
}
/**********************************************************
INSTRUCTION METHODS
One method for each Wasm instruction (in same order as the spec)
macros are for compactness & readability for the most common cases
Patterns that don't repeat very much don't have macros
***********************************************************/
instruction_no_args!(unreachable_, UNREACHABLE, 0, false);
instruction_no_args!(nop, NOP, 0, false);
pub fn block(&mut self) {
self.inst_block(BLOCK, 0);
}
pub fn loop_(&mut self) {
self.inst_block(LOOP, 0);
}
pub fn if_(&mut self) {
self.inst_block(IF, 1);
}
pub fn else_(&mut self) {
// Reuse the 'then' block but clear its value stack
self.current_stack_mut().clear();
self.inst(ELSE, 0, false);
}
pub fn end(&mut self) {
// We need to drop any unused values from the VM stack in order to pass Wasm validation.
// This happens, for example, in test `gen_tags::if_guard_exhaustiveness`
let n_unused = self
.vm_block_stack
.last()
.map(|block| block.value_stack.len())
.unwrap_or(0);
for _ in 0..n_unused {
self.drop_();
}
self.inst_base(END, 0, false);
self.vm_block_stack.pop();
log_instruction!("END \t\t{:?}", &self.vm_block_stack);
}
pub fn br(&mut self, levels: u32) {
self.inst_imm32(BR, 0, false, levels);
}
pub fn br_if(&mut self, levels: u32) {
// In dynamic execution, br_if can pop 2 values if condition is true and the target block has a result.
// But our stack model is for *static* analysis and we need it to be correct at the next instruction,
// where the branch was not taken. So we only pop 1 value, the condition.
self.inst_imm32(BRIF, 1, false, levels);
}
#[allow(dead_code)]
fn br_table() {
todo!("br instruction");
}
instruction_no_args!(return_, RETURN, 0, false);
pub fn call(&mut self, function_index: u32, n_args: usize, has_return_val: bool) {
self.call_impl(function_index, n_args, has_return_val, false)
}
pub fn call_import(&mut self, function_index: u32, n_args: usize, has_return_val: bool) {
self.call_impl(function_index, n_args, has_return_val, true)
}
#[inline(always)]
fn call_impl(
&mut self,
function_index: u32,
n_args: usize,
has_return_val: bool,
is_import: bool,
) {
self.inst_base(CALL, n_args, has_return_val);
if is_import {
self.import_relocations
.push((self.code.len(), function_index));
}
self.code.encode_padded_u32(function_index);
log_instruction!(
"{:10}\t{}\t{:?}",
format!("{:?}", CALL),
function_index,
self.vm_block_stack
);
}
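// Note (added comment): the function index is written with encode_padded_u32,
// a fixed-width 5-byte LEB128, so that apply_import_relocs can later overwrite
// it in place when unused imports are removed and the live ones are re-indexed.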
#[allow(dead_code)]
fn call_indirect() {
unimplemented!(
"There is no plan to implement call_indirect. Roc doesn't use function pointers"
);
}
instruction_no_args!(drop_, DROP, 1, false);
instruction_no_args!(select, SELECT, 3, true);
pub fn get_local(&mut self, id: LocalId) {
self.inst_imm32(GETLOCAL, 0, true, id.0);
}
pub fn set_local(&mut self, id: LocalId) {
self.inst_imm32(SETLOCAL, 1, false, id.0);
}
pub fn tee_local(&mut self, id: LocalId) {
self.inst_imm32(TEELOCAL, 0, false, id.0);
}
pub fn get_global(&mut self, id: u32) {
self.inst_imm32(GETGLOBAL, 0, true, id);
}
pub fn set_global(&mut self, id: u32) {
self.inst_imm32(SETGLOBAL, 1, false, id);
}
instruction_memargs!(i32_load, I32LOAD, 1, true);
instruction_memargs!(i64_load, I64LOAD, 1, true);
instruction_memargs!(f32_load, F32LOAD, 1, true);
instruction_memargs!(f64_load, F64LOAD, 1, true);
instruction_memargs!(i32_load8_s, I32LOAD8S, 1, true);
instruction_memargs!(i32_load8_u, I32LOAD8U, 1, true);
instruction_memargs!(i32_load16_s, I32LOAD16S, 1, true);
instruction_memargs!(i32_load16_u, I32LOAD16U, 1, true);
instruction_memargs!(i64_load8_s, I64LOAD8S, 1, true);
instruction_memargs!(i64_load8_u, I64LOAD8U, 1, true);
instruction_memargs!(i64_load16_s, I64LOAD16S, 1, true);
instruction_memargs!(i64_load16_u, I64LOAD16U, 1, true);
instruction_memargs!(i64_load32_s, I64LOAD32S, 1, true);
instruction_memargs!(i64_load32_u, I64LOAD32U, 1, true);
instruction_memargs!(i32_store, I32STORE, 2, false);
instruction_memargs!(i64_store, I64STORE, 2, false);
instruction_memargs!(f32_store, F32STORE, 2, false);
instruction_memargs!(f64_store, F64STORE, 2, false);
instruction_memargs!(i32_store8, I32STORE8, 2, false);
instruction_memargs!(i32_store16, I32STORE16, 2, false);
instruction_memargs!(i64_store8, I64STORE8, 2, false);
instruction_memargs!(i64_store16, I64STORE16, 2, false);
instruction_memargs!(i64_store32, I64STORE32, 2, false);
pub fn memory_size(&mut self) {
self.inst(CURRENTMEMORY, 0, true);
self.code.push(0);
}
pub fn memory_grow(&mut self) {
self.inst(GROWMEMORY, 1, true);
self.code.push(0);
}
fn log_const<T>(&self, opcode: OpCode, x: T)
where
T: std::fmt::Debug + std::fmt::Display,
{
log_instruction!(
"{:10}\t{}\t{:?}",
format!("{:?}", opcode),
x,
self.vm_block_stack
);
}
pub fn i32_const(&mut self, x: i32) {
self.inst_base(I32CONST, 0, true);
self.code.encode_i32(x);
self.log_const(I32CONST, x);
}
pub fn i64_const(&mut self, x: i64) {
self.inst_base(I64CONST, 0, true);
self.code.encode_i64(x);
self.log_const(I64CONST, x);
}
pub fn f32_const(&mut self, x: f32) {
self.inst_base(F32CONST, 0, true);
self.code.encode_f32(x);
self.log_const(F32CONST, x);
}
pub fn f64_const(&mut self, x: f64) {
self.inst_base(F64CONST, 0, true);
self.code.encode_f64(x);
self.log_const(F64CONST, x);
}
// TODO: Consider creating unified methods for numerical ops like 'eq' and 'add',
// passing the ValueType as an argument. Could simplify lowlevel code gen.
instruction_no_args!(i32_eqz, I32EQZ, 1, true);
instruction_no_args!(i32_eq, I32EQ, 2, true);
instruction_no_args!(i32_ne, I32NE, 2, true);
instruction_no_args!(i32_lt_s, I32LTS, 2, true);
instruction_no_args!(i32_lt_u, I32LTU, 2, true);
instruction_no_args!(i32_gt_s, I32GTS, 2, true);
instruction_no_args!(i32_gt_u, I32GTU, 2, true);
instruction_no_args!(i32_le_s, I32LES, 2, true);
instruction_no_args!(i32_le_u, I32LEU, 2, true);
instruction_no_args!(i32_ge_s, I32GES, 2, true);
instruction_no_args!(i32_ge_u, I32GEU, 2, true);
instruction_no_args!(i64_eqz, I64EQZ, 1, true);
instruction_no_args!(i64_eq, I64EQ, 2, true);
instruction_no_args!(i64_ne, I64NE, 2, true);
instruction_no_args!(i64_lt_s, I64LTS, 2, true);
instruction_no_args!(i64_lt_u, I64LTU, 2, true);
instruction_no_args!(i64_gt_s, I64GTS, 2, true);
instruction_no_args!(i64_gt_u, I64GTU, 2, true);
instruction_no_args!(i64_le_s, I64LES, 2, true);
instruction_no_args!(i64_le_u, I64LEU, 2, true);
instruction_no_args!(i64_ge_s, I64GES, 2, true);
instruction_no_args!(i64_ge_u, I64GEU, 2, true);
instruction_no_args!(f32_eq, F32EQ, 2, true);
instruction_no_args!(f32_ne, F32NE, 2, true);
instruction_no_args!(f32_lt, F32LT, 2, true);
instruction_no_args!(f32_gt, F32GT, 2, true);
instruction_no_args!(f32_le, F32LE, 2, true);
instruction_no_args!(f32_ge, F32GE, 2, true);
instruction_no_args!(f64_eq, F64EQ, 2, true);
instruction_no_args!(f64_ne, F64NE, 2, true);
instruction_no_args!(f64_lt, F64LT, 2, true);
instruction_no_args!(f64_gt, F64GT, 2, true);
instruction_no_args!(f64_le, F64LE, 2, true);
instruction_no_args!(f64_ge, F64GE, 2, true);
instruction_no_args!(i32_clz, I32CLZ, 1, true);
instruction_no_args!(i32_ctz, I32CTZ, 1, true);
instruction_no_args!(i32_popcnt, I32POPCNT, 1, true);
instruction_no_args!(i32_add, I32ADD, 2, true);
instruction_no_args!(i32_sub, I32SUB, 2, true);
instruction_no_args!(i32_mul, I32MUL, 2, true);
instruction_no_args!(i32_div_s, I32DIVS, 2, true);
instruction_no_args!(i32_div_u, I32DIVU, 2, true);
instruction_no_args!(i32_rem_s, I32REMS, 2, true);
instruction_no_args!(i32_rem_u, I32REMU, 2, true);
instruction_no_args!(i32_and, I32AND, 2, true);
instruction_no_args!(i32_or, I32OR, 2, true);
instruction_no_args!(i32_xor, I32XOR, 2, true);
instruction_no_args!(i32_shl, I32SHL, 2, true);
instruction_no_args!(i32_shr_s, I32SHRS, 2, true);
instruction_no_args!(i32_shr_u, I32SHRU, 2, true);
instruction_no_args!(i32_rotl, I32ROTL, 2, true);
instruction_no_args!(i32_rotr, I32ROTR, 2, true);
instruction_no_args!(i64_clz, I64CLZ, 1, true);
instruction_no_args!(i64_ctz, I64CTZ, 1, true);
instruction_no_args!(i64_popcnt, I64POPCNT, 1, true);
instruction_no_args!(i64_add, I64ADD, 2, true);
instruction_no_args!(i64_sub, I64SUB, 2, true);
instruction_no_args!(i64_mul, I64MUL, 2, true);
instruction_no_args!(i64_div_s, I64DIVS, 2, true);
instruction_no_args!(i64_div_u, I64DIVU, 2, true);
instruction_no_args!(i64_rem_s, I64REMS, 2, true);
instruction_no_args!(i64_rem_u, I64REMU, 2, true);
instruction_no_args!(i64_and, I64AND, 2, true);
instruction_no_args!(i64_or, I64OR, 2, true);
instruction_no_args!(i64_xor, I64XOR, 2, true);
instruction_no_args!(i64_shl, I64SHL, 2, true);
instruction_no_args!(i64_shr_s, I64SHRS, 2, true);
instruction_no_args!(i64_shr_u, I64SHRU, 2, true);
instruction_no_args!(i64_rotl, I64ROTL, 2, true);
instruction_no_args!(i64_rotr, I64ROTR, 2, true);
instruction_no_args!(f32_abs, F32ABS, 1, true);
instruction_no_args!(f32_neg, F32NEG, 1, true);
instruction_no_args!(f32_ceil, F32CEIL, 1, true);
instruction_no_args!(f32_floor, F32FLOOR, 1, true);
instruction_no_args!(f32_trunc, F32TRUNC, 1, true);
instruction_no_args!(f32_nearest, F32NEAREST, 1, true);
instruction_no_args!(f32_sqrt, F32SQRT, 1, true);
instruction_no_args!(f32_add, F32ADD, 2, true);
instruction_no_args!(f32_sub, F32SUB, 2, true);
instruction_no_args!(f32_mul, F32MUL, 2, true);
instruction_no_args!(f32_div, F32DIV, 2, true);
instruction_no_args!(f32_min, F32MIN, 2, true);
instruction_no_args!(f32_max, F32MAX, 2, true);
instruction_no_args!(f32_copysign, F32COPYSIGN, 2, true);
instruction_no_args!(f64_abs, F64ABS, 1, true);
instruction_no_args!(f64_neg, F64NEG, 1, true);
instruction_no_args!(f64_ceil, F64CEIL, 1, true);
instruction_no_args!(f64_floor, F64FLOOR, 1, true);
instruction_no_args!(f64_trunc, F64TRUNC, 1, true);
instruction_no_args!(f64_nearest, F64NEAREST, 1, true);
instruction_no_args!(f64_sqrt, F64SQRT, 1, true);
instruction_no_args!(f64_add, F64ADD, 2, true);
instruction_no_args!(f64_sub, F64SUB, 2, true);
instruction_no_args!(f64_mul, F64MUL, 2, true);
instruction_no_args!(f64_div, F64DIV, 2, true);
instruction_no_args!(f64_min, F64MIN, 2, true);
instruction_no_args!(f64_max, F64MAX, 2, true);
instruction_no_args!(f64_copysign, F64COPYSIGN, 2, true);
instruction_no_args!(i32_wrap_i64, I32WRAPI64, 1, true);
instruction_no_args!(i32_trunc_s_f32, I32TRUNCSF32, 1, true);
instruction_no_args!(i32_trunc_u_f32, I32TRUNCUF32, 1, true);
instruction_no_args!(i32_trunc_s_f64, I32TRUNCSF64, 1, true);
instruction_no_args!(i32_trunc_u_f64, I32TRUNCUF64, 1, true);
instruction_no_args!(i64_extend_s_i32, I64EXTENDSI32, 1, true);
instruction_no_args!(i64_extend_u_i32, I64EXTENDUI32, 1, true);
instruction_no_args!(i64_trunc_s_f32, I64TRUNCSF32, 1, true);
instruction_no_args!(i64_trunc_u_f32, I64TRUNCUF32, 1, true);
instruction_no_args!(i64_trunc_s_f64, I64TRUNCSF64, 1, true);
instruction_no_args!(i64_trunc_u_f64, I64TRUNCUF64, 1, true);
instruction_no_args!(f32_convert_s_i32, F32CONVERTSI32, 1, true);
instruction_no_args!(f32_convert_u_i32, F32CONVERTUI32, 1, true);
instruction_no_args!(f32_convert_s_i64, F32CONVERTSI64, 1, true);
instruction_no_args!(f32_convert_u_i64, F32CONVERTUI64, 1, true);
instruction_no_args!(f32_demote_f64, F32DEMOTEF64, 1, true);
instruction_no_args!(f64_convert_s_i32, F64CONVERTSI32, 1, true);
instruction_no_args!(f64_convert_u_i32, F64CONVERTUI32, 1, true);
instruction_no_args!(f64_convert_s_i64, F64CONVERTSI64, 1, true);
instruction_no_args!(f64_convert_u_i64, F64CONVERTUI64, 1, true);
instruction_no_args!(f64_promote_f32, F64PROMOTEF32, 1, true);
instruction_no_args!(i32_reinterpret_f32, I32REINTERPRETF32, 1, true);
instruction_no_args!(i64_reinterpret_f64, I64REINTERPRETF64, 1, true);
instruction_no_args!(f32_reinterpret_i32, F32REINTERPRETI32, 1, true);
instruction_no_args!(f64_reinterpret_i64, F64REINTERPRETI64, 1, true);
}
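// Usage sketch (not part of this file): building a function body equivalent to
// `(i32.add (local.get 0) (local.get 1))`, where LocalId(0) and LocalId(1) are
// the two arguments and the result is left on the stack as the return value:
//
//     let arena = Bump::new();
//     let mut cb = CodeBuilder::new(&arena);
//     cb.get_local(LocalId(0));
//     cb.get_local(LocalId(1));
//     cb.i32_add();
//     cb.build_fn_header_and_footer(&[], 0, None); // no extra locals, no stack frame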


@@ -1,672 +0,0 @@
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use super::parse::parse_fixed_size_items;
use super::sections::SectionId;
use super::serialize::{overwrite_padded_i32, overwrite_padded_u32};
use crate::wasm_module::parse::{Parse, ParseError, SkipBytes};
/*******************************************************************
*
* Relocation sections
*
* https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#relocation-sections
*
*******************************************************************/
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum IndexRelocType {
/// a function index encoded as a 5-byte [varuint32]. Used for the immediate argument of a `call` instruction.
FunctionIndexLeb = 0,
/// a function table index encoded as a 5-byte [varint32].
/// Used to refer to the immediate argument of a `i32.const` instruction, e.g. taking the address of a function.
TableIndexSleb = 1,
/// a function table index encoded as a [uint32], e.g. taking the address of a function in a static data initializer.
TableIndexI32 = 2,
/// a type index encoded as a 5-byte [varuint32], e.g. the type immediate in a `call_indirect`.
TypeIndexLeb = 6,
/// a global index encoded as a 5-byte [varuint32], e.g. the index immediate in a `get_global`.
GlobalIndexLeb = 7,
/// an event index encoded as a 5-byte [varuint32]. Used for the immediate argument of a `throw` and `if_except` instruction.
EventIndexLeb = 10,
/// a global index encoded as [uint32].
GlobalIndexI32 = 13,
/// the 64-bit counterpart of `R_WASM_TABLE_INDEX_SLEB`. A function table index encoded as a 10-byte [varint64].
/// Used to refer to the immediate argument of a `i64.const` instruction, e.g. taking the address of a function in Wasm64.
TableIndexSleb64 = 18,
/// the 64-bit counterpart of `R_WASM_TABLE_INDEX_I32`.
/// A function table index encoded as a [uint64], e.g. taking the address of a function in a static data initializer.
TableIndexI64 = 19,
/// a table number encoded as a 5-byte [varuint32]. Used for the table immediate argument in the table.* instructions.
TableNumberLeb = 20,
}
impl IndexRelocType {
fn from_u8(x: u8) -> Option<IndexRelocType> {
match x {
0 => Some(Self::FunctionIndexLeb),
1 => Some(Self::TableIndexSleb),
2 => Some(Self::TableIndexI32),
6 => Some(Self::TypeIndexLeb),
7 => Some(Self::GlobalIndexLeb),
10 => Some(Self::EventIndexLeb),
13 => Some(Self::GlobalIndexI32),
18 => Some(Self::TableIndexSleb64),
19 => Some(Self::TableIndexI64),
20 => Some(Self::TableNumberLeb),
_ => None,
}
}
}
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum OffsetRelocType {
/// a linear memory index encoded as a 5-byte [varuint32].
/// Used for the immediate argument of a `load` or `store` instruction, e.g. directly loading from or storing to a C++ global.
MemoryAddrLeb = 3,
/// a linear memory index encoded as a 5-byte [varint32].
/// Used for the immediate argument of a `i32.const` instruction, e.g. taking the address of a C++ global.
MemoryAddrSleb = 4,
/// a linear memory index encoded as a [uint32], e.g. taking the address of a C++ global in a static data initializer.
MemoryAddrI32 = 5,
/// a byte offset within code section for the specific function encoded as a [uint32].
/// The offsets start at the actual function code excluding its size field.
FunctionOffsetI32 = 8,
/// a byte offset from start of the specified section encoded as a [uint32].
SectionOffsetI32 = 9,
/// the 64-bit counterpart of `R_WASM_MEMORY_ADDR_LEB`. A 64-bit linear memory index encoded as a 10-byte [varuint64],
/// Used for the immediate argument of a `load` or `store` instruction on a 64-bit linear memory array.
MemoryAddrLeb64 = 14,
/// the 64-bit counterpart of `R_WASM_MEMORY_ADDR_SLEB`. A 64-bit linear memory index encoded as a 10-byte [varint64].
/// Used for the immediate argument of a `i64.const` instruction.
MemoryAddrSleb64 = 15,
/// the 64-bit counterpart of `R_WASM_MEMORY_ADDR`. A 64-bit linear memory index encoded as a [uint64],
/// e.g. taking the 64-bit address of a C++ global in a static data initializer.
MemoryAddrI64 = 16,
}
impl OffsetRelocType {
fn from_u8(x: u8) -> Option<OffsetRelocType> {
match x {
3 => Some(Self::MemoryAddrLeb),
4 => Some(Self::MemoryAddrSleb),
5 => Some(Self::MemoryAddrI32),
8 => Some(Self::FunctionOffsetI32),
9 => Some(Self::SectionOffsetI32),
14 => Some(Self::MemoryAddrLeb64),
15 => Some(Self::MemoryAddrSleb64),
16 => Some(Self::MemoryAddrI64),
_ => None,
}
}
}
#[derive(Debug, Clone)]
pub enum RelocationEntry {
Index {
type_id: IndexRelocType,
offset: u32, // offset 0 means the next byte after section id and size
symbol_index: u32, // index in symbol table
},
Offset {
type_id: OffsetRelocType,
offset: u32, // offset 0 means the next byte after section id and size
symbol_index: u32, // index in symbol table
addend: i32, // addend to add to the address
},
}
impl Parse<()> for RelocationEntry {
fn parse(_: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let type_id_byte = bytes[*cursor];
*cursor += 1;
let offset = u32::parse((), bytes, cursor)?;
let symbol_index = u32::parse((), bytes, cursor)?;
if let Some(type_id) = IndexRelocType::from_u8(type_id_byte) {
return Ok(RelocationEntry::Index {
type_id,
offset,
symbol_index,
});
}
if let Some(type_id) = OffsetRelocType::from_u8(type_id_byte) {
let addend = i32::parse((), bytes, cursor)?;
return Ok(RelocationEntry::Offset {
type_id,
offset,
symbol_index,
addend,
});
}
Err(ParseError {
offset: *cursor,
message: format!("Unknown relocation type 0x{:2x}", type_id_byte),
})
}
}
#[derive(Debug)]
pub struct RelocationSection<'a> {
pub name: &'a str,
/// The *index* (not ID!) of the target section in the module
pub target_section_index: u32,
pub entries: Vec<'a, RelocationEntry>,
}
impl<'a> RelocationSection<'a> {
fn new(arena: &'a Bump, name: &'a str) -> Self {
RelocationSection {
name,
target_section_index: 0,
entries: bumpalo::vec![in arena],
}
}
pub fn apply_relocs_u32(&self, section_bytes: &mut [u8], sym_index: u32, value: u32) {
for entry in self.entries.iter() {
match entry {
RelocationEntry::Index {
type_id,
offset,
symbol_index,
} if *symbol_index == sym_index => {
use IndexRelocType::*;
let idx = *offset as usize;
match type_id {
FunctionIndexLeb | TypeIndexLeb | GlobalIndexLeb | EventIndexLeb
| TableNumberLeb => {
overwrite_padded_u32(&mut section_bytes[idx..], value);
}
_ => todo!("Linking relocation type {:?}", type_id),
}
}
RelocationEntry::Offset {
type_id,
offset,
symbol_index,
addend,
} if *symbol_index == sym_index => {
use OffsetRelocType::*;
let idx = *offset as usize;
match type_id {
MemoryAddrLeb => {
overwrite_padded_u32(&mut section_bytes[idx..], value + *addend as u32);
}
MemoryAddrSleb => {
overwrite_padded_i32(&mut section_bytes[idx..], value as i32 + *addend);
}
_ => todo!("Linking relocation type {:?}", type_id),
}
}
_ => {}
}
}
}
}
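// Usage sketch (hypothetical values): if symbol table entry #5 has a
// FunctionIndexLeb relocation recorded at offset 12, then
// `reloc_section.apply_relocs_u32(code_bytes, 5, 42)` overwrites the 5-byte
// padded LEB128 starting at code_bytes[12] with the value 42.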
type RelocCtx<'a> = (&'a Bump, &'static str);
impl<'a> Parse<RelocCtx<'a>> for RelocationSection<'a> {
fn parse(ctx: RelocCtx<'a>, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let (arena, name) = ctx;
if *cursor > bytes.len() || bytes[*cursor] != SectionId::Custom as u8 {
// The section we're looking for is missing, which is the same as being empty.
return Ok(RelocationSection::new(arena, name));
}
*cursor += 1;
u32::skip_bytes(bytes, cursor)?; // section body size
let actual_name = <&'a str>::parse(arena, bytes, cursor)?;
if actual_name != name {
// The section we're looking for is missing, which is the same as being empty.
return Ok(RelocationSection::new(arena, name));
}
let target_section_index = u32::parse((), bytes, cursor)?;
let entries = parse_fixed_size_items(arena, bytes, cursor)?;
Ok(RelocationSection {
name,
target_section_index,
entries,
})
}
}
/*******************************************************************
*
* Linking section
*
* https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#linking-metadata-section
*
*******************************************************************/
/// Linking metadata for data segments
#[derive(Debug)]
pub struct LinkingSegment<'a> {
pub name: &'a str,
pub align_bytes_pow2: u32,
pub flags: u32,
}
impl<'a> Parse<&'a Bump> for LinkingSegment<'a> {
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let name = <&'a str>::parse(arena, bytes, cursor)?;
let align_bytes_pow2 = u32::parse((), bytes, cursor)?;
let flags = u32::parse((), bytes, cursor)?;
Ok(LinkingSegment {
name,
align_bytes_pow2,
flags,
})
}
}
/// Linking metadata for init (start) functions
#[derive(Debug)]
pub struct LinkingInitFunc {
pub priority: u32,
pub symbol_index: u32, // index in the symbol table, not the function index
}
//------------------------------------------------
// Common data
//------------------------------------------------
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum ComdatSymKind {
Data = 0,
Function = 1,
Global = 2,
Event = 3,
Table = 4,
Section = 5,
}
#[derive(Debug)]
pub struct ComdatSym {
pub kind: ComdatSymKind,
pub index: u32,
}
/// Linking metadata for common data
/// A COMDAT group may contain one or more functions, data segments, and/or custom sections.
/// The linker will include all of these elements with a given group name from one object file,
/// and will exclude any element with this group name from all other object files.
#[allow(dead_code)]
#[derive(Debug)]
pub struct LinkingComdat<'a> {
name: &'a str,
flags: u32,
syms: Vec<'a, ComdatSym>,
}
//------------------------------------------------
// Symbol table
//------------------------------------------------
/// Indicating that this is a weak symbol. When
/// linking multiple modules defining the same symbol, all weak definitions are
/// discarded if any strong definitions exist; then if multiple weak definitions
/// exist all but one (unspecified) are discarded; and finally it is an error if
/// more than one definition remains.
pub const WASM_SYM_BINDING_WEAK: u32 = 1;
/// Indicating that this is a local symbol (this is exclusive with `WASM_SYM_BINDING_WEAK`).
/// Local symbols are not to be exported, or linked to other modules/sections.
/// The names of all non-local symbols must be unique, but the names of local symbols
/// are not considered for uniqueness. A local function or global symbol cannot reference an import.
pub const WASM_SYM_BINDING_LOCAL: u32 = 2;
/// Indicating that this is a hidden symbol.
/// Hidden symbols are not to be exported when performing the final link, but
/// may be linked to other modules.
pub const WASM_SYM_VISIBILITY_HIDDEN: u32 = 4;
/// Indicating that this symbol is not defined.
/// For non-data symbols, this must match whether the symbol is an import
/// or is defined; for data symbols, determines whether a segment is specified.
pub const WASM_SYM_UNDEFINED: u32 = 0x10; // required if the symbol refers to an import
/// The symbol is intended to be exported from the
/// wasm module to the host environment. This differs from the visibility flags
/// in that it affects the static linker.
pub const WASM_SYM_EXPORTED: u32 = 0x20;
/// The symbol uses an explicit symbol name,
/// rather than reusing the name from a wasm import. This allows it to remap
/// imports from foreign WebAssembly modules into local symbols with different
/// names.
pub const WASM_SYM_EXPLICIT_NAME: u32 = 0x40; // use the name from the symbol table, not from the import
/// The symbol is intended to be included in the
/// linker output, regardless of whether it is used by the program.
pub const WASM_SYM_NO_STRIP: u32 = 0x80;
#[derive(Clone, Debug)]
pub enum WasmObjectSymbol<'a> {
ExplicitlyNamed {
flags: u32,
index: u32,
name: &'a str,
},
ImplicitlyNamed {
flags: u32,
index: u32,
},
}
impl<'a> Parse<&'a Bump> for WasmObjectSymbol<'a> {
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let flags = u32::parse((), bytes, cursor)?;
let index = u32::parse((), bytes, cursor)?;
// If a symbol refers to an import, then we already have the name in the import section.
// The linking section doesn't repeat it, unless the "explicit name" flag is set (used for renaming).
// ("Undefined symbol" is linker jargon, and "import" is Wasm jargon. For functions, they're equivalent.)
let is_import = (flags & WASM_SYM_UNDEFINED) != 0;
let external_syms_have_explicit_names = (flags & WASM_SYM_EXPLICIT_NAME) != 0;
let has_explicit_name = !is_import || external_syms_have_explicit_names;
if has_explicit_name {
let name = <&'a str>::parse(arena, bytes, cursor)?;
Ok(Self::ExplicitlyNamed { flags, index, name })
} else {
Ok(Self::ImplicitlyNamed { flags, index })
}
}
}
#[derive(Clone, Debug)]
pub enum DataSymbol<'a> {
Defined {
flags: u32,
name: &'a str,
segment_index: u32,
segment_offset: u32,
size: u32,
},
Imported {
flags: u32,
name: &'a str,
},
}
impl<'a> Parse<&'a Bump> for DataSymbol<'a> {
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let flags = u32::parse((), bytes, cursor)?;
let name = <&'a str>::parse(arena, bytes, cursor)?;
if (flags & WASM_SYM_UNDEFINED) != 0 {
Ok(Self::Imported { flags, name })
} else {
let segment_index = u32::parse((), bytes, cursor)?;
let segment_offset = u32::parse((), bytes, cursor)?;
let size = u32::parse((), bytes, cursor)?;
Ok(Self::Defined {
flags,
name,
segment_index,
segment_offset,
size,
})
}
}
}
/// We don't use this, but we need it in the symbol table so the indices are correct!
/// If we ever use it, note that it refers to section index, not section id.
#[derive(Clone, Debug)]
pub struct SectionSymbol {
_flags: u32,
_index: u32,
}
impl Parse<()> for SectionSymbol {
fn parse(_: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let flags = u32::parse((), bytes, cursor)?;
let index = u32::parse((), bytes, cursor)?;
Ok(SectionSymbol {
_flags: flags,
_index: index,
})
}
}
#[derive(Clone, Debug)]
pub enum SymInfo<'a> {
Function(WasmObjectSymbol<'a>),
Data(DataSymbol<'a>),
Global(WasmObjectSymbol<'a>),
Section(SectionSymbol),
Event(WasmObjectSymbol<'a>),
Table(WasmObjectSymbol<'a>),
}
impl<'a> SymInfo<'a> {
pub fn name(&self) -> Option<&'a str> {
match self {
Self::Function(WasmObjectSymbol::ExplicitlyNamed { name, .. }) => Some(name),
Self::Data(DataSymbol::Defined { name, .. }) => Some(name),
Self::Data(DataSymbol::Imported { name, .. }) => Some(name),
Self::Global(WasmObjectSymbol::ExplicitlyNamed { name, .. }) => Some(name),
Self::Event(WasmObjectSymbol::ExplicitlyNamed { name, .. }) => Some(name),
Self::Table(WasmObjectSymbol::ExplicitlyNamed { name, .. }) => Some(name),
_ => None, // ImplicitlyNamed or SectionSymbols
}
}
}
#[repr(u8)]
#[derive(Debug)]
enum SymType {
Function = 0,
Data = 1,
Global = 2,
Section = 3,
Event = 4,
Table = 5,
}
impl Parse<()> for SymType {
fn parse(_: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let offset = *cursor;
let type_id = bytes[offset];
*cursor += 1;
match type_id {
0 => Ok(Self::Function),
1 => Ok(Self::Data),
2 => Ok(Self::Global),
3 => Ok(Self::Section),
4 => Ok(Self::Event),
5 => Ok(Self::Table),
x => Err(ParseError {
offset,
message: format!("Invalid symbol info type in linking section: {}", x),
}),
}
}
}
impl<'a> Parse<&'a Bump> for SymInfo<'a> {
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let type_id = SymType::parse((), bytes, cursor)?;
match type_id {
SymType::Function => WasmObjectSymbol::parse(arena, bytes, cursor).map(Self::Function),
SymType::Data => DataSymbol::parse(arena, bytes, cursor).map(Self::Data),
SymType::Global => WasmObjectSymbol::parse(arena, bytes, cursor).map(Self::Global),
SymType::Section => SectionSymbol::parse((), bytes, cursor).map(Self::Section),
SymType::Event => WasmObjectSymbol::parse(arena, bytes, cursor).map(Self::Event),
SymType::Table => WasmObjectSymbol::parse(arena, bytes, cursor).map(Self::Table),
}
}
}
//----------------------------------------------------------------
// Linking subsections
//----------------------------------------------------------------
#[repr(u8)]
#[derive(Debug)]
enum SubSectionId {
SegmentInfo = 5,
InitFuncs = 6,
ComdatInfo = 7,
SymbolTable = 8,
}
impl Parse<()> for SubSectionId {
fn parse(_: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let id = bytes[*cursor];
let offset = *cursor;
*cursor += 1;
match id {
5 => Ok(Self::SegmentInfo),
6 => Ok(Self::InitFuncs),
7 => Ok(Self::ComdatInfo),
8 => Ok(Self::SymbolTable),
x => Err(ParseError {
offset,
message: format!("Invalid linking subsection ID {}", x),
}),
}
}
}
//----------------------------------------------------------------
// Linking metadata section
//----------------------------------------------------------------
const LINKING_VERSION: u8 = 2;
/// The spec describes this in a very weird way, so we're doing something saner.
/// They call it an "array" of subsections with different variants, BUT this "array"
/// has an implicit length, and none of the items can be repeated, so a struct is better.
/// No point writing code to "find" the symbol table, when we know there's exactly one.
/// The only one we really use is the symbol table
#[derive(Debug)]
pub struct LinkingSection<'a> {
pub symbol_table: Vec<'a, SymInfo<'a>>,
pub segment_info: Vec<'a, LinkingSegment<'a>>,
pub init_funcs: Vec<'a, LinkingInitFunc>,
pub comdat_info: Vec<'a, LinkingComdat<'a>>,
}
impl<'a> LinkingSection<'a> {
const NAME: &'static str = "linking";
pub fn new(arena: &'a Bump) -> Self {
LinkingSection {
symbol_table: Vec::with_capacity_in(16, arena),
segment_info: Vec::with_capacity_in(16, arena),
init_funcs: Vec::with_capacity_in(0, arena),
comdat_info: Vec::with_capacity_in(0, arena),
}
}
pub fn find_internal_symbol(&self, target_name: &str) -> Result<usize, String> {
self.symbol_table
.iter()
.position(|sym| sym.name() == Some(target_name))
.ok_or_else(|| {
format!(
"Linking failed! Can't find `{}` in host symbol table",
target_name
)
})
}
pub fn find_and_reindex_imported_fn(
&mut self,
old_fn_index: u32,
new_fn_index: u32,
) -> Result<u32, String> {
self.symbol_table
.iter_mut()
.position(|sym| match sym {
SymInfo::Function(WasmObjectSymbol::ImplicitlyNamed { flags, index, .. })
| SymInfo::Function(WasmObjectSymbol::ExplicitlyNamed { flags, index, .. }) => {
let found = *flags & WASM_SYM_UNDEFINED != 0 && *index == old_fn_index;
if found {
*index = new_fn_index;
}
found
}
_ => false,
})
.map(|sym_index| sym_index as u32)
.ok_or_else(|| {
format!(
"Linking failed! Can't find fn #{} in host symbol table",
old_fn_index
)
})
}
}
impl<'a> Parse<&'a Bump> for LinkingSection<'a> {
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
if *cursor > bytes.len() || bytes[*cursor] != SectionId::Custom as u8 {
return Ok(LinkingSection::new(arena));
}
*cursor += 1;
let body_size = u32::parse((), bytes, cursor)?;
let section_end = *cursor + body_size as usize;
// Don't fail if it's the wrong section. Let the WasmModule validate presence/absence of sections
let actual_name = <&'a str>::parse(arena, bytes, cursor)?;
if actual_name != Self::NAME {
return Ok(LinkingSection::new(arena));
}
let linking_version = bytes[*cursor];
if linking_version != LINKING_VERSION {
return Err(ParseError {
offset: *cursor,
message: format!(
"This file uses version {} of Wasm linking data, but only version {} is supported.",
linking_version, LINKING_VERSION
),
});
}
*cursor += 1;
// Linking section is encoded as an array of subsections, but we prefer a struct internally.
// The order is not defined in the spec, so we loop over them and organise them into our struct.
// In theory, there could even be more than one of each. That would be weird, but easy to handle.
let mut section = LinkingSection::new(arena);
while *cursor < section_end {
let subsection_id = SubSectionId::parse((), bytes, cursor)?;
let len = u32::parse((), bytes, cursor)?; // bytes in the subsection
match subsection_id {
SubSectionId::SymbolTable => {
let count = u32::parse((), bytes, cursor)?;
for _ in 0..count {
let item = SymInfo::parse(arena, bytes, cursor)?;
section.symbol_table.push(item);
}
}
SubSectionId::SegmentInfo => {
let count = u32::parse((), bytes, cursor)?;
for _ in 0..count {
let item = LinkingSegment::parse(arena, bytes, cursor)?;
section.segment_info.push(item);
}
}
SubSectionId::InitFuncs | SubSectionId::ComdatInfo => {
// We don't use these subsections, just skip over them.
*cursor += len as usize;
}
}
}
Ok(section)
}
}


@@ -1,595 +0,0 @@
pub mod code_builder;
pub mod linking;
pub mod opcodes;
pub mod parse;
pub mod sections;
pub mod serialize;
use std::iter::repeat;
pub use code_builder::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
pub use linking::{OffsetRelocType, RelocationEntry, SymInfo};
pub use sections::{ConstExpr, Export, ExportType, Global, GlobalType, Signature};
use bitvec::vec::BitVec;
use bumpalo::{collections::Vec, Bump};
use crate::DEBUG_SETTINGS;
use self::linking::{IndexRelocType, LinkingSection, RelocationSection, WasmObjectSymbol};
use self::parse::{Parse, ParseError};
use self::sections::{
CodeSection, DataSection, ElementSection, ExportSection, FunctionSection, GlobalSection,
ImportDesc, ImportSection, MemorySection, NameSection, OpaqueSection, Section, SectionId,
TableSection, TypeSection,
};
use self::serialize::{SerialBuffer, Serialize};
/// A representation of the WebAssembly binary file format
/// https://webassembly.github.io/spec/core/binary/modules.html
#[derive(Debug)]
pub struct WasmModule<'a> {
pub types: TypeSection<'a>,
pub import: ImportSection<'a>,
pub function: FunctionSection<'a>,
pub table: TableSection,
pub memory: MemorySection<'a>,
pub global: GlobalSection<'a>,
pub export: ExportSection<'a>,
pub start: OpaqueSection<'a>,
pub element: ElementSection<'a>,
pub code: CodeSection<'a>,
pub data: DataSection<'a>,
pub linking: LinkingSection<'a>,
pub reloc_code: RelocationSection<'a>,
pub reloc_data: RelocationSection<'a>,
pub names: NameSection<'a>,
}
impl<'a> WasmModule<'a> {
pub const WASM_VERSION: u32 = 1;
/// Create entries in the Type and Function sections for a function signature
pub fn add_function_signature(&mut self, signature: Signature<'a>) {
let index = self.types.insert(signature);
self.function.add_sig(index);
}
/// Serialize the module to bytes
pub fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_u8(0);
buffer.append_slice("asm".as_bytes());
buffer.write_unencoded_u32(Self::WASM_VERSION);
self.types.serialize(buffer);
self.import.serialize(buffer);
self.function.serialize(buffer);
if !self.element.is_empty() {
self.table.serialize(buffer);
}
self.memory.serialize(buffer);
self.global.serialize(buffer);
self.export.serialize(buffer);
self.start.serialize(buffer);
self.element.serialize(buffer);
self.code.serialize(buffer);
self.data.serialize(buffer);
self.names.serialize(buffer);
}
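// For reference, the eight header bytes written at the top of serialize are:
//
//     0x00, 0x61, 0x73, 0x6d,  // "\0asm" magic number
//     0x01, 0x00, 0x00, 0x00,  // WASM_VERSION 1, as an unencoded little-endian u32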
/// Module size in bytes (assuming no linker data)
/// May be slightly overestimated. Intended for allocating buffer capacity.
pub fn size(&self) -> usize {
self.types.size()
+ self.import.size()
+ self.function.size()
+ self.table.size()
+ self.memory.size()
+ self.global.size()
+ self.export.size()
+ self.start.size()
+ self.element.size()
+ self.code.size()
+ self.data.size()
+ self.names.size()
}
    pub fn preload(arena: &'a Bump, bytes: &[u8]) -> Result<Self, ParseError> {
        // Guard against input too short to contain a header, so the slices below can't panic
        if bytes.len() < 8 {
            return Err(ParseError {
                offset: 0,
                message: "This file is too short to be a WebAssembly binary.".into(),
            });
        }
        let is_valid_magic_number = &bytes[0..4] == "\0asm".as_bytes();
        let is_valid_version = bytes[4..8] == Self::WASM_VERSION.to_le_bytes();
        if !is_valid_magic_number || !is_valid_version {
            return Err(ParseError {
                offset: 0,
                message: "This file is not a WebAssembly binary. The file header is not valid."
                    .into(),
            });
        }
let mut cursor: usize = 8;
let types = TypeSection::parse(arena, bytes, &mut cursor)?;
let import = ImportSection::parse(arena, bytes, &mut cursor)?;
let function = FunctionSection::parse(arena, bytes, &mut cursor)?;
let table = TableSection::parse((), bytes, &mut cursor)?;
let memory = MemorySection::parse(arena, bytes, &mut cursor)?;
let global = GlobalSection::parse(arena, bytes, &mut cursor)?;
let export = ExportSection::parse(arena, bytes, &mut cursor)?;
let start = OpaqueSection::parse((arena, SectionId::Start), bytes, &mut cursor)?;
let element = ElementSection::parse(arena, bytes, &mut cursor)?;
let _data_count = OpaqueSection::parse((arena, SectionId::DataCount), bytes, &mut cursor)?;
let code = CodeSection::parse(arena, bytes, &mut cursor)?;
let data = DataSection::parse(arena, bytes, &mut cursor)?;
let linking = LinkingSection::parse(arena, bytes, &mut cursor)?;
let reloc_code = RelocationSection::parse((arena, "reloc.CODE"), bytes, &mut cursor)?;
let reloc_data = RelocationSection::parse((arena, "reloc.DATA"), bytes, &mut cursor)?;
let names = NameSection::parse(arena, bytes, &mut cursor)?;
let mut module_errors = String::new();
if types.is_empty() {
module_errors.push_str("Missing Type section\n");
}
if function.signatures.is_empty() {
module_errors.push_str("Missing Function section\n");
}
if code.preloaded_bytes.is_empty() {
module_errors.push_str("Missing Code section\n");
}
if linking.symbol_table.is_empty() {
module_errors.push_str("Missing \"linking\" Custom section\n");
}
if reloc_code.entries.is_empty() {
module_errors.push_str("Missing \"reloc.CODE\" Custom section\n");
}
if global.count != 0 {
let global_err_msg =
format!("All globals in a relocatable Wasm module should be imported, but found {} internally defined", global.count);
module_errors.push_str(&global_err_msg);
}
if !module_errors.is_empty() {
return Err(ParseError {
offset: 0,
message: format!("{}\n{}\n{}",
"The host file has the wrong structure. I need a relocatable WebAssembly binary file.",
"If you're using wasm-ld, try the --relocatable option.",
module_errors,
)
});
}
Ok(WasmModule {
types,
import,
function,
table,
memory,
global,
export,
start,
element,
code,
data,
linking,
reloc_code,
reloc_data,
names,
})
}
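    // Hypothetical usage sketch for `preload` (the file path and error handling
    // below are illustrative only, not part of this module):
    //
    //     let arena = Bump::new();
    //     let host_bytes = std::fs::read("host.wasm").unwrap();
    //     let module = WasmModule::preload(&arena, &host_bytes)
    //         .unwrap_or_else(|e| panic!("Parse error at offset {}: {}", e.offset, e.message));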
pub fn eliminate_dead_code(&mut self, arena: &'a Bump, called_host_fns: BitVec<usize>) {
if DEBUG_SETTINGS.skip_dead_code_elim {
return;
}
//
// Mark all live host functions
//
let import_count = self.import.imports.len();
let host_fn_min = import_count as u32 + self.code.dead_import_dummy_count;
let host_fn_max = host_fn_min + self.code.preloaded_count;
// All functions exported to JS must be kept alive
let exported_fns = self
.export
.exports
.iter()
.filter(|ex| ex.ty == ExportType::Func)
.map(|ex| ex.index);
// The ElementSection lists all functions whose "address" is taken.
// Find their signatures so we can trace all possible indirect calls.
// (The call_indirect instruction specifies a function signature.)
let indirect_callees_and_signatures = Vec::from_iter_in(
self.element
.segments
.iter()
.flat_map(|seg| seg.fn_indices.iter().copied())
.map(|fn_index| {
let sig = self.function.signatures[fn_index as usize - import_count];
(fn_index, sig)
}),
arena,
);
// Trace callees of the live functions, and mark those as live too
let live_flags = self.trace_live_host_functions(
arena,
called_host_fns,
exported_fns,
indirect_callees_and_signatures,
host_fn_min,
host_fn_max,
);
//
// Remove all unused JS imports
// We don't want to force the web page to provide dummy JS functions, it's a pain!
//
let mut live_import_fns = Vec::with_capacity_in(import_count, arena);
let mut fn_index = 0;
let mut eliminated_import_count = 0;
self.import.imports.retain(|import| {
if !matches!(import.description, ImportDesc::Func { .. }) {
true
} else {
let live = live_flags[fn_index];
if live {
live_import_fns.push(fn_index);
} else {
eliminated_import_count += 1;
}
fn_index += 1;
live
}
});
// Update the count of JS imports to replace with Wasm dummies
// (In addition to the ones we already replaced for each host-to-app call)
self.code.dead_import_dummy_count += eliminated_import_count as u32;
// FunctionSection
// Insert function signatures for the new Wasm dummy functions
let signature_count = self.function.signatures.len();
self.function
.signatures
.extend(repeat(0).take(eliminated_import_count));
self.function
.signatures
.copy_within(0..signature_count, eliminated_import_count);
// NameSection
// For each live import, swap its debug name to the right position
for (new_index, &old_index) in live_import_fns.iter().enumerate() {
let old_name: &str = self.names.function_names[old_index].1;
let new_name: &str = self.names.function_names[new_index].1;
self.names.function_names[new_index].1 = old_name;
self.names.function_names[old_index].1 = new_name;
}
// Relocate calls from host to JS imports
// This must happen *before* we run dead code elimination on the code section,
// so that byte offsets in the host's linking data will still be valid.
for (new_index, &old_index) in live_import_fns.iter().enumerate() {
if new_index == old_index {
continue;
}
let sym_index = self
.linking
.find_and_reindex_imported_fn(old_index as u32, new_index as u32)
.unwrap();
self.reloc_code.apply_relocs_u32(
&mut self.code.preloaded_bytes,
sym_index,
new_index as u32,
);
}
// Relocate calls from Roc app to JS imports
for code_builder in self.code.code_builders.iter_mut() {
code_builder.apply_import_relocs(&live_import_fns);
}
//
// Dead code elimination. Replace dead functions with tiny dummies.
// Live function indices are unchanged, so no relocations are needed.
//
let dummy = CodeBuilder::dummy(arena);
let mut dummy_bytes = Vec::with_capacity_in(dummy.size(), arena);
dummy.serialize(&mut dummy_bytes);
let mut buffer = Vec::with_capacity_in(self.code.preloaded_bytes.len(), arena);
self.code.preloaded_count.serialize(&mut buffer);
for (i, fn_index) in (host_fn_min..host_fn_max).enumerate() {
if live_flags[fn_index as usize] {
let code_start = self.code.preloaded_offsets[i] as usize;
let code_end = self.code.preloaded_offsets[i + 1] as usize;
buffer.extend_from_slice(&self.code.preloaded_bytes[code_start..code_end]);
} else {
buffer.extend_from_slice(&dummy_bytes);
}
}
self.code.preloaded_bytes = buffer;
}
fn trace_live_host_functions<I: Iterator<Item = u32>>(
&self,
arena: &'a Bump,
called_host_fns: BitVec<usize>,
exported_fns: I,
indirect_callees_and_signatures: Vec<'a, (u32, u32)>,
host_fn_min: u32,
host_fn_max: u32,
) -> BitVec<usize> {
let reloc_len = self.reloc_code.entries.len();
let mut call_offsets_and_symbols = Vec::with_capacity_in(reloc_len, arena);
let mut indirect_call_offsets_and_types = Vec::with_capacity_in(reloc_len, arena);
for entry in self.reloc_code.entries.iter() {
match entry {
RelocationEntry::Index {
type_id: IndexRelocType::FunctionIndexLeb,
offset,
symbol_index,
} => call_offsets_and_symbols.push((*offset, *symbol_index)),
RelocationEntry::Index {
type_id: IndexRelocType::TypeIndexLeb,
offset,
symbol_index,
} => indirect_call_offsets_and_types.push((*offset, *symbol_index)),
_ => {}
}
}
// Create a fast lookup from symbol index to function index, for the inner loop below
// (Do all the matching and dereferencing outside the loop)
let symbol_fn_indices: Vec<'a, u32> = Vec::from_iter_in(
self.linking
.symbol_table
.iter()
.map(|sym_info| match sym_info {
SymInfo::Function(WasmObjectSymbol::ExplicitlyNamed { index, .. }) => *index,
SymInfo::Function(WasmObjectSymbol::ImplicitlyNamed { index, .. }) => *index,
_ => u32::MAX, // just use a dummy value for non-function symbols
}),
arena,
);
// Loop variables for the main loop below
let mut live_flags = BitVec::repeat(false, called_host_fns.len());
let mut next_pass_fns = BitVec::repeat(false, called_host_fns.len());
let mut current_pass_fns = called_host_fns;
for index in exported_fns.filter(|i| *i < host_fn_max) {
current_pass_fns.set(index as usize, true);
}
while current_pass_fns.count_ones() > 0 {
// All functions in this pass are live (they have been reached by earlier passes)
debug_assert_eq!(live_flags.len(), current_pass_fns.len());
live_flags |= &current_pass_fns;
// For each live function in the current pass
for fn_index in current_pass_fns.iter_ones() {
// Skip JS imports and Roc functions
if fn_index < host_fn_min as usize || fn_index >= host_fn_max as usize {
continue;
}
// Find where the function body is
let offset_index = fn_index - host_fn_min as usize;
let code_start = self.code.preloaded_offsets[offset_index];
let code_end = self.code.preloaded_offsets[offset_index + 1];
// For each call in the body
for (offset, symbol) in call_offsets_and_symbols.iter() {
if *offset > code_start && *offset < code_end {
// Find out which other function is being called
let callee = symbol_fn_indices[*symbol as usize];
// If it's not already marked live, include it in the next pass
if live_flags.get(callee as usize).as_deref() == Some(&false) {
next_pass_fns.set(callee as usize, true);
}
}
}
// For each indirect call in the body
for (offset, signature) in indirect_call_offsets_and_types.iter() {
if *offset > code_start && *offset < code_end {
// Find which indirect callees have the right type signature
let potential_callees = indirect_callees_and_signatures
.iter()
.filter(|(_, sig)| sig == signature)
.map(|(f, _)| *f);
// Mark them all as live
for f in potential_callees {
if live_flags.get(f as usize).as_deref() == Some(&false) {
next_pass_fns.set(f as usize, true);
}
}
}
}
}
std::mem::swap(&mut current_pass_fns, &mut next_pass_fns);
next_pass_fns.fill(false);
}
live_flags
}
pub fn relocate_internal_symbol(&mut self, sym_name: &str, value: u32) -> Result<u32, String> {
self.linking
.find_internal_symbol(sym_name)
.map(|sym_index| {
self.reloc_code.apply_relocs_u32(
&mut self.code.preloaded_bytes,
sym_index as u32,
value,
);
sym_index as u32
})
}
/// Linking steps for host-to-app functions like `roc__mainForHost_1_exposed`
/// (See further explanation in the gen_wasm README)
/// - Remove the target function from the ImportSection. It's not a JS import but the host declared it as one.
/// - Update all of its call sites to the new index in the app
/// - Swap the _last_ JavaScript import into the slot we just vacated
/// - Update all call sites for the swapped JS function
/// - Update the FunctionSection to show the correct type signature for the swapped JS function
/// - Insert a dummy function in the CodeSection, at the same index as the swapped JS function
pub fn link_host_to_app_calls(
&mut self,
arena: &'a Bump,
host_to_app_map: Vec<'a, (&'a str, u32)>,
) {
for (app_fn_name, app_fn_index) in host_to_app_map.into_iter() {
// Find the host import, and the last imported function to swap with it.
// Not all imports are functions, so the function index and import index may be different
// (We could support imported globals if we relocated them, although we don't at the time of this comment)
let mut host_fn = None;
let mut swap_fn = None;
self.import
.imports
.iter()
.enumerate()
.filter(|(_import_index, import)| {
matches!(import.description, ImportDesc::Func { .. })
})
.enumerate()
.for_each(|(fn_index, (import_index, import))| {
swap_fn = Some((import_index, fn_index));
if import.name == app_fn_name {
host_fn = Some((import_index, fn_index));
}
});
let (host_import_index, host_fn_index) = match host_fn {
Some(x) => x,
None => {
// The Wasm host doesn't call our app function, so it must be called from JS. Export it.
self.export.append(Export {
name: app_fn_name,
ty: ExportType::Func,
index: app_fn_index,
});
continue;
}
};
let (swap_import_index, swap_fn_index) = swap_fn.unwrap();
// Note: swap_remove will not work, because some imports may not be functions.
let swap_import = self.import.imports.remove(swap_import_index);
if swap_import_index != host_import_index {
self.import.imports[host_import_index] = swap_import;
}
// Find the host's symbol for the function we're linking
let host_sym_index = self
.linking
.find_and_reindex_imported_fn(host_fn_index as u32, app_fn_index)
.unwrap();
// Update calls to use the app function instead of the host import
self.reloc_code.apply_relocs_u32(
&mut self.code.preloaded_bytes,
host_sym_index,
app_fn_index,
);
if swap_import_index != host_import_index {
// get the name using the old host import index because we already swapped it!
let swap_fn_name = self.import.imports[host_import_index].name;
// Find the symbol for the swapped JS import
let swap_sym_index = self
.linking
.find_and_reindex_imported_fn(swap_fn_index as u32, host_fn_index as u32)
.unwrap();
// Update calls to the swapped JS import
self.reloc_code.apply_relocs_u32(
&mut self.code.preloaded_bytes,
swap_sym_index,
host_fn_index as u32,
);
// Update the name in the debug info
if let Some((_, debug_name)) = self
.names
.function_names
.iter_mut()
.find(|(i, _)| *i as usize == host_fn_index)
{
debug_name.clone_from(&swap_fn_name);
}
}
            // Record that a dummy function must be inserted at the start of the code section
            // to compensate for having one less import, so that function indices don't change.
self.code.dead_import_dummy_count += 1;
// Insert any type signature for the dummy. Signature index 0 will do.
self.function.signatures.insert(0, 0);
// Update the debug name for the dummy
if let Some((_, debug_name)) = self
.names
.function_names
.iter_mut()
.find(|(i, _)| *i as usize == swap_fn_index)
{
debug_name.clone_from(
&bumpalo::format!(in arena, "linking_dummy_{}", debug_name).into_bump_str(),
);
}
}
}
/// Create a name->index lookup table for host functions that may be called from the app
pub fn get_host_function_lookup(&self, arena: &'a Bump) -> Vec<'a, (&'a str, u32)> {
// Functions beginning with `roc_` go first, since they're most likely to be called
let roc_global_fns =
self.linking
.symbol_table
.iter()
.filter_map(|sym_info| match sym_info {
SymInfo::Function(WasmObjectSymbol::ExplicitlyNamed { flags, index, name })
if flags & linking::WASM_SYM_BINDING_LOCAL == 0
&& name.starts_with("roc_") =>
{
Some((*name, *index))
}
_ => None,
});
let other_global_fns =
self.linking
.symbol_table
.iter()
.filter_map(|sym_info| match sym_info {
SymInfo::Function(WasmObjectSymbol::ExplicitlyNamed { flags, index, name })
if flags & linking::WASM_SYM_BINDING_LOCAL == 0
&& !name.starts_with("roc_") =>
{
Some((*name, *index))
}
_ => None,
});
let import_fns = self
.import
.imports
.iter()
.filter(|import| matches!(import.description, ImportDesc::Func { .. }))
.enumerate()
.map(|(fn_index, import)| (import.name, fn_index as u32));
Vec::from_iter_in(
roc_global_fns.chain(other_global_fns).chain(import_fns),
arena,
)
}
}
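// A minimal, self-contained sketch of the fixed-point liveness pass used in
// `trace_live_host_functions`, on a hypothetical four-function call graph
// (the indices and edges below are made up for illustration).
#[cfg(test)]
mod liveness_sketch {
    use bitvec::vec::BitVec;

    #[test]
    fn fixed_point_reaches_transitive_callees() {
        // callees[f] lists the functions that f calls directly
        let callees: [&[usize]; 4] = [&[1], &[3], &[], &[]];
        let mut live: BitVec<usize> = BitVec::repeat(false, 4);
        let mut current: BitVec<usize> = BitVec::repeat(false, 4);
        let mut next: BitVec<usize> = BitVec::repeat(false, 4);
        current.set(0, true); // function 0 is a root (e.g. exported to JS)
        while current.count_ones() > 0 {
            live |= &current;
            for f in current.iter_ones() {
                for &callee in callees[f] {
                    if !live[callee] {
                        next.set(callee, true);
                    }
                }
            }
            std::mem::swap(&mut current, &mut next);
            next.fill(false);
        }
        // 0 -> 1 -> 3 are reachable; 2 is dead
        assert_eq!(live.iter_ones().collect::<std::vec::Vec<_>>(), [0, 1, 3]);
    }
}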


@ -1,310 +0,0 @@
use super::parse::{Parse, ParseError, SkipBytes};
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OpCode {
UNREACHABLE = 0x00,
NOP = 0x01,
BLOCK = 0x02,
LOOP = 0x03,
IF = 0x04,
ELSE = 0x05,
END = 0x0b,
BR = 0x0c,
BRIF = 0x0d,
BRTABLE = 0x0e,
RETURN = 0x0f,
CALL = 0x10,
CALLINDIRECT = 0x11,
DROP = 0x1a,
SELECT = 0x1b,
GETLOCAL = 0x20,
SETLOCAL = 0x21,
TEELOCAL = 0x22,
GETGLOBAL = 0x23,
SETGLOBAL = 0x24,
I32LOAD = 0x28,
I64LOAD = 0x29,
F32LOAD = 0x2a,
F64LOAD = 0x2b,
I32LOAD8S = 0x2c,
I32LOAD8U = 0x2d,
I32LOAD16S = 0x2e,
I32LOAD16U = 0x2f,
I64LOAD8S = 0x30,
I64LOAD8U = 0x31,
I64LOAD16S = 0x32,
I64LOAD16U = 0x33,
I64LOAD32S = 0x34,
I64LOAD32U = 0x35,
I32STORE = 0x36,
I64STORE = 0x37,
F32STORE = 0x38,
F64STORE = 0x39,
I32STORE8 = 0x3a,
I32STORE16 = 0x3b,
I64STORE8 = 0x3c,
I64STORE16 = 0x3d,
I64STORE32 = 0x3e,
CURRENTMEMORY = 0x3f,
GROWMEMORY = 0x40,
I32CONST = 0x41,
I64CONST = 0x42,
F32CONST = 0x43,
F64CONST = 0x44,
I32EQZ = 0x45,
I32EQ = 0x46,
I32NE = 0x47,
I32LTS = 0x48,
I32LTU = 0x49,
I32GTS = 0x4a,
I32GTU = 0x4b,
I32LES = 0x4c,
I32LEU = 0x4d,
I32GES = 0x4e,
I32GEU = 0x4f,
I64EQZ = 0x50,
I64EQ = 0x51,
I64NE = 0x52,
I64LTS = 0x53,
I64LTU = 0x54,
I64GTS = 0x55,
I64GTU = 0x56,
I64LES = 0x57,
I64LEU = 0x58,
I64GES = 0x59,
I64GEU = 0x5a,
F32EQ = 0x5b,
F32NE = 0x5c,
F32LT = 0x5d,
F32GT = 0x5e,
F32LE = 0x5f,
F32GE = 0x60,
F64EQ = 0x61,
F64NE = 0x62,
F64LT = 0x63,
F64GT = 0x64,
F64LE = 0x65,
F64GE = 0x66,
I32CLZ = 0x67,
I32CTZ = 0x68,
I32POPCNT = 0x69,
I32ADD = 0x6a,
I32SUB = 0x6b,
I32MUL = 0x6c,
I32DIVS = 0x6d,
I32DIVU = 0x6e,
I32REMS = 0x6f,
I32REMU = 0x70,
I32AND = 0x71,
I32OR = 0x72,
I32XOR = 0x73,
I32SHL = 0x74,
I32SHRS = 0x75,
I32SHRU = 0x76,
I32ROTL = 0x77,
I32ROTR = 0x78,
I64CLZ = 0x79,
I64CTZ = 0x7a,
I64POPCNT = 0x7b,
I64ADD = 0x7c,
I64SUB = 0x7d,
I64MUL = 0x7e,
I64DIVS = 0x7f,
I64DIVU = 0x80,
I64REMS = 0x81,
I64REMU = 0x82,
I64AND = 0x83,
I64OR = 0x84,
I64XOR = 0x85,
I64SHL = 0x86,
I64SHRS = 0x87,
I64SHRU = 0x88,
I64ROTL = 0x89,
I64ROTR = 0x8a,
F32ABS = 0x8b,
F32NEG = 0x8c,
F32CEIL = 0x8d,
F32FLOOR = 0x8e,
F32TRUNC = 0x8f,
F32NEAREST = 0x90,
F32SQRT = 0x91,
F32ADD = 0x92,
F32SUB = 0x93,
F32MUL = 0x94,
F32DIV = 0x95,
F32MIN = 0x96,
F32MAX = 0x97,
F32COPYSIGN = 0x98,
F64ABS = 0x99,
F64NEG = 0x9a,
F64CEIL = 0x9b,
F64FLOOR = 0x9c,
F64TRUNC = 0x9d,
F64NEAREST = 0x9e,
F64SQRT = 0x9f,
F64ADD = 0xa0,
F64SUB = 0xa1,
F64MUL = 0xa2,
F64DIV = 0xa3,
F64MIN = 0xa4,
F64MAX = 0xa5,
F64COPYSIGN = 0xa6,
I32WRAPI64 = 0xa7,
I32TRUNCSF32 = 0xa8,
I32TRUNCUF32 = 0xa9,
I32TRUNCSF64 = 0xaa,
I32TRUNCUF64 = 0xab,
I64EXTENDSI32 = 0xac,
I64EXTENDUI32 = 0xad,
I64TRUNCSF32 = 0xae,
I64TRUNCUF32 = 0xaf,
I64TRUNCSF64 = 0xb0,
I64TRUNCUF64 = 0xb1,
F32CONVERTSI32 = 0xb2,
F32CONVERTUI32 = 0xb3,
F32CONVERTSI64 = 0xb4,
F32CONVERTUI64 = 0xb5,
F32DEMOTEF64 = 0xb6,
F64CONVERTSI32 = 0xb7,
F64CONVERTUI32 = 0xb8,
F64CONVERTSI64 = 0xb9,
F64CONVERTUI64 = 0xba,
F64PROMOTEF32 = 0xbb,
I32REINTERPRETF32 = 0xbc,
I64REINTERPRETF64 = 0xbd,
F32REINTERPRETI32 = 0xbe,
F64REINTERPRETI64 = 0xbf,
}
/// The format of the *immediate* operands of an operator
/// Immediates appear directly in the byte stream after the opcode,
/// rather than being popped off the value stack. These are the possible forms.
#[derive(Debug)]
enum OpImmediates {
NoImmediate,
Byte1,
Bytes4,
Bytes8,
Leb32x1,
Leb64x1,
Leb32x2,
BrTable,
}
fn immediates_for(op: OpCode) -> Result<OpImmediates, String> {
use OpCode::*;
use OpImmediates::*;
let imm = match op {
UNREACHABLE => NoImmediate,
NOP => NoImmediate,
BLOCK | LOOP | IF => Byte1,
ELSE => NoImmediate,
END => NoImmediate,
BR | BRIF => Leb32x1,
BRTABLE => BrTable,
RETURN => NoImmediate,
CALL => Leb32x1,
CALLINDIRECT => Leb32x2,
DROP => NoImmediate,
SELECT => NoImmediate,
GETLOCAL | SETLOCAL | TEELOCAL => Leb32x1,
GETGLOBAL | SETGLOBAL => Leb32x1,
I32LOAD | I64LOAD | F32LOAD | F64LOAD | I32LOAD8S | I32LOAD8U | I32LOAD16S | I32LOAD16U
| I64LOAD8S | I64LOAD8U | I64LOAD16S | I64LOAD16U | I64LOAD32S | I64LOAD32U | I32STORE
| I64STORE | F32STORE | F64STORE | I32STORE8 | I32STORE16 | I64STORE8 | I64STORE16
| I64STORE32 => Leb32x2,
CURRENTMEMORY | GROWMEMORY => Byte1,
I32CONST => Leb32x1,
I64CONST => Leb64x1,
F32CONST => Bytes4,
F64CONST => Bytes8,
I32EQZ | I32EQ | I32NE | I32LTS | I32LTU | I32GTS | I32GTU | I32LES | I32LEU | I32GES
| I32GEU | I64EQZ | I64EQ | I64NE | I64LTS | I64LTU | I64GTS | I64GTU | I64LES | I64LEU
| I64GES | I64GEU | F32EQ | F32NE | F32LT | F32GT | F32LE | F32GE | F64EQ | F64NE
| F64LT | F64GT | F64LE | F64GE | I32CLZ | I32CTZ | I32POPCNT | I32ADD | I32SUB
| I32MUL | I32DIVS | I32DIVU | I32REMS | I32REMU | I32AND | I32OR | I32XOR | I32SHL
| I32SHRS | I32SHRU | I32ROTL | I32ROTR | I64CLZ | I64CTZ | I64POPCNT | I64ADD | I64SUB
| I64MUL | I64DIVS | I64DIVU | I64REMS | I64REMU | I64AND | I64OR | I64XOR | I64SHL
| I64SHRS | I64SHRU | I64ROTL | I64ROTR | F32ABS | F32NEG | F32CEIL | F32FLOOR
| F32TRUNC | F32NEAREST | F32SQRT | F32ADD | F32SUB | F32MUL | F32DIV | F32MIN | F32MAX
| F32COPYSIGN | F64ABS | F64NEG | F64CEIL | F64FLOOR | F64TRUNC | F64NEAREST | F64SQRT
| F64ADD | F64SUB | F64MUL | F64DIV | F64MIN | F64MAX | F64COPYSIGN | I32WRAPI64
| I32TRUNCSF32 | I32TRUNCUF32 | I32TRUNCSF64 | I32TRUNCUF64 | I64EXTENDSI32
| I64EXTENDUI32 | I64TRUNCSF32 | I64TRUNCUF32 | I64TRUNCSF64 | I64TRUNCUF64
| F32CONVERTSI32 | F32CONVERTUI32 | F32CONVERTSI64 | F32CONVERTUI64 | F32DEMOTEF64
| F64CONVERTSI32 | F64CONVERTUI32 | F64CONVERTSI64 | F64CONVERTUI64 | F64PROMOTEF32
| I32REINTERPRETF32 | I64REINTERPRETF64 | F32REINTERPRETI32 | F64REINTERPRETI64 => {
NoImmediate
}
// Catch-all in case of an invalid cast from u8 to OpCode while parsing binary
// (rustc keeps this code, I verified in Compiler Explorer)
#[allow(unreachable_patterns)]
_ => return Err(format!("Unknown Wasm instruction 0x{:02x}", op as u8)),
};
Ok(imm)
}
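// For example, CALL (0x10) carries one LEB-128 immediate (the callee's function
// index), while CALLINDIRECT (0x11) carries two (a type index, then a table index).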
impl SkipBytes for OpCode {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
use OpImmediates::*;
let opcode_byte: u8 = bytes[*cursor];
        let opcode: OpCode = unsafe { std::mem::transmute(opcode_byte) };
        // immediates_for returns Err if the transmuted byte is not a valid OpCode
let immediates = immediates_for(opcode).map_err(|message| ParseError {
message,
offset: *cursor,
})?;
match immediates {
NoImmediate => {
*cursor += 1;
}
Byte1 => {
*cursor += 1 + 1;
}
Bytes4 => {
*cursor += 1 + 4;
}
Bytes8 => {
*cursor += 1 + 8;
}
Leb32x1 => {
*cursor += 1;
u32::skip_bytes(bytes, cursor)?;
}
Leb64x1 => {
*cursor += 1;
u64::skip_bytes(bytes, cursor)?;
}
Leb32x2 => {
*cursor += 1;
u32::skip_bytes(bytes, cursor)?;
u32::skip_bytes(bytes, cursor)?;
}
BrTable => {
*cursor += 1;
let n_labels = 1 + u32::parse((), bytes, cursor)?;
for _ in 0..n_labels {
u32::skip_bytes(bytes, cursor)?;
}
}
}
Ok(())
}
}
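// A minimal sketch of skipping a single instruction with the logic above:
// CALL (0x10) is followed by one LEB-128 immediate, so the cursor should land
// on the byte after the encoded function index.
#[cfg(test)]
mod skip_sketch {
    use super::*;

    #[test]
    fn skip_call_instruction() {
        // "call 128": opcode 0x10, then 128 as LEB-128 ([0x80, 0x01]), then END
        let bytes = [0x10, 0x80, 0x01, 0x0b];
        let mut cursor = 0;
        OpCode::skip_bytes(&bytes, &mut cursor).unwrap();
        assert_eq!(cursor, 3); // now pointing at END (0x0b)
    }
}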


@ -1,241 +0,0 @@
use super::serialize::MAX_SIZE_ENCODED_U32;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
/// Parse serialized bytes into a data structure
/// Specific parsers may need contextual data from other parts of the .wasm file
pub trait Parse<ParseContext>: Sized {
fn parse(ctx: ParseContext, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError>;
}
#[derive(Debug)]
pub struct ParseError {
pub offset: usize,
pub message: String,
}
/// Decode an unsigned 32-bit integer from the provided buffer in LEB-128 format
/// Return the integer itself and the offset after it ends
fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), ()> {
let mut value = 0;
let mut shift = 0;
for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() {
value += ((byte & 0x7f) as u32) << shift;
if (byte & 0x80) == 0 {
return Ok((value, i + 1));
}
shift += 7;
}
Err(())
}
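// Worked example: decode_u32(&[0x80, 0x01]) returns Ok((128, 2)): the first byte
// contributes 0 and sets the continuation bit, the second contributes 1 << 7.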
impl Parse<()> for u32 {
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
match decode_u32(&bytes[*cursor..]) {
Ok((value, len)) => {
*cursor += len;
Ok(value)
}
Err(()) => Err(ParseError {
offset: *cursor,
message: format!(
"Failed to decode u32 as LEB-128 from bytes: {:2x?}",
&bytes[*cursor..][..MAX_SIZE_ENCODED_U32]
),
}),
}
}
}
impl Parse<()> for u8 {
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let byte = bytes[*cursor];
*cursor += 1;
Ok(byte)
}
}
/// Decode a signed 32-bit integer from the provided buffer in LEB-128 format
/// Return the integer itself and the offset after it ends
fn decode_i32(bytes: &[u8]) -> Result<(i32, usize), ()> {
    let mut value = 0;
    let mut shift = 0;
    for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() {
        value |= ((byte & 0x7f) as i32) << shift;
        shift += 7;
        if (byte & 0x80) == 0 {
            // Sign-extend if the final payload byte has its sign bit set
            let is_negative = byte & 0x40 != 0;
            if shift < 32 && is_negative {
                value |= -1 << shift;
            }
            return Ok((value, i + 1));
        }
    }
    Err(())
}
impl Parse<()> for i32 {
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
match decode_i32(&bytes[*cursor..]) {
Ok((value, len)) => {
*cursor += len;
Ok(value)
}
Err(()) => Err(ParseError {
offset: *cursor,
message: format!(
"Failed to decode i32 as LEB-128 from bytes: {:2x?}",
&bytes[*cursor..][..MAX_SIZE_ENCODED_U32]
),
}),
}
}
}
impl<'a> Parse<&'a Bump> for &'a str {
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let len = u32::parse((), bytes, cursor)?;
let end = *cursor + len as usize;
let bytes: &[u8] = &bytes[*cursor..end];
let copy = arena.alloc_slice_copy(bytes);
let s = unsafe { std::str::from_utf8_unchecked(copy) };
*cursor = end;
Ok(s)
}
}
pub fn parse_variable_size_items<'a, T>(
arena: &'a Bump,
bytes: &[u8],
cursor: &mut usize,
) -> Result<Vec<'a, T>, ParseError>
where
T: Parse<&'a Bump>,
{
let len = u32::parse((), bytes, cursor)?;
let mut vector: Vec<'a, T> = Vec::with_capacity_in(len as usize, arena);
for _ in 0..len {
let item = T::parse(arena, bytes, cursor)?;
vector.push(item);
}
Ok(vector)
}
pub fn parse_fixed_size_items<'a, T>(
arena: &'a Bump,
bytes: &[u8],
cursor: &mut usize,
) -> Result<Vec<'a, T>, ParseError>
where
T: Parse<()>,
{
let len = u32::parse((), bytes, cursor)?;
let mut vector: Vec<'a, T> = Vec::with_capacity_in(len as usize, arena);
for _ in 0..len {
let item = T::parse((), bytes, cursor)?;
vector.push(item);
}
Ok(vector)
}
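// Hypothetical usage sketch: a length-prefixed vector of u32 indices could be read with
//     let indices: Vec<'a, u32> = parse_fixed_size_items(arena, bytes, &mut cursor)?;
// while element types that need the arena (such as &'a str) go through
// parse_variable_size_items instead.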
/// Skip over serialized bytes for a type
/// This may, or may not, require looking at the byte values
pub trait SkipBytes: Sized {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError>;
}
impl SkipBytes for u32 {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
const MAX_LEN: usize = 5;
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
if byte & 0x80 == 0 {
*cursor = i + 1;
return Ok(());
}
}
Err(ParseError {
offset: *cursor,
message: "Invalid LEB encoding".into(),
})
}
}
impl SkipBytes for u64 {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
const MAX_LEN: usize = 10;
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
if byte & 0x80 == 0 {
*cursor = i + 1;
return Ok(());
}
}
Err(ParseError {
offset: *cursor,
message: "Invalid LEB encoding".into(),
})
}
}
impl SkipBytes for u8 {
fn skip_bytes(_bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
*cursor += 1;
Ok(())
}
}
/// Note: This is just for skipping over Wasm bytes. We don't actually care about String vs str!
impl SkipBytes for String {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
let len = u32::parse((), bytes, cursor)?;
        // (Change `false` to `true` to debug-print each string as it is skipped)
        if false {
let str_bytes = &bytes[*cursor..(*cursor + len as usize)];
println!(
"Skipping string {:?}",
std::str::from_utf8(str_bytes).unwrap()
);
}
*cursor += len as usize;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::wasm_module::parse::decode_u32;
#[test]
fn test_decode_u32() {
assert_eq!(decode_u32(&[0]), Ok((0, 1)));
assert_eq!(decode_u32(&[64]), Ok((64, 1)));
assert_eq!(decode_u32(&[0x7f]), Ok((0x7f, 1)));
assert_eq!(decode_u32(&[0x80, 0x01]), Ok((0x80, 2)));
assert_eq!(decode_u32(&[0xff, 0x7f]), Ok((0x3fff, 2)));
assert_eq!(decode_u32(&[0x80, 0x80, 0x01]), Ok((0x4000, 3)));
assert_eq!(
decode_u32(&[0xff, 0xff, 0xff, 0xff, 0x0f]),
Ok((u32::MAX, MAX_SIZE_ENCODED_U32))
);
assert!(matches!(decode_u32(&[0x80; 6]), Err(_)));
assert!(matches!(decode_u32(&[0x80; 2]), Err(_)));
assert!(matches!(decode_u32(&[]), Err(_)));
}
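    // Spot checks for the signed decoder too (a sketch; the values are chosen to
    // exercise the sign-extension branch in decode_i32).
    #[test]
    fn test_decode_i32() {
        assert_eq!(decode_i32(&[0]), Ok((0, 1)));
        assert_eq!(decode_i32(&[0x7f]), Ok((-1, 1)));
        assert_eq!(decode_i32(&[0xbf, 0x7f]), Ok((-65, 2)));
        assert_eq!(decode_i32(&[0xff, 0xff, 0xff, 0xff, 0x07]), Ok((i32::MAX, 5)));
        assert!(matches!(decode_i32(&[]), Err(_)));
    }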
#[test]
fn test_parse_u32_sequence() {
let bytes = &[0, 0x80, 0x01, 0xff, 0xff, 0xff, 0xff, 0x0f];
let expected = [0, 128, u32::MAX];
let mut cursor = 0;
assert_eq!(u32::parse((), bytes, &mut cursor).unwrap(), expected[0]);
assert_eq!(cursor, 1);
assert_eq!(u32::parse((), bytes, &mut cursor).unwrap(), expected[1]);
assert_eq!(cursor, 3);
assert_eq!(u32::parse((), bytes, &mut cursor).unwrap(), expected[2]);
assert_eq!(cursor, 8);
}
}

File diff suppressed because it is too large


@ -1,398 +0,0 @@
use bumpalo::collections::vec::Vec;
use std::fmt::Debug;
/// In the WebAssembly binary format, all integers are variable-length encoded (using LEB-128)
/// A small value like 3 or 100 is encoded as 1 byte. The value 128 needs 2 bytes, etc.
/// In practice, this saves space, since small numbers are used more often than large numbers.
/// Of course there is a price for this - an encoded U32 can be up to 5 bytes wide.
pub const MAX_SIZE_ENCODED_U32: usize = 5;
pub(super) trait Serialize {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T);
}
impl Serialize for str {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.encode_u32(self.len() as u32);
buffer.append_slice(self.as_bytes());
}
}
impl Serialize for &str {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.encode_u32(self.len() as u32);
buffer.append_slice(self.as_bytes());
}
}
impl Serialize for u8 {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_u8(*self);
}
}
impl Serialize for u32 {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.encode_u32(*self);
}
}
// Unit is used as a placeholder in parts of the Wasm spec we don't use yet
impl Serialize for () {
#[inline(always)]
fn serialize<T: SerialBuffer>(&self, _buffer: &mut T) {}
}
impl<S: Serialize> Serialize for [S] {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.encode_u32(self.len() as u32);
for item in self.iter() {
item.serialize(buffer);
}
}
}
impl Serialize for Vec<'_, u8> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.encode_u32(self.len() as u32);
buffer.append_slice(self);
}
}
impl<S: Serialize> Serialize for Option<S> {
/// serialize Option as a vector of length 1 or 0
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
match self {
Some(x) => {
buffer.append_u8(1);
x.serialize(buffer);
}
None => {
buffer.append_u8(0);
}
}
}
}
impl<A: Serialize, B: Serialize> Serialize for (A, B) {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
self.0.serialize(buffer);
self.1.serialize(buffer);
}
}
/// Write an unsigned integer into the provided buffer in LEB-128 format, returning byte length
///
/// All integers in Wasm are variable-length encoded, which saves space for small values.
/// The most significant bit indicates "more bytes are coming", and the other 7 are payload.
macro_rules! encode_uleb128 {
($name: ident, $ty: ty) => {
fn $name(&mut self, value: $ty) -> usize {
let mut x = value;
let start_len = self.size();
while x >= 0x80 {
self.append_u8(0x80 | ((x & 0x7f) as u8));
x >>= 7;
}
self.append_u8(x as u8);
self.size() - start_len
}
};
}
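// Worked example: encode_u32(300). 300 is 0b1_0010_1100, so the low 7 bits (0x2c)
// go out first with the continuation bit set (0xac), then the remaining bits
// (0b10) as the final byte, giving [0xac, 0x02].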
/// Write a signed integer into the provided buffer in LEB-128 format, returning byte length
macro_rules! encode_sleb128 {
($name: ident, $ty: ty) => {
fn $name(&mut self, value: $ty) -> usize {
let mut x = value;
let start_len = self.size();
loop {
let byte = (x & 0x7f) as u8;
x >>= 7;
let byte_is_negative = (byte & 0x40) != 0;
if ((x == 0 && !byte_is_negative) || (x == -1 && byte_is_negative)) {
self.append_u8(byte);
break;
}
self.append_u8(byte | 0x80);
}
self.size() - start_len
}
};
}
macro_rules! write_unencoded {
($name: ident, $ty: ty) => {
/// write an unencoded little-endian integer (only used in relocations)
fn $name(&mut self, value: $ty) {
let mut x = value;
let size = std::mem::size_of::<$ty>();
for _ in 0..size {
self.append_u8((x & 0xff) as u8);
x >>= 8;
}
}
};
}
/// For relocations
pub fn overwrite_padded_i32(buffer: &mut [u8], value: i32) {
let mut x = value;
for byte in buffer.iter_mut().take(4) {
*byte = 0x80 | ((x & 0x7f) as u8);
x >>= 7;
}
buffer[4] = (x & 0x7f) as u8;
}
pub fn overwrite_padded_u32(buffer: &mut [u8], value: u32) {
let mut x = value;
for byte in buffer.iter_mut().take(4) {
*byte = 0x80 | ((x & 0x7f) as u8);
x >>= 7;
}
buffer[4] = x as u8;
}
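// For example, overwrite_padded_u32 writes the value 1 as [0x81, 0x80, 0x80, 0x80, 0x00]:
// the value fits in the first payload byte, but every byte except the last keeps its
// continuation bit so the encoding stays exactly MAX_SIZE_ENCODED_U32 bytes wide.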
pub trait SerialBuffer: Debug {
fn append_u8(&mut self, b: u8);
fn overwrite_u8(&mut self, index: usize, b: u8);
fn append_slice(&mut self, b: &[u8]);
fn size(&self) -> usize;
encode_uleb128!(encode_u32, u32);
encode_uleb128!(encode_u64, u64);
encode_sleb128!(encode_i32, i32);
encode_sleb128!(encode_i64, i64);
fn reserve_padded_u32(&mut self) -> usize;
fn encode_padded_u32(&mut self, value: u32) -> usize;
fn overwrite_padded_u32(&mut self, index: usize, value: u32);
fn encode_f32(&mut self, value: f32) {
self.write_unencoded_u32(value.to_bits());
}
fn encode_f64(&mut self, value: f64) {
self.write_unencoded_u64(value.to_bits());
}
// methods for relocations
write_unencoded!(write_unencoded_u32, u32);
write_unencoded!(write_unencoded_u64, u64);
}
impl SerialBuffer for std::vec::Vec<u8> {
fn append_u8(&mut self, b: u8) {
self.push(b);
}
fn overwrite_u8(&mut self, index: usize, b: u8) {
self[index] = b;
}
fn append_slice(&mut self, b: &[u8]) {
self.extend_from_slice(b);
}
fn size(&self) -> usize {
self.len()
}
fn reserve_padded_u32(&mut self) -> usize {
let index = self.len();
self.resize(index + MAX_SIZE_ENCODED_U32, 0xff);
index
}
fn encode_padded_u32(&mut self, value: u32) -> usize {
let index = self.len();
let new_len = index + MAX_SIZE_ENCODED_U32;
self.resize(new_len, 0);
overwrite_padded_u32(&mut self[index..new_len], value);
index
}
fn overwrite_padded_u32(&mut self, index: usize, value: u32) {
overwrite_padded_u32(&mut self[index..(index + MAX_SIZE_ENCODED_U32)], value);
}
}
impl<'a> SerialBuffer for Vec<'a, u8> {
fn append_u8(&mut self, b: u8) {
self.push(b);
}
fn overwrite_u8(&mut self, index: usize, b: u8) {
self[index] = b;
}
fn append_slice(&mut self, b: &[u8]) {
self.extend_from_slice(b);
}
fn size(&self) -> usize {
self.len()
}
fn reserve_padded_u32(&mut self) -> usize {
let index = self.len();
self.resize(index + MAX_SIZE_ENCODED_U32, 0xff);
index
}
fn encode_padded_u32(&mut self, value: u32) -> usize {
let index = self.len();
let new_len = index + MAX_SIZE_ENCODED_U32;
self.resize(new_len, 0);
overwrite_padded_u32(&mut self[index..new_len], value);
index
}
fn overwrite_padded_u32(&mut self, index: usize, value: u32) {
overwrite_padded_u32(&mut self[index..(index + MAX_SIZE_ENCODED_U32)], value);
}
}
#[cfg(test)]
mod tests {
use super::*;
use bumpalo::{self, collections::Vec, Bump};
fn help_u32(arena: &Bump, value: u32) -> Vec<'_, u8> {
let mut buffer = Vec::with_capacity_in(MAX_SIZE_ENCODED_U32, arena);
buffer.encode_u32(value);
buffer
}
#[test]
fn test_encode_u32() {
let a = &Bump::new();
assert_eq!(help_u32(a, 0), &[0]);
assert_eq!(help_u32(a, 64), &[64]);
assert_eq!(help_u32(a, 0x7f), &[0x7f]);
assert_eq!(help_u32(a, 0x80), &[0x80, 0x01]);
assert_eq!(help_u32(a, 0x3fff), &[0xff, 0x7f]);
assert_eq!(help_u32(a, 0x4000), &[0x80, 0x80, 0x01]);
assert_eq!(help_u32(a, u32::MAX), &[0xff, 0xff, 0xff, 0xff, 0x0f]);
}
fn help_u64(arena: &Bump, value: u64) -> Vec<'_, u8> {
let mut buffer = Vec::with_capacity_in(10, arena);
buffer.encode_u64(value);
buffer
}
#[test]
fn test_encode_u64() {
let a = &Bump::new();
assert_eq!(help_u64(a, 0), &[0]);
assert_eq!(help_u64(a, 64), &[64]);
assert_eq!(help_u64(a, 0x7f), &[0x7f]);
assert_eq!(help_u64(a, 0x80), &[0x80, 0x01]);
assert_eq!(help_u64(a, 0x3fff), &[0xff, 0x7f]);
assert_eq!(help_u64(a, 0x4000), &[0x80, 0x80, 0x01]);
assert_eq!(
help_u64(a, u64::MAX),
&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01],
);
}
fn help_i32(arena: &Bump, value: i32) -> Vec<'_, u8> {
let mut buffer = Vec::with_capacity_in(MAX_SIZE_ENCODED_U32, arena);
buffer.encode_i32(value);
buffer
}
#[test]
fn test_encode_i32() {
let a = &Bump::new();
assert_eq!(help_i32(a, 0), &[0]);
assert_eq!(help_i32(a, 1), &[1]);
assert_eq!(help_i32(a, -1), &[0x7f]);
assert_eq!(help_i32(a, 63), &[63]);
assert_eq!(help_i32(a, 64), &[0xc0, 0x0]);
assert_eq!(help_i32(a, -64), &[0x40]);
assert_eq!(help_i32(a, -65), &[0xbf, 0x7f]);
assert_eq!(help_i32(a, i32::MAX), &[0xff, 0xff, 0xff, 0xff, 0x07]);
assert_eq!(help_i32(a, i32::MIN), &[0x80, 0x80, 0x80, 0x80, 0x78]);
}
fn help_i64(arena: &Bump, value: i64) -> Vec<'_, u8> {
let mut buffer = Vec::with_capacity_in(10, arena);
buffer.encode_i64(value);
buffer
}
#[test]
fn test_encode_i64() {
let a = &Bump::new();
assert_eq!(help_i64(a, 0), &[0]);
assert_eq!(help_i64(a, 1), &[1]);
assert_eq!(help_i64(a, -1), &[0x7f]);
assert_eq!(help_i64(a, 63), &[63]);
assert_eq!(help_i64(a, 64), &[0xc0, 0x0]);
assert_eq!(help_i64(a, -64), &[0x40]);
assert_eq!(help_i64(a, -65), &[0xbf, 0x7f]);
assert_eq!(
help_i64(a, i64::MAX),
&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00],
);
assert_eq!(
help_i64(a, i64::MIN),
&[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7f],
);
}
#[test]
fn test_overwrite_u32_padded() {
let mut buffer = [0, 0, 0, 0, 0];
overwrite_padded_u32(&mut buffer, u32::MAX);
assert_eq!(buffer, [0xff, 0xff, 0xff, 0xff, 0x0f]);
overwrite_padded_u32(&mut buffer, 0);
assert_eq!(buffer, [0x80, 0x80, 0x80, 0x80, 0x00]);
overwrite_padded_u32(&mut buffer, 127);
assert_eq!(buffer, [0xff, 0x80, 0x80, 0x80, 0x00]);
overwrite_padded_u32(&mut buffer, 128);
assert_eq!(buffer, [0x80, 0x81, 0x80, 0x80, 0x00]);
}
#[test]
fn test_write_unencoded_u32() {
let mut buffer = std::vec::Vec::with_capacity(4);
buffer.write_unencoded_u32(0);
assert_eq!(buffer, &[0, 0, 0, 0]);
buffer.clear();
buffer.write_unencoded_u32(u32::MAX);
assert_eq!(buffer, &[0xff, 0xff, 0xff, 0xff]);
}
#[test]
fn test_write_unencoded_u64() {
let mut buffer = std::vec::Vec::with_capacity(8);
buffer.write_unencoded_u64(0);
assert_eq!(buffer, &[0, 0, 0, 0, 0, 0, 0, 0]);
buffer.clear();
buffer.write_unencoded_u64(u64::MAX);
assert_eq!(buffer, &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
}
fn help_pad_i32(val: i32) -> [u8; MAX_SIZE_ENCODED_U32] {
let mut buffer = [0; MAX_SIZE_ENCODED_U32];
overwrite_padded_i32(&mut buffer, val);
buffer
}
#[test]
fn test_encode_padded_i32() {
assert_eq!(help_pad_i32(0), [0x80, 0x80, 0x80, 0x80, 0x00]);
assert_eq!(help_pad_i32(1), [0x81, 0x80, 0x80, 0x80, 0x00]);
assert_eq!(help_pad_i32(-1), [0xff, 0xff, 0xff, 0xff, 0x7f]);
assert_eq!(help_pad_i32(i32::MAX), [0xff, 0xff, 0xff, 0xff, 0x07]);
assert_eq!(help_pad_i32(i32::MIN), [0x80, 0x80, 0x80, 0x80, 0x78]);
let mut buffer = [0xff; 10];
overwrite_padded_i32(&mut buffer[2..], 0);
assert_eq!(
buffer,
[0xff, 0xff, 0x80, 0x80, 0x80, 0x80, 0x00, 0xff, 0xff, 0xff]
);
}
}