mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-04 12:18:19 +00:00
move wasm_module out of gen_wasm
This commit is contained in:
parent
b907f01f1f
commit
63d9187343
17 changed files with 43 additions and 21 deletions
|
@ -17,3 +17,4 @@ roc_mono = { path = "../mono" }
|
|||
roc_target = { path = "../roc_target" }
|
||||
roc_std = { path = "../../roc_std" }
|
||||
roc_error_macros = { path = "../../error_macros" }
|
||||
roc_wasm_module = { path = "../../wasm_module" }
|
||||
|
|
|
@ -18,18 +18,18 @@ use roc_std::RocDec;
|
|||
use crate::layout::{CallConv, ReturnMethod, WasmLayout};
|
||||
use crate::low_level::{call_higher_order_lowlevel, LowLevelCall};
|
||||
use crate::storage::{AddressValue, Storage, StoredValue, StoredVarKind};
|
||||
use crate::wasm_module::linking::{DataSymbol, WasmObjectSymbol};
|
||||
use crate::wasm_module::sections::{
|
||||
ConstExpr, DataMode, DataSegment, Export, Global, GlobalType, Import, ImportDesc, Limits,
|
||||
MemorySection, NameSection,
|
||||
};
|
||||
use crate::wasm_module::{
|
||||
code_builder, CodeBuilder, ExportType, LocalId, Signature, SymInfo, ValueType, WasmModule,
|
||||
};
|
||||
use crate::{
|
||||
copy_memory, round_up_to_alignment, CopyMemoryConfig, Env, DEBUG_SETTINGS, MEMORY_NAME,
|
||||
PTR_SIZE, PTR_TYPE, TARGET_INFO,
|
||||
};
|
||||
use roc_wasm_module::linking::{DataSymbol, WasmObjectSymbol};
|
||||
use roc_wasm_module::sections::{
|
||||
ConstExpr, DataMode, DataSegment, Export, Global, GlobalType, Import, ImportDesc, Limits,
|
||||
MemorySection, NameSection,
|
||||
};
|
||||
use roc_wasm_module::{
|
||||
code_builder, CodeBuilder, ExportType, LocalId, Signature, SymInfo, ValueType, WasmModule,
|
||||
};
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum ProcSource {
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use roc_builtins::bitcode::{FloatWidth, IntWidth};
|
||||
use roc_mono::layout::{Layout, STLayoutInterner, UnionLayout};
|
||||
|
||||
use crate::wasm_module::ValueType;
|
||||
use crate::{PTR_SIZE, PTR_TYPE, TARGET_INFO};
|
||||
use roc_wasm_module::ValueType;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum ReturnMethod {
|
||||
|
|
|
@ -3,7 +3,6 @@ mod backend;
|
|||
mod layout;
|
||||
mod low_level;
|
||||
mod storage;
|
||||
pub mod wasm_module;
|
||||
|
||||
// Helpers for interfacing to a Wasm module from outside
|
||||
pub mod wasm32_result;
|
||||
|
@ -19,10 +18,10 @@ use roc_mono::code_gen_help::CodeGenHelp;
|
|||
use roc_mono::ir::{Proc, ProcLayout};
|
||||
use roc_mono::layout::{LayoutIds, STLayoutInterner};
|
||||
use roc_target::TargetInfo;
|
||||
use wasm_module::parse::ParseError;
|
||||
use roc_wasm_module::parse::ParseError;
|
||||
|
||||
use crate::backend::{ProcLookupData, ProcSource, WasmBackend};
|
||||
use crate::wasm_module::{Align, CodeBuilder, LocalId, ValueType, WasmModule};
|
||||
use roc_wasm_module::{Align, CodeBuilder, LocalId, ValueType, WasmModule};
|
||||
|
||||
// Target description used by this crate: the default wasm32 target
const TARGET_INFO: TargetInfo = TargetInfo::default_wasm32();
|
||||
const PTR_SIZE: u32 = {
|
||||
|
|
|
@ -12,8 +12,8 @@ use roc_mono::low_level::HigherOrder;
|
|||
use crate::backend::{ProcLookupData, ProcSource, WasmBackend};
|
||||
use crate::layout::{CallConv, StackMemoryFormat, WasmLayout};
|
||||
use crate::storage::{AddressValue, StackMemoryLocation, StoredValue};
|
||||
use crate::wasm_module::{Align, LocalId, ValueType};
|
||||
use crate::{PTR_TYPE, TARGET_INFO};
|
||||
use roc_wasm_module::{Align, LocalId, ValueType};
|
||||
|
||||
/// Number types used for Wasm code gen
|
||||
/// Unlike other enums, this contains no details about layout or storage.
|
||||
|
|
|
@ -7,8 +7,8 @@ use roc_module::symbol::Symbol;
|
|||
use roc_mono::layout::{Layout, STLayoutInterner};
|
||||
|
||||
use crate::layout::{CallConv, ReturnMethod, StackMemoryFormat, WasmLayout};
|
||||
use crate::wasm_module::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
|
||||
use crate::{copy_memory, round_up_to_alignment, CopyMemoryConfig, PTR_TYPE};
|
||||
use roc_wasm_module::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
|
||||
|
||||
pub enum StoredVarKind {
|
||||
Variable,
|
||||
|
@ -592,7 +592,7 @@ impl<'a> Storage<'a> {
|
|||
| StoredValue::Local {
|
||||
value_type, size, ..
|
||||
} => {
|
||||
use crate::wasm_module::Align::*;
|
||||
use roc_wasm_module::Align::*;
|
||||
code_builder.get_local(to_ptr);
|
||||
self.load_symbols(code_builder, &[from_symbol]);
|
||||
match (value_type, size) {
|
||||
|
@ -666,7 +666,7 @@ impl<'a> Storage<'a> {
|
|||
| StoredValue::Local {
|
||||
value_type, size, ..
|
||||
} => {
|
||||
use crate::wasm_module::Align::*;
|
||||
use roc_wasm_module::Align::*;
|
||||
|
||||
if let AddressValue::NotLoaded(from_ptr) = from_addr {
|
||||
code_builder.get_local(from_ptr);
|
||||
|
|
|
@ -11,11 +11,11 @@ use roc_mono::layout::{Builtin, Layout, UnionLayout};
|
|||
use roc_target::TargetInfo;
|
||||
|
||||
use crate::wasm32_sized::Wasm32Sized;
|
||||
use crate::wasm_module::{
|
||||
use roc_std::{RocDec, RocList, RocOrder, RocResult, RocStr, I128, U128};
|
||||
use roc_wasm_module::{
|
||||
linking::SymInfo, linking::WasmObjectSymbol, Align, CodeBuilder, Export, ExportType, LocalId,
|
||||
Signature, ValueType, WasmModule,
|
||||
};
|
||||
use roc_std::{RocDec, RocList, RocOrder, RocResult, RocStr, I128, U128};
|
||||
|
||||
/// Type-driven wrapper generation
|
||||
pub trait Wasm32Result {
|
||||
|
|
|
@ -1,940 +0,0 @@
|
|||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use core::panic;
|
||||
use roc_error_macros::internal_error;
|
||||
|
||||
use roc_module::symbol::Symbol;
|
||||
|
||||
use super::opcodes::{OpCode, OpCode::*};
|
||||
use super::serialize::{SerialBuffer, Serialize};
|
||||
use crate::{
|
||||
round_up_to_alignment, DEBUG_SETTINGS, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID,
|
||||
};
|
||||
|
||||
/// Print one line of instruction-level debug output, but only when
/// `DEBUG_SETTINGS.instructions` is switched on.
/// Takes the same comma-separated arguments as `println!` (at least one).
macro_rules! log_instruction {
    ($($arg: expr),+) => {
        if DEBUG_SETTINGS.instructions {
            println!($($arg),+);
        }
    };
}
|
||||
|
||||
/// Identifier of a local variable, wrapping the raw `u32` index
/// that is written as the immediate of local.get / local.set / local.tee.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocalId(pub u32);
|
||||
|
||||
/// The type of a Wasm value.
/// Each discriminant is the byte used for that type in the Wasm encoding,
/// so casting a variant to `u8` yields its serialized form.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueType {
    I32 = 0x7f,
    I64 = 0x7e,
    F32 = 0x7d,
    F64 = 0x7c,
}
|
||||
|
||||
impl Serialize for ValueType {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.append_u8(*self as u8);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u8> for ValueType {
|
||||
fn from(x: u8) -> Self {
|
||||
match x {
|
||||
0x7f => Self::I32,
|
||||
0x7e => Self::I64,
|
||||
0x7d => Self::F32,
|
||||
0x7c => Self::F64,
|
||||
_ => internal_error!("Invalid ValueType 0x{:02x}", x),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Byte emitted right after a block-opening opcode to say that the block has no result type
const BLOCK_NO_RESULT: u8 = 0x40;
|
||||
|
||||
/// A control block in our model of the VM
|
||||
/// Child blocks cannot "see" values from their parent block
|
||||
struct VmBlock<'a> {
|
||||
/// opcode indicating what kind of block this is
|
||||
opcode: OpCode,
|
||||
/// the stack of values for this block
|
||||
value_stack: Vec<'a, Symbol>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for VmBlock<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_fmt(format_args!("{:?} {:?}", self.opcode, self.value_stack))
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory alignment hint for Wasm load/store instructions.
/// The discriminant of each variant is the value used in the Wasm encoding.
/// Specifying an alignment higher than the "natural" alignment of the
/// instruction is an error.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd)]
pub enum Align {
    Bytes1 = 0,
    Bytes2 = 1,
    Bytes4 = 2,
    Bytes8 = 3,
}
|
||||
|
||||
impl Align {
|
||||
/// Calculate the largest possible alignment for a load/store at a given stack frame offset
|
||||
/// Assumes the stack frame is aligned to at least 8 bytes
|
||||
pub fn from_stack_offset(max_align: Align, offset: u32) -> Align {
|
||||
if (max_align == Align::Bytes8) && (offset & 7 == 0) {
|
||||
return Align::Bytes8;
|
||||
}
|
||||
if (max_align >= Align::Bytes4) && (offset & 3 == 0) {
|
||||
return Align::Bytes4;
|
||||
}
|
||||
if (max_align >= Align::Bytes2) && (offset & 1 == 0) {
|
||||
return Align::Bytes2;
|
||||
}
|
||||
Align::Bytes1
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u32> for Align {
|
||||
fn from(x: u32) -> Align {
|
||||
match x {
|
||||
1 => Align::Bytes1,
|
||||
2 => Align::Bytes2,
|
||||
4 => Align::Bytes4,
|
||||
_ => {
|
||||
if x.count_ones() == 1 {
|
||||
Align::Bytes8 // Max value supported by any Wasm instruction
|
||||
} else {
|
||||
internal_error!("Cannot align to {} bytes", x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Where a Symbol's value currently is, from the point of view of the VM stack model.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VmSymbolState {
    /// No value has been produced for the Symbol yet
    NotYetPushed,

    /// The value is on the VM stack and has not been popped.
    /// `pushed_at` records the code position of the push, in case an instruction
    /// has to be inserted there later.
    Pushed { pushed_at: usize },

    /// The value was pushed and has since been popped, so it is no longer on the VM stack.
    /// To use it again we have to create a local for it, by going back and
    /// inserting a local.tee instruction at `pushed_at`.
    Popped { pushed_at: usize },
}
|
||||
|
||||
// Describes an instruction (local.set or local.tee) that must be spliced
// into the function code when it is serialized.
#[derive(Debug)]
struct Insertion {
    // Position in the main code buffer where the bytes go
    at: usize,
    // Start of the instruction's bytes within the insert_bytes buffer
    start: usize,
    // End (exclusive) of the instruction's bytes within the insert_bytes buffer
    end: usize,
}
|
||||
|
||||
/// Generate a public method for an instruction that has no immediates.
/// The method forwards to `self.inst` with the opcode, the number of values
/// popped, and whether a value is pushed.
macro_rules! instruction_no_args {
    ($name: ident, $op: expr, $n_pops: expr, $pushes: expr) => {
        pub fn $name(&mut self) {
            self.inst($op, $n_pops, $pushes);
        }
    };
}
|
||||
|
||||
/// Generate a public method for a load/store instruction.
/// The method takes an alignment and an offset, and forwards them to
/// `self.inst_mem` along with the opcode and its stack effect.
macro_rules! instruction_memargs {
    ($name: ident, $op: expr, $n_pops: expr, $pushes: expr) => {
        pub fn $name(&mut self, align: Align, offset: u32) {
            self.inst_mem($op, $n_pops, $pushes, align, offset);
        }
    };
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CodeBuilder<'a> {
|
||||
pub arena: &'a Bump,
|
||||
|
||||
/// The main container for the instructions
|
||||
code: Vec<'a, u8>,
|
||||
|
||||
/// Instruction bytes to be inserted into the code when finalizing the function
|
||||
/// (Used for setting locals when we realise they are used multiple times)
|
||||
insert_bytes: Vec<'a, u8>,
|
||||
|
||||
/// Code locations where the insert_bytes should go
|
||||
insertions: Vec<'a, Insertion>,
|
||||
|
||||
/// Bytes for local variable declarations and stack-frame setup code.
|
||||
/// We can't write this until we've finished the main code. But it goes
|
||||
/// before it in the final output, so we need a separate vector.
|
||||
preamble: Vec<'a, u8>,
|
||||
|
||||
/// Encoded bytes for the inner length of the function, locals + code.
|
||||
/// ("inner" because it doesn't include its own length!)
|
||||
/// Again, we can't write this until we've finished the code and preamble,
|
||||
/// but it goes before them in the binary, so it's a separate vector.
|
||||
inner_length: Vec<'a, u8>,
|
||||
|
||||
/// Our simulation model of the Wasm stack machine
|
||||
/// Nested blocks of instructions. A child block can't "see" the stack of its parent block
|
||||
vm_block_stack: Vec<'a, VmBlock<'a>>,
|
||||
|
||||
/// Relocations for calls to JS imports
|
||||
/// When we remove unused imports, the live ones are re-indexed
|
||||
import_relocations: Vec<'a, (usize, u32)>,
|
||||
}
|
||||
|
||||
impl<'a> Serialize for CodeBuilder<'a> {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
self.serialize_without_relocs(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::new_without_default)]
|
||||
impl<'a> CodeBuilder<'a> {
|
||||
pub fn new(arena: &'a Bump) -> Self {
|
||||
let mut vm_block_stack = Vec::with_capacity_in(8, arena);
|
||||
let function_block = VmBlock {
|
||||
opcode: BLOCK,
|
||||
value_stack: Vec::with_capacity_in(8, arena),
|
||||
};
|
||||
vm_block_stack.push(function_block);
|
||||
|
||||
CodeBuilder {
|
||||
arena,
|
||||
code: Vec::with_capacity_in(1024, arena),
|
||||
insertions: Vec::with_capacity_in(32, arena),
|
||||
insert_bytes: Vec::with_capacity_in(64, arena),
|
||||
preamble: Vec::with_capacity_in(32, arena),
|
||||
inner_length: Vec::with_capacity_in(5, arena),
|
||||
vm_block_stack,
|
||||
import_relocations: Vec::with_capacity_in(0, arena),
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
|
||||
LINKING
|
||||
|
||||
***********************************************************/
|
||||
|
||||
/// Build a dummy function with just a single `unreachable` instruction
|
||||
pub fn dummy(arena: &'a Bump) -> Self {
|
||||
let mut builder = Self::new(arena);
|
||||
builder.unreachable_();
|
||||
builder.build_fn_header_and_footer(&[], 0, None);
|
||||
builder
|
||||
}
|
||||
|
||||
pub fn apply_import_relocs(&mut self, live_import_fns: &[usize]) {
|
||||
for (code_index, fn_index) in self.import_relocations.iter() {
|
||||
for (new_index, old_index) in live_import_fns.iter().enumerate() {
|
||||
if *fn_index as usize == *old_index {
|
||||
self.code
|
||||
.overwrite_padded_u32(*code_index, new_index as u32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
|
||||
SYMBOLS
|
||||
|
||||
The Wasm VM stores temporary values in its stack machine.
|
||||
We track which stack positions correspond to IR Symbols,
|
||||
because it helps to generate more efficient code.
|
||||
|
||||
***********************************************************/
|
||||
|
||||
/// The value stack of the innermost block. Panics if there is no block at all.
fn current_stack(&self) -> &Vec<'a, Symbol> {
    &self.vm_block_stack.last().unwrap().value_stack
}
|
||||
|
||||
/// Mutable access to the value stack of the innermost block.
/// Panics if there is no block at all.
fn current_stack_mut(&mut self) -> &mut Vec<'a, Symbol> {
    &mut self.vm_block_stack.last_mut().unwrap().value_stack
}
|
||||
|
||||
/// Set the Symbol that is at the top of the VM stack right now
|
||||
/// We will use this later when we need to load the Symbol
|
||||
pub fn set_top_symbol(&mut self, sym: Symbol) -> VmSymbolState {
|
||||
let current_stack = &mut self.vm_block_stack.last_mut().unwrap().value_stack;
|
||||
let pushed_at = self.code.len();
|
||||
let top_symbol: &mut Symbol = current_stack
|
||||
.last_mut()
|
||||
.unwrap_or_else(|| internal_error!("Empty stack when trying to set Symbol {:?}", sym));
|
||||
*top_symbol = sym;
|
||||
|
||||
VmSymbolState::Pushed { pushed_at }
|
||||
}
|
||||
|
||||
/// Verify if a sequence of symbols is at the top of the stack
|
||||
pub fn verify_stack_match(&self, symbols: &[Symbol]) -> bool {
|
||||
let current_stack = self.current_stack();
|
||||
let n_symbols = symbols.len();
|
||||
let stack_depth = current_stack.len();
|
||||
if n_symbols > stack_depth {
|
||||
return false;
|
||||
}
|
||||
let offset = stack_depth - n_symbols;
|
||||
|
||||
for (i, sym) in symbols.iter().enumerate() {
|
||||
if current_stack[offset + i] != *sym {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
fn add_insertion(&mut self, insert_at: usize, opcode: OpCode, immediate: u32) {
|
||||
let start = self.insert_bytes.len();
|
||||
|
||||
self.insert_bytes.push(opcode as u8);
|
||||
self.insert_bytes.encode_u32(immediate);
|
||||
|
||||
self.insertions.push(Insertion {
|
||||
at: insert_at,
|
||||
start,
|
||||
end: self.insert_bytes.len(),
|
||||
});
|
||||
|
||||
log_instruction!(
|
||||
"**insert {:?} {} at byte offset {}**",
|
||||
opcode,
|
||||
immediate,
|
||||
insert_at
|
||||
);
|
||||
}
|
||||
|
||||
/// Load a Symbol that is stored in the VM stack
|
||||
/// If it's already at the top of the stack, no code will be generated.
|
||||
/// Otherwise, local.set and local.get instructions will be inserted, using the LocalId provided.
|
||||
///
|
||||
/// If the return value is `Some(s)`, `s` should be stored by the caller, and provided in the next call.
|
||||
/// If the return value is `None`, the Symbol is no longer stored in the VM stack, but in a local.
|
||||
/// (In this case, the caller must remember to declare the local in the function header.)
|
||||
pub fn load_symbol(
|
||||
&mut self,
|
||||
symbol: Symbol,
|
||||
vm_state: VmSymbolState,
|
||||
next_local_id: LocalId,
|
||||
) -> Option<VmSymbolState> {
|
||||
use VmSymbolState::*;
|
||||
|
||||
match vm_state {
|
||||
NotYetPushed => {
|
||||
internal_error!("Symbol {:?} has no value yet. Nothing to load.", symbol)
|
||||
}
|
||||
|
||||
Pushed { pushed_at } => {
|
||||
match self.current_stack().last() {
|
||||
Some(top_symbol) if *top_symbol == symbol => {
|
||||
// We're lucky, the symbol is already on top of the current block's stack.
|
||||
// No code to generate! (This reduces code size by up to 25% in tests.)
|
||||
// Just let the caller know what happened
|
||||
Some(Popped { pushed_at })
|
||||
}
|
||||
_ => {
|
||||
// Symbol is not on top of the stack.
|
||||
// We should have saved it to a local, so go back and do that now.
|
||||
self.store_pushed_symbol_to_local(
|
||||
symbol,
|
||||
vm_state,
|
||||
pushed_at,
|
||||
next_local_id,
|
||||
);
|
||||
|
||||
// Recover the value again at the current position
|
||||
self.get_local(next_local_id);
|
||||
self.set_top_symbol(symbol);
|
||||
|
||||
// This Symbol is no longer stored in the VM stack, but in a local
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Popped { pushed_at } => {
|
||||
// This Symbol is being used for a second time
|
||||
// Insert a local.tee where it was pushed, so we don't interfere with the first usage
|
||||
self.add_insertion(pushed_at, TEELOCAL, next_local_id.0);
|
||||
|
||||
// Insert a local.get at the current position
|
||||
self.get_local(next_local_id);
|
||||
self.set_top_symbol(symbol);
|
||||
|
||||
// This symbol has been promoted to a Local
|
||||
// Tell the caller it no longer has a VirtualMachineSymbolState
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Go back and store a Symbol in a local variable, without loading it at the current position
|
||||
pub fn store_symbol_to_local(
|
||||
&mut self,
|
||||
symbol: Symbol,
|
||||
vm_state: VmSymbolState,
|
||||
next_local_id: LocalId,
|
||||
) {
|
||||
use VmSymbolState::*;
|
||||
|
||||
match vm_state {
|
||||
NotYetPushed => {
|
||||
// Nothing to do
|
||||
}
|
||||
Pushed { pushed_at } => {
|
||||
self.store_pushed_symbol_to_local(symbol, vm_state, pushed_at, next_local_id)
|
||||
}
|
||||
Popped { pushed_at } => {
|
||||
self.add_insertion(pushed_at, TEELOCAL, next_local_id.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn store_pushed_symbol_to_local(
|
||||
&mut self,
|
||||
symbol: Symbol,
|
||||
vm_state: VmSymbolState,
|
||||
pushed_at: usize,
|
||||
local_id: LocalId,
|
||||
) {
|
||||
debug_assert!(matches!(vm_state, VmSymbolState::Pushed { .. }));
|
||||
|
||||
// Update our stack model at the position where we're going to set the SETLOCAL
|
||||
let mut found = false;
|
||||
for block in self.vm_block_stack.iter_mut() {
|
||||
if let Some(found_index) = block.value_stack.iter().position(|&s| s == symbol) {
|
||||
block.value_stack.remove(found_index);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Go back to the code position where it was pushed, and save it to a local
|
||||
if found {
|
||||
self.add_insertion(pushed_at, SETLOCAL, local_id.0);
|
||||
} else {
|
||||
if DEBUG_SETTINGS.instructions {
|
||||
println!(
|
||||
"{:?} has been popped implicitly. Leaving it on the stack.",
|
||||
symbol
|
||||
);
|
||||
}
|
||||
self.add_insertion(pushed_at, TEELOCAL, local_id.0);
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
|
||||
FUNCTION HEADER
|
||||
|
||||
***********************************************************/
|
||||
|
||||
/// Generate bytes to declare the function's local variables
|
||||
fn build_local_declarations(&mut self, local_types: &[ValueType]) {
|
||||
// reserve one byte for num_batches
|
||||
self.preamble.push(0);
|
||||
|
||||
if local_types.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Write declarations in batches of the same ValueType
|
||||
let mut num_batches: u32 = 0;
|
||||
let mut batch_type = local_types[0];
|
||||
let mut batch_size = 0;
|
||||
for t in local_types {
|
||||
if *t == batch_type {
|
||||
batch_size += 1;
|
||||
} else {
|
||||
self.preamble.encode_u32(batch_size);
|
||||
self.preamble.push(batch_type as u8);
|
||||
batch_type = *t;
|
||||
batch_size = 1;
|
||||
num_batches += 1;
|
||||
}
|
||||
}
|
||||
self.preamble.encode_u32(batch_size);
|
||||
self.preamble.push(batch_type as u8);
|
||||
num_batches += 1;
|
||||
|
||||
// Go back and write the number of batches at the start
|
||||
if num_batches < 128 {
|
||||
self.preamble[0] = num_batches as u8;
|
||||
} else {
|
||||
// We need more than 1 byte to encode num_batches!
|
||||
// This is a ridiculous edge case, so just pad to 5 bytes for simplicity
|
||||
let old_len = self.preamble.len();
|
||||
self.preamble.resize(old_len + 4, 0);
|
||||
self.preamble.copy_within(1..old_len, 5);
|
||||
self.preamble.overwrite_padded_u32(0, num_batches);
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate instruction bytes to grab a frame of stack memory on entering the function
|
||||
fn build_stack_frame_push(&mut self, frame_size: i32, frame_pointer: LocalId) {
|
||||
// Can't use the usual instruction methods because they push to self.code.
|
||||
// This is the only case where we push instructions somewhere different.
|
||||
self.preamble.push(GETGLOBAL as u8);
|
||||
self.preamble.encode_u32(STACK_POINTER_GLOBAL_ID);
|
||||
self.preamble.push(I32CONST as u8);
|
||||
self.preamble.encode_i32(frame_size);
|
||||
self.preamble.push(I32SUB as u8);
|
||||
self.preamble.push(TEELOCAL as u8);
|
||||
self.preamble.encode_u32(frame_pointer.0);
|
||||
self.preamble.push(SETGLOBAL as u8);
|
||||
self.preamble.encode_u32(STACK_POINTER_GLOBAL_ID);
|
||||
}
|
||||
|
||||
/// Generate instruction bytes to release a frame of stack memory on leaving the function
|
||||
fn build_stack_frame_pop(&mut self, frame_size: i32, frame_pointer: LocalId) {
|
||||
self.get_local(frame_pointer);
|
||||
self.i32_const(frame_size);
|
||||
self.i32_add();
|
||||
self.set_global(STACK_POINTER_GLOBAL_ID);
|
||||
}
|
||||
|
||||
/// Build the function header: local declarations, stack frame push/pop code, and function length
|
||||
/// After this, all bytes have been generated (but not yet serialized) and we know the final size.
|
||||
pub fn build_fn_header_and_footer(
|
||||
&mut self,
|
||||
local_types: &[ValueType],
|
||||
frame_size: i32,
|
||||
frame_pointer: Option<LocalId>,
|
||||
) {
|
||||
self.build_local_declarations(local_types);
|
||||
|
||||
if frame_size != 0 {
|
||||
if let Some(frame_ptr_id) = frame_pointer {
|
||||
let aligned_size = round_up_to_alignment!(frame_size, FRAME_ALIGNMENT_BYTES);
|
||||
self.build_stack_frame_push(aligned_size, frame_ptr_id);
|
||||
self.build_stack_frame_pop(aligned_size, frame_ptr_id); // footer
|
||||
}
|
||||
}
|
||||
|
||||
self.code.push(END as u8);
|
||||
|
||||
let inner_len = self.preamble.len() + self.code.len() + self.insert_bytes.len();
|
||||
self.inner_length.encode_u32(inner_len as u32);
|
||||
|
||||
// Sort insertions. They are not created in order of assignment, but in order of *second* usage.
|
||||
self.insertions.sort_by_key(|ins| ins.at);
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
|
||||
SERIALIZE
|
||||
|
||||
***********************************************************/
|
||||
|
||||
pub fn size(&self) -> usize {
|
||||
self.inner_length.len() + self.preamble.len() + self.code.len() + self.insert_bytes.len()
|
||||
}
|
||||
|
||||
/// Serialize all byte vectors in the right order
|
||||
/// Also update relocation offsets relative to the base offset (code section body start)
|
||||
pub fn serialize_without_relocs<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.append_slice(&self.inner_length);
|
||||
buffer.append_slice(&self.preamble);
|
||||
|
||||
let mut code_pos = 0;
|
||||
for Insertion { at, start, end } in self.insertions.iter() {
|
||||
buffer.append_slice(&self.code[code_pos..(*at)]);
|
||||
buffer.append_slice(&self.insert_bytes[*start..*end]);
|
||||
code_pos = *at;
|
||||
}
|
||||
|
||||
buffer.append_slice(&self.code[code_pos..self.code.len()]);
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
|
||||
INSTRUCTION HELPER METHODS
|
||||
|
||||
***********************************************************/
|
||||
|
||||
/// Base method for generating instructions
|
||||
/// Emits the opcode and simulates VM stack push/pop
|
||||
fn inst_base(&mut self, opcode: OpCode, pops: usize, push: bool) {
|
||||
let current_stack = self.current_stack_mut();
|
||||
let stack_size = current_stack.len();
|
||||
|
||||
debug_assert!(
|
||||
stack_size >= pops,
|
||||
"Wasm value stack underflow. Tried to pop {} but only {} available",
|
||||
pops,
|
||||
stack_size
|
||||
);
|
||||
|
||||
let new_len = stack_size - pops as usize;
|
||||
current_stack.truncate(new_len);
|
||||
if push {
|
||||
current_stack.push(Symbol::WASM_TMP);
|
||||
}
|
||||
self.code.push(opcode as u8);
|
||||
}
|
||||
|
||||
/// Plain instruction without any immediates
|
||||
fn inst(&mut self, opcode: OpCode, pops: usize, push: bool) {
|
||||
self.inst_base(opcode, pops, push);
|
||||
log_instruction!(
|
||||
"{:10}\t\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
self.vm_block_stack
|
||||
);
|
||||
}
|
||||
|
||||
/// Block instruction
|
||||
fn inst_block(&mut self, opcode: OpCode, pops: usize) {
|
||||
self.inst_base(opcode, pops, false);
|
||||
|
||||
// We don't support block result types. Too hard to track types through arbitrary control flow.
|
||||
self.code.push(BLOCK_NO_RESULT);
|
||||
|
||||
// Start a new block with a fresh value stack
|
||||
self.vm_block_stack.push(VmBlock {
|
||||
opcode,
|
||||
value_stack: Vec::with_capacity_in(8, self.arena),
|
||||
});
|
||||
|
||||
log_instruction!("{:10}\t{:?}", format!("{:?}", opcode), &self.vm_block_stack);
|
||||
}
|
||||
|
||||
fn inst_imm32(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u32) {
|
||||
self.inst_base(opcode, pops, push);
|
||||
self.code.encode_u32(immediate);
|
||||
log_instruction!(
|
||||
"{:10}\t{}\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
immediate,
|
||||
self.vm_block_stack
|
||||
);
|
||||
}
|
||||
|
||||
fn inst_mem(&mut self, opcode: OpCode, pops: usize, push: bool, align: Align, offset: u32) {
|
||||
self.inst_base(opcode, pops, push);
|
||||
self.code.push(align as u8);
|
||||
self.code.encode_u32(offset);
|
||||
log_instruction!(
|
||||
"{:10} {:?} {}\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
align,
|
||||
offset,
|
||||
self.vm_block_stack
|
||||
);
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
|
||||
INSTRUCTION METHODS
|
||||
|
||||
One method for each Wasm instruction (in same order as the spec)
|
||||
macros are for compactness & readability for the most common cases
|
||||
Patterns that don't repeat very much don't have macros
|
||||
|
||||
***********************************************************/
|
||||
|
||||
instruction_no_args!(unreachable_, UNREACHABLE, 0, false);
|
||||
instruction_no_args!(nop, NOP, 0, false);
|
||||
|
||||
pub fn block(&mut self) {
|
||||
self.inst_block(BLOCK, 0);
|
||||
}
|
||||
pub fn loop_(&mut self) {
|
||||
self.inst_block(LOOP, 0);
|
||||
}
|
||||
pub fn if_(&mut self) {
|
||||
self.inst_block(IF, 1);
|
||||
}
|
||||
pub fn else_(&mut self) {
|
||||
// Reuse the 'then' block but clear its value stack
|
||||
self.current_stack_mut().clear();
|
||||
self.inst(ELSE, 0, false);
|
||||
}
|
||||
|
||||
pub fn end(&mut self) {
|
||||
// We need to drop any unused values from the VM stack in order to pass Wasm validation.
|
||||
// This happens, for example, in test `gen_tags::if_guard_exhaustiveness`
|
||||
let n_unused = self
|
||||
.vm_block_stack
|
||||
.last()
|
||||
.map(|block| block.value_stack.len())
|
||||
.unwrap_or(0);
|
||||
|
||||
for _ in 0..n_unused {
|
||||
self.drop_();
|
||||
}
|
||||
|
||||
self.inst_base(END, 0, false);
|
||||
self.vm_block_stack.pop();
|
||||
|
||||
log_instruction!("END \t\t{:?}", &self.vm_block_stack);
|
||||
}
|
||||
pub fn br(&mut self, levels: u32) {
|
||||
self.inst_imm32(BR, 0, false, levels);
|
||||
}
|
||||
/// Emit `br_if` with `levels` as its immediate. Pops one value, the condition.
pub fn br_if(&mut self, levels: u32) {
    // When executed, br_if can pop 2 values (if the condition is true and the
    // target block has a result). But the stack model is a *static* analysis:
    // it has to be right for the next instruction, which is only reached when
    // the branch was not taken. So only the condition is popped.
    self.inst_imm32(BRIF, 1, false, levels);
}
|
||||
/// Placeholder for the `br_table` instruction, which is not implemented yet.
/// Calling it always panics.
// Not called from anywhere yet; kept as a reminder that the instruction is missing
#[allow(dead_code)]
fn br_table() {
    // The message names br_table: plain `br` is already implemented
    todo!("br_table instruction");
}
|
||||
|
||||
instruction_no_args!(return_, RETURN, 0, false);
|
||||
|
||||
pub fn call(&mut self, function_index: u32, n_args: usize, has_return_val: bool) {
|
||||
self.call_impl(function_index, n_args, has_return_val, false)
|
||||
}
|
||||
|
||||
pub fn call_import(&mut self, function_index: u32, n_args: usize, has_return_val: bool) {
|
||||
self.call_impl(function_index, n_args, has_return_val, true)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn call_impl(
|
||||
&mut self,
|
||||
function_index: u32,
|
||||
n_args: usize,
|
||||
has_return_val: bool,
|
||||
is_import: bool,
|
||||
) {
|
||||
self.inst_base(CALL, n_args, has_return_val);
|
||||
|
||||
if is_import {
|
||||
self.import_relocations
|
||||
.push((self.code.len(), function_index));
|
||||
}
|
||||
|
||||
self.code.encode_padded_u32(function_index);
|
||||
|
||||
log_instruction!(
|
||||
"{:10}\t{}\t{:?}",
|
||||
format!("{:?}", CALL),
|
||||
function_index,
|
||||
self.vm_block_stack
|
||||
);
|
||||
}
|
||||
|
||||
/// Deliberately unimplemented; always panics with an explanation.
#[allow(dead_code)]
fn call_indirect() {
    unimplemented!("There is no plan to implement call_indirect. Roc doesn't use function pointers");
}
|
||||
|
||||
instruction_no_args!(drop_, DROP, 1, false);
|
||||
instruction_no_args!(select, SELECT, 3, true);
|
||||
|
||||
pub fn get_local(&mut self, id: LocalId) {
|
||||
self.inst_imm32(GETLOCAL, 0, true, id.0);
|
||||
}
|
||||
pub fn set_local(&mut self, id: LocalId) {
|
||||
self.inst_imm32(SETLOCAL, 1, false, id.0);
|
||||
}
|
||||
pub fn tee_local(&mut self, id: LocalId) {
|
||||
self.inst_imm32(TEELOCAL, 0, false, id.0);
|
||||
}
|
||||
pub fn get_global(&mut self, id: u32) {
|
||||
self.inst_imm32(GETGLOBAL, 0, true, id);
|
||||
}
|
||||
pub fn set_global(&mut self, id: u32) {
|
||||
self.inst_imm32(SETGLOBAL, 1, false, id);
|
||||
}
|
||||
|
||||
instruction_memargs!(i32_load, I32LOAD, 1, true);
|
||||
instruction_memargs!(i64_load, I64LOAD, 1, true);
|
||||
instruction_memargs!(f32_load, F32LOAD, 1, true);
|
||||
instruction_memargs!(f64_load, F64LOAD, 1, true);
|
||||
instruction_memargs!(i32_load8_s, I32LOAD8S, 1, true);
|
||||
instruction_memargs!(i32_load8_u, I32LOAD8U, 1, true);
|
||||
instruction_memargs!(i32_load16_s, I32LOAD16S, 1, true);
|
||||
instruction_memargs!(i32_load16_u, I32LOAD16U, 1, true);
|
||||
instruction_memargs!(i64_load8_s, I64LOAD8S, 1, true);
|
||||
instruction_memargs!(i64_load8_u, I64LOAD8U, 1, true);
|
||||
instruction_memargs!(i64_load16_s, I64LOAD16S, 1, true);
|
||||
instruction_memargs!(i64_load16_u, I64LOAD16U, 1, true);
|
||||
instruction_memargs!(i64_load32_s, I64LOAD32S, 1, true);
|
||||
instruction_memargs!(i64_load32_u, I64LOAD32U, 1, true);
|
||||
instruction_memargs!(i32_store, I32STORE, 2, false);
|
||||
instruction_memargs!(i64_store, I64STORE, 2, false);
|
||||
instruction_memargs!(f32_store, F32STORE, 2, false);
|
||||
instruction_memargs!(f64_store, F64STORE, 2, false);
|
||||
instruction_memargs!(i32_store8, I32STORE8, 2, false);
|
||||
instruction_memargs!(i32_store16, I32STORE16, 2, false);
|
||||
instruction_memargs!(i64_store8, I64STORE8, 2, false);
|
||||
instruction_memargs!(i64_store16, I64STORE16, 2, false);
|
||||
instruction_memargs!(i64_store32, I64STORE32, 2, false);
|
||||
|
||||
pub fn memory_size(&mut self) {
|
||||
self.inst(CURRENTMEMORY, 0, true);
|
||||
self.code.push(0);
|
||||
}
|
||||
pub fn memory_grow(&mut self) {
|
||||
self.inst(GROWMEMORY, 1, true);
|
||||
self.code.push(0);
|
||||
}
|
||||
|
||||
fn log_const<T>(&self, opcode: OpCode, x: T)
|
||||
where
|
||||
T: std::fmt::Debug + std::fmt::Display,
|
||||
{
|
||||
log_instruction!(
|
||||
"{:10}\t{}\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
x,
|
||||
self.vm_block_stack
|
||||
);
|
||||
}
|
||||
/// Emit an `I32CONST` instruction, append `x` via `encode_i32`, then log the
/// instruction with `log_const`.
pub fn i32_const(&mut self, x: i32) {
|
||||
self.inst_base(I32CONST, 0, true);
|
||||
self.code.encode_i32(x);
|
||||
self.log_const(I32CONST, x);
|
||||
}
|
||||
/// Emit an `I64CONST` instruction, append `x` via `encode_i64`, then log the
/// instruction with `log_const`.
pub fn i64_const(&mut self, x: i64) {
|
||||
self.inst_base(I64CONST, 0, true);
|
||||
self.code.encode_i64(x);
|
||||
self.log_const(I64CONST, x);
|
||||
}
|
||||
/// Emit an `F32CONST` instruction, append `x` via `encode_f32`, then log the
/// instruction with `log_const`.
pub fn f32_const(&mut self, x: f32) {
|
||||
self.inst_base(F32CONST, 0, true);
|
||||
self.code.encode_f32(x);
|
||||
self.log_const(F32CONST, x);
|
||||
}
|
||||
/// Emit an `F64CONST` instruction, append `x` via `encode_f64`, then log the
/// instruction with `log_const`.
pub fn f64_const(&mut self, x: f64) {
|
||||
self.inst_base(F64CONST, 0, true);
|
||||
self.code.encode_f64(x);
|
||||
self.log_const(F64CONST, x);
|
||||
}
|
||||
|
||||
// TODO: Consider creating unified methods for numerical ops like 'eq' and 'add',
|
||||
// passing the ValueType as an argument. Could simplify lowlevel code gen.
|
||||
instruction_no_args!(i32_eqz, I32EQZ, 1, true);
|
||||
instruction_no_args!(i32_eq, I32EQ, 2, true);
|
||||
instruction_no_args!(i32_ne, I32NE, 2, true);
|
||||
instruction_no_args!(i32_lt_s, I32LTS, 2, true);
|
||||
instruction_no_args!(i32_lt_u, I32LTU, 2, true);
|
||||
instruction_no_args!(i32_gt_s, I32GTS, 2, true);
|
||||
instruction_no_args!(i32_gt_u, I32GTU, 2, true);
|
||||
instruction_no_args!(i32_le_s, I32LES, 2, true);
|
||||
instruction_no_args!(i32_le_u, I32LEU, 2, true);
|
||||
instruction_no_args!(i32_ge_s, I32GES, 2, true);
|
||||
instruction_no_args!(i32_ge_u, I32GEU, 2, true);
|
||||
instruction_no_args!(i64_eqz, I64EQZ, 1, true);
|
||||
instruction_no_args!(i64_eq, I64EQ, 2, true);
|
||||
instruction_no_args!(i64_ne, I64NE, 2, true);
|
||||
instruction_no_args!(i64_lt_s, I64LTS, 2, true);
|
||||
instruction_no_args!(i64_lt_u, I64LTU, 2, true);
|
||||
instruction_no_args!(i64_gt_s, I64GTS, 2, true);
|
||||
instruction_no_args!(i64_gt_u, I64GTU, 2, true);
|
||||
instruction_no_args!(i64_le_s, I64LES, 2, true);
|
||||
instruction_no_args!(i64_le_u, I64LEU, 2, true);
|
||||
instruction_no_args!(i64_ge_s, I64GES, 2, true);
|
||||
instruction_no_args!(i64_ge_u, I64GEU, 2, true);
|
||||
instruction_no_args!(f32_eq, F32EQ, 2, true);
|
||||
instruction_no_args!(f32_ne, F32NE, 2, true);
|
||||
instruction_no_args!(f32_lt, F32LT, 2, true);
|
||||
instruction_no_args!(f32_gt, F32GT, 2, true);
|
||||
instruction_no_args!(f32_le, F32LE, 2, true);
|
||||
instruction_no_args!(f32_ge, F32GE, 2, true);
|
||||
instruction_no_args!(f64_eq, F64EQ, 2, true);
|
||||
instruction_no_args!(f64_ne, F64NE, 2, true);
|
||||
instruction_no_args!(f64_lt, F64LT, 2, true);
|
||||
instruction_no_args!(f64_gt, F64GT, 2, true);
|
||||
instruction_no_args!(f64_le, F64LE, 2, true);
|
||||
instruction_no_args!(f64_ge, F64GE, 2, true);
|
||||
instruction_no_args!(i32_clz, I32CLZ, 1, true);
|
||||
instruction_no_args!(i32_ctz, I32CTZ, 1, true);
|
||||
instruction_no_args!(i32_popcnt, I32POPCNT, 1, true);
|
||||
instruction_no_args!(i32_add, I32ADD, 2, true);
|
||||
instruction_no_args!(i32_sub, I32SUB, 2, true);
|
||||
instruction_no_args!(i32_mul, I32MUL, 2, true);
|
||||
instruction_no_args!(i32_div_s, I32DIVS, 2, true);
|
||||
instruction_no_args!(i32_div_u, I32DIVU, 2, true);
|
||||
instruction_no_args!(i32_rem_s, I32REMS, 2, true);
|
||||
instruction_no_args!(i32_rem_u, I32REMU, 2, true);
|
||||
instruction_no_args!(i32_and, I32AND, 2, true);
|
||||
instruction_no_args!(i32_or, I32OR, 2, true);
|
||||
instruction_no_args!(i32_xor, I32XOR, 2, true);
|
||||
instruction_no_args!(i32_shl, I32SHL, 2, true);
|
||||
instruction_no_args!(i32_shr_s, I32SHRS, 2, true);
|
||||
instruction_no_args!(i32_shr_u, I32SHRU, 2, true);
|
||||
instruction_no_args!(i32_rotl, I32ROTL, 2, true);
|
||||
instruction_no_args!(i32_rotr, I32ROTR, 2, true);
|
||||
instruction_no_args!(i64_clz, I64CLZ, 1, true);
|
||||
instruction_no_args!(i64_ctz, I64CTZ, 1, true);
|
||||
instruction_no_args!(i64_popcnt, I64POPCNT, 1, true);
|
||||
instruction_no_args!(i64_add, I64ADD, 2, true);
|
||||
instruction_no_args!(i64_sub, I64SUB, 2, true);
|
||||
instruction_no_args!(i64_mul, I64MUL, 2, true);
|
||||
instruction_no_args!(i64_div_s, I64DIVS, 2, true);
|
||||
instruction_no_args!(i64_div_u, I64DIVU, 2, true);
|
||||
instruction_no_args!(i64_rem_s, I64REMS, 2, true);
|
||||
instruction_no_args!(i64_rem_u, I64REMU, 2, true);
|
||||
instruction_no_args!(i64_and, I64AND, 2, true);
|
||||
instruction_no_args!(i64_or, I64OR, 2, true);
|
||||
instruction_no_args!(i64_xor, I64XOR, 2, true);
|
||||
instruction_no_args!(i64_shl, I64SHL, 2, true);
|
||||
instruction_no_args!(i64_shr_s, I64SHRS, 2, true);
|
||||
instruction_no_args!(i64_shr_u, I64SHRU, 2, true);
|
||||
instruction_no_args!(i64_rotl, I64ROTL, 2, true);
|
||||
instruction_no_args!(i64_rotr, I64ROTR, 2, true);
|
||||
instruction_no_args!(f32_abs, F32ABS, 1, true);
|
||||
instruction_no_args!(f32_neg, F32NEG, 1, true);
|
||||
instruction_no_args!(f32_ceil, F32CEIL, 1, true);
|
||||
instruction_no_args!(f32_floor, F32FLOOR, 1, true);
|
||||
instruction_no_args!(f32_trunc, F32TRUNC, 1, true);
|
||||
instruction_no_args!(f32_nearest, F32NEAREST, 1, true);
|
||||
instruction_no_args!(f32_sqrt, F32SQRT, 1, true);
|
||||
instruction_no_args!(f32_add, F32ADD, 2, true);
|
||||
instruction_no_args!(f32_sub, F32SUB, 2, true);
|
||||
instruction_no_args!(f32_mul, F32MUL, 2, true);
|
||||
instruction_no_args!(f32_div, F32DIV, 2, true);
|
||||
instruction_no_args!(f32_min, F32MIN, 2, true);
|
||||
instruction_no_args!(f32_max, F32MAX, 2, true);
|
||||
instruction_no_args!(f32_copysign, F32COPYSIGN, 2, true);
|
||||
instruction_no_args!(f64_abs, F64ABS, 1, true);
|
||||
instruction_no_args!(f64_neg, F64NEG, 1, true);
|
||||
instruction_no_args!(f64_ceil, F64CEIL, 1, true);
|
||||
instruction_no_args!(f64_floor, F64FLOOR, 1, true);
|
||||
instruction_no_args!(f64_trunc, F64TRUNC, 1, true);
|
||||
instruction_no_args!(f64_nearest, F64NEAREST, 1, true);
|
||||
instruction_no_args!(f64_sqrt, F64SQRT, 1, true);
|
||||
instruction_no_args!(f64_add, F64ADD, 2, true);
|
||||
instruction_no_args!(f64_sub, F64SUB, 2, true);
|
||||
instruction_no_args!(f64_mul, F64MUL, 2, true);
|
||||
instruction_no_args!(f64_div, F64DIV, 2, true);
|
||||
instruction_no_args!(f64_min, F64MIN, 2, true);
|
||||
instruction_no_args!(f64_max, F64MAX, 2, true);
|
||||
instruction_no_args!(f64_copysign, F64COPYSIGN, 2, true);
|
||||
instruction_no_args!(i32_wrap_i64, I32WRAPI64, 1, true);
|
||||
instruction_no_args!(i32_trunc_s_f32, I32TRUNCSF32, 1, true);
|
||||
instruction_no_args!(i32_trunc_u_f32, I32TRUNCUF32, 1, true);
|
||||
instruction_no_args!(i32_trunc_s_f64, I32TRUNCSF64, 1, true);
|
||||
instruction_no_args!(i32_trunc_u_f64, I32TRUNCUF64, 1, true);
|
||||
instruction_no_args!(i64_extend_s_i32, I64EXTENDSI32, 1, true);
|
||||
instruction_no_args!(i64_extend_u_i32, I64EXTENDUI32, 1, true);
|
||||
instruction_no_args!(i64_trunc_s_f32, I64TRUNCSF32, 1, true);
|
||||
instruction_no_args!(i64_trunc_u_f32, I64TRUNCUF32, 1, true);
|
||||
instruction_no_args!(i64_trunc_s_f64, I64TRUNCSF64, 1, true);
|
||||
instruction_no_args!(i64_trunc_u_f64, I64TRUNCUF64, 1, true);
|
||||
instruction_no_args!(f32_convert_s_i32, F32CONVERTSI32, 1, true);
|
||||
instruction_no_args!(f32_convert_u_i32, F32CONVERTUI32, 1, true);
|
||||
instruction_no_args!(f32_convert_s_i64, F32CONVERTSI64, 1, true);
|
||||
instruction_no_args!(f32_convert_u_i64, F32CONVERTUI64, 1, true);
|
||||
instruction_no_args!(f32_demote_f64, F32DEMOTEF64, 1, true);
|
||||
instruction_no_args!(f64_convert_s_i32, F64CONVERTSI32, 1, true);
|
||||
instruction_no_args!(f64_convert_u_i32, F64CONVERTUI32, 1, true);
|
||||
instruction_no_args!(f64_convert_s_i64, F64CONVERTSI64, 1, true);
|
||||
instruction_no_args!(f64_convert_u_i64, F64CONVERTUI64, 1, true);
|
||||
instruction_no_args!(f64_promote_f32, F64PROMOTEF32, 1, true);
|
||||
instruction_no_args!(i32_reinterpret_f32, I32REINTERPRETF32, 1, true);
|
||||
instruction_no_args!(i64_reinterpret_f64, I64REINTERPRETF64, 1, true);
|
||||
instruction_no_args!(f32_reinterpret_i32, F32REINTERPRETI32, 1, true);
|
||||
instruction_no_args!(f64_reinterpret_i64, F64REINTERPRETI64, 1, true);
|
||||
}
|
|
@ -1,672 +0,0 @@
|
|||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
|
||||
use super::parse::parse_fixed_size_items;
|
||||
use super::sections::SectionId;
|
||||
use super::serialize::{overwrite_padded_i32, overwrite_padded_u32};
|
||||
use crate::wasm_module::parse::{Parse, ParseError, SkipBytes};
|
||||
|
||||
/*******************************************************************
|
||||
*
|
||||
* Relocation sections
|
||||
*
|
||||
* https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#relocation-sections
|
||||
*
|
||||
*******************************************************************/
|
||||
|
||||
/// Relocation types whose relocated value is an index.
/// Each discriminant is the raw type byte recognised by `from_u8`.
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum IndexRelocType {
    /// A function index encoded as a 5-byte [varuint32].
    /// Used for the immediate argument of a `call` instruction.
    FunctionIndexLeb = 0,
    /// A function table index encoded as a 5-byte [varint32].
    /// Used for the immediate argument of an `i32.const` instruction,
    /// e.g. taking the address of a function.
    TableIndexSleb = 1,
    /// A function table index encoded as a [uint32],
    /// e.g. taking the address of a function in a static data initializer.
    TableIndexI32 = 2,
    /// A type index encoded as a 5-byte [varuint32],
    /// e.g. the type immediate in a `call_indirect`.
    TypeIndexLeb = 6,
    /// A global index encoded as a 5-byte [varuint32],
    /// e.g. the index immediate in a `get_global`.
    GlobalIndexLeb = 7,
    /// An event index encoded as a 5-byte [varuint32].
    /// Used for the immediate argument of a `throw` and `if_except` instruction.
    EventIndexLeb = 10,
    /// A global index encoded as [uint32].
    GlobalIndexI32 = 13,
    /// The 64-bit counterpart of `R_WASM_TABLE_INDEX_SLEB`.
    /// A function table index encoded as a 10-byte [varint64]. Used for the immediate
    /// argument of an `i64.const` instruction, e.g. taking the address of a function in Wasm64.
    TableIndexSleb64 = 18,
    /// The 64-bit counterpart of `R_WASM_TABLE_INDEX_I32`.
    /// A function table index encoded as a [uint64],
    /// e.g. taking the address of a function in a static data initializer.
    TableIndexI64 = 19,
    /// A table number encoded as a 5-byte [varuint32].
    /// Used for the table immediate argument in the table.* instructions.
    TableNumberLeb = 20,
}

impl IndexRelocType {
    /// Look up the variant whose discriminant equals `x`.
    /// Returns `None` when `x` is not the type byte of any index relocation.
    fn from_u8(x: u8) -> Option<IndexRelocType> {
        const ALL_VARIANTS: [IndexRelocType; 10] = [
            IndexRelocType::FunctionIndexLeb,
            IndexRelocType::TableIndexSleb,
            IndexRelocType::TableIndexI32,
            IndexRelocType::TypeIndexLeb,
            IndexRelocType::GlobalIndexLeb,
            IndexRelocType::EventIndexLeb,
            IndexRelocType::GlobalIndexI32,
            IndexRelocType::TableIndexSleb64,
            IndexRelocType::TableIndexI64,
            IndexRelocType::TableNumberLeb,
        ];

        ALL_VARIANTS
            .iter()
            .copied()
            .find(|variant| *variant as u8 == x)
    }
}
|
||||
|
||||
/// Relocation types whose relocated value is a memory address or byte offset.
/// Each discriminant is the raw type byte recognised by `from_u8`.
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum OffsetRelocType {
    /// A linear memory index encoded as a 5-byte [varuint32].
    /// Used for the immediate argument of a `load` or `store` instruction,
    /// e.g. directly loading from or storing to a C++ global.
    MemoryAddrLeb = 3,
    /// A linear memory index encoded as a 5-byte [varint32].
    /// Used for the immediate argument of an `i32.const` instruction,
    /// e.g. taking the address of a C++ global.
    MemoryAddrSleb = 4,
    /// A linear memory index encoded as a [uint32],
    /// e.g. taking the address of a C++ global in a static data initializer.
    MemoryAddrI32 = 5,
    /// A byte offset within the code section for the specific function, encoded as a [uint32].
    /// The offsets start at the actual function code excluding its size field.
    FunctionOffsetI32 = 8,
    /// A byte offset from the start of the specified section, encoded as a [uint32].
    SectionOffsetI32 = 9,
    /// The 64-bit counterpart of `R_WASM_MEMORY_ADDR_LEB`.
    /// A 64-bit linear memory index encoded as a 10-byte [varuint64]. Used for the immediate
    /// argument of a `load` or `store` instruction on a 64-bit linear memory array.
    MemoryAddrLeb64 = 14,
    /// The 64-bit counterpart of `R_WASM_MEMORY_ADDR_SLEB`.
    /// A 64-bit linear memory index encoded as a 10-byte [varint64].
    /// Used for the immediate argument of an `i64.const` instruction.
    MemoryAddrSleb64 = 15,
    /// The 64-bit counterpart of `R_WASM_MEMORY_ADDR`.
    /// A 64-bit linear memory index encoded as a [uint64],
    /// e.g. taking the 64-bit address of a C++ global in a static data initializer.
    MemoryAddrI64 = 16,
}

impl OffsetRelocType {
    /// Look up the variant whose discriminant equals `x`.
    /// Returns `None` when `x` is not the type byte of any offset relocation.
    fn from_u8(x: u8) -> Option<OffsetRelocType> {
        const ALL_VARIANTS: [OffsetRelocType; 8] = [
            OffsetRelocType::MemoryAddrLeb,
            OffsetRelocType::MemoryAddrSleb,
            OffsetRelocType::MemoryAddrI32,
            OffsetRelocType::FunctionOffsetI32,
            OffsetRelocType::SectionOffsetI32,
            OffsetRelocType::MemoryAddrLeb64,
            OffsetRelocType::MemoryAddrSleb64,
            OffsetRelocType::MemoryAddrI64,
        ];

        ALL_VARIANTS
            .iter()
            .copied()
            .find(|variant| *variant as u8 == x)
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum RelocationEntry {
|
||||
Index {
|
||||
type_id: IndexRelocType,
|
||||
offset: u32, // offset 0 means the next byte after section id and size
|
||||
symbol_index: u32, // index in symbol table
|
||||
},
|
||||
Offset {
|
||||
type_id: OffsetRelocType,
|
||||
offset: u32, // offset 0 means the next byte after section id and size
|
||||
symbol_index: u32, // index in symbol table
|
||||
addend: i32, // addend to add to the address
|
||||
},
|
||||
}
|
||||
|
||||
impl Parse<()> for RelocationEntry {
|
||||
fn parse(_: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let type_id_byte = bytes[*cursor];
|
||||
*cursor += 1;
|
||||
let offset = u32::parse((), bytes, cursor)?;
|
||||
let symbol_index = u32::parse((), bytes, cursor)?;
|
||||
|
||||
if let Some(type_id) = IndexRelocType::from_u8(type_id_byte) {
|
||||
return Ok(RelocationEntry::Index {
|
||||
type_id,
|
||||
offset,
|
||||
symbol_index,
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(type_id) = OffsetRelocType::from_u8(type_id_byte) {
|
||||
let addend = i32::parse((), bytes, cursor)?;
|
||||
return Ok(RelocationEntry::Offset {
|
||||
type_id,
|
||||
offset,
|
||||
symbol_index,
|
||||
addend,
|
||||
});
|
||||
}
|
||||
|
||||
Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: format!("Unknown relocation type 0x{:2x}", type_id_byte),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RelocationSection<'a> {
|
||||
pub name: &'a str,
|
||||
/// The *index* (not ID!) of the target section in the module
|
||||
pub target_section_index: u32,
|
||||
pub entries: Vec<'a, RelocationEntry>,
|
||||
}
|
||||
|
||||
impl<'a> RelocationSection<'a> {
|
||||
fn new(arena: &'a Bump, name: &'a str) -> Self {
|
||||
RelocationSection {
|
||||
name,
|
||||
target_section_index: 0,
|
||||
entries: bumpalo::vec![in arena],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_relocs_u32(&self, section_bytes: &mut [u8], sym_index: u32, value: u32) {
|
||||
for entry in self.entries.iter() {
|
||||
match entry {
|
||||
RelocationEntry::Index {
|
||||
type_id,
|
||||
offset,
|
||||
symbol_index,
|
||||
} if *symbol_index == sym_index => {
|
||||
use IndexRelocType::*;
|
||||
let idx = *offset as usize;
|
||||
match type_id {
|
||||
FunctionIndexLeb | TypeIndexLeb | GlobalIndexLeb | EventIndexLeb
|
||||
| TableNumberLeb => {
|
||||
overwrite_padded_u32(&mut section_bytes[idx..], value);
|
||||
}
|
||||
_ => todo!("Linking relocation type {:?}", type_id),
|
||||
}
|
||||
}
|
||||
RelocationEntry::Offset {
|
||||
type_id,
|
||||
offset,
|
||||
symbol_index,
|
||||
addend,
|
||||
} if *symbol_index == sym_index => {
|
||||
use OffsetRelocType::*;
|
||||
let idx = *offset as usize;
|
||||
match type_id {
|
||||
MemoryAddrLeb => {
|
||||
overwrite_padded_u32(&mut section_bytes[idx..], value + *addend as u32);
|
||||
}
|
||||
MemoryAddrSleb => {
|
||||
overwrite_padded_i32(&mut section_bytes[idx..], value as i32 + *addend);
|
||||
}
|
||||
_ => todo!("Linking relocation type {:?}", type_id),
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type RelocCtx<'a> = (&'a Bump, &'static str);
|
||||
|
||||
impl<'a> Parse<RelocCtx<'a>> for RelocationSection<'a> {
|
||||
fn parse(ctx: RelocCtx<'a>, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let (arena, name) = ctx;
|
||||
|
||||
if *cursor > bytes.len() || bytes[*cursor] != SectionId::Custom as u8 {
|
||||
// The section we're looking for is missing, which is the same as being empty.
|
||||
return Ok(RelocationSection::new(arena, name));
|
||||
}
|
||||
*cursor += 1;
|
||||
u32::skip_bytes(bytes, cursor)?; // section body size
|
||||
|
||||
let actual_name = <&'a str>::parse(arena, bytes, cursor)?;
|
||||
if actual_name != name {
|
||||
// The section we're looking for is missing, which is the same as being empty.
|
||||
return Ok(RelocationSection::new(arena, name));
|
||||
}
|
||||
|
||||
let target_section_index = u32::parse((), bytes, cursor)?;
|
||||
let entries = parse_fixed_size_items(arena, bytes, cursor)?;
|
||||
Ok(RelocationSection {
|
||||
name,
|
||||
target_section_index,
|
||||
entries,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************
|
||||
*
|
||||
* Linking section
|
||||
*
|
||||
* https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#linking-metadata-section
|
||||
*
|
||||
*******************************************************************/
|
||||
|
||||
/// Linking metadata for data segments
|
||||
#[derive(Debug)]
|
||||
pub struct LinkingSegment<'a> {
|
||||
pub name: &'a str,
|
||||
pub align_bytes_pow2: u32,
|
||||
pub flags: u32,
|
||||
}
|
||||
|
||||
impl<'a> Parse<&'a Bump> for LinkingSegment<'a> {
|
||||
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let name = <&'a str>::parse(arena, bytes, cursor)?;
|
||||
let align_bytes_pow2 = u32::parse((), bytes, cursor)?;
|
||||
let flags = u32::parse((), bytes, cursor)?;
|
||||
Ok(LinkingSegment {
|
||||
name,
|
||||
align_bytes_pow2,
|
||||
flags,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Linking metadata for init (start) functions
|
||||
#[derive(Debug)]
|
||||
pub struct LinkingInitFunc {
|
||||
pub priority: u32,
|
||||
pub symbol_index: u32, // index in the symbol table, not the function index
|
||||
}
|
||||
|
||||
//------------------------------------------------
|
||||
// Common data
|
||||
//------------------------------------------------
|
||||
|
||||
#[repr(u8)]
|
||||
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
|
||||
pub enum ComdatSymKind {
|
||||
Data = 0,
|
||||
Function = 1,
|
||||
Global = 2,
|
||||
Event = 3,
|
||||
Table = 4,
|
||||
Section = 5,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ComdatSym {
|
||||
pub kind: ComdatSymKind,
|
||||
pub index: u32,
|
||||
}
|
||||
|
||||
/// Linking metadata for common data
|
||||
/// A COMDAT group may contain one or more functions, data segments, and/or custom sections.
|
||||
/// The linker will include all of these elements with a given group name from one object file,
|
||||
/// and will exclude any element with this group name from all other object files.
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug)]
|
||||
pub struct LinkingComdat<'a> {
|
||||
name: &'a str,
|
||||
flags: u32,
|
||||
syms: Vec<'a, ComdatSym>,
|
||||
}
|
||||
|
||||
//------------------------------------------------
|
||||
// Symbol table
|
||||
//------------------------------------------------
|
||||
|
||||
/// Indicating that this is a weak symbol. When
|
||||
/// linking multiple modules defining the same symbol, all weak definitions are
|
||||
/// discarded if any strong definitions exist; then if multiple weak definitions
|
||||
/// exist all but one (unspecified) are discarded; and finally it is an error if
|
||||
/// more than one definition remains.
|
||||
pub const WASM_SYM_BINDING_WEAK: u32 = 1;
|
||||
|
||||
/// Indicating that this is a local symbol (this is exclusive with `WASM_SYM_BINDING_WEAK`).
|
||||
/// Local symbols are not to be exported, or linked to other modules/sections.
|
||||
/// The names of all non-local symbols must be unique, but the names of local symbols
|
||||
/// are not considered for uniqueness. A local function or global symbol cannot reference an import.
|
||||
pub const WASM_SYM_BINDING_LOCAL: u32 = 2;
|
||||
|
||||
/// Indicating that this is a hidden symbol.
|
||||
/// Hidden symbols are not to be exported when performing the final link, but
|
||||
/// may be linked to other modules.
|
||||
pub const WASM_SYM_VISIBILITY_HIDDEN: u32 = 4;
|
||||
|
||||
/// Indicating that this symbol is not defined.
|
||||
/// For non-data symbols, this must match whether the symbol is an import
|
||||
/// or is defined; for data symbols, determines whether a segment is specified.
|
||||
pub const WASM_SYM_UNDEFINED: u32 = 0x10; // required if the symbol refers to an import
|
||||
|
||||
/// The symbol is intended to be exported from the
|
||||
/// wasm module to the host environment. This differs from the visibility flags
|
||||
/// in that it affects the static linker.
|
||||
pub const WASM_SYM_EXPORTED: u32 = 0x20;
|
||||
|
||||
/// The symbol uses an explicit symbol name,
|
||||
/// rather than reusing the name from a wasm import. This allows it to remap
|
||||
/// imports from foreign WebAssembly modules into local symbols with different
|
||||
/// names.
|
||||
pub const WASM_SYM_EXPLICIT_NAME: u32 = 0x40; // use the name from the symbol table, not from the import
|
||||
|
||||
/// The symbol is intended to be included in the
|
||||
/// linker output, regardless of whether it is used by the program.
|
||||
pub const WASM_SYM_NO_STRIP: u32 = 0x80;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum WasmObjectSymbol<'a> {
|
||||
ExplicitlyNamed {
|
||||
flags: u32,
|
||||
index: u32,
|
||||
name: &'a str,
|
||||
},
|
||||
ImplicitlyNamed {
|
||||
flags: u32,
|
||||
index: u32,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'a> Parse<&'a Bump> for WasmObjectSymbol<'a> {
|
||||
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let flags = u32::parse((), bytes, cursor)?;
|
||||
let index = u32::parse((), bytes, cursor)?;
|
||||
|
||||
// If a symbol refers to an import, then we already have the name in the import section.
|
||||
// The linking section doesn't repeat it, unless the "explicit name" flag is set (used for renaming).
|
||||
// ("Undefined symbol" is linker jargon, and "import" is Wasm jargon. For functions, they're equivalent.)
|
||||
let is_import = (flags & WASM_SYM_UNDEFINED) != 0;
|
||||
let external_syms_have_explicit_names = (flags & WASM_SYM_EXPLICIT_NAME) != 0;
|
||||
let has_explicit_name = !is_import || external_syms_have_explicit_names;
|
||||
|
||||
if has_explicit_name {
|
||||
let name = <&'a str>::parse(arena, bytes, cursor)?;
|
||||
Ok(Self::ExplicitlyNamed { flags, index, name })
|
||||
} else {
|
||||
Ok(Self::ImplicitlyNamed { flags, index })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum DataSymbol<'a> {
|
||||
Defined {
|
||||
flags: u32,
|
||||
name: &'a str,
|
||||
segment_index: u32,
|
||||
segment_offset: u32,
|
||||
size: u32,
|
||||
},
|
||||
Imported {
|
||||
flags: u32,
|
||||
name: &'a str,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'a> Parse<&'a Bump> for DataSymbol<'a> {
|
||||
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let flags = u32::parse((), bytes, cursor)?;
|
||||
let name = <&'a str>::parse(arena, bytes, cursor)?;
|
||||
|
||||
if (flags & WASM_SYM_UNDEFINED) != 0 {
|
||||
Ok(Self::Imported { flags, name })
|
||||
} else {
|
||||
let segment_index = u32::parse((), bytes, cursor)?;
|
||||
let segment_offset = u32::parse((), bytes, cursor)?;
|
||||
let size = u32::parse((), bytes, cursor)?;
|
||||
|
||||
Ok(Self::Defined {
|
||||
flags,
|
||||
name,
|
||||
segment_index,
|
||||
segment_offset,
|
||||
size,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We don't use this, but we need it in the symbol table so the indices are correct!
|
||||
/// If we ever use it, note that it refers to section index, not section id.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SectionSymbol {
|
||||
_flags: u32,
|
||||
_index: u32,
|
||||
}
|
||||
|
||||
impl Parse<()> for SectionSymbol {
|
||||
fn parse(_: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let flags = u32::parse((), bytes, cursor)?;
|
||||
let index = u32::parse((), bytes, cursor)?;
|
||||
Ok(SectionSymbol {
|
||||
_flags: flags,
|
||||
_index: index,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum SymInfo<'a> {
|
||||
Function(WasmObjectSymbol<'a>),
|
||||
Data(DataSymbol<'a>),
|
||||
Global(WasmObjectSymbol<'a>),
|
||||
Section(SectionSymbol),
|
||||
Event(WasmObjectSymbol<'a>),
|
||||
Table(WasmObjectSymbol<'a>),
|
||||
}
|
||||
|
||||
impl<'a> SymInfo<'a> {
|
||||
pub fn name(&self) -> Option<&'a str> {
|
||||
match self {
|
||||
Self::Function(WasmObjectSymbol::ExplicitlyNamed { name, .. }) => Some(name),
|
||||
Self::Data(DataSymbol::Defined { name, .. }) => Some(name),
|
||||
Self::Data(DataSymbol::Imported { name, .. }) => Some(name),
|
||||
Self::Global(WasmObjectSymbol::ExplicitlyNamed { name, .. }) => Some(name),
|
||||
Self::Event(WasmObjectSymbol::ExplicitlyNamed { name, .. }) => Some(name),
|
||||
Self::Table(WasmObjectSymbol::ExplicitlyNamed { name, .. }) => Some(name),
|
||||
_ => None, // ImplicitlyNamed or SectionSymbols
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(u8)]
|
||||
#[derive(Debug)]
|
||||
enum SymType {
|
||||
Function = 0,
|
||||
Data = 1,
|
||||
Global = 2,
|
||||
Section = 3,
|
||||
Event = 4,
|
||||
Table = 5,
|
||||
}
|
||||
|
||||
impl Parse<()> for SymType {
|
||||
fn parse(_: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let offset = *cursor;
|
||||
let type_id = bytes[offset];
|
||||
*cursor += 1;
|
||||
match type_id {
|
||||
0 => Ok(Self::Function),
|
||||
1 => Ok(Self::Data),
|
||||
2 => Ok(Self::Global),
|
||||
3 => Ok(Self::Section),
|
||||
4 => Ok(Self::Event),
|
||||
5 => Ok(Self::Table),
|
||||
x => Err(ParseError {
|
||||
offset,
|
||||
message: format!("Invalid symbol info type in linking section: {}", x),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Parse<&'a Bump> for SymInfo<'a> {
|
||||
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let type_id = SymType::parse((), bytes, cursor)?;
|
||||
match type_id {
|
||||
SymType::Function => WasmObjectSymbol::parse(arena, bytes, cursor).map(Self::Function),
|
||||
SymType::Data => DataSymbol::parse(arena, bytes, cursor).map(Self::Data),
|
||||
SymType::Global => WasmObjectSymbol::parse(arena, bytes, cursor).map(Self::Global),
|
||||
SymType::Section => SectionSymbol::parse((), bytes, cursor).map(Self::Section),
|
||||
SymType::Event => WasmObjectSymbol::parse(arena, bytes, cursor).map(Self::Event),
|
||||
SymType::Table => WasmObjectSymbol::parse(arena, bytes, cursor).map(Self::Table),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Linking subsections
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#[repr(u8)]
|
||||
#[derive(Debug)]
|
||||
enum SubSectionId {
|
||||
SegmentInfo = 5,
|
||||
InitFuncs = 6,
|
||||
ComdatInfo = 7,
|
||||
SymbolTable = 8,
|
||||
}
|
||||
|
||||
impl Parse<()> for SubSectionId {
|
||||
fn parse(_: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let id = bytes[*cursor];
|
||||
let offset = *cursor;
|
||||
*cursor += 1;
|
||||
match id {
|
||||
5 => Ok(Self::SegmentInfo),
|
||||
6 => Ok(Self::InitFuncs),
|
||||
7 => Ok(Self::ComdatInfo),
|
||||
8 => Ok(Self::SymbolTable),
|
||||
x => Err(ParseError {
|
||||
offset,
|
||||
message: format!("Invalid linking subsection ID {}", x),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Linking metadata section
|
||||
//----------------------------------------------------------------
|
||||
|
||||
const LINKING_VERSION: u8 = 2;
|
||||
|
||||
/// The spec describes this in a very weird way, so we're doing something saner.
|
||||
/// They call it an "array" of subsections with different variants, BUT this "array"
|
||||
/// has an implicit length, and none of the items can be repeated, so a struct is better.
|
||||
/// No point writing code to "find" the symbol table, when we know there's exactly one.
|
||||
/// The only one we really use is the symbol table
|
||||
#[derive(Debug)]
|
||||
pub struct LinkingSection<'a> {
|
||||
pub symbol_table: Vec<'a, SymInfo<'a>>,
|
||||
pub segment_info: Vec<'a, LinkingSegment<'a>>,
|
||||
pub init_funcs: Vec<'a, LinkingInitFunc>,
|
||||
pub comdat_info: Vec<'a, LinkingComdat<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> LinkingSection<'a> {
|
||||
const NAME: &'static str = "linking";
|
||||
|
||||
pub fn new(arena: &'a Bump) -> Self {
|
||||
LinkingSection {
|
||||
symbol_table: Vec::with_capacity_in(16, arena),
|
||||
segment_info: Vec::with_capacity_in(16, arena),
|
||||
init_funcs: Vec::with_capacity_in(0, arena),
|
||||
comdat_info: Vec::with_capacity_in(0, arena),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_internal_symbol(&self, target_name: &str) -> Result<usize, String> {
|
||||
self.symbol_table
|
||||
.iter()
|
||||
.position(|sym| sym.name() == Some(target_name))
|
||||
.ok_or_else(|| {
|
||||
format!(
|
||||
"Linking failed! Can't find `{}` in host symbol table",
|
||||
target_name
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn find_and_reindex_imported_fn(
|
||||
&mut self,
|
||||
old_fn_index: u32,
|
||||
new_fn_index: u32,
|
||||
) -> Result<u32, String> {
|
||||
self.symbol_table
|
||||
.iter_mut()
|
||||
.position(|sym| match sym {
|
||||
SymInfo::Function(WasmObjectSymbol::ImplicitlyNamed { flags, index, .. })
|
||||
| SymInfo::Function(WasmObjectSymbol::ExplicitlyNamed { flags, index, .. }) => {
|
||||
let found = *flags & WASM_SYM_UNDEFINED != 0 && *index == old_fn_index;
|
||||
if found {
|
||||
*index = new_fn_index;
|
||||
}
|
||||
found
|
||||
}
|
||||
_ => false,
|
||||
})
|
||||
.map(|sym_index| sym_index as u32)
|
||||
.ok_or_else(|| {
|
||||
format!(
|
||||
"Linking failed! Can't find fn #{} in host symbol table",
|
||||
old_fn_index
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Parse<&'a Bump> for LinkingSection<'a> {
|
||||
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
if *cursor > bytes.len() || bytes[*cursor] != SectionId::Custom as u8 {
|
||||
return Ok(LinkingSection::new(arena));
|
||||
}
|
||||
*cursor += 1;
|
||||
let body_size = u32::parse((), bytes, cursor)?;
|
||||
let section_end = *cursor + body_size as usize;
|
||||
|
||||
// Don't fail if it's the wrong section. Let the WasmModule validate presence/absence of sections
|
||||
let actual_name = <&'a str>::parse(arena, bytes, cursor)?;
|
||||
if actual_name != Self::NAME {
|
||||
return Ok(LinkingSection::new(arena));
|
||||
}
|
||||
|
||||
let linking_version = bytes[*cursor];
|
||||
if linking_version != LINKING_VERSION {
|
||||
return Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: format!(
|
||||
"This file uses version {} of Wasm linking data, but only version {} is supported.",
|
||||
linking_version, LINKING_VERSION
|
||||
),
|
||||
});
|
||||
}
|
||||
*cursor += 1;
|
||||
|
||||
// Linking section is encoded as an array of subsections, but we prefer a struct internally.
|
||||
// The order is not defined in the spec, so we loop over them and organise them into our struct.
|
||||
// In theory, there could even be more than one of each. That would be weird, but easy to handle.
|
||||
let mut section = LinkingSection::new(arena);
|
||||
while *cursor < section_end {
|
||||
let subsection_id = SubSectionId::parse((), bytes, cursor)?;
|
||||
let len = u32::parse((), bytes, cursor)?; // bytes in the subsection
|
||||
match subsection_id {
|
||||
SubSectionId::SymbolTable => {
|
||||
let count = u32::parse((), bytes, cursor)?;
|
||||
for _ in 0..count {
|
||||
let item = SymInfo::parse(arena, bytes, cursor)?;
|
||||
section.symbol_table.push(item);
|
||||
}
|
||||
}
|
||||
SubSectionId::SegmentInfo => {
|
||||
let count = u32::parse((), bytes, cursor)?;
|
||||
for _ in 0..count {
|
||||
let item = LinkingSegment::parse(arena, bytes, cursor)?;
|
||||
section.segment_info.push(item);
|
||||
}
|
||||
}
|
||||
SubSectionId::InitFuncs | SubSectionId::ComdatInfo => {
|
||||
// We don't use these subsections, just skip over them.
|
||||
*cursor += len as usize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(section)
|
||||
}
|
||||
}
|
|
@ -1,595 +0,0 @@
|
|||
pub mod code_builder;
|
||||
pub mod linking;
|
||||
pub mod opcodes;
|
||||
pub mod parse;
|
||||
pub mod sections;
|
||||
pub mod serialize;
|
||||
|
||||
use std::iter::repeat;
|
||||
|
||||
pub use code_builder::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
|
||||
pub use linking::{OffsetRelocType, RelocationEntry, SymInfo};
|
||||
pub use sections::{ConstExpr, Export, ExportType, Global, GlobalType, Signature};
|
||||
|
||||
use bitvec::vec::BitVec;
|
||||
use bumpalo::{collections::Vec, Bump};
|
||||
|
||||
use crate::DEBUG_SETTINGS;
|
||||
|
||||
use self::linking::{IndexRelocType, LinkingSection, RelocationSection, WasmObjectSymbol};
|
||||
use self::parse::{Parse, ParseError};
|
||||
use self::sections::{
|
||||
CodeSection, DataSection, ElementSection, ExportSection, FunctionSection, GlobalSection,
|
||||
ImportDesc, ImportSection, MemorySection, NameSection, OpaqueSection, Section, SectionId,
|
||||
TableSection, TypeSection,
|
||||
};
|
||||
use self::serialize::{SerialBuffer, Serialize};
|
||||
|
||||
/// A representation of the WebAssembly binary file format
|
||||
/// https://webassembly.github.io/spec/core/binary/modules.html
|
||||
#[derive(Debug)]
|
||||
pub struct WasmModule<'a> {
|
||||
pub types: TypeSection<'a>,
|
||||
pub import: ImportSection<'a>,
|
||||
pub function: FunctionSection<'a>,
|
||||
pub table: TableSection,
|
||||
pub memory: MemorySection<'a>,
|
||||
pub global: GlobalSection<'a>,
|
||||
pub export: ExportSection<'a>,
|
||||
pub start: OpaqueSection<'a>,
|
||||
pub element: ElementSection<'a>,
|
||||
pub code: CodeSection<'a>,
|
||||
pub data: DataSection<'a>,
|
||||
pub linking: LinkingSection<'a>,
|
||||
pub reloc_code: RelocationSection<'a>,
|
||||
pub reloc_data: RelocationSection<'a>,
|
||||
pub names: NameSection<'a>,
|
||||
}
|
||||
|
||||
impl<'a> WasmModule<'a> {
|
||||
pub const WASM_VERSION: u32 = 1;
|
||||
|
||||
/// Create entries in the Type and Function sections for a function signature
|
||||
pub fn add_function_signature(&mut self, signature: Signature<'a>) {
|
||||
let index = self.types.insert(signature);
|
||||
self.function.add_sig(index);
|
||||
}
|
||||
|
||||
/// Serialize the module to bytes
|
||||
pub fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.append_u8(0);
|
||||
buffer.append_slice("asm".as_bytes());
|
||||
buffer.write_unencoded_u32(Self::WASM_VERSION);
|
||||
|
||||
self.types.serialize(buffer);
|
||||
self.import.serialize(buffer);
|
||||
self.function.serialize(buffer);
|
||||
if !self.element.is_empty() {
|
||||
self.table.serialize(buffer);
|
||||
}
|
||||
self.memory.serialize(buffer);
|
||||
self.global.serialize(buffer);
|
||||
self.export.serialize(buffer);
|
||||
self.start.serialize(buffer);
|
||||
self.element.serialize(buffer);
|
||||
self.code.serialize(buffer);
|
||||
self.data.serialize(buffer);
|
||||
self.names.serialize(buffer);
|
||||
}
|
||||
|
||||
/// Module size in bytes (assuming no linker data)
|
||||
/// May be slightly overestimated. Intended for allocating buffer capacity.
|
||||
pub fn size(&self) -> usize {
|
||||
self.types.size()
|
||||
+ self.import.size()
|
||||
+ self.function.size()
|
||||
+ self.table.size()
|
||||
+ self.memory.size()
|
||||
+ self.global.size()
|
||||
+ self.export.size()
|
||||
+ self.start.size()
|
||||
+ self.element.size()
|
||||
+ self.code.size()
|
||||
+ self.data.size()
|
||||
+ self.names.size()
|
||||
}
|
||||
|
||||
pub fn preload(arena: &'a Bump, bytes: &[u8]) -> Result<Self, ParseError> {
|
||||
let is_valid_magic_number = &bytes[0..4] == "\0asm".as_bytes();
|
||||
let is_valid_version = bytes[4..8] == Self::WASM_VERSION.to_le_bytes();
|
||||
if !is_valid_magic_number || !is_valid_version {
|
||||
return Err(ParseError {
|
||||
offset: 0,
|
||||
message: "This file is not a WebAssembly binary. The file header is not valid."
|
||||
.into(),
|
||||
});
|
||||
}
|
||||
|
||||
let mut cursor: usize = 8;
|
||||
|
||||
let types = TypeSection::parse(arena, bytes, &mut cursor)?;
|
||||
let import = ImportSection::parse(arena, bytes, &mut cursor)?;
|
||||
let function = FunctionSection::parse(arena, bytes, &mut cursor)?;
|
||||
let table = TableSection::parse((), bytes, &mut cursor)?;
|
||||
let memory = MemorySection::parse(arena, bytes, &mut cursor)?;
|
||||
let global = GlobalSection::parse(arena, bytes, &mut cursor)?;
|
||||
let export = ExportSection::parse(arena, bytes, &mut cursor)?;
|
||||
let start = OpaqueSection::parse((arena, SectionId::Start), bytes, &mut cursor)?;
|
||||
let element = ElementSection::parse(arena, bytes, &mut cursor)?;
|
||||
let _data_count = OpaqueSection::parse((arena, SectionId::DataCount), bytes, &mut cursor)?;
|
||||
let code = CodeSection::parse(arena, bytes, &mut cursor)?;
|
||||
let data = DataSection::parse(arena, bytes, &mut cursor)?;
|
||||
let linking = LinkingSection::parse(arena, bytes, &mut cursor)?;
|
||||
let reloc_code = RelocationSection::parse((arena, "reloc.CODE"), bytes, &mut cursor)?;
|
||||
let reloc_data = RelocationSection::parse((arena, "reloc.DATA"), bytes, &mut cursor)?;
|
||||
let names = NameSection::parse(arena, bytes, &mut cursor)?;
|
||||
|
||||
let mut module_errors = String::new();
|
||||
if types.is_empty() {
|
||||
module_errors.push_str("Missing Type section\n");
|
||||
}
|
||||
if function.signatures.is_empty() {
|
||||
module_errors.push_str("Missing Function section\n");
|
||||
}
|
||||
if code.preloaded_bytes.is_empty() {
|
||||
module_errors.push_str("Missing Code section\n");
|
||||
}
|
||||
if linking.symbol_table.is_empty() {
|
||||
module_errors.push_str("Missing \"linking\" Custom section\n");
|
||||
}
|
||||
if reloc_code.entries.is_empty() {
|
||||
module_errors.push_str("Missing \"reloc.CODE\" Custom section\n");
|
||||
}
|
||||
if global.count != 0 {
|
||||
let global_err_msg =
|
||||
format!("All globals in a relocatable Wasm module should be imported, but found {} internally defined", global.count);
|
||||
module_errors.push_str(&global_err_msg);
|
||||
}
|
||||
|
||||
if !module_errors.is_empty() {
|
||||
return Err(ParseError {
|
||||
offset: 0,
|
||||
message: format!("{}\n{}\n{}",
|
||||
"The host file has the wrong structure. I need a relocatable WebAssembly binary file.",
|
||||
"If you're using wasm-ld, try the --relocatable option.",
|
||||
module_errors,
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
Ok(WasmModule {
|
||||
types,
|
||||
import,
|
||||
function,
|
||||
table,
|
||||
memory,
|
||||
global,
|
||||
export,
|
||||
start,
|
||||
element,
|
||||
code,
|
||||
data,
|
||||
linking,
|
||||
reloc_code,
|
||||
reloc_data,
|
||||
names,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn eliminate_dead_code(&mut self, arena: &'a Bump, called_host_fns: BitVec<usize>) {
|
||||
if DEBUG_SETTINGS.skip_dead_code_elim {
|
||||
return;
|
||||
}
|
||||
//
|
||||
// Mark all live host functions
|
||||
//
|
||||
|
||||
let import_count = self.import.imports.len();
|
||||
let host_fn_min = import_count as u32 + self.code.dead_import_dummy_count;
|
||||
let host_fn_max = host_fn_min + self.code.preloaded_count;
|
||||
|
||||
// All functions exported to JS must be kept alive
|
||||
let exported_fns = self
|
||||
.export
|
||||
.exports
|
||||
.iter()
|
||||
.filter(|ex| ex.ty == ExportType::Func)
|
||||
.map(|ex| ex.index);
|
||||
|
||||
// The ElementSection lists all functions whose "address" is taken.
|
||||
// Find their signatures so we can trace all possible indirect calls.
|
||||
// (The call_indirect instruction specifies a function signature.)
|
||||
let indirect_callees_and_signatures = Vec::from_iter_in(
|
||||
self.element
|
||||
.segments
|
||||
.iter()
|
||||
.flat_map(|seg| seg.fn_indices.iter().copied())
|
||||
.map(|fn_index| {
|
||||
let sig = self.function.signatures[fn_index as usize - import_count];
|
||||
(fn_index, sig)
|
||||
}),
|
||||
arena,
|
||||
);
|
||||
|
||||
// Trace callees of the live functions, and mark those as live too
|
||||
let live_flags = self.trace_live_host_functions(
|
||||
arena,
|
||||
called_host_fns,
|
||||
exported_fns,
|
||||
indirect_callees_and_signatures,
|
||||
host_fn_min,
|
||||
host_fn_max,
|
||||
);
|
||||
|
||||
//
|
||||
// Remove all unused JS imports
|
||||
// We don't want to force the web page to provide dummy JS functions, it's a pain!
|
||||
//
|
||||
let mut live_import_fns = Vec::with_capacity_in(import_count, arena);
|
||||
let mut fn_index = 0;
|
||||
let mut eliminated_import_count = 0;
|
||||
self.import.imports.retain(|import| {
|
||||
if !matches!(import.description, ImportDesc::Func { .. }) {
|
||||
true
|
||||
} else {
|
||||
let live = live_flags[fn_index];
|
||||
if live {
|
||||
live_import_fns.push(fn_index);
|
||||
} else {
|
||||
eliminated_import_count += 1;
|
||||
}
|
||||
fn_index += 1;
|
||||
live
|
||||
}
|
||||
});
|
||||
|
||||
// Update the count of JS imports to replace with Wasm dummies
|
||||
// (In addition to the ones we already replaced for each host-to-app call)
|
||||
self.code.dead_import_dummy_count += eliminated_import_count as u32;
|
||||
|
||||
// FunctionSection
|
||||
// Insert function signatures for the new Wasm dummy functions
|
||||
let signature_count = self.function.signatures.len();
|
||||
self.function
|
||||
.signatures
|
||||
.extend(repeat(0).take(eliminated_import_count));
|
||||
self.function
|
||||
.signatures
|
||||
.copy_within(0..signature_count, eliminated_import_count);
|
||||
|
||||
// NameSection
|
||||
// For each live import, swap its debug name to the right position
|
||||
for (new_index, &old_index) in live_import_fns.iter().enumerate() {
|
||||
let old_name: &str = self.names.function_names[old_index].1;
|
||||
let new_name: &str = self.names.function_names[new_index].1;
|
||||
self.names.function_names[new_index].1 = old_name;
|
||||
self.names.function_names[old_index].1 = new_name;
|
||||
}
|
||||
|
||||
// Relocate calls from host to JS imports
|
||||
// This must happen *before* we run dead code elimination on the code section,
|
||||
// so that byte offsets in the host's linking data will still be valid.
|
||||
for (new_index, &old_index) in live_import_fns.iter().enumerate() {
|
||||
if new_index == old_index {
|
||||
continue;
|
||||
}
|
||||
let sym_index = self
|
||||
.linking
|
||||
.find_and_reindex_imported_fn(old_index as u32, new_index as u32)
|
||||
.unwrap();
|
||||
self.reloc_code.apply_relocs_u32(
|
||||
&mut self.code.preloaded_bytes,
|
||||
sym_index,
|
||||
new_index as u32,
|
||||
);
|
||||
}
|
||||
|
||||
// Relocate calls from Roc app to JS imports
|
||||
for code_builder in self.code.code_builders.iter_mut() {
|
||||
code_builder.apply_import_relocs(&live_import_fns);
|
||||
}
|
||||
|
||||
//
|
||||
// Dead code elimination. Replace dead functions with tiny dummies.
|
||||
// Live function indices are unchanged, so no relocations are needed.
|
||||
//
|
||||
let dummy = CodeBuilder::dummy(arena);
|
||||
let mut dummy_bytes = Vec::with_capacity_in(dummy.size(), arena);
|
||||
dummy.serialize(&mut dummy_bytes);
|
||||
|
||||
let mut buffer = Vec::with_capacity_in(self.code.preloaded_bytes.len(), arena);
|
||||
self.code.preloaded_count.serialize(&mut buffer);
|
||||
for (i, fn_index) in (host_fn_min..host_fn_max).enumerate() {
|
||||
if live_flags[fn_index as usize] {
|
||||
let code_start = self.code.preloaded_offsets[i] as usize;
|
||||
let code_end = self.code.preloaded_offsets[i + 1] as usize;
|
||||
buffer.extend_from_slice(&self.code.preloaded_bytes[code_start..code_end]);
|
||||
} else {
|
||||
buffer.extend_from_slice(&dummy_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
self.code.preloaded_bytes = buffer;
|
||||
}
|
||||
|
||||
fn trace_live_host_functions<I: Iterator<Item = u32>>(
|
||||
&self,
|
||||
arena: &'a Bump,
|
||||
called_host_fns: BitVec<usize>,
|
||||
exported_fns: I,
|
||||
indirect_callees_and_signatures: Vec<'a, (u32, u32)>,
|
||||
host_fn_min: u32,
|
||||
host_fn_max: u32,
|
||||
) -> BitVec<usize> {
|
||||
let reloc_len = self.reloc_code.entries.len();
|
||||
|
||||
let mut call_offsets_and_symbols = Vec::with_capacity_in(reloc_len, arena);
|
||||
let mut indirect_call_offsets_and_types = Vec::with_capacity_in(reloc_len, arena);
|
||||
for entry in self.reloc_code.entries.iter() {
|
||||
match entry {
|
||||
RelocationEntry::Index {
|
||||
type_id: IndexRelocType::FunctionIndexLeb,
|
||||
offset,
|
||||
symbol_index,
|
||||
} => call_offsets_and_symbols.push((*offset, *symbol_index)),
|
||||
RelocationEntry::Index {
|
||||
type_id: IndexRelocType::TypeIndexLeb,
|
||||
offset,
|
||||
symbol_index,
|
||||
} => indirect_call_offsets_and_types.push((*offset, *symbol_index)),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a fast lookup from symbol index to function index, for the inner loop below
|
||||
// (Do all the matching and dereferencing outside the loop)
|
||||
let symbol_fn_indices: Vec<'a, u32> = Vec::from_iter_in(
|
||||
self.linking
|
||||
.symbol_table
|
||||
.iter()
|
||||
.map(|sym_info| match sym_info {
|
||||
SymInfo::Function(WasmObjectSymbol::ExplicitlyNamed { index, .. }) => *index,
|
||||
SymInfo::Function(WasmObjectSymbol::ImplicitlyNamed { index, .. }) => *index,
|
||||
_ => u32::MAX, // just use a dummy value for non-function symbols
|
||||
}),
|
||||
arena,
|
||||
);
|
||||
|
||||
// Loop variables for the main loop below
|
||||
let mut live_flags = BitVec::repeat(false, called_host_fns.len());
|
||||
let mut next_pass_fns = BitVec::repeat(false, called_host_fns.len());
|
||||
let mut current_pass_fns = called_host_fns;
|
||||
for index in exported_fns.filter(|i| *i < host_fn_max) {
|
||||
current_pass_fns.set(index as usize, true);
|
||||
}
|
||||
|
||||
while current_pass_fns.count_ones() > 0 {
|
||||
// All functions in this pass are live (they have been reached by earlier passes)
|
||||
debug_assert_eq!(live_flags.len(), current_pass_fns.len());
|
||||
live_flags |= ¤t_pass_fns;
|
||||
|
||||
// For each live function in the current pass
|
||||
for fn_index in current_pass_fns.iter_ones() {
|
||||
// Skip JS imports and Roc functions
|
||||
if fn_index < host_fn_min as usize || fn_index >= host_fn_max as usize {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find where the function body is
|
||||
let offset_index = fn_index - host_fn_min as usize;
|
||||
let code_start = self.code.preloaded_offsets[offset_index];
|
||||
let code_end = self.code.preloaded_offsets[offset_index + 1];
|
||||
|
||||
// For each call in the body
|
||||
for (offset, symbol) in call_offsets_and_symbols.iter() {
|
||||
if *offset > code_start && *offset < code_end {
|
||||
// Find out which other function is being called
|
||||
let callee = symbol_fn_indices[*symbol as usize];
|
||||
|
||||
// If it's not already marked live, include it in the next pass
|
||||
if live_flags.get(callee as usize).as_deref() == Some(&false) {
|
||||
next_pass_fns.set(callee as usize, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For each indirect call in the body
|
||||
for (offset, signature) in indirect_call_offsets_and_types.iter() {
|
||||
if *offset > code_start && *offset < code_end {
|
||||
// Find which indirect callees have the right type signature
|
||||
let potential_callees = indirect_callees_and_signatures
|
||||
.iter()
|
||||
.filter(|(_, sig)| sig == signature)
|
||||
.map(|(f, _)| *f);
|
||||
// Mark them all as live
|
||||
for f in potential_callees {
|
||||
if live_flags.get(f as usize).as_deref() == Some(&false) {
|
||||
next_pass_fns.set(f as usize, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::mem::swap(&mut current_pass_fns, &mut next_pass_fns);
|
||||
next_pass_fns.fill(false);
|
||||
}
|
||||
|
||||
live_flags
|
||||
}
|
||||
|
||||
pub fn relocate_internal_symbol(&mut self, sym_name: &str, value: u32) -> Result<u32, String> {
|
||||
self.linking
|
||||
.find_internal_symbol(sym_name)
|
||||
.map(|sym_index| {
|
||||
self.reloc_code.apply_relocs_u32(
|
||||
&mut self.code.preloaded_bytes,
|
||||
sym_index as u32,
|
||||
value,
|
||||
);
|
||||
|
||||
sym_index as u32
|
||||
})
|
||||
}
|
||||
|
||||
/// Linking steps for host-to-app functions like `roc__mainForHost_1_exposed`
|
||||
/// (See further explanation in the gen_wasm README)
|
||||
/// - Remove the target function from the ImportSection. It's not a JS import but the host declared it as one.
|
||||
/// - Update all of its call sites to the new index in the app
|
||||
/// - Swap the _last_ JavaScript import into the slot we just vacated
|
||||
/// - Update all call sites for the swapped JS function
|
||||
/// - Update the FunctionSection to show the correct type signature for the swapped JS function
|
||||
/// - Insert a dummy function in the CodeSection, at the same index as the swapped JS function
|
||||
pub fn link_host_to_app_calls(
|
||||
&mut self,
|
||||
arena: &'a Bump,
|
||||
host_to_app_map: Vec<'a, (&'a str, u32)>,
|
||||
) {
|
||||
for (app_fn_name, app_fn_index) in host_to_app_map.into_iter() {
|
||||
// Find the host import, and the last imported function to swap with it.
|
||||
// Not all imports are functions, so the function index and import index may be different
|
||||
// (We could support imported globals if we relocated them, although we don't at the time of this comment)
|
||||
let mut host_fn = None;
|
||||
let mut swap_fn = None;
|
||||
self.import
|
||||
.imports
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_import_index, import)| {
|
||||
matches!(import.description, ImportDesc::Func { .. })
|
||||
})
|
||||
.enumerate()
|
||||
.for_each(|(fn_index, (import_index, import))| {
|
||||
swap_fn = Some((import_index, fn_index));
|
||||
if import.name == app_fn_name {
|
||||
host_fn = Some((import_index, fn_index));
|
||||
}
|
||||
});
|
||||
|
||||
let (host_import_index, host_fn_index) = match host_fn {
|
||||
Some(x) => x,
|
||||
None => {
|
||||
// The Wasm host doesn't call our app function, so it must be called from JS. Export it.
|
||||
self.export.append(Export {
|
||||
name: app_fn_name,
|
||||
ty: ExportType::Func,
|
||||
index: app_fn_index,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let (swap_import_index, swap_fn_index) = swap_fn.unwrap();
|
||||
|
||||
// Note: swap_remove will not work, because some imports may not be functions.
|
||||
let swap_import = self.import.imports.remove(swap_import_index);
|
||||
if swap_import_index != host_import_index {
|
||||
self.import.imports[host_import_index] = swap_import;
|
||||
}
|
||||
|
||||
// Find the host's symbol for the function we're linking
|
||||
let host_sym_index = self
|
||||
.linking
|
||||
.find_and_reindex_imported_fn(host_fn_index as u32, app_fn_index)
|
||||
.unwrap();
|
||||
|
||||
// Update calls to use the app function instead of the host import
|
||||
self.reloc_code.apply_relocs_u32(
|
||||
&mut self.code.preloaded_bytes,
|
||||
host_sym_index,
|
||||
app_fn_index,
|
||||
);
|
||||
|
||||
if swap_import_index != host_import_index {
|
||||
// get the name using the old host import index because we already swapped it!
|
||||
let swap_fn_name = self.import.imports[host_import_index].name;
|
||||
|
||||
// Find the symbol for the swapped JS import
|
||||
let swap_sym_index = self
|
||||
.linking
|
||||
.find_and_reindex_imported_fn(swap_fn_index as u32, host_fn_index as u32)
|
||||
.unwrap();
|
||||
|
||||
// Update calls to the swapped JS import
|
||||
self.reloc_code.apply_relocs_u32(
|
||||
&mut self.code.preloaded_bytes,
|
||||
swap_sym_index,
|
||||
host_fn_index as u32,
|
||||
);
|
||||
|
||||
// Update the name in the debug info
|
||||
if let Some((_, debug_name)) = self
|
||||
.names
|
||||
.function_names
|
||||
.iter_mut()
|
||||
.find(|(i, _)| *i as usize == host_fn_index)
|
||||
{
|
||||
debug_name.clone_from(&swap_fn_name);
|
||||
}
|
||||
}
|
||||
|
||||
// Remember to insert a dummy function at the beginning of the code section
|
||||
// to compensate for having one less import, so that function indices don't change.
|
||||
self.code.dead_import_dummy_count += 1;
|
||||
|
||||
// Insert any type signature for the dummy. Signature index 0 will do.
|
||||
self.function.signatures.insert(0, 0);
|
||||
|
||||
// Update the debug name for the dummy
|
||||
if let Some((_, debug_name)) = self
|
||||
.names
|
||||
.function_names
|
||||
.iter_mut()
|
||||
.find(|(i, _)| *i as usize == swap_fn_index)
|
||||
{
|
||||
debug_name.clone_from(
|
||||
&bumpalo::format!(in arena, "linking_dummy_{}", debug_name).into_bump_str(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a name->index lookup table for host functions that may be called from the app
|
||||
pub fn get_host_function_lookup(&self, arena: &'a Bump) -> Vec<'a, (&'a str, u32)> {
|
||||
// Functions beginning with `roc_` go first, since they're most likely to be called
|
||||
let roc_global_fns =
|
||||
self.linking
|
||||
.symbol_table
|
||||
.iter()
|
||||
.filter_map(|sym_info| match sym_info {
|
||||
SymInfo::Function(WasmObjectSymbol::ExplicitlyNamed { flags, index, name })
|
||||
if flags & linking::WASM_SYM_BINDING_LOCAL == 0
|
||||
&& name.starts_with("roc_") =>
|
||||
{
|
||||
Some((*name, *index))
|
||||
}
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let other_global_fns =
|
||||
self.linking
|
||||
.symbol_table
|
||||
.iter()
|
||||
.filter_map(|sym_info| match sym_info {
|
||||
SymInfo::Function(WasmObjectSymbol::ExplicitlyNamed { flags, index, name })
|
||||
if flags & linking::WASM_SYM_BINDING_LOCAL == 0
|
||||
&& !name.starts_with("roc_") =>
|
||||
{
|
||||
Some((*name, *index))
|
||||
}
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let import_fns = self
|
||||
.import
|
||||
.imports
|
||||
.iter()
|
||||
.filter(|import| matches!(import.description, ImportDesc::Func { .. }))
|
||||
.enumerate()
|
||||
.map(|(fn_index, import)| (import.name, fn_index as u32));
|
||||
|
||||
Vec::from_iter_in(
|
||||
roc_global_fns.chain(other_global_fns).chain(import_fns),
|
||||
arena,
|
||||
)
|
||||
}
|
||||
}
|
|
@ -1,310 +0,0 @@
|
|||
use super::parse::{Parse, ParseError, SkipBytes};
|
||||
|
||||
/// WebAssembly instruction opcodes.
/// Each variant's discriminant is the byte that encodes the instruction in the binary format.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OpCode {
    // Control flow
    UNREACHABLE = 0x00,
    NOP = 0x01,
    BLOCK = 0x02,
    LOOP = 0x03,
    IF = 0x04,
    ELSE = 0x05,
    END = 0x0b,
    BR = 0x0c,
    BRIF = 0x0d,
    BRTABLE = 0x0e,
    RETURN = 0x0f,
    CALL = 0x10,
    CALLINDIRECT = 0x11,

    // Stack manipulation
    DROP = 0x1a,
    SELECT = 0x1b,

    // Locals and globals
    GETLOCAL = 0x20,
    SETLOCAL = 0x21,
    TEELOCAL = 0x22,
    GETGLOBAL = 0x23,
    SETGLOBAL = 0x24,

    // Memory loads
    I32LOAD = 0x28,
    I64LOAD = 0x29,
    F32LOAD = 0x2a,
    F64LOAD = 0x2b,
    I32LOAD8S = 0x2c,
    I32LOAD8U = 0x2d,
    I32LOAD16S = 0x2e,
    I32LOAD16U = 0x2f,
    I64LOAD8S = 0x30,
    I64LOAD8U = 0x31,
    I64LOAD16S = 0x32,
    I64LOAD16U = 0x33,
    I64LOAD32S = 0x34,
    I64LOAD32U = 0x35,

    // Memory stores
    I32STORE = 0x36,
    I64STORE = 0x37,
    F32STORE = 0x38,
    F64STORE = 0x39,
    I32STORE8 = 0x3a,
    I32STORE16 = 0x3b,
    I64STORE8 = 0x3c,
    I64STORE16 = 0x3d,
    I64STORE32 = 0x3e,

    // Memory size
    CURRENTMEMORY = 0x3f,
    GROWMEMORY = 0x40,

    // Constants
    I32CONST = 0x41,
    I64CONST = 0x42,
    F32CONST = 0x43,
    F64CONST = 0x44,

    // i32 comparisons
    I32EQZ = 0x45,
    I32EQ = 0x46,
    I32NE = 0x47,
    I32LTS = 0x48,
    I32LTU = 0x49,
    I32GTS = 0x4a,
    I32GTU = 0x4b,
    I32LES = 0x4c,
    I32LEU = 0x4d,
    I32GES = 0x4e,
    I32GEU = 0x4f,

    // i64 comparisons
    I64EQZ = 0x50,
    I64EQ = 0x51,
    I64NE = 0x52,
    I64LTS = 0x53,
    I64LTU = 0x54,
    I64GTS = 0x55,
    I64GTU = 0x56,
    I64LES = 0x57,
    I64LEU = 0x58,
    I64GES = 0x59,
    I64GEU = 0x5a,

    // f32 comparisons
    F32EQ = 0x5b,
    F32NE = 0x5c,
    F32LT = 0x5d,
    F32GT = 0x5e,
    F32LE = 0x5f,
    F32GE = 0x60,

    // f64 comparisons
    F64EQ = 0x61,
    F64NE = 0x62,
    F64LT = 0x63,
    F64GT = 0x64,
    F64LE = 0x65,
    F64GE = 0x66,

    // i32 arithmetic and bitwise operations
    I32CLZ = 0x67,
    I32CTZ = 0x68,
    I32POPCNT = 0x69,
    I32ADD = 0x6a,
    I32SUB = 0x6b,
    I32MUL = 0x6c,
    I32DIVS = 0x6d,
    I32DIVU = 0x6e,
    I32REMS = 0x6f,
    I32REMU = 0x70,
    I32AND = 0x71,
    I32OR = 0x72,
    I32XOR = 0x73,
    I32SHL = 0x74,
    I32SHRS = 0x75,
    I32SHRU = 0x76,
    I32ROTL = 0x77,
    I32ROTR = 0x78,

    // i64 arithmetic and bitwise operations
    I64CLZ = 0x79,
    I64CTZ = 0x7a,
    I64POPCNT = 0x7b,
    I64ADD = 0x7c,
    I64SUB = 0x7d,
    I64MUL = 0x7e,
    I64DIVS = 0x7f,
    I64DIVU = 0x80,
    I64REMS = 0x81,
    I64REMU = 0x82,
    I64AND = 0x83,
    I64OR = 0x84,
    I64XOR = 0x85,
    I64SHL = 0x86,
    I64SHRS = 0x87,
    I64SHRU = 0x88,
    I64ROTL = 0x89,
    I64ROTR = 0x8a,

    // f32 arithmetic
    F32ABS = 0x8b,
    F32NEG = 0x8c,
    F32CEIL = 0x8d,
    F32FLOOR = 0x8e,
    F32TRUNC = 0x8f,
    F32NEAREST = 0x90,
    F32SQRT = 0x91,
    F32ADD = 0x92,
    F32SUB = 0x93,
    F32MUL = 0x94,
    F32DIV = 0x95,
    F32MIN = 0x96,
    F32MAX = 0x97,
    F32COPYSIGN = 0x98,

    // f64 arithmetic
    F64ABS = 0x99,
    F64NEG = 0x9a,
    F64CEIL = 0x9b,
    F64FLOOR = 0x9c,
    F64TRUNC = 0x9d,
    F64NEAREST = 0x9e,
    F64SQRT = 0x9f,
    F64ADD = 0xa0,
    F64SUB = 0xa1,
    F64MUL = 0xa2,
    F64DIV = 0xa3,
    F64MIN = 0xa4,
    F64MAX = 0xa5,
    F64COPYSIGN = 0xa6,

    // Conversions between numeric types
    I32WRAPI64 = 0xa7,
    I32TRUNCSF32 = 0xa8,
    I32TRUNCUF32 = 0xa9,
    I32TRUNCSF64 = 0xaa,
    I32TRUNCUF64 = 0xab,
    I64EXTENDSI32 = 0xac,
    I64EXTENDUI32 = 0xad,
    I64TRUNCSF32 = 0xae,
    I64TRUNCUF32 = 0xaf,
    I64TRUNCSF64 = 0xb0,
    I64TRUNCUF64 = 0xb1,
    F32CONVERTSI32 = 0xb2,
    F32CONVERTUI32 = 0xb3,
    F32CONVERTSI64 = 0xb4,
    F32CONVERTUI64 = 0xb5,
    F32DEMOTEF64 = 0xb6,
    F64CONVERTSI32 = 0xb7,
    F64CONVERTUI32 = 0xb8,
    F64CONVERTSI64 = 0xb9,
    F64CONVERTUI64 = 0xba,
    F64PROMOTEF32 = 0xbb,

    // Reinterpretations between integer and float types
    I32REINTERPRETF32 = 0xbc,
    I64REINTERPRETF64 = 0xbd,
    F32REINTERPRETI32 = 0xbe,
    F64REINTERPRETI64 = 0xbf,
}
|
||||
|
||||
/// The layout of an instruction's *immediate* operands.
/// Immediates are encoded in the byte stream directly after the opcode,
/// unlike ordinary operands, which are popped off the value stack.
#[derive(Debug)]
enum OpImmediates {
    /// Nothing follows the opcode
    NoImmediate,
    /// One byte
    Byte1,
    /// Four bytes
    Bytes4,
    /// Eight bytes
    Bytes8,
    /// One LEB-128 encoded 32-bit integer
    Leb32x1,
    /// One LEB-128 encoded 64-bit integer
    Leb64x1,
    /// Two LEB-128 encoded 32-bit integers
    Leb32x2,
    /// A LEB-128 encoded count, followed by that many LEB-128 encoded 32-bit integers, plus one more
    BrTable,
}
|
||||
|
||||
fn immediates_for(op: OpCode) -> Result<OpImmediates, String> {
|
||||
use OpCode::*;
|
||||
use OpImmediates::*;
|
||||
|
||||
let imm = match op {
|
||||
UNREACHABLE => NoImmediate,
|
||||
NOP => NoImmediate,
|
||||
BLOCK | LOOP | IF => Byte1,
|
||||
ELSE => NoImmediate,
|
||||
END => NoImmediate,
|
||||
BR | BRIF => Leb32x1,
|
||||
BRTABLE => BrTable,
|
||||
RETURN => NoImmediate,
|
||||
CALL => Leb32x1,
|
||||
CALLINDIRECT => Leb32x2,
|
||||
DROP => NoImmediate,
|
||||
SELECT => NoImmediate,
|
||||
GETLOCAL | SETLOCAL | TEELOCAL => Leb32x1,
|
||||
GETGLOBAL | SETGLOBAL => Leb32x1,
|
||||
|
||||
I32LOAD | I64LOAD | F32LOAD | F64LOAD | I32LOAD8S | I32LOAD8U | I32LOAD16S | I32LOAD16U
|
||||
| I64LOAD8S | I64LOAD8U | I64LOAD16S | I64LOAD16U | I64LOAD32S | I64LOAD32U | I32STORE
|
||||
| I64STORE | F32STORE | F64STORE | I32STORE8 | I32STORE16 | I64STORE8 | I64STORE16
|
||||
| I64STORE32 => Leb32x2,
|
||||
|
||||
CURRENTMEMORY | GROWMEMORY => Byte1,
|
||||
|
||||
I32CONST => Leb32x1,
|
||||
I64CONST => Leb64x1,
|
||||
F32CONST => Bytes4,
|
||||
F64CONST => Bytes8,
|
||||
|
||||
I32EQZ | I32EQ | I32NE | I32LTS | I32LTU | I32GTS | I32GTU | I32LES | I32LEU | I32GES
|
||||
| I32GEU | I64EQZ | I64EQ | I64NE | I64LTS | I64LTU | I64GTS | I64GTU | I64LES | I64LEU
|
||||
| I64GES | I64GEU | F32EQ | F32NE | F32LT | F32GT | F32LE | F32GE | F64EQ | F64NE
|
||||
| F64LT | F64GT | F64LE | F64GE | I32CLZ | I32CTZ | I32POPCNT | I32ADD | I32SUB
|
||||
| I32MUL | I32DIVS | I32DIVU | I32REMS | I32REMU | I32AND | I32OR | I32XOR | I32SHL
|
||||
| I32SHRS | I32SHRU | I32ROTL | I32ROTR | I64CLZ | I64CTZ | I64POPCNT | I64ADD | I64SUB
|
||||
| I64MUL | I64DIVS | I64DIVU | I64REMS | I64REMU | I64AND | I64OR | I64XOR | I64SHL
|
||||
| I64SHRS | I64SHRU | I64ROTL | I64ROTR | F32ABS | F32NEG | F32CEIL | F32FLOOR
|
||||
| F32TRUNC | F32NEAREST | F32SQRT | F32ADD | F32SUB | F32MUL | F32DIV | F32MIN | F32MAX
|
||||
| F32COPYSIGN | F64ABS | F64NEG | F64CEIL | F64FLOOR | F64TRUNC | F64NEAREST | F64SQRT
|
||||
| F64ADD | F64SUB | F64MUL | F64DIV | F64MIN | F64MAX | F64COPYSIGN | I32WRAPI64
|
||||
| I32TRUNCSF32 | I32TRUNCUF32 | I32TRUNCSF64 | I32TRUNCUF64 | I64EXTENDSI32
|
||||
| I64EXTENDUI32 | I64TRUNCSF32 | I64TRUNCUF32 | I64TRUNCSF64 | I64TRUNCUF64
|
||||
| F32CONVERTSI32 | F32CONVERTUI32 | F32CONVERTSI64 | F32CONVERTUI64 | F32DEMOTEF64
|
||||
| F64CONVERTSI32 | F64CONVERTUI32 | F64CONVERTSI64 | F64CONVERTUI64 | F64PROMOTEF32
|
||||
| I32REINTERPRETF32 | I64REINTERPRETF64 | F32REINTERPRETI32 | F64REINTERPRETI64 => {
|
||||
NoImmediate
|
||||
}
|
||||
|
||||
// Catch-all in case of an invalid cast from u8 to OpCode while parsing binary
|
||||
// (rustc keeps this code, I verified in Compiler Explorer)
|
||||
#[allow(unreachable_patterns)]
|
||||
_ => return Err(format!("Unknown Wasm instruction 0x{:02x}", op as u8)),
|
||||
};
|
||||
|
||||
Ok(imm)
|
||||
}
|
||||
|
||||
impl SkipBytes for OpCode {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
use OpImmediates::*;
|
||||
|
||||
let opcode_byte: u8 = bytes[*cursor];
|
||||
|
||||
let opcode: OpCode = unsafe { std::mem::transmute(opcode_byte) };
|
||||
// will return Err if transmute was invalid
|
||||
let immediates = immediates_for(opcode).map_err(|message| ParseError {
|
||||
message,
|
||||
offset: *cursor,
|
||||
})?;
|
||||
|
||||
match immediates {
|
||||
NoImmediate => {
|
||||
*cursor += 1;
|
||||
}
|
||||
Byte1 => {
|
||||
*cursor += 1 + 1;
|
||||
}
|
||||
Bytes4 => {
|
||||
*cursor += 1 + 4;
|
||||
}
|
||||
Bytes8 => {
|
||||
*cursor += 1 + 8;
|
||||
}
|
||||
Leb32x1 => {
|
||||
*cursor += 1;
|
||||
u32::skip_bytes(bytes, cursor)?;
|
||||
}
|
||||
Leb64x1 => {
|
||||
*cursor += 1;
|
||||
u64::skip_bytes(bytes, cursor)?;
|
||||
}
|
||||
Leb32x2 => {
|
||||
*cursor += 1;
|
||||
u32::skip_bytes(bytes, cursor)?;
|
||||
u32::skip_bytes(bytes, cursor)?;
|
||||
}
|
||||
BrTable => {
|
||||
*cursor += 1;
|
||||
let n_labels = 1 + u32::parse((), bytes, cursor)?;
|
||||
for _ in 0..n_labels {
|
||||
u32::skip_bytes(bytes, cursor)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -1,241 +0,0 @@
|
|||
use super::serialize::MAX_SIZE_ENCODED_U32;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
|
||||
/// Parse serialized bytes into a data structure
|
||||
/// Specific parsers may need contextual data from other parts of the .wasm file
|
||||
pub trait Parse<ParseContext>: Sized {
|
||||
fn parse(ctx: ParseContext, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError>;
|
||||
}
|
||||
|
||||
/// Error produced while parsing or skipping serialized bytes.
///
/// `offset` is the cursor position (an index into the byte buffer) at which
/// the problem was detected, and `message` is a human-readable description.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    pub offset: usize,
    pub message: String,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{} (at byte offset {})", self.message, self.offset)
    }
}

// Lets callers use `?` into `Box<dyn Error>` and similar error plumbing.
impl std::error::Error for ParseError {}
|
||||
|
||||
/// Decode an unsigned 32-bit integer from the provided buffer in LEB-128 format
|
||||
/// Return the integer itself and the offset after it ends
|
||||
fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), ()> {
|
||||
let mut value = 0;
|
||||
let mut shift = 0;
|
||||
for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() {
|
||||
value += ((byte & 0x7f) as u32) << shift;
|
||||
if (byte & 0x80) == 0 {
|
||||
return Ok((value, i + 1));
|
||||
}
|
||||
shift += 7;
|
||||
}
|
||||
Err(())
|
||||
}
|
||||
|
||||
impl Parse<()> for u32 {
|
||||
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
match decode_u32(&bytes[*cursor..]) {
|
||||
Ok((value, len)) => {
|
||||
*cursor += len;
|
||||
Ok(value)
|
||||
}
|
||||
Err(()) => Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: format!(
|
||||
"Failed to decode u32 as LEB-128 from bytes: {:2x?}",
|
||||
&bytes[*cursor..][..MAX_SIZE_ENCODED_U32]
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse<()> for u8 {
|
||||
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let byte = bytes[*cursor];
|
||||
*cursor += 1;
|
||||
Ok(byte)
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode a signed 32-bit integer from the provided buffer in LEB-128 format
|
||||
/// Return the integer itself and the offset after it ends
|
||||
fn decode_i32(bytes: &[u8]) -> Result<(i32, usize), ()> {
|
||||
let mut value = 0;
|
||||
let mut shift = 0;
|
||||
for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() {
|
||||
value |= ((byte & 0x7f) as i32) << shift;
|
||||
if (byte & 0x80) == 0 {
|
||||
let is_negative = byte & 0x40 != 0;
|
||||
if shift < MAX_SIZE_ENCODED_U32 && is_negative {
|
||||
value |= -1 << shift;
|
||||
}
|
||||
return Ok((value, i + 1));
|
||||
}
|
||||
shift += 7;
|
||||
}
|
||||
Err(())
|
||||
}
|
||||
|
||||
impl Parse<()> for i32 {
|
||||
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
match decode_i32(&bytes[*cursor..]) {
|
||||
Ok((value, len)) => {
|
||||
*cursor += len;
|
||||
Ok(value)
|
||||
}
|
||||
Err(()) => Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: format!(
|
||||
"Failed to decode i32 as LEB-128 from bytes: {:2x?}",
|
||||
&bytes[*cursor..][..MAX_SIZE_ENCODED_U32]
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Parse<&'a Bump> for &'a str {
|
||||
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let len = u32::parse((), bytes, cursor)?;
|
||||
let end = *cursor + len as usize;
|
||||
let bytes: &[u8] = &bytes[*cursor..end];
|
||||
let copy = arena.alloc_slice_copy(bytes);
|
||||
let s = unsafe { std::str::from_utf8_unchecked(copy) };
|
||||
*cursor = end;
|
||||
Ok(s)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_variable_size_items<'a, T>(
|
||||
arena: &'a Bump,
|
||||
bytes: &[u8],
|
||||
cursor: &mut usize,
|
||||
) -> Result<Vec<'a, T>, ParseError>
|
||||
where
|
||||
T: Parse<&'a Bump>,
|
||||
{
|
||||
let len = u32::parse((), bytes, cursor)?;
|
||||
let mut vector: Vec<'a, T> = Vec::with_capacity_in(len as usize, arena);
|
||||
for _ in 0..len {
|
||||
let item = T::parse(arena, bytes, cursor)?;
|
||||
vector.push(item);
|
||||
}
|
||||
Ok(vector)
|
||||
}
|
||||
|
||||
pub fn parse_fixed_size_items<'a, T>(
|
||||
arena: &'a Bump,
|
||||
bytes: &[u8],
|
||||
cursor: &mut usize,
|
||||
) -> Result<Vec<'a, T>, ParseError>
|
||||
where
|
||||
T: Parse<()>,
|
||||
{
|
||||
let len = u32::parse((), bytes, cursor)?;
|
||||
let mut vector: Vec<'a, T> = Vec::with_capacity_in(len as usize, arena);
|
||||
for _ in 0..len {
|
||||
let item = T::parse((), bytes, cursor)?;
|
||||
vector.push(item);
|
||||
}
|
||||
Ok(vector)
|
||||
}
|
||||
|
||||
/// Skip over serialized bytes for a type
|
||||
/// This may, or may not, require looking at the byte values
|
||||
pub trait SkipBytes: Sized {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError>;
|
||||
}
|
||||
|
||||
impl SkipBytes for u32 {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
const MAX_LEN: usize = 5;
|
||||
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
|
||||
if byte & 0x80 == 0 {
|
||||
*cursor = i + 1;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: "Invalid LEB encoding".into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl SkipBytes for u64 {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
const MAX_LEN: usize = 10;
|
||||
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
|
||||
if byte & 0x80 == 0 {
|
||||
*cursor = i + 1;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: "Invalid LEB encoding".into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl SkipBytes for u8 {
|
||||
fn skip_bytes(_bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
*cursor += 1;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Note: This is just for skipping over Wasm bytes. We don't actually care about String vs str!
|
||||
impl SkipBytes for String {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
let len = u32::parse((), bytes, cursor)?;
|
||||
|
||||
if false {
|
||||
let str_bytes = &bytes[*cursor..(*cursor + len as usize)];
|
||||
println!(
|
||||
"Skipping string {:?}",
|
||||
std::str::from_utf8(str_bytes).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
*cursor += len as usize;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // `super::*` already brings the private `decode_u32` into scope, so no
    // absolute crate path is needed (one would break if this module is moved).
    use super::*;

    /// Single- and multi-byte encodings, the maximum value, and malformed input.
    #[test]
    fn test_decode_u32() {
        assert_eq!(decode_u32(&[0]), Ok((0, 1)));
        assert_eq!(decode_u32(&[64]), Ok((64, 1)));
        assert_eq!(decode_u32(&[0x7f]), Ok((0x7f, 1)));
        assert_eq!(decode_u32(&[0x80, 0x01]), Ok((0x80, 2)));
        assert_eq!(decode_u32(&[0xff, 0x7f]), Ok((0x3fff, 2)));
        assert_eq!(decode_u32(&[0x80, 0x80, 0x01]), Ok((0x4000, 3)));
        assert_eq!(
            decode_u32(&[0xff, 0xff, 0xff, 0xff, 0x0f]),
            Ok((u32::MAX, MAX_SIZE_ENCODED_U32))
        );

        // No terminating byte: too long, cut short, or empty.
        assert!(decode_u32(&[0x80; 6]).is_err());
        assert!(decode_u32(&[0x80; 2]).is_err());
        assert!(decode_u32(&[]).is_err());
    }

    /// Consecutive values are read back-to-back, with the cursor landing
    /// exactly after each one.
    #[test]
    fn test_parse_u32_sequence() {
        let bytes = &[0, 0x80, 0x01, 0xff, 0xff, 0xff, 0xff, 0x0f];
        // (expected value, expected cursor position after parsing it)
        let expected: [(u32, usize); 3] = [(0, 1), (128, 3), (u32::MAX, 8)];
        let mut cursor = 0;

        for &(value, cursor_after) in expected.iter() {
            assert_eq!(u32::parse((), bytes, &mut cursor).unwrap(), value);
            assert_eq!(cursor, cursor_after);
        }
    }
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,398 +0,0 @@
|
|||
use bumpalo::collections::vec::Vec;
|
||||
use std::fmt::Debug;
|
||||
|
||||
/// In the WebAssembly binary format, all integers are variable-length encoded (using LEB-128)
|
||||
/// A small value like 3 or 100 is encoded as 1 byte. The value 128 needs 2 bytes, etc.
|
||||
/// In practice, this saves space, since small numbers are used more often than large numbers.
|
||||
/// Of course there is a price for this - an encoded U32 can be up to 5 bytes wide.
|
||||
pub const MAX_SIZE_ENCODED_U32: usize = 5;
|
||||
|
||||
pub(super) trait Serialize {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T);
|
||||
}
|
||||
|
||||
impl Serialize for str {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.encode_u32(self.len() as u32);
|
||||
buffer.append_slice(self.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for &str {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.encode_u32(self.len() as u32);
|
||||
buffer.append_slice(self.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for u8 {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.append_u8(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for u32 {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.encode_u32(*self);
|
||||
}
|
||||
}
|
||||
|
||||
// Unit is used as a placeholder in parts of the Wasm spec we don't use yet
|
||||
impl Serialize for () {
|
||||
#[inline(always)]
|
||||
fn serialize<T: SerialBuffer>(&self, _buffer: &mut T) {}
|
||||
}
|
||||
|
||||
impl<S: Serialize> Serialize for [S] {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.encode_u32(self.len() as u32);
|
||||
for item in self.iter() {
|
||||
item.serialize(buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Vec<'_, u8> {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
buffer.encode_u32(self.len() as u32);
|
||||
buffer.append_slice(self);
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Serialize> Serialize for Option<S> {
|
||||
/// serialize Option as a vector of length 1 or 0
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
match self {
|
||||
Some(x) => {
|
||||
buffer.append_u8(1);
|
||||
x.serialize(buffer);
|
||||
}
|
||||
None => {
|
||||
buffer.append_u8(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<A: Serialize, B: Serialize> Serialize for (A, B) {
|
||||
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
|
||||
self.0.serialize(buffer);
|
||||
self.1.serialize(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a method that appends an unsigned integer in LEB-128 format and
/// returns the number of bytes written.
///
/// Each output byte carries 7 bits of payload, least-significant group first.
/// The top bit of a byte is set when more bytes follow and clear on the last one.
macro_rules! encode_uleb128 {
    ($name: ident, $ty: ty) => {
        fn $name(&mut self, value: $ty) -> usize {
            let size_before = self.size();
            let mut remaining = value;
            loop {
                let payload = (remaining & 0x7f) as u8;
                remaining >>= 7;
                if remaining == 0 {
                    // Nothing left above these 7 bits: this is the final byte.
                    self.append_u8(payload);
                    break;
                }
                self.append_u8(0x80 | payload);
            }
            self.size() - size_before
        }
    };
}
|
||||
|
||||
/// Generate a method that appends a signed integer in LEB-128 format and
/// returns the number of bytes written.
///
/// Encoding stops once the rest of the value is pure sign extension of the
/// byte just produced: all zeros after a byte with bit 6 clear, or all ones
/// after a byte with bit 6 set.
macro_rules! encode_sleb128 {
    ($name: ident, $ty: ty) => {
        fn $name(&mut self, value: $ty) -> usize {
            let size_before = self.size();
            let mut remaining = value;
            loop {
                let payload = (remaining & 0x7f) as u8;
                remaining >>= 7;
                let sign_bit_set = (payload & 0x40) != 0;
                let is_last_byte = if sign_bit_set {
                    remaining == -1
                } else {
                    remaining == 0
                };
                if is_last_byte {
                    self.append_u8(payload);
                    break;
                }
                self.append_u8(payload | 0x80);
            }
            self.size() - size_before
        }
    };
}
|
||||
|
||||
/// Generate a method that appends an integer as fixed-width little-endian
/// bytes (no LEB-128 encoding), one `append_u8` call per byte.
macro_rules! write_unencoded {
    ($name: ident, $ty: ty) => {
        /// write an unencoded little-endian integer (only used in relocations)
        fn $name(&mut self, value: $ty) {
            for byte in value.to_le_bytes().iter() {
                self.append_u8(*byte);
            }
        }
    };
}
|
||||
|
||||
/// For relocations
///
/// Overwrite the first five bytes of `buffer` with `value` as a signed LEB-128
/// integer padded to exactly five bytes: the first four bytes always carry the
/// continuation bit, the fifth never does. Panics if `buffer` is shorter than
/// five bytes.
pub fn overwrite_padded_i32(buffer: &mut [u8], value: i32) {
    for (group, slot) in buffer.iter_mut().take(4).enumerate() {
        let payload = ((value >> (7 * group)) & 0x7f) as u8;
        *slot = 0x80 | payload;
    }
    buffer[4] = ((value >> 28) & 0x7f) as u8;
}
|
||||
|
||||
/// Overwrite the first five bytes of `buffer` with `value` as an unsigned
/// LEB-128 integer padded to exactly five bytes: the first four bytes always
/// carry the continuation bit, the fifth holds whatever bits remain.
/// Panics if `buffer` is shorter than five bytes.
pub fn overwrite_padded_u32(buffer: &mut [u8], value: u32) {
    for (group, slot) in buffer.iter_mut().take(4).enumerate() {
        let payload = ((value >> (7 * group)) & 0x7f) as u8;
        *slot = 0x80 | payload;
    }
    buffer[4] = (value >> 28) as u8;
}
|
||||
|
||||
pub trait SerialBuffer: Debug {
|
||||
fn append_u8(&mut self, b: u8);
|
||||
fn overwrite_u8(&mut self, index: usize, b: u8);
|
||||
fn append_slice(&mut self, b: &[u8]);
|
||||
|
||||
fn size(&self) -> usize;
|
||||
|
||||
encode_uleb128!(encode_u32, u32);
|
||||
encode_uleb128!(encode_u64, u64);
|
||||
encode_sleb128!(encode_i32, i32);
|
||||
encode_sleb128!(encode_i64, i64);
|
||||
|
||||
fn reserve_padded_u32(&mut self) -> usize;
|
||||
fn encode_padded_u32(&mut self, value: u32) -> usize;
|
||||
fn overwrite_padded_u32(&mut self, index: usize, value: u32);
|
||||
|
||||
fn encode_f32(&mut self, value: f32) {
|
||||
self.write_unencoded_u32(value.to_bits());
|
||||
}
|
||||
|
||||
fn encode_f64(&mut self, value: f64) {
|
||||
self.write_unencoded_u64(value.to_bits());
|
||||
}
|
||||
|
||||
// methods for relocations
|
||||
write_unencoded!(write_unencoded_u32, u32);
|
||||
write_unencoded!(write_unencoded_u64, u64);
|
||||
}
|
||||
|
||||
impl SerialBuffer for std::vec::Vec<u8> {
|
||||
fn append_u8(&mut self, b: u8) {
|
||||
self.push(b);
|
||||
}
|
||||
fn overwrite_u8(&mut self, index: usize, b: u8) {
|
||||
self[index] = b;
|
||||
}
|
||||
fn append_slice(&mut self, b: &[u8]) {
|
||||
self.extend_from_slice(b);
|
||||
}
|
||||
fn size(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
fn reserve_padded_u32(&mut self) -> usize {
|
||||
let index = self.len();
|
||||
self.resize(index + MAX_SIZE_ENCODED_U32, 0xff);
|
||||
index
|
||||
}
|
||||
fn encode_padded_u32(&mut self, value: u32) -> usize {
|
||||
let index = self.len();
|
||||
let new_len = index + MAX_SIZE_ENCODED_U32;
|
||||
self.resize(new_len, 0);
|
||||
overwrite_padded_u32(&mut self[index..new_len], value);
|
||||
index
|
||||
}
|
||||
fn overwrite_padded_u32(&mut self, index: usize, value: u32) {
|
||||
overwrite_padded_u32(&mut self[index..(index + MAX_SIZE_ENCODED_U32)], value);
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> SerialBuffer for Vec<'a, u8> {
|
||||
fn append_u8(&mut self, b: u8) {
|
||||
self.push(b);
|
||||
}
|
||||
fn overwrite_u8(&mut self, index: usize, b: u8) {
|
||||
self[index] = b;
|
||||
}
|
||||
fn append_slice(&mut self, b: &[u8]) {
|
||||
self.extend_from_slice(b);
|
||||
}
|
||||
fn size(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
fn reserve_padded_u32(&mut self) -> usize {
|
||||
let index = self.len();
|
||||
self.resize(index + MAX_SIZE_ENCODED_U32, 0xff);
|
||||
index
|
||||
}
|
||||
fn encode_padded_u32(&mut self, value: u32) -> usize {
|
||||
let index = self.len();
|
||||
let new_len = index + MAX_SIZE_ENCODED_U32;
|
||||
self.resize(new_len, 0);
|
||||
overwrite_padded_u32(&mut self[index..new_len], value);
|
||||
index
|
||||
}
|
||||
fn overwrite_padded_u32(&mut self, index: usize, value: u32) {
|
||||
overwrite_padded_u32(&mut self[index..(index + MAX_SIZE_ENCODED_U32)], value);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use bumpalo::{self, collections::Vec, Bump};

    /// Encode one `u32` into a fresh arena-backed buffer.
    fn help_u32(arena: &Bump, value: u32) -> Vec<'_, u8> {
        let mut encoded = Vec::with_capacity_in(MAX_SIZE_ENCODED_U32, arena);
        encoded.encode_u32(value);
        encoded
    }

    #[test]
    fn test_encode_u32() {
        let arena = Bump::new();
        let cases: [(u32, &[u8]); 7] = [
            (0, &[0]),
            (64, &[64]),
            (0x7f, &[0x7f]),
            (0x80, &[0x80, 0x01]),
            (0x3fff, &[0xff, 0x7f]),
            (0x4000, &[0x80, 0x80, 0x01]),
            (u32::MAX, &[0xff, 0xff, 0xff, 0xff, 0x0f]),
        ];
        for &(value, expected) in cases.iter() {
            assert_eq!(help_u32(&arena, value).as_slice(), expected);
        }
    }

    /// Encode one `u64` into a fresh arena-backed buffer.
    fn help_u64(arena: &Bump, value: u64) -> Vec<'_, u8> {
        let mut encoded = Vec::with_capacity_in(10, arena);
        encoded.encode_u64(value);
        encoded
    }

    #[test]
    fn test_encode_u64() {
        let arena = Bump::new();
        let cases: [(u64, &[u8]); 7] = [
            (0, &[0]),
            (64, &[64]),
            (0x7f, &[0x7f]),
            (0x80, &[0x80, 0x01]),
            (0x3fff, &[0xff, 0x7f]),
            (0x4000, &[0x80, 0x80, 0x01]),
            (
                u64::MAX,
                &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01],
            ),
        ];
        for &(value, expected) in cases.iter() {
            assert_eq!(help_u64(&arena, value).as_slice(), expected);
        }
    }

    /// Encode one `i32` into a fresh arena-backed buffer.
    fn help_i32(arena: &Bump, value: i32) -> Vec<'_, u8> {
        let mut encoded = Vec::with_capacity_in(MAX_SIZE_ENCODED_U32, arena);
        encoded.encode_i32(value);
        encoded
    }

    #[test]
    fn test_encode_i32() {
        let arena = Bump::new();
        let cases: [(i32, &[u8]); 9] = [
            (0, &[0]),
            (1, &[1]),
            (-1, &[0x7f]),
            (63, &[63]),
            (64, &[0xc0, 0x0]),
            (-64, &[0x40]),
            (-65, &[0xbf, 0x7f]),
            (i32::MAX, &[0xff, 0xff, 0xff, 0xff, 0x07]),
            (i32::MIN, &[0x80, 0x80, 0x80, 0x80, 0x78]),
        ];
        for &(value, expected) in cases.iter() {
            assert_eq!(help_i32(&arena, value).as_slice(), expected);
        }
    }

    /// Encode one `i64` into a fresh arena-backed buffer.
    fn help_i64(arena: &Bump, value: i64) -> Vec<'_, u8> {
        let mut encoded = Vec::with_capacity_in(10, arena);
        encoded.encode_i64(value);
        encoded
    }

    #[test]
    fn test_encode_i64() {
        let arena = Bump::new();
        let cases: [(i64, &[u8]); 9] = [
            (0, &[0]),
            (1, &[1]),
            (-1, &[0x7f]),
            (63, &[63]),
            (64, &[0xc0, 0x0]),
            (-64, &[0x40]),
            (-65, &[0xbf, 0x7f]),
            (
                i64::MAX,
                &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00],
            ),
            (
                i64::MIN,
                &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7f],
            ),
        ];
        for &(value, expected) in cases.iter() {
            assert_eq!(help_i64(&arena, value).as_slice(), expected);
        }
    }

    #[test]
    fn test_overwrite_u32_padded() {
        // The same buffer is reused, so each case must overwrite all five bytes.
        let mut buffer = [0u8; 5];
        let cases: [(u32, [u8; 5]); 4] = [
            (u32::MAX, [0xff, 0xff, 0xff, 0xff, 0x0f]),
            (0, [0x80, 0x80, 0x80, 0x80, 0x00]),
            (127, [0xff, 0x80, 0x80, 0x80, 0x00]),
            (128, [0x80, 0x81, 0x80, 0x80, 0x00]),
        ];
        for &(value, expected) in cases.iter() {
            overwrite_padded_u32(&mut buffer, value);
            assert_eq!(buffer, expected);
        }
    }

    #[test]
    fn test_write_unencoded_u32() {
        let mut buffer = std::vec::Vec::with_capacity(4);

        buffer.write_unencoded_u32(0);
        assert_eq!(buffer, [0u8; 4]);

        buffer.clear();
        buffer.write_unencoded_u32(u32::MAX);
        assert_eq!(buffer, [0xffu8; 4]);
    }

    #[test]
    fn test_write_unencoded_u64() {
        let mut buffer = std::vec::Vec::with_capacity(8);

        buffer.write_unencoded_u64(0);
        assert_eq!(buffer, [0u8; 8]);

        buffer.clear();
        buffer.write_unencoded_u64(u64::MAX);
        assert_eq!(buffer, [0xffu8; 8]);
    }

    /// Encode one `i32` in padded form into a fresh five-byte array.
    fn help_pad_i32(val: i32) -> [u8; MAX_SIZE_ENCODED_U32] {
        let mut padded = [0; MAX_SIZE_ENCODED_U32];
        overwrite_padded_i32(&mut padded, val);
        padded
    }

    #[test]
    fn test_encode_padded_i32() {
        let cases: [(i32, [u8; MAX_SIZE_ENCODED_U32]); 5] = [
            (0, [0x80, 0x80, 0x80, 0x80, 0x00]),
            (1, [0x81, 0x80, 0x80, 0x80, 0x00]),
            (-1, [0xff, 0xff, 0xff, 0xff, 0x7f]),
            (i32::MAX, [0xff, 0xff, 0xff, 0xff, 0x07]),
            (i32::MIN, [0x80, 0x80, 0x80, 0x80, 0x78]),
        ];
        for &(value, expected) in cases.iter() {
            assert_eq!(help_pad_i32(value), expected);
        }

        // Only the five bytes at the start of the given slice may be touched.
        let mut buffer = [0xff; 10];
        overwrite_padded_i32(&mut buffer[2..], 0);
        assert_eq!(
            buffer,
            [0xff, 0xff, 0x80, 0x80, 0x80, 0x80, 0x00, 0xff, 0xff, 0xff]
        );
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue