Move code_builder from wasm_module to gen_wasm

Brian Carroll 2022-11-14 09:18:53 +00:00
parent c8f949d546
commit 4dea82b2f5
No known key found for this signature in database
GPG key ID: 5C7B2EC4101703C0
8 changed files with 169 additions and 192 deletions


@@ -1,940 +0,0 @@
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use core::panic;
use roc_error_macros::internal_error;
use roc_module::symbol::Symbol;
use super::opcodes::{OpCode, OpCode::*};
use super::serialize::{SerialBuffer, Serialize};
use crate::{
round_up_to_alignment, DEBUG_SETTINGS, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID,
};
macro_rules! log_instruction {
($($x: expr),+) => {
if DEBUG_SETTINGS.instructions { println!($($x,)*); }
};
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LocalId(pub u32);
/// Wasm value type. (Rust representation matches Wasm encoding)
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum ValueType {
I32 = 0x7f,
I64 = 0x7e,
F32 = 0x7d,
F64 = 0x7c,
}
impl Serialize for ValueType {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_u8(*self as u8);
}
}
impl From<u8> for ValueType {
fn from(x: u8) -> Self {
match x {
0x7f => Self::I32,
0x7e => Self::I64,
0x7d => Self::F32,
0x7c => Self::F64,
_ => internal_error!("Invalid ValueType 0x{:02x}", x),
}
}
}
const BLOCK_NO_RESULT: u8 = 0x40;
/// A control block in our model of the VM
/// Child blocks cannot "see" values from their parent block
struct VmBlock<'a> {
/// opcode indicating what kind of block this is
opcode: OpCode,
/// the stack of values for this block
value_stack: Vec<'a, Symbol>,
}
impl std::fmt::Debug for VmBlock<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!("{:?} {:?}", self.opcode, self.value_stack))
}
}
/// Wasm memory alignment for load/store instructions.
/// Rust representation matches Wasm encoding.
/// It's an error to specify alignment higher than the "natural" alignment of the instruction
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
pub enum Align {
Bytes1 = 0,
Bytes2 = 1,
Bytes4 = 2,
Bytes8 = 3,
}
impl Align {
/// Calculate the largest possible alignment for a load/store at a given stack frame offset
/// Assumes the stack frame is aligned to at least 8 bytes
pub fn from_stack_offset(max_align: Align, offset: u32) -> Align {
if (max_align == Align::Bytes8) && (offset & 7 == 0) {
return Align::Bytes8;
}
if (max_align >= Align::Bytes4) && (offset & 3 == 0) {
return Align::Bytes4;
}
if (max_align >= Align::Bytes2) && (offset & 1 == 0) {
return Align::Bytes2;
}
Align::Bytes1
}
}
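// Example: with an 8-byte-aligned stack frame, from_stack_offset(Align::Bytes8, 12)
// returns Align::Bytes4 (12 is a multiple of 4 but not of 8), while
// from_stack_offset(Align::Bytes8, 16) returns Align::Bytes8.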
impl From<u32> for Align {
fn from(x: u32) -> Align {
match x {
1 => Align::Bytes1,
2 => Align::Bytes2,
4 => Align::Bytes4,
_ => {
if x.count_ones() == 1 {
Align::Bytes8 // Max value supported by any Wasm instruction
} else {
internal_error!("Cannot align to {} bytes", x);
}
}
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum VmSymbolState {
/// Value doesn't exist yet
NotYetPushed,
/// Value has been pushed onto the VM stack but not yet popped
/// Remember where it was pushed, in case we need to insert another instruction there later
Pushed { pushed_at: usize },
/// Value has been pushed and popped, so it's not on the VM stack any more.
/// If we want to use it again later, we will have to create a local for it,
/// by going back to insert a local.tee instruction at pushed_at
Popped { pushed_at: usize },
}
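// Lifecycle sketch: a Symbol starts as NotYetPushed; when code gen pushes its
// value it becomes Pushed { pushed_at }, recording the byte offset; the first
// load leaves it Popped { pushed_at }; a second load inserts a local.tee back
// at pushed_at so the value survives in a local (see load_symbol below).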
// An instruction (local.set or local.tee) to be inserted into the function code
#[derive(Debug)]
struct Insertion {
at: usize,
start: usize,
end: usize,
}
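// `at` is the offset in `code` where the bytes get spliced in during
// serialization; `start..end` is the range within `insert_bytes` holding the
// encoded instruction (see serialize_without_relocs below).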
macro_rules! instruction_no_args {
($method_name: ident, $opcode: expr, $pops: expr, $push: expr) => {
pub fn $method_name(&mut self) {
self.inst($opcode, $pops, $push);
}
};
}
macro_rules! instruction_memargs {
($method_name: ident, $opcode: expr, $pops: expr, $push: expr) => {
pub fn $method_name(&mut self, align: Align, offset: u32) {
self.inst_mem($opcode, $pops, $push, align, offset);
}
};
}
#[derive(Debug)]
pub struct CodeBuilder<'a> {
pub arena: &'a Bump,
/// The main container for the instructions
code: Vec<'a, u8>,
/// Instruction bytes to be inserted into the code when finalizing the function
/// (Used for setting locals when we realise they are used multiple times)
insert_bytes: Vec<'a, u8>,
/// Code locations where the insert_bytes should go
insertions: Vec<'a, Insertion>,
/// Bytes for local variable declarations and stack-frame setup code.
/// We can't write this until we've finished the main code. But it goes
/// before it in the final output, so we need a separate vector.
preamble: Vec<'a, u8>,
/// Encoded bytes for the inner length of the function, locals + code.
/// ("inner" because it doesn't include its own length!)
/// Again, we can't write this until we've finished the code and preamble,
/// but it goes before them in the binary, so it's a separate vector.
inner_length: Vec<'a, u8>,
/// Our simulation model of the Wasm stack machine
/// Nested blocks of instructions. A child block can't "see" the stack of its parent block
vm_block_stack: Vec<'a, VmBlock<'a>>,
/// Relocations for calls to JS imports
/// When we remove unused imports, the live ones are re-indexed
import_relocations: Vec<'a, (usize, u32)>,
}
impl<'a> Serialize for CodeBuilder<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
self.serialize_without_relocs(buffer);
}
}
#[allow(clippy::new_without_default)]
impl<'a> CodeBuilder<'a> {
pub fn new(arena: &'a Bump) -> Self {
let mut vm_block_stack = Vec::with_capacity_in(8, arena);
let function_block = VmBlock {
opcode: BLOCK,
value_stack: Vec::with_capacity_in(8, arena),
};
vm_block_stack.push(function_block);
CodeBuilder {
arena,
code: Vec::with_capacity_in(1024, arena),
insertions: Vec::with_capacity_in(32, arena),
insert_bytes: Vec::with_capacity_in(64, arena),
preamble: Vec::with_capacity_in(32, arena),
inner_length: Vec::with_capacity_in(5, arena),
vm_block_stack,
import_relocations: Vec::with_capacity_in(0, arena),
}
}
/**********************************************************
LINKING
***********************************************************/
/// Build a dummy function with just a single `unreachable` instruction
pub fn dummy(arena: &'a Bump) -> Self {
let mut builder = Self::new(arena);
builder.unreachable_();
builder.build_fn_header_and_footer(&[], 0, None);
builder
}
pub fn apply_import_relocs(&mut self, live_import_fns: &[usize]) {
for (code_index, fn_index) in self.import_relocations.iter() {
for (new_index, old_index) in live_import_fns.iter().enumerate() {
if *fn_index as usize == *old_index {
self.code
.overwrite_padded_u32(*code_index, new_index as u32);
}
}
}
}
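// Example: if live_import_fns == [0, 2], a recorded call to old import index 2
// is patched in place to index 1; the call sites were recorded in
// import_relocations by call_impl, which pads the index to a fixed width.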
/**********************************************************
SYMBOLS
The Wasm VM stores temporary values in its stack machine.
We track which stack positions correspond to IR Symbols,
because it helps to generate more efficient code.
***********************************************************/
fn current_stack(&self) -> &Vec<'a, Symbol> {
let block = self.vm_block_stack.last().unwrap();
&block.value_stack
}
fn current_stack_mut(&mut self) -> &mut Vec<'a, Symbol> {
let block = self.vm_block_stack.last_mut().unwrap();
&mut block.value_stack
}
/// Set the Symbol that is at the top of the VM stack right now
/// We will use this later when we need to load the Symbol
pub fn set_top_symbol(&mut self, sym: Symbol) -> VmSymbolState {
let current_stack = &mut self.vm_block_stack.last_mut().unwrap().value_stack;
let pushed_at = self.code.len();
let top_symbol: &mut Symbol = current_stack
.last_mut()
.unwrap_or_else(|| internal_error!("Empty stack when trying to set Symbol {:?}", sym));
*top_symbol = sym;
VmSymbolState::Pushed { pushed_at }
}
/// Verify if a sequence of symbols is at the top of the stack
pub fn verify_stack_match(&self, symbols: &[Symbol]) -> bool {
let current_stack = self.current_stack();
let n_symbols = symbols.len();
let stack_depth = current_stack.len();
if n_symbols > stack_depth {
return false;
}
let offset = stack_depth - n_symbols;
for (i, sym) in symbols.iter().enumerate() {
if current_stack[offset + i] != *sym {
return false;
}
}
true
}
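// Example: with current stack [A, B, C], verify_stack_match(&[B, C]) is true,
// but verify_stack_match(&[A, B]) is false; only the top of the stack is compared.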
fn add_insertion(&mut self, insert_at: usize, opcode: OpCode, immediate: u32) {
let start = self.insert_bytes.len();
self.insert_bytes.push(opcode as u8);
self.insert_bytes.encode_u32(immediate);
self.insertions.push(Insertion {
at: insert_at,
start,
end: self.insert_bytes.len(),
});
log_instruction!(
"**insert {:?} {} at byte offset {}**",
opcode,
immediate,
insert_at
);
}
/// Load a Symbol that is stored in the VM stack
/// If it's already at the top of the stack, no code will be generated.
/// Otherwise, local.set and local.get instructions will be inserted, using the LocalId provided.
///
/// If the return value is `Some(s)`, `s` should be stored by the caller, and provided in the next call.
/// If the return value is `None`, the Symbol is no longer stored in the VM stack, but in a local.
/// (In this case, the caller must remember to declare the local in the function header.)
pub fn load_symbol(
&mut self,
symbol: Symbol,
vm_state: VmSymbolState,
next_local_id: LocalId,
) -> Option<VmSymbolState> {
use VmSymbolState::*;
match vm_state {
NotYetPushed => {
internal_error!("Symbol {:?} has no value yet. Nothing to load.", symbol)
}
Pushed { pushed_at } => {
match self.current_stack().last() {
Some(top_symbol) if *top_symbol == symbol => {
// We're lucky, the symbol is already on top of the current block's stack.
// No code to generate! (This reduces code size by up to 25% in tests.)
// Just let the caller know what happened
Some(Popped { pushed_at })
}
_ => {
// Symbol is not on top of the stack.
// We should have saved it to a local, so go back and do that now.
self.store_pushed_symbol_to_local(
symbol,
vm_state,
pushed_at,
next_local_id,
);
// Recover the value again at the current position
self.get_local(next_local_id);
self.set_top_symbol(symbol);
// This Symbol is no longer stored in the VM stack, but in a local
None
}
}
}
Popped { pushed_at } => {
// This Symbol is being used for a second time
// Insert a local.tee where it was pushed, so we don't interfere with the first usage
self.add_insertion(pushed_at, TEELOCAL, next_local_id.0);
// Insert a local.get at the current position
self.get_local(next_local_id);
self.set_top_symbol(symbol);
// This symbol has been promoted to a Local
// Tell the caller it no longer has a VirtualMachineSymbolState
None
}
}
}
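// Caller protocol sketch (storage.set_state and storage.declare_local are
// hypothetical caller-side helpers, not part of this module):
//
//   match code_builder.load_symbol(sym, stored_state, next_local_id) {
//       Some(new_state) => storage.set_state(sym, new_state), // still on the VM stack
//       None => storage.declare_local(sym, next_local_id), // now lives in a local
//   }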
/// Go back and store a Symbol in a local variable, without loading it at the current position
pub fn store_symbol_to_local(
&mut self,
symbol: Symbol,
vm_state: VmSymbolState,
next_local_id: LocalId,
) {
use VmSymbolState::*;
match vm_state {
NotYetPushed => {
// Nothing to do
}
Pushed { pushed_at } => {
self.store_pushed_symbol_to_local(symbol, vm_state, pushed_at, next_local_id)
}
Popped { pushed_at } => {
self.add_insertion(pushed_at, TEELOCAL, next_local_id.0);
}
}
}
fn store_pushed_symbol_to_local(
&mut self,
symbol: Symbol,
vm_state: VmSymbolState,
pushed_at: usize,
local_id: LocalId,
) {
debug_assert!(matches!(vm_state, VmSymbolState::Pushed { .. }));
// Update our stack model at the position where we're going to set the SETLOCAL
let mut found = false;
for block in self.vm_block_stack.iter_mut() {
if let Some(found_index) = block.value_stack.iter().position(|&s| s == symbol) {
block.value_stack.remove(found_index);
found = true;
}
}
// Go back to the code position where it was pushed, and save it to a local
if found {
self.add_insertion(pushed_at, SETLOCAL, local_id.0);
} else {
if DEBUG_SETTINGS.instructions {
println!(
"{:?} has been popped implicitly. Leaving it on the stack.",
symbol
);
}
self.add_insertion(pushed_at, TEELOCAL, local_id.0);
}
}
/**********************************************************
FUNCTION HEADER
***********************************************************/
/// Generate bytes to declare the function's local variables
fn build_local_declarations(&mut self, local_types: &[ValueType]) {
// reserve one byte for num_batches
self.preamble.push(0);
if local_types.is_empty() {
return;
}
// Write declarations in batches of the same ValueType
let mut num_batches: u32 = 0;
let mut batch_type = local_types[0];
let mut batch_size = 0;
for t in local_types {
if *t == batch_type {
batch_size += 1;
} else {
self.preamble.encode_u32(batch_size);
self.preamble.push(batch_type as u8);
batch_type = *t;
batch_size = 1;
num_batches += 1;
}
}
self.preamble.encode_u32(batch_size);
self.preamble.push(batch_type as u8);
num_batches += 1;
// Go back and write the number of batches at the start
if num_batches < 128 {
self.preamble[0] = num_batches as u8;
} else {
// We need more than 1 byte to encode num_batches!
// This is a ridiculous edge case, so just pad to 5 bytes for simplicity
let old_len = self.preamble.len();
self.preamble.resize(old_len + 4, 0);
self.preamble.copy_within(1..old_len, 5);
self.preamble.overwrite_padded_u32(0, num_batches);
}
}
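// Example: local_types [I32, I32, I64] encodes as 0x02 (num_batches),
// then 0x02 0x7F (two i32s), then 0x01 0x7E (one i64).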
/// Generate instruction bytes to grab a frame of stack memory on entering the function
fn build_stack_frame_push(&mut self, frame_size: i32, frame_pointer: LocalId) {
// Can't use the usual instruction methods because they push to self.code.
// This is the only case where we push instructions somewhere different.
self.preamble.push(GETGLOBAL as u8);
self.preamble.encode_u32(STACK_POINTER_GLOBAL_ID);
self.preamble.push(I32CONST as u8);
self.preamble.encode_i32(frame_size);
self.preamble.push(I32SUB as u8);
self.preamble.push(TEELOCAL as u8);
self.preamble.encode_u32(frame_pointer.0);
self.preamble.push(SETGLOBAL as u8);
self.preamble.encode_u32(STACK_POINTER_GLOBAL_ID);
}
/// Generate instruction bytes to release a frame of stack memory on leaving the function
fn build_stack_frame_pop(&mut self, frame_size: i32, frame_pointer: LocalId) {
self.get_local(frame_pointer);
self.i32_const(frame_size);
self.i32_add();
self.set_global(STACK_POINTER_GLOBAL_ID);
}
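// Equivalent WebAssembly text for the two sequences (frame_size already aligned):
//   push (preamble): global.get $sp; i32.const size; i32.sub; local.tee $fp; global.set $sp
//   pop (footer):    local.get $fp; i32.const size; i32.add; global.set $sp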
/// Build the function header: local declarations, stack frame push/pop code, and function length
/// After this, all bytes have been generated (but not yet serialized) and we know the final size.
pub fn build_fn_header_and_footer(
&mut self,
local_types: &[ValueType],
frame_size: i32,
frame_pointer: Option<LocalId>,
) {
self.build_local_declarations(local_types);
if frame_size != 0 {
if let Some(frame_ptr_id) = frame_pointer {
let aligned_size = round_up_to_alignment!(frame_size, FRAME_ALIGNMENT_BYTES);
self.build_stack_frame_push(aligned_size, frame_ptr_id);
self.build_stack_frame_pop(aligned_size, frame_ptr_id); // footer
}
}
self.code.push(END as u8);
let inner_len = self.preamble.len() + self.code.len() + self.insert_bytes.len();
self.inner_length.encode_u32(inner_len as u32);
// Sort insertions. They are not created in order of assignment, but in order of *second* usage.
self.insertions.sort_by_key(|ins| ins.at);
}
/**********************************************************
SERIALIZE
***********************************************************/
pub fn size(&self) -> usize {
self.inner_length.len() + self.preamble.len() + self.code.len() + self.insert_bytes.len()
}
/// Serialize all byte vectors in the right order
/// Also update relocation offsets relative to the base offset (code section body start)
pub fn serialize_without_relocs<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_slice(&self.inner_length);
buffer.append_slice(&self.preamble);
let mut code_pos = 0;
for Insertion { at, start, end } in self.insertions.iter() {
buffer.append_slice(&self.code[code_pos..(*at)]);
buffer.append_slice(&self.insert_bytes[*start..*end]);
code_pos = *at;
}
buffer.append_slice(&self.code[code_pos..self.code.len()]);
}
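// Splicing example: with code bytes [A, B, C, D] and one Insertion { at: 2, .. },
// the output is A, B, then that insertion's slice of insert_bytes, then C, D.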
/**********************************************************
INSTRUCTION HELPER METHODS
***********************************************************/
/// Base method for generating instructions
/// Emits the opcode and simulates VM stack push/pop
fn inst_base(&mut self, opcode: OpCode, pops: usize, push: bool) {
let current_stack = self.current_stack_mut();
let stack_size = current_stack.len();
debug_assert!(
stack_size >= pops,
"Wasm value stack underflow. Tried to pop {} but only {} available",
pops,
stack_size
);
let new_len = stack_size - pops as usize;
current_stack.truncate(new_len);
if push {
current_stack.push(Symbol::WASM_TMP);
}
self.code.push(opcode as u8);
}
/// Plain instruction without any immediates
fn inst(&mut self, opcode: OpCode, pops: usize, push: bool) {
self.inst_base(opcode, pops, push);
log_instruction!(
"{:10}\t\t{:?}",
format!("{:?}", opcode),
self.vm_block_stack
);
}
/// Block instruction
fn inst_block(&mut self, opcode: OpCode, pops: usize) {
self.inst_base(opcode, pops, false);
// We don't support block result types. Too hard to track types through arbitrary control flow.
self.code.push(BLOCK_NO_RESULT);
// Start a new block with a fresh value stack
self.vm_block_stack.push(VmBlock {
opcode,
value_stack: Vec::with_capacity_in(8, self.arena),
});
log_instruction!("{:10}\t{:?}", format!("{:?}", opcode), &self.vm_block_stack);
}
fn inst_imm32(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u32) {
self.inst_base(opcode, pops, push);
self.code.encode_u32(immediate);
log_instruction!(
"{:10}\t{}\t{:?}",
format!("{:?}", opcode),
immediate,
self.vm_block_stack
);
}
fn inst_mem(&mut self, opcode: OpCode, pops: usize, push: bool, align: Align, offset: u32) {
self.inst_base(opcode, pops, push);
self.code.push(align as u8);
self.code.encode_u32(offset);
log_instruction!(
"{:10} {:?} {}\t{:?}",
format!("{:?}", opcode),
align,
offset,
self.vm_block_stack
);
}
/**********************************************************
INSTRUCTION METHODS
One method for each Wasm instruction (in the same order as the spec).
Macros are used for compactness & readability in the most common cases;
patterns that don't repeat much are written out by hand.
***********************************************************/
instruction_no_args!(unreachable_, UNREACHABLE, 0, false);
instruction_no_args!(nop, NOP, 0, false);
pub fn block(&mut self) {
self.inst_block(BLOCK, 0);
}
pub fn loop_(&mut self) {
self.inst_block(LOOP, 0);
}
pub fn if_(&mut self) {
self.inst_block(IF, 1);
}
pub fn else_(&mut self) {
// Reuse the 'then' block but clear its value stack
self.current_stack_mut().clear();
self.inst(ELSE, 0, false);
}
pub fn end(&mut self) {
// We need to drop any unused values from the VM stack in order to pass Wasm validation.
// This happens, for example, in test `gen_tags::if_guard_exhaustiveness`
let n_unused = self
.vm_block_stack
.last()
.map(|block| block.value_stack.len())
.unwrap_or(0);
for _ in 0..n_unused {
self.drop_();
}
self.inst_base(END, 0, false);
self.vm_block_stack.pop();
log_instruction!("END \t\t{:?}", &self.vm_block_stack);
}
pub fn br(&mut self, levels: u32) {
self.inst_imm32(BR, 0, false, levels);
}
pub fn br_if(&mut self, levels: u32) {
// In dynamic execution, br_if can pop 2 values if condition is true and the target block has a result.
// But our stack model is for *static* analysis and we need it to be correct at the next instruction,
// where the branch was not taken. So we only pop 1 value, the condition.
self.inst_imm32(BRIF, 1, false, levels);
}
#[allow(dead_code)]
fn br_table() {
todo!("br_table instruction");
}
instruction_no_args!(return_, RETURN, 0, false);
pub fn call(&mut self, function_index: u32, n_args: usize, has_return_val: bool) {
self.call_impl(function_index, n_args, has_return_val, false)
}
pub fn call_import(&mut self, function_index: u32, n_args: usize, has_return_val: bool) {
self.call_impl(function_index, n_args, has_return_val, true)
}
#[inline(always)]
fn call_impl(
&mut self,
function_index: u32,
n_args: usize,
has_return_val: bool,
is_import: bool,
) {
self.inst_base(CALL, n_args, has_return_val);
if is_import {
self.import_relocations
.push((self.code.len(), function_index));
}
self.code.encode_padded_u32(function_index);
log_instruction!(
"{:10}\t{}\t{:?}",
format!("{:?}", CALL),
function_index,
self.vm_block_stack
);
}
#[allow(dead_code)]
fn call_indirect() {
unimplemented!(
"There is no plan to implement call_indirect. Roc doesn't use function pointers"
);
}
instruction_no_args!(drop_, DROP, 1, false);
instruction_no_args!(select, SELECT, 3, true);
pub fn get_local(&mut self, id: LocalId) {
self.inst_imm32(GETLOCAL, 0, true, id.0);
}
pub fn set_local(&mut self, id: LocalId) {
self.inst_imm32(SETLOCAL, 1, false, id.0);
}
pub fn tee_local(&mut self, id: LocalId) {
self.inst_imm32(TEELOCAL, 0, false, id.0);
}
pub fn get_global(&mut self, id: u32) {
self.inst_imm32(GETGLOBAL, 0, true, id);
}
pub fn set_global(&mut self, id: u32) {
self.inst_imm32(SETGLOBAL, 1, false, id);
}
instruction_memargs!(i32_load, I32LOAD, 1, true);
instruction_memargs!(i64_load, I64LOAD, 1, true);
instruction_memargs!(f32_load, F32LOAD, 1, true);
instruction_memargs!(f64_load, F64LOAD, 1, true);
instruction_memargs!(i32_load8_s, I32LOAD8S, 1, true);
instruction_memargs!(i32_load8_u, I32LOAD8U, 1, true);
instruction_memargs!(i32_load16_s, I32LOAD16S, 1, true);
instruction_memargs!(i32_load16_u, I32LOAD16U, 1, true);
instruction_memargs!(i64_load8_s, I64LOAD8S, 1, true);
instruction_memargs!(i64_load8_u, I64LOAD8U, 1, true);
instruction_memargs!(i64_load16_s, I64LOAD16S, 1, true);
instruction_memargs!(i64_load16_u, I64LOAD16U, 1, true);
instruction_memargs!(i64_load32_s, I64LOAD32S, 1, true);
instruction_memargs!(i64_load32_u, I64LOAD32U, 1, true);
instruction_memargs!(i32_store, I32STORE, 2, false);
instruction_memargs!(i64_store, I64STORE, 2, false);
instruction_memargs!(f32_store, F32STORE, 2, false);
instruction_memargs!(f64_store, F64STORE, 2, false);
instruction_memargs!(i32_store8, I32STORE8, 2, false);
instruction_memargs!(i32_store16, I32STORE16, 2, false);
instruction_memargs!(i64_store8, I64STORE8, 2, false);
instruction_memargs!(i64_store16, I64STORE16, 2, false);
instruction_memargs!(i64_store32, I64STORE32, 2, false);
pub fn memory_size(&mut self) {
self.inst(CURRENTMEMORY, 0, true);
self.code.push(0);
}
pub fn memory_grow(&mut self) {
self.inst(GROWMEMORY, 1, true);
self.code.push(0);
}
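// In both memory instructions, the trailing zero byte is the memory index
// immediate; MVP WebAssembly supports only a single memory, index 0.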
fn log_const<T>(&self, opcode: OpCode, x: T)
where
T: std::fmt::Debug + std::fmt::Display,
{
log_instruction!(
"{:10}\t{}\t{:?}",
format!("{:?}", opcode),
x,
self.vm_block_stack
);
}
pub fn i32_const(&mut self, x: i32) {
self.inst_base(I32CONST, 0, true);
self.code.encode_i32(x);
self.log_const(I32CONST, x);
}
pub fn i64_const(&mut self, x: i64) {
self.inst_base(I64CONST, 0, true);
self.code.encode_i64(x);
self.log_const(I64CONST, x);
}
pub fn f32_const(&mut self, x: f32) {
self.inst_base(F32CONST, 0, true);
self.code.encode_f32(x);
self.log_const(F32CONST, x);
}
pub fn f64_const(&mut self, x: f64) {
self.inst_base(F64CONST, 0, true);
self.code.encode_f64(x);
self.log_const(F64CONST, x);
}
// TODO: Consider creating unified methods for numerical ops like 'eq' and 'add',
// passing the ValueType as an argument. Could simplify lowlevel code gen.
instruction_no_args!(i32_eqz, I32EQZ, 1, true);
instruction_no_args!(i32_eq, I32EQ, 2, true);
instruction_no_args!(i32_ne, I32NE, 2, true);
instruction_no_args!(i32_lt_s, I32LTS, 2, true);
instruction_no_args!(i32_lt_u, I32LTU, 2, true);
instruction_no_args!(i32_gt_s, I32GTS, 2, true);
instruction_no_args!(i32_gt_u, I32GTU, 2, true);
instruction_no_args!(i32_le_s, I32LES, 2, true);
instruction_no_args!(i32_le_u, I32LEU, 2, true);
instruction_no_args!(i32_ge_s, I32GES, 2, true);
instruction_no_args!(i32_ge_u, I32GEU, 2, true);
instruction_no_args!(i64_eqz, I64EQZ, 1, true);
instruction_no_args!(i64_eq, I64EQ, 2, true);
instruction_no_args!(i64_ne, I64NE, 2, true);
instruction_no_args!(i64_lt_s, I64LTS, 2, true);
instruction_no_args!(i64_lt_u, I64LTU, 2, true);
instruction_no_args!(i64_gt_s, I64GTS, 2, true);
instruction_no_args!(i64_gt_u, I64GTU, 2, true);
instruction_no_args!(i64_le_s, I64LES, 2, true);
instruction_no_args!(i64_le_u, I64LEU, 2, true);
instruction_no_args!(i64_ge_s, I64GES, 2, true);
instruction_no_args!(i64_ge_u, I64GEU, 2, true);
instruction_no_args!(f32_eq, F32EQ, 2, true);
instruction_no_args!(f32_ne, F32NE, 2, true);
instruction_no_args!(f32_lt, F32LT, 2, true);
instruction_no_args!(f32_gt, F32GT, 2, true);
instruction_no_args!(f32_le, F32LE, 2, true);
instruction_no_args!(f32_ge, F32GE, 2, true);
instruction_no_args!(f64_eq, F64EQ, 2, true);
instruction_no_args!(f64_ne, F64NE, 2, true);
instruction_no_args!(f64_lt, F64LT, 2, true);
instruction_no_args!(f64_gt, F64GT, 2, true);
instruction_no_args!(f64_le, F64LE, 2, true);
instruction_no_args!(f64_ge, F64GE, 2, true);
instruction_no_args!(i32_clz, I32CLZ, 1, true);
instruction_no_args!(i32_ctz, I32CTZ, 1, true);
instruction_no_args!(i32_popcnt, I32POPCNT, 1, true);
instruction_no_args!(i32_add, I32ADD, 2, true);
instruction_no_args!(i32_sub, I32SUB, 2, true);
instruction_no_args!(i32_mul, I32MUL, 2, true);
instruction_no_args!(i32_div_s, I32DIVS, 2, true);
instruction_no_args!(i32_div_u, I32DIVU, 2, true);
instruction_no_args!(i32_rem_s, I32REMS, 2, true);
instruction_no_args!(i32_rem_u, I32REMU, 2, true);
instruction_no_args!(i32_and, I32AND, 2, true);
instruction_no_args!(i32_or, I32OR, 2, true);
instruction_no_args!(i32_xor, I32XOR, 2, true);
instruction_no_args!(i32_shl, I32SHL, 2, true);
instruction_no_args!(i32_shr_s, I32SHRS, 2, true);
instruction_no_args!(i32_shr_u, I32SHRU, 2, true);
instruction_no_args!(i32_rotl, I32ROTL, 2, true);
instruction_no_args!(i32_rotr, I32ROTR, 2, true);
instruction_no_args!(i64_clz, I64CLZ, 1, true);
instruction_no_args!(i64_ctz, I64CTZ, 1, true);
instruction_no_args!(i64_popcnt, I64POPCNT, 1, true);
instruction_no_args!(i64_add, I64ADD, 2, true);
instruction_no_args!(i64_sub, I64SUB, 2, true);
instruction_no_args!(i64_mul, I64MUL, 2, true);
instruction_no_args!(i64_div_s, I64DIVS, 2, true);
instruction_no_args!(i64_div_u, I64DIVU, 2, true);
instruction_no_args!(i64_rem_s, I64REMS, 2, true);
instruction_no_args!(i64_rem_u, I64REMU, 2, true);
instruction_no_args!(i64_and, I64AND, 2, true);
instruction_no_args!(i64_or, I64OR, 2, true);
instruction_no_args!(i64_xor, I64XOR, 2, true);
instruction_no_args!(i64_shl, I64SHL, 2, true);
instruction_no_args!(i64_shr_s, I64SHRS, 2, true);
instruction_no_args!(i64_shr_u, I64SHRU, 2, true);
instruction_no_args!(i64_rotl, I64ROTL, 2, true);
instruction_no_args!(i64_rotr, I64ROTR, 2, true);
instruction_no_args!(f32_abs, F32ABS, 1, true);
instruction_no_args!(f32_neg, F32NEG, 1, true);
instruction_no_args!(f32_ceil, F32CEIL, 1, true);
instruction_no_args!(f32_floor, F32FLOOR, 1, true);
instruction_no_args!(f32_trunc, F32TRUNC, 1, true);
instruction_no_args!(f32_nearest, F32NEAREST, 1, true);
instruction_no_args!(f32_sqrt, F32SQRT, 1, true);
instruction_no_args!(f32_add, F32ADD, 2, true);
instruction_no_args!(f32_sub, F32SUB, 2, true);
instruction_no_args!(f32_mul, F32MUL, 2, true);
instruction_no_args!(f32_div, F32DIV, 2, true);
instruction_no_args!(f32_min, F32MIN, 2, true);
instruction_no_args!(f32_max, F32MAX, 2, true);
instruction_no_args!(f32_copysign, F32COPYSIGN, 2, true);
instruction_no_args!(f64_abs, F64ABS, 1, true);
instruction_no_args!(f64_neg, F64NEG, 1, true);
instruction_no_args!(f64_ceil, F64CEIL, 1, true);
instruction_no_args!(f64_floor, F64FLOOR, 1, true);
instruction_no_args!(f64_trunc, F64TRUNC, 1, true);
instruction_no_args!(f64_nearest, F64NEAREST, 1, true);
instruction_no_args!(f64_sqrt, F64SQRT, 1, true);
instruction_no_args!(f64_add, F64ADD, 2, true);
instruction_no_args!(f64_sub, F64SUB, 2, true);
instruction_no_args!(f64_mul, F64MUL, 2, true);
instruction_no_args!(f64_div, F64DIV, 2, true);
instruction_no_args!(f64_min, F64MIN, 2, true);
instruction_no_args!(f64_max, F64MAX, 2, true);
instruction_no_args!(f64_copysign, F64COPYSIGN, 2, true);
instruction_no_args!(i32_wrap_i64, I32WRAPI64, 1, true);
instruction_no_args!(i32_trunc_s_f32, I32TRUNCSF32, 1, true);
instruction_no_args!(i32_trunc_u_f32, I32TRUNCUF32, 1, true);
instruction_no_args!(i32_trunc_s_f64, I32TRUNCSF64, 1, true);
instruction_no_args!(i32_trunc_u_f64, I32TRUNCUF64, 1, true);
instruction_no_args!(i64_extend_s_i32, I64EXTENDSI32, 1, true);
instruction_no_args!(i64_extend_u_i32, I64EXTENDUI32, 1, true);
instruction_no_args!(i64_trunc_s_f32, I64TRUNCSF32, 1, true);
instruction_no_args!(i64_trunc_u_f32, I64TRUNCUF32, 1, true);
instruction_no_args!(i64_trunc_s_f64, I64TRUNCSF64, 1, true);
instruction_no_args!(i64_trunc_u_f64, I64TRUNCUF64, 1, true);
instruction_no_args!(f32_convert_s_i32, F32CONVERTSI32, 1, true);
instruction_no_args!(f32_convert_u_i32, F32CONVERTUI32, 1, true);
instruction_no_args!(f32_convert_s_i64, F32CONVERTSI64, 1, true);
instruction_no_args!(f32_convert_u_i64, F32CONVERTUI64, 1, true);
instruction_no_args!(f32_demote_f64, F32DEMOTEF64, 1, true);
instruction_no_args!(f64_convert_s_i32, F64CONVERTSI32, 1, true);
instruction_no_args!(f64_convert_u_i32, F64CONVERTUI32, 1, true);
instruction_no_args!(f64_convert_s_i64, F64CONVERTSI64, 1, true);
instruction_no_args!(f64_convert_u_i64, F64CONVERTUI64, 1, true);
instruction_no_args!(f64_promote_f32, F64PROMOTEF32, 1, true);
instruction_no_args!(i32_reinterpret_f32, I32REINTERPRETF32, 1, true);
instruction_no_args!(i64_reinterpret_f64, I64REINTERPRETF64, 1, true);
instruction_no_args!(f32_reinterpret_i32, F32REINTERPRETI32, 1, true);
instruction_no_args!(f64_reinterpret_i64, F64REINTERPRETI64, 1, true);
}
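// Minimal usage sketch (hypothetical caller; `buffer` is any type implementing
// SerialBuffer): emit a function body that computes `arg + 1`.
//
//   let arena = Bump::new();
//   let mut code_builder = CodeBuilder::new(&arena);
//   code_builder.get_local(LocalId(0)); // first function parameter
//   code_builder.i32_const(1);
//   code_builder.i32_add();
//   code_builder.build_fn_header_and_footer(&[], 0, None); // no locals, no stack frame
//   code_builder.serialize(&mut buffer);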


@@ -1,4 +1,3 @@
pub mod code_builder;
pub mod linking;
pub mod opcodes;
pub mod parse;
@@ -7,8 +6,9 @@ pub mod serialize;
use std::iter::repeat;
pub use code_builder::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
pub use linking::{OffsetRelocType, RelocationEntry, SymInfo};
use opcodes::OpCode;
use roc_error_macros::internal_error;
pub use sections::{ConstExpr, Export, ExportType, Global, GlobalType, Signature};
use bitvec::vec::BitVec;
@@ -21,7 +21,7 @@ use self::sections::{
ImportDesc, ImportSection, MemorySection, NameSection, OpaqueSection, Section, SectionId,
TableSection, TypeSection,
};
use self::serialize::{SerialBuffer, Serialize};
pub use self::serialize::{SerialBuffer, Serialize};
pub const STACK_POINTER_GLOBAL_ID: u32 = 0;
pub const FRAME_ALIGNMENT_BYTES: i32 = 16;
@@ -132,7 +132,7 @@ impl<'a> WasmModule<'a> {
if function.signatures.is_empty() {
module_errors.push_str("Missing Function section\n");
}
if code.preloaded_bytes.is_empty() {
if code.bytes.is_empty() {
module_errors.push_str("Missing Code section\n");
}
if linking.symbol_table.is_empty() {
@@ -187,7 +187,7 @@ impl<'a> WasmModule<'a> {
let import_count = self.import.imports.len();
let host_fn_min = import_count as u32 + self.code.dead_import_dummy_count;
let host_fn_max = host_fn_min + self.code.preloaded_count;
let host_fn_max = host_fn_min + self.code.function_count;
// All functions exported to JS must be kept alive
let exported_fns = self
@@ -278,39 +278,27 @@ impl<'a> WasmModule<'a> {
.linking
.find_and_reindex_imported_fn(old_index as u32, new_index as u32)
.unwrap();
self.reloc_code.apply_relocs_u32(
&mut self.code.preloaded_bytes,
sym_index,
new_index as u32,
);
}
// Relocate calls from Roc app to JS imports
for code_builder in self.code.code_builders.iter_mut() {
code_builder.apply_import_relocs(&live_import_fns);
self.reloc_code
.apply_relocs_u32(&mut self.code.bytes, sym_index, new_index as u32);
}
//
// Dead code elimination. Replace dead functions with tiny dummies.
// Live function indices are unchanged, so no relocations are needed.
//
let dummy = CodeBuilder::dummy(arena);
let mut dummy_bytes = Vec::with_capacity_in(dummy.size(), arena);
dummy.serialize(&mut dummy_bytes);
let mut buffer = Vec::with_capacity_in(self.code.preloaded_bytes.len(), arena);
self.code.preloaded_count.serialize(&mut buffer);
let mut buffer = Vec::with_capacity_in(self.code.bytes.len(), arena);
self.code.function_count.serialize(&mut buffer);
for (i, fn_index) in (host_fn_min..host_fn_max).enumerate() {
if live_flags[fn_index as usize] {
let code_start = self.code.preloaded_offsets[i] as usize;
let code_end = self.code.preloaded_offsets[i + 1] as usize;
buffer.extend_from_slice(&self.code.preloaded_bytes[code_start..code_end]);
let code_start = self.code.function_offsets[i] as usize;
let code_end = self.code.function_offsets[i + 1] as usize;
buffer.extend_from_slice(&self.code.bytes[code_start..code_end]);
} else {
buffer.extend_from_slice(&dummy_bytes);
buffer.extend_from_slice(&DUMMY_FUNCTION);
}
}
self.code.preloaded_bytes = buffer;
self.code.bytes = buffer;
}
fn trace_live_host_functions<I: Iterator<Item = u32>>(
@@ -378,8 +366,8 @@ impl<'a> WasmModule<'a> {
// Find where the function body is
let offset_index = fn_index - host_fn_min as usize;
let code_start = self.code.preloaded_offsets[offset_index];
let code_end = self.code.preloaded_offsets[offset_index + 1];
let code_start = self.code.function_offsets[offset_index];
let code_end = self.code.function_offsets[offset_index + 1];
// For each call in the body
for (offset, symbol) in call_offsets_and_symbols.iter() {
@@ -423,11 +411,8 @@ impl<'a> WasmModule<'a> {
self.linking
.find_internal_symbol(sym_name)
.map(|sym_index| {
self.reloc_code.apply_relocs_u32(
&mut self.code.preloaded_bytes,
sym_index as u32,
value,
);
self.reloc_code
.apply_relocs_u32(&mut self.code.bytes, sym_index as u32, value);
sym_index as u32
})
@@ -494,11 +479,8 @@ impl<'a> WasmModule<'a> {
.unwrap();
// Update calls to use the app function instead of the host import
self.reloc_code.apply_relocs_u32(
&mut self.code.preloaded_bytes,
host_sym_index,
app_fn_index,
);
self.reloc_code
.apply_relocs_u32(&mut self.code.bytes, host_sym_index, app_fn_index);
if swap_import_index != host_import_index {
// get the name using the old host import index because we already swapped it!
@@ -512,7 +494,7 @@ impl<'a> WasmModule<'a> {
// Update calls to the swapped JS import
self.reloc_code.apply_relocs_u32(
&mut self.code.preloaded_bytes,
&mut self.code.bytes,
swap_sym_index,
host_fn_index as u32,
);
@@ -595,6 +577,89 @@ impl<'a> WasmModule<'a> {
}
}
/*******************************************************************
*
* Common types & utility functions
*
*******************************************************************/
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LocalId(pub u32);
/// Wasm value type. (Rust representation matches Wasm encoding)
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum ValueType {
I32 = 0x7f,
I64 = 0x7e,
F32 = 0x7d,
F64 = 0x7c,
}
impl Serialize for ValueType {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_u8(*self as u8);
}
}
impl From<u8> for ValueType {
fn from(x: u8) -> Self {
match x {
0x7f => Self::I32,
0x7e => Self::I64,
0x7d => Self::F32,
0x7c => Self::F64,
_ => internal_error!("Invalid ValueType 0x{:02x}", x),
}
}
}
/// Wasm memory alignment for load/store instructions.
/// Rust representation matches Wasm encoding.
/// It's an error to specify alignment higher than the "natural" alignment of the instruction
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
pub enum Align {
Bytes1 = 0,
Bytes2 = 1,
Bytes4 = 2,
Bytes8 = 3,
}
impl Align {
/// Calculate the largest possible alignment for a load/store at a given stack frame offset
/// Assumes the stack frame is aligned to at least 8 bytes
pub fn from_stack_offset(max_align: Align, offset: u32) -> Align {
if (max_align == Align::Bytes8) && (offset & 7 == 0) {
return Align::Bytes8;
}
if (max_align >= Align::Bytes4) && (offset & 3 == 0) {
return Align::Bytes4;
}
if (max_align >= Align::Bytes2) && (offset & 1 == 0) {
return Align::Bytes2;
}
Align::Bytes1
}
}
impl From<u32> for Align {
fn from(x: u32) -> Align {
match x {
1 => Align::Bytes1,
2 => Align::Bytes2,
4 => Align::Bytes4,
_ => {
if x.count_ones() == 1 {
Align::Bytes8 // Max value supported by any Wasm instruction
} else {
internal_error!("Cannot align to {} bytes", x);
}
}
}
}
}
/// Round up to alignment_bytes (which must be a power of 2)
#[macro_export]
macro_rules! round_up_to_alignment {
@@ -615,24 +680,20 @@ macro_rules! round_up_to_alignment {
};
}
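// Example: round_up_to_alignment!(12, 16) == 16 and
// round_up_to_alignment!(16, 16) == 16; alignment_bytes must be a power of 2.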
/// Bytes for a dummy function with just a single `unreachable` instruction.
/// Used in dead code elimination to replace unused functions.
const DUMMY_FUNCTION: [u8; 4] = [
3, // inner byte length
0, // number of local variable declarations
OpCode::UNREACHABLE as u8, // panic if we were wrong to eliminate!
OpCode::END as u8, // end of function (required for validation)
];
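// Decoded: [0x03, 0x00, 0x00, 0x0B]. The body is 3 bytes long and contains
// zero local declarations, `unreachable` (opcode 0x00), and `end` (opcode 0x0B).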
// TODO: make this an environment variable
pub struct WasmDebugSettings {
proc_start_end: bool,
user_procs_ir: bool,
helper_procs_ir: bool,
let_stmt_ir: bool,
instructions: bool,
storage_map: bool,
pub keep_test_binary: bool,
pub skip_dead_code_elim: bool,
}
pub const DEBUG_SETTINGS: WasmDebugSettings = WasmDebugSettings {
proc_start_end: false && cfg!(debug_assertions),
user_procs_ir: false && cfg!(debug_assertions), // Note: we also have `ROC_PRINT_IR_AFTER_REFCOUNT=1 cargo test-gen-wasm`
helper_procs_ir: false && cfg!(debug_assertions),
let_stmt_ir: false && cfg!(debug_assertions),
instructions: false && cfg!(debug_assertions),
storage_map: false && cfg!(debug_assertions),
keep_test_binary: false && cfg!(debug_assertions),
skip_dead_code_elim: false && cfg!(debug_assertions),
};
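// To enable a flag during development, change its `false` to `true`; the
// `cfg!(debug_assertions)` factor keeps all of them off in release builds.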


@@ -4,11 +4,13 @@ use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use roc_error_macros::internal_error;
use crate::DUMMY_FUNCTION;
use super::linking::{LinkingSection, SymInfo, WasmObjectSymbol};
use super::opcodes::OpCode;
use super::parse::{Parse, ParseError, SkipBytes};
use super::serialize::{SerialBuffer, Serialize, MAX_SIZE_ENCODED_U32};
use super::{CodeBuilder, ValueType};
use super::ValueType;
/*******************************************************************
*
@@ -1168,20 +1170,17 @@ impl<'a> Serialize for ElementSection<'a> {
#[derive(Debug)]
pub struct CodeSection<'a> {
pub preloaded_count: u32,
pub preloaded_bytes: Vec<'a, u8>,
pub function_count: u32,
pub bytes: Vec<'a, u8>,
/// The start of each preloaded function
pub preloaded_offsets: Vec<'a, u32>,
pub function_offsets: Vec<'a, u32>,
/// Dead imports are replaced with dummy functions in CodeSection
pub dead_import_dummy_count: u32,
pub code_builders: Vec<'a, CodeBuilder<'a>>,
}
impl<'a> CodeSection<'a> {
pub fn size(&self) -> usize {
let builders_size: usize = self.code_builders.iter().map(|cb| cb.size()).sum();
MAX_SIZE_SECTION_HEADER + self.preloaded_bytes.len() + builders_size
MAX_SIZE_SECTION_HEADER + self.bytes.len()
}
pub fn parse(
@@ -1224,11 +1223,10 @@ impl<'a> CodeSection<'a> {
debug_assert_eq!(preloaded_offsets.len(), 1 + count as usize);
Ok(CodeSection {
preloaded_count: count,
preloaded_bytes,
preloaded_offsets,
function_count: count,
bytes: preloaded_bytes,
function_offsets: preloaded_offsets,
dead_import_dummy_count: 0,
code_builders: Vec::with_capacity_in(0, arena),
})
}
}
@@ -1236,26 +1234,17 @@ impl<'a> CodeSection<'a> {
impl<'a> Serialize for CodeSection<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
let header_indices = write_section_header(buffer, SectionId::Code);
buffer.encode_u32(
self.dead_import_dummy_count + self.preloaded_count + self.code_builders.len() as u32,
);
buffer.encode_u32(self.dead_import_dummy_count + self.function_count);
// Insert dummy functions, requested by our linking logic.
// This helps to minimise the number of functions we need to move around during linking.
let arena = self.code_builders[0].arena;
let dummy = CodeBuilder::dummy(arena);
for _ in 0..self.dead_import_dummy_count {
dummy.serialize(buffer);
DUMMY_FUNCTION.serialize(buffer);
}
// host + builtin functions
let first_fn_start = self.preloaded_offsets[0] as usize;
buffer.append_slice(&self.preloaded_bytes[first_fn_start..]);
// Roc functions
for code_builder in self.code_builders.iter() {
code_builder.serialize(buffer);
}
// real functions
let first_fn_start = self.function_offsets[0] as usize;
buffer.append_slice(&self.bytes[first_fn_start..]);
update_section_size(buffer, header_indices);
}


@@ -7,7 +7,7 @@ use std::fmt::Debug;
/// Of course there is a price for this - an encoded U32 can be up to 5 bytes wide.
pub const MAX_SIZE_ENCODED_U32: usize = 5;
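// LEB128 stores 7 payload bits per byte, so a u32 needs ceil(32 / 7) = 5 bytes
// in the worst case: u32::MAX encodes as [0xFF, 0xFF, 0xFF, 0xFF, 0x0F].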
pub(super) trait Serialize {
pub trait Serialize {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T);
}