mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-29 14:54:47 +00:00
More accurate model of the Wasm VM's stack machine, with control flow blocks
This commit is contained in:
parent
39263b0ab1
commit
a2abf9c3d2
3 changed files with 205 additions and 66 deletions
|
@ -1,7 +1,6 @@
|
|||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use core::panic;
|
||||
use std::fmt::Debug;
|
||||
|
||||
use roc_module::symbol::Symbol;
|
||||
|
||||
|
@ -10,6 +9,13 @@ use super::opcodes::{OpCode, OpCode::*};
|
|||
use super::serialize::{SerialBuffer, Serialize};
|
||||
use crate::{round_up_to_alignment, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID};
|
||||
|
||||
/// Compile-time switch for the trace output produced by `log_instruction!`.
// NOTE(review): this is `true`, so every `log_instruction!` call prints to stdout.
// Confirm that is intended outside of a debugging session.
const ENABLE_DEBUG_LOG: bool = true;

/// Forwards its arguments (a format string plus any values) to `println!`,
/// but only when `ENABLE_DEBUG_LOG` is set.
macro_rules! log_instruction {
    ($($arg:expr),+) => {
        if ENABLE_DEBUG_LOG {
            println!($($arg),+);
        }
    };
}
|
||||
|
||||
/// Newtype around the `u32` id of a local.
/// Its inner value is used as the immediate of inserted SETLOCAL / TEELOCAL instructions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocalId(pub u32);
|
||||
|
||||
|
@ -29,6 +35,7 @@ impl Serialize for ValueType {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Debug)]
|
||||
pub enum BlockType {
|
||||
NoResult,
|
||||
Value(ValueType),
|
||||
|
@ -43,6 +50,24 @@ impl BlockType {
|
|||
}
|
||||
}
|
||||
|
||||
/// A control block in our model of the VM
|
||||
/// Child blocks cannot "see" values from their parent block
|
||||
struct VmBlock<'a> {
|
||||
/// opcode indicating what kind of block this is
|
||||
opcode: OpCode,
|
||||
/// the stack of values for this block
|
||||
value_stack: Vec<'a, Symbol>,
|
||||
/// whether this block pushes a result value to its parent
|
||||
has_result: bool,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for VmBlock<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let result = if self.has_result { "Result" } else { "NoResult" };
|
||||
f.write_fmt(format_args!("{:?} {}", self.opcode, result))
|
||||
}
|
||||
}
|
||||
|
||||
/// Wasm memory alignment. (Rust representation matches Wasm encoding)
|
||||
#[repr(u8)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
|
@ -113,6 +138,8 @@ macro_rules! instruction_memargs {
|
|||
|
||||
#[derive(Debug)]
|
||||
pub struct CodeBuilder<'a> {
|
||||
arena: &'a Bump,
|
||||
|
||||
/// The main container for the instructions
|
||||
code: Vec<'a, u8>,
|
||||
|
||||
|
@ -135,8 +162,8 @@ pub struct CodeBuilder<'a> {
|
|||
inner_length: Vec<'a, u8>,
|
||||
|
||||
/// Our simulation model of the Wasm stack machine
|
||||
/// Keeps track of where Symbol values are in the VM stack
|
||||
vm_stack: Vec<'a, Symbol>,
|
||||
/// Nested blocks of instructions. A child block can't "see" the stack of its parent block
|
||||
vm_block_stack: Vec<'a, VmBlock<'a>>,
|
||||
|
||||
/// Linker info to help combine the Roc module with builtin & platform modules,
|
||||
/// e.g. to modify call instructions when function indices change
|
||||
|
@ -146,13 +173,22 @@ pub struct CodeBuilder<'a> {
|
|||
#[allow(clippy::new_without_default)]
|
||||
impl<'a> CodeBuilder<'a> {
|
||||
pub fn new(arena: &'a Bump) -> Self {
|
||||
let mut vm_block_stack = Vec::with_capacity_in(8, arena);
|
||||
let function_block = VmBlock {
|
||||
opcode: BLOCK,
|
||||
has_result: true,
|
||||
value_stack: Vec::with_capacity_in(8, arena),
|
||||
};
|
||||
vm_block_stack.push(function_block);
|
||||
|
||||
CodeBuilder {
|
||||
arena,
|
||||
code: Vec::with_capacity_in(1024, arena),
|
||||
insertions: Vec::with_capacity_in(32, arena),
|
||||
insert_bytes: Vec::with_capacity_in(64, arena),
|
||||
preamble: Vec::with_capacity_in(32, arena),
|
||||
inner_length: Vec::with_capacity_in(5, arena),
|
||||
vm_stack: Vec::with_capacity_in(32, arena),
|
||||
vm_block_stack,
|
||||
relocations: Vec::with_capacity_in(32, arena),
|
||||
}
|
||||
}
|
||||
|
@ -167,35 +203,39 @@ impl<'a> CodeBuilder<'a> {
|
|||
|
||||
***********************************************************/
|
||||
|
||||
/// Read-only view of the value stack belonging to the innermost open block.
/// Panics if `vm_block_stack` is empty.
fn current_stack(&self) -> &Vec<'a, Symbol> {
    &self.vm_block_stack.last().unwrap().value_stack
}
|
||||
|
||||
/// Mutable access to the value stack belonging to the innermost open block.
/// Panics if `vm_block_stack` is empty.
fn current_stack_mut(&mut self) -> &mut Vec<'a, Symbol> {
    &mut self.vm_block_stack.last_mut().unwrap().value_stack
}
|
||||
|
||||
/// Set the Symbol that is at the top of the VM stack right now
|
||||
/// We will use this later when we need to load the Symbol
|
||||
pub fn set_top_symbol(&mut self, sym: Symbol) -> VmSymbolState {
|
||||
let len = self.vm_stack.len();
|
||||
let current_stack = &mut self.vm_block_stack.last_mut().unwrap().value_stack;
|
||||
let pushed_at = self.code.len();
|
||||
|
||||
if len == 0 {
|
||||
panic!(
|
||||
"trying to set symbol with nothing on stack, code = {:?}",
|
||||
self.code
|
||||
);
|
||||
}
|
||||
|
||||
self.vm_stack[len - 1] = sym;
|
||||
let top_symbol: &mut Symbol = current_stack.last_mut().unwrap();
|
||||
*top_symbol = sym;
|
||||
|
||||
VmSymbolState::Pushed { pushed_at }
|
||||
}
|
||||
|
||||
/// Verify if a sequence of symbols is at the top of the stack
|
||||
pub fn verify_stack_match(&self, symbols: &[Symbol]) -> bool {
|
||||
let current_stack = self.current_stack();
|
||||
let n_symbols = symbols.len();
|
||||
let stack_depth = self.vm_stack.len();
|
||||
let stack_depth = current_stack.len();
|
||||
if n_symbols > stack_depth {
|
||||
return false;
|
||||
}
|
||||
let offset = stack_depth - n_symbols;
|
||||
|
||||
for (i, sym) in symbols.iter().enumerate() {
|
||||
if self.vm_stack[offset + i] != *sym {
|
||||
if current_stack[offset + i] != *sym {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -214,7 +254,12 @@ impl<'a> CodeBuilder<'a> {
|
|||
end: self.insert_bytes.len(),
|
||||
});
|
||||
|
||||
// println!("insert {:?} {} at byte offset {} ", opcode, immediate, insert_at);
|
||||
log_instruction!(
|
||||
"**insert {:?} {} at byte offset {}**",
|
||||
opcode,
|
||||
immediate,
|
||||
insert_at
|
||||
);
|
||||
}
|
||||
|
||||
/// Load a Symbol that is stored in the VM stack
|
||||
|
@ -233,35 +278,47 @@ impl<'a> CodeBuilder<'a> {
|
|||
use VmSymbolState::*;
|
||||
|
||||
match vm_state {
|
||||
NotYetPushed => panic!("Symbol {:?} has no value yet. Nothing to load.", symbol),
|
||||
NotYetPushed => unreachable!("Symbol {:?} has no value yet. Nothing to load.", symbol),
|
||||
|
||||
Pushed { pushed_at } => {
|
||||
let &top = self.vm_stack.last().unwrap();
|
||||
if top == symbol {
|
||||
// We're lucky, the symbol is already on top of the VM stack
|
||||
// No code to generate! (This reduces code size by up to 25% in tests.)
|
||||
// Just let the caller know what happened
|
||||
Some(Popped { pushed_at })
|
||||
} else {
|
||||
// Symbol is not on top of the stack. Find it.
|
||||
if let Some(found_index) = self.vm_stack.iter().rposition(|&s| s == symbol) {
|
||||
// Insert a local.set where the value was created
|
||||
self.add_insertion(pushed_at, SETLOCAL, next_local_id.0);
|
||||
match self.current_stack().last() {
|
||||
Some(top_symbol) if *top_symbol == symbol => {
|
||||
// We're lucky, the symbol is already on top of the current block's stack.
|
||||
// No code to generate! (This reduces code size by up to 25% in tests.)
|
||||
// Just let the caller know what happened
|
||||
Some(Popped { pushed_at })
|
||||
}
|
||||
_ => {
|
||||
// Symbol is not on top of the stack.
|
||||
// We should have saved it to a local, so go back and do that now.
|
||||
|
||||
// Take the value out of the stack where local.set was inserted
|
||||
self.vm_stack.remove(found_index);
|
||||
// It should still be on the stack in the block where it was assigned. Remove it.
|
||||
let mut found = false;
|
||||
for block in self.vm_block_stack.iter_mut() {
|
||||
if let Some(found_index) =
|
||||
block.value_stack.iter().position(|&s| s == symbol)
|
||||
{
|
||||
block.value_stack.remove(found_index);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Insert a local.get at the current position
|
||||
// Go back to the code position where it was pushed, and save it to a local
|
||||
if found {
|
||||
self.add_insertion(pushed_at, SETLOCAL, next_local_id.0);
|
||||
} else {
|
||||
if ENABLE_DEBUG_LOG {
|
||||
println!("{:?} has been popped implicitly. Leaving it on the stack.", symbol);
|
||||
}
|
||||
self.add_insertion(pushed_at, TEELOCAL, next_local_id.0);
|
||||
}
|
||||
|
||||
// Recover the value again at the current position
|
||||
self.get_local(next_local_id);
|
||||
self.set_top_symbol(symbol);
|
||||
|
||||
// This Symbol is no longer stored in the VM stack, but in a local
|
||||
None
|
||||
} else {
|
||||
panic!(
|
||||
"{:?} has state {:?} but not found in VM stack",
|
||||
symbol, vm_state
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -284,7 +341,7 @@ impl<'a> CodeBuilder<'a> {
|
|||
|
||||
/**********************************************************
|
||||
|
||||
FINALIZE AND SERIALIZE
|
||||
FUNCTION HEADER
|
||||
|
||||
***********************************************************/
|
||||
|
||||
|
@ -377,6 +434,12 @@ impl<'a> CodeBuilder<'a> {
|
|||
self.insertions.sort_by_key(|ins| ins.at);
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
|
||||
SERIALIZE
|
||||
|
||||
***********************************************************/
|
||||
|
||||
/// Serialize all byte vectors in the right order
|
||||
/// Also update relocation offsets relative to the base offset (code section body start)
|
||||
pub fn serialize_with_relocs<T: SerialBuffer>(
|
||||
|
@ -435,33 +498,68 @@ impl<'a> CodeBuilder<'a> {
|
|||
|
||||
/// Base method for generating instructions
|
||||
/// Emits the opcode and simulates VM stack push/pop
|
||||
fn inst(&mut self, opcode: OpCode, pops: usize, push: bool) {
|
||||
let new_len = self.vm_stack.len() - pops as usize;
|
||||
self.vm_stack.truncate(new_len);
|
||||
fn inst_base(&mut self, opcode: OpCode, pops: usize, push: bool) {
|
||||
let current_stack = self.current_stack_mut();
|
||||
let new_len = current_stack.len() - pops as usize;
|
||||
current_stack.truncate(new_len);
|
||||
if push {
|
||||
self.vm_stack.push(Symbol::WASM_TMP);
|
||||
current_stack.push(Symbol::WASM_TMP);
|
||||
}
|
||||
|
||||
self.code.push(opcode as u8);
|
||||
|
||||
// println!("{:10}\t{:?}", format!("{:?}", opcode), &self.vm_stack);
|
||||
}
|
||||
|
||||
fn inst_imm8(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u8) {
|
||||
self.inst(opcode, pops, push);
|
||||
self.code.push(immediate);
|
||||
/// Plain instruction without any immediates
|
||||
fn inst(&mut self, opcode: OpCode, pops: usize, push: bool) {
|
||||
self.inst_base(opcode, pops, push);
|
||||
log_instruction!(
|
||||
"{:10}\t\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
self.current_stack()
|
||||
);
|
||||
}
|
||||
|
||||
// public for use in test code
|
||||
pub fn inst_imm32(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u32) {
|
||||
self.inst(opcode, pops, push);
|
||||
/// Block instruction
|
||||
fn inst_block(&mut self, opcode: OpCode, pops: usize, block_type: BlockType) {
|
||||
self.inst_base(opcode, pops, false);
|
||||
self.code.push(block_type.as_byte());
|
||||
|
||||
// Start a new block with a fresh value stack
|
||||
self.vm_block_stack.push(VmBlock {
|
||||
opcode,
|
||||
value_stack: Vec::with_capacity_in(8, self.arena),
|
||||
has_result: block_type != BlockType::NoResult,
|
||||
});
|
||||
|
||||
log_instruction!(
|
||||
"{:10} {:?}\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
block_type,
|
||||
&self.vm_block_stack
|
||||
);
|
||||
}
|
||||
|
||||
fn inst_imm32(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u32) {
|
||||
self.inst_base(opcode, pops, push);
|
||||
self.code.encode_u32(immediate);
|
||||
log_instruction!(
|
||||
"{:10}\t{}\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
immediate,
|
||||
self.current_stack()
|
||||
);
|
||||
}
|
||||
|
||||
fn inst_mem(&mut self, opcode: OpCode, pops: usize, push: bool, align: Align, offset: u32) {
|
||||
self.inst(opcode, pops, push);
|
||||
self.inst_base(opcode, pops, push);
|
||||
self.code.push(align as u8);
|
||||
self.code.encode_u32(offset);
|
||||
log_instruction!(
|
||||
"{:10} {:?} {}\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
align,
|
||||
offset,
|
||||
self.current_stack()
|
||||
);
|
||||
}
|
||||
|
||||
/// Insert a linker relocation for a memory address
|
||||
|
@ -488,22 +586,38 @@ impl<'a> CodeBuilder<'a> {
|
|||
instruction_no_args!(nop, NOP, 0, false);
|
||||
|
||||
pub fn block(&mut self, ty: BlockType) {
|
||||
self.inst_imm8(BLOCK, 0, false, ty.as_byte());
|
||||
self.inst_block(BLOCK, 0, ty);
|
||||
}
|
||||
pub fn loop_(&mut self, ty: BlockType) {
|
||||
self.inst_imm8(LOOP, 0, false, ty.as_byte());
|
||||
self.inst_block(LOOP, 0, ty);
|
||||
}
|
||||
pub fn if_(&mut self, ty: BlockType) {
|
||||
self.inst_imm8(IF, 1, false, ty.as_byte());
|
||||
self.inst_block(IF, 1, ty);
|
||||
}
|
||||
pub fn else_(&mut self) {
|
||||
// Reuse the 'then' block but clear its value stack
|
||||
self.current_stack_mut().clear();
|
||||
self.inst(ELSE, 0, false);
|
||||
}
|
||||
|
||||
instruction_no_args!(else_, ELSE, 0, false);
|
||||
instruction_no_args!(end, END, 0, false);
|
||||
pub fn end(&mut self) {
|
||||
self.inst_base(END, 0, false);
|
||||
|
||||
let ended_block = self.vm_block_stack.pop().unwrap();
|
||||
if ended_block.has_result {
|
||||
let result = ended_block.value_stack.last().unwrap();
|
||||
self.current_stack_mut().push(*result)
|
||||
}
|
||||
|
||||
log_instruction!("END \t\t{:?}", &self.vm_block_stack);
|
||||
}
|
||||
pub fn br(&mut self, levels: u32) {
|
||||
self.inst_imm32(BR, 0, false, levels);
|
||||
}
|
||||
/// Emit `br_if` with `levels` as its immediate, popping only the condition from the model.
pub fn br_if(&mut self, levels: u32) {
    // At run time a taken br_if may pop two values (the condition, plus a result when the
    // target block has one). Our stack model is a *static* analysis, though, and it has to
    // be right for the next instruction, which is only reached when the branch is NOT taken.
    // Hence exactly one value is popped here: the condition.
    const POPS: usize = 1;
    self.inst_imm32(BRIF, POPS, false, levels);
}
|
||||
#[allow(dead_code)]
|
||||
|
@ -520,7 +634,7 @@ impl<'a> CodeBuilder<'a> {
|
|||
n_args: usize,
|
||||
has_return_val: bool,
|
||||
) {
|
||||
self.inst(CALL, n_args, has_return_val);
|
||||
self.inst_base(CALL, n_args, has_return_val);
|
||||
|
||||
let offset = self.code.len() as u32;
|
||||
self.code.encode_padded_u32(function_index);
|
||||
|
@ -533,6 +647,13 @@ impl<'a> CodeBuilder<'a> {
|
|||
offset,
|
||||
symbol_index,
|
||||
});
|
||||
|
||||
log_instruction!(
|
||||
"{:10}\t{}\t{:?}",
|
||||
format!("{:?}", CALL),
|
||||
function_index,
|
||||
self.current_stack()
|
||||
);
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
|
@ -584,26 +705,44 @@ impl<'a> CodeBuilder<'a> {
|
|||
instruction_memargs!(i64_store32, I64STORE32, 2, false);
|
||||
|
||||
pub fn memory_size(&mut self) {
|
||||
self.inst_imm8(CURRENTMEMORY, 0, true, 0);
|
||||
self.inst(CURRENTMEMORY, 0, true);
|
||||
self.code.push(0);
|
||||
}
|
||||
pub fn memory_grow(&mut self) {
|
||||
self.inst_imm8(GROWMEMORY, 1, true, 0);
|
||||
self.inst(GROWMEMORY, 1, true);
|
||||
self.code.push(0);
|
||||
}
|
||||
|
||||
fn log_const<T>(&self, opcode: OpCode, x: T)
|
||||
where
|
||||
T: std::fmt::Debug + std::fmt::Display,
|
||||
{
|
||||
log_instruction!(
|
||||
"{:10}\t{}\t{:?}",
|
||||
format!("{:?}", opcode),
|
||||
x,
|
||||
self.current_stack()
|
||||
);
|
||||
}
|
||||
pub fn i32_const(&mut self, x: i32) {
|
||||
self.inst(I32CONST, 0, true);
|
||||
self.inst_base(I32CONST, 0, true);
|
||||
self.code.encode_i32(x);
|
||||
self.log_const(I32CONST, x);
|
||||
}
|
||||
pub fn i64_const(&mut self, x: i64) {
|
||||
self.inst(I64CONST, 0, true);
|
||||
self.inst_base(I64CONST, 0, true);
|
||||
self.code.encode_i64(x);
|
||||
self.log_const(I64CONST, x);
|
||||
}
|
||||
pub fn f32_const(&mut self, x: f32) {
|
||||
self.inst(F32CONST, 0, true);
|
||||
self.inst_base(F32CONST, 0, true);
|
||||
self.code.encode_f32(x);
|
||||
self.log_const(F32CONST, x);
|
||||
}
|
||||
pub fn f64_const(&mut self, x: f64) {
|
||||
self.inst(F64CONST, 0, true);
|
||||
self.inst_base(F64CONST, 0, true);
|
||||
self.code.encode_f64(x);
|
||||
self.log_const(F64CONST, x);
|
||||
}
|
||||
|
||||
// TODO: Consider creating unified methods for numerical ops like 'eq' and 'add',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue