Merge branch 'trunk' into refactor-builtin-list-drop

This commit is contained in:
satotake 2021-11-15 11:37:32 +00:00 committed by GitHub
commit c253273490
52 changed files with 1771 additions and 910 deletions

View file

@ -22,8 +22,8 @@ use crate::wasm_module::{
LocalId, Signature, SymInfo, ValueType,
};
use crate::{
copy_memory, CopyMemoryConfig, Env, BUILTINS_IMPORT_MODULE_NAME, MEMORY_NAME, PTR_TYPE,
STACK_POINTER_GLOBAL_ID, STACK_POINTER_NAME,
copy_memory, CopyMemoryConfig, Env, BUILTINS_IMPORT_MODULE_NAME, MEMORY_NAME, PTR_SIZE,
PTR_TYPE, STACK_POINTER_GLOBAL_ID, STACK_POINTER_NAME,
};
/// The memory address where the constants data will be loaded during module instantiation.
@ -540,6 +540,29 @@ impl<'a> WasmBackend<'a> {
Expr::Struct(fields) => self.create_struct(sym, layout, fields),
Expr::StructAtIndex {
index,
field_layouts,
structure,
} => {
if let StoredValue::StackMemory { location, .. } = self.storage.get(structure) {
let (local_id, mut offset) =
location.local_and_offset(self.storage.stack_frame_pointer);
for field in field_layouts.iter().take(*index as usize) {
offset += field.stack_size(PTR_SIZE);
}
self.storage.copy_value_from_memory(
&mut self.code_builder,
*sym,
local_id,
offset,
);
} else {
unreachable!("Unexpected storage for {:?}", structure)
}
Ok(())
}
x => Err(format!("Expression is not yet implemented {:?}", x)),
}
}

View file

@ -29,14 +29,14 @@ pub fn build_call_low_level<'a>(
match lowlevel {
StrConcat | StrJoinWith | StrIsEmpty | StrStartsWith | StrStartsWithCodePt
| StrEndsWith | StrSplit | StrCountGraphemes | StrFromInt | StrFromUtf8 | StrTrimLeft
| StrFromUtf8Range | StrToUtf8 | StrRepeat | StrFromFloat | StrTrim | ListLen
| ListGetUnsafe | ListSet | ListSingle | ListRepeat | ListReverse | ListConcat
| ListContains | ListAppend | ListPrepend | ListJoin | ListRange | ListMap | ListMap2
| ListMap3 | ListMap4 | ListMapWithIndex | ListKeepIf | ListWalk | ListWalkUntil
| ListWalkBackwards | ListKeepOks | ListKeepErrs | ListSortWith | ListSublist
| ListDropAt | ListSwap | ListAny | ListFindUnsafe | DictSize | DictEmpty | DictInsert
| DictRemove | DictContains | DictGetUnsafe | DictKeys | DictValues | DictUnion
| DictIntersection | DictDifference | DictWalk | SetFromList => {
| StrTrimRight | StrFromUtf8Range | StrToUtf8 | StrRepeat | StrFromFloat | StrTrim
| ListLen | ListGetUnsafe | ListSet | ListSingle | ListRepeat | ListReverse
| ListConcat | ListContains | ListAppend | ListPrepend | ListJoin | ListRange | ListMap
| ListMap2 | ListMap3 | ListMap4 | ListMapWithIndex | ListKeepIf | ListWalk
| ListWalkUntil | ListWalkBackwards | ListKeepOks | ListKeepErrs | ListSortWith
| ListSublist | ListDropAt | ListSwap | ListAny | ListFindUnsafe | DictSize | DictEmpty
| DictInsert | DictRemove | DictContains | DictGetUnsafe | DictKeys | DictValues
| DictUnion | DictIntersection | DictDifference | DictWalk | SetFromList => {
return NotImplemented;
}

View file

@ -5,7 +5,7 @@ use roc_collections::all::MutMap;
use roc_module::symbol::Symbol;
use crate::layout::WasmLayout;
use crate::wasm_module::{CodeBuilder, LocalId, ValueType, VirtualMachineSymbolState};
use crate::wasm_module::{CodeBuilder, LocalId, ValueType, VmSymbolState};
use crate::{copy_memory, round_up_to_alignment, CopyMemoryConfig, PTR_SIZE, PTR_TYPE};
pub enum StoredValueKind {
@ -33,7 +33,7 @@ impl StackMemoryLocation {
pub enum StoredValue {
/// A value stored implicitly in the VM stack (primitives only)
VirtualMachineStack {
vm_state: VirtualMachineSymbolState,
vm_state: VmSymbolState,
value_type: ValueType,
size: u32,
},
@ -126,7 +126,7 @@ impl<'a> Storage<'a> {
}
}
_ => StoredValue::VirtualMachineStack {
vm_state: VirtualMachineSymbolState::NotYetPushed,
vm_state: VmSymbolState::NotYetPushed,
value_type: *value_type,
size: *size,
},
@ -319,6 +319,67 @@ impl<'a> Storage<'a> {
}
}
/// Generate code to copy a value from an arbitrary memory location
/// (defined by a pointer and offset) into the storage of `to_symbol`.
///
/// What gets emitted depends on how `to_symbol` is stored:
/// - `StackMemory`: a memory-to-memory copy via `copy_memory`, using the
///   destination's own size and alignment.
/// - `VirtualMachineStack`: a single load instruction; the loaded value is
///   left on the VM stack.
/// - `Local`: the same load, followed by a `set_local` into the destination local.
///
/// Returns the `size` recorded in the destination's `StoredValue`.
///
/// # Panics
/// Panics if the destination is a primitive whose `(value_type, size)` pair
/// has no matching load instruction below.
pub fn copy_value_from_memory(
    &mut self,
    code_builder: &mut CodeBuilder,
    to_symbol: Symbol,
    from_ptr: LocalId,
    from_offset: u32,
) -> u32 {
    let to_storage = self.get(&to_symbol).to_owned();
    match to_storage {
        StoredValue::StackMemory {
            location,
            size,
            alignment_bytes,
        } => {
            let (to_ptr, to_offset) = location.local_and_offset(self.stack_frame_pointer);
            copy_memory(
                code_builder,
                CopyMemoryConfig {
                    from_ptr,
                    from_offset,
                    to_ptr,
                    to_offset,
                    size,
                    alignment_bytes,
                },
            );
            size
        }

        StoredValue::VirtualMachineStack {
            value_type, size, ..
        }
        | StoredValue::Local {
            value_type, size, ..
        } => {
            use crate::wasm_module::Align::*;

            // Push the source pointer; the load below consumes it.
            code_builder.get_local(from_ptr);
            // NOTE(review): 1- and 2-byte integers always use the sign-extending
            // `_s` loads; confirm this is what we want for unsigned values.
            match (value_type, size) {
                (ValueType::I64, 8) => code_builder.i64_load(Bytes8, from_offset),
                (ValueType::I32, 4) => code_builder.i32_load(Bytes4, from_offset),
                (ValueType::I32, 2) => code_builder.i32_load16_s(Bytes2, from_offset),
                (ValueType::I32, 1) => code_builder.i32_load8_s(Bytes1, from_offset),
                (ValueType::F32, 4) => code_builder.f32_load(Bytes4, from_offset),
                (ValueType::F64, 8) => code_builder.f64_load(Bytes8, from_offset),
                _ => {
                    // This is a *load*, and the second value is a size, not an alignment.
                    panic!(
                        "Cannot load {:?} value with size {:?} from memory",
                        value_type, size
                    );
                }
            };

            // Only a `Local` destination needs the value moved off the VM stack.
            if let StoredValue::Local { local_id, .. } = to_storage {
                code_builder.set_local(local_id);
            }
            size
        }
    }
}
/// Generate code to copy from one StoredValue to another
/// Copies the _entire_ value. For struct fields etc., see `copy_value_to_memory`
pub fn clone_value(
@ -422,7 +483,7 @@ impl<'a> Storage<'a> {
} = storage
{
let local_id = self.get_next_local_id();
if vm_state != VirtualMachineSymbolState::NotYetPushed {
if vm_state != VmSymbolState::NotYetPushed {
code_builder.load_symbol(symbol, vm_state, local_id);
code_builder.set_local(local_id);
}

View file

@ -1,7 +1,6 @@
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use core::panic;
use std::fmt::Debug;
use roc_module::symbol::Symbol;
@ -10,6 +9,13 @@ use super::opcodes::{OpCode, OpCode::*};
use super::serialize::{SerialBuffer, Serialize};
use crate::{round_up_to_alignment, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID};
/// Compile-time switch for tracing every emitted instruction to stdout.
///
/// Keep this `false` in committed code: when it is `true`, each instruction
/// generated through `CodeBuilder` prints a line, which floods the output of
/// the compiler and its tests. Flip it locally while debugging code generation.
const ENABLE_DEBUG_LOG: bool = false;

/// Print a trace line when `ENABLE_DEBUG_LOG` is set.
/// Takes the same arguments as `println!` (a format string plus values).
macro_rules! log_instruction {
    ($($x: expr),+) => {
        if ENABLE_DEBUG_LOG { println!($($x,)*); }
    };
}
/// Identifier of a local variable, wrapping a `u32`.
/// The wrapped number is what gets written as the immediate of inserted
/// `SETLOCAL` / `TEELOCAL` instructions (see `load_symbol`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LocalId(pub u32);
@ -29,6 +35,7 @@ impl Serialize for ValueType {
}
}
#[derive(PartialEq, Eq, Debug)]
pub enum BlockType {
NoResult,
Value(ValueType),
@ -43,6 +50,31 @@ impl BlockType {
}
}
/// A control block in our model of the VM
/// Child blocks cannot "see" values from their parent block
///
/// `CodeBuilder` keeps these in `vm_block_stack`: `inst_block` pushes one when a
/// block-opening instruction is emitted, and `end` pops it again.
struct VmBlock<'a> {
/// opcode indicating what kind of block this is
/// (the opcode passed to `inst_block`; `CodeBuilder::new` uses `BLOCK` for the function-level block)
opcode: OpCode,
/// the stack of values for this block
/// (symbols in push order; the last element is the top of the stack)
value_stack: Vec<'a, Symbol>,
/// whether this block pushes a result value to its parent
/// (when set, `end` copies the top of `value_stack` onto the parent block's stack)
has_result: bool,
}
impl std::fmt::Debug for VmBlock<'_> {
    /// Compact debug form: the block's opcode followed by `Result` or `NoResult`.
    /// The value stack is not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let result_label = if self.has_result {
            "Result"
        } else {
            "NoResult"
        };
        write!(f, "{:?} {}", self.opcode, result_label)
    }
}
/// Wasm memory alignment. (Rust representation matches Wasm encoding)
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
@ -73,7 +105,7 @@ impl From<u32> for Align {
}
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum VirtualMachineSymbolState {
pub enum VmSymbolState {
/// Value doesn't exist yet
NotYetPushed,
@ -113,6 +145,8 @@ macro_rules! instruction_memargs {
#[derive(Debug)]
pub struct CodeBuilder<'a> {
arena: &'a Bump,
/// The main container for the instructions
code: Vec<'a, u8>,
@ -135,8 +169,8 @@ pub struct CodeBuilder<'a> {
inner_length: Vec<'a, u8>,
/// Our simulation model of the Wasm stack machine
/// Keeps track of where Symbol values are in the VM stack
vm_stack: Vec<'a, Symbol>,
/// Nested blocks of instructions. A child block can't "see" the stack of its parent block
vm_block_stack: Vec<'a, VmBlock<'a>>,
/// Linker info to help combine the Roc module with builtin & platform modules,
/// e.g. to modify call instructions when function indices change
@ -146,13 +180,22 @@ pub struct CodeBuilder<'a> {
#[allow(clippy::new_without_default)]
impl<'a> CodeBuilder<'a> {
pub fn new(arena: &'a Bump) -> Self {
let mut vm_block_stack = Vec::with_capacity_in(8, arena);
let function_block = VmBlock {
opcode: BLOCK,
has_result: true,
value_stack: Vec::with_capacity_in(8, arena),
};
vm_block_stack.push(function_block);
CodeBuilder {
arena,
code: Vec::with_capacity_in(1024, arena),
insertions: Vec::with_capacity_in(32, arena),
insert_bytes: Vec::with_capacity_in(64, arena),
preamble: Vec::with_capacity_in(32, arena),
inner_length: Vec::with_capacity_in(5, arena),
vm_stack: Vec::with_capacity_in(32, arena),
vm_block_stack,
relocations: Vec::with_capacity_in(32, arena),
}
}
@ -167,35 +210,39 @@ impl<'a> CodeBuilder<'a> {
***********************************************************/
/// Value stack of the innermost open block in the VM model (read-only).
/// Panics if there is no open block at all.
fn current_stack(&self) -> &Vec<'a, Symbol> {
    &self.vm_block_stack.last().unwrap().value_stack
}
/// Value stack of the innermost open block in the VM model (mutable).
/// Panics if there is no open block at all.
fn current_stack_mut(&mut self) -> &mut Vec<'a, Symbol> {
    &mut self.vm_block_stack.last_mut().unwrap().value_stack
}
/// Set the Symbol that is at the top of the VM stack right now
/// We will use this later when we need to load the Symbol
pub fn set_top_symbol(&mut self, sym: Symbol) -> VirtualMachineSymbolState {
let len = self.vm_stack.len();
pub fn set_top_symbol(&mut self, sym: Symbol) -> VmSymbolState {
let current_stack = &mut self.vm_block_stack.last_mut().unwrap().value_stack;
let pushed_at = self.code.len();
let top_symbol: &mut Symbol = current_stack.last_mut().unwrap();
*top_symbol = sym;
if len == 0 {
panic!(
"trying to set symbol with nothing on stack, code = {:?}",
self.code
);
}
self.vm_stack[len - 1] = sym;
VirtualMachineSymbolState::Pushed { pushed_at }
VmSymbolState::Pushed { pushed_at }
}
/// Verify if a sequence of symbols is at the top of the stack
pub fn verify_stack_match(&self, symbols: &[Symbol]) -> bool {
let current_stack = self.current_stack();
let n_symbols = symbols.len();
let stack_depth = self.vm_stack.len();
let stack_depth = current_stack.len();
if n_symbols > stack_depth {
return false;
}
let offset = stack_depth - n_symbols;
for (i, sym) in symbols.iter().enumerate() {
if self.vm_stack[offset + i] != *sym {
if current_stack[offset + i] != *sym {
return false;
}
}
@ -214,7 +261,12 @@ impl<'a> CodeBuilder<'a> {
end: self.insert_bytes.len(),
});
// println!("insert {:?} {} at byte offset {} ", opcode, immediate, insert_at);
log_instruction!(
"**insert {:?} {} at byte offset {}**",
opcode,
immediate,
insert_at
);
}
/// Load a Symbol that is stored in the VM stack
@ -227,41 +279,56 @@ impl<'a> CodeBuilder<'a> {
pub fn load_symbol(
&mut self,
symbol: Symbol,
vm_state: VirtualMachineSymbolState,
vm_state: VmSymbolState,
next_local_id: LocalId,
) -> Option<VirtualMachineSymbolState> {
use VirtualMachineSymbolState::*;
) -> Option<VmSymbolState> {
use VmSymbolState::*;
match vm_state {
NotYetPushed => panic!("Symbol {:?} has no value yet. Nothing to load.", symbol),
NotYetPushed => unreachable!("Symbol {:?} has no value yet. Nothing to load.", symbol),
Pushed { pushed_at } => {
let &top = self.vm_stack.last().unwrap();
if top == symbol {
// We're lucky, the symbol is already on top of the VM stack
// No code to generate! (This reduces code size by up to 25% in tests.)
// Just let the caller know what happened
Some(Popped { pushed_at })
} else {
// Symbol is not on top of the stack. Find it.
if let Some(found_index) = self.vm_stack.iter().rposition(|&s| s == symbol) {
// Insert a local.set where the value was created
self.add_insertion(pushed_at, SETLOCAL, next_local_id.0);
match self.current_stack().last() {
Some(top_symbol) if *top_symbol == symbol => {
// We're lucky, the symbol is already on top of the current block's stack.
// No code to generate! (This reduces code size by up to 25% in tests.)
// Just let the caller know what happened
Some(Popped { pushed_at })
}
_ => {
// Symbol is not on top of the stack.
// We should have saved it to a local, so go back and do that now.
// Take the value out of the stack where local.set was inserted
self.vm_stack.remove(found_index);
// It should still be on the stack in the block where it was assigned. Remove it.
let mut found = false;
for block in self.vm_block_stack.iter_mut() {
if let Some(found_index) =
block.value_stack.iter().position(|&s| s == symbol)
{
block.value_stack.remove(found_index);
found = true;
}
}
// Insert a local.get at the current position
// Go back to the code position where it was pushed, and save it to a local
if found {
self.add_insertion(pushed_at, SETLOCAL, next_local_id.0);
} else {
if ENABLE_DEBUG_LOG {
println!(
"{:?} has been popped implicitly. Leaving it on the stack.",
symbol
);
}
self.add_insertion(pushed_at, TEELOCAL, next_local_id.0);
}
// Recover the value again at the current position
self.get_local(next_local_id);
self.set_top_symbol(symbol);
// This Symbol is no longer stored in the VM stack, but in a local
None
} else {
panic!(
"{:?} has state {:?} but not found in VM stack",
symbol, vm_state
);
}
}
}
@ -284,7 +351,7 @@ impl<'a> CodeBuilder<'a> {
/**********************************************************
FINALIZE AND SERIALIZE
FUNCTION HEADER
***********************************************************/
@ -377,6 +444,12 @@ impl<'a> CodeBuilder<'a> {
self.insertions.sort_by_key(|ins| ins.at);
}
/**********************************************************
SERIALIZE
***********************************************************/
/// Serialize all byte vectors in the right order
/// Also update relocation offsets relative to the base offset (code section body start)
pub fn serialize_with_relocs<T: SerialBuffer>(
@ -435,33 +508,68 @@ impl<'a> CodeBuilder<'a> {
/// Base method for generating instructions
/// Emits the opcode and simulates VM stack push/pop
fn inst(&mut self, opcode: OpCode, pops: usize, push: bool) {
let new_len = self.vm_stack.len() - pops as usize;
self.vm_stack.truncate(new_len);
fn inst_base(&mut self, opcode: OpCode, pops: usize, push: bool) {
let current_stack = self.current_stack_mut();
let new_len = current_stack.len() - pops as usize;
current_stack.truncate(new_len);
if push {
self.vm_stack.push(Symbol::WASM_TMP);
current_stack.push(Symbol::WASM_TMP);
}
self.code.push(opcode as u8);
// println!("{:10}\t{:?}", format!("{:?}", opcode), &self.vm_stack);
}
fn inst_imm8(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u8) {
self.inst(opcode, pops, push);
self.code.push(immediate);
/// Plain instruction without any immediates
fn inst(&mut self, opcode: OpCode, pops: usize, push: bool) {
self.inst_base(opcode, pops, push);
log_instruction!(
"{:10}\t\t{:?}",
format!("{:?}", opcode),
self.current_stack()
);
}
// public for use in test code
pub fn inst_imm32(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u32) {
self.inst(opcode, pops, push);
/// Emit a block-opening instruction and open a matching block in the VM model.
///
/// `pops` values are removed from the enclosing block's stack, the opcode and
/// the block type byte are written to the code, and a new `VmBlock` with an
/// empty value stack becomes the current block.
fn inst_block(&mut self, opcode: OpCode, pops: usize, block_type: BlockType) {
    // The opening instruction itself never pushes a value.
    self.inst_base(opcode, pops, false);
    self.code.push(block_type.as_byte());

    // Start a new block with a fresh value stack
    let has_result = block_type != BlockType::NoResult;
    let value_stack = Vec::with_capacity_in(8, self.arena);
    let new_block = VmBlock {
        opcode,
        value_stack,
        has_result,
    };
    self.vm_block_stack.push(new_block);

    log_instruction!(
        "{:10} {:?}\t{:?}",
        format!("{:?}", opcode),
        block_type,
        &self.vm_block_stack
    );
}
fn inst_imm32(&mut self, opcode: OpCode, pops: usize, push: bool, immediate: u32) {
self.inst_base(opcode, pops, push);
self.code.encode_u32(immediate);
log_instruction!(
"{:10}\t{}\t{:?}",
format!("{:?}", opcode),
immediate,
self.current_stack()
);
}
fn inst_mem(&mut self, opcode: OpCode, pops: usize, push: bool, align: Align, offset: u32) {
self.inst(opcode, pops, push);
self.inst_base(opcode, pops, push);
self.code.push(align as u8);
self.code.encode_u32(offset);
log_instruction!(
"{:10} {:?} {}\t{:?}",
format!("{:?}", opcode),
align,
offset,
self.current_stack()
);
}
/// Insert a linker relocation for a memory address
@ -488,22 +596,38 @@ impl<'a> CodeBuilder<'a> {
instruction_no_args!(nop, NOP, 0, false);
pub fn block(&mut self, ty: BlockType) {
self.inst_imm8(BLOCK, 0, false, ty.as_byte());
self.inst_block(BLOCK, 0, ty);
}
pub fn loop_(&mut self, ty: BlockType) {
self.inst_imm8(LOOP, 0, false, ty.as_byte());
self.inst_block(LOOP, 0, ty);
}
pub fn if_(&mut self, ty: BlockType) {
self.inst_imm8(IF, 1, false, ty.as_byte());
self.inst_block(IF, 1, ty);
}
/// Emit `else`, switching from the 'then' branch to the 'else' branch of an `if`.
pub fn else_(&mut self) {
    // The 'else' branch reuses the model block opened for 'then',
    // but starts with an empty value stack.
    let then_block = self.vm_block_stack.last_mut().unwrap();
    then_block.value_stack.clear();

    self.inst(ELSE, 0, false);
}
instruction_no_args!(else_, ELSE, 0, false);
instruction_no_args!(end, END, 0, false);
pub fn end(&mut self) {
self.inst_base(END, 0, false);
let ended_block = self.vm_block_stack.pop().unwrap();
if ended_block.has_result {
let result = ended_block.value_stack.last().unwrap();
self.current_stack_mut().push(*result)
}
log_instruction!("END \t\t{:?}", &self.vm_block_stack);
}
/// Emit a `BR` instruction with `levels` as its immediate.
/// In the stack model this pops nothing and pushes nothing.
pub fn br(&mut self, levels: u32) {
self.inst_imm32(BR, 0, false, levels);
}
/// Emit a `BRIF` instruction with `levels` as its immediate.
/// In the stack model this pops exactly one value (the condition) and pushes nothing.
pub fn br_if(&mut self, levels: u32) {
// In dynamic execution, br_if can pop 2 values if condition is true and the target block has a result.
// But our stack model is for *static* analysis and we need it to be correct at the next instruction,
// where the branch was not taken. So we only pop 1 value, the condition.
self.inst_imm32(BRIF, 1, false, levels);
}
#[allow(dead_code)]
@ -520,7 +644,7 @@ impl<'a> CodeBuilder<'a> {
n_args: usize,
has_return_val: bool,
) {
self.inst(CALL, n_args, has_return_val);
self.inst_base(CALL, n_args, has_return_val);
let offset = self.code.len() as u32;
self.code.encode_padded_u32(function_index);
@ -533,6 +657,13 @@ impl<'a> CodeBuilder<'a> {
offset,
symbol_index,
});
log_instruction!(
"{:10}\t{}\t{:?}",
format!("{:?}", CALL),
function_index,
self.current_stack()
);
}
#[allow(dead_code)]
@ -584,26 +715,44 @@ impl<'a> CodeBuilder<'a> {
instruction_memargs!(i64_store32, I64STORE32, 2, false);
pub fn memory_size(&mut self) {
self.inst_imm8(CURRENTMEMORY, 0, true, 0);
self.inst(CURRENTMEMORY, 0, true);
self.code.push(0);
}
pub fn memory_grow(&mut self) {
self.inst_imm8(GROWMEMORY, 1, true, 0);
self.inst(GROWMEMORY, 1, true);
self.code.push(0);
}
/// Trace a constant-producing instruction: the opcode, the constant itself,
/// and the current block's value stack. Prints only when debug logging is enabled.
fn log_const<T: std::fmt::Debug + std::fmt::Display>(&self, opcode: OpCode, x: T) {
    log_instruction!(
        "{:10}\t{}\t{:?}",
        format!("{:?}", opcode),
        x,
        self.current_stack()
    );
}
pub fn i32_const(&mut self, x: i32) {
self.inst(I32CONST, 0, true);
self.inst_base(I32CONST, 0, true);
self.code.encode_i32(x);
self.log_const(I32CONST, x);
}
pub fn i64_const(&mut self, x: i64) {
self.inst(I64CONST, 0, true);
self.inst_base(I64CONST, 0, true);
self.code.encode_i64(x);
self.log_const(I64CONST, x);
}
pub fn f32_const(&mut self, x: f32) {
self.inst(F32CONST, 0, true);
self.inst_base(F32CONST, 0, true);
self.code.encode_f32(x);
self.log_const(F32CONST, x);
}
pub fn f64_const(&mut self, x: f64) {
self.inst(F64CONST, 0, true);
self.inst_base(F64CONST, 0, true);
self.code.encode_f64(x);
self.log_const(F64CONST, x);
}
// TODO: Consider creating unified methods for numerical ops like 'eq' and 'add',

View file

@ -4,8 +4,6 @@ pub mod opcodes;
pub mod sections;
pub mod serialize;
pub use code_builder::{
Align, BlockType, CodeBuilder, LocalId, ValueType, VirtualMachineSymbolState,
};
pub use code_builder::{Align, BlockType, CodeBuilder, LocalId, ValueType, VmSymbolState};
pub use linking::{LinkingSubSection, SymInfo};
pub use sections::{ConstExpr, Export, ExportType, Global, GlobalType, Signature, WasmModule};

View file

@ -1,5 +1,5 @@
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OpCode {
UNREACHABLE = 0x00,
NOP = 0x01,