Merge remote-tracking branch 'origin/trunk' into tags-invalid-alignment

This commit is contained in:
Folkert 2021-11-03 18:21:16 +01:00
commit 83c47ea6ae
16 changed files with 861 additions and 396 deletions

7
Cargo.lock generated
View file

@ -2774,11 +2774,6 @@ dependencies = [
"syn 1.0.76",
]
[[package]]
name = "parity-wasm"
version = "0.44.0"
source = "git+https://github.com/brian-carroll/parity-wasm?branch=master#373f655f64d2260a2e9665811f7b6ed17f9db705"
[[package]]
name = "parking_lot"
version = "0.11.2"
@ -3890,7 +3885,6 @@ version = "0.1.0"
dependencies = [
"bumpalo",
"indoc 0.3.6",
"parity-wasm",
"pretty_assertions 0.5.1",
"roc_builtins",
"roc_can",
@ -4684,7 +4678,6 @@ dependencies = [
"bumpalo",
"indoc 0.3.6",
"libc",
"parity-wasm",
"pretty_assertions 0.5.1",
"roc_builtins",
"roc_can",

View file

@ -10,8 +10,6 @@ roc_collections = { path = "../collections" }
roc_module = { path = "../module" }
roc_mono = { path = "../mono" }
bumpalo = { version = "3.6.1", features = ["collections"] }
# TODO: switch to parity-wasm 0.44 once it's out (allows bumpalo vectors in some places)
parity-wasm = { git = "https://github.com/brian-carroll/parity-wasm", branch = "master" }
roc_std = { path = "../../roc_std" }
wasmer = "2.0.0"

View file

@ -225,6 +225,4 @@ The Module is a _specification_ for how to create an Instance of the program. Th
A WebAssembly module is equivalent to an executable file. It doesn't normally need relocations since at the WebAssembly layer, there is no Address Space Layout Randomisation. If it has relocations then it's an object file.
The [official spec](https://webassembly.github.io/spec/core/binary/modules.html#sections) lists the sections that are part of the final module. It doesn't mention any sections for relocations or symbol names, but it has room for "custom sections" that in practice seem to be used for that.
The WebAssembly `tool-conventions` repo has a document on [linking](https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md), and the `parity_wasm` crate supports "name" and "relocation" [sections](https://docs.rs/parity-wasm/0.42.2/parity_wasm/elements/enum.Section.html).
The [official spec](https://webassembly.github.io/spec/core/binary/modules.html#sections) lists the sections that are part of the final module. It doesn't mention any sections for relocations or symbol names, but it does support "custom" sections. Conventions to use those for linking are documented in the WebAssembly `tool-conventions` repo [here](https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md) and it mentions that LLVM is using those conventions.

View file

@ -1,6 +1,4 @@
use bumpalo::collections::Vec;
use parity_wasm::builder;
use parity_wasm::builder::{FunctionDefinition, ModuleBuilder};
use roc_collections::all::MutMap;
use roc_module::low_level::LowLevel;
@ -8,12 +6,10 @@ use roc_module::symbol::Symbol;
use roc_mono::ir::{CallType, Expr, JoinPointId, Literal, Proc, Stmt};
use roc_mono::layout::{Builtin, Layout};
use crate::code_builder::{BlockType, CodeBuilder, ValueType};
use crate::layout::WasmLayout;
use crate::module_builder::RelocationEntry;
use crate::serialize::SerialBuffer;
use crate::storage::{Storage, StoredValue, StoredValueKind};
use crate::{copy_memory, CopyMemoryConfig, Env, LocalId, PTR_TYPE};
use crate::wasm_module::{BlockType, CodeBuilder, LocalId, Signature, ValueType, WasmModule};
use crate::{copy_memory, CopyMemoryConfig, Env, PTR_TYPE};
// Don't allocate any constant data at address zero or near it. Would be valid, but bug-prone.
// Follow Emscripten's example by using 1kB (4 bytes would probably do)
@ -26,12 +22,10 @@ pub struct WasmBackend<'a> {
env: &'a Env<'a>,
// Module-level data
pub module_builder: ModuleBuilder,
pub code_section_bytes: std::vec::Vec<u8>,
pub code_relocations: Vec<'a, RelocationEntry>,
pub module: WasmModule<'a>,
_data_offset_map: MutMap<Literal<'a>, u32>,
_data_offset_next: u32,
proc_symbols: &'a [Symbol],
proc_symbols: Vec<'a, Symbol>,
// Function-level data
code_builder: CodeBuilder<'a>,
@ -43,23 +37,15 @@ pub struct WasmBackend<'a> {
}
impl<'a> WasmBackend<'a> {
pub fn new(env: &'a Env<'a>, proc_symbols: &'a [Symbol]) -> Self {
let mut code_section_bytes = std::vec::Vec::with_capacity(4096);
// Code section header
code_section_bytes.reserve_padded_u32(); // byte length, to be written at the end
code_section_bytes.encode_padded_u32(proc_symbols.len() as u32); // modified later in unit tests
pub fn new(env: &'a Env<'a>, proc_symbols: Vec<'a, Symbol>) -> Self {
WasmBackend {
env,
// Module-level data
module_builder: builder::module(),
code_section_bytes,
module: WasmModule::new(env.arena),
_data_offset_map: MutMap::default(),
_data_offset_next: UNUSED_DATA_SECTION_BYTES,
proc_symbols,
code_relocations: Vec::with_capacity_in(256, env.arena),
// Function-level data
block_depth: 0,
@ -69,8 +55,13 @@ impl<'a> WasmBackend<'a> {
}
}
/// Reset function-level data
fn reset(&mut self) {
self.code_builder.clear();
// Push the completed CodeBuilder into the module and swap it for a new empty one
let mut swap_code_builder = CodeBuilder::new(self.env.arena);
std::mem::swap(&mut swap_code_builder, &mut self.code_builder);
self.module.code.code_builders.push(swap_code_builder);
self.storage.clear();
self.joinpoint_label_map.clear();
assert_eq!(self.block_depth, 0);
@ -82,52 +73,43 @@ impl<'a> WasmBackend<'a> {
***********************************************************/
pub fn build_proc(&mut self, proc: Proc<'a>, _sym: Symbol) -> Result<u32, String> {
// println!("\ngenerating procedure {:?}\n", sym);
pub fn build_proc(&mut self, proc: Proc<'a>, _sym: Symbol) -> Result<(), String> {
// println!("\ngenerating procedure {:?}\n", _sym);
// Use parity-wasm to add the signature in "types" and "functions" sections
// but no instructions, since we are building our own code section
let empty_function_def = self.start_proc(&proc);
let location = self.module_builder.push_function(empty_function_def);
let function_index = location.body;
self.start_proc(&proc);
self.build_stmt(&proc.body, &proc.ret_layout)?;
self.finalize_proc()?;
self.reset();
// println!("\nfinished generating {:?}\n", sym);
// println!("\nfinished generating {:?}\n", _sym);
Ok(function_index)
Ok(())
}
fn start_proc(&mut self, proc: &Proc<'a>) -> FunctionDefinition {
fn start_proc(&mut self, proc: &Proc<'a>) {
let ret_layout = WasmLayout::new(&proc.ret_layout);
let signature_builder = if let WasmLayout::StackMemory { .. } = ret_layout {
let ret_type = if ret_layout.is_stack_memory() {
self.storage.arg_types.push(PTR_TYPE);
self.start_block(BlockType::NoResult); // block to ensure all paths pop stack memory (if any)
builder::signature()
None
} else {
let ret_type = ret_layout.value_type();
self.start_block(BlockType::Value(ret_type)); // block to ensure all paths pop stack memory (if any)
builder::signature().with_result(ret_type.to_parity_wasm())
let ty = ret_layout.value_type();
self.start_block(BlockType::Value(ty)); // block to ensure all paths pop stack memory (if any)
Some(ty)
};
for (layout, symbol) in proc.args {
self.storage.allocate(
&WasmLayout::new(layout),
*symbol,
StoredValueKind::Parameter,
);
let arg_layout = WasmLayout::new(layout);
self.storage
.allocate(&arg_layout, *symbol, StoredValueKind::Parameter);
}
let parity_params = self.storage.arg_types.iter().map(|t| t.to_parity_wasm());
let signature = signature_builder.with_params(parity_params).build_sig();
// parity-wasm FunctionDefinition with no instructions
builder::function().with_signature(signature).build()
self.module.add_function_signature(Signature {
param_types: self.storage.arg_types.clone(),
ret_type,
});
}
fn finalize_proc(&mut self) -> Result<(), String> {
@ -141,8 +123,6 @@ impl<'a> WasmBackend<'a> {
self.storage.stack_frame_pointer,
);
let relocs = self.code_builder.serialize(&mut self.code_section_bytes);
self.code_relocations.extend(relocs);
Ok(())
}

View file

@ -1,6 +1,6 @@
use roc_mono::layout::{Layout, UnionLayout};
use crate::{code_builder::ValueType, PTR_SIZE, PTR_TYPE};
use crate::{wasm_module::ValueType, PTR_SIZE, PTR_TYPE};
// See README for background information on Wasm locals, memory and function calls
#[derive(Debug, Clone)]
@ -71,11 +71,7 @@ impl WasmLayout {
}
}
#[allow(dead_code)]
pub fn stack_memory(&self) -> u32 {
match self {
Self::StackMemory { size, .. } => *size,
_ => 0,
}
pub fn is_stack_memory(&self) -> bool {
matches!(self, Self::StackMemory { .. })
}
}

View file

@ -1,27 +1,21 @@
mod backend;
pub mod code_builder;
pub mod from_wasm32_memory;
mod layout;
pub mod module_builder;
pub mod opcodes;
pub mod serialize;
mod storage;
pub mod wasm_module;
use bumpalo::{self, collections::Vec, Bump};
use parity_wasm::builder;
use parity_wasm::elements::{Instruction, Internal, Module, Section};
use roc_collections::all::{MutMap, MutSet};
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{Proc, ProcLayout};
use roc_mono::layout::LayoutIds;
use crate::backend::WasmBackend;
use crate::code_builder::{Align, CodeBuilder, ValueType};
use crate::module_builder::{
LinkingSection, LinkingSubSection, RelocationSection, SectionId, SymInfo,
use crate::wasm_module::{
Align, CodeBuilder, Export, ExportType, Global, GlobalInitValue, GlobalType, LinkingSubSection,
LocalId, SymInfo, ValueType, WasmModule,
};
use crate::serialize::{SerialBuffer, Serialize};
const PTR_SIZE: u32 = 4;
const PTR_TYPE: ValueType = ValueType::I32;
@ -29,13 +23,6 @@ const PTR_TYPE: ValueType = ValueType::I32;
pub const STACK_POINTER_GLOBAL_ID: u32 = 0;
pub const FRAME_ALIGNMENT_BYTES: i32 = 16;
/// Code section ID from spec
/// https://webassembly.github.io/spec/core/binary/modules.html#sections
pub const CODE_SECTION_ID: u8 = 10;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LocalId(pub u32);
pub struct Env<'a> {
pub arena: &'a Bump,
pub interns: Interns,
@ -46,21 +33,19 @@ pub fn build_module<'a>(
env: &'a Env,
procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<std::vec::Vec<u8>, String> {
let (builder, code_section_bytes) = build_module_help(env, procedures)?;
let mut module = builder.build();
replace_code_section(&mut module, code_section_bytes);
module
.into_bytes()
.map_err(|e| -> String { format!("Error serialising Wasm module {:?}", e) })
let mut wasm_module = build_module_help(env, procedures)?;
let mut buffer = std::vec::Vec::with_capacity(4096);
wasm_module.serialize(&mut buffer);
Ok(buffer)
}
pub fn build_module_help<'a>(
env: &'a Env,
procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<(builder::ModuleBuilder, std::vec::Vec<u8>), String> {
) -> Result<WasmModule<'a>, String> {
let proc_symbols = Vec::from_iter_in(procedures.keys().map(|(sym, _)| *sym), env.arena);
let mut backend = WasmBackend::new(env, &proc_symbols);
let mut backend = WasmBackend::new(env, proc_symbols);
let mut layout_ids = LayoutIds::default();
let mut symbol_table_entries = Vec::with_capacity_in(procedures.len(), env.arena);
@ -70,97 +55,40 @@ pub fn build_module_help<'a>(
.to_symbol_string(proc.name, &env.interns);
symbol_table_entries.push(SymInfo::for_function(i as u32, proc_name));
let function_index = backend.build_proc(proc, sym)?;
backend.build_proc(proc, sym)?;
if env.exposed_to_host.contains(&sym) {
let fn_name = layout_ids
.get_toplevel(sym, &layout)
.to_symbol_string(sym, &env.interns);
let export = builder::export()
.field(fn_name.as_str())
.with_internal(Internal::Function(function_index))
.build();
backend.module_builder.push_export(export);
backend.module.export.entries.push(Export {
name: fn_name,
ty: ExportType::Func,
index: i as u32,
});
}
}
// Update code section length
let inner_length = (backend.code_section_bytes.len() - 5) as u32;
backend
.code_section_bytes
.overwrite_padded_u32(0, inner_length);
let symbol_table = LinkingSubSection::SymbolTable(symbol_table_entries);
backend.module.linking.subsections.push(symbol_table);
// linking metadata section
let mut linking_section_bytes = std::vec::Vec::with_capacity(symbol_table_entries.len() * 20);
let linking_section = LinkingSection {
subsections: bumpalo::vec![in env.arena;
LinkingSubSection::SymbolTable(symbol_table_entries)
],
};
linking_section.serialize(&mut linking_section_bytes);
backend.module_builder = backend.module_builder.with_section(Section::Unparsed {
id: SectionId::Custom as u8,
payload: linking_section_bytes,
backend.module.export.entries.push(Export {
name: "memory".to_string(),
ty: ExportType::Mem,
index: 0,
});
// We always output the code section at the same index relative to other sections, and we need that for relocations.
// TODO: If there's a data section, this will be 6 so we'll need logic for that
// TODO: Build a cleaner solution after we replace parity-wasm with our own module_builder
const CODE_SECTION_INDEX: u32 = 5;
let code_reloc_section = RelocationSection {
name: "reloc.CODE",
target_section_index: CODE_SECTION_INDEX,
entries: &backend.code_relocations,
};
let mut code_reloc_section_bytes = std::vec::Vec::with_capacity(256);
code_reloc_section.serialize(&mut code_reloc_section_bytes);
// Must come after linking section
backend.module_builder = backend.module_builder.with_section(Section::Unparsed {
id: SectionId::Custom as u8,
payload: code_reloc_section_bytes,
let stack_pointer_init = backend.module.memory.min_size().unwrap() as i32;
backend.module.global.entries.push(Global {
ty: GlobalType {
value_type: ValueType::I32,
is_mutable: true,
},
init_value: GlobalInitValue::I32(stack_pointer_init),
});
const MIN_MEMORY_SIZE_KB: u32 = 1024;
const PAGE_SIZE_KB: u32 = 64;
let memory = builder::MemoryBuilder::new()
.with_min(MIN_MEMORY_SIZE_KB / PAGE_SIZE_KB)
.build();
backend.module_builder.push_memory(memory);
let memory_export = builder::export()
.field("memory")
.with_internal(Internal::Memory(0))
.build();
backend.module_builder.push_export(memory_export);
let stack_pointer_global = builder::global()
.with_type(parity_wasm::elements::ValueType::I32)
.mutable()
.init_expr(Instruction::I32Const((MIN_MEMORY_SIZE_KB * 1024) as i32))
.build();
backend.module_builder.push_global(stack_pointer_global);
Ok((backend.module_builder, backend.code_section_bytes))
}
/// Replace parity-wasm's code section with our own handmade one
pub fn replace_code_section(module: &mut Module, code_section_bytes: std::vec::Vec<u8>) {
let sections = module.sections_mut();
let code_section_index = sections
.iter()
.position(|s| matches!(s, Section::Code(_)))
.unwrap();
sections[code_section_index] = Section::Unparsed {
id: SectionId::Code as u8,
payload: code_section_bytes,
};
Ok(backend.module)
}
pub struct CopyMemoryConfig {

View file

@ -4,9 +4,9 @@ use bumpalo::Bump;
use roc_collections::all::MutMap;
use roc_module::symbol::Symbol;
use crate::code_builder::{CodeBuilder, ValueType, VirtualMachineSymbolState};
use crate::layout::WasmLayout;
use crate::{copy_memory, round_up_to_alignment, CopyMemoryConfig, LocalId, PTR_SIZE, PTR_TYPE};
use crate::wasm_module::{CodeBuilder, LocalId, ValueType, VirtualMachineSymbolState};
use crate::{copy_memory, round_up_to_alignment, CopyMemoryConfig, PTR_SIZE, PTR_TYPE};
pub enum StoredValueKind {
Parameter,
@ -291,7 +291,7 @@ impl<'a> Storage<'a> {
| StoredValue::Local {
value_type, size, ..
} => {
use crate::code_builder::Align::*;
use crate::wasm_module::Align::*;
code_builder.get_local(to_ptr);
self.load_symbols(code_builder, &[from_symbol]);
match (value_type, size) {

View file

@ -1,14 +1,17 @@
use bumpalo::collections::vec::{Drain, Vec};
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use core::panic;
use std::fmt::Debug;
use roc_module::symbol::Symbol;
use crate::module_builder::{IndexRelocType, RelocationEntry};
use crate::opcodes::*;
use crate::serialize::SerialBuffer;
use crate::{round_up_to_alignment, LocalId, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID};
use super::linking::{IndexRelocType, RelocationEntry};
use super::opcodes::*;
use super::serialize::{SerialBuffer, Serialize};
use crate::{round_up_to_alignment, FRAME_ALIGNMENT_BYTES, STACK_POINTER_GLOBAL_ID};
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LocalId(pub u32);
/// Wasm value type. (Rust representation matches Wasm encoding)
#[repr(u8)]
@ -20,15 +23,9 @@ pub enum ValueType {
F64 = 0x7c,
}
// This is a bit unfortunate. Will go away if we generate our own Types section.
impl ValueType {
pub fn to_parity_wasm(&self) -> parity_wasm::elements::ValueType {
match self {
Self::I32 => parity_wasm::elements::ValueType::I32,
Self::I64 => parity_wasm::elements::ValueType::I64,
Self::F32 => parity_wasm::elements::ValueType::F32,
Self::F64 => parity_wasm::elements::ValueType::F64,
}
impl Serialize for ValueType {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_u8(*self as u8);
}
}
@ -92,8 +89,8 @@ pub enum VirtualMachineSymbolState {
// An instruction (local.set or local.tee) to be inserted into the function code
#[derive(Debug)]
struct InsertLocation {
insert_at: usize,
struct Insertion {
at: usize,
start: usize,
end: usize,
}
@ -124,7 +121,7 @@ pub struct CodeBuilder<'a> {
insert_bytes: Vec<'a, u8>,
/// Code locations where the insert_bytes should go
insert_locations: Vec<'a, InsertLocation>,
insertions: Vec<'a, Insertion>,
/// Bytes for local variable declarations and stack-frame setup code.
/// We can't write this until we've finished the main code. But it goes
@ -151,7 +148,7 @@ impl<'a> CodeBuilder<'a> {
pub fn new(arena: &'a Bump) -> Self {
CodeBuilder {
code: Vec::with_capacity_in(1024, arena),
insert_locations: Vec::with_capacity_in(32, arena),
insertions: Vec::with_capacity_in(32, arena),
insert_bytes: Vec::with_capacity_in(64, arena),
preamble: Vec::with_capacity_in(32, arena),
inner_length: Vec::with_capacity_in(5, arena),
@ -160,15 +157,6 @@ impl<'a> CodeBuilder<'a> {
}
}
pub fn clear(&mut self) {
self.code.clear();
self.insert_locations.clear();
self.insert_bytes.clear();
self.preamble.clear();
self.inner_length.clear();
self.vm_stack.clear();
}
/**********************************************************
SYMBOLS
@ -220,8 +208,8 @@ impl<'a> CodeBuilder<'a> {
self.insert_bytes.push(opcode);
self.insert_bytes.encode_u32(immediate);
self.insert_locations.push(InsertLocation {
insert_at,
self.insertions.push(Insertion {
at: insert_at,
start,
end: self.insert_bytes.len(),
});
@ -384,52 +372,56 @@ impl<'a> CodeBuilder<'a> {
let inner_len = self.preamble.len() + self.code.len() + self.insert_bytes.len();
self.inner_length.encode_u32(inner_len as u32);
}
/// Write out all the bytes in the right order
pub fn serialize<T: SerialBuffer>(
&mut self,
code_section_buf: &mut T,
) -> Drain<RelocationEntry> {
code_section_buf.append_slice(&self.inner_length);
code_section_buf.append_slice(&self.preamble);
// Sort insertions. They are not created in order of assignment, but in order of *second* usage.
self.insert_locations.sort_by_key(|loc| loc.insert_at);
self.insertions.sort_by_key(|ins| ins.at);
}
/// Serialize all byte vectors in the right order
/// Also update relocation offsets relative to the provided base offset in the buffer
pub fn serialize_with_relocs<T: SerialBuffer>(
&self,
buffer: &mut T,
final_relocs: &mut Vec<'a, RelocationEntry>,
reloc_base_offset: usize,
) {
buffer.append_slice(&self.inner_length);
buffer.append_slice(&self.preamble);
// Do the insertions & update relocation offsets
const CODE_SECTION_BODY_OFFSET: usize = 5;
let mut reloc_index = 0;
let mut code_pos: usize = 0;
for location in self.insert_locations.iter() {
let mut code_pos = 0;
let mut insert_iter = self.insertions.iter();
loop {
let next_insert = insert_iter.next();
let next_pos = next_insert.map(|i| i.at).unwrap_or_else(|| self.code.len());
// Relocation offset needs to be an index into the body of the code section, but
// at this point it is an index into self.code. Need to adjust for all previous functions
// in the code section, and for insertions in the current function.
let section_body_pos = code_section_buf.size() - CODE_SECTION_BODY_OFFSET;
let section_body_pos = buffer.size() - reloc_base_offset;
while reloc_index < self.relocations.len()
&& self.relocations[reloc_index].offset() < location.insert_at as u32
&& self.relocations[reloc_index].offset() < next_pos as u32
{
let offset_ref = self.relocations[reloc_index].offset_mut();
*offset_ref += (section_body_pos - code_pos) as u32;
let mut reloc_clone = self.relocations[reloc_index].clone();
*reloc_clone.offset_mut() += (section_body_pos - code_pos) as u32;
final_relocs.push(reloc_clone);
reloc_index += 1;
}
code_section_buf.append_slice(&self.code[code_pos..location.insert_at]);
code_section_buf.append_slice(&self.insert_bytes[location.start..location.end]);
code_pos = location.insert_at;
buffer.append_slice(&self.code[code_pos..next_pos]);
match next_insert {
Some(Insertion { at, start, end }) => {
buffer.append_slice(&self.insert_bytes[*start..*end]);
code_pos = *at;
}
None => {
break;
}
}
let section_body_pos = code_section_buf.size() - CODE_SECTION_BODY_OFFSET;
while reloc_index < self.relocations.len() {
let offset_ref = self.relocations[reloc_index].offset_mut();
*offset_ref += (section_body_pos - code_pos) as u32;
reloc_index += 1;
}
let len = self.code.len();
code_section_buf.append_slice(&self.code[code_pos..len]);
self.relocations.drain(0..)
}
/**********************************************************

View file

@ -1,73 +1,9 @@
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use crate::code_builder::Align;
use crate::serialize::{SerialBuffer, Serialize};
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum SectionId {
Custom = 0,
Type = 1,
Import = 2,
Function = 3,
Table = 4,
Memory = 5,
Global = 6,
Export = 7,
Start = 8,
Element = 9,
Code = 10,
Data = 11,
DataCount = 12,
}
struct SectionHeaderIndices {
size_index: usize,
body_index: usize,
}
/// Write a section header, returning the position of the encoded length
fn _write_section_header<T: SerialBuffer>(buffer: &mut T, id: SectionId) -> SectionHeaderIndices {
buffer.append_byte(id as u8);
let size_index = buffer.reserve_padded_u32();
let body_index = buffer.size();
SectionHeaderIndices {
size_index,
body_index,
}
}
/// Write a custom section header, returning the position of the encoded length
fn write_custom_section_header<T: SerialBuffer>(
buffer: &mut T,
name: &str,
) -> SectionHeaderIndices {
// buffer.append_byte(SectionId::Custom as u8); // TODO: uncomment when we get rid of parity_wasm
let size_index = buffer.reserve_padded_u32();
let body_index = buffer.size();
name.serialize(buffer);
SectionHeaderIndices {
size_index,
body_index,
}
}
/// Update a section header with its final size, after writing the bytes
fn update_section_size<T: SerialBuffer>(buffer: &mut T, header_indices: SectionHeaderIndices) {
let size = buffer.size() - header_indices.body_index;
buffer.overwrite_padded_u32(header_indices.size_index, size as u32);
}
fn serialize_vector_with_count<'a, SB, S>(buffer: &mut SB, items: &Vec<'a, S>)
where
SB: SerialBuffer,
S: Serialize,
{
buffer.encode_u32(items.len() as u32);
for item in items.iter() {
item.serialize(buffer);
}
}
use super::sections::{update_section_size, write_custom_section_header};
use super::serialize::{SerialBuffer, Serialize};
use super::Align;
/*******************************************************************
*
@ -132,7 +68,7 @@ pub enum OffsetRelocType {
MemoryAddrI64 = 16,
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum RelocationEntry {
Index {
type_id: IndexRelocType,
@ -181,7 +117,7 @@ impl Serialize for RelocationEntry {
offset,
symbol_index,
} => {
buffer.append_byte(*type_id as u8);
buffer.append_u8(*type_id as u8);
buffer.encode_u32(*offset);
buffer.encode_u32(*symbol_index);
}
@ -191,7 +127,7 @@ impl Serialize for RelocationEntry {
symbol_index,
addend,
} => {
buffer.append_byte(*type_id as u8);
buffer.append_u8(*type_id as u8);
buffer.encode_u32(*offset);
buffer.encode_u32(*symbol_index);
buffer.encode_i32(*addend);
@ -204,18 +140,30 @@ impl Serialize for RelocationEntry {
pub struct RelocationSection<'a> {
pub name: &'a str,
/// The *index* (not ID!) of the target section in the module
pub target_section_index: u32,
pub entries: &'a Vec<'a, RelocationEntry>,
pub target_section_index: Option<u32>,
pub entries: Vec<'a, RelocationEntry>,
}
impl<'a> RelocationSection<'a> {
/// Create an empty relocation section called `name`.
/// The entry vector is allocated in `arena` with an initial capacity of 64.
/// `target_section_index` starts out as `None` and is expected to be filled in
/// later; `serialize` unwraps it whenever there is at least one entry.
pub fn new(arena: &'a Bump, name: &'a str) -> Self {
RelocationSection {
name,
target_section_index: None,
entries: Vec::with_capacity_in(64, arena),
}
}
}
impl<'a> Serialize for RelocationSection<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
if !self.entries.is_empty() {
let header_indices = write_custom_section_header(buffer, self.name);
buffer.encode_u32(self.target_section_index);
serialize_vector_with_count(buffer, self.entries);
buffer.encode_u32(self.target_section_index.unwrap());
self.entries.serialize(buffer);
update_section_size(buffer, header_indices);
}
}
}
/*******************************************************************
*
@ -231,6 +179,7 @@ pub struct LinkingSegment {
pub alignment: Align,
pub flags: u32,
}
impl Serialize for LinkingSegment {
fn serialize<T: SerialBuffer>(&self, _buffer: &mut T) {
todo!();
@ -242,17 +191,16 @@ pub struct LinkingInitFunc {
pub priority: u32,
pub symbol_index: u32, // index in the symbol table, not the function index
}
impl Serialize for LinkingInitFunc {
fn serialize<T: SerialBuffer>(&self, _buffer: &mut T) {
todo!();
}
}
//----------------
//
//------------------------------------------------
// Common data
//
//----------------
//------------------------------------------------
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
@ -269,6 +217,7 @@ pub struct ComdatSym {
pub kind: ComdatSymKind,
pub index: u32,
}
impl Serialize for ComdatSym {
fn serialize<T: SerialBuffer>(&self, _buffer: &mut T) {
todo!();
@ -285,17 +234,16 @@ pub struct LinkingComdat<'a> {
flags: u32,
syms: Vec<'a, ComdatSym>,
}
impl<'a> Serialize for LinkingComdat<'a> {
fn serialize<T: SerialBuffer>(&self, _buffer: &mut T) {
todo!();
}
}
//----------------
//
//------------------------------------------------
// Symbol table
//
//----------------
//------------------------------------------------
/// Indicating that this is a weak symbol. When
/// linking multiple modules defining the same symbol, all weak definitions are
@ -339,6 +287,7 @@ pub enum WasmObjectSymbol {
Defined { index: u32, name: String },
Imported { index: u32 },
}
impl Serialize for WasmObjectSymbol {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
match self {
@ -365,6 +314,7 @@ pub enum DataSymbol {
name: String,
},
}
impl Serialize for DataSymbol {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
match self {
@ -405,6 +355,7 @@ pub struct SymInfo {
flags: u32,
info: SymInfoFields,
}
impl SymInfo {
pub fn for_function(wasm_function_index: u32, name: String) -> Self {
let linking_symbol = WasmObjectSymbol::Defined {
@ -420,7 +371,7 @@ impl SymInfo {
impl Serialize for SymInfo {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_byte(match self.info {
buffer.append_u8(match self.info {
SymInfoFields::Function(_) => 0,
SymInfoFields::Data(_) => 1,
SymInfoFields::Global(_) => 2,
@ -442,11 +393,9 @@ impl Serialize for SymInfo {
}
}
//--------------------------------
//
//----------------------------------------------------------------
// Linking subsections
//
//--------------------------------
//----------------------------------------------------------------
pub enum LinkingSubSection<'a> {
/// Extra metadata about the data segments.
@ -459,9 +408,10 @@ pub enum LinkingSubSection<'a> {
/// Specifies extra information about the symbols present in the module.
SymbolTable(Vec<'a, SymInfo>),
}
impl<'a> Serialize for LinkingSubSection<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_byte(match self {
buffer.append_u8(match self {
Self::SegmentInfo(_) => 5,
Self::InitFuncs(_) => 6,
Self::ComdatInfo(_) => 7,
@ -470,10 +420,10 @@ impl<'a> Serialize for LinkingSubSection<'a> {
let payload_len_index = buffer.reserve_padded_u32();
let payload_start_index = buffer.size();
match self {
Self::SegmentInfo(items) => serialize_vector_with_count(buffer, items),
Self::InitFuncs(items) => serialize_vector_with_count(buffer, items),
Self::ComdatInfo(items) => serialize_vector_with_count(buffer, items),
Self::SymbolTable(items) => serialize_vector_with_count(buffer, items),
Self::SegmentInfo(items) => items.serialize(buffer),
Self::InitFuncs(items) => items.serialize(buffer),
Self::ComdatInfo(items) => items.serialize(buffer),
Self::SymbolTable(items) => items.serialize(buffer),
}
buffer.overwrite_padded_u32(
payload_len_index,
@ -482,15 +432,28 @@ impl<'a> Serialize for LinkingSubSection<'a> {
}
}
//----------------------------------------------------------------
// Linking metadata section
//----------------------------------------------------------------
const LINKING_VERSION: u8 = 2;
pub struct LinkingSection<'a> {
pub subsections: Vec<'a, LinkingSubSection<'a>>,
}
impl<'a> LinkingSection<'a> {
/// Create a linking section with no subsections.
/// The subsection vector lives in `arena` and is pre-sized for a single entry.
pub fn new(arena: &'a Bump) -> Self {
LinkingSection {
subsections: Vec::with_capacity_in(1, arena),
}
}
}
impl<'a> Serialize for LinkingSection<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
let header_indices = write_custom_section_header(buffer, "linking");
buffer.append_byte(LINKING_VERSION);
buffer.append_u8(LINKING_VERSION);
for subsection in self.subsections.iter() {
subsection.serialize(buffer);
}

View file

@ -0,0 +1,13 @@
pub mod code_builder;
pub mod linking;
pub mod opcodes;
pub mod sections;
pub mod serialize;
pub use code_builder::{
Align, BlockType, CodeBuilder, LocalId, ValueType, VirtualMachineSymbolState,
};
pub use linking::{LinkingSubSection, SymInfo};
pub use sections::{
Export, ExportType, Global, GlobalInitValue, GlobalType, Signature, WasmModule,
};

View file

@ -0,0 +1,578 @@
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use super::linking::{LinkingSection, RelocationEntry, RelocationSection};
use super::opcodes;
use super::serialize::{SerialBuffer, Serialize};
use super::{CodeBuilder, ValueType};
/*******************************************************************
*
* Helpers
*
*******************************************************************/
/// Numeric IDs of the sections of a WebAssembly module.
/// `#[repr(u8)]` lets a variant be cast straight to the ID byte that the
/// header-writing helpers in this file append at the start of each section.
/// https://webassembly.github.io/spec/core/binary/modules.html#sections
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum SectionId {
Custom = 0,
Type = 1,
Import = 2,
Function = 3,
Table = 4,
Memory = 5,
Global = 6,
Export = 7,
Start = 8,
Element = 9,
Code = 10,
Data = 11,
DataCount = 12,
}
/// Buffer positions recorded while a section header is written.
/// Passed to `update_section_size` afterwards so the section's byte length
/// can be patched in once the body has been serialized.
pub struct SectionHeaderIndices {
/// Index returned by `reserve_padded_u32`: where the size gets overwritten
size_index: usize,
/// Buffer size right after the reserved size field; the section size is measured from here
body_index: usize,
}
/// Begin a standard section: emit its ID byte and reserve space for the size.
/// The returned indices must be handed to `update_section_size` once the
/// section body has been written.
fn write_section_header<T: SerialBuffer>(buffer: &mut T, id: SectionId) -> SectionHeaderIndices {
    buffer.append_u8(id as u8);
    // Fields are evaluated in the order written: reserve the size slot first,
    // then record where the body starts.
    SectionHeaderIndices {
        size_index: buffer.reserve_padded_u32(),
        body_index: buffer.size(),
    }
}
/// Begin a custom section: emit the `Custom` ID byte, reserve space for the
/// size, then write the section name.
/// The returned indices must be handed to `update_section_size` once the
/// section body has been written.
pub fn write_custom_section_header<T: SerialBuffer>(
    buffer: &mut T,
    name: &str,
) -> SectionHeaderIndices {
    buffer.append_u8(SectionId::Custom as u8);
    let header_indices = SectionHeaderIndices {
        size_index: buffer.reserve_padded_u32(),
        body_index: buffer.size(),
    };
    // The name is serialized after `body_index` is captured, so its bytes
    // are counted as part of the section size.
    name.serialize(buffer);
    header_indices
}
/// Update a section header with its final size, after writing the bytes
pub fn update_section_size<T: SerialBuffer>(buffer: &mut T, header_indices: SectionHeaderIndices) {
let size = buffer.size() - header_indices.body_index;
buffer.overwrite_padded_u32(header_indices.size_index, size as u32);
}
/// Serialize a section whose body is nothing but a vector of items.
/// An empty vector produces no output at all, not even a section header.
fn serialize_vector_section<B: SerialBuffer, T: Serialize>(
    buffer: &mut B,
    section_id: SectionId,
    subsections: &[T],
) {
    if subsections.is_empty() {
        return;
    }

    let header_indices = write_section_header(buffer, section_id);
    subsections.serialize(buffer);
    update_section_size(buffer, header_indices);
}
/*******************************************************************
*
* Type section
* Deduplicated list of function type signatures
*
*******************************************************************/
/// The type signature of a function: its parameter types and optional return type.
/// Equality is derived so that `TypeSection::insert` can deduplicate signatures.
#[derive(PartialEq, Eq)]
pub struct Signature<'a> {
/// Types of the parameters, in order
pub param_types: Vec<'a, ValueType>,
/// Type of the returned value, or `None` when the function returns nothing
pub ret_type: Option<ValueType>,
}
impl<'a> Serialize for Signature<'a> {
/// Write a leading 0x60 byte, then the parameter types, then the return type.
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
// NOTE(review): 0x60 is presumably the function-type marker of the Wasm binary format; confirm against the spec
buffer.append_u8(0x60);
self.param_types.serialize(buffer);
self.ret_type.serialize(buffer);
}
}
/// The Type section: a deduplicated list of function signatures.
pub struct TypeSection<'a> {
    /// Private. See WasmModule::add_function_signature
    signatures: Vec<'a, Signature<'a>>,
}

impl<'a> TypeSection<'a> {
    /// Create an empty section with room for a few signatures in `arena`.
    pub fn new(arena: &'a Bump) -> Self {
        let signatures = Vec::with_capacity_in(8, arena);
        TypeSection { signatures }
    }

    /// Return the index of `signature`, appending it first if no equal
    /// signature is stored yet.
    fn insert(&mut self, signature: Signature<'a>) -> u32 {
        // Linear search: indices already handed out (and stored in the
        // Function section) must stay stable, and the list is small in practice.
        if let Some(existing) = self.signatures.iter().position(|s| *s == signature) {
            return existing as u32;
        }

        let appended_at = self.signatures.len();
        self.signatures.push(signature);
        appended_at as u32
    }
}

impl<'a> Serialize for TypeSection<'a> {
    /// Emit the section as a vector of signatures (nothing if there are none).
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        serialize_vector_section(buffer, SectionId::Type, &self.signatures);
    }
}
/*******************************************************************
*
* Import section
*
*******************************************************************/
/// Reference type of a table's elements.
/// The discriminant is the byte written when a `TableType` is serialized.
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum RefType {
Func = 0x70,
Extern = 0x6f,
}
/// Type of a table: the kind of reference it holds and its size limits.
pub struct TableType {
    pub ref_type: RefType,
    pub limits: Limits,
}

impl Serialize for TableType {
    /// Writes the reference-type byte followed by the encoded limits.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let TableType { ref_type, limits } = self;
        buffer.append_u8(*ref_type as u8);
        limits.serialize(buffer);
    }
}
/// What kind of entity an `Import` brings into the module.
/// When serialized, each variant is written as a tag byte
/// (0 = Func, 1 = Table, 2 = Mem, 3 = Global) followed by its payload.
pub enum ImportDesc {
/// A function, identified by its signature index
Func { signature_index: u32 },
/// A table with the given type
Table { ty: TableType },
/// A memory with the given limits
Mem { limits: Limits },
/// A global with the given type
Global { ty: GlobalType },
}
/// One entry of the Import section: a two-level name plus a description of
/// the imported entity.
pub struct Import {
    pub module: String,
    pub name: String,
    pub description: ImportDesc,
}

impl Serialize for Import {
    /// Writes the module name, the field name, a tag byte for the kind of
    /// import, and finally the kind-specific payload.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let Import {
            module,
            name,
            description,
        } = self;

        module.as_str().serialize(buffer);
        name.as_str().serialize(buffer);

        let kind_tag: u8 = match description {
            ImportDesc::Func { .. } => 0,
            ImportDesc::Table { .. } => 1,
            ImportDesc::Mem { .. } => 2,
            ImportDesc::Global { .. } => 3,
        };
        buffer.append_u8(kind_tag);

        match description {
            ImportDesc::Func { signature_index } => {
                buffer.encode_u32(*signature_index);
            }
            ImportDesc::Table { ty } => ty.serialize(buffer),
            ImportDesc::Mem { limits } => limits.serialize(buffer),
            ImportDesc::Global { ty } => ty.serialize(buffer),
        }
    }
}
/// The Import section: a list of `Import` entries.
pub struct ImportSection<'a> {
    entries: Vec<'a, Import>,
}

impl<'a> ImportSection<'a> {
    /// Create an empty import list allocated in `arena`.
    pub fn new(arena: &'a Bump) -> Self {
        let entries = Vec::new_in(arena);
        ImportSection { entries }
    }
}

impl<'a> Serialize for ImportSection<'a> {
    /// Emit the section as a vector of imports (nothing if there are none).
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        serialize_vector_section(buffer, SectionId::Import, &self.entries);
    }
}
/*******************************************************************
*
* Function section
* Maps function indices (Code section) to signature indices (Type section)
*
*******************************************************************/
/// The Function section: for each function, the index of its signature in
/// the Type section.
pub struct FunctionSection<'a> {
    /// Private. See WasmModule::add_function_signature
    signature_indices: Vec<'a, u32>,
}

impl<'a> FunctionSection<'a> {
    /// Create an empty section with room for a few entries in `arena`.
    pub fn new(arena: &'a Bump) -> Self {
        let signature_indices = Vec::with_capacity_in(8, arena);
        FunctionSection { signature_indices }
    }
}

impl<'a> Serialize for FunctionSection<'a> {
    /// Emit the section as a vector of signature indices (nothing if empty).
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        serialize_vector_section(buffer, SectionId::Function, &self.signature_indices);
    }
}
/*******************************************************************
*
* Memory section
*
*******************************************************************/
/// Size limits: a minimum, optionally paired with a maximum.
pub enum Limits {
    Min(u32),
    MinMax(u32, u32),
}

impl Serialize for Limits {
    /// Writes a flag byte (0 = minimum only, 1 = minimum and maximum)
    /// followed by the encoded bound(s).
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let (flag, lower, upper) = match *self {
            Limits::Min(min) => (0u8, min, None),
            Limits::MinMax(min, max) => (1u8, min, Some(max)),
        };

        buffer.append_u8(flag);
        buffer.encode_u32(lower);
        if let Some(max) = upper {
            buffer.encode_u32(max);
        }
    }
}
/// The Memory section: either no memory at all, or one memory with limits
/// expressed in pages.
pub struct MemorySection(Option<Limits>);

impl MemorySection {
    /// Size of one memory page in bytes.
    pub const PAGE_SIZE: u32 = 64 * 1024;

    /// Create a memory section large enough to hold `bytes` bytes.
    ///
    /// The size is rounded up to a whole number of pages. Zero bytes means
    /// "no memory", which makes the section serialize to nothing.
    pub fn new(bytes: u32) -> Self {
        if bytes == 0 {
            return MemorySection(None);
        }

        // Ceiling division done without adding to `bytes`: the usual
        // `bytes + PAGE_SIZE - 1` overflows when `bytes` is close to u32::MAX.
        let whole_pages = bytes / Self::PAGE_SIZE;
        let partial_page = u32::from(bytes % Self::PAGE_SIZE != 0);
        MemorySection(Some(Limits::Min(whole_pages + partial_page)))
    }

    /// Minimum memory size in bytes, or `None` when there is no memory.
    ///
    /// A minimum of 65536 pages or more does not fit in a `u32`; in that case
    /// the result saturates at `u32::MAX` instead of overflowing.
    pub fn min_size(&self) -> Option<u32> {
        let min_pages = match self.0.as_ref()? {
            Limits::Min(min) | Limits::MinMax(min, _) => *min,
        };
        Some(min_pages.saturating_mul(Self::PAGE_SIZE))
    }
}
impl Serialize for MemorySection {
    /// Emit the Memory section as a vector holding exactly one memory.
    /// Writes nothing when the section has no limits.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let limits = match &self.0 {
            Some(limits) => limits,
            None => return,
        };

        let header = write_section_header(buffer, SectionId::Memory);
        // Vector length: there is a single memory
        buffer.append_u8(1);
        limits.serialize(buffer);
        update_section_size(buffer, header);
    }
}
/*******************************************************************
*
* Global section
*
*******************************************************************/
/// Type of a global variable: its value type and whether it can be mutated.
pub struct GlobalType {
    pub value_type: ValueType,
    pub is_mutable: bool,
}

impl Serialize for GlobalType {
    /// Writes the value-type byte followed by a mutability byte
    /// (0 = immutable, 1 = mutable).
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let GlobalType {
            value_type,
            is_mutable,
        } = self;
        buffer.append_u8(*value_type as u8);
        buffer.append_u8(u8::from(*is_mutable));
    }
}
/// Constant used to initialize a `Global`, one variant per numeric value type.
/// `Global::serialize` writes it as the matching `*CONST` opcode followed by
/// the encoded value.
pub enum GlobalInitValue {
I32(i32),
I64(i64),
F32(f32),
F64(f64),
}
/// One entry of the Global section: a type and a constant initial value.
pub struct Global {
    pub ty: GlobalType,
    pub init_value: GlobalInitValue,
}

impl Serialize for Global {
    /// Writes the global's type, then its initializer as a single
    /// `*.const` instruction terminated by `END`.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let Global { ty, init_value } = self;
        ty.serialize(buffer);

        match init_value {
            GlobalInitValue::I32(value) => {
                buffer.append_u8(opcodes::I32CONST);
                buffer.encode_i32(*value);
            }
            GlobalInitValue::I64(value) => {
                buffer.append_u8(opcodes::I64CONST);
                buffer.encode_i64(*value);
            }
            GlobalInitValue::F32(value) => {
                buffer.append_u8(opcodes::F32CONST);
                buffer.encode_f32(*value);
            }
            GlobalInitValue::F64(value) => {
                buffer.append_u8(opcodes::F64CONST);
                buffer.encode_f64(*value);
            }
        }

        buffer.append_u8(opcodes::END);
    }
}
/// The Global section: a list of `Global` entries.
pub struct GlobalSection<'a> {
    pub entries: Vec<'a, Global>,
}

impl<'a> GlobalSection<'a> {
    /// Create an empty section with room for one global in `arena`.
    pub fn new(arena: &'a Bump) -> Self {
        let entries = Vec::with_capacity_in(1, arena);
        GlobalSection { entries }
    }
}

impl<'a> Serialize for GlobalSection<'a> {
    /// Emit the section as a vector of globals (nothing if there are none).
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        serialize_vector_section(buffer, SectionId::Global, &self.entries);
    }
}
/*******************************************************************
*
* Export section
*
*******************************************************************/
/// Kind of entity that an `Export` exposes.
/// The discriminant is the byte written after the export's name when an
/// `Export` is serialized.
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum ExportType {
Func = 0,
Table = 1,
Mem = 2,
Global = 3,
}
/// One entry of the Export section: a name, the kind of exported entity and
/// that entity's index.
pub struct Export {
    pub name: String,
    pub ty: ExportType,
    pub index: u32,
}

impl Serialize for Export {
    /// Writes the name, then the kind byte, then the encoded index.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let Export { name, ty, index } = self;
        name.as_str().serialize(buffer);
        buffer.append_u8(*ty as u8);
        buffer.encode_u32(*index);
    }
}
/// The Export section: a list of `Export` entries.
pub struct ExportSection<'a> {
    pub entries: Vec<'a, Export>,
}

impl<'a> ExportSection<'a> {
    /// Create an empty export list allocated in `arena`.
    pub fn new(arena: &'a Bump) -> Self {
        let entries = Vec::new_in(arena);
        ExportSection { entries }
    }
}

impl<'a> Serialize for ExportSection<'a> {
    /// Emit the section as a vector of exports (nothing if there are none).
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        serialize_vector_section(buffer, SectionId::Export, &self.entries);
    }
}
/*******************************************************************
*
* Code section (see also code_builder.rs)
*
*******************************************************************/
/// The Code section: one `CodeBuilder` per function body.
#[derive(Debug)]
pub struct CodeSection<'a> {
    pub code_builders: Vec<'a, CodeBuilder<'a>>,
}

impl<'a> CodeSection<'a> {
    /// Create an empty section with room for a few functions in `arena`.
    pub fn new(arena: &'a Bump) -> Self {
        let code_builders = Vec::with_capacity_in(8, arena);
        CodeSection { code_builders }
    }

    /// Serialize the code builders for all functions, and get code relocations with final offsets
    ///
    /// The section header is always written, even when there are no
    /// functions. Each builder is handed the position where the section body
    /// starts, along with the shared `relocations` list.
    pub fn serialize_mut<T: SerialBuffer>(
        &mut self,
        buffer: &mut T,
        relocations: &mut Vec<'a, RelocationEntry>,
    ) {
        let header = write_section_header(buffer, SectionId::Code);
        let body_start = header.body_index;

        let function_count = self.code_builders.len() as u32;
        buffer.encode_u32(function_count);

        self.code_builders.iter_mut().for_each(|builder| {
            builder.serialize_with_relocs(buffer, relocations, body_start);
        });

        update_section_size(buffer, header);
    }
}
/*******************************************************************
*
* Module
*
* https://webassembly.github.io/spec/core/binary/modules.html
*
*******************************************************************/
/// In-memory representation of a whole Wasm module, one field per section.
/// Sections that are not implemented yet are `()` placeholders, which
/// serialize to nothing.
pub struct WasmModule<'a> {
/// Deduplicated function signatures
pub types: TypeSection<'a>,
/// Imported entities
pub import: ImportSection<'a>,
/// Signature index for each function
pub function: FunctionSection<'a>,
/// Dummy placeholder for tables (used for function pointers and host references)
pub table: (),
/// The module's memory, if any
pub memory: MemorySection,
/// Global variables
pub global: GlobalSection<'a>,
/// Exported entities
pub export: ExportSection<'a>,
/// Dummy placeholder for start function. In Roc, this would be part of the platform.
pub start: (),
/// Dummy placeholder for table elements. Roc does not use tables.
pub element: (),
/// Dummy placeholder for data count section, not yet implemented
pub data_count: (),
/// Function bodies
pub code: CodeSection<'a>,
/// Dummy placeholder for data section, not yet implemented
pub data: (),
/// Linking section
pub linking: LinkingSection<'a>,
/// Relocations for the code section (created with the name "reloc.CODE")
pub reloc_code: RelocationSection<'a>,
/// Relocations for the data section (created with the name "reloc.DATA")
pub reloc_data: RelocationSection<'a>,
}
impl<'a> WasmModule<'a> {
pub const WASM_VERSION: u32 = 1;
pub fn new(arena: &'a Bump) -> Self {
WasmModule {
types: TypeSection::new(arena),
import: ImportSection::new(arena),
function: FunctionSection::new(arena),
table: (), // Unused in Roc (mainly for function pointers)
memory: MemorySection::new(1024 * 1024),
global: GlobalSection::new(arena),
export: ExportSection::new(arena),
start: (), // Entry function. In Roc this would be part of the platform.
element: (), // Unused in Roc (related to table section)
data_count: (), // TODO, related to data section
code: CodeSection::new(arena),
data: (), // TODO: program constants (e.g. string literals)
linking: LinkingSection::new(arena),
reloc_code: RelocationSection::new(arena, "reloc.CODE"),
reloc_data: RelocationSection::new(arena, "reloc.DATA"),
}
}
/// Create entries in the Type and Function sections for a function signature
pub fn add_function_signature(&mut self, signature: Signature<'a>) {
let index = self.types.insert(signature);
self.function.signature_indices.push(index);
}
#[allow(clippy::unit_arg)]
pub fn serialize<T: SerialBuffer>(&mut self, buffer: &mut T) {
buffer.append_u8(0);
buffer.append_slice("asm".as_bytes());
buffer.write_unencoded_u32(Self::WASM_VERSION);
let mut index: u32 = 0;
let mut prev_size = buffer.size();
self.types.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.import.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.function.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.table.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.memory.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.global.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.export.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.start.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.element.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.data_count.serialize(buffer);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.reloc_code.target_section_index = Some(index);
self.code
.serialize_mut(buffer, &mut self.reloc_code.entries);
maybe_increment_section(buffer.size(), &mut prev_size, &mut index);
self.data.serialize(buffer);
self.reloc_data.target_section_index = Some(index);
self.linking.serialize(buffer);
self.reloc_code.serialize(buffer);
self.reloc_data.serialize(buffer);
}
}
/// Advance the section counter if the buffer grew since the last check.
///
/// `size` is the current buffer size. When it exceeds `*prev_size`, a section
/// was written: `*index` is incremented and `*prev_size` is updated.
/// Otherwise both are left untouched.
fn maybe_increment_section(size: usize, prev_size: &mut usize, index: &mut u32) {
    let section_was_written = size > *prev_size;
    if !section_was_written {
        return;
    }
    *index += 1;
    *prev_size = size;
}

View file

@ -1,5 +1,53 @@
use std::fmt::Debug;
use bumpalo::collections::vec::Vec;
/// Types that can append their binary encoding to a `SerialBuffer`.
pub trait Serialize {
/// Append the encoding of `self` to the end of `buffer`.
fn serialize<T: SerialBuffer>(&self, buffer: &mut T);
}
impl Serialize for str {
    /// Writes the length in bytes (not characters) followed by the raw
    /// UTF-8 bytes.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let utf8 = self.as_bytes();
        buffer.encode_u32(utf8.len() as u32);
        buffer.append_slice(utf8);
    }
}
impl Serialize for u32 {
    /// Writes the value using the buffer's variable-length `encode_u32`.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let value: u32 = *self;
        buffer.encode_u32(value);
    }
}
// Unit is used as a placeholder in parts of the Wasm spec we don't use yet
impl Serialize for () {
/// Intentionally writes nothing, so a placeholder adds no bytes to the buffer.
fn serialize<T: SerialBuffer>(&self, _buffer: &mut T) {}
}
impl<S: Serialize> Serialize for [S] {
    /// Writes the element count followed by each element in order.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let count = self.len() as u32;
        buffer.encode_u32(count);
        self.iter().for_each(|element| element.serialize(buffer));
    }
}
impl<S: Serialize> Serialize for Option<S> {
    /// serialize Option as a vector of length 1 or 0
    ///
    /// `Some(x)` becomes the byte 1 followed by `x`; `None` becomes the
    /// single byte 0.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        if let Some(inner) = self {
            buffer.append_u8(1);
            inner.serialize(buffer);
        } else {
            buffer.append_u8(0);
        }
    }
}
/// Write an unsigned integer into the provided buffer in LEB-128 format, returning byte length
///
/// All integers in Wasm are variable-length encoded, which saves space for small values.
@ -10,10 +58,10 @@ macro_rules! encode_uleb128 {
let mut x = value;
let start_len = self.size();
while x >= 0x80 {
self.append_byte(0x80 | ((x & 0x7f) as u8));
self.append_u8(0x80 | ((x & 0x7f) as u8));
x >>= 7;
}
self.append_byte(x as u8);
self.append_u8(x as u8);
self.size() - start_len
}
};
@ -30,10 +78,10 @@ macro_rules! encode_sleb128 {
x >>= 7;
let byte_is_negative = (byte & 0x40) != 0;
if ((x == 0 && !byte_is_negative) || (x == -1 && byte_is_negative)) {
self.append_byte(byte);
self.append_u8(byte);
break;
}
self.append_byte(byte | 0x80);
self.append_u8(byte | 0x80);
}
self.size() - start_len
}
@ -47,7 +95,7 @@ macro_rules! write_unencoded {
let mut x = value;
let size = std::mem::size_of::<$ty>();
for _ in 0..size {
self.append_byte((x & 0xff) as u8);
self.append_u8((x & 0xff) as u8);
x >>= 8;
}
}
@ -61,17 +109,19 @@ macro_rules! encode_padded_sleb128 {
let mut x = value;
let size = (std::mem::size_of::<$ty>() / 4) * 5;
for _ in 0..(size - 1) {
self.append_byte(0x80 | (x & 0x7f) as u8);
self.append_u8(0x80 | (x & 0x7f) as u8);
x >>= 7;
}
self.append_byte((x & 0x7f) as u8);
self.append_u8((x & 0x7f) as u8);
}
};
}
pub trait SerialBuffer {
fn append_byte(&mut self, b: u8);
pub trait SerialBuffer: Debug {
fn append_u8(&mut self, b: u8);
fn overwrite_u8(&mut self, index: usize, b: u8);
fn append_slice(&mut self, b: &[u8]);
fn size(&self) -> usize;
encode_uleb128!(encode_u32, u32);
@ -98,17 +148,6 @@ pub trait SerialBuffer {
encode_padded_sleb128!(encode_padded_i64, i64);
}
pub trait Serialize {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T);
}
impl Serialize for str {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.encode_u32(self.len() as u32);
buffer.append_slice(self.as_bytes());
}
}
fn overwrite_padded_u32_help(buffer: &mut [u8], value: u32) {
let mut x = value;
for byte in buffer.iter_mut().take(4) {
@ -119,9 +158,12 @@ fn overwrite_padded_u32_help(buffer: &mut [u8], value: u32) {
}
impl SerialBuffer for std::vec::Vec<u8> {
fn append_byte(&mut self, b: u8) {
fn append_u8(&mut self, b: u8) {
self.push(b);
}
fn overwrite_u8(&mut self, index: usize, b: u8) {
self[index] = b;
}
fn append_slice(&mut self, b: &[u8]) {
self.extend_from_slice(b);
}
@ -146,9 +188,12 @@ impl SerialBuffer for std::vec::Vec<u8> {
}
impl<'a> SerialBuffer for Vec<'a, u8> {
fn append_byte(&mut self, b: u8) {
fn append_u8(&mut self, b: u8) {
self.push(b);
}
fn overwrite_u8(&mut self, index: usize, b: u8) {
self[index] = b;
}
fn append_slice(&mut self, b: &[u8]) {
self.extend_from_slice(b);
}

View file

@ -9,9 +9,6 @@ edition = "2018"
# roc_module = { path = "../module" }
# roc_mono = { path = "../mono" }
# # TODO: switch to parity-wasm 0.44 once it's out (allows bumpalo vectors in some places)
parity-wasm = { git = "https://github.com/brian-carroll/parity-wasm", branch = "master" }
wasmer = "2.0.0"
wasmer-wasi = "2.0.0"

View file

@ -2,11 +2,9 @@ use std::cell::Cell;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use crate::helpers::wasm32_test_result::Wasm32TestResult;
use roc_can::builtins::builtin_defs_map;
use roc_collections::all::{MutMap, MutSet};
use roc_gen_wasm::replace_code_section;
// use roc_std::{RocDec, RocList, RocOrder, RocStr};
use crate::helpers::wasm32_test_result::Wasm32TestResult;
use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory;
const TEST_WRAPPER_NAME: &str = "test_wrapper";
@ -104,20 +102,17 @@ pub fn helper_wasm<'a, T: Wasm32TestResult>(
exposed_to_host,
};
let (mut builder, mut code_section_bytes) =
roc_gen_wasm::build_module_help(&env, procedures).unwrap();
let mut wasm_module = roc_gen_wasm::build_module_help(&env, procedures).unwrap();
T::insert_test_wrapper(
arena,
&mut builder,
&mut code_section_bytes,
&mut wasm_module,
TEST_WRAPPER_NAME,
main_fn_index as u32,
);
let mut parity_module = builder.build();
replace_code_section(&mut parity_module, code_section_bytes);
let module_bytes = parity_module.into_bytes().unwrap();
let mut module_bytes = std::vec::Vec::with_capacity(4096);
wasm_module.serialize(&mut module_bytes);
// for debugging (e.g. with wasm2wat or wasm-objdump)
if false {

View file

@ -1,44 +1,33 @@
use parity_wasm::builder;
use parity_wasm::elements::Internal;
use bumpalo::collections::Vec;
use roc_gen_wasm::code_builder::{Align, CodeBuilder, ValueType};
use roc_gen_wasm::from_wasm32_memory::FromWasm32Memory;
use roc_gen_wasm::{serialize::SerialBuffer, LocalId};
use roc_gen_wasm::wasm_module::opcodes;
use roc_gen_wasm::wasm_module::{
Align, CodeBuilder, Export, ExportType, LocalId, Signature, ValueType, WasmModule,
};
use roc_std::{RocDec, RocList, RocOrder, RocStr};
pub trait Wasm32TestResult {
fn insert_test_wrapper<'a>(
arena: &'a bumpalo::Bump,
module_builder: &mut builder::ModuleBuilder,
code_section_bytes: &mut std::vec::Vec<u8>,
wasm_module: &mut WasmModule<'a>,
wrapper_name: &str,
main_function_index: u32,
) {
let signature = builder::signature()
.with_result(parity_wasm::elements::ValueType::I32)
.build_sig();
wasm_module.add_function_signature(Signature {
param_types: Vec::with_capacity_in(0, arena),
ret_type: Some(ValueType::I32),
});
// parity-wasm FunctionDefinition with no instructions
let empty_fn_def = builder::function().with_signature(signature).build();
let location = module_builder.push_function(empty_fn_def);
let export = builder::export()
.field(wrapper_name)
.with_internal(Internal::Function(location.body))
.build();
module_builder.push_export(export);
wasm_module.export.entries.push(Export {
name: wrapper_name.to_string(),
ty: ExportType::Func,
index: wasm_module.code.code_builders.len() as u32,
});
let mut code_builder = CodeBuilder::new(arena);
Self::build_wrapper_body(&mut code_builder, main_function_index);
code_builder.serialize(code_section_bytes);
let mut num_procs = 0;
for (i, byte) in code_section_bytes[5..10].iter().enumerate() {
num_procs += ((byte & 0x7f) as u32) << (i * 7);
}
let inner_length = (code_section_bytes.len() - 5) as u32;
code_section_bytes.overwrite_padded_u32(0, inner_length);
code_section_bytes.overwrite_padded_u32(5, num_procs + 1);
wasm_module.code.code_builders.push(code_builder);
}
fn build_wrapper_body(code_builder: &mut CodeBuilder, main_function_index: u32);
@ -54,7 +43,7 @@ macro_rules! build_wrapper_body_primitive {
code_builder.get_local(frame_pointer_id);
// Raw "call" instruction. Don't bother with symbol & relocation since we're not going to link.
code_builder.inst_imm32(roc_gen_wasm::opcodes::CALL, 0, true, main_function_index);
code_builder.inst_imm32(opcodes::CALL, 0, true, main_function_index);
code_builder.$store_instruction($align, 0);
code_builder.get_local(frame_pointer_id);
@ -82,7 +71,7 @@ fn build_wrapper_body_stack_memory(
code_builder.get_local(local_id);
// Raw "call" instruction. Don't bother with symbol & relocation since we're not going to link.
code_builder.inst_imm32(roc_gen_wasm::opcodes::CALL, 0, true, main_function_index);
code_builder.inst_imm32(opcodes::CALL, 0, true, main_function_index);
code_builder.get_local(local_id);
code_builder.finalize(local_types, size as i32, frame_pointer);
}