Combine our handmade Code section with other sections from parity_wasm (tests compile but fail)

This commit is contained in:
Brian Carroll 2021-10-23 17:10:00 +02:00
parent 7c398ba238
commit 13577aa9ec
5 changed files with 149 additions and 65 deletions

View file

@ -11,7 +11,7 @@ use roc_mono::layout::{Builtin, Layout};
use crate::code_builder::{BlockType, CodeBuilder, ValueType};
use crate::layout::WasmLayout;
use crate::storage::{Storage, StoredValue, StoredValueKind};
use crate::{copy_memory, CopyMemoryConfig, Env, LocalId, PTR_TYPE};
use crate::{copy_memory, encode_u32_padded, CopyMemoryConfig, Env, LocalId, PTR_TYPE};
// Don't allocate any constant data at address zero or near it. Would be valid, but bug-prone.
// Follow Emscripten's example by using 1kB (4 bytes would probably do)
@ -20,18 +20,17 @@ const UNUSED_DATA_SECTION_BYTES: u32 = 1024;
#[derive(Clone, Copy, Debug)]
struct LabelId(u32);
// TODO: use Bumpalo Vec once parity_wasm supports general iterators (>=0.43)
pub struct WasmBackend<'a> {
// Module level: Wasm AST
pub module_builder: ModuleBuilder,
env: &'a Env<'a>,
// Module level: internal state & IR mappings
// Module-level data
pub module_builder: ModuleBuilder,
pub code_section_bytes: std::vec::Vec<u8>,
_data_offset_map: MutMap<Literal<'a>, u32>,
_data_offset_next: u32,
proc_symbol_map: MutMap<Symbol, CodeLocation>,
// Function level
// Function-level data
code_builder: CodeBuilder<'a>,
storage: Storage<'a>,
@ -41,21 +40,29 @@ pub struct WasmBackend<'a> {
}
impl<'a> WasmBackend<'a> {
pub fn new(env: &'a Env<'a>) -> Self {
pub fn new(env: &'a Env<'a>, num_procs: usize) -> Self {
// Code section is prefixed with the number of Wasm functions
// For now, this is the same as the number of IR procedures (until we start inlining!)
let mut code_section_bytes = std::vec::Vec::with_capacity(4096);
// Reserve space for code section header: inner byte length and number of functions
// Padded to the maximum 5 bytes each, so we can update later without moving everything
code_section_bytes.resize(10, 0);
encode_u32_padded(&mut code_section_bytes[5..10], num_procs as u32);
WasmBackend {
// Module: Wasm AST
module_builder: builder::module(),
env,
// Module: internal state & IR mappings
// Module-level data
module_builder: builder::module(),
code_section_bytes,
_data_offset_map: MutMap::default(),
_data_offset_next: UNUSED_DATA_SECTION_BYTES,
proc_symbol_map: MutMap::default(),
// Function-level data
block_depth: 0,
joinpoint_label_map: MutMap::default(),
// Functions
code_builder: CodeBuilder::new(env.arena),
storage: Storage::new(env.arena),
}
@ -86,7 +93,7 @@ impl<'a> WasmBackend<'a> {
self.build_stmt(&proc.body, &proc.ret_layout)?;
self.finalize_proc();
self.finalize_proc()?;
self.reset();
// println!("\nfinished generating {:?}\n", sym);
@ -123,7 +130,7 @@ impl<'a> WasmBackend<'a> {
builder::function().with_signature(signature).build()
}
fn finalize_proc(&mut self) {
fn finalize_proc(&mut self) -> Result<(), String> {
// end the block from start_proc, to ensure all paths pop stack memory (if any)
self.end_block();
@ -133,6 +140,11 @@ impl<'a> WasmBackend<'a> {
self.storage.stack_frame_size,
self.storage.stack_frame_pointer,
);
self.code_builder
.serialize(&mut self.code_section_bytes)
.map_err(|e| format!("{:?}", e))?;
Ok(())
}
/**********************************************************

View file

@ -60,6 +60,21 @@ pub enum Align {
// ... we can add more if we need them ...
}
impl From<u32> for Align {
fn from(x: u32) -> Align {
match x {
1 => Align::Bytes1,
2 => Align::Bytes2,
4 => Align::Bytes4,
8 => Align::Bytes8,
16 => Align::Bytes16,
32 => Align::Bytes32,
64 => Align::Bytes64,
_ => panic!("{:?}-byte alignment not supported", x),
}
}
}
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum VirtualMachineSymbolState {
/// Value doesn't exist yet
@ -99,6 +114,13 @@ macro_rules! instruction_memargs {
};
}
/// Finalize the code section bytes by writing its inner length at the start.
///
/// Assumes the first 5 bytes have been reserved for the length, which is written
/// as a maximally-padded LEB-128 value. The "inner length" is the number of bytes
/// that follow those 5 reserved bytes.
///
/// # Panics
/// Panics if `code_section_bytes` holds fewer than 5 bytes, or if the inner length
/// does not fit in a `u32`.
pub fn finalize_code_section(code_section_bytes: &mut std::vec::Vec<u8>) {
    // Size of the reserved length field at the start of the buffer
    const LEN_FIELD_SIZE: usize = 5;

    assert!(
        code_section_bytes.len() >= LEN_FIELD_SIZE,
        "code section must start with {} reserved bytes for its length, but is only {} bytes long",
        LEN_FIELD_SIZE,
        code_section_bytes.len()
    );

    let inner_len = code_section_bytes.len() - LEN_FIELD_SIZE;
    // Refuse to silently truncate a length that cannot be represented
    assert!(
        inner_len <= u32::MAX as usize,
        "code section of {} bytes is too large for a u32 length",
        inner_len
    );

    // The encoder takes a mutable slice, so borrow the reserved prefix explicitly
    encode_u32_padded(&mut code_section_bytes[..LEN_FIELD_SIZE], inner_len as u32);
}
#[derive(Debug)]
pub struct CodeBuilder<'a> {
/// The main container for the instructions

View file

@ -13,14 +13,14 @@ use bumpalo::collections::Vec;
use bumpalo::Bump;
use parity_wasm::builder;
use parity_wasm::elements::Internal;
use parity_wasm::elements::{Instruction, Internal, Module, Section};
use roc_collections::all::{MutMap, MutSet};
use roc_module::symbol::{Interns, Symbol};
use roc_mono::ir::{Proc, ProcLayout};
use roc_mono::layout::LayoutIds;
use crate::backend::WasmBackend;
use crate::code_builder::{Align, CodeBuilder, ValueType};
use crate::code_builder::{finalize_code_section, Align, CodeBuilder, ValueType};
const PTR_SIZE: u32 = 4;
const PTR_TYPE: ValueType = ValueType::I32;
@ -28,6 +28,10 @@ const PTR_TYPE: ValueType = ValueType::I32;
pub const STACK_POINTER_GLOBAL_ID: u32 = 0;
pub const FRAME_ALIGNMENT_BYTES: i32 = 16;
/// Code section ID from spec
/// https://webassembly.github.io/spec/core/binary/modules.html#sections
pub const CODE_SECTION_ID: u8 = 10;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LocalId(pub u32);
@ -41,8 +45,10 @@ pub fn build_module<'a>(
env: &'a Env,
procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<std::vec::Vec<u8>, String> {
let (builder, _) = build_module_help(env, procedures)?;
let module = builder.build();
let (builder, code_section_bytes, _) = build_module_help(env, procedures)?;
let mut module = builder.build();
replace_code_section(&mut module, code_section_bytes);
module
.into_bytes()
.map_err(|e| -> String { format!("Error serialising Wasm module {:?}", e) })
@ -51,8 +57,8 @@ pub fn build_module<'a>(
pub fn build_module_help<'a>(
env: &'a Env,
procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<(builder::ModuleBuilder, u32), String> {
let mut backend = WasmBackend::new(env);
) -> Result<(builder::ModuleBuilder, std::vec::Vec<u8>, u32), String> {
let mut backend = WasmBackend::new(env, procedures.len());
let mut layout_ids = LayoutIds::default();
// Sort procedures by occurrence order
@ -85,6 +91,8 @@ pub fn build_module_help<'a>(
}
}
finalize_code_section(&mut backend.code_section_bytes);
// Because of the sorting above, we know the last function in the `for` is the main function.
// Here we grab its index and return it, so that the test_wrapper is able to call it.
// This is a workaround until we implement object files with symbols and relocations.
@ -106,23 +114,30 @@ pub fn build_module_help<'a>(
let stack_pointer_global = builder::global()
.with_type(parity_wasm::elements::ValueType::I32)
.mutable()
.init_expr(parity_wasm::elements::Instruction::I32Const(
(MIN_MEMORY_SIZE_KB * 1024) as i32,
))
.init_expr(Instruction::I32Const((MIN_MEMORY_SIZE_KB * 1024) as i32))
.build();
backend.module_builder.push_global(stack_pointer_global);
Ok((backend.module_builder, main_function_index))
Ok((
backend.module_builder,
backend.code_section_bytes,
main_function_index,
))
}
fn encode_alignment(bytes: u32) -> Align {
match bytes {
1 => Align::Bytes1,
2 => Align::Bytes2,
4 => Align::Bytes4,
8 => Align::Bytes8,
_ => panic!("{:?}-byte alignment is not supported", bytes),
/// Replace parity-wasm's code section with our own handmade one
///
/// The `Section::Code` entry of `module` is overwritten with a `Section::Unparsed`
/// entry whose id is `CODE_SECTION_ID` and whose payload is `code_section_bytes`.
///
/// # Panics
/// Panics if `module` has no code section to replace.
pub fn replace_code_section(module: &mut Module, code_section_bytes: std::vec::Vec<u8>) {
    let sections = module.sections_mut();

    // Search from the back: if more than one code section were ever present,
    // the last one is the one that gets replaced.
    let code_section_index = sections
        .iter()
        .rposition(|section| matches!(section, Section::Code(_)))
        .expect("Wasm module has no code section to replace");

    sections[code_section_index] = Section::Unparsed {
        id: CODE_SECTION_ID,
        payload: code_section_bytes,
    };
}
pub struct CopyMemoryConfig {
@ -139,7 +154,7 @@ pub fn copy_memory(code_builder: &mut CodeBuilder, config: CopyMemoryConfig) {
return;
}
let alignment = encode_alignment(config.alignment_bytes);
let alignment = Align::from(config.alignment_bytes);
let mut i = 0;
while config.size - i >= 8 {
code_builder.get_local(config.to_ptr);
@ -196,7 +211,7 @@ pub fn encode_u32<'a>(buffer: &mut [u8], mut value: u32) -> usize {
///
/// All integers in Wasm are variable-length encoded, which saves space for small values.
/// The most significant bit indicates "more bytes are coming", and the other 7 are payload.
pub fn encode_u64<'a>(buffer: &mut Vec<'a, u8>, mut value: u64) -> usize {
pub fn encode_u64<'a>(buffer: &mut [u8], mut value: u64) -> usize {
let mut count = 0;
while value >= 0x80 {
buffer[count] = 0x80 | ((value & 0x7f) as u8);
@ -206,3 +221,19 @@ pub fn encode_u64<'a>(buffer: &mut Vec<'a, u8>, mut value: u64) -> usize {
buffer[count] = value as u8;
count + 1
}
/// Encode a u32 as LEB-128, always emitting the maximum of 5 bytes.
///
/// A fixed-width encoding lets us overwrite the number later (e.g. relocations)
/// without shifting any of the bytes that come after it. The first four bytes each
/// carry 7 payload bits with the "more bytes are coming" bit set; the fifth byte
/// carries the remaining bits with that bit clear.
/// For example, 3 is encoded as 0x83 0x80 0x80 0x80 0x00.
///
/// Returns the number of bytes written, which is always 5.
///
/// # Panics
/// Panics if `buffer` is shorter than 5 bytes.
///
/// https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md#relocation-sections
pub fn encode_u32_padded<'a>(buffer: &mut [u8], mut value: u32) -> usize {
    const PADDED_LEN: usize = 5;
    const CONTINUATION_BIT: u8 = 0x80;
    const PAYLOAD_MASK: u32 = 0x7f;

    // Every byte except the last is flagged as "continued"
    for byte in buffer.iter_mut().take(PADDED_LEN - 1) {
        *byte = CONTINUATION_BIT | (value & PAYLOAD_MASK) as u8;
        value >>= 7;
    }

    // Final byte: whatever bits are left, with the continuation bit clear
    buffer[PADDED_LEN - 1] = value as u8;
    PADDED_LEN
}