// roc/compiler/gen_wasm/src/wasm_module/sections.rs
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use roc_collections::all::MutMap;
use roc_error_macros::internal_error;
use super::dead_code::{
copy_preloads_shrinking_dead_fns, parse_preloads_call_graph, trace_call_graph,
PreloadsCallGraph,
};
use super::linking::RelocationEntry;
use super::opcodes::OpCode;
use super::serialize::{
parse_u32_or_panic, SerialBuffer, Serialize, SkipBytes, MAX_SIZE_ENCODED_U32,
};
use super::{CodeBuilder, ValueType};
/*******************************************************************
*
* Helpers
*
*******************************************************************/
/// Section ID byte, as defined by the WebAssembly binary format.
/// Sections must appear in the module in this order (Custom sections excepted).
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum SectionId {
    Custom = 0,
    Type = 1,
    Import = 2,
    Function = 3,
    Table = 4,
    Memory = 5,
    Global = 6,
    Export = 7,
    Start = 8,
    Element = 9,
    Code = 10,
    Data = 11,
    /// DataCount section is unused. Only needed for single-pass validation of
    /// memory.init and data.drop, which we don't use
    DataCount = 12,
}
/// Worst-case size of an encoded section header:
/// one ID byte plus two padded-u32 fields (section size and entry count)
const MAX_SIZE_SECTION_HEADER: usize = std::mem::size_of::<SectionId>() + 2 * MAX_SIZE_ENCODED_U32;
/// Common interface for module sections that store a count plus a flat
/// buffer of pre-serialized entries.
pub trait Section<'a>: Sized {
    /// The section's ID byte in the Wasm binary format
    const ID: SectionId;

    /// Serialized bytes of the section body (header not included)
    fn get_bytes(&self) -> &[u8];

    /// Number of entries in the section
    fn get_count(&self) -> u32;

    /// Worst-case serialized size, used as a buffer capacity estimate
    fn size(&self) -> usize {
        MAX_SIZE_SECTION_HEADER + self.get_bytes().len()
    }

    /// Load this section from a preloaded module binary, advancing `cursor` past it
    fn preload(arena: &'a Bump, module_bytes: &[u8], cursor: &mut usize) -> Self;
}
/// Implement the `Section` trait for a section struct with `count` and `bytes` fields.
/// The 3-argument form takes a constructor closure `(count, bytes) -> Self`;
/// the 2-argument form assumes the struct has exactly the fields `bytes` and `count`.
macro_rules! section_impl {
    ($structname: ident, $id: expr, $from_count_and_bytes: expr) => {
        impl<'a> Section<'a> for $structname<'a> {
            const ID: SectionId = $id;

            fn get_bytes(&self) -> &[u8] {
                &self.bytes
            }

            fn get_count(&self) -> u32 {
                self.count
            }

            fn preload(arena: &'a Bump, module_bytes: &[u8], cursor: &mut usize) -> Self {
                let (count, initial_bytes) = parse_section(Self::ID, module_bytes, cursor);
                // 2x capacity: presumably more entries get appended after preloading
                let mut bytes = Vec::with_capacity_in(initial_bytes.len() * 2, arena);
                bytes.extend_from_slice(initial_bytes);
                $from_count_and_bytes(count, bytes)
            }

            fn size(&self) -> usize {
                section_size(self.get_bytes())
            }
        }
    };
    ($structname: ident, $id: expr) => {
        section_impl!($structname, $id, |count, bytes| $structname {
            bytes,
            count
        });
    };
}
impl<'a, Sec> Serialize for Sec
where
    Sec: Section<'a>,
{
    /// Write the full section (header, count, body) to the buffer.
    /// A section with no body bytes is omitted from the module entirely.
    fn serialize<B: SerialBuffer>(&self, buffer: &mut B) {
        let body = self.get_bytes();
        if body.is_empty() {
            return;
        }
        let header_indices = write_section_header(buffer, Self::ID);
        buffer.encode_u32(self.get_count());
        buffer.append_slice(body);
        update_section_size(buffer, header_indices);
    }
}
fn section_size(bytes: &[u8]) -> usize {
let id = 1;
let encoded_length = MAX_SIZE_ENCODED_U32;
let encoded_count = MAX_SIZE_ENCODED_U32;
id + encoded_length + encoded_count + bytes.len()
}
/// Parse a section header at `cursor`, returning the entry count and the body
/// bytes (everything after the count). If the bytes at `cursor` are not the
/// requested section, returns `(0, &[])` and leaves the cursor untouched.
fn parse_section<'a>(id: SectionId, module_bytes: &'a [u8], cursor: &mut usize) -> (u32, &'a [u8]) {
    if module_bytes[*cursor] != id as u8 {
        // Section is absent (sections appear in a fixed order, so no need to scan)
        return (0, &[]);
    }
    *cursor += 1;
    let declared_size = parse_u32_or_panic(module_bytes, cursor);
    let size_origin = *cursor; // the declared size is measured from this point
    let count = parse_u32_or_panic(module_bytes, cursor);
    let section_end = size_origin + declared_size as usize;
    let body = &module_bytes[*cursor..section_end];
    *cursor = section_end;
    (count, body)
}
/// Buffer positions needed to back-patch a section's size after its body is written
pub struct SectionHeaderIndices {
    // where the padded-u32 size placeholder was reserved
    size_index: usize,
    // where the section body begins (the size is measured from here)
    body_index: usize,
}
/// Write a section ID and a padded-u32 size placeholder, returning the
/// positions needed to patch the size in later (see `update_section_size`).
fn write_section_header<T: SerialBuffer>(buffer: &mut T, id: SectionId) -> SectionHeaderIndices {
    buffer.append_u8(id as u8);
    // Field initializers run in order: reserve the size slot, then record
    // where the body will start.
    SectionHeaderIndices {
        size_index: buffer.reserve_padded_u32(),
        body_index: buffer.size(),
    }
}
/// Write a Custom section header (ID, size placeholder, and section name),
/// returning the positions needed to patch the size in later.
/// Note: the name is part of the section body, so it is written after
/// `body_index` is recorded.
pub fn write_custom_section_header<T: SerialBuffer>(
    buffer: &mut T,
    name: &str,
) -> SectionHeaderIndices {
    buffer.append_u8(SectionId::Custom as u8);
    let size_index = buffer.reserve_padded_u32();
    let body_index = buffer.size();
    name.serialize(buffer);
    SectionHeaderIndices {
        size_index,
        body_index,
    }
}
/// Update a section header with its final size, after writing the bytes
pub fn update_section_size<T: SerialBuffer>(buffer: &mut T, header_indices: SectionHeaderIndices) {
let size = buffer.size() - header_indices.body_index;
buffer.overwrite_padded_u32(header_indices.size_index, size as u32);
}
/*******************************************************************
*
* Type section
* Deduplicated list of function type signatures
*
*******************************************************************/
/// A function type signature: parameter types and an optional return type
#[derive(PartialEq, Eq, Debug)]
pub struct Signature<'a> {
    pub param_types: Vec<'a, ValueType>,
    pub ret_type: Option<ValueType>,
}

impl Signature<'_> {
    /// Marker byte (0x60) that begins every function type in the binary format
    pub const SEPARATOR: u8 = 0x60;
}

impl<'a> Serialize for Signature<'a> {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        buffer.append_u8(Self::SEPARATOR);
        self.param_types.serialize(buffer);
        self.ret_type.serialize(buffer);
    }
}
#[derive(Debug)]
pub struct TypeSection<'a> {
    /// Private. See WasmModule::add_function_signature
    arena: &'a Bump,
    // Serialized signatures, concatenated
    bytes: Vec<'a, u8>,
    // Byte offset of the start of each signature within `bytes`
    offsets: Vec<'a, usize>,
}

impl<'a> TypeSection<'a> {
    /// Find a matching signature or insert a new one. Return the index.
    pub fn insert(&mut self, signature: Signature<'a>) -> u32 {
        let mut sig_bytes = Vec::with_capacity_in(signature.param_types.len() + 4, self.arena);
        signature.serialize(&mut sig_bytes);

        let sig_len = sig_bytes.len();
        let bytes_len = self.bytes.len();

        // Linear scan for an identical encoding. Comparing raw bytes works
        // because a full encoded signature determines its own length.
        // NOTE(review): a preloaded signature with a non-canonical length
        // encoding would only miss deduplication, not match incorrectly.
        for (i, offset) in self.offsets.iter().enumerate() {
            let end = offset + sig_len;
            if end > bytes_len {
                // offsets are ascending, so no later signature can fit either
                break;
            }
            if &self.bytes[*offset..end] == sig_bytes.as_slice() {
                return i as u32;
            }
        }

        // Not found: append as a new signature
        let sig_id = self.offsets.len();
        self.offsets.push(bytes_len);
        self.bytes.extend_from_slice(&sig_bytes);

        sig_id as u32
    }

    /// Rebuild `offsets` by walking the serialized signatures in `bytes`.
    /// Needed after preloading a module's Type section.
    pub fn parse_offsets(&mut self) {
        self.offsets.clear();
        let mut i = 0;
        while i < self.bytes.len() {
            self.offsets.push(i);

            debug_assert!(self.bytes[i] == Signature::SEPARATOR);
            i += 1;

            let n_params = parse_u32_or_panic(&self.bytes, &mut i);
            i += n_params as usize; // skip over one byte per param type

            // return count is read as a single byte (valid while counts are < 128)
            let n_return_values = self.bytes[i];
            i += 1 + n_return_values as usize;
        }
    }
}
// Hand-written (can't use section_impl!) because TypeSection has extra fields
impl<'a> Section<'a> for TypeSection<'a> {
    const ID: SectionId = SectionId::Type;

    fn get_bytes(&self) -> &[u8] {
        &self.bytes
    }

    // One entry per distinct signature
    fn get_count(&self) -> u32 {
        self.offsets.len() as u32
    }

    fn preload(arena: &'a Bump, module_bytes: &[u8], cursor: &mut usize) -> Self {
        let (count, initial_bytes) = parse_section(Self::ID, module_bytes, cursor);
        let mut bytes = Vec::with_capacity_in(initial_bytes.len() * 2, arena);
        bytes.extend_from_slice(initial_bytes);
        TypeSection {
            arena,
            bytes,
            // left empty here; filled in by parse_offsets()
            offsets: Vec::with_capacity_in(2 * count as usize, arena),
        }
    }
}
/*******************************************************************
*
* Import section
*
*******************************************************************/
/// Reference types that can be stored in a table
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum RefType {
    Func = 0x70,
    Extern = 0x6f,
}

/// Type of a table: what it contains, and its size limits
#[derive(Debug)]
pub struct TableType {
    pub ref_type: RefType,
    pub limits: Limits,
}

impl Serialize for TableType {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        buffer.append_u8(self.ref_type as u8);
        self.limits.serialize(buffer);
    }
}

impl SkipBytes for TableType {
    fn skip_bytes(bytes: &[u8], cursor: &mut usize) {
        u8::skip_bytes(bytes, cursor); // ref_type byte
        Limits::skip_bytes(bytes, cursor);
    }
}
/// What kind of entity an import brings into the module
#[derive(Debug)]
pub enum ImportDesc {
    Func { signature_index: u32 },
    Table { ty: TableType },
    Mem { limits: Limits },
    Global { ty: GlobalType },
}

/// A single entry in the Import section
#[derive(Debug)]
pub struct Import {
    pub module: &'static str,
    pub name: String,
    pub description: ImportDesc,
}

/// Binary-format tag byte for each kind of import
#[repr(u8)]
#[derive(Debug)]
enum ImportTypeId {
    Func = 0,
    Table = 1,
    Mem = 2,
    Global = 3,
}

impl From<u8> for ImportTypeId {
    fn from(x: u8) -> Self {
        match x {
            0 => Self::Func,
            1 => Self::Table,
            2 => Self::Mem,
            3 => Self::Global,
            _ => internal_error!(
                "Invalid ImportTypeId {} in platform/builtins object file",
                x
            ),
        }
    }
}
impl Serialize for Import {
    /// Write module name, import name, descriptor tag byte, then the
    /// descriptor payload — the same byte order the binary format requires.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        self.module.serialize(buffer);
        self.name.serialize(buffer);

        // Tag byte for the descriptor kind
        let type_id = match &self.description {
            ImportDesc::Func { .. } => ImportTypeId::Func,
            ImportDesc::Table { .. } => ImportTypeId::Table,
            ImportDesc::Mem { .. } => ImportTypeId::Mem,
            ImportDesc::Global { .. } => ImportTypeId::Global,
        };
        buffer.append_u8(type_id as u8);

        // Descriptor payload
        match &self.description {
            ImportDesc::Func { signature_index } => buffer.encode_u32(*signature_index),
            ImportDesc::Table { ty } => ty.serialize(buffer),
            ImportDesc::Mem { limits } => limits.serialize(buffer),
            ImportDesc::Global { ty } => ty.serialize(buffer),
        }
    }
}
#[derive(Debug)]
pub struct ImportSection<'a> {
    /// Total number of imports, of all kinds
    pub count: u32,
    /// Number of imported functions; set by `parse`
    pub function_count: u32,
    /// Serialized import entries
    pub bytes: Vec<'a, u8>,
}

impl<'a> ImportSection<'a> {
    /// Serialize one more import into the section
    pub fn append(&mut self, import: Import) {
        import.serialize(&mut self.bytes);
        self.count += 1;
    }

    /// Walk the serialized imports, returning the signature index of every
    /// imported function (in order) and recording `function_count`.
    pub fn parse(&mut self, arena: &'a Bump) -> Vec<'a, u32> {
        let mut fn_signatures = bumpalo::vec![in arena];
        let mut cursor = 0;
        while cursor < self.bytes.len() {
            // skip the module and name strings
            String::skip_bytes(&self.bytes, &mut cursor);
            String::skip_bytes(&self.bytes, &mut cursor);
            let type_id = ImportTypeId::from(self.bytes[cursor]);
            cursor += 1;

            match type_id {
                ImportTypeId::Func => {
                    fn_signatures.push(parse_u32_or_panic(&self.bytes, &mut cursor));
                }
                ImportTypeId::Table => {
                    TableType::skip_bytes(&self.bytes, &mut cursor);
                }
                ImportTypeId::Mem => {
                    Limits::skip_bytes(&self.bytes, &mut cursor);
                }
                ImportTypeId::Global => {
                    GlobalType::skip_bytes(&self.bytes, &mut cursor);
                }
            }
        }
        self.function_count = fn_signatures.len() as u32;
        fn_signatures
    }

    /// Constructor used by section_impl! when preloading
    pub fn from_count_and_bytes(count: u32, bytes: Vec<'a, u8>) -> Self {
        ImportSection {
            bytes,
            count,
            // determined later by `parse`
            function_count: 0,
        }
    }
}

section_impl!(
    ImportSection,
    SectionId::Import,
    ImportSection::from_count_and_bytes
);
/*******************************************************************
*
* Function section
* Maps function indices (Code section) to signature indices (Type section)
*
*******************************************************************/
#[derive(Debug)]
pub struct FunctionSection<'a> {
    pub count: u32,
    // Encoded signature indices, one per function in the Code section
    pub bytes: Vec<'a, u8>,
}

impl<'a> FunctionSection<'a> {
    /// Record the signature index for the next function
    pub fn add_sig(&mut self, sig_id: u32) {
        self.bytes.encode_u32(sig_id);
        self.count += 1;
    }

    /// Decode all signature indices
    pub fn parse(&self, arena: &'a Bump) -> Vec<'a, u32> {
        let count = self.count as usize;
        let mut signatures = Vec::with_capacity_in(count, arena);
        let mut cursor = 0;
        for _ in 0..count {
            signatures.push(parse_u32_or_panic(&self.bytes, &mut cursor));
        }
        signatures
    }
}

section_impl!(FunctionSection, SectionId::Function);
/*******************************************************************
*
* Memory section
*
*******************************************************************/
/// Size limits for a memory or table: minimum, with an optional maximum
#[derive(Debug)]
pub enum Limits {
    Min(u32),
    MinMax(u32, u32),
}

/// Binary-format tag byte for each Limits variant
#[repr(u8)]
enum LimitsId {
    Min = 0,
    MinMax = 1,
}

impl Serialize for Limits {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        match self {
            Self::Min(min) => {
                buffer.append_u8(LimitsId::Min as u8);
                buffer.encode_u32(*min);
            }
            Self::MinMax(min, max) => {
                buffer.append_u8(LimitsId::MinMax as u8);
                buffer.encode_u32(*min);
                buffer.encode_u32(*max);
            }
        }
    }
}

impl SkipBytes for Limits {
    fn skip_bytes(bytes: &[u8], cursor: &mut usize) {
        let variant_id = bytes[*cursor];
        u8::skip_bytes(bytes, cursor); // advance past the variant byte
        u32::skip_bytes(bytes, cursor); // skip "min"
        if variant_id == LimitsId::MinMax as u8 {
            u32::skip_bytes(bytes, cursor); // skip "max"
        }
    }
}
#[derive(Debug)]
pub struct MemorySection<'a> {
    pub count: u32,
    pub bytes: Vec<'a, u8>,
}

impl<'a> MemorySection<'a> {
    /// Size of a WebAssembly memory page, in bytes
    pub const PAGE_SIZE: u32 = 64 * 1024;

    /// Create a Memory section describing a single memory of at least
    /// `memory_bytes` bytes, rounded up to a whole number of pages.
    /// A request of 0 bytes produces an empty section (no memory declared).
    pub fn new(arena: &'a Bump, memory_bytes: u32) -> Self {
        if memory_bytes == 0 {
            MemorySection {
                count: 0,
                bytes: bumpalo::vec![in arena],
            }
        } else {
            // Round up to whole pages. Written as div + remainder-test rather
            // than `(memory_bytes + PAGE_SIZE - 1) / PAGE_SIZE`, which would
            // overflow u32 for requests within a page of u32::MAX.
            let pages =
                memory_bytes / Self::PAGE_SIZE + u32::from(memory_bytes % Self::PAGE_SIZE != 0);
            let limits = Limits::Min(pages);

            let mut bytes = Vec::with_capacity_in(12, arena);
            limits.serialize(&mut bytes);

            MemorySection { count: 1, bytes }
        }
    }
}

section_impl!(MemorySection, SectionId::Memory);
/*******************************************************************
*
* Global section
*
*******************************************************************/
/// Type of a global variable: its value type and mutability
#[derive(Debug)]
pub struct GlobalType {
    pub value_type: ValueType,
    pub is_mutable: bool,
}

impl Serialize for GlobalType {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        buffer.append_u8(self.value_type as u8);
        buffer.append_u8(self.is_mutable as u8);
    }
}

impl SkipBytes for GlobalType {
    fn skip_bytes(_bytes: &[u8], cursor: &mut usize) {
        // Always exactly two bytes: value type + mutability flag
        *cursor += 2;
    }
}
/// Constant expression for initialising globals or data segments
/// Note: This is restricted for simplicity, but the spec allows arbitrary constant expressions
#[derive(Debug)]
pub enum ConstExpr {
    I32(i32),
    I64(i64),
    F32(f32),
    F64(f64),
}

impl Serialize for ConstExpr {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        // A single `<type>.const` instruction with its immediate value...
        match self {
            ConstExpr::I32(x) => {
                buffer.append_u8(OpCode::I32CONST as u8);
                buffer.encode_i32(*x);
            }
            ConstExpr::I64(x) => {
                buffer.append_u8(OpCode::I64CONST as u8);
                buffer.encode_i64(*x);
            }
            ConstExpr::F32(x) => {
                buffer.append_u8(OpCode::F32CONST as u8);
                buffer.encode_f32(*x);
            }
            ConstExpr::F64(x) => {
                buffer.append_u8(OpCode::F64CONST as u8);
                buffer.encode_f64(*x);
            }
        }
        // ...terminated like any expression
        buffer.append_u8(OpCode::END as u8);
    }
}
/// A global variable definition: its type plus an initialiser expression
#[derive(Debug)]
pub struct Global {
    /// Type and mutability of the global
    pub ty: GlobalType,
    /// Initial value of the global.
    pub init: ConstExpr,
}

impl Serialize for Global {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        self.ty.serialize(buffer);
        self.init.serialize(buffer);
    }
}
#[derive(Debug)]
pub struct GlobalSection<'a> {
    pub count: u32,
    pub bytes: Vec<'a, u8>,
}

impl<'a> GlobalSection<'a> {
    /// Build a Global section by serializing a slice of globals.
    pub fn new(arena: &'a Bump, globals: &[Global]) -> Self {
        // capacity estimate: ~13 bytes per serialized global
        let mut bytes = Vec::with_capacity_in(13 * globals.len(), arena);
        for global in globals.iter() {
            global.serialize(&mut bytes);
        }
        GlobalSection {
            count: globals.len() as u32,
            bytes,
        }
    }

    /// Serialize one more global into the section.
    pub fn append(&mut self, global: Global) {
        global.serialize(&mut self.bytes);
        self.count += 1;
    }
}

section_impl!(GlobalSection, SectionId::Global);
/*******************************************************************
*
* Export section
*
*******************************************************************/
/// Binary-format tag for the kind of entity being exported
#[repr(u8)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum ExportType {
    Func = 0,
    Table = 1,
    Mem = 2,
    Global = 3,
}

/// A single entry in the Export section
#[derive(Debug)]
pub struct Export<'a> {
    pub name: &'a [u8],
    pub ty: ExportType,
    /// Index into the corresponding index space (function, table, memory, or global)
    pub index: u32,
}

impl Serialize for Export<'_> {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        self.name.serialize(buffer);
        buffer.append_u8(self.ty as u8);
        buffer.encode_u32(self.index);
    }
}
#[derive(Debug)]
pub struct ExportSection<'a> {
    pub count: u32,
    pub bytes: Vec<'a, u8>,
    /// Indices of exported functions, collected as exports are appended
    pub function_indices: Vec<'a, u32>,
}

impl<'a> ExportSection<'a> {
    const ID: SectionId = SectionId::Export;

    /// Serialize an export into the section, tracking function exports separately
    pub fn append(&mut self, export: Export) {
        export.serialize(&mut self.bytes);
        self.count += 1;
        if matches!(export.ty, ExportType::Func) {
            self.function_indices.push(export.index);
        }
    }

    /// Worst-case serialized size including the section header
    pub fn size(&self) -> usize {
        section_size(&self.bytes)
    }

    pub fn empty(arena: &'a Bump) -> Self {
        ExportSection {
            count: 0,
            bytes: Vec::with_capacity_in(256, arena),
            function_indices: Vec::with_capacity_in(4, arena),
        }
    }
}

impl SkipBytes for ExportSection<'_> {
    fn skip_bytes(bytes: &[u8], cursor: &mut usize) {
        // parse_section advances the cursor past the entire section
        parse_section(Self::ID, bytes, cursor);
    }
}

impl<'a> Serialize for ExportSection<'a> {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        if !self.bytes.is_empty() {
            let header_indices = write_section_header(buffer, Self::ID);
            buffer.encode_u32(self.count);
            buffer.append_slice(&self.bytes);
            update_section_size(buffer, header_indices);
        }
    }
}
/*******************************************************************
*
* Element section
*
* Elements are entries in tables (see Table section)
* For example, Wasm uses a function table instead of function pointers,
* and each entry in that function table is an element.
* The call_indirect instruction uses element indices to refer to functions.
* This section therefore enumerates all indirectly-called functions.
*
*******************************************************************/
/*
https://webassembly.github.io/spec/core/binary/modules.html#binary-elemsec
Note: Wasm MVP only had variant 0x00, and tables only contained functions.
byte fields ⇒ syntax
-------------------------------------------------------------------------------------------------------------------------------------
0x00 e:expr y:vec(funcidx) ⇒ {type funcref, init ((ref.func y) end), mode active {table 0, offset e}}
0x01 et:elemkind y:vec(funcidx) ⇒ {type et, init ((ref.func y) end), mode passive}
0x02 x:tableidx e:expr et:elemkind y:vec(funcidx) ⇒ {type et, init ((ref.func y) end), mode active {table x, offset e}}
0x03 et:elemkind y:vec(funcidx) ⇒ {type et, init ((ref.func y) end), mode declarative}
0x04 e:expr el:vec(expr) ⇒ {type funcref, init el, mode active {table 0, offset e}}
0x05 et:reftype el:vec(expr) ⇒ {type et, init el, mode passive}
0x06 x:tableidx e:expr et:reftype el:vec(expr) ⇒ {type et, init el, mode active {table x, offset e}}
0x07 et:reftype el:vec(expr) ⇒ {type et, init el , mode declarative}
The initial byte can be interpreted as a bitfield
Bit 0 indicates a passive or declarative segment
Bit 1 indicates the presence of an explicit table index for an active segment and otherwise distinguishes passive from declarative segments
Bit 2 indicates the use of element type and element expressions instead of element kind and element indices.
Active means "element is loaded into the table on module instantiation"
Passive means it's loaded by explicit instructions in the code section
Declarative is a declaration that a reference will be taken at runtime
Sigh. This is only the post-MVP version of Wasm! Some day it'll end up as convoluted as x86, wait and see.
*/
/// How elements of a segment are encoded (Wasm MVP has only function references)
#[repr(u8)]
#[derive(Debug)]
#[allow(dead_code)]
enum ElementKind {
    FuncRef = 0,
}

/// The initial byte of an element segment, selecting one of the eight
/// binary formats (see the table in the comment above)
#[repr(u8)]
#[allow(dead_code)]
enum ElementSegmentFormatId {
    ActiveImplicitTableIndex = 0x00,
    PassiveKindAndIndex = 0x01,
    ActiveExplicitTableKindAndIndex = 0x02,
    DeclarativeKindAndIndex = 0x03,
    ActiveImplicitTableTypeAndExpr = 0x04,
    PassiveTypeAndExpr = 0x05,
    ActiveExplicitTableTypeAndExpr = 0x06,
    DeclarativeTypeAndExpr = 0x07,
}
// A representation based on the (convoluted) binary format.
// NOTE: If we ever need a more intuitive format, we can base it on the syntax doc
// https://webassembly.github.io/spec/core/syntax/modules.html#syntax-elem
#[derive(Debug)]
#[allow(dead_code)]
enum ElementSegment<'a> {
    /// This is the only variant we currently use. It is from the original Wasm MVP.
    /// The rest are dead code but kept in case we need them later.
    ActiveImplicitTableIndex {
        // where in table 0 the elements are written
        offset: ConstExpr,
        fn_indices: Vec<'a, u32>,
    },
    PassiveKindAndIndex {
        kind: ElementKind,
        fn_indices: Vec<'a, u32>,
    },
    ActiveExplicitTableKindAndIndex {
        table_idx: u32,
        offset: ConstExpr,
        kind: ElementKind,
        fn_indices: Vec<'a, u32>,
    },
    DeclarativeKindAndIndex {
        kind: ElementKind,
        fn_indices: Vec<'a, u32>,
    },
    ActiveImplicitTableTypeAndExpr {
        offset: ConstExpr,
        init: Vec<'a, ConstExpr>,
    },
    PassiveTypeAndExpr {
        elem_type: RefType,
        init: Vec<'a, ConstExpr>,
    },
    ActiveExplicitTableTypeAndExpr {
        table_idx: u32,
        offset: ConstExpr,
        elem_type: RefType,
        init: Vec<'a, ConstExpr>,
    },
    DeclarativeTypeAndExpr {
        elem_type: RefType,
        init: Vec<'a, ConstExpr>,
    },
}
impl<'a> ElementSegment<'a> {
    /// Parse one element segment, advancing `cursor` past it.
    /// Only the Wasm MVP format (ActiveImplicitTableIndex) is supported;
    /// any other format fails an assertion.
    fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Self {
        // In practice we only need the original MVP format
        let format_id = bytes[*cursor];
        assert!(format_id == ElementSegmentFormatId::ActiveImplicitTableIndex as u8);
        *cursor += 1;

        // The table index offset is encoded as a ConstExpr, but only I32 makes sense
        let const_expr_opcode = bytes[*cursor];
        assert!(const_expr_opcode == OpCode::I32CONST as u8);
        *cursor += 1;
        let offset = parse_u32_or_panic(bytes, cursor);
        assert!(bytes[*cursor] == OpCode::END as u8);
        *cursor += 1;

        // The function indices stored in this segment
        let num_elems = parse_u32_or_panic(bytes, cursor);
        let mut fn_indices = Vec::with_capacity_in(num_elems as usize, arena);
        for _ in 0..num_elems {
            let fn_idx = parse_u32_or_panic(bytes, cursor);
            fn_indices.push(fn_idx);
        }

        ElementSegment::ActiveImplicitTableIndex {
            offset: ConstExpr::I32(offset as i32),
            fn_indices,
        }
    }

    /// Worst-case serialized size of this segment, used for buffer capacity
    fn size(&self) -> usize {
        let variant_id = 1;
        let constexpr_opcode = 1;
        let constexpr_value = MAX_SIZE_ENCODED_U32;
        let vec_len = MAX_SIZE_ENCODED_U32;
        let vec_contents = MAX_SIZE_ENCODED_U32
            * if let ElementSegment::ActiveImplicitTableIndex { fn_indices, .. } = &self {
                fn_indices.len()
            } else {
                internal_error!("Unsupported ElementSegment {:?}", self)
            };
        variant_id + constexpr_opcode + constexpr_value + vec_len + vec_contents
    }
}
impl<'a> Serialize for ElementSegment<'a> {
    /// Only the Wasm MVP variant is serializable; others are an internal error.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        match self {
            ElementSegment::ActiveImplicitTableIndex { offset, fn_indices } => {
                buffer.append_u8(ElementSegmentFormatId::ActiveImplicitTableIndex as u8);
                offset.serialize(buffer);
                fn_indices.serialize(buffer);
            }
            _ => internal_error!("Unsupported ElementSegment {:?}", self),
        }
    }
}
#[derive(Debug)]
pub struct ElementSection<'a> {
segments: Vec<'a, ElementSegment<'a>>,
}
impl<'a> ElementSection<'a> {
const ID: SectionId = SectionId::Element;
pub fn preload(arena: &'a Bump, module_bytes: &[u8], cursor: &mut usize) -> Self {
let (num_segments, body_bytes) = parse_section(Self::ID, module_bytes, cursor);
let mut segments = Vec::with_capacity_in(num_segments as usize, arena);
let mut body_cursor = 0;
for _ in 0..num_segments {
let seg = ElementSegment::parse(arena, body_bytes, &mut body_cursor);
segments.push(seg);
}
ElementSection { segments }
}
pub fn size(&self) -> usize {
self.segments.iter().map(|seg| seg.size()).sum()
}
pub fn indirect_callees(&self, arena: &'a Bump) -> Vec<'a, u32> {
let mut result = bumpalo::vec![in arena];
for segment in self.segments.iter() {
if let ElementSegment::ActiveImplicitTableIndex { fn_indices, .. } = segment {
result.extend_from_slice(fn_indices);
} else {
internal_error!("Unsupported ElementSegment {:?}", self)
}
}
result
}
}
impl<'a> Serialize for ElementSection<'a> {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        // NOTE(review): unlike most sections, this is written even when there
        // are no segments (the Vec serialize presumably emits a zero length) —
        // confirm whether an empty Element section should be omitted instead.
        let header_indices = write_section_header(buffer, Self::ID);
        self.segments.serialize(buffer);
        update_section_size(buffer, header_indices);
    }
}
/*******************************************************************
*
* Code section (see also code_builder.rs)
*
*******************************************************************/
#[derive(Debug)]
pub struct CodeSection<'a> {
    /// Number of function bodies preloaded from the platform/builtins binary
    pub preloaded_count: u32,
    /// Serialized bodies of the preloaded functions
    pub preloaded_bytes: &'a [u8],
    /// Builders for the functions generated from Roc code
    pub code_builders: Vec<'a, CodeBuilder<'a>>,
    // Call-graph info used to strip unused preloaded functions
    dead_code_metadata: PreloadsCallGraph<'a>,
}
impl<'a> CodeSection<'a> {
    /// Serialize the code builders for all functions, and get code relocations with final offsets
    /// NOTE(review): unlike `serialize`, this does not emit `preloaded_bytes` —
    /// presumably callers use this path only when producing a relocatable
    /// object without the preloads; confirm against call sites.
    pub fn serialize_with_relocs<T: SerialBuffer>(
        &self,
        buffer: &mut T,
        relocations: &mut Vec<'a, RelocationEntry>,
    ) -> usize {
        let header_indices = write_section_header(buffer, SectionId::Code);
        buffer.encode_u32(self.preloaded_count + self.code_builders.len() as u32);

        for code_builder in self.code_builders.iter() {
            // the builder needs the section body start to compute relocation offsets
            code_builder.serialize_with_relocs(buffer, relocations, header_indices.body_index);
        }

        let code_section_body_index = header_indices.body_index;
        update_section_size(buffer, header_indices);
        code_section_body_index
    }

    /// Worst-case serialized size, used for buffer capacity
    pub fn size(&self) -> usize {
        let builders_size: usize = self.code_builders.iter().map(|cb| cb.size()).sum();

        MAX_SIZE_SECTION_HEADER + self.preloaded_bytes.len() + builders_size
    }

    /// Parse the Code section of a preloaded module and build the call-graph
    /// metadata needed later to drop unused preloaded functions.
    pub fn preload(
        arena: &'a Bump,
        module_bytes: &[u8],
        cursor: &mut usize,
        import_signatures: &[u32],
        function_signatures: &[u32],
        indirect_callees: &[u32],
    ) -> Self {
        let (preloaded_count, initial_bytes) = parse_section(SectionId::Code, module_bytes, cursor);
        let preloaded_bytes = arena.alloc_slice_copy(initial_bytes);

        // TODO: Try to move this call_graph preparation to platform build time
        let dead_code_metadata = parse_preloads_call_graph(
            arena,
            initial_bytes,
            import_signatures,
            function_signatures,
            indirect_callees,
        );

        CodeSection {
            preloaded_count,
            preloaded_bytes,
            code_builders: Vec::with_capacity_in(0, arena),
            dead_code_metadata,
        }
    }

    /// Drop preloaded functions that are not reachable from any exported or
    /// directly-called function, rewriting `preloaded_bytes` in place.
    pub(super) fn remove_dead_preloads<T: IntoIterator<Item = u32>>(
        &mut self,
        arena: &'a Bump,
        import_fn_count: u32,
        exported_fns: &[u32],
        called_preload_fns: T,
    ) {
        // Trace which preloaded functions are transitively reachable
        let live_ext_fn_indices = trace_call_graph(
            arena,
            &self.dead_code_metadata,
            exported_fns,
            called_preload_fns,
        );

        // Copy the live function bodies, shrinking the dead ones
        let mut buffer = Vec::with_capacity_in(self.preloaded_bytes.len(), arena);
        copy_preloads_shrinking_dead_fns(
            arena,
            &mut buffer,
            &self.dead_code_metadata,
            self.preloaded_bytes,
            import_fn_count,
            live_ext_fn_indices,
        );
        self.preloaded_bytes = buffer.into_bump_slice();
    }
}
impl<'a> Serialize for CodeSection<'a> {
    /// Write the Code section: preloaded function bodies first,
    /// then the functions generated from Roc code.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        let header_indices = write_section_header(buffer, SectionId::Code);
        let total_fn_count = self.preloaded_count + self.code_builders.len() as u32;
        buffer.encode_u32(total_fn_count);

        buffer.append_slice(self.preloaded_bytes);
        for builder in &self.code_builders {
            builder.serialize(buffer);
        }

        update_section_size(buffer, header_indices);
    }
}
/*******************************************************************
*
* Data section
*
*******************************************************************/
#[derive(Debug)]
pub enum DataMode {
    /// A data segment that auto-loads into memory on instantiation
    Active { offset: ConstExpr },
    /// A data segment that can be loaded with the `memory.init` instruction
    Passive,
}

impl DataMode {
    /// Convenience constructor: an active segment at a fixed memory offset
    pub fn active_at(offset: u32) -> Self {
        DataMode::Active {
            offset: ConstExpr::I32(offset as i32),
        }
    }
}
/// One segment of the Data section: bytes to place in linear memory
#[derive(Debug)]
pub struct DataSegment<'a> {
    pub mode: DataMode,
    /// The bytes the segment initialises memory with
    pub init: Vec<'a, u8>,
}

impl Serialize for DataSegment<'_> {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        match &self.mode {
            DataMode::Active { offset } => {
                buffer.append_u8(0); // variant ID
                offset.serialize(buffer);
            }
            DataMode::Passive => {
                buffer.append_u8(1); // variant ID
            }
        }
        self.init.serialize(buffer);
    }
}
#[derive(Debug)]
pub struct DataSection<'a> {
    count: u32,
    pub bytes: Vec<'a, u8>,
}

impl<'a> DataSection<'a> {
    /// Serialize a segment into the section and return its index
    pub fn append_segment(&mut self, segment: DataSegment<'a>) -> u32 {
        segment.serialize(&mut self.bytes);
        let index = self.count;
        self.count += 1;
        index
    }
}

section_impl!(DataSection, SectionId::Data);
/*******************************************************************
*
* Opaque section
*
*******************************************************************/
/// A Wasm module section that we don't use for Roc code,
/// but may be present in a preloaded binary
#[derive(Debug, Default)]
pub struct OpaqueSection<'a> {
    // raw bytes of the whole section, header included
    bytes: &'a [u8],
}

impl<'a> OpaqueSection<'a> {
    pub fn size(&self) -> usize {
        self.bytes.len()
    }

    /// Copy the raw bytes of the section at `cursor` (header included) into
    /// the arena, advancing the cursor past it. If the section at the cursor
    /// has a different ID, stores an empty slice and leaves the cursor alone.
    pub fn preload(
        id: SectionId,
        arena: &'a Bump,
        module_bytes: &[u8],
        cursor: &mut usize,
    ) -> Self {
        let bytes: &[u8] = if module_bytes[*cursor] != id as u8 {
            &[]
        } else {
            let section_start = *cursor;
            *cursor += 1;
            let section_size = parse_u32_or_panic(module_bytes, cursor);
            let next_section_start = *cursor + section_size as usize;
            let whole_section = &module_bytes[section_start..next_section_start];
            *cursor = next_section_start;
            whole_section
        };
        OpaqueSection {
            // u8 is Copy, so use the memcpy-based copy,
            // consistent with the rest of this module
            bytes: arena.alloc_slice_copy(bytes),
        }
    }
}

impl Serialize for OpaqueSection<'_> {
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        // The stored bytes already include the header, so write them verbatim
        buffer.append_slice(self.bytes);
    }
}
/*******************************************************************
*
* Name section
* https://webassembly.github.io/spec/core/appendix/custom.html#name-section
*
*******************************************************************/
/// Subsection IDs within the "name" custom section
#[repr(u8)]
#[allow(dead_code)]
enum NameSubSections {
    ModuleName = 0,
    FunctionNames = 1,
    LocalNames = 2,
}
#[derive(Debug)]
pub struct NameSection<'a> {
    /// Raw bytes of the section body (after the section header)
    pub bytes: Vec<'a, u8>,
    /// Map from function name to function index
    pub functions: MutMap<&'a [u8], u32>,
}

impl<'a> NameSection<'a> {
    const ID: SectionId = SectionId::Custom;
    const NAME: &'static str = "name";

    /// Parse the "name" custom section of a preloaded module, advancing
    /// `cursor` past it. Raises an internal error if the bytes at `cursor`
    /// are not a "name" custom section.
    pub fn parse(arena: &'a Bump, module_bytes: &[u8], cursor: &mut usize) -> Self {
        // Custom section ID
        let section_id_byte = module_bytes[*cursor];
        if section_id_byte != Self::ID as u8 {
            internal_error!(
                "Expected section ID 0x{:x}, but found 0x{:x} at offset 0x{:x}",
                Self::ID as u8,
                section_id_byte,
                *cursor
            );
        }
        *cursor += 1;

        // Section size
        let section_size = parse_u32_or_panic(module_bytes, cursor) as usize;
        let section_end = *cursor + section_size;

        let mut bytes = Vec::with_capacity_in(section_size, arena);
        bytes.extend_from_slice(&module_bytes[*cursor..section_end]);

        let functions = MutMap::default();
        let mut section = NameSection { bytes, functions };
        section.parse_body(arena, module_bytes, cursor, section_end);
        section
    }

    /// Validate the section name, find the function-names subsection,
    /// and fill the `functions` map from it.
    fn parse_body(
        &mut self,
        arena: &'a Bump,
        module_bytes: &[u8],
        cursor: &mut usize,
        section_end: usize,
    ) {
        // Custom section name
        let section_name_len = parse_u32_or_panic(module_bytes, cursor);
        let section_name_end = *cursor + section_name_len as usize;
        let section_name = &module_bytes[*cursor..section_name_end];
        if section_name != Self::NAME.as_bytes() {
            internal_error!(
                "Expected Custom section {:?}, found {:?}",
                Self::NAME,
                std::str::from_utf8(section_name)
            );
        }
        *cursor = section_name_end;

        // Find function names subsection.
        // At most 2 iterations: presumably only the module-name subsection
        // can precede it — TODO confirm against producers we care about.
        let mut found_function_names = false;
        for _possible_subsection_id in 0..2 {
            let subsection_id = module_bytes[*cursor];
            *cursor += 1;
            let subsection_size = parse_u32_or_panic(module_bytes, cursor);
            if subsection_id == NameSubSections::FunctionNames as u8 {
                found_function_names = true;
                break;
            }
            *cursor += subsection_size as usize;
            if *cursor >= section_end {
                internal_error!("Failed to parse Name section");
            }
        }
        if !found_function_names {
            internal_error!("Failed to parse Name section");
        }

        // Function names: a vector of (function index, name) entries
        let num_entries = parse_u32_or_panic(module_bytes, cursor) as usize;
        for _ in 0..num_entries {
            let fn_index = parse_u32_or_panic(module_bytes, cursor);
            let name_len = parse_u32_or_panic(module_bytes, cursor);
            let name_end = *cursor + name_len as usize;
            let name_bytes: &[u8] = &module_bytes[*cursor..name_end];
            *cursor = name_end;
            self.functions
                .insert(arena.alloc_slice_copy(name_bytes), fn_index);
        }
    }
}
impl<'a> Serialize for NameSection<'a> {
    /// Write the name section verbatim; an empty one is omitted entirely.
    fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
        if self.bytes.is_empty() {
            return;
        }
        let header_indices = write_section_header(buffer, Self::ID);
        buffer.append_slice(&self.bytes);
        update_section_size(buffer, header_indices);
    }
}
/*******************************************************************
*
* Unit tests
*
*******************************************************************/
#[cfg(test)]
mod tests {
    use super::*;
    use bumpalo::{self, collections::Vec, Bump};

    /// Round-trip check: serializing a TypeSection and preloading the result
    /// must reproduce the same bytes and offsets.
    fn test_assert_types_preload<'a>(arena: &'a Bump, original: &TypeSection<'a>) {
        // Serialize the Type section that we built from Roc code
        let mut original_serialized = Vec::with_capacity_in(6 + original.bytes.len(), arena);
        original.serialize(&mut original_serialized);

        // Reconstruct a new TypeSection by "pre-loading" the bytes of the original
        let mut cursor = 0;
        let mut preloaded = TypeSection::preload(arena, &original_serialized, &mut cursor);
        preloaded.parse_offsets();

        // Use assert_eq! rather than debug_assert_eq!: debug asserts are
        // compiled out in release builds, which would make this test vacuous
        // under `cargo test --release`.
        assert_eq!(original.offsets, preloaded.offsets);
        assert_eq!(original.bytes, preloaded.bytes);
    }

    #[test]
    fn test_type_section() {
        use ValueType::*;
        let arena = &Bump::new();
        let signatures = [
            Signature {
                param_types: bumpalo::vec![in arena],
                ret_type: None,
            },
            Signature {
                param_types: bumpalo::vec![in arena; I32, I64, F32, F64],
                ret_type: None,
            },
            Signature {
                param_types: bumpalo::vec![in arena; I32, I32, I32],
                ret_type: Some(I32),
            },
        ];
        let capacity = signatures.len();
        let mut section = TypeSection {
            arena,
            bytes: Vec::with_capacity_in(capacity * 4, arena),
            offsets: Vec::with_capacity_in(capacity, arena),
        };
        for sig in signatures {
            section.insert(sig);
        }
        test_assert_types_preload(arena, &section);
    }
}