wasm: Make NameSection easier to manipulate

This commit is contained in:
Brian Carroll 2022-06-05 20:57:31 +02:00
parent 66c0c423b3
commit 376cbbc4b2
No known key found for this signature in database
GPG key ID: 5C7B2EC4101703C0
3 changed files with 48 additions and 46 deletions

View file

@ -2,7 +2,6 @@ use std::fmt::{Debug, Formatter};
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
use roc_collections::all::MutMap;
use roc_error_macros::internal_error; use roc_error_macros::internal_error;
use super::dead_code::{ use super::dead_code::{
@ -1496,10 +1495,7 @@ enum NameSubSections {
} }
pub struct NameSection<'a> { pub struct NameSection<'a> {
/// count may not be the same as functions.len() because of duplicates! pub function_names: Vec<'a, (u32, &'a str)>,
pub count: u32,
pub bytes: Vec<'a, u8>,
pub functions: MutMap<&'a str, u32>,
} }
impl<'a> NameSection<'a> { impl<'a> NameSection<'a> {
@ -1507,14 +1503,14 @@ impl<'a> NameSection<'a> {
const NAME: &'static str = "name"; const NAME: &'static str = "name";
pub fn size(&self) -> usize { pub fn size(&self) -> usize {
self.bytes.len() self.function_names
.iter()
.map(|(_, s)| MAX_SIZE_ENCODED_U32 + s.len())
.sum()
} }
pub fn append_function(&mut self, index: u32, name: &'a str) { pub fn append_function(&mut self, index: u32, name: &'a str) {
index.serialize(&mut self.bytes); self.function_names.push((index, name));
name.serialize(&mut self.bytes);
self.count += 1; // always increment even for duplicate names
self.functions.insert(name, index);
} }
} }
@ -1523,9 +1519,7 @@ impl<'a> Parse<&'a Bump> for NameSection<'a> {
// If we're already past the end of the preloaded file then there is no Name section // If we're already past the end of the preloaded file then there is no Name section
if *cursor >= module_bytes.len() { if *cursor >= module_bytes.len() {
return Ok(NameSection { return Ok(NameSection {
count: 0, function_names: bumpalo::vec![in arena],
bytes: bumpalo::vec![in arena],
functions: MutMap::default(),
}); });
} }
@ -1549,12 +1543,6 @@ impl<'a> Parse<&'a Bump> for NameSection<'a> {
let section_size = u32::parse((), module_bytes, cursor)? as usize; let section_size = u32::parse((), module_bytes, cursor)? as usize;
let section_end = *cursor + section_size; let section_end = *cursor + section_size;
let mut section = NameSection {
count: 0,
bytes: Vec::with_capacity_in(section_size, arena),
functions: MutMap::default(),
};
let section_name = <&'a str>::parse(arena, module_bytes, cursor)?; let section_name = <&'a str>::parse(arena, module_bytes, cursor)?;
if section_name != Self::NAME { if section_name != Self::NAME {
let message = format!( let message = format!(
@ -1593,19 +1581,17 @@ impl<'a> Parse<&'a Bump> for NameSection<'a> {
}); });
} }
// Function names let count = u32::parse((), module_bytes, cursor)?;
section.count = u32::parse((), module_bytes, cursor)?; let mut section = NameSection {
let fn_names_start = *cursor; function_names: Vec::with_capacity_in(count as usize, arena),
for _ in 0..section.count { };
let fn_index = u32::parse((), module_bytes, cursor)?;
let name_bytes = <&'a str>::parse(arena, module_bytes, cursor)?;
section.functions.insert(name_bytes, fn_index);
}
// Copy only the bytes for the function names segment // Function names
section for _ in 0..count {
.bytes let index = u32::parse((), module_bytes, cursor)?;
.extend_from_slice(&module_bytes[fn_names_start..*cursor]); let name = <&'a str>::parse(arena, module_bytes, cursor)?;
section.function_names.push((index, name));
}
*cursor = section_end; *cursor = section_end;
@ -1615,18 +1601,21 @@ impl<'a> Parse<&'a Bump> for NameSection<'a> {
impl<'a> Serialize for NameSection<'a> { impl<'a> Serialize for NameSection<'a> {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) { fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
if !self.bytes.is_empty() { if !self.function_names.is_empty() {
let header_indices = write_custom_section_header(buffer, Self::NAME); let header_indices = write_custom_section_header(buffer, Self::NAME);
let subsection_id = NameSubSections::FunctionNames as u8; let subsection_id = NameSubSections::FunctionNames as u8;
subsection_id.serialize(buffer); subsection_id.serialize(buffer);
let subsection_byte_size = (MAX_SIZE_ENCODED_U32 + self.bytes.len()) as u32; let subsection_size_index = buffer.encode_padded_u32(0);
subsection_byte_size.serialize(buffer); let subsection_start = buffer.size();
buffer.encode_padded_u32(self.count); self.function_names.serialize(buffer);
buffer.append_slice(&self.bytes); buffer.overwrite_padded_u32(
subsection_size_index,
(buffer.size() - subsection_start) as u32,
);
update_section_size(buffer, header_indices); update_section_size(buffer, header_indices);
} }
@ -1637,15 +1626,7 @@ impl<'a> Debug for NameSection<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(f, "NameSection")?; writeln!(f, "NameSection")?;
// We want to display index->name because it matches the binary format and looks nicer. for (index, name) in self.function_names.iter() {
// But our hashmap is name->index because that's what code gen wants to look up.
let mut by_index = std::vec::Vec::with_capacity(self.functions.len());
for (name, index) in self.functions.iter() {
by_index.push((*index, name));
}
by_index.sort_unstable();
for (index, name) in by_index.iter() {
writeln!(f, " {:4}: {}", index, name)?; writeln!(f, " {:4}: {}", index, name)?;
} }

View file

@ -18,6 +18,13 @@ impl Serialize for str {
} }
} }
impl Serialize for &str {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.encode_u32(self.len() as u32);
buffer.append_slice(self.as_bytes());
}
}
impl Serialize for u8 { impl Serialize for u8 {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) { fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
buffer.append_u8(*self); buffer.append_u8(*self);
@ -67,6 +74,13 @@ impl<S: Serialize> Serialize for Option<S> {
} }
} }
impl<A: Serialize, B: Serialize> Serialize for (A, B) {
fn serialize<T: SerialBuffer>(&self, buffer: &mut T) {
self.0.serialize(buffer);
self.1.serialize(buffer);
}
}
/// Write an unsigned integer into the provided buffer in LEB-128 format, returning byte length /// Write an unsigned integer into the provided buffer in LEB-128 format, returning byte length
/// ///
/// All integers in Wasm are variable-length encoded, which saves space for small values. /// All integers in Wasm are variable-length encoded, which saves space for small values.

View file

@ -137,7 +137,14 @@ fn compile_roc_to_wasm_bytes<'a, T: Wasm32Result>(
T::insert_wrapper(arena, &mut module, TEST_WRAPPER_NAME, main_fn_index); T::insert_wrapper(arena, &mut module, TEST_WRAPPER_NAME, main_fn_index);
// Export the initialiser function for refcount tests // Export the initialiser function for refcount tests
let init_refcount_idx = module.names.functions[INIT_REFCOUNT_NAME]; let init_refcount_idx = module
.names
.function_names
.iter()
.filter(|(_, name)| *name == INIT_REFCOUNT_NAME)
.map(|(i, _)| *i)
.next()
.unwrap();
module.export.append(Export { module.export.append(Export {
name: INIT_REFCOUNT_NAME, name: INIT_REFCOUNT_NAME,
ty: ExportType::Func, ty: ExportType::Func,