wasm: Create ParseError type

This commit is contained in:
Brian Carroll 2022-05-22 10:42:30 +01:00
parent 66c78ceed5
commit c16a5ad8ae
No known key found for this signature in database
GPG key ID: 9CF4E3BF9C4722C7
7 changed files with 72 additions and 40 deletions

View file

@ -100,7 +100,7 @@ impl<'a> WasmBackend<'a> {
}); });
// TODO: move this to module parsing // TODO: move this to module parsing
let next_constant_addr = module.global.parse_u32_at_index(data_end_idx).unwrap_or_else(|e| { let next_constant_addr = module.global.parse_u32_at_index(data_end_idx).unwrap_or_else(|e| {
internal_error!("Failed to parse __data_end from object file: {}", e); internal_error!("Failed to parse __data_end from object file: {:?}", e);
}); });
module.export.exports = app_exports; module.export.exports = app_exports;

View file

@ -17,6 +17,7 @@ use roc_mono::code_gen_help::CodeGenHelp;
use roc_mono::ir::{Proc, ProcLayout}; use roc_mono::ir::{Proc, ProcLayout};
use roc_mono::layout::LayoutIds; use roc_mono::layout::LayoutIds;
use roc_target::TargetInfo; use roc_target::TargetInfo;
use wasm_module::parse::ParseError;
use crate::backend::{ProcLookupData, ProcSource, WasmBackend}; use crate::backend::{ProcLookupData, ProcSource, WasmBackend};
use crate::wasm_module::{ use crate::wasm_module::{
@ -59,7 +60,8 @@ pub fn build_module<'a>(
procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<std::vec::Vec<u8>, String> { ) -> Result<std::vec::Vec<u8>, String> {
let (mut wasm_module, called_preload_fns, _) = let (mut wasm_module, called_preload_fns, _) =
build_module_unserialized(env, interns, preload_bytes, procedures)?; build_module_unserialized(env, interns, preload_bytes, procedures)
.map_err(|e| format!("{:?}", e))?;
wasm_module.remove_dead_preloads(env.arena, called_preload_fns); wasm_module.remove_dead_preloads(env.arena, called_preload_fns);
@ -77,7 +79,7 @@ pub fn build_module_unserialized<'a>(
interns: &'a mut Interns, interns: &'a mut Interns,
preload_bytes: &[u8], preload_bytes: &[u8],
procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>, procedures: MutMap<(Symbol, ProcLayout<'a>), Proc<'a>>,
) -> Result<(WasmModule<'a>, Vec<'a, u32>, u32), String> { ) -> Result<(WasmModule<'a>, Vec<'a, u32>, u32), ParseError> {
let mut layout_ids = LayoutIds::default(); let mut layout_ids = LayoutIds::default();
let mut procs = Vec::with_capacity_in(procedures.len(), env.arena); let mut procs = Vec::with_capacity_in(procedures.len(), env.arena);
let mut proc_lookup = Vec::with_capacity_in(procedures.len() * 2, env.arena); let mut proc_lookup = Vec::with_capacity_in(procedures.len() * 2, env.arena);

View file

@ -121,10 +121,9 @@ pub fn parse_preloads_call_graph<'a>(
u32::skip_bytes(code_section_body, &mut cursor).unwrap(); // table_idx u32::skip_bytes(code_section_body, &mut cursor).unwrap(); // table_idx
} else { } else {
OpCode::skip_bytes(code_section_body, &mut cursor).unwrap_or_else(|e| { OpCode::skip_bytes(code_section_body, &mut cursor).unwrap_or_else(|e| {
let display_slice = &code_section_body[cursor - 4..cursor + 4];
panic!( panic!(
"Error parsing host object file, at offset {} (0x{:x}) within Code section: {}\nbytes around the error: {:x?}\nstart of code section: {:x?}", "Error parsing host object file, at offset 0x{:x} within Code section: {}",
cursor, cursor, e, display_slice, &code_section_body[0..16] cursor, e.message
) )
}); });
} }

View file

@ -2,9 +2,9 @@ pub mod code_builder;
mod dead_code; mod dead_code;
pub mod linking; pub mod linking;
pub mod opcodes; pub mod opcodes;
pub mod parse;
pub mod sections; pub mod sections;
pub mod serialize; pub mod serialize;
pub mod parse;
use bumpalo::{collections::Vec, Bump}; use bumpalo::{collections::Vec, Bump};
pub use code_builder::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState}; pub use code_builder::{Align, CodeBuilder, LocalId, ValueType, VmSymbolState};
@ -12,6 +12,7 @@ pub use linking::SymInfo;
pub use sections::{ConstExpr, Export, ExportType, Global, GlobalType, Signature}; pub use sections::{ConstExpr, Export, ExportType, Global, GlobalType, Signature};
use self::linking::{LinkingSection, RelocationSection}; use self::linking::{LinkingSection, RelocationSection};
use self::parse::ParseError;
use self::sections::{ use self::sections::{
CodeSection, DataSection, ElementSection, ExportSection, FunctionSection, GlobalSection, CodeSection, DataSection, ElementSection, ExportSection, FunctionSection, GlobalSection,
ImportSection, MemorySection, NameSection, OpaqueSection, Section, SectionId, TableSection, ImportSection, MemorySection, NameSection, OpaqueSection, Section, SectionId, TableSection,
@ -121,11 +122,15 @@ impl<'a> WasmModule<'a> {
+ self.names.size() + self.names.size()
} }
pub fn preload(arena: &'a Bump, bytes: &[u8]) -> Result<Self, String> { pub fn preload(arena: &'a Bump, bytes: &[u8]) -> Result<Self, ParseError> {
let is_valid_magic_number = &bytes[0..4] == "\0asm".as_bytes(); let is_valid_magic_number = &bytes[0..4] == "\0asm".as_bytes();
let is_valid_version = bytes[4..8] == Self::WASM_VERSION.to_le_bytes(); let is_valid_version = bytes[4..8] == Self::WASM_VERSION.to_le_bytes();
if !is_valid_magic_number || !is_valid_version { if !is_valid_magic_number || !is_valid_version {
return Err("This file is not a WebAssembly binary. The file header is not valid.".into()); return Err(ParseError {
offset: 0,
message: "This file is not a WebAssembly binary. The file header is not valid."
.into(),
});
} }
let mut cursor: usize = 8; let mut cursor: usize = 8;

View file

@ -1,4 +1,4 @@
use super::parse::{Parse, SkipBytes}; use super::parse::{Parse, ParseError, SkipBytes};
#[repr(u8)] #[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@ -259,13 +259,17 @@ fn immediates_for(op: OpCode) -> Result<OpImmediates, String> {
} }
impl SkipBytes for OpCode { impl SkipBytes for OpCode {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
use OpImmediates::*; use OpImmediates::*;
let opcode_byte: u8 = bytes[*cursor]; let opcode_byte: u8 = bytes[*cursor];
let opcode: OpCode = unsafe { std::mem::transmute(opcode_byte) }; let opcode: OpCode = unsafe { std::mem::transmute(opcode_byte) };
let immediates = immediates_for(opcode)?; // will return Err if transmute was invalid // will return Err if transmute was invalid
let immediates = immediates_for(opcode).map_err(|message| ParseError {
message,
offset: *cursor,
})?;
match immediates { match immediates {
NoImmediate => { NoImmediate => {

View file

@ -4,18 +4,24 @@ use bumpalo::Bump;
/// Parse serialized bytes into a data structure /// Parse serialized bytes into a data structure
/// Specific parsers may need contextual data from other parts of the .wasm file /// Specific parsers may need contextual data from other parts of the .wasm file
pub trait Parse<ParseContext>: Sized { pub trait Parse<ParseContext>: Sized {
fn parse(ctx: ParseContext, bytes: &[u8], cursor: &mut usize) -> Result<Self, String>; fn parse(ctx: ParseContext, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError>;
}
/// Error produced when parsing or skipping over serialized Wasm bytes fails.
///
/// Implements `Display` and `std::error::Error` so it can be printed with `{}`
/// and propagated through `Box<dyn Error>`, in addition to `{:?}`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Cursor position at which the error was raised. The cursor indexes into
    /// whichever byte slice was being parsed at the time.
    pub offset: usize,
    /// Human-readable description of what went wrong.
    pub message: String,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{} (at offset 0x{:x})", self.message, self.offset)
    }
}

impl std::error::Error for ParseError {}
/// Skip over serialized bytes for a type /// Skip over serialized bytes for a type
/// This may, or may not, require looking at the byte values /// This may, or may not, require looking at the byte values
pub trait SkipBytes: Sized { pub trait SkipBytes: Sized {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String>; fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError>;
} }
/// Decode an unsigned 32-bit integer from the provided buffer in LEB-128 format /// Decode an unsigned 32-bit integer from the provided buffer in LEB-128 format
/// Return the integer itself and the offset after it ends /// Return the integer itself and the offset after it ends
fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), String> { fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), ()> {
let mut value = 0; let mut value = 0;
let mut shift = 0; let mut shift = 0;
for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() { for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() {
@ -25,23 +31,30 @@ fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), String> {
} }
shift += 7; shift += 7;
} }
Err(format!( Err(())
"Failed to decode u32 as LEB-128 from bytes: {:2x?}",
std::vec::Vec::from_iter(bytes.iter().take(MAX_SIZE_ENCODED_U32))
))
} }
impl Parse<()> for u32 { impl Parse<()> for u32 {
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, String> { fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let (value, len) = decode_u32(&bytes[*cursor..])?; match decode_u32(&bytes[*cursor..]) {
Ok((value, len)) => {
*cursor += len; *cursor += len;
Ok(value) Ok(value)
} }
Err(()) => Err(ParseError {
offset: *cursor,
message: format!(
"Failed to decode u32 as LEB-128 from bytes: {:2x?}",
&bytes[*cursor..][..MAX_SIZE_ENCODED_U32]
),
}),
}
}
} }
// Parse string bytes without utf8 validation // Parse string bytes without utf8 validation
impl<'a> Parse<&'a Bump> for &'a [u8] { impl<'a> Parse<&'a Bump> for &'a [u8] {
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, String> { fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
let len = u32::parse((), bytes, cursor)?; let len = u32::parse((), bytes, cursor)?;
let end = *cursor + len as usize; let end = *cursor + len as usize;
let bytes: &[u8] = &bytes[*cursor..end]; let bytes: &[u8] = &bytes[*cursor..end];
@ -52,7 +65,7 @@ impl<'a> Parse<&'a Bump> for &'a [u8] {
} }
impl SkipBytes for u32 { impl SkipBytes for u32 {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
const MAX_LEN: usize = 5; const MAX_LEN: usize = 5;
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) { for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
if byte & 0x80 == 0 { if byte & 0x80 == 0 {
@ -60,12 +73,15 @@ impl SkipBytes for u32 {
return Ok(()); return Ok(());
} }
} }
Err("Invalid LEB encoding".into()) Err(ParseError {
offset: *cursor,
message: "Invalid LEB encoding".into(),
})
} }
} }
impl SkipBytes for u64 { impl SkipBytes for u64 {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
const MAX_LEN: usize = 10; const MAX_LEN: usize = 10;
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) { for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
if byte & 0x80 == 0 { if byte & 0x80 == 0 {
@ -73,12 +89,15 @@ impl SkipBytes for u64 {
return Ok(()); return Ok(());
} }
} }
Err("Invalid LEB encoding".into()) Err(ParseError {
offset: *cursor,
message: "Invalid LEB encoding".into(),
})
} }
} }
impl SkipBytes for u8 { impl SkipBytes for u8 {
fn skip_bytes(_bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(_bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
*cursor += 1; *cursor += 1;
Ok(()) Ok(())
} }
@ -86,7 +105,7 @@ impl SkipBytes for u8 {
/// Note: This is just for skipping over Wasm bytes. We don't actually care about String vs str! /// Note: This is just for skipping over Wasm bytes. We don't actually care about String vs str!
impl SkipBytes for String { impl SkipBytes for String {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
let len = u32::parse((), bytes, cursor)?; let len = u32::parse((), bytes, cursor)?;
if false { if false {

View file

@ -11,7 +11,7 @@ use super::dead_code::{
}; };
use super::linking::RelocationEntry; use super::linking::RelocationEntry;
use super::opcodes::OpCode; use super::opcodes::OpCode;
use super::parse::{Parse, SkipBytes}; use super::parse::{Parse, ParseError, SkipBytes};
use super::serialize::{SerialBuffer, Serialize, MAX_SIZE_ENCODED_U32}; use super::serialize::{SerialBuffer, Serialize, MAX_SIZE_ENCODED_U32};
use super::{CodeBuilder, ValueType}; use super::{CodeBuilder, ValueType};
@ -351,7 +351,7 @@ impl<'a> ImportSection<'a> {
self.count += 1; self.count += 1;
} }
pub fn parse(&mut self, arena: &'a Bump) -> Result<Vec<'a, u32>, String> { pub fn parse(&mut self, arena: &'a Bump) -> Result<Vec<'a, u32>, ParseError> {
let mut fn_signatures = bumpalo::vec![in arena]; let mut fn_signatures = bumpalo::vec![in arena];
let mut cursor = 0; let mut cursor = 0;
while cursor < self.bytes.len() { while cursor < self.bytes.len() {
@ -459,7 +459,7 @@ impl Serialize for TableType {
} }
impl SkipBytes for TableType { impl SkipBytes for TableType {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
u8::skip_bytes(bytes, cursor)?; u8::skip_bytes(bytes, cursor)?;
Limits::skip_bytes(bytes, cursor)?; Limits::skip_bytes(bytes, cursor)?;
Ok(()) Ok(())
@ -563,7 +563,7 @@ impl Serialize for Limits {
} }
impl SkipBytes for Limits { impl SkipBytes for Limits {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
let variant_id = bytes[*cursor]; let variant_id = bytes[*cursor];
u8::skip_bytes(bytes, cursor)?; // advance past the variant byte u8::skip_bytes(bytes, cursor)?; // advance past the variant byte
u32::skip_bytes(bytes, cursor)?; // skip "min" u32::skip_bytes(bytes, cursor)?; // skip "min"
@ -638,7 +638,7 @@ impl Serialize for GlobalType {
} }
impl SkipBytes for GlobalType { impl SkipBytes for GlobalType {
fn skip_bytes(_bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(_bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
*cursor += 2; *cursor += 2;
Ok(()) Ok(())
} }
@ -655,15 +655,18 @@ pub enum ConstExpr {
} }
impl ConstExpr { impl ConstExpr {
fn parse_u32(bytes: &[u8], cursor: &mut usize) -> Result<u32, String> { fn parse_u32(bytes: &[u8], cursor: &mut usize) -> Result<u32, ParseError> {
let err = Err("Invalid ConstExpr. Expected i32.".into()); let err = Err(ParseError {
offset: *cursor,
message: "Invalid ConstExpr. Expected i32.".into(),
});
if bytes[*cursor] != OpCode::I32CONST as u8 { if bytes[*cursor] != OpCode::I32CONST as u8 {
return err; return err;
} }
*cursor += 1; *cursor += 1;
let value = u32::parse((), bytes, cursor).unwrap(); let value = u32::parse((), bytes, cursor)?;
if bytes[*cursor] != OpCode::END as u8 { if bytes[*cursor] != OpCode::END as u8 {
return err; return err;
@ -706,7 +709,7 @@ impl Serialize for ConstExpr {
} }
impl SkipBytes for ConstExpr { impl SkipBytes for ConstExpr {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> { fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
while bytes[*cursor] != OpCode::END as u8 { while bytes[*cursor] != OpCode::END as u8 {
OpCode::skip_bytes(bytes, cursor)?; OpCode::skip_bytes(bytes, cursor)?;
} }
@ -737,7 +740,7 @@ pub struct GlobalSection<'a> {
} }
impl<'a> GlobalSection<'a> { impl<'a> GlobalSection<'a> {
pub fn parse_u32_at_index(&self, index: u32) -> Result<u32, String> { pub fn parse_u32_at_index(&self, index: u32) -> Result<u32, ParseError> {
let mut cursor = 0; let mut cursor = 0;
for _ in 0..index { for _ in 0..index {
GlobalType::skip_bytes(&self.bytes, &mut cursor)?; GlobalType::skip_bytes(&self.bytes, &mut cursor)?;