Start using Result to handle Wasm module parse failures

This commit is contained in:
Brian Carroll 2022-05-22 09:30:41 +01:00
parent fc5d71c20c
commit 8114ec60c1
No known key found for this signature in database
GPG key ID: 9CF4E3BF9C4722C7
11 changed files with 220 additions and 155 deletions

View file

@ -1,16 +1,23 @@
use super::serialize::MAX_SIZE_ENCODED_U32;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use roc_error_macros::internal_error;
/// Parse serialized bytes into a data structure
/// Specific parsers may need contextual data from other parts of the .wasm file
/// (passed in as `ctx`; parsers that need no context use `()`).
pub trait Parse<ParseContext>: Sized {
/// Parse one value from `bytes`, starting at index `*cursor`.
/// The implementations in this file move `*cursor` forward past the bytes they consume
/// on success, and describe the failure in the returned `String` otherwise.
fn parse(ctx: ParseContext, bytes: &[u8], cursor: &mut usize) -> Result<Self, String>;
}
/// Skip over serialized bytes for a type
/// This may, or may not, require looking at the byte values
/// Implementations move `*cursor` forward past the serialized value without building it.
pub trait SkipBytes {
fn skip_bytes(bytes: &[u8], cursor: &mut usize);
pub trait SkipBytes: Sized {
// `bytes` is the whole buffer being read; `*cursor` is the index of the value to skip.
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String>;
}
/// Decode an unsigned 32-bit integer from the provided buffer in LEB-128 format
/// Return the integer itself and the offset after it ends
pub fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), String> {
fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), String> {
let mut value = 0;
let mut shift = 0;
for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() {
@ -26,6 +33,27 @@ pub fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), String> {
))
}
/// Parse a LEB-128 encoded `u32` starting at `*cursor`.
///
/// On success `*cursor` is advanced past the encoded bytes and the decoded value is returned.
/// On failure `*cursor` is left unchanged and an error message is returned. A cursor that
/// points beyond the end of `bytes` is reported as an error rather than causing a panic.
impl Parse<()> for u32 {
    fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, String> {
        // Checked slicing: `&bytes[*cursor..]` would panic if the cursor overshot the input.
        let remaining = bytes.get(*cursor..).ok_or_else(|| {
            format!(
                "Cursor {} is past the end of the input ({} bytes)",
                *cursor,
                bytes.len()
            )
        })?;
        let (value, len) = decode_u32(remaining)?;
        *cursor += len;
        Ok(value)
    }
}
// Parse a vector of bytes (used for strings, but we don't bother with utf8 validation)
// The serialized form is a LEB-128 u32 length followed by that many raw bytes.
// The bytes are copied into the arena so the result does not borrow from the input.
// A malformed length, or a length that runs past the end of the input, is returned
// as an Err instead of panicking.
impl<'a> Parse<&'a Bump> for Vec<'a, u8> {
    fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, String> {
        // Propagate a bad length prefix to the caller rather than aborting.
        let len = u32::parse((), bytes, cursor)? as usize;
        let start = *cursor;

        // checked_add guards against overflow on 32-bit hosts; `get` guards against
        // a declared length that is larger than the remaining input.
        let payload = start
            .checked_add(len)
            .and_then(|end| bytes.get(start..end))
            .ok_or_else(|| {
                format!(
                    "Byte vector of length {} at offset {} extends past the end of the input ({} bytes)",
                    len,
                    start,
                    bytes.len()
                )
            })?;

        let mut copy = Vec::with_capacity_in(payload.len(), arena);
        copy.extend_from_slice(payload);
        *cursor = start + payload.len();
        Ok(copy)
    }
}
pub fn parse_u32_or_panic(bytes: &[u8], cursor: &mut usize) -> u32 {
let (value, len) = decode_u32(&bytes[*cursor..]).unwrap_or_else(|e| internal_error!("{}", e));
*cursor += len;
@ -42,40 +70,41 @@ pub fn parse_string_bytes<'a>(arena: &'a Bump, bytes: &[u8], cursor: &mut usize)
}
/// Skip over a LEB-encoded u32 without decoding its value.
/// Looks at no more than MAX_LEN bytes starting at `*cursor` for the first byte whose
/// high bit (0x80) is clear, and leaves `*cursor` just past that byte.
/// If no such byte is found, the input is reported as "Invalid LEB encoding".
impl SkipBytes for u32 {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> {
// Upper bound on how many bytes are examined
const MAX_LEN: usize = 5;
// `enumerate` runs before `skip`, so `i` is an index into the whole of `bytes`
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
if byte & 0x80 == 0 {
*cursor = i + 1;
return;
return Ok(());
}
}
internal_error!("Invalid LEB encoding");
Err("Invalid LEB encoding".into())
}
}
/// Skip over a LEB-encoded u64 without decoding its value.
/// Looks at no more than MAX_LEN bytes starting at `*cursor` for the first byte whose
/// high bit (0x80) is clear, and leaves `*cursor` just past that byte.
/// If no such byte is found, the input is reported as "Invalid LEB encoding".
impl SkipBytes for u64 {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> {
// Upper bound on how many bytes are examined
const MAX_LEN: usize = 10;
// `enumerate` runs before `skip`, so `i` is an index into the whole of `bytes`
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
if byte & 0x80 == 0 {
*cursor = i + 1;
return;
return Ok(());
}
}
internal_error!("Invalid LEB encoding");
Err("Invalid LEB encoding".into())
}
}
/// Skip over a single byte.
/// The byte values are never inspected (hence the unused `_bytes` parameter);
/// `*cursor` is simply moved forward by one, with no check against the input length.
impl SkipBytes for u8 {
fn skip_bytes(_bytes: &[u8], cursor: &mut usize) {
fn skip_bytes(_bytes: &[u8], cursor: &mut usize) -> Result<(), String> {
*cursor += 1;
Ok(())
}
}
/// Note: This is just for skipping over Wasm bytes. We don't actually care about String vs str!
impl SkipBytes for String {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) {
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), String> {
let len = parse_u32_or_panic(bytes, cursor);
if false {
@ -87,5 +116,45 @@ impl SkipBytes for String {
}
*cursor += len as usize;
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::wasm_module::parse::{decode_u32, parse_u32_or_panic};

    /// Well-formed encodings decode to the expected (value, encoded length) pair,
    /// while over-long, unterminated and empty inputs are rejected.
    #[test]
    fn test_decode_u32() {
        let check_ok = |encoded: &[u8], expected: (u32, usize)| {
            assert_eq!(decode_u32(encoded), Ok(expected));
        };

        check_ok(&[0], (0, 1));
        check_ok(&[64], (64, 1));
        check_ok(&[0x7f], (0x7f, 1));
        check_ok(&[0x80, 0x01], (0x80, 2));
        check_ok(&[0xff, 0x7f], (0x3fff, 2));
        check_ok(&[0x80, 0x80, 0x01], (0x4000, 3));
        check_ok(
            &[0xff, 0xff, 0xff, 0xff, 0x0f],
            (u32::MAX, MAX_SIZE_ENCODED_U32),
        );

        // Every byte has its continuation bit set, or there are no bytes at all
        assert!(decode_u32(&[0x80; 6]).is_err());
        assert!(decode_u32(&[0x80; 2]).is_err());
        assert!(decode_u32(&[]).is_err());
    }

    /// Consecutive values are read from one buffer, with the cursor landing
    /// on the first byte of the next value each time.
    #[test]
    fn test_parse_u32_sequence() {
        let bytes = &[0, 0x80, 0x01, 0xff, 0xff, 0xff, 0xff, 0x0f];
        // (decoded value, cursor position after reading it)
        let steps = [(0, 1), (128, 3), (u32::MAX, 8)];

        let mut cursor = 0;
        for &(expected_value, expected_cursor) in steps.iter() {
            assert_eq!(parse_u32_or_panic(bytes, &mut cursor), expected_value);
            assert_eq!(cursor, expected_cursor);
        }
    }
}