mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-28 14:24:45 +00:00
moved all crates into seperate folder + related path fixes
This commit is contained in:
parent
12ef03bb86
commit
eee85fa45d
1063 changed files with 92 additions and 93 deletions
241
crates/compiler/gen_wasm/src/wasm_module/parse.rs
Normal file
241
crates/compiler/gen_wasm/src/wasm_module/parse.rs
Normal file
|
@ -0,0 +1,241 @@
|
|||
use super::serialize::MAX_SIZE_ENCODED_U32;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
|
||||
/// Parse serialized bytes into a data structure
|
||||
/// Specific parsers may need contextual data from other parts of the .wasm file
|
||||
pub trait Parse<ParseContext>: Sized {
|
||||
fn parse(ctx: ParseContext, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ParseError {
|
||||
pub offset: usize,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// Decode an unsigned 32-bit integer from the provided buffer in LEB-128 format
|
||||
/// Return the integer itself and the offset after it ends
|
||||
fn decode_u32(bytes: &[u8]) -> Result<(u32, usize), ()> {
|
||||
let mut value = 0;
|
||||
let mut shift = 0;
|
||||
for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() {
|
||||
value += ((byte & 0x7f) as u32) << shift;
|
||||
if (byte & 0x80) == 0 {
|
||||
return Ok((value, i + 1));
|
||||
}
|
||||
shift += 7;
|
||||
}
|
||||
Err(())
|
||||
}
|
||||
|
||||
impl Parse<()> for u32 {
|
||||
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
match decode_u32(&bytes[*cursor..]) {
|
||||
Ok((value, len)) => {
|
||||
*cursor += len;
|
||||
Ok(value)
|
||||
}
|
||||
Err(()) => Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: format!(
|
||||
"Failed to decode u32 as LEB-128 from bytes: {:2x?}",
|
||||
&bytes[*cursor..][..MAX_SIZE_ENCODED_U32]
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse<()> for u8 {
|
||||
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let byte = bytes[*cursor];
|
||||
*cursor += 1;
|
||||
Ok(byte)
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode a signed 32-bit integer from the provided buffer in LEB-128 format
|
||||
/// Return the integer itself and the offset after it ends
|
||||
fn decode_i32(bytes: &[u8]) -> Result<(i32, usize), ()> {
|
||||
let mut value = 0;
|
||||
let mut shift = 0;
|
||||
for (i, byte) in bytes.iter().take(MAX_SIZE_ENCODED_U32).enumerate() {
|
||||
value |= ((byte & 0x7f) as i32) << shift;
|
||||
if (byte & 0x80) == 0 {
|
||||
let is_negative = byte & 0x40 != 0;
|
||||
if shift < MAX_SIZE_ENCODED_U32 && is_negative {
|
||||
value |= -1 << shift;
|
||||
}
|
||||
return Ok((value, i + 1));
|
||||
}
|
||||
shift += 7;
|
||||
}
|
||||
Err(())
|
||||
}
|
||||
|
||||
impl Parse<()> for i32 {
|
||||
fn parse(_ctx: (), bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
match decode_i32(&bytes[*cursor..]) {
|
||||
Ok((value, len)) => {
|
||||
*cursor += len;
|
||||
Ok(value)
|
||||
}
|
||||
Err(()) => Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: format!(
|
||||
"Failed to decode i32 as LEB-128 from bytes: {:2x?}",
|
||||
&bytes[*cursor..][..MAX_SIZE_ENCODED_U32]
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Parse<&'a Bump> for &'a str {
|
||||
fn parse(arena: &'a Bump, bytes: &[u8], cursor: &mut usize) -> Result<Self, ParseError> {
|
||||
let len = u32::parse((), bytes, cursor)?;
|
||||
let end = *cursor + len as usize;
|
||||
let bytes: &[u8] = &bytes[*cursor..end];
|
||||
let copy = arena.alloc_slice_copy(bytes);
|
||||
let s = unsafe { std::str::from_utf8_unchecked(copy) };
|
||||
*cursor = end;
|
||||
Ok(s)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_variable_size_items<'a, T>(
|
||||
arena: &'a Bump,
|
||||
bytes: &[u8],
|
||||
cursor: &mut usize,
|
||||
) -> Result<Vec<'a, T>, ParseError>
|
||||
where
|
||||
T: Parse<&'a Bump>,
|
||||
{
|
||||
let len = u32::parse((), bytes, cursor)?;
|
||||
let mut vector: Vec<'a, T> = Vec::with_capacity_in(len as usize, arena);
|
||||
for _ in 0..len {
|
||||
let item = T::parse(arena, bytes, cursor)?;
|
||||
vector.push(item);
|
||||
}
|
||||
Ok(vector)
|
||||
}
|
||||
|
||||
pub fn parse_fixed_size_items<'a, T>(
|
||||
arena: &'a Bump,
|
||||
bytes: &[u8],
|
||||
cursor: &mut usize,
|
||||
) -> Result<Vec<'a, T>, ParseError>
|
||||
where
|
||||
T: Parse<()>,
|
||||
{
|
||||
let len = u32::parse((), bytes, cursor)?;
|
||||
let mut vector: Vec<'a, T> = Vec::with_capacity_in(len as usize, arena);
|
||||
for _ in 0..len {
|
||||
let item = T::parse((), bytes, cursor)?;
|
||||
vector.push(item);
|
||||
}
|
||||
Ok(vector)
|
||||
}
|
||||
|
||||
/// Skip over serialized bytes for a type
|
||||
/// This may, or may not, require looking at the byte values
|
||||
pub trait SkipBytes: Sized {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError>;
|
||||
}
|
||||
|
||||
impl SkipBytes for u32 {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
const MAX_LEN: usize = 5;
|
||||
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
|
||||
if byte & 0x80 == 0 {
|
||||
*cursor = i + 1;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: "Invalid LEB encoding".into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl SkipBytes for u64 {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
const MAX_LEN: usize = 10;
|
||||
for (i, byte) in bytes.iter().enumerate().skip(*cursor).take(MAX_LEN) {
|
||||
if byte & 0x80 == 0 {
|
||||
*cursor = i + 1;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(ParseError {
|
||||
offset: *cursor,
|
||||
message: "Invalid LEB encoding".into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl SkipBytes for u8 {
|
||||
fn skip_bytes(_bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
*cursor += 1;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Note: This is just for skipping over Wasm bytes. We don't actually care about String vs str!
|
||||
impl SkipBytes for String {
|
||||
fn skip_bytes(bytes: &[u8], cursor: &mut usize) -> Result<(), ParseError> {
|
||||
let len = u32::parse((), bytes, cursor)?;
|
||||
|
||||
if false {
|
||||
let str_bytes = &bytes[*cursor..(*cursor + len as usize)];
|
||||
println!(
|
||||
"Skipping string {:?}",
|
||||
std::str::from_utf8(str_bytes).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
*cursor += len as usize;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::wasm_module::parse::decode_u32;
|
||||
|
||||
#[test]
|
||||
fn test_decode_u32() {
|
||||
assert_eq!(decode_u32(&[0]), Ok((0, 1)));
|
||||
assert_eq!(decode_u32(&[64]), Ok((64, 1)));
|
||||
assert_eq!(decode_u32(&[0x7f]), Ok((0x7f, 1)));
|
||||
assert_eq!(decode_u32(&[0x80, 0x01]), Ok((0x80, 2)));
|
||||
assert_eq!(decode_u32(&[0xff, 0x7f]), Ok((0x3fff, 2)));
|
||||
assert_eq!(decode_u32(&[0x80, 0x80, 0x01]), Ok((0x4000, 3)));
|
||||
assert_eq!(
|
||||
decode_u32(&[0xff, 0xff, 0xff, 0xff, 0x0f]),
|
||||
Ok((u32::MAX, MAX_SIZE_ENCODED_U32))
|
||||
);
|
||||
assert!(matches!(decode_u32(&[0x80; 6]), Err(_)));
|
||||
assert!(matches!(decode_u32(&[0x80; 2]), Err(_)));
|
||||
assert!(matches!(decode_u32(&[]), Err(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u32_sequence() {
|
||||
let bytes = &[0, 0x80, 0x01, 0xff, 0xff, 0xff, 0xff, 0x0f];
|
||||
let expected = [0, 128, u32::MAX];
|
||||
let mut cursor = 0;
|
||||
|
||||
assert_eq!(u32::parse((), bytes, &mut cursor).unwrap(), expected[0]);
|
||||
assert_eq!(cursor, 1);
|
||||
|
||||
assert_eq!(u32::parse((), bytes, &mut cursor).unwrap(), expected[1]);
|
||||
assert_eq!(cursor, 3);
|
||||
|
||||
assert_eq!(u32::parse((), bytes, &mut cursor).unwrap(), expected[2]);
|
||||
assert_eq!(cursor, 8);
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue