Rework frozen modules and directly deserialize to CodeObject<Literal>

This commit is contained in:
Noa 2023-01-13 22:25:43 -06:00
parent 969ea23d67
commit 9d6ae774f8
2 changed files with 129 additions and 71 deletions

View file

@ -1,7 +1,6 @@
//! Implement python as a virtual machine with bytecodes. This module //! Implement python as a virtual machine with bytecodes. This module
//! implements bytecode structure. //! implements bytecode structure.
use crate::marshal::MarshalError;
use crate::{marshal, Location}; use crate::{marshal, Location};
use bitflags::bitflags; use bitflags::bitflags;
use itertools::Itertools; use itertools::Itertools;
@ -46,6 +45,19 @@ pub trait ConstantBag: Sized + Copy {
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name; fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name;
} }
/// Conversion of a value into a [`ConstantBag`].
///
/// Generic entry points (for example `FrozenCodeObject::decode`) take
/// `impl AsBag` and call [`AsBag::as_bag`] to obtain the bag they hand to the
/// deserializer.
pub trait AsBag {
/// The bag type produced by [`AsBag::as_bag`].
type Bag: ConstantBag;
/// Consume `self` and return the corresponding bag.
// The method is named `as_*` but takes `self` by value, so clippy's
// `wrong_self_convention` lint is silenced explicitly.
#[allow(clippy::wrong_self_convention)]
fn as_bag(self) -> Self::Bag;
}
// Blanket impl: every `ConstantBag` is its own bag, so `as_bag` is the
// identity conversion for any type that already implements `ConstantBag`.
impl<Bag: ConstantBag> AsBag for Bag {
type Bag = Self;
// Returns `self` unchanged.
fn as_bag(self) -> Self {
self
}
}
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
pub struct BasicBag; pub struct BasicBag;
@ -1077,27 +1089,6 @@ impl<C: Constant> CodeObject<C> {
} }
} }
impl CodeObject<ConstantData> {
/// Load a code object from bytes.
///
/// `data` is expected to be an LZ4 block with a prepended size (the format
/// produced by [`CodeObject::to_bytes`]). The decompressed payload is then
/// deserialized with `marshal::deserialize_code` using [`BasicBag`].
///
/// # Errors
///
/// * `MarshalError::Eof` when decompression fails with `OutputTooSmall` or
///   `ExpectedAnotherByte`.
/// * `MarshalError::InvalidBytecode` for any other decompression error.
/// * Whatever error `marshal::deserialize_code` returns for the payload.
pub fn from_bytes(data: &[u8]) -> Result<Self, MarshalError> {
use lz4_flex::block::DecompressError;
// Translate lz4 decompression failures into the marshal error type.
let raw_bincode = lz4_flex::decompress_size_prepended(data).map_err(|e| match e {
DecompressError::OutputTooSmall { .. } | DecompressError::ExpectedAnotherByte => {
MarshalError::Eof
}
_ => MarshalError::InvalidBytecode,
})?;
// `&mut &[u8]` serves as the reader over the decompressed bytes.
marshal::deserialize_code(&mut &raw_bincode[..], BasicBag)
}
/// Serialize this bytecode to bytes.
///
/// The code object is written with `marshal::serialize_code` into a
/// temporary buffer, which is then LZ4-compressed with its size prepended.
pub fn to_bytes(&self) -> Vec<u8> {
let mut data = Vec::new();
marshal::serialize_code(&mut data, self);
lz4_flex::compress_prepend_size(&data)
}
}
impl<C: Constant> fmt::Display for CodeObject<C> { impl<C: Constant> fmt::Display for CodeObject<C> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display_inner(f, false, 1)?; self.display_inner(f, false, 1)?;
@ -1483,32 +1474,81 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
} }
} }
/// A frozen module. Holds a code object and whether it is part of a package pub mod frozen_lib {
#[derive(Debug)] use super::*;
pub struct FrozenModule { use marshal::{Read, ReadBorrowed, Write};
pub code: CodeObject<ConstantData>,
/// A frozen module. Holds a frozen code object and whether it is part of a package
#[derive(Copy, Clone)]
pub struct FrozenModule<B = &'static [u8]> {
pub code: FrozenCodeObject<B>,
pub package: bool, pub package: bool,
} }
pub mod frozen_lib { #[derive(Copy, Clone)]
use super::*; pub struct FrozenCodeObject<B> {
use marshal::{Read, Write}; pub bytes: B,
}
impl<B: AsRef<[u8]>> FrozenCodeObject<B> {
/// Decode a frozen codeobject
///
/// `bag` is converted with [`AsBag::as_bag`] and used to construct the
/// constants and names of the resulting [`CodeObject`].
///
/// # Panics
///
/// Panics if the stored bytes cannot be LZ4-decompressed or if the
/// decompressed payload is not a valid marshalled code object.
#[inline]
pub fn decode<Bag: AsBag>(
&self,
bag: Bag,
) -> CodeObject<<Bag::Bag as ConstantBag>::Constant> {
// Thin generic shim: resolve the byte slice and the concrete bag, then
// delegate to the helper that is generic over the bag type only.
Self::_decode(self.bytes.as_ref(), bag.as_bag())
}
// Decompresses `data` (LZ4 block with prepended size) and deserializes the
// result with `marshal::deserialize_code`. Both steps panic on failure.
// NOTE(review): presumably split out of `decode` to limit how much code is
// monomorphized per `AsBag`/`B` combination — confirm.
fn _decode<Bag: ConstantBag>(data: &[u8], bag: Bag) -> CodeObject<Bag::Constant> {
let decompressed = lz4_flex::decompress_size_prepended(data)
.expect("deserialize frozen CodeObject failed");
marshal::deserialize_code(&mut &decompressed[..], bag)
.expect("deserializing frozen CodeObject failed")
}
}
impl FrozenCodeObject<Vec<u8>> {
/// Encode `code` into an owned frozen code object.
///
/// The code object is serialized with `marshal::serialize_code` and the
/// resulting buffer is LZ4-compressed with its size prepended; the
/// compressed bytes become the `bytes` field.
pub fn encode<C: Constant>(code: &CodeObject<C>) -> Self {
// Uncompressed marshal output.
let mut data = Vec::new();
marshal::serialize_code(&mut data, code);
let bytes = lz4_flex::compress_prepend_size(&data);
FrozenCodeObject { bytes }
}
}
/// A library of frozen modules stored as raw bytes.
///
/// `B` is the byte container and defaults to the unsized `[u8]`.
// `repr(transparent)` gives `FrozenLib<B>` the same layout as its single
// field `B`; the pointer cast in `from_ref` relies on this.
#[repr(transparent)]
pub struct FrozenLib<B: ?Sized = [u8]> {
/// The encoded library contents.
pub bytes: B,
}
impl<B: AsRef<[u8]> + ?Sized> FrozenLib<B> {
pub const fn from_ref(b: &B) -> &FrozenLib<B> {
unsafe { &*(b as *const B as *const FrozenLib<B>) }
}
/// Decode a library to a iterable of frozen modules /// Decode a library to a iterable of frozen modules
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter { pub fn decode(&self) -> FrozenModulesIter<'_> {
let data = lz4_flex::decompress_size_prepended(bytes).unwrap(); let mut data = self.bytes.as_ref();
let mut data = marshal::Cursor { data, position: 0 };
let remaining = data.read_u32().unwrap(); let remaining = data.read_u32().unwrap();
FrozenModulesIter { remaining, data } FrozenModulesIter { remaining, data }
} }
pub struct FrozenModulesIter {
remaining: u32,
data: marshal::Cursor<Vec<u8>>,
} }
impl Iterator for FrozenModulesIter { impl<'a, B: AsRef<[u8]> + ?Sized> IntoIterator for &'a FrozenLib<B> {
type Item = (String, FrozenModule); type Item = (&'a str, FrozenModule<&'a [u8]>);
type IntoIter = FrozenModulesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.decode()
}
}
pub struct FrozenModulesIter<'a> {
remaining: u32,
data: &'a [u8],
}
impl<'a> Iterator for FrozenModulesIter<'a> {
type Item = (&'a str, FrozenModule<&'a [u8]>);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if self.remaining > 0 { if self.remaining > 0 {
@ -1524,31 +1564,37 @@ pub mod frozen_lib {
(self.remaining as usize, Some(self.remaining as usize)) (self.remaining as usize, Some(self.remaining as usize))
} }
} }
impl ExactSizeIterator for FrozenModulesIter {} impl ExactSizeIterator for FrozenModulesIter<'_> {}
fn read_entry(rdr: &mut impl Read) -> Result<(String, FrozenModule), marshal::MarshalError> { fn read_entry<'a>(
rdr: &mut &'a [u8],
) -> Result<(&'a str, FrozenModule<&'a [u8]>), marshal::MarshalError> {
let len = rdr.read_u32()?; let len = rdr.read_u32()?;
let name = rdr.read_str(len)?.to_owned(); let name = rdr.read_str_borrow(len)?;
let code = marshal::deserialize_code(rdr, BasicBag)?; let len = rdr.read_u32()?;
let code_slice = rdr.read_slice_borrow(len)?;
let code = FrozenCodeObject { bytes: code_slice };
let package = rdr.read_u8()? != 0; let package = rdr.read_u8()? != 0;
Ok((name, FrozenModule { code, package })) Ok((name, FrozenModule { code, package }))
} }
impl FrozenLib<Vec<u8>> {
/// Encode the given iterator of frozen modules into a compressed vector of bytes /// Encode the given iterator of frozen modules into a compressed vector of bytes
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8> pub fn encode<'a, I, B: AsRef<[u8]>>(lib: I) -> FrozenLib<Vec<u8>>
where where
I: IntoIterator<Item = (&'a str, &'a FrozenModule)>, I: IntoIterator<Item = (&'a str, FrozenModule<B>)>,
I::IntoIter: ExactSizeIterator + Clone, I::IntoIter: ExactSizeIterator + Clone,
{ {
let iter = lib.into_iter(); let iter = lib.into_iter();
let mut data = Vec::new(); let mut bytes = Vec::new();
write_lib(&mut data, iter); write_lib(&mut bytes, iter);
lz4_flex::compress_prepend_size(&data) Self { bytes }
}
} }
fn write_lib<'a>( fn write_lib<'a, B: AsRef<[u8]>>(
buf: &mut impl Write, buf: &mut Vec<u8>,
lib: impl ExactSizeIterator<Item = (&'a str, &'a FrozenModule)>, lib: impl ExactSizeIterator<Item = (&'a str, FrozenModule<B>)>,
) { ) {
marshal::write_len(buf, lib.len()); marshal::write_len(buf, lib.len());
for (name, module) in lib { for (name, module) in lib {
@ -1556,10 +1602,9 @@ pub mod frozen_lib {
} }
} }
fn write_entry(buf: &mut impl Write, name: &str, module: &FrozenModule) { fn write_entry(buf: &mut Vec<u8>, name: &str, module: FrozenModule<impl AsRef<[u8]>>) {
marshal::write_len(buf, name.len()); marshal::write_vec(buf, name.as_bytes());
buf.write_slice(name.as_bytes()); marshal::write_vec(buf, module.code.bytes.as_ref());
marshal::serialize_code(buf, &module.code);
buf.write_u8(module.package as u8); buf.write_u8(module.package as u8);
} }
} }

View file

@ -130,8 +130,21 @@ pub trait Read {
} }
} }
/// A [`Read`] implementation whose results can borrow from the underlying
/// data for the lifetime `'a`, instead of from the `&mut self` borrow of the
/// reader.
pub(crate) trait ReadBorrowed<'a>: Read {
/// Read `n` bytes and return them as a slice valid for `'a`.
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>;
/// Read `len` bytes via [`ReadBorrowed::read_slice_borrow`] and interpret
/// them as UTF-8.
///
/// A failed read or invalid UTF-8 is returned as an error (the UTF-8 error
/// is converted by `?`).
fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> {
Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?)
}
}
impl Read for &[u8] { impl Read for &[u8] {
fn read_slice(&mut self, n: u32) -> Result<&[u8]> { fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
self.read_slice_borrow(n)
}
}
impl<'a> ReadBorrowed<'a> for &'a [u8] {
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> {
let data = self.get(..n as usize).ok_or(MarshalError::Eof)?; let data = self.get(..n as usize).ok_or(MarshalError::Eof)?;
*self = &self[n as usize..]; *self = &self[n as usize..];
Ok(data) Ok(data)
@ -474,6 +487,11 @@ pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
buf.write_u32(len); buf.write_u32(len);
} }
/// Write `slice` to `buf` as a length-prefixed byte string: first the length
/// via [`write_len`], then the raw bytes.
pub(crate) fn write_vec<W: Write>(buf: &mut W, slice: &[u8]) {
write_len(buf, slice.len());
buf.write_slice(slice);
}
pub fn serialize_value<W: Write, D: Dumpable>( pub fn serialize_value<W: Write, D: Dumpable>(
buf: &mut W, buf: &mut W,
constant: DumpableValue<'_, D>, constant: DumpableValue<'_, D>,
@ -501,13 +519,11 @@ pub fn serialize_value<W: Write, D: Dumpable>(
} }
DumpableValue::Str(s) => { DumpableValue::Str(s) => {
buf.write_u8(Type::Unicode as u8); buf.write_u8(Type::Unicode as u8);
write_len(buf, s.len()); write_vec(buf, s.as_bytes());
buf.write_slice(s.as_bytes());
} }
DumpableValue::Bytes(b) => { DumpableValue::Bytes(b) => {
buf.write_u8(Type::Bytes as u8); buf.write_u8(Type::Bytes as u8);
write_len(buf, b.len()); write_vec(buf, b);
buf.write_slice(b);
} }
DumpableValue::Code(c) => { DumpableValue::Code(c) => {
buf.write_u8(Type::Code as u8); buf.write_u8(Type::Code as u8);
@ -580,14 +596,12 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
buf.write_u32(code.arg_count); buf.write_u32(code.arg_count);
buf.write_u32(code.kwonlyarg_count); buf.write_u32(code.kwonlyarg_count);
write_len(buf, code.source_path.as_ref().len()); write_vec(buf, code.source_path.as_ref().as_bytes());
buf.write_slice(code.source_path.as_ref().as_bytes());
buf.write_u32(code.first_line_number); buf.write_u32(code.first_line_number);
buf.write_u32(code.max_stackdepth); buf.write_u32(code.max_stackdepth);
write_len(buf, code.obj_name.as_ref().len()); write_vec(buf, code.obj_name.as_ref().as_bytes());
buf.write_slice(code.obj_name.as_ref().as_bytes());
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]); let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
write_len(buf, cell2arg.len()); write_len(buf, cell2arg.len());
@ -603,8 +617,7 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
let mut write_names = |names: &[C::Name]| { let mut write_names = |names: &[C::Name]| {
write_len(buf, names.len()); write_len(buf, names.len());
for name in names { for name in names {
write_len(buf, name.as_ref().len()); write_vec(buf, name.as_ref().as_bytes());
buf.write_slice(name.as_ref().as_bytes());
} }
}; };