diff --git a/core/src/bytecode.rs b/core/src/bytecode.rs index 3b32031..c01c4b8 100644 --- a/core/src/bytecode.rs +++ b/core/src/bytecode.rs @@ -1,7 +1,6 @@ //! Implement python as a virtual machine with bytecodes. This module //! implements bytecode structure. -use crate::marshal::MarshalError; use crate::{marshal, Location}; use bitflags::bitflags; use itertools::Itertools; @@ -46,6 +45,19 @@ pub trait ConstantBag: Sized + Copy { fn make_name(&self, name: &str) -> ::Name; } +pub trait AsBag { + type Bag: ConstantBag; + #[allow(clippy::wrong_self_convention)] + fn as_bag(self) -> Self::Bag; +} + +impl AsBag for Bag { + type Bag = Self; + fn as_bag(self) -> Self { + self + } +} + #[derive(Clone, Copy)] pub struct BasicBag; @@ -1077,27 +1089,6 @@ impl CodeObject { } } -impl CodeObject { - /// Load a code object from bytes - pub fn from_bytes(data: &[u8]) -> Result { - use lz4_flex::block::DecompressError; - let raw_bincode = lz4_flex::decompress_size_prepended(data).map_err(|e| match e { - DecompressError::OutputTooSmall { .. } | DecompressError::ExpectedAnotherByte => { - MarshalError::Eof - } - _ => MarshalError::InvalidBytecode, - })?; - marshal::deserialize_code(&mut &raw_bincode[..], BasicBag) - } - - /// Serialize this bytecode to bytes. - pub fn to_bytes(&self) -> Vec { - let mut data = Vec::new(); - marshal::serialize_code(&mut data, self); - lz4_flex::compress_prepend_size(&data) - } -} - impl fmt::Display for CodeObject { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.display_inner(f, false, 1)?; @@ -1483,32 +1474,81 @@ impl fmt::Debug for CodeObject { } } -/// A frozen module. Holds a code object and whether it is part of a package -#[derive(Debug)] -pub struct FrozenModule { - pub code: CodeObject, - pub package: bool, -} - pub mod frozen_lib { use super::*; - use marshal::{Read, Write}; + use marshal::{Read, ReadBorrowed, Write}; - /// Decode a library to a iterable of frozen modules - pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter { - let data = lz4_flex::decompress_size_prepended(bytes).unwrap(); - let mut data = marshal::Cursor { data, position: 0 }; - let remaining = data.read_u32().unwrap(); - FrozenModulesIter { remaining, data } + /// A frozen module. Holds a frozen code object and whether it is part of a package + #[derive(Copy, Clone)] + pub struct FrozenModule { + pub code: FrozenCodeObject, + pub package: bool, } - pub struct FrozenModulesIter { + #[derive(Copy, Clone)] + pub struct FrozenCodeObject { + pub bytes: B, + } + + impl> FrozenCodeObject { + /// Decode a frozen codeobject + #[inline] + pub fn decode( + &self, + bag: Bag, + ) -> CodeObject<::Constant> { + Self::_decode(self.bytes.as_ref(), bag.as_bag()) + } + fn _decode(data: &[u8], bag: Bag) -> CodeObject { + let decompressed = lz4_flex::decompress_size_prepended(data) + .expect("deserialize frozen CodeObject failed"); + marshal::deserialize_code(&mut &decompressed[..], bag) + .expect("deserializing frozen CodeObject failed") + } + } + + impl FrozenCodeObject> { + pub fn encode(code: &CodeObject) -> Self { + let mut data = Vec::new(); + marshal::serialize_code(&mut data, code); + let bytes = lz4_flex::compress_prepend_size(&data); + FrozenCodeObject { bytes } + } + } + + #[repr(transparent)] + pub struct FrozenLib { + pub bytes: B, + } + + impl + ?Sized> FrozenLib { + pub const fn from_ref(b: &B) -> &FrozenLib { + unsafe { &*(b as *const B as *const FrozenLib) } + } + + /// Decode a library to a iterable of frozen modules + pub fn decode(&self) -> FrozenModulesIter<'_> { + let mut data = self.bytes.as_ref(); + let remaining = data.read_u32().unwrap(); + FrozenModulesIter { remaining, data } + } + } + + impl<'a, B: AsRef<[u8]> + ?Sized> IntoIterator for &'a FrozenLib { + type Item = (&'a str, FrozenModule<&'a [u8]>); + type IntoIter = FrozenModulesIter<'a>; + fn into_iter(self) -> Self::IntoIter { + self.decode() + } + } + + pub struct FrozenModulesIter<'a> { remaining: u32, - data: marshal::Cursor>, + data: &'a [u8], } - impl Iterator for FrozenModulesIter { - type Item = (String, FrozenModule); + impl<'a> Iterator for FrozenModulesIter<'a> { + type Item = (&'a str, FrozenModule<&'a [u8]>); fn next(&mut self) -> Option { if self.remaining > 0 { @@ -1524,31 +1564,37 @@ pub mod frozen_lib { (self.remaining as usize, Some(self.remaining as usize)) } } - impl ExactSizeIterator for FrozenModulesIter {} + impl ExactSizeIterator for FrozenModulesIter<'_> {} - fn read_entry(rdr: &mut impl Read) -> Result<(String, FrozenModule), marshal::MarshalError> { + fn read_entry<'a>( + rdr: &mut &'a [u8], + ) -> Result<(&'a str, FrozenModule<&'a [u8]>), marshal::MarshalError> { let len = rdr.read_u32()?; - let name = rdr.read_str(len)?.to_owned(); - let code = marshal::deserialize_code(rdr, BasicBag)?; + let name = rdr.read_str_borrow(len)?; + let len = rdr.read_u32()?; + let code_slice = rdr.read_slice_borrow(len)?; + let code = FrozenCodeObject { bytes: code_slice }; let package = rdr.read_u8()? != 0; Ok((name, FrozenModule { code, package })) } - /// Encode the given iterator of frozen modules into a compressed vector of bytes - pub fn encode_lib<'a, I>(lib: I) -> Vec - where - I: IntoIterator, - I::IntoIter: ExactSizeIterator + Clone, - { - let iter = lib.into_iter(); - let mut data = Vec::new(); - write_lib(&mut data, iter); - lz4_flex::compress_prepend_size(&data) + impl FrozenLib> { + /// Encode the given iterator of frozen modules into a compressed vector of bytes + pub fn encode<'a, I, B: AsRef<[u8]>>(lib: I) -> FrozenLib> + where + I: IntoIterator)>, + I::IntoIter: ExactSizeIterator + Clone, + { + let iter = lib.into_iter(); + let mut bytes = Vec::new(); + write_lib(&mut bytes, iter); + Self { bytes } + } } - fn write_lib<'a>( - buf: &mut impl Write, - lib: impl ExactSizeIterator, + fn write_lib<'a, B: AsRef<[u8]>>( + buf: &mut Vec, + lib: impl ExactSizeIterator)>, ) { marshal::write_len(buf, lib.len()); for (name, module) in lib { @@ -1556,10 +1602,9 @@ pub mod frozen_lib { } } - fn write_entry(buf: &mut impl Write, name: &str, module: &FrozenModule) { - marshal::write_len(buf, name.len()); - buf.write_slice(name.as_bytes()); - marshal::serialize_code(buf, &module.code); + fn write_entry(buf: &mut Vec, name: &str, module: FrozenModule>) { + marshal::write_vec(buf, name.as_bytes()); + marshal::write_vec(buf, module.code.bytes.as_ref()); buf.write_u8(module.package as u8); } } diff --git a/core/src/marshal.rs b/core/src/marshal.rs index 69d8879..e9f962f 100644 --- a/core/src/marshal.rs +++ b/core/src/marshal.rs @@ -130,8 +130,21 @@ pub trait Read { } } +pub(crate) trait ReadBorrowed<'a>: Read { + fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>; + fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> { + Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?) + } +} + impl Read for &[u8] { fn read_slice(&mut self, n: u32) -> Result<&[u8]> { + self.read_slice_borrow(n) + } +} + +impl<'a> ReadBorrowed<'a> for &'a [u8] { + fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> { let data = self.get(..n as usize).ok_or(MarshalError::Eof)?; *self = &self[n as usize..]; Ok(data) @@ -474,6 +487,11 @@ pub(crate) fn write_len(buf: &mut W, len: usize) { buf.write_u32(len); } +pub(crate) fn write_vec(buf: &mut W, slice: &[u8]) { + write_len(buf, slice.len()); + buf.write_slice(slice); +} + pub fn serialize_value( buf: &mut W, constant: DumpableValue<'_, D>, @@ -501,13 +519,11 @@ pub fn serialize_value( } DumpableValue::Str(s) => { buf.write_u8(Type::Unicode as u8); - write_len(buf, s.len()); - buf.write_slice(s.as_bytes()); + write_vec(buf, s.as_bytes()); } DumpableValue::Bytes(b) => { buf.write_u8(Type::Bytes as u8); - write_len(buf, b.len()); - buf.write_slice(b); + write_vec(buf, b); } DumpableValue::Code(c) => { buf.write_u8(Type::Code as u8); @@ -580,14 +596,12 @@ pub fn serialize_code(buf: &mut W, code: &CodeObject) buf.write_u32(code.arg_count); buf.write_u32(code.kwonlyarg_count); - write_len(buf, code.source_path.as_ref().len()); - buf.write_slice(code.source_path.as_ref().as_bytes()); + write_vec(buf, code.source_path.as_ref().as_bytes()); buf.write_u32(code.first_line_number); buf.write_u32(code.max_stackdepth); - write_len(buf, code.obj_name.as_ref().len()); - buf.write_slice(code.obj_name.as_ref().as_bytes()); + write_vec(buf, code.obj_name.as_ref().as_bytes()); let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]); write_len(buf, cell2arg.len()); @@ -603,8 +617,7 @@ pub fn serialize_code(buf: &mut W, code: &CodeObject) let mut write_names = |names: &[C::Name]| { write_len(buf, names.len()); for name in names { - write_len(buf, name.as_ref().len()); - buf.write_slice(name.as_ref().as_bytes()); + write_vec(buf, name.as_ref().as_bytes()); } };