mirror of
https://github.com/RustPython/Parser.git
synced 2025-08-26 21:34:55 +00:00
Merge pull request #4608 from coolreader18/bag-deser
Rework frozen modules and directly deserialize to CodeObject<Literal>
This commit is contained in:
commit
0ea53825db
2 changed files with 129 additions and 71 deletions
|
@ -1,7 +1,6 @@
|
||||||
//! Implement python as a virtual machine with bytecodes. This module
|
//! Implement python as a virtual machine with bytecodes. This module
|
||||||
//! implements bytecode structure.
|
//! implements bytecode structure.
|
||||||
|
|
||||||
use crate::marshal::MarshalError;
|
|
||||||
use crate::{marshal, Location};
|
use crate::{marshal, Location};
|
||||||
use bitflags::bitflags;
|
use bitflags::bitflags;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
@ -46,6 +45,19 @@ pub trait ConstantBag: Sized + Copy {
|
||||||
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name;
|
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub trait AsBag {
|
||||||
|
type Bag: ConstantBag;
|
||||||
|
#[allow(clippy::wrong_self_convention)]
|
||||||
|
fn as_bag(self) -> Self::Bag;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Every [`ConstantBag`] is its own bag: the conversion is the identity.
impl<Bag> AsBag for Bag
where
    Bag: ConstantBag,
{
    type Bag = Self;

    fn as_bag(self) -> Self {
        self
    }
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct BasicBag;
|
pub struct BasicBag;
|
||||||
|
|
||||||
|
@ -1077,27 +1089,6 @@ impl<C: Constant> CodeObject<C> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CodeObject<ConstantData> {
|
|
||||||
/// Load a code object from bytes
|
|
||||||
pub fn from_bytes(data: &[u8]) -> Result<Self, MarshalError> {
|
|
||||||
use lz4_flex::block::DecompressError;
|
|
||||||
let raw_bincode = lz4_flex::decompress_size_prepended(data).map_err(|e| match e {
|
|
||||||
DecompressError::OutputTooSmall { .. } | DecompressError::ExpectedAnotherByte => {
|
|
||||||
MarshalError::Eof
|
|
||||||
}
|
|
||||||
_ => MarshalError::InvalidBytecode,
|
|
||||||
})?;
|
|
||||||
marshal::deserialize_code(&mut &raw_bincode[..], BasicBag)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Serialize this bytecode to bytes.
|
|
||||||
pub fn to_bytes(&self) -> Vec<u8> {
|
|
||||||
let mut data = Vec::new();
|
|
||||||
marshal::serialize_code(&mut data, self);
|
|
||||||
lz4_flex::compress_prepend_size(&data)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<C: Constant> fmt::Display for CodeObject<C> {
|
impl<C: Constant> fmt::Display for CodeObject<C> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
self.display_inner(f, false, 1)?;
|
self.display_inner(f, false, 1)?;
|
||||||
|
@ -1483,32 +1474,81 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A frozen module. Holds a code object and whether it is part of a package
|
pub mod frozen_lib {
|
||||||
#[derive(Debug)]
|
use super::*;
|
||||||
pub struct FrozenModule {
|
use marshal::{Read, ReadBorrowed, Write};
|
||||||
pub code: CodeObject<ConstantData>,
|
|
||||||
|
/// A frozen module. Holds a frozen code object and whether it is part of a package
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub struct FrozenModule<B = &'static [u8]> {
|
||||||
|
pub code: FrozenCodeObject<B>,
|
||||||
pub package: bool,
|
pub package: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod frozen_lib {
|
#[derive(Copy, Clone)]
|
||||||
use super::*;
|
pub struct FrozenCodeObject<B> {
|
||||||
use marshal::{Read, Write};
|
pub bytes: B,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<B: AsRef<[u8]>> FrozenCodeObject<B> {
|
||||||
|
/// Decode a frozen codeobject
|
||||||
|
#[inline]
|
||||||
|
pub fn decode<Bag: AsBag>(
|
||||||
|
&self,
|
||||||
|
bag: Bag,
|
||||||
|
) -> CodeObject<<Bag::Bag as ConstantBag>::Constant> {
|
||||||
|
Self::_decode(self.bytes.as_ref(), bag.as_bag())
|
||||||
|
}
|
||||||
|
fn _decode<Bag: ConstantBag>(data: &[u8], bag: Bag) -> CodeObject<Bag::Constant> {
|
||||||
|
let decompressed = lz4_flex::decompress_size_prepended(data)
|
||||||
|
.expect("deserialize frozen CodeObject failed");
|
||||||
|
marshal::deserialize_code(&mut &decompressed[..], bag)
|
||||||
|
.expect("deserializing frozen CodeObject failed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FrozenCodeObject<Vec<u8>> {
|
||||||
|
pub fn encode<C: Constant>(code: &CodeObject<C>) -> Self {
|
||||||
|
let mut data = Vec::new();
|
||||||
|
marshal::serialize_code(&mut data, code);
|
||||||
|
let bytes = lz4_flex::compress_prepend_size(&data);
|
||||||
|
FrozenCodeObject { bytes }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An encoded library of frozen modules, held as raw bytes.
///
/// The wrapper is `#[repr(transparent)]`, so it has the same layout as the
/// byte container `B` it wraps.
#[repr(transparent)]
pub struct FrozenLib<B: ?Sized = [u8]> {
    /// The encoded library data.
    pub bytes: B,
}
|
||||||
|
|
||||||
|
impl<B: AsRef<[u8]> + ?Sized> FrozenLib<B> {
|
||||||
|
/// View a reference to the underlying bytes as a reference to a `FrozenLib`
/// without copying.
pub const fn from_ref(b: &B) -> &FrozenLib<B> {
    let wrapped = b as *const B as *const FrozenLib<B>;
    // SAFETY: `FrozenLib<B>` is `#[repr(transparent)]` over its single field of
    // type `B`, so a valid `&B` is also a valid `&FrozenLib<B>` for the same
    // lifetime.
    unsafe { &*wrapped }
}
|
||||||
|
|
||||||
/// Decode a library to an iterable of frozen modules
|
/// Decode a library to an iterable of frozen modules
|
||||||
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter {
|
pub fn decode(&self) -> FrozenModulesIter<'_> {
|
||||||
let data = lz4_flex::decompress_size_prepended(bytes).unwrap();
|
let mut data = self.bytes.as_ref();
|
||||||
let mut data = marshal::Cursor { data, position: 0 };
|
|
||||||
let remaining = data.read_u32().unwrap();
|
let remaining = data.read_u32().unwrap();
|
||||||
FrozenModulesIter { remaining, data }
|
FrozenModulesIter { remaining, data }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct FrozenModulesIter {
|
|
||||||
remaining: u32,
|
|
||||||
data: marshal::Cursor<Vec<u8>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for FrozenModulesIter {
|
impl<'a, B: AsRef<[u8]> + ?Sized> IntoIterator for &'a FrozenLib<B> {
|
||||||
type Item = (String, FrozenModule);
|
type Item = (&'a str, FrozenModule<&'a [u8]>);
|
||||||
|
type IntoIter = FrozenModulesIter<'a>;
|
||||||
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
|
self.decode()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterator over the entries of an encoded frozen library, borrowing from the
/// library's bytes for `'a`.
pub struct FrozenModulesIter<'a> {
    /// Count of entries still to be yielded; reported as the exact size hint.
    remaining: u32,
    /// The part of the encoded data that has not been read yet.
    data: &'a [u8],
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for FrozenModulesIter<'a> {
|
||||||
|
type Item = (&'a str, FrozenModule<&'a [u8]>);
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
if self.remaining > 0 {
|
if self.remaining > 0 {
|
||||||
|
@ -1524,31 +1564,37 @@ pub mod frozen_lib {
|
||||||
(self.remaining as usize, Some(self.remaining as usize))
|
(self.remaining as usize, Some(self.remaining as usize))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl ExactSizeIterator for FrozenModulesIter {}
|
impl ExactSizeIterator for FrozenModulesIter<'_> {}
|
||||||
|
|
||||||
fn read_entry(rdr: &mut impl Read) -> Result<(String, FrozenModule), marshal::MarshalError> {
|
fn read_entry<'a>(
|
||||||
|
rdr: &mut &'a [u8],
|
||||||
|
) -> Result<(&'a str, FrozenModule<&'a [u8]>), marshal::MarshalError> {
|
||||||
let len = rdr.read_u32()?;
|
let len = rdr.read_u32()?;
|
||||||
let name = rdr.read_str(len)?.to_owned();
|
let name = rdr.read_str_borrow(len)?;
|
||||||
let code = marshal::deserialize_code(rdr, BasicBag)?;
|
let len = rdr.read_u32()?;
|
||||||
|
let code_slice = rdr.read_slice_borrow(len)?;
|
||||||
|
let code = FrozenCodeObject { bytes: code_slice };
|
||||||
let package = rdr.read_u8()? != 0;
|
let package = rdr.read_u8()? != 0;
|
||||||
Ok((name, FrozenModule { code, package }))
|
Ok((name, FrozenModule { code, package }))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl FrozenLib<Vec<u8>> {
|
||||||
/// Encode the given iterator of frozen modules into a compressed vector of bytes
|
/// Encode the given iterator of frozen modules into a `FrozenLib` backed by a vector of bytes
|
||||||
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8>
|
pub fn encode<'a, I, B: AsRef<[u8]>>(lib: I) -> FrozenLib<Vec<u8>>
|
||||||
where
|
where
|
||||||
I: IntoIterator<Item = (&'a str, &'a FrozenModule)>,
|
I: IntoIterator<Item = (&'a str, FrozenModule<B>)>,
|
||||||
I::IntoIter: ExactSizeIterator + Clone,
|
I::IntoIter: ExactSizeIterator + Clone,
|
||||||
{
|
{
|
||||||
let iter = lib.into_iter();
|
let iter = lib.into_iter();
|
||||||
let mut data = Vec::new();
|
let mut bytes = Vec::new();
|
||||||
write_lib(&mut data, iter);
|
write_lib(&mut bytes, iter);
|
||||||
lz4_flex::compress_prepend_size(&data)
|
Self { bytes }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_lib<'a>(
|
fn write_lib<'a, B: AsRef<[u8]>>(
|
||||||
buf: &mut impl Write,
|
buf: &mut Vec<u8>,
|
||||||
lib: impl ExactSizeIterator<Item = (&'a str, &'a FrozenModule)>,
|
lib: impl ExactSizeIterator<Item = (&'a str, FrozenModule<B>)>,
|
||||||
) {
|
) {
|
||||||
marshal::write_len(buf, lib.len());
|
marshal::write_len(buf, lib.len());
|
||||||
for (name, module) in lib {
|
for (name, module) in lib {
|
||||||
|
@ -1556,10 +1602,9 @@ pub mod frozen_lib {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_entry(buf: &mut impl Write, name: &str, module: &FrozenModule) {
|
fn write_entry(buf: &mut Vec<u8>, name: &str, module: FrozenModule<impl AsRef<[u8]>>) {
|
||||||
marshal::write_len(buf, name.len());
|
marshal::write_vec(buf, name.as_bytes());
|
||||||
buf.write_slice(name.as_bytes());
|
marshal::write_vec(buf, module.code.bytes.as_ref());
|
||||||
marshal::serialize_code(buf, &module.code);
|
|
||||||
buf.write_u8(module.package as u8);
|
buf.write_u8(module.package as u8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -130,8 +130,21 @@ pub trait Read {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) trait ReadBorrowed<'a>: Read {
|
||||||
|
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>;
|
||||||
|
fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> {
|
||||||
|
Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Read for &[u8] {
|
impl Read for &[u8] {
|
||||||
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
|
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
|
||||||
|
self.read_slice_borrow(n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ReadBorrowed<'a> for &'a [u8] {
|
||||||
|
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> {
|
||||||
let data = self.get(..n as usize).ok_or(MarshalError::Eof)?;
|
let data = self.get(..n as usize).ok_or(MarshalError::Eof)?;
|
||||||
*self = &self[n as usize..];
|
*self = &self[n as usize..];
|
||||||
Ok(data)
|
Ok(data)
|
||||||
|
@ -474,6 +487,11 @@ pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
|
||||||
buf.write_u32(len);
|
buf.write_u32(len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Write `slice` as a length-prefixed byte string: its length via
/// [`write_len`], followed by the bytes themselves.
pub(crate) fn write_vec<W: Write>(buf: &mut W, slice: &[u8]) {
    let byte_count = slice.len();
    write_len(buf, byte_count);
    buf.write_slice(slice);
}
|
||||||
|
|
||||||
pub fn serialize_value<W: Write, D: Dumpable>(
|
pub fn serialize_value<W: Write, D: Dumpable>(
|
||||||
buf: &mut W,
|
buf: &mut W,
|
||||||
constant: DumpableValue<'_, D>,
|
constant: DumpableValue<'_, D>,
|
||||||
|
@ -501,13 +519,11 @@ pub fn serialize_value<W: Write, D: Dumpable>(
|
||||||
}
|
}
|
||||||
DumpableValue::Str(s) => {
|
DumpableValue::Str(s) => {
|
||||||
buf.write_u8(Type::Unicode as u8);
|
buf.write_u8(Type::Unicode as u8);
|
||||||
write_len(buf, s.len());
|
write_vec(buf, s.as_bytes());
|
||||||
buf.write_slice(s.as_bytes());
|
|
||||||
}
|
}
|
||||||
DumpableValue::Bytes(b) => {
|
DumpableValue::Bytes(b) => {
|
||||||
buf.write_u8(Type::Bytes as u8);
|
buf.write_u8(Type::Bytes as u8);
|
||||||
write_len(buf, b.len());
|
write_vec(buf, b);
|
||||||
buf.write_slice(b);
|
|
||||||
}
|
}
|
||||||
DumpableValue::Code(c) => {
|
DumpableValue::Code(c) => {
|
||||||
buf.write_u8(Type::Code as u8);
|
buf.write_u8(Type::Code as u8);
|
||||||
|
@ -580,14 +596,12 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
|
||||||
buf.write_u32(code.arg_count);
|
buf.write_u32(code.arg_count);
|
||||||
buf.write_u32(code.kwonlyarg_count);
|
buf.write_u32(code.kwonlyarg_count);
|
||||||
|
|
||||||
write_len(buf, code.source_path.as_ref().len());
|
write_vec(buf, code.source_path.as_ref().as_bytes());
|
||||||
buf.write_slice(code.source_path.as_ref().as_bytes());
|
|
||||||
|
|
||||||
buf.write_u32(code.first_line_number);
|
buf.write_u32(code.first_line_number);
|
||||||
buf.write_u32(code.max_stackdepth);
|
buf.write_u32(code.max_stackdepth);
|
||||||
|
|
||||||
write_len(buf, code.obj_name.as_ref().len());
|
write_vec(buf, code.obj_name.as_ref().as_bytes());
|
||||||
buf.write_slice(code.obj_name.as_ref().as_bytes());
|
|
||||||
|
|
||||||
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
|
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
|
||||||
write_len(buf, cell2arg.len());
|
write_len(buf, cell2arg.len());
|
||||||
|
@ -603,8 +617,7 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
|
||||||
let mut write_names = |names: &[C::Name]| {
|
let mut write_names = |names: &[C::Name]| {
|
||||||
write_len(buf, names.len());
|
write_len(buf, names.len());
|
||||||
for name in names {
|
for name in names {
|
||||||
write_len(buf, name.as_ref().len());
|
write_vec(buf, name.as_ref().as_bytes());
|
||||||
buf.write_slice(name.as_ref().as_bytes());
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue