mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-07 21:25:31 +00:00
Custom marshal enc/decoding impl
This commit is contained in:
parent
8f425e9ce2
commit
b80bbec8e6
9 changed files with 857 additions and 204 deletions
|
@ -9,5 +9,3 @@ edition = "2021"
|
|||
rustpython-compiler-core = { path = "core" }
|
||||
rustpython-codegen = { path = "codegen" }
|
||||
rustpython-parser = { path = "parser" }
|
||||
|
||||
thiserror = { workspace = true }
|
||||
|
|
|
@ -8,14 +8,10 @@ repository = "https://github.com/RustPython/RustPython"
|
|||
license = "MIT"
|
||||
|
||||
[dependencies]
|
||||
bincode = { workspace = true }
|
||||
bitflags = { workspace = true }
|
||||
bstr = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
num-bigint = { workspace = true, features = ["serde"] }
|
||||
num-complex = { workspace = true, features = ["serde"] }
|
||||
num_enum = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
thiserror = { workspace = true }
|
||||
num-bigint = { workspace = true }
|
||||
num-complex = { workspace = true }
|
||||
|
||||
lz4_flex = "0.9.2"
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
//! Implement python as a virtual machine with bytecodes. This module
|
||||
//! implements bytecode structure.
|
||||
|
||||
use crate::Location;
|
||||
use crate::marshal::MarshalError;
|
||||
use crate::{marshal, Location};
|
||||
use bitflags::bitflags;
|
||||
use bstr::ByteSlice;
|
||||
use itertools::Itertools;
|
||||
use num_bigint::BigInt;
|
||||
use num_complex::Complex64;
|
||||
use num_enum::{IntoPrimitive, TryFromPrimitive};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
use std::{collections::BTreeSet, fmt, hash, mem};
|
||||
|
||||
|
@ -31,9 +29,7 @@ impl Constant for ConstantData {
|
|||
ConstantData::Str { value } => Str { value },
|
||||
ConstantData::Bytes { value } => Bytes { value },
|
||||
ConstantData::Code { code } => Code { code },
|
||||
ConstantData::Tuple { elements } => Tuple {
|
||||
elements: Box::new(elements.iter().map(|e| e.borrow_constant())),
|
||||
},
|
||||
ConstantData::Tuple { elements } => Tuple { elements },
|
||||
ConstantData::None => None,
|
||||
ConstantData::Ellipsis => Ellipsis,
|
||||
}
|
||||
|
@ -44,6 +40,9 @@ impl Constant for ConstantData {
|
|||
pub trait ConstantBag: Sized + Copy {
|
||||
type Constant: Constant;
|
||||
fn make_constant<C: Constant>(&self, constant: BorrowedConstant<C>) -> Self::Constant;
|
||||
fn make_int(&self, value: BigInt) -> Self::Constant;
|
||||
fn make_tuple(&self, elements: impl Iterator<Item = Self::Constant>) -> Self::Constant;
|
||||
fn make_code(&self, code: CodeObject<Self::Constant>) -> Self::Constant;
|
||||
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name;
|
||||
}
|
||||
|
||||
|
@ -55,6 +54,19 @@ impl ConstantBag for BasicBag {
|
|||
fn make_constant<C: Constant>(&self, constant: BorrowedConstant<C>) -> Self::Constant {
|
||||
constant.to_owned()
|
||||
}
|
||||
fn make_int(&self, value: BigInt) -> Self::Constant {
|
||||
ConstantData::Integer { value }
|
||||
}
|
||||
fn make_tuple(&self, elements: impl Iterator<Item = Self::Constant>) -> Self::Constant {
|
||||
ConstantData::Tuple {
|
||||
elements: elements.collect(),
|
||||
}
|
||||
}
|
||||
fn make_code(&self, code: CodeObject<Self::Constant>) -> Self::Constant {
|
||||
ConstantData::Code {
|
||||
code: Box::new(code),
|
||||
}
|
||||
}
|
||||
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name {
|
||||
name.to_owned()
|
||||
}
|
||||
|
@ -62,26 +74,22 @@ impl ConstantBag for BasicBag {
|
|||
|
||||
/// Primary container of a single code object. Each python function has
|
||||
/// a codeobject. Also a module has a codeobject.
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[derive(Clone)]
|
||||
pub struct CodeObject<C: Constant = ConstantData> {
|
||||
pub instructions: Box<[CodeUnit]>,
|
||||
pub locations: Box<[Location]>,
|
||||
pub flags: CodeFlags,
|
||||
pub posonlyarg_count: usize,
|
||||
pub posonlyarg_count: u32,
|
||||
// Number of positional-only arguments
|
||||
pub arg_count: usize,
|
||||
pub kwonlyarg_count: usize,
|
||||
pub arg_count: u32,
|
||||
pub kwonlyarg_count: u32,
|
||||
pub source_path: C::Name,
|
||||
pub first_line_number: usize,
|
||||
pub first_line_number: u32,
|
||||
pub max_stackdepth: u32,
|
||||
pub obj_name: C::Name,
|
||||
// Name of the object that created this code object
|
||||
pub cell2arg: Option<Box<[isize]>>,
|
||||
pub cell2arg: Option<Box<[i32]>>,
|
||||
pub constants: Box<[C]>,
|
||||
#[serde(bound(
|
||||
deserialize = "C::Name: serde::Deserialize<'de>",
|
||||
serialize = "C::Name: serde::Serialize"
|
||||
))]
|
||||
pub names: Box<[C::Name]>,
|
||||
pub varnames: Box<[C::Name]>,
|
||||
pub cellvars: Box<[C::Name]>,
|
||||
|
@ -89,7 +97,6 @@ pub struct CodeObject<C: Constant = ConstantData> {
|
|||
}
|
||||
|
||||
bitflags! {
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct CodeFlags: u16 {
|
||||
const NEW_LOCALS = 0x01;
|
||||
const IS_GENERATOR = 0x02;
|
||||
|
@ -114,7 +121,7 @@ impl CodeFlags {
|
|||
}
|
||||
|
||||
/// an opcode argument that may be extended by a prior ExtendedArg
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||
#[repr(transparent)]
|
||||
pub struct OpArgByte(pub u8);
|
||||
impl OpArgByte {
|
||||
|
@ -211,23 +218,7 @@ impl OpArgType for bool {
|
|||
}
|
||||
}
|
||||
|
||||
macro_rules! enum_oparg {
|
||||
($t:ident) => {
|
||||
impl OpArgType for $t {
|
||||
#[inline(always)]
|
||||
fn from_oparg(x: u32) -> Option<Self> {
|
||||
$t::try_from_primitive(x as _).ok()
|
||||
}
|
||||
#[inline(always)]
|
||||
fn to_oparg(self) -> u32 {
|
||||
u8::from(self).into()
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Serialize, Deserialize)]
|
||||
#[serde(bound = "")]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Arg<T: OpArgType>(PhantomData<T>);
|
||||
|
||||
impl<T: OpArgType> Arg<T> {
|
||||
|
@ -302,7 +293,7 @@ impl fmt::Display for Label {
|
|||
}
|
||||
|
||||
/// Transforms a value prior to formatting it.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum ConversionFlag {
|
||||
/// No conversion
|
||||
|
@ -314,32 +305,57 @@ pub enum ConversionFlag {
|
|||
/// Converts by calling `repr(<value>)`.
|
||||
Repr = b'r',
|
||||
}
|
||||
enum_oparg!(ConversionFlag);
|
||||
|
||||
/// Encodes a `ConversionFlag` as an opcode argument and decodes it back.
impl OpArgType for ConversionFlag {
    /// Returns the flag's `#[repr(u8)]` discriminant widened to `u32`.
    fn to_oparg(self) -> u32 {
        u32::from(self as u8)
    }

    /// Decodes an opcode argument; yields `None` (the `Option`) when `x` does
    /// not fit in a byte or is not one of the four recognised codes.
    fn from_oparg(x: u32) -> Option<Self> {
        let byte = u8::try_from(x).ok()?;
        match byte {
            0 => Some(Self::None),
            b's' => Some(Self::Str),
            b'a' => Some(Self::Ascii),
            b'r' => Some(Self::Repr),
            _ => None,
        }
    }
}
|
||||
|
||||
impl TryFrom<usize> for ConversionFlag {
|
||||
type Error = usize;
|
||||
fn try_from(b: usize) -> Result<Self, Self::Error> {
|
||||
u8::try_from(b)
|
||||
.ok()
|
||||
.and_then(|b| Self::try_from(b).ok())
|
||||
.ok_or(b)
|
||||
u32::try_from(b).ok().and_then(Self::from_oparg).ok_or(b)
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of Raise that occurred.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum RaiseKind {
|
||||
Reraise,
|
||||
Raise,
|
||||
RaiseCause,
|
||||
}
|
||||
enum_oparg!(RaiseKind);
|
||||
|
||||
/// Encodes a `RaiseKind` as an opcode argument and decodes it back.
impl OpArgType for RaiseKind {
    /// Returns the variant's `#[repr(u8)]` discriminant widened to `u32`.
    fn to_oparg(self) -> u32 {
        u32::from(self as u8)
    }

    /// Maps 0, 1 and 2 to `Reraise`, `Raise` and `RaiseCause`; any other
    /// value is rejected with `None`.
    fn from_oparg(x: u32) -> Option<Self> {
        match x {
            0 => Some(Self::Reraise),
            1 => Some(Self::Raise),
            2 => Some(Self::RaiseCause),
            _ => None,
        }
    }
}
|
||||
|
||||
pub type NameIdx = u32;
|
||||
|
||||
/// A Single bytecode instruction.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum Instruction {
|
||||
/// Importing by name
|
||||
ImportName {
|
||||
|
@ -562,7 +578,29 @@ pub enum Instruction {
|
|||
}
|
||||
const _: () = assert!(mem::size_of::<Instruction>() == 1);
|
||||
|
||||
#[derive(Copy, Clone, Serialize, Deserialize)]
|
||||
/// Converts an instruction into its single-byte opcode representation.
impl From<Instruction> for u8 {
    #[inline]
    fn from(ins: Instruction) -> Self {
        // SAFETY: `Instruction` is `#[repr(u8)]` and is statically asserted to
        // be exactly one byte wide, so there are no padding bits to expose.
        let opcode: u8 = unsafe { std::mem::transmute::<Instruction, u8>(ins) };
        opcode
    }
}
|
||||
|
||||
impl TryFrom<u8> for Instruction {
|
||||
type Error = crate::marshal::MarshalError;
|
||||
|
||||
#[inline]
|
||||
fn try_from(value: u8) -> Result<Self, crate::marshal::MarshalError> {
|
||||
if value <= u8::from(Instruction::ExtendedArg) {
|
||||
Ok(unsafe { std::mem::transmute::<u8, Instruction>(value) })
|
||||
} else {
|
||||
Err(crate::marshal::MarshalError::InvalidBytecode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct CodeUnit {
|
||||
pub op: Instruction,
|
||||
pub arg: OpArgByte,
|
||||
|
@ -579,7 +617,6 @@ impl CodeUnit {
|
|||
use self::Instruction::*;
|
||||
|
||||
bitflags! {
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct MakeFunctionFlags: u8 {
|
||||
const CLOSURE = 0x01;
|
||||
const ANNOTATIONS = 0x02;
|
||||
|
@ -607,7 +644,7 @@ impl OpArgType for MakeFunctionFlags {
|
|||
/// let b = ConstantData::Boolean {value: false};
|
||||
/// assert_ne!(a, b);
|
||||
/// ```
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ConstantData {
|
||||
Tuple { elements: Vec<ConstantData> },
|
||||
Integer { value: BigInt },
|
||||
|
@ -677,36 +714,40 @@ pub enum BorrowedConstant<'a, C: Constant> {
|
|||
Str { value: &'a str },
|
||||
Bytes { value: &'a [u8] },
|
||||
Code { code: &'a CodeObject<C> },
|
||||
Tuple { elements: BorrowedTupleIter<'a, C> },
|
||||
Tuple { elements: &'a [C] },
|
||||
None,
|
||||
Ellipsis,
|
||||
}
|
||||
|
||||
type BorrowedTupleIter<'a, C> = Box<dyn Iterator<Item = BorrowedConstant<'a, C>> + 'a>;
|
||||
impl<C: Constant> Copy for BorrowedConstant<'_, C> {}
|
||||
impl<C: Constant> Clone for BorrowedConstant<'_, C> {
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Constant> BorrowedConstant<'_, C> {
|
||||
// takes `self` because we need to consume the iterator
|
||||
pub fn fmt_display(self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
pub fn fmt_display(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
BorrowedConstant::Integer { value } => write!(f, "{value}"),
|
||||
BorrowedConstant::Float { value } => write!(f, "{value}"),
|
||||
BorrowedConstant::Complex { value } => write!(f, "{value}"),
|
||||
BorrowedConstant::Boolean { value } => {
|
||||
write!(f, "{}", if value { "True" } else { "False" })
|
||||
write!(f, "{}", if *value { "True" } else { "False" })
|
||||
}
|
||||
BorrowedConstant::Str { value } => write!(f, "{value:?}"),
|
||||
BorrowedConstant::Bytes { value } => write!(f, "b{:?}", value.as_bstr()),
|
||||
BorrowedConstant::Bytes { value } => write!(f, "b\"{}\"", value.escape_ascii()),
|
||||
BorrowedConstant::Code { code } => write!(f, "{code:?}"),
|
||||
BorrowedConstant::Tuple { elements } => {
|
||||
write!(f, "(")?;
|
||||
let mut first = true;
|
||||
for c in elements {
|
||||
for c in *elements {
|
||||
if first {
|
||||
first = false
|
||||
} else {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
c.fmt_display(f)?;
|
||||
c.borrow_constant().fmt_display(f)?;
|
||||
}
|
||||
write!(f, ")")
|
||||
}
|
||||
|
@ -733,7 +774,10 @@ impl<C: Constant> BorrowedConstant<'_, C> {
|
|||
code: Box::new(code.map_clone_bag(&BasicBag)),
|
||||
},
|
||||
BorrowedConstant::Tuple { elements } => Tuple {
|
||||
elements: elements.map(BorrowedConstant::to_owned).collect(),
|
||||
elements: elements
|
||||
.iter()
|
||||
.map(|c| c.borrow_constant().to_owned())
|
||||
.collect(),
|
||||
},
|
||||
BorrowedConstant::None => None,
|
||||
BorrowedConstant::Ellipsis => Ellipsis,
|
||||
|
@ -742,7 +786,7 @@ impl<C: Constant> BorrowedConstant<'_, C> {
|
|||
}
|
||||
|
||||
/// The possible comparison operators
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum ComparisonOperator {
|
||||
// be intentional with bits so that we can do eval_ord with just a bitwise and
|
||||
|
@ -754,9 +798,25 @@ pub enum ComparisonOperator {
|
|||
LessOrEqual = 0b101,
|
||||
GreaterOrEqual = 0b110,
|
||||
}
|
||||
enum_oparg!(ComparisonOperator);
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
|
||||
/// Encodes a `ComparisonOperator` as an opcode argument and decodes it back.
impl OpArgType for ComparisonOperator {
    /// Returns the operator's `#[repr(u8)]` bit pattern widened to `u32`.
    fn to_oparg(self) -> u32 {
        u32::from(self as u8)
    }

    /// Accepts only the six bit patterns listed below; everything else
    /// (including `0b000` and `0b111`) is rejected with `None`.
    fn from_oparg(x: u32) -> Option<Self> {
        let op = match x {
            0b001 => Self::Less,
            0b010 => Self::Greater,
            0b011 => Self::NotEqual,
            0b100 => Self::Equal,
            0b101 => Self::LessOrEqual,
            0b110 => Self::GreaterOrEqual,
            _ => return None,
        };
        Some(op)
    }
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum TestOperator {
|
||||
In,
|
||||
|
@ -766,7 +826,22 @@ pub enum TestOperator {
|
|||
/// two exceptions that match?
|
||||
ExceptionMatch,
|
||||
}
|
||||
enum_oparg!(TestOperator);
|
||||
|
||||
/// Encodes a `TestOperator` as an opcode argument and decodes it back.
impl OpArgType for TestOperator {
    /// Returns the operator's `#[repr(u8)]` discriminant widened to `u32`.
    fn to_oparg(self) -> u32 {
        u32::from(self as u8)
    }

    /// Maps 0 through 4 to the corresponding operator; larger values yield `None`.
    fn from_oparg(x: u32) -> Option<Self> {
        match x {
            0 => Some(Self::In),
            1 => Some(Self::NotIn),
            2 => Some(Self::Is),
            3 => Some(Self::IsNot),
            4 => Some(Self::ExceptionMatch),
            _ => None,
        }
    }
}
|
||||
|
||||
/// The possible Binary operators
|
||||
/// # Examples
|
||||
|
@ -776,7 +851,7 @@ enum_oparg!(TestOperator);
|
|||
/// use rustpython_compiler_core::BinaryOperator::Add;
|
||||
/// let op = BinaryOperation {op: Add};
|
||||
/// ```
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum BinaryOperator {
|
||||
Power,
|
||||
|
@ -793,10 +868,33 @@ pub enum BinaryOperator {
|
|||
Xor,
|
||||
Or,
|
||||
}
|
||||
enum_oparg!(BinaryOperator);
|
||||
|
||||
/// Encodes a `BinaryOperator` as an opcode argument and decodes it back.
impl OpArgType for BinaryOperator {
    /// Returns the operator's `#[repr(u8)]` discriminant widened to `u32`.
    fn to_oparg(self) -> u32 {
        u32::from(self as u8)
    }

    /// Maps 0 through 12 to the corresponding operator; larger values yield `None`.
    fn from_oparg(x: u32) -> Option<Self> {
        let op = match x {
            0 => Self::Power,
            1 => Self::Multiply,
            2 => Self::MatrixMultiply,
            3 => Self::Divide,
            4 => Self::FloorDivide,
            5 => Self::Modulo,
            6 => Self::Add,
            7 => Self::Subtract,
            8 => Self::Lshift,
            9 => Self::Rshift,
            10 => Self::And,
            11 => Self::Xor,
            12 => Self::Or,
            _ => return None,
        };
        Some(op)
    }
}
|
||||
|
||||
/// The possible unary operators
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum UnaryOperator {
|
||||
Not,
|
||||
|
@ -804,7 +902,21 @@ pub enum UnaryOperator {
|
|||
Minus,
|
||||
Plus,
|
||||
}
|
||||
enum_oparg!(UnaryOperator);
|
||||
|
||||
/// Encodes a `UnaryOperator` as an opcode argument and decodes it back.
impl OpArgType for UnaryOperator {
    /// Returns the operator's `#[repr(u8)]` discriminant widened to `u32`.
    fn to_oparg(self) -> u32 {
        u32::from(self as u8)
    }

    /// Maps 0 through 3 to the corresponding operator; larger values yield `None`.
    fn from_oparg(x: u32) -> Option<Self> {
        match x {
            0 => Some(Self::Not),
            1 => Some(Self::Invert),
            2 => Some(Self::Minus),
            3 => Some(Self::Plus),
            _ => None,
        }
    }
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct UnpackExArgs {
|
||||
|
@ -867,10 +979,10 @@ impl<C: Constant> CodeObject<C> {
|
|||
/// Get all arguments of the code object
|
||||
/// like inspect.getargs
|
||||
pub fn arg_names(&self) -> Arguments<C::Name> {
|
||||
let nargs = self.arg_count;
|
||||
let nkwargs = self.kwonlyarg_count;
|
||||
let nargs = self.arg_count as usize;
|
||||
let nkwargs = self.kwonlyarg_count as usize;
|
||||
let mut varargspos = nargs + nkwargs;
|
||||
let posonlyargs = &self.varnames[..self.posonlyarg_count];
|
||||
let posonlyargs = &self.varnames[..self.posonlyarg_count as usize];
|
||||
let args = &self.varnames[..nargs];
|
||||
let kwonlyargs = &self.varnames[nargs..varargspos];
|
||||
|
||||
|
@ -1033,49 +1145,23 @@ impl<C: Constant> CodeObject<C> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Error that occurs during code deserialization
|
||||
#[derive(Debug)]
|
||||
#[non_exhaustive]
|
||||
pub enum CodeDeserializeError {
|
||||
/// Unexpected End Of File
|
||||
Eof,
|
||||
/// Invalid Bytecode
|
||||
Other,
|
||||
}
|
||||
|
||||
impl fmt::Display for CodeDeserializeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::Eof => f.write_str("unexpected end of data"),
|
||||
Self::Other => f.write_str("invalid bytecode"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for CodeDeserializeError {}
|
||||
|
||||
impl CodeObject<ConstantData> {
|
||||
/// Load a code object from bytes
|
||||
pub fn from_bytes(data: &[u8]) -> Result<Self, CodeDeserializeError> {
|
||||
pub fn from_bytes(data: &[u8]) -> Result<Self, MarshalError> {
|
||||
use lz4_flex::block::DecompressError;
|
||||
let raw_bincode = lz4_flex::decompress_size_prepended(data).map_err(|e| match e {
|
||||
DecompressError::OutputTooSmall { .. } | DecompressError::ExpectedAnotherByte => {
|
||||
CodeDeserializeError::Eof
|
||||
MarshalError::Eof
|
||||
}
|
||||
_ => CodeDeserializeError::Other,
|
||||
_ => MarshalError::InvalidBytecode,
|
||||
})?;
|
||||
let data = bincode::deserialize(&raw_bincode).map_err(|e| match *e {
|
||||
bincode::ErrorKind::Io(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
|
||||
CodeDeserializeError::Eof
|
||||
}
|
||||
_ => CodeDeserializeError::Other,
|
||||
})?;
|
||||
Ok(data)
|
||||
marshal::deserialize_code(&mut &raw_bincode[..], BasicBag)
|
||||
}
|
||||
|
||||
/// Serialize this bytecode to bytes.
|
||||
pub fn to_bytes(&self) -> Vec<u8> {
|
||||
let data = bincode::serialize(&self).expect("CodeObject is not serializable");
|
||||
let mut data = Vec::new();
|
||||
marshal::serialize_code(&mut data, self);
|
||||
lz4_flex::compress_prepend_size(&data)
|
||||
}
|
||||
}
|
||||
|
@ -1466,7 +1552,7 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
|
|||
}
|
||||
|
||||
/// A frozen module. Holds a code object and whether it is part of a package
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[derive(Debug)]
|
||||
pub struct FrozenModule {
|
||||
pub code: CodeObject<ConstantData>,
|
||||
pub package: bool,
|
||||
|
@ -1474,34 +1560,28 @@ pub struct FrozenModule {
|
|||
|
||||
pub mod frozen_lib {
|
||||
use super::*;
|
||||
use bincode::{options, Options};
|
||||
use std::io;
|
||||
use marshal::{Read, Write};
|
||||
|
||||
/// Decode a library to a iterable of frozen modules
|
||||
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter {
|
||||
let data = lz4_flex::decompress_size_prepended(bytes).unwrap();
|
||||
let r = VecReader { data, pos: 0 };
|
||||
let mut de = bincode::Deserializer::with_bincode_read(r, options());
|
||||
let len = u64::deserialize(&mut de).unwrap().try_into().unwrap();
|
||||
FrozenModulesIter { len, de }
|
||||
let mut data = marshal::Cursor { data, position: 0 };
|
||||
let remaining = data.read_u32().unwrap();
|
||||
FrozenModulesIter { remaining, data }
|
||||
}
|
||||
|
||||
pub struct FrozenModulesIter {
|
||||
len: usize,
|
||||
// ideally this could be a SeqAccess, but I think that would require existential types
|
||||
de: bincode::Deserializer<VecReader, bincode::DefaultOptions>,
|
||||
remaining: u32,
|
||||
data: marshal::Cursor<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl Iterator for FrozenModulesIter {
|
||||
type Item = (String, FrozenModule);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// manually mimic bincode's seq encoding, which is <len:u64> <element*len>
|
||||
// This probably won't change (bincode doesn't require padding or anything), but
|
||||
// it's not guaranteed by semver as far as I can tell
|
||||
if self.len > 0 {
|
||||
let entry = Deserialize::deserialize(&mut self.de).unwrap();
|
||||
self.len -= 1;
|
||||
if self.remaining > 0 {
|
||||
let entry = read_entry(&mut self.data).unwrap();
|
||||
self.remaining -= 1;
|
||||
Some(entry)
|
||||
} else {
|
||||
None
|
||||
|
@ -1509,12 +1589,19 @@ pub mod frozen_lib {
|
|||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
(self.len, Some(self.len))
|
||||
(self.remaining as usize, Some(self.remaining as usize))
|
||||
}
|
||||
}
|
||||
|
||||
impl ExactSizeIterator for FrozenModulesIter {}
|
||||
|
||||
fn read_entry(rdr: &mut impl Read) -> Result<(String, FrozenModule), marshal::MarshalError> {
|
||||
let len = rdr.read_u32()?;
|
||||
let name = rdr.read_str(len)?.to_owned();
|
||||
let code = marshal::deserialize_code(rdr, BasicBag)?;
|
||||
let package = rdr.read_u8()? != 0;
|
||||
Ok((name, FrozenModule { code, package }))
|
||||
}
|
||||
|
||||
/// Encode the given iterator of frozen modules into a compressed vector of bytes
|
||||
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8>
|
||||
where
|
||||
|
@ -1522,82 +1609,25 @@ pub mod frozen_lib {
|
|||
I::IntoIter: ExactSizeIterator + Clone,
|
||||
{
|
||||
let iter = lib.into_iter();
|
||||
let data = options().serialize(&SerializeLib { iter }).unwrap();
|
||||
let mut data = Vec::new();
|
||||
write_lib(&mut data, iter);
|
||||
lz4_flex::compress_prepend_size(&data)
|
||||
}
|
||||
|
||||
struct SerializeLib<I> {
|
||||
iter: I,
|
||||
}
|
||||
|
||||
impl<'a, I> Serialize for SerializeLib<I>
|
||||
where
|
||||
I: ExactSizeIterator<Item = (&'a str, &'a FrozenModule)> + Clone,
|
||||
{
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
serializer.collect_seq(self.iter.clone())
|
||||
fn write_lib<'a>(
|
||||
buf: &mut impl Write,
|
||||
lib: impl ExactSizeIterator<Item = (&'a str, &'a FrozenModule)>,
|
||||
) {
|
||||
marshal::write_len(buf, lib.len());
|
||||
for (name, module) in lib {
|
||||
write_entry(buf, name, module);
|
||||
}
|
||||
}
|
||||
|
||||
/// Owned version of bincode::de::read::SliceReader<'a>
|
||||
struct VecReader {
|
||||
data: Vec<u8>,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl io::Read for VecReader {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
let mut subslice = &self.data[self.pos..];
|
||||
let n = io::Read::read(&mut subslice, buf)?;
|
||||
self.pos += n;
|
||||
Ok(n)
|
||||
}
|
||||
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
|
||||
self.get_byte_slice(buf.len())
|
||||
.map(|data| buf.copy_from_slice(data))
|
||||
}
|
||||
}
|
||||
|
||||
impl VecReader {
|
||||
#[inline(always)]
|
||||
fn get_byte_slice(&mut self, length: usize) -> io::Result<&[u8]> {
|
||||
let subslice = &self.data[self.pos..];
|
||||
match subslice.get(..length) {
|
||||
Some(ret) => {
|
||||
self.pos += length;
|
||||
Ok(ret)
|
||||
}
|
||||
None => Err(io::ErrorKind::UnexpectedEof.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'storage> bincode::BincodeRead<'storage> for VecReader {
|
||||
fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
|
||||
where
|
||||
V: serde::de::Visitor<'storage>,
|
||||
{
|
||||
let bytes = self.get_byte_slice(length)?;
|
||||
match ::std::str::from_utf8(bytes) {
|
||||
Ok(s) => visitor.visit_str(s),
|
||||
Err(e) => Err(bincode::ErrorKind::InvalidUtf8Encoding(e).into()),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_byte_buffer(&mut self, length: usize) -> bincode::Result<Vec<u8>> {
|
||||
self.get_byte_slice(length)
|
||||
.map(|x| x.to_vec())
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
|
||||
where
|
||||
V: serde::de::Visitor<'storage>,
|
||||
{
|
||||
visitor.visit_bytes(self.get_byte_slice(length)?)
|
||||
}
|
||||
/// Writes one frozen-library entry to `buf`.
///
/// Output order: the name's byte length (via `marshal::write_len`), the name
/// bytes, the serialized code object, then the `package` flag as a single
/// byte (`1` for `true`, `0` for `false`).
fn write_entry(buf: &mut impl Write, name: &str, module: &FrozenModule) {
    let name_bytes = name.as_bytes();
    marshal::write_len(buf, name_bytes.len());
    buf.write_slice(name_bytes);
    marshal::serialize_code(buf, &module.code);
    let package_flag = u8::from(module.package);
    buf.write_u8(package_flag);
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use crate::Location;
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
@ -16,7 +17,14 @@ impl<T> std::ops::Deref for BaseError<T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T> std::error::Error for BaseError<T> where T: std::fmt::Display + std::fmt::Debug {}
|
||||
impl<T> StdError for BaseError<T>
|
||||
where
|
||||
T: StdError + 'static,
|
||||
{
|
||||
fn source(&self) -> Option<&(dyn StdError + 'static)> {
|
||||
Some(&self.error)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Display for BaseError<T>
|
||||
where
|
||||
|
@ -51,12 +59,18 @@ impl<T> BaseError<T> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[derive(Debug)]
|
||||
pub struct CompileError<T> {
|
||||
pub body: BaseError<T>,
|
||||
pub statement: Option<String>,
|
||||
}
|
||||
|
||||
impl<T: StdError + 'static> StdError for CompileError<T> {
|
||||
fn source(&self) -> Option<&(dyn StdError + 'static)> {
|
||||
self.body.source()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> std::ops::Deref for CompileError<T> {
|
||||
type Target = BaseError<T>;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
mod bytecode;
|
||||
mod error;
|
||||
mod location;
|
||||
pub mod marshal;
|
||||
mod mode;
|
||||
|
||||
pub use bytecode::*;
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Sourcecode location.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Location {
|
||||
pub(super) row: u32,
|
||||
pub(super) column: u32,
|
||||
|
|
615
core/src/marshal.rs
Normal file
615
core/src/marshal.rs
Normal file
|
@ -0,0 +1,615 @@
|
|||
use core::fmt;
|
||||
use std::convert::Infallible;
|
||||
|
||||
use num_bigint::{BigInt, Sign};
|
||||
use num_complex::Complex64;
|
||||
|
||||
use crate::{bytecode::*, Location};
|
||||
|
||||
pub const FORMAT_VERSION: u32 = 4;
|
||||
|
||||
/// Error produced while decoding marshalled data.
///
/// The enum carries no payload, so it is cheap to copy and can be compared
/// directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MarshalError {
    /// Unexpected End Of File
    Eof,
    /// Invalid Bytecode
    InvalidBytecode,
    /// Invalid utf8 in string
    InvalidUtf8,
    /// Bad type marker
    BadType,
}
|
||||
|
||||
impl fmt::Display for MarshalError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::Eof => f.write_str("unexpected end of data"),
|
||||
Self::InvalidBytecode => f.write_str("invalid bytecode"),
|
||||
Self::InvalidUtf8 => f.write_str("invalid utf8"),
|
||||
Self::BadType => f.write_str("bad type marker"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::str::Utf8Error> for MarshalError {
|
||||
fn from(_: std::str::Utf8Error) -> Self {
|
||||
Self::InvalidUtf8
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for MarshalError {}
|
||||
|
||||
type Result<T, E = MarshalError> = std::result::Result<T, E>;
|
||||
|
||||
/// One-byte type marker that prefixes a marshalled value.
///
/// Each variant's discriminant is the ASCII byte used on the wire. Markers
/// that are commented out are not recognised by this module.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
enum Type {
    // Null = b'0',
    None = b'N',
    False = b'F',
    True = b'T',
    StopIter = b'S',
    Ellipsis = b'.',
    Int = b'i',
    Float = b'g',
    Complex = b'y',
    // Long = b'l', // i32
    Bytes = b's', // = TYPE_STRING
    // Interned = b't',
    // Ref = b'r',
    Tuple = b'(',
    List = b'[',
    Dict = b'{',
    Code = b'c',
    Unicode = b'u',
    // Unknown = b'?',
    Set = b'<',
    FrozenSet = b'>',
    Ascii = b'a',
    // AsciiInterned = b'A',
    // SmallTuple = b')',
    // ShortAscii = b'z',
    // ShortAsciiInterned = b'Z',
}
|
||||
// const FLAG_REF: u8 = b'\x80';
|
||||
|
||||
impl TryFrom<u8> for Type {
|
||||
type Error = MarshalError;
|
||||
fn try_from(value: u8) -> Result<Self> {
|
||||
use Type::*;
|
||||
Ok(match value {
|
||||
// b'0' => Null,
|
||||
b'N' => None,
|
||||
b'F' => False,
|
||||
b'T' => True,
|
||||
b'S' => StopIter,
|
||||
b'.' => Ellipsis,
|
||||
b'i' => Int,
|
||||
b'g' => Float,
|
||||
b'y' => Complex,
|
||||
// b'l' => Long,
|
||||
b's' => Bytes,
|
||||
// b't' => Interned,
|
||||
// b'r' => Ref,
|
||||
b'(' => Tuple,
|
||||
b'[' => List,
|
||||
b'{' => Dict,
|
||||
b'c' => Code,
|
||||
b'u' => Unicode,
|
||||
// b'?' => Unknown,
|
||||
b'<' => Set,
|
||||
b'>' => FrozenSet,
|
||||
b'a' => Ascii,
|
||||
// b'A' => AsciiInterned,
|
||||
// b')' => SmallTuple,
|
||||
// b'z' => ShortAscii,
|
||||
// b'Z' => ShortAsciiInterned,
|
||||
_ => return Err(MarshalError::BadType),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Read {
|
||||
fn read_slice(&mut self, n: u32) -> Result<&[u8]>;
|
||||
fn read_array<const N: usize>(&mut self) -> Result<&[u8; N]> {
|
||||
self.read_slice(N as u32).map(|s| s.try_into().unwrap())
|
||||
}
|
||||
fn read_str(&mut self, len: u32) -> Result<&str> {
|
||||
Ok(std::str::from_utf8(self.read_slice(len)?)?)
|
||||
}
|
||||
fn read_u8(&mut self) -> Result<u8> {
|
||||
Ok(u8::from_le_bytes(*self.read_array()?))
|
||||
}
|
||||
fn read_u16(&mut self) -> Result<u16> {
|
||||
Ok(u16::from_le_bytes(*self.read_array()?))
|
||||
}
|
||||
fn read_u32(&mut self) -> Result<u32> {
|
||||
Ok(u32::from_le_bytes(*self.read_array()?))
|
||||
}
|
||||
fn read_u64(&mut self) -> Result<u64> {
|
||||
Ok(u64::from_le_bytes(*self.read_array()?))
|
||||
}
|
||||
}
|
||||
|
||||
impl Read for &[u8] {
|
||||
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
|
||||
let data = self.get(..n as usize).ok_or(MarshalError::Eof)?;
|
||||
*self = &self[n as usize..];
|
||||
Ok(data)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Cursor<B> {
|
||||
pub data: B,
|
||||
pub position: usize,
|
||||
}
|
||||
|
||||
impl<B: AsRef<[u8]>> Read for Cursor<B> {
|
||||
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
|
||||
let data = &self.data.as_ref()[self.position..];
|
||||
let slice = data.get(..n as usize).ok_or(MarshalError::Eof)?;
|
||||
self.position += n as usize;
|
||||
Ok(slice)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deserialize_code<R: Read, Bag: ConstantBag>(
|
||||
rdr: &mut R,
|
||||
bag: Bag,
|
||||
) -> Result<CodeObject<Bag::Constant>> {
|
||||
let len = rdr.read_u32()?;
|
||||
let instructions = rdr.read_slice(len * 2)?;
|
||||
let instructions = instructions
|
||||
.chunks_exact(2)
|
||||
.map(|cu| {
|
||||
let op = Instruction::try_from(cu[0])?;
|
||||
let arg = OpArgByte(cu[1]);
|
||||
Ok(CodeUnit { op, arg })
|
||||
})
|
||||
.collect::<Result<Box<[CodeUnit]>>>()?;
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let locations = (0..len)
|
||||
.map(|_| {
|
||||
Ok(Location {
|
||||
row: rdr.read_u32()?,
|
||||
column: rdr.read_u32()?,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Box<[Location]>>>()?;
|
||||
|
||||
let flags = CodeFlags::from_bits_truncate(rdr.read_u16()?);
|
||||
|
||||
let posonlyarg_count = rdr.read_u32()?;
|
||||
let arg_count = rdr.read_u32()?;
|
||||
let kwonlyarg_count = rdr.read_u32()?;
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let source_path = bag.make_name(rdr.read_str(len)?);
|
||||
|
||||
let first_line_number = rdr.read_u32()?;
|
||||
let max_stackdepth = rdr.read_u32()?;
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let obj_name = bag.make_name(rdr.read_str(len)?);
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let cell2arg = (len != 0)
|
||||
.then(|| {
|
||||
(0..len)
|
||||
.map(|_| Ok(rdr.read_u32()? as i32))
|
||||
.collect::<Result<Box<[i32]>>>()
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let constants = (0..len)
|
||||
.map(|_| deserialize_value(rdr, bag))
|
||||
.collect::<Result<Box<[_]>>>()?;
|
||||
|
||||
let mut read_names = || {
|
||||
let len = rdr.read_u32()?;
|
||||
(0..len)
|
||||
.map(|_| {
|
||||
let len = rdr.read_u32()?;
|
||||
Ok(bag.make_name(rdr.read_str(len)?))
|
||||
})
|
||||
.collect::<Result<Box<[_]>>>()
|
||||
};
|
||||
|
||||
let names = read_names()?;
|
||||
let varnames = read_names()?;
|
||||
let cellvars = read_names()?;
|
||||
let freevars = read_names()?;
|
||||
|
||||
Ok(CodeObject {
|
||||
instructions,
|
||||
locations,
|
||||
flags,
|
||||
posonlyarg_count,
|
||||
arg_count,
|
||||
kwonlyarg_count,
|
||||
source_path,
|
||||
first_line_number,
|
||||
max_stackdepth,
|
||||
obj_name,
|
||||
cell2arg,
|
||||
constants,
|
||||
names,
|
||||
varnames,
|
||||
cellvars,
|
||||
freevars,
|
||||
})
|
||||
}
|
||||
|
||||
pub trait MarshalBag: Copy {
|
||||
type Value;
|
||||
fn make_bool(&self, value: bool) -> Self::Value;
|
||||
fn make_none(&self) -> Self::Value;
|
||||
fn make_ellipsis(&self) -> Self::Value;
|
||||
fn make_float(&self, value: f64) -> Self::Value;
|
||||
fn make_complex(&self, value: Complex64) -> Self::Value;
|
||||
fn make_str(&self, value: &str) -> Self::Value;
|
||||
fn make_bytes(&self, value: &[u8]) -> Self::Value;
|
||||
fn make_int(&self, value: BigInt) -> Self::Value;
|
||||
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value;
|
||||
fn make_code(
|
||||
&self,
|
||||
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
|
||||
) -> Self::Value;
|
||||
fn make_stop_iter(&self) -> Result<Self::Value>;
|
||||
fn make_list(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
|
||||
fn make_set(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
|
||||
fn make_frozenset(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
|
||||
fn make_dict(
|
||||
&self,
|
||||
it: impl Iterator<Item = (Self::Value, Self::Value)>,
|
||||
) -> Result<Self::Value>;
|
||||
type ConstantBag: ConstantBag;
|
||||
fn constant_bag(self) -> Self::ConstantBag;
|
||||
}
|
||||
|
||||
impl<Bag: ConstantBag> MarshalBag for Bag {
|
||||
type Value = Bag::Constant;
|
||||
fn make_bool(&self, value: bool) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Boolean { value })
|
||||
}
|
||||
fn make_none(&self) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::None)
|
||||
}
|
||||
fn make_ellipsis(&self) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Ellipsis)
|
||||
}
|
||||
fn make_float(&self, value: f64) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Float { value })
|
||||
}
|
||||
fn make_complex(&self, value: Complex64) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Complex { value })
|
||||
}
|
||||
fn make_str(&self, value: &str) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Str { value })
|
||||
}
|
||||
fn make_bytes(&self, value: &[u8]) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Bytes { value })
|
||||
}
|
||||
fn make_int(&self, value: BigInt) -> Self::Value {
|
||||
self.make_int(value)
|
||||
}
|
||||
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value {
|
||||
self.make_tuple(elements)
|
||||
}
|
||||
fn make_code(
|
||||
&self,
|
||||
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
|
||||
) -> Self::Value {
|
||||
self.make_code(code)
|
||||
}
|
||||
fn make_stop_iter(&self) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
fn make_list(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
fn make_set(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
fn make_frozenset(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
fn make_dict(
|
||||
&self,
|
||||
_: impl Iterator<Item = (Self::Value, Self::Value)>,
|
||||
) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
type ConstantBag = Self;
|
||||
fn constant_bag(self) -> Self::ConstantBag {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deserialize_value<R: Read, Bag: MarshalBag>(rdr: &mut R, bag: Bag) -> Result<Bag::Value> {
|
||||
let typ = Type::try_from(rdr.read_u8()?)?;
|
||||
let value = match typ {
|
||||
Type::True => bag.make_bool(true),
|
||||
Type::False => bag.make_bool(false),
|
||||
Type::None => bag.make_none(),
|
||||
Type::StopIter => bag.make_stop_iter()?,
|
||||
Type::Ellipsis => bag.make_ellipsis(),
|
||||
Type::Int => {
|
||||
let len = rdr.read_u32()? as i32;
|
||||
let sign = if len < 0 { Sign::Minus } else { Sign::Plus };
|
||||
let bytes = rdr.read_slice(len.unsigned_abs())?;
|
||||
let int = BigInt::from_bytes_le(sign, bytes);
|
||||
bag.make_int(int)
|
||||
}
|
||||
Type::Float => {
|
||||
let value = f64::from_bits(rdr.read_u64()?);
|
||||
bag.make_float(value)
|
||||
}
|
||||
Type::Complex => {
|
||||
let re = f64::from_bits(rdr.read_u64()?);
|
||||
let im = f64::from_bits(rdr.read_u64()?);
|
||||
let value = Complex64 { re, im };
|
||||
bag.make_complex(value)
|
||||
}
|
||||
Type::Ascii | Type::Unicode => {
|
||||
let len = rdr.read_u32()?;
|
||||
let value = rdr.read_str(len)?;
|
||||
bag.make_str(value)
|
||||
}
|
||||
Type::Tuple => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| deserialize_value(rdr, bag));
|
||||
itertools::process_results(it, |it| bag.make_tuple(it))?
|
||||
}
|
||||
Type::List => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| deserialize_value(rdr, bag));
|
||||
itertools::process_results(it, |it| bag.make_list(it))??
|
||||
}
|
||||
Type::Set => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| deserialize_value(rdr, bag));
|
||||
itertools::process_results(it, |it| bag.make_set(it))??
|
||||
}
|
||||
Type::FrozenSet => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| deserialize_value(rdr, bag));
|
||||
itertools::process_results(it, |it| bag.make_frozenset(it))??
|
||||
}
|
||||
Type::Dict => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| {
|
||||
let k = deserialize_value(rdr, bag)?;
|
||||
let v = deserialize_value(rdr, bag)?;
|
||||
Ok::<_, MarshalError>((k, v))
|
||||
});
|
||||
itertools::process_results(it, |it| bag.make_dict(it))??
|
||||
}
|
||||
Type::Bytes => {
|
||||
// Following CPython, after marshaling, byte arrays are converted into bytes.
|
||||
let len = rdr.read_u32()?;
|
||||
let value = rdr.read_slice(len)?;
|
||||
bag.make_bytes(value)
|
||||
}
|
||||
Type::Code => bag.make_code(deserialize_code(rdr, bag.constant_bag())?),
|
||||
};
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
pub trait Dumpable: Sized {
|
||||
type Error;
|
||||
type Constant: Constant;
|
||||
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error>;
|
||||
}
|
||||
|
||||
pub enum DumpableValue<'a, D: Dumpable> {
|
||||
Integer(&'a BigInt),
|
||||
Float(f64),
|
||||
Complex(Complex64),
|
||||
Boolean(bool),
|
||||
Str(&'a str),
|
||||
Bytes(&'a [u8]),
|
||||
Code(&'a CodeObject<D::Constant>),
|
||||
Tuple(&'a [D]),
|
||||
None,
|
||||
Ellipsis,
|
||||
StopIter,
|
||||
List(&'a [D]),
|
||||
Set(&'a [D]),
|
||||
Frozenset(&'a [D]),
|
||||
Dict(&'a [(D, D)]),
|
||||
}
|
||||
|
||||
impl<'a, C: Constant> From<BorrowedConstant<'a, C>> for DumpableValue<'a, C> {
|
||||
fn from(c: BorrowedConstant<'a, C>) -> Self {
|
||||
match c {
|
||||
BorrowedConstant::Integer { value } => Self::Integer(value),
|
||||
BorrowedConstant::Float { value } => Self::Float(value),
|
||||
BorrowedConstant::Complex { value } => Self::Complex(value),
|
||||
BorrowedConstant::Boolean { value } => Self::Boolean(value),
|
||||
BorrowedConstant::Str { value } => Self::Str(value),
|
||||
BorrowedConstant::Bytes { value } => Self::Bytes(value),
|
||||
BorrowedConstant::Code { code } => Self::Code(code),
|
||||
BorrowedConstant::Tuple { elements } => Self::Tuple(elements),
|
||||
BorrowedConstant::None => Self::None,
|
||||
BorrowedConstant::Ellipsis => Self::Ellipsis,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Constant> Dumpable for C {
|
||||
type Error = Infallible;
|
||||
type Constant = Self;
|
||||
#[inline(always)]
|
||||
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error> {
|
||||
Ok(f(self.borrow_constant().into()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Byte sink used by the serializer.
///
/// Implementors only provide [`Write::write_slice`]; the integer helpers encode their
/// argument as little-endian bytes and forward to it.
pub trait Write {
    /// Appends `slice` to the output.
    fn write_slice(&mut self, slice: &[u8]);

    /// Writes a single byte.
    fn write_u8(&mut self, v: u8) {
        self.write_slice(&[v])
    }

    /// Writes `v` as two little-endian bytes.
    fn write_u16(&mut self, v: u16) {
        let encoded = v.to_le_bytes();
        self.write_slice(&encoded)
    }

    /// Writes `v` as four little-endian bytes.
    fn write_u32(&mut self, v: u32) {
        let encoded = v.to_le_bytes();
        self.write_slice(&encoded)
    }

    /// Writes `v` as eight little-endian bytes.
    fn write_u64(&mut self, v: u64) {
        let encoded = v.to_le_bytes();
        self.write_slice(&encoded)
    }
}
|
||||
|
||||
impl Write for Vec<u8> {
|
||||
fn write_slice(&mut self, slice: &[u8]) {
|
||||
self.extend_from_slice(slice)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
|
||||
let Ok(len) = len.try_into() else { panic!("too long to serialize") };
|
||||
buf.write_u32(len);
|
||||
}
|
||||
|
||||
pub fn serialize_value<W: Write, D: Dumpable>(
|
||||
buf: &mut W,
|
||||
constant: DumpableValue<'_, D>,
|
||||
) -> Result<(), D::Error> {
|
||||
match constant {
|
||||
DumpableValue::Integer(int) => {
|
||||
buf.write_u8(Type::Int as u8);
|
||||
let (sign, bytes) = int.to_bytes_le();
|
||||
let len: i32 = bytes.len().try_into().expect("too long to serialize");
|
||||
let len = if sign == Sign::Minus { -len } else { len };
|
||||
buf.write_u32(len as u32);
|
||||
buf.write_slice(&bytes);
|
||||
}
|
||||
DumpableValue::Float(f) => {
|
||||
buf.write_u8(Type::Float as u8);
|
||||
buf.write_u64(f.to_bits());
|
||||
}
|
||||
DumpableValue::Complex(c) => {
|
||||
buf.write_u8(Type::Complex as u8);
|
||||
buf.write_u64(c.re.to_bits());
|
||||
buf.write_u64(c.im.to_bits());
|
||||
}
|
||||
DumpableValue::Boolean(b) => {
|
||||
buf.write_u8(if b { Type::True } else { Type::False } as u8);
|
||||
}
|
||||
DumpableValue::Str(s) => {
|
||||
buf.write_u8(Type::Unicode as u8);
|
||||
write_len(buf, s.len());
|
||||
buf.write_slice(s.as_bytes());
|
||||
}
|
||||
DumpableValue::Bytes(b) => {
|
||||
buf.write_u8(Type::Bytes as u8);
|
||||
write_len(buf, b.len());
|
||||
buf.write_slice(b);
|
||||
}
|
||||
DumpableValue::Code(c) => {
|
||||
buf.write_u8(Type::Code as u8);
|
||||
serialize_code(buf, c);
|
||||
}
|
||||
DumpableValue::Tuple(tup) => {
|
||||
buf.write_u8(Type::Tuple as u8);
|
||||
write_len(buf, tup.len());
|
||||
for val in tup {
|
||||
val.with_dump(|val| serialize_value(buf, val))??
|
||||
}
|
||||
}
|
||||
DumpableValue::None => {
|
||||
buf.write_u8(Type::None as u8);
|
||||
}
|
||||
DumpableValue::Ellipsis => {
|
||||
buf.write_u8(Type::Ellipsis as u8);
|
||||
}
|
||||
DumpableValue::StopIter => {
|
||||
buf.write_u8(Type::StopIter as u8);
|
||||
}
|
||||
DumpableValue::List(l) => {
|
||||
buf.write_u8(Type::List as u8);
|
||||
write_len(buf, l.len());
|
||||
for val in l {
|
||||
val.with_dump(|val| serialize_value(buf, val))??
|
||||
}
|
||||
}
|
||||
DumpableValue::Set(set) => {
|
||||
buf.write_u8(Type::Set as u8);
|
||||
write_len(buf, set.len());
|
||||
for val in set {
|
||||
val.with_dump(|val| serialize_value(buf, val))??
|
||||
}
|
||||
}
|
||||
DumpableValue::Frozenset(set) => {
|
||||
buf.write_u8(Type::FrozenSet as u8);
|
||||
write_len(buf, set.len());
|
||||
for val in set {
|
||||
val.with_dump(|val| serialize_value(buf, val))??
|
||||
}
|
||||
}
|
||||
DumpableValue::Dict(d) => {
|
||||
buf.write_u8(Type::Dict as u8);
|
||||
write_len(buf, d.len());
|
||||
for (k, v) in d {
|
||||
k.with_dump(|val| serialize_value(buf, val))??;
|
||||
v.with_dump(|val| serialize_value(buf, val))??;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>) {
|
||||
write_len(buf, code.instructions.len());
|
||||
// SAFETY: it's ok to transmute CodeUnit to [u8; 2]
|
||||
let (_, instructions_bytes, _) = unsafe { code.instructions.align_to() };
|
||||
buf.write_slice(instructions_bytes);
|
||||
|
||||
write_len(buf, code.locations.len());
|
||||
for loc in &*code.locations {
|
||||
buf.write_u32(loc.row);
|
||||
buf.write_u32(loc.column);
|
||||
}
|
||||
|
||||
buf.write_u16(code.flags.bits());
|
||||
|
||||
buf.write_u32(code.posonlyarg_count);
|
||||
buf.write_u32(code.arg_count);
|
||||
buf.write_u32(code.kwonlyarg_count);
|
||||
|
||||
write_len(buf, code.source_path.as_ref().len());
|
||||
buf.write_slice(code.source_path.as_ref().as_bytes());
|
||||
|
||||
buf.write_u32(code.first_line_number);
|
||||
buf.write_u32(code.max_stackdepth);
|
||||
|
||||
write_len(buf, code.obj_name.as_ref().len());
|
||||
buf.write_slice(code.obj_name.as_ref().as_bytes());
|
||||
|
||||
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
|
||||
write_len(buf, cell2arg.len());
|
||||
for &i in cell2arg {
|
||||
buf.write_u32(i as u32)
|
||||
}
|
||||
|
||||
write_len(buf, code.constants.len());
|
||||
for constant in &*code.constants {
|
||||
serialize_value(buf, constant.borrow_constant().into()).unwrap_or_else(|x| match x {})
|
||||
}
|
||||
|
||||
let mut write_names = |names: &[C::Name]| {
|
||||
write_len(buf, names.len());
|
||||
for name in names {
|
||||
write_len(buf, name.as_ref().len());
|
||||
buf.write_slice(name.as_ref().as_bytes());
|
||||
}
|
||||
};
|
||||
|
||||
write_names(&code.names);
|
||||
write_names(&code.varnames);
|
||||
write_names(&code.cellvars);
|
||||
write_names(&code.freevars);
|
||||
}
|
|
@ -26,7 +26,6 @@ itertools = { workspace = true }
|
|||
log = { workspace = true }
|
||||
num-bigint = { workspace = true }
|
||||
num-traits = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
unicode_names2 = { workspace = true }
|
||||
|
||||
unic-emoji-char = "0.9.0"
|
||||
|
|
|
@ -200,7 +200,7 @@ pub fn parse_tokens(
|
|||
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
|
||||
|
||||
/// Represents the different types of errors that can occur during parsing.
|
||||
#[derive(Debug, PartialEq, thiserror::Error)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum ParseErrorType {
|
||||
/// Parser encountered an unexpected end of input
|
||||
Eof,
|
||||
|
@ -215,6 +215,8 @@ pub enum ParseErrorType {
|
|||
Lexical(LexicalErrorType),
|
||||
}
|
||||
|
||||
impl std::error::Error for ParseErrorType {}
|
||||
|
||||
// Convert `lalrpop_util::ParseError` to our internal type
|
||||
fn parse_error_from_lalrpop(
|
||||
err: LalrpopError<Location, Tok, LexicalError>,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue