Custom marshal enc/decoding impl

This commit is contained in:
Noa 2022-12-15 13:25:02 -06:00
parent 50b5388711
commit 41b465dee1
14 changed files with 913 additions and 229 deletions

View file

@ -9,5 +9,3 @@ edition = "2021"
rustpython-compiler-core = { path = "core" }
rustpython-codegen = { path = "codegen" }
rustpython-parser = { path = "parser" }
thiserror = { workspace = true }

View file

@ -16,9 +16,8 @@ bitflags = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
num-complex = { workspace = true, features = ["serde"] }
num-complex = { workspace = true }
num-traits = { workspace = true }
thiserror = { workspace = true }
[dev-dependencies]
rustpython-parser = { path = "../parser" }

View file

@ -249,9 +249,9 @@ impl Compiler {
fn push_output(
&mut self,
flags: bytecode::CodeFlags,
posonlyarg_count: usize,
arg_count: usize,
kwonlyarg_count: usize,
posonlyarg_count: u32,
arg_count: u32,
kwonlyarg_count: u32,
obj_name: String,
) {
let source_path = self.source_path.clone();
@ -936,9 +936,11 @@ impl Compiler {
self.push_output(
bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED,
args.posonlyargs.len(),
args.posonlyargs.len() + args.args.len(),
args.kwonlyargs.len(),
args.posonlyargs.len().try_into().unwrap(),
(args.posonlyargs.len() + args.args.len())
.try_into()
.unwrap(),
args.kwonlyargs.len().try_into().unwrap(),
name.to_owned(),
);
@ -2750,8 +2752,8 @@ impl Compiler {
self.current_source_location = location;
}
fn get_source_line_number(&self) -> usize {
self.current_source_location.row()
fn get_source_line_number(&self) -> u32 {
self.current_source_location.row() as u32
}
fn push_qualified_path(&mut self, name: &str) {

View file

@ -2,7 +2,7 @@ use std::fmt;
pub type CodegenError = rustpython_compiler_core::BaseError<CodegenErrorType>;
#[derive(Debug, thiserror::Error)]
#[derive(Debug)]
#[non_exhaustive]
pub enum CodegenErrorType {
/// Invalid assignment, cannot store value in target.
@ -33,6 +33,8 @@ pub enum CodegenErrorType {
NotImplementedYet, // RustPython marker for unimplemented features
}
impl std::error::Error for CodegenErrorType {}
impl fmt::Display for CodegenErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use CodegenErrorType::*;

View file

@ -63,11 +63,11 @@ impl Default for Block {
pub struct CodeInfo {
pub flags: CodeFlags,
pub posonlyarg_count: usize, // Number of positional-only arguments
pub arg_count: usize,
pub kwonlyarg_count: usize,
pub posonlyarg_count: u32, // Number of positional-only arguments
pub arg_count: u32,
pub kwonlyarg_count: u32,
pub source_path: String,
pub first_line_number: usize,
pub first_line_number: u32,
pub obj_name: String, // Name of the object that created this code object
pub blocks: Vec<Block>,
@ -172,15 +172,15 @@ impl CodeInfo {
}
}
fn cell2arg(&self) -> Option<Box<[isize]>> {
fn cell2arg(&self) -> Option<Box<[i32]>> {
if self.cellvar_cache.is_empty() {
return None;
}
let total_args = self.arg_count
+ self.kwonlyarg_count
+ self.flags.contains(CodeFlags::HAS_VARARGS) as usize
+ self.flags.contains(CodeFlags::HAS_VARKEYWORDS) as usize;
+ self.flags.contains(CodeFlags::HAS_VARARGS) as u32
+ self.flags.contains(CodeFlags::HAS_VARKEYWORDS) as u32;
let mut found_cellarg = false;
let cell2arg = self
@ -190,10 +190,10 @@ impl CodeInfo {
self.varname_cache
.get_index_of(var)
// check that it's actually an arg
.filter(|i| *i < total_args)
.filter(|i| *i < total_args as usize)
.map_or(-1, |i| {
found_cellarg = true;
i as isize
i as i32
})
})
.collect::<Box<[_]>>();

View file

@ -8,14 +8,10 @@ repository = "https://github.com/RustPython/RustPython"
license = "MIT"
[dependencies]
bincode = { workspace = true }
bitflags = { workspace = true }
bstr = { workspace = true }
itertools = { workspace = true }
num-bigint = { workspace = true, features = ["serde"] }
num-complex = { workspace = true, features = ["serde"] }
num_enum = { workspace = true }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
num-bigint = { workspace = true }
num-complex = { workspace = true }
lz4_flex = "0.9.2"

View file

@ -1,14 +1,12 @@
//! Implement python as a virtual machine with bytecodes. This module
//! implements bytecode structure.
use crate::Location;
use crate::marshal::MarshalError;
use crate::{marshal, Location};
use bitflags::bitflags;
use bstr::ByteSlice;
use itertools::Itertools;
use num_bigint::BigInt;
use num_complex::Complex64;
use num_enum::{IntoPrimitive, TryFromPrimitive};
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
use std::{collections::BTreeSet, fmt, hash, mem};
@ -31,9 +29,7 @@ impl Constant for ConstantData {
ConstantData::Str { value } => Str { value },
ConstantData::Bytes { value } => Bytes { value },
ConstantData::Code { code } => Code { code },
ConstantData::Tuple { elements } => Tuple {
elements: Box::new(elements.iter().map(|e| e.borrow_constant())),
},
ConstantData::Tuple { elements } => Tuple { elements },
ConstantData::None => None,
ConstantData::Ellipsis => Ellipsis,
}
@ -44,6 +40,9 @@ impl Constant for ConstantData {
/// A factory for the constants and names stored inside a `CodeObject`.
///
/// The trait requires `Copy`, so a bag is passed around by value.
pub trait ConstantBag: Sized + Copy {
/// The constant representation this bag produces.
type Constant: Constant;
/// Builds a constant from a borrowed view of a constant of any `Constant` type.
fn make_constant<C: Constant>(&self, constant: BorrowedConstant<C>) -> Self::Constant;
/// Builds an integer constant from an owned big integer.
fn make_int(&self, value: BigInt) -> Self::Constant;
/// Builds a tuple constant from already-constructed element constants.
fn make_tuple(&self, elements: impl Iterator<Item = Self::Constant>) -> Self::Constant;
/// Builds a constant wrapping a code object whose constants came from this bag.
fn make_code(&self, code: CodeObject<Self::Constant>) -> Self::Constant;
/// Builds a name in the representation used by `Self::Constant`.
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name;
}
@ -55,6 +54,19 @@ impl ConstantBag for BasicBag {
/// Converts the borrowed constant into an owned constant via `to_owned`.
fn make_constant<C: Constant>(&self, constant: BorrowedConstant<C>) -> Self::Constant {
constant.to_owned()
}
/// Wraps the big integer in `ConstantData::Integer`.
fn make_int(&self, value: BigInt) -> Self::Constant {
ConstantData::Integer { value }
}
/// Collects the element constants into a `ConstantData::Tuple`.
fn make_tuple(&self, elements: impl Iterator<Item = Self::Constant>) -> Self::Constant {
    let elements = elements.collect();
    ConstantData::Tuple { elements }
}
/// Boxes the code object and wraps it in `ConstantData::Code`.
fn make_code(&self, code: CodeObject<Self::Constant>) -> Self::Constant {
    let code = Box::new(code);
    ConstantData::Code { code }
}
/// Copies the borrowed name into an owned value via `to_owned`.
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name {
name.to_owned()
}
@ -62,26 +74,22 @@ impl ConstantBag for BasicBag {
/// Primary container of a single code object. Each python function has
/// a codeobject. Also a module has a codeobject.
#[derive(Clone, Serialize, Deserialize)]
#[derive(Clone)]
pub struct CodeObject<C: Constant = ConstantData> {
pub instructions: Box<[CodeUnit]>,
pub locations: Box<[Location]>,
pub flags: CodeFlags,
pub posonlyarg_count: usize,
pub posonlyarg_count: u32,
// Number of positional-only arguments
pub arg_count: usize,
pub kwonlyarg_count: usize,
pub arg_count: u32,
pub kwonlyarg_count: u32,
pub source_path: C::Name,
pub first_line_number: usize,
pub first_line_number: u32,
pub max_stackdepth: u32,
pub obj_name: C::Name,
// Name of the object that created this code object
pub cell2arg: Option<Box<[isize]>>,
pub cell2arg: Option<Box<[i32]>>,
pub constants: Box<[C]>,
#[serde(bound(
deserialize = "C::Name: serde::Deserialize<'de>",
serialize = "C::Name: serde::Serialize"
))]
pub names: Box<[C::Name]>,
pub varnames: Box<[C::Name]>,
pub cellvars: Box<[C::Name]>,
@ -89,7 +97,6 @@ pub struct CodeObject<C: Constant = ConstantData> {
}
bitflags! {
#[derive(Serialize, Deserialize)]
pub struct CodeFlags: u16 {
const NEW_LOCALS = 0x01;
const IS_GENERATOR = 0x02;
@ -114,7 +121,7 @@ impl CodeFlags {
}
/// an opcode argument that may be extended by a prior ExtendedArg
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct OpArgByte(pub u8);
impl OpArgByte {
@ -211,23 +218,7 @@ impl OpArgType for bool {
}
}
macro_rules! enum_oparg {
($t:ident) => {
impl OpArgType for $t {
#[inline(always)]
fn from_oparg(x: u32) -> Option<Self> {
$t::try_from_primitive(x as _).ok()
}
#[inline(always)]
fn to_oparg(self) -> u32 {
u8::from(self).into()
}
}
};
}
#[derive(Copy, Clone, Serialize, Deserialize)]
#[serde(bound = "")]
#[derive(Copy, Clone)]
pub struct Arg<T: OpArgType>(PhantomData<T>);
impl<T: OpArgType> Arg<T> {
@ -302,7 +293,7 @@ impl fmt::Display for Label {
}
/// Transforms a value prior to formatting it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum ConversionFlag {
/// No conversion
@ -314,32 +305,57 @@ pub enum ConversionFlag {
/// Converts by calling `repr(<value>)`.
Repr = b'r',
}
enum_oparg!(ConversionFlag);
impl OpArgType for ConversionFlag {
    /// Encodes the flag as its `u8` discriminant, widened to `u32`.
    fn to_oparg(self) -> u32 {
        u32::from(self as u8)
    }

    /// Decodes a flag from an oparg; anything that does not fit in a byte or
    /// is not one of the four known markers yields `None`.
    fn from_oparg(x: u32) -> Option<Self> {
        match u8::try_from(x) {
            Ok(0) => Some(Self::None),
            Ok(b's') => Some(Self::Str),
            Ok(b'a') => Some(Self::Ascii),
            Ok(b'r') => Some(Self::Repr),
            _ => None,
        }
    }
}
impl TryFrom<usize> for ConversionFlag {
type Error = usize;
fn try_from(b: usize) -> Result<Self, Self::Error> {
u8::try_from(b)
.ok()
.and_then(|b| Self::try_from(b).ok())
.ok_or(b)
u32::try_from(b).ok().and_then(Self::from_oparg).ok_or(b)
}
}
/// The kind of Raise that occurred.
#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum RaiseKind {
Reraise,
Raise,
RaiseCause,
}
enum_oparg!(RaiseKind);
impl OpArgType for RaiseKind {
fn to_oparg(self) -> u32 {
self as u32
}
fn from_oparg(x: u32) -> Option<Self> {
Some(match x {
0 => Self::Reraise,
1 => Self::Raise,
2 => Self::RaiseCause,
_ => return None,
})
}
}
pub type NameIdx = u32;
/// A Single bytecode instruction.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
pub enum Instruction {
/// Importing by name
ImportName {
@ -562,7 +578,29 @@ pub enum Instruction {
}
const _: () = assert!(mem::size_of::<Instruction>() == 1);
#[derive(Copy, Clone, Serialize, Deserialize)]
/// Converts an instruction into its single-byte representation.
impl From<Instruction> for u8 {
#[inline]
fn from(ins: Instruction) -> u8 {
// SAFETY: `Instruction` is `#[repr(u8)]` and a const assertion pins its size to
// exactly one byte, so the value has no padding and every bit of it is a valid `u8`.
unsafe { std::mem::transmute::<Instruction, u8>(ins) }
}
}
/// Decodes an instruction from its single-byte representation.
///
/// Bytes greater than the byte value of `Instruction::ExtendedArg` are rejected
/// with `MarshalError::InvalidBytecode`.
impl TryFrom<u8> for Instruction {
type Error = crate::marshal::MarshalError;
#[inline]
fn try_from(value: u8) -> Result<Self, crate::marshal::MarshalError> {
if value <= u8::from(Instruction::ExtendedArg) {
// SAFETY: NOTE(review) — this is sound only if `ExtendedArg` is the last variant
// and the discriminants form a gap-free range starting at 0, so that every byte
// accepted by the check above names a real variant. TODO confirm against the
// `Instruction` definition whenever variants are added or reordered.
Ok(unsafe { std::mem::transmute::<u8, Instruction>(value) })
} else {
Err(crate::marshal::MarshalError::InvalidBytecode)
}
}
}
#[derive(Copy, Clone)]
#[repr(C)]
pub struct CodeUnit {
pub op: Instruction,
pub arg: OpArgByte,
@ -579,7 +617,6 @@ impl CodeUnit {
use self::Instruction::*;
bitflags! {
#[derive(Serialize, Deserialize)]
pub struct MakeFunctionFlags: u8 {
const CLOSURE = 0x01;
const ANNOTATIONS = 0x02;
@ -607,7 +644,7 @@ impl OpArgType for MakeFunctionFlags {
/// let b = ConstantData::Boolean {value: false};
/// assert_ne!(a, b);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone)]
pub enum ConstantData {
Tuple { elements: Vec<ConstantData> },
Integer { value: BigInt },
@ -677,36 +714,40 @@ pub enum BorrowedConstant<'a, C: Constant> {
Str { value: &'a str },
Bytes { value: &'a [u8] },
Code { code: &'a CodeObject<C> },
Tuple { elements: BorrowedTupleIter<'a, C> },
Tuple { elements: &'a [C] },
None,
Ellipsis,
}
type BorrowedTupleIter<'a, C> = Box<dyn Iterator<Item = BorrowedConstant<'a, C>> + 'a>;
// `Copy` and `Clone` are written by hand with only a `C: Constant` bound; a `#[derive]`
// would additionally require `C: Copy` / `C: Clone`.
impl<C: Constant> Copy for BorrowedConstant<'_, C> {}
impl<C: Constant> Clone for BorrowedConstant<'_, C> {
// Cloning is a plain bitwise copy because the type is `Copy`.
fn clone(&self) -> Self {
*self
}
}
impl<C: Constant> BorrowedConstant<'_, C> {
// takes `self` because we need to consume the iterator
pub fn fmt_display(self, f: &mut fmt::Formatter) -> fmt::Result {
pub fn fmt_display(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
BorrowedConstant::Integer { value } => write!(f, "{value}"),
BorrowedConstant::Float { value } => write!(f, "{value}"),
BorrowedConstant::Complex { value } => write!(f, "{value}"),
BorrowedConstant::Boolean { value } => {
write!(f, "{}", if value { "True" } else { "False" })
write!(f, "{}", if *value { "True" } else { "False" })
}
BorrowedConstant::Str { value } => write!(f, "{value:?}"),
BorrowedConstant::Bytes { value } => write!(f, "b{:?}", value.as_bstr()),
BorrowedConstant::Bytes { value } => write!(f, "b\"{}\"", value.escape_ascii()),
BorrowedConstant::Code { code } => write!(f, "{code:?}"),
BorrowedConstant::Tuple { elements } => {
write!(f, "(")?;
let mut first = true;
for c in elements {
for c in *elements {
if first {
first = false
} else {
write!(f, ", ")?;
}
c.fmt_display(f)?;
c.borrow_constant().fmt_display(f)?;
}
write!(f, ")")
}
@ -733,7 +774,10 @@ impl<C: Constant> BorrowedConstant<'_, C> {
code: Box::new(code.map_clone_bag(&BasicBag)),
},
BorrowedConstant::Tuple { elements } => Tuple {
elements: elements.map(BorrowedConstant::to_owned).collect(),
elements: elements
.iter()
.map(|c| c.borrow_constant().to_owned())
.collect(),
},
BorrowedConstant::None => None,
BorrowedConstant::Ellipsis => Ellipsis,
@ -742,7 +786,7 @@ impl<C: Constant> BorrowedConstant<'_, C> {
}
/// The possible comparison operators
#[derive(Debug, Copy, Clone, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
pub enum ComparisonOperator {
// be intentional with bits so that we can do eval_ord with just a bitwise and
@ -754,9 +798,25 @@ pub enum ComparisonOperator {
LessOrEqual = 0b101,
GreaterOrEqual = 0b110,
}
enum_oparg!(ComparisonOperator);
#[derive(Debug, Copy, Clone, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
impl OpArgType for ComparisonOperator {
fn to_oparg(self) -> u32 {
self as u32
}
fn from_oparg(x: u32) -> Option<Self> {
Some(match x {
0b001 => Self::Less,
0b010 => Self::Greater,
0b011 => Self::NotEqual,
0b100 => Self::Equal,
0b101 => Self::LessOrEqual,
0b110 => Self::GreaterOrEqual,
_ => return None,
})
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
pub enum TestOperator {
In,
@ -766,7 +826,22 @@ pub enum TestOperator {
/// two exceptions that match?
ExceptionMatch,
}
enum_oparg!(TestOperator);
impl OpArgType for TestOperator {
fn to_oparg(self) -> u32 {
self as u32
}
fn from_oparg(x: u32) -> Option<Self> {
Some(match x {
0 => Self::In,
1 => Self::NotIn,
2 => Self::Is,
3 => Self::IsNot,
4 => Self::ExceptionMatch,
_ => return None,
})
}
}
/// The possible Binary operators
/// # Examples
@ -776,7 +851,7 @@ enum_oparg!(TestOperator);
/// use rustpython_compiler_core::BinaryOperator::Add;
/// let op = BinaryOperation {op: Add};
/// ```
#[derive(Debug, Copy, Clone, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
pub enum BinaryOperator {
Power,
@ -793,10 +868,33 @@ pub enum BinaryOperator {
Xor,
Or,
}
enum_oparg!(BinaryOperator);
impl OpArgType for BinaryOperator {
fn to_oparg(self) -> u32 {
self as u32
}
fn from_oparg(x: u32) -> Option<Self> {
Some(match x {
0 => Self::Power,
1 => Self::Multiply,
2 => Self::MatrixMultiply,
3 => Self::Divide,
4 => Self::FloorDivide,
5 => Self::Modulo,
6 => Self::Add,
7 => Self::Subtract,
8 => Self::Lshift,
9 => Self::Rshift,
10 => Self::And,
11 => Self::Xor,
12 => Self::Or,
_ => return None,
})
}
}
/// The possible unary operators
#[derive(Debug, Copy, Clone, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
pub enum UnaryOperator {
Not,
@ -804,7 +902,21 @@ pub enum UnaryOperator {
Minus,
Plus,
}
enum_oparg!(UnaryOperator);
impl OpArgType for UnaryOperator {
fn to_oparg(self) -> u32 {
self as u32
}
fn from_oparg(x: u32) -> Option<Self> {
Some(match x {
0 => Self::Not,
1 => Self::Invert,
2 => Self::Minus,
3 => Self::Plus,
_ => return None,
})
}
}
#[derive(Copy, Clone)]
pub struct UnpackExArgs {
@ -867,10 +979,10 @@ impl<C: Constant> CodeObject<C> {
/// Get all arguments of the code object
/// like inspect.getargs
pub fn arg_names(&self) -> Arguments<C::Name> {
let nargs = self.arg_count;
let nkwargs = self.kwonlyarg_count;
let nargs = self.arg_count as usize;
let nkwargs = self.kwonlyarg_count as usize;
let mut varargspos = nargs + nkwargs;
let posonlyargs = &self.varnames[..self.posonlyarg_count];
let posonlyargs = &self.varnames[..self.posonlyarg_count as usize];
let args = &self.varnames[..nargs];
let kwonlyargs = &self.varnames[nargs..varargspos];
@ -1033,49 +1145,23 @@ impl<C: Constant> CodeObject<C> {
}
}
/// Error that occurs during code deserialization
#[derive(Debug)]
#[non_exhaustive]
pub enum CodeDeserializeError {
/// Unexpected End Of File
Eof,
/// Invalid Bytecode
Other,
}
impl fmt::Display for CodeDeserializeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Eof => f.write_str("unexpected end of data"),
Self::Other => f.write_str("invalid bytecode"),
}
}
}
impl std::error::Error for CodeDeserializeError {}
impl CodeObject<ConstantData> {
/// Load a code object from bytes
pub fn from_bytes(data: &[u8]) -> Result<Self, CodeDeserializeError> {
pub fn from_bytes(data: &[u8]) -> Result<Self, MarshalError> {
use lz4_flex::block::DecompressError;
let raw_bincode = lz4_flex::decompress_size_prepended(data).map_err(|e| match e {
DecompressError::OutputTooSmall { .. } | DecompressError::ExpectedAnotherByte => {
CodeDeserializeError::Eof
MarshalError::Eof
}
_ => CodeDeserializeError::Other,
_ => MarshalError::InvalidBytecode,
})?;
let data = bincode::deserialize(&raw_bincode).map_err(|e| match *e {
bincode::ErrorKind::Io(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
CodeDeserializeError::Eof
}
_ => CodeDeserializeError::Other,
})?;
Ok(data)
marshal::deserialize_code(&mut &raw_bincode[..], BasicBag)
}
/// Serialize this bytecode to bytes.
pub fn to_bytes(&self) -> Vec<u8> {
let data = bincode::serialize(&self).expect("CodeObject is not serializable");
let mut data = Vec::new();
marshal::serialize_code(&mut data, self);
lz4_flex::compress_prepend_size(&data)
}
}
@ -1466,7 +1552,7 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
}
/// A frozen module. Holds a code object and whether it is part of a package
#[derive(Serialize, Deserialize, Debug)]
#[derive(Debug)]
pub struct FrozenModule {
pub code: CodeObject<ConstantData>,
pub package: bool,
@ -1474,34 +1560,28 @@ pub struct FrozenModule {
pub mod frozen_lib {
use super::*;
use bincode::{options, Options};
use std::io;
use marshal::{Read, Write};
/// Decode a library to a iterable of frozen modules
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter {
let data = lz4_flex::decompress_size_prepended(bytes).unwrap();
let r = VecReader { data, pos: 0 };
let mut de = bincode::Deserializer::with_bincode_read(r, options());
let len = u64::deserialize(&mut de).unwrap().try_into().unwrap();
FrozenModulesIter { len, de }
let mut data = marshal::Cursor { data, position: 0 };
let remaining = data.read_u32().unwrap();
FrozenModulesIter { remaining, data }
}
pub struct FrozenModulesIter {
len: usize,
// ideally this could be a SeqAccess, but I think that would require existential types
de: bincode::Deserializer<VecReader, bincode::DefaultOptions>,
remaining: u32,
data: marshal::Cursor<Vec<u8>>,
}
impl Iterator for FrozenModulesIter {
type Item = (String, FrozenModule);
fn next(&mut self) -> Option<Self::Item> {
// manually mimic bincode's seq encoding, which is <len:u64> <element*len>
// This probably won't change (bincode doesn't require padding or anything), but
// it's not guaranteed by semver as far as I can tell
if self.len > 0 {
let entry = Deserialize::deserialize(&mut self.de).unwrap();
self.len -= 1;
if self.remaining > 0 {
let entry = read_entry(&mut self.data).unwrap();
self.remaining -= 1;
Some(entry)
} else {
None
@ -1509,12 +1589,19 @@ pub mod frozen_lib {
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.len, Some(self.len))
(self.remaining as usize, Some(self.remaining as usize))
}
}
impl ExactSizeIterator for FrozenModulesIter {}
/// Reads one library entry: a `u32` name length, the UTF-8 name bytes, a
/// marshalled code object and a one-byte package flag (non-zero means `true`).
fn read_entry(rdr: &mut impl Read) -> Result<(String, FrozenModule), marshal::MarshalError> {
    let name_len = rdr.read_u32()?;
    let name = String::from(rdr.read_str(name_len)?);
    let code = marshal::deserialize_code(rdr, BasicBag)?;
    let package = rdr.read_u8()? != 0;
    let module = FrozenModule { code, package };
    Ok((name, module))
}
/// Encode the given iterator of frozen modules into a compressed vector of bytes
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8>
where
@ -1522,82 +1609,25 @@ pub mod frozen_lib {
I::IntoIter: ExactSizeIterator + Clone,
{
let iter = lib.into_iter();
let data = options().serialize(&SerializeLib { iter }).unwrap();
let mut data = Vec::new();
write_lib(&mut data, iter);
lz4_flex::compress_prepend_size(&data)
}
struct SerializeLib<I> {
iter: I,
}
impl<'a, I> Serialize for SerializeLib<I>
where
I: ExactSizeIterator<Item = (&'a str, &'a FrozenModule)> + Clone,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.collect_seq(self.iter.clone())
/// Writes the number of modules followed by every `(name, module)` entry.
fn write_lib<'a>(
    buf: &mut impl Write,
    lib: impl ExactSizeIterator<Item = (&'a str, &'a FrozenModule)>,
) {
    let module_count = lib.len();
    marshal::write_len(buf, module_count);
    lib.for_each(|(name, module)| write_entry(&mut *buf, name, module));
}
/// Owned version of bincode::de::read::SliceReader<'a>
struct VecReader {
data: Vec<u8>,
pos: usize,
}
impl io::Read for VecReader {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let mut subslice = &self.data[self.pos..];
let n = io::Read::read(&mut subslice, buf)?;
self.pos += n;
Ok(n)
}
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
self.get_byte_slice(buf.len())
.map(|data| buf.copy_from_slice(data))
}
}
impl VecReader {
#[inline(always)]
fn get_byte_slice(&mut self, length: usize) -> io::Result<&[u8]> {
let subslice = &self.data[self.pos..];
match subslice.get(..length) {
Some(ret) => {
self.pos += length;
Ok(ret)
}
None => Err(io::ErrorKind::UnexpectedEof.into()),
}
}
}
impl<'storage> bincode::BincodeRead<'storage> for VecReader {
fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
where
V: serde::de::Visitor<'storage>,
{
let bytes = self.get_byte_slice(length)?;
match ::std::str::from_utf8(bytes) {
Ok(s) => visitor.visit_str(s),
Err(e) => Err(bincode::ErrorKind::InvalidUtf8Encoding(e).into()),
}
}
fn get_byte_buffer(&mut self, length: usize) -> bincode::Result<Vec<u8>> {
self.get_byte_slice(length)
.map(|x| x.to_vec())
.map_err(Into::into)
}
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
where
V: serde::de::Visitor<'storage>,
{
visitor.visit_bytes(self.get_byte_slice(length)?)
}
/// Writes one library entry: the name's byte length, the name bytes, the
/// marshalled code object and the package flag as a single byte.
fn write_entry(buf: &mut impl Write, name: &str, module: &FrozenModule) {
    let name_bytes = name.as_bytes();
    marshal::write_len(buf, name_bytes.len());
    buf.write_slice(name_bytes);
    marshal::serialize_code(buf, &module.code);
    let package_flag = module.package as u8;
    buf.write_u8(package_flag);
}
}

View file

@ -1,4 +1,5 @@
use crate::Location;
use std::error::Error as StdError;
use std::fmt::Display;
#[derive(Debug, PartialEq, Eq)]
@ -16,7 +17,14 @@ impl<T> std::ops::Deref for BaseError<T> {
}
}
impl<T> std::error::Error for BaseError<T> where T: std::fmt::Display + std::fmt::Debug {}
impl<T> StdError for BaseError<T>
where
    T: StdError + 'static,
{
    /// Reports the wrapped error value as the underlying cause.
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        let cause: &(dyn StdError + 'static) = &self.error;
        Some(cause)
    }
}
impl<T> Display for BaseError<T>
where
@ -51,12 +59,18 @@ impl<T> BaseError<T> {
}
}
#[derive(Debug, thiserror::Error)]
#[derive(Debug)]
pub struct CompileError<T> {
pub body: BaseError<T>,
pub statement: Option<String>,
}
impl<T: StdError + 'static> StdError for CompileError<T> {
    /// Forwards to the `source` of the wrapped `BaseError`.
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        StdError::source(&self.body)
    }
}
impl<T> std::ops::Deref for CompileError<T> {
type Target = BaseError<T>;
fn deref(&self) -> &Self::Target {

View file

@ -4,6 +4,7 @@
mod bytecode;
mod error;
mod location;
pub mod marshal;
mod mode;
pub use bytecode::*;

View file

@ -1,7 +1,5 @@
use serde::{Deserialize, Serialize};
/// Sourcecode location.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Location {
pub(super) row: u32,
pub(super) column: u32,

615
core/src/marshal.rs Normal file
View file

@ -0,0 +1,615 @@
use core::fmt;
use std::convert::Infallible;
use num_bigint::{BigInt, Sign};
use num_complex::Complex64;
use crate::{bytecode::*, Location};
pub const FORMAT_VERSION: u32 = 4;
/// Errors that can occur while decoding marshalled data.
///
/// The type is comparable and clonable so callers and tests can match on a
/// specific failure with `==` / `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarshalError {
    /// Unexpected End Of File
    Eof,
    /// Invalid Bytecode
    InvalidBytecode,
    /// Invalid utf8 in string
    InvalidUtf8,
    /// Bad type marker
    BadType,
}

impl fmt::Display for MarshalError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            Self::Eof => "unexpected end of data",
            Self::InvalidBytecode => "invalid bytecode",
            Self::InvalidUtf8 => "invalid utf8",
            Self::BadType => "bad type marker",
        };
        f.write_str(message)
    }
}

/// Any UTF-8 decoding failure is reported as [`MarshalError::InvalidUtf8`];
/// the position information carried by `Utf8Error` is discarded.
impl From<std::str::Utf8Error> for MarshalError {
    fn from(_: std::str::Utf8Error) -> Self {
        Self::InvalidUtf8
    }
}

impl std::error::Error for MarshalError {}

/// Module-local `Result` whose error type defaults to [`MarshalError`].
type Result<T, E = MarshalError> = std::result::Result<T, E>;
/// One-byte type markers that introduce each marshalled value.
///
/// Each variant's discriminant is the ASCII byte written to the stream.
/// The commented-out entries are markers that currently have no variant;
/// `Type::try_from` rejects them with `MarshalError::BadType`.
#[repr(u8)]
enum Type {
// Null = b'0',
None = b'N',
False = b'F',
True = b'T',
StopIter = b'S',
Ellipsis = b'.',
Int = b'i',
Float = b'g',
Complex = b'y',
// Long = b'l', // i32
Bytes = b's', // = TYPE_STRING
// Interned = b't',
// Ref = b'r',
Tuple = b'(',
List = b'[',
Dict = b'{',
Code = b'c',
Unicode = b'u',
// Unknown = b'?',
Set = b'<',
FrozenSet = b'>',
Ascii = b'a',
// AsciiInterned = b'A',
// SmallTuple = b')',
// ShortAscii = b'z',
// ShortAsciiInterned = b'Z',
}
// const FLAG_REF: u8 = b'\x80';
impl TryFrom<u8> for Type {
type Error = MarshalError;
fn try_from(value: u8) -> Result<Self> {
use Type::*;
Ok(match value {
// b'0' => Null,
b'N' => None,
b'F' => False,
b'T' => True,
b'S' => StopIter,
b'.' => Ellipsis,
b'i' => Int,
b'g' => Float,
b'y' => Complex,
// b'l' => Long,
b's' => Bytes,
// b't' => Interned,
// b'r' => Ref,
b'(' => Tuple,
b'[' => List,
b'{' => Dict,
b'c' => Code,
b'u' => Unicode,
// b'?' => Unknown,
b'<' => Set,
b'>' => FrozenSet,
b'a' => Ascii,
// b'A' => AsciiInterned,
// b')' => SmallTuple,
// b'z' => ShortAscii,
// b'Z' => ShortAsciiInterned,
_ => return Err(MarshalError::BadType),
})
}
}
pub trait Read {
fn read_slice(&mut self, n: u32) -> Result<&[u8]>;
fn read_array<const N: usize>(&mut self) -> Result<&[u8; N]> {
self.read_slice(N as u32).map(|s| s.try_into().unwrap())
}
fn read_str(&mut self, len: u32) -> Result<&str> {
Ok(std::str::from_utf8(self.read_slice(len)?)?)
}
fn read_u8(&mut self) -> Result<u8> {
Ok(u8::from_le_bytes(*self.read_array()?))
}
fn read_u16(&mut self) -> Result<u16> {
Ok(u16::from_le_bytes(*self.read_array()?))
}
fn read_u32(&mut self) -> Result<u32> {
Ok(u32::from_le_bytes(*self.read_array()?))
}
fn read_u64(&mut self) -> Result<u64> {
Ok(u64::from_le_bytes(*self.read_array()?))
}
}
impl Read for &[u8] {
    /// Splits the first `n` bytes off the front of the slice and advances the
    /// slice past them. Fails with `MarshalError::Eof`, leaving the slice
    /// untouched, when fewer than `n` bytes remain.
    fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
        let wanted = n as usize;
        if wanted > self.len() {
            return Err(MarshalError::Eof);
        }
        let (head, tail) = self.split_at(wanted);
        *self = tail;
        Ok(head)
    }
}
/// A reader over an owned or borrowed byte buffer that tracks how far it has
/// been consumed.
pub struct Cursor<B> {
    /// The underlying buffer.
    pub data: B,
    /// Offset of the next unread byte in `data`.
    pub position: usize,
}

impl<B: AsRef<[u8]>> Read for Cursor<B> {
    /// Returns the next `n` bytes and advances `position` past them.
    ///
    /// Fails with `MarshalError::Eof`, leaving `position` unchanged, when
    /// fewer than `n` bytes remain. Because `position` is a public field it
    /// may point past the end of the buffer; that is also reported as `Eof`
    /// instead of panicking.
    fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
        let len = usize::try_from(n).map_err(|_| MarshalError::Eof)?;
        let end = self
            .position
            .checked_add(len)
            .ok_or(MarshalError::Eof)?;
        // `get` yields `None` both when `end` exceeds the buffer and when
        // `position` itself is already out of range.
        let slice = self
            .data
            .as_ref()
            .get(self.position..end)
            .ok_or(MarshalError::Eof)?;
        self.position = end;
        Ok(slice)
    }
}
pub fn deserialize_code<R: Read, Bag: ConstantBag>(
rdr: &mut R,
bag: Bag,
) -> Result<CodeObject<Bag::Constant>> {
let len = rdr.read_u32()?;
let instructions = rdr.read_slice(len * 2)?;
let instructions = instructions
.chunks_exact(2)
.map(|cu| {
let op = Instruction::try_from(cu[0])?;
let arg = OpArgByte(cu[1]);
Ok(CodeUnit { op, arg })
})
.collect::<Result<Box<[CodeUnit]>>>()?;
let len = rdr.read_u32()?;
let locations = (0..len)
.map(|_| {
Ok(Location {
row: rdr.read_u32()?,
column: rdr.read_u32()?,
})
})
.collect::<Result<Box<[Location]>>>()?;
let flags = CodeFlags::from_bits_truncate(rdr.read_u16()?);
let posonlyarg_count = rdr.read_u32()?;
let arg_count = rdr.read_u32()?;
let kwonlyarg_count = rdr.read_u32()?;
let len = rdr.read_u32()?;
let source_path = bag.make_name(rdr.read_str(len)?);
let first_line_number = rdr.read_u32()?;
let max_stackdepth = rdr.read_u32()?;
let len = rdr.read_u32()?;
let obj_name = bag.make_name(rdr.read_str(len)?);
let len = rdr.read_u32()?;
let cell2arg = (len != 0)
.then(|| {
(0..len)
.map(|_| Ok(rdr.read_u32()? as i32))
.collect::<Result<Box<[i32]>>>()
})
.transpose()?;
let len = rdr.read_u32()?;
let constants = (0..len)
.map(|_| deserialize_value(rdr, bag))
.collect::<Result<Box<[_]>>>()?;
let mut read_names = || {
let len = rdr.read_u32()?;
(0..len)
.map(|_| {
let len = rdr.read_u32()?;
Ok(bag.make_name(rdr.read_str(len)?))
})
.collect::<Result<Box<[_]>>>()
};
let names = read_names()?;
let varnames = read_names()?;
let cellvars = read_names()?;
let freevars = read_names()?;
Ok(CodeObject {
instructions,
locations,
flags,
posonlyarg_count,
arg_count,
kwonlyarg_count,
source_path,
first_line_number,
max_stackdepth,
obj_name,
cell2arg,
constants,
names,
varnames,
cellvars,
freevars,
})
}
/// Factory used by `deserialize_value` to build decoded values.
///
/// The constructors that return `Self::Value` directly cannot fail. Those that
/// return `Result` (`make_stop_iter`, `make_list`, `make_set`, `make_frozenset`,
/// `make_dict`) let an implementation reject value kinds it cannot represent.
pub trait MarshalBag: Copy {
/// Type of the values this bag produces.
type Value;
/// Builds a boolean value.
fn make_bool(&self, value: bool) -> Self::Value;
/// Builds the `None` value.
fn make_none(&self) -> Self::Value;
/// Builds the `Ellipsis` value.
fn make_ellipsis(&self) -> Self::Value;
/// Builds a float value.
fn make_float(&self, value: f64) -> Self::Value;
/// Builds a complex value.
fn make_complex(&self, value: Complex64) -> Self::Value;
/// Builds a string value from borrowed text.
fn make_str(&self, value: &str) -> Self::Value;
/// Builds a bytes value from a borrowed byte slice.
fn make_bytes(&self, value: &[u8]) -> Self::Value;
/// Builds an integer value from an owned big integer.
fn make_int(&self, value: BigInt) -> Self::Value;
/// Builds a tuple value from its already-decoded elements.
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value;
/// Builds a value wrapping a code object decoded with `Self::ConstantBag`.
fn make_code(
&self,
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
) -> Self::Value;
/// Builds the value for the stop-iteration marker, or fails if unsupported.
fn make_stop_iter(&self) -> Result<Self::Value>;
/// Builds a list value from its elements, or fails if unsupported.
fn make_list(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
/// Builds a set value from its elements, or fails if unsupported.
fn make_set(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
/// Builds a frozenset value from its elements, or fails if unsupported.
fn make_frozenset(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
/// Builds a dict value from its key/value pairs, or fails if unsupported.
fn make_dict(
&self,
it: impl Iterator<Item = (Self::Value, Self::Value)>,
) -> Result<Self::Value>;
/// The bag used to decode the constants of nested code objects.
type ConstantBag: ConstantBag;
/// Returns the bag used to decode nested code objects.
fn constant_bag(self) -> Self::ConstantBag;
}
/// Every `ConstantBag` can act as a `MarshalBag` whose values are its constants.
///
/// Only the value kinds a constant can hold are supported; the stop-iteration
/// marker, lists, sets, frozensets and dicts are rejected with
/// `MarshalError::BadType`.
impl<Bag: ConstantBag> MarshalBag for Bag {
    type Value = Bag::Constant;
    type ConstantBag = Self;

    fn constant_bag(self) -> Self::ConstantBag {
        self
    }

    fn make_bool(&self, value: bool) -> Self::Value {
        let constant = BorrowedConstant::Boolean { value };
        self.make_constant::<Bag::Constant>(constant)
    }

    fn make_none(&self) -> Self::Value {
        let constant = BorrowedConstant::None;
        self.make_constant::<Bag::Constant>(constant)
    }

    fn make_ellipsis(&self) -> Self::Value {
        let constant = BorrowedConstant::Ellipsis;
        self.make_constant::<Bag::Constant>(constant)
    }

    fn make_float(&self, value: f64) -> Self::Value {
        let constant = BorrowedConstant::Float { value };
        self.make_constant::<Bag::Constant>(constant)
    }

    fn make_complex(&self, value: Complex64) -> Self::Value {
        let constant = BorrowedConstant::Complex { value };
        self.make_constant::<Bag::Constant>(constant)
    }

    fn make_str(&self, value: &str) -> Self::Value {
        let constant = BorrowedConstant::Str { value };
        self.make_constant::<Bag::Constant>(constant)
    }

    fn make_bytes(&self, value: &[u8]) -> Self::Value {
        let constant = BorrowedConstant::Bytes { value };
        self.make_constant::<Bag::Constant>(constant)
    }

    // The next three methods share their names with `ConstantBag` methods;
    // the calls are spelled out to show they delegate rather than recurse.
    fn make_int(&self, value: BigInt) -> Self::Value {
        <Bag as ConstantBag>::make_int(self, value)
    }

    fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value {
        <Bag as ConstantBag>::make_tuple(self, elements)
    }

    fn make_code(
        &self,
        code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
    ) -> Self::Value {
        <Bag as ConstantBag>::make_code(self, code)
    }

    fn make_stop_iter(&self) -> Result<Self::Value> {
        Err(MarshalError::BadType)
    }

    fn make_list(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
        Err(MarshalError::BadType)
    }

    fn make_set(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
        Err(MarshalError::BadType)
    }

    fn make_frozenset(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
        Err(MarshalError::BadType)
    }

    fn make_dict(
        &self,
        _: impl Iterator<Item = (Self::Value, Self::Value)>,
    ) -> Result<Self::Value> {
        Err(MarshalError::BadType)
    }
}
/// Decodes one marshalled value from `rdr`, constructing it with `bag`.
///
/// The value starts with a one-byte type marker; containers decode their
/// elements by calling this function recursively.
///
/// # Errors
///
/// Returns `MarshalError::BadType` for an unknown marker, and propagates
/// errors from the reader, from nested values and from the fallible `bag`
/// constructors.
pub fn deserialize_value<R: Read, Bag: MarshalBag>(rdr: &mut R, bag: Bag) -> Result<Bag::Value> {
let typ = Type::try_from(rdr.read_u8()?)?;
let value = match typ {
Type::True => bag.make_bool(true),
Type::False => bag.make_bool(false),
Type::None => bag.make_none(),
Type::StopIter => bag.make_stop_iter()?,
Type::Ellipsis => bag.make_ellipsis(),
Type::Int => {
// The length field doubles as the sign: it is read as a `u32`, reinterpreted
// as an `i32`, and a negative value marks a negative integer. Its absolute
// value is the number of little-endian magnitude bytes that follow.
let len = rdr.read_u32()? as i32;
let sign = if len < 0 { Sign::Minus } else { Sign::Plus };
let bytes = rdr.read_slice(len.unsigned_abs())?;
let int = BigInt::from_bytes_le(sign, bytes);
bag.make_int(int)
}
Type::Float => {
// Floats are stored as the raw bit pattern in a `u64`.
let value = f64::from_bits(rdr.read_u64()?);
bag.make_float(value)
}
Type::Complex => {
// Real part first, then imaginary part, each as raw `f64` bits.
let re = f64::from_bits(rdr.read_u64()?);
let im = f64::from_bits(rdr.read_u64()?);
let value = Complex64 { re, im };
bag.make_complex(value)
}
Type::Ascii | Type::Unicode => {
// Both string markers are decoded the same way: a length and UTF-8 bytes.
let len = rdr.read_u32()?;
let value = rdr.read_str(len)?;
bag.make_str(value)
}
Type::Tuple => {
// `process_results` stops at the first element that fails to decode and
// returns that error; hence the single `?`.
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_tuple(it))?
}
Type::List => {
// Double `?`: the outer layer is an element decoding error, the inner one
// is the bag refusing to build this kind of value.
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_list(it))??
}
Type::Set => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_set(it))??
}
Type::FrozenSet => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_frozenset(it))??
}
Type::Dict => {
// `len` counts entries; each entry is a key followed by its value.
let len = rdr.read_u32()?;
let it = (0..len).map(|_| {
let k = deserialize_value(rdr, bag)?;
let v = deserialize_value(rdr, bag)?;
Ok::<_, MarshalError>((k, v))
});
itertools::process_results(it, |it| bag.make_dict(it))??
}
Type::Bytes => {
// Following CPython, after marshaling, byte arrays are converted into bytes.
let len = rdr.read_u32()?;
let value = rdr.read_slice(len)?;
bag.make_bytes(value)
}
// Nested code objects are decoded with the bag's associated `ConstantBag`.
Type::Code => bag.make_code(deserialize_code(rdr, bag.constant_bag())?),
};
Ok(value)
}
/// A value that [`serialize_value`] knows how to write.
///
/// Rather than returning a borrowed view of itself, an implementor hands a
/// [`DumpableValue`] describing itself to a caller-supplied closure.
pub trait Dumpable: Sized {
/// Error an implementor may report from [`Dumpable::with_dump`].
type Error;
/// Constant type of the code objects exposed through [`DumpableValue::Code`].
type Constant: Constant;
/// Calls `f` with a [`DumpableValue`] view of `self`.
///
/// On success the closure's return value is handed back inside `Ok`. The blanket impl
/// for constants never fails (its `Error` is `Infallible`); other implementors may
/// return `Self::Error` instead.
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error>;
}
/// Borrowed, one-level view of a value about to be marshalled.
///
/// `D` is the element type of the containers; nested elements are themselves turned into
/// `DumpableValue`s via [`Dumpable::with_dump`] while serializing.
pub enum DumpableValue<'a, D: Dumpable> {
// Variants that can be produced from a `BorrowedConstant`.
Integer(&'a BigInt),
Float(f64),
Complex(Complex64),
Boolean(bool),
Str(&'a str),
Bytes(&'a [u8]),
Code(&'a CodeObject<D::Constant>),
Tuple(&'a [D]),
None,
Ellipsis,
// Variants with no `BorrowedConstant` counterpart; only non-constant `Dumpable`
// implementors can supply them.
StopIter,
List(&'a [D]),
Set(&'a [D]),
Frozenset(&'a [D]),
/// Key/value pairs, written in slice order.
Dict(&'a [(D, D)]),
}
impl<'a, C: Constant> From<BorrowedConstant<'a, C>> for DumpableValue<'a, C> {
fn from(c: BorrowedConstant<'a, C>) -> Self {
match c {
BorrowedConstant::Integer { value } => Self::Integer(value),
BorrowedConstant::Float { value } => Self::Float(value),
BorrowedConstant::Complex { value } => Self::Complex(value),
BorrowedConstant::Boolean { value } => Self::Boolean(value),
BorrowedConstant::Str { value } => Self::Str(value),
BorrowedConstant::Bytes { value } => Self::Bytes(value),
BorrowedConstant::Code { code } => Self::Code(code),
BorrowedConstant::Tuple { elements } => Self::Tuple(elements),
BorrowedConstant::None => Self::None,
BorrowedConstant::Ellipsis => Self::Ellipsis,
}
}
}
impl<C: Constant> Dumpable for C {
type Error = Infallible;
type Constant = Self;
#[inline(always)]
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error> {
Ok(f(self.borrow_constant().into()))
}
}
/// Minimal byte sink used by the marshal serializer.
///
/// Implementors only provide [`Write::write_slice`]; the integer helpers are built on top
/// of it and emit their argument in little-endian byte order.
pub trait Write {
    /// Appends `slice` to the output.
    fn write_slice(&mut self, slice: &[u8]);

    /// Writes a single byte.
    fn write_u8(&mut self, v: u8) {
        self.write_slice(&[v])
    }

    /// Writes `v` as two little-endian bytes.
    fn write_u16(&mut self, v: u16) {
        let bytes = u16::to_le_bytes(v);
        self.write_slice(&bytes)
    }

    /// Writes `v` as four little-endian bytes.
    fn write_u32(&mut self, v: u32) {
        let bytes = u32::to_le_bytes(v);
        self.write_slice(&bytes)
    }

    /// Writes `v` as eight little-endian bytes.
    fn write_u64(&mut self, v: u64) {
        let bytes = u64::to_le_bytes(v);
        self.write_slice(&bytes)
    }
}
/// A growable byte vector is a sink: written bytes are appended at the end.
impl Write for Vec<u8> {
    fn write_slice(&mut self, slice: &[u8]) {
        self.extend(slice)
    }
}
/// Writes `len` to `buf` as a `u32` length prefix.
///
/// # Panics
///
/// Panics with "too long to serialize" when `len` does not fit in a `u32`.
pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
    match u32::try_from(len) {
        Ok(len) => buf.write_u32(len),
        Err(_) => panic!("too long to serialize"),
    }
}
/// Writes `constant` to `buf` in the marshal format.
///
/// Every value starts with its one-byte [`Type`] tag. Containers then write their element
/// count followed by each element, serialized recursively through
/// [`Dumpable::with_dump`].
///
/// # Errors
///
/// Returns the first `D::Error` reported by a nested element's `with_dump`. Bytes written
/// before the failure stay in `buf`.
///
/// # Panics
///
/// Panics with "too long to serialize" if an integer's byte length does not fit in an
/// `i32`, or (via `write_len`) if a length does not fit in a `u32`.
pub fn serialize_value<W: Write, D: Dumpable>(
    buf: &mut W,
    constant: DumpableValue<'_, D>,
) -> Result<(), D::Error> {
    /// Shared layout of tuple/list/set/frozenset: tag, element count, then the elements.
    fn write_sequence<Out: Write, Item: Dumpable>(
        buf: &mut Out,
        tag: u8,
        items: &[Item],
    ) -> Result<(), Item::Error> {
        buf.write_u8(tag);
        write_len(buf, items.len());
        for item in items {
            // First `?` is for `with_dump` itself, second for the nested serialization.
            item.with_dump(|value| serialize_value(buf, value))??;
        }
        Ok(())
    }

    match constant {
        // Payload-free values.
        DumpableValue::None => buf.write_u8(Type::None as u8),
        DumpableValue::Ellipsis => buf.write_u8(Type::Ellipsis as u8),
        DumpableValue::StopIter => buf.write_u8(Type::StopIter as u8),
        DumpableValue::Boolean(flag) => {
            let tag = if flag {
                Type::True as u8
            } else {
                Type::False as u8
            };
            buf.write_u8(tag);
        }

        // Numbers.
        DumpableValue::Integer(int) => {
            buf.write_u8(Type::Int as u8);
            let (sign, magnitude) = int.to_bytes_le();
            let byte_count: i32 = magnitude.len().try_into().expect("too long to serialize");
            // The sign of the integer is carried by the sign of the length field.
            let signed_len = if sign == Sign::Minus {
                -byte_count
            } else {
                byte_count
            };
            buf.write_u32(signed_len as u32);
            buf.write_slice(&magnitude);
        }
        DumpableValue::Float(float) => {
            buf.write_u8(Type::Float as u8);
            buf.write_u64(float.to_bits());
        }
        DumpableValue::Complex(complex) => {
            buf.write_u8(Type::Complex as u8);
            buf.write_u64(complex.re.to_bits());
            buf.write_u64(complex.im.to_bits());
        }

        // Length-prefixed byte payloads.
        DumpableValue::Str(text) => {
            buf.write_u8(Type::Unicode as u8);
            write_len(buf, text.len());
            buf.write_slice(text.as_bytes());
        }
        DumpableValue::Bytes(bytes) => {
            buf.write_u8(Type::Bytes as u8);
            write_len(buf, bytes.len());
            buf.write_slice(bytes);
        }

        DumpableValue::Code(code) => {
            buf.write_u8(Type::Code as u8);
            serialize_code(buf, code);
        }

        // Containers.
        DumpableValue::Tuple(items) => write_sequence(buf, Type::Tuple as u8, items)?,
        DumpableValue::List(items) => write_sequence(buf, Type::List as u8, items)?,
        DumpableValue::Set(items) => write_sequence(buf, Type::Set as u8, items)?,
        DumpableValue::Frozenset(items) => write_sequence(buf, Type::FrozenSet as u8, items)?,
        DumpableValue::Dict(pairs) => {
            buf.write_u8(Type::Dict as u8);
            write_len(buf, pairs.len());
            for (key, value) in pairs {
                key.with_dump(|dumped| serialize_value(buf, dumped))??;
                value.with_dump(|dumped| serialize_value(buf, dumped))??;
            }
        }
    }
    Ok(())
}
pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>) {
write_len(buf, code.instructions.len());
// SAFETY: it's ok to transmute CodeUnit to [u8; 2]
let (_, instructions_bytes, _) = unsafe { code.instructions.align_to() };
buf.write_slice(instructions_bytes);
write_len(buf, code.locations.len());
for loc in &*code.locations {
buf.write_u32(loc.row);
buf.write_u32(loc.column);
}
buf.write_u16(code.flags.bits());
buf.write_u32(code.posonlyarg_count);
buf.write_u32(code.arg_count);
buf.write_u32(code.kwonlyarg_count);
write_len(buf, code.source_path.as_ref().len());
buf.write_slice(code.source_path.as_ref().as_bytes());
buf.write_u32(code.first_line_number);
buf.write_u32(code.max_stackdepth);
write_len(buf, code.obj_name.as_ref().len());
buf.write_slice(code.obj_name.as_ref().as_bytes());
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
write_len(buf, cell2arg.len());
for &i in cell2arg {
buf.write_u32(i as u32)
}
write_len(buf, code.constants.len());
for constant in &*code.constants {
serialize_value(buf, constant.borrow_constant().into()).unwrap_or_else(|x| match x {})
}
let mut write_names = |names: &[C::Name]| {
write_len(buf, names.len());
for name in names {
write_len(buf, name.as_ref().len());
buf.write_slice(name.as_ref().as_bytes());
}
};
write_names(&code.names);
write_names(&code.varnames);
write_names(&code.cellvars);
write_names(&code.freevars);
}

View file

@ -26,7 +26,6 @@ itertools = { workspace = true }
log = { workspace = true }
num-bigint = { workspace = true }
num-traits = { workspace = true }
thiserror = { workspace = true }
unicode_names2 = { workspace = true }
unic-emoji-char = "0.9.0"

View file

@ -200,7 +200,7 @@ pub fn parse_tokens(
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
/// Represents the different types of errors that can occur during parsing.
#[derive(Debug, PartialEq, thiserror::Error)]
#[derive(Debug, PartialEq)]
pub enum ParseErrorType {
/// Parser encountered an unexpected end of input
Eof,
@ -215,6 +215,8 @@ pub enum ParseErrorType {
Lexical(LexicalErrorType),
}
// Empty impl: `ParseErrorType` uses only the provided methods of `std::error::Error`.
impl std::error::Error for ParseErrorType {}
// Convert `lalrpop_util::ParseError` to our internal type
fn parse_error_from_lalrpop(
err: LalrpopError<Location, Tok, LexicalError>,

View file

@ -7,12 +7,40 @@ use rustpython_parser::{
pub use rustpython_codegen::compile::CompileOpts;
pub use rustpython_compiler_core::{BaseError as CompileErrorBody, CodeObject, Mode};
#[derive(Debug, thiserror::Error)]
use std::error::Error as StdError;
use std::fmt;
#[derive(Debug)]
pub enum CompileErrorType {
#[error(transparent)]
Codegen(#[from] rustpython_codegen::error::CodegenErrorType),
#[error(transparent)]
Parse(#[from] parser::ParseErrorType),
Codegen(rustpython_codegen::error::CodegenErrorType),
Parse(parser::ParseErrorType),
}
impl StdError for CompileErrorType {
fn source(&self) -> Option<&(dyn StdError + 'static)> {
match self {
CompileErrorType::Codegen(e) => e.source(),
CompileErrorType::Parse(e) => e.source(),
}
}
}
impl fmt::Display for CompileErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
CompileErrorType::Codegen(e) => e.fmt(f),
CompileErrorType::Parse(e) => e.fmt(f),
}
}
}
/// Wraps a code generation error, so `?` can lift it into a [`CompileErrorType`].
impl From<rustpython_codegen::error::CodegenErrorType> for CompileErrorType {
    fn from(source: rustpython_codegen::error::CodegenErrorType) -> Self {
        Self::Codegen(source)
    }
}
/// Wraps a parse error, so `?` can lift it into a [`CompileErrorType`].
impl From<parser::ParseErrorType> for CompileErrorType {
    fn from(source: parser::ParseErrorType) -> Self {
        Self::Parse(source)
    }
}
/// Alias of `rustpython_compiler_core::CompileError` specialised to [`CompileErrorType`].
pub type CompileError = rustpython_compiler_core::CompileError<CompileErrorType>;