//! Implement python as a virtual machine with bytecodes. This module //! implements bytecode structure. #![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")] #![doc(html_root_url = "https://docs.rs/rustpython-bytecode/")] mod mode; pub use mode::Mode; use bitflags::bitflags; use bstr::ByteSlice; use itertools::Itertools; use num_bigint::BigInt; use num_complex::Complex64; use serde::{Deserialize, Serialize}; use std::{collections::BTreeSet, fmt, hash}; /// Sourcecode location. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct Location { row: u32, column: u32, } impl Location { /// Creates a new Location object at the given row and column. /// /// # Example /// ``` /// use rustpython_bytecode::Location; /// let loc = Location::new(10, 10); /// ``` pub fn new(row: usize, column: usize) -> Self { let row = row.try_into().expect("Location::row over u32"); let column = column.try_into().expect("Location::column over u32"); Location { row, column } } /// Current row pub fn row(&self) -> usize { self.row as usize } /// Current column pub fn column(&self) -> usize { self.column as usize } } pub trait Constant: Sized { type Name: AsRef; /// Transforms the given Constant to a BorrowedConstant fn borrow_constant(&self) -> BorrowedConstant; } impl Constant for ConstantData { type Name = String; fn borrow_constant(&self) -> BorrowedConstant { use BorrowedConstant::*; match self { ConstantData::Integer { value } => Integer { value }, ConstantData::Float { value } => Float { value: *value }, ConstantData::Complex { value } => Complex { value: *value }, ConstantData::Boolean { value } => Boolean { value: *value }, ConstantData::Str { value } => Str { value }, ConstantData::Bytes { value } => Bytes { value }, ConstantData::Code { code } => Code { code }, ConstantData::Tuple { elements } => Tuple { elements: Box::new(elements.iter().map(|e| e.borrow_constant())), }, ConstantData::None => None, ConstantData::Ellipsis => Ellipsis, } } } /// A Constant Bag pub trait ConstantBag: Sized + Copy { type Constant: Constant; fn make_constant(&self, constant: BorrowedConstant) -> Self::Constant; fn make_name(&self, name: &str) -> ::Name; } #[derive(Clone, Copy)] pub struct BasicBag; impl ConstantBag for BasicBag { type Constant = ConstantData; fn make_constant(&self, constant: BorrowedConstant) -> Self::Constant { constant.to_owned() } fn make_name(&self, name: &str) -> ::Name { name.to_owned() } } /// Primary container of a single code object. Each python function has /// a codeobject. Also a module has a codeobject. #[derive(Clone, Serialize, Deserialize)] pub struct CodeObject { pub instructions: Box<[Instruction]>, pub locations: Box<[Location]>, pub flags: CodeFlags, pub posonlyarg_count: usize, // Number of positional-only arguments pub arg_count: usize, pub kwonlyarg_count: usize, pub source_path: C::Name, pub first_line_number: usize, pub max_stackdepth: u32, pub obj_name: C::Name, // Name of the object that created this code object pub cell2arg: Option>, pub constants: Box<[C]>, #[serde(bound( deserialize = "C::Name: serde::Deserialize<'de>", serialize = "C::Name: serde::Serialize" ))] pub names: Box<[C::Name]>, pub varnames: Box<[C::Name]>, pub cellvars: Box<[C::Name]>, pub freevars: Box<[C::Name]>, } bitflags! { #[derive(Serialize, Deserialize)] pub struct CodeFlags: u16 { const NEW_LOCALS = 0x01; const IS_GENERATOR = 0x02; const IS_COROUTINE = 0x04; const HAS_VARARGS = 0x08; const HAS_VARKEYWORDS = 0x10; const IS_OPTIMIZED = 0x20; } } impl CodeFlags { pub const NAME_MAPPING: &'static [(&'static str, CodeFlags)] = &[ ("GENERATOR", CodeFlags::IS_GENERATOR), ("COROUTINE", CodeFlags::IS_COROUTINE), ( "ASYNC_GENERATOR", Self::from_bits_truncate(Self::IS_GENERATOR.bits | Self::IS_COROUTINE.bits), ), ("VARARGS", CodeFlags::HAS_VARARGS), ("VARKEYWORDS", CodeFlags::HAS_VARKEYWORDS), ]; } #[derive(Serialize, Debug, Deserialize, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] #[repr(transparent)] // XXX: if you add a new instruction that stores a Label, make sure to add it in // Instruction::label_arg{,_mut} pub struct Label(pub u32); impl fmt::Display for Label { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.0.fmt(f) } } /// Transforms a value prior to formatting it. #[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[repr(u8)] pub enum ConversionFlag { /// No conversion None = 0, // CPython uses -1 but not pleasure for us /// Converts by calling `str()`. Str = b's', /// Converts by calling `ascii()`. Ascii = b'a', /// Converts by calling `repr()`. Repr = b'r', } impl TryFrom for ConversionFlag { type Error = usize; fn try_from(b: usize) -> Result { let b = b.try_into().map_err(|_| b)?; match b { 0 => Ok(Self::None), b's' => Ok(Self::Str), b'a' => Ok(Self::Ascii), b'r' => Ok(Self::Repr), b => Err(b as usize), } } } /// The kind of Raise that occurred. #[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum RaiseKind { Reraise, Raise, RaiseCause, } pub type NameIdx = u32; /// A Single bytecode instruction. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum Instruction { /// Importing by name ImportName { idx: NameIdx, }, /// Importing without name ImportNameless, /// Import * ImportStar, /// from ... import ... ImportFrom { idx: NameIdx, }, LoadFast(NameIdx), LoadNameAny(NameIdx), LoadGlobal(NameIdx), LoadDeref(NameIdx), LoadClassDeref(NameIdx), StoreFast(NameIdx), StoreLocal(NameIdx), StoreGlobal(NameIdx), StoreDeref(NameIdx), DeleteFast(NameIdx), DeleteLocal(NameIdx), DeleteGlobal(NameIdx), DeleteDeref(NameIdx), LoadClosure(NameIdx), Subscript, StoreSubscript, DeleteSubscript, StoreAttr { idx: NameIdx, }, DeleteAttr { idx: NameIdx, }, LoadConst { /// index into constants vec idx: u32, }, UnaryOperation { op: UnaryOperator, }, BinaryOperation { op: BinaryOperator, }, BinaryOperationInplace { op: BinaryOperator, }, LoadAttr { idx: NameIdx, }, TestOperation { op: TestOperator, }, CompareOperation { op: ComparisonOperator, }, Pop, Rotate2, Rotate3, Duplicate, Duplicate2, GetIter, Continue { target: Label, }, Break { target: Label, }, Jump { target: Label, }, /// Pop the top of the stack, and jump if this value is true. JumpIfTrue { target: Label, }, /// Pop the top of the stack, and jump if this value is false. JumpIfFalse { target: Label, }, /// Peek at the top of the stack, and jump if this value is true. /// Otherwise, pop top of stack. JumpIfTrueOrPop { target: Label, }, /// Peek at the top of the stack, and jump if this value is false. /// Otherwise, pop top of stack. JumpIfFalseOrPop { target: Label, }, MakeFunction(MakeFunctionFlags), CallFunctionPositional { nargs: u32, }, CallFunctionKeyword { nargs: u32, }, CallFunctionEx { has_kwargs: bool, }, LoadMethod { idx: NameIdx, }, CallMethodPositional { nargs: u32, }, CallMethodKeyword { nargs: u32, }, CallMethodEx { has_kwargs: bool, }, ForIter { target: Label, }, ReturnValue, YieldValue, YieldFrom, SetupAnnotation, SetupLoop { break_target: Label, }, /// Setup a finally handler, which will be called whenever one of this events occurs: /// - the block is popped /// - the function returns /// - an exception is returned SetupFinally { handler: Label, }, /// Enter a finally block, without returning, excepting, just because we are there. EnterFinally, /// Marker bytecode for the end of a finally sequence. /// When this bytecode is executed, the eval loop does one of those things: /// - Continue at a certain bytecode position /// - Propagate the exception /// - Return from a function /// - Do nothing at all, just continue EndFinally, SetupExcept { handler: Label, }, SetupWith { end: Label, }, WithCleanupStart, WithCleanupFinish, PopBlock, Raise { kind: RaiseKind, }, BuildString { size: u32, }, BuildTuple { unpack: bool, size: u32, }, BuildList { unpack: bool, size: u32, }, BuildSet { unpack: bool, size: u32, }, BuildMap { unpack: bool, for_call: bool, size: u32, }, DictUpdate, BuildSlice { /// whether build a slice with a third step argument step: bool, }, ListAppend { i: u32, }, SetAdd { i: u32, }, MapAdd { i: u32, }, PrintExpr, LoadBuildClass, UnpackSequence { size: u32, }, UnpackEx { before: u8, after: u8, }, FormatValue { conversion: ConversionFlag, }, PopException, Reverse { amount: u32, }, GetAwaitable, BeforeAsyncWith, SetupAsyncWith { end: Label, }, GetAIter, GetANext, EndAsyncFor, } static_assertions::assert_eq_size!(Instruction, u64); use self::Instruction::*; bitflags! { #[derive(Serialize, Deserialize)] pub struct MakeFunctionFlags: u8 { const CLOSURE = 0x01; const ANNOTATIONS = 0x02; const KW_ONLY_DEFAULTS = 0x04; const DEFAULTS = 0x08; } } /// A Constant (which usually encapsulates data within it) /// /// # Examples /// ``` /// use rustpython_bytecode::ConstantData; /// let a = ConstantData::Float {value: 120f64}; /// let b = ConstantData::Boolean {value: false}; /// assert_ne!(a, b); /// ``` #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ConstantData { Tuple { elements: Vec }, Integer { value: BigInt }, Float { value: f64 }, Complex { value: Complex64 }, Boolean { value: bool }, Str { value: String }, Bytes { value: Vec }, Code { code: Box }, None, Ellipsis, } impl PartialEq for ConstantData { fn eq(&self, other: &Self) -> bool { use ConstantData::*; match (self, other) { (Integer { value: a }, Integer { value: b }) => a == b, // we want to compare floats *by actual value* - if we have the *exact same* float // already in a constant cache, we want to use that (Float { value: a }, Float { value: b }) => a.to_bits() == b.to_bits(), (Complex { value: a }, Complex { value: b }) => { a.re.to_bits() == b.re.to_bits() && a.im.to_bits() == b.im.to_bits() } (Boolean { value: a }, Boolean { value: b }) => a == b, (Str { value: a }, Str { value: b }) => a == b, (Bytes { value: a }, Bytes { value: b }) => a == b, (Code { code: a }, Code { code: b }) => std::ptr::eq(a.as_ref(), b.as_ref()), (Tuple { elements: a }, Tuple { elements: b }) => a == b, (None, None) => true, (Ellipsis, Ellipsis) => true, _ => false, } } } impl Eq for ConstantData {} impl hash::Hash for ConstantData { fn hash(&self, state: &mut H) { use ConstantData::*; std::mem::discriminant(self).hash(state); match self { Integer { value } => value.hash(state), Float { value } => value.to_bits().hash(state), Complex { value } => { value.re.to_bits().hash(state); value.im.to_bits().hash(state); } Boolean { value } => value.hash(state), Str { value } => value.hash(state), Bytes { value } => value.hash(state), Code { code } => std::ptr::hash(code.as_ref(), state), Tuple { elements } => elements.hash(state), None => {} Ellipsis => {} } } } /// A borrowed Constant pub enum BorrowedConstant<'a, C: Constant> { Integer { value: &'a BigInt }, Float { value: f64 }, Complex { value: Complex64 }, Boolean { value: bool }, Str { value: &'a str }, Bytes { value: &'a [u8] }, Code { code: &'a CodeObject }, Tuple { elements: BorrowedTupleIter<'a, C> }, None, Ellipsis, } type BorrowedTupleIter<'a, C> = Box> + 'a>; impl BorrowedConstant<'_, C> { // takes `self` because we need to consume the iterator pub fn fmt_display(self, f: &mut fmt::Formatter) -> fmt::Result { match self { BorrowedConstant::Integer { value } => write!(f, "{}", value), BorrowedConstant::Float { value } => write!(f, "{}", value), BorrowedConstant::Complex { value } => write!(f, "{}", value), BorrowedConstant::Boolean { value } => { write!(f, "{}", if value { "True" } else { "False" }) } BorrowedConstant::Str { value } => write!(f, "{:?}", value), BorrowedConstant::Bytes { value } => write!(f, "b{:?}", value.as_bstr()), BorrowedConstant::Code { code } => write!(f, "{:?}", code), BorrowedConstant::Tuple { elements } => { write!(f, "(")?; let mut first = true; for c in elements { if first { first = false } else { write!(f, ", ")?; } c.fmt_display(f)?; } write!(f, ")") } BorrowedConstant::None => write!(f, "None"), BorrowedConstant::Ellipsis => write!(f, "..."), } } pub fn to_owned(self) -> ConstantData { use ConstantData::*; match self { BorrowedConstant::Integer { value } => Integer { value: value.clone(), }, BorrowedConstant::Float { value } => Float { value }, BorrowedConstant::Complex { value } => Complex { value }, BorrowedConstant::Boolean { value } => Boolean { value }, BorrowedConstant::Str { value } => Str { value: value.to_owned(), }, BorrowedConstant::Bytes { value } => Bytes { value: value.to_owned(), }, BorrowedConstant::Code { code } => Code { code: Box::new(code.map_clone_bag(&BasicBag)), }, BorrowedConstant::Tuple { elements } => Tuple { elements: elements.map(BorrowedConstant::to_owned).collect(), }, BorrowedConstant::None => None, BorrowedConstant::Ellipsis => Ellipsis, } } } /// The possible comparison operators #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum ComparisonOperator { // be intentional with bits so that we can do eval_ord with just a bitwise and // bits: | Equal | Greater | Less | Less = 0b001, Greater = 0b010, NotEqual = 0b011, Equal = 0b100, LessOrEqual = 0b101, GreaterOrEqual = 0b110, } #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum TestOperator { In, NotIn, Is, IsNot, /// two exceptions that match? ExceptionMatch, } /// The possible Binary operators /// # Examples /// /// ``` /// use rustpython_bytecode::Instruction::BinaryOperation; /// use rustpython_bytecode::BinaryOperator::Add; /// let op = BinaryOperation {op: Add}; /// ``` #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum BinaryOperator { Power, Multiply, MatrixMultiply, Divide, FloorDivide, Modulo, Add, Subtract, Lshift, Rshift, And, Xor, Or, } /// The possible unary operators #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum UnaryOperator { Not, Invert, Minus, Plus, } /* Maintain a stack of blocks on the VM. pub enum BlockType { Loop, Except, } */ /// Argument structure pub struct Arguments<'a, N: AsRef> { pub posonlyargs: &'a [N], pub args: &'a [N], pub vararg: Option<&'a N>, pub kwonlyargs: &'a [N], pub varkwarg: Option<&'a N>, } impl> fmt::Debug for Arguments<'_, N> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { macro_rules! fmt_slice { ($x:expr) => { format_args!("[{}]", $x.iter().map(AsRef::as_ref).format(", ")) }; } f.debug_struct("Arguments") .field("posonlyargs", &fmt_slice!(self.posonlyargs)) .field("args", &fmt_slice!(self.posonlyargs)) .field("vararg", &self.vararg.map(N::as_ref)) .field("kwonlyargs", &fmt_slice!(self.kwonlyargs)) .field("varkwarg", &self.varkwarg.map(N::as_ref)) .finish() } } impl CodeObject { /// Get all arguments of the code object /// like inspect.getargs pub fn arg_names(&self) -> Arguments { let nargs = self.arg_count; let nkwargs = self.kwonlyarg_count; let mut varargspos = nargs + nkwargs; let posonlyargs = &self.varnames[..self.posonlyarg_count]; let args = &self.varnames[..nargs]; let kwonlyargs = &self.varnames[nargs..varargspos]; let vararg = if self.flags.contains(CodeFlags::HAS_VARARGS) { let vararg = &self.varnames[varargspos]; varargspos += 1; Some(vararg) } else { None }; let varkwarg = if self.flags.contains(CodeFlags::HAS_VARKEYWORDS) { Some(&self.varnames[varargspos]) } else { None }; Arguments { posonlyargs, args, vararg, kwonlyargs, varkwarg, } } /// Return the labels targeted by the instructions of this CodeObject pub fn label_targets(&self) -> BTreeSet