//! Implement python as a virtual machine with bytecodes. This module //! implements bytecode structure. use crate::Location; use bitflags::bitflags; use bstr::ByteSlice; use itertools::Itertools; use num_bigint::BigInt; use num_complex::Complex64; use num_enum::{IntoPrimitive, TryFromPrimitive}; use serde::{Deserialize, Serialize}; use std::marker::PhantomData; use std::{collections::BTreeSet, fmt, hash, mem}; pub trait Constant: Sized { type Name: AsRef; /// Transforms the given Constant to a BorrowedConstant fn borrow_constant(&self) -> BorrowedConstant; } impl Constant for ConstantData { type Name = String; fn borrow_constant(&self) -> BorrowedConstant { use BorrowedConstant::*; match self { ConstantData::Integer { value } => Integer { value }, ConstantData::Float { value } => Float { value: *value }, ConstantData::Complex { value } => Complex { value: *value }, ConstantData::Boolean { value } => Boolean { value: *value }, ConstantData::Str { value } => Str { value }, ConstantData::Bytes { value } => Bytes { value }, ConstantData::Code { code } => Code { code }, ConstantData::Tuple { elements } => Tuple { elements: Box::new(elements.iter().map(|e| e.borrow_constant())), }, ConstantData::None => None, ConstantData::Ellipsis => Ellipsis, } } } /// A Constant Bag pub trait ConstantBag: Sized + Copy { type Constant: Constant; fn make_constant(&self, constant: BorrowedConstant) -> Self::Constant; fn make_name(&self, name: &str) -> ::Name; } #[derive(Clone, Copy)] pub struct BasicBag; impl ConstantBag for BasicBag { type Constant = ConstantData; fn make_constant(&self, constant: BorrowedConstant) -> Self::Constant { constant.to_owned() } fn make_name(&self, name: &str) -> ::Name { name.to_owned() } } /// Primary container of a single code object. Each python function has /// a codeobject. Also a module has a codeobject. 
#[derive(Clone, Serialize, Deserialize)]
// NOTE(review): the generic parameter list was lost when this file was mangled. A parameter `C`
// is required by the `C::Name` and `Box<[C]>` fields below; the `= ConstantData` default is an
// assumption (harmless if the original had none) — confirm.
pub struct CodeObject<C: Constant = ConstantData> {
    pub instructions: Box<[CodeUnit]>,
    pub locations: Box<[Location]>,
    pub flags: CodeFlags,
    /// Number of positional-only arguments
    pub posonlyarg_count: usize,
    pub arg_count: usize,
    pub kwonlyarg_count: usize,
    pub source_path: C::Name,
    pub first_line_number: usize,
    pub max_stackdepth: u32,
    /// Name of the object that created this code object
    pub obj_name: C::Name,
    // NOTE(review): the contents of this `Option` were lost when the file was mangled (the text
    // read `Option>`). `Box<[isize]>` is an assumption — confirm the element type.
    pub cell2arg: Option<Box<[isize]>>,
    pub constants: Box<[C]>,
    #[serde(bound(
        deserialize = "C::Name: serde::Deserialize<'de>",
        serialize = "C::Name: serde::Serialize"
    ))]
    pub names: Box<[C::Name]>,
    pub varnames: Box<[C::Name]>,
    pub cellvars: Box<[C::Name]>,
    pub freevars: Box<[C::Name]>,
}

bitflags! {
    /// Bit flags stored in [`CodeObject::flags`].
    #[derive(Serialize, Deserialize)]
    pub struct CodeFlags: u16 {
        const NEW_LOCALS = 0x01;
        const IS_GENERATOR = 0x02;
        const IS_COROUTINE = 0x04;
        const HAS_VARARGS = 0x08;
        const HAS_VARKEYWORDS = 0x10;
        const IS_OPTIMIZED = 0x20;
    }
}

impl CodeFlags {
    /// Pairs of a textual flag name and the flag (or combination of flags) it stands for.
    ///
    /// `"ASYNC_GENERATOR"` maps to `IS_GENERATOR | IS_COROUTINE`.
    pub const NAME_MAPPING: &'static [(&'static str, CodeFlags)] = &[
        ("GENERATOR", CodeFlags::IS_GENERATOR),
        ("COROUTINE", CodeFlags::IS_COROUTINE),
        (
            "ASYNC_GENERATOR",
            Self::from_bits_truncate(Self::IS_GENERATOR.bits | Self::IS_COROUTINE.bits),
        ),
        ("VARARGS", CodeFlags::HAS_VARARGS),
        ("VARKEYWORDS", CodeFlags::HAS_VARKEYWORDS),
    ];
}

/// an opcode argument that may be extended by a prior ExtendedArg
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[repr(transparent)]
pub struct OpArgByte(pub u8);

impl OpArgByte {
    /// The zero argument byte.
    pub const fn null() -> Self {
        OpArgByte(0)
    }
}

impl fmt::Debug for OpArgByte {
    /// Formats exactly like the wrapped `u8`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.0, f)
    }
}

/// a full 32-bit oparg, including any possible ExtendedArg extension
#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct OpArg(pub u32);

impl OpArg {
    /// The zero argument.
    pub const fn null() -> Self {
        OpArg(0)
    }

    /// Returns how many CodeUnits a instruction with this oparg will be encoded as
    ///
    /// This is one unit, plus one more for each of the three upper bytes that is needed to
    /// represent the value (so the result is always in `1..=4`).
    #[inline]
    pub fn instr_size(self) -> usize {
        (self.0 > 0xff) as usize
            + (self.0 > 0xff_ff) as usize
            + (self.0 > 0xff_ff_ff) as usize
            + 1
    }

    /// returns the arg split into any necessary ExtendedArg components (in big-endian order) and
    /// the arg for the real opcode itself
    #[inline(always)]
    pub fn split(self) -> (impl ExactSizeIterator<Item = OpArgByte>, OpArgByte) {
        // Little-endian bytes put the low byte first; only the `instr_size()` least significant
        // bytes are kept.
        let mut bytes = self
            .0
            .to_le_bytes()
            .map(OpArgByte)
            .into_iter()
            .take(self.instr_size());
        let low = bytes
            .next()
            .expect("instr_size() is at least 1, so there is always a low byte");
        // Reversing the remaining bytes yields the most significant extension byte first.
        (bytes.rev(), low)
    }
}

/// Accumulator that folds the argument bytes of consecutive code units into one [`OpArg`].
#[derive(Default, Copy, Clone)]
#[repr(transparent)]
pub struct OpArgState {
    state: u32,
}

impl OpArgState {
    /// Feeds one code unit into the accumulator and returns its opcode with the argument
    /// accumulated so far.
    ///
    /// The accumulator is cleared after any instruction other than `ExtendedArg`, so the next
    /// instruction starts from zero.
    #[inline(always)]
    pub fn get(&mut self, ins: CodeUnit) -> (Instruction, OpArg) {
        let arg = self.extend(ins.arg);
        if ins.op != Instruction::ExtendedArg {
            self.reset();
        }
        (ins.op, arg)
    }

    /// Shifts the accumulated value left by one byte, places `arg` in the low byte and returns
    /// the new accumulated value.
    #[inline(always)]
    pub fn extend(&mut self, arg: OpArgByte) -> OpArg {
        self.state = (self.state << 8) | u32::from(arg.0);
        OpArg(self.state)
    }

    /// Clears the accumulated value.
    #[inline(always)]
    pub fn reset(&mut self) {
        self.state = 0
    }
}

/// A type that can be encoded into, and decoded from, the raw `u32` of an [`OpArg`].
pub trait OpArgType: Copy {
    /// Decodes `x`, returning `None` when `x` does not denote a value of this type.
    fn from_oparg(x: u32) -> Option<Self>;

    /// Encodes `self` as a raw argument value.
    fn to_oparg(self) -> u32;
}

impl OpArgType for u32 {
    #[inline(always)]
    fn from_oparg(x: u32) -> Option<Self> {
        Some(x)
    }

    #[inline(always)]
    fn to_oparg(self) -> u32 {
        self
    }
}

impl OpArgType for bool {
    /// Any non-zero value decodes to `true`.
    #[inline(always)]
    fn from_oparg(x: u32) -> Option<Self> {
        Some(x != 0)
    }

    #[inline(always)]
    fn to_oparg(self) -> u32 {
        self as u32
    }
}

/// Implements [`OpArgType`] for an enum through its primitive conversions
/// (`try_from_primitive` for decoding, `u8::from` for encoding).
macro_rules! enum_oparg {
    ($t:ident) => {
        impl OpArgType for $t {
            #[inline(always)]
            fn from_oparg(x: u32) -> Option<Self> {
                // NOTE(review): `x as _` narrows to the enum's primitive type. For the
                // `#[repr(u8)]` enums this macro is applied to in this file, values above 0xff
                // are silently truncated rather than rejected — confirm this is intended.
                $t::try_from_primitive(x as _).ok()
            }

            #[inline(always)]
            fn to_oparg(self) -> u32 {
                u8::from(self).into()
            }
        }
    };
}

/// Zero-sized marker recording which [`OpArgType`] an instruction's argument decodes to.
///
/// The argument value itself is not stored here; it is passed in as an [`OpArg`] when decoding.
#[derive(Copy, Clone, Serialize, Deserialize)]
#[serde(bound = "")]
pub struct Arg<T: OpArgType>(PhantomData<T>);

impl<T: OpArgType> Arg<T> {
    /// Creates the marker without any argument value.
    #[inline]
    pub fn marker() -> Self {
        Arg(PhantomData)
    }

    /// Creates the marker together with the full-width encoding of `arg`.
    #[inline]
    pub fn new(arg: T) -> (Self, OpArg) {
        (Self(PhantomData), OpArg(arg.to_oparg()))
    }

    /// Creates the marker together with the single-byte encoding of `arg`.
    #[inline]
    pub fn new_single(arg: T) -> (Self, OpArgByte)
    where
        T: Into<u8>,
    {
        (Self(PhantomData), OpArgByte(arg.into()))
    }

    /// Decodes `arg` as a `T`.
    ///
    /// # Panics
    ///
    /// Panics if `T::from_oparg` rejects `arg`.
    #[inline(always)]
    pub fn get(self, arg: OpArg) -> T {
        self.try_get(arg).unwrap()
    }

    /// Decodes `arg` as a `T`, returning `None` if `T::from_oparg` rejects it.
    #[inline(always)]
    pub fn try_get(self, arg: OpArg) -> Option<T> {
        T::from_oparg(arg.0)
    }

    /// Decodes `arg` as a `T` without handling the failure case.
    ///
    /// # Safety
    /// T::from_oparg(self) must succeed
    #[inline(always)]
    pub unsafe fn get_unchecked(self, arg: OpArg) -> T {
        match T::from_oparg(arg.0) {
            Some(t) => t,
            // SAFETY: the caller guarantees that `T::from_oparg` succeeds for `arg`.
            None => std::hint::unreachable_unchecked(),
        }
    }
}

impl<T: OpArgType> PartialEq for Arg<T> {
    /// The marker carries no data, so two markers of the same type are always equal.
    fn eq(&self, _: &Self) -> bool {
        true
    }
}

impl<T: OpArgType> Eq for Arg<T> {}

impl<T: OpArgType> fmt::Debug for Arg<T> {
    /// Prints `Arg<...>` with the type name of `T`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Arg<{}>", std::any::type_name::<T>())
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[repr(transparent)]
// XXX: if you add a new instruction that stores a Label, make sure to add it in
// Instruction::label_arg
pub struct Label(pub u32);

impl OpArgType for Label {
    #[inline(always)]
    fn from_oparg(x: u32) -> Option<Self> {
        Some(Label(x))
    }

    #[inline(always)]
    fn to_oparg(self) -> u32 {
        self.0
    }
}

impl fmt::Display for Label {
    /// Displays exactly like the wrapped `u32`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

/// Transforms a value prior to formatting it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
#[repr(u8)]
pub enum ConversionFlag {
    /// No conversion
    // CPython uses -1 here, which is awkward for us (this enum is `repr(u8)`).
    None = 0,
    /// Converts by calling `str()`.
    Str = b's',
    /// Converts by calling `ascii()`.
    Ascii = b'a',
    /// Converts by calling `repr()`.
    Repr = b'r',
}

enum_oparg!(ConversionFlag);

impl TryFrom<usize> for ConversionFlag {
    /// The rejected input is handed back unchanged.
    type Error = usize;

    /// Succeeds only when `b` fits in a `u8` and that byte is one of the enum's discriminants.
    fn try_from(b: usize) -> Result<Self, Self::Error> {
        u8::try_from(b)
            .ok()
            .and_then(|byte| Self::try_from(byte).ok())
            .ok_or(b)
    }
}

/// The kind of Raise that occurred.
#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
#[repr(u8)]
pub enum RaiseKind {
    Reraise,
    Raise,
    RaiseCause,
}

enum_oparg!(RaiseKind);

pub type NameIdx = u32;

/// A Single bytecode instruction.
// NOTE(review): this enum continues past the end of the reviewed chunk, so it is left untouched
// apart from line wrapping. The type arguments of its `Arg` fields were lost when the file was
// mangled and cannot be recovered from the visible text; restore them from the original source.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Instruction {
    /// Importing by name
    ImportName {
        idx: Arg,
    },
    /// Importing without name
    ImportNameless,
    /// Import *
    ImportStar,
    /// from ... import ...
    ImportFrom {
        idx: Arg,
    },
    LoadFast(Arg),
    LoadNameAny(Arg),
    LoadGlobal(Arg),
    LoadDeref(Arg),
    LoadClassDeref(Arg),
    StoreFast(Arg),
    StoreLocal(Arg),
    StoreGlobal(Arg),
    StoreDeref(Arg),
    DeleteFast(Arg),
    DeleteLocal(Arg),
    DeleteGlobal(Arg),
    DeleteDeref(Arg),
    LoadClosure(Arg),
    Subscript,
    StoreSubscript,
    DeleteSubscript,
    StoreAttr {
        idx: Arg,
    },
    DeleteAttr {
        idx: Arg,
    },
    LoadConst {
        /// index into constants vec
        idx: Arg,
    },
    UnaryOperation {
        op: Arg,
    },
    BinaryOperation {
        op: Arg,
    },
    BinaryOperationInplace {
        op: Arg,
    },
    LoadAttr {
        idx: Arg,
    },
    TestOperation {
        op: Arg,
    },
    CompareOperation {
        op: Arg,
    },
    Pop,
    Rotate2,
    Rotate3,
    Duplicate,
    Duplicate2,
    GetIter,
    Continue {
        target: Arg