mirror of
https://github.com/erg-lang/erg.git
synced 2025-09-29 20:34:44 +00:00
501 lines
18 KiB
Rust
501 lines
18 KiB
Rust
use std::fmt;
|
|
use std::fs::File;
|
|
use std::io::{BufReader, Read, Write};
|
|
use std::path::Path;
|
|
|
|
use crate::deserialize::{DeserializeResult, Deserializer};
|
|
use crate::impl_display_from_debug;
|
|
use crate::opcode::Opcode;
|
|
use crate::python_util::detect_magic_number;
|
|
use crate::serialize::*;
|
|
use crate::traits::HasType;
|
|
use crate::ty::{Type, TypePair};
|
|
use crate::value::ValueObj;
|
|
use crate::Str;
|
|
|
|
pub fn consts_into_bytes(consts: Vec<ValueObj>) -> Vec<u8> {
|
|
let mut tuple = vec![];
|
|
if consts.len() > u8::MAX as usize {
|
|
tuple.push(DataTypePrefix::Tuple as u8);
|
|
tuple.append(&mut (consts.len() as u32).to_le_bytes().to_vec());
|
|
} else {
|
|
tuple.push(DataTypePrefix::SmallTuple as u8);
|
|
tuple.push(consts.len() as u8);
|
|
}
|
|
for obj in consts {
|
|
tuple.append(&mut obj.into_bytes());
|
|
}
|
|
tuple
|
|
}
|
|
|
|
/// Bit masks for `CodeObj.flags`.
///
/// Every variant except [`CodeObjFlags::Illegal`] is a single bit that can be
/// tested against a raw `u32` flag word with [`CodeObjFlags::is_in`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum CodeObjFlags {
    Optimized = 0x0001,
    NewLocals = 0x0002,
    VarArgs = 0x0004,
    VarKeywords = 0x0008,
    Nested = 0x0010,
    Generator = 0x0020,
    NoFree = 0x0040,
    Coroutine = 0x0080,
    IterableCoroutine = 0x0100,
    AsyncGenerator = 0x0200,
    // 0x0400 (CO_GENERATOR_ALLOWED) has no variant here.
    FutureDivision = 0x2000,
    FutureAbsoluteImport = 0x4000,
    FutureWithStatement = 0x8000,
    FuturePrintFunction = 0x1_0000,
    FutureUnicodeLiterals = 0x2_0000,
    FutureBarryAsBDFL = 0x4_0000,
    FutureGeneratorStop = 0x8_0000,
    FutureAnnotations = 0x10_0000,
    // Erg-specific flags
    EvmDynParam = 0x1000_0000,
    EvmDynamic = 0x2000_0000,
    EvmNoGC = 0x4000_0000,
    /// Produced by the `u32` conversion for any value that is not exactly one
    /// of the bits above. Its value is `0`, so it is never "in" a flag word.
    Illegal = 0x0000,
}

/// Maps a raw value holding exactly one known flag bit to its variant.
///
/// Any other value (zero, an unknown bit, or several bits combined) becomes
/// [`CodeObjFlags::Illegal`]. Implementing `From` rather than `Into` keeps
/// `value.into()` working through the standard blanket impl and additionally
/// allows `CodeObjFlags::from(value)`.
impl From<u32> for CodeObjFlags {
    fn from(bits: u32) -> Self {
        match bits {
            0x0001 => CodeObjFlags::Optimized,
            0x0002 => CodeObjFlags::NewLocals,
            0x0004 => CodeObjFlags::VarArgs,
            0x0008 => CodeObjFlags::VarKeywords,
            0x0010 => CodeObjFlags::Nested,
            0x0020 => CodeObjFlags::Generator,
            0x0040 => CodeObjFlags::NoFree,
            0x0080 => CodeObjFlags::Coroutine,
            0x0100 => CodeObjFlags::IterableCoroutine,
            0x0200 => CodeObjFlags::AsyncGenerator,
            // 0x0400 (CO_GENERATOR_ALLOWED) falls through to `Illegal`.
            0x2000 => CodeObjFlags::FutureDivision,
            0x4000 => CodeObjFlags::FutureAbsoluteImport,
            0x8000 => CodeObjFlags::FutureWithStatement,
            0x1_0000 => CodeObjFlags::FuturePrintFunction,
            0x2_0000 => CodeObjFlags::FutureUnicodeLiterals,
            0x4_0000 => CodeObjFlags::FutureBarryAsBDFL,
            0x8_0000 => CodeObjFlags::FutureGeneratorStop,
            0x10_0000 => CodeObjFlags::FutureAnnotations,
            // EVM flags
            0x1000_0000 => CodeObjFlags::EvmDynParam,
            0x2000_0000 => CodeObjFlags::EvmDynamic,
            0x4000_0000 => CodeObjFlags::EvmNoGC,
            _ => CodeObjFlags::Illegal,
        }
    }
}

impl CodeObjFlags {
    /// Returns `true` when this flag's bit is set in `flags`.
    ///
    /// Always `false` for [`CodeObjFlags::Illegal`], whose value is `0`.
    pub const fn is_in(&self, flags: u32) -> bool {
        (flags & *self as u32) != 0
    }
}
|
|
|
|
/// Implementation of `PyCodeObject`, see Include/cpython/code.h in CPython for details.
|
|
///
|
|
/// 各属性をErg側のObjに変換すると遅くなりそうなので、アクサスされたときのみ変換して提供する
|
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
|
pub struct CodeObj {
|
|
pub argcount: u32,
|
|
pub posonlyargcount: u32,
|
|
pub kwonlyargcount: u32,
|
|
pub nlocals: u32, // == params + local vars
|
|
pub stacksize: u32,
|
|
pub flags: u32,
|
|
pub code: Vec<u8>,
|
|
pub consts: Vec<ValueObj>, // objects used in the code (literal)
|
|
pub names: Vec<Str>, // names used in the code object
|
|
pub varnames: Vec<Str>, // names defined in the code object
|
|
pub freevars: Vec<Str>, // names captured from the outer scope
|
|
pub cellvars: Vec<Str>, // names used in the inner function (closure)
|
|
pub filename: Str,
|
|
pub name: Str,
|
|
pub firstlineno: u32,
|
|
// lnotab (line number table): see Object/lnotab_notes.txt in CPython for details
|
|
// e.g. +12bytes, +3line -> [.., 0x1C, 0x03, ..]
|
|
// ([sdelta, ldelta, sdelta, ldelta, ..])
|
|
// if delta > 255 -> [255, 0, 255-delta, ...]
|
|
pub lnotab: Vec<u8>,
|
|
}
|
|
|
|
impl HasType for CodeObj {
|
|
fn ref_t(&self) -> &Type {
|
|
&Type::Code
|
|
}
|
|
fn signature_t(&self) -> Option<&Type> {
|
|
None
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for CodeObj {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(
|
|
f,
|
|
"<code object {} at {:p}, file \"{}\", line {}>",
|
|
self.name, self, self.filename, self.firstlineno
|
|
)
|
|
}
|
|
}
|
|
|
|
impl_display_from_debug!(CodeObj);
|
|
|
|
impl Default for CodeObj {
|
|
fn default() -> Self {
|
|
Self {
|
|
argcount: 0,
|
|
posonlyargcount: 0,
|
|
kwonlyargcount: 0,
|
|
nlocals: 0,
|
|
stacksize: 2, // Seems to be the default in CPython, but not sure why
|
|
flags: CodeObjFlags::NoFree as u32,
|
|
code: Vec::new(),
|
|
consts: Vec::new(),
|
|
names: Vec::new(),
|
|
varnames: Vec::new(),
|
|
freevars: Vec::new(),
|
|
cellvars: Vec::new(),
|
|
filename: "<dummy>".into(),
|
|
name: "<dummy>".into(),
|
|
firstlineno: 1,
|
|
lnotab: Vec::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl CodeObj {
|
|
pub fn new<S: Into<Str>>(
|
|
argcount: u32,
|
|
posonlyargcount: u32,
|
|
kwonlyargcount: u32,
|
|
nlocals: u32,
|
|
stacksize: u32,
|
|
flags: u32,
|
|
code: Vec<u8>,
|
|
consts: Vec<ValueObj>,
|
|
names: Vec<Str>,
|
|
varnames: Vec<Str>,
|
|
freevars: Vec<Str>,
|
|
cellvars: Vec<Str>,
|
|
filename: Str,
|
|
name: S,
|
|
firstlineno: u32,
|
|
lnotab: Vec<u8>,
|
|
) -> Self {
|
|
Self {
|
|
argcount,
|
|
posonlyargcount,
|
|
kwonlyargcount,
|
|
nlocals,
|
|
stacksize,
|
|
flags,
|
|
code,
|
|
consts,
|
|
names,
|
|
varnames,
|
|
freevars,
|
|
cellvars,
|
|
filename,
|
|
name: name.into(),
|
|
firstlineno,
|
|
lnotab,
|
|
}
|
|
}
|
|
|
|
pub fn empty<S: Into<Str>, T: Into<Str>>(
|
|
params: Vec<Str>,
|
|
filename: S,
|
|
name: T,
|
|
firstlineno: u32,
|
|
) -> Self {
|
|
Self {
|
|
argcount: params.len() as u32,
|
|
posonlyargcount: 0,
|
|
kwonlyargcount: 0,
|
|
nlocals: params.len() as u32,
|
|
stacksize: 2, // Seems to be the default in CPython, but not sure why
|
|
flags: CodeObjFlags::NoFree as u32,
|
|
code: Vec::with_capacity(8),
|
|
consts: Vec::with_capacity(4),
|
|
names: Vec::with_capacity(3),
|
|
varnames: params,
|
|
freevars: Vec::new(),
|
|
cellvars: Vec::new(),
|
|
filename: filename.into(),
|
|
name: name.into(),
|
|
firstlineno,
|
|
lnotab: Vec::with_capacity(4),
|
|
}
|
|
}
|
|
|
|
pub fn from_pyc<P: AsRef<Path>>(path: P) -> DeserializeResult<Self> {
|
|
let mut f = BufReader::new(File::open(path)?);
|
|
let v = &mut Vec::with_capacity(16);
|
|
f.read_to_end(v)?;
|
|
let python_ver = get_magic_num_from_bytes(&Deserializer::consume::<4>(v));
|
|
let _padding = Deserializer::deserialize_u32(v);
|
|
let _timestamp = Deserializer::deserialize_u32(v);
|
|
let _padding = Deserializer::deserialize_u32(v);
|
|
let code = Self::from_bytes(v, python_ver)?;
|
|
Ok(code)
|
|
}
|
|
|
|
pub fn from_bytes(v: &mut Vec<u8>, python_ver: u32) -> DeserializeResult<Self> {
|
|
let mut des = Deserializer::new();
|
|
let argcount = Deserializer::deserialize_u32(v);
|
|
let posonlyargcount = if python_ver >= 3413 {
|
|
Deserializer::deserialize_u32(v)
|
|
} else {
|
|
0
|
|
};
|
|
let kwonlyargcount = Deserializer::deserialize_u32(v);
|
|
let nlocals = Deserializer::deserialize_u32(v);
|
|
let stacksize = Deserializer::deserialize_u32(v);
|
|
let flags = Deserializer::deserialize_u32(v);
|
|
let code = des.deserialize_bytes(v)?;
|
|
let consts = des.deserialize_const_vec(v, python_ver)?;
|
|
let names = des.deserialize_str_vec(v, python_ver)?;
|
|
let varnames = des.deserialize_str_vec(v, python_ver)?;
|
|
let freevars = des.deserialize_str_vec(v, python_ver)?;
|
|
let cellvars = des.deserialize_str_vec(v, python_ver)?;
|
|
let filename = des.deserialize_str(v, python_ver)?;
|
|
let name = des.deserialize_str(v, python_ver)?;
|
|
let firstlineno = Deserializer::deserialize_u32(v);
|
|
let lnotab = des.deserialize_bytes(v)?;
|
|
Ok(CodeObj::new(
|
|
argcount,
|
|
posonlyargcount,
|
|
kwonlyargcount,
|
|
nlocals,
|
|
stacksize,
|
|
flags,
|
|
code,
|
|
consts,
|
|
names,
|
|
varnames,
|
|
freevars,
|
|
cellvars,
|
|
filename,
|
|
name,
|
|
firstlineno,
|
|
lnotab,
|
|
))
|
|
}
|
|
|
|
pub fn into_bytes(self, python_ver: u32) -> Vec<u8> {
|
|
let mut bytes = vec![DataTypePrefix::Code as u8];
|
|
bytes.append(&mut self.argcount.to_le_bytes().to_vec());
|
|
if python_ver >= 3413 {
|
|
bytes.append(&mut self.posonlyargcount.to_le_bytes().to_vec());
|
|
}
|
|
bytes.append(&mut self.kwonlyargcount.to_le_bytes().to_vec());
|
|
bytes.append(&mut self.nlocals.to_le_bytes().to_vec());
|
|
bytes.append(&mut self.stacksize.to_le_bytes().to_vec());
|
|
bytes.append(&mut self.flags.to_le_bytes().to_vec());
|
|
// co_code is represented as PyStrObject (Not Ascii, Unicode)
|
|
bytes.append(&mut raw_string_into_bytes(self.code));
|
|
bytes.append(&mut consts_into_bytes(self.consts)); // write as PyTupleObject
|
|
bytes.append(&mut strs_into_bytes(self.names));
|
|
bytes.append(&mut strs_into_bytes(self.varnames));
|
|
bytes.append(&mut strs_into_bytes(self.freevars));
|
|
bytes.append(&mut strs_into_bytes(self.cellvars));
|
|
bytes.append(&mut str_into_bytes(self.filename, false));
|
|
bytes.append(&mut str_into_bytes(self.name, true));
|
|
bytes.append(&mut self.firstlineno.to_le_bytes().to_vec());
|
|
// lnotab is represented as PyStrObject
|
|
bytes.append(&mut raw_string_into_bytes(self.lnotab));
|
|
bytes
|
|
}
|
|
|
|
pub fn dump_as_pyc<P: AsRef<Path>>(
|
|
self,
|
|
path: P,
|
|
python_ver: Option<u32>,
|
|
) -> std::io::Result<()> {
|
|
let mut file = File::create(path)?;
|
|
let mut bytes = Vec::with_capacity(16);
|
|
let python_ver = python_ver.unwrap_or_else(|| detect_magic_number());
|
|
bytes.append(&mut get_magic_num_bytes(python_ver).to_vec());
|
|
bytes.append(&mut vec![0; 4]); // padding
|
|
bytes.append(&mut get_timestamp_bytes().to_vec());
|
|
bytes.append(&mut vec![0; 4]); // padding
|
|
bytes.append(&mut self.into_bytes(python_ver));
|
|
file.write_all(&bytes[..])?;
|
|
Ok(())
|
|
}
|
|
|
|
fn tables_info(&self) -> String {
|
|
let mut tables = "".to_string();
|
|
if !self.consts.is_empty() {
|
|
tables += &format!("Constants:\n");
|
|
}
|
|
for (i, obj) in self.consts.iter().enumerate() {
|
|
tables += &format!(" {}: {}\n", i, obj);
|
|
}
|
|
if !self.names.is_empty() {
|
|
tables += &format!("Names:\n");
|
|
}
|
|
for (i, name) in self.names.iter().enumerate() {
|
|
tables += &format!(" {}: {}\n", i, name);
|
|
}
|
|
if !self.varnames.is_empty() {
|
|
tables += &format!("Varnames:\n");
|
|
}
|
|
for (i, varname) in self.varnames.iter().enumerate() {
|
|
tables += &format!(" {}: {}\n", i, varname);
|
|
}
|
|
if !self.cellvars.is_empty() {
|
|
tables += &format!("Cellvars:\n");
|
|
}
|
|
for (i, cellvar) in self.cellvars.iter().enumerate() {
|
|
tables += &format!(" {}: {}\n", i, cellvar);
|
|
}
|
|
if !self.freevars.is_empty() {
|
|
tables += &format!("Freevars:\n");
|
|
}
|
|
for (i, freevar) in self.freevars.iter().enumerate() {
|
|
tables += &format!(" {}: {}\n", i, freevar);
|
|
}
|
|
tables
|
|
}
|
|
|
|
fn attrs_info(&self) -> String {
|
|
let mut attrs = "".to_string();
|
|
attrs += &format!("Name: {}\n", self.name);
|
|
attrs += &format!("FileName: {}\n", self.filename);
|
|
attrs += &format!("Argument count: {}\n", self.argcount);
|
|
attrs += &format!("Positional-only arguments: {}\n", self.posonlyargcount);
|
|
attrs += &format!("Kw-only arguments: {}\n", self.kwonlyargcount);
|
|
attrs += &format!("Number of locals: {}\n", self.nlocals);
|
|
attrs += &format!("Stack size: {}\n", self.stacksize);
|
|
let mut flagged = "".to_string();
|
|
for i in 0..32 {
|
|
if (self.flags & (1 << i)) != 0 {
|
|
let flag: CodeObjFlags = 2u32.pow(i).into();
|
|
flagged += &format!("{:?}, ", flag);
|
|
}
|
|
}
|
|
flagged.pop();
|
|
flagged.pop();
|
|
attrs += &format!("Flags: {}\n", flagged);
|
|
attrs
|
|
}
|
|
|
|
fn instr_info(&self) -> String {
|
|
let mut lnotab_iter = self.lnotab.iter();
|
|
let mut code_iter = self.code.iter();
|
|
let mut idx = 0;
|
|
let mut line_offset = 0;
|
|
let mut lineno = self.firstlineno as u8;
|
|
let mut sdelta = lnotab_iter.next().unwrap_or(&0);
|
|
let mut ldelta = lnotab_iter.next().unwrap_or(&0);
|
|
let mut instrs = "".to_string();
|
|
instrs += &format!("lnotab: {:?}\n", self.lnotab);
|
|
if *sdelta != 0 {
|
|
instrs += &format!("{}:\n", lineno);
|
|
}
|
|
loop {
|
|
if *sdelta == line_offset {
|
|
line_offset = 0;
|
|
lineno += ldelta;
|
|
instrs += &format!("{}:\n", lineno);
|
|
sdelta = lnotab_iter.next().unwrap_or(&0);
|
|
ldelta = lnotab_iter.next().unwrap_or(&0);
|
|
}
|
|
if let (Some(op), Some(arg)) = (code_iter.next(), code_iter.next()) {
|
|
let op = Opcode::from(*op);
|
|
let s_op = op.to_string();
|
|
instrs += &format!("{:>15} {:<25}", idx, s_op);
|
|
match op {
|
|
Opcode::COMPARE_OP => {
|
|
let op = match arg {
|
|
0 => "<",
|
|
1 => "<=",
|
|
2 => "==",
|
|
3 => "!=",
|
|
4 => ">",
|
|
5 => ">=",
|
|
_ => "?",
|
|
};
|
|
instrs += &format!("{} ({})", arg, op);
|
|
}
|
|
Opcode::STORE_NAME
|
|
| Opcode::LOAD_NAME
|
|
| Opcode::STORE_GLOBAL
|
|
| Opcode::LOAD_GLOBAL
|
|
| Opcode::STORE_ATTR
|
|
| Opcode::LOAD_ATTR
|
|
| Opcode::LOAD_METHOD => {
|
|
instrs += &format!("{} ({})", arg, self.names.get(*arg as usize).unwrap());
|
|
}
|
|
Opcode::STORE_DEREF | Opcode::LOAD_DEREF => {
|
|
instrs +=
|
|
&format!("{} ({})", arg, self.freevars.get(*arg as usize).unwrap());
|
|
}
|
|
Opcode::STORE_FAST | Opcode::LOAD_FAST => {
|
|
instrs +=
|
|
&format!("{} ({})", arg, self.varnames.get(*arg as usize).unwrap());
|
|
}
|
|
Opcode::LOAD_CONST => {
|
|
instrs += &format!("{} ({})", arg, self.consts.get(*arg as usize).unwrap());
|
|
}
|
|
Opcode::FOR_ITER => {
|
|
instrs += &format!("{} (to {})", arg, idx + arg * 2 + 2);
|
|
}
|
|
Opcode::JUMP_FORWARD => {
|
|
instrs += &format!("{} (to {})", arg, idx + arg * 2 + 2);
|
|
}
|
|
Opcode::JUMP_ABSOLUTE => {
|
|
instrs += &format!("{} (to {})", arg, arg * 2);
|
|
}
|
|
Opcode::POP_JUMP_IF_FALSE | Opcode::POP_JUMP_IF_TRUE => {
|
|
instrs += &format!("{} (to {})", arg, arg * 2);
|
|
}
|
|
Opcode::MAKE_FUNCTION => {
|
|
let flag = match arg {
|
|
8 => "(closure)",
|
|
// TODO:
|
|
_ => "",
|
|
};
|
|
instrs += &format!("{} {}", arg, flag);
|
|
}
|
|
// Ergでは引数で型キャストする
|
|
Opcode::BINARY_ADD
|
|
| Opcode::BINARY_SUBTRACT
|
|
| Opcode::BINARY_MULTIPLY
|
|
| Opcode::BINARY_TRUE_DIVIDE => {
|
|
instrs += &format!("{} ({:?})", arg, TypePair::from(*arg));
|
|
}
|
|
other if other.take_arg() => {
|
|
instrs += &format!("{}", arg);
|
|
}
|
|
_ => {}
|
|
}
|
|
instrs.push('\n');
|
|
idx += 2;
|
|
line_offset += 2;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
instrs
|
|
}
|
|
|
|
pub fn code_info(&self) -> String {
|
|
let mut info = "".to_string();
|
|
info += &format!("Disassembly of {:?}:\n", self);
|
|
info += &self.attrs_info();
|
|
info += &self.tables_info();
|
|
info += &self.instr_info();
|
|
info.push('\n');
|
|
for cons in self.consts.iter() {
|
|
if let ValueObj::Code(c) = cons {
|
|
info += &c.code_info();
|
|
}
|
|
}
|
|
info
|
|
}
|
|
}
|