mirror of
https://github.com/erg-lang/erg.git
synced 2025-07-07 21:25:31 +00:00
173 lines
5.8 KiB
Rust
173 lines
5.8 KiB
Rust
//! オブジェクトのシリアライズ(バイナリ列化)のためのユーティリティーを定義・実装する
|
|
use std::time::{SystemTime, UNIX_EPOCH};
|
|
|
|
use crate::impl_display_from_debug;
|
|
use crate::python_util::PythonVersion;
|
|
use crate::Str;
|
|
|
|
/* Python bytecode specification */
|
|
// 0~3 byte: magic number
|
|
// 4~7 byte: padding (0;4)
|
|
// 8~B byte: UNIX timestamp
|
|
// C~F byte: padding (0;4)
|
|
// 10~ byte: marshalled code objects
|
|
// the unary magic number of Python bytecode
|
|
// magic number = version number (2byte) + 168624128 (0x0A0D0000)
|
|
// e.g. Python 3.7.4 b5's version number: 3394
|
|
// -> magic number: 0x0AD0D042
|
|
// -> bytes (little endian): 42 0D 0D 0A
|
|
pub const fn get_magic_num_bytes(python_ver: u32) -> [u8; 4] {
|
|
pub const PREFIX: u32 = 0xA0D0000;
|
|
(PREFIX | python_ver).to_le_bytes()
|
|
}
|
|
|
|
pub const fn get_magic_num_from_bytes(bytes: &[u8; 4]) -> u32 {
|
|
u32::from_le_bytes([bytes[0], bytes[1], 0, 0])
|
|
}
|
|
|
|
pub const fn get_ver_from_magic_num(magic_num: u32) -> PythonVersion {
|
|
match magic_num {
|
|
3360..=3379 => PythonVersion::new(3, Some(6), Some(0)),
|
|
3390..=3394 => PythonVersion::new(3, Some(7), Some(0)),
|
|
3400..=3413 => PythonVersion::new(3, Some(8), Some(0)),
|
|
3420..=3425 => PythonVersion::new(3, Some(9), Some(0)),
|
|
3430..=3439 => PythonVersion::new(3, Some(10), Some(0)), // main: 3439
|
|
3495 => PythonVersion::new(3, Some(11), Some(0)),
|
|
3531 => PythonVersion::new(3, Some(12), Some(0)),
|
|
_ => panic!("unknown magic number (unsupported Python version)"),
|
|
}
|
|
}
|
|
|
|
pub fn get_timestamp_bytes() -> [u8; 4] {
|
|
let secs = SystemTime::now()
|
|
.duration_since(UNIX_EPOCH)
|
|
.map(|dur| dur.as_secs() as u32)
|
|
.unwrap_or(0);
|
|
secs.to_le_bytes()
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
|
#[repr(u8)]
|
|
pub enum DataTypePrefix {
|
|
/* sized objects */
|
|
Illegal = 0,
|
|
Int32 = b'i', // 0x69
|
|
Int64 = b'I', // 0x49
|
|
Float = b'f', // 0x66 (float32, not generated anymore?)
|
|
BinFloat = b'g', // 0x67 (float64)
|
|
Complex = b'x', // 0x78
|
|
BinComplex = b'y', // 0x79
|
|
True = b'T', // 0x54
|
|
False = b'F', // 0x46
|
|
None = b'N', // 0x4E
|
|
StopIter = b'S', // 0x53
|
|
Ref = b'r',
|
|
/* unsized objects (ref counted) */
|
|
Long = b'l', // 0x6C + len: u32 + payload: 2*len+3byte (~ -2^31-1 && 2^31 ~)
|
|
Str = b's', // 0x73 + len: u32 + payload
|
|
ShortAscii = b'z' + 0x80, // 0x7A (0xFA) + len: u8 + payload
|
|
ShortAsciiInterned = b'Z' + 0x80, // 0x5A (0xDA) + len: u8 + payload
|
|
Unicode = b'u', // 0x75 + len: u32 + payload
|
|
Interned = b't', // 0x74 + len + payload
|
|
SmallTuple = b')', // 0x29 + len: u8 + payload
|
|
Tuple = b'(', // 0x28 + len: u32 + payload
|
|
Code = b'c' + 0x80, // 0x63 (0xE3)
|
|
/* Erg specific prefix */
|
|
Builtin = b'b', // 0x62 + str
|
|
Nat = b'n',
|
|
}
|
|
|
|
impl_display_from_debug!(DataTypePrefix);
|
|
|
|
impl From<u8> for DataTypePrefix {
|
|
fn from(item: u8) -> Self {
|
|
match item as char {
|
|
'i' | '\u{00E9}' => Self::Int32,
|
|
'I' => Self::Int64,
|
|
'l' => Self::Long,
|
|
'f' => Self::Float,
|
|
'g' => Self::BinFloat,
|
|
'x' => Self::Complex,
|
|
'y' => Self::BinComplex,
|
|
'T' => Self::True,
|
|
'F' => Self::False,
|
|
'N' => Self::None,
|
|
'S' => Self::StopIter,
|
|
's' | '\u{00F3}' => Self::Str,
|
|
'Z' | '\u{00DA}' => Self::ShortAsciiInterned,
|
|
'z' | '\u{00FA}' => Self::ShortAscii,
|
|
'u' => Self::Unicode,
|
|
't' => Self::Interned,
|
|
'(' | '\u{00A8}' => Self::Tuple,
|
|
')' | '\u{00A9}' => Self::SmallTuple,
|
|
'c' | '\u{00E3}' => Self::Code,
|
|
'b' => Self::Builtin,
|
|
'n' => Self::Nat,
|
|
/*'\u{00F9}' => DataTypeUnaryOp::ErgInt8,
|
|
'\u{00FA}' => DataTypeUnaryOp::ErgInt32,
|
|
'\u{00FB}' => DataTypeUnaryOp::ErgFloat32,
|
|
'\u{00FC}' => DataTypeUnaryOp::ErgStr,
|
|
'\u{00FD}' => DataTypeUnaryOp::ErgFloat,
|
|
'\u{00FE}' => DataTypeUnaryOp::HeapArray,
|
|
'\u{00FF}' => DataTypeUnaryOp::NumArray,*/
|
|
_ => Self::Illegal,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl DataTypePrefix {
|
|
pub const fn is_sized(&self) -> bool {
|
|
matches!(
|
|
self,
|
|
Self::Long
|
|
| Self::Str
|
|
| Self::ShortAscii
|
|
| Self::ShortAsciiInterned
|
|
| Self::Unicode
|
|
| Self::Interned
|
|
| Self::SmallTuple
|
|
| Self::Tuple
|
|
| Self::Code
|
|
| Self::Builtin
|
|
)
|
|
}
|
|
}
|
|
|
|
pub fn strs_into_bytes(names: Vec<Str>) -> Vec<u8> {
|
|
let mut tuple = vec![];
|
|
if names.len() > u8::MAX as usize {
|
|
tuple.push(DataTypePrefix::Tuple as u8);
|
|
tuple.append(&mut (names.len() as u32).to_le_bytes().to_vec());
|
|
} else {
|
|
tuple.push(DataTypePrefix::SmallTuple as u8);
|
|
tuple.push(names.len() as u8);
|
|
}
|
|
for name in names.into_iter() {
|
|
tuple.append(&mut str_into_bytes(name, true));
|
|
}
|
|
tuple
|
|
}
|
|
|
|
pub fn str_into_bytes(cont: Str, is_interned: bool) -> Vec<u8> {
|
|
let mut bytes = vec![];
|
|
if cont.is_ascii() && cont.len() <= u8::MAX as usize {
|
|
if is_interned {
|
|
bytes.push(DataTypePrefix::ShortAsciiInterned as u8);
|
|
} else {
|
|
bytes.push(DataTypePrefix::ShortAscii as u8);
|
|
}
|
|
bytes.push(cont.len() as u8);
|
|
} else {
|
|
bytes.push(DataTypePrefix::Unicode as u8);
|
|
bytes.append(&mut (cont.len() as u32).to_le_bytes().to_vec());
|
|
};
|
|
bytes.append(&mut cont.as_bytes().to_vec());
|
|
bytes
|
|
}
|
|
|
|
pub fn raw_string_into_bytes(mut cont: Vec<u8>) -> Vec<u8> {
|
|
let mut tuple = vec![DataTypePrefix::Str as u8];
|
|
tuple.append(&mut (cont.len() as u32).to_le_bytes().to_vec());
|
|
tuple.append(&mut cont);
|
|
tuple
|
|
}
|