erg/crates/erg_common/serialize.rs
2025-01-21 14:52:52 +09:00

173 lines
5.8 KiB
Rust

//! オブジェクトのシリアライズ(バイナリ列化)のためのユーティリティーを定義・実装する
use std::time::{SystemTime, UNIX_EPOCH};
use crate::impl_display_from_debug;
use crate::python_util::PythonVersion;
use crate::Str;
/* Python bytecode specification */
// 0~3 byte: magic number
// 4~7 byte: padding (0;4)
// 8~B byte: UNIX timestamp
// C~F byte: padding (0;4)
// 10~ byte: marshalled code objects
// the unary magic number of Python bytecode
// magic number = version number (2byte) + 168624128 (0x0A0D0000)
// e.g. Python 3.7.4 b5's version number: 3394
// -> magic number: 0x0A0D0D42
// -> bytes (little endian): 42 0D 0D 0A
/// Builds the 4-byte magic number for a Python bytecode header.
///
/// The fixed upper half `0x0A0D` is OR-ed with `python_ver` and the result is
/// returned in little-endian order, e.g. `3394` becomes `42 0D 0D 0A`.
/// `python_ver` is not masked, so any bits above the low 16 would overlap the
/// fixed upper half.
pub const fn get_magic_num_bytes(python_ver: u32) -> [u8; 4] {
    const UPPER_HALF: u32 = 0x0A0D << 16;
    let magic = python_ver | UPPER_HALF;
    magic.to_le_bytes()
}
/// Reads the version number out of the 4 magic-number bytes.
///
/// Only the first two bytes (the little-endian low half) contribute to the
/// result; `bytes[2]` and `bytes[3]` are ignored.
pub const fn get_magic_num_from_bytes(bytes: &[u8; 4]) -> u32 {
    let low = bytes[0] as u32;
    let high = bytes[1] as u32;
    (high << 8) | low
}
/// Maps a bytecode version number to the Python release it belongs to.
///
/// Every supported number resolves to `3.<minor>.0`. Python 3.6 through 3.10
/// each accept a range of numbers, whereas 3.11 and 3.12 are recognised by a
/// single number each (3495 and 3531).
///
/// # Panics
///
/// Panics when `magic_num` matches none of the supported numbers.
pub const fn get_ver_from_magic_num(magic_num: u32) -> PythonVersion {
    let minor = match magic_num {
        3360..=3379 => 6,
        3390..=3394 => 7,
        3400..=3413 => 8,
        3420..=3425 => 9,
        3430..=3439 => 10, // main: 3439
        3495 => 11,
        3531 => 12,
        _ => panic!("unknown magic number (unsupported Python version)"),
    };
    PythonVersion::new(3, Some(minor), Some(0))
}
/// Returns the current UNIX time, in whole seconds, as 4 little-endian bytes.
///
/// The second count is narrowed to `u32` with `as`, so only its low 32 bits
/// are kept. If the system clock reports a time before the UNIX epoch, the
/// result is four zero bytes.
pub fn get_timestamp_bytes() -> [u8; 4] {
    let now = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs() as u32,
        Err(_) => 0,
    };
    now.to_le_bytes()
}
/// One-byte type tag (`#[repr(u8)]`) that the serialization helpers in this
/// file write as the first byte of an encoded value.
///
/// Most tags are a plain ASCII letter; `ShortAscii`, `ShortAsciiInterned` and
/// `Code` are declared as the letter plus `0x80`. The trailing comments give
/// the raw byte value and, where noted, the fields that follow the tag.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum DataTypePrefix {
/* sized objects */
// Fallback tag: the `From<u8>` conversion returns it for any byte it does not recognise.
Illegal = 0,
Int32 = b'i', // 0x69
Int64 = b'I', // 0x49
Float = b'f', // 0x66 (float32, not generated anymore?)
BinFloat = b'g', // 0x67 (float64)
Complex = b'x', // 0x78
BinComplex = b'y', // 0x79
True = b'T', // 0x54
False = b'F', // 0x46
None = b'N', // 0x4E
StopIter = b'S', // 0x53
Ref = b'r', // 0x72
/* unsized objects (ref counted) */
Long = b'l', // 0x6C + len: u32 + payload: 2*len+3byte (~ -2^31-1 && 2^31 ~)
Str = b's', // 0x73 + len: u32 + payload
ShortAscii = b'z' + 0x80, // 0x7A (0xFA) + len: u8 + payload
ShortAsciiInterned = b'Z' + 0x80, // 0x5A (0xDA) + len: u8 + payload
Unicode = b'u', // 0x75 + len: u32 + payload
Interned = b't', // 0x74 + len + payload
SmallTuple = b')', // 0x29 + len: u8 + payload
Tuple = b'(', // 0x28 + len: u32 + payload
Code = b'c' + 0x80, // 0x63 (0xE3)
/* Erg specific prefix */
Builtin = b'b', // 0x62 + str
Nat = b'n', // 0x6E
}
// `Display` for `DataTypePrefix` comes from the crate's `impl_display_from_debug!` macro
// (presumably it reuses the derived `Debug` output — confirm against the macro definition).
impl_display_from_debug!(DataTypePrefix);
impl From<u8> for DataTypePrefix {
    /// Decodes a raw tag byte into its `DataTypePrefix`.
    ///
    /// Some tags are accepted both as the plain ASCII letter and with the high
    /// bit (`0x80`) set. Any byte without a mapping yields `Self::Illegal`;
    /// this conversion never fails.
    fn from(item: u8) -> Self {
        match item {
            b'i' | 0xE9 => Self::Int32,
            b'I' => Self::Int64,
            b'l' => Self::Long,
            b'f' => Self::Float,
            b'g' => Self::BinFloat,
            b'x' => Self::Complex,
            b'y' => Self::BinComplex,
            b'T' => Self::True,
            b'F' => Self::False,
            b'N' => Self::None,
            b'S' => Self::StopIter,
            // `Ref` is declared with the tag `b'r'`; without this arm the tag
            // would decode to `Illegal` and not round-trip.
            b'r' => Self::Ref,
            b's' | 0xF3 => Self::Str,
            b'Z' | 0xDA => Self::ShortAsciiInterned,
            b'z' | 0xFA => Self::ShortAscii,
            b'u' => Self::Unicode,
            b't' => Self::Interned,
            b'(' | 0xA8 => Self::Tuple,
            b')' | 0xA9 => Self::SmallTuple,
            b'c' | 0xE3 => Self::Code,
            b'b' => Self::Builtin,
            b'n' => Self::Nat,
            _ => Self::Illegal,
        }
    }
}
impl DataTypePrefix {
    /// Reports whether this prefix is one of `Long`, `Str`, `ShortAscii`,
    /// `ShortAsciiInterned`, `Unicode`, `Interned`, `SmallTuple`, `Tuple`,
    /// `Code` or `Builtin`.
    ///
    /// The match lists every variant explicitly, so adding a variant to the
    /// enum forces a decision here.
    // NOTE(review): apart from `Builtin`, the `true` set is exactly what the enum
    // groups under "unsized objects"; presumably "sized" here means "carries a
    // length/payload" — confirm the intended meaning.
    pub const fn is_sized(&self) -> bool {
        match self {
            Self::Long
            | Self::Str
            | Self::ShortAscii
            | Self::ShortAsciiInterned
            | Self::Unicode
            | Self::Interned
            | Self::SmallTuple
            | Self::Tuple
            | Self::Code
            | Self::Builtin => true,
            Self::Illegal
            | Self::Int32
            | Self::Int64
            | Self::Float
            | Self::BinFloat
            | Self::Complex
            | Self::BinComplex
            | Self::True
            | Self::False
            | Self::None
            | Self::StopIter
            | Self::Ref
            | Self::Nat => false,
        }
    }
}
/// Serializes `names` as a tuple of interned strings.
///
/// Up to 255 names are written as `SmallTuple` with a one-byte count; longer
/// lists use `Tuple` with a little-endian `u32` count. Every name is then
/// appended in order via `str_into_bytes(name, true)`.
pub fn strs_into_bytes(names: Vec<Str>) -> Vec<u8> {
    let count = names.len();
    let mut out = if count > u8::MAX as usize {
        let mut header = vec![DataTypePrefix::Tuple as u8];
        header.extend_from_slice(&(count as u32).to_le_bytes());
        header
    } else {
        vec![DataTypePrefix::SmallTuple as u8, count as u8]
    };
    for name in names {
        out.extend(str_into_bytes(name, true));
    }
    out
}
/// Serializes one string as tag + length + raw bytes.
///
/// An ASCII string of at most 255 bytes gets a one-byte length and the tag
/// `ShortAsciiInterned` or `ShortAscii`, chosen by `is_interned`. Anything
/// else is written as `Unicode` with a little-endian `u32` length, and in
/// that case `is_interned` has no effect.
pub fn str_into_bytes(cont: Str, is_interned: bool) -> Vec<u8> {
    let fits_short_ascii = cont.is_ascii() && cont.len() <= u8::MAX as usize;
    let mut bytes = if fits_short_ascii {
        let prefix = if is_interned {
            DataTypePrefix::ShortAsciiInterned
        } else {
            DataTypePrefix::ShortAscii
        };
        vec![prefix as u8, cont.len() as u8]
    } else {
        let mut header = vec![DataTypePrefix::Unicode as u8];
        header.extend_from_slice(&(cont.len() as u32).to_le_bytes());
        header
    };
    bytes.extend_from_slice(cont.as_bytes());
    bytes
}
/// Wraps raw bytes as a `Str` record: the tag, the byte count as a
/// little-endian `u32`, then the bytes themselves unchanged.
pub fn raw_string_into_bytes(cont: Vec<u8>) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(1 + 4 + cont.len());
    bytes.push(DataTypePrefix::Str as u8);
    bytes.extend_from_slice(&(cont.len() as u32).to_le_bytes());
    bytes.extend_from_slice(&cont);
    bytes
}