fix: bytecode reader

This commit is contained in:
Shunsuke Shibayama 2023-08-24 14:17:49 +09:00
parent afed802b24
commit 9db6e5fa4d
4 changed files with 123 additions and 103 deletions

View file

@ -38,11 +38,11 @@ impl TryFrom<&str> for ErgMode {
"desugar" | "desugarer" => Ok(Self::Desugar), "desugar" | "desugarer" => Ok(Self::Desugar),
"typecheck" | "lower" | "tc" => Ok(Self::TypeCheck), "typecheck" | "lower" | "tc" => Ok(Self::TypeCheck),
"fullcheck" | "check" | "checker" => Ok(Self::FullCheck), "fullcheck" | "check" | "checker" => Ok(Self::FullCheck),
"compile" | "compiler" => Ok(Self::Compile), "comp" | "compile" | "compiler" => Ok(Self::Compile),
"transpile" | "transpiler" => Ok(Self::Transpile), "trans" | "transpile" | "transpiler" => Ok(Self::Transpile),
"run" | "execute" => Ok(Self::Execute), "run" | "execute" => Ok(Self::Execute),
"server" | "language-server" => Ok(Self::LanguageServer), "server" | "language-server" => Ok(Self::LanguageServer),
"byteread" | "read" | "reader" => Ok(Self::Read), "byteread" | "read" | "reader" | "dis" => Ok(Self::Read),
_ => Err(()), _ => Err(()),
} }
} }
@ -217,11 +217,6 @@ impl ErgConfig {
// not `for` because we need to consume the next argument // not `for` because we need to consume the next argument
while let Some(arg) = args.next() { while let Some(arg) = args.next() {
match &arg[..] { match &arg[..] {
/* Commands */
"lex" | "parse" | "desugar" | "typecheck" | "check" | "compile" | "transpile"
| "run" | "execute" | "server" | "tc" => {
cfg.mode = ErgMode::try_from(&arg[..]).unwrap();
}
/* Options */ /* Options */
"--" => { "--" => {
for arg in args { for arg in args {
@ -407,6 +402,9 @@ USAGE:
process::exit(2); process::exit(2);
} }
_ => { _ => {
if let Ok(mode) = ErgMode::try_from(&arg[..]) {
cfg.mode = mode;
} else {
let path = PathBuf::from_str(&arg[..]) let path = PathBuf::from_str(&arg[..])
.unwrap_or_else(|_| panic!("invalid file path: {arg}")); .unwrap_or_else(|_| panic!("invalid file path: {arg}"));
let path = normalize_path(path); let path = normalize_path(path);
@ -428,6 +426,7 @@ USAGE:
} }
} }
} }
}
if cfg.input.is_repl() && cfg.mode != ErgMode::LanguageServer { if cfg.input.is_repl() && cfg.mode != ErgMode::LanguageServer {
let is_stdin_piped = !stdin().is_terminal(); let is_stdin_piped = !stdin().is_terminal();
let input = if is_stdin_piped { let input = if is_stdin_piped {

View file

@ -149,7 +149,7 @@ pub fn strs_into_bytes(names: Vec<Str>) -> Vec<u8> {
pub fn str_into_bytes(cont: Str, is_interned: bool) -> Vec<u8> { pub fn str_into_bytes(cont: Str, is_interned: bool) -> Vec<u8> {
let mut bytes = vec![]; let mut bytes = vec![];
if cont.is_ascii() { if cont.is_ascii() && cont.len() <= u8::MAX as usize {
if is_interned { if is_interned {
bytes.push(DataTypePrefix::ShortAsciiInterned as u8); bytes.push(DataTypePrefix::ShortAsciiInterned as u8);
} else { } else {

View file

@ -91,6 +91,18 @@ pub enum FastKind {
Free = 0x80, Free = 0x80,
} }
/// Decodes a raw kind byte into a [`FastKind`].
///
/// Only the three exact values `0x20`, `0x40` and `0x80` are accepted; any
/// other byte (including a combination of those values) is rejected with
/// the message `"invalid kind"`.
impl TryFrom<u8> for FastKind {
    type Error = &'static str;

    fn try_from(kind: u8) -> Result<Self, Self::Error> {
        // Map the byte to a variant first, then attach the error message once.
        let decoded = match kind {
            0x20 => Some(Self::Local),
            0x40 => Some(Self::Cell),
            0x80 => Some(Self::Free),
            _ => None,
        };
        decoded.ok_or("invalid kind")
    }
}
/// Bit masks for CodeObj.flags /// Bit masks for CodeObj.flags
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)] #[repr(u32)]
@ -290,7 +302,7 @@ impl CodeObj {
} }
} }
pub fn from_pyc<P: AsRef<Path>>(path: P) -> DeserializeResult<Self> { pub fn from_pyc<P: AsRef<Path>>(path: P) -> DeserializeResult<(Self, PythonVersion)> {
let mut f = BufReader::new(File::open(path)?); let mut f = BufReader::new(File::open(path)?);
let v = &mut Vec::with_capacity(16); let v = &mut Vec::with_capacity(16);
f.read_to_end(v)?; f.read_to_end(v)?;
@ -300,10 +312,11 @@ impl CodeObj {
let _timestamp = Deserializer::deserialize_u32(v); let _timestamp = Deserializer::deserialize_u32(v);
let _padding = Deserializer::deserialize_u32(v); let _padding = Deserializer::deserialize_u32(v);
let code = Self::from_bytes(v, python_ver)?; let code = Self::from_bytes(v, python_ver)?;
Ok(code) Ok((code, python_ver))
} }
pub fn from_bytes(v: &mut Vec<u8>, python_ver: PythonVersion) -> DeserializeResult<Self> { pub fn from_bytes(v: &mut Vec<u8>, python_ver: PythonVersion) -> DeserializeResult<Self> {
assert_eq!(v.remove(0), DataTypePrefix::Code as u8, "not a code object");
let mut des = Deserializer::new(); let mut des = Deserializer::new();
let argcount = Deserializer::deserialize_u32(v); let argcount = Deserializer::deserialize_u32(v);
let posonlyargcount = if python_ver.minor >= Some(8) { let posonlyargcount = if python_ver.minor >= Some(8) {
@ -320,15 +333,16 @@ impl CodeObj {
let stacksize = Deserializer::deserialize_u32(v); let stacksize = Deserializer::deserialize_u32(v);
let flags = Deserializer::deserialize_u32(v); let flags = Deserializer::deserialize_u32(v);
let code = des.deserialize_bytes(v)?; let code = des.deserialize_bytes(v)?;
let consts = des.deserialize_const_vec(v, python_ver)?; let consts = des.deserialize_const_vec(v, python_ver, Some("consts"))?;
let names = des.deserialize_str_vec(v, python_ver)?; let names = des.deserialize_str_vec(v, python_ver, Some("names"))?;
// TODO: localplusnames let (varnames, freevars, cellvars) = des.deserialize_locals(v, python_ver)?;
let varnames = des.deserialize_str_vec(v, python_ver)?; let filename = des.deserialize_str(v, python_ver, Some("filename"))?;
let freevars = des.deserialize_str_vec(v, python_ver)?; let name = des.deserialize_str(v, python_ver, Some("name"))?;
let cellvars = des.deserialize_str_vec(v, python_ver)?; let qualname = if python_ver.minor >= Some(11) {
let filename = des.deserialize_str(v, python_ver)?; des.deserialize_str(v, python_ver, Some("qualname"))?
let name = des.deserialize_str(v, python_ver)?; } else {
let qualname = des.deserialize_str(v, python_ver)?; name.clone()
};
let firstlineno = Deserializer::deserialize_u32(v); let firstlineno = Deserializer::deserialize_u32(v);
let lnotab = des.deserialize_bytes(v)?; let lnotab = des.deserialize_bytes(v)?;
let exceptiontable = if python_ver.minor >= Some(11) { let exceptiontable = if python_ver.minor >= Some(11) {

View file

@ -11,7 +11,7 @@ use erg_common::traits::ExitStatus;
use erg_common::{fn_name, switch_lang}; use erg_common::{fn_name, switch_lang};
use erg_common::{ArcArray, Str}; use erg_common::{ArcArray, Str};
use super::codeobj::CodeObj; use super::codeobj::{CodeObj, FastKind};
use super::constructors::array_t; use super::constructors::array_t;
use super::typaram::TyParam; use super::typaram::TyParam;
use super::value::ValueObj; use super::value::ValueObj;
@ -70,22 +70,29 @@ impl DeserializeError {
) )
} }
pub fn type_error(expect: &Type, found: &Type) -> Self { pub fn type_error(field: Option<&str>, expect: &Type, found: &Type) -> Self {
let field = switch_lang!(
"japanese" => field.map(|f| format!("フィールド{f}の読み取りに失敗しました: ")),
"simplified_chinese" => field.map(|f| format!("读取字段{f}失败: ")),
"traditional_chinese" => field.map(|f| format!("讀取字段{f}失敗: ")),
"english" => field.map(|f| format!("failed to read field {f}: ")),
)
.unwrap_or("".to_string());
Self::new( Self::new(
0, 0,
fn_name!(), fn_name!(),
switch_lang!( switch_lang!(
"japanese" => format!( "japanese" => format!(
"{expect}型オブジェクトを予期しましたが、 読み込んだオブジェクトは{found}型です", "{field}{expect}型オブジェクトを予期しましたが、 読み込んだオブジェクトは{found}型です",
), ),
"simplified_chinese" => format!( "simplified_chinese" => format!(
"期望{expect}对象,但反序列化的对象是{found}", "{field}期望{expect}对象,但反序列化的对象是{found}",
), ),
"traditional_chinese" => format!( "traditional_chinese" => format!(
"期望一個{expect}對象,但反序列化的對像是{found}", "{field}期望一個{expect}對象,但反序列化的對像是{found}",
), ),
"english" => format!( "english" => format!(
"expect a {expect} object, but the deserialized object is {found}", "{field}expect a {expect} object, but the deserialized object is {found}",
), ),
), ),
) )
@ -113,8 +120,8 @@ impl Deserializer {
pub fn run(cfg: ErgConfig) -> ExitStatus { pub fn run(cfg: ErgConfig) -> ExitStatus {
let filename = cfg.input.path(); let filename = cfg.input.path();
match CodeObj::from_pyc(filename) { match CodeObj::from_pyc(filename) {
Ok(codeobj) => { Ok((codeobj, ver)) => {
println!("{}", codeobj.code_info(None)); println!("{}", codeobj.code_info(Some(ver)));
ExitStatus::OK ExitStatus::OK
} }
Err(e) => { Err(e) => {
@ -195,60 +202,8 @@ impl Deserializer {
Ok(self.get_cached_arr(&arr)) Ok(self.get_cached_arr(&arr))
} }
DataTypePrefix::Code => { DataTypePrefix::Code => {
let argcount = Self::deserialize_u32(v); v.insert(0, DataTypePrefix::Code as u8);
let posonlyargcount = if python_ver.minor >= Some(8) { Ok(ValueObj::from(CodeObj::from_bytes(v, python_ver)?))
Self::deserialize_u32(v)
} else {
0
};
let kwonlyargcount = Self::deserialize_u32(v);
let nlocals = if python_ver.minor < Some(11) {
Self::deserialize_u32(v)
} else {
0
};
let stacksize = Self::deserialize_u32(v);
let flags = Self::deserialize_u32(v);
let code = self.deserialize_bytes(v)?;
let consts = self.deserialize_const_vec(v, python_ver)?;
let names = self.deserialize_str_vec(v, python_ver)?;
let varnames = self.deserialize_str_vec(v, python_ver)?;
let freevars = self.deserialize_str_vec(v, python_ver)?;
let cellvars = self.deserialize_str_vec(v, python_ver)?;
let filename = self.deserialize_str(v, python_ver)?;
let name = self.deserialize_str(v, python_ver)?;
let qualname = if python_ver.minor >= Some(11) {
self.deserialize_str(v, python_ver)?
} else {
name.clone()
};
let firstlineno = Self::deserialize_u32(v);
let lnotab = self.deserialize_bytes(v)?;
let exceptiontable = if python_ver.minor >= Some(11) {
self.deserialize_bytes(v)?
} else {
vec![]
};
Ok(ValueObj::from(CodeObj {
argcount,
posonlyargcount,
kwonlyargcount,
nlocals,
stacksize,
flags,
code,
consts,
names,
varnames,
freevars,
cellvars,
filename,
name,
qualname,
firstlineno,
lnotab,
exceptiontable,
}))
} }
DataTypePrefix::None => Ok(ValueObj::None), DataTypePrefix::None => Ok(ValueObj::None),
other => Err(DeserializeError::new( other => Err(DeserializeError::new(
@ -268,10 +223,15 @@ impl Deserializer {
&mut self, &mut self,
v: &mut Vec<u8>, v: &mut Vec<u8>,
python_ver: PythonVersion, python_ver: PythonVersion,
field: Option<&str>,
) -> DeserializeResult<Vec<ValueObj>> { ) -> DeserializeResult<Vec<ValueObj>> {
match self.deserialize_const(v, python_ver)? { match self.deserialize_const(v, python_ver)? {
ValueObj::Array(arr) => Ok(arr.to_vec()), ValueObj::Array(arr) => Ok(arr.to_vec()),
other => Err(DeserializeError::type_error(&Type::Str, other.ref_t())), other => Err(DeserializeError::type_error(
field,
&Type::Str,
other.ref_t(),
)),
} }
} }
@ -279,10 +239,15 @@ impl Deserializer {
&mut self, &mut self,
v: &mut Vec<u8>, v: &mut Vec<u8>,
python_ver: PythonVersion, python_ver: PythonVersion,
field: Option<&str>,
) -> DeserializeResult<ArcArray<ValueObj>> { ) -> DeserializeResult<ArcArray<ValueObj>> {
match self.deserialize_const(v, python_ver)? { match self.deserialize_const(v, python_ver)? {
ValueObj::Array(arr) => Ok(arr), ValueObj::Array(arr) => Ok(arr),
other => Err(DeserializeError::type_error(&Type::Str, other.ref_t())), other => Err(DeserializeError::type_error(
field,
&Type::Str,
other.ref_t(),
)),
} }
} }
@ -293,7 +258,11 @@ impl Deserializer {
pub fn try_into_str(&mut self, c: ValueObj) -> DeserializeResult<Str> { pub fn try_into_str(&mut self, c: ValueObj) -> DeserializeResult<Str> {
match c { match c {
ValueObj::Str(s) => Ok(s), ValueObj::Str(s) => Ok(s),
other => Err(DeserializeError::type_error(&Type::Str, other.ref_t())), other => Err(DeserializeError::type_error(
None,
&Type::Str,
other.ref_t(),
)),
} }
} }
@ -301,9 +270,10 @@ impl Deserializer {
&mut self, &mut self,
v: &mut Vec<u8>, v: &mut Vec<u8>,
python_ver: PythonVersion, python_ver: PythonVersion,
field: Option<&str>,
) -> DeserializeResult<Vec<Str>> { ) -> DeserializeResult<Vec<Str>> {
match self.deserialize_const(v, python_ver)? { match self.deserialize_const(v, python_ver)? {
ValueObj::Array(arr) => { ValueObj::Array(arr) | ValueObj::Tuple(arr) => {
let mut strs = Vec::with_capacity(arr.len()); let mut strs = Vec::with_capacity(arr.len());
for c in arr.iter().cloned() { for c in arr.iter().cloned() {
strs.push(self.try_into_str(c)?); strs.push(self.try_into_str(c)?);
@ -311,20 +281,57 @@ impl Deserializer {
Ok(strs) Ok(strs)
} }
other => Err(DeserializeError::type_error( other => Err(DeserializeError::type_error(
field,
&array_t(Type::Str, TyParam::erased(Type::Nat)), &array_t(Type::Str, TyParam::erased(Type::Nat)),
other.ref_t(), &other.class(),
)), )),
} }
} }
pub fn deserialize_locals(
&mut self,
v: &mut Vec<u8>,
python_ver: PythonVersion,
) -> DeserializeResult<(Vec<Str>, Vec<Str>, Vec<Str>)> {
if python_ver.minor >= Some(11) {
let names =
self.deserialize_str_vec(v, python_ver, Some("varnames, freevars, cellvars"))?;
let kinds = self.deserialize_bytes(v)?;
assert_eq!(names.len(), kinds.len());
// partition
let mut varnames = vec![];
let mut freevars = vec![];
let mut cellvars = vec![];
for (name, kind) in names.into_iter().zip(kinds.into_iter()) {
match FastKind::try_from(kind) {
Ok(FastKind::Local) => varnames.push(name),
Ok(FastKind::Free) => freevars.push(name),
Ok(FastKind::Cell) => cellvars.push(name),
_ => unreachable!(),
}
}
Ok((varnames, freevars, cellvars))
} else {
let varnames = self.deserialize_str_vec(v, python_ver, Some("varnames"))?;
let freevars = self.deserialize_str_vec(v, python_ver, Some("freevars"))?;
let cellvars = self.deserialize_str_vec(v, python_ver, Some("cellvars"))?;
Ok((varnames, freevars, cellvars))
}
}
pub fn deserialize_str( pub fn deserialize_str(
&mut self, &mut self,
v: &mut Vec<u8>, v: &mut Vec<u8>,
python_ver: PythonVersion, python_ver: PythonVersion,
field: Option<&str>,
) -> DeserializeResult<Str> { ) -> DeserializeResult<Str> {
match self.deserialize_const(v, python_ver)? { match self.deserialize_const(v, python_ver)? {
ValueObj::Str(s) => Ok(s), ValueObj::Str(s) => Ok(s),
other => Err(DeserializeError::type_error(&Type::Str, other.ref_t())), other => Err(DeserializeError::type_error(
field,
&Type::Str,
other.ref_t(),
)),
} }
} }