This commit is contained in:
Shunsuke Shibayama 2022-11-03 23:44:38 +09:00
parent a95568ee48
commit fd60746f6a
6 changed files with 211 additions and 98 deletions

View file

@ -64,9 +64,6 @@ pub enum CommonOpcode {
DELETE_FAST = 126,
RAISE_VARARGS = 130,
MAKE_FUNCTION = 132,
LOAD_CLOSURE = 135,
LOAD_DEREF = 136,
STORE_DEREF = 137,
CALL_FUNCTION_EX = 142,
EXTENDED_ARG = 144,
LOAD_METHOD = 160,
@ -131,9 +128,6 @@ impl TryFrom<u8> for CommonOpcode {
126 => DELETE_FAST,
130 => RAISE_VARARGS,
132 => MAKE_FUNCTION,
135 => LOAD_CLOSURE,
136 => LOAD_DEREF,
137 => STORE_DEREF,
142 => CALL_FUNCTION_EX,
144 => EXTENDED_ARG,
160 => LOAD_METHOD,

View file

@ -82,9 +82,10 @@ impl_u8_enum! {Opcode311;
RAISE_VARARGS = 130,
CALL_FUNCTION = 131,
MAKE_FUNCTION = 132,
LOAD_CLOSURE = 135,
LOAD_DEREF = 136,
STORE_DEREF = 137,
MAKE_CELL = 135,
LOAD_CLOSURE = 136,
LOAD_DEREF = 137,
STORE_DEREF = 138,
JUMP_BACKWARD = 140,
CALL_FUNCTION_EX = 142,
EXTENDED_ARG = 144,

View file

@ -480,6 +480,7 @@ impl CodeGenerator {
}
Some(StoreLoadKind::Deref) => {
self.mut_cur_block_codeobj().freevars.push(name.clone());
// TODO: in 3.11 freevars are unified with varnames
// cellvarsのpushはrec_search()で行われる
Name::deref(self.cur_block_codeobj().freevars.len() - 1)
}
@ -509,26 +510,53 @@ impl CodeGenerator {
Name::local(self.cur_block_codeobj().names.len() - 1)
}
fn select_load_instr(&self, kind: StoreLoadKind, acc_kind: AccessKind) -> u8 {
match kind {
StoreLoadKind::Fast | StoreLoadKind::FastConst => LOAD_FAST as u8,
StoreLoadKind::Global | StoreLoadKind::GlobalConst => LOAD_NAME as u8, //LOAD_GLOBAL as u8,
StoreLoadKind::Deref | StoreLoadKind::DerefConst =>
if self.py_version.minor >= Some(11) { Opcode311::LOAD_DEREF as u8 }
else { Opcode310::LOAD_DEREF as u8 },
StoreLoadKind::Local | StoreLoadKind::LocalConst => {
match acc_kind {
Name => LOAD_NAME as u8,
Attr => LOAD_ATTR as u8,
Method => LOAD_METHOD as u8,
}
}
}
}
fn select_store_instr(&self, kind: StoreLoadKind, acc_kind: AccessKind) -> u8 {
match kind {
StoreLoadKind::Fast => STORE_FAST as u8,
StoreLoadKind::FastConst => STORE_FAST as u8, // ERG_STORE_FAST_IMMUT,
// NOTE: First-time variables are treated as GLOBAL, but they are always first-time variables when assigned, so they are just NAME
// NOTE: 初見の変数はGLOBAL扱いになるが、代入時は必ず初見であるので単なるNAME
StoreLoadKind::Global | StoreLoadKind::GlobalConst => STORE_NAME as u8,
StoreLoadKind::Deref | StoreLoadKind::DerefConst =>
if self.py_version.minor >= Some(11) { Opcode311::STORE_DEREF as u8 }
else { Opcode310::STORE_DEREF as u8 },
StoreLoadKind::Local | StoreLoadKind::LocalConst => {
match acc_kind {
Name => STORE_NAME as u8,
Attr => STORE_ATTR as u8,
// cannot overwrite methods directly
Method => STORE_ATTR as u8,
}
}
}
}
fn emit_load_name_instr(&mut self, ident: Identifier) {
log!(info "entered {}({ident})", fn_name!());
let escaped = escape_name(ident);
let name = self
.local_search(&escaped, Name)
.unwrap_or_else(|| self.register_name(escaped));
let instr = match name.kind {
StoreLoadKind::Fast | StoreLoadKind::FastConst => LOAD_FAST,
StoreLoadKind::Global | StoreLoadKind::GlobalConst => {
if self.py_version.minor >= Some(11) {
LOAD_NAME
} else {
LOAD_GLOBAL
}
}
StoreLoadKind::Deref | StoreLoadKind::DerefConst => LOAD_DEREF,
StoreLoadKind::Local | StoreLoadKind::LocalConst => LOAD_NAME,
};
// let null_idx = self.cur_block_codeobj().code.len() - 2;
/*if instr == LOAD_GLOBAL
let instr = self.select_load_instr(name.kind, Name);
/*let null_idx = self.cur_block_codeobj().code.len() - 2;
if instr == LOAD_GLOBAL
&& self.cur_block_codeobj().code.get(null_idx) == Some(&(Opcode311::PUSH_NULL as u8))
{
self.mut_cur_block_codeobj().code.pop();
@ -538,7 +566,7 @@ impl CodeGenerator {
self.write_instr(instr);
self.write_arg(name.idx);
self.stack_inc();
if instr == LOAD_GLOBAL && self.py_version.minor >= Some(11) {
if instr == LOAD_GLOBAL as u8 && self.py_version.minor >= Some(11) {
self.write_bytes(&[0; 2]);
self.write_bytes(&[0; 8]);
}
@ -627,12 +655,7 @@ impl CodeGenerator {
let name = self
.local_search(&escaped, Attr)
.unwrap_or_else(|| self.register_attr(escaped));
let instr = match name.kind {
StoreLoadKind::Fast | StoreLoadKind::FastConst => LOAD_FAST,
StoreLoadKind::Global | StoreLoadKind::GlobalConst => LOAD_GLOBAL,
StoreLoadKind::Deref | StoreLoadKind::DerefConst => LOAD_DEREF,
StoreLoadKind::Local | StoreLoadKind::LocalConst => LOAD_ATTR,
};
let instr = self.select_load_instr(name.kind, Attr);
self.write_instr(instr);
self.write_arg(name.idx);
if self.py_version.minor >= Some(11) {
@ -649,12 +672,7 @@ impl CodeGenerator {
let name = self
.local_search(&escaped, Method)
.unwrap_or_else(|| self.register_method(escaped));
let instr = match name.kind {
StoreLoadKind::Fast | StoreLoadKind::FastConst => LOAD_FAST,
StoreLoadKind::Global | StoreLoadKind::GlobalConst => LOAD_GLOBAL,
StoreLoadKind::Deref | StoreLoadKind::DerefConst => LOAD_DEREF,
StoreLoadKind::Local | StoreLoadKind::LocalConst => LOAD_METHOD,
};
let instr = self.select_load_instr(name.kind, Method);
self.write_instr(instr);
self.write_arg(name.idx);
if self.py_version.minor >= Some(11) {
@ -673,26 +691,14 @@ impl CodeGenerator {
self.register_attr(escaped)
}
});
let instr = match name.kind {
StoreLoadKind::Fast => STORE_FAST,
StoreLoadKind::FastConst => STORE_FAST, // ERG_STORE_FAST_IMMUT,
// NOTE: First-time variables are treated as GLOBAL, but they are always first-time variables when assigned, so they are just NAME
// NOTE: 初見の変数はGLOBAL扱いになるが、代入時は必ず初見であるので単なるNAME
StoreLoadKind::Global | StoreLoadKind::GlobalConst => STORE_NAME,
StoreLoadKind::Deref | StoreLoadKind::DerefConst => STORE_DEREF,
StoreLoadKind::Local | StoreLoadKind::LocalConst => {
match acc_kind {
AccessKind::Name => STORE_NAME,
AccessKind::Attr => STORE_ATTR,
// cannot overwrite methods directly
AccessKind::Method => STORE_ATTR,
}
}
};
let instr = self.select_store_instr(name.kind, acc_kind);
self.write_instr(instr);
self.write_arg(name.idx);
self.stack_dec();
if instr == STORE_ATTR {
if instr == STORE_ATTR as u8 {
if self.py_version.minor >= Some(11) {
self.write_bytes(&[0; 8]);
}
self.stack_dec();
}
}
@ -1097,16 +1103,7 @@ impl CodeGenerator {
}
let code = self.emit_block(body.block, Some(name.clone()), params);
// code.flags += CodeObjFlags::Optimized as u32;
if !self.cur_block_codeobj().cellvars.is_empty() {
let cellvars_len = self.cur_block_codeobj().cellvars.len();
for i in 0..cellvars_len {
self.write_instr(LOAD_CLOSURE);
self.write_arg(i);
}
self.write_instr(BUILD_TUPLE);
self.write_arg(cellvars_len);
make_function_flag += 8;
}
self.register_cellvars(&mut make_function_flag);
self.emit_load_const(code);
if self.py_version.minor < Some(11) {
if let Some(class) = class_name {
@ -1144,16 +1141,7 @@ impl CodeGenerator {
make_function_flag += MakeFunctionFlags::Defaults as usize;
}
let code = self.emit_block(lambda.body, Some("<lambda>".into()), params);
if !self.cur_block_codeobj().cellvars.is_empty() {
let cellvars_len = self.cur_block_codeobj().cellvars.len();
for i in 0..cellvars_len {
self.write_instr(LOAD_CLOSURE);
self.write_arg(i);
}
self.write_instr(BUILD_TUPLE);
self.write_arg(cellvars_len);
make_function_flag += MakeFunctionFlags::Closure as usize;
}
self.register_cellvars(&mut make_function_flag);
self.emit_load_const(code);
if self.py_version.minor < Some(11) {
self.emit_load_const("<lambda>");
@ -1169,6 +1157,25 @@ impl CodeGenerator {
}
}
/// Emits the instructions that build the closure tuple for the current
/// block's cell variables and adds `MakeFunctionFlags::Closure` to `flag`.
///
/// Does nothing (and leaves `flag` untouched) when the current block has no
/// cell variables. For a target minor version of 11 or later each cell is
/// preceded by `MAKE_CELL`; otherwise only `LOAD_CLOSURE` is written.
fn register_cellvars(&mut self, flag: &mut usize) {
    let cell_count = self.cur_block_codeobj().cellvars.len();
    if cell_count == 0 {
        return;
    }
    let targets_311 = self.py_version.minor >= Some(11);
    for cell_idx in 0..cell_count {
        if targets_311 {
            self.write_instr(Opcode311::MAKE_CELL);
            self.write_arg(cell_idx);
            self.write_instr(Opcode311::LOAD_CLOSURE);
            self.write_arg(cell_idx);
        } else {
            self.write_instr(Opcode310::LOAD_CLOSURE);
            self.write_arg(cell_idx);
        }
    }
    // Pack all loaded cells into one tuple consumed by MAKE_FUNCTION.
    self.write_instr(BUILD_TUPLE);
    self.write_arg(cell_count);
    *flag += MakeFunctionFlags::Closure as usize;
}
fn emit_unaryop(&mut self, unary: UnaryOp) {
log!(info "entered {} ({unary})", fn_name!());
let tycode = TypeCode::from(unary.lhs_t());
@ -1658,7 +1665,53 @@ impl CodeGenerator {
pop_jump_points
}
fn emit_with_instr_3_10(&mut self, args: Args) {
/// Emits the bytecode for a `with!` call when targeting Python 3.11.
///
/// `args` must hold at least two positional items: the context expression
/// followed by a lambda (anything else in second position makes
/// `enum_unwrap!` fail). The value stored after the lambda body is saved in
/// a fresh private variable and reloaded at the very end, so it is what
/// remains on the stack after the whole construct.
fn emit_with_instr_311(&mut self, args: Args) {
log!(info "entered {}", fn_name!());
let mut args = args;
// First argument: the context expression; second: the body lambda.
let expr = args.remove(0);
let lambda = enum_unwrap!(args.remove(0), Expr::Lambda);
let params = self.gen_param_names(&lambda.params);
self.emit_expr(expr);
self.write_instr(Opcode311::BEFORE_WITH);
self.write_arg(0);
// push __exit__, __enter__() to the stack
self.stack_inc_n(2);
// Line of the last expression in the body (0 when unavailable); used for the stash variable.
let lambda_line = lambda.body.last().unwrap().ln_begin().unwrap_or(0);
self.emit_with_block(lambda.body, params);
// Save the value on top of the stack into a fresh private variable so it can be reloaded at the end.
let stash = Identifier::private_with_line(Str::from(fresh_varname()), lambda_line);
self.emit_store_instr(stash.clone(), Name);
// Normal exit path: three `None` constants followed by a call with argc 2.
// NOTE(review): presumably this is `__exit__(None, None, None)` in the form CPython 3.11 emits — confirm.
self.emit_load_const(ValueObj::None);
self.emit_load_const(ValueObj::None);
self.emit_load_const(ValueObj::None);
self.emit_precall_and_call(2);
self.emit_pop_top();
// Placeholder jump over the exception-handling sequence; its argument is patched by `calc_edit_jump` below.
let idx_jump_forward = self.lasti();
self.write_instr(Opcode311::JUMP_FORWARD);
self.write_arg(0);
// Exception-handling sequence (skipped by the jump above on the normal path).
self.write_instr(Opcode311::PUSH_EXC_INFO);
self.write_arg(0);
// NOTE(review): this opcode is taken from `Opcode308` while every other opcode here comes from
// `Opcode311` — confirm the numeric value is the same for 3.11.
self.write_instr(Opcode308::WITH_EXCEPT_START);
self.write_arg(0);
// Hard-coded forward offset of 4; presumably skips the RERAISE/COPY/POP_EXCEPT/RERAISE group — TODO confirm.
self.write_instr(Opcode311::POP_JUMP_FORWARD_IF_TRUE);
self.write_arg(4);
self.write_instr(Opcode311::RERAISE);
self.write_arg(0);
self.write_instr(Opcode311::COPY);
self.write_arg(3);
self.write_instr(Opcode311::POP_EXCEPT);
self.write_arg(0);
self.write_instr(Opcode311::RERAISE);
self.write_arg(1);
self.emit_pop_top();
self.write_instr(Opcode311::POP_EXCEPT);
self.write_arg(0);
self.emit_pop_top();
self.emit_pop_top();
// Patch the JUMP_FORWARD argument (the byte after the opcode) with an offset computed from the current position.
self.calc_edit_jump(idx_jump_forward + 1, self.lasti() - idx_jump_forward - 2);
// Reload the stashed value.
self.emit_load_name_instr(stash);
}
fn emit_with_instr_310(&mut self, args: Args) {
log!(info "entered {}", fn_name!());
let mut args = args;
let expr = args.remove(0);
@ -1712,7 +1765,7 @@ impl CodeGenerator {
self.emit_load_name_instr(stash);
}
fn emit_with_instr_3_8(&mut self, args: Args) {
fn emit_with_instr_308(&mut self, args: Args) {
log!(info "entered {}", fn_name!());
let mut args = args;
let expr = args.remove(0);
@ -1775,10 +1828,11 @@ impl CodeGenerator {
"if" | "if!" => self.emit_if_instr(args),
"match" | "match!" => self.emit_match_instr(args, true),
"with!" => {
if self.py_version.minor_is(3, 8) {
self.emit_with_instr_3_8(args)
} else {
self.emit_with_instr_3_10(args)
match self.py_version.minor {
Some(11) => self.emit_with_instr_311(args),
Some(10) => self.emit_with_instr_310(args),
Some(8) => self.emit_with_instr_308(args),
_ => todo!(),
}
}
// "pyimport" | "py" |
@ -1894,6 +1948,7 @@ impl CodeGenerator {
log!(info "entered {}", fn_name!());
method_name.dot = None;
method_name.vi.py_name = Some(Str::ever(func_name));
self.emit_push_null();
self.emit_load_name_instr(method_name);
args.insert_pos(0, PosArg::new(obj));
self.emit_args_311(args, Name);
@ -2376,10 +2431,16 @@ impl CodeGenerator {
&name,
firstlineno,
));
if self.py_version.minor >= Some(11) {
let idx_copy_free_vars = if self.py_version.minor >= Some(11) {
let idx_copy_free_vars = self.lasti();
self.write_instr(Opcode311::COPY_FREE_VARS);
self.write_arg(0);
self.write_instr(Opcode311::RESUME);
self.write_arg(0);
}
idx_copy_free_vars
} else {
0
};
let init_stack_len = self.stack_len();
for expr in block.into_iter() {
self.emit_expr(expr);
@ -2412,6 +2473,12 @@ impl CodeGenerator {
if !self.cur_block_codeobj().varnames.is_empty() {
self.mut_cur_block_codeobj().flags += CodeObjFlags::NewLocals as u32;
}
let freevars_len = self.cur_block_codeobj().freevars.len();
if freevars_len > 0 {
self.edit_code(idx_copy_free_vars + 1, freevars_len);
} else if self.py_version.minor >= Some(11) {
self.edit_code(idx_copy_free_vars, CommonOpcode::NOP as usize);
}
// end of flagging
let unit = self.units.pop().unwrap();
// increase lineno

View file

@ -487,6 +487,22 @@ impl CodeObj {
self.dump_additional_info(op, arg, idx, instrs);
}
match op308 {
Opcode308::STORE_DEREF | Opcode308::LOAD_DEREF => {
write!(
instrs,
"{arg} ({})",
self.freevars.get(*arg as usize).unwrap()
)
.unwrap();
}
Opcode308::LOAD_CLOSURE => {
write!(
instrs,
"{arg} ({})",
self.cellvars.get(*arg as usize).unwrap()
)
.unwrap();
}
Opcode308::JUMP_ABSOLUTE => {
write!(instrs, "{arg} (to {})", *arg as usize * 2).unwrap();
}
@ -513,6 +529,22 @@ impl CodeObj {
self.dump_additional_info(op, arg, idx, instrs);
}
match op310 {
Opcode310::STORE_DEREF | Opcode310::LOAD_DEREF => {
write!(
instrs,
"{arg} ({})",
self.freevars.get(*arg as usize).unwrap()
)
.unwrap();
}
Opcode310::LOAD_CLOSURE => {
write!(
instrs,
"{arg} ({})",
self.cellvars.get(*arg as usize).unwrap()
)
.unwrap();
}
Opcode310::JUMP_ABSOLUTE => {
write!(instrs, "{arg} (to {})", *arg as usize * 2).unwrap();
}
@ -541,6 +573,22 @@ impl CodeObj {
self.dump_additional_info(op, arg, idx, instrs);
}
match op311 {
Opcode311::STORE_DEREF | Opcode311::LOAD_DEREF => {
write!(
instrs,
"{arg} ({})",
self.freevars.get(*arg as usize).unwrap()
)
.unwrap();
}
Opcode311::MAKE_CELL | Opcode311::LOAD_CLOSURE => {
write!(
instrs,
"{arg} ({})",
self.cellvars.get(*arg as usize).unwrap()
)
.unwrap();
}
Opcode311::POP_JUMP_FORWARD_IF_FALSE
| Opcode311::POP_JUMP_FORWARD_IF_TRUE => {
write!(instrs, "{arg} (to {})", idx + *arg as usize * 2 + 2).unwrap();
@ -549,7 +597,8 @@ impl CodeObj {
write!(instrs, "{arg} (to {})", idx - *arg as usize * 2 + 2).unwrap();
}
Opcode311::PRECALL | Opcode311::CALL
| Opcode311::COPY | Opcode311::SWAP => {
| Opcode311::COPY | Opcode311::SWAP
| Opcode311::COPY_FREE_VARS => {
write!(instrs, "{arg}").unwrap();
}
Opcode311::KW_NAMES => {
@ -598,22 +647,6 @@ impl CodeObj {
)
.unwrap();
}
CommonOpcode::STORE_DEREF | CommonOpcode::LOAD_DEREF => {
write!(
instrs,
"{arg} ({})",
self.freevars.get(*arg as usize).unwrap()
)
.unwrap();
}
CommonOpcode::LOAD_CLOSURE => {
write!(
instrs,
"{arg} ({})",
self.cellvars.get(*arg as usize).unwrap()
)
.unwrap();
}
CommonOpcode::STORE_FAST | CommonOpcode::LOAD_FAST => {
write!(
instrs,

View file

@ -4,6 +4,15 @@ Python bytecode variable manipulation commands are accessed through namei (name
One instruction is 2 bytes, and the instruction and arguments are stored in little endian.
Instructions that do not take arguments also use 2 bytes (the argument part is 0).
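* Change in 3.11: Instructions are no longer fixed-length, and some instructions occupy more than 2 bytes. The extra bytes are zero in most cases; their purpose was unknown when this was written, but they were thought to be for optimization (the CPython 3.11 `dis` documentation describes them as inline `CACHE` entries used by the specializing interpreter). The instructions known to have irregular byte lengths are as follows.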
* `PRECALL` (4 bytes)
* `CALL` (10 bytes)
* `BINARY_OP` (4 bytes)
* `STORE_ATTR` (10 bytes)
* `COMPARE_OP` (6 bytes)
* `LOAD_GLOBAL` (12 bytes)
* `LOAD_ATTR` (10 bytes)
## STORE_NAME(namei)
```python

View file

@ -6,6 +6,15 @@ Python bytecodeの変数操作系の命令はnamei (name index)を通してア
1命令は2byteで、命令、引数がlittle endianで格納されている。
引数を取らない命令も2byte使っている(引数部は0)。
* 3.11での変更: 命令は固定長ではなくなり、一部の命令が2バイトを超えることがある。余計に入ったバイト列は殆どの場合が0であり、その目的は不明だが、最適化オプションが入るのではないかと考えられる。判明している変則バイト長命令は以下の通り。
* `PRECALL` (4 byte)
* `CALL` (10 byte)
* `BINARY_OP` (4 byte)
* `STORE_ATTR` (10 byte)
* `COMPARE_OP` (6 byte)
* `LOAD_GLOBAL` (12 byte)
* `LOAD_ATTR` (10 byte)
## STORE_NAME(namei)
```python