Have a block-based IR for compiling, calculate max stack size

This commit is contained in:
Noah 2020-12-16 15:29:56 -06:00
parent db041c0f6c
commit 596e338c51
3 changed files with 440 additions and 248 deletions

View file

@ -6,6 +6,7 @@
//! https://github.com/micropython/micropython/blob/master/py/compile.c //! https://github.com/micropython/micropython/blob/master/py/compile.c
use crate::error::{CompileError, CompileErrorType}; use crate::error::{CompileError, CompileErrorType};
use crate::ir::{self, CodeInfo};
pub use crate::mode::Mode; pub use crate::mode::Mode;
use crate::symboltable::{make_symbol_table, statements_to_symbol_table, SymbolScope, SymbolTable}; use crate::symboltable::{make_symbol_table, statements_to_symbol_table, SymbolScope, SymbolTable};
use indexmap::IndexSet; use indexmap::IndexSet;
@ -13,78 +14,10 @@ use itertools::Itertools;
use num_complex::Complex64; use num_complex::Complex64;
use num_traits::ToPrimitive; use num_traits::ToPrimitive;
use rustpython_ast as ast; use rustpython_ast as ast;
use rustpython_bytecode::{self as bytecode, CodeObject, ConstantData, Instruction, Label}; use rustpython_bytecode::{self as bytecode, CodeObject, ConstantData, Instruction};
type CompileResult<T> = Result<T, CompileError>; type CompileResult<T> = Result<T, CompileError>;
struct CodeInfo {
code: CodeObject,
instructions: Vec<Instruction>,
locations: Vec<bytecode::Location>,
constants: Vec<ConstantData>,
name_cache: IndexSet<String>,
varname_cache: IndexSet<String>,
cellvar_cache: IndexSet<String>,
freevar_cache: IndexSet<String>,
label_map: Vec<Label>,
}
impl CodeInfo {
fn finalize_code(self) -> CodeObject {
let CodeInfo {
mut code,
instructions,
locations,
constants,
name_cache,
varname_cache,
cellvar_cache,
freevar_cache,
label_map,
} = self;
code.instructions = instructions.into();
code.locations = locations.into();
code.constants = constants.into();
code.names = name_cache.into_iter().collect();
code.varnames = varname_cache.into_iter().collect();
code.cellvars = cellvar_cache.into_iter().collect();
code.freevars = freevar_cache.into_iter().collect();
if !code.cellvars.is_empty() {
let total_args = code.arg_count
+ code.kwonlyarg_count
+ code.flags.contains(bytecode::CodeFlags::HAS_VARARGS) as usize
+ code.flags.contains(bytecode::CodeFlags::HAS_VARKEYWORDS) as usize;
let all_args = &code.varnames[..total_args];
let mut found_cellarg = false;
let cell2arg = code
.cellvars
.iter()
.map(|var| {
all_args.iter().position(|arg| var == arg).map_or(-1, |i| {
found_cellarg = true;
i as isize
})
})
.collect::<Box<[_]>>();
if found_cellarg {
code.cell2arg = Some(cell2arg);
}
}
for instruction in &mut *code.instructions {
// this is a little bit hacky, as until now the data stored inside Labels in
// Instructions is just bookkeeping, but I think it's the best way to do this
if let Some(l) = instruction.label_arg_mut() {
let real_label = label_map[l.0 as usize];
debug_assert!(real_label.0 != u32::MAX, "label wasn't set");
*l = real_label;
}
}
code
}
}
enum NameUsage { enum NameUsage {
Load, Load,
Store, Store,
@ -117,7 +50,7 @@ impl Default for CompileOpts {
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
struct CompileContext { struct CompileContext {
loop_data: Option<(Label, Label)>, loop_data: Option<(ir::BlockIdx, ir::BlockIdx)>,
in_class: bool, in_class: bool,
func: FunctionContext, func: FunctionContext,
} }
@ -130,9 +63,6 @@ enum FunctionContext {
} }
impl CompileContext { impl CompileContext {
fn in_loop(self) -> bool {
self.loop_data.is_some()
}
fn in_func(self) -> bool { fn in_func(self) -> bool {
self.func != FunctionContext::NoFunction self.func != FunctionContext::NoFunction
} }
@ -199,23 +129,21 @@ pub fn compile_program_single(
impl Compiler { impl Compiler {
fn new(opts: CompileOpts, source_path: String, code_name: String) -> Self { fn new(opts: CompileOpts, source_path: String, code_name: String) -> Self {
let module_code = CodeInfo { let module_code = CodeInfo {
code: CodeObject::new( flags: bytecode::CodeFlags::NEW_LOCALS,
bytecode::CodeFlags::NEW_LOCALS, posonlyarg_count: 0,
0, arg_count: 0,
0, kwonlyarg_count: 0,
0, source_path: source_path.clone(),
source_path.clone(), first_line_number: 0,
0, obj_name: code_name,
code_name,
), blocks: vec![ir::Block::default()],
instructions: Vec::new(), block_order: vec![bytecode::Label(0)],
locations: Vec::new(),
constants: Vec::new(), constants: Vec::new(),
name_cache: IndexSet::new(), name_cache: IndexSet::new(),
varname_cache: IndexSet::new(), varname_cache: IndexSet::new(),
cellvar_cache: IndexSet::new(), cellvar_cache: IndexSet::new(),
freevar_cache: IndexSet::new(), freevar_cache: IndexSet::new(),
label_map: Vec::new(),
}; };
Compiler { Compiler {
code_stack: vec![module_code], code_stack: vec![module_code],
@ -244,7 +172,16 @@ impl Compiler {
} }
} }
fn push_output(&mut self, code: CodeObject) { fn push_output(
&mut self,
flags: bytecode::CodeFlags,
posonlyarg_count: usize,
arg_count: usize,
kwonlyarg_count: usize,
source_path: String,
first_line_number: usize,
obj_name: String,
) {
let table = self let table = self
.symbol_table_stack .symbol_table_stack
.last_mut() .last_mut()
@ -268,15 +205,21 @@ impl Compiler {
self.symbol_table_stack.push(table); self.symbol_table_stack.push(table);
let info = CodeInfo { let info = CodeInfo {
code, flags,
instructions: Vec::new(), posonlyarg_count,
locations: Vec::new(), arg_count,
kwonlyarg_count,
source_path,
first_line_number,
obj_name,
blocks: vec![ir::Block::default()],
block_order: vec![bytecode::Label(0)],
constants: Vec::new(), constants: Vec::new(),
name_cache: IndexSet::new(), name_cache: IndexSet::new(),
varname_cache: IndexSet::new(), varname_cache: IndexSet::new(),
cellvar_cache, cellvar_cache,
freevar_cache, freevar_cache,
label_map: Vec::new(),
}; };
self.code_stack.push(info); self.code_stack.push(info);
} }
@ -284,7 +227,10 @@ impl Compiler {
fn pop_code_object(&mut self) -> CodeObject { fn pop_code_object(&mut self) -> CodeObject {
let table = self.symbol_table_stack.pop().unwrap(); let table = self.symbol_table_stack.pop().unwrap();
assert!(table.sub_tables.is_empty()); assert!(table.sub_tables.is_empty());
self.code_stack.pop().unwrap().finalize_code() self.code_stack
.pop()
.unwrap()
.finalize_code(self.opts.optimize)
} }
// could take impl Into<Cow<str>>, but everything is borrowed from ast structs; we never // could take impl Into<Cow<str>>, but everything is borrowed from ast structs; we never
@ -581,27 +527,28 @@ impl Compiler {
// Handled during symbol table construction. // Handled during symbol table construction.
} }
If { test, body, orelse } => { If { test, body, orelse } => {
let end_label = self.new_label(); let after_block = self.new_block();
match orelse { match orelse {
None => { None => {
// Only if: // Only if:
self.compile_jump_if(test, false, end_label)?; self.compile_jump_if(test, false, after_block)?;
self.compile_statements(body)?; self.compile_statements(body)?;
self.set_label(end_label);
} }
Some(statements) => { Some(statements) => {
// if - else: // if - else:
let else_label = self.new_label(); let else_block = self.new_block();
self.compile_jump_if(test, false, else_label)?; self.compile_jump_if(test, false, else_block)?;
self.compile_statements(body)?; self.compile_statements(body)?;
self.emit(Instruction::Jump { target: end_label }); self.emit(Instruction::Jump {
target: after_block,
});
// else: // else:
self.set_label(else_label); self.switch_to_block(else_block);
self.compile_statements(statements)?; self.compile_statements(statements)?;
} }
} }
self.set_label(end_label); self.switch_to_block(after_block);
} }
While { test, body, orelse } => self.compile_while(test, body, orelse)?, While { test, body, orelse } => self.compile_while(test, body, orelse)?,
With { With {
@ -611,10 +558,10 @@ impl Compiler {
} => { } => {
let is_async = *is_async; let is_async = *is_async;
let end_labels = items let end_blocks = items
.iter() .iter()
.map(|item| { .map(|item| {
let end_label = self.new_label(); let end_block = self.new_block();
self.compile_expression(&item.context_expr)?; self.compile_expression(&item.context_expr)?;
if is_async { if is_async {
@ -622,9 +569,9 @@ impl Compiler {
self.emit(Instruction::GetAwaitable); self.emit(Instruction::GetAwaitable);
self.emit_constant(ConstantData::None); self.emit_constant(ConstantData::None);
self.emit(Instruction::YieldFrom); self.emit(Instruction::YieldFrom);
self.emit(Instruction::SetupAsyncWith { end: end_label }); self.emit(Instruction::SetupAsyncWith { end: end_block });
} else { } else {
self.emit(Instruction::SetupWith { end: end_label }); self.emit(Instruction::SetupWith { end: end_block });
} }
match &item.optional_vars { match &item.optional_vars {
@ -635,7 +582,7 @@ impl Compiler {
self.emit(Instruction::Pop); self.emit(Instruction::Pop);
} }
} }
Ok(end_label) Ok(end_block)
}) })
.collect::<CompileResult<Vec<_>>>()?; .collect::<CompileResult<Vec<_>>>()?;
@ -643,10 +590,11 @@ impl Compiler {
// sort of "stack up" the layers of with blocks: // sort of "stack up" the layers of with blocks:
// with a, b: body -> start_with(a) start_with(b) body() end_with(b) end_with(a) // with a, b: body -> start_with(a) start_with(b) body() end_with(b) end_with(a)
for end_label in end_labels.into_iter().rev() { for end_block in end_blocks.into_iter().rev() {
self.emit(Instruction::PopBlock); self.emit(Instruction::PopBlock);
self.emit(Instruction::EnterFinally); self.emit(Instruction::EnterFinally);
self.set_label(end_label);
self.switch_to_block(end_block);
self.emit(Instruction::WithCleanupStart); self.emit(Instruction::WithCleanupStart);
if is_async { if is_async {
@ -707,8 +655,9 @@ impl Compiler {
Assert { test, msg } => { Assert { test, msg } => {
// if some flag, ignore all assert statements! // if some flag, ignore all assert statements!
if self.opts.optimize == 0 { if self.opts.optimize == 0 {
let end_label = self.new_label(); let after_block = self.new_block();
self.compile_jump_if(test, true, end_label)?; self.compile_jump_if(test, true, after_block)?;
let assertion_error = self.name("AssertionError"); let assertion_error = self.name("AssertionError");
self.emit(Instruction::LoadGlobal(assertion_error)); self.emit(Instruction::LoadGlobal(assertion_error));
match msg { match msg {
@ -723,15 +672,18 @@ impl Compiler {
self.emit(Instruction::Raise { self.emit(Instruction::Raise {
kind: bytecode::RaiseKind::Raise, kind: bytecode::RaiseKind::Raise,
}); });
self.set_label(end_label);
self.switch_to_block(after_block);
} }
} }
Break => { Break => match self.ctx.loop_data {
if !self.ctx.in_loop() { Some((_, end)) => {
return Err(self.error_loc(CompileErrorType::InvalidBreak, statement.location)); self.emit(Instruction::Break { target: end });
} }
self.emit(Instruction::Break); None => {
} return Err(self.error_loc(CompileErrorType::InvalidBreak, statement.location))
}
},
Continue => match self.ctx.loop_data { Continue => match self.ctx.loop_data {
Some((start, _)) => { Some((start, _)) => {
self.emit(Instruction::Continue { target: start }); self.emit(Instruction::Continue { target: start });
@ -750,7 +702,7 @@ impl Compiler {
Some(v) => { Some(v) => {
if self.ctx.func == FunctionContext::AsyncFunction if self.ctx.func == FunctionContext::AsyncFunction
&& self && self
.current_code() .current_codeinfo()
.flags .flags
.contains(bytecode::CodeFlags::IS_GENERATOR) .contains(bytecode::CodeFlags::IS_GENERATOR)
{ {
@ -873,7 +825,7 @@ impl Compiler {
} }
let line_number = self.get_source_line_number(); let line_number = self.get_source_line_number();
self.push_output(CodeObject::new( self.push_output(
bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED,
args.posonlyargs_count, args.posonlyargs_count,
args.args.len(), args.args.len(),
@ -881,7 +833,7 @@ impl Compiler {
self.source_path.clone(), self.source_path.clone(),
line_number, line_number,
name.to_owned(), name.to_owned(),
)); );
for name in &args.args { for name in &args.args {
self.varname(&name.arg); self.varname(&name.arg);
@ -893,7 +845,7 @@ impl Compiler {
let mut compile_varargs = |va: &ast::Varargs, flag| match va { let mut compile_varargs = |va: &ast::Varargs, flag| match va {
ast::Varargs::None | ast::Varargs::Unnamed => {} ast::Varargs::None | ast::Varargs::Unnamed => {}
ast::Varargs::Named(name) => { ast::Varargs::Named(name) => {
self.current_code().flags |= flag; self.current_codeinfo().flags |= flag;
self.varname(&name.arg); self.varname(&name.arg);
} }
}; };
@ -925,29 +877,30 @@ impl Compiler {
orelse: &Option<ast::Suite>, orelse: &Option<ast::Suite>,
finalbody: &Option<ast::Suite>, finalbody: &Option<ast::Suite>,
) -> CompileResult<()> { ) -> CompileResult<()> {
let mut handler_label = self.new_label(); let handler_block = self.new_block();
let finally_handler_label = self.new_label(); let finally_block = self.new_block();
let else_label = self.new_label();
// Setup a finally block if we have a finally statement. // Setup a finally block if we have a finally statement.
if finalbody.is_some() { if finalbody.is_some() {
self.emit(Instruction::SetupFinally { self.emit(Instruction::SetupFinally {
handler: finally_handler_label, handler: finally_block,
}); });
} }
let else_block = self.new_block();
// try: // try:
self.emit(Instruction::SetupExcept { self.emit(Instruction::SetupExcept {
handler: handler_label, handler: handler_block,
}); });
self.compile_statements(body)?; self.compile_statements(body)?;
self.emit(Instruction::PopBlock); self.emit(Instruction::PopBlock);
self.emit(Instruction::Jump { target: else_label }); self.emit(Instruction::Jump { target: else_block });
// except handlers: // except handlers:
self.set_label(handler_label); self.switch_to_block(handler_block);
// Exception is on top of stack now // Exception is on top of stack now
handler_label = self.new_label(); let mut next_handler = self.new_block();
for handler in handlers { for handler in handlers {
// If we gave a typ, // If we gave a typ,
// check if this handler can handle the exception: // check if this handler can handle the exception:
@ -963,7 +916,7 @@ impl Compiler {
// We cannot handle this exception type: // We cannot handle this exception type:
self.emit(Instruction::JumpIfFalse { self.emit(Instruction::JumpIfFalse {
target: handler_label, target: next_handler,
}); });
// We have a match, store in name (except x as y) // We have a match, store in name (except x as y)
@ -990,18 +943,18 @@ impl Compiler {
} }
self.emit(Instruction::Jump { self.emit(Instruction::Jump {
target: finally_handler_label, target: finally_block,
}); });
// Emit a new label for the next handler // Emit a new label for the next handler
self.set_label(handler_label); self.switch_to_block(next_handler);
handler_label = self.new_label(); next_handler = self.new_block();
} }
self.emit(Instruction::Jump { self.emit(Instruction::Jump {
target: handler_label, target: next_handler,
}); });
self.set_label(handler_label); self.switch_to_block(next_handler);
// If code flows here, we have an unhandled exception, // If code flows here, we have an unhandled exception,
// raise the exception again! // raise the exception again!
self.emit(Instruction::Raise { self.emit(Instruction::Raise {
@ -1010,7 +963,7 @@ impl Compiler {
// We successfully ran the try block: // We successfully ran the try block:
// else: // else:
self.set_label(else_label); self.switch_to_block(else_block);
if let Some(statements) = orelse { if let Some(statements) = orelse {
self.compile_statements(statements)?; self.compile_statements(statements)?;
} }
@ -1023,7 +976,7 @@ impl Compiler {
} }
// finally: // finally:
self.set_label(finally_handler_label); self.switch_to_block(finally_block);
if let Some(statements) = finalbody { if let Some(statements) = finalbody {
self.compile_statements(statements)?; self.compile_statements(statements)?;
self.emit(Instruction::EndFinally); self.emit(Instruction::EndFinally);
@ -1269,7 +1222,7 @@ impl Compiler {
self.emit(Instruction::LoadBuildClass); self.emit(Instruction::LoadBuildClass);
let line_number = self.get_source_line_number(); let line_number = self.get_source_line_number();
self.push_output(CodeObject::new( self.push_output(
bytecode::CodeFlags::empty(), bytecode::CodeFlags::empty(),
0, 0,
0, 0,
@ -1277,7 +1230,7 @@ impl Compiler {
self.source_path.clone(), self.source_path.clone(),
line_number, line_number,
name.to_owned(), name.to_owned(),
)); );
let (new_body, doc_str) = get_doc(body); let (new_body, doc_str) = get_doc(body);
@ -1396,28 +1349,28 @@ impl Compiler {
body: &[ast::Statement], body: &[ast::Statement],
orelse: &Option<Vec<ast::Statement>>, orelse: &Option<Vec<ast::Statement>>,
) -> CompileResult<()> { ) -> CompileResult<()> {
let start_label = self.new_label(); let while_block = self.new_block();
let else_label = self.new_label(); let else_block = self.new_block();
let end_label = self.new_label(); let after_block = self.new_block();
self.emit(Instruction::SetupLoop { end: end_label }); self.emit(Instruction::SetupLoop);
self.set_label(start_label); self.switch_to_block(while_block);
self.compile_jump_if(test, false, else_label)?; self.compile_jump_if(test, false, else_block)?;
let was_in_loop = self.ctx.loop_data; let was_in_loop = self.ctx.loop_data;
self.ctx.loop_data = Some((start_label, end_label)); self.ctx.loop_data = Some((while_block, after_block));
self.compile_statements(body)?; self.compile_statements(body)?;
self.ctx.loop_data = was_in_loop; self.ctx.loop_data = was_in_loop;
self.emit(Instruction::Jump { self.emit(Instruction::Jump {
target: start_label, target: while_block,
}); });
self.set_label(else_label); self.switch_to_block(else_block);
self.emit(Instruction::PopBlock); self.emit(Instruction::PopBlock);
if let Some(orelse) = orelse { if let Some(orelse) = orelse {
self.compile_statements(orelse)?; self.compile_statements(orelse)?;
} }
self.set_label(end_label); self.switch_to_block(after_block);
Ok(()) Ok(())
} }
@ -1430,77 +1383,73 @@ impl Compiler {
is_async: bool, is_async: bool,
) -> CompileResult<()> { ) -> CompileResult<()> {
// Start loop // Start loop
let start_label = self.new_label(); let for_block = self.new_block();
let else_label = self.new_label(); let else_block = self.new_block();
let end_label = self.new_label(); let after_block = self.new_block();
self.emit(Instruction::SetupLoop { end: end_label }); self.emit(Instruction::SetupLoop);
// The thing iterated: // The thing iterated:
self.compile_expression(iter)?; self.compile_expression(iter)?;
if is_async { if is_async {
let check_asynciter_label = self.new_label(); let check_asynciter_block = self.new_block();
let body_label = self.new_label(); let body_block = self.new_block();
self.emit(Instruction::GetAIter); self.emit(Instruction::GetAIter);
self.set_label(start_label); self.switch_to_block(for_block);
self.emit(Instruction::SetupExcept { self.emit(Instruction::SetupExcept {
handler: check_asynciter_label, handler: check_asynciter_block,
}); });
self.emit(Instruction::GetANext); self.emit(Instruction::GetANext);
self.emit_constant(ConstantData::None); self.emit_constant(ConstantData::None);
self.emit(Instruction::YieldFrom); self.emit(Instruction::YieldFrom);
self.compile_store(target)?; self.compile_store(target)?;
self.emit(Instruction::PopBlock); self.emit(Instruction::PopBlock);
self.emit(Instruction::Jump { target: body_label }); self.emit(Instruction::Jump { target: body_block });
self.set_label(check_asynciter_label); self.switch_to_block(check_asynciter_block);
self.emit(Instruction::Duplicate); self.emit(Instruction::Duplicate);
let stopasynciter = self.name("StopAsyncIteration"); let stopasynciter = self.name("StopAsyncIteration");
self.emit(Instruction::LoadGlobal(stopasynciter)); self.emit(Instruction::LoadGlobal(stopasynciter));
self.emit(Instruction::CompareOperation { self.emit(Instruction::CompareOperation {
op: bytecode::ComparisonOperator::ExceptionMatch, op: bytecode::ComparisonOperator::ExceptionMatch,
}); });
self.emit(Instruction::JumpIfTrue { target: else_label }); self.emit(Instruction::JumpIfTrue { target: else_block });
self.emit(Instruction::Raise { self.emit(Instruction::Raise {
kind: bytecode::RaiseKind::Reraise, kind: bytecode::RaiseKind::Reraise,
}); });
let was_in_loop = self.ctx.loop_data; self.switch_to_block(body_block);
self.ctx.loop_data = Some((start_label, end_label));
self.set_label(body_label);
self.compile_statements(body)?;
self.ctx.loop_data = was_in_loop;
} else { } else {
// Retrieve Iterator // Retrieve Iterator
self.emit(Instruction::GetIter); self.emit(Instruction::GetIter);
self.set_label(start_label); self.switch_to_block(for_block);
self.emit(Instruction::ForIter { target: else_label }); self.emit(Instruction::ForIter { target: else_block });
// Start of loop iteration, set targets: // Start of loop iteration, set targets:
self.compile_store(target)?; self.compile_store(target)?;
let was_in_loop = self.ctx.loop_data;
self.ctx.loop_data = Some((start_label, end_label));
self.compile_statements(body)?;
self.ctx.loop_data = was_in_loop;
} }
self.emit(Instruction::Jump { let was_in_loop = self.ctx.loop_data;
target: start_label, self.ctx.loop_data = Some((for_block, after_block));
}); self.compile_statements(body)?;
self.set_label(else_label); self.ctx.loop_data = was_in_loop;
self.emit(Instruction::Jump { target: for_block });
self.switch_to_block(else_block);
self.emit(Instruction::PopBlock); self.emit(Instruction::PopBlock);
if let Some(orelse) = orelse { if let Some(orelse) = orelse {
self.compile_statements(orelse)?; self.compile_statements(orelse)?;
} }
self.set_label(end_label);
self.switch_to_block(after_block);
if is_async { if is_async {
self.emit(Instruction::Pop); self.emit(Instruction::Pop);
} }
Ok(()) Ok(())
} }
@ -1536,8 +1485,13 @@ impl Compiler {
// initialize lhs outside of loop // initialize lhs outside of loop
self.compile_expression(&vals[0])?; self.compile_expression(&vals[0])?;
let break_label = self.new_label(); let end_blocks = if vals.len() > 2 {
let last_label = self.new_label(); let break_block = self.new_block();
let after_block = self.new_block();
Some((break_block, after_block))
} else {
None
};
// for all comparisons except the last (as the last one doesn't need a conditional jump) // for all comparisons except the last (as the last one doesn't need a conditional jump)
let ops_slice = &ops[0..ops.len()]; let ops_slice = &ops[0..ops.len()];
@ -1553,9 +1507,11 @@ impl Compiler {
}); });
// if comparison result is false, we break with this value; if true, try the next one. // if comparison result is false, we break with this value; if true, try the next one.
self.emit(Instruction::JumpIfFalseOrPop { if let Some((break_block, _)) = end_blocks {
target: break_label, self.emit(Instruction::JumpIfFalseOrPop {
}); target: break_block,
});
}
} }
// handle the last comparison // handle the last comparison
@ -1564,15 +1520,17 @@ impl Compiler {
op: to_operator(ops.last().unwrap()), op: to_operator(ops.last().unwrap()),
}); });
if vals.len() > 2 { if let Some((break_block, after_block)) = end_blocks {
self.emit(Instruction::Jump { target: last_label }); self.emit(Instruction::Jump {
target: after_block,
});
// early exit left us with stack: `rhs, comparison_result`. We need to clean up rhs. // early exit left us with stack: `rhs, comparison_result`. We need to clean up rhs.
self.set_label(break_label); self.switch_to_block(break_block);
self.emit(Instruction::Rotate { amount: 2 }); self.emit(Instruction::Rotate { amount: 2 });
self.emit(Instruction::Pop); self.emit(Instruction::Pop);
self.set_label(last_label); self.switch_to_block(after_block);
} }
Ok(()) Ok(())
@ -1715,7 +1673,7 @@ impl Compiler {
&mut self, &mut self,
expression: &ast::Expression, expression: &ast::Expression,
condition: bool, condition: bool,
target_label: Label, target_block: ir::BlockIdx,
) -> CompileResult<()> { ) -> CompileResult<()> {
// Compile expression for test, and jump to label if false // Compile expression for test, and jump to label if false
match &expression.node { match &expression.node {
@ -1724,21 +1682,21 @@ impl Compiler {
ast::BooleanOperator::And => { ast::BooleanOperator::And => {
if condition { if condition {
// If all values are true. // If all values are true.
let end_label = self.new_label(); let end_block = self.new_block();
let (last_value, values) = values.split_last().unwrap(); let (last_value, values) = values.split_last().unwrap();
// If any of the values is false, we can short-circuit. // If any of the values is false, we can short-circuit.
for value in values { for value in values {
self.compile_jump_if(value, false, end_label)?; self.compile_jump_if(value, false, end_block)?;
} }
// It depends upon the last value now: will it be true? // It depends upon the last value now: will it be true?
self.compile_jump_if(last_value, true, target_label)?; self.compile_jump_if(last_value, true, target_block)?;
self.set_label(end_label); self.switch_to_block(end_block);
} else { } else {
// If any value is false, the whole condition is false. // If any value is false, the whole condition is false.
for value in values { for value in values {
self.compile_jump_if(value, false, target_label)?; self.compile_jump_if(value, false, target_block)?;
} }
} }
} }
@ -1746,21 +1704,21 @@ impl Compiler {
if condition { if condition {
// If any of the values is true. // If any of the values is true.
for value in values { for value in values {
self.compile_jump_if(value, true, target_label)?; self.compile_jump_if(value, true, target_block)?;
} }
} else { } else {
// If all of the values are false. // If all of the values are false.
let end_label = self.new_label(); let end_block = self.new_block();
let (last_value, values) = values.split_last().unwrap(); let (last_value, values) = values.split_last().unwrap();
// If any value is true, we can short-circuit: // If any value is true, we can short-circuit:
for value in values { for value in values {
self.compile_jump_if(value, true, end_label)?; self.compile_jump_if(value, true, end_block)?;
} }
// It all depends upon the last value now! // It all depends upon the last value now!
self.compile_jump_if(last_value, false, target_label)?; self.compile_jump_if(last_value, false, target_block)?;
self.set_label(end_label); self.switch_to_block(end_block);
} }
} }
} }
@ -1769,18 +1727,18 @@ impl Compiler {
op: ast::UnaryOperator::Not, op: ast::UnaryOperator::Not,
a, a,
} => { } => {
self.compile_jump_if(a, !condition, target_label)?; self.compile_jump_if(a, !condition, target_block)?;
} }
_ => { _ => {
// Fall back case which always will work! // Fall back case which always will work!
self.compile_expression(expression)?; self.compile_expression(expression)?;
if condition { if condition {
self.emit(Instruction::JumpIfTrue { self.emit(Instruction::JumpIfTrue {
target: target_label, target: target_block,
}); });
} else { } else {
self.emit(Instruction::JumpIfFalse { self.emit(Instruction::JumpIfFalse {
target: target_label, target: target_block,
}); });
} }
} }
@ -1795,7 +1753,7 @@ impl Compiler {
op: &ast::BooleanOperator, op: &ast::BooleanOperator,
values: &[ast::Expression], values: &[ast::Expression],
) -> CompileResult<()> { ) -> CompileResult<()> {
let end_label = self.new_label(); let after_block = self.new_block();
let (last_value, values) = values.split_last().unwrap(); let (last_value, values) = values.split_last().unwrap();
for value in values { for value in values {
@ -1803,17 +1761,21 @@ impl Compiler {
match op { match op {
ast::BooleanOperator::And => { ast::BooleanOperator::And => {
self.emit(Instruction::JumpIfFalseOrPop { target: end_label }); self.emit(Instruction::JumpIfFalseOrPop {
target: after_block,
});
} }
ast::BooleanOperator::Or => { ast::BooleanOperator::Or => {
self.emit(Instruction::JumpIfTrueOrPop { target: end_label }); self.emit(Instruction::JumpIfTrueOrPop {
target: after_block,
});
} }
} }
} }
// If all values did not qualify, take the value of the last value: // If all values did not qualify, take the value of the last value:
self.compile_expression(last_value)?; self.compile_expression(last_value)?;
self.set_label(end_label); self.switch_to_block(after_block);
Ok(()) Ok(())
} }
@ -2046,17 +2008,22 @@ impl Compiler {
return Err(self.error(CompileErrorType::InvalidStarExpr)); return Err(self.error(CompileErrorType::InvalidStarExpr));
} }
IfExpression { test, body, orelse } => { IfExpression { test, body, orelse } => {
let no_label = self.new_label(); let else_block = self.new_block();
let end_label = self.new_label(); let after_block = self.new_block();
self.compile_jump_if(test, false, no_label)?; self.compile_jump_if(test, false, else_block)?;
// True case // True case
self.compile_expression(body)?; self.compile_expression(body)?;
self.emit(Instruction::Jump { target: end_label }); self.emit(Instruction::Jump {
target: after_block,
});
// False case // False case
self.set_label(no_label); self.switch_to_block(else_block);
self.compile_expression(orelse)?; self.compile_expression(orelse)?;
// End // End
self.set_label(end_label); self.switch_to_block(after_block);
} }
NamedExpression { left, right } => { NamedExpression { left, right } => {
@ -2210,7 +2177,7 @@ impl Compiler {
let line_number = self.get_source_line_number(); let line_number = self.get_source_line_number();
// Create magnificent function <listcomp>: // Create magnificent function <listcomp>:
self.push_output(CodeObject::new( self.push_output(
bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED,
1, 1,
1, 1,
@ -2218,7 +2185,7 @@ impl Compiler {
self.source_path.clone(), self.source_path.clone(),
line_number, line_number,
name.clone(), name.clone(),
)); );
let arg0 = self.varname(".0"); let arg0 = self.varname(".0");
// Create empty object of proper type: // Create empty object of proper type:
@ -2251,6 +2218,9 @@ impl Compiler {
unimplemented!("async for comprehensions"); unimplemented!("async for comprehensions");
} }
// Setup for loop:
self.emit(Instruction::SetupLoop);
if loop_labels.is_empty() { if loop_labels.is_empty() {
// Load iterator onto stack (passed as first argument): // Load iterator onto stack (passed as first argument):
self.emit(Instruction::LoadFast(arg0)); self.emit(Instruction::LoadFast(arg0));
@ -2262,19 +2232,20 @@ impl Compiler {
self.emit(Instruction::GetIter); self.emit(Instruction::GetIter);
} }
// Setup for loop: let loop_block = self.new_block();
let start_label = self.new_label(); let after_block = self.new_block();
let end_label = self.new_label(); loop_labels.push((loop_block, after_block));
loop_labels.push((start_label, end_label));
self.emit(Instruction::SetupLoop { end: end_label }); self.switch_to_block(loop_block);
self.set_label(start_label); self.emit(Instruction::ForIter {
self.emit(Instruction::ForIter { target: end_label }); target: after_block,
});
self.compile_store(&generator.target)?; self.compile_store(&generator.target)?;
// Now evaluate the ifs: // Now evaluate the ifs:
for if_condition in &generator.ifs { for if_condition in &generator.ifs {
self.compile_jump_if(if_condition, false, start_label)? self.compile_jump_if(if_condition, false, loop_block)?
} }
} }
@ -2320,14 +2291,12 @@ impl Compiler {
} }
} }
for (start_label, end_label) in loop_labels.iter().rev() { for (loop_block, after_block) in loop_labels.iter().rev().copied() {
// Repeat: // Repeat:
self.emit(Instruction::Jump { self.emit(Instruction::Jump { target: loop_block });
target: *start_label,
});
// End of for loop: // End of for loop:
self.set_label(*end_label); self.switch_to_block(after_block);
self.emit(Instruction::PopBlock); self.emit(Instruction::PopBlock);
} }
@ -2428,14 +2397,16 @@ impl Compiler {
} }
// Low level helper functions: // Low level helper functions:
fn emit(&mut self, instruction: Instruction) { fn emit(&mut self, instr: Instruction) {
let location = compile_location(&self.current_source_location); let location = compile_location(&self.current_source_location);
// TODO: insert source filename // TODO: insert source filename
let info = self.current_codeinfo(); self.current_block()
info.instructions.push(instruction); .instructions
info.locations.push(location); .push(ir::InstructionInfo { instr, location });
} }
// fn block_done()
fn emit_constant(&mut self, constant: ConstantData) { fn emit_constant(&mut self, constant: ConstantData) {
let info = self.current_codeinfo(); let info = self.current_codeinfo();
let idx = info.constants.len() as u32; let idx = info.constants.len() as u32;
@ -2443,35 +2414,31 @@ impl Compiler {
self.emit(Instruction::LoadConst { idx }) self.emit(Instruction::LoadConst { idx })
} }
fn current_code(&mut self) -> &mut CodeObject {
&mut self.current_codeinfo().code
}
fn current_codeinfo(&mut self) -> &mut CodeInfo { fn current_codeinfo(&mut self) -> &mut CodeInfo {
self.code_stack.last_mut().expect("no code on stack") self.code_stack.last_mut().expect("no code on stack")
} }
// Generate a new label fn current_block(&mut self) -> &mut ir::Block {
fn new_label(&mut self) -> Label { let info = self.current_codeinfo();
let label_map = &mut self.current_codeinfo().label_map; &mut info.blocks[info.block_order.last().unwrap().0 as usize]
let label = Label(label_map.len() as u32);
label_map.push(Label(u32::MAX));
label
} }
// Assign current position the given label fn new_block(&mut self) -> ir::BlockIdx {
fn set_label(&mut self, label: Label) { let code = self.current_codeinfo();
let CodeInfo { let idx = bytecode::Label(code.blocks.len() as u32);
instructions, code.blocks.push(ir::Block::default());
label_map, idx
.. }
} = self.current_codeinfo();
let actual_label = Label(instructions.len() as u32); fn switch_to_block(&mut self, block: ir::BlockIdx) {
let prev_val = std::mem::replace(&mut label_map[label.0 as usize], actual_label); let code = self.current_codeinfo();
let last = code.block_order.last().unwrap();
code.blocks[last.0 as usize].done = true;
debug_assert!( debug_assert!(
prev_val.0 == u32::MAX || prev_val == actual_label, !code.blocks[block.0 as usize].done,
"double-set a label" "switching to done block"
); );
code.block_order.push(block);
} }
fn set_source_location(&mut self, location: ast::Location) { fn set_source_location(&mut self, location: ast::Location) {
@ -2491,7 +2458,7 @@ impl Compiler {
} }
fn mark_generator(&mut self) { fn mark_generator(&mut self) {
self.current_code().flags |= bytecode::CodeFlags::IS_GENERATOR self.current_codeinfo().flags |= bytecode::CodeFlags::IS_GENERATOR
} }
} }

224
src/ir.rs Normal file
View file

@ -0,0 +1,224 @@
use indexmap::IndexSet;
use rustpython_bytecode::{CodeFlags, CodeObject, ConstantData, Instruction, Label, Location};
/// Index of a [`Block`] inside [`CodeInfo::blocks`].
///
/// It reuses the bytecode `Label` type: while code is still in block form,
/// the label argument of an instruction holds one of these indices, and
/// `CodeInfo::finalize_code` rewrites it into an instruction offset.
pub type BlockIdx = Label;
/// A single instruction of the block-based IR together with the source
/// location that was recorded when it was emitted.
pub struct InstructionInfo {
/// If the instruction has a Label argument, it's actually a BlockIdx, not a code offset
pub instr: Instruction,
/// Source location stored alongside `instr`; `CodeInfo::finalize_code` copies it
/// into the `locations` table of the resulting `CodeObject`.
pub location: Location,
}
/// A sequence of instructions that jump instructions refer to by [`BlockIdx`].
///
/// `Block::default()` is an empty block that is not yet `done`; the `Default`
/// implementation is derived because it only needs the field defaults
/// (an empty `Vec` and `false`).
#[derive(Default)]
pub struct Block {
    /// Instructions of this block, in the order they were emitted.
    pub instructions: Vec<InstructionInfo>,
    /// Marks a block the compiler has finished with; the compiler sets it when
    /// it switches away from the block and asserts it is unset before
    /// switching to a block.
    pub done: bool,
}
/// Everything collected while compiling one code object, kept in block form
/// until `finalize_code` flattens it into a `CodeObject`.
pub struct CodeInfo {
// The fields up to `obj_name` are passed through unchanged to the `CodeObject`.
pub flags: CodeFlags,
pub posonlyarg_count: usize, // Number of positional-only arguments
pub arg_count: usize,
pub kwonlyarg_count: usize,
pub source_path: String,
pub first_line_number: usize,
pub obj_name: String, // Name of the object that created this code object
/// All blocks of this code object, indexed by `BlockIdx`.
pub blocks: Vec<Block>,
/// The order in which `finalize_code` lays the blocks out in the final
/// instruction stream; it must list exactly as many entries as `blocks`.
pub block_order: Vec<BlockIdx>,
/// Constant table; becomes `CodeObject::constants`.
pub constants: Vec<ConstantData>,
/// Becomes `CodeObject::names`, in insertion order.
pub name_cache: IndexSet<String>,
/// Becomes `CodeObject::varnames`, in insertion order; `cell2arg` also looks
/// cell variables up here by position.
pub varname_cache: IndexSet<String>,
/// Becomes `CodeObject::cellvars`, in insertion order.
pub cellvar_cache: IndexSet<String>,
/// Becomes `CodeObject::freevars`, in insertion order.
pub freevar_cache: IndexSet<String>,
}
impl CodeInfo {
pub fn finalize_code(mut self, optimize: u8) -> CodeObject {
let max_stacksize = self.max_stacksize();
let cell2arg = self.cell2arg();
if optimize > 0 {
self.dce();
}
let CodeInfo {
flags,
posonlyarg_count,
arg_count,
kwonlyarg_count,
source_path,
first_line_number,
obj_name,
mut blocks,
block_order,
constants,
name_cache,
varname_cache,
cellvar_cache,
freevar_cache,
} = self;
assert!(block_order.len() == blocks.len());
let mut num_instructions = 0;
let mut block_to_offset = vec![Label(0); blocks.len()];
for idx in &block_order {
let idx = idx.0 as usize;
block_to_offset[idx] = Label(num_instructions as u32);
num_instructions += blocks[idx].instructions.len();
}
let mut instructions = Vec::with_capacity(num_instructions);
let mut locations = Vec::with_capacity(num_instructions);
for idx in block_order {
let block = std::mem::take(&mut blocks[idx.0 as usize]);
for mut instr in block.instructions {
if let Some(l) = instr.instr.label_arg_mut() {
*l = block_to_offset[l.0 as usize];
}
instructions.push(instr.instr);
locations.push(instr.location);
}
}
CodeObject {
flags,
posonlyarg_count,
arg_count,
kwonlyarg_count,
source_path,
first_line_number,
obj_name,
max_stacksize,
instructions: instructions.into_boxed_slice(),
locations: locations.into_boxed_slice(),
constants: constants.into(),
names: name_cache.into_iter().collect(),
varnames: varname_cache.into_iter().collect(),
cellvars: cellvar_cache.into_iter().collect(),
freevars: freevar_cache.into_iter().collect(),
cell2arg,
}
}
/// Builds the table mapping each cell variable to the argument it shadows.
///
/// Entry `n` belongs to the `n`-th name in `cellvar_cache`: it is that name's
/// index in `varname_cache` when the index lies within the argument slots
/// (`arg_count` + `kwonlyarg_count`, plus one each for `*args` / `**kwargs`
/// when the corresponding flag is set), and `-1` otherwise.
///
/// Returns `None` when there are no cell variables or when none of them is
/// an argument.
fn cell2arg(&self) -> Option<Box<[isize]>> {
    if self.cellvar_cache.is_empty() {
        return None;
    }

    let has_varargs = self.flags.contains(CodeFlags::HAS_VARARGS);
    let has_varkeywords = self.flags.contains(CodeFlags::HAS_VARKEYWORDS);
    let total_args =
        self.arg_count + self.kwonlyarg_count + has_varargs as usize + has_varkeywords as usize;

    let mapping: Box<[isize]> = self
        .cellvar_cache
        .iter()
        .map(|name| match self.varname_cache.get_index_of(name) {
            // only slots inside the argument range count as arguments
            Some(slot) if slot < total_args => slot as isize,
            _ => -1,
        })
        .collect();

    // A table consisting solely of -1 entries carries no information.
    if mapping.iter().any(|&slot| slot >= 0) {
        Some(mapping)
    } else {
        None
    }
}
/// Removes dead code inside each block: everything after the first
/// unconditional branch of a block is dropped, the branch itself is kept.
/// Blocks without an unconditional branch are left untouched.
fn dce(&mut self) {
    for block in &mut self.blocks {
        let first_branch = block
            .instructions
            .iter()
            .position(|info| info.instr.unconditional_branch());
        if let Some(at) = first_branch {
            block.instructions.truncate(at + 1);
        }
    }
}
/// Computes the largest value-stack depth any path through the code can reach.
///
/// This is a worklist walk over the blocks, starting at block 0 with depth 0.
/// Inside a block every instruction's `stack_effect(false)` is added to the
/// running depth. For an instruction with a label argument, the depth at its
/// target is the running depth plus `stack_effect(true)`. A block is (re)queued
/// whenever it is reached with a greater entry depth than recorded so far, so
/// the walk terminates once no entry depth grows any more.
///
/// Returns 0 when there are no blocks.
///
/// # Panics
///
/// Panics if a jump targets a block that does not appear in `block_order`.
fn max_stacksize(&self) -> u32 {
    if self.blocks.is_empty() {
        return 0;
    }

    let mut maxdepth = 0;
    let mut stack = Vec::with_capacity(self.blocks.len());
    // `i32::MIN` means "never reached". Entry depths must not start at 0:
    // `stackdepth_push` only queues a block when the new depth is strictly
    // greater than the recorded one, so blocks first reached at depth 0
    // (the normal case between statements) would never be analysed.
    let mut startdepths = vec![i32::MIN; self.blocks.len()];
    startdepths[0] = 0;
    // NOTE(review): assumes block 0 is also the first entry of `block_order`.
    stack.push((Label(0), 0));

    'process_blocks: while let Some((block, blockorder)) = stack.pop() {
        let mut depth = startdepths[block.0 as usize];
        for i in &self.blocks[block.0 as usize].instructions {
            let instr = &i.instr;
            let effect = instr.stack_effect(false);
            let new_depth = depth + effect;
            if new_depth > maxdepth {
                maxdepth = new_depth
            }
            // we don't want to worry about Continue or Break, they use unwinding to jump to
            // their targets and as such the stack size is taken care of in frame.rs by setting
            // it back to the level it was at when SetupLoop was run
            let jump_label = instr.label_arg().filter(
                |_| !matches!(instr, Instruction::Continue { .. } | Instruction::Break { .. }),
            );
            if let Some(&target_block) = jump_label {
                let effect = instr.stack_effect(true);
                let target_depth = depth + effect;
                if target_depth > maxdepth {
                    maxdepth = target_depth
                }
                // u32::MAX: the target's position in `block_order` is looked up
                // lazily when (and if) that block falls through.
                stackdepth_push(
                    &mut stack,
                    &mut startdepths,
                    (target_block, u32::MAX),
                    target_depth,
                );
            }
            depth = new_depth;
            if instr.unconditional_branch() {
                // control never reaches the following block from here
                continue 'process_blocks;
            }
        }

        // The block falls through into whatever is laid out after it.
        let next_blockorder = if blockorder == u32::MAX {
            self.block_order
                .iter()
                .position(|x| *x == block)
                .expect("jump target is missing from block_order") as u32
                + 1
        } else {
            blockorder + 1
        };
        // A final block without a terminating branch has no successor;
        // skip it rather than indexing past the end of `block_order`.
        if let Some(&next) = self.block_order.get(next_blockorder as usize) {
            stackdepth_push(&mut stack, &mut startdepths, (next, next_blockorder), depth);
        }
    }

    maxdepth as u32
}
}
/// Records `depth` as the entry depth of the block named by `target.0` and
/// queues `target` on `stack`, but only when `depth` is strictly greater than
/// the entry depth already stored for that block in `startdepths`.
/// Otherwise nothing is changed.
fn stackdepth_push(
    stack: &mut Vec<(Label, u32)>,
    startdepths: &mut [i32],
    target: (Label, u32),
    depth: i32,
) {
    let recorded = &mut startdepths[(target.0).0 as usize];
    if *recorded >= depth {
        // already known at this depth or deeper; no need to revisit
        return;
    }
    *recorded = depth;
    stack.push(target);
}

View file

@ -7,5 +7,6 @@ extern crate log;
pub mod compile; pub mod compile;
pub mod error; pub mod error;
pub mod ir;
pub mod mode; pub mod mode;
pub mod symboltable; pub mod symboltable;