diff --git a/Cargo.toml b/Cargo.toml index 6531fc9..ef55098 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,4 @@ rustpython-bytecode = { path = "../bytecode", version = "0.1.0" } rustpython-parser = { path = "../parser", version = "0.1.0" } num-complex = { version = "0.2", features = ["serde"] } log = "0.3" +arrayvec = "0.4" diff --git a/src/compile.rs b/src/compile.rs index 2b7d34e..37d66f0 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -6,13 +6,17 @@ //! https://github.com/micropython/micropython/blob/master/py/compile.c use crate::error::{CompileError, CompileErrorType}; +use crate::output_stream::{CodeObjectStream, OutputStream}; +use crate::peephole::PeepholeOptimizer; use crate::symboltable::{make_symbol_table, statements_to_symbol_table, Symbol, SymbolScope}; use num_complex::Complex64; use rustpython_bytecode::bytecode::{self, CallType, CodeObject, Instruction, Varargs}; use rustpython_parser::{ast, parser}; -struct Compiler { - code_object_stack: Vec, +type BasicOutputStream = PeepholeOptimizer; + +struct Compiler { + output_stack: Vec, scope_stack: Vec, nxt_label: usize, source_path: Option, @@ -109,18 +113,21 @@ enum EvalContext { Expression, } -type Label = usize; +pub(crate) type Label = usize; -impl Default for Compiler { +impl Default for Compiler +where + O: OutputStream + Default, +{ fn default() -> Self { Compiler::new(0) } } -impl Compiler { +impl Compiler { fn new(optimize: u8) -> Self { Compiler { - code_object_stack: Vec::new(), + output_stack: Vec::new(), scope_stack: Vec::new(), nxt_label: 0, source_path: None, @@ -132,9 +139,13 @@ impl Compiler { } } + fn push_output(&mut self, code: CodeObject) { + self.output_stack.push(code.into()); + } + fn push_new_code_object(&mut self, obj_name: String) { let line_number = self.get_source_line_number(); - self.code_object_stack.push(CodeObject::new( + self.push_output(CodeObject::new( Vec::new(), Varargs::None, Vec::new(), @@ -146,8 +157,7 @@ impl Compiler { } fn pop_code_object(&mut self) -> CodeObject { - // self.scope_stack.pop().unwrap(); - self.code_object_stack.pop().unwrap() + self.output_stack.pop().unwrap().into() } fn compile_program( @@ -155,10 +165,10 @@ impl Compiler { program: &ast::Program, symbol_scope: SymbolScope, ) -> Result<(), CompileError> { - let size_before = self.code_object_stack.len(); + let size_before = self.output_stack.len(); self.scope_stack.push(symbol_scope); self.compile_statements(&program.statements)?; - assert!(self.code_object_stack.len() == size_before); + assert_eq!(self.output_stack.len(), size_before); // Emit None at end: self.emit(Instruction::LoadConst { @@ -636,7 +646,7 @@ impl Compiler { } let line_number = self.get_source_line_number(); - self.code_object_stack.push(CodeObject::new( + self.push_output(CodeObject::new( args.args.iter().map(|a| a.arg.clone()).collect(), compile_varargs(&args.vararg), args.kwonlyargs.iter().map(|a| a.arg.clone()).collect(), @@ -889,7 +899,7 @@ impl Compiler { self.prepare_decorators(decorator_list)?; self.emit(Instruction::LoadBuildClass); let line_number = self.get_source_line_number(); - self.code_object_stack.push(CodeObject::new( + self.push_output(CodeObject::new( vec![], Varargs::None, vec![], @@ -1481,7 +1491,6 @@ impl Compiler { self.set_label(end_label); } } - self.optimize_instruction(); Ok(()) } @@ -1620,7 +1629,7 @@ impl Compiler { let line_number = self.get_source_line_number(); // Create magnificent function : - self.code_object_stack.push(CodeObject::new( + self.push_output(CodeObject::new( vec![".0".to_string()], Varargs::None, vec![], @@ -1799,61 +1808,6 @@ impl Compiler { Ok(()) } - fn optimize_instruction(&mut self) { - let instructions = self.current_instructions(); - match instructions.pop().unwrap() { - Instruction::BinaryOperation { op, inplace } => { - macro_rules! lc { - ($name:ident {$($field:tt)*}) => { - Instruction::LoadConst { - value: bytecode::Constant::$name {$($field)*}, - } - }; - ($name:ident, $($value:tt)*) => { - lc!($name { value: $($value)* }) - }; - } - macro_rules! emitconst { - ($($arg:tt)*) => { - self.emit(lc!($($arg)*)) - }; - } - macro_rules! op { - ($op:ident) => { - bytecode::BinaryOperator::$op - }; - } - let rhs = instructions.pop().unwrap(); - let lhs = instructions.pop().unwrap(); - match (op, lhs, rhs) { - (op!(Add), lc!(Integer, lhs), lc!(Integer, rhs)) => { - emitconst!(Integer, lhs + rhs) - } - (op!(Subtract), lc!(Integer, lhs), lc!(Integer, rhs)) => { - emitconst!(Integer, lhs - rhs) - } - (op!(Add), lc!(Float, lhs), lc!(Float, rhs)) => emitconst!(Float, lhs + rhs), - (op!(Subtract), lc!(Float, lhs), lc!(Float, rhs)) => { - emitconst!(Float, lhs - rhs) - } - (op!(Power), lc!(Float, lhs), lc!(Float, rhs)) => { - emitconst!(Float, lhs.powf(rhs)) - } - (op!(Add), lc!(String, mut lhs), lc!(String, rhs)) => { - lhs.push_str(&rhs); - emitconst!(String, lhs); - } - (op, lhs, rhs) => { - self.emit(lhs); - self.emit(rhs); - self.emit(Instruction::BinaryOperation { op, inplace }); - } - } - } - other => self.emit(other), - } - } - // Scope helpers: fn enter_scope(&mut self) { // println!("Enter scope {:?}", self.scope_stack); @@ -1877,18 +1831,11 @@ impl Compiler { // Low level helper functions: fn emit(&mut self, instruction: Instruction) { let location = compile_location(&self.current_source_location); - let cur_code_obj = self.current_code_object(); - cur_code_obj.instructions.push(instruction); - cur_code_obj.locations.push(location); // TODO: insert source filename - } - - fn current_code_object(&mut self) -> &mut CodeObject { - self.code_object_stack.last_mut().unwrap() - } - - fn current_instructions(&mut self) -> &mut Vec { - &mut self.current_code_object().instructions + self.output_stack + .last_mut() + .unwrap() + .emit(instruction, location); } // Generate a new label @@ -1900,9 +1847,7 @@ impl Compiler { // Assign current position the given label fn set_label(&mut self, label: Label) { - let position = self.current_code_object().instructions.len(); - // assert!(label not in self.label_map) - self.current_code_object().label_map.insert(label, position); + self.output_stack.last_mut().unwrap().set_label(label) } fn set_source_location(&mut self, location: &ast::Location) { @@ -1922,7 +1867,7 @@ impl Compiler { } fn mark_generator(&mut self) { - self.current_code_object().is_generator = true; + self.output_stack.last_mut().unwrap().mark_generator(); } } @@ -1971,7 +1916,7 @@ mod tests { use rustpython_parser::parser; fn compile_exec(source: &str) -> CodeObject { - let mut compiler: Compiler = Default::default(); + let mut compiler = Compiler::default(); compiler.source_path = Some("source_path".to_string()); compiler.push_new_code_object("".to_string()); let ast = parser::parse_program(&source.to_string()).unwrap(); diff --git a/src/lib.rs b/src/lib.rs index de949be..0201e24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,3 +9,5 @@ extern crate log; pub mod compile; pub mod error; pub mod symboltable; +pub mod output_stream; +pub mod peephole; diff --git a/src/output_stream.rs b/src/output_stream.rs new file mode 100644 index 0000000..35321dc --- /dev/null +++ b/src/output_stream.rs @@ -0,0 +1,40 @@ +use crate::compile::Label; +use rustpython_bytecode::bytecode::{CodeObject, Instruction, Location}; + +pub trait OutputStream: From + Into { + /// Output an instruction + fn emit(&mut self, instruction: Instruction, location: Location); + /// Set a label on an instruction + fn set_label(&mut self, label: Label); + /// Mark the inner CodeObject as a generator + fn mark_generator(&mut self); +} + +pub struct CodeObjectStream { + code: CodeObject, +} + +impl From for CodeObjectStream { + fn from(code: CodeObject) -> Self { + CodeObjectStream { code } + } +} +impl From for CodeObject { + fn from(stream: CodeObjectStream) -> Self { + stream.code + } +} + +impl OutputStream for CodeObjectStream { + fn emit(&mut self, instruction: Instruction, location: Location) { + self.code.instructions.push(instruction); + self.code.locations.push(location); + } + fn set_label(&mut self, label: Label) { + let position = self.code.instructions.len(); + self.code.label_map.insert(label, position); + } + fn mark_generator(&mut self) { + self.code.is_generator = true; + } +} diff --git a/src/peephole.rs b/src/peephole.rs new file mode 100644 index 0000000..0db9507 --- /dev/null +++ b/src/peephole.rs @@ -0,0 +1,121 @@ +use crate::output_stream::OutputStream; +use arrayvec::ArrayVec; +use rustpython_bytecode::bytecode::{self, CodeObject, Instruction, Location}; + +const PEEPHOLE_BUFFER_SIZE: usize = 10; + +pub struct PeepholeOptimizer { + inner: O, + buffer: ArrayVec<[(Instruction, Location); PEEPHOLE_BUFFER_SIZE]>, +} + +impl From for PeepholeOptimizer { + fn from(code: CodeObject) -> Self { + Self::new(code.into()) + } +} +impl From> for CodeObject { + fn from(mut peep: PeepholeOptimizer) -> Self { + peep.flush(); + peep.inner.into() + } +} + +impl PeepholeOptimizer { + pub fn new(inner: O) -> Self { + PeepholeOptimizer { + inner, + buffer: ArrayVec::default(), + } + } + + fn emit(&mut self, instruction: Instruction, loc: Location) { + if self.buffer.is_full() { + let (instr, loc) = self.buffer.remove(0); + self.inner.emit(instr, loc); + assert_eq!(self.buffer.len(), PEEPHOLE_BUFFER_SIZE - 1) + } + // safe because we just checked that: if full then remove one element from it + unsafe { self.buffer.push_unchecked((instruction, loc)) }; + } + + fn pop(&mut self) -> (Instruction, Location) { + self.buffer.pop().unwrap() + } + + fn optimize(&mut self, instruction: Instruction, loc: Location) { + match instruction { + Instruction::BinaryOperation { op, inplace } => { + let (rhs, rhs_loc) = self.pop(); + let (lhs, lhs_loc) = self.pop(); + macro_rules! lc { + ($name:ident {$($field:tt)*}) => { + Instruction::LoadConst { + value: bytecode::Constant::$name {$($field)*}, + } + }; + ($name:ident, $($value:tt)*) => { + lc!($name { value: $($value)* }) + }; + } + macro_rules! emitconst { + ($($arg:tt)*) => { + self.emit(lc!($($arg)*), lhs_loc) + }; + } + macro_rules! op { + ($op:ident) => { + bytecode::BinaryOperator::$op + }; + } + match (op, lhs, rhs) { + (op!(Add), lc!(Integer, lhs), lc!(Integer, rhs)) => { + emitconst!(Integer, lhs + rhs) + } + (op!(Subtract), lc!(Integer, lhs), lc!(Integer, rhs)) => { + emitconst!(Integer, lhs - rhs) + } + (op!(Add), lc!(Float, lhs), lc!(Float, rhs)) => emitconst!(Float, lhs + rhs), + (op!(Subtract), lc!(Float, lhs), lc!(Float, rhs)) => { + emitconst!(Float, lhs - rhs) + } + (op!(Power), lc!(Float, lhs), lc!(Float, rhs)) => { + emitconst!(Float, lhs.powf(rhs)) + } + (op!(Add), lc!(String, mut lhs), lc!(String, rhs)) => { + lhs.push_str(&rhs); + emitconst!(String, lhs); + } + (op, lhs, rhs) => { + self.emit(lhs, lhs_loc); + self.emit(rhs, rhs_loc); + self.emit(Instruction::BinaryOperation { op, inplace }, loc); + } + } + } + other => self.emit(other, loc), + } + } + + fn flush(&mut self) { + for (instruction, location) in self.buffer.drain(..) { + self.inner.emit(instruction, location); + } + } +} + +impl OutputStream for PeepholeOptimizer +where + O: OutputStream, +{ + fn emit(&mut self, instruction: Instruction, location: Location) { + self.optimize(instruction, location); + } + fn set_label(&mut self, label: crate::compile::Label) { + self.flush(); + self.inner.set_label(label); + } + fn mark_generator(&mut self) { + self.inner.mark_generator() + } +}