//! //! Take an AST and transform it into bytecode //! //! Inspirational code: //! https://github.com/python/cpython/blob/master/Python/compile.c //! https://github.com/micropython/micropython/blob/master/py/compile.c use crate::error::{CompileError, CompileErrorType}; pub use crate::mode::Mode; use crate::symboltable::{make_symbol_table, statements_to_symbol_table, SymbolScope, SymbolTable}; use indexmap::IndexSet; use itertools::Itertools; use num_complex::Complex64; use rustpython_ast as ast; use rustpython_bytecode::bytecode::{self, CallType, CodeObject, Instruction, Label}; type CompileResult = Result; struct CodeInfo { code: CodeObject, name_cache: IndexSet, varname_cache: IndexSet, cellvar_cache: IndexSet, freevar_cache: IndexSet, label_map: Vec>, } impl CodeInfo { fn finalize_code(self) -> CodeObject { let CodeInfo { mut code, name_cache, varname_cache, cellvar_cache, freevar_cache, label_map, } = self; code.names.extend(name_cache); code.varnames.extend(varname_cache); code.cellvars.extend(cellvar_cache); code.freevars.extend(freevar_cache); for instruction in &mut code.instructions { use Instruction::*; // this is a little bit hacky, as until now the data stored inside Labels in // Instructions is just bookkeeping, but I think it's the best way to do this // XXX: any new instruction that uses a label has to be added here match instruction { Jump { target: l } | JumpIfTrue { target: l } | JumpIfFalse { target: l } | JumpIfTrueOrPop { target: l } | JumpIfFalseOrPop { target: l } | ForIter { target: l } | SetupFinally { handler: l } | SetupExcept { handler: l } | SetupWith { end: l } | SetupAsyncWith { end: l } => { *l = label_map[l.0].expect("label never set"); } SetupLoop { start, end } => { *start = label_map[start.0].expect("label never set"); *end = label_map[end.0].expect("label never set"); } #[rustfmt::skip] Import { .. } | ImportStar | ImportFrom { .. 
} | LoadFast(_) | LoadLocal(_) | LoadGlobal(_) | LoadDeref(_) | LoadClassDeref(_) | StoreFast(_) | StoreLocal(_) | StoreGlobal(_) | StoreDeref(_) | DeleteFast(_) | DeleteLocal(_) | DeleteGlobal(_) | DeleteDeref(_) | LoadClosure(_) | Subscript | StoreSubscript | DeleteSubscript | StoreAttr { .. } | DeleteAttr { .. } | LoadConst { .. } | UnaryOperation { .. } | BinaryOperation { .. } | LoadAttr { .. } | CompareOperation { .. } | Pop | Rotate { .. } | Duplicate | GetIter | Continue | Break | MakeFunction | CallFunction { .. } | ReturnValue | YieldValue | YieldFrom | SetupAnnotation | EnterFinally | EndFinally | WithCleanupStart | WithCleanupFinish | PopBlock | Raise { .. } | BuildString { .. } | BuildTuple { .. } | BuildList { .. } | BuildSet { .. } | BuildMap { .. } | BuildSlice { .. } | ListAppend { .. } | SetAdd { .. } | MapAdd { .. } | PrintExpr | LoadBuildClass | UnpackSequence { .. } | UnpackEx { .. } | FormatValue { .. } | PopException | Reverse { .. } | GetAwaitable | BeforeAsyncWith | GetAIter | GetANext | MapAddRev { .. } => {} } } code } } enum NameUsage { Load, Store, Delete, } /// Main structure holding the state of compilation. 
struct Compiler { code_stack: Vec, symbol_table_stack: Vec, source_path: String, current_source_location: ast::Location, current_qualified_path: Option, done_with_future_stmts: bool, ctx: CompileContext, opts: CompileOpts, } #[derive(Debug, Clone)] pub struct CompileOpts { /// How optimized the bytecode output should be; any optimize > 0 does /// not emit assert statements pub optimize: u8, } impl Default for CompileOpts { fn default() -> Self { CompileOpts { optimize: 0 } } } #[derive(Clone, Copy)] struct CompileContext { in_loop: bool, in_class: bool, func: FunctionContext, } #[derive(Clone, Copy, PartialEq)] enum FunctionContext { NoFunction, Function, AsyncFunction, } impl CompileContext { fn in_func(self) -> bool { !matches!(self.func, FunctionContext::NoFunction) } } /// A helper function for the shared code of the different compile functions fn with_compiler( source_path: String, opts: CompileOpts, f: impl FnOnce(&mut Compiler) -> CompileResult<()>, ) -> CompileResult { let mut compiler = Compiler::new(opts, source_path, "".to_owned()); f(&mut compiler)?; let code = compiler.pop_code_object(); trace!("Compilation completed: {:?}", code); Ok(code) } /// Compile a standard Python program to bytecode pub fn compile_program( ast: ast::Program, source_path: String, opts: CompileOpts, ) -> CompileResult { let symbol_table = match make_symbol_table(&ast) { Ok(x) => x, Err(e) => return Err(e.into_compile_error(source_path)), }; with_compiler(source_path, opts, |compiler| { compiler.compile_program(&ast, symbol_table) }) } /// Compile a single Python expression to bytecode pub fn compile_statement_eval( statement: Vec, source_path: String, opts: CompileOpts, ) -> CompileResult { let symbol_table = match statements_to_symbol_table(&statement) { Ok(x) => x, Err(e) => return Err(e.into_compile_error(source_path)), }; with_compiler(source_path, opts, |compiler| { compiler.compile_statement_eval(&statement, symbol_table) }) } /// Compile a Python program to bytecode for 
the context of a REPL pub fn compile_program_single( ast: ast::Program, source_path: String, opts: CompileOpts, ) -> CompileResult { let symbol_table = match make_symbol_table(&ast) { Ok(x) => x, Err(e) => return Err(e.into_compile_error(source_path)), }; with_compiler(source_path, opts, |compiler| { compiler.compile_program_single(&ast, symbol_table) }) } impl Compiler { fn new(opts: CompileOpts, source_path: String, code_name: String) -> Self { let module_code = CodeInfo { code: CodeObject::new( Default::default(), 0, 0, 0, source_path.clone(), 0, code_name, ), name_cache: IndexSet::new(), varname_cache: IndexSet::new(), cellvar_cache: IndexSet::new(), freevar_cache: IndexSet::new(), label_map: Vec::new(), }; Compiler { code_stack: vec![module_code], symbol_table_stack: Vec::new(), source_path, current_source_location: ast::Location::default(), current_qualified_path: None, done_with_future_stmts: false, ctx: CompileContext { in_loop: false, in_class: false, func: FunctionContext::NoFunction, }, opts, } } fn error(&self, error: CompileErrorType) -> CompileError { self.error_loc(error, self.current_source_location) } fn error_loc(&self, error: CompileErrorType, location: ast::Location) -> CompileError { CompileError { error, location, source_path: self.source_path.clone(), } } fn push_output(&mut self, code: CodeObject) { let table = self .symbol_table_stack .last_mut() .unwrap() .sub_tables .remove(0); let cellvar_cache = table .symbols .iter() .filter(|(_, s)| matches!(s.scope, SymbolScope::Cell)) .map(|(var, _)| var.clone()) .collect(); let freevar_cache = table .symbols .iter() // TODO: check if Free or FREE_CLASS symbol .filter(|(_, s)| matches!(s.scope, SymbolScope::Free)) .map(|(var, _)| var.clone()) .collect(); self.symbol_table_stack.push(table); let info = CodeInfo { code, name_cache: IndexSet::new(), varname_cache: IndexSet::new(), cellvar_cache, freevar_cache, label_map: Vec::new(), }; self.code_stack.push(info); } fn pop_code_object(&mut self) -> 
CodeObject { let table = self.symbol_table_stack.pop().unwrap(); assert!(table.sub_tables.is_empty()); self.code_stack.pop().unwrap().finalize_code() } // could take impl Into>, but everything is borrowed from ast structs; we never // actually have a `String` to pass fn name(&mut self, name: &str) -> bytecode::NameIdx { self._name_inner(name, |i| &mut i.name_cache) } fn varname(&mut self, name: &str) -> bytecode::NameIdx { self._name_inner(name, |i| &mut i.varname_cache) } fn _name_inner( &mut self, name: &str, cache: impl FnOnce(&mut CodeInfo) -> &mut IndexSet, ) -> bytecode::NameIdx { let cache = cache(self.code_stack.last_mut().expect("nothing on stack")); cache .get_index_of(name) .unwrap_or_else(|| cache.insert_full(name.to_owned()).0) } fn compile_program( &mut self, program: &ast::Program, symbol_table: SymbolTable, ) -> CompileResult<()> { let size_before = self.code_stack.len(); self.symbol_table_stack.push(symbol_table); let (statements, doc) = get_doc(&program.statements); if let Some(value) = doc { self.emit_constant(bytecode::ConstantData::Str { value }); let doc = self.name("__doc__"); self.emit(Instruction::StoreGlobal(doc)) } self.compile_statements(statements)?; assert_eq!(self.code_stack.len(), size_before); // Emit None at end: self.emit_constant(bytecode::ConstantData::None); self.emit(Instruction::ReturnValue); Ok(()) } fn compile_program_single( &mut self, program: &ast::Program, symbol_table: SymbolTable, ) -> CompileResult<()> { self.symbol_table_stack.push(symbol_table); let mut emitted_return = false; for (i, statement) in program.statements.iter().enumerate() { let is_last = i == program.statements.len() - 1; if let ast::StatementType::Expression { ref expression } = statement.node { self.compile_expression(expression)?; if is_last { self.emit(Instruction::Duplicate); self.emit(Instruction::PrintExpr); self.emit(Instruction::ReturnValue); emitted_return = true; } else { self.emit(Instruction::PrintExpr); } } else { 
self.compile_statement(&statement)?; } } if !emitted_return { self.emit_constant(bytecode::ConstantData::None); self.emit(Instruction::ReturnValue); } Ok(()) } // Compile statement in eval mode: fn compile_statement_eval( &mut self, statements: &[ast::Statement], symbol_table: SymbolTable, ) -> CompileResult<()> { self.symbol_table_stack.push(symbol_table); for statement in statements { if let ast::StatementType::Expression { ref expression } = statement.node { self.compile_expression(expression)?; } else { return Err(self.error_loc(CompileErrorType::ExpectExpr, statement.location)); } } self.emit(Instruction::ReturnValue); Ok(()) } fn compile_statements(&mut self, statements: &[ast::Statement]) -> CompileResult<()> { for statement in statements { self.compile_statement(statement)? } Ok(()) } fn load_name(&mut self, name: &str) { self.compile_name(name, NameUsage::Load) } fn store_name(&mut self, name: &str) { self.compile_name(name, NameUsage::Store) } fn compile_name(&mut self, name: &str, usage: NameUsage) { let symbol_table = self.symbol_table_stack.last().unwrap(); let symbol = symbol_table.lookup(name).expect( "The symbol must be present in the symbol table, even when it is undefined in python.", ); let info = self.code_stack.last_mut().unwrap(); let mut cache = &mut info.name_cache; enum NameOpType { Fast, Global, Deref, Local, } let op_typ = match symbol.scope { SymbolScope::Local if self.ctx.in_func() => { cache = &mut info.varname_cache; NameOpType::Fast } SymbolScope::Local => NameOpType::Local, SymbolScope::GlobalImplicit if self.ctx.in_func() => NameOpType::Global, SymbolScope::GlobalImplicit => NameOpType::Local, SymbolScope::GlobalExplicit => NameOpType::Global, SymbolScope::Free => { cache = &mut info.freevar_cache; NameOpType::Deref } SymbolScope::Cell => { cache = &mut info.cellvar_cache; NameOpType::Deref } // TODO: is this right? 
SymbolScope::Unknown => NameOpType::Global, }; let idx = cache .get_index_of(name) .unwrap_or_else(|| cache.insert_full(name.to_owned()).0); let op = match op_typ { NameOpType::Fast => match usage { NameUsage::Load => Instruction::LoadFast, NameUsage::Store => Instruction::StoreFast, NameUsage::Delete => Instruction::DeleteFast, }, NameOpType::Global => match usage { NameUsage::Load => Instruction::LoadGlobal, NameUsage::Store => Instruction::StoreGlobal, NameUsage::Delete => Instruction::DeleteGlobal, }, NameOpType::Deref => match usage { NameUsage::Load => Instruction::LoadDeref, NameUsage::Store => Instruction::StoreDeref, NameUsage::Delete => Instruction::DeleteDeref, }, NameOpType::Local => match usage { NameUsage::Load => Instruction::LoadLocal, NameUsage::Store => Instruction::StoreLocal, NameUsage::Delete => Instruction::DeleteLocal, }, }; self.emit(op(idx)); } fn compile_statement(&mut self, statement: &ast::Statement) -> CompileResult<()> { trace!("Compiling {:?}", statement); self.set_source_location(statement.location); use ast::StatementType::*; match &statement.node { // we do this here because `from __future__` still executes that `from` statement at runtime, // we still need to compile the ImportFrom down below ImportFrom { module, names, .. } if module.as_deref() == Some("__future__") => { self.compile_future_features(&names)? 
} // if we find any other statement, stop accepting future statements _ => self.done_with_future_stmts = true, } match &statement.node { Import { names } => { // import a, b, c as d for name in names { let name_idx = Some(self.name(&name.symbol)); self.emit(Instruction::Import { name_idx, symbols_idx: vec![], level: 0, }); if let Some(alias) = &name.alias { for part in name.symbol.split('.').skip(1) { let idx = self.name(part); self.emit(Instruction::LoadAttr { idx }); } self.store_name(alias); } else { self.store_name(name.symbol.split('.').next().unwrap()); } } } ImportFrom { level, module, names, } => { let import_star = names.iter().any(|n| n.symbol == "*"); let module_idx = module.as_ref().map(|s| self.name(s)); if import_star { let star = self.name("*"); // from .... import * self.emit(Instruction::Import { name_idx: module_idx, symbols_idx: vec![star], level: *level, }); self.emit(Instruction::ImportStar); } else { // from mod import a, b as c // First, determine the fromlist (for import lib): let from_list = names.iter().map(|n| self.name(&n.symbol)).collect(); // Load module once: self.emit(Instruction::Import { name_idx: module_idx, symbols_idx: from_list, level: *level, }); for name in names { let idx = self.name(&name.symbol); // import symbol from module: self.emit(Instruction::ImportFrom { idx }); // Store module under proper name: if let Some(alias) = &name.alias { self.store_name(alias); } else { self.store_name(&name.symbol); } } // Pop module from stack: self.emit(Instruction::Pop); } } Expression { expression } => { self.compile_expression(expression)?; // Pop result of stack, since we not use it: self.emit(Instruction::Pop); } Global { .. } | Nonlocal { .. } => { // Handled during symbol table construction. 
} If { test, body, orelse } => { let end_label = self.new_label(); match orelse { None => { // Only if: self.compile_jump_if(test, false, end_label)?; self.compile_statements(body)?; self.set_label(end_label); } Some(statements) => { // if - else: let else_label = self.new_label(); self.compile_jump_if(test, false, else_label)?; self.compile_statements(body)?; self.emit(Instruction::Jump { target: end_label }); // else: self.set_label(else_label); self.compile_statements(statements)?; } } self.set_label(end_label); } While { test, body, orelse } => self.compile_while(test, body, orelse)?, With { is_async, items, body, } => { let is_async = *is_async; let end_labels = items .iter() .map(|item| { let end_label = self.new_label(); self.compile_expression(&item.context_expr)?; if is_async { self.emit(Instruction::BeforeAsyncWith); self.emit(Instruction::GetAwaitable); self.emit_constant(bytecode::ConstantData::None); self.emit(Instruction::YieldFrom); self.emit(Instruction::SetupAsyncWith { end: end_label }); } else { self.emit(Instruction::SetupWith { end: end_label }); } match &item.optional_vars { Some(var) => { self.compile_store(var)?; } None => { self.emit(Instruction::Pop); } } Ok(end_label) }) .collect::>>()?; self.compile_statements(body)?; // sort of "stack up" the layers of with blocks: // with a, b: body -> start_with(a) start_with(b) body() end_with(b) end_with(a) for end_label in end_labels.into_iter().rev() { self.emit(Instruction::PopBlock); self.emit(Instruction::EnterFinally); self.set_label(end_label); self.emit(Instruction::WithCleanupStart); if is_async { self.emit(Instruction::GetAwaitable); self.emit_constant(bytecode::ConstantData::None); self.emit(Instruction::YieldFrom); } self.emit(Instruction::WithCleanupFinish); } } For { is_async, target, iter, body, orelse, } => self.compile_for(target, iter, body, orelse, *is_async)?, Raise { exception, cause } => match exception { Some(value) => { self.compile_expression(value)?; match cause { 
Some(cause) => { self.compile_expression(cause)?; self.emit(Instruction::Raise { argc: 2 }); } None => { self.emit(Instruction::Raise { argc: 1 }); } } } None => { self.emit(Instruction::Raise { argc: 0 }); } }, Try { body, handlers, orelse, finalbody, } => self.compile_try_statement(body, handlers, orelse, finalbody)?, FunctionDef { is_async, name, args, body, decorator_list, returns, } => { self.compile_function_def(name, args, body, decorator_list, returns, *is_async)?; } ClassDef { name, body, bases, keywords, decorator_list, } => self.compile_class_def(name, body, bases, keywords, decorator_list)?, Assert { test, msg } => { // if some flag, ignore all assert statements! if self.opts.optimize == 0 { let end_label = self.new_label(); self.compile_jump_if(test, true, end_label)?; let assertion_error = self.name("AssertionError"); self.emit(Instruction::LoadGlobal(assertion_error)); match msg { Some(e) => { self.compile_expression(e)?; self.emit(Instruction::CallFunction { typ: CallType::Positional(1), }); } None => { self.emit(Instruction::CallFunction { typ: CallType::Positional(0), }); } } self.emit(Instruction::Raise { argc: 1 }); self.set_label(end_label); } } Break => { if !self.ctx.in_loop { return Err(self.error_loc(CompileErrorType::InvalidBreak, statement.location)); } self.emit(Instruction::Break); } Continue => { if !self.ctx.in_loop { return Err( self.error_loc(CompileErrorType::InvalidContinue, statement.location) ); } self.emit(Instruction::Continue); } Return { value } => { if !self.ctx.in_func() { return Err(self.error_loc(CompileErrorType::InvalidReturn, statement.location)); } match value { Some(v) => { if self.ctx.func == FunctionContext::AsyncFunction && self .current_code() .flags .contains(bytecode::CodeFlags::IS_GENERATOR) { return Err(self.error_loc( CompileErrorType::AsyncReturnValue, statement.location, )); } self.compile_expression(v)?; } None => { self.emit_constant(bytecode::ConstantData::None); } } 
self.emit(Instruction::ReturnValue); } Assign { targets, value } => { self.compile_expression(value)?; for (i, target) in targets.iter().enumerate() { if i + 1 != targets.len() { self.emit(Instruction::Duplicate); } self.compile_store(target)?; } } AugAssign { target, op, value } => { self.compile_expression(target)?; self.compile_expression(value)?; // Perform operation: self.compile_op(op, true); self.compile_store(target)?; } AnnAssign { target, annotation, value, } => self.compile_annotated_assign(target, annotation, value)?, Delete { targets } => { for target in targets { self.compile_delete(target)?; } } Pass => { // No need to emit any code here :) } } Ok(()) } fn compile_delete(&mut self, expression: &ast::Expression) -> CompileResult<()> { match &expression.node { ast::ExpressionType::Identifier { name } => { self.compile_name(name, NameUsage::Delete); } ast::ExpressionType::Attribute { value, name } => { self.compile_expression(value)?; let idx = self.name(name); self.emit(Instruction::DeleteAttr { idx }); } ast::ExpressionType::Subscript { a, b } => { self.compile_expression(a)?; self.compile_expression(b)?; self.emit(Instruction::DeleteSubscript); } ast::ExpressionType::Tuple { elements } => { for element in elements { self.compile_delete(element)?; } } _ => return Err(self.error(CompileErrorType::Delete(expression.name()))), } Ok(()) } fn enter_function(&mut self, name: &str, args: &ast::Parameters) -> CompileResult<()> { let have_defaults = !args.defaults.is_empty(); if have_defaults { // Construct a tuple: let size = args.defaults.len(); for element in &args.defaults { self.compile_expression(element)?; } self.emit(Instruction::BuildTuple { size, unpack: false, }); } let mut num_kw_only_defaults = 0; for (kw, default) in args.kwonlyargs.iter().zip(&args.kw_defaults) { if let Some(default) = default { self.emit_constant(bytecode::ConstantData::Str { value: kw.arg.clone(), }); self.compile_expression(default)?; num_kw_only_defaults += 1; } } if 
num_kw_only_defaults > 0 { self.emit(Instruction::BuildMap { size: num_kw_only_defaults, unpack: false, for_call: false, }); } let mut flags = bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED; if have_defaults { flags |= bytecode::CodeFlags::HAS_DEFAULTS; } if num_kw_only_defaults > 0 { flags |= bytecode::CodeFlags::HAS_KW_ONLY_DEFAULTS; } let line_number = self.get_source_line_number(); self.push_output(CodeObject::new( flags, args.posonlyargs_count, args.args.len(), args.kwonlyargs.len(), self.source_path.clone(), line_number, name.to_owned(), )); for name in &args.args { self.name(&name.arg); } for name in &args.kwonlyargs { self.name(&name.arg); } let mut compile_varargs = |va: &ast::Varargs, flag| match va { ast::Varargs::None | ast::Varargs::Unnamed => {} ast::Varargs::Named(name) => { self.current_code().flags |= flag; self.name(&name.arg); } }; compile_varargs(&args.vararg, bytecode::CodeFlags::HAS_VARARGS); compile_varargs(&args.kwarg, bytecode::CodeFlags::HAS_VARKEYWORDS); Ok(()) } fn prepare_decorators(&mut self, decorator_list: &[ast::Expression]) -> CompileResult<()> { for decorator in decorator_list { self.compile_expression(decorator)?; } Ok(()) } fn apply_decorators(&mut self, decorator_list: &[ast::Expression]) { // Apply decorators: for _ in decorator_list { self.emit(Instruction::CallFunction { typ: CallType::Positional(1), }); } } fn compile_try_statement( &mut self, body: &[ast::Statement], handlers: &[ast::ExceptHandler], orelse: &Option, finalbody: &Option, ) -> CompileResult<()> { let mut handler_label = self.new_label(); let finally_handler_label = self.new_label(); let else_label = self.new_label(); // Setup a finally block if we have a finally statement. 
if finalbody.is_some() { self.emit(Instruction::SetupFinally { handler: finally_handler_label, }); } // try: self.emit(Instruction::SetupExcept { handler: handler_label, }); self.compile_statements(body)?; self.emit(Instruction::PopBlock); self.emit(Instruction::Jump { target: else_label }); // except handlers: self.set_label(handler_label); // Exception is on top of stack now handler_label = self.new_label(); for handler in handlers { // If we gave a typ, // check if this handler can handle the exception: if let Some(exc_type) = &handler.typ { // Duplicate exception for test: self.emit(Instruction::Duplicate); // Check exception type: self.compile_expression(exc_type)?; self.emit(Instruction::CompareOperation { op: bytecode::ComparisonOperator::ExceptionMatch, }); // We cannot handle this exception type: self.emit(Instruction::JumpIfFalse { target: handler_label, }); // We have a match, store in name (except x as y) if let Some(alias) = &handler.name { self.store_name(alias); } else { // Drop exception from top of stack: self.emit(Instruction::Pop); } } else { // Catch all! // Drop exception from top of stack: self.emit(Instruction::Pop); } // Handler code: self.compile_statements(&handler.body)?; self.emit(Instruction::PopException); if finalbody.is_some() { self.emit(Instruction::PopBlock); // pop finally block // We enter the finally block, without exception. self.emit(Instruction::EnterFinally); } self.emit(Instruction::Jump { target: finally_handler_label, }); // Emit a new label for the next handler self.set_label(handler_label); handler_label = self.new_label(); } self.emit(Instruction::Jump { target: handler_label, }); self.set_label(handler_label); // If code flows here, we have an unhandled exception, // raise the exception again! 
self.emit(Instruction::Raise { argc: 0 }); // We successfully ran the try block: // else: self.set_label(else_label); if let Some(statements) = orelse { self.compile_statements(statements)?; } if finalbody.is_some() { self.emit(Instruction::PopBlock); // pop finally block // We enter the finally block, without return / exception. self.emit(Instruction::EnterFinally); } // finally: self.set_label(finally_handler_label); if let Some(statements) = finalbody { self.compile_statements(statements)?; self.emit(Instruction::EndFinally); } Ok(()) } fn compile_function_def( &mut self, name: &str, args: &ast::Parameters, body: &[ast::Statement], decorator_list: &[ast::Expression], returns: &Option, // TODO: use type hint somehow.. is_async: bool, ) -> CompileResult<()> { // Create bytecode for this function: // remember to restore self.ctx.in_loop to the original after the function is compiled let prev_ctx = self.ctx; self.ctx = CompileContext { in_loop: false, in_class: prev_ctx.in_class, func: if is_async { FunctionContext::AsyncFunction } else { FunctionContext::Function }, }; let qualified_name = self.create_qualified_name(name, ""); let old_qualified_path = self.current_qualified_path.take(); self.current_qualified_path = Some(self.create_qualified_name(name, ".")); self.prepare_decorators(decorator_list)?; self.enter_function(name, args)?; let (body, doc_str) = get_doc(body); self.compile_statements(body)?; // Emit None at end: match body.last().map(|s| &s.node) { Some(ast::StatementType::Return { .. 
}) => { // the last instruction is a ReturnValue already, we don't need to emit it } _ => { self.emit_constant(bytecode::ConstantData::None); self.emit(Instruction::ReturnValue); } } let mut code = self.pop_code_object(); // Prepare type annotations: let mut num_annotations = 0; // Return annotation: if let Some(annotation) = returns { // key: self.emit_constant(bytecode::ConstantData::Str { value: "return".to_owned(), }); // value: self.compile_expression(annotation)?; num_annotations += 1; } let mut visit_arg_annotation = |arg: &ast::Parameter| -> CompileResult<()> { if let Some(annotation) = &arg.annotation { self.emit_constant(bytecode::ConstantData::Str { value: arg.arg.to_owned(), }); self.compile_expression(&annotation)?; num_annotations += 1; } Ok(()) }; for arg in args.args.iter().chain(args.kwonlyargs.iter()) { visit_arg_annotation(arg)?; } if let ast::Varargs::Named(arg) = &args.vararg { visit_arg_annotation(arg)?; } if let ast::Varargs::Named(arg) = &args.kwarg { visit_arg_annotation(arg)?; } if num_annotations > 0 { code.flags |= bytecode::CodeFlags::HAS_ANNOTATIONS; self.emit(Instruction::BuildMap { size: num_annotations, unpack: false, for_call: false, }); } if is_async { code.flags |= bytecode::CodeFlags::IS_COROUTINE; } if !code.freevars.is_empty() { for var in &code.freevars { let symbol = self.symbol_table_stack.last().unwrap().lookup(var).unwrap(); let parent_code = self.code_stack.last().unwrap(); let vars = match symbol.scope { SymbolScope::Free => &parent_code.freevar_cache, SymbolScope::Cell => &parent_code.cellvar_cache, _ => unreachable!(), }; let mut idx = vars.get_index_of(var).unwrap(); if let SymbolScope::Free = symbol.scope { idx += parent_code.cellvar_cache.len(); } self.emit(Instruction::LoadClosure(idx)) } self.emit(Instruction::BuildTuple { size: code.freevars.len(), unpack: false, }) } self.emit_constant(bytecode::ConstantData::Code { code: Box::new(code), }); self.emit_constant(bytecode::ConstantData::Str { value: 
qualified_name, }); // Turn code object into function object: self.emit(Instruction::MakeFunction); self.emit(Instruction::Duplicate); self.load_docstring(doc_str); self.emit(Instruction::Rotate { amount: 2 }); let doc = self.name("__doc__"); self.emit(Instruction::StoreAttr { idx: doc }); self.apply_decorators(decorator_list); self.store_name(name); self.current_qualified_path = old_qualified_path; self.ctx = prev_ctx; Ok(()) } fn find_ann(&self, body: &[ast::Statement]) -> bool { use ast::StatementType::*; let option_stmt_to_bool = |suit: &Option| -> bool { match suit { Some(stmts) => self.find_ann(stmts), None => false, } }; for statement in body { let res = match &statement.node { AnnAssign { target: _, annotation: _, value: _, } => true, For { is_async: _, target: _, iter: _, body, orelse, } => self.find_ann(body) || option_stmt_to_bool(orelse), If { test: _, body, orelse, } => self.find_ann(body) || option_stmt_to_bool(orelse), While { test: _, body, orelse, } => self.find_ann(body) || option_stmt_to_bool(orelse), With { is_async: _, items: _, body, } => self.find_ann(body), Try { body, handlers: _, orelse, finalbody, } => { self.find_ann(&body) || option_stmt_to_bool(orelse) || option_stmt_to_bool(finalbody) } _ => false, }; if res { return true; } } false } fn compile_class_def( &mut self, name: &str, body: &[ast::Statement], bases: &[ast::Expression], keywords: &[ast::Keyword], decorator_list: &[ast::Expression], ) -> CompileResult<()> { let prev_ctx = self.ctx; self.ctx = CompileContext { func: FunctionContext::NoFunction, in_class: true, in_loop: false, }; let qualified_name = self.create_qualified_name(name, ""); let old_qualified_path = self.current_qualified_path.take(); self.current_qualified_path = Some(qualified_name.clone()); self.prepare_decorators(decorator_list)?; self.emit(Instruction::LoadBuildClass); let line_number = self.get_source_line_number(); self.push_output(CodeObject::new( bytecode::CodeFlags::empty(), 0, 0, 0, 
self.source_path.clone(), line_number, name.to_owned(), )); let (new_body, doc_str) = get_doc(body); let dunder_name = self.name("__name__"); self.emit(Instruction::LoadGlobal(dunder_name)); let dunder_module = self.name("__module__"); self.emit(Instruction::StoreLocal(dunder_module)); self.emit_constant(bytecode::ConstantData::Str { value: qualified_name.clone(), }); let qualname = self.name("__qualname__"); self.emit(Instruction::StoreLocal(qualname)); self.load_docstring(doc_str); let doc = self.name("__doc__"); self.emit(Instruction::StoreLocal(doc)); // setup annotations if self.find_ann(body) { self.emit(Instruction::SetupAnnotation); } self.compile_statements(new_body)?; self.emit_constant(bytecode::ConstantData::None); self.emit(Instruction::ReturnValue); let code = self.pop_code_object(); self.emit_constant(bytecode::ConstantData::Code { code: Box::new(code), }); self.emit_constant(bytecode::ConstantData::Str { value: name.to_owned(), }); // Turn code object into function object: self.emit(Instruction::MakeFunction); self.emit_constant(bytecode::ConstantData::Str { value: qualified_name, }); for base in bases { self.compile_expression(base)?; } if !keywords.is_empty() { let mut kwarg_names = vec![]; for keyword in keywords { if let Some(name) = &keyword.name { kwarg_names.push(bytecode::ConstantData::Str { value: name.to_owned(), }); } else { // This means **kwargs! 
panic!("name must be set");
            }
            self.compile_expression(&keyword.value)?;
        }

        self.emit_constant(bytecode::ConstantData::Tuple {
            elements: kwarg_names,
        });
        self.emit(Instruction::CallFunction {
            typ: CallType::Keyword(2 + keywords.len() + bases.len()),
        });
    } else {
        self.emit(Instruction::CallFunction {
            typ: CallType::Positional(2 + bases.len()),
        });
    }

    self.apply_decorators(decorator_list);

    self.store_name(name);
    self.current_qualified_path = old_qualified_path;
    self.ctx = prev_ctx;
    Ok(())
}

/// Emit a constant holding the docstring, or `None` when no docstring was declared.
fn load_docstring(&mut self, doc_str: Option<String>) {
    // TODO: __doc__ must be default None and no bytecodes unless it is Some
    // Duplicate top of stack (the function or class object)

    // Doc string value:
    self.emit_constant(match doc_str {
        Some(doc) => bytecode::ConstantData::Str { value: doc },
        None => bytecode::ConstantData::None, // set docstring None if not declared
    });
}

/// Compile a `while` loop, including its optional `else` suite.
///
/// Layout: `SetupLoop`, then at `start_label` the test (jumping to `else_label` when
/// false), the body, and a jump back to `start_label`. `else_label` pops the loop
/// block and runs the `else` suite; `end_label` marks the end of the whole statement.
fn compile_while(
    &mut self,
    test: &ast::Expression,
    body: &[ast::Statement],
    orelse: &Option<Vec<ast::Statement>>,
) -> CompileResult<()> {
    let start_label = self.new_label();
    let else_label = self.new_label();
    let end_label = self.new_label();
    self.emit(Instruction::SetupLoop {
        start: start_label,
        end: end_label,
    });

    self.set_label(start_label);

    self.compile_jump_if(test, false, else_label)?;

    // The body is compiled with `in_loop` set; the previous value is restored after.
    let was_in_loop = self.ctx.in_loop;
    self.ctx.in_loop = true;
    self.compile_statements(body)?;
    self.ctx.in_loop = was_in_loop;
    self.emit(Instruction::Jump {
        target: start_label,
    });
    self.set_label(else_label);
    self.emit(Instruction::PopBlock);
    if let Some(orelse) = orelse {
        self.compile_statements(orelse)?;
    }
    self.set_label(end_label);
    Ok(())
}

/// Compile a `for` (or `async for` when `is_async` is set) loop with its optional
/// `else` suite.
fn compile_for(
    &mut self,
    target: &ast::Expression,
    iter: &ast::Expression,
    body: &[ast::Statement],
    orelse: &Option<Vec<ast::Statement>>,
    is_async: bool,
) -> CompileResult<()> {
    // Start loop
    let start_label = self.new_label();
    let else_label = self.new_label();
    let end_label = self.new_label();

    self.emit(Instruction::SetupLoop {
        start: start_label,
        end: end_label,
    });

    // The thing iterated:
    self.compile_expression(iter)?;

    if is_async {
        let check_asynciter_label = self.new_label();
        let body_label = self.new_label();

        self.emit(Instruction::GetAIter);

        self.set_label(start_label);
        // Fetch the next item inside an except block so that the handler below
        // can inspect whatever exception ends the iteration.
        self.emit(Instruction::SetupExcept {
            handler: check_asynciter_label,
        });
        self.emit(Instruction::GetANext);
        self.emit_constant(bytecode::ConstantData::None);
        self.emit(Instruction::YieldFrom);
        self.compile_store(target)?;
        self.emit(Instruction::PopBlock);
        self.emit(Instruction::Jump { target: body_label });

        // Handler: StopAsyncIteration jumps to the else branch, anything else is re-raised.
        self.set_label(check_asynciter_label);
        self.emit(Instruction::Duplicate);
        let stopasynciter = self.name("StopAsyncIteration");
        self.emit(Instruction::LoadGlobal(stopasynciter));
        self.emit(Instruction::CompareOperation {
            op: bytecode::ComparisonOperator::ExceptionMatch,
        });
        self.emit(Instruction::JumpIfTrue { target: else_label });
        self.emit(Instruction::Raise { argc: 0 });

        let was_in_loop = self.ctx.in_loop;
        self.ctx.in_loop = true;
        self.set_label(body_label);
        self.compile_statements(body)?;
        self.ctx.in_loop = was_in_loop;
    } else {
        // Retrieve Iterator
        self.emit(Instruction::GetIter);

        self.set_label(start_label);
        self.emit(Instruction::ForIter { target: else_label });

        // Start of loop iteration, set targets:
        self.compile_store(target)?;

        let was_in_loop = self.ctx.in_loop;
        self.ctx.in_loop = true;
        self.compile_statements(body)?;
        self.ctx.in_loop = was_in_loop;
    }

    self.emit(Instruction::Jump {
        target: start_label,
    });
    self.set_label(else_label);
    self.emit(Instruction::PopBlock);
    if let Some(orelse) = orelse {
        self.compile_statements(orelse)?;
    }
    self.set_label(end_label);
    if is_async {
        self.emit(Instruction::Pop);
    }
    Ok(())
}

/// Compile a (possibly chained) comparison such as `a < b <= c`.
///
/// # Panics
///
/// Panics if `ops` is empty or if `vals.len() != ops.len() + 1`.
fn compile_chained_comparison(
    &mut self,
    vals: &[ast::Expression],
    ops: &[ast::Comparison],
) -> CompileResult<()> {
    assert!(!ops.is_empty());
    assert_eq!(vals.len(), ops.len() + 1);

    let to_operator = |op: &ast::Comparison| match op {
        ast::Comparison::Equal => bytecode::ComparisonOperator::Equal,
        ast::Comparison::NotEqual => bytecode::ComparisonOperator::NotEqual,
        ast::Comparison::Less => bytecode::ComparisonOperator::Less,
        ast::Comparison::LessOrEqual => bytecode::ComparisonOperator::LessOrEqual,
        ast::Comparison::Greater => bytecode::ComparisonOperator::Greater,
        ast::Comparison::GreaterOrEqual => bytecode::ComparisonOperator::GreaterOrEqual,
        ast::Comparison::In => bytecode::ComparisonOperator::In,
        ast::Comparison::NotIn => bytecode::ComparisonOperator::NotIn,
        ast::Comparison::Is => bytecode::ComparisonOperator::Is,
        ast::Comparison::IsNot => bytecode::ComparisonOperator::IsNot,
    };

    // a == b == c == d
    // compile into (pseudocode):
    // result = a == b
    // if result:
    //   result = b == c
    //   if result:
    //     result = c == d

    // Split the operands into: first lhs, the operands that serve as both rhs and
    // next lhs, and the final rhs. The asserts above guarantee these splits succeed
    // and that `leading_ops` and `middle_vals` have the same length.
    let (last_op, leading_ops) = ops.split_last().expect("ops is non-empty");
    let (first_val, rest_vals) = vals.split_first().expect("vals has at least two items");
    let (last_val, middle_vals) = rest_vals
        .split_last()
        .expect("vals has at least two items");

    // initialize lhs outside of loop
    self.compile_expression(first_val)?;

    let break_label = self.new_label();
    let last_label = self.new_label();

    // for all comparisons except the last (as the last one doesn't need a conditional jump)
    for (op, val) in leading_ops.iter().zip(middle_vals) {
        self.compile_expression(val)?;
        // store rhs for the next comparison in chain
        self.emit(Instruction::Duplicate);
        self.emit(Instruction::Rotate { amount: 3 });

        self.emit(Instruction::CompareOperation {
            op: to_operator(op),
        });

        // if comparison result is false, we break with this value; if true, try the next one.
        self.emit(Instruction::JumpIfFalseOrPop {
            target: break_label,
        });
    }

    // handle the last comparison
    self.compile_expression(last_val)?;
    self.emit(Instruction::CompareOperation {
        op: to_operator(last_op),
    });

    if vals.len() > 2 {
        self.emit(Instruction::Jump { target: last_label });

        // early exit left us with stack: `rhs, comparison_result`. We need to clean up rhs.
        self.set_label(break_label);
        self.emit(Instruction::Rotate { amount: 2 });
        self.emit(Instruction::Pop);

        self.set_label(last_label);
    }

    Ok(())
}

/// Compile an annotated assignment (`target: annotation = value`).
///
/// The value, when present, is always assigned. The annotation itself is only
/// evaluated outside of functions; for a plain identifier target it is stored in
/// `__annotations__`, otherwise it is evaluated and dropped.
fn compile_annotated_assign(
    &mut self,
    target: &ast::Expression,
    annotation: &ast::Expression,
    value: &Option<ast::Expression>,
) -> CompileResult<()> {
    if let Some(value) = value {
        self.compile_expression(value)?;
        self.compile_store(target)?;
    }

    // Annotations are only evaluated in a module or class.
    if self.ctx.in_func() {
        return Ok(());
    }

    // Compile annotation:
    self.compile_expression(annotation)?;

    if let ast::ExpressionType::Identifier { name } = &target.node {
        // Store as dict entry in __annotations__ dict (we are known to be outside
        // of a function here because of the early return above):
        let annotations = self.name("__annotations__");
        self.emit(Instruction::LoadLocal(annotations));
        self.emit_constant(bytecode::ConstantData::Str {
            value: name.to_owned(),
        });
        self.emit(Instruction::StoreSubscript);
    } else {
        // Drop annotation if not assigned to simple identifier.
        self.emit(Instruction::Pop);
    }

    Ok(())
}

/// Emit the instructions that store the value on top of the stack into `target`.
///
/// # Errors
///
/// Returns an error for more than one starred element in a list/tuple target, for a
/// starred target outside of a list or tuple, and for any other expression kind not
/// handled here.
fn compile_store(&mut self, target: &ast::Expression) -> CompileResult<()> {
    match &target.node {
        ast::ExpressionType::Identifier { name } => {
            self.store_name(name);
        }
        ast::ExpressionType::Subscript { a, b } => {
            self.compile_expression(a)?;
            self.compile_expression(b)?;
            self.emit(Instruction::StoreSubscript);
        }
        ast::ExpressionType::Attribute { value, name } => {
            self.compile_expression(value)?;
            let idx = self.name(name);
            self.emit(Instruction::StoreAttr { idx });
        }
        ast::ExpressionType::List { elements } | ast::ExpressionType::Tuple { elements } => {
            let mut seen_star = false;

            // Scan for star args:
            for (i, element) in elements.iter().enumerate() {
                if let ast::ExpressionType::Starred { .. } = &element.node {
                    if seen_star {
                        return Err(self.error(CompileErrorType::MultipleStarArgs));
                    } else {
                        seen_star = true;
                        self.emit(Instruction::UnpackEx {
                            before: i,
                            after: elements.len() - i - 1,
                        });
                    }
                }
            }

            if !seen_star {
                self.emit(Instruction::UnpackSequence {
                    size: elements.len(),
                });
            }

            // Store into each sub-target, looking through a `*` wrapper.
            for element in elements {
                if let ast::ExpressionType::Starred { value } = &element.node {
                    self.compile_store(value)?;
                } else {
                    self.compile_store(element)?;
                }
            }
        }
        _ => {
            return Err(self.error(match target.node {
                ast::ExpressionType::Starred { .. } => CompileErrorType::SyntaxError(
                    "starred assignment target must be in a list or tuple".to_owned(),
                ),
                _ => CompileErrorType::Assign(target.name()),
            }))
        }
    }

    Ok(())
}

/// Emit the `BinaryOperation` instruction for `op`; `inplace` is passed through
/// unchanged to the instruction.
fn compile_op(&mut self, op: &ast::Operator, inplace: bool) {
    let i = match op {
        ast::Operator::Add => bytecode::BinaryOperator::Add,
        ast::Operator::Sub => bytecode::BinaryOperator::Subtract,
        ast::Operator::Mult => bytecode::BinaryOperator::Multiply,
        ast::Operator::MatMult => bytecode::BinaryOperator::MatrixMultiply,
        ast::Operator::Div => bytecode::BinaryOperator::Divide,
        ast::Operator::FloorDiv => bytecode::BinaryOperator::FloorDivide,
        ast::Operator::Mod => bytecode::BinaryOperator::Modulo,
        ast::Operator::Pow => bytecode::BinaryOperator::Power,
        ast::Operator::LShift => bytecode::BinaryOperator::Lshift,
        ast::Operator::RShift => bytecode::BinaryOperator::Rshift,
        ast::Operator::BitOr => bytecode::BinaryOperator::Or,
        ast::Operator::BitXor => bytecode::BinaryOperator::Xor,
        ast::Operator::BitAnd => bytecode::BinaryOperator::And,
    };
    self.emit(Instruction::BinaryOperation { op: i, inplace });
}

/// Implement boolean short circuit evaluation logic.
/// https://en.wikipedia.org/wiki/Short-circuit_evaluation
///
/// This means, in a boolean statement 'x and y' the variable y will
/// not be evaluated when x is false.
///
/// The idea is to jump to a label if the expression is either true or false
/// (indicated by the condition parameter).
fn compile_jump_if( &mut self, expression: &ast::Expression, condition: bool, target_label: Label, ) -> CompileResult<()> { // Compile expression for test, and jump to label if false match &expression.node { ast::ExpressionType::BoolOp { op, values } => { match op { ast::BooleanOperator::And => { if condition { // If all values are true. let end_label = self.new_label(); let (last_value, values) = values.split_last().unwrap(); // If any of the values is false, we can short-circuit. for value in values { self.compile_jump_if(value, false, end_label)?; } // It depends upon the last value now: will it be true? self.compile_jump_if(last_value, true, target_label)?; self.set_label(end_label); } else { // If any value is false, the whole condition is false. for value in values { self.compile_jump_if(value, false, target_label)?; } } } ast::BooleanOperator::Or => { if condition { // If any of the values is true. for value in values { self.compile_jump_if(value, true, target_label)?; } } else { // If all of the values are false. let end_label = self.new_label(); let (last_value, values) = values.split_last().unwrap(); // If any value is true, we can short-circuit: for value in values { self.compile_jump_if(value, true, end_label)?; } // It all depends upon the last value now! self.compile_jump_if(last_value, false, target_label)?; self.set_label(end_label); } } } } ast::ExpressionType::Unop { op: ast::UnaryOperator::Not, a, } => { self.compile_jump_if(a, !condition, target_label)?; } _ => { // Fall back case which always will work! self.compile_expression(expression)?; if condition { self.emit(Instruction::JumpIfTrue { target: target_label, }); } else { self.emit(Instruction::JumpIfFalse { target: target_label, }); } } } Ok(()) } /// Compile a boolean operation as an expression. /// This means, that the last value remains on the stack. 
fn compile_bool_op(
    &mut self,
    op: &ast::BooleanOperator,
    values: &[ast::Expression],
) -> CompileResult<()> {
    let end_label = self.new_label();

    let (last_value, values) = values.split_last().unwrap();
    for value in values {
        self.compile_expression(value)?;

        match op {
            ast::BooleanOperator::And => {
                self.emit(Instruction::JumpIfFalseOrPop { target: end_label });
            }
            ast::BooleanOperator::Or => {
                self.emit(Instruction::JumpIfTrueOrPop { target: end_label });
            }
        }
    }

    // If all values did not qualify, take the value of the last value:
    self.compile_expression(last_value)?;
    self.set_label(end_label);
    Ok(())
}

/// Compile a dict display. Entries whose key is `None` are `**mapping` unpackings.
///
/// Consecutive keyed entries are grouped into one `BuildMap`; when there is more
/// than one group, or any unpacking, a final unpacking `BuildMap` merges them.
fn compile_dict(
    &mut self,
    pairs: &[(Option<ast::Expression>, ast::Expression)],
) -> CompileResult<()> {
    let mut size = 0;
    let mut has_unpacking = false;
    for (is_unpacking, subpairs) in &pairs.iter().group_by(|e| e.0.is_none()) {
        if is_unpacking {
            for (_, value) in subpairs {
                self.compile_expression(value)?;
                size += 1;
            }
            has_unpacking = true;
        } else {
            let mut subsize = 0;
            for (key, value) in subpairs {
                if let Some(key) = key {
                    self.compile_expression(key)?;
                    self.compile_expression(value)?;
                    subsize += 1;
                }
            }
            self.emit(Instruction::BuildMap {
                size: subsize,
                unpack: false,
                for_call: false,
            });
            size += 1;
        }
    }
    if size == 0 {
        self.emit(Instruction::BuildMap {
            size,
            unpack: false,
            for_call: false,
        });
    }
    if size > 1 || has_unpacking {
        self.emit(Instruction::BuildMap {
            size,
            unpack: true,
            for_call: false,
        });
    }
    Ok(())
}

/// Compile an expression so that its value is left on top of the stack.
fn compile_expression(&mut self, expression: &ast::Expression) -> CompileResult<()> {
    trace!("Compiling {:?}", expression);
    self.set_source_location(expression.location);

    use ast::ExpressionType::*;
    match &expression.node {
        Call {
            function,
            args,
            keywords,
        } => self.compile_call(function, args, keywords)?,
        BoolOp { op, values } => self.compile_bool_op(op, values)?,
        Binop { a, op, b } => {
            self.compile_expression(a)?;
            self.compile_expression(b)?;

            // Perform operation:
            self.compile_op(op, false);
        }
        Subscript { a, b } => {
            self.compile_expression(a)?;
            self.compile_expression(b)?;
            self.emit(Instruction::Subscript);
        }
        Unop { op, a } => {
            self.compile_expression(a)?;

            // Perform operation:
            let i = match op {
                ast::UnaryOperator::Pos => bytecode::UnaryOperator::Plus,
                ast::UnaryOperator::Neg => bytecode::UnaryOperator::Minus,
                ast::UnaryOperator::Not => bytecode::UnaryOperator::Not,
                ast::UnaryOperator::Inv => bytecode::UnaryOperator::Invert,
            };
            let i = Instruction::UnaryOperation { op: i };
            self.emit(i);
        }
        Attribute { value, name } => {
            self.compile_expression(value)?;
            let idx = self.name(name);
            self.emit(Instruction::LoadAttr { idx });
        }
        Compare { vals, ops } => {
            self.compile_chained_comparison(vals, ops)?;
        }
        Number { value } => {
            let const_value = match value {
                ast::Number::Integer { value } => bytecode::ConstantData::Integer {
                    value: value.clone(),
                },
                ast::Number::Float { value } => bytecode::ConstantData::Float { value: *value },
                ast::Number::Complex { real, imag } => bytecode::ConstantData::Complex {
                    value: Complex64::new(*real, *imag),
                },
            };
            self.emit_constant(const_value);
        }
        List { elements } => {
            let size = elements.len();
            let must_unpack = self.gather_elements(elements)?;
            self.emit(Instruction::BuildList {
                size,
                unpack: must_unpack,
            });
        }
        Tuple { elements } => {
            let size = elements.len();
            let must_unpack = self.gather_elements(elements)?;
            self.emit(Instruction::BuildTuple {
                size,
                unpack: must_unpack,
            });
        }
        Set { elements } => {
            let size = elements.len();
            let must_unpack = self.gather_elements(elements)?;
            self.emit(Instruction::BuildSet {
                size,
                unpack: must_unpack,
            });
        }
        Dict { elements } => {
            self.compile_dict(elements)?;
        }
        Slice { elements } => {
            let size = elements.len();
            for element in elements {
                self.compile_expression(element)?;
            }
            self.emit(Instruction::BuildSlice { size });
        }
        Yield { value } => {
            if !self.ctx.in_func() {
                return Err(self.error(CompileErrorType::InvalidYield));
            }
            self.mark_generator();
            match value {
                Some(expression) => self.compile_expression(expression)?,
                Option::None => self.emit_constant(bytecode::ConstantData::None),
            };
            self.emit(Instruction::YieldValue);
        }
        Await { value } => {
            if self.ctx.func != FunctionContext::AsyncFunction {
                return Err(self.error(CompileErrorType::InvalidAwait));
            }
            self.compile_expression(value)?;
            self.emit(Instruction::GetAwaitable);
            self.emit_constant(bytecode::ConstantData::None);
            self.emit(Instruction::YieldFrom);
        }
        YieldFrom { value } => {
            match self.ctx.func {
                FunctionContext::NoFunction => {
                    return Err(self.error(CompileErrorType::InvalidYieldFrom))
                }
                FunctionContext::AsyncFunction => {
                    return Err(self.error(CompileErrorType::AsyncYieldFrom))
                }
                FunctionContext::Function => {}
            }
            self.mark_generator();
            self.compile_expression(value)?;
            self.emit(Instruction::GetIter);
            self.emit_constant(bytecode::ConstantData::None);
            self.emit(Instruction::YieldFrom);
        }
        True => {
            self.emit_constant(bytecode::ConstantData::Boolean { value: true });
        }
        False => {
            self.emit_constant(bytecode::ConstantData::Boolean { value: false });
        }
        ast::ExpressionType::None => {
            self.emit_constant(bytecode::ConstantData::None);
        }
        Ellipsis => {
            self.emit_constant(bytecode::ConstantData::Ellipsis);
        }
        ast::ExpressionType::String { value } => {
            self.compile_string(value)?;
        }
        Bytes { value } => {
            self.emit_constant(bytecode::ConstantData::Bytes {
                value: value.clone(),
            });
        }
        Identifier { name } => {
            self.load_name(name);
        }
        Lambda { args, body } => {
            let prev_ctx = self.ctx;
            self.ctx = CompileContext {
                in_loop: false,
                in_class: prev_ctx.in_class,
                func: FunctionContext::Function,
            };

            // Code-object name for lambdas, following CPython's convention.
            let name = "<lambda>".to_owned();
            self.enter_function(&name, args)?;
            self.compile_expression(body)?;
            self.emit(Instruction::ReturnValue);
            let code = self.pop_code_object();
            self.emit_constant(bytecode::ConstantData::Code {
                code: Box::new(code),
            });
            self.emit_constant(bytecode::ConstantData::Str { value: name });
            // Turn code object into function object:
            self.emit(Instruction::MakeFunction);

            self.ctx = prev_ctx;
        }
        Comprehension { kind, generators } => {
            self.compile_comprehension(kind, generators)?;
        }
        Starred { .. } => {
            return Err(self.error(CompileErrorType::InvalidStarExpr));
        }
        IfExpression { test, body, orelse } => {
            let no_label = self.new_label();
            let end_label = self.new_label();
            self.compile_jump_if(test, false, no_label)?;
            // True case
            self.compile_expression(body)?;
            self.emit(Instruction::Jump { target: end_label });
            // False case
            self.set_label(no_label);
            self.compile_expression(orelse)?;
            // End
            self.set_label(end_label);
        }
        NamedExpression { left, right } => {
            // Keep one copy of the value as the expression result; store the other.
            self.compile_expression(right)?;
            self.emit(Instruction::Duplicate);
            self.compile_store(left)?;
        }
    }
    Ok(())
}

/// Compile the keyword arguments of a call into mappings on the stack.
///
/// Keywords without a name are `**mapping` unpackings. Runs of named keywords are
/// collected into one `BuildMap`; more than one resulting item is merged with an
/// unpacking `BuildMap` flagged `for_call`.
fn compile_keywords(&mut self, keywords: &[ast::Keyword]) -> CompileResult<()> {
    let mut size = 0;
    for (is_unpacking, subkeywords) in &keywords.iter().group_by(|e| e.name.is_none()) {
        if is_unpacking {
            for keyword in subkeywords {
                self.compile_expression(&keyword.value)?;
                size += 1;
            }
        } else {
            let mut subsize = 0;
            for keyword in subkeywords {
                if let Some(name) = &keyword.name {
                    self.emit_constant(bytecode::ConstantData::Str {
                        value: name.to_owned(),
                    });
                    self.compile_expression(&keyword.value)?;
                    subsize += 1;
                }
            }
            self.emit(Instruction::BuildMap {
                size: subsize,
                unpack: false,
                for_call: false,
            });
            size += 1;
        }
    }
    if size > 1 {
        self.emit(Instruction::BuildMap {
            size,
            unpack: true,
            for_call: true,
        });
    }
    Ok(())
}

/// Compile a call expression, choosing the call type from the argument shapes.
fn compile_call(
    &mut self,
    function: &ast::Expression,
    args: &[ast::Expression],
    keywords: &[ast::Keyword],
) -> CompileResult<()> {
    self.compile_expression(function)?;
    let count = args.len() + keywords.len();

    // Normal arguments:
    let must_unpack = self.gather_elements(args)?;
    let has_double_star = keywords.iter().any(|k| k.name.is_none());

    if must_unpack || has_double_star {
        // Create a tuple with positional args:
        self.emit(Instruction::BuildTuple {
            size: args.len(),
            unpack: must_unpack,
        });

        // Create an optional map with kw-args:
        if !keywords.is_empty() {
            self.compile_keywords(keywords)?;
            self.emit(Instruction::CallFunction {
                typ: CallType::Ex(true),
            });
        } else {
            self.emit(Instruction::CallFunction {
                typ: CallType::Ex(false),
            });
        }
    } else {
        // Keyword arguments:
        if !keywords.is_empty() {
            let mut kwarg_names = vec![];
            for keyword in keywords {
                if let Some(name) = &keyword.name {
                    kwarg_names.push(bytecode::ConstantData::Str {
                        value: name.to_owned(),
                    });
                } else {
                    // This means **kwargs!
                    // Not expected here: `has_double_star` is false in this branch.
                    panic!("name must be set");
                }

                self.compile_expression(&keyword.value)?;
            }

            self.emit_constant(bytecode::ConstantData::Tuple {
                elements: kwarg_names,
            });
            self.emit(Instruction::CallFunction {
                typ: CallType::Keyword(count),
            });
        } else {
            self.emit(Instruction::CallFunction {
                typ: CallType::Positional(count),
            });
        }
    }
    Ok(())
}

// Given a vector of expr / star expr generate code which gives either
// a list of expressions on the stack, or a list of tuples.
// Returns whether any element was starred.
fn gather_elements(&mut self, elements: &[ast::Expression]) -> CompileResult<bool> {
    // First determine if we have starred elements:
    let has_stars = elements
        .iter()
        .any(|e| matches!(e.node, ast::ExpressionType::Starred { .. }));

    for element in elements {
        if let ast::ExpressionType::Starred { value } = &element.node {
            self.compile_expression(value)?;
        } else {
            self.compile_expression(element)?;
            if has_stars {
                // Wrap plain elements in a 1-tuple so every item on the stack is a sequence.
                self.emit(Instruction::BuildTuple {
                    size: 1,
                    unpack: false,
                });
            }
        }
    }

    Ok(has_stars)
}

/// Compile a comprehension or generator expression.
///
/// The comprehension body is compiled into its own code object taking the first
/// iterator as its single argument `.0`; the enclosing code then builds a function
/// from it and calls it with the iterator of the first generator.
///
/// # Errors
///
/// Returns an error for asynchronous generators (not supported yet) and for a
/// starred element expression, in addition to errors from nested expressions.
///
/// # Panics
///
/// Panics if `generators` is empty.
fn compile_comprehension(
    &mut self,
    kind: &ast::ComprehensionKind,
    generators: &[ast::Comprehension],
) -> CompileResult<()> {
    // We must have at least one generator:
    assert!(!generators.is_empty());

    // Code-object names follow CPython's convention for each comprehension kind.
    let name = match kind {
        ast::ComprehensionKind::GeneratorExpression { .. } => "<genexpr>",
        ast::ComprehensionKind::List { .. } => "<listcomp>",
        ast::ComprehensionKind::Set { .. } => "<setcomp>",
        ast::ComprehensionKind::Dict { .. } => "<dictcomp>",
    }
    .to_owned();

    let line_number = self.get_source_line_number();
    // Create magnificent function <listcomp>:
    self.push_output(CodeObject::new(
        Default::default(),
        1,
        1,
        0,
        self.source_path.clone(),
        line_number,
        name.clone(),
    ));
    let arg0 = self.varname(".0");

    // Create empty object of proper type:
    match kind {
        ast::ComprehensionKind::GeneratorExpression { .. } => {}
        ast::ComprehensionKind::List { .. } => {
            self.emit(Instruction::BuildList {
                size: 0,
                unpack: false,
            });
        }
        ast::ComprehensionKind::Set { .. } => {
            self.emit(Instruction::BuildSet {
                size: 0,
                unpack: false,
            });
        }
        ast::ComprehensionKind::Dict { .. } => {
            self.emit(Instruction::BuildMap {
                size: 0,
                unpack: false,
                for_call: false,
            });
        }
    }

    let mut loop_labels = vec![];
    for generator in generators {
        if generator.is_async {
            // Report unsupported input as a compile error instead of panicking.
            return Err(self.error(CompileErrorType::SyntaxError(
                "asynchronous comprehensions are not supported yet".to_owned(),
            )));
        }

        if loop_labels.is_empty() {
            // Load iterator onto stack (passed as first argument):
            self.emit(Instruction::LoadFast(arg0));
        } else {
            // Evaluate iterated item:
            self.compile_expression(&generator.iter)?;

            // Get iterator / turn item into an iterator
            self.emit(Instruction::GetIter);
        }

        // Setup for loop:
        let start_label = self.new_label();
        let end_label = self.new_label();
        loop_labels.push((start_label, end_label));
        self.emit(Instruction::SetupLoop {
            start: start_label,
            end: end_label,
        });
        self.set_label(start_label);
        self.emit(Instruction::ForIter { target: end_label });

        self.compile_store(&generator.target)?;

        // Now evaluate the ifs:
        for if_condition in &generator.ifs {
            self.compile_jump_if(if_condition, false, start_label)?
        }
    }

    // Compile the element expression, turning a bare `*x` into a clearer error.
    let mut compile_element = |element| {
        self.compile_expression(element).map_err(|e| {
            if let CompileErrorType::InvalidStarExpr = e.error {
                self.error(CompileErrorType::SyntaxError(
                    "iterable unpacking cannot be used in comprehension".to_owned(),
                ))
            } else {
                e
            }
        })
    };

    match kind {
        ast::ComprehensionKind::GeneratorExpression { element } => {
            compile_element(element)?;
            self.mark_generator();
            self.emit(Instruction::YieldValue);
            self.emit(Instruction::Pop);
        }
        ast::ComprehensionKind::List { element } => {
            compile_element(element)?;
            self.emit(Instruction::ListAppend {
                i: 1 + generators.len(),
            });
        }
        ast::ComprehensionKind::Set { element } => {
            compile_element(element)?;
            self.emit(Instruction::SetAdd {
                i: 1 + generators.len(),
            });
        }
        ast::ComprehensionKind::Dict { key, value } => {
            // changed evaluation order for Py38 named expression PEP 572
            self.compile_expression(key)?;
            self.compile_expression(value)?;

            self.emit(Instruction::MapAddRev {
                i: 1 + generators.len(),
            });
        }
    }

    for (start_label, end_label) in loop_labels.iter().rev() {
        // Repeat:
        self.emit(Instruction::Jump {
            target: *start_label,
        });

        // End of for loop:
        self.set_label(*end_label);
        self.emit(Instruction::PopBlock);
    }

    // Return freshly filled list:
    self.emit(Instruction::ReturnValue);

    // Fetch code for listcomp function:
    let code = self.pop_code_object();

    // List comprehension code:
    self.emit_constant(bytecode::ConstantData::Code {
        code: Box::new(code),
    });

    // List comprehension function name:
    self.emit_constant(bytecode::ConstantData::Str { value: name });

    // Turn code object into function object:
    self.emit(Instruction::MakeFunction);

    // Evaluate iterated item:
    self.compile_expression(&generators[0].iter)?;

    // Get iterator / turn item into an iterator
    self.emit(Instruction::GetIter);

    // Call just created function:
    self.emit(Instruction::CallFunction {
        typ: CallType::Positional(1),
    });
    Ok(())
}

/// Compile a string group: a single constant when the whole group is constant,
/// otherwise the pieces followed by `BuildString` / `FormatValue`.
fn compile_string(&mut self, string: &ast::StringGroup) -> CompileResult<()> {
    if let Some(value) = try_get_constant_string(string) {
        self.emit_constant(bytecode::ConstantData::Str { value });
    } else {
        match string {
            ast::StringGroup::Joined { values } => {
                for value in values {
                    self.compile_string(value)?;
                }
                self.emit(Instruction::BuildString { size: values.len() })
            }
            ast::StringGroup::Constant { value } => {
                self.emit_constant(bytecode::ConstantData::Str {
                    value: value.to_owned(),
                });
            }
            ast::StringGroup::FormattedValue {
                value,
                conversion,
                spec,
            } => {
                // The format spec goes first; an empty string stands in for a missing one.
                match spec {
                    Some(spec) => self.compile_string(spec)?,
                    None => self.emit_constant(bytecode::ConstantData::Str {
                        value: String::new(),
                    }),
                };
                self.compile_expression(value)?;
                self.emit(Instruction::FormatValue {
                    conversion: conversion.map(compile_conversion_flag),
                });
            }
        }
    }
    Ok(())
}

/// Validate the feature names of a `from __future__ import ...` statement.
///
/// # Errors
///
/// Fails when future statements are no longer allowed at this point, or when a
/// feature name is not one of the accepted ones.
fn compile_future_features(
    &mut self,
    features: &[ast::ImportSymbol],
) -> Result<(), CompileError> {
    if self.done_with_future_stmts {
        return Err(self.error(CompileErrorType::InvalidFuturePlacement));
    }
    for feature in features {
        match &*feature.symbol {
            // Python 3 features; we've already implemented them by default
            "nested_scopes" | "generators" | "division" | "absolute_import"
            | "with_statement" | "print_function" | "unicode_literals" => {}
            // "generator_stop" => {}
            // "annotations" => {}
            other => {
                return Err(self.error(CompileErrorType::InvalidFutureFeature(other.to_owned())))
            }
        }
    }
    Ok(())
}

// Low level helper functions:

/// Append `instruction` to the current code object together with the current
/// source location.
fn emit(&mut self, instruction: Instruction) {
    let location = compile_location(&self.current_source_location);
    // TODO: insert source filename
    let code = self.current_code();
    code.instructions.push(instruction);
    code.locations.push(location);
}

/// Append `constant` to the current code object's constants and emit the
/// `LoadConst` that refers to its index.
fn emit_constant(&mut self, constant: bytecode::ConstantData) {
    let code = self.current_code();
    let idx = code.constants.len();
    code.constants.push(constant);
    self.emit(Instruction::LoadConst { idx })
}

/// The code object on top of the code stack. Panics if the stack is empty.
fn current_code(&mut self) -> &mut CodeObject {
    &mut self
        .code_stack
        .last_mut()
        .expect("No OutputStream on stack")
        .code
}

// Generate a new label
fn new_label(&mut self) -> Label {
    let label_map = &mut self.code_stack.last_mut().unwrap().label_map;
    let label = Label(label_map.len());
    label_map.push(None);
    label
}

// Assign current position the given label
fn set_label(&mut self, label: Label) {
    let CodeInfo {
        code, label_map, ..
    } = self.code_stack.last_mut().unwrap();
    let actual_label = Label(code.instructions.len());
    let prev_val = std::mem::replace(&mut label_map[label.0], Some(actual_label));
    // Re-setting a label is only tolerated when it resolves to the same position.
    debug_assert!(
        prev_val.map_or(true, |x| x == actual_label),
        "double-set a label"
    );
}

/// Record `location` as the source location attached to subsequently emitted
/// instructions.
fn set_source_location(&mut self, location: ast::Location) {
    self.current_source_location = location;
}

/// Row of the current source location.
fn get_source_line_number(&mut self) -> usize {
    self.current_source_location.row()
}

/// Build `name` + `suffix`, prefixed with the current qualified path and a dot
/// when such a path is set.
fn create_qualified_name(&self, name: &str, suffix: &str) -> String {
    if let Some(ref qualified_path) = self.current_qualified_path {
        format!("{}.{}{}", qualified_path, name, suffix)
    } else {
        format!("{}{}", name, suffix)
    }
}

/// Flag the current code object as a generator.
fn mark_generator(&mut self) {
    self.current_code().flags |= bytecode::CodeFlags::IS_GENERATOR
}
} // closes the enclosing `impl` block, whose header lies outside this section

/// Split a leading docstring off `body`.
///
/// Returns the remaining statements and the docstring when the first statement is
/// an expression statement consisting of a constant string; otherwise returns
/// `body` unchanged and `None`.
fn get_doc(body: &[ast::Statement]) -> (&[ast::Statement], Option<String>) {
    if let Some((val, body_rest)) = body.split_first() {
        if let ast::StatementType::Expression { ref expression } = val.node {
            if let ast::ExpressionType::String { value } = &expression.node {
                if let Some(value) = try_get_constant_string(value) {
                    return (body_rest, Some(value));
                }
            }
        }
    }
    (body, None)
}

/// Concatenate `string` into a single `String` if it contains only constant
/// pieces; returns `None` as soon as a formatted value is encountered.
fn try_get_constant_string(string: &ast::StringGroup) -> Option<String> {
    fn get_constant_string_inner(out_string: &mut String, string: &ast::StringGroup) -> bool {
        match string {
            ast::StringGroup::Constant { value } => {
                out_string.push_str(value);
                true
            }
            ast::StringGroup::Joined { values } => values
                .iter()
                .all(|value| get_constant_string_inner(out_string, value)),
            ast::StringGroup::FormattedValue { .. } => false,
        }
    }
    let mut out_string = String::new();
    if get_constant_string_inner(&mut out_string, string) {
        Some(out_string)
    } else {
        None
    }
}

/// Convert an AST source location into a bytecode location (row and column).
fn compile_location(location: &ast::Location) -> bytecode::Location {
    bytecode::Location::new(location.row(), location.column())
}

/// Map an AST conversion flag onto the bytecode variant of the same name.
fn compile_conversion_flag(conversion_flag: ast::ConversionFlag) -> bytecode::ConversionFlag {
    match conversion_flag {
        ast::ConversionFlag::Ascii => bytecode::ConversionFlag::Ascii,
        ast::ConversionFlag::Repr => bytecode::ConversionFlag::Repr,
        ast::ConversionFlag::Str => bytecode::ConversionFlag::Str,
    }
}

#[cfg(test)]
mod tests {
    use super::{CompileOpts, Compiler};
    use crate::symboltable::make_symbol_table;
    use rustpython_bytecode::bytecode::CodeObject;
    use rustpython_parser::parser;

    /// Parse and compile `source` as a program and return the resulting code object.
    fn compile_exec(source: &str) -> CodeObject {
        let mut compiler: Compiler = Compiler::new(
            CompileOpts::default(),
            "source_path".to_owned(),
            // NOTE(review): top-level code name restored per CPython's convention;
            // confirm against the stored snapshots.
            "<module>".to_owned(),
        );
        let ast = parser::parse_program(source).unwrap();
        let symbol_scope = make_symbol_table(&ast).unwrap();
        compiler.compile_program(&ast, symbol_scope).unwrap();
        compiler.pop_code_object()
    }

    macro_rules! assert_dis_snapshot {
        ($value:expr) => {
            insta::assert_snapshot!(
                insta::internals::AutoName,
                $value.display_expand_codeobjects().to_string(),
                stringify!($value)
            )
        };
    }

    #[test]
    fn test_if_ors() {
        assert_dis_snapshot!(compile_exec(
            "\
if True or False or False:
    pass
"
        ));
    }

    #[test]
    fn test_if_ands() {
        assert_dis_snapshot!(compile_exec(
            "\
if True and False and False:
    pass
"
        ));
    }

    #[test]
    fn test_if_mixed() {
        assert_dis_snapshot!(compile_exec(
            "\
if (True and False) or (False and True):
    pass
"
        ));
    }
}