Fast locals part 1

This commit is contained in:
Noah 2020-11-28 14:51:28 -06:00
parent 534ca2a8de
commit 410bd76f38
2 changed files with 248 additions and 184 deletions

View file

@ -7,9 +7,7 @@
use crate::error::{CompileError, CompileErrorType}; use crate::error::{CompileError, CompileErrorType};
pub use crate::mode::Mode; pub use crate::mode::Mode;
use crate::symboltable::{ use crate::symboltable::{make_symbol_table, statements_to_symbol_table, SymbolScope, SymbolTable};
make_symbol_table, statements_to_symbol_table, Symbol, SymbolScope, SymbolTable,
};
use indexmap::IndexSet; use indexmap::IndexSet;
use itertools::Itertools; use itertools::Itertools;
use num_complex::Complex64; use num_complex::Complex64;
@ -21,6 +19,9 @@ type CompileResult<T> = Result<T, CompileError>;
struct CodeInfo { struct CodeInfo {
code: CodeObject, code: CodeObject,
name_cache: IndexSet<String>, name_cache: IndexSet<String>,
varname_cache: IndexSet<String>,
cellvar_cache: IndexSet<String>,
freevar_cache: IndexSet<String>,
label_map: Vec<Option<Label>>, label_map: Vec<Option<Label>>,
} }
impl CodeInfo { impl CodeInfo {
@ -28,9 +29,17 @@ impl CodeInfo {
let CodeInfo { let CodeInfo {
mut code, mut code,
name_cache, name_cache,
varname_cache,
cellvar_cache,
freevar_cache,
label_map, label_map,
} = self; } = self;
code.names.extend(name_cache); code.names.extend(name_cache);
code.varnames.extend(varname_cache);
code.cellvars.extend(cellvar_cache);
code.freevars.extend(freevar_cache);
for instruction in &mut code.instructions { for instruction in &mut code.instructions {
use Instruction::*; use Instruction::*;
// this is a little bit hacky, as until now the data stored inside Labels in // this is a little bit hacky, as until now the data stored inside Labels in
@ -55,8 +64,10 @@ impl CodeInfo {
} }
#[rustfmt::skip] #[rustfmt::skip]
Import { .. } | ImportStar | ImportFrom { .. } | LoadName { .. } | StoreName { .. } Import { .. } | ImportStar | ImportFrom { .. } | LoadFast(_) | LoadLocal(_)
| DeleteName { .. } | Subscript | StoreSubscript | DeleteSubscript | LoadGlobal(_) | LoadDeref(_) | LoadClassDeref(_) | StoreFast(_) | StoreLocal(_)
| StoreGlobal(_) | StoreDeref(_) | DeleteFast(_) | DeleteLocal(_) | DeleteGlobal(_)
| DeleteDeref(_) | LoadClosure(_) | Subscript | StoreSubscript | DeleteSubscript
| StoreAttr { .. } | DeleteAttr { .. } | LoadConst { .. } | UnaryOperation { .. } | StoreAttr { .. } | DeleteAttr { .. } | LoadConst { .. } | UnaryOperation { .. }
| BinaryOperation { .. } | LoadAttr { .. } | CompareOperation { .. } | Pop | BinaryOperation { .. } | LoadAttr { .. } | CompareOperation { .. } | Pop
| Rotate { .. } | Duplicate | GetIter | Continue | Break | MakeFunction | Rotate { .. } | Duplicate | GetIter | Continue | Break | MakeFunction
@ -73,6 +84,12 @@ impl CodeInfo {
} }
} }
/// The kind of access being compiled for a name.
///
/// `compile_name` combines this with the scope of the symbol to choose the
/// concrete instruction: the scope selects the instruction family (fast,
/// global, deref or local) and the usage selects the load, store or delete
/// member of that family.
enum NameUsage {
/// Select the `Load*` instruction of the family.
Load,
/// Select the `Store*` instruction of the family.
Store,
/// Select the `Delete*` instruction of the family.
Delete,
}
/// Main structure holding the state of compilation. /// Main structure holding the state of compilation.
struct Compiler { struct Compiler {
code_stack: Vec<CodeInfo>, code_stack: Vec<CodeInfo>,
@ -100,6 +117,7 @@ impl Default for CompileOpts {
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
struct CompileContext { struct CompileContext {
in_loop: bool, in_loop: bool,
in_class: bool,
func: FunctionContext, func: FunctionContext,
} }
@ -122,8 +140,7 @@ fn with_compiler(
opts: CompileOpts, opts: CompileOpts,
f: impl FnOnce(&mut Compiler) -> CompileResult<()>, f: impl FnOnce(&mut Compiler) -> CompileResult<()>,
) -> CompileResult<CodeObject> { ) -> CompileResult<CodeObject> {
let mut compiler = Compiler::new(opts, source_path); let mut compiler = Compiler::new(opts, source_path, "<module>".to_owned());
compiler.push_new_code_object("<module>".to_owned());
f(&mut compiler)?; f(&mut compiler)?;
let code = compiler.pop_code_object(); let code = compiler.pop_code_object();
trace!("Compilation completed: {:?}", code); trace!("Compilation completed: {:?}", code);
@ -176,9 +193,25 @@ pub fn compile_program_single(
} }
impl Compiler { impl Compiler {
fn new(opts: CompileOpts, source_path: String) -> Self { fn new(opts: CompileOpts, source_path: String, code_name: String) -> Self {
let module_code = CodeInfo {
code: CodeObject::new(
Default::default(),
0,
0,
0,
source_path.clone(),
0,
code_name,
),
name_cache: IndexSet::new(),
varname_cache: IndexSet::new(),
cellvar_cache: IndexSet::new(),
freevar_cache: IndexSet::new(),
label_map: Vec::new(),
};
Compiler { Compiler {
code_stack: Vec::new(), code_stack: vec![module_code],
symbol_table_stack: Vec::new(), symbol_table_stack: Vec::new(),
source_path, source_path,
current_source_location: ast::Location::default(), current_source_location: ast::Location::default(),
@ -186,6 +219,7 @@ impl Compiler {
done_with_future_stmts: false, done_with_future_stmts: false,
ctx: CompileContext { ctx: CompileContext {
in_loop: false, in_loop: false,
in_class: false,
func: FunctionContext::NoFunction, func: FunctionContext::NoFunction,
}, },
opts, opts,
@ -204,43 +238,63 @@ impl Compiler {
} }
fn push_output(&mut self, code: CodeObject) { fn push_output(&mut self, code: CodeObject) {
self.code_stack.push(CodeInfo { let table = self
.symbol_table_stack
.last_mut()
.unwrap()
.sub_tables
.remove(0);
let cellvar_cache = table
.symbols
.iter()
.filter(|(_, s)| matches!(s.scope, SymbolScope::Cell))
.map(|(var, _)| var.clone())
.collect();
let freevar_cache = table
.symbols
.iter()
// TODO: check if Free or FREE_CLASS symbol
.filter(|(_, s)| matches!(s.scope, SymbolScope::Free))
.map(|(var, _)| var.clone())
.collect();
self.symbol_table_stack.push(table);
let info = CodeInfo {
code, code,
name_cache: IndexSet::new(), name_cache: IndexSet::new(),
varname_cache: IndexSet::new(),
cellvar_cache,
freevar_cache,
label_map: Vec::new(), label_map: Vec::new(),
}); };
} self.code_stack.push(info);
fn push_new_code_object(&mut self, obj_name: String) {
let line_number = self.get_source_line_number();
self.push_output(CodeObject::new(
Default::default(),
0,
0,
0,
self.source_path.clone(),
line_number,
obj_name,
));
} }
fn pop_code_object(&mut self) -> CodeObject { fn pop_code_object(&mut self) -> CodeObject {
let table = self.symbol_table_stack.pop().unwrap();
assert!(table.sub_tables.is_empty());
self.code_stack.pop().unwrap().finalize_code() self.code_stack.pop().unwrap().finalize_code()
} }
// could take impl Into<Cow<str>>, but everything is borrowed from ast structs; we never // could take impl Into<Cow<str>>, but everything is borrowed from ast structs; we never
// actually have a `String` to pass // actually have a `String` to pass
fn name(&mut self, name: &str) -> bytecode::NameIdx { fn name(&mut self, name: &str) -> bytecode::NameIdx {
let cache = &mut self self._name_inner(name, |i| &mut i.name_cache)
.code_stack
.last_mut()
.expect("nothing on stack")
.name_cache;
if let Some(x) = cache.get_index_of(name) {
x
} else {
cache.insert_full(name.to_owned()).0
} }
/// Returns the index of `name` in the `varname_cache` of the code object on
/// top of the code stack, adding the name to that cache first if needed.
fn varname(&mut self, name: &str) -> bytecode::NameIdx {
    self._name_inner(name, |code_info| &mut code_info.varname_cache)
}
fn _name_inner(
&mut self,
name: &str,
cache: impl FnOnce(&mut CodeInfo) -> &mut IndexSet<String>,
) -> bytecode::NameIdx {
let cache = cache(self.code_stack.last_mut().expect("nothing on stack"));
cache
.get_index_of(name)
.unwrap_or_else(|| cache.insert_full(name.to_owned()).0)
} }
fn compile_program( fn compile_program(
@ -255,10 +309,7 @@ impl Compiler {
if let Some(value) = doc { if let Some(value) = doc {
self.emit_constant(bytecode::ConstantData::Str { value }); self.emit_constant(bytecode::ConstantData::Str { value });
let doc = self.name("__doc__"); let doc = self.name("__doc__");
self.emit(Instruction::StoreName { self.emit(Instruction::StoreGlobal(doc))
idx: doc,
scope: bytecode::NameScope::Global,
});
} }
self.compile_statements(statements)?; self.compile_statements(statements)?;
@ -331,34 +382,73 @@ impl Compiler {
Ok(()) Ok(())
} }
fn scope_for_name(&self, name: &str) -> bytecode::NameScope {
let symbol = self.lookup_name(name);
match symbol.scope {
SymbolScope::Global => bytecode::NameScope::Global,
SymbolScope::Nonlocal => bytecode::NameScope::NonLocal,
SymbolScope::Unknown => bytecode::NameScope::Free,
SymbolScope::Local => {
// Only in function block, we use load local
// https://github.com/python/cpython/blob/master/Python/compile.c#L3582
if self.ctx.in_func() {
bytecode::NameScope::Local
} else {
bytecode::NameScope::Free
}
}
}
}
fn load_name(&mut self, name: &str) { fn load_name(&mut self, name: &str) {
let scope = self.scope_for_name(name); self.compile_name(name, NameUsage::Load)
let idx = self.name(name);
self.emit(Instruction::LoadName { idx, scope });
} }
fn store_name(&mut self, name: &str) { fn store_name(&mut self, name: &str) {
let scope = self.scope_for_name(name); self.compile_name(name, NameUsage::Store)
let idx = self.name(name); }
self.emit(Instruction::StoreName { idx, scope });
fn compile_name(&mut self, name: &str, usage: NameUsage) {
let symbol_table = self.symbol_table_stack.last().unwrap();
let symbol = symbol_table.lookup(name).expect(
"The symbol must be present in the symbol table, even when it is undefined in python.",
);
let info = self.code_stack.last_mut().unwrap();
let mut cache = &mut info.name_cache;
enum NameOpType {
Fast,
Global,
Deref,
Local,
}
let op_typ = match symbol.scope {
SymbolScope::Local if self.ctx.in_func() => {
cache = &mut info.varname_cache;
NameOpType::Fast
}
SymbolScope::Local => NameOpType::Local,
SymbolScope::GlobalImplicit if self.ctx.in_func() => NameOpType::Global,
SymbolScope::GlobalImplicit => NameOpType::Local,
SymbolScope::GlobalExplicit => NameOpType::Global,
SymbolScope::Free => {
cache = &mut info.freevar_cache;
NameOpType::Deref
}
SymbolScope::Cell => {
cache = &mut info.cellvar_cache;
NameOpType::Deref
}
// TODO: is this right?
SymbolScope::Unknown => NameOpType::Global,
};
let idx = cache
.get_index_of(name)
.unwrap_or_else(|| cache.insert_full(name.to_owned()).0);
let op = match op_typ {
NameOpType::Fast => match usage {
NameUsage::Load => Instruction::LoadFast,
NameUsage::Store => Instruction::StoreFast,
NameUsage::Delete => Instruction::DeleteFast,
},
NameOpType::Global => match usage {
NameUsage::Load => Instruction::LoadGlobal,
NameUsage::Store => Instruction::StoreGlobal,
NameUsage::Delete => Instruction::DeleteGlobal,
},
NameOpType::Deref => match usage {
NameUsage::Load => Instruction::LoadDeref,
NameUsage::Store => Instruction::StoreDeref,
NameUsage::Delete => Instruction::DeleteDeref,
},
NameOpType::Local => match usage {
NameUsage::Load => Instruction::LoadLocal,
NameUsage::Store => Instruction::StoreLocal,
NameUsage::Delete => Instruction::DeleteLocal,
},
};
self.emit(op(idx));
} }
fn compile_statement(&mut self, statement: &ast::Statement) -> CompileResult<()> { fn compile_statement(&mut self, statement: &ast::Statement) -> CompileResult<()> {
@ -584,10 +674,7 @@ impl Compiler {
let end_label = self.new_label(); let end_label = self.new_label();
self.compile_jump_if(test, true, end_label)?; self.compile_jump_if(test, true, end_label)?;
let assertion_error = self.name("AssertionError"); let assertion_error = self.name("AssertionError");
self.emit(Instruction::LoadName { self.emit(Instruction::LoadGlobal(assertion_error));
idx: assertion_error,
scope: bytecode::NameScope::Global,
});
match msg { match msg {
Some(e) => { Some(e) => {
self.compile_expression(e)?; self.compile_expression(e)?;
@ -683,8 +770,7 @@ impl Compiler {
fn compile_delete(&mut self, expression: &ast::Expression) -> CompileResult<()> { fn compile_delete(&mut self, expression: &ast::Expression) -> CompileResult<()> {
match &expression.node { match &expression.node {
ast::ExpressionType::Identifier { name } => { ast::ExpressionType::Identifier { name } => {
let idx = self.name(name); self.compile_name(name, NameUsage::Delete);
self.emit(Instruction::DeleteName { idx });
} }
ast::ExpressionType::Attribute { value, name } => { ast::ExpressionType::Attribute { value, name } => {
self.compile_expression(value)?; self.compile_expression(value)?;
@ -738,7 +824,7 @@ impl Compiler {
}); });
} }
let mut flags = bytecode::CodeFlags::default(); let mut flags = bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED;
if have_defaults { if have_defaults {
flags |= bytecode::CodeFlags::HAS_DEFAULTS; flags |= bytecode::CodeFlags::HAS_DEFAULTS;
} }
@ -775,8 +861,6 @@ impl Compiler {
compile_varargs(&args.vararg, bytecode::CodeFlags::HAS_VARARGS); compile_varargs(&args.vararg, bytecode::CodeFlags::HAS_VARARGS);
compile_varargs(&args.kwarg, bytecode::CodeFlags::HAS_VARKEYWORDS); compile_varargs(&args.kwarg, bytecode::CodeFlags::HAS_VARKEYWORDS);
self.enter_scope();
Ok(()) Ok(())
} }
@ -923,6 +1007,7 @@ impl Compiler {
self.ctx = CompileContext { self.ctx = CompileContext {
in_loop: false, in_loop: false,
in_class: prev_ctx.in_class,
func: if is_async { func: if is_async {
FunctionContext::AsyncFunction FunctionContext::AsyncFunction
} else { } else {
@ -954,7 +1039,6 @@ impl Compiler {
} }
let mut code = self.pop_code_object(); let mut code = self.pop_code_object();
self.leave_scope();
// Prepare type annotations: // Prepare type annotations:
let mut num_annotations = 0; let mut num_annotations = 0;
@ -1006,6 +1090,27 @@ impl Compiler {
code.flags |= bytecode::CodeFlags::IS_COROUTINE; code.flags |= bytecode::CodeFlags::IS_COROUTINE;
} }
if !code.freevars.is_empty() {
for var in &code.freevars {
let symbol = self.symbol_table_stack.last().unwrap().lookup(var).unwrap();
let parent_code = self.code_stack.last().unwrap();
let vars = match symbol.scope {
SymbolScope::Free => &parent_code.freevar_cache,
SymbolScope::Cell => &parent_code.cellvar_cache,
_ => unreachable!(),
};
let mut idx = vars.get_index_of(var).unwrap();
if let SymbolScope::Free = symbol.scope {
idx += parent_code.cellvar_cache.len();
}
self.emit(Instruction::LoadClosure(idx))
}
self.emit(Instruction::BuildTuple {
size: code.freevars.len(),
unpack: false,
})
}
self.emit_constant(bytecode::ConstantData::Code { self.emit_constant(bytecode::ConstantData::Code {
code: Box::new(code), code: Box::new(code),
}); });
@ -1098,6 +1203,7 @@ impl Compiler {
let prev_ctx = self.ctx; let prev_ctx = self.ctx;
self.ctx = CompileContext { self.ctx = CompileContext {
func: FunctionContext::NoFunction, func: FunctionContext::NoFunction,
in_class: true,
in_loop: false, in_loop: false,
}; };
@ -1109,7 +1215,7 @@ impl Compiler {
self.emit(Instruction::LoadBuildClass); self.emit(Instruction::LoadBuildClass);
let line_number = self.get_source_line_number(); let line_number = self.get_source_line_number();
self.push_output(CodeObject::new( self.push_output(CodeObject::new(
Default::default(), bytecode::CodeFlags::empty(),
0, 0,
0, 0,
0, 0,
@ -1117,34 +1223,21 @@ impl Compiler {
line_number, line_number,
name.to_owned(), name.to_owned(),
)); ));
self.enter_scope();
let (new_body, doc_str) = get_doc(body); let (new_body, doc_str) = get_doc(body);
let dunder_name = self.name("__name__"); let dunder_name = self.name("__name__");
self.emit(Instruction::LoadName { self.emit(Instruction::LoadGlobal(dunder_name));
idx: dunder_name,
scope: bytecode::NameScope::Global,
});
let dunder_module = self.name("__module__"); let dunder_module = self.name("__module__");
self.emit(Instruction::StoreName { self.emit(Instruction::StoreLocal(dunder_module));
idx: dunder_module,
scope: bytecode::NameScope::Free,
});
self.emit_constant(bytecode::ConstantData::Str { self.emit_constant(bytecode::ConstantData::Str {
value: qualified_name.clone(), value: qualified_name.clone(),
}); });
let qualname = self.name("__qualname__"); let qualname = self.name("__qualname__");
self.emit(Instruction::StoreName { self.emit(Instruction::StoreLocal(qualname));
idx: qualname,
scope: bytecode::NameScope::Free,
});
self.load_docstring(doc_str); self.load_docstring(doc_str);
let doc = self.name("__doc__"); let doc = self.name("__doc__");
self.emit(Instruction::StoreName { self.emit(Instruction::StoreLocal(doc));
idx: doc,
scope: bytecode::NameScope::Free,
});
// setup annotations // setup annotations
if self.find_ann(body) { if self.find_ann(body) {
self.emit(Instruction::SetupAnnotation); self.emit(Instruction::SetupAnnotation);
@ -1153,9 +1246,7 @@ impl Compiler {
self.emit_constant(bytecode::ConstantData::None); self.emit_constant(bytecode::ConstantData::None);
self.emit(Instruction::ReturnValue); self.emit(Instruction::ReturnValue);
let mut code = self.pop_code_object(); let code = self.pop_code_object();
code.flags.remove(bytecode::CodeFlags::NEW_LOCALS);
self.leave_scope();
self.emit_constant(bytecode::ConstantData::Code { self.emit_constant(bytecode::ConstantData::Code {
code: Box::new(code), code: Box::new(code),
@ -1295,10 +1386,7 @@ impl Compiler {
self.set_label(check_asynciter_label); self.set_label(check_asynciter_label);
self.emit(Instruction::Duplicate); self.emit(Instruction::Duplicate);
let stopasynciter = self.name("StopAsyncIteration"); let stopasynciter = self.name("StopAsyncIteration");
self.emit(Instruction::LoadName { self.emit(Instruction::LoadGlobal(stopasynciter));
idx: stopasynciter,
scope: bytecode::NameScope::Global,
});
self.emit(Instruction::CompareOperation { self.emit(Instruction::CompareOperation {
op: bytecode::ComparisonOperator::ExceptionMatch, op: bytecode::ComparisonOperator::ExceptionMatch,
}); });
@ -1436,15 +1524,14 @@ impl Compiler {
if let ast::ExpressionType::Identifier { name } = &target.node { if let ast::ExpressionType::Identifier { name } = &target.node {
// Store as dict entry in __annotations__ dict: // Store as dict entry in __annotations__ dict:
if !self.ctx.in_func() {
let annotations = self.name("__annotations__"); let annotations = self.name("__annotations__");
self.emit(Instruction::LoadName { self.emit(Instruction::LoadLocal(annotations));
idx: annotations,
scope: bytecode::NameScope::Local,
});
self.emit_constant(bytecode::ConstantData::Str { self.emit_constant(bytecode::ConstantData::Str {
value: name.to_owned(), value: name.to_owned(),
}); });
self.emit(Instruction::StoreSubscript); self.emit(Instruction::StoreSubscript);
}
} else { } else {
// Drop annotation if not assigned to simple identifier. // Drop annotation if not assigned to simple identifier.
self.emit(Instruction::Pop); self.emit(Instruction::Pop);
@ -1846,6 +1933,7 @@ impl Compiler {
let prev_ctx = self.ctx; let prev_ctx = self.ctx;
self.ctx = CompileContext { self.ctx = CompileContext {
in_loop: false, in_loop: false,
in_class: prev_ctx.in_class,
func: FunctionContext::Function, func: FunctionContext::Function,
}; };
@ -1854,7 +1942,6 @@ impl Compiler {
self.compile_expression(body)?; self.compile_expression(body)?;
self.emit(Instruction::ReturnValue); self.emit(Instruction::ReturnValue);
let code = self.pop_code_object(); let code = self.pop_code_object();
self.leave_scope();
self.emit_constant(bytecode::ConstantData::Code { self.emit_constant(bytecode::ConstantData::Code {
code: Box::new(code), code: Box::new(code),
}); });
@ -2044,8 +2131,7 @@ impl Compiler {
line_number, line_number,
name.clone(), name.clone(),
)); ));
let arg0 = self.name(".0"); let arg0 = self.varname(".0");
self.enter_scope();
// Create empty object of proper type: // Create empty object of proper type:
match kind { match kind {
@ -2079,10 +2165,7 @@ impl Compiler {
if loop_labels.is_empty() { if loop_labels.is_empty() {
// Load iterator onto stack (passed as first argument): // Load iterator onto stack (passed as first argument):
self.emit(Instruction::LoadName { self.emit(Instruction::LoadFast(arg0));
idx: arg0,
scope: bytecode::NameScope::Local,
});
} else { } else {
// Evaluate iterated item: // Evaluate iterated item:
self.compile_expression(&generator.iter)?; self.compile_expression(&generator.iter)?;
@ -2169,9 +2252,6 @@ impl Compiler {
// Fetch code for listcomp function: // Fetch code for listcomp function:
let code = self.pop_code_object(); let code = self.pop_code_object();
// Pop scope
self.leave_scope();
// List comprehension code: // List comprehension code:
self.emit_constant(bytecode::ConstantData::Code { self.emit_constant(bytecode::ConstantData::Code {
code: Box::new(code), code: Box::new(code),
@ -2255,33 +2335,6 @@ impl Compiler {
Ok(()) Ok(())
} }
// Scope helpers:
fn enter_scope(&mut self) {
// println!("Enter scope {:?}", self.symbol_table_stack);
// Enter first subscope!
let table = self
.symbol_table_stack
.last_mut()
.unwrap()
.sub_tables
.remove(0);
self.symbol_table_stack.push(table);
}
fn leave_scope(&mut self) {
// println!("Leave scope {:?}", self.symbol_table_stack);
let table = self.symbol_table_stack.pop().unwrap();
assert!(table.sub_tables.is_empty());
}
fn lookup_name(&self, name: &str) -> &Symbol {
// println!("Looking up {:?}", name);
let symbol_table = self.symbol_table_stack.last().unwrap();
symbol_table.lookup(name).expect(
"The symbol must be present in the symbol table, even when it is undefined in python.",
)
}
// Low level helper functions: // Low level helper functions:
fn emit(&mut self, instruction: Instruction) { fn emit(&mut self, instruction: Instruction) {
let location = compile_location(&self.current_source_location); let location = compile_location(&self.current_source_location);
@ -2402,9 +2455,11 @@ mod tests {
use rustpython_parser::parser; use rustpython_parser::parser;
fn compile_exec(source: &str) -> CodeObject { fn compile_exec(source: &str) -> CodeObject {
let mut compiler: Compiler = let mut compiler: Compiler = Compiler::new(
Compiler::new(CompileOpts::default(), "source_path".to_owned()); CompileOpts::default(),
compiler.push_new_code_object("<module>".to_owned()); "source_path".to_owned(),
"<module>".to_owned(),
);
let ast = parser::parse_program(source).unwrap(); let ast = parser::parse_program(source).unwrap();
let symbol_scope = make_symbol_table(&ast).unwrap(); let symbol_scope = make_symbol_table(&ast).unwrap();
compiler.compile_program(&ast, symbol_scope).unwrap(); compiler.compile_program(&ast, symbol_scope).unwrap();

View file

@ -64,7 +64,7 @@ impl SymbolTable {
} }
} }
#[derive(Clone, Copy, PartialEq)] #[derive(Debug, Clone, Copy, PartialEq)]
pub enum SymbolTableType { pub enum SymbolTableType {
Module, Module,
Class, Class,
@ -85,12 +85,14 @@ impl fmt::Display for SymbolTableType {
/// Indicator for a single symbol what the scope of this symbol is. /// Indicator for a single symbol what the scope of this symbol is.
/// The scope can be unknown, which is unfortunate, but not impossible. /// The scope can be unknown, which is unfortunate, but not impossible.
#[derive(Debug, Clone)] #[derive(Debug, Clone, Copy)]
pub enum SymbolScope { pub enum SymbolScope {
Global,
Nonlocal,
Local,
Unknown, Unknown,
Local,
GlobalExplicit,
GlobalImplicit,
Free,
Cell,
} }
/// A single symbol in a table. Has various properties such as the scope /// A single symbol in a table. Has various properties such as the scope
@ -104,7 +106,6 @@ pub struct Symbol {
pub is_referenced: bool, pub is_referenced: bool,
pub is_assigned: bool, pub is_assigned: bool,
pub is_parameter: bool, pub is_parameter: bool,
pub is_free: bool,
pub is_annotated: bool, pub is_annotated: bool,
pub is_imported: bool, pub is_imported: bool,
@ -126,7 +127,6 @@ impl Symbol {
is_referenced: false, is_referenced: false,
is_assigned: false, is_assigned: false,
is_parameter: false, is_parameter: false,
is_free: false,
is_annotated: false, is_annotated: false,
is_imported: false, is_imported: false,
is_assign_namedexpr_in_comprehension: false, is_assign_namedexpr_in_comprehension: false,
@ -135,7 +135,10 @@ impl Symbol {
} }
pub fn is_global(&self) -> bool { pub fn is_global(&self) -> bool {
matches!(self.scope, SymbolScope::Global) matches!(
self.scope,
SymbolScope::GlobalExplicit | SymbolScope::GlobalImplicit
)
} }
pub fn is_local(&self) -> bool { pub fn is_local(&self) -> bool {
@ -228,7 +231,7 @@ impl<'a> SymbolTableAnalyzer<'a> {
self.analyze_symbol_comprehension(symbol, 0)? self.analyze_symbol_comprehension(symbol, 0)?
} else { } else {
match symbol.scope { match symbol.scope {
SymbolScope::Nonlocal => { SymbolScope::Free => {
let scope_depth = self.tables.len(); let scope_depth = self.tables.len();
if scope_depth > 0 { if scope_depth > 0 {
// check if the name is already defined in any outer scope // check if the name is already defined in any outer scope
@ -251,10 +254,10 @@ impl<'a> SymbolTableAnalyzer<'a> {
}); });
} }
} }
SymbolScope::Global => { SymbolScope::GlobalExplicit | SymbolScope::GlobalImplicit => {
// TODO: add more checks for globals? // TODO: add more checks for globals?
} }
SymbolScope::Local => { SymbolScope::Local | SymbolScope::Cell => {
// all is well // all is well
} }
SymbolScope::Unknown => { SymbolScope::Unknown => {
@ -270,24 +273,28 @@ impl<'a> SymbolTableAnalyzer<'a> {
// Interesting stuff about the __class__ variable: // Interesting stuff about the __class__ variable:
// https://docs.python.org/3/reference/datamodel.html?highlight=__class__#creating-the-class-object // https://docs.python.org/3/reference/datamodel.html?highlight=__class__#creating-the-class-object
symbol.name == "__class__" symbol.name == "__class__"
|| self.tables.iter().skip(1).any(|(symbols, typ)| { || self.tables.iter().skip(1).rev().any(|(symbols, typ)| {
*typ != SymbolTableType::Class && symbols.contains_key(&symbol.name) *typ != SymbolTableType::Class
&& symbols
.get(&symbol.name)
.map_or(false, |sym| sym.is_local() && sym.is_assigned)
}) })
} }
fn analyze_unknown_symbol(&self, symbol: &mut Symbol) { fn analyze_unknown_symbol(&self, symbol: &mut Symbol) {
if symbol.is_assigned || symbol.is_parameter { let scope = if symbol.is_assigned || symbol.is_parameter {
symbol.scope = SymbolScope::Local; SymbolScope::Local
} else if self.found_in_outer_scope(symbol) { } else if self.found_in_outer_scope(symbol) {
// Symbol is in some outer scope. // Symbol is in some outer scope.
symbol.is_free = true; SymbolScope::Free
} else if self.tables.is_empty() { } else if self.tables.is_empty() {
// Don't make assumptions when we don't know. // Don't make assumptions when we don't know.
symbol.scope = SymbolScope::Unknown; SymbolScope::Unknown
} else { } else {
// If there are scopes above we can assume global. // If there are scopes above we assume global.
symbol.scope = SymbolScope::Global; SymbolScope::GlobalImplicit
} };
symbol.scope = scope;
} }
// Implements the symbol analysis and scope extension for names // Implements the symbol analysis and scope extension for names
@ -302,7 +309,7 @@ impl<'a> SymbolTableAnalyzer<'a> {
// when this is called, we expect to be in the direct parent scope of the scope that contains 'symbol' // when this is called, we expect to be in the direct parent scope of the scope that contains 'symbol'
let offs = self.tables.len() - 1 - parent_offset; let offs = self.tables.len() - 1 - parent_offset;
let last = self.tables.get_mut(offs).unwrap(); let last = self.tables.get_mut(offs).unwrap();
let symbols = &mut last.0; let symbols = &mut *last.0;
let table_type = last.1; let table_type = last.1;
// it is not allowed to use an iterator variable as assignee in a named expression // it is not allowed to use an iterator variable as assignee in a named expression
@ -319,7 +326,7 @@ impl<'a> SymbolTableAnalyzer<'a> {
match table_type { match table_type {
SymbolTableType::Module => { SymbolTableType::Module => {
symbol.scope = SymbolScope::Global; symbol.scope = SymbolScope::GlobalImplicit;
} }
SymbolTableType::Class => { SymbolTableType::Class => {
// named expressions are forbidden in comprehensions on class scope // named expressions are forbidden in comprehensions on class scope
@ -332,17 +339,13 @@ impl<'a> SymbolTableAnalyzer<'a> {
SymbolTableType::Function => { SymbolTableType::Function => {
if let Some(parent_symbol) = symbols.get_mut(&symbol.name) { if let Some(parent_symbol) = symbols.get_mut(&symbol.name) {
if let SymbolScope::Unknown = parent_symbol.scope { if let SymbolScope::Unknown = parent_symbol.scope {
parent_symbol.is_assigned = true; // this information is new, as the asignment is done in inner scope // this information is new, as the asignment is done in inner scope
parent_symbol.is_assigned = true;
//self.analyze_unknown_symbol(symbol); // not needed, symbol is analyzed anyhow when its scope is analyzed //self.analyze_unknown_symbol(symbol); // not needed, symbol is analyzed anyhow when its scope is analyzed
} }
match symbol.scope { if !symbol.is_global() {
SymbolScope::Global => { symbol.scope = SymbolScope::Free;
symbol.scope = SymbolScope::Global;
}
_ => {
symbol.scope = SymbolScope::Nonlocal;
}
} }
} else { } else {
let mut cloned_sym = symbol.clone(); let mut cloned_sym = symbol.clone();
@ -372,7 +375,7 @@ impl<'a> SymbolTableAnalyzer<'a> {
// outside, too, and set it therefore to non-local scope. I.e., we expect to // outside, too, and set it therefore to non-local scope. I.e., we expect to
// find a definition on a higher level // find a definition on a higher level
let mut cloned_sym = symbol.clone(); let mut cloned_sym = symbol.clone();
cloned_sym.scope = SymbolScope::Nonlocal; cloned_sym.scope = SymbolScope::Free;
last.0.insert(cloned_sym.name.to_owned(), cloned_sym); last.0.insert(cloned_sym.name.to_owned(), cloned_sym);
} }
} }
@ -408,7 +411,7 @@ struct SymbolTableBuilder {
/// was used. /// was used.
/// In cpython this is stored in the AST, but I think this /// In cpython this is stored in the AST, but I think this
/// is not logical, since it is not context free. /// is not logical, since it is not context free.
#[derive(Copy, Clone)] #[derive(Copy, Clone, PartialEq)]
enum ExpressionContext { enum ExpressionContext {
Load, Load,
Store, Store,
@ -825,6 +828,12 @@ impl SymbolTableBuilder {
self.register_name(name, SymbolUsage::Iter, location)?; self.register_name(name, SymbolUsage::Iter, location)?;
} }
} }
if context == ExpressionContext::Load
&& self.tables.last().unwrap().typ == SymbolTableType::Function
&& name == "super"
{
self.register_name("__class__", SymbolUsage::Used, location)?;
}
} }
Lambda { args, body } => { Lambda { args, body } => {
self.enter_function("lambda", args, expression.location.row())?; self.enter_function("lambda", args, expression.location.row())?;
@ -949,7 +958,7 @@ impl SymbolTableBuilder {
// Role already set.. // Role already set..
match role { match role {
SymbolUsage::Global => { SymbolUsage::Global => {
if let SymbolScope::Global = symbol.scope { if symbol.is_global() {
// Ok // Ok
} else { } else {
return Err(SymbolTableError { return Err(SymbolTableError {
@ -1014,7 +1023,7 @@ impl SymbolTableBuilder {
let symbol = table.symbols.get_mut(name).unwrap(); let symbol = table.symbols.get_mut(name).unwrap();
match role { match role {
SymbolUsage::Nonlocal => { SymbolUsage::Nonlocal => {
symbol.scope = SymbolScope::Nonlocal; symbol.scope = SymbolScope::Free;
} }
SymbolUsage::Imported => { SymbolUsage::Imported => {
symbol.is_assigned = true; symbol.is_assigned = true;
@ -1040,8 +1049,8 @@ impl SymbolTableBuilder {
} }
SymbolUsage::Global => { SymbolUsage::Global => {
if let SymbolScope::Unknown = symbol.scope { if let SymbolScope::Unknown = symbol.scope {
symbol.scope = SymbolScope::Global; symbol.scope = SymbolScope::GlobalImplicit;
} else if let SymbolScope::Global = symbol.scope { } else if symbol.is_global() {
// Global scope can be set to global // Global scope can be set to global
} else { } else {
return Err(SymbolTableError { return Err(SymbolTableError {