mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-08 05:35:22 +00:00
Split off bytecode compilation into a separate crate
This commit is contained in:
commit
064919348d
6 changed files with 2995 additions and 0 deletions
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "rustpython_compiler"
|
||||
version = "0.1.0"
|
||||
authors = ["coolreader18 <33094578+coolreader18@users.noreply.github.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
bitflags = "1.1"
|
||||
rustpython_parser = { path = "../parser" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
num-complex = { version = "0.2", features = ["serde"] }
|
||||
num-bigint = { version = "0.2", features = ["serde"] }
|
||||
log = "0.3"
|
446
src/bytecode.rs
Normal file
446
src/bytecode.rs
Normal file
|
@ -0,0 +1,446 @@
|
|||
//! Implement python as a virtual machine with bytecodes. This module
|
||||
//! implements bytecode structure.
|
||||
|
||||
/*
|
||||
* Primitive instruction type, which can be encoded and decoded.
|
||||
*/
|
||||
|
||||
use bitflags::bitflags;
|
||||
use num_bigint::BigInt;
|
||||
use num_complex::Complex64;
|
||||
use rustpython_parser::ast;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
|
||||
/// Primary container of a single code object. Each python function has
|
||||
/// a codeobject. Also a module has a codeobject.
|
||||
#[derive(Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CodeObject {
|
||||
pub instructions: Vec<Instruction>,
|
||||
/// Jump targets.
|
||||
pub label_map: HashMap<Label, usize>,
|
||||
pub locations: Vec<ast::Location>,
|
||||
pub arg_names: Vec<String>, // Names of positional arguments
|
||||
pub varargs: Varargs, // *args or *
|
||||
pub kwonlyarg_names: Vec<String>,
|
||||
pub varkeywords: Varargs, // **kwargs or **
|
||||
pub source_path: String,
|
||||
pub first_line_number: usize,
|
||||
pub obj_name: String, // Name of the object that created this code object
|
||||
pub is_generator: bool,
|
||||
}
|
||||
|
||||
bitflags! {
    /// Flag set carried by `Instruction::MakeFunction`.
    #[derive(Serialize, Deserialize)]
    pub struct FunctionOpArg: u8 {
        const HAS_DEFAULTS = 0b0000_0001;
        const HAS_KW_ONLY_DEFAULTS = 0b0000_0010;
        const HAS_ANNOTATIONS = 0b0000_0100;
    }
}
|
||||
|
||||
/// Identifier of a jump target; used as the key of `CodeObject::label_map`.
pub type Label = usize;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum NameScope {
|
||||
Local,
|
||||
NonLocal,
|
||||
Global,
|
||||
}
|
||||
|
||||
/// A Single bytecode instruction.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum Instruction {
|
||||
Import {
|
||||
name: String,
|
||||
symbol: Option<String>,
|
||||
},
|
||||
ImportStar {
|
||||
name: String,
|
||||
},
|
||||
LoadName {
|
||||
name: String,
|
||||
scope: NameScope,
|
||||
},
|
||||
StoreName {
|
||||
name: String,
|
||||
scope: NameScope,
|
||||
},
|
||||
DeleteName {
|
||||
name: String,
|
||||
},
|
||||
StoreSubscript,
|
||||
DeleteSubscript,
|
||||
StoreAttr {
|
||||
name: String,
|
||||
},
|
||||
DeleteAttr {
|
||||
name: String,
|
||||
},
|
||||
LoadConst {
|
||||
value: Constant,
|
||||
},
|
||||
UnaryOperation {
|
||||
op: UnaryOperator,
|
||||
},
|
||||
BinaryOperation {
|
||||
op: BinaryOperator,
|
||||
inplace: bool,
|
||||
},
|
||||
LoadAttr {
|
||||
name: String,
|
||||
},
|
||||
CompareOperation {
|
||||
op: ComparisonOperator,
|
||||
},
|
||||
Pop,
|
||||
Rotate {
|
||||
amount: usize,
|
||||
},
|
||||
Duplicate,
|
||||
GetIter,
|
||||
Pass,
|
||||
Continue,
|
||||
Break,
|
||||
Jump {
|
||||
target: Label,
|
||||
},
|
||||
JumpIf {
|
||||
target: Label,
|
||||
},
|
||||
JumpIfFalse {
|
||||
target: Label,
|
||||
},
|
||||
MakeFunction {
|
||||
flags: FunctionOpArg,
|
||||
},
|
||||
CallFunction {
|
||||
typ: CallType,
|
||||
},
|
||||
ForIter {
|
||||
target: Label,
|
||||
},
|
||||
ReturnValue,
|
||||
YieldValue,
|
||||
YieldFrom,
|
||||
SetupLoop {
|
||||
start: Label,
|
||||
end: Label,
|
||||
},
|
||||
SetupExcept {
|
||||
handler: Label,
|
||||
},
|
||||
SetupWith {
|
||||
end: Label,
|
||||
},
|
||||
CleanupWith {
|
||||
end: Label,
|
||||
},
|
||||
PopBlock,
|
||||
Raise {
|
||||
argc: usize,
|
||||
},
|
||||
BuildString {
|
||||
size: usize,
|
||||
},
|
||||
BuildTuple {
|
||||
size: usize,
|
||||
unpack: bool,
|
||||
},
|
||||
BuildList {
|
||||
size: usize,
|
||||
unpack: bool,
|
||||
},
|
||||
BuildSet {
|
||||
size: usize,
|
||||
unpack: bool,
|
||||
},
|
||||
BuildMap {
|
||||
size: usize,
|
||||
unpack: bool,
|
||||
},
|
||||
BuildSlice {
|
||||
size: usize,
|
||||
},
|
||||
ListAppend {
|
||||
i: usize,
|
||||
},
|
||||
SetAdd {
|
||||
i: usize,
|
||||
},
|
||||
MapAdd {
|
||||
i: usize,
|
||||
},
|
||||
PrintExpr,
|
||||
LoadBuildClass,
|
||||
UnpackSequence {
|
||||
size: usize,
|
||||
},
|
||||
UnpackEx {
|
||||
before: usize,
|
||||
after: usize,
|
||||
},
|
||||
Unpack,
|
||||
FormatValue {
|
||||
conversion: Option<ast::ConversionFlag>,
|
||||
spec: String,
|
||||
},
|
||||
PopException,
|
||||
}
|
||||
|
||||
use self::Instruction::*;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum CallType {
|
||||
Positional(usize),
|
||||
Keyword(usize),
|
||||
Ex(bool),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum Constant {
|
||||
Integer { value: BigInt },
|
||||
Float { value: f64 },
|
||||
Complex { value: Complex64 },
|
||||
Boolean { value: bool },
|
||||
String { value: String },
|
||||
Bytes { value: Vec<u8> },
|
||||
Code { code: Box<CodeObject> },
|
||||
Tuple { elements: Vec<Constant> },
|
||||
None,
|
||||
Ellipsis,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum ComparisonOperator {
|
||||
Greater,
|
||||
GreaterOrEqual,
|
||||
Less,
|
||||
LessOrEqual,
|
||||
Equal,
|
||||
NotEqual,
|
||||
In,
|
||||
NotIn,
|
||||
Is,
|
||||
IsNot,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum BinaryOperator {
|
||||
Power,
|
||||
Multiply,
|
||||
MatrixMultiply,
|
||||
Divide,
|
||||
FloorDivide,
|
||||
Modulo,
|
||||
Add,
|
||||
Subtract,
|
||||
Subscript,
|
||||
Lshift,
|
||||
Rshift,
|
||||
And,
|
||||
Xor,
|
||||
Or,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum UnaryOperator {
|
||||
Not,
|
||||
Invert,
|
||||
Minus,
|
||||
Plus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum Varargs {
|
||||
None,
|
||||
Unnamed,
|
||||
Named(String),
|
||||
}
|
||||
|
||||
/*
|
||||
Maintain a stack of blocks on the VM.
|
||||
pub enum BlockType {
|
||||
Loop,
|
||||
Except,
|
||||
}
|
||||
*/
|
||||
|
||||
impl CodeObject {
|
||||
pub fn new(
|
||||
arg_names: Vec<String>,
|
||||
varargs: Varargs,
|
||||
kwonlyarg_names: Vec<String>,
|
||||
varkeywords: Varargs,
|
||||
source_path: String,
|
||||
first_line_number: usize,
|
||||
obj_name: String,
|
||||
) -> CodeObject {
|
||||
CodeObject {
|
||||
instructions: Vec::new(),
|
||||
label_map: HashMap::new(),
|
||||
locations: Vec::new(),
|
||||
arg_names,
|
||||
varargs,
|
||||
kwonlyarg_names,
|
||||
varkeywords,
|
||||
source_path,
|
||||
first_line_number,
|
||||
obj_name,
|
||||
is_generator: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_constants(&self) -> impl Iterator<Item = &Constant> {
|
||||
self.instructions.iter().filter_map(|x| {
|
||||
if let Instruction::LoadConst { value } = x {
|
||||
Some(value)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for CodeObject {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let label_targets: HashSet<&usize> = self.label_map.values().collect();
|
||||
for (offset, instruction) in self.instructions.iter().enumerate() {
|
||||
let arrow = if label_targets.contains(&offset) {
|
||||
">>"
|
||||
} else {
|
||||
" "
|
||||
};
|
||||
write!(f, " {} {:5} ", arrow, offset)?;
|
||||
instruction.fmt_dis(f, &self.label_map)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Instruction {
|
||||
fn fmt_dis(&self, f: &mut fmt::Formatter, label_map: &HashMap<Label, usize>) -> fmt::Result {
|
||||
macro_rules! w {
|
||||
($variant:ident) => {
|
||||
write!(f, "{:20}\n", stringify!($variant))
|
||||
};
|
||||
($variant:ident, $var:expr) => {
|
||||
write!(f, "{:20} ({})\n", stringify!($variant), $var)
|
||||
};
|
||||
($variant:ident, $var1:expr, $var2:expr) => {
|
||||
write!(f, "{:20} ({}, {})\n", stringify!($variant), $var1, $var2)
|
||||
};
|
||||
}
|
||||
|
||||
match self {
|
||||
Import { name, symbol } => w!(Import, name, format!("{:?}", symbol)),
|
||||
ImportStar { name } => w!(ImportStar, name),
|
||||
LoadName { name, scope } => w!(LoadName, name, format!("{:?}", scope)),
|
||||
StoreName { name, scope } => w!(StoreName, name, format!("{:?}", scope)),
|
||||
DeleteName { name } => w!(DeleteName, name),
|
||||
StoreSubscript => w!(StoreSubscript),
|
||||
DeleteSubscript => w!(DeleteSubscript),
|
||||
StoreAttr { name } => w!(StoreAttr, name),
|
||||
DeleteAttr { name } => w!(DeleteAttr, name),
|
||||
LoadConst { value } => w!(LoadConst, value),
|
||||
UnaryOperation { op } => w!(UnaryOperation, format!("{:?}", op)),
|
||||
BinaryOperation { op, inplace } => w!(BinaryOperation, format!("{:?}", op), inplace),
|
||||
LoadAttr { name } => w!(LoadAttr, name),
|
||||
CompareOperation { op } => w!(CompareOperation, format!("{:?}", op)),
|
||||
Pop => w!(Pop),
|
||||
Rotate { amount } => w!(Rotate, amount),
|
||||
Duplicate => w!(Duplicate),
|
||||
GetIter => w!(GetIter),
|
||||
Pass => w!(Pass),
|
||||
Continue => w!(Continue),
|
||||
Break => w!(Break),
|
||||
Jump { target } => w!(Jump, label_map[target]),
|
||||
JumpIf { target } => w!(JumpIf, label_map[target]),
|
||||
JumpIfFalse { target } => w!(JumpIfFalse, label_map[target]),
|
||||
MakeFunction { flags } => w!(MakeFunction, format!("{:?}", flags)),
|
||||
CallFunction { typ } => w!(CallFunction, format!("{:?}", typ)),
|
||||
ForIter { target } => w!(ForIter, label_map[target]),
|
||||
ReturnValue => w!(ReturnValue),
|
||||
YieldValue => w!(YieldValue),
|
||||
YieldFrom => w!(YieldFrom),
|
||||
SetupLoop { start, end } => w!(SetupLoop, label_map[start], label_map[end]),
|
||||
SetupExcept { handler } => w!(SetupExcept, handler),
|
||||
SetupWith { end } => w!(SetupWith, end),
|
||||
CleanupWith { end } => w!(CleanupWith, end),
|
||||
PopBlock => w!(PopBlock),
|
||||
Raise { argc } => w!(Raise, argc),
|
||||
BuildString { size } => w!(BuildString, size),
|
||||
BuildTuple { size, unpack } => w!(BuildTuple, size, unpack),
|
||||
BuildList { size, unpack } => w!(BuildList, size, unpack),
|
||||
BuildSet { size, unpack } => w!(BuildSet, size, unpack),
|
||||
BuildMap { size, unpack } => w!(BuildMap, size, unpack),
|
||||
BuildSlice { size } => w!(BuildSlice, size),
|
||||
ListAppend { i } => w!(ListAppend, i),
|
||||
SetAdd { i } => w!(SetAdd, i),
|
||||
MapAdd { i } => w!(MapAdd, i),
|
||||
PrintExpr => w!(PrintExpr),
|
||||
LoadBuildClass => w!(LoadBuildClass),
|
||||
UnpackSequence { size } => w!(UnpackSequence, size),
|
||||
UnpackEx { before, after } => w!(UnpackEx, before, after),
|
||||
Unpack => w!(Unpack),
|
||||
FormatValue { spec, .. } => w!(FormatValue, spec), // TODO: write conversion
|
||||
PopException => w!(PopException),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Constant {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Constant::Integer { value } => write!(f, "{}", value),
|
||||
Constant::Float { value } => write!(f, "{}", value),
|
||||
Constant::Complex { value } => write!(f, "{}", value),
|
||||
Constant::Boolean { value } => write!(f, "{}", value),
|
||||
Constant::String { value } => write!(f, "{:?}", value),
|
||||
Constant::Bytes { value } => write!(f, "{:?}", value),
|
||||
Constant::Code { code } => write!(f, "{:?}", code),
|
||||
Constant::Tuple { elements } => write!(
|
||||
f,
|
||||
"({})",
|
||||
elements
|
||||
.iter()
|
||||
.map(|e| format!("{}", e))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
),
|
||||
Constant::None => write!(f, "None"),
|
||||
Constant::Ellipsis => write!(f, "Ellipsis"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for CodeObject {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"<code object {} at ??? file {:?}, line {}>",
|
||||
self.obj_name, self.source_path, self.first_line_number
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ast::Varargs> for Varargs {
|
||||
fn from(varargs: ast::Varargs) -> Varargs {
|
||||
match varargs {
|
||||
ast::Varargs::None => Varargs::None,
|
||||
ast::Varargs::Unnamed => Varargs::Unnamed,
|
||||
ast::Varargs::Named(param) => Varargs::Named(param.arg),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a ast::Varargs> for Varargs {
|
||||
fn from(varargs: &'a ast::Varargs) -> Varargs {
|
||||
match varargs {
|
||||
ast::Varargs::None => Varargs::None,
|
||||
ast::Varargs::Unnamed => Varargs::Unnamed,
|
||||
ast::Varargs::Named(ref param) => Varargs::Named(param.arg.clone()),
|
||||
}
|
||||
}
|
||||
}
|
1874
src/compile.rs
Normal file
1874
src/compile.rs
Normal file
File diff suppressed because it is too large
Load diff
67
src/error.rs
Normal file
67
src/error.rs
Normal file
|
@ -0,0 +1,67 @@
|
|||
use rustpython_parser::error::ParseError;
|
||||
use rustpython_parser::lexer::Location;
|
||||
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CompileError {
|
||||
pub error: CompileErrorType,
|
||||
pub location: Location,
|
||||
}
|
||||
|
||||
impl From<ParseError> for CompileError {
|
||||
fn from(error: ParseError) -> Self {
|
||||
CompileError {
|
||||
error: CompileErrorType::Parse(error),
|
||||
location: Default::default(), // TODO: extract location from parse error!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum CompileErrorType {
|
||||
/// Invalid assignment, cannot store value in target.
|
||||
Assign(&'static str),
|
||||
/// Invalid delete
|
||||
Delete(&'static str),
|
||||
/// Expected an expression got a statement
|
||||
ExpectExpr,
|
||||
/// Parser error
|
||||
Parse(ParseError),
|
||||
SyntaxError(String),
|
||||
/// Multiple `*` detected
|
||||
StarArgs,
|
||||
/// Break statement outside of loop.
|
||||
InvalidBreak,
|
||||
/// Continue statement outside of loop.
|
||||
InvalidContinue,
|
||||
InvalidReturn,
|
||||
InvalidYield,
|
||||
}
|
||||
|
||||
impl fmt::Display for CompileError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match &self.error {
|
||||
CompileErrorType::Assign(target) => write!(f, "can't assign to {}", target),
|
||||
CompileErrorType::Delete(target) => write!(f, "can't delete {}", target),
|
||||
CompileErrorType::ExpectExpr => write!(f, "Expecting expression, got statement"),
|
||||
CompileErrorType::Parse(err) => write!(f, "{}", err),
|
||||
CompileErrorType::SyntaxError(err) => write!(f, "{}", err),
|
||||
CompileErrorType::StarArgs => write!(f, "Two starred expressions in assignment"),
|
||||
CompileErrorType::InvalidBreak => write!(f, "'break' outside loop"),
|
||||
CompileErrorType::InvalidContinue => write!(f, "'continue' outside loop"),
|
||||
CompileErrorType::InvalidReturn => write!(f, "'return' outside function"),
|
||||
CompileErrorType::InvalidYield => write!(f, "'yield' outside function"),
|
||||
}?;
|
||||
|
||||
// Print line number:
|
||||
write!(f, " at line {:?}", self.location.get_row())
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for CompileError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
None
|
||||
}
|
||||
}
|
7
src/lib.rs
Normal file
7
src/lib.rs
Normal file
|
@ -0,0 +1,7 @@
|
|||
#[macro_use]
|
||||
extern crate log;
|
||||
|
||||
pub mod bytecode;
|
||||
pub mod compile;
|
||||
pub mod error;
|
||||
mod symboltable;
|
588
src/symboltable.rs
Normal file
588
src/symboltable.rs
Normal file
|
@ -0,0 +1,588 @@
|
|||
/* Python code is pre-scanned for symbols in the ast.
|
||||
|
||||
This ensures that global and nonlocal keywords are picked up.
|
||||
Then the compiler can use the symbol table to generate proper
|
||||
load and store instructions for names.
|
||||
|
||||
Inspirational file: https://github.com/python/cpython/blob/master/Python/symtable.c
|
||||
*/
|
||||
|
||||
use crate::error::{CompileError, CompileErrorType};
|
||||
use rustpython_parser::ast;
|
||||
use rustpython_parser::lexer::Location;
|
||||
use std::collections::HashMap;
|
||||
|
||||
pub fn make_symbol_table(program: &ast::Program) -> Result<SymbolScope, SymbolTableError> {
|
||||
let mut builder = SymbolTableBuilder::new();
|
||||
builder.enter_scope();
|
||||
builder.scan_program(program)?;
|
||||
assert_eq!(builder.scopes.len(), 1);
|
||||
|
||||
let symbol_table = builder.scopes.pop().unwrap();
|
||||
analyze_symbol_table(&symbol_table, None)?;
|
||||
Ok(symbol_table)
|
||||
}
|
||||
|
||||
pub fn statements_to_symbol_table(
|
||||
statements: &[ast::LocatedStatement],
|
||||
) -> Result<SymbolScope, SymbolTableError> {
|
||||
let mut builder = SymbolTableBuilder::new();
|
||||
builder.enter_scope();
|
||||
builder.scan_statements(statements)?;
|
||||
assert_eq!(builder.scopes.len(), 1);
|
||||
|
||||
let symbol_table = builder.scopes.pop().unwrap();
|
||||
analyze_symbol_table(&symbol_table, None)?;
|
||||
Ok(symbol_table)
|
||||
}
|
||||
|
||||
/// The role recorded for a name the first time it is registered in a scope.
#[derive(Debug)]
pub enum SymbolRole {
    /// Declared with a `global` statement.
    Global,
    /// Declared with a `nonlocal` statement.
    Nonlocal,
    /// Referenced as an identifier in an expression.
    Used,
    /// Bound by a definition, import, parameter or exception handler name.
    Assigned,
}
|
||||
|
||||
/// Captures all symbols in the current scope, and has a list of subscopes in this scope.
|
||||
pub struct SymbolScope {
|
||||
/// A set of symbols present on this scope level.
|
||||
pub symbols: HashMap<String, SymbolRole>,
|
||||
|
||||
/// A list of subscopes in the order as found in the
|
||||
/// AST nodes.
|
||||
pub sub_scopes: Vec<SymbolScope>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SymbolTableError {
|
||||
error: String,
|
||||
location: Location,
|
||||
}
|
||||
|
||||
impl From<SymbolTableError> for CompileError {
|
||||
fn from(error: SymbolTableError) -> Self {
|
||||
CompileError {
|
||||
error: CompileErrorType::SyntaxError(error.error),
|
||||
location: error.location,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a symbol-table step that produces no value on success.
type SymbolTableResult = Result<(), SymbolTableError>;
|
||||
|
||||
impl SymbolScope {
|
||||
pub fn new() -> Self {
|
||||
SymbolScope {
|
||||
symbols: HashMap::new(),
|
||||
sub_scopes: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn lookup(&self, name: &str) -> Option<&SymbolRole> {
|
||||
self.symbols.get(name)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for SymbolScope {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"SymbolScope({:?} symbols, {:?} sub scopes)",
|
||||
self.symbols.len(),
|
||||
self.sub_scopes.len()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/* Perform some sort of analysis on nonlocals, globals etc..
|
||||
See also: https://github.com/python/cpython/blob/master/Python/symtable.c#L410
|
||||
*/
|
||||
fn analyze_symbol_table(
|
||||
symbol_scope: &SymbolScope,
|
||||
parent_symbol_scope: Option<&SymbolScope>,
|
||||
) -> SymbolTableResult {
|
||||
// Analyze sub scopes:
|
||||
for sub_scope in &symbol_scope.sub_scopes {
|
||||
analyze_symbol_table(&sub_scope, Some(symbol_scope))?;
|
||||
}
|
||||
|
||||
// Analyze symbols:
|
||||
for (symbol_name, symbol_role) in &symbol_scope.symbols {
|
||||
analyze_symbol(symbol_name, symbol_role, parent_symbol_scope)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::single_match)]
|
||||
fn analyze_symbol(
|
||||
symbol_name: &str,
|
||||
symbol_role: &SymbolRole,
|
||||
parent_symbol_scope: Option<&SymbolScope>,
|
||||
) -> SymbolTableResult {
|
||||
match symbol_role {
|
||||
SymbolRole::Nonlocal => {
|
||||
// check if name is defined in parent scope!
|
||||
if let Some(parent_symbol_scope) = parent_symbol_scope {
|
||||
if !parent_symbol_scope.symbols.contains_key(symbol_name) {
|
||||
return Err(SymbolTableError {
|
||||
error: format!("no binding for nonlocal '{}' found", symbol_name),
|
||||
location: Default::default(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
return Err(SymbolTableError {
|
||||
error: format!(
|
||||
"nonlocal {} defined at place without an enclosing scope",
|
||||
symbol_name
|
||||
),
|
||||
location: Default::default(),
|
||||
});
|
||||
}
|
||||
}
|
||||
// TODO: add more checks for globals
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub struct SymbolTableBuilder {
|
||||
// Scope stack.
|
||||
pub scopes: Vec<SymbolScope>,
|
||||
}
|
||||
|
||||
impl SymbolTableBuilder {
|
||||
pub fn new() -> Self {
|
||||
SymbolTableBuilder { scopes: vec![] }
|
||||
}
|
||||
|
||||
pub fn enter_scope(&mut self) {
|
||||
let scope = SymbolScope::new();
|
||||
self.scopes.push(scope);
|
||||
}
|
||||
|
||||
fn leave_scope(&mut self) {
|
||||
// Pop scope and add to subscopes of parent scope.
|
||||
let scope = self.scopes.pop().unwrap();
|
||||
self.scopes.last_mut().unwrap().sub_scopes.push(scope);
|
||||
}
|
||||
|
||||
/// Scans every statement of `program` into the current scope.
pub fn scan_program(&mut self, program: &ast::Program) -> SymbolTableResult {
    self.scan_statements(&program.statements)
}
|
||||
|
||||
/// Scans the statements in order, stopping at the first error.
pub fn scan_statements(&mut self, statements: &[ast::LocatedStatement]) -> SymbolTableResult {
    statements
        .iter()
        .try_for_each(|statement| self.scan_statement(statement))
}
|
||||
|
||||
/// Registers every parameter name in the current scope, stopping at the
/// first error.
fn scan_parameters(&mut self, parameters: &[ast::Parameter]) -> SymbolTableResult {
    parameters
        .iter()
        .try_for_each(|parameter| self.scan_parameter(parameter))
}
|
||||
|
||||
fn scan_parameter(&mut self, parameter: &ast::Parameter) -> SymbolTableResult {
|
||||
self.register_name(¶meter.arg, SymbolRole::Assigned)
|
||||
}
|
||||
|
||||
/// Scans the annotation of every parameter in the current scope, stopping
/// at the first error.
fn scan_parameters_annotations(&mut self, parameters: &[ast::Parameter]) -> SymbolTableResult {
    parameters
        .iter()
        .try_for_each(|parameter| self.scan_parameter_annotation(parameter))
}
|
||||
|
||||
/// Scans the parameter's annotation expression, if it has one, in the
/// current scope.
fn scan_parameter_annotation(&mut self, parameter: &ast::Parameter) -> SymbolTableResult {
    // The original text read `¶meter.annotation`: the `&para` prefix of
    // `&parameter` had been decoded as an HTML pilcrow entity.
    if let Some(annotation) = &parameter.annotation {
        self.scan_expression(annotation)?;
    }
    Ok(())
}
|
||||
|
||||
fn scan_statement(&mut self, statement: &ast::LocatedStatement) -> SymbolTableResult {
|
||||
match &statement.node {
|
||||
ast::Statement::Global { names } => {
|
||||
for name in names {
|
||||
self.register_name(name, SymbolRole::Global)?;
|
||||
}
|
||||
}
|
||||
ast::Statement::Nonlocal { names } => {
|
||||
for name in names {
|
||||
self.register_name(name, SymbolRole::Nonlocal)?;
|
||||
}
|
||||
}
|
||||
ast::Statement::FunctionDef {
|
||||
name,
|
||||
body,
|
||||
args,
|
||||
decorator_list,
|
||||
returns,
|
||||
}
|
||||
| ast::Statement::AsyncFunctionDef {
|
||||
name,
|
||||
body,
|
||||
args,
|
||||
decorator_list,
|
||||
returns,
|
||||
} => {
|
||||
self.scan_expressions(decorator_list)?;
|
||||
self.register_name(name, SymbolRole::Assigned)?;
|
||||
|
||||
self.enter_function(args)?;
|
||||
|
||||
self.scan_statements(body)?;
|
||||
if let Some(expression) = returns {
|
||||
self.scan_expression(expression)?;
|
||||
}
|
||||
self.leave_scope();
|
||||
}
|
||||
ast::Statement::ClassDef {
|
||||
name,
|
||||
body,
|
||||
bases,
|
||||
keywords,
|
||||
decorator_list,
|
||||
} => {
|
||||
self.register_name(name, SymbolRole::Assigned)?;
|
||||
self.enter_scope();
|
||||
self.scan_statements(body)?;
|
||||
self.leave_scope();
|
||||
self.scan_expressions(bases)?;
|
||||
for keyword in keywords {
|
||||
self.scan_expression(&keyword.value)?;
|
||||
}
|
||||
self.scan_expressions(decorator_list)?;
|
||||
}
|
||||
ast::Statement::Expression { expression } => self.scan_expression(expression)?,
|
||||
ast::Statement::If { test, body, orelse } => {
|
||||
self.scan_expression(test)?;
|
||||
self.scan_statements(body)?;
|
||||
if let Some(code) = orelse {
|
||||
self.scan_statements(code)?;
|
||||
}
|
||||
}
|
||||
ast::Statement::For {
|
||||
target,
|
||||
iter,
|
||||
body,
|
||||
orelse,
|
||||
}
|
||||
| ast::Statement::AsyncFor {
|
||||
target,
|
||||
iter,
|
||||
body,
|
||||
orelse,
|
||||
} => {
|
||||
self.scan_expression(target)?;
|
||||
self.scan_expression(iter)?;
|
||||
self.scan_statements(body)?;
|
||||
if let Some(code) = orelse {
|
||||
self.scan_statements(code)?;
|
||||
}
|
||||
}
|
||||
ast::Statement::While { test, body, orelse } => {
|
||||
self.scan_expression(test)?;
|
||||
self.scan_statements(body)?;
|
||||
if let Some(code) = orelse {
|
||||
self.scan_statements(code)?;
|
||||
}
|
||||
}
|
||||
ast::Statement::Break | ast::Statement::Continue | ast::Statement::Pass => {
|
||||
// No symbols here.
|
||||
}
|
||||
ast::Statement::Import { import_parts } => {
|
||||
for part in import_parts {
|
||||
if let Some(alias) = &part.alias {
|
||||
// `import mymodule as myalias`
|
||||
// `from mymodule import myimportname as myalias`
|
||||
self.register_name(alias, SymbolRole::Assigned)?;
|
||||
} else if let Some(symbol) = &part.symbol {
|
||||
// `from mymodule import myimport`
|
||||
self.register_name(symbol, SymbolRole::Assigned)?;
|
||||
} else {
|
||||
// `import module`
|
||||
self.register_name(&part.module, SymbolRole::Assigned)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
ast::Statement::Return { value } => {
|
||||
if let Some(expression) = value {
|
||||
self.scan_expression(expression)?;
|
||||
}
|
||||
}
|
||||
ast::Statement::Assert { test, msg } => {
|
||||
self.scan_expression(test)?;
|
||||
if let Some(expression) = msg {
|
||||
self.scan_expression(expression)?;
|
||||
}
|
||||
}
|
||||
ast::Statement::Delete { targets } => {
|
||||
self.scan_expressions(targets)?;
|
||||
}
|
||||
ast::Statement::Assign { targets, value } => {
|
||||
self.scan_expressions(targets)?;
|
||||
self.scan_expression(value)?;
|
||||
}
|
||||
ast::Statement::AugAssign { target, value, .. } => {
|
||||
self.scan_expression(target)?;
|
||||
self.scan_expression(value)?;
|
||||
}
|
||||
ast::Statement::With { items, body } => {
|
||||
for item in items {
|
||||
self.scan_expression(&item.context_expr)?;
|
||||
if let Some(expression) = &item.optional_vars {
|
||||
self.scan_expression(expression)?;
|
||||
}
|
||||
}
|
||||
self.scan_statements(body)?;
|
||||
}
|
||||
ast::Statement::Try {
|
||||
body,
|
||||
handlers,
|
||||
orelse,
|
||||
finalbody,
|
||||
} => {
|
||||
self.scan_statements(body)?;
|
||||
for handler in handlers {
|
||||
if let Some(expression) = &handler.typ {
|
||||
self.scan_expression(expression)?;
|
||||
}
|
||||
if let Some(name) = &handler.name {
|
||||
self.register_name(name, SymbolRole::Assigned)?;
|
||||
}
|
||||
self.scan_statements(&handler.body)?;
|
||||
}
|
||||
if let Some(code) = orelse {
|
||||
self.scan_statements(code)?;
|
||||
}
|
||||
if let Some(code) = finalbody {
|
||||
self.scan_statements(code)?;
|
||||
}
|
||||
}
|
||||
ast::Statement::Raise { exception, cause } => {
|
||||
if let Some(expression) = exception {
|
||||
self.scan_expression(expression)?;
|
||||
}
|
||||
if let Some(expression) = cause {
|
||||
self.scan_expression(expression)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Scans the expressions in order, stopping at the first error.
fn scan_expressions(&mut self, expressions: &[ast::Expression]) -> SymbolTableResult {
    expressions
        .iter()
        .try_for_each(|expression| self.scan_expression(expression))
}
|
||||
|
||||
/// Records the names referenced by an expression, recursing into every
/// sub-expression.
///
/// Identifiers are registered as `SymbolRole::Used`. A lambda scans its
/// body in a new sub scope; everything else stays in the current scope.
fn scan_expression(&mut self, expression: &ast::Expression) -> SymbolTableResult {
    match expression {
        // Leaves without any names.
        ast::Expression::True
        | ast::Expression::False
        | ast::Expression::None
        | ast::Expression::Ellipsis => {}
        ast::Expression::Number { .. } => {}
        ast::Expression::Bytes { .. } => {}

        // Names and strings.
        ast::Expression::Identifier { name } => {
            self.register_name(name, SymbolRole::Used)?;
        }
        ast::Expression::String { value } => {
            self.scan_string_group(value)?;
        }

        // Single operand.
        ast::Expression::Unop { a, .. } => {
            self.scan_expression(a)?;
        }
        ast::Expression::Attribute { value, .. } => {
            self.scan_expression(value)?;
        }
        ast::Expression::Await { value } => {
            self.scan_expression(value)?;
        }
        ast::Expression::YieldFrom { value } => {
            self.scan_expression(value)?;
        }
        ast::Expression::Starred { value } => {
            self.scan_expression(value)?;
        }
        ast::Expression::Yield { value } => {
            if let Some(yielded) = value {
                self.scan_expression(yielded)?;
            }
        }

        // Two operands.
        ast::Expression::Binop { a, b, .. } => {
            self.scan_expression(a)?;
            self.scan_expression(b)?;
        }
        ast::Expression::BoolOp { a, b, .. } => {
            self.scan_expression(a)?;
            self.scan_expression(b)?;
        }
        ast::Expression::Subscript { a, b } => {
            self.scan_expression(a)?;
            self.scan_expression(b)?;
        }

        // Sequences of operands.
        ast::Expression::Compare { vals, .. } => {
            self.scan_expressions(vals)?;
        }
        ast::Expression::Tuple { elements }
        | ast::Expression::Set { elements }
        | ast::Expression::List { elements }
        | ast::Expression::Slice { elements } => {
            self.scan_expressions(elements)?;
        }
        ast::Expression::Dict { elements } => {
            for (key, value) in elements {
                // A missing key is the dict unpacking marker: only the value is scanned.
                if let Some(key) = key {
                    self.scan_expression(key)?;
                }
                self.scan_expression(value)?;
            }
        }

        // Compound forms.
        ast::Expression::IfExpression { test, body, orelse } => {
            self.scan_expression(test)?;
            self.scan_expression(body)?;
            self.scan_expression(orelse)?;
        }
        ast::Expression::Call {
            function,
            args,
            keywords,
        } => {
            self.scan_expression(function)?;
            self.scan_expressions(args)?;
            keywords
                .iter()
                .try_for_each(|keyword| self.scan_expression(&keyword.value))?;
        }
        ast::Expression::Comprehension { kind, generators } => {
            match &**kind {
                ast::ComprehensionKind::GeneratorExpression { element }
                | ast::ComprehensionKind::List { element }
                | ast::ComprehensionKind::Set { element } => {
                    self.scan_expression(element)?;
                }
                ast::ComprehensionKind::Dict { key, value } => {
                    self.scan_expression(key)?;
                    self.scan_expression(value)?;
                }
            }

            for generator in generators {
                self.scan_expression(&generator.target)?;
                self.scan_expression(&generator.iter)?;
                generator
                    .ifs
                    .iter()
                    .try_for_each(|condition| self.scan_expression(condition))?;
            }
        }
        ast::Expression::Lambda { args, body } => {
            self.enter_function(args)?;
            self.scan_expression(body)?;
            self.leave_scope();
        }
    }
    Ok(())
}
|
||||
|
||||
/// Opens the scope of a function or lambda.
///
/// Default values and parameter annotations are scanned in the current
/// (outer) scope first; then a new scope is pushed and filled with the
/// parameter names. The caller is responsible for calling `leave_scope`.
fn enter_function(&mut self, args: &ast::Parameters) -> SymbolTableResult {
    // Evaluate eventual default parameters:
    self.scan_expressions(&args.defaults)?;
    for default_value in args.kw_defaults.iter().flatten() {
        self.scan_expression(default_value)?;
    }

    // Annotations are scanned in outer scope:
    self.scan_parameters_annotations(&args.args)?;
    self.scan_parameters_annotations(&args.kwonlyargs)?;
    if let ast::Varargs::Named(parameter) = &args.vararg {
        self.scan_parameter_annotation(parameter)?;
    }
    if let ast::Varargs::Named(parameter) = &args.kwarg {
        self.scan_parameter_annotation(parameter)?;
    }

    self.enter_scope();

    // Fill scope with parameter names:
    self.scan_parameters(&args.args)?;
    self.scan_parameters(&args.kwonlyargs)?;
    if let ast::Varargs::Named(parameter) = &args.vararg {
        self.scan_parameter(parameter)?;
    }
    if let ast::Varargs::Named(parameter) = &args.kwarg {
        self.scan_parameter(parameter)?;
    }
    Ok(())
}
|
||||
|
||||
/// Scans the expressions embedded in a (possibly nested) string group.
/// Constant parts contain no names.
fn scan_string_group(&mut self, group: &ast::StringGroup) -> SymbolTableResult {
    match group {
        ast::StringGroup::Constant { .. } => Ok(()),
        ast::StringGroup::FormattedValue { value, .. } => self.scan_expression(value),
        ast::StringGroup::Joined { values } => values
            .iter()
            .try_for_each(|part| self.scan_string_group(part)),
    }
}
|
||||
|
||||
#[allow(clippy::single_match)]
|
||||
fn register_name(&mut self, name: &str, role: SymbolRole) -> SymbolTableResult {
|
||||
let scope_depth = self.scopes.len();
|
||||
let current_scope = self.scopes.last_mut().unwrap();
|
||||
let location = Default::default();
|
||||
if current_scope.symbols.contains_key(name) {
|
||||
// Role already set..
|
||||
match role {
|
||||
SymbolRole::Global => {
|
||||
return Err(SymbolTableError {
|
||||
error: format!("name '{}' is used prior to global declaration", name),
|
||||
location,
|
||||
})
|
||||
}
|
||||
SymbolRole::Nonlocal => {
|
||||
return Err(SymbolTableError {
|
||||
error: format!("name '{}' is used prior to nonlocal declaration", name),
|
||||
location,
|
||||
})
|
||||
}
|
||||
_ => {
|
||||
// Ok?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
match role {
|
||||
SymbolRole::Nonlocal => {
|
||||
if scope_depth < 2 {
|
||||
return Err(SymbolTableError {
|
||||
error: format!("cannot define nonlocal '{}' at top level.", name),
|
||||
location,
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Ok!
|
||||
}
|
||||
}
|
||||
current_scope.symbols.insert(name.to_string(), role);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue