reorganize compiler crates

This commit is contained in:
Jeong YunWon 2022-08-22 04:02:00 +09:00
parent 3351b4408b
commit 060d153bb3
82 changed files with 12368 additions and 164 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,101 +0,0 @@
use rustpython_ast::Location;
use std::{error::Error, fmt};
/// A compilation failure together with where it was detected.
#[derive(Debug)]
pub struct CompileError {
/// The kind of failure.
pub error: CompileErrorType,
/// Source position associated with the failure.
pub location: Location,
/// Path of the source being compiled (presumably as given by the caller; verify).
pub source_path: String,
}
/// The individual kinds of compilation failure.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Debug)]
#[non_exhaustive]
pub enum CompileErrorType {
/// Invalid assignment, cannot store value in target.
Assign(&'static str),
/// Invalid delete
Delete(&'static str),
/// A syntax error; the string is displayed verbatim as the message.
SyntaxError(String),
/// Multiple `*` detected
MultipleStarArgs,
/// Misplaced `*` expression
InvalidStarExpr,
/// Break statement outside of loop.
InvalidBreak,
/// Continue statement outside of loop.
InvalidContinue,
/// `return` outside function.
InvalidReturn,
/// `yield` outside function.
InvalidYield,
/// `yield from` outside function.
InvalidYieldFrom,
/// `await` outside async function.
InvalidAwait,
/// `yield from` inside async function.
AsyncYieldFrom,
/// `return` with a value inside async generator.
AsyncReturnValue,
/// A `from __future__` import that is not at the beginning of the file.
InvalidFuturePlacement,
/// A `__future__` feature that is not defined; carries the feature name.
InvalidFutureFeature(String),
/// `import *` used somewhere other than module level.
FunctionImportStar,
/// Too many expressions in a star-unpacking assignment.
TooManyStarUnpack,
/// A `With` node that has no items.
EmptyWithItems,
/// A `With` node that has no body.
EmptyWithBody,
/// The requested feature is not implemented.
NotImplementedYet, // RustPython marker for unimplemented features
}
impl fmt::Display for CompileErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
CompileErrorType::Assign(target) => write!(f, "cannot assign to {}", target),
CompileErrorType::Delete(target) => write!(f, "cannot delete {}", target),
CompileErrorType::SyntaxError(err) => write!(f, "{}", err.as_str()),
CompileErrorType::MultipleStarArgs => {
write!(f, "two starred expressions in assignment")
}
CompileErrorType::InvalidStarExpr => write!(f, "cannot use starred expression here"),
CompileErrorType::InvalidBreak => write!(f, "'break' outside loop"),
CompileErrorType::InvalidContinue => write!(f, "'continue' outside loop"),
CompileErrorType::InvalidReturn => write!(f, "'return' outside function"),
CompileErrorType::InvalidYield => write!(f, "'yield' outside function"),
CompileErrorType::InvalidYieldFrom => write!(f, "'yield from' outside function"),
CompileErrorType::InvalidAwait => write!(f, "'await' outside async function"),
CompileErrorType::AsyncYieldFrom => write!(f, "'yield from' inside async function"),
CompileErrorType::AsyncReturnValue => {
write!(f, "'return' with value inside async generator")
}
CompileErrorType::InvalidFuturePlacement => write!(
f,
"from __future__ imports must occur at the beginning of the file"
),
CompileErrorType::InvalidFutureFeature(feat) => {
write!(f, "future feature {} is not defined", feat)
}
CompileErrorType::FunctionImportStar => {
write!(f, "import * only allowed at module level")
}
CompileErrorType::TooManyStarUnpack => {
write!(f, "too many expressions in star-unpacking assignment")
}
CompileErrorType::EmptyWithItems => {
write!(f, "empty items on With")
}
CompileErrorType::EmptyWithBody => {
write!(f, "empty body on With")
}
CompileErrorType::NotImplementedYet => {
write!(f, "RustPython does not implement this feature yet")
}
}
}
}
// Empty impl: relies entirely on the `Error` trait's default methods.
impl Error for CompileErrorType {}
impl fmt::Display for CompileError {
    /// Renders the error as `<error> at <location>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self {
            error, location, ..
        } = self;
        write!(f, "{} at {}", error, location)
    }
}
impl Error for CompileError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
None
}
}

237
src/ir.rs
View file

@ -1,237 +0,0 @@
use crate::IndexSet;
use rustpython_bytecode::{CodeFlags, CodeObject, ConstantData, Instruction, Label, Location};
/// Index of a block inside `CodeInfo::blocks`, represented as a `Label`.
pub type BlockIdx = Label;
/// One instruction together with the source location recorded for it.
#[derive(Debug)]
pub struct InstructionInfo {
/// If the instruction has a Label argument, it's actually a BlockIdx, not a code offset
pub instr: Instruction,
/// Location stored next to `instr`; `CodeInfo::finalize_code` copies it into
/// the resulting `CodeObject::locations`.
pub location: Location,
}
// TODO: look into using petgraph for handling blocks and stuff? it's heavier than this, but it
// might enable more analysis/optimizations
/// A sequence of instructions plus the index of the block that follows it.
#[derive(Debug)]
pub struct Block {
/// Instructions belonging to this block, in order.
pub instructions: Vec<InstructionInfo>,
/// Index of the following block. `Label(u32::MAX)` (the `Default` value)
/// does not refer to any block and therefore ends block iteration.
pub next: BlockIdx,
}
impl Default for Block {
    /// Creates an empty block whose `next` is the `Label(u32::MAX)` sentinel.
    fn default() -> Self {
        let instructions = Vec::new();
        let next = Label(u32::MAX);
        Self { instructions, next }
    }
}
/// Block-based description of a code object under construction.
///
/// `finalize_code` consumes this value and produces the flat `CodeObject`.
pub struct CodeInfo {
/// Flags copied unchanged into the final `CodeObject`; also consulted by
/// `cell2arg` for `HAS_VARARGS` / `HAS_VARKEYWORDS`.
pub flags: CodeFlags,
pub posonlyarg_count: usize, // Number of positional-only arguments
/// Argument count; summed with `kwonlyarg_count` and the var-arg flags in `cell2arg`.
pub arg_count: usize,
/// Keyword-only argument count.
pub kwonlyarg_count: usize,
/// Source path copied unchanged into the final `CodeObject`.
pub source_path: String,
/// First line number copied unchanged into the final `CodeObject`.
pub first_line_number: usize,
pub obj_name: String, // Name of the object that created this code object
/// All blocks; block 0 is where iteration and stack-depth analysis start.
pub blocks: Vec<Block>,
/// Not used by `finalize_code` (it is discarded there).
/// NOTE(review): presumably the block currently being emitted into; confirm against the compiler.
pub current_block: BlockIdx,
/// Becomes `CodeObject::constants`.
pub constants: IndexSet<ConstantData>,
/// Becomes `CodeObject::names`.
pub name_cache: IndexSet<String>,
/// Becomes `CodeObject::varnames`; also used by `cell2arg` to look up argument indices.
pub varname_cache: IndexSet<String>,
/// Becomes `CodeObject::cellvars`.
pub cellvar_cache: IndexSet<String>,
/// Becomes `CodeObject::freevars`.
pub freevar_cache: IndexSet<String>,
}
impl CodeInfo {
/// Consumes the builder and lowers its blocks into a flat `CodeObject`.
///
/// The maximum stack depth and the `cell2arg` table are computed first; when
/// `optimize` is greater than zero, `dce` then trims the blocks. Blocks are
/// laid out in `iter_blocks` order and every label argument (a block index
/// up to this point) is rewritten to the instruction offset at which the
/// referenced block starts.
pub fn finalize_code(mut self, optimize: u8) -> CodeObject {
    let max_stackdepth = self.max_stackdepth();
    let cell2arg = self.cell2arg();
    if optimize > 0 {
        self.dce();
    }

    let CodeInfo {
        flags,
        posonlyarg_count,
        arg_count,
        kwonlyarg_count,
        source_path,
        first_line_number,
        obj_name,
        blocks,
        current_block: _,
        constants,
        name_cache,
        varname_cache,
        cellvar_cache,
        freevar_cache,
    } = self;

    // First pass: record the starting offset of every reachable block and
    // count the instructions that will be emitted.
    let mut block_to_offset = vec![Label(0); blocks.len()];
    let mut total_instructions = 0;
    for (block_idx, block) in iter_blocks(&blocks) {
        block_to_offset[block_idx.0 as usize] = Label(total_instructions as u32);
        total_instructions += block.instructions.len();
    }

    // Second pass: emit instructions and locations side by side, patching
    // label arguments from block indices to instruction offsets.
    let mut instructions = Vec::with_capacity(total_instructions);
    let mut locations = Vec::with_capacity(total_instructions);
    let ordered_infos = iter_blocks(&blocks).flat_map(|(_, block)| block.instructions.iter());
    for info in ordered_infos {
        let mut instr = info.instr.clone();
        if let Some(target) = instr.label_arg_mut() {
            *target = block_to_offset[target.0 as usize];
        }
        instructions.push(instr);
        locations.push(info.location);
    }

    CodeObject {
        flags,
        posonlyarg_count,
        arg_count,
        kwonlyarg_count,
        source_path,
        first_line_number,
        obj_name,
        max_stackdepth,
        instructions: instructions.into_boxed_slice(),
        locations: locations.into_boxed_slice(),
        constants: constants.into_iter().collect(),
        names: name_cache.into_iter().collect(),
        varnames: varname_cache.into_iter().collect(),
        cellvars: cellvar_cache.into_iter().collect(),
        freevars: freevar_cache.into_iter().collect(),
        cell2arg,
    }
}
/// Builds the table mapping each cell variable to its argument index.
///
/// Each entry is the variable's index in `varname_cache` when that index is
/// below the total argument count, and `-1` otherwise. Returns `None` when
/// there are no cell variables or when none of them is an argument.
fn cell2arg(&self) -> Option<Box<[isize]>> {
    if self.cellvar_cache.is_empty() {
        return None;
    }

    let has_varargs = self.flags.contains(CodeFlags::HAS_VARARGS);
    let has_varkeywords = self.flags.contains(CodeFlags::HAS_VARKEYWORDS);
    let total_args = self.arg_count
        + self.kwonlyarg_count
        + usize::from(has_varargs)
        + usize::from(has_varkeywords);

    let table: Box<[isize]> = self
        .cellvar_cache
        .iter()
        .map(|var| match self.varname_cache.get_index_of(var) {
            // Only indices inside the argument range count as arguments.
            Some(idx) if idx < total_args => idx as isize,
            _ => -1,
        })
        .collect();

    // Any entry other than the `-1` placeholder means a cell variable is an argument.
    let any_cell_is_arg = table.iter().any(|&slot| slot != -1);
    if any_cell_is_arg {
        Some(table)
    } else {
        None
    }
}
/// Drops the instructions that follow the first unconditional branch in
/// each block; blocks without one are left untouched.
fn dce(&mut self) {
    for block in &mut self.blocks {
        let first_branch = block
            .instructions
            .iter()
            .position(|info| info.instr.unconditional_branch());
        if let Some(idx) = first_branch {
            // Keep the branch itself, discard everything after it.
            block.instructions.truncate(idx + 1);
        }
    }
}
/// Computes the largest stack depth reached while walking the blocks.
///
/// A worklist starts at block 0 with depth 0. For each instruction the
/// `stack_effect(false)` value is applied to the running depth; for an
/// instruction with a label argument, `stack_effect(true)` gives the depth
/// with which the target block is queued (presumably the effect when the
/// jump is taken; confirm against `Instruction::stack_effect`). A block is
/// queued again only when reached with a larger depth than recorded so far.
///
/// Indexes `startdepths[0]` unconditionally, so `self.blocks` must not be empty.
fn max_stackdepth(&self) -> u32 {
let mut maxdepth = 0u32;
let mut stack = Vec::with_capacity(self.blocks.len());
// `u32::MAX` marks a block that has not been reached yet.
let mut startdepths = vec![u32::MAX; self.blocks.len()];
startdepths[0] = 0;
stack.push(Label(0));
// Flip to `true` to trace the analysis on stderr.
const DEBUG: bool = false;
'process_blocks: while let Some(block) = stack.pop() {
let mut depth = startdepths[block.0 as usize];
if DEBUG {
eprintln!("===BLOCK {}===", block.0);
}
// Shadows the label with the block it refers to.
let block = &self.blocks[block.0 as usize];
for i in &block.instructions {
let instr = &i.instr;
let effect = instr.stack_effect(false);
if DEBUG {
eprint!("{instr:?}: {depth} {effect:+} => ");
}
let new_depth = add_ui(depth, effect);
if DEBUG {
eprintln!("{new_depth}");
}
if new_depth > maxdepth {
maxdepth = new_depth
}
// we don't want to worry about Continue, it uses unwinding to jump to
// its targets and as such the stack size is taken care of in frame.rs by setting
// it back to the level it was at when SetupLoop was run
let jump_label = instr
.label_arg()
.filter(|_| !matches!(instr, Instruction::Continue { .. }));
if let Some(&target_block) = jump_label {
// Depth at the jump target is derived from the depth *before* this instruction.
let effect = instr.stack_effect(true);
let target_depth = add_ui(depth, effect);
if target_depth > maxdepth {
maxdepth = target_depth
}
stackdepth_push(&mut stack, &mut startdepths, target_block, target_depth);
}
depth = new_depth;
// After an unconditional branch the rest of the block and `block.next` are not followed.
if instr.unconditional_branch() {
continue 'process_blocks;
}
}
// The block ended without an unconditional branch: continue into its successor.
stackdepth_push(&mut stack, &mut startdepths, block.next, depth);
}
if DEBUG {
eprintln!("DONE: {maxdepth}");
}
maxdepth
}
}
/// Queues `target` for (re)processing when it has not been reached yet or is
/// now reached with a greater `depth` than the one recorded for it.
///
/// `target` is used directly as an index into `startdepths`.
fn stackdepth_push(stack: &mut Vec<Label>, startdepths: &mut [u32], target: Label, depth: u32) {
    let recorded = &mut startdepths[target.0 as usize];
    // `u32::MAX` is the "not visited" marker.
    let already_covered = *recorded != u32::MAX && depth <= *recorded;
    if already_covered {
        return;
    }
    *recorded = depth;
    stack.push(target);
}
/// Adds the signed delta `b` to the unsigned value `a`.
///
/// Uses plain `+`/`-`, so a result outside the `u32` range follows the
/// build's normal integer-overflow behaviour.
fn add_ui(a: u32, b: i32) -> u32 {
    let magnitude = b.unsigned_abs();
    if b >= 0 {
        a + magnitude
    } else {
        a - magnitude
    }
}
/// Iterates over blocks in layout order: starts at block 0 and follows each
/// block's `next` index until it no longer refers to an existing block.
fn iter_blocks(blocks: &[Block]) -> impl Iterator<Item = (BlockIdx, &Block)> + '_ {
    let lookup = move |idx: BlockIdx| Some((idx, blocks.get(idx.0 as usize)?));
    // A `next` of `u32::MAX` is out of range, so `lookup` yields `None` and the chain ends.
    std::iter::successors(lookup(Label(0)), move |(_, block)| lookup(block.next))
}

View file

@ -1,15 +1,124 @@
//! Compile a Python AST or source code into bytecode consumable by RustPython.
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-compiler/")]
use rustpython_bytecode::CodeObject;
use rustpython_codegen::{compile, symboltable};
use rustpython_parser::{
ast::{fold::Fold, ConstantOptimizer, Location},
parser,
};
use std::fmt;
#[macro_use]
extern crate log;
pub use compile::{CompileOpts, Mode};
pub use symboltable::{Symbol, SymbolScope, SymbolTable, SymbolTableType};
// Crate-local shorthand for `indexmap::IndexMap` parameterised with `ahash::RandomState`.
type IndexMap<K, V> = indexmap::IndexMap<K, V, ahash::RandomState>;
// Crate-local shorthand for `indexmap::IndexSet` parameterised with `ahash::RandomState`.
type IndexSet<T> = indexmap::IndexSet<T, ahash::RandomState>;
/// Error kind covering both code-generation and parser failures.
///
/// Both variants are `#[error(transparent)]`, so formatting is delegated to
/// the wrapped error, and `#[from]` provides the `From` conversions.
#[derive(Debug, thiserror::Error)]
pub enum CompileErrorType {
/// A failure kind coming from `rustpython_codegen`.
#[error(transparent)]
Compile(#[from] rustpython_codegen::error::CompileErrorType),
/// A failure kind coming from `rustpython_parser`.
#[error(transparent)]
Parse(#[from] rustpython_parser::error::ParseErrorType),
}
pub mod compile;
pub mod error;
pub mod ir;
pub mod mode;
pub mod symboltable;
/// A compile or parse failure with its position and, when available, the
/// source line it points into.
#[derive(Debug, thiserror::Error)]
pub struct CompileError {
/// The kind of failure.
pub error: CompileErrorType,
/// The source line at `location` followed by a newline, as produced by
/// `get_statement`; `None` when no line could be extracted.
pub statement: Option<String>,
/// Path of the source that was being compiled.
pub source_path: String,
/// Position of the failure within the source.
pub location: Location,
}
impl fmt::Display for CompileError {
    /// Formats as `<error> at <location>`; when the offending statement is
    /// known, that text is passed through `Location::visualize` together with
    /// the statement.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let loc = self.location;
        match &self.statement {
            // visualize the error when location and statement are provided
            Some(stmt) => write!(
                f,
                "{}",
                loc.visualize(stmt, &format_args!("{} at {}", self.error, loc))
            ),
            None => write!(f, "{} at {}", self.error, loc),
        }
    }
}
impl CompileError {
    /// Wraps a code-generation error, attaching the source line it refers to.
    fn from_compile(error: rustpython_codegen::error::CompileError, source: &str) -> Self {
        let location = error.location;
        Self {
            statement: get_statement(source, location),
            error: error.error.into(),
            source_path: error.source_path,
            location,
        }
    }

    /// Wraps a parser error, attaching `source_path` and the source line it
    /// refers to.
    fn from_parse(
        error: rustpython_parser::error::ParseError,
        source: &str,
        source_path: String,
    ) -> Self {
        let location = error.location;
        Self {
            statement: get_statement(source, location),
            error: error.error.into(),
            source_path,
            location,
        }
    }

    /// Wraps a symbol-table error by first converting it into a
    /// code-generation error for `source_path`.
    fn from_symtable(
        error: symboltable::SymbolTableError,
        source: &str,
        source_path: String,
    ) -> Self {
        let compile_error = error.into_compile_error(source_path);
        Self::from_compile(compile_error, source)
    }
}
/// Compile a given sourcecode into a bytecode object.
///
/// `source` is parsed according to `mode`; when `opts.optimize` is greater
/// than zero the AST is run through `ConstantOptimizer` before code
/// generation. Parser and code-generation failures are both returned as
/// [`CompileError`].
pub fn compile(
    source: &str,
    mode: compile::Mode,
    source_path: String,
    opts: CompileOpts,
) -> Result<CodeObject, CompileError> {
    let parser_mode = match mode {
        compile::Mode::Single | compile::Mode::BlockExpr => parser::Mode::Interactive,
        compile::Mode::Eval => parser::Mode::Expression,
        compile::Mode::Exec => parser::Mode::Module,
    };

    // An explicit `match` (rather than `map_err` + `?`) keeps `source_path`
    // available for `compile_top` below.
    let parsed = match parser::parse(source, parser_mode) {
        Ok(tree) => tree,
        Err(parse_error) => {
            return Err(CompileError::from_parse(parse_error, source, source_path));
        }
    };

    let ast = if opts.optimize > 0 {
        ConstantOptimizer::new()
            .fold_mod(parsed)
            .unwrap_or_else(|never| match never {})
    } else {
        parsed
    };

    let compiled = compile::compile_top(&ast, source_path, mode, opts);
    compiled.map_err(|codegen_error| CompileError::from_compile(codegen_error, source))
}
/// Builds the symbol table for `source`.
///
/// `Mode::Eval` parses the source as a single expression; every other mode
/// parses it as a program. Parser and symbol-table failures are both
/// returned as [`CompileError`].
pub fn compile_symtable(
    source: &str,
    mode: compile::Mode,
    source_path: &str,
) -> Result<symboltable::SymbolTable, CompileError> {
    let wrap_parse_error = |err| CompileError::from_parse(err, source, source_path.to_owned());

    let table = match mode {
        compile::Mode::Eval => {
            let expr = parser::parse_expression(source).map_err(wrap_parse_error)?;
            symboltable::make_symbol_table_expr(&expr)
        }
        compile::Mode::Exec | compile::Mode::Single | compile::Mode::BlockExpr => {
            let program = parser::parse_program(source).map_err(wrap_parse_error)?;
            symboltable::make_symbol_table(&program)
        }
    };

    table.map_err(|err| CompileError::from_symtable(err, source, source_path.to_owned()))
}
/// Returns the line of `source` that `loc` points at, with a trailing `\n`.
///
/// Yields `None` when the row or column of `loc` is zero, or when `source`
/// has fewer `\n`-separated lines than the row.
fn get_statement(source: &str, loc: Location) -> Option<String> {
    let (row, column) = (loc.row(), loc.column());
    if row == 0 || column == 0 {
        return None;
    }
    // Rows are 1-based, `nth` is 0-based.
    source.split('\n').nth(row - 1).map(|line| {
        let mut statement = String::with_capacity(line.len() + 1);
        statement.push_str(line);
        statement.push('\n');
        statement
    })
}

View file

@ -1,32 +0,0 @@
/// Compilation mode.
///
/// Derives `Debug`, `PartialEq` and `Eq` in addition to `Clone`/`Copy` so
/// that values can be logged, compared and used in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Selected by the string `"exec"`.
    Exec,
    /// Selected by the string `"eval"`.
    Eval,
    /// Selected by the string `"single"`.
    Single,
    /// Has no string form accepted by `FromStr`.
    BlockExpr,
}
impl std::str::FromStr for Mode {
    type Err = ModeParseError;

    // To support `builtins.compile()` `mode` argument
    /// Parses `"exec"`, `"eval"` or `"single"`; any other string is an error.
    fn from_str(s: &str) -> Result<Self, ModeParseError> {
        let mode = match s {
            "exec" => Mode::Exec,
            "eval" => Mode::Eval,
            "single" => Mode::Single,
            _ => return Err(ModeParseError { _priv: () }),
        };
        Ok(mode)
    }
}
/// Error returned when a string is not one of the accepted mode names.
#[derive(Debug)]
pub struct ModeParseError {
    // Private field: keeps the type from being constructed outside this module.
    _priv: (),
}

impl std::fmt::Display for ModeParseError {
    /// Writes a fixed message listing the accepted mode names.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str(r#"mode should be "exec", "eval", or "single""#)
    }
}

// Implemented so the type works with `?`, `Box<dyn Error>` and other
// error-handling code that expects `std::error::Error`.
impl std::error::Error for ModeParseError {}

View file

@ -1,14 +0,0 @@
---
source: compiler/src/compile.rs
expression: "compile_exec(\"\\\nif True and False and False:\n pass\n\")"
---
1 0 LoadConst (True)
1 JumpIfFalse (6)
2 LoadConst (False)
3 JumpIfFalse (6)
4 LoadConst (False)
5 JumpIfFalse (6)
2 >> 6 LoadConst (None)
7 ReturnValue

View file

@ -1,16 +0,0 @@
---
source: compiler/src/compile.rs
expression: "compile_exec(\"\\\nif (True and False) or (False and True):\n pass\n\")"
---
1 0 LoadConst (True)
1 JumpIfFalse (4)
2 LoadConst (False)
3 JumpIfTrue (8)
>> 4 LoadConst (False)
5 JumpIfFalse (8)
6 LoadConst (True)
7 JumpIfFalse (8)
2 >> 8 LoadConst (None)
9 ReturnValue

View file

@ -1,14 +0,0 @@
---
source: compiler/src/compile.rs
expression: "compile_exec(\"\\\nif True or False or False:\n pass\n\")"
---
1 0 LoadConst (True)
1 JumpIfTrue (6)
2 LoadConst (False)
3 JumpIfTrue (6)
4 LoadConst (False)
5 JumpIfFalse (6)
2 >> 6 LoadConst (None)
7 ReturnValue

View file

@ -1,87 +0,0 @@
---
source: compiler/src/compile.rs
expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n with self.subTest(type=type(stop_exc)):\n try:\n async with woohoo():\n raise stop_exc\n except Exception as ex:\n self.assertIs(ex, stop_exc)\n else:\n self.fail(f'{stop_exc} was suppressed')\n\")"
---
1 0 SetupLoop (69)
1 LoadNameAny (0, StopIteration)
2 LoadConst ("spam")
3 CallFunctionPositional (1)
4 LoadNameAny (1, StopAsyncIteration)
5 LoadConst ("ham")
6 CallFunctionPositional (1)
7 BuildTuple (2, false)
8 GetIter
>> 9 ForIter (68)
10 StoreLocal (2, stop_exc)
2 11 LoadNameAny (3, self)
12 LoadMethod (subTest)
13 LoadNameAny (5, type)
14 LoadNameAny (2, stop_exc)
15 CallFunctionPositional (1)
16 LoadConst (("type"))
17 CallMethodKeyword (1)
18 SetupWith (65)
19 Pop
3 20 SetupExcept (40)
4 21 LoadNameAny (6, woohoo)
22 CallFunctionPositional (0)
23 BeforeAsyncWith
24 GetAwaitable
25 LoadConst (None)
26 YieldFrom
27 SetupAsyncWith (33)
28 Pop
5 29 LoadNameAny (2, stop_exc)
30 Raise (Raise)
4 31 PopBlock
32 EnterFinally
>> 33 WithCleanupStart
34 GetAwaitable
35 LoadConst (None)
36 YieldFrom
37 WithCleanupFinish
38 PopBlock
39 Jump (54)
>> 40 Duplicate
6 41 LoadNameAny (7, Exception)
42 TestOperation (ExceptionMatch)
43 JumpIfFalse (53)
44 StoreLocal (8, ex)
7 45 LoadNameAny (3, self)
46 LoadMethod (assertIs)
47 LoadNameAny (8, ex)
48 LoadNameAny (2, stop_exc)
49 CallMethodPositional (2)
50 Pop
51 PopException
52 Jump (63)
>> 53 Raise (Reraise)
9 >> 54 LoadNameAny (3, self)
55 LoadMethod (fail)
56 LoadConst ("")
1 57 LoadNameAny (2, stop_exc)
58 FormatValue (None)
9 59 LoadConst (" was suppressed")
60 BuildString (2)
61 CallMethodPositional (1)
62 Pop
2 >> 63 PopBlock
64 EnterFinally
>> 65 WithCleanupStart
66 WithCleanupFinish
67 Jump (9)
>> 68 PopBlock
>> 69 LoadConst (None)
70 ReturnValue

File diff suppressed because it is too large Load diff