Switch from 64-bit instruction enum to out-of-line arg values

This commit is contained in:
Noa 2022-12-09 20:32:48 -06:00
parent 5cc208cc43
commit da96cecfca
6 changed files with 1064 additions and 616 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,12 +1,47 @@
use std::ops;
use crate::IndexSet; use crate::IndexSet;
use rustpython_compiler_core::{CodeFlags, CodeObject, ConstantData, Instruction, Label, Location}; use rustpython_compiler_core::{
CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label,
Location, OpArg,
};
pub type BlockIdx = Label; #[derive(Copy, Clone, PartialEq, Eq, Debug)]
/// Position of a `Block` inside a block list, stored compactly as a `u32`.
pub struct BlockIdx(pub u32);

impl BlockIdx {
    /// Sentinel (`u32::MAX`) standing for "no block".
    pub const NULL: Self = Self(u32::MAX);

    /// Widens the stored `u32` into a `usize` suitable for slice indexing.
    const fn idx(self) -> usize {
        let Self(raw) = self;
        raw as usize
    }
}
/// Lets a slice of blocks be indexed directly by a `BlockIdx`.
impl ops::Index<BlockIdx> for [Block] {
    type Output = Block;

    /// Borrows the block at the `usize` position that `idx` converts to.
    fn index(&self, idx: BlockIdx) -> &Block {
        let position = idx.idx();
        &self[position]
    }
}
/// Mutable counterpart of indexing a block slice by `BlockIdx`.
impl ops::IndexMut<BlockIdx> for [Block] {
    /// Mutably borrows the block at the `usize` position that `idx` converts to.
    fn index_mut(&mut self, idx: BlockIdx) -> &mut Block {
        let position = idx.idx();
        &mut self[position]
    }
}
/// Lets a `Vec<Block>` be indexed directly by a `BlockIdx`.
impl ops::Index<BlockIdx> for Vec<Block> {
    type Output = Block;

    /// Borrows the block at the `usize` position that `idx` converts to.
    fn index(&self, idx: BlockIdx) -> &Block {
        let position = idx.idx();
        &self[position]
    }
}
/// Mutable counterpart of indexing a `Vec<Block>` by `BlockIdx`.
impl ops::IndexMut<BlockIdx> for Vec<Block> {
    /// Mutably borrows the block at the `usize` position that `idx` converts to.
    fn index_mut(&mut self, idx: BlockIdx) -> &mut Block {
        let position = idx.idx();
        &mut self[position]
    }
}
#[derive(Debug)] #[derive(Debug, Copy, Clone)]
pub struct InstructionInfo { pub struct InstructionInfo {
/// If the instruction has a Label argument, it's actually a BlockIdx, not a code offset
pub instr: Instruction, pub instr: Instruction,
pub arg: OpArg,
pub target: BlockIdx,
pub location: Location, pub location: Location,
} }
@ -21,7 +56,7 @@ impl Default for Block {
fn default() -> Self { fn default() -> Self {
Block { Block {
instructions: Vec::new(), instructions: Vec::new(),
next: Label(u32::MAX), next: BlockIdx::NULL,
} }
} }
} }
@ -45,13 +80,13 @@ pub struct CodeInfo {
} }
impl CodeInfo { impl CodeInfo {
pub fn finalize_code(mut self, optimize: u8) -> CodeObject { pub fn finalize_code(mut self, optimize: u8) -> CodeObject {
let max_stackdepth = self.max_stackdepth();
let cell2arg = self.cell2arg();
if optimize > 0 { if optimize > 0 {
self.dce(); self.dce();
} }
let max_stackdepth = self.max_stackdepth();
let cell2arg = self.cell2arg();
let CodeInfo { let CodeInfo {
flags, flags,
posonlyarg_count, posonlyarg_count,
@ -61,7 +96,7 @@ impl CodeInfo {
first_line_number, first_line_number,
obj_name, obj_name,
blocks, mut blocks,
current_block: _, current_block: _,
constants, constants,
name_cache, name_cache,
@ -70,26 +105,50 @@ impl CodeInfo {
freevar_cache, freevar_cache,
} = self; } = self;
let mut num_instructions = 0; let mut instructions = Vec::new();
let mut locations = Vec::new();
let mut block_to_offset = vec![Label(0); blocks.len()]; let mut block_to_offset = vec![Label(0); blocks.len()];
loop {
for (idx, block) in iter_blocks(&blocks) { let mut num_instructions = 0;
block_to_offset[idx.0 as usize] = Label(num_instructions as u32); for (idx, block) in iter_blocks(&blocks) {
num_instructions += block.instructions.len(); block_to_offset[idx.idx()] = Label(num_instructions as u32);
} for instr in &block.instructions {
num_instructions += instr.arg.instr_size()
let mut instructions = Vec::with_capacity(num_instructions);
let mut locations = Vec::with_capacity(num_instructions);
for (_, block) in iter_blocks(&blocks) {
for info in &block.instructions {
let mut instr = info.instr.clone();
if let Some(l) = instr.label_arg_mut() {
*l = block_to_offset[l.0 as usize];
} }
instructions.push(instr);
locations.push(info.location);
} }
instructions.reserve_exact(num_instructions);
locations.reserve_exact(num_instructions);
let mut recompile_extended_arg = false;
let mut next_block = BlockIdx(0);
while next_block != BlockIdx::NULL {
let block = &mut blocks[next_block];
for info in &mut block.instructions {
let (op, arg, target) = (info.instr, &mut info.arg, info.target);
if target != BlockIdx::NULL {
let new_arg = OpArg(block_to_offset[target.idx()].0);
recompile_extended_arg |= new_arg.instr_size() != arg.instr_size();
*arg = new_arg;
}
let (extras, lo_arg) = arg.split();
locations.extend(std::iter::repeat(info.location).take(arg.instr_size()));
instructions.extend(
extras
.map(|byte| CodeUnit::new(Instruction::ExtendedArg, byte))
.chain([CodeUnit { op, arg: lo_arg }]),
);
}
next_block = block.next;
}
if !recompile_extended_arg {
break;
}
instructions.clear();
locations.clear()
} }
CodeObject { CodeObject {
@ -166,40 +225,48 @@ impl CodeInfo {
let mut stack = Vec::with_capacity(self.blocks.len()); let mut stack = Vec::with_capacity(self.blocks.len());
let mut startdepths = vec![u32::MAX; self.blocks.len()]; let mut startdepths = vec![u32::MAX; self.blocks.len()];
startdepths[0] = 0; startdepths[0] = 0;
stack.push(Label(0)); stack.push(BlockIdx(0));
const DEBUG: bool = false; const DEBUG: bool = false;
'process_blocks: while let Some(block) = stack.pop() { 'process_blocks: while let Some(block) = stack.pop() {
let mut depth = startdepths[block.0 as usize]; let mut depth = startdepths[block.idx()];
if DEBUG { if DEBUG {
eprintln!("===BLOCK {}===", block.0); eprintln!("===BLOCK {}===", block.0);
} }
let block = &self.blocks[block.0 as usize]; let block = &self.blocks[block];
for i in &block.instructions { for i in &block.instructions {
let instr = &i.instr; let instr = &i.instr;
let effect = instr.stack_effect(false); let effect = instr.stack_effect(i.arg, false);
if DEBUG { if DEBUG {
eprint!("{instr:?}: {depth} {effect:+} => "); let display_arg = if i.target == BlockIdx::NULL {
i.arg
} else {
OpArg(i.target.0)
};
let instr_display = instr.display(display_arg, self);
eprint!("{instr_display}: {depth} {effect:+} => ");
} }
let new_depth = add_ui(depth, effect); let new_depth = depth.checked_add_signed(effect).unwrap();
if DEBUG { if DEBUG {
eprintln!("{new_depth}"); eprintln!("{new_depth}");
} }
if new_depth > maxdepth { if new_depth > maxdepth {
maxdepth = new_depth maxdepth = new_depth
} }
// we don't want to worry about Continue, it uses unwinding to jump to // we don't want to worry about Break/Continue, they use unwinding to jump to
// its targets and as such the stack size is taken care of in frame.rs by setting // their targets and as such the stack size is taken care of in frame.rs by setting
// it back to the level it was at when SetupLoop was run // it back to the level it was at when SetupLoop was run
let jump_label = instr if i.target != BlockIdx::NULL
.label_arg() && !matches!(
.filter(|_| !matches!(instr, Instruction::Continue { .. })); instr,
if let Some(&target_block) = jump_label { Instruction::Continue { .. } | Instruction::Break { .. }
let effect = instr.stack_effect(true); )
let target_depth = add_ui(depth, effect); {
let effect = instr.stack_effect(i.arg, true);
let target_depth = depth.checked_add_signed(effect).unwrap();
if target_depth > maxdepth { if target_depth > maxdepth {
maxdepth = target_depth maxdepth = target_depth
} }
stackdepth_push(&mut stack, &mut startdepths, target_block, target_depth); stackdepth_push(&mut stack, &mut startdepths, i.target, target_depth);
} }
depth = new_depth; depth = new_depth;
if instr.unconditional_branch() { if instr.unconditional_branch() {
@ -215,23 +282,46 @@ impl CodeInfo {
} }
} }
fn stackdepth_push(stack: &mut Vec<Label>, startdepths: &mut [u32], target: Label, depth: u32) { impl InstrDisplayContext for CodeInfo {
let block_depth = &mut startdepths[target.0 as usize]; type Constant = ConstantData;
/// Returns a reference to the entry at position `i` of `self.constants`.
fn get_constant(&self, i: usize) -> &ConstantData {
&self.constants[i]
}
/// Returns the entry at position `i` of `self.name_cache`, borrowed as a `&str`.
fn get_name(&self, i: usize) -> &str {
self.name_cache[i].as_ref()
}
/// Returns the entry at position `i` of `self.varname_cache`, borrowed as a `&str`.
fn get_varname(&self, i: usize) -> &str {
self.varname_cache[i].as_ref()
}
/// Resolves index `i` to a name, trying `cellvar_cache` first.
///
/// When `cellvar_cache` has no entry at `i`, the name is taken from
/// `freevar_cache` at `i - cellvar_cache.len()`, i.e. the two caches are
/// addressed as one sequence with the cell variables coming first.
fn get_cellname(&self, i: usize) -> &str {
self.cellvar_cache
.get_index(i)
// Not in the cell-variable range: shift the index into `freevar_cache`.
.unwrap_or_else(|| &self.freevar_cache[i - self.cellvar_cache.len()])
.as_ref()
}
}
fn stackdepth_push(
stack: &mut Vec<BlockIdx>,
startdepths: &mut [u32],
target: BlockIdx,
depth: u32,
) {
let block_depth = &mut startdepths[target.idx()];
if *block_depth == u32::MAX || depth > *block_depth { if *block_depth == u32::MAX || depth > *block_depth {
*block_depth = depth; *block_depth = depth;
stack.push(target); stack.push(target);
} }
} }
fn add_ui(a: u32, b: i32) -> u32 {
if b < 0 {
a - b.wrapping_abs() as u32
} else {
a + b as u32
}
}
fn iter_blocks(blocks: &[Block]) -> impl Iterator<Item = (BlockIdx, &Block)> + '_ { fn iter_blocks(blocks: &[Block]) -> impl Iterator<Item = (BlockIdx, &Block)> + '_ {
let get_idx = move |i: BlockIdx| blocks.get(i.0 as usize).map(|b| (i, b)); let mut next = BlockIdx(0);
std::iter::successors(get_idx(Label(0)), move |(_, b)| get_idx(b.next)) // if b.next is u32::MAX that's the end std::iter::from_fn(move || {
if next == BlockIdx::NULL {
return None;
}
let (idx, b) = (next, &blocks[next]);
next = b.next;
Some((idx, b))
})
} }

View file

@ -2,23 +2,23 @@
source: compiler/codegen/src/compile.rs source: compiler/codegen/src/compile.rs
expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n with self.subTest(type=type(stop_exc)):\n try:\n async with woohoo():\n raise stop_exc\n except Exception as ex:\n self.assertIs(ex, stop_exc)\n else:\n self.fail(f'{stop_exc} was suppressed')\n\")" expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n with self.subTest(type=type(stop_exc)):\n try:\n async with woohoo():\n raise stop_exc\n except Exception as ex:\n self.assertIs(ex, stop_exc)\n else:\n self.fail(f'{stop_exc} was suppressed')\n\")"
--- ---
1 0 SetupLoop (69) 1 0 SetupLoop
1 LoadNameAny (0, StopIteration) 1 LoadNameAny (0, StopIteration)
2 LoadConst ("spam") 2 LoadConst ("spam")
3 CallFunctionPositional (1) 3 CallFunctionPositional(1)
4 LoadNameAny (1, StopAsyncIteration) 4 LoadNameAny (1, StopAsyncIteration)
5 LoadConst ("ham") 5 LoadConst ("ham")
6 CallFunctionPositional (1) 6 CallFunctionPositional(1)
7 BuildTuple (2, false) 7 BuildTuple (2)
8 GetIter 8 GetIter
>> 9 ForIter (68) >> 9 ForIter (68)
10 StoreLocal (2, stop_exc) 10 StoreLocal (2, stop_exc)
2 11 LoadNameAny (3, self) 2 11 LoadNameAny (3, self)
12 LoadMethod (subTest) 12 LoadMethod (4, subTest)
13 LoadNameAny (5, type) 13 LoadNameAny (5, type)
14 LoadNameAny (2, stop_exc) 14 LoadNameAny (2, stop_exc)
15 CallFunctionPositional (1) 15 CallFunctionPositional(1)
16 LoadConst (("type")) 16 LoadConst (("type"))
17 CallMethodKeyword (1) 17 CallMethodKeyword (1)
18 SetupWith (65) 18 SetupWith (65)
@ -27,7 +27,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn
3 20 SetupExcept (40) 3 20 SetupExcept (40)
4 21 LoadNameAny (6, woohoo) 4 21 LoadNameAny (6, woohoo)
22 CallFunctionPositional (0) 22 CallFunctionPositional(0)
23 BeforeAsyncWith 23 BeforeAsyncWith
24 GetAwaitable 24 GetAwaitable
25 LoadConst (None) 25 LoadConst (None)
@ -55,7 +55,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn
44 StoreLocal (8, ex) 44 StoreLocal (8, ex)
7 45 LoadNameAny (3, self) 7 45 LoadNameAny (3, self)
46 LoadMethod (assertIs) 46 LoadMethod (9, assertIs)
47 LoadNameAny (8, ex) 47 LoadNameAny (8, ex)
48 LoadNameAny (2, stop_exc) 48 LoadNameAny (2, stop_exc)
49 CallMethodPositional (2) 49 CallMethodPositional (2)
@ -65,7 +65,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn
>> 53 Raise (Reraise) >> 53 Raise (Reraise)
9 >> 54 LoadNameAny (3, self) 9 >> 54 LoadNameAny (3, self)
55 LoadMethod (fail) 55 LoadMethod (10, fail)
56 LoadConst ("") 56 LoadConst ("")
57 LoadNameAny (2, stop_exc) 57 LoadNameAny (2, stop_exc)
58 FormatValue (None) 58 FormatValue (None)
@ -80,6 +80,6 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn
66 WithCleanupFinish 66 WithCleanupFinish
67 Jump (9) 67 Jump (9)
>> 68 PopBlock >> 68 PopBlock
>> 69 LoadConst (None) 69 LoadConst (None)
70 ReturnValue 70 ReturnValue

View file

@ -15,6 +15,6 @@ itertools = "0.10.3"
lz4_flex = "0.9.2" lz4_flex = "0.9.2"
num-bigint = { version = "0.4.3", features = ["serde"] } num-bigint = { version = "0.4.3", features = ["serde"] }
num-complex = { version = "0.4.0", features = ["serde"] } num-complex = { version = "0.4.0", features = ["serde"] }
num_enum = "0.5.7"
serde = { version = "1.0.136", features = ["derive"] } serde = { version = "1.0.136", features = ["derive"] }
static_assertions = "1.1.0"
thiserror = "1.0" thiserror = "1.0"

File diff suppressed because it is too large Load diff

View file

@ -6,8 +6,7 @@ use rustpython_parser::{
}; };
pub use rustpython_codegen::compile::CompileOpts; pub use rustpython_codegen::compile::CompileOpts;
pub use rustpython_compiler_core::CodeObject; pub use rustpython_compiler_core::{BaseError as CompileErrorBody, CodeObject, Mode};
pub use rustpython_compiler_core::{BaseError as CompileErrorBody, Mode};
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub enum CompileErrorType { pub enum CompileErrorType {
@ -29,7 +28,7 @@ pub fn compile(
source: &str, source: &str,
mode: compile::Mode, mode: compile::Mode,
source_path: String, source_path: String,
opts: compile::CompileOpts, opts: CompileOpts,
) -> Result<CodeObject, CompileError> { ) -> Result<CodeObject, CompileError> {
let mut ast = match parser::parse(source, mode.into(), &source_path) { let mut ast = match parser::parse(source, mode.into(), &source_path) {
Ok(x) => x, Ok(x) => x,