mirror of
https://github.com/RustPython/Parser.git
synced 2025-08-29 14:54:56 +00:00
Switch from 64-bit instruction enum to out-of-line arg values
This commit is contained in:
parent
5cc208cc43
commit
da96cecfca
6 changed files with 1064 additions and 616 deletions
File diff suppressed because it is too large
Load diff
|
@ -1,12 +1,47 @@
|
||||||
|
use std::ops;
|
||||||
|
|
||||||
use crate::IndexSet;
|
use crate::IndexSet;
|
||||||
use rustpython_compiler_core::{CodeFlags, CodeObject, ConstantData, Instruction, Label, Location};
|
use rustpython_compiler_core::{
|
||||||
|
CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label,
|
||||||
|
Location, OpArg,
|
||||||
|
};
|
||||||
|
|
||||||
pub type BlockIdx = Label;
|
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct BlockIdx(pub u32);
|
||||||
|
impl BlockIdx {
|
||||||
|
pub const NULL: BlockIdx = BlockIdx(u32::MAX);
|
||||||
|
const fn idx(self) -> usize {
|
||||||
|
self.0 as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl ops::Index<BlockIdx> for [Block] {
|
||||||
|
type Output = Block;
|
||||||
|
fn index(&self, idx: BlockIdx) -> &Block {
|
||||||
|
&self[idx.idx()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl ops::IndexMut<BlockIdx> for [Block] {
|
||||||
|
fn index_mut(&mut self, idx: BlockIdx) -> &mut Block {
|
||||||
|
&mut self[idx.idx()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl ops::Index<BlockIdx> for Vec<Block> {
|
||||||
|
type Output = Block;
|
||||||
|
fn index(&self, idx: BlockIdx) -> &Block {
|
||||||
|
&self[idx.idx()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl ops::IndexMut<BlockIdx> for Vec<Block> {
|
||||||
|
fn index_mut(&mut self, idx: BlockIdx) -> &mut Block {
|
||||||
|
&mut self[idx.idx()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Copy, Clone)]
|
||||||
pub struct InstructionInfo {
|
pub struct InstructionInfo {
|
||||||
/// If the instruction has a Label argument, it's actually a BlockIdx, not a code offset
|
|
||||||
pub instr: Instruction,
|
pub instr: Instruction,
|
||||||
|
pub arg: OpArg,
|
||||||
|
pub target: BlockIdx,
|
||||||
pub location: Location,
|
pub location: Location,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,7 +56,7 @@ impl Default for Block {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Block {
|
Block {
|
||||||
instructions: Vec::new(),
|
instructions: Vec::new(),
|
||||||
next: Label(u32::MAX),
|
next: BlockIdx::NULL,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -45,13 +80,13 @@ pub struct CodeInfo {
|
||||||
}
|
}
|
||||||
impl CodeInfo {
|
impl CodeInfo {
|
||||||
pub fn finalize_code(mut self, optimize: u8) -> CodeObject {
|
pub fn finalize_code(mut self, optimize: u8) -> CodeObject {
|
||||||
let max_stackdepth = self.max_stackdepth();
|
|
||||||
let cell2arg = self.cell2arg();
|
|
||||||
|
|
||||||
if optimize > 0 {
|
if optimize > 0 {
|
||||||
self.dce();
|
self.dce();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let max_stackdepth = self.max_stackdepth();
|
||||||
|
let cell2arg = self.cell2arg();
|
||||||
|
|
||||||
let CodeInfo {
|
let CodeInfo {
|
||||||
flags,
|
flags,
|
||||||
posonlyarg_count,
|
posonlyarg_count,
|
||||||
|
@ -61,7 +96,7 @@ impl CodeInfo {
|
||||||
first_line_number,
|
first_line_number,
|
||||||
obj_name,
|
obj_name,
|
||||||
|
|
||||||
blocks,
|
mut blocks,
|
||||||
current_block: _,
|
current_block: _,
|
||||||
constants,
|
constants,
|
||||||
name_cache,
|
name_cache,
|
||||||
|
@ -70,26 +105,50 @@ impl CodeInfo {
|
||||||
freevar_cache,
|
freevar_cache,
|
||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
let mut num_instructions = 0;
|
let mut instructions = Vec::new();
|
||||||
|
let mut locations = Vec::new();
|
||||||
|
|
||||||
let mut block_to_offset = vec![Label(0); blocks.len()];
|
let mut block_to_offset = vec![Label(0); blocks.len()];
|
||||||
|
loop {
|
||||||
for (idx, block) in iter_blocks(&blocks) {
|
let mut num_instructions = 0;
|
||||||
block_to_offset[idx.0 as usize] = Label(num_instructions as u32);
|
for (idx, block) in iter_blocks(&blocks) {
|
||||||
num_instructions += block.instructions.len();
|
block_to_offset[idx.idx()] = Label(num_instructions as u32);
|
||||||
}
|
for instr in &block.instructions {
|
||||||
|
num_instructions += instr.arg.instr_size()
|
||||||
let mut instructions = Vec::with_capacity(num_instructions);
|
|
||||||
let mut locations = Vec::with_capacity(num_instructions);
|
|
||||||
|
|
||||||
for (_, block) in iter_blocks(&blocks) {
|
|
||||||
for info in &block.instructions {
|
|
||||||
let mut instr = info.instr.clone();
|
|
||||||
if let Some(l) = instr.label_arg_mut() {
|
|
||||||
*l = block_to_offset[l.0 as usize];
|
|
||||||
}
|
}
|
||||||
instructions.push(instr);
|
|
||||||
locations.push(info.location);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
instructions.reserve_exact(num_instructions);
|
||||||
|
locations.reserve_exact(num_instructions);
|
||||||
|
|
||||||
|
let mut recompile_extended_arg = false;
|
||||||
|
let mut next_block = BlockIdx(0);
|
||||||
|
while next_block != BlockIdx::NULL {
|
||||||
|
let block = &mut blocks[next_block];
|
||||||
|
for info in &mut block.instructions {
|
||||||
|
let (op, arg, target) = (info.instr, &mut info.arg, info.target);
|
||||||
|
if target != BlockIdx::NULL {
|
||||||
|
let new_arg = OpArg(block_to_offset[target.idx()].0);
|
||||||
|
recompile_extended_arg |= new_arg.instr_size() != arg.instr_size();
|
||||||
|
*arg = new_arg;
|
||||||
|
}
|
||||||
|
let (extras, lo_arg) = arg.split();
|
||||||
|
locations.extend(std::iter::repeat(info.location).take(arg.instr_size()));
|
||||||
|
instructions.extend(
|
||||||
|
extras
|
||||||
|
.map(|byte| CodeUnit::new(Instruction::ExtendedArg, byte))
|
||||||
|
.chain([CodeUnit { op, arg: lo_arg }]),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
next_block = block.next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !recompile_extended_arg {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
instructions.clear();
|
||||||
|
locations.clear()
|
||||||
}
|
}
|
||||||
|
|
||||||
CodeObject {
|
CodeObject {
|
||||||
|
@ -166,40 +225,48 @@ impl CodeInfo {
|
||||||
let mut stack = Vec::with_capacity(self.blocks.len());
|
let mut stack = Vec::with_capacity(self.blocks.len());
|
||||||
let mut startdepths = vec![u32::MAX; self.blocks.len()];
|
let mut startdepths = vec![u32::MAX; self.blocks.len()];
|
||||||
startdepths[0] = 0;
|
startdepths[0] = 0;
|
||||||
stack.push(Label(0));
|
stack.push(BlockIdx(0));
|
||||||
const DEBUG: bool = false;
|
const DEBUG: bool = false;
|
||||||
'process_blocks: while let Some(block) = stack.pop() {
|
'process_blocks: while let Some(block) = stack.pop() {
|
||||||
let mut depth = startdepths[block.0 as usize];
|
let mut depth = startdepths[block.idx()];
|
||||||
if DEBUG {
|
if DEBUG {
|
||||||
eprintln!("===BLOCK {}===", block.0);
|
eprintln!("===BLOCK {}===", block.0);
|
||||||
}
|
}
|
||||||
let block = &self.blocks[block.0 as usize];
|
let block = &self.blocks[block];
|
||||||
for i in &block.instructions {
|
for i in &block.instructions {
|
||||||
let instr = &i.instr;
|
let instr = &i.instr;
|
||||||
let effect = instr.stack_effect(false);
|
let effect = instr.stack_effect(i.arg, false);
|
||||||
if DEBUG {
|
if DEBUG {
|
||||||
eprint!("{instr:?}: {depth} {effect:+} => ");
|
let display_arg = if i.target == BlockIdx::NULL {
|
||||||
|
i.arg
|
||||||
|
} else {
|
||||||
|
OpArg(i.target.0)
|
||||||
|
};
|
||||||
|
let instr_display = instr.display(display_arg, self);
|
||||||
|
eprint!("{instr_display}: {depth} {effect:+} => ");
|
||||||
}
|
}
|
||||||
let new_depth = add_ui(depth, effect);
|
let new_depth = depth.checked_add_signed(effect).unwrap();
|
||||||
if DEBUG {
|
if DEBUG {
|
||||||
eprintln!("{new_depth}");
|
eprintln!("{new_depth}");
|
||||||
}
|
}
|
||||||
if new_depth > maxdepth {
|
if new_depth > maxdepth {
|
||||||
maxdepth = new_depth
|
maxdepth = new_depth
|
||||||
}
|
}
|
||||||
// we don't want to worry about Continue, it uses unwinding to jump to
|
// we don't want to worry about Break/Continue, they use unwinding to jump to
|
||||||
// its targets and as such the stack size is taken care of in frame.rs by setting
|
// their targets and as such the stack size is taken care of in frame.rs by setting
|
||||||
// it back to the level it was at when SetupLoop was run
|
// it back to the level it was at when SetupLoop was run
|
||||||
let jump_label = instr
|
if i.target != BlockIdx::NULL
|
||||||
.label_arg()
|
&& !matches!(
|
||||||
.filter(|_| !matches!(instr, Instruction::Continue { .. }));
|
instr,
|
||||||
if let Some(&target_block) = jump_label {
|
Instruction::Continue { .. } | Instruction::Break { .. }
|
||||||
let effect = instr.stack_effect(true);
|
)
|
||||||
let target_depth = add_ui(depth, effect);
|
{
|
||||||
|
let effect = instr.stack_effect(i.arg, true);
|
||||||
|
let target_depth = depth.checked_add_signed(effect).unwrap();
|
||||||
if target_depth > maxdepth {
|
if target_depth > maxdepth {
|
||||||
maxdepth = target_depth
|
maxdepth = target_depth
|
||||||
}
|
}
|
||||||
stackdepth_push(&mut stack, &mut startdepths, target_block, target_depth);
|
stackdepth_push(&mut stack, &mut startdepths, i.target, target_depth);
|
||||||
}
|
}
|
||||||
depth = new_depth;
|
depth = new_depth;
|
||||||
if instr.unconditional_branch() {
|
if instr.unconditional_branch() {
|
||||||
|
@ -215,23 +282,46 @@ impl CodeInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn stackdepth_push(stack: &mut Vec<Label>, startdepths: &mut [u32], target: Label, depth: u32) {
|
impl InstrDisplayContext for CodeInfo {
|
||||||
let block_depth = &mut startdepths[target.0 as usize];
|
type Constant = ConstantData;
|
||||||
|
fn get_constant(&self, i: usize) -> &ConstantData {
|
||||||
|
&self.constants[i]
|
||||||
|
}
|
||||||
|
fn get_name(&self, i: usize) -> &str {
|
||||||
|
self.name_cache[i].as_ref()
|
||||||
|
}
|
||||||
|
fn get_varname(&self, i: usize) -> &str {
|
||||||
|
self.varname_cache[i].as_ref()
|
||||||
|
}
|
||||||
|
fn get_cellname(&self, i: usize) -> &str {
|
||||||
|
self.cellvar_cache
|
||||||
|
.get_index(i)
|
||||||
|
.unwrap_or_else(|| &self.freevar_cache[i - self.cellvar_cache.len()])
|
||||||
|
.as_ref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn stackdepth_push(
|
||||||
|
stack: &mut Vec<BlockIdx>,
|
||||||
|
startdepths: &mut [u32],
|
||||||
|
target: BlockIdx,
|
||||||
|
depth: u32,
|
||||||
|
) {
|
||||||
|
let block_depth = &mut startdepths[target.idx()];
|
||||||
if *block_depth == u32::MAX || depth > *block_depth {
|
if *block_depth == u32::MAX || depth > *block_depth {
|
||||||
*block_depth = depth;
|
*block_depth = depth;
|
||||||
stack.push(target);
|
stack.push(target);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_ui(a: u32, b: i32) -> u32 {
|
|
||||||
if b < 0 {
|
|
||||||
a - b.wrapping_abs() as u32
|
|
||||||
} else {
|
|
||||||
a + b as u32
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn iter_blocks(blocks: &[Block]) -> impl Iterator<Item = (BlockIdx, &Block)> + '_ {
|
fn iter_blocks(blocks: &[Block]) -> impl Iterator<Item = (BlockIdx, &Block)> + '_ {
|
||||||
let get_idx = move |i: BlockIdx| blocks.get(i.0 as usize).map(|b| (i, b));
|
let mut next = BlockIdx(0);
|
||||||
std::iter::successors(get_idx(Label(0)), move |(_, b)| get_idx(b.next)) // if b.next is u32::MAX that's the end
|
std::iter::from_fn(move || {
|
||||||
|
if next == BlockIdx::NULL {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let (idx, b) = (next, &blocks[next]);
|
||||||
|
next = b.next;
|
||||||
|
Some((idx, b))
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,23 +2,23 @@
|
||||||
source: compiler/codegen/src/compile.rs
|
source: compiler/codegen/src/compile.rs
|
||||||
expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n with self.subTest(type=type(stop_exc)):\n try:\n async with woohoo():\n raise stop_exc\n except Exception as ex:\n self.assertIs(ex, stop_exc)\n else:\n self.fail(f'{stop_exc} was suppressed')\n\")"
|
expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n with self.subTest(type=type(stop_exc)):\n try:\n async with woohoo():\n raise stop_exc\n except Exception as ex:\n self.assertIs(ex, stop_exc)\n else:\n self.fail(f'{stop_exc} was suppressed')\n\")"
|
||||||
---
|
---
|
||||||
1 0 SetupLoop (69)
|
1 0 SetupLoop
|
||||||
1 LoadNameAny (0, StopIteration)
|
1 LoadNameAny (0, StopIteration)
|
||||||
2 LoadConst ("spam")
|
2 LoadConst ("spam")
|
||||||
3 CallFunctionPositional (1)
|
3 CallFunctionPositional(1)
|
||||||
4 LoadNameAny (1, StopAsyncIteration)
|
4 LoadNameAny (1, StopAsyncIteration)
|
||||||
5 LoadConst ("ham")
|
5 LoadConst ("ham")
|
||||||
6 CallFunctionPositional (1)
|
6 CallFunctionPositional(1)
|
||||||
7 BuildTuple (2, false)
|
7 BuildTuple (2)
|
||||||
8 GetIter
|
8 GetIter
|
||||||
>> 9 ForIter (68)
|
>> 9 ForIter (68)
|
||||||
10 StoreLocal (2, stop_exc)
|
10 StoreLocal (2, stop_exc)
|
||||||
|
|
||||||
2 11 LoadNameAny (3, self)
|
2 11 LoadNameAny (3, self)
|
||||||
12 LoadMethod (subTest)
|
12 LoadMethod (4, subTest)
|
||||||
13 LoadNameAny (5, type)
|
13 LoadNameAny (5, type)
|
||||||
14 LoadNameAny (2, stop_exc)
|
14 LoadNameAny (2, stop_exc)
|
||||||
15 CallFunctionPositional (1)
|
15 CallFunctionPositional(1)
|
||||||
16 LoadConst (("type"))
|
16 LoadConst (("type"))
|
||||||
17 CallMethodKeyword (1)
|
17 CallMethodKeyword (1)
|
||||||
18 SetupWith (65)
|
18 SetupWith (65)
|
||||||
|
@ -27,7 +27,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn
|
||||||
3 20 SetupExcept (40)
|
3 20 SetupExcept (40)
|
||||||
|
|
||||||
4 21 LoadNameAny (6, woohoo)
|
4 21 LoadNameAny (6, woohoo)
|
||||||
22 CallFunctionPositional (0)
|
22 CallFunctionPositional(0)
|
||||||
23 BeforeAsyncWith
|
23 BeforeAsyncWith
|
||||||
24 GetAwaitable
|
24 GetAwaitable
|
||||||
25 LoadConst (None)
|
25 LoadConst (None)
|
||||||
|
@ -55,7 +55,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn
|
||||||
44 StoreLocal (8, ex)
|
44 StoreLocal (8, ex)
|
||||||
|
|
||||||
7 45 LoadNameAny (3, self)
|
7 45 LoadNameAny (3, self)
|
||||||
46 LoadMethod (assertIs)
|
46 LoadMethod (9, assertIs)
|
||||||
47 LoadNameAny (8, ex)
|
47 LoadNameAny (8, ex)
|
||||||
48 LoadNameAny (2, stop_exc)
|
48 LoadNameAny (2, stop_exc)
|
||||||
49 CallMethodPositional (2)
|
49 CallMethodPositional (2)
|
||||||
|
@ -65,7 +65,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn
|
||||||
>> 53 Raise (Reraise)
|
>> 53 Raise (Reraise)
|
||||||
|
|
||||||
9 >> 54 LoadNameAny (3, self)
|
9 >> 54 LoadNameAny (3, self)
|
||||||
55 LoadMethod (fail)
|
55 LoadMethod (10, fail)
|
||||||
56 LoadConst ("")
|
56 LoadConst ("")
|
||||||
57 LoadNameAny (2, stop_exc)
|
57 LoadNameAny (2, stop_exc)
|
||||||
58 FormatValue (None)
|
58 FormatValue (None)
|
||||||
|
@ -80,6 +80,6 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn
|
||||||
66 WithCleanupFinish
|
66 WithCleanupFinish
|
||||||
67 Jump (9)
|
67 Jump (9)
|
||||||
>> 68 PopBlock
|
>> 68 PopBlock
|
||||||
>> 69 LoadConst (None)
|
69 LoadConst (None)
|
||||||
70 ReturnValue
|
70 ReturnValue
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,6 @@ itertools = "0.10.3"
|
||||||
lz4_flex = "0.9.2"
|
lz4_flex = "0.9.2"
|
||||||
num-bigint = { version = "0.4.3", features = ["serde"] }
|
num-bigint = { version = "0.4.3", features = ["serde"] }
|
||||||
num-complex = { version = "0.4.0", features = ["serde"] }
|
num-complex = { version = "0.4.0", features = ["serde"] }
|
||||||
|
num_enum = "0.5.7"
|
||||||
serde = { version = "1.0.136", features = ["derive"] }
|
serde = { version = "1.0.136", features = ["derive"] }
|
||||||
static_assertions = "1.1.0"
|
|
||||||
thiserror = "1.0"
|
thiserror = "1.0"
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -6,8 +6,7 @@ use rustpython_parser::{
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use rustpython_codegen::compile::CompileOpts;
|
pub use rustpython_codegen::compile::CompileOpts;
|
||||||
pub use rustpython_compiler_core::CodeObject;
|
pub use rustpython_compiler_core::{BaseError as CompileErrorBody, CodeObject, Mode};
|
||||||
pub use rustpython_compiler_core::{BaseError as CompileErrorBody, Mode};
|
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum CompileErrorType {
|
pub enum CompileErrorType {
|
||||||
|
@ -29,7 +28,7 @@ pub fn compile(
|
||||||
source: &str,
|
source: &str,
|
||||||
mode: compile::Mode,
|
mode: compile::Mode,
|
||||||
source_path: String,
|
source_path: String,
|
||||||
opts: compile::CompileOpts,
|
opts: CompileOpts,
|
||||||
) -> Result<CodeObject, CompileError> {
|
) -> Result<CodeObject, CompileError> {
|
||||||
let mut ast = match parser::parse(source, mode.into(), &source_path) {
|
let mut ast = match parser::parse(source, mode.into(), &source_path) {
|
||||||
Ok(x) => x,
|
Ok(x) => x,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue