Control flow graph: setup (#17064)

This PR contains the scaffolding for a new control flow graph
implementation, along with its application to the `unreachable` rule. At
the moment, the implementation is a maximal over-approximation: no
control flow is modeled and all statements are counted as reachable.
With each additional statement type we support, this approximation will
improve.

So this PR just contains:
- A `ControlFlowGraph` struct and builder
- Support for printing the flow graph as a Mermaid graph
- Snapshot tests for the actual graphs
- A (very bad!) reimplementation of `unreachable` using the new structs
- Snapshot tests for `unreachable`

# Instructions for Viewing Mermaid snapshots
Unfortunately I don't know how to convince GitHub to render the Mermaid
graphs in the snapshots. However, you can view these locally in VSCode
if you install an extension that supports Mermaid graphs in Markdown,
and then add this to your `settings.json`:

```json
"files.associations": {
  "*.md.snap": "markdown",
}
```
This commit is contained in:
Dylan 2025-04-01 05:53:42 -05:00 committed by GitHub
parent 0073fd4945
commit aa93005d8d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 775 additions and 6244 deletions

View file

@ -27,6 +27,8 @@ serde = { workspace = true, optional = true }
smallvec = { workspace = true }
[dev-dependencies]
insta = { workspace = true, features = ["filters", "json", "redactions"] }
test-case = { workspace = true }
ruff_python_parser = { workspace = true }
[lints]

View file

@ -0,0 +1,24 @@
def func(): ...


def func():
    pass


def func():
    x = 1
    x = 2


def func():
    foo()


def func():
    from foo import bar

    class C:
        a = 1

    c = C()
    del c

View file

@ -0,0 +1,293 @@
use ruff_index::{newtype_index, IndexVec};
use ruff_python_ast::Stmt;
use ruff_text_size::{Ranged, TextRange};
use smallvec::{smallvec, SmallVec};
/// Builds the [`ControlFlowGraph`] for a sequence of statements.
///
/// A `CFGBuilder` is created with capacity equal to the number of
/// statements, fed the whole slice, and then consumed to yield the graph.
pub fn build_cfg(stmts: &[Stmt]) -> ControlFlowGraph<'_> {
    let mut cfg_builder = CFGBuilder::with_capacity(stmts.len());
    cfg_builder.process_stmts(stmts);
    cfg_builder.finish()
}
/// Control flow graph.
///
/// Owns the basic blocks (which borrow their statements for `'stmt`) and
/// remembers which block is the entry point and which is the terminal one.
#[derive(Debug)]
pub struct ControlFlowGraph<'stmt> {
    /// Basic blocks - the nodes of the control flow graph
    blocks: IndexVec<BlockId, BlockData<'stmt>>,
    /// Entry point to the control flow graph
    initial: BlockId,
    /// Terminal block - will always be empty
    terminal: BlockId,
}
impl<'stmt> ControlFlowGraph<'stmt> {
/// Index of entry point to the control flow graph
pub fn initial(&self) -> BlockId {
self.initial
}
/// Index of terminal block
pub fn terminal(&self) -> BlockId {
self.terminal
}
/// Number of basic blocks, or nodes, in the graph
pub fn num_blocks(&self) -> usize {
self.blocks.len()
}
/// Returns the statements comprising the basic block at the given index
pub fn stmts(&self, block: BlockId) -> &'stmt [Stmt] {
self.blocks[block].stmts
}
/// Returns the range of the statements comprising the basic block at the given index
pub fn range(&self, block: BlockId) -> TextRange {
self.blocks[block].range()
}
/// Returns the [`Edges`] going out of the basic block at the given index
pub fn outgoing(&self, block: BlockId) -> &Edges {
&self.blocks[block].out
}
/// Returns an iterator over the indices of the direct predecessors of the block at the given index
pub fn predecessors(&self, block: BlockId) -> impl ExactSizeIterator<Item = BlockId> + '_ {
self.blocks[block].parents.iter().copied()
}
/// Returns the [`BlockKind`] of the block at the given index
pub(crate) fn kind(&self, block: BlockId) -> BlockKind {
self.blocks[block].kind
}
}
/// Index of a basic block inside a [`ControlFlowGraph`]'s block vector.
#[newtype_index]
pub struct BlockId;
/// Holds the data of a basic block. A basic block consists of a collection of
/// [`Stmt`]s, together with outgoing edges to other basic blocks.
///
/// The `Default` value is a [`BlockKind::Generic`] block with no statements,
/// no outgoing edges and no parents.
#[derive(Debug, Default)]
struct BlockData<'stmt> {
    /// Role of this block within the graph (generic, start or terminal)
    kind: BlockKind,
    /// Slice of statements regarded as executing unconditionally in order
    stmts: &'stmt [Stmt],
    /// Outgoing edges, indicating possible paths of execution after the
    /// block has concluded
    out: Edges,
    /// Collection of indices for basic blocks having the current
    /// block as the target of an edge
    parents: SmallVec<[BlockId; 2]>,
}
impl Ranged for BlockData<'_> {
    /// Range running from the start of the first statement to the end of the
    /// last one; the default range when the block holds no statements.
    fn range(&self) -> TextRange {
        match (self.stmts.first(), self.stmts.last()) {
            (Some(head), Some(tail)) => TextRange::new(head.start(), tail.end()),
            _ => TextRange::default(),
        }
    }
}
/// Role a basic block plays within the control flow graph.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) enum BlockKind {
    /// Ordinary block; this is the default kind
    #[default]
    Generic,
    /// Entry point of the control flow graph
    Start,
    /// Terminal block for the control flow graph
    Terminal,
}
/// Holds a collection of edges. Each edge is determined by:
/// - a [`Condition`] for traversing the edge, and
/// - a target block, specified by its [`BlockId`].
///
/// The conditions and targets are kept in two separate
/// vectors which must always be kept the same length.
#[derive(Debug, Default, Clone)]
pub struct Edges {
    /// Condition of each edge; entry `i` pairs with `targets[i]`
    conditions: SmallVec<[Condition; 4]>,
    /// Target block of each edge; entry `i` pairs with `conditions[i]`
    targets: SmallVec<[BlockId; 4]>,
}
impl Edges {
    /// Builds a single edge to `target` that is always taken.
    fn always(target: BlockId) -> Self {
        let conditions = smallvec![Condition::Always];
        let targets = smallvec![target];
        Self {
            conditions,
            targets,
        }
    }

    /// Iterates over the blocks these edges point to, in edge order.
    pub fn targets(&self) -> impl ExactSizeIterator<Item = BlockId> + '_ {
        self.targets.iter().copied()
    }

    /// Iterates over the [`Condition`] guarding each edge, in edge order.
    pub fn conditions(&self) -> impl ExactSizeIterator<Item = &Condition> {
        self.conditions.iter()
    }

    /// Whether there are no edges at all.
    fn is_empty(&self) -> bool {
        self.targets.is_empty()
    }

    /// Iterates over the targets of those edges whose [`Condition`]
    /// satisfies `predicate`.
    ///
    /// The predicate is evaluated lazily, once per edge, as the returned
    /// iterator is advanced.
    pub fn filter_targets_by_conditions<'a, T: FnMut(&Condition) -> bool + 'a>(
        &'a self,
        mut predicate: T,
    ) -> impl Iterator<Item = BlockId> + 'a {
        self.conditions()
            .zip(self.targets())
            .filter_map(move |(condition, target)| predicate(condition).then_some(target))
    }
}
/// Represents a condition to be tested in a multi-way branch.
///
/// Only the unconditional case exists in this version of the enum.
#[derive(Debug, Clone)]
pub enum Condition {
    /// Unconditional edge
    Always,
}
/// Incrementally constructs a [`ControlFlowGraph`] while tracking which
/// block is being filled and which block control flow should exit to.
struct CFGBuilder<'stmt> {
    /// Control flow graph under construction
    cfg: ControlFlowGraph<'stmt>,
    /// Current basic block index
    current: BlockId,
    /// Exit block index for current control flow
    exit: BlockId,
}
impl<'stmt> CFGBuilder<'stmt> {
    /// Returns [`CFGBuilder`] with vector of blocks initialized at given capacity
    /// and with both initial and terminal blocks populated.
    ///
    /// The builder starts out positioned on the initial block, with the
    /// terminal block as its exit.
    fn with_capacity(capacity: usize) -> Self {
        let mut blocks = IndexVec::with_capacity(capacity);
        let initial = blocks.push(BlockData {
            kind: BlockKind::Start,
            ..BlockData::default()
        });
        let terminal = blocks.push(BlockData {
            kind: BlockKind::Terminal,
            ..BlockData::default()
        });
        Self {
            cfg: ControlFlowGraph {
                blocks,
                initial,
                terminal,
            },
            current: initial,
            exit: terminal,
        }
    }

    /// Runs the core logic for the builder.
    ///
    /// Every statement kind is currently a no-op in the `match` below, so
    /// the whole slice ends up in the current block, which is then connected
    /// to the exit block with an unconditional edge.
    fn process_stmts(&mut self, stmts: &'stmt [Stmt]) {
        // Index of the first statement not yet assigned to a block. It is
        // never advanced in this version, so it stays at the start of the slice.
        let start = 0;
        for stmt in stmts {
            let cache_exit = self.exit();
            match stmt {
                Stmt::FunctionDef(_)
                | Stmt::ClassDef(_)
                | Stmt::Assign(_)
                | Stmt::AugAssign(_)
                | Stmt::AnnAssign(_)
                | Stmt::TypeAlias(_)
                | Stmt::Import(_)
                | Stmt::ImportFrom(_)
                | Stmt::Global(_)
                | Stmt::Nonlocal(_)
                | Stmt::Expr(_)
                | Stmt::Pass(_)
                | Stmt::Delete(_)
                | Stmt::IpyEscapeCommand(_) => {}
                // Loops
                Stmt::While(_) => {}
                Stmt::For(_) => {}

                // Switch statements
                Stmt::If(_) => {}
                Stmt::Match(_) => {}

                // Exception handling statements
                Stmt::Try(_) => {}
                Stmt::With(_) => {}

                // Jumps
                Stmt::Return(_) => {}
                Stmt::Break(_) => {}
                Stmt::Continue(_) => {}
                Stmt::Raise(_) => {}

                // An `assert` is a mixture of a switch and a jump.
                Stmt::Assert(_) => {}
            }
            // Restore exit
            self.update_exit(cache_exit);
        }
        // It can happen that we have statements left over
        // and not yet occupying a block. In that case,
        // `self.current` should be pointing to an empty block
        // and we push the remaining statements to it here.
        if start < stmts.len() {
            self.set_current_block_stmts(&stmts[start..]);
        }
        // Add edge to exit if not already present
        if self.cfg.blocks[self.current].out.is_empty() {
            let edges = Edges::always(self.exit());
            self.set_current_block_edges(edges);
        }
        self.move_to(self.exit());
    }

    /// Returns finished control flow graph
    fn finish(self) -> ControlFlowGraph<'stmt> {
        self.cfg
    }

    /// Current exit block, which may change during construction
    fn exit(&self) -> BlockId {
        self.exit
    }

    /// Point the current exit to block at provided index
    fn update_exit(&mut self, new_exit: BlockId) {
        self.exit = new_exit;
    }

    /// Moves current block to provided index
    fn move_to(&mut self, block: BlockId) {
        self.current = block;
    }

    /// Populates the current basic block with the given set of statements.
    ///
    /// This should only be called once on any given block.
    fn set_current_block_stmts(&mut self, stmts: &'stmt [Stmt]) {
        debug_assert!(
            self.cfg.blocks[self.current].stmts.is_empty(),
            "Attempting to set statements on an already populated basic block."
        );
        self.cfg.blocks[self.current].stmts = stmts;
    }

    /// Draws provided edges out of the current basic block and records the
    /// current block as a parent of every block those edges target.
    ///
    /// This should only be called once on any given block.
    fn set_current_block_edges(&mut self, edges: Edges) {
        debug_assert!(
            self.cfg.blocks[self.current].out.is_empty(),
            "Attempting to set edges on a basic block that already has an outgoing edge."
        );
        let source = self.current;
        // Keep `parents` in sync with the outgoing edges; without this,
        // `ControlFlowGraph::predecessors` would never report anything.
        for target in edges.targets() {
            let parents = &mut self.cfg.blocks[target].parents;
            // Several edges may share a target; list each parent only once.
            if !parents.contains(&source) {
                parents.push(source);
            }
        }
        self.cfg.blocks[source].out = edges;
    }
}

View file

@ -0,0 +1,61 @@
pub mod graph;
pub mod visualize;
#[cfg(test)]
mod tests {
    use std::fmt::Write;
    use std::fs;
    use std::path::PathBuf;

    use crate::cfg::graph::build_cfg;
    use crate::cfg::visualize::draw_cfg;
    use insta;

    use ruff_python_parser::parse_module;
    use ruff_text_size::Ranged;
    use test_case::test_case;

    /// Snapshot test: parses the named fixture, builds the control flow graph
    /// of every top-level function body, and records each function's source
    /// together with its Mermaid rendering in a Markdown snapshot.
    #[test_case("no_flow.py")]
    fn control_flow_graph(filename: &str) {
        let fixture = PathBuf::from("resources/test/fixtures/cfg").join(filename);
        let source = fs::read_to_string(fixture).expect("failed to read file");
        let suite = match parse_module(&source) {
            Ok(parsed) => parsed.into_suite(),
            Err(err) => panic!("failed to parse source: '{source}': {err}"),
        };

        let mut report = String::new();

        for (index, stmt) in suite.into_iter().enumerate() {
            // Fixtures are expected to contain nothing but function definitions.
            let func = stmt.as_function_def_stmt().expect(
                "Snapshot test for control flow graph should consist only of function definitions",
            );
            let diagram = draw_cfg(build_cfg(&func.body), &source);
            let snippet = &source[func.range()];
            writeln!(
                report,
                "## Function {}\n\
                ### Source\n\
                ```python\n\
                {}\n\
                ```\n\n\
                ### Control Flow Graph\n\
                ```mermaid\n\
                {}\n\
                ```\n",
                index, snippet, diagram,
            )
            .unwrap();
        }

        insta::with_settings!({
            omit_expression => true,
            input_file => filename,
            description => "This is a Mermaid graph. You can use https://mermaid.live to visualize it as a diagram."
        }, {
            insta::assert_snapshot!(format!("{filename}.md"), report);
        });
    }
}

View file

@ -0,0 +1,89 @@
---
source: crates/ruff_python_semantic/src/cfg/mod.rs
description: "This is a Mermaid graph. You can use https://mermaid.live to visualize it as a diagram."
---
## Function 0
### Source
```python
def func(): ...
```
### Control Flow Graph
```mermaid
flowchart TD
node0["..."]
node1((("EXIT")))
node0==>node1
```
## Function 1
### Source
```python
def func():
pass
```
### Control Flow Graph
```mermaid
flowchart TD
node0["pass"]
node1((("EXIT")))
node0==>node1
```
## Function 2
### Source
```python
def func():
x = 1
x = 2
```
### Control Flow Graph
```mermaid
flowchart TD
node0["x = 1
x = 2"]
node1((("EXIT")))
node0==>node1
```
## Function 3
### Source
```python
def func():
foo()
```
### Control Flow Graph
```mermaid
flowchart TD
node0["foo()"]
node1((("EXIT")))
node0==>node1
```
## Function 4
### Source
```python
def func():
from foo import bar
class C:
a = 1
c = C()
del c
```
### Control Flow Graph
```mermaid
flowchart TD
node0["from foo import bar
class C:
a = 1
c = C()
del c"]
node1((("EXIT")))
node0==>node1
```

View file

@ -0,0 +1,244 @@
//! Heavily inspired by rustc data structures
use ruff_index::Idx;
use ruff_text_size::Ranged;
use std::fmt::{self, Display};
use crate::cfg::graph::{BlockId, BlockKind, Condition, ControlFlowGraph};
/// Renders `graph` as a Mermaid flowchart, labelling each block with the
/// text of its statements taken from `source`.
pub fn draw_cfg(graph: ControlFlowGraph, source: &str) -> String {
    let annotated = CFGWithSource::new(graph, source);
    annotated.draw_graph()
}
/// A [`DirectedGraph`] that knows how to render itself in Mermaid syntax.
trait MermaidGraph<'a>: DirectedGraph<'a> {
    /// Mermaid representation of `node`.
    fn draw_node(&self, node: Self::Node) -> MermaidNode;

    /// Edges leaving `node`, each paired with the node it points to.
    fn draw_edges(&self, node: Self::Node) -> impl Iterator<Item = (Self::Node, MermaidEdge)>;

    /// Renders the whole graph: the `flowchart TD` header, then one line per
    /// node, then one line per edge, joined by newlines.
    fn draw_graph(&self) -> String {
        let node_count = self.num_nodes();

        // Begin mermaid graph.
        let mut lines = vec!["flowchart TD".to_string()];

        // Draw nodes
        lines.extend((0..node_count).map(|idx| {
            let rendered = self.draw_node(Self::Node::new(idx));
            format!("\tnode{}{}", idx, &rendered)
        }));

        // Draw edges
        for origin in 0..node_count {
            for (destination, edge) in self.draw_edges(Self::Node::new(origin)) {
                lines.push(format!(
                    "\tnode{}{}node{}",
                    origin,
                    edge,
                    destination.index()
                ));
            }
        }

        lines.join("\n")
    }
}
/// A node of a Mermaid flowchart: a label drawn inside a particular shape.
pub struct MermaidNode {
    shape: MermaidNodeShape,
    content: String,
}

impl MermaidNode {
    /// Creates a node with the default shape and the given label.
    pub fn with_content(content: String) -> Self {
        let shape = MermaidNodeShape::default();
        Self { shape, content }
    }

    /// Writes `value` with every double quote replaced by `#quot;`, so that
    /// it can sit inside a double-quoted label.
    fn mermaid_write_quoted_str(f: &mut fmt::Formatter<'_>, value: &str) -> fmt::Result {
        for (position, segment) in value.split('"').enumerate() {
            // Every segment after the first was preceded by a quote.
            if position > 0 {
                f.write_str("#quot;")?;
            }
            f.write_str(segment)?;
        }
        Ok(())
    }
}

impl Display for MermaidNode {
    /// Writes the opening delimiter, the quoted label (or `empty` when the
    /// label is blank), and the closing delimiter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (open, close) = self.shape.open_close();
        write!(f, "{open}\"")?;
        match self.content.as_str() {
            "" => f.write_str("empty")?,
            text => MermaidNode::mermaid_write_quoted_str(f, text)?,
        }
        write!(f, "\"{close}")
    }
}

/// Shapes a [`MermaidNode`] can be drawn with.
#[derive(Debug, Default)]
pub enum MermaidNodeShape {
    /// Default shape
    #[default]
    Rectangle,
    DoubleRectangle,
    RoundedRectangle,
    Stadium,
    Circle,
    DoubleCircle,
    Asymmetric,
    Rhombus,
    Hexagon,
    Parallelogram,
    Trapezoid,
}

impl MermaidNodeShape {
    /// Opening and closing delimiters that surround the label for this shape.
    fn open_close(&self) -> (&'static str, &'static str) {
        let delimiters = match self {
            Self::Rectangle => ("[", "]"),
            Self::DoubleRectangle => ("[[", "]]"),
            Self::RoundedRectangle => ("(", ")"),
            Self::Stadium => ("([", "])"),
            Self::Circle => ("((", "))"),
            Self::DoubleCircle => ("(((", ")))"),
            Self::Asymmetric => (">", "]"),
            Self::Rhombus => ("{", "}"),
            Self::Hexagon => ("{{", "}}"),
            Self::Parallelogram => ("[/", "/]"),
            Self::Trapezoid => ("[/", "\\]"),
        };
        delimiters
    }
}
/// An edge of a Mermaid flowchart: an arrow style plus an optional label.
#[derive(Debug, Default)]
pub struct MermaidEdge {
    kind: MermaidEdgeKind,
    content: String,
}

impl Display for MermaidEdge {
    /// Writes the arrow, followed by `|"label"|` when the label is non-empty.
    ///
    /// Double quotes inside the label are written as `#quot;` so they cannot
    /// terminate the quoted label early.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.content.is_empty() {
            write!(f, "{}", self.kind)
        } else {
            let label = self.content.replace('"', "#quot;");
            write!(f, "{}|\"{}\"|", self.kind, label)
        }
    }
}

/// Arrow styles a [`MermaidEdge`] can be drawn with.
#[derive(Debug, Default)]
pub enum MermaidEdgeKind {
    /// Default style
    #[default]
    Arrow,
    DottedArrow,
    ThickArrow,
    BidirectionalArrow,
}

impl Display for MermaidEdgeKind {
    /// Writes the Mermaid link syntax for this arrow style.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let arrow = match self {
            MermaidEdgeKind::Arrow => "-->",
            MermaidEdgeKind::DottedArrow => "-..->",
            MermaidEdgeKind::ThickArrow => "==>",
            MermaidEdgeKind::BidirectionalArrow => "<-->",
        };
        f.write_str(arrow)
    }
}
/// Minimal interface of a directed graph whose nodes are identified by an
/// index type.
pub trait DirectedGraph<'a> {
    /// Index type identifying a node
    type Node: Idx;

    /// Number of nodes in the graph
    fn num_nodes(&self) -> usize;
    /// Node at which the graph starts
    fn start_node(&self) -> Self::Node;
    /// Nodes reachable from `node` by following a single edge
    fn successors(&self, node: Self::Node) -> impl ExactSizeIterator<Item = Self::Node> + '_;
}
/// A [`ControlFlowGraph`] bundled with the source text its statements were
/// parsed from, so that statements can be rendered as text.
struct CFGWithSource<'stmt> {
    cfg: ControlFlowGraph<'stmt>,
    source: &'stmt str,
}

impl<'stmt> CFGWithSource<'stmt> {
    /// Pairs `cfg` with `source`.
    fn new(cfg: ControlFlowGraph<'stmt>, source: &'stmt str) -> Self {
        CFGWithSource { cfg, source }
    }
}
impl<'stmt> DirectedGraph<'stmt> for CFGWithSource<'stmt> {
    type Node = BlockId;

    /// One node per basic block.
    fn num_nodes(&self) -> usize {
        let graph = &self.cfg;
        graph.num_blocks()
    }

    /// The graph starts at the initial block.
    fn start_node(&self) -> Self::Node {
        let graph = &self.cfg;
        graph.initial()
    }

    /// Targets of the edges leaving `node`.
    fn successors(&self, node: Self::Node) -> impl ExactSizeIterator<Item = Self::Node> + '_ {
        let outgoing = self.cfg.outgoing(node);
        outgoing.targets()
    }
}
impl<'stmt> MermaidGraph<'stmt> for CFGWithSource<'stmt> {
    /// Labels a block with the source text of its statements, one per line.
    ///
    /// The terminal block is always drawn as a double circle labelled `EXIT`;
    /// other blocks without statements are labelled `START` or `EMPTY`
    /// depending on their kind.
    fn draw_node(&self, node: Self::Node) -> MermaidNode {
        let statements: Vec<String> = self
            .cfg
            .stmts(node)
            .iter()
            .map(|stmt| self.source[stmt.range()].to_string())
            .collect();

        // Label used when a non-terminal block has no statements.
        let placeholder = match self.cfg.kind(node) {
            BlockKind::Terminal => {
                return MermaidNode {
                    content: "EXIT".to_string(),
                    shape: MermaidNodeShape::DoubleCircle,
                }
            }
            BlockKind::Generic => "EMPTY",
            BlockKind::Start => "START",
        };

        let content = if statements.is_empty() {
            placeholder.to_string()
        } else {
            statements.join("\n")
        };

        MermaidNode::with_content(content)
    }

    /// Draws one unlabelled arrow per outgoing edge; edges into the terminal
    /// block are drawn thick.
    fn draw_edges(&self, node: Self::Node) -> impl Iterator<Item = (Self::Node, MermaidEdge)> {
        let outgoing = self.cfg.outgoing(node);
        let terminal = self.cfg.terminal();

        let mut drawn = Vec::new();
        for (target, condition) in outgoing.targets().zip(outgoing.conditions()) {
            let kind = match condition {
                Condition::Always if target == terminal => MermaidEdgeKind::ThickArrow,
                Condition::Always => MermaidEdgeKind::Arrow,
            };
            drawn.push((
                target,
                MermaidEdge {
                    kind,
                    content: String::new(),
                },
            ));
        }
        drawn.into_iter()
    }
}

View file

@ -1,6 +1,7 @@
pub mod analyze;
mod binding;
mod branches;
pub mod cfg;
mod context;
mod definition;
mod globals;