Control flow: return and raise (#17121)

We add support for `return` and `raise` statements in the control flow graph: we simply add an edge to the terminal block, push the statements to the current block, and proceed. This implementation will have to be modified somewhat once we add support for `try` statements - then we will need to check whether to _defer_ the jump. But for now this will do! Also in this PR: We fix the `unreachable` diagnostic range so that it lumps together consecutive unreachable blocks.
2025-07-07 21:25:08 +00:00 · 2025-04-03 08:30:29 -05:00 · 2025-04-03 08:30:29 -05:00 · d401a5440e
commit d401a5440e
parent 755ece0c36
7 changed files with 194 additions and 14 deletions
--- a/crates/ruff_linter/resources/test/fixtures/pylint/unreachable.py
+++ b/crates/ruff_linter/resources/test/fixtures/pylint/unreachable.py
@ -12,3 +12,19 @@ def no_control_flow_reachable():
    del c
    def foo():
        return
+
+def after_return():
+    return 1
+    print("unreachable")
+    print("and this")
+
+def after_raise():
+    raise ValueError
+    print("unreachable")
+    print("and this")
+
+def multiple_returns():
+    return 1
+    print("unreachable")
+    return 2
+    print("unreachable range should include above return")
--- a/crates/ruff_linter/src/rules/pylint/rules/unreachable.rs
+++ b/crates/ruff_linter/src/rules/pylint/rules/unreachable.rs
@ -3,7 +3,7 @@ use std::collections::HashSet;
 use itertools::Itertools;
 use ruff_python_ast::{Identifier, Stmt};
 use ruff_python_semantic::cfg::graph::{build_cfg, BlockId, Condition, ControlFlowGraph};
-use ruff_text_size::{Ranged, TextRange};
+use ruff_text_size::TextRange;

 use ruff_diagnostics::{Diagnostic, Violation};
 use ruff_macros::{derive_message_formats, ViolationMetadata};
@ -46,19 +46,24 @@ pub(crate) fn in_function(checker: &Checker, name: &Identifier, body: &[Stmt]) {
    let cfg = build_cfg(body);
    let reachable = reachable(&cfg);

-    let mut unreachable = (0..cfg.num_blocks())
+    let mut blocks = (0..cfg.num_blocks())
        .map(BlockId::from_usize)
-        .filter(|block| !reachable.contains(block) && !cfg.stmts(*block).is_empty())
-        .map(|block| cfg.range(block))
-        .sorted_by_key(ruff_text_size::Ranged::start)
+        .filter(|block| !cfg.stmts(*block).is_empty())
+        .sorted_by_key(|block| cfg.range(*block).start())
        .peekable();

-    while let Some(block_range) = unreachable.next() {
-        let start = block_range.start();
-        let mut end = block_range.end();
-        while let Some(next_block) = unreachable.next_if(|nxt| nxt.start() <= end) {
-            end = next_block.end();
+    // Advance past leading reachable blocks
+    while blocks.next_if(|block| reachable.contains(block)).is_some() {}
+
+    while let Some(start_block) = blocks.next() {
+        // Advance to next reachable block
+        let mut end_block = start_block;
+        while let Some(next_block) = blocks.next_if(|block| !reachable.contains(block)) {
+            end_block = next_block;
        }
+        let start = cfg.range(start_block).start();
+        let end = cfg.range(end_block).end();
+
        checker.report_diagnostic(Diagnostic::new(
            UnreachableCode {
                name: name.to_string(),
--- a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linterrulespylinttestsPLW0101_unreachable.py.snap
+++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linterrulespylinttestsPLW0101_unreachable.py.snap
@ -1,4 +1,34 @@
 ---
 source: crates/ruff_linter/src/rules/pylint/mod.rs
 ---
+unreachable.py:18:5: PLW0101 Unreachable code in `after_return`
+   |
+16 |   def after_return():
+17 |       return 1
+18 | /     print("unreachable")
+19 | |     print("and this")
+   | |_____________________^ PLW0101
+20 |
+21 |   def after_raise():
+   |

+unreachable.py:23:5: PLW0101 Unreachable code in `after_raise`
+   |
+21 |   def after_raise():
+22 |       raise ValueError
+23 | /     print("unreachable")
+24 | |     print("and this")
+   | |_____________________^ PLW0101
+25 |
+26 |   def multiple_returns():
+   |
+
+unreachable.py:28:5: PLW0101 Unreachable code in `multiple_returns`
+   |
+26 |   def multiple_returns():
+27 |       return 1
+28 | /     print("unreachable")
+29 | |     return 2
+30 | |     print("unreachable range should include above return")
+   | |__________________________________________________________^ PLW0101
+   |
--- a/crates/ruff_python_semantic/resources/test/fixtures/cfg/jumps.py
+++ b/crates/ruff_python_semantic/resources/test/fixtures/cfg/jumps.py
@ -0,0 +1,18 @@
+def only_return():
+    return
+
+def after_return():
+    return 1
+    print("unreachable")
+    print("and this")
+
+def after_raise():
+    raise ValueError
+    print("unreachable")
+    print("and this")
+
+def multiple_returns():
+    return 1
+    print("unreachable")
+    return 2
+    print("and this")
--- a/crates/ruff_python_semantic/src/cfg/graph.rs
+++ b/crates/ruff_python_semantic/src/cfg/graph.rs
@ -192,8 +192,12 @@ impl<'stmt> CFGBuilder<'stmt> {

    /// Runs the core logic for the builder.
    fn process_stmts(&mut self, stmts: &'stmt [Stmt]) {
-        let start = 0;
-        for stmt in stmts {
+        // SAFETY With notation as below, we always maintain the invariant
+        // `start <= end + 1`. Since `end <= stmts.len() -1` we conclude that
+        // `start <= stmts.len()`. It is therefore always safe to use `start` as
+        // the beginning of a range for the purposes of slicing into `stmts`.
+        let mut start = 0;
+        for (end, stmt) in stmts.iter().enumerate() {
            let cache_exit = self.exit();
            match stmt {
                Stmt::FunctionDef(_)
@ -223,10 +227,30 @@ impl<'stmt> CFGBuilder<'stmt> {
                Stmt::With(_) => {}

                // Jumps
-                Stmt::Return(_) => {}
+                Stmt::Return(_) => {
+                    let edges = Edges::always(self.cfg.terminal());
+                    self.set_current_block_stmts(&stmts[start..=end]);
+                    self.set_current_block_edges(edges);
+                    start = end + 1;
+
+                    if stmts.get(start).is_some() {
+                        let next_block = self.new_block();
+                        self.move_to(next_block);
+                    }
+                }
                Stmt::Break(_) => {}
                Stmt::Continue(_) => {}
-                Stmt::Raise(_) => {}
+                Stmt::Raise(_) => {
+                    let edges = Edges::always(self.cfg.terminal());
+                    self.set_current_block_stmts(&stmts[start..=end]);
+                    self.set_current_block_edges(edges);
+                    start = end + 1;
+
+                    if stmts.get(start).is_some() {
+                        let next_block = self.new_block();
+                        self.move_to(next_block);
+                    }
+                }

                // An `assert` is a mixture of a switch and a jump.
                Stmt::Assert(_) => {}
@ -269,6 +293,11 @@ impl<'stmt> CFGBuilder<'stmt> {
        self.current = block;
    }

+    /// Makes new block and returns index
+    fn new_block(&mut self) -> BlockId {
+        self.cfg.blocks.push(BlockData::default())
+    }
+
    /// Populates the current basic block with the given set of statements.
    ///
    /// This should only be called once on any given block.
--- a/crates/ruff_python_semantic/src/cfg/mod.rs
+++ b/crates/ruff_python_semantic/src/cfg/mod.rs
@ -16,6 +16,7 @@ mod tests {
    use test_case::test_case;

    #[test_case("no_flow.py")]
+    #[test_case("jumps.py")]
    fn control_flow_graph(filename: &str) {
        let path = PathBuf::from("resources/test/fixtures/cfg").join(filename);
        let source = fs::read_to_string(path).expect("failed to read file");
--- a/crates/ruff_python_semantic/src/cfg/snapshots/ruff_python_semanticcfgtests__jumps.py.md.snap
+++ b/crates/ruff_python_semantic/src/cfg/snapshots/ruff_python_semanticcfgtests__jumps.py.md.snap
@ -0,0 +1,81 @@
+---
+source: crates/ruff_python_semantic/src/cfg/mod.rs
+description: "This is a Mermaid graph. You can use https://mermaid.live to visualize it as a diagram."
+---
+## Function 0
+### Source
+```python
+def only_return():
+    return
+```
+
+### Control Flow Graph
+```mermaid
+flowchart TD
+	node0["return"]
+	node1((("EXIT")))
+	node0==>node1
+```
+
+## Function 1
+### Source
+```python
+def after_return():
+    return 1
+    print("unreachable")
+    print("and this")
+```
+
+### Control Flow Graph
+```mermaid
+flowchart TD
+	node0["return 1"]
+	node1((("EXIT")))
+	node2["print(#quot;unreachable#quot;)
+print(#quot;and this#quot;)"]
+	node0==>node1
+	node2==>node1
+```
+
+## Function 2
+### Source
+```python
+def after_raise():
+    raise ValueError
+    print("unreachable")
+    print("and this")
+```
+
+### Control Flow Graph
+```mermaid
+flowchart TD
+	node0["raise ValueError"]
+	node1((("EXIT")))
+	node2["print(#quot;unreachable#quot;)
+print(#quot;and this#quot;)"]
+	node0==>node1
+	node2==>node1
+```
+
+## Function 3
+### Source
+```python
+def multiple_returns():
+    return 1
+    print("unreachable")
+    return 2
+    print("and this")
+```
+
+### Control Flow Graph
+```mermaid
+flowchart TD
+	node0["return 1"]
+	node1((("EXIT")))
+	node2["print(#quot;unreachable#quot;)
+return 2"]
+	node3["print(#quot;and this#quot;)"]
+	node0==>node1
+	node2==>node1
+	node3==>node1
+```