[red-knot] Statically known branches (#15019)

## Summary

This changeset adds support for precise type-inference and
boundness-handling of definitions inside control-flow branches with
statically-known conditions, i.e. test-expressions whose truthiness we
can unambiguously infer as *always false* or *always true*.

This branch also includes:
- `sys.platform` support
- statically-known branches handling for Boolean expressions and while
  loops
- new `target-version` requirements in some Markdown tests which were
  now required due to the understanding of `sys.version_info` branches.

closes #12700 
closes #15034 

## Performance

### `tomllib`, -7%, needs to resolve one additional module (sys)

| Command | Mean [ms] | Min [ms] | Max [ms] | Relative |
|:---|---:|---:|---:|---:|
| `./red_knot_main --project /home/shark/tomllib` | 22.2 ± 1.3 | 19.1 |
25.6 | 1.00 |
| `./red_knot_feature --project /home/shark/tomllib` | 23.8 ± 1.6 | 20.8
| 28.6 | 1.07 ± 0.09 |

### `black`, -6%

| Command | Mean [ms] | Min [ms] | Max [ms] | Relative |
|:---|---:|---:|---:|---:|
| `./red_knot_main --project /home/shark/black` | 129.3 ± 5.1 | 119.0 |
137.8 | 1.00 |
| `./red_knot_feature --project /home/shark/black` | 136.5 ± 6.8 | 123.8
| 147.5 | 1.06 ± 0.07 |

## Test Plan

- New Markdown tests for the main feature in
  `statically-known-branches.md`
- New Markdown tests for `sys.platform`
- Adapted tests for `EllipsisType`, `Never`, etc
This commit is contained in:
David Peter 2024-12-21 11:33:10 +01:00 committed by GitHub
parent d3f51cf3a6
commit 000948ad3b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
39 changed files with 3187 additions and 632 deletions

View file

@ -6,15 +6,16 @@ use rustc_hash::{FxHashMap, FxHashSet};
use ruff_db::files::File;
use ruff_db::parsed::ParsedModule;
use ruff_index::IndexVec;
use ruff_python_ast as ast;
use ruff_python_ast::name::Name;
use ruff_python_ast::visitor::{walk_expr, walk_pattern, walk_stmt, Visitor};
use ruff_python_ast::{self as ast, Pattern};
use ruff_python_ast::{BoolOp, Expr};
use crate::ast_node_ref::AstNodeRef;
use crate::module_name::ModuleName;
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::ast_ids::AstIdsBuilder;
use crate::semantic_index::constraint::PatternConstraintKind;
use crate::semantic_index::definition::{
AssignmentDefinitionNodeRef, ComprehensionDefinitionNodeRef, Definition, DefinitionNodeKey,
DefinitionNodeRef, ForStmtDefinitionNodeRef, ImportFromDefinitionNodeRef,
@ -24,9 +25,12 @@ use crate::semantic_index::symbol::{
FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId,
SymbolTableBuilder,
};
use crate::semantic_index::use_def::{FlowSnapshot, UseDefMapBuilder};
use crate::semantic_index::use_def::{
FlowSnapshot, ScopedConstraintId, ScopedVisibilityConstraintId, UseDefMapBuilder,
};
use crate::semantic_index::SemanticIndex;
use crate::unpack::Unpack;
use crate::visibility_constraints::VisibilityConstraint;
use crate::Db;
use super::constraint::{Constraint, ConstraintNode, PatternConstraint};
@ -285,10 +289,6 @@ impl<'db> SemanticIndexBuilder<'db> {
constraint
}
fn record_constraint(&mut self, constraint: Constraint<'db>) {
self.current_use_def_map_mut().record_constraint(constraint);
}
fn build_constraint(&mut self, constraint_node: &Expr) -> Constraint<'db> {
let expression = self.add_standalone_expression(constraint_node);
Constraint {
@ -297,12 +297,89 @@ impl<'db> SemanticIndexBuilder<'db> {
}
}
fn record_negated_constraint(&mut self, constraint: Constraint<'db>) {
/// Adds a new constraint to the list of all constraints, but does not record it. Returns the
/// constraint ID for later recording using [`SemanticIndexBuilder::record_constraint_id`].
fn add_constraint(&mut self, constraint: Constraint<'db>) -> ScopedConstraintId {
self.current_use_def_map_mut().add_constraint(constraint)
}
/// Negates a constraint and adds it to the list of all constraints, does not record it.
fn add_negated_constraint(
&mut self,
constraint: Constraint<'db>,
) -> (Constraint<'db>, ScopedConstraintId) {
let negated = Constraint {
node: constraint.node,
is_positive: false,
};
let id = self.current_use_def_map_mut().add_constraint(negated);
(negated, id)
}
/// Records a previously added constraint by adding it to all live bindings.
fn record_constraint_id(&mut self, constraint: ScopedConstraintId) {
self.current_use_def_map_mut()
.record_constraint(Constraint {
node: constraint.node,
is_positive: false,
});
.record_constraint_id(constraint);
}
/// Adds and records a constraint, i.e. adds it to all live bindings.
fn record_constraint(&mut self, constraint: Constraint<'db>) {
self.current_use_def_map_mut().record_constraint(constraint);
}
/// Negates the given constraint and then adds it to all live bindings.
fn record_negated_constraint(&mut self, constraint: Constraint<'db>) -> ScopedConstraintId {
let (_, id) = self.add_negated_constraint(constraint);
self.record_constraint_id(id);
id
}
/// Adds a new visibility constraint, but does not record it. Returns the constraint ID
/// for later recording using [`SemanticIndexBuilder::record_visibility_constraint_id`].
fn add_visibility_constraint(
&mut self,
constraint: VisibilityConstraint<'db>,
) -> ScopedVisibilityConstraintId {
self.current_use_def_map_mut()
.add_visibility_constraint(constraint)
}
/// Records a previously added visibility constraint by applying it to all live bindings
/// and declarations.
fn record_visibility_constraint_id(&mut self, constraint: ScopedVisibilityConstraintId) {
self.current_use_def_map_mut()
.record_visibility_constraint_id(constraint);
}
/// Negates the given visibility constraint and then adds it to all live bindings and declarations.
fn record_negated_visibility_constraint(
&mut self,
constraint: ScopedVisibilityConstraintId,
) -> ScopedVisibilityConstraintId {
self.current_use_def_map_mut()
.record_visibility_constraint(VisibilityConstraint::VisibleIfNot(constraint))
}
/// Records a visibility constraint by applying it to all live bindings and declarations.
fn record_visibility_constraint(
&mut self,
constraint: Constraint<'db>,
) -> ScopedVisibilityConstraintId {
self.current_use_def_map_mut()
.record_visibility_constraint(VisibilityConstraint::VisibleIf(constraint))
}
/// Records a [`VisibilityConstraint::Ambiguous`] constraint.
fn record_ambiguous_visibility(&mut self) -> ScopedVisibilityConstraintId {
self.current_use_def_map_mut()
.record_visibility_constraint(VisibilityConstraint::Ambiguous)
}
/// Simplifies (resets) visibility constraints on all live bindings and declarations that did
/// not see any new definitions since the given snapshot.
fn simplify_visibility_constraints(&mut self, snapshot: FlowSnapshot) {
self.current_use_def_map_mut()
.simplify_visibility_constraints(snapshot);
}
fn push_assignment(&mut self, assignment: CurrentAssignment<'db>) {
@ -324,30 +401,37 @@ impl<'db> SemanticIndexBuilder<'db> {
fn add_pattern_constraint(
&mut self,
subject: &ast::Expr,
subject: Expression<'db>,
pattern: &ast::Pattern,
) -> PatternConstraint<'db> {
#[allow(unsafe_code)]
let (subject, pattern) = unsafe {
(
AstNodeRef::new(self.module.clone(), subject),
AstNodeRef::new(self.module.clone(), pattern),
)
guard: Option<&ast::Expr>,
) -> Constraint<'db> {
let guard = guard.map(|guard| self.add_standalone_expression(guard));
let kind = match pattern {
Pattern::MatchValue(pattern) => {
let value = self.add_standalone_expression(&pattern.value);
PatternConstraintKind::Value(value, guard)
}
Pattern::MatchSingleton(singleton) => {
PatternConstraintKind::Singleton(singleton.value, guard)
}
_ => PatternConstraintKind::Unsupported,
};
let pattern_constraint = PatternConstraint::new(
self.db,
self.file,
self.current_scope(),
subject,
pattern,
kind,
countme::Count::default(),
);
self.current_use_def_map_mut()
.record_constraint(Constraint {
node: ConstraintNode::Pattern(pattern_constraint),
is_positive: true,
});
pattern_constraint
let constraint = Constraint {
node: ConstraintNode::Pattern(pattern_constraint),
is_positive: true,
};
self.current_use_def_map_mut().record_constraint(constraint);
constraint
}
/// Record an expression that needs to be a Salsa ingredient, because we need to infer its type
@ -799,6 +883,10 @@ where
let constraint = self.record_expression_constraint(&node.test);
let mut constraints = vec![constraint];
self.visit_body(&node.body);
let visibility_constraint_id = self.record_visibility_constraint(constraint);
let mut vis_constraints = vec![visibility_constraint_id];
let mut post_clauses: Vec<FlowSnapshot> = vec![];
let elif_else_clauses = node
.elif_else_clauses
@ -825,15 +913,31 @@ where
for constraint in &constraints {
self.record_negated_constraint(*constraint);
}
if let Some(elif_test) = clause_test {
let elif_constraint = if let Some(elif_test) = clause_test {
self.visit_expr(elif_test);
constraints.push(self.record_expression_constraint(elif_test));
}
let constraint = self.record_expression_constraint(elif_test);
constraints.push(constraint);
Some(constraint)
} else {
None
};
self.visit_body(clause_body);
for id in &vis_constraints {
self.record_negated_visibility_constraint(*id);
}
if let Some(elif_constraint) = elif_constraint {
let id = self.record_visibility_constraint(elif_constraint);
vis_constraints.push(id);
}
}
for post_clause_state in post_clauses {
self.flow_merge(post_clause_state);
}
self.simplify_visibility_constraints(pre_if);
}
ast::Stmt::While(ast::StmtWhile {
test,
@ -856,6 +960,8 @@ where
self.visit_body(body);
self.set_inside_loop(outer_loop_state);
let vis_constraint_id = self.record_visibility_constraint(constraint);
// Get the break states from the body of this loop, and restore the saved outer
// ones.
let break_states =
@ -863,15 +969,21 @@ where
// We may execute the `else` clause without ever executing the body, so merge in
// the pre-loop state before visiting `else`.
self.flow_merge(pre_loop);
self.flow_merge(pre_loop.clone());
self.record_negated_constraint(constraint);
self.visit_body(orelse);
self.record_negated_visibility_constraint(vis_constraint_id);
// Breaking out of a while loop bypasses the `else` clause, so merge in the break
// states after visiting `else`.
for break_state in break_states {
self.flow_merge(break_state);
let snapshot = self.flow_snapshot();
self.flow_restore(break_state);
self.record_visibility_constraint(constraint);
self.flow_merge(snapshot);
}
self.simplify_visibility_constraints(pre_loop);
}
ast::Stmt::With(ast::StmtWith {
items,
@ -912,6 +1024,8 @@ where
self.add_standalone_expression(iter);
self.visit_expr(iter);
self.record_ambiguous_visibility();
let pre_loop = self.flow_snapshot();
let saved_break_states = std::mem::take(&mut self.loop_break_states);
@ -947,32 +1061,63 @@ where
cases,
range: _,
}) => {
self.add_standalone_expression(subject);
let subject_expr = self.add_standalone_expression(subject);
self.visit_expr(subject);
let after_subject = self.flow_snapshot();
let Some((first, remaining)) = cases.split_first() else {
return;
};
self.add_pattern_constraint(subject, &first.pattern);
let first_constraint_id = self.add_pattern_constraint(
subject_expr,
&first.pattern,
first.guard.as_deref(),
);
self.visit_match_case(first);
let first_vis_constraint_id =
self.record_visibility_constraint(first_constraint_id);
let mut vis_constraints = vec![first_vis_constraint_id];
let mut post_case_snapshots = vec![];
for case in remaining {
post_case_snapshots.push(self.flow_snapshot());
self.flow_restore(after_subject.clone());
self.add_pattern_constraint(subject, &case.pattern);
let constraint_id = self.add_pattern_constraint(
subject_expr,
&case.pattern,
case.guard.as_deref(),
);
self.visit_match_case(case);
for id in &vis_constraints {
self.record_negated_visibility_constraint(*id);
}
let vis_constraint_id = self.record_visibility_constraint(constraint_id);
vis_constraints.push(vis_constraint_id);
}
for post_clause_state in post_case_snapshots {
self.flow_merge(post_clause_state);
}
// If there is no final wildcard match case, pretend there is one. This is similar to how
// we add an implicit `else` block in if-elif chains, in case it's not present.
if !cases
.last()
.is_some_and(|case| case.guard.is_none() && case.pattern.is_wildcard())
{
self.flow_merge(after_subject);
post_case_snapshots.push(self.flow_snapshot());
self.flow_restore(after_subject.clone());
for id in &vis_constraints {
self.record_negated_visibility_constraint(*id);
}
}
for post_clause_state in post_case_snapshots {
self.flow_merge(post_clause_state);
}
self.simplify_visibility_constraints(after_subject);
}
ast::Stmt::Try(ast::StmtTry {
body,
@ -982,6 +1127,8 @@ where
is_star,
range: _,
}) => {
self.record_ambiguous_visibility();
// Save the state prior to visiting any of the `try` block.
//
// Potentially none of the `try` block could have been executed prior to executing
@ -1222,19 +1369,19 @@ where
ast::Expr::If(ast::ExprIf {
body, test, orelse, ..
}) => {
// TODO detect statically known truthy or falsy test (via type inference, not naive
// AST inspection, so we can't simplify here, need to record test expression for
// later checking)
self.visit_expr(test);
let pre_if = self.flow_snapshot();
let constraint = self.record_expression_constraint(test);
self.visit_expr(body);
let visibility_constraint = self.record_visibility_constraint(constraint);
let post_body = self.flow_snapshot();
self.flow_restore(pre_if);
self.flow_restore(pre_if.clone());
self.record_negated_constraint(constraint);
self.visit_expr(orelse);
self.record_negated_visibility_constraint(visibility_constraint);
self.flow_merge(post_body);
self.simplify_visibility_constraints(pre_if);
}
ast::Expr::ListComp(
list_comprehension @ ast::ExprListComp {
@ -1291,27 +1438,55 @@ where
range: _,
op,
}) => {
// TODO detect statically known truthy or falsy values (via type inference, not naive
// AST inspection, so we can't simplify here, need to record test expression for
// later checking)
let pre_op = self.flow_snapshot();
let mut snapshots = vec![];
let mut visibility_constraints = vec![];
for (index, value) in values.iter().enumerate() {
self.visit_expr(value);
// In the last value we don't need to take a snapshot nor add a constraint
for vid in &visibility_constraints {
self.record_visibility_constraint_id(*vid);
}
// For the last value, we don't need to model control flow. There is short-circuiting
// anymore.
if index < values.len() - 1 {
// Snapshot is taken after visiting the expression but before adding the constraint.
snapshots.push(self.flow_snapshot());
let constraint = self.build_constraint(value);
match op {
BoolOp::And => self.record_constraint(constraint),
BoolOp::Or => self.record_negated_constraint(constraint),
let (constraint, constraint_id) = match op {
BoolOp::And => (constraint, self.add_constraint(constraint)),
BoolOp::Or => self.add_negated_constraint(constraint),
};
let visibility_constraint = self
.add_visibility_constraint(VisibilityConstraint::VisibleIf(constraint));
let after_expr = self.flow_snapshot();
// We first model the short-circuiting behavior. We take the short-circuit
// path here if all of the previous short-circuit paths were not taken, so
// we record all previously existing visibility constraints, and negate the
// one for the current expression.
for vid in &visibility_constraints {
self.record_visibility_constraint_id(*vid);
}
self.record_negated_visibility_constraint(visibility_constraint);
snapshots.push(self.flow_snapshot());
// Then we model the non-short-circuiting behavior. Here, we need to delay
// the application of the visibility constraint until after the expression
// has been evaluated, so we only push it onto the stack here.
self.flow_restore(after_expr);
self.record_constraint_id(constraint_id);
visibility_constraints.push(visibility_constraint);
}
}
for snapshot in snapshots {
self.flow_merge(snapshot);
}
self.simplify_visibility_constraints(pre_op);
}
_ => {
walk_expr(self, expr);