mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-22 19:34:23 +00:00
Prefer expanding parenthesized expressions before operands
<!-- Thank you for contributing to Ruff! To help us out with reviewing, please consider the following: - Does this pull request include a summary of the change? (See below.) - Does this pull request include a descriptive title? - Does this pull request include references to any relevant issues? --> ## Summary This PR implements Black's behavior where it first splits off parenthesized expressions before splitting before operands to avoid unnecessary parentheses: ```python # We want if a + [ b, c ]: pass # Rather than if ( a + [b, c] ): pass ``` This is implemented by using the new IR elements introduced in #5596. * We give the group wrapping the optional parentheses an ID (`parentheses_id`) * We use `conditional_group` for the lower priority groups (all non-parenthesized expressions) with the condition that the `parentheses_id` group breaks (we want to split before operands only if the parentheses are necessary) * We use `fits_expanded` to wrap all other parenthesized expressions (lists, dicts, sets), to prevent the expansion of e.g. a list from also expanding the `parentheses_id` group. We gate the `fits_expanded` to only apply if the `parentheses_id` group fits (because we prefer `a\n+[b, c]` over expanding `[b, c]` if the whole expression gets parenthesized). We limit using `fits_expanded` and `conditional_group` only to expressions that themselves are not in parentheses (checking the conditions isn't free) ## Test Plan It increases the Jaccard index for Django from 0.915 to 0.917 ## Incompatibilities There are two incompatibilities left that I'm aware of (there may be more, I didn't go through all snapshot differences). ### Long string literals I commented on the regression. The issue is that a very long string (or any content without a split point) may not fit when only breaking the right side. The formatter then inserts the optional parentheses. But this is kind of useless because the overlong string will still not fit, because there are no new split points. 
I think we should ignore this incompatibility for now ### Expressions on statement level I don't fully understand the logic behind this yet, but Black doesn't break before the operators for the following example even though the expression exceeds the configured line width ```python aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa < bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb > ccccccccccccccccccccccccccccc == ddddddddddddddddddddd ``` But it would if the expression is used inside of a condition. What I understand so far is that Black doesn't insert optional parentheses on the expression statement level (and a few other places) and, therefore, only breaks after opening parentheses. I propose to keep this deviation for now to avoid overlong lines and use the compatibility report to decide whether we should implement the same behavior.
This commit is contained in:
parent
d30e9125eb
commit
715250a179
26 changed files with 680 additions and 943 deletions
|
@ -1,16 +1,23 @@
|
|||
use rustpython_parser::ast;
|
||||
use rustpython_parser::ast::{Expr, Operator};
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use crate::builders::optional_parentheses;
|
||||
use ruff_formatter::{
|
||||
format_args, FormatOwnedWithRule, FormatRefWithRule, FormatRule, FormatRuleWithOptions,
|
||||
};
|
||||
use ruff_python_ast::node::AnyNodeRef;
|
||||
use ruff_python_ast::visitor::preorder::{walk_expr, PreorderVisitor};
|
||||
|
||||
use crate::comments::Comments;
|
||||
use crate::context::NodeLevel;
|
||||
use crate::expression::expr_tuple::TupleParentheses;
|
||||
use crate::expression::parentheses::{NeedsParentheses, Parentheses, Parenthesize};
|
||||
use crate::expression::parentheses::{
|
||||
is_expression_parenthesized, parenthesized, NeedsParentheses, Parentheses, Parenthesize,
|
||||
};
|
||||
use crate::expression::string::StringLayout;
|
||||
use crate::prelude::*;
|
||||
use ruff_formatter::{
|
||||
format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRule, FormatRuleWithOptions,
|
||||
};
|
||||
use rustpython_parser::ast::Expr;
|
||||
|
||||
pub(crate) mod binary_like;
|
||||
pub(crate) mod expr_attribute;
|
||||
pub(crate) mod expr_await;
|
||||
pub(crate) mod expr_bin_op;
|
||||
|
@ -99,26 +106,63 @@ impl FormatRule<Expr, PyFormatContext<'_>> for FormatExpr {
|
|||
Expr::Slice(expr) => expr.format().fmt(f),
|
||||
});
|
||||
|
||||
let saved_level = f.context().node_level();
|
||||
f.context_mut().set_node_level(NodeLevel::Expression);
|
||||
|
||||
let result = match parentheses {
|
||||
Parentheses::Always => {
|
||||
write!(
|
||||
f,
|
||||
[group(&format_args![
|
||||
text("("),
|
||||
soft_block_indent(&format_expr),
|
||||
text(")")
|
||||
])]
|
||||
)
|
||||
}
|
||||
Parentheses::Always => parenthesized("(", &format_expr, ")").fmt(f),
|
||||
// Add optional parentheses. Ignore if the item renders parentheses itself.
|
||||
Parentheses::Optional => optional_parentheses(&format_expr).fmt(f),
|
||||
Parentheses::Custom | Parentheses::Never => Format::fmt(&format_expr, f),
|
||||
};
|
||||
Parentheses::Optional => {
|
||||
if can_omit_optional_parentheses(item, f.context()) {
|
||||
let saved_level = f.context().node_level();
|
||||
|
||||
f.context_mut().set_node_level(saved_level);
|
||||
// The group id is used as a condition in [`in_parentheses_only`] to create a conditional group
|
||||
// that is only active if the optional parentheses group expands.
|
||||
let parens_id = f.group_id("optional_parentheses");
|
||||
|
||||
f.context_mut()
|
||||
.set_node_level(NodeLevel::Expression(Some(parens_id)));
|
||||
|
||||
// We can't use `soft_block_indent` here because that would always increment the indent,
|
||||
// even if the group does not break (the indent is not soft). This would result in
|
||||
// too deep indentations if a `parenthesized` group expands. Using `indent_if_group_breaks`
|
||||
// gives us the desired *soft* indentation that is only present if the optional parentheses
|
||||
// are shown.
|
||||
let result = group(&format_args![
|
||||
if_group_breaks(&text("(")),
|
||||
indent_if_group_breaks(
|
||||
&format_args![soft_line_break(), format_expr],
|
||||
parens_id
|
||||
),
|
||||
soft_line_break(),
|
||||
if_group_breaks(&text(")"))
|
||||
])
|
||||
.with_group_id(Some(parens_id))
|
||||
.fmt(f);
|
||||
|
||||
f.context_mut().set_node_level(saved_level);
|
||||
|
||||
result
|
||||
} else {
|
||||
optional_parentheses(&format_expr).fmt(f)
|
||||
}
|
||||
}
|
||||
Parentheses::Custom | Parentheses::Never => {
|
||||
let saved_level = f.context().node_level();
|
||||
|
||||
let new_level = match saved_level {
|
||||
NodeLevel::TopLevel | NodeLevel::CompoundStatement => {
|
||||
NodeLevel::Expression(None)
|
||||
}
|
||||
level @ (NodeLevel::Expression(_) | NodeLevel::ParenthesizedExpression) => {
|
||||
level
|
||||
}
|
||||
};
|
||||
|
||||
f.context_mut().set_node_level(new_level);
|
||||
|
||||
let result = Format::fmt(&format_expr, f);
|
||||
f.context_mut().set_node_level(saved_level);
|
||||
result
|
||||
}
|
||||
};
|
||||
|
||||
result
|
||||
}
|
||||
|
@ -178,3 +222,240 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for Expr {
|
|||
FormatOwnedWithRule::new(self, FormatExpr::default())
|
||||
}
|
||||
}
|
||||
|
||||
/// Tests if it is safe to omit the optional parentheses.
|
||||
///
|
||||
/// We prefer parentheses at least in the following cases:
|
||||
/// * The expression contains more than one unparenthesized expression with the same priority. For example,
|
||||
/// the expression `a * b * c` contains two multiply operations. We prefer parentheses in that case.
|
||||
/// `(a * b) * c` or `a * b + c` are okay, because the subexpression is parenthesized, or the expression uses operands with a lower priority
|
||||
/// * The expression contains at least one parenthesized sub expression (optimization to avoid unnecessary work)
|
||||
///
|
||||
/// This mimics Black's [`_maybe_split_omitting_optional_parens`](https://github.com/psf/black/blob/d1248ca9beaf0ba526d265f4108836d89cf551b7/src/black/linegen.py#L746-L820)
|
||||
fn can_omit_optional_parentheses(expr: &Expr, context: &PyFormatContext) -> bool {
|
||||
let mut visitor = MaxOperatorPriorityVisitor::new(context.contents());
|
||||
|
||||
visitor.visit_subexpression(expr);
|
||||
|
||||
let (max_operator_priority, operation_count, any_parenthesized_expression) = visitor.finish();
|
||||
|
||||
if operation_count > 1 {
|
||||
false
|
||||
} else if max_operator_priority == OperatorPriority::Attribute {
|
||||
true
|
||||
} else {
|
||||
// Only use the more complex IR when there is any expression that we can possibly split by
|
||||
any_parenthesized_expression
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct MaxOperatorPriorityVisitor<'input> {
|
||||
max_priority: OperatorPriority,
|
||||
max_priority_count: u32,
|
||||
any_parenthesized_expressions: bool,
|
||||
source: &'input str,
|
||||
}
|
||||
|
||||
impl<'input> MaxOperatorPriorityVisitor<'input> {
|
||||
fn new(source: &'input str) -> Self {
|
||||
Self {
|
||||
source,
|
||||
max_priority: OperatorPriority::None,
|
||||
max_priority_count: 0,
|
||||
any_parenthesized_expressions: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn update_max_priority(&mut self, current_priority: OperatorPriority) {
|
||||
self.update_max_priority_with_count(current_priority, 1);
|
||||
}
|
||||
|
||||
fn update_max_priority_with_count(&mut self, current_priority: OperatorPriority, count: u32) {
|
||||
match self.max_priority.cmp(¤t_priority) {
|
||||
Ordering::Less => {
|
||||
self.max_priority_count = count;
|
||||
self.max_priority = current_priority;
|
||||
}
|
||||
Ordering::Equal => {
|
||||
self.max_priority_count += count;
|
||||
}
|
||||
Ordering::Greater => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Visits a subexpression, ignoring whether it is parenthesized or not
|
||||
fn visit_subexpression(&mut self, expr: &'input Expr) {
|
||||
match expr {
|
||||
Expr::Dict(_) | Expr::List(_) | Expr::Tuple(_) | Expr::Set(_) => {
|
||||
self.any_parenthesized_expressions = true;
|
||||
// The values are always parenthesized, don't visit.
|
||||
return;
|
||||
}
|
||||
Expr::ListComp(_) | Expr::SetComp(_) | Expr::DictComp(_) => {
|
||||
self.any_parenthesized_expressions = true;
|
||||
self.update_max_priority(OperatorPriority::Comprehension);
|
||||
return;
|
||||
}
|
||||
// It's impossible for a file smaller or equal to 4GB to contain more than 2^32 comparisons
|
||||
// because each comparison requires a left operand, and `n` `operands` and right sides.
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
Expr::BoolOp(ast::ExprBoolOp {
|
||||
range: _,
|
||||
op: _,
|
||||
values,
|
||||
}) => self.update_max_priority_with_count(
|
||||
OperatorPriority::BooleanOperation,
|
||||
values.len().saturating_sub(1) as u32,
|
||||
),
|
||||
Expr::BinOp(ast::ExprBinOp {
|
||||
op,
|
||||
left: _,
|
||||
right: _,
|
||||
range: _,
|
||||
}) => self.update_max_priority(OperatorPriority::from(*op)),
|
||||
|
||||
Expr::IfExp(_) => {
|
||||
// + 1 for the if and one for the else
|
||||
self.update_max_priority_with_count(OperatorPriority::Conditional, 2);
|
||||
}
|
||||
|
||||
// It's impossible for a file smaller or equal to 4GB to contain more than 2^32 comparisons
|
||||
// because each comparison requires a left operand, and `n` `operands` and right sides.
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
Expr::Compare(ast::ExprCompare {
|
||||
range: _,
|
||||
left: _,
|
||||
ops,
|
||||
comparators: _,
|
||||
}) => {
|
||||
self.update_max_priority_with_count(OperatorPriority::Comparator, ops.len() as u32);
|
||||
}
|
||||
Expr::Call(ast::ExprCall {
|
||||
range: _,
|
||||
func,
|
||||
args: _,
|
||||
keywords: _,
|
||||
}) => {
|
||||
self.any_parenthesized_expressions = true;
|
||||
// Only walk the function, the arguments are always parenthesized
|
||||
self.visit_expr(func);
|
||||
return;
|
||||
}
|
||||
Expr::Subscript(_) => {
|
||||
// Don't walk the value. Splitting before the value looks weird.
|
||||
// Don't walk the slice, because the slice is always parenthesized.
|
||||
return;
|
||||
}
|
||||
Expr::UnaryOp(ast::ExprUnaryOp {
|
||||
range: _,
|
||||
op,
|
||||
operand: _,
|
||||
}) => {
|
||||
if op.is_invert() {
|
||||
self.update_max_priority(OperatorPriority::BitwiseInversion);
|
||||
}
|
||||
}
|
||||
|
||||
// `[a, b].test[300].dot`
|
||||
Expr::Attribute(ast::ExprAttribute {
|
||||
range: _,
|
||||
value,
|
||||
attr: _,
|
||||
ctx: _,
|
||||
}) => {
|
||||
if has_parentheses(value, self.source) {
|
||||
self.update_max_priority(OperatorPriority::Attribute);
|
||||
}
|
||||
}
|
||||
|
||||
Expr::NamedExpr(_)
|
||||
| Expr::GeneratorExp(_)
|
||||
| Expr::Lambda(_)
|
||||
| Expr::Await(_)
|
||||
| Expr::Yield(_)
|
||||
| Expr::YieldFrom(_)
|
||||
| Expr::FormattedValue(_)
|
||||
| Expr::JoinedStr(_)
|
||||
| Expr::Constant(_)
|
||||
| Expr::Starred(_)
|
||||
| Expr::Name(_)
|
||||
| Expr::Slice(_) => {}
|
||||
};
|
||||
|
||||
walk_expr(self, expr);
|
||||
}
|
||||
|
||||
fn finish(self) -> (OperatorPriority, u32, bool) {
|
||||
(
|
||||
self.max_priority,
|
||||
self.max_priority_count,
|
||||
self.any_parenthesized_expressions,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'input> PreorderVisitor<'input> for MaxOperatorPriorityVisitor<'input> {
|
||||
fn visit_expr(&mut self, expr: &'input Expr) {
|
||||
// Rule only applies for non-parenthesized expressions.
|
||||
if is_expression_parenthesized(AnyNodeRef::from(expr), self.source) {
|
||||
self.any_parenthesized_expressions = true;
|
||||
} else {
|
||||
self.visit_subexpression(expr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn has_parentheses(expr: &Expr, source: &str) -> bool {
|
||||
matches!(
|
||||
expr,
|
||||
Expr::Dict(_)
|
||||
| Expr::List(_)
|
||||
| Expr::Tuple(_)
|
||||
| Expr::Set(_)
|
||||
| Expr::ListComp(_)
|
||||
| Expr::SetComp(_)
|
||||
| Expr::DictComp(_)
|
||||
| Expr::Call(_)
|
||||
| Expr::Subscript(_)
|
||||
) || is_expression_parenthesized(AnyNodeRef::from(expr), source)
|
||||
}
|
||||
|
||||
/// Priority of an operator when deciding where to split an expression.
///
/// The derived `Ord` follows the declaration order: variants declared later compare greater.
/// Among the bitwise operators the order is `&` < `^` < `|`, so the operator that binds the
/// loosest in Python (`|`) has the highest priority.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
enum OperatorPriority {
    /// No operator has been seen.
    None,
    Attribute,
    Comparator,
    Exponential,
    BitwiseInversion,
    Multiplicative,
    Additive,
    Shift,
    BitwiseAnd,
    BitwiseXor,
    BitwiseOr,
    // TODO(micha)
    #[allow(unused)]
    String,
    BooleanOperation,
    Conditional,
    Comprehension,
}
|
||||
|
||||
impl From<ast::Operator> for OperatorPriority {
|
||||
fn from(value: Operator) -> Self {
|
||||
match value {
|
||||
Operator::Add | Operator::Sub => OperatorPriority::Additive,
|
||||
Operator::Mult
|
||||
| Operator::MatMult
|
||||
| Operator::Div
|
||||
| Operator::Mod
|
||||
| Operator::FloorDiv => OperatorPriority::Multiplicative,
|
||||
Operator::Pow => OperatorPriority::Exponential,
|
||||
Operator::LShift | Operator::RShift => OperatorPriority::Shift,
|
||||
Operator::BitOr => OperatorPriority::BitwiseOr,
|
||||
Operator::BitXor => OperatorPriority::BitwiseXor,
|
||||
Operator::BitAnd => OperatorPriority::BitwiseAnd,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue