From c26076290004288bbe5d30af08f33cf581d9c12c Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Fri, 8 Sep 2023 11:32:20 +0200 Subject: [PATCH] Formatter: Implicit concatenation in compare expressions ## Summary This PR implements the logic for breaking implicit concatenated strings before compare expressions by building on top of #7145 The main change is a new `BinaryLike` enum that has the `BinaryExpression` and `CompareExpression` variants. Supporting both variants requires some downstream changes but doesn't introduce any new concepts. ## Test Plan I added a few more tests. The compatibility improvements are minor but we now perfectly match black on twine :partying_face: **PR** | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1632 | | django | 0.99966 | 2760 | 58 | | transformers | 0.99928 | 2587 | 454 | | **twine** | 1.00000 | 33 | 0 | <-- improved | typeshed | 0.99978 | 3496 | 2173 | | **warehouse** | 0.99824 | 648 | 22 | <-- improved | zulip | 0.99948 | 1437 | 28 | **Base** | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1633 | | django | 0.99966 | 2760 | 58 | | transformers | 0.99928 | 2587 | 454 | | twine | 0.99982 | 33 | 1 | | typeshed | 0.99978 | 3496 | 2173 | | warehouse | 0.99823 | 648 | 23 | | zulip | 0.99948 | 1437 | 28 | --- .../test/fixtures/ruff/expression/compare.py | 58 +++ .../src/expression/binary_like.rs | 332 ++++++++++++------ .../src/expression/expr_bin_op.rs | 2 +- .../src/expression/expr_compare.rs | 78 ++-- ...expression__binary_implicit_string.py.snap | 8 +- .../format@expression__compare.py.snap | 126 ++++++- 6 files changed, 444 insertions(+), 160 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/compare.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/compare.py index 88b6da20bd..e266b6b37a 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/compare.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/compare.py @@ -111,3 +111,61 @@ ct_match = ( ct_match = ( (aaaaaaaaaaaaaaaa) == self.get_content_type[obj, rel_obj, using, instance._state.db].id ) + +# comments + +c = ( + 1 > # 1 + # 2 + 3 # 3 + > # 4 + 5 # 5 + # 6 +) + +# Implicit strings and comments + +assert ( + "One or more packages has an associated PGP signature; these will " + "be silently ignored by the index" + in caplog.messages +) + +c = (a > + # test leading binary comment + "a" "b" * b + ) + +c = (a * + # test leading comment + "a" "b" > b + ) + +c = (a + > # test trailing comment + "a" "b" * b + ) + +c = (a + > + "a" "b" # test trailing comment + * b + ) + +c = (a + > + "a" "b" # test trailing binary comment + + b + ) + + +c = (a > + # test leading binary comment + "a" "b" * b + ) + +c = (a * + # test leading comment + "a" "b" > b + ) + diff --git a/crates/ruff_python_formatter/src/expression/binary_like.rs b/crates/ruff_python_formatter/src/expression/binary_like.rs index 563e3ae5f8..e3b58a59ae 100644 --- a/crates/ruff_python_formatter/src/expression/binary_like.rs +++ b/crates/ruff_python_formatter/src/expression/binary_like.rs @@ -5,8 +5,8 @@ use smallvec::SmallVec; use ruff_formatter::write; use ruff_python_ast::{ - BytesConstant, Constant, Expr, ExprAttribute, ExprBinOp, ExprConstant, ExprUnaryOp, - StringConstant, UnaryOp, + BytesConstant, Constant, Expr, ExprAttribute, ExprBinOp, ExprCompare, ExprConstant, + ExprUnaryOp, StringConstant, UnaryOp, }; use crate::comments::{leading_comments, trailing_comments, Comments, SourceComment}; @@ -20,13 +20,178 @@ use crate::expression::string::StringLayout; use crate::expression::OperatorPrecedence; use crate::prelude::*; -pub(super) struct BinaryLike<'a>(pub(super) &'a ExprBinOp); +#[derive(Copy, Clone, Debug)] +pub(super) enum BinaryLike<'a> { + BinaryExpression(&'a ExprBinOp), + CompareExpression(&'a ExprCompare), +} + +impl<'a> BinaryLike<'a> { + /// Flattens the hierarchical binary expression into a flat operand, operator, operand... sequence. + /// + /// See [`FlatBinaryExpressionSlice`] for an in depth explanation. + fn flatten(self, comments: &'a Comments<'a>, source: &str) -> FlatBinaryExpression<'a> { + fn recurse_compare<'a>( + compare: &'a ExprCompare, + leading_comments: &'a [SourceComment], + trailing_comments: &'a [SourceComment], + comments: &'a Comments, + source: &str, + parts: &mut SmallVec<[OperandOrOperator<'a>; 8]>, + ) { + parts.reserve(compare.comparators.len() * 2 + 1); + + rec( + Operand::Left { + expression: &compare.left, + leading_comments, + }, + comments, + source, + parts, + ); + + assert_eq!( + compare.comparators.len(), + compare.ops.len(), + "Compare expression with an unbalanced number of comparators and operations." + ); + + if let Some((last_expression, middle_expressions)) = compare.comparators.split_last() { + let (last_operator, middle_operators) = compare.ops.split_last().unwrap(); + + for (operator, expression) in middle_operators.iter().zip(middle_expressions) { + parts.push(OperandOrOperator::Operator(Operator { + symbol: OperatorSymbol::Comparator(*operator), + trailing_comments: &[], + })); + + rec(Operand::Middle { expression }, comments, source, parts); + } + + parts.push(OperandOrOperator::Operator(Operator { + symbol: OperatorSymbol::Comparator(*last_operator), + trailing_comments: &[], + })); + + rec( + Operand::Right { + expression: last_expression, + trailing_comments, + }, + comments, + source, + parts, + ); + } + } + + fn recurse_binary<'a>( + binary: &'a ExprBinOp, + leading_comments: &'a [SourceComment], + trailing_comments: &'a [SourceComment], + comments: &'a Comments, + source: &str, + parts: &mut SmallVec<[OperandOrOperator<'a>; 8]>, + ) { + rec( + Operand::Left { + leading_comments, + expression: &binary.left, + }, + comments, + source, + parts, + ); + + parts.push(OperandOrOperator::Operator(Operator { + symbol: OperatorSymbol::Binary(binary.op), + trailing_comments: comments.dangling(binary), + })); + + rec( + Operand::Right { + expression: binary.right.as_ref(), + trailing_comments, + }, + comments, + source, + parts, + ); + } + + fn rec<'a>( + operand: Operand<'a>, + comments: &'a Comments, + source: &str, + parts: &mut SmallVec<[OperandOrOperator<'a>; 8]>, + ) { + let expression = operand.expression(); + match expression { + Expr::BinOp(binary) if !is_expression_parenthesized(expression.into(), source) => { + let leading_comments = operand + .leading_binary_comments() + .unwrap_or_else(|| comments.leading(binary)); + + let trailing_comments = operand + .trailing_binary_comments() + .unwrap_or_else(|| comments.trailing(binary)); + + recurse_binary( + binary, + leading_comments, + trailing_comments, + comments, + source, + parts, + ); + } + Expr::Compare(compare) + if !is_expression_parenthesized(expression.into(), source) => + { + let leading_comments = operand + .leading_binary_comments() + .unwrap_or_else(|| comments.leading(compare)); + + let trailing_comments = operand + .trailing_binary_comments() + .unwrap_or_else(|| comments.trailing(compare)); + + recurse_compare( + compare, + leading_comments, + trailing_comments, + comments, + source, + parts, + ); + } + _ => { + parts.push(OperandOrOperator::Operand(operand)); + } + } + } + + let mut parts = SmallVec::new(); + match self { + BinaryLike::BinaryExpression(binary) => { + // Leading and trailing comments are handled by the binary's ``FormatNodeRule` implementation. + recurse_binary(binary, &[], &[], comments, source, &mut parts); + } + BinaryLike::CompareExpression(compare) => { + // Leading and trailing comments are handled by the compare's ``FormatNodeRule` implementation. + recurse_compare(compare, &[], &[], comments, source, &mut parts); + } + } + + FlatBinaryExpression(parts) + } +} impl Format> for BinaryLike<'_> { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { let comments = f.context().comments().clone(); - let flat_binary = - FlatBinaryExpression::from_binary_expression(self.0, &comments, f.context().source()); + let flat_binary = self.flatten(&comments, f.context().source()); let source = f.context().source(); let mut string_operands = flat_binary @@ -237,89 +402,6 @@ const fn is_simple_power_operand(expr: &Expr) -> bool { #[derive(Debug)] struct FlatBinaryExpression<'a>(SmallVec<[OperandOrOperator<'a>; 8]>); -impl<'a> FlatBinaryExpression<'a> { - /// Flattens parenthesized binary expression recursively (left and right) - fn from_binary_expression( - binary: &'a ExprBinOp, - comments: &'a Comments, - source: &'a str, - ) -> Self { - fn rec<'a>( - operand: Operand<'a>, - comments: &'a Comments, - source: &'a str, - parts: &mut SmallVec<[OperandOrOperator<'a>; 8]>, - ) { - let expression = operand.expression(); - match expression { - Expr::BinOp(binary) if !is_expression_parenthesized(expression.into(), source) => { - let leading_comments = operand - .leading_binary_comments() - .unwrap_or_else(|| comments.leading(binary)); - - rec( - Operand::Left { - leading_comments, - expression: &binary.left, - }, - comments, - source, - parts, - ); - - parts.push(OperandOrOperator::Operator(Operator { - symbol: binary.op, - trailing_comments: comments.dangling(binary), - })); - - let trailing_comments = operand - .trailing_binary_comments() - .unwrap_or_else(|| comments.trailing(binary)); - - rec( - Operand::Right { - expression: binary.right.as_ref(), - trailing_comments, - }, - comments, - source, - parts, - ); - } - _ => { - parts.push(OperandOrOperator::Operand(operand)); - } - } - } - - let mut parts = SmallVec::new(); - rec( - Operand::Left { - expression: &binary.left, - leading_comments: &[], // Already handled by `FormatNodeRule` - }, - comments, - source, - &mut parts, - ); - parts.push(OperandOrOperator::Operator(Operator { - symbol: binary.op, - trailing_comments: comments.dangling(binary), - })); - rec( - Operand::Right { - expression: &binary.right, - trailing_comments: &[], // Already handled by `FormatNodeRule`. - }, - comments, - source, - &mut parts, - ); - - Self(parts) - } -} - impl<'a> Deref for FlatBinaryExpression<'a> { type Target = FlatBinaryExpressionSlice<'a>; @@ -569,11 +651,36 @@ impl<'a> OperandOrOperator<'a> { #[derive(Debug)] enum Operand<'a> { /// Operand that used to be on the left side of a binary operation. + /// + /// For example `a` in the following code + /// + /// ```python + /// a + b + c + /// ``` Left { expression: &'a Expr, /// Leading comments of the outer most binary expression that starts at this node. leading_comments: &'a [SourceComment], }, + /// Operand that is neither at the start nor the end of a binary like expression. + /// Only applies to compare expression. + /// + /// `b` and `c` are *middle* operands whereas `a` is a left and `d` a right operand. + /// + /// ```python + /// a > b > c > d + /// ``` + /// + /// Middle have no leading or trailing comments from the enclosing binary like expression. + Middle { expression: &'a Expr }, + + /// Operand that is on the right side of a binary operation. + /// + /// For example `b` and `c` are right sides of the binary expressions. + /// + /// ```python + /// a + b + c + /// ``` Right { expression: &'a Expr, /// Trailing comments of the outer most binary expression that ends at this operand. @@ -586,6 +693,7 @@ impl<'a> Operand<'a> { match self { Operand::Left { expression, .. } => expression, Operand::Right { expression, .. } => expression, + Operand::Middle { expression } => expression, } } @@ -594,7 +702,9 @@ impl<'a> Operand<'a> { Operand::Left { leading_comments, .. } => !leading_comments.is_empty(), - Operand::Right { expression, .. } => comments.has_leading(*expression), + Operand::Middle { expression } | Operand::Right { expression, .. } => { + comments.has_leading(*expression) + } } } @@ -604,13 +714,13 @@ impl<'a> Operand<'a> { Operand::Left { leading_comments, .. } => Some(leading_comments), - Operand::Right { .. } => None, + Operand::Middle { .. } | Operand::Right { .. } => None, } } fn trailing_binary_comments(&self) -> Option<&'a [SourceComment]> { match self { - Operand::Left { .. } => None, + Operand::Middle { .. } | Operand::Left { .. } => None, Operand::Right { trailing_comments, .. } => Some(trailing_comments), @@ -620,13 +730,13 @@ impl<'a> Operand<'a> { #[derive(Debug)] struct Operator<'a> { - symbol: ruff_python_ast::Operator, + symbol: OperatorSymbol, trailing_comments: &'a [SourceComment], } impl Operator<'_> { fn precedence(&self) -> OperatorPrecedence { - OperatorPrecedence::from(self.symbol) + self.symbol.precedence() } fn has_trailing_comments(&self) -> bool { @@ -636,13 +746,35 @@ impl Operator<'_> { impl Format> for Operator<'_> { fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { - write!( - f, - [ - self.symbol.format(), - trailing_comments(self.trailing_comments) - ] - ) + write!(f, [self.symbol, trailing_comments(self.trailing_comments)]) + } +} + +#[derive(Copy, Clone, Debug)] +enum OperatorSymbol { + Binary(ruff_python_ast::Operator), + Comparator(ruff_python_ast::CmpOp), +} + +impl OperatorSymbol { + const fn is_pow(self) -> bool { + matches!(self, OperatorSymbol::Binary(ruff_python_ast::Operator::Pow)) + } + + fn precedence(self) -> OperatorPrecedence { + match self { + OperatorSymbol::Binary(operator) => OperatorPrecedence::from(operator), + OperatorSymbol::Comparator(_) => OperatorPrecedence::Comparator, + } + } +} + +impl Format> for OperatorSymbol { + fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { + match self { + OperatorSymbol::Binary(operator) => operator.format().fmt(f), + OperatorSymbol::Comparator(operator) => operator.format().fmt(f), + } } } diff --git a/crates/ruff_python_formatter/src/expression/expr_bin_op.rs b/crates/ruff_python_formatter/src/expression/expr_bin_op.rs index d102b97d35..3a87b6d691 100644 --- a/crates/ruff_python_formatter/src/expression/expr_bin_op.rs +++ b/crates/ruff_python_formatter/src/expression/expr_bin_op.rs @@ -14,7 +14,7 @@ pub struct FormatExprBinOp; impl FormatNodeRule for FormatExprBinOp { #[inline] fn fmt_fields(&self, item: &ExprBinOp, f: &mut PyFormatter) -> FormatResult<()> { - BinaryLike(item).fmt(f) + BinaryLike::BinaryExpression(item).fmt(f) } fn fmt_dangling_comments( diff --git a/crates/ruff_python_formatter/src/expression/expr_compare.rs b/crates/ruff_python_formatter/src/expression/expr_compare.rs index 61e60a0cc8..9ad639a049 100644 --- a/crates/ruff_python_formatter/src/expression/expr_compare.rs +++ b/crates/ruff_python_formatter/src/expression/expr_compare.rs @@ -1,62 +1,21 @@ -use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule}; +use ruff_formatter::{FormatOwnedWithRule, FormatRefWithRule}; use ruff_python_ast::node::AnyNodeRef; -use ruff_python_ast::{CmpOp, ExprCompare}; +use ruff_python_ast::{CmpOp, Expr, ExprCompare}; -use crate::comments::{leading_comments, SourceComment}; -use crate::expression::parentheses::{ - in_parentheses_only_group, in_parentheses_only_soft_line_break_or_space, NeedsParentheses, - OptionalParentheses, -}; +use crate::comments::SourceComment; +use crate::expression::binary_like::BinaryLike; +use crate::expression::expr_constant::is_multiline_string; +use crate::expression::has_parentheses; +use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; use crate::prelude::*; #[derive(Default)] pub struct FormatExprCompare; impl FormatNodeRule for FormatExprCompare { + #[inline] fn fmt_fields(&self, item: &ExprCompare, f: &mut PyFormatter) -> FormatResult<()> { - let ExprCompare { - range: _, - left, - ops, - comparators, - } = item; - - let comments = f.context().comments().clone(); - - let inner = format_with(|f| { - write!(f, [in_parentheses_only_group(&left.format())])?; - - assert_eq!(comparators.len(), ops.len()); - - for (operator, comparator) in ops.iter().zip(comparators) { - let leading_comparator_comments = comments.leading(comparator); - if leading_comparator_comments.is_empty() { - write!(f, [in_parentheses_only_soft_line_break_or_space()])?; - } else { - // Format the expressions leading comments **before** the operator - write!( - f, - [ - hard_line_break(), - leading_comments(leading_comparator_comments) - ] - )?; - } - - write!( - f, - [ - operator.format(), - space(), - in_parentheses_only_group(&comparator.format()) - ] - )?; - } - - Ok(()) - }); - - in_parentheses_only_group(&inner).fmt(f) + BinaryLike::CompareExpression(item).fmt(f) } fn fmt_dangling_comments( @@ -73,9 +32,24 @@ impl NeedsParentheses for ExprCompare { fn needs_parentheses( &self, _parent: AnyNodeRef, - _context: &PyFormatContext, + context: &PyFormatContext, ) -> OptionalParentheses { - OptionalParentheses::Multiline + if let Expr::Constant(constant) = self.left.as_ref() { + // Multiline strings are guaranteed to never fit, avoid adding unnecessary parentheses + if !constant.value.is_implicit_concatenated() + && is_multiline_string(constant, context.source()) + && !context.comments().has(self.left.as_ref()) + && self.comparators.first().is_some_and(|right| { + has_parentheses(right, context).is_some() && !context.comments().has(right) + }) + { + OptionalParentheses::Never + } else { + OptionalParentheses::Multiline + } + } else { + OptionalParentheses::Multiline + } } } diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__binary_implicit_string.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__binary_implicit_string.py.snap index 7631c9998c..7ce094c667 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__binary_implicit_string.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__binary_implicit_string.py.snap @@ -267,13 +267,9 @@ self._assert_skipping( ) ( - b - < c - > d - < "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + b < c > d < "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" - "cccccccccccccccccccccccccc" % aaaaaaaaaaaa - > x + "cccccccccccccccccccccccccc" % aaaaaaaaaaaa > x ) diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__compare.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__compare.py.snap index e47b22e60a..817c44b908 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__compare.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__compare.py.snap @@ -117,6 +117,64 @@ ct_match = ( ct_match = ( (aaaaaaaaaaaaaaaa) == self.get_content_type[obj, rel_obj, using, instance._state.db].id ) + +# comments + +c = ( + 1 > # 1 + # 2 + 3 # 3 + > # 4 + 5 # 5 + # 6 +) + +# Implicit strings and comments + +assert ( + "One or more packages has an associated PGP signature; these will " + "be silently ignored by the index" + in caplog.messages +) + +c = (a > + # test leading binary comment + "a" "b" * b + ) + +c = (a * + # test leading comment + "a" "b" > b + ) + +c = (a + > # test trailing comment + "a" "b" * b + ) + +c = (a + > + "a" "b" # test trailing comment + * b + ) + +c = (a + > + "a" "b" # test trailing binary comment + + b + ) + + +c = (a > + # test leading binary comment + "a" "b" * b + ) + +c = (a * + # test leading comment + "a" "b" > b + ) + ``` ## Output @@ -134,8 +192,9 @@ a not in b ( a + == # comment - == b + b ) ( @@ -278,6 +337,71 @@ ct_match = { ct_match = ( aaaaaaaaaaaaaaaa ) == self.get_content_type[obj, rel_obj, using, instance._state.db].id + +# comments + +c = ( + 1 # 1 + > + # 2 + 3 # 3 # 4 + > 5 # 5 + # 6 +) + +# Implicit strings and comments + +assert ( + "One or more packages has an associated PGP signature; these will " + "be silently ignored by the index" in caplog.messages +) + +c = ( + a > + # test leading binary comment + "a" + "b" * b +) + +c = ( + a * + # test leading comment + "a" + "b" > b +) + +c = ( + a # test trailing comment + > "a" + "b" * b +) + +c = ( + a > "a" + "b" # test trailing comment + * b +) + +c = ( + a > "a" + "b" # test trailing binary comment + + b +) + + +c = ( + a > + # test leading binary comment + "a" + "b" * b +) + +c = ( + a * + # test leading comment + "a" + "b" > b +) ```