Split implicit concatenated strings before binary expressions (#7145)

This commit is contained in:
Micha Reiser 2023-09-08 08:51:26 +02:00 committed by GitHub
parent 9671922e40
commit e376c3ff7e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 1067 additions and 366 deletions

View file

@ -30,6 +30,7 @@ once_cell = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true, optional = true }
smallvec = { workspace = true }
static_assertions = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
unicode-width = { workspace = true }

View file

@ -313,3 +313,18 @@ expected_content = (
self.base_url
)
)
rowuses = [(1 << j) | # column ordinal
(1 << (n + i-j + n-1)) | # NW-SE ordinal
(1 << (n + 2*n-1 + i+j)) # NE-SW ordinal
for j in rangen]
skip_bytes = (
header.timecnt * 5 # Transition times and types
+ header.typecnt * 6 # Local time type records
+ header.charcnt # Time zone designations
+ header.leapcnt * 8 # Leap second records
+ header.isstdcnt # Standard/wall indicators
+ header.isutcnt # UT/local indicators
)

View file

@ -118,3 +118,45 @@ def test3():
"(CASE WHEN JSON_TYPE(%s, %%s) IN (%s) "
"THEN JSON_TYPE(%s, %%s) ELSE JSON_EXTRACT(%s, %%s) END)"
) % (lhs, datatype_values, lhs, lhs), (tuple(params) + (json_path,)) * 3
c = (a +
# test leading binary comment
"a" "b" * b
)
c = (a *
# test leading comment
"a" "b" + b
)
c = (a
+ # test trailing comment
"a" "b" * b
)
c = (a
+
"a" "b" # test trailing comment
* b
)
c = (a
*
"a" "b" # test trailing binary comment
+ b
)
c = (a
*
"a" "b"
+ # test trailing operator comment
b
)
c = (a
*
"a" "b"
+
# test trailing operator comment
b
)

View file

@ -102,84 +102,60 @@ impl Format<PyFormatContext<'_>> for FormatLeadingAlternateBranchComments<'_> {
}
}
/// Formats the trailing comments of `node`
pub(crate) fn trailing_node_comments<T>(node: &T) -> FormatTrailingComments
where
T: AstNode,
{
FormatTrailingComments::Node(node.as_any_node_ref())
}
/// Formats the passed comments as trailing comments
pub(crate) fn trailing_comments(comments: &[SourceComment]) -> FormatTrailingComments {
FormatTrailingComments::Comments(comments)
FormatTrailingComments(comments)
}
pub(crate) enum FormatTrailingComments<'a> {
Node(AnyNodeRef<'a>),
Comments(&'a [SourceComment]),
}
pub(crate) struct FormatTrailingComments<'a>(&'a [SourceComment]);
impl Format<PyFormatContext<'_>> for FormatTrailingComments<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
fn write_trailing_comments(
comments: &[SourceComment],
f: &mut PyFormatter,
) -> FormatResult<()> {
let mut has_trailing_own_line_comment = false;
let mut has_trailing_own_line_comment = false;
for trailing in comments.iter().filter(|comment| comment.is_unformatted()) {
has_trailing_own_line_comment |= trailing.line_position().is_own_line();
for trailing in self.0.iter().filter(|comment| comment.is_unformatted()) {
has_trailing_own_line_comment |= trailing.line_position().is_own_line();
if has_trailing_own_line_comment {
let lines_before_comment = lines_before(trailing.start(), f.context().source());
if has_trailing_own_line_comment {
let lines_before_comment = lines_before(trailing.start(), f.context().source());
// A trailing comment at the end of a body or list
// ```python
// def test():
// pass
//
// # Some comment
// ```
write!(
f,
[
line_suffix(
&format_args![
empty_lines(lines_before_comment),
format_comment(trailing)
],
// Reserving width isn't necessary because we don't split
// comments and the empty lines expand any enclosing group.
0
),
expand_parent()
]
)?;
} else {
// A trailing comment at the end of a line has a reserved width to
// consider during line measurement.
// ```python
// tup = (
// "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
// ) # Some comment
// ```
trailing_end_of_line_comment(trailing).fmt(f)?;
}
trailing.mark_formatted();
// A trailing comment at the end of a body or list
// ```python
// def test():
// pass
//
// # Some comment
// ```
write!(
f,
[
line_suffix(
&format_args![
empty_lines(lines_before_comment),
format_comment(trailing)
],
// Reserving width isn't necessary because we don't split
// comments and the empty lines expand any enclosing group.
0
),
expand_parent()
]
)?;
} else {
// A trailing comment at the end of a line has a reserved width to
// consider during line measurement.
// ```python
// tup = (
// "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
// ) # Some comment
// ```
trailing_end_of_line_comment(trailing).fmt(f)?;
}
Ok(())
trailing.mark_formatted();
}
match self {
FormatTrailingComments::Node(node) => {
let comments = f.context().comments().clone();
write_trailing_comments(comments.trailing(*node), f)
}
FormatTrailingComments::Comments(comments) => write_trailing_comments(comments, f),
}
Ok(())
}
}

View file

@ -87,24 +87,22 @@
//!
//! It is possible to add an additional optional label to [`SourceComment`] If ever the need arises to distinguish two *dangling comments* in the formatting logic,
use ruff_text_size::{Ranged, TextRange};
use std::cell::Cell;
use std::fmt::Debug;
use std::rc::Rc;
use ruff_python_ast::Mod;
pub(crate) use format::{
dangling_comments, dangling_node_comments, dangling_open_parenthesis_comments,
leading_alternate_branch_comments, leading_comments, leading_node_comments, trailing_comments,
trailing_node_comments,
};
use ruff_formatter::{SourceCode, SourceCodeSlice};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::visitor::preorder::{PreorderVisitor, TraversalSignal};
use ruff_python_ast::Mod;
use ruff_python_index::CommentRanges;
use ruff_python_trivia::PythonWhitespace;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange};
use crate::comments::debug::{DebugComment, DebugComments};
use crate::comments::map::{LeadingDanglingTrailing, MultiMap};
@ -527,12 +525,12 @@ impl<'a> PreorderVisitor<'a> for MarkVerbatimCommentsAsFormattedVisitor<'a> {
#[cfg(test)]
mod tests {
use insta::assert_debug_snapshot;
use ruff_python_ast::Mod;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use ruff_formatter::SourceCode;
use ruff_python_ast::Mod;
use ruff_python_index::{CommentRanges, CommentRangesBuilder};
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use crate::comments::Comments;

View file

@ -0,0 +1,723 @@
use std::num::NonZeroUsize;
use std::ops::{Deref, Index};
use smallvec::SmallVec;
use ruff_formatter::write;
use ruff_python_ast::{
BytesConstant, Constant, Expr, ExprAttribute, ExprBinOp, ExprConstant, ExprUnaryOp,
StringConstant, UnaryOp,
};
use crate::comments::{leading_comments, trailing_comments, Comments, SourceComment};
use crate::expression::expr_constant::ExprConstantLayout;
use crate::expression::parentheses::{
in_parentheses_only_group, in_parentheses_only_soft_line_break,
in_parentheses_only_soft_line_break_or_space, is_expression_parenthesized,
write_in_parentheses_only_group_end_tag, write_in_parentheses_only_group_start_tag,
};
use crate::expression::string::StringLayout;
use crate::expression::OperatorPrecedence;
use crate::prelude::*;
pub(super) struct BinaryLike<'a>(pub(super) &'a ExprBinOp);
impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
let comments = f.context().comments().clone();
let flat_binary =
FlatBinaryExpression::from_binary_expression(self.0, &comments, f.context().source());
let source = f.context().source();
let mut string_operands = flat_binary
.operands()
.filter_map(|(index, operand)| {
if let Expr::Constant(
constant @ ExprConstant {
value:
Constant::Str(StringConstant {
implicit_concatenated: true,
..
})
| Constant::Bytes(BytesConstant {
implicit_concatenated: true,
..
}),
..
},
) = operand.expression()
{
if is_expression_parenthesized(constant.into(), source) {
None
} else {
Some((index, constant, operand))
}
} else {
None
}
})
.peekable();
// Split the binary expressions by implicit concatenated strings first by creating:
// * One group that encloses the whole binary expression and ensures that all implicit concatenated strings
// break together or fit on the same line
// * Group the left operand and left operator as well as the right operator and right operand
// to give them a lower precedence than the implicit concatenated string parts (the implicit strings should break first)
if let Some((first_index, _, _)) = string_operands.peek() {
// Group all strings in a single group so that they all break together or none of them.
write_in_parentheses_only_group_start_tag(f);
// Start the group for the left side coming before an implicit concatenated string if it isn't the first
// ```python
// a + "b" "c"
// ^^^- start this group
// ```
if *first_index != OperandIndex::new(0) {
write_in_parentheses_only_group_start_tag(f);
}
// The index of the last formatted operator
let mut last_operator_index = None;
loop {
if let Some((index, string_constant, operand)) = string_operands.next() {
// An implicit concatenated string that isn't the first operand in a binary expression
// ```python
// a + "b" "c" + ddddddd + "e" "d"
// ^^^^^^ this part or ^^^^^^^ this part
// ```
if let Some(left_operator_index) = index.left_operator() {
// Everything between the last implicit concatenated string and the left operator
// right before the implicit concatenated string:
// ```python
// a + b + "c" "d"
// ^--- left_operator
// ^^^^^-- left
// ```
let left =
flat_binary.between_operators(last_operator_index, left_operator_index);
let left_operator = &flat_binary[left_operator_index];
if let Some(leading) = left.first_operand().leading_binary_comments() {
leading_comments(leading).fmt(f)?;
}
// Write the left, the left operator, and the space before the right side
write!(
f,
[
left,
left.last_operand()
.trailing_binary_comments()
.map(trailing_comments),
in_parentheses_only_soft_line_break_or_space(),
left_operator,
]
)?;
// Finish the left-side group (the group was started before the loop or by the
// previous iteration)
write_in_parentheses_only_group_end_tag(f);
if operand.has_leading_comments(f.context().comments())
|| left_operator.has_trailing_comments()
{
hard_line_break().fmt(f)?;
} else {
space().fmt(f)?;
}
write!(
f,
[
operand.leading_binary_comments().map(leading_comments),
string_constant
.format()
.with_options(ExprConstantLayout::String(
StringLayout::ImplicitConcatenatedStringInBinaryLike,
)),
operand.trailing_binary_comments().map(trailing_comments),
line_suffix_boundary(),
]
)?;
} else {
// Binary expression that starts with an implicit concatenated string:
// ```python
// "a" "b" + c
// ^^^^^^^-- format the first operand of a binary expression
// ```
string_constant
.format()
.with_options(ExprConstantLayout::String(
StringLayout::ImplicitConcatenatedStringInBinaryLike,
))
.fmt(f)?;
}
// Write the right operator and start the group for the right side (if any)
// ```python
// a + "b" "c" + ddddddd + "e" "d"
// ^^--- write this
// ^^^^^^^^^^^-- start this group
// ```
let right_operator_index = index.right_operator();
if let Some(right_operator) = flat_binary.get_operator(index.right_operator()) {
write_in_parentheses_only_group_start_tag(f);
let right_operand = &flat_binary[right_operator_index.right_operand()];
let right_operand_has_leading_comments =
right_operand.has_leading_comments(f.context().comments());
// Keep the operator on the same line if the right side has leading comments (and thus, breaks)
if right_operand_has_leading_comments {
space().fmt(f)?;
} else {
in_parentheses_only_soft_line_break_or_space().fmt(f)?;
}
right_operator.fmt(f)?;
if right_operand_has_leading_comments
|| right_operator.has_trailing_comments()
{
hard_line_break().fmt(f)?;
} else {
space().fmt(f)?;
}
last_operator_index = Some(right_operator_index);
} else {
break;
}
} else {
if let Some(last_operator_index) = last_operator_index {
let end = flat_binary.after_operator(last_operator_index);
end.fmt(f)?;
write_in_parentheses_only_group_end_tag(f);
}
break;
}
}
// Finish the group that wraps all implicit concatenated strings
write_in_parentheses_only_group_end_tag(f);
} else {
in_parentheses_only_group(&&*flat_binary).fmt(f)?;
}
Ok(())
}
}
const fn is_simple_power_expression(left: &Expr, right: &Expr) -> bool {
is_simple_power_operand(left) && is_simple_power_operand(right)
}
/// Return `true` if an [`Expr`] adheres to [Black's definition](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#line-breaks-binary-operators)
/// of a non-complex expression, in the context of a power operation.
const fn is_simple_power_operand(expr: &Expr) -> bool {
match expr {
Expr::UnaryOp(ExprUnaryOp {
op: UnaryOp::Not, ..
}) => false,
Expr::Constant(ExprConstant {
value: Constant::Complex { .. } | Constant::Float(_) | Constant::Int(_),
..
}) => true,
Expr::Name(_) => true,
Expr::UnaryOp(ExprUnaryOp { operand, .. }) => is_simple_power_operand(operand),
Expr::Attribute(ExprAttribute { value, .. }) => is_simple_power_operand(value),
_ => false,
}
}
/// Owned [`FlatBinaryExpressionSlice`]. Read the [`FlatBinaryExpressionSlice`] documentation for more details about the data structure.
#[derive(Debug)]
struct FlatBinaryExpression<'a>(SmallVec<[OperandOrOperator<'a>; 8]>);
impl<'a> FlatBinaryExpression<'a> {
/// Flattens parenthesized binary expression recursively (left and right)
fn from_binary_expression(
binary: &'a ExprBinOp,
comments: &'a Comments,
source: &'a str,
) -> Self {
fn rec<'a>(
operand: Operand<'a>,
comments: &'a Comments,
source: &'a str,
parts: &mut SmallVec<[OperandOrOperator<'a>; 8]>,
) {
let expression = operand.expression();
match expression {
Expr::BinOp(binary) if !is_expression_parenthesized(expression.into(), source) => {
let leading_comments = operand
.leading_binary_comments()
.unwrap_or_else(|| comments.leading(binary));
rec(
Operand::Left {
leading_comments,
expression: &binary.left,
},
comments,
source,
parts,
);
parts.push(OperandOrOperator::Operator(Operator {
symbol: binary.op,
trailing_comments: comments.dangling(binary),
}));
let trailing_comments = operand
.trailing_binary_comments()
.unwrap_or_else(|| comments.trailing(binary));
rec(
Operand::Right {
expression: binary.right.as_ref(),
trailing_comments,
},
comments,
source,
parts,
);
}
_ => {
parts.push(OperandOrOperator::Operand(operand));
}
}
}
let mut parts = SmallVec::new();
rec(
Operand::Left {
expression: &binary.left,
leading_comments: &[], // Already handled by `FormatNodeRule`
},
comments,
source,
&mut parts,
);
parts.push(OperandOrOperator::Operator(Operator {
symbol: binary.op,
trailing_comments: comments.dangling(binary),
}));
rec(
Operand::Right {
expression: &binary.right,
trailing_comments: &[], // Already handled by `FormatNodeRule`.
},
comments,
source,
&mut parts,
);
Self(parts)
}
}
impl<'a> Deref for FlatBinaryExpression<'a> {
type Target = FlatBinaryExpressionSlice<'a>;
fn deref(&self) -> &Self::Target {
FlatBinaryExpressionSlice::from_slice(&self.0)
}
}
/// Binary chain represented as a flat vector where operands are stored at even indices and operators
/// add odd indices.
///
/// ```python
/// a + 5 * 3 + 2
/// ```
///
/// Gets parsed as:
///
/// ```text
/// graph
/// +
/// ├──a
/// ├──*
/// │ ├──b
/// │ └──c
/// └──d
/// ```
///
/// The slice representation of the above is closer to what you have in source. It's a simple sequence of operands and operators,
/// entirely ignoring operator precedence (doesn't flatten parenthesized expressions):
///
/// ```text
/// -----------------------------
/// | a | + | 5 | * | 3 | + | 2 |
/// -----------------------------
/// ```
///
/// The advantage of a flat structure are:
/// * It becomes possible to adjust the operator / operand precedence. E.g splitting implicit concatenated strings before `+` operations.
/// * It allows arbitrary slicing of binary expressions for as long as a slice always starts and ends with an operand.
///
/// A slice is guaranteed to always start and end with an operand. The smallest valid slice is a slice containing a single operand.
/// Operands in multi-operand slices are separated by operators.
#[repr(transparent)]
struct FlatBinaryExpressionSlice<'a>([OperandOrOperator<'a>]);
impl<'a> FlatBinaryExpressionSlice<'a> {
fn from_slice<'slice>(slice: &'slice [OperandOrOperator<'a>]) -> &'slice Self {
debug_assert!(
!slice.is_empty(),
"Operand slice must contain at least one operand"
);
#[allow(unsafe_code)]
unsafe {
// SAFETY: `BinaryChainSlice` has the same layout as a slice because it uses `repr(transparent)`
&*(slice as *const [OperandOrOperator<'a>] as *const FlatBinaryExpressionSlice<'a>)
}
}
fn operators(&self) -> impl Iterator<Item = (OperatorIndex, &Operator<'a>)> {
self.0.iter().enumerate().filter_map(|(index, part)| {
if let OperandOrOperator::Operator(operator) = part {
Some((OperatorIndex::new(index), operator))
} else {
None
}
})
}
fn operands(&self) -> impl Iterator<Item = (OperandIndex, &Operand<'a>)> {
self.0.iter().enumerate().filter_map(|(index, part)| {
if let OperandOrOperator::Operand(operand) = part {
Some((OperandIndex::new(index), operand))
} else {
None
}
})
}
/// Creates a subslice that contains the operands coming after `last_operator` and up to, but not including the `end` operator.
fn between_operators(&self, last_operator: Option<OperatorIndex>, end: OperatorIndex) -> &Self {
let start = last_operator.map_or(0usize, |operator| operator.right_operand().0);
Self::from_slice(&self.0[start..end.value()])
}
/// Creates a slice starting at the right operand of `index`.
fn after_operator(&self, index: OperatorIndex) -> &Self {
Self::from_slice(&self.0[index.right_operand().0..])
}
/// Returns the lowest precedence of any operator in this binary chain.
fn lowest_precedence(&self) -> OperatorPrecedence {
self.operators()
.map(|(_, operator)| operator.precedence())
.max()
.unwrap_or(OperatorPrecedence::None)
}
/// Returns the first operand in the slice.
fn first_operand(&self) -> &Operand<'a> {
match self.0.first() {
Some(OperandOrOperator::Operand(operand)) => operand,
_ => unreachable!("Expected an operand"),
}
}
/// Returns the last operand (the right most operand).
fn last_operand(&self) -> &Operand<'a> {
match self.0.last() {
Some(OperandOrOperator::Operand(operand)) => operand,
_ => unreachable!("Expected an operand"),
}
}
/// Returns the operator at the given index or `None` if it is out of bounds.
fn get_operator(&self, index: OperatorIndex) -> Option<&Operator<'a>> {
self.0
.get(index.value())
.map(OperandOrOperator::unwrap_operator)
}
}
/// Formats a binary chain slice by inserting soft line breaks before the lowest-precedence operators.
/// In other words: It splits the line before by the lowest precedence operators (and it either splits
/// all of them or none). For example, the lowest precedence operator for `a + b * c + d` is the `+` operator.
/// The expression either gets formatted as `a + b * c + d` if it fits on the line or as
/// ```python
/// a
/// + b * c
/// + d
/// ```
///
/// Notice how the formatting first splits by the lower precedence operator `+` but tries to keep the `*` operation
/// on a single line.
///
/// The formatting is recursive (with a depth of `O(operators)` where `operators` are operators with different precedences).
///
/// Comments before or after the first operand must be formatted by the caller because they shouldn't be part of the group
/// wrapping the whole binary chain. This is to avoid that `b * c` expands in the following example because of its trailing comemnt:
///
/// ```python
///
/// ( a
/// + b * c # comment
/// + d
/// )
/// ```
///
///
impl Format<PyFormatContext<'_>> for FlatBinaryExpressionSlice<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext>) -> FormatResult<()> {
// Single operand slice
if let [OperandOrOperator::Operand(operand)] = &self.0 {
return operand.expression().format().fmt(f);
}
let mut last_operator: Option<OperatorIndex> = None;
let lowest_precedence = self.lowest_precedence();
for (index, operator_part) in self.operators() {
if operator_part.precedence() == lowest_precedence {
let left = self.between_operators(last_operator, index);
let right = self.after_operator(index);
let is_pow = operator_part.symbol.is_pow()
&& is_simple_power_expression(
left.last_operand().expression(),
right.first_operand().expression(),
);
if let Some(leading) = left.first_operand().leading_binary_comments() {
leading_comments(leading).fmt(f)?;
}
in_parentheses_only_group(&left).fmt(f)?;
if let Some(trailing) = left.last_operand().trailing_binary_comments() {
trailing_comments(trailing).fmt(f)?;
}
if is_pow {
in_parentheses_only_soft_line_break().fmt(f)?;
} else {
in_parentheses_only_soft_line_break_or_space().fmt(f)?;
}
operator_part.fmt(f)?;
// Format the operator on its own line if the right side has any leading comments.
if right
.first_operand()
.has_leading_comments(f.context().comments())
|| operator_part.has_trailing_comments()
{
hard_line_break().fmt(f)?;
} else if !is_pow {
space().fmt(f)?;
}
last_operator = Some(index);
}
}
// Format the last right side
// SAFETY: It is guaranteed that the slice contains at least a operand, operator, operand sequence or:
// * the slice contains only a single operand in which case the function exits early above.
// * the slice is empty, which isn't a valid slice
// * the slice violates the operand, operator, operand constraint, in which case the error already happened earlier.
let right = self.after_operator(last_operator.unwrap());
if let Some(leading) = right.first_operand().leading_binary_comments() {
leading_comments(leading).fmt(f)?;
}
in_parentheses_only_group(&right).fmt(f)
}
}
/// Either an [`Operand`] or [`Operator`]
#[derive(Debug)]
enum OperandOrOperator<'a> {
Operand(Operand<'a>),
Operator(Operator<'a>),
}
impl<'a> OperandOrOperator<'a> {
fn unwrap_operand(&self) -> &Operand<'a> {
match self {
OperandOrOperator::Operand(operand) => operand,
OperandOrOperator::Operator(operator) => {
panic!("Expected operand but found operator {operator:?}.")
}
}
}
fn unwrap_operator(&self) -> &Operator<'a> {
match self {
OperandOrOperator::Operator(operator) => operator,
OperandOrOperator::Operand(operand) => {
panic!("Expected operator but found operand {operand:?}.")
}
}
}
}
#[derive(Debug)]
enum Operand<'a> {
/// Operand that used to be on the left side of a binary operation.
Left {
expression: &'a Expr,
/// Leading comments of the outer most binary expression that starts at this node.
leading_comments: &'a [SourceComment],
},
Right {
expression: &'a Expr,
/// Trailing comments of the outer most binary expression that ends at this operand.
trailing_comments: &'a [SourceComment],
},
}
impl<'a> Operand<'a> {
fn expression(&self) -> &'a Expr {
match self {
Operand::Left { expression, .. } => expression,
Operand::Right { expression, .. } => expression,
}
}
fn has_leading_comments(&self, comments: &Comments) -> bool {
match self {
Operand::Left {
leading_comments, ..
} => !leading_comments.is_empty(),
Operand::Right { expression, .. } => comments.has_leading(*expression),
}
}
/// Comments of the outer-most enclosing binary expression.
fn leading_binary_comments(&self) -> Option<&'a [SourceComment]> {
match self {
Operand::Left {
leading_comments, ..
} => Some(leading_comments),
Operand::Right { .. } => None,
}
}
fn trailing_binary_comments(&self) -> Option<&'a [SourceComment]> {
match self {
Operand::Left { .. } => None,
Operand::Right {
trailing_comments, ..
} => Some(trailing_comments),
}
}
}
#[derive(Debug)]
struct Operator<'a> {
symbol: ruff_python_ast::Operator,
trailing_comments: &'a [SourceComment],
}
impl Operator<'_> {
fn precedence(&self) -> OperatorPrecedence {
OperatorPrecedence::from(self.symbol)
}
fn has_trailing_comments(&self) -> bool {
!self.trailing_comments.is_empty()
}
}
impl Format<PyFormatContext<'_>> for Operator<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
write!(
f,
[
self.symbol.format(),
trailing_comments(self.trailing_comments)
]
)
}
}
/// Index of an Operand in the [`FlatBinaryExpressionSlice`].
#[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
struct OperandIndex(usize);
impl OperandIndex {
fn new(index: usize) -> Self {
debug_assert_eq!(index % 2, 0, "Operand indices must be even positions");
Self(index)
}
/// Returns the index of the operator directly left to this operand. Returns [`None`] if
/// this is the first operand in the call chain.
fn left_operator(self) -> Option<OperatorIndex> {
if self.0 == 0 {
None
} else {
Some(OperatorIndex::new(self.0 - 1))
}
}
/// Returns the index of the operand's right operator. The method always returns an index
/// even if the operand has no right operator. Use [`BinaryCallChain::get_operator`] to test if
/// the operand has a right operator.
fn right_operator(self) -> OperatorIndex {
OperatorIndex::new(self.0 + 1)
}
}
/// Index of an Operator in the [`FlatBinaryExpressionSlice`].
#[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
struct OperatorIndex(NonZeroUsize);
impl OperatorIndex {
fn new(index: usize) -> Self {
assert_eq!(index % 2, 1, "Operator indices must be odd positions");
// SAFETY A value with a module 0 is guaranteed to never equal 0
#[allow(unsafe_code)]
Self(unsafe { NonZeroUsize::new_unchecked(index) })
}
const fn value(self) -> usize {
self.0.get()
}
fn right_operand(self) -> OperandIndex {
OperandIndex::new(self.value() + 1)
}
}
impl<'a> Index<OperatorIndex> for FlatBinaryExpressionSlice<'a> {
type Output = Operator<'a>;
fn index(&self, index: OperatorIndex) -> &Self::Output {
self.0[index.value()].unwrap_operator()
}
}
impl<'a> Index<OperandIndex> for FlatBinaryExpressionSlice<'a> {
type Output = Operand<'a>;
fn index(&self, index: OperandIndex) -> &Self::Output {
self.0[index.0].unwrap_operand()
}
}
mod size_assertion {
use super::{FlatBinaryExpressionSlice, OperandOrOperator, OperatorIndex};
static_assertions::assert_eq_size!(Option<OperatorIndex>, OperatorIndex);
static_assertions::assert_eq_size!(&FlatBinaryExpressionSlice, &[OperandOrOperator]);
static_assertions::assert_eq_align!(&FlatBinaryExpressionSlice, &[OperandOrOperator]);
}

View file

@ -1,146 +1,20 @@
use std::iter;
use smallvec::SmallVec;
use ruff_formatter::{format_args, write, FormatOwnedWithRule, FormatRefWithRule};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::{
Constant, Expr, ExprAttribute, ExprBinOp, ExprConstant, ExprUnaryOp, Operator, StringConstant,
UnaryOp,
};
use ruff_python_ast::{Expr, ExprBinOp};
use crate::comments::{trailing_comments, trailing_node_comments, SourceComment};
use crate::expression::expr_constant::{is_multiline_string, ExprConstantLayout};
use crate::comments::SourceComment;
use crate::expression::binary_like::BinaryLike;
use crate::expression::expr_constant::is_multiline_string;
use crate::expression::has_parentheses;
use crate::expression::parentheses::{
in_parentheses_only_group, in_parentheses_only_soft_line_break,
in_parentheses_only_soft_line_break_or_space, is_expression_parenthesized, parenthesized,
NeedsParentheses, OptionalParentheses,
};
use crate::expression::string::StringLayout;
use crate::expression::OperatorPrecedence;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::prelude::*;
#[derive(Default)]
pub struct FormatExprBinOp;
impl FormatNodeRule<ExprBinOp> for FormatExprBinOp {
#[inline]
fn fmt_fields(&self, item: &ExprBinOp, f: &mut PyFormatter) -> FormatResult<()> {
let comments = f.context().comments().clone();
match Self::layout(item, f.context()) {
BinOpLayout::LeftString(expression) => {
let right_has_leading_comment = comments.has_leading(item.right.as_ref());
let format_right_and_op = format_with(|f| {
if right_has_leading_comment {
space().fmt(f)?;
} else {
soft_line_break_or_space().fmt(f)?;
}
item.op.format().fmt(f)?;
if right_has_leading_comment {
hard_line_break().fmt(f)?;
} else {
space().fmt(f)?;
}
group(&item.right.format()).fmt(f)
});
let format_left = format_with(|f: &mut PyFormatter| {
let format_string =
expression.format().with_options(ExprConstantLayout::String(
StringLayout::ImplicitConcatenatedBinaryLeftSide,
));
if is_expression_parenthesized(expression.into(), f.context().source()) {
parenthesized("(", &format_string, ")").fmt(f)
} else {
format_string.fmt(f)
}
});
group(&format_args![format_left, group(&format_right_and_op)]).fmt(f)
}
BinOpLayout::Default => {
let format_inner = format_with(|f: &mut PyFormatter| {
let source = f.context().source();
let precedence = OperatorPrecedence::from(item.op);
let binary_chain: SmallVec<[&ExprBinOp; 4]> =
iter::successors(Some(item), |parent| {
parent.left.as_bin_op_expr().and_then(|bin_expression| {
if OperatorPrecedence::from(bin_expression.op) != precedence
|| is_expression_parenthesized(bin_expression.into(), source)
{
None
} else {
Some(bin_expression)
}
})
})
.collect();
// SAFETY: `binary_chain` is guaranteed not to be empty because it always contains the current expression.
let left_most = binary_chain.last().unwrap();
// Format the left most expression
in_parentheses_only_group(&left_most.left.format()).fmt(f)?;
// Iterate upwards in the binary expression tree and, for each level, format the operator
// and the right expression.
for current in binary_chain.into_iter().rev() {
let ExprBinOp {
range: _,
left: _,
op,
right,
} = current;
let operator_comments = comments.dangling(current);
let needs_space = !is_simple_power_expression(current);
let before_operator_space = if needs_space {
in_parentheses_only_soft_line_break_or_space()
} else {
in_parentheses_only_soft_line_break()
};
write!(
f,
[
before_operator_space,
op.format(),
trailing_comments(operator_comments),
]
)?;
// Format the operator on its own line if the right side has any leading comments.
if comments.has_leading(right.as_ref()) || !operator_comments.is_empty() {
hard_line_break().fmt(f)?;
} else if needs_space {
space().fmt(f)?;
}
in_parentheses_only_group(&right.format()).fmt(f)?;
// It's necessary to format the trailing comments because the code bypasses
// `FormatNodeRule::fmt` for the nested binary expressions.
// Don't call the formatting function for the most outer binary expression because
// these comments have already been formatted.
if current != item {
trailing_node_comments(current).fmt(f)?;
}
}
Ok(())
});
in_parentheses_only_group(&format_inner).fmt(f)
}
}
BinaryLike(item).fmt(f)
}
fn fmt_dangling_comments(
@ -153,117 +27,6 @@ impl FormatNodeRule<ExprBinOp> for FormatExprBinOp {
}
}
impl FormatExprBinOp {
fn layout<'a>(bin_op: &'a ExprBinOp, context: &PyFormatContext) -> BinOpLayout<'a> {
if let Some(
constant @ ExprConstant {
value:
Constant::Str(StringConstant {
implicit_concatenated: true,
..
}),
..
},
) = bin_op.left.as_constant_expr()
{
let comments = context.comments();
if bin_op.op == Operator::Mod
&& context.node_level().is_parenthesized()
&& !comments.has_dangling(constant)
&& !comments.has_dangling(bin_op)
{
BinOpLayout::LeftString(constant)
} else {
BinOpLayout::Default
}
} else {
BinOpLayout::Default
}
}
}
const fn is_simple_power_expression(expr: &ExprBinOp) -> bool {
expr.op.is_pow() && is_simple_power_operand(&expr.left) && is_simple_power_operand(&expr.right)
}
/// Return `true` if an [`Expr`] adheres to [Black's definition](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#line-breaks-binary-operators)
/// of a non-complex expression, in the context of a power operation.
const fn is_simple_power_operand(expr: &Expr) -> bool {
match expr {
Expr::UnaryOp(ExprUnaryOp {
op: UnaryOp::Not, ..
}) => false,
Expr::Constant(ExprConstant {
value: Constant::Complex { .. } | Constant::Float(_) | Constant::Int(_),
..
}) => true,
Expr::Name(_) => true,
Expr::UnaryOp(ExprUnaryOp { operand, .. }) => is_simple_power_operand(operand),
Expr::Attribute(ExprAttribute { value, .. }) => is_simple_power_operand(value),
_ => false,
}
}
#[derive(Copy, Clone, Debug)]
enum BinOpLayout<'a> {
Default,
/// Specific layout for an implicit concatenated string using the "old" c-style formatting.
///
/// ```python
/// (
/// "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa %s"
/// "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb %s" % (a, b)
/// )
/// ```
///
/// Prefers breaking the string parts over breaking in front of the `%` because it looks better if it
/// is kept on the same line.
LeftString(&'a ExprConstant),
}
#[derive(Copy, Clone)]
pub struct FormatOperator;
impl<'ast> AsFormat<PyFormatContext<'ast>> for Operator {
type Format<'a> = FormatRefWithRule<'a, Operator, FormatOperator, PyFormatContext<'ast>>;
fn format(&self) -> Self::Format<'_> {
FormatRefWithRule::new(self, FormatOperator)
}
}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for Operator {
type Format = FormatOwnedWithRule<Operator, FormatOperator, PyFormatContext<'ast>>;
fn into_format(self) -> Self::Format {
FormatOwnedWithRule::new(self, FormatOperator)
}
}
impl FormatRule<Operator, PyFormatContext<'_>> for FormatOperator {
fn fmt(&self, item: &Operator, f: &mut PyFormatter) -> FormatResult<()> {
let operator = match item {
Operator::Add => "+",
Operator::Sub => "-",
Operator::Mult => "*",
Operator::MatMult => "@",
Operator::Div => "/",
Operator::Mod => "%",
Operator::Pow => "**",
Operator::LShift => "<<",
Operator::RShift => ">>",
Operator::BitOr => "|",
Operator::BitXor => "^",
Operator::BitAnd => "&",
Operator::FloorDiv => "//",
};
token(operator).fmt(f)
}
}
impl NeedsParentheses for ExprBinOp {
fn needs_parentheses(
&self,

View file

@ -19,6 +19,7 @@ use crate::expression::parentheses::{
};
use crate::prelude::*;
mod binary_like;
pub(crate) mod expr_attribute;
pub(crate) mod expr_await;
pub(crate) mod expr_bin_op;
@ -48,6 +49,7 @@ pub(crate) mod expr_unary_op;
pub(crate) mod expr_yield;
pub(crate) mod expr_yield_from;
pub(crate) mod number;
mod operator;
pub(crate) mod parentheses;
pub(crate) mod string;
@ -784,7 +786,7 @@ pub(crate) fn has_own_parentheses(
}
/// The precedence of [python operators](https://docs.python.org/3/reference/expressions.html#operator-precedence) from
/// lowest to highest priority.
/// highest to lowest priority.
///
/// Ruff uses the operator precedence to decide in which order to split operators:
/// Operators with a lower precedence split before higher-precedence operators.

View file

@ -0,0 +1,44 @@
use crate::prelude::*;
use ruff_formatter::{FormatOwnedWithRule, FormatRefWithRule};
use ruff_python_ast::Operator;
#[derive(Copy, Clone)]
pub struct FormatOperator;
impl<'ast> AsFormat<PyFormatContext<'ast>> for Operator {
type Format<'a> = FormatRefWithRule<'a, Operator, FormatOperator, PyFormatContext<'ast>>;
fn format(&self) -> Self::Format<'_> {
FormatRefWithRule::new(self, FormatOperator)
}
}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for Operator {
type Format = FormatOwnedWithRule<Operator, FormatOperator, PyFormatContext<'ast>>;
fn into_format(self) -> Self::Format {
FormatOwnedWithRule::new(self, FormatOperator)
}
}
impl FormatRule<Operator, PyFormatContext<'_>> for FormatOperator {
fn fmt(&self, item: &Operator, f: &mut PyFormatter) -> FormatResult<()> {
let operator = match item {
Operator::Add => "+",
Operator::Sub => "-",
Operator::Mult => "*",
Operator::MatMult => "@",
Operator::Div => "/",
Operator::Mod => "%",
Operator::Pow => "**",
Operator::LShift => "<<",
Operator::RShift => ">>",
Operator::BitOr => "|",
Operator::BitXor => "^",
Operator::BitAnd => "&",
Operator::FloorDiv => "//",
};
token(operator).fmt(f)
}
}

View file

@ -314,23 +314,41 @@ pub(crate) struct FormatInParenthesesOnlyGroup<'content, 'ast> {
impl<'ast> Format<PyFormatContext<'ast>> for FormatInParenthesesOnlyGroup<'_, 'ast> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'ast>>) -> FormatResult<()> {
match f.context().node_level() {
NodeLevel::Expression(Some(parentheses_id)) => {
// If this content is enclosed by a group that adds the optional parentheses, then *disable*
// this group *except* if the optional parentheses are shown.
conditional_group(
&Arguments::from(&self.content),
Condition::if_group_breaks(parentheses_id),
)
.fmt(f)
}
NodeLevel::ParenthesizedExpression => {
// Unconditionally group the content if it is not enclosed by an optional parentheses group.
group(&Arguments::from(&self.content)).fmt(f)
}
NodeLevel::Expression(None) | NodeLevel::TopLevel | NodeLevel::CompoundStatement => {
Arguments::from(&self.content).fmt(f)
}
write_in_parentheses_only_group_start_tag(f);
Arguments::from(&self.content).fmt(f)?;
write_in_parentheses_only_group_end_tag(f);
Ok(())
}
}
pub(super) fn write_in_parentheses_only_group_start_tag(f: &mut PyFormatter) {
match f.context().node_level() {
NodeLevel::Expression(Some(parentheses_id)) => {
f.write_element(FormatElement::Tag(tag::Tag::StartConditionalGroup(
tag::ConditionalGroup::new(Condition::if_group_breaks(parentheses_id)),
)));
}
NodeLevel::ParenthesizedExpression => {
// Unconditionally group the content if it is not enclosed by an optional parentheses group.
f.write_element(FormatElement::Tag(tag::Tag::StartGroup(tag::Group::new())));
}
NodeLevel::Expression(None) | NodeLevel::TopLevel | NodeLevel::CompoundStatement => {
// No group
}
}
}
pub(super) fn write_in_parentheses_only_group_end_tag(f: &mut PyFormatter) {
match f.context().node_level() {
NodeLevel::Expression(Some(_)) => {
f.write_element(FormatElement::Tag(tag::Tag::EndConditionalGroup));
}
NodeLevel::ParenthesizedExpression => {
// Unconditionally group the content if it is not enclosed by an optional parentheses group.
f.write_element(FormatElement::Tag(tag::Tag::EndGroup));
}
NodeLevel::Expression(None) | NodeLevel::TopLevel | NodeLevel::CompoundStatement => {
// No group
}
}
}

View file

@ -89,7 +89,11 @@ pub enum StringLayout {
#[default]
Default,
DocString,
ImplicitConcatenatedBinaryLeftSide,
/// An implicit concatenated string in a binary like (e.g. `a + b` or `a < b`) expression.
///
/// Formats the implicit concatenated string parts without the enclosing group because the group
/// is added by the binary like formatting.
ImplicitConcatenatedStringInBinaryLike,
}
impl<'a> FormatString<'a> {
@ -135,7 +139,7 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
);
format_docstring(&string_part, f)
}
StringLayout::ImplicitConcatenatedBinaryLeftSide => {
StringLayout::ImplicitConcatenatedStringInBinaryLike => {
FormatStringContinuation::new(self.string).fmt(f)
}
}

View file

@ -168,10 +168,9 @@ pub fn format_node<'a>(
}
/// Public function for generating a printable string of the debug comments.
pub fn pretty_comments(formatted: &Formatted<PyFormatContext>, source: &str) -> String {
let comments = formatted.context().comments();
pub fn pretty_comments(root: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
// When comments are empty we'd display an empty map '{}'
std::format!(
"{comments:#?}",
comments = comments.debug(SourceCode::new(source))
@ -217,10 +216,12 @@ if True:
#[test]
fn quick_test() {
let src = r#"
for converter in connection.ops.get_db_converters(
expression
) + expression.get_db_converters(connection):
...
(header.timecnt * 5 # Transition times and types
+ header.typecnt * 6 # Local time type records
+ header.charcnt # Time zone designations
+ header.leapcnt * 8 # Leap second records
+ header.isstdcnt # Standard/wall indicators
+ header.isutcnt) # UT/local indicators
"#;
// Tokenize once
let mut tokens = Vec::new();
@ -244,9 +245,9 @@ for converter in connection.ops.get_db_converters(
// Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR
// inside of a `Format` implementation
// use ruff_formatter::FormatContext;
// formatted
// dbg!(formatted
// .document()
// .display(formatted.context().source_code());
// .display(formatted.context().source_code()));
//
// dbg!(formatted
// .context()

View file

@ -319,6 +319,21 @@ expected_content = (
self.base_url
)
)
rowuses = [(1 << j) | # column ordinal
(1 << (n + i-j + n-1)) | # NW-SE ordinal
(1 << (n + 2*n-1 + i+j)) # NE-SW ordinal
for j in rangen]
skip_bytes = (
header.timecnt * 5 # Transition times and types
+ header.typecnt * 6 # Local time type records
+ header.charcnt # Time zone designations
+ header.leapcnt * 8 # Leap second records
+ header.isstdcnt # Standard/wall indicators
+ header.isutcnt # UT/local indicators
)
```
## Output
@ -697,6 +712,25 @@ expected_content = (
self.base_url
)
)
rowuses = [
(
1 << j # column ordinal
)
| (1 << (n + i - j + n - 1)) # NW-SE ordinal
| (1 << (n + 2 * n - 1 + i + j)) # NE-SW ordinal
for j in rangen
]
skip_bytes = (
header.timecnt * 5 # Transition times and types
+ header.typecnt * 6 # Local time type records
+ header.charcnt # Time zone designations
+ header.leapcnt * 8 # Leap second records
+ header.isstdcnt # Standard/wall indicators
+ header.isutcnt # UT/local indicators
)
```

View file

@ -124,6 +124,48 @@ def test3():
"(CASE WHEN JSON_TYPE(%s, %%s) IN (%s) "
"THEN JSON_TYPE(%s, %%s) ELSE JSON_EXTRACT(%s, %%s) END)"
) % (lhs, datatype_values, lhs, lhs), (tuple(params) + (json_path,)) * 3
c = (a +
# test leading binary comment
"a" "b" * b
)
c = (a *
# test leading comment
"a" "b" + b
)
c = (a
+ # test trailing comment
"a" "b" * b
)
c = (a
+
"a" "b" # test trailing comment
* b
)
c = (a
*
"a" "b" # test trailing binary comment
+ b
)
c = (a
*
"a" "b"
+ # test trailing operator comment
b
)
c = (a
*
"a" "b"
+
# test trailing operator comment
b
)
```
## Output
@ -215,18 +257,13 @@ self._assert_skipping(
(
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
"cccccccccccccccccccccccccc" % aaaaaaaaaaaa
+ x
"cccccccccccccccccccccccccc" % aaaaaaaaaaaa + x
)
(
b
+ c
+ d
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
b + c + d + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
"cccccccccccccccccccccccccc" % aaaaaaaaaaaa
+ x
"cccccccccccccccccccccccccc" % aaaaaaaaaaaa + x
)
(
@ -243,8 +280,7 @@ self._assert_skipping(
self.assertEqual(
response.status_code,
status_code,
msg_prefix
+ "Couldn't retrieve content: Response code was %d"
msg_prefix + "Couldn't retrieve content: Response code was %d"
" (expected %d)" % (response.status_code, status_code),
)
@ -268,6 +304,54 @@ def test3():
"(CASE WHEN JSON_TYPE(%s, %%s) IN (%s) "
"THEN JSON_TYPE(%s, %%s) ELSE JSON_EXTRACT(%s, %%s) END)"
) % (lhs, datatype_values, lhs, lhs), (tuple(params) + (json_path,)) * 3
c = (
a +
# test leading binary comment
"a"
"b" * b
)
c = (
a *
# test leading comment
"a"
"b" + b
)
c = (
a
+ # test trailing comment
"a"
"b" * b
)
c = (
a + "a"
"b" # test trailing comment
* b
)
c = (
a * "a"
"b" # test trailing binary comment
+ b
)
c = (
a * "a"
"b"
+ # test trailing operator comment
b
)
c = (
a * "a"
"b" +
# test trailing operator comment
b
)
```

View file

@ -222,8 +222,7 @@ b"Let's" b"start" b"with" b"a" b"simple" b"example" b"now repeat after me:" b"I
)
if (
a
+ b"Let's"
a + b"Let's"
b"start"
b"with"
b"a"
@ -371,8 +370,7 @@ b"Let's" b'start' b'with' b'a' b'simple' b'example' b'now repeat after me:' b'I
)
if (
a
+ b"Let's"
a + b"Let's"
b'start'
b'with'
b'a'

View file

@ -240,8 +240,7 @@ String \"\"\"
)
if (
a
+ "Let's"
a + "Let's"
"start"
"with"
"a"
@ -410,8 +409,7 @@ String \"\"\"
)
if (
a
+ "Let's"
a + "Let's"
'start'
'with'
'a'