Add optimized best_fit_parenthesize IR (#7475)

This commit is contained in:
Micha Reiser 2023-09-19 08:29:05 +02:00 committed by GitHub
parent 28b48ab902
commit 6a4dbd622b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 413 additions and 96 deletions

View file

@ -70,3 +70,32 @@ def test():
if True:
VLM_m2m = VLM.m2m_also_quite_long_zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.through
allows_group_by_select_index = self.connection.features.allows_group_by_select_index
# This is a deviation from Black:
# Black keeps the comment inside of the parentheses, making it more likely to exceed the line width.
# Ruff renders the comment after the parentheses, giving it more space to fit.
if True:
if True:
if True:
# Black layout
model.config.use_cache = (
False # FSTM still requires this hack -> FSTM should probably be refactored s
)
# Ruff layout
model.config.use_cache = False # FSTM still requires this hack -> FSTM should probably be refactored s
# Regression test for https://github.com/astral-sh/ruff/issues/7463
mp3fmt="<span style=\"color: grey\"><a href=\"{}\" id=\"audiolink\">listen</a></span></br>\n"
# Regression test for https://github.com/astral-sh/ruff/issues/7067
def f():
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa = (
True
)
# Regression test for https://github.com/astral-sh/ruff/issues/7462
if grid is not None:
rgrid = (rgrid.rio.reproject_match(grid, nodata=fillvalue) # rio.reproject nodata is use to initlialize the destination array
.where(~grid.isnull()))

View file

@ -5,7 +5,7 @@ use ruff_text_size::{Ranged, TextLen, TextRange};
use crate::comments::SourceComment;
use crate::expression::number::{FormatComplex, FormatFloat, FormatInt};
use crate::expression::parentheses::{should_use_best_fit, NeedsParentheses, OptionalParentheses};
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::expression::string::{
AnyString, FormatString, StringLayout, StringPrefix, StringQuotes,
};
@ -76,16 +76,10 @@ impl NeedsParentheses for ExprConstant {
) -> OptionalParentheses {
if self.value.is_implicit_concatenated() {
OptionalParentheses::Multiline
} else if is_multiline_string(self, context.source())
|| self.value.is_none()
|| self.value.is_bool()
|| self.value.is_ellipsis()
{
} else if is_multiline_string(self, context.source()) {
OptionalParentheses::Never
} else if should_use_best_fit(self, context) {
OptionalParentheses::BestFit
} else {
OptionalParentheses::Never
OptionalParentheses::BestFit
}
}
}

View file

@ -4,7 +4,7 @@ use ruff_formatter::FormatResult;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::ExprFString;
use crate::expression::parentheses::{should_use_best_fit, NeedsParentheses, OptionalParentheses};
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::prelude::*;
use super::string::{AnyString, FormatString};
@ -26,9 +26,7 @@ impl NeedsParentheses for ExprFString {
) -> OptionalParentheses {
if self.implicit_concatenated {
OptionalParentheses::Multiline
} else if memchr2(b'\n', b'\r', context.source()[self.range].as_bytes()).is_none()
&& should_use_best_fit(self, context)
{
} else if memchr2(b'\n', b'\r', context.source()[self.range].as_bytes()).is_none() {
OptionalParentheses::BestFit
} else {
OptionalParentheses::Never

View file

@ -3,7 +3,7 @@ use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::ExprName;
use crate::comments::SourceComment;
use crate::expression::parentheses::{should_use_best_fit, NeedsParentheses, OptionalParentheses};
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::prelude::*;
#[derive(Default)]
@ -38,13 +38,9 @@ impl NeedsParentheses for ExprName {
fn needs_parentheses(
&self,
_parent: AnyNodeRef,
context: &PyFormatContext,
_context: &PyFormatContext,
) -> OptionalParentheses {
if should_use_best_fit(self, context) {
OptionalParentheses::BestFit
} else {
OptionalParentheses::Never
}
OptionalParentheses::BestFit
}
}

View file

@ -3,7 +3,7 @@ use std::cmp::Ordering;
use itertools::Itertools;
use ruff_formatter::{
format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRule, FormatRuleWithOptions,
write, FormatOwnedWithRule, FormatRefWithRule, FormatRule, FormatRuleWithOptions,
};
use ruff_python_ast as ast;
use ruff_python_ast::node::AnyNodeRef;
@ -195,8 +195,9 @@ impl Format<PyFormatContext<'_>> for MaybeParenthesizeExpression<'_> {
f.context().source(),
);
let has_comments =
comments.has_leading(*expression) || comments.has_trailing_own_line(*expression);
let node_comments = comments.leading_dangling_trailing(*expression);
let has_comments = node_comments.has_leading() || node_comments.has_trailing_own_line();
// If the expression has comments, we always want to preserve the parentheses. This also
// ensures that we correctly handle parenthesized comments, and don't need to worry about
@ -245,53 +246,16 @@ impl Format<PyFormatContext<'_>> for MaybeParenthesizeExpression<'_> {
expression.format().with_options(Parentheses::Never).fmt(f)
}
Parenthesize::IfBreaks => {
let group_id = f.group_id("optional_parentheses");
let f = &mut WithNodeLevel::new(NodeLevel::Expression(Some(group_id)), f);
let mut format_expression = expression
.format()
.with_options(Parentheses::Never)
.memoized();
// Don't use best fitting if it is known that the expression can never fit
if format_expression.inspect(f)?.will_break() {
// The group here is necessary because `format_expression` may contain IR elements
// that refer to the group id
group(&format_args![
token("("),
soft_block_indent(&format_expression),
token(")")
])
.with_group_id(Some(group_id))
.fmt(f)
if node_comments.has_trailing() {
expression.format().with_options(Parentheses::Always).fmt(f)
} else {
// Only add parentheses if it makes the expression fit on the line.
// Using the flat version as the most expanded version gives a left-to-right splitting behavior
// which differs from when using regular groups, because they split right-to-left.
best_fitting![
// ---------------------------------------------------------------------
// Variant 1:
// Try to fit the expression without any parentheses
group(&format_expression).with_group_id(Some(group_id)),
// ---------------------------------------------------------------------
// Variant 2:
// Try to fit the expression by adding parentheses and indenting the expression.
group(&format_args![
token("("),
soft_block_indent(&format_expression),
token(")")
])
.with_group_id(Some(group_id))
.should_expand(true),
// ---------------------------------------------------------------------
// Variant 3: Fallback, no parentheses
// Expression doesn't fit regardless of adding the parentheses. Remove the parentheses again.
group(&format_expression)
.with_group_id(Some(group_id))
.should_expand(true)
]
// Measure all lines, to avoid that the printer decides that this fits right after hitting
// the `(`.
.with_mode(BestFittingMode::AllLines)
// The group id is necessary because the nested expressions may reference it.
let group_id = f.group_id("optional_parentheses");
let f = &mut WithNodeLevel::new(NodeLevel::Expression(Some(group_id)), f);
ruff_formatter::prelude::best_fit_parenthesize(
&expression.format().with_options(Parentheses::Never),
)
.with_group_id(Some(group_id))
.fmt(f)
}
}

View file

@ -1,5 +1,5 @@
use ruff_formatter::prelude::tag::Condition;
use ruff_formatter::{format_args, write, Argument, Arguments, FormatContext, FormatOptions};
use ruff_formatter::{format_args, write, Argument, Arguments};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::ExpressionRef;
use ruff_python_trivia::CommentRanges;
@ -24,35 +24,14 @@ pub(crate) enum OptionalParentheses {
Always,
/// Add parentheses if it helps to make this expression fit. Otherwise never add parentheses.
/// This mode should only be used for expressions that don't have their own split points, e.g. identifiers,
/// or constants.
/// This mode should only be used for expressions that don't have their own split points to the left, e.g. identifiers,
/// or constants, calls starting with an identifier, etc.
BestFit,
/// Never add parentheses. Use it for expressions that have their own parentheses or if the expression body always spans multiple lines (multiline strings).
Never,
}
pub(super) fn should_use_best_fit<T>(value: T, context: &PyFormatContext) -> bool
where
T: Ranged,
{
let text_len = context.source()[value.range()].len();
// Only use best fits if:
// * The text is longer than 5 characters:
// This is to align the behavior with `True` and `False`, that don't use best fits and are 5 characters long.
// It allows to avoid [`OptionalParentheses::BestFit`] for most numbers and common identifiers like `self`.
// The downside is that it can result in short values not being parenthesized if they exceed the line width.
// This is considered an edge case not worth the performance penalty and IMO, breaking an identifier
// of 5 characters to avoid it exceeding the line width by 1 reduces the readability.
// * The text is know to never fit: The text can never fit even when parenthesizing if it is longer
// than the configured line width (minus indent).
text_len > 5
&& text_len
<= context.options().line_width().value() as usize
- context.options().indent_width().value() as usize
}
pub(crate) trait NeedsParentheses {
/// Determines if this object needs optional parentheses or if it is safe to omit the parentheses.
fn needs_parentheses(

View file

@ -76,6 +76,35 @@ def test():
if True:
VLM_m2m = VLM.m2m_also_quite_long_zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.through
allows_group_by_select_index = self.connection.features.allows_group_by_select_index
# This is a deviation from Black:
# Black keeps the comment inside of the parentheses, making it more likely to exceed the line width.
# Ruff renders the comment after the parentheses, giving it more space to fit.
if True:
if True:
if True:
# Black layout
model.config.use_cache = (
False # FSTM still requires this hack -> FSTM should probably be refactored s
)
# Ruff layout
model.config.use_cache = False # FSTM still requires this hack -> FSTM should probably be refactored s
# Regression test for https://github.com/astral-sh/ruff/issues/7463
mp3fmt="<span style=\"color: grey\"><a href=\"{}\" id=\"audiolink\">listen</a></span></br>\n"
# Regression test for https://github.com/astral-sh/ruff/issues/7067
def f():
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa = (
True
)
# Regression test for https://github.com/astral-sh/ruff/issues/7462
if grid is not None:
rgrid = (rgrid.rio.reproject_match(grid, nodata=fillvalue) # rio.reproject nodata is use to initlialize the destination array
.where(~grid.isnull()))
```
## Output
@ -140,7 +169,9 @@ for converter in connection.ops.get_db_converters(
...
aaa = bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb # awkward comment
aaa = (
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb # awkward comment
)
def test():
@ -167,6 +198,44 @@ def test():
allows_group_by_select_index = (
self.connection.features.allows_group_by_select_index
)
# This is a deviation from Black:
# Black keeps the comment inside of the parentheses, making it more likely to exceed the line width.
# Ruff renders the comment after the parentheses, giving it more space to fit.
if True:
if True:
if True:
# Black layout
model.config.use_cache = (
False # FSTM still requires this hack -> FSTM should probably be refactored s
)
# Ruff layout
model.config.use_cache = (
False
) # FSTM still requires this hack -> FSTM should probably be refactored s
# Regression test for https://github.com/astral-sh/ruff/issues/7463
mp3fmt = (
'<span style="color: grey"><a href="{}" id="audiolink">listen</a></span></br>\n'
)
# Regression test for https://github.com/astral-sh/ruff/issues/7067
def f():
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa = (
True
)
# Regression test for https://github.com/astral-sh/ruff/issues/7462
if grid is not None:
rgrid = rgrid.rio.reproject_match(
grid, nodata=fillvalue
).where( # rio.reproject nodata is use to initlialize the destination array
~grid.isnull()
)
```