Hug multiline-strings preview style (#9243)

This commit is contained in:
Micha Reiser 2024-01-10 12:47:34 +01:00 committed by GitHub
parent 6be73322da
commit ac02d3aedd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 590 additions and 215 deletions

View file

@ -394,12 +394,12 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
f,
[
operand.leading_binary_comments().map(leading_comments),
leading_comments(comments.leading(&string_constant)),
leading_comments(comments.leading(string_constant)),
// Call `FormatStringContinuation` directly to avoid formatting
// the implicitly concatenated string with the enclosing group
// because the group is added by the binary like formatting.
FormatStringContinuation::new(&string_constant),
trailing_comments(comments.trailing(&string_constant)),
trailing_comments(comments.trailing(string_constant)),
operand.trailing_binary_comments().map(trailing_comments),
line_suffix_boundary(),
]
@ -413,12 +413,12 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
write!(
f,
[
leading_comments(comments.leading(&string_constant)),
leading_comments(comments.leading(string_constant)),
// Call `FormatStringContinuation` directly to avoid formatting
// the implicitly concatenated string with the enclosing group
// because the group is added by the binary like formatting.
FormatStringContinuation::new(&string_constant),
trailing_comments(comments.trailing(&string_constant)),
trailing_comments(comments.trailing(string_constant)),
]
)?;
}

View file

@ -3,10 +3,10 @@ use ruff_python_ast::ExprBinOp;
use crate::comments::SourceComment;
use crate::expression::binary_like::BinaryLike;
use crate::expression::expr_string_literal::is_multiline_string;
use crate::expression::has_parentheses;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::prelude::*;
use crate::string::AnyString;
#[derive(Default)]
pub struct FormatExprBinOp;
@ -35,13 +35,13 @@ impl NeedsParentheses for ExprBinOp {
) -> OptionalParentheses {
if parent.is_expr_await() {
OptionalParentheses::Always
} else if let Some(literal_expr) = self.left.as_literal_expr() {
} else if let Some(string) = AnyString::from_expression(&self.left) {
// Multiline strings are guaranteed to never fit, avoid adding unnecessary parentheses
if !literal_expr.is_implicit_concatenated()
&& is_multiline_string(literal_expr.into(), context.source())
if !string.is_implicit_concatenated()
&& string.is_multiline(context.source())
&& has_parentheses(&self.right, context).is_some()
&& !context.comments().has_dangling(self)
&& !context.comments().has(literal_expr)
&& !context.comments().has(string)
&& !context.comments().has(self.right.as_ref())
{
OptionalParentheses::Never

View file

@ -2,7 +2,6 @@ use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::ExprBytesLiteral;
use crate::comments::SourceComment;
use crate::expression::expr_string_literal::is_multiline_string;
use crate::expression::parentheses::{
in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
};
@ -41,7 +40,7 @@ impl NeedsParentheses for ExprBytesLiteral {
) -> OptionalParentheses {
if self.value.is_implicit_concatenated() {
OptionalParentheses::Multiline
} else if is_multiline_string(self.into(), context.source()) {
} else if AnyString::Bytes(self).is_multiline(context.source()) {
OptionalParentheses::Never
} else {
OptionalParentheses::BestFit

View file

@ -4,10 +4,10 @@ use ruff_python_ast::{CmpOp, ExprCompare};
use crate::comments::SourceComment;
use crate::expression::binary_like::BinaryLike;
use crate::expression::expr_string_literal::is_multiline_string;
use crate::expression::has_parentheses;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::prelude::*;
use crate::string::AnyString;
#[derive(Default)]
pub struct FormatExprCompare;
@ -37,11 +37,11 @@ impl NeedsParentheses for ExprCompare {
) -> OptionalParentheses {
if parent.is_expr_await() {
OptionalParentheses::Always
} else if let Some(literal_expr) = self.left.as_literal_expr() {
} else if let Some(string) = AnyString::from_expression(&self.left) {
// Multiline strings are guaranteed to never fit, avoid adding unnecessary parentheses
if !literal_expr.is_implicit_concatenated()
&& is_multiline_string(literal_expr.into(), context.source())
&& !context.comments().has(literal_expr)
if !string.is_implicit_concatenated()
&& string.is_multiline(context.source())
&& !context.comments().has(string)
&& self.comparators.first().is_some_and(|right| {
has_parentheses(right, context).is_some() && !context.comments().has(right)
})

View file

@ -1,5 +1,3 @@
use memchr::memchr2;
use ruff_python_ast::{AnyNodeRef, ExprFString};
use ruff_source_file::Locator;
use ruff_text_size::Ranged;
@ -50,10 +48,10 @@ impl NeedsParentheses for ExprFString {
) -> OptionalParentheses {
if self.value.is_implicit_concatenated() {
OptionalParentheses::Multiline
} else if memchr2(b'\n', b'\r', context.source()[self.range].as_bytes()).is_none() {
OptionalParentheses::BestFit
} else {
} else if AnyString::FString(self).is_multiline(context.source()) {
OptionalParentheses::Never
} else {
OptionalParentheses::BestFit
}
}
}

View file

@ -1,6 +1,5 @@
use ruff_formatter::FormatRuleWithOptions;
use ruff_python_ast::{AnyNodeRef, ExprStringLiteral};
use ruff_text_size::{Ranged, TextLen, TextRange};
use crate::comments::SourceComment;
use crate::expression::parentheses::{
@ -8,7 +7,7 @@ use crate::expression::parentheses::{
};
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::string::{AnyString, FormatStringContinuation, StringPrefix, StringQuotes};
use crate::string::{AnyString, FormatStringContinuation};
#[derive(Default)]
pub struct FormatExprStringLiteral {
@ -80,24 +79,10 @@ impl NeedsParentheses for ExprStringLiteral {
) -> OptionalParentheses {
if self.value.is_implicit_concatenated() {
OptionalParentheses::Multiline
} else if is_multiline_string(self.into(), context.source()) {
} else if AnyString::String(self).is_multiline(context.source()) {
OptionalParentheses::Never
} else {
OptionalParentheses::BestFit
}
}
}
/// Returns `true` if `expr` is a string or bytes literal whose source text is triple-quoted
/// and contains a `\n` or `\r`. Returns `false` for every other kind of node.
pub(super) fn is_multiline_string(expr: AnyNodeRef, source: &str) -> bool {
if expr.is_expr_string_literal() || expr.is_expr_bytes_literal() {
let contents = &source[expr.range()];
let prefix = StringPrefix::parse(contents);
// Parse the quotes from the text that follows the string prefix.
let quotes =
StringQuotes::parse(&contents[TextRange::new(prefix.text_len(), contents.text_len())]);
quotes.is_some_and(StringQuotes::is_triple)
&& memchr::memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
} else {
false
}
}

View file

@ -17,11 +17,14 @@ use crate::context::{NodeLevel, WithNodeLevel};
use crate::expression::expr_generator_exp::is_generator_parenthesized;
use crate::expression::expr_tuple::is_tuple_parenthesized;
use crate::expression::parentheses::{
is_expression_parenthesized, optional_parentheses, parenthesized, NeedsParentheses,
OptionalParentheses, Parentheses, Parenthesize,
is_expression_parenthesized, optional_parentheses, parenthesized, HuggingStyle,
NeedsParentheses, OptionalParentheses, Parentheses, Parenthesize,
};
use crate::prelude::*;
use crate::preview::is_hug_parens_with_braces_and_square_brackets_enabled;
use crate::preview::{
is_hug_parens_with_braces_and_square_brackets_enabled, is_multiline_string_handling_enabled,
};
use crate::string::AnyString;
mod binary_like;
pub(crate) mod expr_attribute;
@ -126,7 +129,7 @@ impl FormatRule<Expr, PyFormatContext<'_>> for FormatExpr {
let node_comments = comments.leading_dangling_trailing(expression);
if !node_comments.has_leading() && !node_comments.has_trailing() {
parenthesized("(", &format_expr, ")")
.with_indent(!is_expression_huggable(expression, f.context()))
.with_hugging(is_expression_huggable(expression, f.context()))
.fmt(f)
} else {
format_with_parentheses_comments(expression, &node_comments, f)
@ -444,7 +447,7 @@ impl Format<PyFormatContext<'_>> for MaybeParenthesizeExpression<'_> {
OptionalParentheses::Never => match parenthesize {
Parenthesize::IfBreaksOrIfRequired => {
parenthesize_if_expands(&expression.format().with_options(Parentheses::Never))
.with_indent(!is_expression_huggable(expression, f.context()))
.with_indent(is_expression_huggable(expression, f.context()).is_none())
.fmt(f)
}
@ -1084,7 +1087,7 @@ pub(crate) fn has_own_parentheses(
}
/// Returns `true` if the expression can hug directly to enclosing parentheses, as in Black's
/// `hug_parens_with_braces_and_square_brackets` preview style behavior.
/// `hug_parens_with_braces_and_square_brackets` or `multiline_string_handling` preview styles.
///
/// For example, in preview style, given:
/// ```python
@ -1110,11 +1113,10 @@ pub(crate) fn has_own_parentheses(
/// ]
/// )
/// ```
pub(crate) fn is_expression_huggable(expr: &Expr, context: &PyFormatContext) -> bool {
if !is_hug_parens_with_braces_and_square_brackets_enabled(context) {
return false;
}
pub(crate) fn is_expression_huggable(
expr: &Expr,
context: &PyFormatContext,
) -> Option<HuggingStyle> {
match expr {
Expr::Tuple(_)
| Expr::List(_)
@ -1122,18 +1124,14 @@ pub(crate) fn is_expression_huggable(expr: &Expr, context: &PyFormatContext) ->
| Expr::Dict(_)
| Expr::ListComp(_)
| Expr::SetComp(_)
| Expr::DictComp(_) => true,
| Expr::DictComp(_) => is_hug_parens_with_braces_and_square_brackets_enabled(context)
.then_some(HuggingStyle::Always),
Expr::Starred(ast::ExprStarred { value, .. }) => matches!(
value.as_ref(),
Expr::Tuple(_)
| Expr::List(_)
| Expr::Set(_)
| Expr::Dict(_)
| Expr::ListComp(_)
| Expr::SetComp(_)
| Expr::DictComp(_)
),
Expr::Starred(ast::ExprStarred { value, .. }) => is_expression_huggable(value, context),
Expr::StringLiteral(string) => is_huggable_string(AnyString::String(string), context),
Expr::BytesLiteral(bytes) => is_huggable_string(AnyString::Bytes(bytes), context),
Expr::FString(fstring) => is_huggable_string(AnyString::FString(fstring), context),
Expr::BoolOp(_)
| Expr::NamedExpr(_)
@ -1147,18 +1145,28 @@ pub(crate) fn is_expression_huggable(expr: &Expr, context: &PyFormatContext) ->
| Expr::YieldFrom(_)
| Expr::Compare(_)
| Expr::Call(_)
| Expr::FString(_)
| Expr::Attribute(_)
| Expr::Subscript(_)
| Expr::Name(_)
| Expr::Slice(_)
| Expr::IpyEscapeCommand(_)
| Expr::StringLiteral(_)
| Expr::BytesLiteral(_)
| Expr::NumberLiteral(_)
| Expr::BooleanLiteral(_)
| Expr::NoneLiteral(_)
| Expr::EllipsisLiteral(_) => false,
| Expr::EllipsisLiteral(_) => None,
}
}
/// Returns the hugging style to use for `string`, or `None` if the string must not be hugged.
///
/// A string is hugged (with [`HuggingStyle::IfFirstLineFits`]) only when the
/// `multiline_string_handling` preview style is enabled and `string` is a multiline string
/// that is not implicitly concatenated.
fn is_huggable_string(string: AnyString, context: &PyFormatContext) -> Option<HuggingStyle> {
    let huggable = is_multiline_string_handling_enabled(context)
        && !string.is_implicit_concatenated()
        && string.is_multiline(context.source());

    huggable.then_some(HuggingStyle::IfFirstLineFits)
}

View file

@ -126,7 +126,7 @@ where
FormatParenthesized {
left,
comments: &[],
indent: true,
hug: None,
content: Argument::new(content),
right,
}
@ -135,7 +135,7 @@ where
pub(crate) struct FormatParenthesized<'content, 'ast> {
left: &'static str,
comments: &'content [SourceComment],
indent: bool,
hug: Option<HuggingStyle>,
content: Argument<'content, PyFormatContext<'ast>>,
right: &'static str,
}
@ -158,8 +158,11 @@ impl<'content, 'ast> FormatParenthesized<'content, 'ast> {
}
/// Whether to indent the content within the parentheses.
pub(crate) fn with_indent(self, indent: bool) -> FormatParenthesized<'content, 'ast> {
FormatParenthesized { indent, ..self }
pub(crate) fn with_hugging(
self,
hug: Option<HuggingStyle>,
) -> FormatParenthesized<'content, 'ast> {
FormatParenthesized { hug, ..self }
}
}
@ -167,17 +170,41 @@ impl<'ast> Format<PyFormatContext<'ast>> for FormatParenthesized<'_, 'ast> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'ast>>) -> FormatResult<()> {
let current_level = f.context().node_level();
let content = format_with(|f| {
group(&format_with(|f| {
dangling_open_parenthesis_comments(self.comments).fmt(f)?;
if self.indent || !self.comments.is_empty() {
soft_block_indent(&Arguments::from(&self.content)).fmt(f)?;
} else {
Arguments::from(&self.content).fmt(f)?;
let indented = format_with(|f| {
let content = Arguments::from(&self.content);
if self.comments.is_empty() {
match self.hug {
None => group(&soft_block_indent(&content)).fmt(f),
Some(HuggingStyle::Always) => content.fmt(f),
Some(HuggingStyle::IfFirstLineFits) => {
// It's not immediately obvious how the below IR works to only indent the content if the first line exceeds the configured line width.
// The trick is the first group that doesn't wrap `self.content`.
// * The group doesn't wrap `self.content` because we need to assume that `self.content`
// contains a hard line break and hard-line-breaks always expand the enclosing group.
// * The printer decides that a group fits if its content (in this case a `soft_line_break` that has a width of 0 and is guaranteed to fit)
// and the content coming after the group in expanded mode (`self.content`) fits on the line.
// The content coming after fits if the content up to the first soft or hard line break (or the end of the document) fits.
//
// This happens to be exactly what we want. The first group should add an indent and a soft line break if the content of `self.content`
// up to the first line break exceeds the configured line length, but not otherwise.
let indented = f.group_id("indented_content");
write!(
f,
[
group(&indent(&soft_line_break())).with_group_id(Some(indented)),
indent_if_group_breaks(&content, indented),
if_group_breaks(&soft_line_break()).with_group_id(Some(indented))
]
)
}
}
Ok(())
}))
.fmt(f)
} else {
group(&format_args![
dangling_open_parenthesis_comments(self.comments),
soft_block_indent(&content),
])
.fmt(f)
}
});
let inner = format_with(|f| {
@ -186,12 +213,12 @@ impl<'ast> Format<PyFormatContext<'ast>> for FormatParenthesized<'_, 'ast> {
// This ensures that expanding this parenthesized expression does not expand the optional parentheses group.
write!(
f,
[fits_expanded(&content)
[fits_expanded(&indented)
.with_condition(Some(Condition::if_group_fits_on_line(group_id)))]
)
} else {
// It's not necessary to wrap the content if it is not inside of an optional_parentheses group.
content.fmt(f)
indented.fmt(f)
}
});
@ -201,6 +228,20 @@ impl<'ast> Format<PyFormatContext<'ast>> for FormatParenthesized<'_, 'ast> {
}
}
/// Describes when the content of a parenthesized expression hugs the parentheses instead of being indented.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HuggingStyle {
/// Always hug the content (never indent).
Always,
/// Hug the content if the content up to the first line break fits into the configured line length. Otherwise indent the content.
///
/// This is different from [`HuggingStyle::Always`] in that the content is indented when the content up to the first line break exceeds the configured line length.
///
/// This style is used for formatting multiline strings that, by definition, always break. The idea is to
/// only hug a multiline string if its content up to the first line break fits into the configured line length.
IfFirstLineFits,
}
/// Wraps an expression in parentheses only if it still does not fit after expanding all expressions that start or end with
/// a parentheses (`()`, `[]`, `{}`).
pub(crate) fn optional_parentheses<'content, 'ast, Content>(

View file

@ -6,10 +6,11 @@ use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::comments::SourceComment;
use crate::expression::expr_generator_exp::GeneratorExpParentheses;
use crate::expression::is_expression_huggable;
use crate::expression::parentheses::{empty_parenthesized, parenthesized, Parentheses};
use crate::expression::parentheses::{
empty_parenthesized, parenthesized, HuggingStyle, Parentheses,
};
use crate::other::commas;
use crate::prelude::*;
use crate::preview::is_hug_parens_with_braces_and_square_brackets_enabled;
#[derive(Default)]
pub struct FormatArguments;
@ -107,7 +108,7 @@ impl FormatNodeRule<Arguments> for FormatArguments {
// )
// ```
parenthesized("(", &group(&all_arguments), ")")
.with_indent(!is_argument_huggable(item, f.context()))
.with_hugging(is_arguments_huggable(item, f.context()))
.with_dangling_comments(dangling_comments)
]
)
@ -177,29 +178,23 @@ fn is_single_argument_parenthesized(argument: &Expr, call_end: TextSize, source:
///
/// Hugging should only be applied to single-argument collections, like lists, or starred versions
/// of those collections.
fn is_argument_huggable(item: &Arguments, context: &PyFormatContext) -> bool {
if !is_hug_parens_with_braces_and_square_brackets_enabled(context) {
return false;
}
fn is_arguments_huggable(item: &Arguments, context: &PyFormatContext) -> Option<HuggingStyle> {
// Find the lone argument or `**kwargs` keyword.
let arg = match (item.args.as_slice(), item.keywords.as_slice()) {
([arg], []) => arg,
([], [keyword]) if keyword.arg.is_none() && !context.comments().has(keyword) => {
&keyword.value
}
_ => return false,
_ => return None,
};
// If the expression itself isn't huggable, then we can't hug it.
if !is_expression_huggable(arg, context) {
return false;
}
let hugging_style = is_expression_huggable(arg, context)?;
// If the expression has leading or trailing comments, then we can't hug it.
let comments = context.comments().leading_dangling_trailing(arg);
if comments.has_leading() || comments.has_trailing() {
return false;
return None;
}
let options = context.options();
@ -208,8 +203,8 @@ fn is_argument_huggable(item: &Arguments, context: &PyFormatContext) -> bool {
if options.magic_trailing_comma().is_respect()
&& commas::has_magic_trailing_comma(TextRange::new(arg.end(), item.end()), options, context)
{
return false;
return None;
}
true
Some(hugging_style)
}

View file

@ -62,3 +62,8 @@ pub(crate) const fn is_dummy_implementations_enabled(context: &PyFormatContext)
pub(crate) const fn is_hex_codes_in_unicode_sequences_enabled(context: &PyFormatContext) -> bool {
context.is_preview()
}
/// Returns `true` if the [`multiline_string_handling`](https://github.com/astral-sh/ruff/issues/8896) preview style is enabled.
///
/// This simply reports whether `context` is in preview mode.
pub(crate) const fn is_multiline_string_handling_enabled(context: &PyFormatContext) -> bool {
context.is_preview()
}

View file

@ -1,6 +1,7 @@
use std::borrow::Cow;
use bitflags::bitflags;
use memchr::memchr2;
use ruff_formatter::{format_args, write};
use ruff_python_ast::AnyNodeRef;
@ -29,7 +30,7 @@ pub(crate) enum Quoting {
/// Represents any kind of string expression. This could be either a string,
/// bytes or f-string.
#[derive(Clone, Debug)]
#[derive(Copy, Clone, Debug)]
pub(crate) enum AnyString<'a> {
String(&'a ExprStringLiteral),
Bytes(&'a ExprBytesLiteral),
@ -50,7 +51,7 @@ impl<'a> AnyString<'a> {
}
/// Returns `true` if the string is implicitly concatenated.
pub(crate) fn is_implicit_concatenated(&self) -> bool {
pub(crate) fn is_implicit_concatenated(self) -> bool {
match self {
Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
@ -59,7 +60,7 @@ impl<'a> AnyString<'a> {
}
/// Returns the quoting to be used for this string.
fn quoting(&self, locator: &Locator<'_>) -> Quoting {
fn quoting(self, locator: &Locator<'_>) -> Quoting {
match self {
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
Self::FString(f_string) => f_string_quoting(f_string, locator),
@ -67,7 +68,7 @@ impl<'a> AnyString<'a> {
}
/// Returns a vector of all the [`AnyStringPart`] of this string.
fn parts(&self, quoting: Quoting) -> Vec<AnyStringPart<'a>> {
fn parts(self, quoting: Quoting) -> Vec<AnyStringPart<'a>> {
match self {
Self::String(ExprStringLiteral { value, .. }) => value
.iter()
@ -94,6 +95,24 @@ impl<'a> AnyString<'a> {
.collect(),
}
}
/// Returns `true` if the string's source text spans multiple lines.
///
/// String and bytes literals count as multiline only when they are triple-quoted and their
/// source text contains a `\n` or `\r`. F-strings count as multiline as soon as their
/// source text contains a `\n` or `\r`.
pub(crate) fn is_multiline(self, source: &str) -> bool {
    // F-strings only need the line-break check.
    if let AnyString::FString(fstring) = self {
        return memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some();
    }

    let text = &source[self.range()];
    // The quotes start right after the (possibly empty) string prefix.
    let after_prefix = TextRange::new(StringPrefix::parse(text).text_len(), text.text_len());
    let is_triple_quoted =
        StringQuotes::parse(&text[after_prefix]).is_some_and(StringQuotes::is_triple);

    is_triple_quoted && memchr2(b'\n', b'\r', text.as_bytes()).is_some()
}
}
impl Ranged for AnyString<'_> {
@ -116,6 +135,12 @@ impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
}
}
impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
fn from(value: AnyString<'a>) -> Self {
AnyNodeRef::from(&value)
}
}
impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
fn from(value: &AnyString<'a>) -> Self {
match value {