Split string formatting to individual nodes (#9058)

This PR splits the string formatting code in the formatter to be handled
by the respective nodes.

Previously, the string formatting was done through a single
`FormatString` interface. Now, the nodes themselves are responsible for
formatting.

The following changes were made:
1. Remove `StringLayout::ImplicitConcatenatedStringInBinaryLike` and
inline the call to `FormatStringContinuation`. After the refactor, the
binary-like formatting would delegate to `FormatString`, which would then
delegate to `FormatStringContinuation`. This removes the intermediary
steps.
2. Add formatter implementation for `FStringPart` which delegates it to
the respective string literal or f-string node.
3. Add `ExprStringLiteralKind` which is either `String` or `Docstring`.
If it's a docstring variant, then the string expression would not be
implicitly concatenated. This is guaranteed by the
`DocstringStmt::try_from_expression` constructor.
4. Add `StringLiteralKind` which is either a `String`, `Docstring` or
`InImplicitlyConcatenatedFString`. The last variant is for when the
string literal is implicitly concatenated with an f-string (`"foo" f"bar
{x}"`).
5. Remove `FormatString`.
6. Extract the f-string quote detection as a standalone function which
is public to the crate. This is used to detect the quote to be used for
an f-string at the expression level (`ExprFString` or
`FormatStringContinuation`).


### Formatter ecosystem result

**This PR**

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |

**main**

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |
This commit is contained in:
Dhruv Manilawala 2023-12-14 12:55:10 -06:00 committed by GitHub
parent 28b1aa201b
commit 189e947808
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 364 additions and 266 deletions

View file

@ -33,9 +33,15 @@ node_lines = (
nodes = []
for node_line in node_lines:
node = node_line.split("(")[1].split(")")[0].split("::")[-1].split("<")[0]
# These nodes aren't used in the formatter as the formatting of them is handled
# in one of the other nodes containing them.
if node in ("FStringLiteralElement", "FStringExpressionElement"):
# `FString` and `StringLiteral` have custom implementations, while the formatting of
# `FStringLiteralElement` and `FStringExpressionElement` is handled by the `FString`
# implementation.
if node in (
"FString",
"StringLiteral",
"FStringLiteralElement",
"FStringExpressionElement",
):
continue
nodes.append(node)
print(nodes)

View file

@ -1,5 +1,5 @@
use crate::comments::Comments;
use crate::expression::string::QuoteChar;
use crate::string::QuoteChar;
use crate::PyFormatOptions;
use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
use ruff_source_file::Locator;

View file

@ -18,10 +18,10 @@ use crate::expression::parentheses::{
is_expression_parenthesized, write_in_parentheses_only_group_end_tag,
write_in_parentheses_only_group_start_tag, Parentheses,
};
use crate::expression::string::{AnyString, FormatString, StringLayout};
use crate::expression::OperatorPrecedence;
use crate::prelude::*;
use crate::preview::is_fix_power_op_line_length_enabled;
use crate::string::{AnyString, FormatStringContinuation};
#[derive(Copy, Clone, Debug)]
pub(super) enum BinaryLike<'a> {
@ -395,9 +395,10 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
[
operand.leading_binary_comments().map(leading_comments),
leading_comments(comments.leading(&string_constant)),
FormatString::new(&string_constant).with_layout(
StringLayout::ImplicitConcatenatedStringInBinaryLike,
),
// Call `FormatStringContinuation` directly to avoid formatting
// the implicitly concatenated string with the enclosing group
// because the group is added by the binary like formatting.
FormatStringContinuation::new(&string_constant),
trailing_comments(comments.trailing(&string_constant)),
operand.trailing_binary_comments().map(trailing_comments),
line_suffix_boundary(),
@ -413,9 +414,10 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
f,
[
leading_comments(comments.leading(&string_constant)),
FormatString::new(&string_constant).with_layout(
StringLayout::ImplicitConcatenatedStringInBinaryLike
),
// Call `FormatStringContinuation` directly to avoid formatting
// the implicitly concatenated string with the enclosing group
// because the group is added by the binary like formatting.
FormatStringContinuation::new(&string_constant),
trailing_comments(comments.trailing(&string_constant)),
]
)?;

View file

@ -3,16 +3,24 @@ use ruff_python_ast::ExprBytesLiteral;
use crate::comments::SourceComment;
use crate::expression::expr_string_literal::is_multiline_string;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::expression::string::{AnyString, FormatString};
use crate::expression::parentheses::{
in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
};
use crate::prelude::*;
use crate::string::{AnyString, FormatStringContinuation};
#[derive(Default)]
pub struct FormatExprBytesLiteral;
impl FormatNodeRule<ExprBytesLiteral> for FormatExprBytesLiteral {
fn fmt_fields(&self, item: &ExprBytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::Bytes(item)).fmt(f)
let ExprBytesLiteral { value, .. } = item;
match value.as_slice() {
[bytes_literal] => bytes_literal.format().fmt(f),
_ => in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::Bytes(item)))
.fmt(f),
}
}
fn fmt_dangling_comments(

View file

@ -1,21 +1,35 @@
use memchr::memchr2;
use ruff_python_ast::{AnyNodeRef, ExprFString};
use ruff_source_file::Locator;
use ruff_text_size::Ranged;
use crate::comments::SourceComment;
use ruff_formatter::FormatResult;
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::ExprFString;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::expression::parentheses::{
in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
};
use crate::other::f_string_part::FormatFStringPart;
use crate::prelude::*;
use super::string::{AnyString, FormatString};
use crate::string::{AnyString, FormatStringContinuation, Quoting};
#[derive(Default)]
pub struct FormatExprFString;
impl FormatNodeRule<ExprFString> for FormatExprFString {
fn fmt_fields(&self, item: &ExprFString, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::FString(item)).fmt(f)
let ExprFString { value, .. } = item;
match value.as_slice() {
[f_string_part] => FormatFStringPart::new(
f_string_part,
f_string_quoting(item, &f.context().locator()),
)
.fmt(f),
_ => {
in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::FString(item)))
.fmt(f)
}
}
}
fn fmt_dangling_comments(
@ -43,3 +57,28 @@ impl NeedsParentheses for ExprFString {
}
}
}
/// Decides whether the quotes of an f-string expression may be changed.
///
/// The source text of `f_string` is read through `locator`. After skipping
/// everything before the first quote character (the string prefix), the
/// f-string is classified as triple quoted or not. Each expression element
/// is then inspected:
///
/// * for a triple-quoted f-string, an expression whose source contains `"""`
///   or `'''` forces [`Quoting::Preserve`];
/// * otherwise, an expression whose source contains any `"` or `'` forces
///   [`Quoting::Preserve`].
///
/// If no expression element matches, [`Quoting::CanChange`] is returned.
pub(crate) fn f_string_quoting(f_string: &ExprFString, locator: &Locator) -> Quoting {
    let source = locator.slice(f_string.range());
    // Drop the prefix characters (everything up to the first quote).
    let unprefixed = source.trim_start_matches(|c| c != '"' && c != '\'');
    let triple_quoted = unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''");

    // Whether the given expression source contains quotes that rule out
    // changing the quotes of the enclosing f-string.
    let has_conflicting_quotes = |content: &str| {
        if triple_quoted {
            content.contains(r#"""""#) || content.contains("'''")
        } else {
            content.contains(['"', '\''])
        }
    };

    let must_preserve = f_string
        .value
        .elements()
        .filter_map(|element| element.as_expression())
        .any(|expression| has_conflicting_quotes(locator.slice(expression.range())));

    if must_preserve {
        Quoting::Preserve
    } else {
        Quoting::CanChange
    }
}

View file

@ -1,34 +1,66 @@
use ruff_formatter::FormatRuleWithOptions;
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::ExprStringLiteral;
use ruff_python_ast::{AnyNodeRef, ExprStringLiteral};
use ruff_text_size::{Ranged, TextLen, TextRange};
use crate::comments::SourceComment;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::expression::string::{
AnyString, FormatString, StringLayout, StringPrefix, StringQuotes,
use crate::expression::parentheses::{
in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
};
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::string::{AnyString, FormatStringContinuation, StringPrefix, StringQuotes};
#[derive(Default)]
pub struct FormatExprStringLiteral {
layout: StringLayout,
kind: ExprStringLiteralKind,
}
/// The kind of string literal *expression* being formatted: either a regular
/// string (the default) or a docstring.
#[derive(Default, Copy, Clone, Debug)]
pub enum ExprStringLiteralKind {
    #[default]
    String,
    Docstring,
}

impl ExprStringLiteralKind {
    /// Maps this expression-level kind to the matching [`StringLiteralKind`]
    /// used when formatting a single string literal.
    const fn string_literal_kind(self) -> StringLiteralKind {
        match self {
            Self::String => StringLiteralKind::String,
            Self::Docstring => StringLiteralKind::Docstring,
        }
    }

    /// Returns `true` for the [`ExprStringLiteralKind::Docstring`] variant.
    const fn is_docstring(self) -> bool {
        matches!(self, Self::Docstring)
    }
}
impl FormatRuleWithOptions<ExprStringLiteral, PyFormatContext<'_>> for FormatExprStringLiteral {
type Options = StringLayout;
type Options = ExprStringLiteralKind;
fn with_options(mut self, options: Self::Options) -> Self {
self.layout = options;
self.kind = options;
self
}
}
impl FormatNodeRule<ExprStringLiteral> for FormatExprStringLiteral {
fn fmt_fields(&self, item: &ExprStringLiteral, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::String(item))
.with_layout(self.layout)
.fmt(f)
let ExprStringLiteral { value, .. } = item;
match value.as_slice() {
[string_literal] => {
FormatStringLiteral::new(string_literal, self.kind.string_literal_kind()).fmt(f)
}
_ => {
// This is just a sanity check because [`DocstringStmt::try_from_statement`]
// ensures that the docstring is a *single* string literal.
assert!(!self.kind.is_docstring());
in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::String(item)))
}
.fmt(f),
}
}
fn fmt_dangling_comments(

View file

@ -58,7 +58,6 @@ pub(crate) mod expr_yield;
pub(crate) mod expr_yield_from;
mod operator;
pub(crate) mod parentheses;
pub(crate) mod string;
#[derive(Copy, Clone, PartialEq, Eq, Default)]
pub struct FormatExpr {

View file

@ -2943,70 +2943,6 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::TypeParamParamSpec {
}
}
impl FormatRule<ast::FString, PyFormatContext<'_>> for crate::other::f_string::FormatFString {
#[inline]
fn fmt(&self, node: &ast::FString, f: &mut PyFormatter) -> FormatResult<()> {
FormatNodeRule::<ast::FString>::fmt(self, node, f)
}
}
impl<'ast> AsFormat<PyFormatContext<'ast>> for ast::FString {
type Format<'a> = FormatRefWithRule<
'a,
ast::FString,
crate::other::f_string::FormatFString,
PyFormatContext<'ast>,
>;
fn format(&self) -> Self::Format<'_> {
FormatRefWithRule::new(self, crate::other::f_string::FormatFString::default())
}
}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::FString {
type Format = FormatOwnedWithRule<
ast::FString,
crate::other::f_string::FormatFString,
PyFormatContext<'ast>,
>;
fn into_format(self) -> Self::Format {
FormatOwnedWithRule::new(self, crate::other::f_string::FormatFString::default())
}
}
impl FormatRule<ast::StringLiteral, PyFormatContext<'_>>
for crate::other::string_literal::FormatStringLiteral
{
#[inline]
fn fmt(&self, node: &ast::StringLiteral, f: &mut PyFormatter) -> FormatResult<()> {
FormatNodeRule::<ast::StringLiteral>::fmt(self, node, f)
}
}
impl<'ast> AsFormat<PyFormatContext<'ast>> for ast::StringLiteral {
type Format<'a> = FormatRefWithRule<
'a,
ast::StringLiteral,
crate::other::string_literal::FormatStringLiteral,
PyFormatContext<'ast>,
>;
fn format(&self) -> Self::Format<'_> {
FormatRefWithRule::new(
self,
crate::other::string_literal::FormatStringLiteral::default(),
)
}
}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::StringLiteral {
type Format = FormatOwnedWithRule<
ast::StringLiteral,
crate::other::string_literal::FormatStringLiteral,
PyFormatContext<'ast>,
>;
fn into_format(self) -> Self::Format {
FormatOwnedWithRule::new(
self,
crate::other::string_literal::FormatStringLiteral::default(),
)
}
}
impl FormatRule<ast::BytesLiteral, PyFormatContext<'_>>
for crate::other::bytes_literal::FormatBytesLiteral
{

View file

@ -36,6 +36,7 @@ mod prelude;
mod preview;
mod shared_traits;
pub(crate) mod statement;
pub(crate) mod string;
pub(crate) mod type_param;
mod verbatim;

View file

@ -1,12 +1,23 @@
use ruff_python_ast::BytesLiteral;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::string::{Quoting, StringPart};
#[derive(Default)]
pub struct FormatBytesLiteral;
impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
fn fmt_fields(&self, _item: &BytesLiteral, _f: &mut PyFormatter) -> FormatResult<()> {
unreachable!("Handled inside of `FormatExprBytesLiteral`");
fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator();
StringPart::from_source(item.range(), &locator)
.normalize(
Quoting::CanChange,
&locator,
f.options().quote_style(),
f.context().docstring(),
)
.fmt(f)
}
}

View file

@ -1,12 +1,49 @@
use ruff_python_ast::FString;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::string::{Quoting, StringPart};
#[derive(Default)]
pub struct FormatFString;
/// Formats an f-string which is part of a larger f-string expression.
///
/// For example, this would be used to format the f-string part in `"foo" f"bar {x}"`
/// or the standalone f-string in `f"foo {x} bar"`.
pub(crate) struct FormatFString<'a> {
value: &'a FString,
/// The quoting of an f-string. This is determined by the parent node
/// (f-string expression) and is required to format an f-string correctly.
quoting: Quoting,
}
impl FormatNodeRule<FString> for FormatFString {
fn fmt_fields(&self, _item: &FString, _f: &mut PyFormatter) -> FormatResult<()> {
unreachable!("Handled inside of `FormatExprFString`");
impl<'a> FormatFString<'a> {
pub(crate) fn new(value: &'a FString, quoting: Quoting) -> Self {
Self { value, quoting }
}
}
impl Format<PyFormatContext<'_>> for FormatFString<'_> {
    /// Normalizes the f-string's source text with the quoting chosen by the
    /// parent node and writes it, then marks the comments of every element
    /// as formatted. The result of writing the string is what gets returned.
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let locator = f.context().locator();

        let string_part = StringPart::from_source(self.value.range(), &locator);
        let normalized = string_part.normalize(
            self.quoting,
            &locator,
            f.options().quote_style(),
            f.context().docstring(),
        );
        let result = normalized.fmt(f);

        // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
        // This is to mark all of those comments as formatted but we need to
        // figure out how to handle them. Note that this needs to be done only
        // after the f-string is formatted, so only for all the non-formatted
        // comments.
        let comments = f.context().comments();
        for element in &self.value.elements {
            comments.mark_verbatim_node_comments_formatted(element.into());
        }

        result
    }
}

View file

@ -0,0 +1,39 @@
use ruff_python_ast::FStringPart;
use crate::other::f_string::FormatFString;
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::string::Quoting;
/// Formatter for a single [`FStringPart`], which is either a plain string
/// literal or an f-string.
///
/// No formatting logic lives here; the work is handed off to
/// [`FormatStringLiteral`] or [`FormatFString`] depending on the variant.
pub(crate) struct FormatFStringPart<'a> {
    part: &'a FStringPart,
    /// Quoting applied to every part of the f-string. It is computed by the
    /// parent node (the f-string expression) and passed down so that all
    /// parts are formatted consistently.
    quoting: Quoting,
}

impl<'a> FormatFStringPart<'a> {
    /// Creates a formatter for `part` that uses the given `quoting`.
    pub(crate) fn new(part: &'a FStringPart, quoting: Quoting) -> Self {
        Self { part, quoting }
    }
}

impl Format<PyFormatContext<'_>> for FormatFStringPart<'_> {
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let quoting = self.quoting;
        match self.part {
            FStringPart::Literal(string_literal) => {
                // A string literal can only show up as an f-string part when
                // the f-string is implicitly concatenated, e.g.
                // `"foo" f"bar {x}"`. On its own, a string literal would be a
                // string expression rather than an f-string.
                let kind = StringLiteralKind::InImplicitlyConcatenatedFString(quoting);
                FormatStringLiteral::new(string_literal, kind).fmt(f)
            }
            FStringPart::FString(f_string) => FormatFString::new(f_string, quoting).fmt(f),
        }
    }
}

View file

@ -7,6 +7,7 @@ pub(crate) mod decorator;
pub(crate) mod elif_else_clause;
pub(crate) mod except_handler_except_handler;
pub(crate) mod f_string;
pub(crate) mod f_string_part;
pub(crate) mod identifier;
pub(crate) mod keyword;
pub(crate) mod match_case;

View file

@ -1,12 +1,72 @@
use ruff_python_ast::StringLiteral;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::string::{docstring, Quoting, StringPart};
use crate::QuoteStyle;
#[derive(Default)]
pub struct FormatStringLiteral;
pub(crate) struct FormatStringLiteral<'a> {
value: &'a StringLiteral,
layout: StringLiteralKind,
}
impl FormatNodeRule<StringLiteral> for FormatStringLiteral {
fn fmt_fields(&self, _item: &StringLiteral, _f: &mut PyFormatter) -> FormatResult<()> {
unreachable!("Handled inside of `FormatExprStringLiteral`");
impl<'a> FormatStringLiteral<'a> {
pub(crate) fn new(value: &'a StringLiteral, layout: StringLiteralKind) -> Self {
Self { value, layout }
}
}
/// Describes the context in which a string literal is being formatted.
#[derive(Copy, Clone, Debug, Default)]
pub(crate) enum StringLiteralKind {
    /// An ordinary string literal such as `"foo"`.
    #[default]
    String,
    /// A string literal that serves as a docstring.
    Docstring,
    /// A string literal implicitly concatenated with an f-string. The whole
    /// expression is then an f-string, and the quoting carried here is the
    /// one detected by the parent node (the f-string expression).
    InImplicitlyConcatenatedFString(Quoting),
}

impl StringLiteralKind {
    /// Returns `true` if this is the [`StringLiteralKind::Docstring`] variant.
    pub(crate) const fn is_docstring(self) -> bool {
        matches!(self, Self::Docstring)
    }

    /// Returns the quoting to use for this string literal: the quoting handed
    /// down by the parent f-string when there is one, otherwise
    /// [`Quoting::CanChange`].
    fn quoting(self) -> Quoting {
        match self {
            Self::InImplicitlyConcatenatedFString(quoting) => quoting,
            Self::String | Self::Docstring => Quoting::CanChange,
        }
    }
}
impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
    /// Normalizes the literal's source text and writes it. Docstrings always
    /// use double quotes and go through the dedicated docstring formatter;
    /// every other literal uses the configured quote style.
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let is_docstring = self.layout.is_docstring();
        let locator = f.context().locator();

        let quote_style = if is_docstring {
            // Per PEP 8 and PEP 257, always prefer double quotes for docstrings
            QuoteStyle::Double
        } else {
            f.options().quote_style()
        };

        let string_part = StringPart::from_source(self.value.range(), &locator);
        let normalized = string_part.normalize(
            self.layout.quoting(),
            &locator,
            quote_style,
            f.context().docstring(),
        );

        if is_docstring {
            docstring::format(&normalized, f)
        } else {
            normalized.fmt(f)
        }
    }
}

View file

@ -9,7 +9,7 @@ use crate::comments::{
leading_comments, trailing_comments, Comments, LeadingDanglingTrailingComments,
};
use crate::context::{NodeLevel, TopLevelStatementPosition, WithIndentLevel, WithNodeLevel};
use crate::expression::string::StringLayout;
use crate::expression::expr_string_literal::ExprStringLiteralKind;
use crate::prelude::*;
use crate::statement::stmt_expr::FormatStmtExpr;
use crate::verbatim::{
@ -609,7 +609,7 @@ impl Format<PyFormatContext<'_>> for DocstringStmt<'_> {
leading_comments(node_comments.leading),
string_literal
.format()
.with_options(StringLayout::DocString),
.with_options(ExprStringLiteralKind::Docstring),
]
)?;

View file

@ -102,7 +102,7 @@ use super::{NormalizedString, QuoteChar};
/// line c
/// """
/// ```
pub(super) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
let docstring = &normalized.text;
// Black doesn't change the indentation of docstrings that contain an escaped newline

View file

@ -5,35 +5,41 @@ use bitflags::bitflags;
use ruff_formatter::{format_args, write};
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::{
self as ast, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
self as ast, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::comments::{leading_comments, trailing_comments};
use crate::expression::parentheses::{
in_parentheses_only_group, in_parentheses_only_soft_line_break_or_space,
};
use crate::expression::Expr;
use crate::expression::expr_f_string::f_string_quoting;
use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space;
use crate::other::f_string::FormatFString;
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::QuoteStyle;
mod docstring;
pub(crate) mod docstring;
#[derive(Copy, Clone, Debug)]
enum Quoting {
#[derive(Copy, Clone, Debug, Default)]
pub(crate) enum Quoting {
#[default]
CanChange,
Preserve,
}
/// Represents any kind of string expression. This could be either a string,
/// bytes or f-string.
#[derive(Clone, Debug)]
pub(super) enum AnyString<'a> {
pub(crate) enum AnyString<'a> {
String(&'a ExprStringLiteral),
Bytes(&'a ExprBytesLiteral),
FString(&'a ExprFString),
}
impl<'a> AnyString<'a> {
/// Creates a new [`AnyString`] from the given [`Expr`].
///
/// Returns `None` if the expression is not either a string, bytes or f-string.
pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
match expression {
Expr::StringLiteral(string) => Some(AnyString::String(string)),
@ -43,39 +49,8 @@ impl<'a> AnyString<'a> {
}
}
fn quoting(&self, locator: &Locator) -> Quoting {
match self {
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
Self::FString(f_string) => {
let unprefixed = locator
.slice(f_string.range)
.trim_start_matches(|c| c != '"' && c != '\'');
let triple_quoted =
unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''");
if f_string.value.elements().any(|element| match element {
ast::FStringElement::Expression(ast::FStringExpressionElement {
range,
..
}) => {
let string_content = locator.slice(*range);
if triple_quoted {
string_content.contains(r#"""""#) || string_content.contains("'''")
} else {
string_content.contains(['"', '\''])
}
}
ast::FStringElement::Literal(_) => false,
}) {
Quoting::Preserve
} else {
Quoting::CanChange
}
}
}
}
/// Returns `true` if the string is implicitly concatenated.
pub(super) fn is_implicit_concatenated(&self) -> bool {
pub(crate) fn is_implicit_concatenated(&self) -> bool {
match self {
Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
@ -83,21 +58,38 @@ impl<'a> AnyString<'a> {
}
}
fn parts(&self) -> Vec<AnyStringPart<'a>> {
/// Returns the quoting to be used for this string.
fn quoting(&self, locator: &Locator<'_>) -> Quoting {
match self {
Self::String(ExprStringLiteral { value, .. }) => {
value.iter().map(AnyStringPart::String).collect()
}
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
Self::FString(f_string) => f_string_quoting(f_string, locator),
}
}
/// Returns a vector of all the [`AnyStringPart`] of this string.
fn parts(&self, quoting: Quoting) -> Vec<AnyStringPart<'a>> {
match self {
Self::String(ExprStringLiteral { value, .. }) => value
.iter()
.map(|part| AnyStringPart::String {
part,
layout: StringLiteralKind::String,
})
.collect(),
Self::Bytes(ExprBytesLiteral { value, .. }) => {
value.iter().map(AnyStringPart::Bytes).collect()
}
Self::FString(ExprFString { value, .. }) => value
.iter()
.map(|f_string_part| match f_string_part {
ast::FStringPart::Literal(string_literal) => {
AnyStringPart::String(string_literal)
}
ast::FStringPart::FString(f_string) => AnyStringPart::FString(f_string),
ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
part: string_literal,
layout: StringLiteralKind::InImplicitlyConcatenatedFString(quoting),
},
ast::FStringPart::FString(f_string) => AnyStringPart::FString {
part: f_string,
quoting,
},
})
.collect(),
}
@ -134,19 +126,29 @@ impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
}
}
/// Represents any kind of string which is part of an implicitly concatenated
/// string. This could be either a string, bytes or f-string.
///
/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
#[derive(Clone, Debug)]
enum AnyStringPart<'a> {
String(&'a ast::StringLiteral),
String {
part: &'a ast::StringLiteral,
layout: StringLiteralKind,
},
Bytes(&'a ast::BytesLiteral),
FString(&'a ast::FString),
FString {
part: &'a ast::FString,
quoting: Quoting,
},
}
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
fn from(value: &AnyStringPart<'a>) -> Self {
match value {
AnyStringPart::String(part) => AnyNodeRef::StringLiteral(part),
AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part),
AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part),
AnyStringPart::FString(part) => AnyNodeRef::FString(part),
AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part),
}
}
}
@ -154,99 +156,33 @@ impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
impl Ranged for AnyStringPart<'_> {
fn range(&self) -> TextRange {
match self {
Self::String(part) => part.range(),
Self::String { part, .. } => part.range(),
Self::Bytes(part) => part.range(),
Self::FString(part) => part.range(),
Self::FString { part, .. } => part.range(),
}
}
}
pub(super) struct FormatString<'a> {
string: &'a AnyString<'a>,
layout: StringLayout,
}
#[derive(Default, Copy, Clone, Debug)]
pub enum StringLayout {
#[default]
Default,
DocString,
/// An implicit concatenated string in a binary like (e.g. `a + b` or `a < b`) expression.
///
/// Formats the implicit concatenated string parts without the enclosing group because the group
/// is added by the binary like formatting.
ImplicitConcatenatedStringInBinaryLike,
}
impl<'a> FormatString<'a> {
pub(super) fn new(string: &'a AnyString<'a>) -> Self {
Self {
string,
layout: StringLayout::Default,
}
}
pub(super) fn with_layout(mut self, layout: StringLayout) -> Self {
self.layout = layout;
self
}
}
impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let parent_docstring_quote_style = f.context().docstring();
let locator = f.context().locator();
let result = match self.layout {
StringLayout::Default => {
if self.string.is_implicit_concatenated() {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
} else {
StringPart::from_source(self.string.range(), &locator)
.normalize(
self.string.quoting(&locator),
&locator,
f.options().quote_style(),
parent_docstring_quote_style,
)
.fmt(f)
}
match self {
AnyStringPart::String { part, layout } => {
FormatStringLiteral::new(part, *layout).fmt(f)
}
StringLayout::DocString => {
let string_part = StringPart::from_source(self.string.range(), &locator);
let normalized = string_part.normalize(
Quoting::CanChange,
&locator,
// Per PEP 8 and PEP 257, always prefer double quotes for docstrings
QuoteStyle::Double,
parent_docstring_quote_style,
);
docstring::format(&normalized, f)
}
StringLayout::ImplicitConcatenatedStringInBinaryLike => {
FormatStringContinuation::new(self.string).fmt(f)
}
};
// TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
// This is to mark all of those comments as formatted but we need to
// figure out how to handle them. Note that this needs to be done only
// after the f-string is formatted, so only for all the non-formatted
// comments.
if let AnyString::FString(fstring) = self.string {
let comments = f.context().comments();
fstring.value.elements().for_each(|value| {
comments.mark_verbatim_node_comments_formatted(value.into());
});
AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f),
AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f),
}
result
}
}
struct FormatStringContinuation<'a> {
/// Formats any implicitly concatenated string. This could be any valid combination
/// of string, bytes or f-string literals.
pub(crate) struct FormatStringContinuation<'a> {
string: &'a AnyString<'a>,
}
impl<'a> FormatStringContinuation<'a> {
fn new(string: &'a AnyString<'a>) -> Self {
pub(crate) fn new(string: &'a AnyString<'a>) -> Self {
Self { string }
}
}
@ -254,24 +190,15 @@ impl<'a> FormatStringContinuation<'a> {
impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let comments = f.context().comments().clone();
let locator = f.context().locator();
let in_docstring = f.context().docstring();
let quote_style = f.options().quote_style();
let quoting = self.string.quoting(&f.context().locator());
let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
for part in self.string.parts() {
let normalized = StringPart::from_source(part.range(), &locator).normalize(
self.string.quoting(&locator),
&locator,
quote_style,
in_docstring,
);
for part in self.string.parts(quoting) {
joiner.entry(&format_args![
line_suffix_boundary(),
leading_comments(comments.leading(&part)),
normalized,
part,
trailing_comments(comments.trailing(&part))
]);
}
@ -281,7 +208,7 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
}
#[derive(Debug)]
struct StringPart {
pub(crate) struct StringPart {
/// The prefix.
prefix: StringPrefix,
@ -293,7 +220,7 @@ struct StringPart {
}
impl StringPart {
fn from_source(range: TextRange, locator: &Locator) -> Self {
pub(crate) fn from_source(range: TextRange, locator: &Locator) -> Self {
let string_content = locator.slice(range);
let prefix = StringPrefix::parse(string_content);
@ -320,7 +247,7 @@ impl StringPart {
/// snippet within the docstring. The quote style should correspond to the
/// style of quotes used by said docstring. Normalization will ensure the
/// quoting styles don't conflict.
fn normalize<'a>(
pub(crate) fn normalize<'a>(
self,
quoting: Quoting,
locator: &'a Locator,
@ -412,7 +339,7 @@ impl StringPart {
}
#[derive(Debug)]
struct NormalizedString<'a> {
pub(crate) struct NormalizedString<'a> {
prefix: StringPrefix,
/// The quotes of the normalized string (preferred quotes)
@ -448,7 +375,7 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
bitflags! {
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(super) struct StringPrefix: u8 {
pub(crate) struct StringPrefix: u8 {
const UNICODE = 0b0000_0001;
/// `r"test"`
const RAW = 0b0000_0010;
@ -460,7 +387,7 @@ bitflags! {
}
impl StringPrefix {
pub(super) fn parse(input: &str) -> StringPrefix {
pub(crate) fn parse(input: &str) -> StringPrefix {
let chars = input.chars();
let mut prefix = StringPrefix::empty();
@ -485,7 +412,7 @@ impl StringPrefix {
prefix
}
pub(super) const fn text_len(self) -> TextSize {
pub(crate) const fn text_len(self) -> TextSize {
TextSize::new(self.bits().count_ones())
}
@ -688,13 +615,13 @@ fn choose_quotes(input: &str, quotes: StringQuotes, preferred_quote: QuoteChar)
}
#[derive(Copy, Clone, Debug)]
pub(super) struct StringQuotes {
pub(crate) struct StringQuotes {
triple: bool,
quote_char: QuoteChar,
}
impl StringQuotes {
pub(super) fn parse(input: &str) -> Option<StringQuotes> {
pub(crate) fn parse(input: &str) -> Option<StringQuotes> {
let mut chars = input.chars();
let quote_char = chars.next()?;
@ -708,7 +635,7 @@ impl StringQuotes {
})
}
pub(super) const fn is_triple(self) -> bool {
pub(crate) const fn is_triple(self) -> bool {
self.triple
}