Split string formatting to individual nodes (#9058)

This PR splits the string formatting code in the formatter to be handled
by the respective nodes.

Previously, the string formatting was done through a single
`FormatString` interface. Now, the nodes themselves are responsible for
formatting.

The following changes were made:
1. Remove `StringLayout::ImplicitStringConcatenationInBinaryLike` and
inline the call to `FormatStringContinuation`. After the refactor, the
binary-like formatting would otherwise delegate to `FormatString`, which
would then delegate to `FormatStringContinuation`. Inlining the call
removes these intermediary steps.
2. Add formatter implementation for `FStringPart` which delegates it to
the respective string literal or f-string node.
3. Add `ExprStringLiteralKind` which is either `String` or `Docstring`.
If it's a docstring variant, then the string expression would not be
implicitly concatenated. This is guaranteed by the
`DocstringStmt::try_from_expression` constructor.
4. Add `StringLiteralKind` which is either a `String`, `Docstring` or
`InImplicitlyConcatenatedFString`. The last variant is for when the
string literal is implicitly concatenated with an f-string (`"foo" f"bar
{x}"`).
5. Remove `FormatString`.
6. Extract the f-string quote detection as a standalone function which
is public to the crate. This is used to detect the quote to be used for
an f-string at the expression level (`ExprFString` or
`FormatStringContinuation`).


### Formatter ecosystem result

**This PR**

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |

**main**

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |
This commit is contained in:
Dhruv Manilawala 2023-12-14 12:55:10 -06:00 committed by GitHub
parent 28b1aa201b
commit 189e947808
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 364 additions and 266 deletions

View file

@ -1,12 +1,23 @@
use ruff_python_ast::BytesLiteral;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::string::{Quoting, StringPart};
/// Formatter rule for a single [`BytesLiteral`] node.
#[derive(Default)]
pub struct FormatBytesLiteral;
impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
// NOTE(review): this listing looks like a diff with its +/- markers stripped.
// The `fmt_fields` directly below (the `unreachable!` one) appears to be the
// removed implementation and the second one its replacement -- confirm
// against the actual file.
fn fmt_fields(&self, _item: &BytesLiteral, _f: &mut PyFormatter) -> FormatResult<()> {
unreachable!("Handled inside of `FormatExprBytesLiteral`");
/// Formats the bytes literal from its source text: the literal's range is
/// read through the locator, normalized with the configured quote style,
/// and the normalized result is written to the formatter.
fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator();
StringPart::from_source(item.range(), &locator)
.normalize(
// The quotes of the literal are allowed to change during normalization.
Quoting::CanChange,
&locator,
f.options().quote_style(),
f.context().docstring(),
)
.fmt(f)
}
}

View file

@ -1,12 +1,49 @@
use ruff_python_ast::FString;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::string::{Quoting, StringPart};
#[derive(Default)]
pub struct FormatFString;
/// Formatter for a single f-string that belongs to an f-string expression.
///
/// It covers both the f-string part of an implicit concatenation such as
/// `"foo" f"bar {x}"` and a standalone f-string such as `f"foo {x} bar"`.
pub(crate) struct FormatFString<'a> {
    /// The f-string node to format.
    value: &'a FString,
    /// How the quotes of this f-string are to be treated. The parent node
    /// (the f-string expression) decides this, and the f-string cannot be
    /// formatted correctly without it.
    quoting: Quoting,
}
impl FormatNodeRule<FString> for FormatFString {
fn fmt_fields(&self, _item: &FString, _f: &mut PyFormatter) -> FormatResult<()> {
unreachable!("Handled inside of `FormatExprFString`");
impl<'a> FormatFString<'a> {
pub(crate) fn new(value: &'a FString, quoting: Quoting) -> Self {
Self { value, quoting }
}
}
impl Format<PyFormatContext<'_>> for FormatFString<'_> {
    /// Normalizes the f-string's source text with the quoting stored on
    /// `self` and writes it, then marks the comments of every f-string
    /// element as formatted. The result of the write is what gets returned.
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let locator = f.context().locator();

        let source_part = StringPart::from_source(self.value.range(), &locator);
        let normalized = source_part.normalize(
            self.quoting,
            &locator,
            f.options().quote_style(),
            f.context().docstring(),
        );
        let outcome = normalized.fmt(f);

        // TODO(dhruvmanila): PEP 701 allows comments inside f-strings. For
        // now they are only marked as formatted; how to actually handle them
        // is still open. The marking has to happen after the f-string itself
        // has been formatted, so that it only affects comments that were not
        // formatted already.
        let comments = f.context().comments();
        for element in &self.value.elements {
            comments.mark_verbatim_node_comments_formatted(element.into());
        }

        outcome
    }
}

View file

@ -0,0 +1,39 @@
use ruff_python_ast::FStringPart;
use crate::other::f_string::FormatFString;
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::string::Quoting;
/// Formatter for one part of an f-string expression, where a part is either
/// a plain string literal or an f-string.
///
/// No formatting happens here directly; the work is handed to the formatter
/// that matches the kind of part.
pub(crate) struct FormatFStringPart<'a> {
    /// The part to format.
    part: &'a FStringPart,
    /// Quoting shared by every part of the f-string expression. The parent
    /// node (the f-string expression) decides it, and all parts need it to be
    /// formatted correctly.
    quoting: Quoting,
}

impl<'a> FormatFStringPart<'a> {
    /// Creates a formatter for `part` that uses the `quoting` chosen by the
    /// parent f-string expression.
    pub(crate) fn new(part: &'a FStringPart, quoting: Quoting) -> Self {
        FormatFStringPart { part, quoting }
    }
}
impl Format<PyFormatContext<'_>> for FormatFStringPart<'_> {
    /// Dispatches to the f-string or string literal formatter, passing the
    /// stored quoting along in both cases.
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let quoting = self.quoting;

        match self.part {
            FStringPart::FString(nested) => FormatFString::new(nested, quoting).fmt(f),
            FStringPart::Literal(literal) => {
                // A string literal can only show up as an f-string part when
                // the f-string is implicitly concatenated, e.g.
                // `"foo" f"bar {x}"`. On its own it would be a string
                // expression rather than an f-string.
                let kind = StringLiteralKind::InImplicitlyConcatenatedFString(quoting);
                FormatStringLiteral::new(literal, kind).fmt(f)
            }
        }
    }
}

View file

@ -7,6 +7,7 @@ pub(crate) mod decorator;
pub(crate) mod elif_else_clause;
pub(crate) mod except_handler_except_handler;
pub(crate) mod f_string;
pub(crate) mod f_string_part;
pub(crate) mod identifier;
pub(crate) mod keyword;
pub(crate) mod match_case;

View file

@ -1,12 +1,72 @@
use ruff_python_ast::StringLiteral;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::string::{docstring, Quoting, StringPart};
use crate::QuoteStyle;
#[derive(Default)]
pub struct FormatStringLiteral;
/// Formatter for a single string literal node.
pub(crate) struct FormatStringLiteral<'a> {
    /// The string literal to format.
    value: &'a StringLiteral,
    /// The kind of string literal being formatted.
    layout: StringLiteralKind,
}
impl FormatNodeRule<StringLiteral> for FormatStringLiteral {
fn fmt_fields(&self, _item: &StringLiteral, _f: &mut PyFormatter) -> FormatResult<()> {
unreachable!("Handled inside of `FormatExprStringLiteral`");
impl<'a> FormatStringLiteral<'a> {
pub(crate) fn new(value: &'a StringLiteral, layout: StringLiteralKind) -> Self {
Self { value, layout }
}
}
/// Describes the role a string literal plays, which determines how it is
/// formatted.
#[derive(Copy, Clone, Debug, Default)]
pub(crate) enum StringLiteralKind {
    /// An ordinary string literal such as `"foo"`.
    #[default]
    String,
    /// A string literal that serves as a docstring.
    Docstring,
    /// A string literal implicitly concatenated with an f-string. The whole
    /// expression is then an f-string, and the quoting is the one detected by
    /// the parent node (the f-string expression).
    InImplicitlyConcatenatedFString(Quoting),
}

impl StringLiteralKind {
    /// Returns `true` only for the [`StringLiteralKind::Docstring`] variant.
    pub(crate) const fn is_docstring(self) -> bool {
        matches!(self, Self::Docstring)
    }

    /// Returns the quoting to apply to this string literal: the quoting
    /// carried by the variant for a literal inside an implicitly concatenated
    /// f-string, and [`Quoting::CanChange`] for every other kind.
    fn quoting(self) -> Quoting {
        match self {
            Self::InImplicitlyConcatenatedFString(inherited) => inherited,
            Self::String | Self::Docstring => Quoting::CanChange,
        }
    }
}
impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
    /// Normalizes the literal's source text and writes it. Docstrings are
    /// normalized with double quotes and written through `docstring::format`;
    /// every other kind uses the configured quote style and is written
    /// directly.
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let kind = self.layout;
        let is_docstring = kind.is_docstring();
        let locator = f.context().locator();

        let preferred_quotes = if is_docstring {
            // PEP 8 and PEP 257 both ask for double quotes around docstrings,
            // so the configured quote style is not consulted here.
            QuoteStyle::Double
        } else {
            f.options().quote_style()
        };

        let source_part = StringPart::from_source(self.value.range(), &locator);
        let normalized = source_part.normalize(
            kind.quoting(),
            &locator,
            preferred_quotes,
            f.context().docstring(),
        );

        if !is_docstring {
            return normalized.fmt(f);
        }
        docstring::format(&normalized, f)
    }
}