Split string formatting to individual nodes (#9058)

This PR splits the string formatting code in the formatter to be handled
by the respective nodes.

Previously, the string formatting was done through a single
`FormatString` interface. Now, the nodes themselves are responsible for
formatting.

The following changes were made:
1. Remove `StringLayout::ImplicitStringConcatenationInBinaryLike` and
inline the call to `FormatStringContinuation`. After the refactor, the
binary like formatting would delegate to `FormatString` which would then
delegate to `FormatStringContinuation`. This removes the intermediary
steps.
2. Add formatter implementation for `FStringPart` which delegates it to
the respective string literal or f-string node.
3. Add `ExprStringLiteralKind` which is either `String` or `Docstring`.
If it's a docstring variant, then the string expression would not be
implicitly concatenated. This is guaranteed by the
`DocstringStmt::try_from_expression` constructor.
4. Add `StringLiteralKind` which is either a `String`, `Docstring` or
`InImplicitlyConcatenatedFString`. The last variant is for when the
string literal is implicitly concatenated with an f-string (`"foo" f"bar
{x}"`).
5. Remove `FormatString`.
6. Extract the f-string quote detection as a standalone function which
is public to the crate. This is used to detect the quote to be used for
an f-string at the expression level (`ExprFString` or
`FormatStringContinuation`).


### Formatter ecosystem result

**This PR**

| project | similarity index | total files | changed files |

|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |

**main**

| project | similarity index | total files | changed files |

|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |
This commit is contained in:
Dhruv Manilawala 2023-12-14 12:55:10 -06:00 committed by GitHub
parent 28b1aa201b
commit 189e947808
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 364 additions and 266 deletions

View file

@ -18,10 +18,10 @@ use crate::expression::parentheses::{
is_expression_parenthesized, write_in_parentheses_only_group_end_tag,
write_in_parentheses_only_group_start_tag, Parentheses,
};
use crate::expression::string::{AnyString, FormatString, StringLayout};
use crate::expression::OperatorPrecedence;
use crate::prelude::*;
use crate::preview::is_fix_power_op_line_length_enabled;
use crate::string::{AnyString, FormatStringContinuation};
#[derive(Copy, Clone, Debug)]
pub(super) enum BinaryLike<'a> {
@ -395,9 +395,10 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
[
operand.leading_binary_comments().map(leading_comments),
leading_comments(comments.leading(&string_constant)),
FormatString::new(&string_constant).with_layout(
StringLayout::ImplicitConcatenatedStringInBinaryLike,
),
// Call `FormatStringContinuation` directly to avoid formatting
// the implicitly concatenated string with the enclosing group
// because the group is added by the binary like formatting.
FormatStringContinuation::new(&string_constant),
trailing_comments(comments.trailing(&string_constant)),
operand.trailing_binary_comments().map(trailing_comments),
line_suffix_boundary(),
@ -413,9 +414,10 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
f,
[
leading_comments(comments.leading(&string_constant)),
FormatString::new(&string_constant).with_layout(
StringLayout::ImplicitConcatenatedStringInBinaryLike
),
// Call `FormatStringContinuation` directly to avoid formatting
// the implicitly concatenated string with the enclosing group
// because the group is added by the binary like formatting.
FormatStringContinuation::new(&string_constant),
trailing_comments(comments.trailing(&string_constant)),
]
)?;

View file

@ -3,16 +3,24 @@ use ruff_python_ast::ExprBytesLiteral;
use crate::comments::SourceComment;
use crate::expression::expr_string_literal::is_multiline_string;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::expression::string::{AnyString, FormatString};
use crate::expression::parentheses::{
in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
};
use crate::prelude::*;
use crate::string::{AnyString, FormatStringContinuation};
#[derive(Default)]
pub struct FormatExprBytesLiteral;
impl FormatNodeRule<ExprBytesLiteral> for FormatExprBytesLiteral {
fn fmt_fields(&self, item: &ExprBytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::Bytes(item)).fmt(f)
let ExprBytesLiteral { value, .. } = item;
match value.as_slice() {
[bytes_literal] => bytes_literal.format().fmt(f),
_ => in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::Bytes(item)))
.fmt(f),
}
}
fn fmt_dangling_comments(

View file

@ -1,21 +1,35 @@
use memchr::memchr2;
use ruff_python_ast::{AnyNodeRef, ExprFString};
use ruff_source_file::Locator;
use ruff_text_size::Ranged;
use crate::comments::SourceComment;
use ruff_formatter::FormatResult;
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::ExprFString;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::expression::parentheses::{
in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
};
use crate::other::f_string_part::FormatFStringPart;
use crate::prelude::*;
use super::string::{AnyString, FormatString};
use crate::string::{AnyString, FormatStringContinuation, Quoting};
#[derive(Default)]
pub struct FormatExprFString;
impl FormatNodeRule<ExprFString> for FormatExprFString {
fn fmt_fields(&self, item: &ExprFString, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::FString(item)).fmt(f)
let ExprFString { value, .. } = item;
match value.as_slice() {
[f_string_part] => FormatFStringPart::new(
f_string_part,
f_string_quoting(item, &f.context().locator()),
)
.fmt(f),
_ => {
in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::FString(item)))
.fmt(f)
}
}
}
fn fmt_dangling_comments(
@ -43,3 +57,28 @@ impl NeedsParentheses for ExprFString {
}
}
}
pub(crate) fn f_string_quoting(f_string: &ExprFString, locator: &Locator) -> Quoting {
let unprefixed = locator
.slice(f_string.range())
.trim_start_matches(|c| c != '"' && c != '\'');
let triple_quoted = unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''");
if f_string
.value
.elements()
.filter_map(|element| element.as_expression())
.any(|expression| {
let string_content = locator.slice(expression.range());
if triple_quoted {
string_content.contains(r#"""""#) || string_content.contains("'''")
} else {
string_content.contains(['"', '\''])
}
})
{
Quoting::Preserve
} else {
Quoting::CanChange
}
}

View file

@ -1,34 +1,66 @@
use ruff_formatter::FormatRuleWithOptions;
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::ExprStringLiteral;
use ruff_python_ast::{AnyNodeRef, ExprStringLiteral};
use ruff_text_size::{Ranged, TextLen, TextRange};
use crate::comments::SourceComment;
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::expression::string::{
AnyString, FormatString, StringLayout, StringPrefix, StringQuotes,
use crate::expression::parentheses::{
in_parentheses_only_group, NeedsParentheses, OptionalParentheses,
};
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::string::{AnyString, FormatStringContinuation, StringPrefix, StringQuotes};
#[derive(Default)]
pub struct FormatExprStringLiteral {
layout: StringLayout,
kind: ExprStringLiteralKind,
}
#[derive(Default, Copy, Clone, Debug)]
pub enum ExprStringLiteralKind {
#[default]
String,
Docstring,
}
impl ExprStringLiteralKind {
const fn string_literal_kind(self) -> StringLiteralKind {
match self {
ExprStringLiteralKind::String => StringLiteralKind::String,
ExprStringLiteralKind::Docstring => StringLiteralKind::Docstring,
}
}
const fn is_docstring(self) -> bool {
matches!(self, ExprStringLiteralKind::Docstring)
}
}
impl FormatRuleWithOptions<ExprStringLiteral, PyFormatContext<'_>> for FormatExprStringLiteral {
type Options = StringLayout;
type Options = ExprStringLiteralKind;
fn with_options(mut self, options: Self::Options) -> Self {
self.layout = options;
self.kind = options;
self
}
}
impl FormatNodeRule<ExprStringLiteral> for FormatExprStringLiteral {
fn fmt_fields(&self, item: &ExprStringLiteral, f: &mut PyFormatter) -> FormatResult<()> {
FormatString::new(&AnyString::String(item))
.with_layout(self.layout)
.fmt(f)
let ExprStringLiteral { value, .. } = item;
match value.as_slice() {
[string_literal] => {
FormatStringLiteral::new(string_literal, self.kind.string_literal_kind()).fmt(f)
}
_ => {
// This is just a sanity check because [`DocstringStmt::try_from_statement`]
// ensures that the docstring is a *single* string literal.
assert!(!self.kind.is_docstring());
in_parentheses_only_group(&FormatStringContinuation::new(&AnyString::String(item)))
}
.fmt(f),
}
}
fn fmt_dangling_comments(

View file

@ -58,7 +58,6 @@ pub(crate) mod expr_yield;
pub(crate) mod expr_yield_from;
mod operator;
pub(crate) mod parentheses;
pub(crate) mod string;
#[derive(Copy, Clone, PartialEq, Eq, Default)]
pub struct FormatExpr {

File diff suppressed because it is too large Load diff

View file

@ -1,877 +0,0 @@
use std::borrow::Cow;
use bitflags::bitflags;
use ruff_formatter::{format_args, write};
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::{
self as ast, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::comments::{leading_comments, trailing_comments};
use crate::expression::parentheses::{
in_parentheses_only_group, in_parentheses_only_soft_line_break_or_space,
};
use crate::expression::Expr;
use crate::prelude::*;
use crate::QuoteStyle;
mod docstring;
#[derive(Copy, Clone, Debug)]
enum Quoting {
CanChange,
Preserve,
}
#[derive(Clone, Debug)]
pub(super) enum AnyString<'a> {
String(&'a ExprStringLiteral),
Bytes(&'a ExprBytesLiteral),
FString(&'a ExprFString),
}
impl<'a> AnyString<'a> {
pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
match expression {
Expr::StringLiteral(string) => Some(AnyString::String(string)),
Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)),
Expr::FString(fstring) => Some(AnyString::FString(fstring)),
_ => None,
}
}
fn quoting(&self, locator: &Locator) -> Quoting {
match self {
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
Self::FString(f_string) => {
let unprefixed = locator
.slice(f_string.range)
.trim_start_matches(|c| c != '"' && c != '\'');
let triple_quoted =
unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''");
if f_string.value.elements().any(|element| match element {
ast::FStringElement::Expression(ast::FStringExpressionElement {
range,
..
}) => {
let string_content = locator.slice(*range);
if triple_quoted {
string_content.contains(r#"""""#) || string_content.contains("'''")
} else {
string_content.contains(['"', '\''])
}
}
ast::FStringElement::Literal(_) => false,
}) {
Quoting::Preserve
} else {
Quoting::CanChange
}
}
}
}
/// Returns `true` if the string is implicitly concatenated.
pub(super) fn is_implicit_concatenated(&self) -> bool {
match self {
Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(),
}
}
fn parts(&self) -> Vec<AnyStringPart<'a>> {
match self {
Self::String(ExprStringLiteral { value, .. }) => {
value.iter().map(AnyStringPart::String).collect()
}
Self::Bytes(ExprBytesLiteral { value, .. }) => {
value.iter().map(AnyStringPart::Bytes).collect()
}
Self::FString(ExprFString { value, .. }) => value
.iter()
.map(|f_string_part| match f_string_part {
ast::FStringPart::Literal(string_literal) => {
AnyStringPart::String(string_literal)
}
ast::FStringPart::FString(f_string) => AnyStringPart::FString(f_string),
})
.collect(),
}
}
}
impl Ranged for AnyString<'_> {
fn range(&self) -> TextRange {
match self {
Self::String(expr) => expr.range(),
Self::Bytes(expr) => expr.range(),
Self::FString(expr) => expr.range(),
}
}
}
impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
fn from(value: &AnyString<'a>) -> Self {
match value {
AnyString::String(expr) => AnyNodeRef::ExprStringLiteral(expr),
AnyString::Bytes(expr) => AnyNodeRef::ExprBytesLiteral(expr),
AnyString::FString(expr) => AnyNodeRef::ExprFString(expr),
}
}
}
impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
fn from(value: &AnyString<'a>) -> Self {
match value {
AnyString::String(expr) => ExpressionRef::StringLiteral(expr),
AnyString::Bytes(expr) => ExpressionRef::BytesLiteral(expr),
AnyString::FString(expr) => ExpressionRef::FString(expr),
}
}
}
#[derive(Clone, Debug)]
enum AnyStringPart<'a> {
String(&'a ast::StringLiteral),
Bytes(&'a ast::BytesLiteral),
FString(&'a ast::FString),
}
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
fn from(value: &AnyStringPart<'a>) -> Self {
match value {
AnyStringPart::String(part) => AnyNodeRef::StringLiteral(part),
AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part),
AnyStringPart::FString(part) => AnyNodeRef::FString(part),
}
}
}
impl Ranged for AnyStringPart<'_> {
fn range(&self) -> TextRange {
match self {
Self::String(part) => part.range(),
Self::Bytes(part) => part.range(),
Self::FString(part) => part.range(),
}
}
}
pub(super) struct FormatString<'a> {
string: &'a AnyString<'a>,
layout: StringLayout,
}
#[derive(Default, Copy, Clone, Debug)]
pub enum StringLayout {
#[default]
Default,
DocString,
/// An implicit concatenated string in a binary like (e.g. `a + b` or `a < b`) expression.
///
/// Formats the implicit concatenated string parts without the enclosing group because the group
/// is added by the binary like formatting.
ImplicitConcatenatedStringInBinaryLike,
}
impl<'a> FormatString<'a> {
pub(super) fn new(string: &'a AnyString<'a>) -> Self {
Self {
string,
layout: StringLayout::Default,
}
}
pub(super) fn with_layout(mut self, layout: StringLayout) -> Self {
self.layout = layout;
self
}
}
impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let parent_docstring_quote_style = f.context().docstring();
let locator = f.context().locator();
let result = match self.layout {
StringLayout::Default => {
if self.string.is_implicit_concatenated() {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
} else {
StringPart::from_source(self.string.range(), &locator)
.normalize(
self.string.quoting(&locator),
&locator,
f.options().quote_style(),
parent_docstring_quote_style,
)
.fmt(f)
}
}
StringLayout::DocString => {
let string_part = StringPart::from_source(self.string.range(), &locator);
let normalized = string_part.normalize(
Quoting::CanChange,
&locator,
// Per PEP 8 and PEP 257, always prefer double quotes for docstrings
QuoteStyle::Double,
parent_docstring_quote_style,
);
docstring::format(&normalized, f)
}
StringLayout::ImplicitConcatenatedStringInBinaryLike => {
FormatStringContinuation::new(self.string).fmt(f)
}
};
// TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
// This is to mark all of those comments as formatted but we need to
// figure out how to handle them. Note that this needs to be done only
// after the f-string is formatted, so only for all the non-formatted
// comments.
if let AnyString::FString(fstring) = self.string {
let comments = f.context().comments();
fstring.value.elements().for_each(|value| {
comments.mark_verbatim_node_comments_formatted(value.into());
});
}
result
}
}
struct FormatStringContinuation<'a> {
string: &'a AnyString<'a>,
}
impl<'a> FormatStringContinuation<'a> {
fn new(string: &'a AnyString<'a>) -> Self {
Self { string }
}
}
impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let comments = f.context().comments().clone();
let locator = f.context().locator();
let in_docstring = f.context().docstring();
let quote_style = f.options().quote_style();
let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
for part in self.string.parts() {
let normalized = StringPart::from_source(part.range(), &locator).normalize(
self.string.quoting(&locator),
&locator,
quote_style,
in_docstring,
);
joiner.entry(&format_args![
line_suffix_boundary(),
leading_comments(comments.leading(&part)),
normalized,
trailing_comments(comments.trailing(&part))
]);
}
joiner.finish()
}
}
#[derive(Debug)]
struct StringPart {
/// The prefix.
prefix: StringPrefix,
/// The actual quotes of the string in the source
quotes: StringQuotes,
/// The range of the string's content (full range minus quotes and prefix)
content_range: TextRange,
}
impl StringPart {
fn from_source(range: TextRange, locator: &Locator) -> Self {
let string_content = locator.slice(range);
let prefix = StringPrefix::parse(string_content);
let after_prefix = &string_content[usize::from(prefix.text_len())..];
let quotes =
StringQuotes::parse(after_prefix).expect("Didn't find string quotes after prefix");
let relative_raw_content_range = TextRange::new(
prefix.text_len() + quotes.text_len(),
string_content.text_len() - quotes.text_len(),
);
let raw_content_range = relative_raw_content_range + range.start();
Self {
prefix,
content_range: raw_content_range,
quotes,
}
}
/// Computes the strings preferred quotes and normalizes its content.
///
/// The parent docstring quote style should be set when formatting a code
/// snippet within the docstring. The quote style should correspond to the
/// style of quotes used by said docstring. Normalization will ensure the
/// quoting styles don't conflict.
fn normalize<'a>(
self,
quoting: Quoting,
locator: &'a Locator,
configured_style: QuoteStyle,
parent_docstring_quote_char: Option<QuoteChar>,
) -> NormalizedString<'a> {
// Per PEP 8, always prefer double quotes for triple-quoted strings.
let preferred_style = if self.quotes.triple {
// ... unless we're formatting a code snippet inside a docstring,
// then we specifically want to invert our quote style to avoid
// writing out invalid Python.
//
// It's worth pointing out that we can actually wind up being
// somewhat out of sync with PEP8 in this case. Consider this
// example:
//
// def foo():
// '''
// Something.
//
// >>> """tricksy"""
// '''
// pass
//
// Ideally, this would be reformatted as:
//
// def foo():
// """
// Something.
//
// >>> '''tricksy'''
// """
// pass
//
// But the logic here results in the original quoting being
// preserved. This is because the quoting style of the outer
// docstring is determined, in part, by looking at its contents. In
// this case, it notices that it contains a `"""` and thus infers
// that using `'''` would overall read better because it avoids
// the need to escape the interior `"""`. Except... in this case,
// the `"""` is actually part of a code snippet that could get
// reformatted to using a different quoting style itself.
//
// Fixing this would, I believe, require some fairly seismic
// changes to how formatting strings works. Namely, we would need
// to look for code snippets before normalizing the docstring, and
// then figure out the quoting style more holistically by looking
// at the various kinds of quotes used in the code snippets and
// what reformatting them might look like.
//
// Overall this is a bit of a corner case and just inverting the
// style from what the parent ultimately decided upon works, even
// if it doesn't have perfect alignment with PEP8.
if let Some(quote) = parent_docstring_quote_char {
QuoteStyle::from(quote.invert())
} else {
QuoteStyle::Double
}
} else {
configured_style
};
let raw_content = locator.slice(self.content_range);
let quotes = match quoting {
Quoting::Preserve => self.quotes,
Quoting::CanChange => {
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
if self.prefix.is_raw_string() {
choose_quotes_raw(raw_content, self.quotes, preferred_quote)
} else {
choose_quotes(raw_content, self.quotes, preferred_quote)
}
} else {
self.quotes
}
}
};
let normalized = normalize_string(locator.slice(self.content_range), quotes, self.prefix);
NormalizedString {
prefix: self.prefix,
content_range: self.content_range,
text: normalized,
quotes,
}
}
}
#[derive(Debug)]
struct NormalizedString<'a> {
prefix: StringPrefix,
/// The quotes of the normalized string (preferred quotes)
quotes: StringQuotes,
/// The range of the string's content in the source (minus prefix and quotes).
content_range: TextRange,
/// The normalized text
text: Cow<'a, str>,
}
impl Ranged for NormalizedString<'_> {
fn range(&self) -> TextRange {
self.content_range
}
}
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
write!(f, [self.prefix, self.quotes])?;
match &self.text {
Cow::Borrowed(_) => {
source_text_slice(self.range()).fmt(f)?;
}
Cow::Owned(normalized) => {
text(normalized, Some(self.start())).fmt(f)?;
}
}
self.quotes.fmt(f)
}
}
bitflags! {
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(super) struct StringPrefix: u8 {
const UNICODE = 0b0000_0001;
/// `r"test"`
const RAW = 0b0000_0010;
/// `R"test"
const RAW_UPPER = 0b0000_0100;
const BYTE = 0b0000_1000;
const F_STRING = 0b0001_0000;
}
}
impl StringPrefix {
pub(super) fn parse(input: &str) -> StringPrefix {
let chars = input.chars();
let mut prefix = StringPrefix::empty();
for c in chars {
let flag = match c {
'u' | 'U' => StringPrefix::UNICODE,
'f' | 'F' => StringPrefix::F_STRING,
'b' | 'B' => StringPrefix::BYTE,
'r' => StringPrefix::RAW,
'R' => StringPrefix::RAW_UPPER,
'\'' | '"' => break,
c => {
unreachable!(
"Unexpected character '{c}' terminating the prefix of a string literal"
);
}
};
prefix |= flag;
}
prefix
}
pub(super) const fn text_len(self) -> TextSize {
TextSize::new(self.bits().count_ones())
}
pub(super) const fn is_raw_string(self) -> bool {
self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER)
}
pub(super) const fn is_fstring(self) -> bool {
self.contains(StringPrefix::F_STRING)
}
}
impl Format<PyFormatContext<'_>> for StringPrefix {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
// Retain the casing for the raw prefix:
// https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings
if self.contains(StringPrefix::RAW) {
token("r").fmt(f)?;
} else if self.contains(StringPrefix::RAW_UPPER) {
token("R").fmt(f)?;
}
if self.contains(StringPrefix::BYTE) {
token("b").fmt(f)?;
}
if self.contains(StringPrefix::F_STRING) {
token("f").fmt(f)?;
}
// Remove the unicode prefix `u` if any because it is meaningless in Python 3+.
Ok(())
}
}
/// Choose the appropriate quote style for a raw string.
///
/// The preferred quote style is chosen unless the string contains unescaped quotes of the
/// preferred style. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote
/// style is double quotes.
fn choose_quotes_raw(
input: &str,
quotes: StringQuotes,
preferred_quote: QuoteChar,
) -> StringQuotes {
let preferred_quote_char = preferred_quote.as_char();
let mut chars = input.chars().peekable();
let contains_unescaped_configured_quotes = loop {
match chars.next() {
Some('\\') => {
// Ignore escaped characters
chars.next();
}
// `"` or `'`
Some(c) if c == preferred_quote_char => {
if !quotes.triple {
break true;
}
match chars.peek() {
// We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
// about where the closing triple quotes start
None => break true,
Some(next) if *next == preferred_quote_char => {
// `""` or `''`
chars.next();
// We can't turn `r'''""'''` into `r""""""""`, nor can we have
// `"""` or `'''` respectively inside the string
if chars.peek().is_none() || chars.peek() == Some(&preferred_quote_char) {
break true;
}
}
_ => {}
}
}
Some(_) => continue,
None => break false,
}
};
StringQuotes {
triple: quotes.triple,
quote_char: if contains_unescaped_configured_quotes {
quotes.quote_char
} else {
preferred_quote
},
}
}
/// Choose the appropriate quote style for a string.
///
/// For single quoted strings, the preferred quote style is used, unless the alternative quote style
/// would require fewer escapes.
///
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
/// used unless the string contains `"""`).
fn choose_quotes(input: &str, quotes: StringQuotes, preferred_quote: QuoteChar) -> StringQuotes {
let quote = if quotes.triple {
// True if the string contains a triple quote sequence of the configured quote style.
let mut uses_triple_quotes = false;
let mut chars = input.chars().peekable();
while let Some(c) = chars.next() {
let preferred_quote_char = preferred_quote.as_char();
match c {
'\\' => {
if matches!(chars.peek(), Some('"' | '\\')) {
chars.next();
}
}
// `"` or `'`
c if c == preferred_quote_char => {
match chars.peek().copied() {
Some(c) if c == preferred_quote_char => {
// `""` or `''`
chars.next();
match chars.peek().copied() {
Some(c) if c == preferred_quote_char => {
// `"""` or `'''`
chars.next();
uses_triple_quotes = true;
break;
}
Some(_) => {}
None => {
// Handle `''' ""'''`. At this point we have consumed both
// double quotes, so on the next iteration the iterator is empty
// and we'd miss the string ending with a preferred quote
uses_triple_quotes = true;
break;
}
}
}
Some(_) => {
// A single quote char, this is ok
}
None => {
// Trailing quote at the end of the comment
uses_triple_quotes = true;
break;
}
}
}
_ => continue,
}
}
if uses_triple_quotes {
// String contains a triple quote sequence of the configured quote style.
// Keep the existing quote style.
quotes.quote_char
} else {
preferred_quote
}
} else {
let mut single_quotes = 0u32;
let mut double_quotes = 0u32;
for c in input.chars() {
match c {
'\'' => {
single_quotes += 1;
}
'"' => {
double_quotes += 1;
}
_ => continue,
}
}
match preferred_quote {
QuoteChar::Single => {
if single_quotes > double_quotes {
QuoteChar::Double
} else {
QuoteChar::Single
}
}
QuoteChar::Double => {
if double_quotes > single_quotes {
QuoteChar::Single
} else {
QuoteChar::Double
}
}
}
};
StringQuotes {
triple: quotes.triple,
quote_char: quote,
}
}
#[derive(Copy, Clone, Debug)]
pub(super) struct StringQuotes {
triple: bool,
quote_char: QuoteChar,
}
impl StringQuotes {
pub(super) fn parse(input: &str) -> Option<StringQuotes> {
let mut chars = input.chars();
let quote_char = chars.next()?;
let quote = QuoteChar::try_from(quote_char).ok()?;
let triple = chars.next() == Some(quote_char) && chars.next() == Some(quote_char);
Some(Self {
triple,
quote_char: quote,
})
}
pub(super) const fn is_triple(self) -> bool {
self.triple
}
const fn text_len(self) -> TextSize {
if self.triple {
TextSize::new(3)
} else {
TextSize::new(1)
}
}
}
impl Format<PyFormatContext<'_>> for StringQuotes {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let quotes = match (self.quote_char, self.triple) {
(QuoteChar::Single, false) => "'",
(QuoteChar::Single, true) => "'''",
(QuoteChar::Double, false) => "\"",
(QuoteChar::Double, true) => "\"\"\"",
};
token(quotes).fmt(f)
}
}
/// The quotation character used to quote a string, byte, or fstring literal.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum QuoteChar {
/// A single quote: `'`
Single,
/// A double quote: '"'
Double,
}
impl QuoteChar {
pub const fn as_char(self) -> char {
match self {
QuoteChar::Single => '\'',
QuoteChar::Double => '"',
}
}
#[must_use]
pub const fn invert(self) -> QuoteChar {
match self {
QuoteChar::Single => QuoteChar::Double,
QuoteChar::Double => QuoteChar::Single,
}
}
#[must_use]
pub const fn from_style(style: QuoteStyle) -> Option<QuoteChar> {
match style {
QuoteStyle::Single => Some(QuoteChar::Single),
QuoteStyle::Double => Some(QuoteChar::Double),
QuoteStyle::Preserve => None,
}
}
}
impl From<QuoteChar> for QuoteStyle {
fn from(value: QuoteChar) -> Self {
match value {
QuoteChar::Single => QuoteStyle::Single,
QuoteChar::Double => QuoteStyle::Double,
}
}
}
impl TryFrom<char> for QuoteChar {
type Error = ();
fn try_from(value: char) -> Result<Self, Self::Error> {
match value {
'\'' => Ok(QuoteChar::Single),
'"' => Ok(QuoteChar::Double),
_ => Err(()),
}
}
}
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
/// with the provided [`StringQuotes`] style.
///
/// Returns the normalized string and whether it contains new lines.
fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> Cow<str> {
// The normalized string if `input` is not yet normalized.
// `output` must remain empty if `input` is already normalized.
let mut output = String::new();
// Tracks the last index of `input` that has been written to `output`.
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
let mut last_index = 0;
let quote = quotes.quote_char;
let preferred_quote = quote.as_char();
let opposite_quote = quote.invert().as_char();
let mut chars = input.char_indices().peekable();
let is_raw = prefix.is_raw_string();
let is_fstring = prefix.is_fstring();
let mut formatted_value_nesting = 0u32;
while let Some((index, c)) = chars.next() {
if is_fstring && matches!(c, '{' | '}') {
if chars.peek().copied().is_some_and(|(_, next)| next == c) {
// Skip over the second character of the double braces
chars.next();
} else if c == '{' {
formatted_value_nesting += 1;
} else {
// Safe to assume that `c == '}'` here because of the matched pattern above
formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
}
continue;
}
if c == '\r' {
output.push_str(&input[last_index..index]);
// Skip over the '\r' character, keep the `\n`
if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
chars.next();
}
// Replace the `\r` with a `\n`
else {
output.push('\n');
}
last_index = index + '\r'.len_utf8();
} else if !quotes.triple && !is_raw {
if c == '\\' {
if let Some((_, next)) = chars.peek().copied() {
#[allow(clippy::if_same_then_else)]
if next == opposite_quote && formatted_value_nesting == 0 {
// Remove the escape by ending before the backslash and starting again with the quote
chars.next();
output.push_str(&input[last_index..index]);
last_index = index + '\\'.len_utf8();
} else if next == preferred_quote {
// Quote is already escaped, skip over it.
chars.next();
} else if next == '\\' {
// Skip over escaped backslashes
chars.next();
}
}
} else if c == preferred_quote && formatted_value_nesting == 0 {
// Escape the quote
output.push_str(&input[last_index..index]);
output.push('\\');
output.push(c);
last_index = index + preferred_quote.len_utf8();
}
}
}
let normalized = if last_index == 0 {
Cow::Borrowed(input)
} else {
output.push_str(&input[last_index..]);
Cow::Owned(output)
};
normalized
}