mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-12 22:58:22 +00:00
Alternate quotes for strings inside f-strings in preview (#13860)
This commit is contained in:
parent
f335fe4d4a
commit
2f88f84972
12 changed files with 556 additions and 118 deletions
|
@ -3,7 +3,10 @@ use std::cmp::Ordering;
|
|||
use std::iter::FusedIterator;
|
||||
|
||||
use ruff_formatter::FormatContext;
|
||||
use ruff_python_ast::{str::Quote, AnyStringFlags, StringFlags, StringLikePart};
|
||||
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
|
||||
use ruff_python_ast::{
|
||||
str::Quote, AnyStringFlags, BytesLiteral, FString, StringFlags, StringLikePart, StringLiteral,
|
||||
};
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::context::FStringState;
|
||||
|
@ -37,53 +40,55 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
|
|||
self
|
||||
}
|
||||
|
||||
fn quoting(&self, string: StringLikePart) -> Quoting {
|
||||
match (self.quoting, self.context.f_string_state()) {
|
||||
(Quoting::Preserve, _) => Quoting::Preserve,
|
||||
|
||||
// If we're inside an f-string, we need to make sure to preserve the
|
||||
// existing quotes unless we're inside a triple-quoted f-string and
|
||||
// the inner string itself isn't triple-quoted. For example:
|
||||
//
|
||||
// ```python
|
||||
// f"""outer {"inner"}""" # Valid
|
||||
// f"""outer {"""inner"""}""" # Invalid
|
||||
// ```
|
||||
//
|
||||
// Or, if the target version supports PEP 701.
|
||||
//
|
||||
// The reason to preserve the quotes is based on the assumption that
|
||||
// the original f-string is valid in terms of quoting, and we don't
|
||||
// want to change that to make it invalid.
|
||||
(Quoting::CanChange, FStringState::InsideExpressionElement(context)) => {
|
||||
if (context.f_string().flags().is_triple_quoted()
|
||||
&& !string.flags().is_triple_quoted())
|
||||
|| self.context.options().target_version().supports_pep_701()
|
||||
{
|
||||
Quoting::CanChange
|
||||
} else {
|
||||
Quoting::Preserve
|
||||
}
|
||||
}
|
||||
|
||||
(Quoting::CanChange, _) => Quoting::CanChange,
|
||||
}
|
||||
}
|
||||
|
||||
/// Determines the preferred quote style for `string`.
|
||||
/// The formatter should use the preferred quote style unless
|
||||
/// it can't because the string contains the preferred quotes OR
|
||||
/// it leads to more escaping.
|
||||
pub(super) fn preferred_quote_style(&self, string: StringLikePart) -> QuoteStyle {
|
||||
match self.quoting(string) {
|
||||
match self.quoting {
|
||||
Quoting::Preserve => QuoteStyle::Preserve,
|
||||
Quoting::CanChange => {
|
||||
let preferred_quote_style = self
|
||||
.preferred_quote_style
|
||||
.unwrap_or(self.context.options().quote_style());
|
||||
|
||||
if preferred_quote_style.is_preserve() {
|
||||
return QuoteStyle::Preserve;
|
||||
}
|
||||
|
||||
// There are two cases where it's necessary to preserve the quotes
|
||||
// if the target version is pre 3.12 and the part is an f-string.
|
||||
if !self.context.options().target_version().supports_pep_701() {
|
||||
if let StringLikePart::FString(fstring) = string {
|
||||
// An f-string expression contains a debug text with a quote character
|
||||
// because the formatter will emit the debug expression **exactly** the same as in the source text.
|
||||
if is_fstring_with_quoted_debug_expression(fstring, self.context) {
|
||||
return QuoteStyle::Preserve;
|
||||
}
|
||||
|
||||
// An f-string expression that contains a triple quoted string literal expression
|
||||
// that contains a quote.
|
||||
if is_fstring_with_triple_quoted_literal_expression_containing_quotes(
|
||||
fstring,
|
||||
self.context,
|
||||
) {
|
||||
return QuoteStyle::Preserve;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For f-strings prefer alternating the quotes unless The outer string is triple quoted and the inner isn't.
|
||||
if let FStringState::InsideExpressionElement(parent_context) =
|
||||
self.context.f_string_state()
|
||||
{
|
||||
let parent_flags = parent_context.f_string().flags();
|
||||
|
||||
if !parent_flags.is_triple_quoted() || string.flags().is_triple_quoted() {
|
||||
return QuoteStyle::from(parent_flags.quote_style().opposite());
|
||||
}
|
||||
}
|
||||
|
||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||
// Except when using quote-style-preserve.
|
||||
if string.flags().is_triple_quoted() {
|
||||
// ... unless we're formatting a code snippet inside a docstring,
|
||||
// then we specifically want to invert our quote style to avoid
|
||||
|
@ -132,8 +137,6 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
|
|||
// if it doesn't have perfect alignment with PEP8.
|
||||
if let Some(quote) = self.context.docstring() {
|
||||
QuoteStyle::from(quote.opposite())
|
||||
} else if preferred_quote_style.is_preserve() {
|
||||
QuoteStyle::Preserve
|
||||
} else {
|
||||
QuoteStyle::Double
|
||||
}
|
||||
|
@ -163,12 +166,18 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
|
|||
// The preferred quote style is single or double quotes, and the string contains a quote or
|
||||
// another character that may require escaping
|
||||
(Ok(preferred_quote), Some(first_quote_or_normalized_char_offset)) => {
|
||||
let quote = QuoteMetadata::from_str(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.flags(),
|
||||
preferred_quote,
|
||||
)
|
||||
.choose(preferred_quote);
|
||||
let metadata = if string.is_fstring() {
|
||||
QuoteMetadata::from_part(string, self.context, preferred_quote)
|
||||
} else {
|
||||
QuoteMetadata::from_str(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.flags(),
|
||||
preferred_quote,
|
||||
)
|
||||
};
|
||||
|
||||
let quote = metadata.choose(preferred_quote);
|
||||
|
||||
string_flags.with_quote_style(quote)
|
||||
}
|
||||
|
||||
|
@ -235,6 +244,59 @@ pub(crate) struct QuoteMetadata {
|
|||
/// Tracks information about the used quotes in a string which is used
|
||||
/// to choose the quotes for a part.
|
||||
impl QuoteMetadata {
|
||||
pub(crate) fn from_part(
|
||||
part: StringLikePart,
|
||||
context: &PyFormatContext,
|
||||
preferred_quote: Quote,
|
||||
) -> Self {
|
||||
match part {
|
||||
StringLikePart::String(_) | StringLikePart::Bytes(_) => {
|
||||
let text = context.locator().slice(part.content_range());
|
||||
|
||||
Self::from_str(text, part.flags(), preferred_quote)
|
||||
}
|
||||
StringLikePart::FString(fstring) => {
|
||||
if is_f_string_formatting_enabled(context) {
|
||||
// For f-strings, only consider the quotes inside string-literals but ignore
|
||||
// quotes inside expressions. This allows both the outer and the nested literals
|
||||
// to make the optimal local-choice to reduce the total number of quotes necessary.
|
||||
// This doesn't require any pre 312 special handling because an expression
|
||||
// can never contain the outer quote character, not even escaped:
|
||||
// ```python
|
||||
// f"{'escaping a quote like this \" is a syntax error pre 312'}"
|
||||
// ```
|
||||
let mut literals = fstring.elements.literals();
|
||||
|
||||
let Some(first) = literals.next() else {
|
||||
return QuoteMetadata::from_str("", part.flags(), preferred_quote);
|
||||
};
|
||||
|
||||
let mut metadata = QuoteMetadata::from_str(
|
||||
context.locator().slice(first.range()),
|
||||
fstring.flags.into(),
|
||||
preferred_quote,
|
||||
);
|
||||
|
||||
for literal in literals {
|
||||
metadata = metadata
|
||||
.merge(&QuoteMetadata::from_str(
|
||||
context.locator().slice(literal.range()),
|
||||
fstring.flags.into(),
|
||||
preferred_quote,
|
||||
))
|
||||
.expect("Merge to succeed because all parts have the same flags");
|
||||
}
|
||||
|
||||
metadata
|
||||
} else {
|
||||
let text = context.locator().slice(part.content_range());
|
||||
|
||||
Self::from_str(text, part.flags(), preferred_quote)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn from_str(text: &str, flags: AnyStringFlags, preferred_quote: Quote) -> Self {
|
||||
let kind = if flags.is_raw_string() {
|
||||
QuoteMetadataKind::raw(text, preferred_quote, flags.is_triple_quoted())
|
||||
|
@ -276,6 +338,61 @@ impl QuoteMetadata {
|
|||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Merges the quotes metadata of different literals.
|
||||
///
|
||||
/// ## Raw and triple quoted strings
|
||||
/// Merging raw and triple quoted strings is only correct if all literals are from the same part.
|
||||
/// E.g. it's okay to merge triple and raw strings from a single `FString` part's literals
|
||||
/// but it isn't safe to merge raw and triple quoted strings from different parts of an implicit
|
||||
/// concatenated string. Where safe means, it may lead to incorrect results.
|
||||
pub(super) fn merge(self, other: &QuoteMetadata) -> Option<QuoteMetadata> {
|
||||
let kind = match (self.kind, other.kind) {
|
||||
(
|
||||
QuoteMetadataKind::Regular {
|
||||
single_quotes: self_single,
|
||||
double_quotes: self_double,
|
||||
},
|
||||
QuoteMetadataKind::Regular {
|
||||
single_quotes: other_single,
|
||||
double_quotes: other_double,
|
||||
},
|
||||
) => QuoteMetadataKind::Regular {
|
||||
single_quotes: self_single + other_single,
|
||||
double_quotes: self_double + other_double,
|
||||
},
|
||||
|
||||
// Can't merge quotes from raw strings (even when both strings are raw)
|
||||
(
|
||||
QuoteMetadataKind::Raw {
|
||||
contains_preferred: self_contains_preferred,
|
||||
},
|
||||
QuoteMetadataKind::Raw {
|
||||
contains_preferred: other_contains_preferred,
|
||||
},
|
||||
) => QuoteMetadataKind::Raw {
|
||||
contains_preferred: self_contains_preferred || other_contains_preferred,
|
||||
},
|
||||
|
||||
(
|
||||
QuoteMetadataKind::Triple {
|
||||
contains_preferred: self_contains_preferred,
|
||||
},
|
||||
QuoteMetadataKind::Triple {
|
||||
contains_preferred: other_contains_preferred,
|
||||
},
|
||||
) => QuoteMetadataKind::Triple {
|
||||
contains_preferred: self_contains_preferred || other_contains_preferred,
|
||||
},
|
||||
|
||||
(_, _) => return None,
|
||||
};
|
||||
|
||||
Some(Self {
|
||||
kind,
|
||||
source_style: self.source_style,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
@ -738,18 +855,142 @@ impl UnicodeEscape {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if `string` is an f-string part that contains a debug expression that uses quotes
|
||||
/// and the format target is pre Python 312
|
||||
/// We can't join f-strings where:
|
||||
///
|
||||
/// ```python
|
||||
/// f"{10 + len('bar')=}"
|
||||
/// f'{10 + len("bar")=}'
|
||||
/// f""""{10 + len('''bar''')=}"""
|
||||
/// ```
|
||||
pub(super) fn is_fstring_with_quoted_debug_expression(
|
||||
fstring: &FString,
|
||||
context: &PyFormatContext,
|
||||
) -> bool {
|
||||
if fstring.elements.expressions().any(|expression| {
|
||||
if expression.debug_text.is_some() {
|
||||
let content = context.locator().slice(expression.range());
|
||||
match fstring.flags.quote_style() {
|
||||
Quote::Single => {
|
||||
if fstring.flags.is_triple_quoted() {
|
||||
content.contains(r#"""""#)
|
||||
} else {
|
||||
content.contains('"')
|
||||
}
|
||||
}
|
||||
Quote::Double => {
|
||||
if fstring.flags.is_triple_quoted() {
|
||||
content.contains("'''")
|
||||
} else {
|
||||
content.contains('\'')
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}) {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Tests if the `fstring` contains any triple quoted string, byte, or f-string literal that
|
||||
/// contains a quote character opposite to its own quote character.
|
||||
///
|
||||
/// ```python
|
||||
/// f'{"""other " """}'
|
||||
/// ```
|
||||
///
|
||||
/// We can't flip the quote of the outer f-string because it would result in invalid syntax:
|
||||
/// ```python
|
||||
/// f"{'''other " '''}'
|
||||
/// ```
|
||||
pub(super) fn is_fstring_with_triple_quoted_literal_expression_containing_quotes(
|
||||
fstring: &FString,
|
||||
context: &PyFormatContext,
|
||||
) -> bool {
|
||||
struct Visitor<'a> {
|
||||
context: &'a PyFormatContext<'a>,
|
||||
found: bool,
|
||||
}
|
||||
|
||||
impl Visitor<'_> {
|
||||
fn visit_string_like_part(&mut self, part: StringLikePart) {
|
||||
if !part.flags().is_triple_quoted() || self.found {
|
||||
return;
|
||||
}
|
||||
|
||||
let contains_quotes = match part {
|
||||
StringLikePart::String(_) | StringLikePart::Bytes(_) => {
|
||||
self.contains_quote(part.content_range(), part.flags())
|
||||
}
|
||||
StringLikePart::FString(fstring) => {
|
||||
let mut contains_quotes = false;
|
||||
for literal in fstring.elements.literals() {
|
||||
if self.contains_quote(literal.range(), fstring.flags.into()) {
|
||||
contains_quotes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
contains_quotes
|
||||
}
|
||||
};
|
||||
|
||||
if contains_quotes {
|
||||
self.found = true;
|
||||
}
|
||||
}
|
||||
|
||||
fn contains_quote(&self, range: TextRange, flags: AnyStringFlags) -> bool {
|
||||
self.context
|
||||
.locator()
|
||||
.slice(range)
|
||||
.contains(flags.quote_style().as_char())
|
||||
}
|
||||
}
|
||||
|
||||
impl SourceOrderVisitor<'_> for Visitor<'_> {
|
||||
fn visit_f_string(&mut self, f_string: &FString) {
|
||||
self.visit_string_like_part(StringLikePart::FString(f_string));
|
||||
}
|
||||
|
||||
fn visit_string_literal(&mut self, string_literal: &StringLiteral) {
|
||||
self.visit_string_like_part(StringLikePart::String(string_literal));
|
||||
}
|
||||
|
||||
fn visit_bytes_literal(&mut self, bytes_literal: &BytesLiteral) {
|
||||
self.visit_string_like_part(StringLikePart::Bytes(bytes_literal));
|
||||
}
|
||||
}
|
||||
|
||||
let mut visitor = Visitor {
|
||||
context,
|
||||
found: false,
|
||||
};
|
||||
|
||||
ruff_python_ast::visitor::source_order::walk_f_string(&mut visitor, fstring);
|
||||
|
||||
visitor.found
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use super::UnicodeEscape;
|
||||
use crate::string::normalize_string;
|
||||
use ruff_python_ast::{
|
||||
str::Quote,
|
||||
str_prefix::{AnyStringPrefix, ByteStringPrefix},
|
||||
AnyStringFlags,
|
||||
};
|
||||
|
||||
use crate::string::normalize_string;
|
||||
|
||||
use super::UnicodeEscape;
|
||||
|
||||
#[test]
|
||||
fn normalize_32_escape() {
|
||||
let escape_sequence = UnicodeEscape::new('U', true).unwrap();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue