Consider quotes inside format-specs when choosing the quotes for an f-string (#14493)

This commit is contained in:
Micha Reiser 2024-11-22 13:43:53 +01:00 committed by GitHub
parent 2917534279
commit b80de52592
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 325 additions and 80 deletions

View file

@ -15,3 +15,31 @@ f'{"""other " """ + "more"}'
f'{b"""other " """}'
f'{f"""other " """}'
# Regression tests for https://github.com/astral-sh/ruff/issues/13935
f'{1: hy "user"}'
f'{1:hy "user"}'
f'{1: abcd "{1}" }'
f'{1: abcd "{'aa'}" }'
f'{1=: "abcd {'aa'}}'
f'{x:a{z:hy "user"}} \'\'\''
# Changing the outer quotes is fine because the format-spec is in a nested expression.
f'{f'{z=:hy "user"}'} \'\'\''
# We have to be careful about changing the quotes if the f-string has a debug expression because it is inserted verbatim.
f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error
f'{1=: abcd \'\'}' # Changing the quotes here is fine because the inner quotes aren't the opposite quotes
f'{1=: abcd \"\"}' # Changing the quotes here is fine because the inner quotes are escaped
# Don't change the quotes in the following cases:
f'{x=:hy "user"} \'\'\''
f'{x=:a{y:hy "user"}} \'\'\''
f'{x=:a{y:{z:hy "user"}}} \'\'\''
f'{x:a{y=:{z:hy "user"}}} \'\'\''
# This is fine because the debug expression and format spec are in a nested expression
f"""{1=: "this" is fine}"""
f'''{1=: "this" is fine}''' # Change quotes to double quotes because they're preferred
f'{1=: {'ab"cd"'}}' # It's okay if the quotes are in an expression part.

View file

@ -319,3 +319,10 @@ assert False, "Implicit concatenated stringuses {} layout on {} format"[
assert False, +"Implicit concatenated string" "uses {} layout on {} format".format(
"Multiline", "first"
)
# Regression tests for https://github.com/astral-sh/ruff/issues/13935
"a" f'{1=: "abcd \'\'}'
f'{1=: "abcd \'\'}' "a"
f'{1=: "abcd \'\'}' f"{1=: 'abcd \"\"}"

View file

@ -20,7 +20,7 @@ use crate::preview::{
};
use crate::string::docstring::needs_chaperone_space;
use crate::string::normalize::{
is_fstring_with_quoted_debug_expression,
is_fstring_with_quoted_debug_expression, is_fstring_with_quoted_format_spec_and_debug,
is_fstring_with_triple_quoted_literal_expression_containing_quotes, QuoteMetadata,
};
use crate::string::{normalize_string, StringLikeExtensions, StringNormalizer, StringQuotes};
@ -206,15 +206,8 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
return None;
}
// Avoid invalid syntax for pre Python 312:
// * When joining parts that have debug expressions with quotes: `f"{10 + len('bar')=}" f'{10 + len("bar")=}'
// * When joining parts that contain triple quoted strings with quotes: `f"{'''test ' '''}" f'{"""other " """}'`
if !context.options().target_version().supports_pep_701() {
if is_fstring_with_quoted_debug_expression(fstring, context)
|| is_fstring_with_triple_quoted_literal_expression_containing_quotes(
fstring, context,
)
{
if context.options().target_version().supports_pep_701() {
if is_fstring_with_quoted_format_spec_and_debug(fstring, context) {
if preserve_quotes_requirement
.is_some_and(|quote| quote != part.flags().quote_style())
{
@ -223,6 +216,21 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
preserve_quotes_requirement = Some(part.flags().quote_style());
}
}
// Avoid invalid syntax for pre Python 312:
// * When joining parts that have debug expressions with quotes: `f"{10 + len('bar')=}" f'{10 + len("bar")=}'
// * When joining parts that contain triple quoted strings with quotes: `f"{'''test ' '''}" f'{"""other " """}'`
else if is_fstring_with_quoted_debug_expression(fstring, context)
|| is_fstring_with_triple_quoted_literal_expression_containing_quotes(
fstring, context,
)
{
if preserve_quotes_requirement
.is_some_and(|quote| quote != part.flags().quote_style())
{
return None;
}
preserve_quotes_requirement = Some(part.flags().quote_style());
}
}
}
@ -238,26 +246,30 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
StringLike::FString(_) => AnyStringPrefix::Format(FStringPrefix::Regular),
};
// Only determining the preferred quote for the first string is sufficient
// because we don't support joining triple quoted strings with non triple quoted strings.
let quote = if let Ok(preferred_quote) =
Quote::try_from(normalizer.preferred_quote_style(first_part))
{
for part in string.parts() {
let part_quote_metadata =
QuoteMetadata::from_part(part, context, preferred_quote);
if let Some(merged) = merged_quotes.as_mut() {
*merged = part_quote_metadata.merge(merged)?;
} else {
merged_quotes = Some(part_quote_metadata);
}
}
merged_quotes?.choose(preferred_quote)
let quote = if let Some(quote) = preserve_quotes_requirement {
quote
} else {
// Use the quotes of the first part if the quotes should be preserved.
first_part.flags().quote_style()
// Only determining the preferred quote for the first string is sufficient
// because we don't support joining triple quoted strings with non triple quoted strings.
if let Ok(preferred_quote) =
Quote::try_from(normalizer.preferred_quote_style(first_part))
{
for part in string.parts() {
let part_quote_metadata =
QuoteMetadata::from_part(part, context, preferred_quote);
if let Some(merged) = merged_quotes.as_mut() {
*merged = part_quote_metadata.merge(merged)?;
} else {
merged_quotes = Some(part_quote_metadata);
}
}
merged_quotes?.choose(preferred_quote)
} else {
// Use the quotes of the first part if the quotes should be preserved.
first_part.flags().quote_style()
}
};
Some(AnyStringFlags::new(prefix, quote, false))

View file

@ -5,7 +5,8 @@ use std::iter::FusedIterator;
use ruff_formatter::FormatContext;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_ast::{
str::Quote, AnyStringFlags, BytesLiteral, FString, StringFlags, StringLikePart, StringLiteral,
str::Quote, AnyStringFlags, BytesLiteral, FString, FStringElement, FStringElements,
FStringFlags, StringFlags, StringLikePart, StringLiteral,
};
use ruff_text_size::{Ranged, TextRange, TextSlice};
@ -78,6 +79,10 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
return QuoteStyle::Preserve;
}
}
} else if let StringLikePart::FString(fstring) = string {
if is_fstring_with_quoted_format_spec_and_debug(fstring, self.context) {
return QuoteStyle::Preserve;
}
}
// For f-strings prefer alternating the quotes unless The outer string is triple quoted and the inner isn't.
@ -262,37 +267,14 @@ impl QuoteMetadata {
}
StringLikePart::FString(fstring) => {
if is_f_string_formatting_enabled(context) {
// For f-strings, only consider the quotes inside string-literals but ignore
// quotes inside expressions. This allows both the outer and the nested literals
// to make the optimal local-choice to reduce the total number of quotes necessary.
// This doesn't require any pre 312 special handling because an expression
// can never contain the outer quote character, not even escaped:
// ```python
// f"{'escaping a quote like this \" is a syntax error pre 312'}"
// ```
let mut literals = fstring.elements.literals();
let metadata = QuoteMetadata::from_str("", part.flags(), preferred_quote);
let Some(first) = literals.next() else {
return QuoteMetadata::from_str("", part.flags(), preferred_quote);
};
let mut metadata = QuoteMetadata::from_str(
context.source().slice(first),
fstring.flags.into(),
metadata.merge_fstring_elements(
&fstring.elements,
fstring.flags,
context,
preferred_quote,
);
for literal in literals {
metadata = metadata
.merge(&QuoteMetadata::from_str(
context.source().slice(literal),
fstring.flags.into(),
preferred_quote,
))
.expect("Merge to succeed because all parts have the same flags");
}
metadata
)
} else {
let text = &context.source()[part.content_range()];
@ -398,6 +380,52 @@ impl QuoteMetadata {
source_style: self.source_style,
})
}
/// For f-strings, only consider the quotes inside string-literals but ignore
/// quotes inside expressions (except inside the format spec). This allows both the outer and the nested literals
/// to make the optimal local-choice to reduce the total number of quotes necessary.
/// This doesn't require any pre 312 special handling because an expression
/// can never contain the outer quote character, not even escaped:
/// ```python
/// f"{'escaping a quote like this \" is a syntax error pre 312'}"
/// ```
fn merge_fstring_elements(
self,
elements: &FStringElements,
flags: FStringFlags,
context: &PyFormatContext,
preferred_quote: Quote,
) -> Self {
let mut merged = self;
for element in elements {
match element {
FStringElement::Literal(literal) => {
merged = merged
.merge(&QuoteMetadata::from_str(
context.source().slice(literal),
flags.into(),
preferred_quote,
))
.expect("Merge to succeed because all parts have the same flags");
}
FStringElement::Expression(expression) => {
if let Some(spec) = expression.format_spec.as_deref() {
if expression.debug_text.is_none() {
merged = merged.merge_fstring_elements(
&spec.elements,
flags,
context,
preferred_quote,
);
}
}
}
}
}
merged
}
}
#[derive(Copy, Clone, Debug)]
@ -891,33 +919,66 @@ pub(super) fn is_fstring_with_quoted_debug_expression(
fstring: &FString,
context: &PyFormatContext,
) -> bool {
if fstring.elements.expressions().any(|expression| {
fstring.elements.expressions().any(|expression| {
if expression.debug_text.is_some() {
let content = context.source().slice(expression);
match fstring.flags.quote_style() {
Quote::Single => {
if fstring.flags.is_triple_quoted() {
content.contains(r#"""""#)
} else {
content.contains('"')
}
}
Quote::Double => {
if fstring.flags.is_triple_quoted() {
content.contains("'''")
} else {
content.contains('\'')
}
}
}
contains_opposite_quote(content, fstring.flags.into())
} else {
false
}
}) {
return true;
})
}
/// Returns `true` if `string` has any f-string expression element (direct or nested) with a debug expression and a format spec
/// that contains the opposite quote. It's important to preserve the quote style for those f-strings
/// because changing the quote style would result in invalid syntax.
///
/// ```python
/// f'{1=: "abcd \'\'}'
/// f'{x=:a{y:"abcd"}}'
/// f'{x=:a{y:{z:"abcd"}}}'
/// ```
pub(super) fn is_fstring_with_quoted_format_spec_and_debug(
fstring: &FString,
context: &PyFormatContext,
) -> bool {
fn has_format_spec_with_opposite_quote(
elements: &FStringElements,
flags: FStringFlags,
context: &PyFormatContext,
in_debug: bool,
) -> bool {
elements.iter().any(|element| match element {
FStringElement::Literal(literal) => {
let content = context.source().slice(literal);
in_debug && contains_opposite_quote(content, flags.into())
}
FStringElement::Expression(expression) => {
expression.format_spec.as_deref().is_some_and(|spec| {
has_format_spec_with_opposite_quote(
&spec.elements,
flags,
context,
in_debug || expression.debug_text.is_some(),
)
})
}
})
}
false
fstring.elements.expressions().any(|expression| {
if let Some(spec) = expression.format_spec.as_deref() {
return has_format_spec_with_opposite_quote(
&spec.elements,
fstring.flags,
context,
expression.debug_text.is_some(),
);
}
false
})
}
/// Tests if the `fstring` contains any triple quoted string, byte, or f-string literal that
@ -997,6 +1058,40 @@ pub(super) fn is_fstring_with_triple_quoted_literal_expression_containing_quotes
visitor.found
}
fn contains_opposite_quote(content: &str, flags: AnyStringFlags) -> bool {
if flags.is_triple_quoted() {
match flags.quote_style() {
Quote::Single => content.contains(r#"""""#),
Quote::Double => content.contains("'''"),
}
} else {
let mut rest = content;
while let Some(index) = rest.find(flags.quote_style().opposite().as_char()) {
// Quotes in raw strings can't be escaped
if flags.is_raw_string() {
return true;
}
// Only if the quote isn't escaped
if rest[..index]
.chars()
.rev()
.take_while(|c| *c == '\\')
.count()
% 2
== 0
{
return true;
}
rest = &rest[index + flags.quote_style().opposite().as_char().len_utf8()..];
}
false
}
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;

View file

@ -22,6 +22,34 @@ f'{"""other " """ + "more"}'
f'{b"""other " """}'
f'{f"""other " """}'
# Regression tests for https://github.com/astral-sh/ruff/issues/13935
f'{1: hy "user"}'
f'{1:hy "user"}'
f'{1: abcd "{1}" }'
f'{1: abcd "{'aa'}" }'
f'{1=: "abcd {'aa'}}'
f'{x:a{z:hy "user"}} \'\'\''
# Changing the outer quotes is fine because the format-spec is in a nested expression.
f'{f'{z=:hy "user"}'} \'\'\''
# We have to be careful about changing the quotes if the f-string has a debug expression because it is inserted verbatim.
f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error
f'{1=: abcd \'\'}' # Changing the quotes here is fine because the inner quotes aren't the opposite quotes
f'{1=: abcd \"\"}' # Changing the quotes here is fine because the inner quotes are escaped
# Don't change the quotes in the following cases:
f'{x=:hy "user"} \'\'\''
f'{x=:a{y:hy "user"}} \'\'\''
f'{x=:a{y:{z:hy "user"}}} \'\'\''
f'{x:a{y=:{z:hy "user"}}} \'\'\''
# This is fine because the debug expression and format spec are in a nested expression
f"""{1=: "this" is fine}"""
f'''{1=: "this" is fine}''' # Change quotes to double quotes because they're preferred
f'{1=: {'ab"cd"'}}' # It's okay if the quotes are in an expression part.
```
## Outputs
@ -57,6 +85,35 @@ f'{"""other " """}'
f'{"""other " """ + "more"}'
f'{b"""other " """}'
f'{f"""other " """}'
# Regression tests for https://github.com/astral-sh/ruff/issues/13935
f'{1: hy "user"}'
f'{1:hy "user"}'
f'{1: abcd "{1}" }'
f'{1: abcd "{'aa'}" }'
f'{1=: "abcd {'aa'}}'
f'{x:a{z:hy "user"}} \'\'\''
# Changing the outer quotes is fine because the format-spec is in a nested expression.
f'{f'{z=:hy "user"}'} \'\'\''
# We have to be careful about changing the quotes if the f-string has a debug expression because it is inserted verbatim.
f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error
f'{1=: abcd \'\'}' # Changing the quotes here is fine because the inner quotes aren't the opposite quotes
f'{1=: abcd \"\"}' # Changing the quotes here is fine because the inner quotes are escaped
# Don't change the quotes in the following cases:
f'{x=:hy "user"} \'\'\''
f'{x=:a{y:hy "user"}} \'\'\''
f'{x=:a{y:{z:hy "user"}}} \'\'\''
f'{x:a{y=:{z:hy "user"}}} \'\'\''
# This is fine because the debug expression and format spec are in a nested expression
f"""{1=: "this" is fine}"""
f"""{1=: "this" is fine}""" # Change quotes to double quotes because they're preferred
f'{1=: {'ab"cd"'}}' # It's okay if the quotes are in an expression part.
```
@ -64,7 +121,7 @@ f'{f"""other " """}'
```diff
--- Stable
+++ Preview
@@ -6,11 +6,11 @@
@@ -6,32 +6,32 @@
f"{'a'}"
# 312+, it's okay to change the outer quotes even when there's a debug expression using the same quotes
@ -82,4 +139,36 @@ f'{f"""other " """}'
+f"{'''other " ''' + 'more'}"
+f"{b'''other " '''}"
+f"{f'''other " '''}"
# Regression tests for https://github.com/astral-sh/ruff/issues/13935
f'{1: hy "user"}'
f'{1:hy "user"}'
f'{1: abcd "{1}" }'
-f'{1: abcd "{'aa'}" }'
+f'{1: abcd "{"aa"}" }'
f'{1=: "abcd {'aa'}}'
-f'{x:a{z:hy "user"}} \'\'\''
+f"{x:a{z:hy \"user\"}} '''"
# Changing the outer quotes is fine because the format-spec is in a nested expression.
-f'{f'{z=:hy "user"}'} \'\'\''
+f"{f'{z=:hy "user"}'} '''"
# We have to be careful about changing the quotes if the f-string has a debug expression because it is inserted verbatim.
f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error
-f'{1=: abcd \'\'}' # Changing the quotes here is fine because the inner quotes aren't the opposite quotes
-f'{1=: abcd \"\"}' # Changing the quotes here is fine because the inner quotes are escaped
+f"{1=: abcd \'\'}" # Changing the quotes here is fine because the inner quotes aren't the opposite quotes
+f"{1=: abcd \"\"}" # Changing the quotes here is fine because the inner quotes are escaped
# Don't change the quotes in the following cases:
f'{x=:hy "user"} \'\'\''
f'{x=:a{y:hy "user"}} \'\'\''
@@ -42,4 +42,4 @@
f"""{1=: "this" is fine}"""
f"""{1=: "this" is fine}""" # Change quotes to double quotes because they're preferred
-f'{1=: {'ab"cd"'}}' # It's okay if the quotes are in an expression part.
+f"{1=: {'ab"cd"'}}" # It's okay if the quotes are in an expression part.
```

View file

@ -326,6 +326,13 @@ assert False, "Implicit concatenated stringuses {} layout on {} format"[
assert False, +"Implicit concatenated string" "uses {} layout on {} format".format(
"Multiline", "first"
)
# Regression tests for https://github.com/astral-sh/ruff/issues/13935
"a" f'{1=: "abcd \'\'}'
f'{1=: "abcd \'\'}' "a"
f'{1=: "abcd \'\'}' f"{1=: 'abcd \"\"}"
```
## Outputs
@ -733,4 +740,11 @@ assert False, "Implicit concatenated stringuses {} layout on {} format"[
assert False, +"Implicit concatenated stringuses {} layout on {} format".format(
"Multiline", "first"
)
# Regression tests for https://github.com/astral-sh/ruff/issues/13935
f'a{1=: "abcd \'\'}'
f'{1=: "abcd \'\'}a'
f'{1=: "abcd \'\'}' f"{1=: 'abcd \"\"}"
```