Implement template strings (#17851)

This PR implements template strings (t-strings) in the parser and
formatter for Ruff.

Only the minimal changes necessary to compile were made in other parts of the code (e.g. ty and the linter). These will be covered properly in follow-up PRs.
This commit is contained in:
Dylan 2025-05-30 15:00:56 -05:00 committed by GitHub
parent ad024f9a09
commit 9bbf4987e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
261 changed files with 18023 additions and 1802 deletions

View file

@ -2,28 +2,31 @@ use itertools::Itertools;
use ruff_formatter::{FormatContext, format_args, write};
use ruff_python_ast::str::{Quote, TripleQuotes};
use ruff_python_ast::str_prefix::{
AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix,
AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, TStringPrefix,
};
use ruff_python_ast::{
AnyStringFlags, FString, InterpolatedStringElement, StringFlags, StringLike, StringLikePart,
TString,
};
use ruff_python_ast::{AnyStringFlags, FStringElement, StringFlags, StringLike, StringLikePart};
use ruff_source_file::LineRanges;
use ruff_text_size::{Ranged, TextRange};
use std::borrow::Cow;
use crate::comments::{leading_comments, trailing_comments};
use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space;
use crate::other::f_string::{FStringContext, FStringLayout};
use crate::other::f_string_element::FormatFStringExpressionElement;
use crate::other::interpolated_string::{InterpolatedStringContext, InterpolatedStringLayout};
use crate::other::interpolated_string_element::FormatInterpolatedElement;
use crate::prelude::*;
use crate::string::docstring::needs_chaperone_space;
use crate::string::normalize::{
QuoteMetadata, is_fstring_with_quoted_debug_expression,
is_fstring_with_quoted_format_spec_and_debug,
is_fstring_with_triple_quoted_literal_expression_containing_quotes,
is_interpolated_string_with_quoted_format_spec_and_debug,
};
use crate::string::{StringLikeExtensions, StringNormalizer, StringQuotes, normalize_string};
/// Formats any implicitly concatenated string. This could be any valid combination
/// of string, bytes or f-string literals.
/// of string, bytes, f-string, or t-string literals.
pub(crate) struct FormatImplicitConcatenatedString<'a> {
/// The implicitly concatenated string to format.
string: StringLike<'a>,
}
@ -98,6 +101,7 @@ impl Format<PyFormatContext<'_>> for FormatImplicitConcatenatedStringExpanded<'_
StringLikePart::String(part) => part.format().fmt(f),
StringLikePart::Bytes(bytes_literal) => bytes_literal.format().fmt(f),
StringLikePart::FString(part) => part.format().fmt(f),
StringLikePart::TString(part) => part.format().fmt(f),
});
let part_comments = comments.leading_dangling_trailing(part);
@ -138,7 +142,7 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
let first_part = string.parts().next()?;
// The string is either a regular string, f-string, or bytes string.
// The string is either a regular string, f-string, t-string, or bytes string.
let normalizer = StringNormalizer::from_context(context);
// Some if a part requires preserving its quotes.
@ -164,9 +168,34 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
return None;
}
if let StringLikePart::FString(fstring) = part {
if context.options().target_version().supports_pep_701() {
if is_fstring_with_quoted_format_spec_and_debug(fstring, context) {
match part {
StringLikePart::FString(fstring) => {
if matches!(string, StringLike::TString(_)) {
// Don't concatenate t-strings and f-strings
return None;
}
if context.options().target_version().supports_pep_701() {
if is_interpolated_string_with_quoted_format_spec_and_debug(
&fstring.elements,
fstring.flags.into(),
context,
) {
if preserve_quotes_requirement
.is_some_and(|quote| quote != part.flags().quote_style())
{
return None;
}
preserve_quotes_requirement = Some(part.flags().quote_style());
}
}
// Avoid invalid syntax for pre Python 312:
// * When joining parts that have debug expressions with quotes: `f"{10 + len('bar')=}" f'{10 + len("bar")=}'
// * When joining parts that contain triple quoted strings with quotes: `f"{'''test ' '''}" f'{"""other " """}'`
else if is_fstring_with_quoted_debug_expression(fstring, context)
|| is_fstring_with_triple_quoted_literal_expression_containing_quotes(
fstring, context,
)
{
if preserve_quotes_requirement
.is_some_and(|quote| quote != part.flags().quote_style())
{
@ -175,21 +204,21 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
preserve_quotes_requirement = Some(part.flags().quote_style());
}
}
// Avoid invalid syntax for pre Python 312:
// * When joining parts that have debug expressions with quotes: `f"{10 + len('bar')=}" f'{10 + len("bar")=}'
// * When joining parts that contain triple quoted strings with quotes: `f"{'''test ' '''}" f'{"""other " """}'`
else if is_fstring_with_quoted_debug_expression(fstring, context)
|| is_fstring_with_triple_quoted_literal_expression_containing_quotes(
fstring, context,
)
{
if preserve_quotes_requirement
.is_some_and(|quote| quote != part.flags().quote_style())
{
return None;
StringLikePart::TString(tstring) => {
if is_interpolated_string_with_quoted_format_spec_and_debug(
&tstring.elements,
tstring.flags.into(),
context,
) {
if preserve_quotes_requirement
.is_some_and(|quote| quote != part.flags().quote_style())
{
return None;
}
preserve_quotes_requirement = Some(part.flags().quote_style());
}
preserve_quotes_requirement = Some(part.flags().quote_style());
}
StringLikePart::Bytes(_) | StringLikePart::String(_) => {}
}
}
@ -203,6 +232,7 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
StringLike::String(_) => AnyStringPrefix::Regular(StringLiteralPrefix::Empty),
StringLike::Bytes(_) => AnyStringPrefix::Bytes(ByteStringPrefix::Regular),
StringLike::FString(_) => AnyStringPrefix::Format(FStringPrefix::Regular),
StringLike::TString(_) => AnyStringPrefix::Template(TStringPrefix::Regular),
};
let quote = if let Some(quote) = preserve_quotes_requirement {
@ -287,7 +317,7 @@ impl Format<PyFormatContext<'_>> for FormatImplicitConcatenatedStringFlat<'_> {
FormatLiteralContent {
range: part.content_range(),
flags: self.flags,
is_fstring: false,
is_interpolated_string: false,
trim_start: first_non_empty && self.docstring,
trim_end: self.docstring && parts.peek().is_none(),
}
@ -300,28 +330,32 @@ impl Format<PyFormatContext<'_>> for FormatImplicitConcatenatedStringFlat<'_> {
}
}
StringLikePart::FString(f_string) => {
for element in &f_string.elements {
StringLikePart::FString(FString { elements, .. })
| StringLikePart::TString(TString { elements, .. }) => {
for element in elements {
match element {
FStringElement::Literal(literal) => {
InterpolatedStringElement::Literal(literal) => {
FormatLiteralContent {
range: literal.range(),
flags: self.flags,
is_fstring: true,
is_interpolated_string: true,
trim_end: false,
trim_start: false,
}
.fmt(f)?;
}
// Formatting the expression here and in the expanded version is safe **only**
// because we assert that the f-string never contains any comments.
FStringElement::Expression(expression) => {
let context = FStringContext::new(
// because we assert that the f/t-string never contains any comments.
InterpolatedStringElement::Interpolation(expression) => {
let context = InterpolatedStringContext::new(
self.flags,
FStringLayout::from_f_string(f_string, f.context().source()),
InterpolatedStringLayout::from_interpolated_string_elements(
elements,
f.context().source(),
),
);
FormatFStringExpressionElement::new(expression, context).fmt(f)?;
FormatInterpolatedElement::new(expression, context).fmt(f)?;
}
}
}
@ -336,7 +370,7 @@ impl Format<PyFormatContext<'_>> for FormatImplicitConcatenatedStringFlat<'_> {
struct FormatLiteralContent {
range: TextRange,
flags: AnyStringFlags,
is_fstring: bool,
is_interpolated_string: bool,
trim_start: bool,
trim_end: bool,
}
@ -348,7 +382,7 @@ impl Format<PyFormatContext<'_>> for FormatLiteralContent {
content,
0,
self.flags,
self.flags.is_f_string() && !self.is_fstring,
self.flags.is_interpolated_string() && !self.is_interpolated_string,
);
// Trim the start and end of the string if it's the first or last part of a docstring.

View file

@ -85,57 +85,55 @@ pub(crate) trait StringLikeExtensions {
impl StringLikeExtensions for ast::StringLike<'_> {
fn is_multiline(&self, context: &PyFormatContext) -> bool {
// Shared helper for f-string and t-string parts. Returns `true` as soon as one
// element contains a relevant line break or has comments attached.
fn contains_line_break_or_comments(
    elements: &ast::InterpolatedStringElements,
    context: &PyFormatContext,
    triple_quotes: TripleQuotes,
) -> bool {
    for element in elements {
        match element {
            // A line break inside literal text only counts when `triple_quotes` is yes.
            ast::InterpolatedStringElement::Literal(literal) => {
                if triple_quotes.is_yes() && context.source().contains_line_break(literal.range())
                {
                    return true;
                }
            }
            ast::InterpolatedStringElement::Interpolation(interpolation) => {
                // Interpolations containing comments can't be joined.
                if context.comments().contains_comments(interpolation.into()) {
                    return true;
                }

                // Format specifiers need to be checked as well. For example, the
                // following should be considered multiline because the literal
                // part of the format specifier contains a newline at the end
                // (`.3f\n`):
                //
                // ```py
                // x = f"hello {a + b + c + d:.3f
                // } world"
                // ```
                if let Some(spec) = interpolation.format_spec.as_deref() {
                    if contains_line_break_or_comments(&spec.elements, context, triple_quotes) {
                        return true;
                    }
                }

                // A line break in the leading or trailing debug text counts too.
                if let Some(debug_text) = interpolation.debug_text.as_ref() {
                    let leading_break = memchr2(b'\n', b'\r', debug_text.leading.as_bytes());
                    if leading_break.is_some()
                        || memchr2(b'\n', b'\r', debug_text.trailing.as_bytes()).is_some()
                    {
                        return true;
                    }
                }
            }
        }
    }

    false
}
self.parts().any(|part| match part {
StringLikePart::String(_) | StringLikePart::Bytes(_) => {
part.flags().is_triple_quoted()
&& context.source().contains_line_break(part.range())
}
StringLikePart::FString(f_string) => {
// Returns `true` as soon as one f-string element contains a relevant line
// break or has comments attached.
fn contains_line_break_or_comments(
    elements: &ast::FStringElements,
    context: &PyFormatContext,
    triple_quotes: TripleQuotes,
) -> bool {
    for element in elements {
        match element {
            // A line break inside literal text only counts when `triple_quotes` is yes.
            ast::FStringElement::Literal(literal) => {
                if triple_quotes.is_yes() && context.source().contains_line_break(literal.range())
                {
                    return true;
                }
            }
            ast::FStringElement::Expression(expression) => {
                // Expressions containing comments can't be joined.
                if context.comments().contains_comments(expression.into()) {
                    return true;
                }

                // Format specifiers need to be checked as well. For example, the
                // following should be considered multiline because the literal
                // part of the format specifier contains a newline at the end
                // (`.3f\n`):
                //
                // ```py
                // x = f"hello {a + b + c + d:.3f
                // } world"
                // ```
                if let Some(spec) = expression.format_spec.as_deref() {
                    if contains_line_break_or_comments(&spec.elements, context, triple_quotes) {
                        return true;
                    }
                }

                // A line break in the leading or trailing debug text counts too.
                if let Some(debug_text) = expression.debug_text.as_ref() {
                    let leading_break = memchr2(b'\n', b'\r', debug_text.leading.as_bytes());
                    if leading_break.is_some()
                        || memchr2(b'\n', b'\r', debug_text.trailing.as_bytes()).is_some()
                    {
                        return true;
                    }
                }
            }
        }
    }

    false
}
contains_line_break_or_comments(
&f_string.elements,
context,
f_string.flags.triple_quotes(),
)
}
StringLikePart::FString(f_string) => contains_line_break_or_comments(
&f_string.elements,
context,
f_string.flags.triple_quotes(),
),
StringLikePart::TString(t_string) => contains_line_break_or_comments(
&t_string.elements,
context,
t_string.flags.triple_quotes(),
),
})
}
}

View file

@ -5,16 +5,15 @@ use std::iter::FusedIterator;
use ruff_formatter::FormatContext;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_ast::{
AnyStringFlags, BytesLiteral, FString, FStringElement, FStringElements, FStringFlags,
AnyStringFlags, BytesLiteral, FString, InterpolatedStringElement, InterpolatedStringElements,
StringFlags, StringLikePart, StringLiteral,
str::{Quote, TripleQuotes},
};
use ruff_text_size::{Ranged, TextRange, TextSlice};
use crate::QuoteStyle;
use crate::context::FStringState;
use crate::context::InterpolatedStringState;
use crate::prelude::*;
use crate::string::StringQuotes;
use crate::string::{Quote, StringQuotes, TripleQuotes};
pub(crate) struct StringNormalizer<'a, 'src> {
preferred_quote_style: Option<QuoteStyle>,
@ -47,11 +46,11 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
.unwrap_or(self.context.options().quote_style());
let supports_pep_701 = self.context.options().target_version().supports_pep_701();
// For f-strings prefer alternating the quotes unless the outer string is triple quoted and the inner isn't.
if let FStringState::InsideExpressionElement(parent_context) = self.context.f_string_state()
// For f-strings and t-strings prefer alternating the quotes unless the outer string is triple quoted and the inner isn't.
if let InterpolatedStringState::InsideInterpolatedElement(parent_context) =
self.context.interpolated_string_state()
{
let parent_flags = parent_context.f_string().flags();
let parent_flags = parent_context.interpolated_string().flags();
if !parent_flags.is_triple_quoted() || string.flags().is_triple_quoted() {
// This logic is even necessary when using preserve and the target python version doesn't support PEP701 because
// we might end up joining two f-strings that have different quote styles, in which case we need to alternate the quotes
@ -67,33 +66,49 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
return QuoteStyle::Preserve;
}
// There are cases where it is necessary to preserve the quotes to prevent an invalid f-string.
if let StringLikePart::FString(fstring) = string {
// There are two cases where it's necessary to preserve the quotes if the
// target version is pre 3.12 and the part is an f-string.
if !supports_pep_701 {
// An f-string expression contains a debug text with a quote character
// because the formatter will emit the debug expression **exactly** the
// same as in the source text.
if is_fstring_with_quoted_debug_expression(fstring, self.context) {
return QuoteStyle::Preserve;
// There are cases where it is necessary to preserve the quotes to prevent an invalid f-string or t-string.
match string {
StringLikePart::FString(fstring) => {
// There are two cases where it's necessary to preserve the quotes if the
// target version is pre 3.12 and the part is an f-string.
if !supports_pep_701 {
// An f-string expression contains a debug text with a quote character
// because the formatter will emit the debug expression **exactly** the
// same as in the source text.
if is_fstring_with_quoted_debug_expression(fstring, self.context) {
return QuoteStyle::Preserve;
}
// An f-string expression that contains a triple quoted string literal
// expression that contains a quote.
if is_fstring_with_triple_quoted_literal_expression_containing_quotes(
fstring,
self.context,
) {
return QuoteStyle::Preserve;
}
}
// An f-string expression that contains a triple quoted string literal
// expression that contains a quote.
if is_fstring_with_triple_quoted_literal_expression_containing_quotes(
fstring,
// An f-string expression element contains a debug text and the corresponding
// format specifier has a literal element with a quote character.
if is_interpolated_string_with_quoted_format_spec_and_debug(
&fstring.elements,
fstring.flags.into(),
self.context,
) {
return QuoteStyle::Preserve;
}
}
// An f-string expression element contains a debug text and the corresponding
// format specifier has a literal element with a quote character.
if is_fstring_with_quoted_format_spec_and_debug(fstring, self.context) {
return QuoteStyle::Preserve;
StringLikePart::TString(tstring) => {
if is_interpolated_string_with_quoted_format_spec_and_debug(
&tstring.elements,
tstring.flags.into(),
self.context,
) {
return QuoteStyle::Preserve;
}
}
_ => {}
}
// Per PEP 8, always prefer double quotes for triple-quoted strings.
@ -172,7 +187,7 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
// The preferred quote style is single or double quotes, and the string contains a quote or
// another character that may require escaping
(Ok(preferred_quote), Some(first_quote_or_normalized_char_offset)) => {
let metadata = if string.is_fstring() {
let metadata = if string.is_interpolated_string() {
QuoteMetadata::from_part(string, self.context, preferred_quote)
} else {
QuoteMetadata::from_str(
@ -262,9 +277,19 @@ impl QuoteMetadata {
StringLikePart::FString(fstring) => {
let metadata = QuoteMetadata::from_str("", part.flags(), preferred_quote);
metadata.merge_fstring_elements(
metadata.merge_interpolated_string_elements(
&fstring.elements,
fstring.flags,
fstring.flags.into(),
context,
preferred_quote,
)
}
StringLikePart::TString(tstring) => {
let metadata = QuoteMetadata::from_str("", part.flags(), preferred_quote);
metadata.merge_interpolated_string_elements(
&tstring.elements,
tstring.flags.into(),
context,
preferred_quote,
)
@ -369,7 +394,7 @@ impl QuoteMetadata {
})
}
/// For f-strings, only consider the quotes inside string-literals but ignore
/// For f-strings and t-strings, only consider the quotes inside string-literals but ignore
/// quotes inside expressions (except inside the format spec). This allows both the outer and the nested literals
/// to make the optimal local-choice to reduce the total number of quotes necessary.
/// This doesn't require any pre 312 special handling because an expression
@ -377,10 +402,10 @@ impl QuoteMetadata {
/// ```python
/// f"{'escaping a quote like this \" is a syntax error pre 312'}"
/// ```
fn merge_fstring_elements(
fn merge_interpolated_string_elements(
self,
elements: &FStringElements,
flags: FStringFlags,
elements: &InterpolatedStringElements,
flags: AnyStringFlags,
context: &PyFormatContext,
preferred_quote: Quote,
) -> Self {
@ -388,19 +413,19 @@ impl QuoteMetadata {
for element in elements {
match element {
FStringElement::Literal(literal) => {
InterpolatedStringElement::Literal(literal) => {
merged = merged
.merge(&QuoteMetadata::from_str(
context.source().slice(literal),
flags.into(),
flags,
preferred_quote,
))
.expect("Merge to succeed because all parts have the same flags");
}
FStringElement::Expression(expression) => {
InterpolatedStringElement::Interpolation(expression) => {
if let Some(spec) = expression.format_spec.as_deref() {
if expression.debug_text.is_none() {
merged = merged.merge_fstring_elements(
merged = merged.merge_interpolated_string_elements(
&spec.elements,
flags,
context,
@ -879,7 +904,7 @@ pub(super) fn is_fstring_with_quoted_debug_expression(
fstring: &FString,
context: &PyFormatContext,
) -> bool {
fstring.elements.expressions().any(|expression| {
fstring.elements.interpolations().any(|expression| {
if expression.debug_text.is_some() {
let content = context.source().slice(expression);
contains_opposite_quote(content, fstring.flags.into())
@ -889,58 +914,6 @@ pub(super) fn is_fstring_with_quoted_debug_expression(
})
}
/// Returns `true` if `fstring` has any f-string expression element (direct or nested) with a debug expression and a format spec
/// that contains the opposite quote. It's important to preserve the quote style for those f-strings
/// because changing the quote style would result in invalid syntax.
///
/// ```python
/// f'{1=: "abcd \'\'}'
/// f'{x=:a{y:"abcd"}}'
/// f'{x=:a{y:{z:"abcd"}}}'
/// ```
pub(super) fn is_fstring_with_quoted_format_spec_and_debug(
    fstring: &FString,
    context: &PyFormatContext,
) -> bool {
    // Walks the elements of a format spec. `in_debug` is `true` once this
    // expression or any enclosing one carries debug text; only then does a
    // literal containing the opposite quote count.
    fn has_format_spec_with_opposite_quote(
        elements: &FStringElements,
        flags: FStringFlags,
        context: &PyFormatContext,
        in_debug: bool,
    ) -> bool {
        elements.iter().any(|element| match element {
            // Only slice the source when the result can matter.
            FStringElement::Literal(literal) => {
                in_debug && contains_opposite_quote(context.source().slice(literal), flags.into())
            }
            // Recurse into nested format specs, carrying the debug state along.
            FStringElement::Expression(expression) => {
                expression.format_spec.as_deref().is_some_and(|spec| {
                    has_format_spec_with_opposite_quote(
                        &spec.elements,
                        flags,
                        context,
                        in_debug || expression.debug_text.is_some(),
                    )
                })
            }
        })
    }

    // Expressions without a format spec can never match.
    fstring.elements.expressions().any(|expression| {
        expression.format_spec.as_deref().is_some_and(|spec| {
            has_format_spec_with_opposite_quote(
                &spec.elements,
                fstring.flags,
                context,
                expression.debug_text.is_some(),
            )
        })
    })
}
/// Tests if the `fstring` contains any triple quoted string, byte, or f-string literal that
/// contains a quote character opposite to its own quote character.
///
@ -980,6 +953,17 @@ pub(super) fn is_fstring_with_triple_quoted_literal_expression_containing_quotes
}
}
contains_quotes
}
StringLikePart::TString(tstring) => {
let mut contains_quotes = false;
for literal in tstring.elements.literals() {
if self.contains_quote(literal.range(), tstring.flags.into()) {
contains_quotes = true;
break;
}
}
contains_quotes
}
};
@ -1018,6 +1002,59 @@ pub(super) fn is_fstring_with_triple_quoted_literal_expression_containing_quotes
visitor.found
}
/// Returns `true` if `elements` contains any f/t-string interpolation element (direct or nested) with a debug expression and a format spec
/// that contains the opposite quote. It's important to preserve the quote style for those f/t-strings
/// because changing the quote style would result in invalid syntax.
///
/// `flags` are the flags of the string the elements belong to; they are passed to
/// `contains_opposite_quote` to decide which quote is the opposite one.
///
/// ```python
/// t'{1=: "abcd \'\'}'
/// t'{x=:a{y:"abcd"}}'
/// t'{x=:a{y:{z:"abcd"}}}'
/// ```
pub(super) fn is_interpolated_string_with_quoted_format_spec_and_debug(
    elements: &InterpolatedStringElements,
    flags: AnyStringFlags,
    context: &PyFormatContext,
) -> bool {
    // Walks the elements of a format spec. `in_debug` is `true` once this
    // interpolation or any enclosing one carries debug text; only then does a
    // literal containing the opposite quote count.
    fn has_format_spec_with_opposite_quote(
        elements: &InterpolatedStringElements,
        flags: AnyStringFlags,
        context: &PyFormatContext,
        in_debug: bool,
    ) -> bool {
        elements.iter().any(|element| match element {
            // Only slice the source when the result can matter.
            InterpolatedStringElement::Literal(literal) => {
                in_debug && contains_opposite_quote(context.source().slice(literal), flags)
            }
            // Recurse into nested format specs, carrying the debug state along.
            InterpolatedStringElement::Interpolation(expression) => {
                expression.format_spec.as_deref().is_some_and(|spec| {
                    has_format_spec_with_opposite_quote(
                        &spec.elements,
                        flags,
                        context,
                        in_debug || expression.debug_text.is_some(),
                    )
                })
            }
        })
    }

    // Interpolations without a format spec can never match.
    elements.interpolations().any(|expression| {
        expression.format_spec.as_deref().is_some_and(|spec| {
            has_format_spec_with_opposite_quote(
                &spec.elements,
                flags,
                context,
                expression.debug_text.is_some(),
            )
        })
    })
}
fn contains_opposite_quote(content: &str, flags: AnyStringFlags) -> bool {
if flags.is_triple_quoted() {
match flags.quote_style() {