Avoid repeating function calls in f-string conversions (#10265)

## Summary

Given a format string like `"{x} {x}".format(x=foo())`, we should avoid
converting to an f-string, since doing so would require repeating the
function call (`f"{foo()} {foo()}"`), which could introduce side
effects.

Closes https://github.com/astral-sh/ruff/issues/10258.
This commit is contained in:
Charlie Marsh 2024-03-06 20:33:19 -08:00 committed by GitHub
parent b9264a5a11
commit 461cdad53a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 192 additions and 113 deletions

View file

@ -252,3 +252,10 @@ raise ValueError(
# The dictionary should be parenthesized. # The dictionary should be parenthesized.
"{}".format({0: 1}()) "{}".format({0: 1}())
# The string shouldn't be converted, since it would require repeating the function call.
"{x} {x}".format(x=foo())
"{0} {0}".format(foo())
# The string _should_ be converted, since the function call is repeated in the arguments.
"{0} {1}".format(foo(), foo())

View file

@ -1,10 +1,11 @@
use std::borrow::Cow; use std::borrow::Cow;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use rustc_hash::FxHashMap; use rustc_hash::{FxHashMap, FxHashSet};
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::any_over_expr;
use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_ast::{self as ast, Expr, Keyword}; use ruff_python_ast::{self as ast, Expr, Keyword};
use ruff_python_literal::format::{ use ruff_python_literal::format::{
@ -56,7 +57,7 @@ impl Violation for FString {
} }
/// Like [`FormatSummary`], but maps positional and keyword arguments to their /// Like [`FormatSummary`], but maps positional and keyword arguments to their
/// values. For example, given `{a} {b}".format(a=1, b=2)`, `FormatFunction` /// values. For example, given `{a} {b}".format(a=1, b=2)`, [`FormatSummary`]
/// would include `"a"` and `'b'` in `kwargs`, mapped to `1` and `2` /// would include `"a"` and `'b'` in `kwargs`, mapped to `1` and `2`
/// respectively. /// respectively.
#[derive(Debug)] #[derive(Debug)]
@ -106,11 +107,11 @@ impl<'a> FormatSummaryValues<'a> {
}) })
} }
/// Return the next positional argument. /// Return the next positional index.
fn arg_auto(&mut self) -> Option<&Expr> { fn arg_auto(&mut self) -> usize {
let idx = self.auto_index; let idx = self.auto_index;
self.auto_index += 1; self.auto_index += 1;
self.arg_positional(idx) idx
} }
/// Return the positional argument at the given index. /// Return the positional argument at the given index.
@ -216,130 +217,178 @@ fn formatted_expr<'a>(expr: &Expr, context: FormatContext, locator: &Locator<'a>
} }
} }
/// Convert a string `.format` call to an f-string. #[derive(Debug, Clone)]
/// enum FStringConversion {
/// Returns `None` if the string does not require conversion, and `Err` if the conversion /// The format string only contains literal parts.
/// is not possible. Literal,
fn try_convert_to_f_string( /// The format call uses arguments with side effects which are repeated within the
range: TextRange, /// format string. For example: `"{x} {x}".format(x=foo())`.
summary: &mut FormatSummaryValues, SideEffects,
locator: &Locator, /// The format string should be converted to an f-string.
) -> Result<Option<String>> { Convert(String),
let contents = locator.slice(range); }
// Strip the unicode prefix. It's redundant in Python 3, and invalid when used #[derive(Debug, Clone, Eq, PartialEq, Hash)]
// with f-strings. enum IndexOrKeyword {
let contents = if contents.starts_with('U') || contents.starts_with('u') { /// The field uses a positional index.
&contents[1..] Index(usize),
} else { /// The field uses a keyword name.
contents Keyword(String),
}; }
// Temporarily strip the raw prefix, if present. It will be prepended to the result, before the impl FStringConversion {
// 'f', to match the prefix order both the Ruff formatter (and Black) use when formatting code. /// Convert a string `.format` call to an f-string.
let raw = contents.starts_with('R') || contents.starts_with('r'); fn try_convert(
let contents = if raw { &contents[1..] } else { contents }; range: TextRange,
summary: &mut FormatSummaryValues,
locator: &Locator,
) -> Result<Self> {
let contents = locator.slice(range);
// Remove the leading and trailing quotes. // Strip the unicode prefix. It's redundant in Python 3, and invalid when used
let leading_quote = leading_quote(contents).context("Unable to identify leading quote")?; // with f-strings.
let trailing_quote = trailing_quote(contents).context("Unable to identify trailing quote")?; let contents = if contents.starts_with('U') || contents.starts_with('u') {
let contents = &contents[leading_quote.len()..contents.len() - trailing_quote.len()]; &contents[1..]
if contents.is_empty() { } else {
return Ok(None); contents
} };
// Parse the format string. // Temporarily strip the raw prefix, if present. It will be prepended to the result, before the
let format_string = FormatString::from_str(contents)?; // 'f', to match the prefix order both the Ruff formatter (and Black) use when formatting code.
let raw = contents.starts_with('R') || contents.starts_with('r');
let contents = if raw { &contents[1..] } else { contents };
if format_string // Remove the leading and trailing quotes.
.format_parts let leading_quote = leading_quote(contents).context("Unable to identify leading quote")?;
.iter() let trailing_quote =
.all(|part| matches!(part, FormatPart::Literal(..))) trailing_quote(contents).context("Unable to identify trailing quote")?;
{ let contents = &contents[leading_quote.len()..contents.len() - trailing_quote.len()];
return Ok(None);
}
let mut converted = String::with_capacity(contents.len()); // If the format string is empty, it doesn't need to be converted.
for part in format_string.format_parts { if contents.is_empty() {
match part { return Ok(Self::Literal);
FormatPart::Field { }
field_name,
conversion_spec,
format_spec,
} => {
converted.push('{');
let field = FieldName::parse(&field_name)?; // Parse the format string.
let arg = match field.field_type { let format_string = FormatString::from_str(contents)?;
FieldType::Auto => summary.arg_auto(),
FieldType::Index(index) => summary.arg_positional(index),
FieldType::Keyword(name) => summary.arg_keyword(&name),
}
.context("Unable to parse field")?;
converted.push_str(&formatted_expr(
arg,
if field.parts.is_empty() {
FormatContext::Bare
} else {
FormatContext::Accessed
},
locator,
));
for part in field.parts { // If the format string contains only literal parts, it doesn't need to be converted.
match part { if format_string
FieldNamePart::Attribute(name) => { .format_parts
converted.push('.'); .iter()
converted.push_str(&name); .all(|part| matches!(part, FormatPart::Literal(..)))
{
return Ok(Self::Literal);
}
let mut converted = String::with_capacity(contents.len());
let mut seen = FxHashSet::default();
for part in format_string.format_parts {
match part {
FormatPart::Field {
field_name,
conversion_spec,
format_spec,
} => {
converted.push('{');
let field = FieldName::parse(&field_name)?;
// Map from field type to specifier.
let specifier = match field.field_type {
FieldType::Auto => IndexOrKeyword::Index(summary.arg_auto()),
FieldType::Index(index) => IndexOrKeyword::Index(index),
FieldType::Keyword(name) => IndexOrKeyword::Keyword(name),
};
let arg = match &specifier {
IndexOrKeyword::Index(index) => {
summary.arg_positional(*index).ok_or_else(|| {
anyhow::anyhow!("Positional argument {index} is missing")
})?
} }
FieldNamePart::Index(index) => { IndexOrKeyword::Keyword(name) => {
converted.push('['); summary.arg_keyword(name).ok_or_else(|| {
converted.push_str(index.to_string().as_str()); anyhow::anyhow!("Keyword argument '{name}' is missing")
converted.push(']'); })?
} }
FieldNamePart::StringIndex(index) => { };
let quote = match trailing_quote {
"'" | "'''" | "\"\"\"" => '"', // If the argument contains a side effect, and it's repeated in the format
"\"" => '\'', // string, we can't convert the format string to an f-string. For example,
_ => unreachable!("invalid trailing quote"), // converting `"{x} {x}".format(x=foo())` would result in `f"{foo()} {foo()}"`,
}; // which would call `foo()` twice.
converted.push('['); if !seen.insert(specifier) {
converted.push(quote); if any_over_expr(arg, &Expr::is_call_expr) {
converted.push_str(&index); return Ok(Self::SideEffects);
converted.push(quote);
converted.push(']');
} }
} }
}
if let Some(conversion_spec) = conversion_spec { converted.push_str(&formatted_expr(
converted.push('!'); arg,
converted.push(conversion_spec); if field.parts.is_empty() {
} FormatContext::Bare
} else {
FormatContext::Accessed
},
locator,
));
if !format_spec.is_empty() { for part in field.parts {
converted.push(':'); match part {
converted.push_str(&format_spec); FieldNamePart::Attribute(name) => {
} converted.push('.');
converted.push_str(&name);
}
FieldNamePart::Index(index) => {
converted.push('[');
converted.push_str(index.to_string().as_str());
converted.push(']');
}
FieldNamePart::StringIndex(index) => {
let quote = match trailing_quote {
"'" | "'''" | "\"\"\"" => '"',
"\"" => '\'',
_ => unreachable!("invalid trailing quote"),
};
converted.push('[');
converted.push(quote);
converted.push_str(&index);
converted.push(quote);
converted.push(']');
}
}
}
converted.push('}'); if let Some(conversion_spec) = conversion_spec {
} converted.push('!');
FormatPart::Literal(value) => { converted.push(conversion_spec);
converted.push_str(&curly_escape(&value)); }
if !format_spec.is_empty() {
converted.push(':');
converted.push_str(&format_spec);
}
converted.push('}');
}
FormatPart::Literal(value) => {
converted.push_str(&curly_escape(&value));
}
} }
} }
}
// Construct the format string. // Construct the format string.
let mut contents = String::with_capacity(usize::from(raw) + 1 + converted.len()); let mut contents = String::with_capacity(usize::from(raw) + 1 + converted.len());
if raw { if raw {
contents.push('r'); contents.push('r');
}
contents.push('f');
contents.push_str(leading_quote);
contents.push_str(&converted);
contents.push_str(trailing_quote);
Ok(Self::Convert(contents))
} }
contents.push('f');
contents.push_str(leading_quote);
contents.push_str(&converted);
contents.push_str(trailing_quote);
Ok(Some(contents))
} }
/// UP032 /// UP032
@ -389,13 +438,16 @@ pub(crate) fn f_strings(
break range.start(); break range.start();
} }
Some((Tok::String { .. }, range)) => { Some((Tok::String { .. }, range)) => {
match try_convert_to_f_string(range, &mut summary, checker.locator()) { match FStringConversion::try_convert(range, &mut summary, checker.locator()) {
Ok(Some(fstring)) => patches.push((range, fstring)), Ok(FStringConversion::Convert(fstring)) => patches.push((range, fstring)),
// Convert escaped curly brackets e.g. `{{` to `{` in literal string parts // Convert escaped curly brackets e.g. `{{` to `{` in literal string parts
Ok(None) => patches.push(( Ok(FStringConversion::Literal) => patches.push((
range, range,
curly_unescape(checker.locator().slice(range)).to_string(), curly_unescape(checker.locator().slice(range)).to_string(),
)), )),
// If the format string contains side effects that would need to be repeated,
// we can't convert it to an f-string.
Ok(FStringConversion::SideEffects) => return,
// If any of the segments fail to convert, then we can't convert the entire // If any of the segments fail to convert, then we can't convert the entire
// expression. // expression.
Err(_) => return, Err(_) => return,

View file

@ -1256,6 +1256,8 @@ UP032_0.py:254:1: UP032 [*] Use f-string instead of `format` call
253 | # The dictionary should be parenthesized. 253 | # The dictionary should be parenthesized.
254 | "{}".format({0: 1}()) 254 | "{}".format({0: 1}())
| ^^^^^^^^^^^^^^^^^^^^^ UP032 | ^^^^^^^^^^^^^^^^^^^^^ UP032
255 |
256 | # The string shouldn't be converted, since it would require repeating the function call.
| |
= help: Convert to f-string = help: Convert to f-string
@ -1265,3 +1267,21 @@ UP032_0.py:254:1: UP032 [*] Use f-string instead of `format` call
253 253 | # The dictionary should be parenthesized. 253 253 | # The dictionary should be parenthesized.
254 |-"{}".format({0: 1}()) 254 |-"{}".format({0: 1}())
254 |+f"{({0: 1}())}" 254 |+f"{({0: 1}())}"
255 255 |
256 256 | # The string shouldn't be converted, since it would require repeating the function call.
257 257 | "{x} {x}".format(x=foo())
UP032_0.py:261:1: UP032 [*] Use f-string instead of `format` call
|
260 | # The string _should_ be converted, since the function call is repeated in the arguments.
261 | "{0} {1}".format(foo(), foo())
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP032
|
= help: Convert to f-string
Safe fix
258 258 | "{0} {0}".format(foo())
259 259 |
260 260 | # The string _should_ be converted, since the function call is repeated in the arguments.
261 |-"{0} {1}".format(foo(), foo())
261 |+f"{foo()} {foo()}"