Avoid repeating function calls in f-string conversions (#10265)

## Summary

Given a format string like `"{x} {x}".format(x=foo())`, we should avoid
converting to an f-string, since doing so would require repeating the
function call (`f"{foo()} {foo()}"`), which could introduce side
effects.

Closes https://github.com/astral-sh/ruff/issues/10258.
This commit is contained in:
Charlie Marsh 2024-03-06 20:33:19 -08:00 committed by GitHub
parent b9264a5a11
commit 461cdad53a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 192 additions and 113 deletions

View file

@ -252,3 +252,10 @@ raise ValueError(
# The dictionary should be parenthesized.
"{}".format({0: 1}())
# The string shouldn't be converted, since it would require repeating the function call.
"{x} {x}".format(x=foo())
"{0} {0}".format(foo())
# The string _should_ be converted, since the function call is repeated in the arguments.
"{0} {1}".format(foo(), foo())

View file

@ -1,10 +1,11 @@
use std::borrow::Cow;
use anyhow::{Context, Result};
use rustc_hash::FxHashMap;
use rustc_hash::{FxHashMap, FxHashSet};
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::any_over_expr;
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_ast::{self as ast, Expr, Keyword};
use ruff_python_literal::format::{
@ -56,7 +57,7 @@ impl Violation for FString {
}
/// Like [`FormatSummary`], but maps positional and keyword arguments to their
/// values. For example, given `{a} {b}".format(a=1, b=2)`, `FormatFunction`
/// values. For example, given `{a} {b}".format(a=1, b=2)`, [`FormatSummary`]
/// would include `"a"` and `'b'` in `kwargs`, mapped to `1` and `2`
/// respectively.
#[derive(Debug)]
@ -106,11 +107,11 @@ impl<'a> FormatSummaryValues<'a> {
})
}
/// Return the next positional argument.
fn arg_auto(&mut self) -> Option<&Expr> {
/// Return the next positional index.
fn arg_auto(&mut self) -> usize {
let idx = self.auto_index;
self.auto_index += 1;
self.arg_positional(idx)
idx
}
/// Return the positional argument at the given index.
@ -216,15 +217,32 @@ fn formatted_expr<'a>(expr: &Expr, context: FormatContext, locator: &Locator<'a>
}
}
#[derive(Debug, Clone)]
enum FStringConversion {
/// The format string only contains literal parts.
Literal,
/// The format call uses arguments with side effects which are repeated within the
/// format string. For example: `"{x} {x}".format(x=foo())`.
SideEffects,
/// The format string should be converted to an f-string.
Convert(String),
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
enum IndexOrKeyword {
/// The field uses a positional index.
Index(usize),
/// The field uses a keyword name.
Keyword(String),
}
impl FStringConversion {
/// Convert a string `.format` call to an f-string.
///
/// Returns `None` if the string does not require conversion, and `Err` if the conversion
/// is not possible.
fn try_convert_to_f_string(
fn try_convert(
range: TextRange,
summary: &mut FormatSummaryValues,
locator: &Locator,
) -> Result<Option<String>> {
) -> Result<Self> {
let contents = locator.slice(range);
// Strip the unicode prefix. It's redundant in Python 3, and invalid when used
@ -242,24 +260,29 @@ fn try_convert_to_f_string(
// Remove the leading and trailing quotes.
let leading_quote = leading_quote(contents).context("Unable to identify leading quote")?;
let trailing_quote = trailing_quote(contents).context("Unable to identify trailing quote")?;
let trailing_quote =
trailing_quote(contents).context("Unable to identify trailing quote")?;
let contents = &contents[leading_quote.len()..contents.len() - trailing_quote.len()];
// If the format string is empty, it doesn't need to be converted.
if contents.is_empty() {
return Ok(None);
return Ok(Self::Literal);
}
// Parse the format string.
let format_string = FormatString::from_str(contents)?;
// If the format string contains only literal parts, it doesn't need to be converted.
if format_string
.format_parts
.iter()
.all(|part| matches!(part, FormatPart::Literal(..)))
{
return Ok(None);
return Ok(Self::Literal);
}
let mut converted = String::with_capacity(contents.len());
let mut seen = FxHashSet::default();
for part in format_string.format_parts {
match part {
FormatPart::Field {
@ -270,12 +293,37 @@ fn try_convert_to_f_string(
converted.push('{');
let field = FieldName::parse(&field_name)?;
let arg = match field.field_type {
FieldType::Auto => summary.arg_auto(),
FieldType::Index(index) => summary.arg_positional(index),
FieldType::Keyword(name) => summary.arg_keyword(&name),
// Map from field type to specifier.
let specifier = match field.field_type {
FieldType::Auto => IndexOrKeyword::Index(summary.arg_auto()),
FieldType::Index(index) => IndexOrKeyword::Index(index),
FieldType::Keyword(name) => IndexOrKeyword::Keyword(name),
};
let arg = match &specifier {
IndexOrKeyword::Index(index) => {
summary.arg_positional(*index).ok_or_else(|| {
anyhow::anyhow!("Positional argument {index} is missing")
})?
}
.context("Unable to parse field")?;
IndexOrKeyword::Keyword(name) => {
summary.arg_keyword(name).ok_or_else(|| {
anyhow::anyhow!("Keyword argument '{name}' is missing")
})?
}
};
// If the argument contains a side effect, and it's repeated in the format
// string, we can't convert the format string to an f-string. For example,
// converting `"{x} {x}".format(x=foo())` would result in `f"{foo()} {foo()}"`,
// which would call `foo()` twice.
if !seen.insert(specifier) {
if any_over_expr(arg, &Expr::is_call_expr) {
return Ok(Self::SideEffects);
}
}
converted.push_str(&formatted_expr(
arg,
if field.parts.is_empty() {
@ -339,7 +387,8 @@ fn try_convert_to_f_string(
contents.push_str(leading_quote);
contents.push_str(&converted);
contents.push_str(trailing_quote);
Ok(Some(contents))
Ok(Self::Convert(contents))
}
}
/// UP032
@ -389,13 +438,16 @@ pub(crate) fn f_strings(
break range.start();
}
Some((Tok::String { .. }, range)) => {
match try_convert_to_f_string(range, &mut summary, checker.locator()) {
Ok(Some(fstring)) => patches.push((range, fstring)),
match FStringConversion::try_convert(range, &mut summary, checker.locator()) {
Ok(FStringConversion::Convert(fstring)) => patches.push((range, fstring)),
// Convert escaped curly brackets e.g. `{{` to `{` in literal string parts
Ok(None) => patches.push((
Ok(FStringConversion::Literal) => patches.push((
range,
curly_unescape(checker.locator().slice(range)).to_string(),
)),
// If the format string contains side effects that would need to be repeated,
// we can't convert it to an f-string.
Ok(FStringConversion::SideEffects) => return,
// If any of the segments fail to convert, then we can't convert the entire
// expression.
Err(_) => return,

View file

@ -1256,6 +1256,8 @@ UP032_0.py:254:1: UP032 [*] Use f-string instead of `format` call
253 | # The dictionary should be parenthesized.
254 | "{}".format({0: 1}())
| ^^^^^^^^^^^^^^^^^^^^^ UP032
255 |
256 | # The string shouldn't be converted, since it would require repeating the function call.
|
= help: Convert to f-string
@ -1265,3 +1267,21 @@ UP032_0.py:254:1: UP032 [*] Use f-string instead of `format` call
253 253 | # The dictionary should be parenthesized.
254 |-"{}".format({0: 1}())
254 |+f"{({0: 1}())}"
255 255 |
256 256 | # The string shouldn't be converted, since it would require repeating the function call.
257 257 | "{x} {x}".format(x=foo())
UP032_0.py:261:1: UP032 [*] Use f-string instead of `format` call
|
260 | # The string _should_ be converted, since the function call is repeated in the arguments.
261 | "{0} {1}".format(foo(), foo())
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP032
|
= help: Convert to f-string
Safe fix
258 258 | "{0} {0}".format(foo())
259 259 |
260 260 | # The string _should_ be converted, since the function call is repeated in the arguments.
261 |-"{0} {1}".format(foo(), foo())
261 |+f"{foo()} {foo()}"