Support concatenated literals in format-literals (#4974)

This commit is contained in:
Charlie Marsh 2023-06-08 21:29:19 -04:00 committed by GitHub
parent 2c19000e4a
commit 293889a352
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 112 additions and 72 deletions

View file

@ -21,10 +21,6 @@ x = "foo {0}" \
"\N{snowman} {0}".format(1)
'{' '0}'.format(1)
# These will not change because we are waiting for libcst to fix this issue:
# https://github.com/Instagram/LibCST/issues/846
print(
'foo{0}'
'bar{1}'.format(1, 2)
@ -34,3 +30,5 @@ print(
'foo{0}' # ohai\n"
'bar{1}'.format(1, 2)
)
'{' '0}'.format(1)

View file

@ -13,3 +13,5 @@ f"{0}".format(a)
f"{0}".format(1)
print(f"{0}".format(1))
''.format(1)

View file

@ -1,14 +1,14 @@
use anyhow::{anyhow, bail, Result};
use libcst_native::Arg;
use anyhow::{anyhow, Result};
use libcst_native::{Arg, Expression};
use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::{Expr, Ranged};
use crate::autofix::codemods::CodegenStylist;
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::{Locator, Stylist};
use crate::autofix::codemods::CodegenStylist;
use crate::checkers::ast::Checker;
use crate::cst::matchers::{match_attribute, match_call_mut, match_expression};
use crate::registry::AsRule;
@ -35,29 +35,51 @@ impl Violation for FormatLiterals {
static FORMAT_SPECIFIER: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\{(?P<int>\d+)(?P<fmt>.*?)}").unwrap());
/// Returns a string without the format specifiers.
/// Ex. "Hello {0} {1}" -> "Hello {} {}"
fn remove_specifiers(raw_specifiers: &str) -> String {
FORMAT_SPECIFIER
.replace_all(raw_specifiers, "{$fmt}")
.to_string()
/// Remove the explicit positional indices from a format string.
fn remove_specifiers<'a>(value: &mut Expression<'a>, arena: &'a mut typed_arena::Arena<String>) {
match value {
Expression::SimpleString(expr) => {
expr.value = arena.alloc(
FORMAT_SPECIFIER
.replace_all(expr.value, "{$fmt}")
.to_string(),
);
}
Expression::ConcatenatedString(expr) => {
let mut stack = vec![&mut expr.left, &mut expr.right];
while let Some(string) = stack.pop() {
match string.as_mut() {
libcst_native::String::Simple(string) => {
string.value = arena.alloc(
FORMAT_SPECIFIER
.replace_all(string.value, "{$fmt}")
.to_string(),
);
}
libcst_native::String::Concatenated(string) => {
stack.push(&mut string.left);
stack.push(&mut string.right);
}
libcst_native::String::Formatted(_) => {}
}
}
}
_ => {}
}
}
/// Return the corrected argument vector.
fn generate_arguments<'a>(
old_args: &[Arg<'a>],
correct_order: &'a [usize],
) -> Result<Vec<Arg<'a>>> {
let mut new_args: Vec<Arg> = Vec::with_capacity(old_args.len());
for (idx, given) in correct_order.iter().enumerate() {
fn generate_arguments<'a>(arguments: &[Arg<'a>], order: &'a [usize]) -> Result<Vec<Arg<'a>>> {
let mut new_arguments: Vec<Arg> = Vec::with_capacity(arguments.len());
for (idx, given) in order.iter().enumerate() {
// We need to keep the formatting in the same order but move the values.
let values = old_args
let values = arguments
.get(*given)
.ok_or_else(|| anyhow!("Failed to extract argument at: {given}"))?;
let formatting = old_args
let formatting = arguments
.get(idx)
.ok_or_else(|| anyhow!("Failed to extract argument at: {idx}"))?;
let new_arg = Arg {
let argument = Arg {
value: values.value.clone(),
comma: formatting.comma.clone(),
equal: None,
@ -66,19 +88,14 @@ fn generate_arguments<'a>(
whitespace_after_star: formatting.whitespace_after_star.clone(),
whitespace_after_arg: formatting.whitespace_after_arg.clone(),
};
new_args.push(new_arg);
new_arguments.push(argument);
}
Ok(new_args)
Ok(new_arguments)
}
/// Returns true if the indices are sequential.
fn is_sequential(indices: &[usize]) -> bool {
for (expected, actual) in indices.iter().enumerate() {
if expected != *actual {
return false;
}
}
true
indices.iter().enumerate().all(|(idx, value)| idx == *value)
}
/// Returns the corrected function call.
@ -88,28 +105,32 @@ fn generate_call(
locator: &Locator,
stylist: &Stylist,
) -> Result<String> {
let module_text = locator.slice(expr.range());
let mut expression = match_expression(module_text)?;
let call = match_call_mut(&mut expression)?;
let content = locator.slice(expr.range());
let parenthesized_content = format!("({content})");
let mut expression = match_expression(&parenthesized_content)?;
// Fix the call arguments.
let call = match_call_mut(&mut expression)?;
if !is_sequential(correct_order) {
call.args = generate_arguments(&call.args, correct_order)?;
}
// Fix the string itself.
let item = match_attribute(&mut call.func)?;
let mut arena = typed_arena::Arena::new();
remove_specifiers(&mut item.value, &mut arena);
let cleaned = remove_specifiers(&item.codegen_stylist(stylist));
// Remove the parentheses (first and last characters).
let mut output = expression.codegen_stylist(stylist);
output.remove(0);
output.pop();
call.func = Box::new(match_expression(&cleaned)?);
let state = expression.codegen_stylist(stylist);
if module_text == state {
// Ex) `'{' '0}'.format(1)`
bail!("Failed to generate call expression for: {module_text}")
// Ex) `'{' '0}'.format(1)`
if output == content {
return Err(anyhow!("Unable to identify format literals"));
}
Ok(state)
Ok(output)
}
/// UP030
@ -124,23 +145,21 @@ pub(crate) fn format_literals(checker: &mut Checker, summary: &FormatSummary, ex
if !summary.autos.is_empty() {
return;
}
if !(0..summary.indices.len()).all(|index| summary.indices.contains(&index)) {
if summary.indices.is_empty() {
return;
}
if (0..summary.indices.len()).any(|index| !summary.indices.contains(&index)) {
return;
}
let mut diagnostic = Diagnostic::new(FormatLiterals, expr.range());
if checker.patch(diagnostic.kind.rule()) {
// Currently, the only issue we know of is in LibCST:
// https://github.com/Instagram/LibCST/issues/846
if let Ok(contents) =
generate_call(expr, &summary.indices, checker.locator, checker.stylist)
{
#[allow(deprecated)]
diagnostic.set_fix(Fix::unspecified(Edit::range_replacement(
contents,
diagnostic.try_set_fix(|| {
Ok(Fix::suggested(Edit::range_replacement(
generate_call(expr, &summary.indices, checker.locator, checker.stylist)?,
expr.range(),
)));
};
)))
});
}
checker.diagnostics.push(diagnostic);
}

View file

@ -183,7 +183,7 @@ UP030_0.py:22:1: UP030 [*] Use implicit references for positional format fields
24 | "\N{snowman} {0}".format(1)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP030
25 |
26 | '{' '0}'.format(1)
26 | print(
|
= help: Remove explicit positional indices
@ -194,25 +194,36 @@ UP030_0.py:22:1: UP030 [*] Use implicit references for positional format fields
22 |-"\N{snowman} {0}".format(1)
22 |+"\N{snowman} {}".format(1)
23 23 |
24 24 | '{' '0}'.format(1)
25 25 |
24 24 | print(
25 25 | 'foo{0}'
UP030_0.py:24:1: UP030 Use implicit references for positional format fields
UP030_0.py:25:5: UP030 [*] Use implicit references for positional format fields
|
24 | "\N{snowman} {0}".format(1)
25 |
26 | '{' '0}'.format(1)
| ^^^^^^^^^^^^^^^^^^ UP030
27 |
28 | # These will not change because we are waiting for libcst to fix this issue:
25 | print(
26 | 'foo{0}'
| _____^
27 | | 'bar{1}'.format(1, 2)
| |_________________________^ UP030
28 | )
|
= help: Remove explicit positional indices
UP030_0.py:29:5: UP030 Use implicit references for positional format fields
Suggested fix
22 22 | "\N{snowman} {0}".format(1)
23 23 |
24 24 | print(
25 |- 'foo{0}'
26 |- 'bar{1}'.format(1, 2)
25 |+ 'foo{}'
26 |+ 'bar{}'.format(1, 2)
27 27 | )
28 28 |
29 29 | print(
UP030_0.py:30:5: UP030 [*] Use implicit references for positional format fields
|
29 | # https://github.com/Instagram/LibCST/issues/846
30 | print(
31 | 'foo{0}'
31 | 'foo{0}' # ohai\n"
| _____^
32 | | 'bar{1}'.format(1, 2)
| |_________________________^ UP030
@ -220,14 +231,24 @@ UP030_0.py:29:5: UP030 Use implicit references for positional format fields
|
= help: Remove explicit positional indices
UP030_0.py:34:5: UP030 Use implicit references for positional format fields
Suggested fix
27 27 | )
28 28 |
29 29 | print(
30 |- 'foo{0}' # ohai\n"
31 |- 'bar{1}'.format(1, 2)
30 |+ 'foo{}' # ohai\n"
31 |+ 'bar{}'.format(1, 2)
32 32 | )
33 33 |
34 34 | '{' '0}'.format(1)
UP030_0.py:34:1: UP030 Use implicit references for positional format fields
|
34 | print(
35 | 'foo{0}' # ohai\n"
| _____^
36 | | 'bar{1}'.format(1, 2)
| |_________________________^ UP030
37 | )
34 | )
35 |
36 | '{' '0}'.format(1)
| ^^^^^^^^^^^^^^^^^^ UP030
|
= help: Remove explicit positional indices