Support concatenated literals in format-literals (#4974)

This commit is contained in:
Charlie Marsh 2023-06-08 21:29:19 -04:00 committed by GitHub
parent 2c19000e4a
commit 293889a352
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 112 additions and 72 deletions

View file

@ -21,10 +21,6 @@ x = "foo {0}" \
"\N{snowman} {0}".format(1) "\N{snowman} {0}".format(1)
'{' '0}'.format(1)
# These will not change because we are waiting for libcst to fix this issue:
# https://github.com/Instagram/LibCST/issues/846
print( print(
'foo{0}' 'foo{0}'
'bar{1}'.format(1, 2) 'bar{1}'.format(1, 2)
@ -34,3 +30,5 @@ print(
'foo{0}' # ohai\n" 'foo{0}' # ohai\n"
'bar{1}'.format(1, 2) 'bar{1}'.format(1, 2)
) )
'{' '0}'.format(1)

View file

@ -13,3 +13,5 @@ f"{0}".format(a)
f"{0}".format(1) f"{0}".format(1)
print(f"{0}".format(1)) print(f"{0}".format(1))
''.format(1)

View file

@ -1,14 +1,14 @@
use anyhow::{anyhow, bail, Result}; use anyhow::{anyhow, Result};
use libcst_native::Arg; use libcst_native::{Arg, Expression};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
use rustpython_parser::ast::{Expr, Ranged}; use rustpython_parser::ast::{Expr, Ranged};
use crate::autofix::codemods::CodegenStylist;
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_ast::source_code::{Locator, Stylist};
use crate::autofix::codemods::CodegenStylist;
use crate::checkers::ast::Checker; use crate::checkers::ast::Checker;
use crate::cst::matchers::{match_attribute, match_call_mut, match_expression}; use crate::cst::matchers::{match_attribute, match_call_mut, match_expression};
use crate::registry::AsRule; use crate::registry::AsRule;
@ -35,29 +35,51 @@ impl Violation for FormatLiterals {
static FORMAT_SPECIFIER: Lazy<Regex> = static FORMAT_SPECIFIER: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\{(?P<int>\d+)(?P<fmt>.*?)}").unwrap()); Lazy::new(|| Regex::new(r"\{(?P<int>\d+)(?P<fmt>.*?)}").unwrap());
/// Returns a string without the format specifiers. /// Remove the explicit positional indices from a format string.
/// Ex. "Hello {0} {1}" -> "Hello {} {}" fn remove_specifiers<'a>(value: &mut Expression<'a>, arena: &'a mut typed_arena::Arena<String>) {
fn remove_specifiers(raw_specifiers: &str) -> String { match value {
Expression::SimpleString(expr) => {
expr.value = arena.alloc(
FORMAT_SPECIFIER FORMAT_SPECIFIER
.replace_all(raw_specifiers, "{$fmt}") .replace_all(expr.value, "{$fmt}")
.to_string() .to_string(),
);
}
Expression::ConcatenatedString(expr) => {
let mut stack = vec![&mut expr.left, &mut expr.right];
while let Some(string) = stack.pop() {
match string.as_mut() {
libcst_native::String::Simple(string) => {
string.value = arena.alloc(
FORMAT_SPECIFIER
.replace_all(string.value, "{$fmt}")
.to_string(),
);
}
libcst_native::String::Concatenated(string) => {
stack.push(&mut string.left);
stack.push(&mut string.right);
}
libcst_native::String::Formatted(_) => {}
}
}
}
_ => {}
}
} }
/// Return the corrected argument vector. /// Return the corrected argument vector.
fn generate_arguments<'a>( fn generate_arguments<'a>(arguments: &[Arg<'a>], order: &'a [usize]) -> Result<Vec<Arg<'a>>> {
old_args: &[Arg<'a>], let mut new_arguments: Vec<Arg> = Vec::with_capacity(arguments.len());
correct_order: &'a [usize], for (idx, given) in order.iter().enumerate() {
) -> Result<Vec<Arg<'a>>> {
let mut new_args: Vec<Arg> = Vec::with_capacity(old_args.len());
for (idx, given) in correct_order.iter().enumerate() {
// We need to keep the formatting in the same order but move the values. // We need to keep the formatting in the same order but move the values.
let values = old_args let values = arguments
.get(*given) .get(*given)
.ok_or_else(|| anyhow!("Failed to extract argument at: {given}"))?; .ok_or_else(|| anyhow!("Failed to extract argument at: {given}"))?;
let formatting = old_args let formatting = arguments
.get(idx) .get(idx)
.ok_or_else(|| anyhow!("Failed to extract argument at: {idx}"))?; .ok_or_else(|| anyhow!("Failed to extract argument at: {idx}"))?;
let new_arg = Arg { let argument = Arg {
value: values.value.clone(), value: values.value.clone(),
comma: formatting.comma.clone(), comma: formatting.comma.clone(),
equal: None, equal: None,
@ -66,19 +88,14 @@ fn generate_arguments<'a>(
whitespace_after_star: formatting.whitespace_after_star.clone(), whitespace_after_star: formatting.whitespace_after_star.clone(),
whitespace_after_arg: formatting.whitespace_after_arg.clone(), whitespace_after_arg: formatting.whitespace_after_arg.clone(),
}; };
new_args.push(new_arg); new_arguments.push(argument);
} }
Ok(new_args) Ok(new_arguments)
} }
/// Returns true if the indices are sequential. /// Returns true if the indices are sequential.
fn is_sequential(indices: &[usize]) -> bool { fn is_sequential(indices: &[usize]) -> bool {
for (expected, actual) in indices.iter().enumerate() { indices.iter().enumerate().all(|(idx, value)| idx == *value)
if expected != *actual {
return false;
}
}
true
} }
/// Returns the corrected function call. /// Returns the corrected function call.
@ -88,28 +105,32 @@ fn generate_call(
locator: &Locator, locator: &Locator,
stylist: &Stylist, stylist: &Stylist,
) -> Result<String> { ) -> Result<String> {
let module_text = locator.slice(expr.range()); let content = locator.slice(expr.range());
let mut expression = match_expression(module_text)?; let parenthesized_content = format!("({content})");
let call = match_call_mut(&mut expression)?; let mut expression = match_expression(&parenthesized_content)?;
// Fix the call arguments. // Fix the call arguments.
let call = match_call_mut(&mut expression)?;
if !is_sequential(correct_order) { if !is_sequential(correct_order) {
call.args = generate_arguments(&call.args, correct_order)?; call.args = generate_arguments(&call.args, correct_order)?;
} }
// Fix the string itself. // Fix the string itself.
let item = match_attribute(&mut call.func)?; let item = match_attribute(&mut call.func)?;
let mut arena = typed_arena::Arena::new();
remove_specifiers(&mut item.value, &mut arena);
let cleaned = remove_specifiers(&item.codegen_stylist(stylist)); // Remove the parentheses (first and last characters).
let mut output = expression.codegen_stylist(stylist);
output.remove(0);
output.pop();
call.func = Box::new(match_expression(&cleaned)?);
let state = expression.codegen_stylist(stylist);
if module_text == state {
// Ex) `'{' '0}'.format(1)` // Ex) `'{' '0}'.format(1)`
bail!("Failed to generate call expression for: {module_text}") if output == content {
return Err(anyhow!("Unable to identify format literals"));
} }
Ok(state)
Ok(output)
} }
/// UP030 /// UP030
@ -124,23 +145,21 @@ pub(crate) fn format_literals(checker: &mut Checker, summary: &FormatSummary, ex
if !summary.autos.is_empty() { if !summary.autos.is_empty() {
return; return;
} }
if !(0..summary.indices.len()).all(|index| summary.indices.contains(&index)) { if summary.indices.is_empty() {
return;
}
if (0..summary.indices.len()).any(|index| !summary.indices.contains(&index)) {
return; return;
} }
let mut diagnostic = Diagnostic::new(FormatLiterals, expr.range()); let mut diagnostic = Diagnostic::new(FormatLiterals, expr.range());
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
// Currently, the only issue we know of is in LibCST: diagnostic.try_set_fix(|| {
// https://github.com/Instagram/LibCST/issues/846 Ok(Fix::suggested(Edit::range_replacement(
if let Ok(contents) = generate_call(expr, &summary.indices, checker.locator, checker.stylist)?,
generate_call(expr, &summary.indices, checker.locator, checker.stylist)
{
#[allow(deprecated)]
diagnostic.set_fix(Fix::unspecified(Edit::range_replacement(
contents,
expr.range(), expr.range(),
))); )))
}; });
} }
checker.diagnostics.push(diagnostic); checker.diagnostics.push(diagnostic);
} }

View file

@ -183,7 +183,7 @@ UP030_0.py:22:1: UP030 [*] Use implicit references for positional format fields
24 | "\N{snowman} {0}".format(1) 24 | "\N{snowman} {0}".format(1)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP030 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP030
25 | 25 |
26 | '{' '0}'.format(1) 26 | print(
| |
= help: Remove explicit positional indices = help: Remove explicit positional indices
@ -194,25 +194,36 @@ UP030_0.py:22:1: UP030 [*] Use implicit references for positional format fields
22 |-"\N{snowman} {0}".format(1) 22 |-"\N{snowman} {0}".format(1)
22 |+"\N{snowman} {}".format(1) 22 |+"\N{snowman} {}".format(1)
23 23 | 23 23 |
24 24 | '{' '0}'.format(1) 24 24 | print(
25 25 | 25 25 | 'foo{0}'
UP030_0.py:24:1: UP030 Use implicit references for positional format fields UP030_0.py:25:5: UP030 [*] Use implicit references for positional format fields
| |
24 | "\N{snowman} {0}".format(1) 25 | print(
25 | 26 | 'foo{0}'
26 | '{' '0}'.format(1) | _____^
| ^^^^^^^^^^^^^^^^^^ UP030 27 | | 'bar{1}'.format(1, 2)
27 | | |_________________________^ UP030
28 | # These will not change because we are waiting for libcst to fix this issue: 28 | )
| |
= help: Remove explicit positional indices = help: Remove explicit positional indices
UP030_0.py:29:5: UP030 Use implicit references for positional format fields Suggested fix
22 22 | "\N{snowman} {0}".format(1)
23 23 |
24 24 | print(
25 |- 'foo{0}'
26 |- 'bar{1}'.format(1, 2)
25 |+ 'foo{}'
26 |+ 'bar{}'.format(1, 2)
27 27 | )
28 28 |
29 29 | print(
UP030_0.py:30:5: UP030 [*] Use implicit references for positional format fields
| |
29 | # https://github.com/Instagram/LibCST/issues/846
30 | print( 30 | print(
31 | 'foo{0}' 31 | 'foo{0}' # ohai\n"
| _____^ | _____^
32 | | 'bar{1}'.format(1, 2) 32 | | 'bar{1}'.format(1, 2)
| |_________________________^ UP030 | |_________________________^ UP030
@ -220,14 +231,24 @@ UP030_0.py:29:5: UP030 Use implicit references for positional format fields
| |
= help: Remove explicit positional indices = help: Remove explicit positional indices
UP030_0.py:34:5: UP030 Use implicit references for positional format fields Suggested fix
27 27 | )
28 28 |
29 29 | print(
30 |- 'foo{0}' # ohai\n"
31 |- 'bar{1}'.format(1, 2)
30 |+ 'foo{}' # ohai\n"
31 |+ 'bar{}'.format(1, 2)
32 32 | )
33 33 |
34 34 | '{' '0}'.format(1)
UP030_0.py:34:1: UP030 Use implicit references for positional format fields
| |
34 | print( 34 | )
35 | 'foo{0}' # ohai\n" 35 |
| _____^ 36 | '{' '0}'.format(1)
36 | | 'bar{1}'.format(1, 2) | ^^^^^^^^^^^^^^^^^^ UP030
| |_________________________^ UP030
37 | )
| |
= help: Remove explicit positional indices = help: Remove explicit positional indices