Extend UP032 to support implicitly concatenated strings (#6263)

This commit is contained in:
Harutaka Kawamura 2023-08-03 03:56:24 +09:00 committed by GitHub
parent bcc41ba062
commit ec8fad5b02
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 359 additions and 96 deletions

View file

@ -76,6 +76,47 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
111111
)
"{a}" "{b}".format(a=1, b=1)
(
"{a}"
"{b}"
).format(a=1, b=1)
(
"{a}"
""
"{b}"
""
).format(a=1, b=1)
(
(
# comment
"{a}"
# comment
"{b}"
)
# comment
.format(a=1, b=1)
)
(
"{a}"
"b"
).format(a=1)
def d(osname, version, release):
return"{}-{}.{}".format(osname, version, release)
def e():
yield"{}".format(1)
assert"{}".format(1)
###
# Non-errors
###
@ -105,8 +146,6 @@ b"{} {}".format(a, b)
r'"\N{snowman} {}".format(a)'
"{a}" "{b}".format(a=1, b=1)
"123456789 {}".format(
11111111111111111111111111111111111111111111111111111111111111111111111111,
)
@ -149,13 +188,7 @@ async def c():
async def c():
return "{}".format(1 + await 3)
def d(osname, version, release):
return"{}-{}.{}".format(osname, version, release)
def e():
yield"{}".format(1)
assert"{}".format(1)
(
"{a}"
"{1 + 2}"
).format(a=1)

View file

@ -1,15 +1,17 @@
use std::borrow::Cow;
use anyhow::{Context, Result};
use ruff_python_ast::{self as ast, Arguments, Constant, Expr, Keyword, Ranged};
use ruff_python_literal::format::{
FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate,
};
use ruff_python_parser::{lexer, Mode, Tok};
use ruff_text_size::TextRange;
use rustc_hash::FxHashMap;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{is_implicit_concatenation, leading_quote, trailing_quote};
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_source_file::Locator;
use crate::checkers::ast::Checker;
@ -187,59 +189,42 @@ fn formatted_expr<'a>(expr: &Expr, context: FormatContext, locator: &Locator<'a>
}
}
/// Generate an f-string from an [`Expr`].
fn try_convert_to_f_string(expr: &Expr, locator: &Locator) -> Option<String> {
let Expr::Call(ast::ExprCall { func, .. }) = expr else {
return None;
};
let Expr::Attribute(ast::ExprAttribute { value, .. }) = func.as_ref() else {
return None;
};
if !matches!(
value.as_ref(),
Expr::Constant(ast::ExprConstant {
value: Constant::Str(..),
..
}),
) {
return None;
};
let Some(mut summary) = FormatSummaryValues::try_from_expr(expr, locator) else {
return None;
};
let contents = locator.slice(value.range());
// Skip implicit string concatenations.
if is_implicit_concatenation(contents) {
return None;
}
/// Convert a string `.format` call to an f-string.
///
/// Returns `None` if the string does not require conversion, and `Err` if the conversion
/// is not possible.
fn try_convert_to_f_string(
range: TextRange,
summary: &mut FormatSummaryValues,
locator: &Locator,
) -> Result<Option<String>> {
// Strip the unicode prefix. It's redundant in Python 3, and invalid when used
// with f-strings.
let contents = locator.slice(range);
let contents = if contents.starts_with('U') || contents.starts_with('u') {
&contents[1..]
} else {
contents
};
if contents.is_empty() {
return None;
}
// Remove the leading and trailing quotes.
let Some(leading_quote) = leading_quote(contents) else {
return None;
};
let Some(trailing_quote) = trailing_quote(contents) else {
return None;
};
let leading_quote = leading_quote(contents).context("Unable to identify leading quote")?;
let trailing_quote = trailing_quote(contents).context("Unable to identify trailing quote")?;
let contents = &contents[leading_quote.len()..contents.len() - trailing_quote.len()];
if contents.is_empty() {
return Ok(None);
}
// Parse the format string.
let Ok(format_string) = FormatString::from_str(contents) else {
return None;
};
let format_string = FormatString::from_str(contents)?;
if format_string
.format_parts
.iter()
.all(|part| matches!(part, FormatPart::Literal(..)))
{
return Ok(None);
}
let mut converted = String::with_capacity(contents.len());
for part in format_string.format_parts {
@ -251,12 +236,13 @@ fn try_convert_to_f_string(expr: &Expr, locator: &Locator) -> Option<String> {
} => {
converted.push('{');
let field = FieldName::parse(&field_name).ok()?;
let field = FieldName::parse(&field_name)?;
let arg = match field.field_type {
FieldType::Auto => summary.arg_auto(),
FieldType::Index(index) => summary.arg_positional(index),
FieldType::Keyword(name) => summary.arg_keyword(&name),
}?;
}
.context("Unable to parse field")?;
converted.push_str(&formatted_expr(
arg,
if field.parts.is_empty() {
@ -317,7 +303,7 @@ fn try_convert_to_f_string(expr: &Expr, locator: &Locator) -> Option<String> {
contents.push_str(leading_quote);
contents.push_str(&converted);
contents.push_str(trailing_quote);
Some(contents)
Ok(Some(contents))
}
/// UP032
@ -332,16 +318,81 @@ pub(crate) fn f_strings(
return;
}
// Avoid refactoring strings that are implicitly concatenated.
if is_implicit_concatenation(checker.locator().slice(template.range())) {
let Expr::Call(ast::ExprCall { func, .. }) = expr else {
return;
};
let Expr::Attribute(ast::ExprAttribute { value, .. }) = func.as_ref() else {
return;
};
if !matches!(
value.as_ref(),
Expr::Constant(ast::ExprConstant {
value: Constant::Str(..),
..
}),
) {
return;
};
let Some(mut summary) = FormatSummaryValues::try_from_expr(expr, checker.locator()) else {
return;
};
let mut patches: Vec<(TextRange, String)> = vec![];
let mut lex = lexer::lex_starts_at(
checker.locator().slice(func.range()),
Mode::Expression,
expr.start(),
)
.flatten();
let end = loop {
match lex.next() {
Some((Tok::Dot, range)) => {
// ```
// (
// "a"
// " {} "
// "b"
// ).format(x)
// ```
// ^ Get the position of the character before the dot.
//
// We know that the expression is a string literal, so we can safely assume that the
// dot is the start of an attribute access.
break range.start();
}
Some((Tok::String { .. }, range)) => {
match try_convert_to_f_string(range, &mut summary, checker.locator()) {
Ok(Some(fstring)) => patches.push((range, fstring)),
// Skip any strings that don't require conversion (e.g., literal segments of an
// implicit concatenation).
Ok(None) => continue,
// If any of the segments fail to convert, then we can't convert the entire
// expression.
Err(_) => return,
}
}
Some(_) => continue,
None => unreachable!("Should break from the `Tok::Dot` arm"),
}
};
if patches.is_empty() {
return;
}
// Currently, the only issue we know of is in LibCST:
// https://github.com/Instagram/LibCST/issues/846
let Some(mut contents) = try_convert_to_f_string(expr, checker.locator()) else {
return;
};
let mut contents = String::with_capacity(checker.locator().slice(expr.range()).len());
let mut prev_end = expr.start();
for (range, fstring) in patches {
contents.push_str(
checker
.locator()
.slice(TextRange::new(prev_end, range.start())),
);
contents.push_str(&fstring);
prev_end = range.end();
}
contents.push_str(checker.locator().slice(TextRange::new(prev_end, end)));
// Avoid refactors that exceed the line length limit.
let col_offset = template.start() - checker.locator().line_start(template.start());

View file

@ -677,7 +677,7 @@ UP032_0.py:73:85: UP032 [*] Use f-string instead of `format` call
77 | | )
| |_^ UP032
78 |
79 | ###
79 | "{a}" "{b}".format(a=1, b=1)
|
= help: Convert to f-string
@ -694,57 +694,202 @@ UP032_0.py:73:85: UP032 [*] Use f-string instead of `format` call
74 |+{111111}
75 |+"""
78 76 |
79 77 | ###
80 78 | # Non-errors
79 77 | "{a}" "{b}".format(a=1, b=1)
80 78 |
UP032_0.py:154:11: UP032 [*] Use f-string instead of `format` call
UP032_0.py:79:1: UP032 [*] Use f-string instead of `format` call
|
77 | )
78 |
79 | "{a}" "{b}".format(a=1, b=1)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP032
80 |
81 | (
|
= help: Convert to f-string
Suggested fix
76 76 | 111111
77 77 | )
78 78 |
79 |-"{a}" "{b}".format(a=1, b=1)
79 |+f"{1}" f"{1}"
80 80 |
81 81 | (
82 82 | "{a}"
UP032_0.py:81:1: UP032 [*] Use f-string instead of `format` call
|
79 | "{a}" "{b}".format(a=1, b=1)
80 |
81 | / (
82 | | "{a}"
83 | | "{b}"
84 | | ).format(a=1, b=1)
| |__________________^ UP032
85 |
86 | (
|
= help: Convert to f-string
Suggested fix
79 79 | "{a}" "{b}".format(a=1, b=1)
80 80 |
81 81 | (
82 |- "{a}"
83 |- "{b}"
84 |-).format(a=1, b=1)
82 |+ f"{1}"
83 |+ f"{1}"
84 |+)
85 85 |
86 86 | (
87 87 | "{a}"
UP032_0.py:86:1: UP032 [*] Use f-string instead of `format` call
|
84 | ).format(a=1, b=1)
85 |
86 | / (
87 | | "{a}"
88 | | ""
89 | | "{b}"
90 | | ""
91 | | ).format(a=1, b=1)
| |__________________^ UP032
92 |
93 | (
|
= help: Convert to f-string
Suggested fix
84 84 | ).format(a=1, b=1)
85 85 |
86 86 | (
87 |- "{a}"
87 |+ f"{1}"
88 88 | ""
89 |- "{b}"
89 |+ f"{1}"
90 90 | ""
91 |-).format(a=1, b=1)
91 |+)
92 92 |
93 93 | (
94 94 | (
UP032_0.py:94:5: UP032 [*] Use f-string instead of `format` call
|
153 | def d(osname, version, release):
154 | return"{}-{}.{}".format(osname, version, release)
93 | (
94 | (
| _____^
95 | | # comment
96 | | "{a}"
97 | | # comment
98 | | "{b}"
99 | | )
100 | | # comment
101 | | .format(a=1, b=1)
| |_____________________^ UP032
102 | )
|
= help: Convert to f-string
Suggested fix
93 93 | (
94 94 | (
95 95 | # comment
96 |- "{a}"
96 |+ f"{1}"
97 97 | # comment
98 |- "{b}"
98 |+ f"{1}"
99 99 | )
100 100 | # comment
101 |- .format(a=1, b=1)
101 |+
102 102 | )
103 103 |
104 104 | (
UP032_0.py:104:1: UP032 [*] Use f-string instead of `format` call
|
102 | )
103 |
104 | / (
105 | | "{a}"
106 | | "b"
107 | | ).format(a=1)
| |_____________^ UP032
|
= help: Convert to f-string
Suggested fix
102 102 | )
103 103 |
104 104 | (
105 |- "{a}"
105 |+ f"{1}"
106 106 | "b"
107 |-).format(a=1)
107 |+)
108 108 |
109 109 |
110 110 | def d(osname, version, release):
UP032_0.py:111:11: UP032 [*] Use f-string instead of `format` call
|
110 | def d(osname, version, release):
111 | return"{}-{}.{}".format(osname, version, release)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP032
|
= help: Convert to f-string
Suggested fix
151 151 |
152 152 |
153 153 | def d(osname, version, release):
154 |- return"{}-{}.{}".format(osname, version, release)
154 |+ return f"{osname}-{version}.{release}"
155 155 |
156 156 |
157 157 | def e():
108 108 |
109 109 |
110 110 | def d(osname, version, release):
111 |- return"{}-{}.{}".format(osname, version, release)
111 |+ return f"{osname}-{version}.{release}"
112 112 |
113 113 |
114 114 | def e():
UP032_0.py:158:10: UP032 [*] Use f-string instead of `format` call
UP032_0.py:115:10: UP032 [*] Use f-string instead of `format` call
|
157 | def e():
158 | yield"{}".format(1)
114 | def e():
115 | yield"{}".format(1)
| ^^^^^^^^^^^^^^ UP032
|
= help: Convert to f-string
Suggested fix
155 155 |
156 156 |
157 157 | def e():
158 |- yield"{}".format(1)
158 |+ yield f"{1}"
159 159 |
160 160 |
161 161 | assert"{}".format(1)
112 112 |
113 113 |
114 114 | def e():
115 |- yield"{}".format(1)
115 |+ yield f"{1}"
116 116 |
117 117 |
118 118 | assert"{}".format(1)
UP032_0.py:161:7: UP032 [*] Use f-string instead of `format` call
UP032_0.py:118:7: UP032 [*] Use f-string instead of `format` call
|
161 | assert"{}".format(1)
118 | assert"{}".format(1)
| ^^^^^^^^^^^^^^ UP032
119 |
120 | ###
|
= help: Convert to f-string
Suggested fix
158 158 | yield"{}".format(1)
159 159 |
160 160 |
161 |-assert"{}".format(1)
161 |+assert f"{1}"
115 115 | yield"{}".format(1)
116 116 |
117 117 |
118 |-assert"{}".format(1)
118 |+assert f"{1}"
119 119 |
120 120 | ###
121 121 | # Non-errors

View file

@ -1,6 +1,7 @@
use itertools::{Itertools, PeekingNext};
use num_traits::{cast::ToPrimitive, FromPrimitive, Signed};
use std::error::Error;
use std::ops::Deref;
use std::{cmp, str::FromStr};
@ -744,6 +745,39 @@ pub enum FormatParseError {
InvalidCharacterAfterRightBracket,
}
impl std::fmt::Display for FormatParseError {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnmatchedBracket => {
std::write!(fmt, "unmatched bracket in format string")
}
Self::MissingStartBracket => {
std::write!(fmt, "missing start bracket in format string")
}
Self::UnescapedStartBracketInLiteral => {
std::write!(fmt, "unescaped start bracket in literal")
}
Self::InvalidFormatSpecifier => {
std::write!(fmt, "invalid format specifier")
}
Self::UnknownConversion => {
std::write!(fmt, "unknown conversion")
}
Self::EmptyAttribute => {
std::write!(fmt, "empty attribute")
}
Self::MissingRightBracket => {
std::write!(fmt, "missing right bracket")
}
Self::InvalidCharacterAfterRightBracket => {
std::write!(fmt, "invalid character after right bracket")
}
}
}
}
impl Error for FormatParseError {}
impl FromStr for FormatSpec {
type Err = FormatSpecError;
fn from_str(s: &str) -> Result<Self, Self::Err> {