[pylint] Implement Pylint bad-format-character (E1300) (#6171)

## Summary

Relates to #970.

Add new `bad-format-character` Pylint rule.

I had to make a change in `crates/ruff_python_literal/src/format.rs` to
get a more detailed error in case the format character is not correct. I
chose to do this since most of the format spec parsing functions are
private. It would have required me reimplementing most of the parsing
logic just to know if the format char was correct.

This PR also doesn't reflect current Pylint functionality in two ways.

It supports new format strings correctly, Pylint as of now doesn't. See
pylint-dev/pylint#6085.

In case there are multiple adjacent string literals delimited by
whitespace the index of the wrong format char will relative to the
single string. Pylint will instead reported it relative to the
concatenated string.

Given this:
```
"%s" "%z" % ("hello", "world")
```

Ruff will report this:
```Unsupported format character 'z' (0x7a) at index 1```

Pylint instead:
```Unsupported format character 'z' (0x7a) at index 3```

I believe it's more sensible to report the index relative to the
individual string.

## Test Plan

Added new snapshot and a small test in
`crates/ruff_python_literal/src/format.rs`.

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
This commit is contained in:
Silvano Cerza 2023-08-02 23:32:43 +02:00 committed by GitHub
parent 5b2e973fa5
commit 82410524d9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 221 additions and 5 deletions

View file

@ -0,0 +1,29 @@
# pylint: disable=missing-docstring,consider-using-f-string, pointless-statement
## Old style formatting
"%s %z" % ("hello", "world") # [bad-format-character]
"%s" "%z" % ("hello", "world") # [bad-format-character]
"""%s %z""" % ("hello", "world") # [bad-format-character]
"""%s""" """%z""" % ("hello", "world") # [bad-format-character]
## New style formatting
"{:s} {:y}".format("hello", "world") # [bad-format-character]
"{:*^30s}".format("centered")
## f-strings
H, W = "hello", "world"
f"{H} {W}"
f"{H:s} {W:z}" # [bad-format-character]
f"{1:z}" # [bad-format-character]
## False negatives
print(("%" "z") % 1)

View file

@ -419,6 +419,14 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
}
}
}
if checker.enabled(Rule::BadStringFormatCharacter) {
pylint::rules::bad_string_format_character::call(
checker,
val.as_str(),
location,
);
}
}
}
}
@ -1025,6 +1033,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
checker.locator,
);
}
if checker.enabled(Rule::BadStringFormatCharacter) {
pylint::rules::bad_string_format_character::percent(checker, expr);
}
if checker.enabled(Rule::BadStringFormatType) {
pylint::rules::bad_string_format_type(checker, expr, right);
}

View file

@ -191,6 +191,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Pylint, "E1142") => (RuleGroup::Unspecified, rules::pylint::rules::AwaitOutsideAsync),
(Pylint, "E1205") => (RuleGroup::Unspecified, rules::pylint::rules::LoggingTooManyArgs),
(Pylint, "E1206") => (RuleGroup::Unspecified, rules::pylint::rules::LoggingTooFewArgs),
(Pylint, "E1300") => (RuleGroup::Unspecified, rules::pylint::rules::BadStringFormatCharacter),
(Pylint, "E1307") => (RuleGroup::Unspecified, rules::pylint::rules::BadStringFormatType),
(Pylint, "E1310") => (RuleGroup::Unspecified, rules::pylint::rules::BadStrStripCall),
(Pylint, "E1507") => (RuleGroup::Unspecified, rules::pylint::rules::InvalidEnvvarValue),

View file

@ -18,8 +18,12 @@ mod tests {
use crate::settings::Settings;
use crate::test::test_path;
#[test_case(Rule::AwaitOutsideAsync, Path::new("await_outside_async.py"))]
#[test_case(Rule::AssertOnStringLiteral, Path::new("assert_on_string_literal.py"))]
#[test_case(Rule::AwaitOutsideAsync, Path::new("await_outside_async.py"))]
#[test_case(
Rule::BadStringFormatCharacter,
Path::new("bad_string_format_character.py")
)]
#[test_case(Rule::BadStrStripCall, Path::new("bad_str_strip_call.py"))]
#[test_case(Rule::BadStringFormatType, Path::new("bad_string_format_type.py"))]
#[test_case(Rule::BidirectionalUnicode, Path::new("bidirectional_unicode.py"))]

View file

@ -0,0 +1,108 @@
use std::str::FromStr;
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_ast::{Expr, Ranged};
use ruff_python_literal::{
cformat::{CFormatErrorType, CFormatString},
format::FormatPart,
format::FromTemplate,
format::{FormatSpec, FormatSpecError, FormatString},
};
use ruff_python_parser::{lexer, Mode};
use ruff_text_size::TextRange;
use crate::checkers::ast::Checker;
/// ## What it does
/// Checks for unsupported format types in format strings.
///
/// ## Why is this bad?
/// The format string is not checked at compile time, so it is easy to
/// introduce bugs by mistyping the format string.
///
/// ## Example
/// ```python
/// # `z` is not a valid format type.
/// print("%z" % "1")
///
/// print("{:z}".format("1"))
/// ```
#[violation]
pub struct BadStringFormatCharacter {
format_char: char,
}
impl Violation for BadStringFormatCharacter {
#[derive_message_formats]
fn message(&self) -> String {
format!("Unsupported format character '{}'", self.format_char)
}
}
/// Ex) `"{:z}".format("1")`
pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) {
if let Ok(format_string) = FormatString::from_str(string) {
for part in &format_string.format_parts {
let FormatPart::Field { format_spec, .. } = part else {
continue;
};
if let Err(FormatSpecError::InvalidFormatType) = FormatSpec::parse(format_spec) {
checker.diagnostics.push(Diagnostic::new(
BadStringFormatCharacter {
// The format type character is always the last one.
// More info in the official spec:
// https://docs.python.org/3/library/string.html#format-specification-mini-language
format_char: format_spec.chars().last().unwrap(),
},
range,
));
}
}
}
}
/// Ex) `"%z" % "1"`
pub(crate) fn percent(checker: &mut Checker, expr: &Expr) {
// Grab each string segment (in case there's an implicit concatenation).
let mut strings: Vec<TextRange> = vec![];
for (tok, range) in lexer::lex_starts_at(
checker.locator().slice(expr.range()),
Mode::Module,
expr.start(),
)
.flatten()
{
if tok.is_string() {
strings.push(range);
} else if tok.is_percent() {
// Break as soon as we find the modulo symbol.
break;
}
}
// If there are no string segments, abort.
if strings.is_empty() {
return;
}
for range in &strings {
let string = checker.locator().slice(*range);
let (Some(leader), Some(trailer)) = (leading_quote(string), trailing_quote(string)) else {
return;
};
let string = &string[leader.len()..string.len() - trailer.len()];
// Parse the format string (e.g. `"%s"`) into a list of `PercentFormat`.
if let Err(format_error) = CFormatString::from_str(string) {
if let CFormatErrorType::UnsupportedFormatChar(format_char) = format_error.typ {
checker.diagnostics.push(Diagnostic::new(
BadStringFormatCharacter { format_char },
expr.range(),
));
}
};
}
}

View file

@ -1,6 +1,7 @@
pub(crate) use assert_on_string_literal::*;
pub(crate) use await_outside_async::*;
pub(crate) use bad_str_strip_call::*;
pub(crate) use bad_string_format_character::BadStringFormatCharacter;
pub(crate) use bad_string_format_type::*;
pub(crate) use bidirectional_unicode::*;
pub(crate) use binary_op_exception::*;
@ -55,6 +56,7 @@ pub(crate) use yield_in_init::*;
mod assert_on_string_literal;
mod await_outside_async;
mod bad_str_strip_call;
pub(crate) mod bad_string_format_character;
mod bad_string_format_type;
mod bidirectional_unicode;
mod binary_op_exception;

View file

@ -0,0 +1,44 @@
---
source: crates/ruff/src/rules/pylint/mod.rs
---
bad_string_format_character.py:5:1: PLE1300 Unsupported format character 'z'
|
3 | ## Old style formatting
4 |
5 | "%s %z" % ("hello", "world") # [bad-format-character]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300
6 |
7 | "%s" "%z" % ("hello", "world") # [bad-format-character]
|
bad_string_format_character.py:7:1: PLE1300 Unsupported format character 'z'
|
5 | "%s %z" % ("hello", "world") # [bad-format-character]
6 |
7 | "%s" "%z" % ("hello", "world") # [bad-format-character]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300
8 |
9 | """%s %z""" % ("hello", "world") # [bad-format-character]
|
bad_string_format_character.py:9:1: PLE1300 Unsupported format character 'z'
|
7 | "%s" "%z" % ("hello", "world") # [bad-format-character]
8 |
9 | """%s %z""" % ("hello", "world") # [bad-format-character]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300
10 |
11 | """%s""" """%z""" % ("hello", "world") # [bad-format-character]
|
bad_string_format_character.py:11:1: PLE1300 Unsupported format character 'z'
|
9 | """%s %z""" % ("hello", "world") # [bad-format-character]
10 |
11 | """%s""" """%z""" % ("hello", "world") # [bad-format-character]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300
12 |
13 | ## New style formatting
|

View file

@ -180,6 +180,7 @@ impl FormatParse for FormatType {
Some('g') => (Some(Self::GeneralFormat(Case::Lower)), chars.as_str()),
Some('G') => (Some(Self::GeneralFormat(Case::Upper)), chars.as_str()),
Some('%') => (Some(Self::Percentage), chars.as_str()),
Some(_) => (None, chars.as_str()),
_ => (None, text),
}
}
@ -283,10 +284,20 @@ impl FormatSpec {
let (width, text) = parse_number(text)?;
let (grouping_option, text) = FormatGrouping::parse(text);
let (precision, text) = parse_precision(text)?;
let (format_type, text) = FormatType::parse(text);
if !text.is_empty() {
return Err(FormatSpecError::InvalidFormatSpecifier);
}
let (format_type, _text) = if text.is_empty() {
(None, text)
} else {
// If there's any remaining text, we should yield a valid format type and consume it
// all.
let (format_type, text) = FormatType::parse(text);
if format_type.is_none() {
return Err(FormatSpecError::InvalidFormatType);
}
if !text.is_empty() {
return Err(FormatSpecError::InvalidFormatSpecifier);
}
(format_type, text)
};
if zero && fill.is_none() {
fill.replace('0');
@ -724,6 +735,7 @@ pub enum FormatSpecError {
DecimalDigitsTooMany,
PrecisionTooBig,
InvalidFormatSpecifier,
InvalidFormatType,
UnspecifiedFormat(char, char),
UnknownFormatCode(char, &'static str),
PrecisionNotAllowed,
@ -1275,6 +1287,10 @@ mod tests {
FormatSpec::parse("d "),
Err(FormatSpecError::InvalidFormatSpecifier)
);
assert_eq!(
FormatSpec::parse("z"),
Err(FormatSpecError::InvalidFormatType)
);
}
#[test]