From 2cf2805848f35804b8057e19b368d0efc4bf32ed Mon Sep 17 00:00:00 2001 From: Oliver Margetts Date: Thu, 24 Nov 2022 23:09:36 +0000 Subject: [PATCH] Implement F521 (#898) --- LICENSE | 25 ++ resources/test/fixtures/F521.py | 29 ++ src/check_ast.rs | 22 ++ src/checks.rs | 8 + src/checks_gen.rs | 1 + src/lib.rs | 1 + src/linter.rs | 1 + src/pyflakes/checks.rs | 24 ++ src/pyflakes/format.rs | 22 ++ src/pyflakes/mod.rs | 1 + .../ruff__linter__tests__F521_F521.py.snap | 68 ++++ src/vendored/format.rs | 360 ++++++++++++++++++ src/vendored/mod.rs | 1 + 13 files changed, 563 insertions(+) create mode 100644 resources/test/fixtures/F521.py create mode 100644 src/pyflakes/format.rs create mode 100644 src/snapshots/ruff__linter__tests__F521_F521.py.snap create mode 100644 src/vendored/format.rs create mode 100644 src/vendored/mod.rs diff --git a/LICENSE b/LICENSE index 875fe58626..41a40ef3cb 100644 --- a/LICENSE +++ b/LICENSE @@ -443,3 +443,28 @@ are: OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ + +- RustPython, licensed as follows: + """ + MIT License + + Copyright (c) 2020 RustPython Team + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + """ diff --git a/resources/test/fixtures/F521.py b/resources/test/fixtures/F521.py new file mode 100644 index 0000000000..a6c76b3c79 --- /dev/null +++ b/resources/test/fixtures/F521.py @@ -0,0 +1,29 @@ +"{".format(1) +"}".format(1) +"{foo[}".format(foo=1) +# too much string recursion (placeholder-in-placeholder) +"{:{:{}}}".format(1, 2, 3) +# ruff picks these issues up, but flake8 doesn't +"{foo[]}".format(foo={"": 1}) +"{foo..}".format(foo=1) +"{foo..bar}".format(foo=1) + +# "{} {1}".format(1, 2) # F525 +# "{0} {}".format(1, 2) # F525 +# "{}".format(1, 2) # F523 +# "{}".format(1, bar=2) # F522 +# "{} {}".format(1) # F524 +# "{2}".format() # F524 +# "{bar}".format() # F524 + +# The following are all "good" uses of .format +"{.__class__}".format("") +"{foo[bar]}".format(foo={"bar": "barv"}) +"{[bar]}".format({"bar": "barv"}) +"{:{}} {}".format(1, 15, 2) +"{:2}".format(1) +"{foo}-{}".format(1, foo=2) +a = () +"{}".format(*a) +k = {} +"{foo}".format(**k) diff --git a/src/check_ast.rs b/src/check_ast.rs index 370da7865e..d3f0d3b19d 100644 --- a/src/check_ast.rs +++ b/src/check_ast.rs @@ -1243,6 +1243,28 @@ where args, keywords, } => { + // pyflakes + if let ExprKind::Attribute { value, attr, .. } = &func.node { + if let ExprKind::Constant { + value: Constant::Str(value), + .. + } = &value.node + { + if attr == "format" { + // "...".format(...) call + if self.settings.enabled.contains(&CheckCode::F521) { + let location = Range::from_located(expr); + if let Some(check) = + pyflakes::checks::string_dot_format_invalid(value, location) + { + self.add_check(check); + } + } + } + } + } + + // pyupgrade if self.settings.enabled.contains(&CheckCode::U005) { pyupgrade::plugins::deprecated_unittest_alias(self, func); } diff --git a/src/checks.rs b/src/checks.rs index 02345c536c..cd67ab8a57 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -52,6 +52,7 @@ pub enum CheckCode { F405, F406, F407, + F521, F541, F601, F602, @@ -403,6 +404,7 @@ pub enum CheckKind { MultiValueRepeatedKeyVariable(String), RaiseNotImplemented, ReturnOutsideFunction, + StringDotFormatInvalidFormat(String), TwoStarredExpressions, UndefinedExport(String), UndefinedLocal(String), @@ -646,6 +648,7 @@ impl CheckCode { } CheckCode::F406 => CheckKind::ImportStarNotPermitted("...".to_string()), CheckCode::F407 => CheckKind::FutureFeatureNotDefined("...".to_string()), + CheckCode::F521 => CheckKind::StringDotFormatInvalidFormat("...".to_string()), CheckCode::F541 => CheckKind::FStringMissingPlaceholders, CheckCode::F601 => CheckKind::MultiValueRepeatedKeyLiteral, CheckCode::F602 => CheckKind::MultiValueRepeatedKeyVariable("...".to_string()), @@ -912,6 +915,7 @@ impl CheckCode { CheckCode::F405 => CheckCategory::Pyflakes, CheckCode::F406 => CheckCategory::Pyflakes, CheckCode::F407 => CheckCategory::Pyflakes, + CheckCode::F521 => CheckCategory::Pyflakes, CheckCode::F541 => CheckCategory::Pyflakes, CheckCode::F601 => CheckCategory::Pyflakes, CheckCode::F602 => CheckCategory::Pyflakes, @@ -1136,6 +1140,7 @@ impl CheckKind { CheckKind::NotIsTest => &CheckCode::E714, CheckKind::RaiseNotImplemented => &CheckCode::F901, CheckKind::ReturnOutsideFunction => &CheckCode::F706, + CheckKind::StringDotFormatInvalidFormat(_) => &CheckCode::F521, CheckKind::SyntaxError(_) => &CheckCode::E999, CheckKind::ExpressionsInStarAssignment => &CheckCode::F621, CheckKind::TrueFalseComparison(..) => &CheckCode::E712, @@ -1422,6 +1427,9 @@ impl CheckKind { CheckKind::ReturnOutsideFunction => { "`return` statement outside of a function/method".to_string() } + CheckKind::StringDotFormatInvalidFormat(message) => { + format!("'...'.format(...) has invalid format string: {message}") + } CheckKind::SyntaxError(message) => format!("SyntaxError: {message}"), CheckKind::ExpressionsInStarAssignment => { "Too many expressions in star-unpacking assignment".to_string() diff --git a/src/checks_gen.rs b/src/checks_gen.rs index cc09fefaba..3905e47f8c 100644 --- a/src/checks_gen.rs +++ b/src/checks_gen.rs @@ -847,6 +847,7 @@ impl CheckCodePrefix { CheckCode::F405, CheckCode::F406, CheckCode::F407, + CheckCode::F521, CheckCode::F541, CheckCode::F601, CheckCode::F602, diff --git a/src/lib.rs b/src/lib.rs index 5a1e76dbf8..849d44e84e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -70,6 +70,7 @@ pub mod settings; pub mod source_code_locator; #[cfg(feature = "update-informer")] pub mod updates; +mod vendored; pub mod visibility; /// Run Ruff over Python source code directly. diff --git a/src/linter.rs b/src/linter.rs index 9e2002728d..790271119b 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -506,6 +506,7 @@ mod tests { #[test_case(CheckCode::F405, Path::new("F405.py"); "F405")] #[test_case(CheckCode::F406, Path::new("F406.py"); "F406")] #[test_case(CheckCode::F407, Path::new("F407.py"); "F407")] + #[test_case(CheckCode::F521, Path::new("F521.py"); "F521")] #[test_case(CheckCode::F541, Path::new("F541.py"); "F541")] #[test_case(CheckCode::F601, Path::new("F601.py"); "F601")] #[test_case(CheckCode::F602, Path::new("F602.py"); "F602")] diff --git a/src/pyflakes/checks.rs b/src/pyflakes/checks.rs index 7dcea16ce9..298de3d2ac 100644 --- a/src/pyflakes/checks.rs +++ b/src/pyflakes/checks.rs @@ -6,6 +6,30 @@ use rustpython_parser::ast::{ use crate::ast::types::{BindingKind, FunctionScope, Range, Scope, ScopeKind}; use crate::checks::{Check, CheckKind}; +use crate::vendored::format::{FieldName, FormatPart, FormatString, FromTemplate}; + +// F521 +pub fn string_dot_format_invalid(literal: &str, location: Range) -> Option { + match FormatString::from_str(literal) { + Err(e) => Some(Check::new( + CheckKind::StringDotFormatInvalidFormat(e.to_string()), + location, + )), + Ok(format_string) => { + for part in format_string.format_parts { + if let FormatPart::Field { field_name, .. } = &part { + if let Err(e) = FieldName::parse(field_name) { + return Some(Check::new( + CheckKind::StringDotFormatInvalidFormat(e.to_string()), + location, + )); + } + } + } + None + } + } +} /// F631 pub fn assert_tuple(test: &Expr, location: Range) -> Option { diff --git a/src/pyflakes/format.rs b/src/pyflakes/format.rs new file mode 100644 index 0000000000..e371df419c --- /dev/null +++ b/src/pyflakes/format.rs @@ -0,0 +1,22 @@ +//! Implements helper functions for using vendored/format.rs +use std::fmt; + +use crate::vendored::format::FormatParseError; + +impl fmt::Display for FormatParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let message = match self { + FormatParseError::EmptyAttribute => "Empty attribute in format string", + FormatParseError::InvalidCharacterAfterRightBracket => { + "Only '.' or '[' may follow ']' in format field specifier" + } + FormatParseError::InvalidFormatSpecifier => "Max string recursion exceeded", + FormatParseError::MissingStartBracket => "Single '}' encountered in format string", + FormatParseError::MissingRightBracket => "Expected '}' before end of string", + FormatParseError::UnmatchedBracket => "Single '{' encountered in format string", + _ => "Unexpected error parsing format string", + }; + + write!(f, "{message}") + } +} diff --git a/src/pyflakes/mod.rs b/src/pyflakes/mod.rs index e96a4069b2..ee40f9eeca 100644 --- a/src/pyflakes/mod.rs +++ b/src/pyflakes/mod.rs @@ -1,3 +1,4 @@ pub mod checks; pub mod fixes; +mod format; pub mod plugins; diff --git a/src/snapshots/ruff__linter__tests__F521_F521.py.snap b/src/snapshots/ruff__linter__tests__F521_F521.py.snap new file mode 100644 index 0000000000..c17f553e36 --- /dev/null +++ b/src/snapshots/ruff__linter__tests__F521_F521.py.snap @@ -0,0 +1,68 @@ +--- +source: src/linter.rs +expression: checks +--- +- kind: + StringDotFormatInvalidFormat: "Single '{' encountered in format string" + location: + row: 1 + column: 0 + end_location: + row: 1 + column: 13 + fix: ~ +- kind: + StringDotFormatInvalidFormat: "Single '}' encountered in format string" + location: + row: 2 + column: 0 + end_location: + row: 2 + column: 13 + fix: ~ +- kind: + StringDotFormatInvalidFormat: "Expected '}' before end of string" + location: + row: 3 + column: 0 + end_location: + row: 3 + column: 22 + fix: ~ +- kind: + StringDotFormatInvalidFormat: Max string recursion exceeded + location: + row: 5 + column: 0 + end_location: + row: 5 + column: 26 + fix: ~ +- kind: + StringDotFormatInvalidFormat: Empty attribute in format string + location: + row: 7 + column: 0 + end_location: + row: 7 + column: 29 + fix: ~ +- kind: + StringDotFormatInvalidFormat: Empty attribute in format string + location: + row: 8 + column: 0 + end_location: + row: 8 + column: 23 + fix: ~ +- kind: + StringDotFormatInvalidFormat: Empty attribute in format string + location: + row: 9 + column: 0 + end_location: + row: 9 + column: 26 + fix: ~ + diff --git a/src/vendored/format.rs b/src/vendored/format.rs new file mode 100644 index 0000000000..952be31dc2 --- /dev/null +++ b/src/vendored/format.rs @@ -0,0 +1,360 @@ +//! Vendored from [format.rs in rustpython-vm](https://github.com/RustPython/RustPython/blob/f54b5556e28256763c5506813ea977c9e1445af0/vm/src/format.rs). +//! The only changes we make are to remove dead code and code involving the vm. +use itertools::{Itertools, PeekingNext}; + +#[derive(Debug, PartialEq)] +pub(crate) enum FormatParseError { + UnmatchedBracket, + MissingStartBracket, + UnescapedStartBracketInLiteral, + InvalidFormatSpecifier, + UnknownConversion, + EmptyAttribute, + MissingRightBracket, + InvalidCharacterAfterRightBracket, +} + +#[derive(Debug, PartialEq)] +pub(crate) enum FieldNamePart { + Attribute(String), + Index(usize), + StringIndex(String), +} + +impl FieldNamePart { + fn parse_part( + chars: &mut impl PeekingNext, + ) -> Result, FormatParseError> { + chars + .next() + .map(|ch| match ch { + '.' => { + let mut attribute = String::new(); + for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { + attribute.push(ch); + } + if attribute.is_empty() { + Err(FormatParseError::EmptyAttribute) + } else { + Ok(FieldNamePart::Attribute(attribute)) + } + } + '[' => { + let mut index = String::new(); + for ch in chars { + if ch == ']' { + return if index.is_empty() { + Err(FormatParseError::EmptyAttribute) + } else if let Ok(index) = index.parse::() { + Ok(FieldNamePart::Index(index)) + } else { + Ok(FieldNamePart::StringIndex(index)) + }; + } + index.push(ch); + } + Err(FormatParseError::MissingRightBracket) + } + _ => Err(FormatParseError::InvalidCharacterAfterRightBracket), + }) + .transpose() + } +} + +#[derive(Debug, PartialEq)] +pub(crate) enum FieldType { + Auto, + Index(usize), + Keyword(String), +} + +#[derive(Debug, PartialEq)] +pub(crate) struct FieldName { + pub field_type: FieldType, + pub parts: Vec, +} + +impl FieldName { + pub(crate) fn parse(text: &str) -> Result { + let mut chars = text.chars().peekable(); + let mut first = String::new(); + for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { + first.push(ch); + } + + let field_type = if first.is_empty() { + FieldType::Auto + } else if let Ok(index) = first.parse::() { + FieldType::Index(index) + } else { + FieldType::Keyword(first) + }; + + let mut parts = Vec::new(); + while let Some(part) = FieldNamePart::parse_part(&mut chars)? { + parts.push(part); + } + + Ok(FieldName { field_type, parts }) + } +} + +#[derive(Debug, PartialEq)] +pub(crate) enum FormatPart { + Field { + field_name: String, + preconversion_spec: Option, + format_spec: String, + }, + Literal(String), +} + +#[derive(Debug, PartialEq)] +pub(crate) struct FormatString { + pub format_parts: Vec, +} + +impl FormatString { + fn parse_literal_single(text: &str) -> Result<(char, &str), FormatParseError> { + let mut chars = text.chars(); + // This should never be called with an empty str + let first_char = chars.next().unwrap(); + // isn't this detectable only with bytes operation? + if first_char == '{' || first_char == '}' { + let maybe_next_char = chars.next(); + // if we see a bracket, it has to be escaped by doubling up to be in a literal + return if maybe_next_char.is_none() || maybe_next_char.unwrap() != first_char { + Err(FormatParseError::UnescapedStartBracketInLiteral) + } else { + Ok((first_char, chars.as_str())) + }; + } + Ok((first_char, chars.as_str())) + } + + fn parse_literal(text: &str) -> Result<(FormatPart, &str), FormatParseError> { + let mut cur_text = text; + let mut result_string = String::new(); + while !cur_text.is_empty() { + match FormatString::parse_literal_single(cur_text) { + Ok((next_char, remaining)) => { + result_string.push(next_char); + cur_text = remaining; + } + Err(err) => { + return if result_string.is_empty() { + Err(err) + } else { + Ok((FormatPart::Literal(result_string), cur_text)) + }; + } + } + } + Ok((FormatPart::Literal(result_string), "")) + } + + fn parse_part_in_brackets(text: &str) -> Result { + let parts: Vec<&str> = text.splitn(2, ':').collect(); + // before the comma is a keyword or arg index, after the comma is maybe a spec. + let arg_part = parts[0]; + + let format_spec = if parts.len() > 1 { + parts[1].to_owned() + } else { + String::new() + }; + + // On parts[0] can still be the preconversor (!r, !s, !a) + let parts: Vec<&str> = arg_part.splitn(2, '!').collect(); + // before the bang is a keyword or arg index, after the comma is maybe a + // conversor spec. + let arg_part = parts[0]; + + let preconversion_spec = parts + .get(1) + .map(|conversion| { + // conversions are only every one character + conversion + .chars() + .exactly_one() + .map_err(|_| FormatParseError::UnknownConversion) + }) + .transpose()?; + + Ok(FormatPart::Field { + field_name: arg_part.to_owned(), + preconversion_spec, + format_spec, + }) + } + + fn parse_spec(text: &str) -> Result<(FormatPart, &str), FormatParseError> { + let mut nested = false; + let mut end_bracket_pos = None; + let mut left = String::new(); + + // There may be one layer nesting brackets in spec + for (idx, c) in text.chars().enumerate() { + if idx == 0 { + if c != '{' { + return Err(FormatParseError::MissingStartBracket); + } + } else if c == '{' { + if nested { + return Err(FormatParseError::InvalidFormatSpecifier); + } + nested = true; + left.push(c); + continue; + } else if c == '}' { + if nested { + nested = false; + left.push(c); + continue; + } + end_bracket_pos = Some(idx); + break; + } else { + left.push(c); + } + } + if let Some(pos) = end_bracket_pos { + let (_, right) = text.split_at(pos); + let format_part = FormatString::parse_part_in_brackets(&left)?; + Ok((format_part, &right[1..])) + } else { + Err(FormatParseError::UnmatchedBracket) + } + } +} + +pub(crate) trait FromTemplate<'a>: Sized { + type Err; + fn from_str(s: &'a str) -> Result; +} + +impl<'a> FromTemplate<'a> for FormatString { + type Err = FormatParseError; + + fn from_str(text: &'a str) -> Result { + let mut cur_text: &str = text; + let mut parts: Vec = Vec::new(); + while !cur_text.is_empty() { + // Try to parse both literals and bracketed format parts until we + // run out of text + cur_text = FormatString::parse_literal(cur_text) + .or_else(|_| FormatString::parse_spec(cur_text)) + .map(|(part, new_text)| { + parts.push(part); + new_text + })?; + } + Ok(FormatString { + format_parts: parts, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_format_parse() { + let expected = Ok(FormatString { + format_parts: vec![ + FormatPart::Literal("abcd".to_owned()), + FormatPart::Field { + field_name: "1".to_owned(), + preconversion_spec: None, + format_spec: String::new(), + }, + FormatPart::Literal(":".to_owned()), + FormatPart::Field { + field_name: "key".to_owned(), + preconversion_spec: None, + format_spec: String::new(), + }, + ], + }); + + assert_eq!(FormatString::from_str("abcd{1}:{key}"), expected); + } + + #[test] + fn test_format_parse_fail() { + assert_eq!( + FormatString::from_str("{s"), + Err(FormatParseError::UnmatchedBracket) + ); + } + + #[test] + fn test_format_parse_escape() { + let expected = Ok(FormatString { + format_parts: vec![ + FormatPart::Literal("{".to_owned()), + FormatPart::Field { + field_name: "key".to_owned(), + preconversion_spec: None, + format_spec: String::new(), + }, + FormatPart::Literal("}ddfe".to_owned()), + ], + }); + + assert_eq!(FormatString::from_str("{{{key}}}ddfe"), expected); + } + + #[test] + fn test_parse_field_name() { + assert_eq!( + FieldName::parse(""), + Ok(FieldName { + field_type: FieldType::Auto, + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("0"), + Ok(FieldName { + field_type: FieldType::Index(0), + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("key"), + Ok(FieldName { + field_type: FieldType::Keyword("key".to_owned()), + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("key.attr[0][string]"), + Ok(FieldName { + field_type: FieldType::Keyword("key".to_owned()), + parts: vec![ + FieldNamePart::Attribute("attr".to_owned()), + FieldNamePart::Index(0), + FieldNamePart::StringIndex("string".to_owned()) + ], + }) + ); + assert_eq!( + FieldName::parse("key.."), + Err(FormatParseError::EmptyAttribute) + ); + assert_eq!( + FieldName::parse("key[]"), + Err(FormatParseError::EmptyAttribute) + ); + assert_eq!( + FieldName::parse("key["), + Err(FormatParseError::MissingRightBracket) + ); + assert_eq!( + FieldName::parse("key[0]after"), + Err(FormatParseError::InvalidCharacterAfterRightBracket) + ); + } +} diff --git a/src/vendored/mod.rs b/src/vendored/mod.rs new file mode 100644 index 0000000000..db7b59d94c --- /dev/null +++ b/src/vendored/mod.rs @@ -0,0 +1 @@ +pub mod format;