feat: no unnecessary encode utf8 (#686)

This commit is contained in:
Martin Lehoux 2022-11-12 16:54:36 +00:00 committed by GitHub
parent bbc38fea73
commit afa59d78bb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 487 additions and 3 deletions

View file

@ -443,6 +443,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI.
| U009 | PEP3120UnnecessaryCodingComment | utf-8 encoding declaration is unnecessary | 🛠 | | U009 | PEP3120UnnecessaryCodingComment | utf-8 encoding declaration is unnecessary | 🛠 |
| U010 | UnnecessaryFutureImport | Unnecessary `__future__` import `...` for target Python version | 🛠 | | U010 | UnnecessaryFutureImport | Unnecessary `__future__` import `...` for target Python version | 🛠 |
| U011 | UnnecessaryLRUCacheParams | Unnecessary parameters to functools.lru_cache | 🛠 | | U011 | UnnecessaryLRUCacheParams | Unnecessary parameters to functools.lru_cache | 🛠 |
| U012 | UnnecessaryEncodeUTF8 | Unnecessary call to `encode` as UTF-8 | 🛠 |
### pep8-naming ### pep8-naming
@ -687,7 +688,7 @@ including:
- [`flake8-comprehensions`](https://pypi.org/project/flake8-comprehensions/) - [`flake8-comprehensions`](https://pypi.org/project/flake8-comprehensions/)
- [`flake8-bugbear`](https://pypi.org/project/flake8-bugbear/) (21/32) - [`flake8-bugbear`](https://pypi.org/project/flake8-bugbear/) (21/32)
- [`flake8-2020`](https://pypi.org/project/flake8-2020/) - [`flake8-2020`](https://pypi.org/project/flake8-2020/)
- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34) - [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34)
- [`autoflake`](https://pypi.org/project/autoflake/) (1/7) - [`autoflake`](https://pypi.org/project/autoflake/) (1/7)
Beyond rule-set parity, Ruff suffers from the following limitations vis-à-vis Flake8: Beyond rule-set parity, Ruff suffers from the following limitations vis-à-vis Flake8:
@ -713,7 +714,7 @@ Today, Ruff can be used to replace Flake8 when used with any of the following pl
- [`flake8-2020`](https://pypi.org/project/flake8-2020/) - [`flake8-2020`](https://pypi.org/project/flake8-2020/)
Ruff can also replace [`isort`](https://pypi.org/project/isort/), [`yesqa`](https://github.com/asottile/yesqa), Ruff can also replace [`isort`](https://pypi.org/project/isort/), [`yesqa`](https://github.com/asottile/yesqa),
and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34). and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34).
If you're looking to use Ruff, but rely on an unsupported Flake8 plugin, feel free to file an Issue. If you're looking to use Ruff, but rely on an unsupported Flake8 plugin, feel free to file an Issue.

52
resources/test/fixtures/U012.py vendored Normal file
View file

@ -0,0 +1,52 @@
# ASCII literals should be replaced by a bytes literal
"foo".encode("utf-8") # b"foo"
"foo".encode("u8") # b"foo"
"foo".encode() # b"foo"
"foo".encode("UTF8") # b"foo"
U"foo".encode("utf-8") # b"foo"
"foo".encode(encoding="utf-8") # b"foo"
"""
Lorem
Ipsum
""".encode(
"utf-8"
)
# b"""
# Lorem
#
# Ipsum
# """
# `encode` on variables should not be processed.
string = "hello there"
string.encode("utf-8")
bar = "bar"
f"foo{bar}".encode("utf-8") # f"foo{bar}".encode()
encoding = "latin"
"foo".encode(encoding)
f"foo{bar}".encode(encoding)
# `encode` with custom args and kwargs should not be processed.
"foo".encode("utf-8", errors="replace")
"foo".encode("utf-8", "replace")
"foo".encode(errors="replace")
"foo".encode(encoding="utf-8", errors="replace")
# `encode` with custom args and kwargs on unicode should not be processed.
"unicode text©".encode("utf-8", errors="replace")
"unicode text©".encode("utf-8", "replace")
"unicode text©".encode(errors="replace")
"unicode text©".encode(encoding="utf-8", errors="replace")
# Unicode literals should only be stripped of default encoding.
"unicode text©".encode("utf-8") # "unicode text©".encode()
"unicode text©".encode()
"unicode text©".encode(encoding="UTF8") # "unicode text©".encode()
r"fo\o".encode("utf-8") # br"fo\o"
u"foo".encode("utf-8") # b"foo"
R"fo\o".encode("utf-8") # br"fo\o"
U"foo".encode("utf-8") # b"foo"
print("foo".encode()) # print(b"foo")

View file

@ -1073,6 +1073,10 @@ where
pyupgrade::plugins::super_call_with_parameters(self, expr, func, args); pyupgrade::plugins::super_call_with_parameters(self, expr, func, args);
} }
if self.settings.enabled.contains(&CheckCode::U012) {
pyupgrade::plugins::unnecessary_encode_utf8(self, expr, func, args, keywords);
}
// flake8-print // flake8-print
if self.settings.enabled.contains(&CheckCode::T201) if self.settings.enabled.contains(&CheckCode::T201)
|| self.settings.enabled.contains(&CheckCode::T203) || self.settings.enabled.contains(&CheckCode::T203)

View file

@ -156,6 +156,7 @@ pub enum CheckCode {
U009, U009,
U010, U010,
U011, U011,
U012,
// pydocstyle // pydocstyle
D100, D100,
D101, D101,
@ -444,6 +445,7 @@ pub enum CheckKind {
PEP3120UnnecessaryCodingComment, PEP3120UnnecessaryCodingComment,
UnnecessaryFutureImport(Vec<String>), UnnecessaryFutureImport(Vec<String>),
UnnecessaryLRUCacheParams, UnnecessaryLRUCacheParams,
UnnecessaryEncodeUTF8,
// pydocstyle // pydocstyle
BlankLineAfterLastSection(String), BlankLineAfterLastSection(String),
BlankLineAfterSection(String), BlankLineAfterSection(String),
@ -691,6 +693,7 @@ impl CheckCode {
CheckCode::U009 => CheckKind::PEP3120UnnecessaryCodingComment, CheckCode::U009 => CheckKind::PEP3120UnnecessaryCodingComment,
CheckCode::U010 => CheckKind::UnnecessaryFutureImport(vec!["...".to_string()]), CheckCode::U010 => CheckKind::UnnecessaryFutureImport(vec!["...".to_string()]),
CheckCode::U011 => CheckKind::UnnecessaryLRUCacheParams, CheckCode::U011 => CheckKind::UnnecessaryLRUCacheParams,
CheckCode::U012 => CheckKind::UnnecessaryEncodeUTF8,
// pydocstyle // pydocstyle
CheckCode::D100 => CheckKind::PublicModule, CheckCode::D100 => CheckKind::PublicModule,
CheckCode::D101 => CheckKind::PublicClass, CheckCode::D101 => CheckKind::PublicClass,
@ -901,6 +904,7 @@ impl CheckCode {
CheckCode::U009 => CheckCategory::Pyupgrade, CheckCode::U009 => CheckCategory::Pyupgrade,
CheckCode::U010 => CheckCategory::Pyupgrade, CheckCode::U010 => CheckCategory::Pyupgrade,
CheckCode::U011 => CheckCategory::Pyupgrade, CheckCode::U011 => CheckCategory::Pyupgrade,
CheckCode::U012 => CheckCategory::Pyupgrade,
CheckCode::D100 => CheckCategory::Pydocstyle, CheckCode::D100 => CheckCategory::Pydocstyle,
CheckCode::D101 => CheckCategory::Pydocstyle, CheckCode::D101 => CheckCategory::Pydocstyle,
CheckCode::D102 => CheckCategory::Pydocstyle, CheckCode::D102 => CheckCategory::Pydocstyle,
@ -1103,6 +1107,7 @@ impl CheckKind {
CheckKind::PEP3120UnnecessaryCodingComment => &CheckCode::U009, CheckKind::PEP3120UnnecessaryCodingComment => &CheckCode::U009,
CheckKind::UnnecessaryFutureImport(_) => &CheckCode::U010, CheckKind::UnnecessaryFutureImport(_) => &CheckCode::U010,
CheckKind::UnnecessaryLRUCacheParams => &CheckCode::U011, CheckKind::UnnecessaryLRUCacheParams => &CheckCode::U011,
CheckKind::UnnecessaryEncodeUTF8 => &CheckCode::U012,
// pydocstyle // pydocstyle
CheckKind::BlankLineAfterLastSection(_) => &CheckCode::D413, CheckKind::BlankLineAfterLastSection(_) => &CheckCode::D413,
CheckKind::BlankLineAfterSection(_) => &CheckCode::D410, CheckKind::BlankLineAfterSection(_) => &CheckCode::D410,
@ -1607,6 +1612,7 @@ impl CheckKind {
CheckKind::UnnecessaryLRUCacheParams => { CheckKind::UnnecessaryLRUCacheParams => {
"Unnecessary parameters to functools.lru_cache".to_string() "Unnecessary parameters to functools.lru_cache".to_string()
} }
CheckKind::UnnecessaryEncodeUTF8 => "Unnecessary call to `encode` as UTF-8".to_string(),
// pydocstyle // pydocstyle
CheckKind::FitsOnOneLine => "One-line docstring should fit on one line".to_string(), CheckKind::FitsOnOneLine => "One-line docstring should fit on one line".to_string(),
CheckKind::BlankLineAfterSummary => { CheckKind::BlankLineAfterSummary => {
@ -1873,6 +1879,7 @@ impl CheckKind {
| CheckKind::UnnecessaryAbspath | CheckKind::UnnecessaryAbspath
| CheckKind::UnnecessaryCollectionCall(_) | CheckKind::UnnecessaryCollectionCall(_)
| CheckKind::UnnecessaryComprehension(_) | CheckKind::UnnecessaryComprehension(_)
| CheckKind::UnnecessaryEncodeUTF8
| CheckKind::UnnecessaryFutureImport(_) | CheckKind::UnnecessaryFutureImport(_)
| CheckKind::UnnecessaryGeneratorDict | CheckKind::UnnecessaryGeneratorDict
| CheckKind::UnnecessaryGeneratorList | CheckKind::UnnecessaryGeneratorList

View file

@ -265,6 +265,7 @@ pub enum CheckCodePrefix {
U01, U01,
U010, U010,
U011, U011,
U012,
W, W,
W2, W2,
W29, W29,
@ -1004,6 +1005,7 @@ impl CheckCodePrefix {
CheckCode::U009, CheckCode::U009,
CheckCode::U010, CheckCode::U010,
CheckCode::U011, CheckCode::U011,
CheckCode::U012,
], ],
CheckCodePrefix::U0 => vec![ CheckCodePrefix::U0 => vec![
CheckCode::U001, CheckCode::U001,
@ -1017,6 +1019,7 @@ impl CheckCodePrefix {
CheckCode::U009, CheckCode::U009,
CheckCode::U010, CheckCode::U010,
CheckCode::U011, CheckCode::U011,
CheckCode::U012,
], ],
CheckCodePrefix::U00 => vec![ CheckCodePrefix::U00 => vec![
CheckCode::U001, CheckCode::U001,
@ -1038,9 +1041,10 @@ impl CheckCodePrefix {
CheckCodePrefix::U007 => vec![CheckCode::U007], CheckCodePrefix::U007 => vec![CheckCode::U007],
CheckCodePrefix::U008 => vec![CheckCode::U008], CheckCodePrefix::U008 => vec![CheckCode::U008],
CheckCodePrefix::U009 => vec![CheckCode::U009], CheckCodePrefix::U009 => vec![CheckCode::U009],
CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011], CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011, CheckCode::U012],
CheckCodePrefix::U010 => vec![CheckCode::U010], CheckCodePrefix::U010 => vec![CheckCode::U010],
CheckCodePrefix::U011 => vec![CheckCode::U011], CheckCodePrefix::U011 => vec![CheckCode::U011],
CheckCodePrefix::U012 => vec![CheckCode::U012],
CheckCodePrefix::W => vec![CheckCode::W292, CheckCode::W605], CheckCodePrefix::W => vec![CheckCode::W292, CheckCode::W605],
CheckCodePrefix::W2 => vec![CheckCode::W292], CheckCodePrefix::W2 => vec![CheckCode::W292],
CheckCodePrefix::W29 => vec![CheckCode::W292], CheckCodePrefix::W29 => vec![CheckCode::W292],
@ -1351,6 +1355,7 @@ impl CheckCodePrefix {
CheckCodePrefix::U01 => PrefixSpecificity::Tens, CheckCodePrefix::U01 => PrefixSpecificity::Tens,
CheckCodePrefix::U010 => PrefixSpecificity::Explicit, CheckCodePrefix::U010 => PrefixSpecificity::Explicit,
CheckCodePrefix::U011 => PrefixSpecificity::Explicit, CheckCodePrefix::U011 => PrefixSpecificity::Explicit,
CheckCodePrefix::U012 => PrefixSpecificity::Explicit,
CheckCodePrefix::W => PrefixSpecificity::Category, CheckCodePrefix::W => PrefixSpecificity::Category,
CheckCodePrefix::W2 => PrefixSpecificity::Hundreds, CheckCodePrefix::W2 => PrefixSpecificity::Hundreds,
CheckCodePrefix::W29 => PrefixSpecificity::Tens, CheckCodePrefix::W29 => PrefixSpecificity::Tens,

View file

@ -486,6 +486,7 @@ mod tests {
#[test_case(CheckCode::U010, Path::new("U010.py"); "U010")] #[test_case(CheckCode::U010, Path::new("U010.py"); "U010")]
#[test_case(CheckCode::U011, Path::new("U011_0.py"); "U011_0")] #[test_case(CheckCode::U011, Path::new("U011_0.py"); "U011_0")]
#[test_case(CheckCode::U011, Path::new("U011_1.py"); "U011_1")] #[test_case(CheckCode::U011, Path::new("U011_1.py"); "U011_1")]
#[test_case(CheckCode::U012, Path::new("U012.py"); "U012")]
#[test_case(CheckCode::W292, Path::new("W292_0.py"); "W292_0")] #[test_case(CheckCode::W292, Path::new("W292_0.py"); "W292_0")]
#[test_case(CheckCode::W292, Path::new("W292_1.py"); "W292_1")] #[test_case(CheckCode::W292, Path::new("W292_1.py"); "W292_1")]
#[test_case(CheckCode::W292, Path::new("W292_2.py"); "W292_2")] #[test_case(CheckCode::W292, Path::new("W292_2.py"); "W292_2")]

View file

@ -2,6 +2,7 @@ pub use deprecated_unittest_alias::deprecated_unittest_alias;
pub use super_call_with_parameters::super_call_with_parameters; pub use super_call_with_parameters::super_call_with_parameters;
pub use type_of_primitive::type_of_primitive; pub use type_of_primitive::type_of_primitive;
pub use unnecessary_abspath::unnecessary_abspath; pub use unnecessary_abspath::unnecessary_abspath;
pub use unnecessary_encode_utf8::unnecessary_encode_utf8;
pub use unnecessary_future_import::unnecessary_future_import; pub use unnecessary_future_import::unnecessary_future_import;
pub use unnecessary_lru_cache_params::unnecessary_lru_cache_params; pub use unnecessary_lru_cache_params::unnecessary_lru_cache_params;
pub use use_pep585_annotation::use_pep585_annotation; pub use use_pep585_annotation::use_pep585_annotation;
@ -13,6 +14,7 @@ mod deprecated_unittest_alias;
mod super_call_with_parameters; mod super_call_with_parameters;
mod type_of_primitive; mod type_of_primitive;
mod unnecessary_abspath; mod unnecessary_abspath;
mod unnecessary_encode_utf8;
mod unnecessary_future_import; mod unnecessary_future_import;
mod unnecessary_lru_cache_params; mod unnecessary_lru_cache_params;
mod use_pep585_annotation; mod use_pep585_annotation;

View file

@ -0,0 +1,152 @@
use rustpython_ast::{Constant, Expr, ExprKind, Keyword};
use crate::ast::types::Range;
use crate::autofix::Fix;
use crate::check_ast::Checker;
use crate::checks::{Check, CheckKind};
use crate::source_code_locator::SourceCodeLocator;
/// Encoding names that this rule treats as UTF-8. `is_utf8_encoding_arg`
/// lowercases the argument before looking it up here, so the comparison is
/// case-insensitive (e.g. `"UTF8"` matches `"utf8"`).
const UTF8_LITERALS: &[&str] = &["utf-8", "utf8", "utf_8", "u8", "utf", "cp65001"];
/// If `func` is an attribute access whose attribute name is `encode`
/// (i.e. `<receiver>.encode`), return the receiver expression; otherwise
/// return `None`.
fn match_encoded_variable(func: &Expr) -> Option<&Expr> {
    match &func.node {
        ExprKind::Attribute {
            value: receiver,
            attr,
            ..
        } if attr == "encode" => Some(receiver),
        _ => None,
    }
}
/// Return `true` if `arg` is a string constant that, once lowercased, is one
/// of the names listed in `UTF8_LITERALS`. Any other kind of expression
/// (variables, calls, non-string constants, ...) yields `false`.
fn is_utf8_encoding_arg(arg: &Expr) -> bool {
    match &arg.node {
        ExprKind::Constant {
            value: Constant::Str(encoding),
            ..
        } => {
            let normalized = encoding.to_lowercase();
            UTF8_LITERALS
                .iter()
                .any(|candidate| *candidate == normalized.as_str())
        }
        _ => false,
    }
}
/// Return `true` if a call to `encode` with the given positional `args` and
/// keyword `kwargs` is equivalent to a bare `.encode()`.
///
/// Three shapes are accepted:
/// - no arguments at all;
/// - exactly one positional argument that `is_utf8_encoding_arg` accepts;
/// - exactly one keyword argument named `encoding` whose value
///   `is_utf8_encoding_arg` accepts.
///
/// Every other combination (e.g. an `errors` argument) returns `false`.
///
/// The parameters are slices rather than `&Vec<_>`; call sites that hold a
/// `&Vec<_>` coerce automatically.
fn is_default_encode(args: &[Expr], kwargs: &[Keyword]) -> bool {
    match (args, kwargs) {
        // .encode()
        ([], []) => true,
        // .encode(encoding)
        ([encoding], []) => is_utf8_encoding_arg(encoding),
        // .encode(kwarg=kwarg)
        ([], [keyword]) => {
            // Compare as `&str` so no `String` is allocated for the check.
            keyword.node.arg.as_deref() == Some("encoding")
                && is_utf8_encoding_arg(&keyword.node.value)
        }
        // .encode(*args, **kwargs)
        _ => false,
    }
}
// Build the `UnnecessaryEncodeUTF8` check for an `encode` call whose single
// encoding argument is redundant. The check spans `expr`; when `patch` is
// set, it carries a fix deleting the first positional argument or, if there
// is none, the first keyword argument. Returns `None` when the call has no
// arguments at all.
fn delete_default_encode_arg_or_kwarg(
    expr: &Expr,
    args: &[Expr],
    kwargs: &[Keyword],
    patch: bool,
) -> Option<Check> {
    // A positional argument takes precedence over a keyword argument.
    let (start, end) = args
        .first()
        .map(|arg| (arg.location, arg.end_location))
        .or_else(|| {
            kwargs
                .first()
                .map(|kwarg| (kwarg.location, kwarg.end_location))
        })?;

    let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr));
    if patch {
        // The end location is only unwrapped when a fix is actually produced.
        check.amend(Fix::deletion(start, end.unwrap()));
    }
    Some(check)
}
// Return `true` if the source text of a string literal is not raw and
// contains a `\u`, `\U` or `\N` escape. Such escapes are only interpreted in
// `str` literals, so prefixing the same text with `b` would change its value.
fn has_str_only_escape(literal_source: &str) -> bool {
    // Everything before the first quote character is the literal's prefix.
    let quote_at = literal_source
        .find(|c: char| c == '"' || c == '\'')
        .unwrap_or(0);
    let (prefix, body) = literal_source.split_at(quote_at);
    // In raw literals a backslash is an ordinary character in both `str`
    // and `bytes`, so the rewrite is value-preserving.
    if prefix.contains(|c: char| c == 'r' || c == 'R') {
        return false;
    }
    let mut chars = body.chars();
    while let Some(c) = chars.next() {
        // Consume the character after each backslash so that an escaped
        // backslash followed by `u` (`\\u`) is not mistaken for `\u`.
        if c == '\\' && matches!(chars.next(), Some('u' | 'U' | 'N')) {
            return true;
        }
    }
    false
}

// Build the `UnnecessaryEncodeUTF8` check for `expr`, an `encode` call on the
// string literal `constant`. When `patch` is set, the check carries a fix
// that replaces the whole call with the literal's source text, stripped of
// any leading `u`/`U` and prefixed with `b`.
//
// The fix is withheld (the check is still returned) when the literal's
// source contains `\u`, `\U` or `\N` escapes outside a raw string: e.g.
// `"\u0041"` is the one-character string `A`, while `b"\u0041"` is six
// literal bytes.
//
// NOTE(review): implicitly concatenated literals (`"a" "b".encode()`) are
// not detected here; prefixing only the first part would presumably yield
// invalid Python. Confirm how the parser reports the range in that case.
fn replace_with_bytes_literal(
    expr: &Expr,
    constant: &Expr,
    locator: &SourceCodeLocator,
    patch: bool,
) -> Check {
    let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr));
    if patch {
        let content = locator.slice_source_code_range(&Range {
            location: constant.location,
            end_location: constant.end_location.unwrap(),
        });
        let literal_source: &str = &content;
        if !has_str_only_escape(literal_source) {
            let replacement = format!(
                "b{}",
                literal_source
                    .trim_start_matches('u')
                    .trim_start_matches('U')
            );
            check.amend(Fix::replacement(
                replacement,
                expr.location,
                expr.end_location.unwrap(),
            ));
        }
    }
    check
}
/// U012
///
/// Flags `<literal>.encode(...)` calls where the encoding is the default:
/// - on an ASCII-only string literal, the whole call is replaced by a bytes
///   literal;
/// - on any other string literal, or on an f-string, only the redundant
///   encoding argument is removed (nothing is reported for a bare
///   `.encode()` in that case).
///
/// Calls on anything other than a string literal or f-string are ignored.
pub fn unnecessary_encode_utf8(
    checker: &mut Checker,
    expr: &Expr,
    func: &Expr,
    args: &Vec<Expr>,
    kwargs: &Vec<Keyword>,
) {
    // Only `<something>.encode(...)` is of interest.
    let receiver = match match_encoded_variable(func) {
        Some(receiver) => receiver,
        None => return,
    };

    // Decide which rewrite applies based on what `encode` is called on.
    let to_bytes_literal = match &receiver.node {
        // "str".encode() / "str".encode("utf-8")
        ExprKind::Constant {
            value: Constant::Str(literal),
            ..
        } => literal.is_ascii(),
        // f"foo{bar}".encode(*args, **kwargs)
        ExprKind::JoinedStr { .. } => false,
        _ => return,
    };

    if !is_default_encode(args, kwargs) {
        return;
    }

    let check = if to_bytes_literal {
        // "foo".encode()
        Some(replace_with_bytes_literal(
            expr,
            receiver,
            checker.locator,
            checker.patch(),
        ))
    } else {
        // "unicode text©".encode("utf-8")
        delete_default_encode_arg_or_kwarg(expr, args, kwargs, checker.patch())
    };

    if let Some(check) = check {
        checker.add_check(check);
    }
}

View file

@ -0,0 +1,260 @@
---
source: src/linter.rs
expression: checks
---
- kind: UnnecessaryEncodeUTF8
location:
row: 2
column: 0
end_location:
row: 2
column: 21
fix:
patch:
content: "b\"foo\""
location:
row: 2
column: 0
end_location:
row: 2
column: 21
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 3
column: 0
end_location:
row: 3
column: 18
fix:
patch:
content: "b\"foo\""
location:
row: 3
column: 0
end_location:
row: 3
column: 18
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 4
column: 0
end_location:
row: 4
column: 14
fix:
patch:
content: "b\"foo\""
location:
row: 4
column: 0
end_location:
row: 4
column: 14
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 5
column: 0
end_location:
row: 5
column: 20
fix:
patch:
content: "b\"foo\""
location:
row: 5
column: 0
end_location:
row: 5
column: 20
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 6
column: 0
end_location:
row: 6
column: 22
fix:
patch:
content: "b\"foo\""
location:
row: 6
column: 0
end_location:
row: 6
column: 22
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 7
column: 0
end_location:
row: 7
column: 30
fix:
patch:
content: "b\"foo\""
location:
row: 7
column: 0
end_location:
row: 7
column: 30
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 8
column: 0
end_location:
row: 14
column: 1
fix:
patch:
content: "b\"\"\"\nLorem\n\nIpsum\n\"\"\""
location:
row: 8
column: 0
end_location:
row: 14
column: 1
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 26
column: 0
end_location:
row: 26
column: 27
fix:
patch:
content: ""
location:
row: 26
column: 19
end_location:
row: 26
column: 26
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 44
column: 0
end_location:
row: 44
column: 31
fix:
patch:
content: ""
location:
row: 44
column: 23
end_location:
row: 44
column: 30
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 46
column: 0
end_location:
row: 46
column: 39
fix:
patch:
content: ""
location:
row: 46
column: 23
end_location:
row: 46
column: 38
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 48
column: 0
end_location:
row: 48
column: 23
fix:
patch:
content: "br\"fo\\o\""
location:
row: 48
column: 0
end_location:
row: 48
column: 23
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 49
column: 0
end_location:
row: 49
column: 22
fix:
patch:
content: "b\"foo\""
location:
row: 49
column: 0
end_location:
row: 49
column: 22
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 50
column: 0
end_location:
row: 50
column: 23
fix:
patch:
content: "bR\"fo\\o\""
location:
row: 50
column: 0
end_location:
row: 50
column: 23
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 51
column: 0
end_location:
row: 51
column: 22
fix:
patch:
content: "b\"foo\""
location:
row: 51
column: 0
end_location:
row: 51
column: 22
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 52
column: 6
end_location:
row: 52
column: 20
fix:
patch:
content: "b\"foo\""
location:
row: 52
column: 6
end_location:
row: 52
column: 20
applied: false