feat: no unnecessary encode utf8 (#686)

This commit is contained in:
Martin Lehoux 2022-11-12 16:54:36 +00:00 committed by GitHub
parent bbc38fea73
commit afa59d78bb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 487 additions and 3 deletions

View file

@ -443,6 +443,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI.
| U009 | PEP3120UnnecessaryCodingComment | utf-8 encoding declaration is unnecessary | 🛠 |
| U010 | UnnecessaryFutureImport | Unnecessary `__future__` import `...` for target Python version | 🛠 |
| U011 | UnnecessaryLRUCacheParams | Unnecessary parameters to functools.lru_cache | 🛠 |
| U012 | UnnecessaryEncodeUTF8 | Unnecessary call to `encode` as UTF-8 | 🛠 |
### pep8-naming
@ -687,7 +688,7 @@ including:
- [`flake8-comprehensions`](https://pypi.org/project/flake8-comprehensions/)
- [`flake8-bugbear`](https://pypi.org/project/flake8-bugbear/) (21/32)
- [`flake8-2020`](https://pypi.org/project/flake8-2020/)
- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34)
- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34)
- [`autoflake`](https://pypi.org/project/autoflake/) (1/7)
Beyond rule-set parity, Ruff suffers from the following limitations vis-à-vis Flake8:
@ -713,7 +714,7 @@ Today, Ruff can be used to replace Flake8 when used with any of the following pl
- [`flake8-2020`](https://pypi.org/project/flake8-2020/)
Ruff can also replace [`isort`](https://pypi.org/project/isort/), [`yesqa`](https://github.com/asottile/yesqa),
and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34).
and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34).
If you're looking to use Ruff, but rely on an unsupported Flake8 plugin, feel free to file an Issue.

52
resources/test/fixtures/U012.py vendored Normal file
View file

@ -0,0 +1,52 @@
# ASCII literals should be replaced by a bytes literal
"foo".encode("utf-8") # b"foo"
"foo".encode("u8") # b"foo"
"foo".encode() # b"foo"
"foo".encode("UTF8") # b"foo"
U"foo".encode("utf-8") # b"foo"
"foo".encode(encoding="utf-8") # b"foo"
"""
Lorem
Ipsum
""".encode(
"utf-8"
)
# b"""
# Lorem
#
# Ipsum
# """
# `encode` on variables should not be processed.
string = "hello there"
string.encode("utf-8")
bar = "bar"
f"foo{bar}".encode("utf-8") # f"foo{bar}".encode()
encoding = "latin"
"foo".encode(encoding)
f"foo{bar}".encode(encoding)
# `encode` with custom args and kwargs should not be processed.
"foo".encode("utf-8", errors="replace")
"foo".encode("utf-8", "replace")
"foo".encode(errors="replace")
"foo".encode(encoding="utf-8", errors="replace")
# `encode` with custom args and kwargs on unicode should not be processed.
"unicode text©".encode("utf-8", errors="replace")
"unicode text©".encode("utf-8", "replace")
"unicode text©".encode(errors="replace")
"unicode text©".encode(encoding="utf-8", errors="replace")
# Unicode literals should only be stripped of default encoding.
"unicode text©".encode("utf-8") # "unicode text©".encode()
"unicode text©".encode()
"unicode text©".encode(encoding="UTF8") # "unicode text©".encode()
r"fo\o".encode("utf-8") # br"fo\o"
u"foo".encode("utf-8") # b"foo"
R"fo\o".encode("utf-8") # br"fo\o"
U"foo".encode("utf-8") # b"foo"
print("foo".encode()) # print(b"foo")

View file

@ -1073,6 +1073,10 @@ where
pyupgrade::plugins::super_call_with_parameters(self, expr, func, args);
}
if self.settings.enabled.contains(&CheckCode::U012) {
pyupgrade::plugins::unnecessary_encode_utf8(self, expr, func, args, keywords);
}
// flake8-print
if self.settings.enabled.contains(&CheckCode::T201)
|| self.settings.enabled.contains(&CheckCode::T203)

View file

@ -156,6 +156,7 @@ pub enum CheckCode {
U009,
U010,
U011,
U012,
// pydocstyle
D100,
D101,
@ -444,6 +445,7 @@ pub enum CheckKind {
PEP3120UnnecessaryCodingComment,
UnnecessaryFutureImport(Vec<String>),
UnnecessaryLRUCacheParams,
UnnecessaryEncodeUTF8,
// pydocstyle
BlankLineAfterLastSection(String),
BlankLineAfterSection(String),
@ -691,6 +693,7 @@ impl CheckCode {
CheckCode::U009 => CheckKind::PEP3120UnnecessaryCodingComment,
CheckCode::U010 => CheckKind::UnnecessaryFutureImport(vec!["...".to_string()]),
CheckCode::U011 => CheckKind::UnnecessaryLRUCacheParams,
CheckCode::U012 => CheckKind::UnnecessaryEncodeUTF8,
// pydocstyle
CheckCode::D100 => CheckKind::PublicModule,
CheckCode::D101 => CheckKind::PublicClass,
@ -901,6 +904,7 @@ impl CheckCode {
CheckCode::U009 => CheckCategory::Pyupgrade,
CheckCode::U010 => CheckCategory::Pyupgrade,
CheckCode::U011 => CheckCategory::Pyupgrade,
CheckCode::U012 => CheckCategory::Pyupgrade,
CheckCode::D100 => CheckCategory::Pydocstyle,
CheckCode::D101 => CheckCategory::Pydocstyle,
CheckCode::D102 => CheckCategory::Pydocstyle,
@ -1103,6 +1107,7 @@ impl CheckKind {
CheckKind::PEP3120UnnecessaryCodingComment => &CheckCode::U009,
CheckKind::UnnecessaryFutureImport(_) => &CheckCode::U010,
CheckKind::UnnecessaryLRUCacheParams => &CheckCode::U011,
CheckKind::UnnecessaryEncodeUTF8 => &CheckCode::U012,
// pydocstyle
CheckKind::BlankLineAfterLastSection(_) => &CheckCode::D413,
CheckKind::BlankLineAfterSection(_) => &CheckCode::D410,
@ -1607,6 +1612,7 @@ impl CheckKind {
CheckKind::UnnecessaryLRUCacheParams => {
"Unnecessary parameters to functools.lru_cache".to_string()
}
CheckKind::UnnecessaryEncodeUTF8 => "Unnecessary call to `encode` as UTF-8".to_string(),
// pydocstyle
CheckKind::FitsOnOneLine => "One-line docstring should fit on one line".to_string(),
CheckKind::BlankLineAfterSummary => {
@ -1873,6 +1879,7 @@ impl CheckKind {
| CheckKind::UnnecessaryAbspath
| CheckKind::UnnecessaryCollectionCall(_)
| CheckKind::UnnecessaryComprehension(_)
| CheckKind::UnnecessaryEncodeUTF8
| CheckKind::UnnecessaryFutureImport(_)
| CheckKind::UnnecessaryGeneratorDict
| CheckKind::UnnecessaryGeneratorList

View file

@ -265,6 +265,7 @@ pub enum CheckCodePrefix {
U01,
U010,
U011,
U012,
W,
W2,
W29,
@ -1004,6 +1005,7 @@ impl CheckCodePrefix {
CheckCode::U009,
CheckCode::U010,
CheckCode::U011,
CheckCode::U012,
],
CheckCodePrefix::U0 => vec![
CheckCode::U001,
@ -1017,6 +1019,7 @@ impl CheckCodePrefix {
CheckCode::U009,
CheckCode::U010,
CheckCode::U011,
CheckCode::U012,
],
CheckCodePrefix::U00 => vec![
CheckCode::U001,
@ -1038,9 +1041,10 @@ impl CheckCodePrefix {
CheckCodePrefix::U007 => vec![CheckCode::U007],
CheckCodePrefix::U008 => vec![CheckCode::U008],
CheckCodePrefix::U009 => vec![CheckCode::U009],
CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011],
CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011, CheckCode::U012],
CheckCodePrefix::U010 => vec![CheckCode::U010],
CheckCodePrefix::U011 => vec![CheckCode::U011],
CheckCodePrefix::U012 => vec![CheckCode::U012],
CheckCodePrefix::W => vec![CheckCode::W292, CheckCode::W605],
CheckCodePrefix::W2 => vec![CheckCode::W292],
CheckCodePrefix::W29 => vec![CheckCode::W292],
@ -1351,6 +1355,7 @@ impl CheckCodePrefix {
CheckCodePrefix::U01 => PrefixSpecificity::Tens,
CheckCodePrefix::U010 => PrefixSpecificity::Explicit,
CheckCodePrefix::U011 => PrefixSpecificity::Explicit,
CheckCodePrefix::U012 => PrefixSpecificity::Explicit,
CheckCodePrefix::W => PrefixSpecificity::Category,
CheckCodePrefix::W2 => PrefixSpecificity::Hundreds,
CheckCodePrefix::W29 => PrefixSpecificity::Tens,

View file

@ -486,6 +486,7 @@ mod tests {
#[test_case(CheckCode::U010, Path::new("U010.py"); "U010")]
#[test_case(CheckCode::U011, Path::new("U011_0.py"); "U011_0")]
#[test_case(CheckCode::U011, Path::new("U011_1.py"); "U011_1")]
#[test_case(CheckCode::U012, Path::new("U012.py"); "U012")]
#[test_case(CheckCode::W292, Path::new("W292_0.py"); "W292_0")]
#[test_case(CheckCode::W292, Path::new("W292_1.py"); "W292_1")]
#[test_case(CheckCode::W292, Path::new("W292_2.py"); "W292_2")]

View file

@ -2,6 +2,7 @@ pub use deprecated_unittest_alias::deprecated_unittest_alias;
pub use super_call_with_parameters::super_call_with_parameters;
pub use type_of_primitive::type_of_primitive;
pub use unnecessary_abspath::unnecessary_abspath;
pub use unnecessary_encode_utf8::unnecessary_encode_utf8;
pub use unnecessary_future_import::unnecessary_future_import;
pub use unnecessary_lru_cache_params::unnecessary_lru_cache_params;
pub use use_pep585_annotation::use_pep585_annotation;
@ -13,6 +14,7 @@ mod deprecated_unittest_alias;
mod super_call_with_parameters;
mod type_of_primitive;
mod unnecessary_abspath;
mod unnecessary_encode_utf8;
mod unnecessary_future_import;
mod unnecessary_lru_cache_params;
mod use_pep585_annotation;

View file

@ -0,0 +1,152 @@
use rustpython_ast::{Constant, Expr, ExprKind, Keyword};
use crate::ast::types::Range;
use crate::autofix::Fix;
use crate::check_ast::Checker;
use crate::checks::{Check, CheckKind};
use crate::source_code_locator::SourceCodeLocator;
// Spellings of the UTF-8 codec name that this check treats as "the default encoding".
// Entries are lowercase because `is_utf8_encoding_arg` lowercases the argument before
// looking it up in this list.
const UTF8_LITERALS: &[&str] = &["utf-8", "utf8", "utf_8", "u8", "utf", "cp65001"];
/// If `func` is an attribute access named `encode` (i.e. `<receiver>.encode`),
/// return the receiver expression; otherwise return `None`.
fn match_encoded_variable(func: &Expr) -> Option<&Expr> {
    match &func.node {
        ExprKind::Attribute {
            value: receiver,
            attr,
            ..
        } if attr == "encode" => Some(receiver),
        _ => None,
    }
}
/// Return `true` when `arg` is a string constant that, once lowercased, is one
/// of the names listed in `UTF8_LITERALS`. Any non-string expression (a
/// variable, a call, ...) yields `false`.
fn is_utf8_encoding_arg(arg: &Expr) -> bool {
    match &arg.node {
        ExprKind::Constant {
            value: Constant::Str(name),
            ..
        } => {
            let normalized = name.to_lowercase();
            UTF8_LITERALS.iter().any(|alias| *alias == normalized)
        }
        _ => false,
    }
}
/// Return `true` when the arguments of an `encode` call select the default
/// (UTF-8) encoding and nothing else.
///
/// Accepted shapes are:
/// - `.encode()`
/// - `.encode(<utf-8 name>)`
/// - `.encode(encoding=<utf-8 name>)`
///
/// Every other combination of positional and keyword arguments (for example
/// anything that passes `errors`) returns `false`.
fn is_default_encode(args: &[Expr], kwargs: &[Keyword]) -> bool {
    match (args, kwargs) {
        // .encode()
        ([], []) => true,
        // .encode(encoding)
        ([encoding], []) => is_utf8_encoding_arg(encoding),
        // .encode(encoding=...)
        ([], [kwarg]) => {
            // Compare by `&str` so no `String` is allocated for the keyword name.
            kwarg.node.arg.as_deref() == Some("encoding")
                && is_utf8_encoding_arg(&kwarg.node.value)
        }
        // .encode(*args, **kwargs)
        _ => false,
    }
}
/// Build the `UnnecessaryEncodeUTF8` check for an `encode` call whose only
/// argument is the redundant encoding.
///
/// The first positional argument is preferred; if there is none, the first
/// keyword argument is used. When `patch` is set, the check carries a fix that
/// deletes that argument's source range. Returns `None` when the call has no
/// arguments at all, since there is nothing to remove.
fn delete_default_encode_arg_or_kwarg(
    expr: &Expr,
    args: &[Expr],
    kwargs: &[Keyword],
    patch: bool,
) -> Option<Check> {
    // Locate the span of the argument to drop; the end is unwrapped only if a
    // fix is actually generated.
    let (start, end) = if let Some(positional) = args.first() {
        (positional.location, positional.end_location)
    } else if let Some(keyword) = kwargs.first() {
        (keyword.location, keyword.end_location)
    } else {
        return None;
    };

    let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr));
    if patch {
        check.amend(Fix::deletion(start, end.unwrap()));
    }
    Some(check)
}
/// Turn the source text of a single string literal into the source text of the
/// equivalent bytes literal (e.g. `u"foo"` -> `b"foo"`, `R"x"` -> `bR"x"`).
///
/// Returns `None` whenever prepending `b` is not known to be safe:
/// - the text is not exactly one literal (e.g. implicit concatenation such as
///   `"a" "b"`, which would become the invalid `b"a" "b"`);
/// - the literal carries a prefix other than `r`/`R`/`u`/`U`;
/// - a non-raw literal contains a `\u`, `\U` or `\N` escape, which Python only
///   interprets in `str` literals, so the bytes literal would differ in value.
fn bytes_literal_source(source: &str) -> Option<String> {
    let quote_start = source.find(|c: char| c == '"' || c == '\'')?;
    let (prefix, quoted) = source.split_at(quote_start);
    if !prefix.chars().all(|c| matches!(c, 'r' | 'R' | 'u' | 'U')) {
        return None;
    }
    let is_raw = prefix.chars().any(|c| matches!(c, 'r' | 'R'));

    // Quote characters are ASCII, so byte-slicing here is always on a boundary.
    let delimiter = if quoted.starts_with("\"\"\"") || quoted.starts_with("'''") {
        &quoted[..3]
    } else {
        &quoted[..1]
    };
    let body = &quoted[delimiter.len()..];

    let mut chars = body.char_indices();
    while let Some((offset, c)) = chars.next() {
        if c == '\\' {
            // A backslash always shields the next character from closing the
            // literal, in raw and non-raw strings alike.
            match chars.next() {
                Some((_, escaped)) if !is_raw && matches!(escaped, 'u' | 'U' | 'N') => {
                    return None;
                }
                Some(_) => {}
                None => return None,
            }
        } else if body[offset..].starts_with(delimiter) {
            // The closing delimiter must be the very end of the slice;
            // anything after it means this is not a lone literal.
            if offset + delimiter.len() != body.len() {
                return None;
            }
            // `u`/`U` cannot be combined with `b`; `r`/`R` can.
            let kept_prefix: String = prefix
                .chars()
                .filter(|c| !matches!(c, 'u' | 'U'))
                .collect();
            return Some(format!("b{}{}", kept_prefix, quoted));
        }
    }
    // No closing delimiter found.
    None
}

/// Build the `UnnecessaryEncodeUTF8` check for `"<ascii literal>".encode(...)`.
///
/// `expr` is the whole call and `constant` is the string literal being
/// encoded. When `patch` is set, the literal's source text is read through
/// `locator` and, if it can be safely rewritten, the whole call is replaced by
/// the corresponding bytes literal. If the rewrite is not safe (see
/// `bytes_literal_source`), the check is still returned, just without a fix.
fn replace_with_bytes_literal(
    expr: &Expr,
    constant: &Expr,
    locator: &SourceCodeLocator,
    patch: bool,
) -> Check {
    let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr));
    if patch {
        let content = locator.slice_source_code_range(&Range {
            location: constant.location,
            end_location: constant.end_location.unwrap(),
        });
        if let Some(bytes_literal) = bytes_literal_source(&content) {
            check.amend(Fix::replacement(
                bytes_literal,
                expr.location,
                expr.end_location.unwrap(),
            ));
        }
    }
    check
}
/// U012
///
/// Flags `encode` calls on string literals and f-strings whose arguments only
/// select the default UTF-8 encoding:
/// - an ASCII-only string literal is reported with a fix that swaps the whole
///   call for a bytes literal;
/// - a non-ASCII string literal or an f-string is reported with a fix that
///   removes the redundant encoding argument (nothing is reported when there
///   is no argument to remove).
///
/// Receivers that are not literals (variables, calls, ...) are ignored.
pub fn unnecessary_encode_utf8(
    checker: &mut Checker,
    expr: &Expr,
    func: &Expr,
    args: &Vec<Expr>,
    kwargs: &Vec<Keyword>,
) {
    let receiver = match match_encoded_variable(func) {
        Some(receiver) => receiver,
        None => return,
    };

    // Only plain string literals that are pure ASCII can become bytes
    // literals; f-strings and non-ASCII literals keep the `encode` call.
    let becomes_bytes_literal = match &receiver.node {
        ExprKind::Constant {
            value: Constant::Str(text),
            ..
        } => text.is_ascii(),
        ExprKind::JoinedStr { .. } => false,
        _ => return,
    };

    if !is_default_encode(args, kwargs) {
        return;
    }

    let check = if becomes_bytes_literal {
        Some(replace_with_bytes_literal(
            expr,
            receiver,
            checker.locator,
            checker.patch(),
        ))
    } else {
        delete_default_encode_arg_or_kwarg(expr, args, kwargs, checker.patch())
    };

    if let Some(check) = check {
        checker.add_check(check);
    }
}

View file

@ -0,0 +1,260 @@
---
source: src/linter.rs
expression: checks
---
- kind: UnnecessaryEncodeUTF8
location:
row: 2
column: 0
end_location:
row: 2
column: 21
fix:
patch:
content: "b\"foo\""
location:
row: 2
column: 0
end_location:
row: 2
column: 21
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 3
column: 0
end_location:
row: 3
column: 18
fix:
patch:
content: "b\"foo\""
location:
row: 3
column: 0
end_location:
row: 3
column: 18
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 4
column: 0
end_location:
row: 4
column: 14
fix:
patch:
content: "b\"foo\""
location:
row: 4
column: 0
end_location:
row: 4
column: 14
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 5
column: 0
end_location:
row: 5
column: 20
fix:
patch:
content: "b\"foo\""
location:
row: 5
column: 0
end_location:
row: 5
column: 20
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 6
column: 0
end_location:
row: 6
column: 22
fix:
patch:
content: "b\"foo\""
location:
row: 6
column: 0
end_location:
row: 6
column: 22
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 7
column: 0
end_location:
row: 7
column: 30
fix:
patch:
content: "b\"foo\""
location:
row: 7
column: 0
end_location:
row: 7
column: 30
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 8
column: 0
end_location:
row: 14
column: 1
fix:
patch:
content: "b\"\"\"\nLorem\n\nIpsum\n\"\"\""
location:
row: 8
column: 0
end_location:
row: 14
column: 1
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 26
column: 0
end_location:
row: 26
column: 27
fix:
patch:
content: ""
location:
row: 26
column: 19
end_location:
row: 26
column: 26
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 44
column: 0
end_location:
row: 44
column: 31
fix:
patch:
content: ""
location:
row: 44
column: 23
end_location:
row: 44
column: 30
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 46
column: 0
end_location:
row: 46
column: 39
fix:
patch:
content: ""
location:
row: 46
column: 23
end_location:
row: 46
column: 38
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 48
column: 0
end_location:
row: 48
column: 23
fix:
patch:
content: "br\"fo\\o\""
location:
row: 48
column: 0
end_location:
row: 48
column: 23
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 49
column: 0
end_location:
row: 49
column: 22
fix:
patch:
content: "b\"foo\""
location:
row: 49
column: 0
end_location:
row: 49
column: 22
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 50
column: 0
end_location:
row: 50
column: 23
fix:
patch:
content: "bR\"fo\\o\""
location:
row: 50
column: 0
end_location:
row: 50
column: 23
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 51
column: 0
end_location:
row: 51
column: 22
fix:
patch:
content: "b\"foo\""
location:
row: 51
column: 0
end_location:
row: 51
column: 22
applied: false
- kind: UnnecessaryEncodeUTF8
location:
row: 52
column: 6
end_location:
row: 52
column: 20
fix:
patch:
content: "b\"foo\""
location:
row: 52
column: 6
end_location:
row: 52
column: 20
applied: false