feat: no unnecessary encode utf8 (#686)

2025-09-26 11:59:35 +00:00 · 2022-11-12 16:54:36 +00:00 · 2022-11-12 16:54:36 +00:00 · afa59d78bb
commit afa59d78bb
parent bbc38fea73
9 changed files with 487 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -443,6 +443,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI.
 | U009 | PEP3120UnnecessaryCodingComment | utf-8 encoding declaration is unnecessary | 🛠 |
 | U010 | UnnecessaryFutureImport | Unnecessary `__future__` import `...` for target Python version | 🛠 |
 | U011 | UnnecessaryLRUCacheParams | Unnecessary parameters to functools.lru_cache | 🛠 |
+| U012 | UnnecessaryEncodeUTF8 | Unnecessary call to `encode` as UTF-8 | 🛠 |

 ### pep8-naming

@ -687,7 +688,7 @@ including:
 - [`flake8-comprehensions`](https://pypi.org/project/flake8-comprehensions/)
 - [`flake8-bugbear`](https://pypi.org/project/flake8-bugbear/) (21/32)
 - [`flake8-2020`](https://pypi.org/project/flake8-2020/)
- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34)
+- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34)
 - [`autoflake`](https://pypi.org/project/autoflake/) (1/7)

 Beyond rule-set parity, Ruff suffers from the following limitations vis-à-vis Flake8:
@ -713,7 +714,7 @@ Today, Ruff can be used to replace Flake8 when used with any of the following pl
 - [`flake8-2020`](https://pypi.org/project/flake8-2020/)

 Ruff can also replace [`isort`](https://pypi.org/project/isort/), [`yesqa`](https://github.com/asottile/yesqa),
-and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34).
+and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34).

 If you're looking to use Ruff, but rely on an unsupported Flake8 plugin, feel free to file an Issue.

--- a/resources/test/fixtures/U012.py
+++ b/resources/test/fixtures/U012.py
@ -0,0 +1,52 @@
+# ASCII literals should be replaced by a bytes literal
+"foo".encode("utf-8")  # b"foo"
+"foo".encode("u8")  # b"foo"
+"foo".encode()  # b"foo"
+"foo".encode("UTF8")  # b"foo"
+U"foo".encode("utf-8")  # b"foo"
+"foo".encode(encoding="utf-8")  # b"foo"
+"""
+Lorem
+
+Ipsum
+""".encode(
+    "utf-8"
+)
+# b"""
+# Lorem
+#
+# Ipsum
+# """
+
+# `encode` on variables should not be processed.
+string = "hello there"
+string.encode("utf-8")
+
+bar = "bar"
+f"foo{bar}".encode("utf-8")  # f"foo{bar}".encode()
+encoding = "latin"
+"foo".encode(encoding)
+f"foo{bar}".encode(encoding)
+
+# `encode` with custom args and kwargs should not be processed.
+"foo".encode("utf-8", errors="replace")
+"foo".encode("utf-8", "replace")
+"foo".encode(errors="replace")
+"foo".encode(encoding="utf-8", errors="replace")
+
+# `encode` with custom args and kwargs on unicode should not be processed.
+"unicode text©".encode("utf-8", errors="replace")
+"unicode text©".encode("utf-8", "replace")
+"unicode text©".encode(errors="replace")
+"unicode text©".encode(encoding="utf-8", errors="replace")
+
+# Unicode literals should only be stripped of default encoding.
+"unicode text©".encode("utf-8")  # "unicode text©".encode()
+"unicode text©".encode()
+"unicode text©".encode(encoding="UTF8")  # "unicode text©".encode()
+
+r"fo\o".encode("utf-8")  # br"fo\o"
+u"foo".encode("utf-8")  # b"foo"
+R"fo\o".encode("utf-8")  # br"fo\o"
+U"foo".encode("utf-8")  # b"foo"
+print("foo".encode())  # print(b"foo")
--- a/src/check_ast.rs
+++ b/src/check_ast.rs
@ -1073,6 +1073,10 @@ where
                    pyupgrade::plugins::super_call_with_parameters(self, expr, func, args);
                }

+                if self.settings.enabled.contains(&CheckCode::U012) {
+                    pyupgrade::plugins::unnecessary_encode_utf8(self, expr, func, args, keywords);
+                }
+
                // flake8-print
                if self.settings.enabled.contains(&CheckCode::T201)
                    || self.settings.enabled.contains(&CheckCode::T203)
--- a/src/checks.rs
+++ b/src/checks.rs
@ -156,6 +156,7 @@ pub enum CheckCode {
    U009,
    U010,
    U011,
+    U012,
    // pydocstyle
    D100,
    D101,
@ -444,6 +445,7 @@ pub enum CheckKind {
    PEP3120UnnecessaryCodingComment,
    UnnecessaryFutureImport(Vec<String>),
    UnnecessaryLRUCacheParams,
+    UnnecessaryEncodeUTF8,
    // pydocstyle
    BlankLineAfterLastSection(String),
    BlankLineAfterSection(String),
@ -691,6 +693,7 @@ impl CheckCode {
            CheckCode::U009 => CheckKind::PEP3120UnnecessaryCodingComment,
            CheckCode::U010 => CheckKind::UnnecessaryFutureImport(vec!["...".to_string()]),
            CheckCode::U011 => CheckKind::UnnecessaryLRUCacheParams,
+            CheckCode::U012 => CheckKind::UnnecessaryEncodeUTF8,
            // pydocstyle
            CheckCode::D100 => CheckKind::PublicModule,
            CheckCode::D101 => CheckKind::PublicClass,
@ -901,6 +904,7 @@ impl CheckCode {
            CheckCode::U009 => CheckCategory::Pyupgrade,
            CheckCode::U010 => CheckCategory::Pyupgrade,
            CheckCode::U011 => CheckCategory::Pyupgrade,
+            CheckCode::U012 => CheckCategory::Pyupgrade,
            CheckCode::D100 => CheckCategory::Pydocstyle,
            CheckCode::D101 => CheckCategory::Pydocstyle,
            CheckCode::D102 => CheckCategory::Pydocstyle,
@ -1103,6 +1107,7 @@ impl CheckKind {
            CheckKind::PEP3120UnnecessaryCodingComment => &CheckCode::U009,
            CheckKind::UnnecessaryFutureImport(_) => &CheckCode::U010,
            CheckKind::UnnecessaryLRUCacheParams => &CheckCode::U011,
+            CheckKind::UnnecessaryEncodeUTF8 => &CheckCode::U012,
            // pydocstyle
            CheckKind::BlankLineAfterLastSection(_) => &CheckCode::D413,
            CheckKind::BlankLineAfterSection(_) => &CheckCode::D410,
@ -1607,6 +1612,7 @@ impl CheckKind {
            CheckKind::UnnecessaryLRUCacheParams => {
                "Unnecessary parameters to functools.lru_cache".to_string()
            }
+            CheckKind::UnnecessaryEncodeUTF8 => "Unnecessary call to `encode` as UTF-8".to_string(),
            // pydocstyle
            CheckKind::FitsOnOneLine => "One-line docstring should fit on one line".to_string(),
            CheckKind::BlankLineAfterSummary => {
@ -1873,6 +1879,7 @@ impl CheckKind {
                | CheckKind::UnnecessaryAbspath
                | CheckKind::UnnecessaryCollectionCall(_)
                | CheckKind::UnnecessaryComprehension(_)
+                | CheckKind::UnnecessaryEncodeUTF8
                | CheckKind::UnnecessaryFutureImport(_)
                | CheckKind::UnnecessaryGeneratorDict
                | CheckKind::UnnecessaryGeneratorList
--- a/src/checks_gen.rs
+++ b/src/checks_gen.rs
@ -265,6 +265,7 @@ pub enum CheckCodePrefix {
    U01,
    U010,
    U011,
+    U012,
    W,
    W2,
    W29,
@ -1004,6 +1005,7 @@ impl CheckCodePrefix {
                CheckCode::U009,
                CheckCode::U010,
                CheckCode::U011,
+                CheckCode::U012,
            ],
            CheckCodePrefix::U0 => vec![
                CheckCode::U001,
@ -1017,6 +1019,7 @@ impl CheckCodePrefix {
                CheckCode::U009,
                CheckCode::U010,
                CheckCode::U011,
+                CheckCode::U012,
            ],
            CheckCodePrefix::U00 => vec![
                CheckCode::U001,
@ -1038,9 +1041,10 @@ impl CheckCodePrefix {
            CheckCodePrefix::U007 => vec![CheckCode::U007],
            CheckCodePrefix::U008 => vec![CheckCode::U008],
            CheckCodePrefix::U009 => vec![CheckCode::U009],
-            CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011],
+            CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011, CheckCode::U012],
            CheckCodePrefix::U010 => vec![CheckCode::U010],
            CheckCodePrefix::U011 => vec![CheckCode::U011],
+            CheckCodePrefix::U012 => vec![CheckCode::U012],
            CheckCodePrefix::W => vec![CheckCode::W292, CheckCode::W605],
            CheckCodePrefix::W2 => vec![CheckCode::W292],
            CheckCodePrefix::W29 => vec![CheckCode::W292],
@ -1351,6 +1355,7 @@ impl CheckCodePrefix {
            CheckCodePrefix::U01 => PrefixSpecificity::Tens,
            CheckCodePrefix::U010 => PrefixSpecificity::Explicit,
            CheckCodePrefix::U011 => PrefixSpecificity::Explicit,
+            CheckCodePrefix::U012 => PrefixSpecificity::Explicit,
            CheckCodePrefix::W => PrefixSpecificity::Category,
            CheckCodePrefix::W2 => PrefixSpecificity::Hundreds,
            CheckCodePrefix::W29 => PrefixSpecificity::Tens,
--- a/src/linter.rs
+++ b/src/linter.rs
@ -486,6 +486,7 @@ mod tests {
    #[test_case(CheckCode::U010, Path::new("U010.py"); "U010")]
    #[test_case(CheckCode::U011, Path::new("U011_0.py"); "U011_0")]
    #[test_case(CheckCode::U011, Path::new("U011_1.py"); "U011_1")]
+    #[test_case(CheckCode::U012, Path::new("U012.py"); "U012")]
    #[test_case(CheckCode::W292, Path::new("W292_0.py"); "W292_0")]
    #[test_case(CheckCode::W292, Path::new("W292_1.py"); "W292_1")]
    #[test_case(CheckCode::W292, Path::new("W292_2.py"); "W292_2")]
--- a/src/pyupgrade/plugins/mod.rs
+++ b/src/pyupgrade/plugins/mod.rs
@ -2,6 +2,7 @@ pub use deprecated_unittest_alias::deprecated_unittest_alias;
 pub use super_call_with_parameters::super_call_with_parameters;
 pub use type_of_primitive::type_of_primitive;
 pub use unnecessary_abspath::unnecessary_abspath;
+pub use unnecessary_encode_utf8::unnecessary_encode_utf8;
 pub use unnecessary_future_import::unnecessary_future_import;
 pub use unnecessary_lru_cache_params::unnecessary_lru_cache_params;
 pub use use_pep585_annotation::use_pep585_annotation;
@ -13,6 +14,7 @@ mod deprecated_unittest_alias;
 mod super_call_with_parameters;
 mod type_of_primitive;
 mod unnecessary_abspath;
+mod unnecessary_encode_utf8;
 mod unnecessary_future_import;
 mod unnecessary_lru_cache_params;
 mod use_pep585_annotation;
--- a/src/pyupgrade/plugins/unnecessary_encode_utf8.rs
+++ b/src/pyupgrade/plugins/unnecessary_encode_utf8.rs
@ -0,0 +1,152 @@
+use rustpython_ast::{Constant, Expr, ExprKind, Keyword};
+
+use crate::ast::types::Range;
+use crate::autofix::Fix;
+use crate::check_ast::Checker;
+use crate::checks::{Check, CheckKind};
+use crate::source_code_locator::SourceCodeLocator;
+
+// Encoding names that are treated as meaning UTF-8. Candidate strings are
+// lowercased before being looked up in this list, so entries must be lowercase.
+const UTF8_LITERALS: &[&str] = &["utf-8", "utf8", "utf_8", "u8", "utf", "cp65001"];
+
+/// If `func` is an attribute access of the form `<receiver>.encode`, return
+/// the receiver expression; otherwise return `None`.
+fn match_encoded_variable(func: &Expr) -> Option<&Expr> {
+    match &func.node {
+        ExprKind::Attribute {
+            value: receiver,
+            attr,
+            ..
+        } if attr == "encode" => Some(receiver),
+        _ => None,
+    }
+}
+
+/// Return `true` if `arg` is a string constant whose lowercased value is one
+/// of the names in `UTF8_LITERALS`. Any non-string expression yields `false`.
+fn is_utf8_encoding_arg(arg: &Expr) -> bool {
+    match &arg.node {
+        ExprKind::Constant {
+            value: Constant::Str(name),
+            ..
+        } => {
+            // The lookup table is lowercase, so normalise before comparing.
+            let lowered = name.to_lowercase();
+            UTF8_LITERALS.contains(&lowered.as_str())
+        }
+        _ => false,
+    }
+}
+
+/// Return `true` if the arguments of an `encode` call select UTF-8 and
+/// nothing else, i.e. the call has one of these shapes:
+///
+/// - `.encode()`
+/// - `.encode(<utf-8 name>)`
+/// - `.encode(encoding=<utf-8 name>)`
+///
+/// Every other combination of positional and keyword arguments returns
+/// `false`.
+fn is_default_encode(args: &[Expr], kwargs: &[Keyword]) -> bool {
+    match (args, kwargs) {
+        // .encode()
+        ([], []) => true,
+        // .encode(encoding)
+        ([encoding], []) => is_utf8_encoding_arg(encoding),
+        // .encode(encoding=encoding)
+        ([], [kwarg]) => {
+            // Compare as `&str` so no `String` is allocated per call.
+            kwarg.node.arg.as_deref() == Some("encoding")
+                && is_utf8_encoding_arg(&kwarg.node.value)
+        }
+        // .encode(*args, **kwargs)
+        _ => false,
+    }
+}
+
+// Build a `Check` for a default `encode` call. When `patch` is set, the
+// check carries a fix that deletes the encoding argument: the first
+// positional argument if there is one, otherwise the first keyword argument.
+// Returns `None` when the call has no arguments at all.
+//
+// NOTE(review): only the argument's own span is deleted; a trailing comma
+// after it would presumably be left behind -- confirm.
+fn delete_default_encode_arg_or_kwarg(
+    expr: &Expr,
+    args: &[Expr],
+    kwargs: &[Keyword],
+    patch: bool,
+) -> Option<Check> {
+    // Pick the span to delete; positional arguments take precedence.
+    let (start, end) = match (args.first(), kwargs.first()) {
+        (Some(arg), _) => (arg.location, arg.end_location),
+        (None, Some(kwarg)) => (kwarg.location, kwarg.end_location),
+        (None, None) => return None,
+    };
+
+    let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr));
+    if patch {
+        // The end location is only unwrapped when a fix is actually built.
+        check.amend(Fix::deletion(start, end.unwrap()));
+    }
+    Some(check)
+}
+
+// Build a `Check` for an `encode` call on a string literal. When `patch` is
+// set, the check carries a fix that replaces the whole call expression with
+// the literal's source text, stripped of any leading `u`/`U` prefix and
+// prefixed with `b`.
+//
+// NOTE(review): the `b` is prepended once to the whole sliced source; if
+// `constant` spans several implicitly concatenated literals, only the first
+// would get the prefix -- confirm how the parser reports those.
+fn replace_with_bytes_literal(
+    expr: &Expr,
+    constant: &Expr,
+    locator: &SourceCodeLocator,
+    patch: bool,
+) -> Check {
+    let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr));
+    if !patch {
+        return check;
+    }
+
+    // Source text of the literal exactly as written, including its prefix.
+    let literal_range = Range {
+        location: constant.location,
+        end_location: constant.end_location.unwrap(),
+    };
+    let source = locator.slice_source_code_range(&literal_range);
+    let without_unicode_prefix = source.trim_start_matches('u').trim_start_matches('U');
+    let replacement = format!("b{}", without_unicode_prefix);
+
+    check.amend(Fix::replacement(
+        replacement,
+        expr.location,
+        expr.end_location.unwrap(),
+    ));
+    check
+}
+
+/// U012
+///
+/// Flag `<literal>.encode(...)` calls whose arguments amount to the default
+/// UTF-8 encoding:
+///
+/// - an ASCII-only string literal is reported with a fix that rewrites the
+///   whole call as a bytes literal;
+/// - a non-ASCII string literal or an f-string is reported with a fix that
+///   removes the encoding argument (nothing is reported when there is no
+///   argument to remove).
+///
+/// Receivers that are not string literals or f-strings are ignored.
+pub fn unnecessary_encode_utf8(
+    checker: &mut Checker,
+    expr: &Expr,
+    func: &Expr,
+    args: &Vec<Expr>,
+    kwargs: &Vec<Keyword>,
+) {
+    let variable = match match_encoded_variable(func) {
+        Some(variable) => variable,
+        None => return,
+    };
+
+    // Classify the receiver; anything but a (f-)string literal is skipped.
+    let rewrite_as_bytes = match &variable.node {
+        // "foo".encode() can become b"foo" only when the text is pure ASCII.
+        ExprKind::Constant {
+            value: Constant::Str(literal),
+            ..
+        } => literal.is_ascii(),
+        // f"foo{bar}".encode(*args, **kwargs)
+        ExprKind::JoinedStr { .. } => false,
+        _ => return,
+    };
+
+    // "str".encode()
+    // "str".encode("utf-8")
+    if !is_default_encode(args, kwargs) {
+        return;
+    }
+
+    if rewrite_as_bytes {
+        // "foo".encode()
+        checker.add_check(replace_with_bytes_literal(
+            expr,
+            variable,
+            checker.locator,
+            checker.patch(),
+        ));
+    } else if let Some(check) =
+        delete_default_encode_arg_or_kwarg(expr, args, kwargs, checker.patch())
+    {
+        // "unicode text©".encode("utf-8")
+        checker.add_check(check);
+    }
+}
--- a/src/snapshots/rufflintertests__U012_U012.py.snap
+++ b/src/snapshots/rufflintertests__U012_U012.py.snap
@ -0,0 +1,260 @@
+---
+source: src/linter.rs
+expression: checks
+---
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 2
+    column: 0
+  end_location:
+    row: 2
+    column: 21
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 2
+        column: 0
+      end_location:
+        row: 2
+        column: 21
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 3
+    column: 0
+  end_location:
+    row: 3
+    column: 18
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 3
+        column: 0
+      end_location:
+        row: 3
+        column: 18
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 4
+    column: 0
+  end_location:
+    row: 4
+    column: 14
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 4
+        column: 0
+      end_location:
+        row: 4
+        column: 14
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 5
+    column: 0
+  end_location:
+    row: 5
+    column: 20
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 5
+        column: 0
+      end_location:
+        row: 5
+        column: 20
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 6
+    column: 0
+  end_location:
+    row: 6
+    column: 22
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 6
+        column: 0
+      end_location:
+        row: 6
+        column: 22
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 7
+    column: 0
+  end_location:
+    row: 7
+    column: 30
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 7
+        column: 0
+      end_location:
+        row: 7
+        column: 30
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 8
+    column: 0
+  end_location:
+    row: 14
+    column: 1
+  fix:
+    patch:
+      content: "b\"\"\"\nLorem\n\nIpsum\n\"\"\""
+      location:
+        row: 8
+        column: 0
+      end_location:
+        row: 14
+        column: 1
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 26
+    column: 0
+  end_location:
+    row: 26
+    column: 27
+  fix:
+    patch:
+      content: ""
+      location:
+        row: 26
+        column: 19
+      end_location:
+        row: 26
+        column: 26
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 44
+    column: 0
+  end_location:
+    row: 44
+    column: 31
+  fix:
+    patch:
+      content: ""
+      location:
+        row: 44
+        column: 23
+      end_location:
+        row: 44
+        column: 30
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 46
+    column: 0
+  end_location:
+    row: 46
+    column: 39
+  fix:
+    patch:
+      content: ""
+      location:
+        row: 46
+        column: 23
+      end_location:
+        row: 46
+        column: 38
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 48
+    column: 0
+  end_location:
+    row: 48
+    column: 23
+  fix:
+    patch:
+      content: "br\"fo\\o\""
+      location:
+        row: 48
+        column: 0
+      end_location:
+        row: 48
+        column: 23
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 49
+    column: 0
+  end_location:
+    row: 49
+    column: 22
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 49
+        column: 0
+      end_location:
+        row: 49
+        column: 22
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 50
+    column: 0
+  end_location:
+    row: 50
+    column: 23
+  fix:
+    patch:
+      content: "bR\"fo\\o\""
+      location:
+        row: 50
+        column: 0
+      end_location:
+        row: 50
+        column: 23
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 51
+    column: 0
+  end_location:
+    row: 51
+    column: 22
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 51
+        column: 0
+      end_location:
+        row: 51
+        column: 22
+    applied: false
+- kind: UnnecessaryEncodeUTF8
+  location:
+    row: 52
+    column: 6
+  end_location:
+    row: 52
+    column: 20
+  fix:
+    patch:
+      content: "b\"foo\""
+      location:
+        row: 52
+        column: 6
+      end_location:
+        row: 52
+        column: 20
+    applied: false
+