feat: no unnecessary encode utf8 (#686)

2025-09-26 20:10:09 +00:00 · 2022-11-12 16:54:36 +00:00 · 2022-11-12 16:54:36 +00:00 · afa59d78bb
commit afa59d78bb
parent bbc38fea73
9 changed files with 487 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -443,6 +443,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI.
 | U009 | PEP3120UnnecessaryCodingComment | utf-8 encoding declaration is unnecessary | 🛠 |
 | U010 | UnnecessaryFutureImport | Unnecessary `__future__` import `...` for target Python version | 🛠 |
 | U011 | UnnecessaryLRUCacheParams | Unnecessary parameters to functools.lru_cache | 🛠 |
 | U012 | UnnecessaryEncodeUTF8 | Unnecessary call to `encode` as UTF-8 | 🛠 |
 ### pep8-naming
@ -687,7 +688,7 @@ including:
 - [`flake8-comprehensions`](https://pypi.org/project/flake8-comprehensions/)
 - [`flake8-bugbear`](https://pypi.org/project/flake8-bugbear/) (21/32)
 - [`flake8-2020`](https://pypi.org/project/flake8-2020/)
- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34)
+- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34)
 - [`autoflake`](https://pypi.org/project/autoflake/) (1/7)
 Beyond rule-set parity, Ruff suffers from the following limitations vis-à-vis Flake8:
@ -713,7 +714,7 @@ Today, Ruff can be used to replace Flake8 when used with any of the following pl
 - [`flake8-2020`](https://pypi.org/project/flake8-2020/)
 Ruff can also replace [`isort`](https://pypi.org/project/isort/), [`yesqa`](https://github.com/asottile/yesqa),
-and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34).
+and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34).
 If you're looking to use Ruff, but rely on an unsupported Flake8 plugin, feel free to file an Issue.
--- a/resources/test/fixtures/U012.py
+++ b/resources/test/fixtures/U012.py
@ -0,0 +1,52 @@
 # ASCII literals should be replaced by a bytes literal
 "foo".encode("utf-8")  # b"foo"
 "foo".encode("u8")  # b"foo"
 "foo".encode()  # b"foo"
 "foo".encode("UTF8")  # b"foo"
 U"foo".encode("utf-8")  # b"foo"
 "foo".encode(encoding="utf-8")  # b"foo"
 """
 Lorem
 Ipsum
 """.encode(
    "utf-8"
 )
 # b"""
 # Lorem
 #
 # Ipsum
 # """
 # `encode` on variables should not be processed.
 string = "hello there"
 string.encode("utf-8")
 bar = "bar"
 f"foo{bar}".encode("utf-8")  # f"foo{bar}".encode()
 encoding = "latin"
 "foo".encode(encoding)
 f"foo{bar}".encode(encoding)
 # `encode` with custom args and kwargs should not be processed.
 "foo".encode("utf-8", errors="replace")
 "foo".encode("utf-8", "replace")
 "foo".encode(errors="replace")
 "foo".encode(encoding="utf-8", errors="replace")
 # `encode` with custom args and kwargs on unicode should not be processed.
 "unicode text©".encode("utf-8", errors="replace")
 "unicode text©".encode("utf-8", "replace")
 "unicode text©".encode(errors="replace")
 "unicode text©".encode(encoding="utf-8", errors="replace")
 # Unicode literals should only be stripped of default encoding.
 "unicode text©".encode("utf-8")  # "unicode text©".encode()
 "unicode text©".encode()
 "unicode text©".encode(encoding="UTF8")  # "unicode text©".encode()
 r"fo\o".encode("utf-8")  # br"fo\o"
 u"foo".encode("utf-8")  # b"foo"
 R"fo\o".encode("utf-8")  # br"fo\o"
 U"foo".encode("utf-8")  # b"foo"
 print("foo".encode())  # print(b"foo")
--- a/src/check_ast.rs
+++ b/src/check_ast.rs
@ -1073,6 +1073,10 @@ where
                    pyupgrade::plugins::super_call_with_parameters(self, expr, func, args);
                }
                if self.settings.enabled.contains(&CheckCode::U012) {
                    pyupgrade::plugins::unnecessary_encode_utf8(self, expr, func, args, keywords);
                }
                // flake8-print
                if self.settings.enabled.contains(&CheckCode::T201)
                    || self.settings.enabled.contains(&CheckCode::T203)
--- a/src/checks.rs
+++ b/src/checks.rs
@ -156,6 +156,7 @@ pub enum CheckCode {
    U009,
    U010,
    U011,
    U012,
    // pydocstyle
    D100,
    D101,
@ -444,6 +445,7 @@ pub enum CheckKind {
    PEP3120UnnecessaryCodingComment,
    UnnecessaryFutureImport(Vec<String>),
    UnnecessaryLRUCacheParams,
    UnnecessaryEncodeUTF8,
    // pydocstyle
    BlankLineAfterLastSection(String),
    BlankLineAfterSection(String),
@ -691,6 +693,7 @@ impl CheckCode {
            CheckCode::U009 => CheckKind::PEP3120UnnecessaryCodingComment,
            CheckCode::U010 => CheckKind::UnnecessaryFutureImport(vec!["...".to_string()]),
            CheckCode::U011 => CheckKind::UnnecessaryLRUCacheParams,
            CheckCode::U012 => CheckKind::UnnecessaryEncodeUTF8,
            // pydocstyle
            CheckCode::D100 => CheckKind::PublicModule,
            CheckCode::D101 => CheckKind::PublicClass,
@ -901,6 +904,7 @@ impl CheckCode {
            CheckCode::U009 => CheckCategory::Pyupgrade,
            CheckCode::U010 => CheckCategory::Pyupgrade,
            CheckCode::U011 => CheckCategory::Pyupgrade,
            CheckCode::U012 => CheckCategory::Pyupgrade,
            CheckCode::D100 => CheckCategory::Pydocstyle,
            CheckCode::D101 => CheckCategory::Pydocstyle,
            CheckCode::D102 => CheckCategory::Pydocstyle,
@ -1103,6 +1107,7 @@ impl CheckKind {
            CheckKind::PEP3120UnnecessaryCodingComment => &CheckCode::U009,
            CheckKind::UnnecessaryFutureImport(_) => &CheckCode::U010,
            CheckKind::UnnecessaryLRUCacheParams => &CheckCode::U011,
            CheckKind::UnnecessaryEncodeUTF8 => &CheckCode::U012,
            // pydocstyle
            CheckKind::BlankLineAfterLastSection(_) => &CheckCode::D413,
            CheckKind::BlankLineAfterSection(_) => &CheckCode::D410,
@ -1607,6 +1612,7 @@ impl CheckKind {
            CheckKind::UnnecessaryLRUCacheParams => {
                "Unnecessary parameters to functools.lru_cache".to_string()
            }
            CheckKind::UnnecessaryEncodeUTF8 => "Unnecessary call to `encode` as UTF-8".to_string(),
            // pydocstyle
            CheckKind::FitsOnOneLine => "One-line docstring should fit on one line".to_string(),
            CheckKind::BlankLineAfterSummary => {
@ -1873,6 +1879,7 @@ impl CheckKind {
                | CheckKind::UnnecessaryAbspath
                | CheckKind::UnnecessaryCollectionCall(_)
                | CheckKind::UnnecessaryComprehension(_)
                | CheckKind::UnnecessaryEncodeUTF8
                | CheckKind::UnnecessaryFutureImport(_)
                | CheckKind::UnnecessaryGeneratorDict
                | CheckKind::UnnecessaryGeneratorList
--- a/src/checks_gen.rs
+++ b/src/checks_gen.rs
@ -265,6 +265,7 @@ pub enum CheckCodePrefix {
    U01,
    U010,
    U011,
    U012,
    W,
    W2,
    W29,
@ -1004,6 +1005,7 @@ impl CheckCodePrefix {
                CheckCode::U009,
                CheckCode::U010,
                CheckCode::U011,
                CheckCode::U012,
            ],
            CheckCodePrefix::U0 => vec![
                CheckCode::U001,
@ -1017,6 +1019,7 @@ impl CheckCodePrefix {
                CheckCode::U009,
                CheckCode::U010,
                CheckCode::U011,
                CheckCode::U012,
            ],
            CheckCodePrefix::U00 => vec![
                CheckCode::U001,
@ -1038,9 +1041,10 @@ impl CheckCodePrefix {
            CheckCodePrefix::U007 => vec![CheckCode::U007],
            CheckCodePrefix::U008 => vec![CheckCode::U008],
            CheckCodePrefix::U009 => vec![CheckCode::U009],
-            CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011],
+            CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011, CheckCode::U012],
            CheckCodePrefix::U010 => vec![CheckCode::U010],
            CheckCodePrefix::U011 => vec![CheckCode::U011],
            CheckCodePrefix::U012 => vec![CheckCode::U012],
            CheckCodePrefix::W => vec![CheckCode::W292, CheckCode::W605],
            CheckCodePrefix::W2 => vec![CheckCode::W292],
            CheckCodePrefix::W29 => vec![CheckCode::W292],
@ -1351,6 +1355,7 @@ impl CheckCodePrefix {
            CheckCodePrefix::U01 => PrefixSpecificity::Tens,
            CheckCodePrefix::U010 => PrefixSpecificity::Explicit,
            CheckCodePrefix::U011 => PrefixSpecificity::Explicit,
            CheckCodePrefix::U012 => PrefixSpecificity::Explicit,
            CheckCodePrefix::W => PrefixSpecificity::Category,
            CheckCodePrefix::W2 => PrefixSpecificity::Hundreds,
            CheckCodePrefix::W29 => PrefixSpecificity::Tens,
--- a/src/linter.rs
+++ b/src/linter.rs
@ -486,6 +486,7 @@ mod tests {
    #[test_case(CheckCode::U010, Path::new("U010.py"); "U010")]
    #[test_case(CheckCode::U011, Path::new("U011_0.py"); "U011_0")]
    #[test_case(CheckCode::U011, Path::new("U011_1.py"); "U011_1")]
    #[test_case(CheckCode::U012, Path::new("U012.py"); "U012")]
    #[test_case(CheckCode::W292, Path::new("W292_0.py"); "W292_0")]
    #[test_case(CheckCode::W292, Path::new("W292_1.py"); "W292_1")]
    #[test_case(CheckCode::W292, Path::new("W292_2.py"); "W292_2")]
--- a/src/pyupgrade/plugins/mod.rs
+++ b/src/pyupgrade/plugins/mod.rs
@ -2,6 +2,7 @@ pub use deprecated_unittest_alias::deprecated_unittest_alias;
 pub use super_call_with_parameters::super_call_with_parameters;
 pub use type_of_primitive::type_of_primitive;
 pub use unnecessary_abspath::unnecessary_abspath;
 pub use unnecessary_encode_utf8::unnecessary_encode_utf8;
 pub use unnecessary_future_import::unnecessary_future_import;
 pub use unnecessary_lru_cache_params::unnecessary_lru_cache_params;
 pub use use_pep585_annotation::use_pep585_annotation;
@ -13,6 +14,7 @@ mod deprecated_unittest_alias;
 mod super_call_with_parameters;
 mod type_of_primitive;
 mod unnecessary_abspath;
 mod unnecessary_encode_utf8;
 mod unnecessary_future_import;
 mod unnecessary_lru_cache_params;
 mod use_pep585_annotation;
--- a/src/pyupgrade/plugins/unnecessary_encode_utf8.rs
+++ b/src/pyupgrade/plugins/unnecessary_encode_utf8.rs
@ -0,0 +1,152 @@
 use rustpython_ast::{Constant, Expr, ExprKind, Keyword};
 use crate::ast::types::Range;
 use crate::autofix::Fix;
 use crate::check_ast::Checker;
 use crate::checks::{Check, CheckKind};
 use crate::source_code_locator::SourceCodeLocator;
 // Encoding-name spellings that this check treats as UTF-8. Candidates are
 // lowercased before being compared against this list.
 const UTF8_LITERALS: &[&str] = &["utf-8", "utf8", "utf_8", "u8", "utf", "cp65001"];
 // Return the expression an `encode` attribute is read from (the `x` in
 // `x.encode`), or `None` when `func` is any other kind of expression.
 fn match_encoded_variable(func: &Expr) -> Option<&Expr> {
    match &func.node {
        ExprKind::Attribute { value, attr, .. } if attr == "encode" => Some(value),
        _ => None,
    }
 }
 // Return `true` when `arg` is a string constant whose lowercased value is one
 // of `UTF8_LITERALS`; any other expression yields `false`.
 fn is_utf8_encoding_arg(arg: &Expr) -> bool {
    match &arg.node {
        ExprKind::Constant {
            value: Constant::Str(name),
            ..
        } => {
            let lowered = name.to_lowercase();
            UTF8_LITERALS.contains(&lowered.as_str())
        }
        _ => false,
    }
 }
 // Return `true` when the call's arguments amount to a plain UTF-8 encode:
 // no arguments at all, a single positional UTF-8 name, or a single
 // `encoding=` keyword carrying a UTF-8 name. Every other combination of
 // positional and keyword arguments yields `false`.
 //
 // Takes slices rather than `&Vec<_>`; existing `&Vec<_>` call sites keep
 // working through deref coercion.
 fn is_default_encode(args: &[Expr], kwargs: &[Keyword]) -> bool {
    match (args, kwargs) {
        // .encode()
        ([], []) => true,
        // .encode(encoding)
        ([encoding], []) => is_utf8_encoding_arg(encoding),
        // .encode(kwarg=kwarg)
        ([], [kwarg]) => {
            // Compare by `&str` so no `String` is allocated per call.
            kwarg.node.arg.as_deref() == Some("encoding")
                && is_utf8_encoding_arg(&kwarg.node.value)
        }
        // .encode(*args, **kwargs)
        _ => false,
    }
 }
 // Build the check for an `encode` call whose encoding argument is redundant.
 // The first positional argument is used if there is one, otherwise the first
 // keyword argument; when `patch` is set, the check carries a fix deleting
 // that argument's source span. Returns `None` when the call has no arguments.
 // NOTE(review): only the argument's own span is deleted, so a trailing comma
 // following it would presumably be left in place -- confirm.
 fn delete_default_encode_arg_or_kwarg(
    expr: &Expr,
    args: &[Expr],
    kwargs: &[Keyword],
    patch: bool,
 ) -> Option<Check> {
    // Pick the span to delete; the end location is only unwrapped when a fix
    // is actually requested.
    let (start, end) = match (args.first(), kwargs.first()) {
        (Some(arg), _) => (arg.location, arg.end_location),
        (None, Some(kwarg)) => (kwarg.location, kwarg.end_location),
        (None, None) => return None,
    };
    let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr));
    if patch {
        check.amend(Fix::deletion(start, end.unwrap()));
    }
    Some(check)
 }
 // Report whether a string literal's source text contains a `\N`, `\u` or
 // `\U` escape outside of a raw literal. Those sequences are escapes in `str`
 // literals only, so adding a `b` prefix would change the resulting bytes.
 // The scan is textual and deliberately conservative: an escaped backslash
 // followed by one of those letters is also reported.
 fn has_str_only_escapes(literal_source: &str) -> bool {
    // Everything before the first quote character is the literal's prefix.
    let prefix_len = literal_source
        .find(|c: char| c == '"' || c == '\'')
        .unwrap_or(0);
    let is_raw = literal_source[..prefix_len]
        .chars()
        .any(|c| c == 'r' || c == 'R');
    !is_raw
        && ["\\N", "\\u", "\\U"]
            .iter()
            .any(|escape| literal_source.contains(*escape))
 }
 // Return a Check for an `encode` call on the string literal `constant`.
 // When `patch` is set, the check carries a fix that replaces the whole call
 // `expr` with the literal's source text, stripped of any leading `u`/`U`
 // and prefixed with `b`. The fix is withheld (the check is still returned)
 // when the literal uses escapes that only `str` literals understand, since
 // the rewritten bytes literal would no longer equal the encoded string.
 fn replace_with_bytes_literal(
    expr: &Expr,
    constant: &Expr,
    locator: &SourceCodeLocator,
    patch: bool,
 ) -> Check {
    let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr));
    if patch {
        let literal_source = locator.slice_source_code_range(&Range {
            location: constant.location,
            end_location: constant.end_location.unwrap(),
        });
        if !has_str_only_escapes(&literal_source) {
            let content = format!(
                "b{}",
                literal_source
                    .trim_start_matches('u')
                    .trim_start_matches('U')
            );
            check.amend(Fix::replacement(
                content,
                expr.location,
                expr.end_location.unwrap(),
            ));
        }
    }
    check
 }
 /// U012
 ///
 /// Inspects a call expression and, when it is `.encode(...)` applied to a
 /// string literal or an f-string with default (UTF-8) arguments, registers an
 /// `UnnecessaryEncodeUTF8` check on `checker`. ASCII string literals get the
 /// bytes-literal rewrite; non-ASCII literals and f-strings get the
 /// argument-removal variant, which yields nothing if the call has no
 /// arguments. Calls on any other receiver are ignored.
 pub fn unnecessary_encode_utf8(
    checker: &mut Checker,
    expr: &Expr,
    func: &Expr,
    args: &Vec<Expr>,
    kwargs: &Vec<Keyword>,
 ) {
    // Only `<receiver>.encode(...)` calls are of interest.
    let receiver = match match_encoded_variable(func) {
        Some(receiver) => receiver,
        None => return,
    };
    let check = match &receiver.node {
        // "str".encode()
        // "str".encode("utf-8")
        ExprKind::Constant {
            value: Constant::Str(literal),
            ..
        } if is_default_encode(args, kwargs) => {
            if literal.is_ascii() {
                // "foo".encode()
                Some(replace_with_bytes_literal(
                    expr,
                    receiver,
                    checker.locator,
                    checker.patch(),
                ))
            } else {
                // "unicode text©".encode("utf-8")
                delete_default_encode_arg_or_kwarg(expr, args, kwargs, checker.patch())
            }
        }
        // f"foo{bar}".encode(*args, **kwargs)
        ExprKind::JoinedStr { .. } if is_default_encode(args, kwargs) => {
            delete_default_encode_arg_or_kwarg(expr, args, kwargs, checker.patch())
        }
        _ => None,
    };
    if let Some(check) = check {
        checker.add_check(check);
    }
 }
--- a/src/snapshots/rufflintertests__U012_U012.py.snap
+++ b/src/snapshots/rufflintertests__U012_U012.py.snap
@ -0,0 +1,260 @@
 ---
 source: src/linter.rs
 expression: checks
 ---
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 2
    column: 0
  end_location:
    row: 2
    column: 21
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 2
        column: 0
      end_location:
        row: 2
        column: 21
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 3
    column: 0
  end_location:
    row: 3
    column: 18
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 3
        column: 0
      end_location:
        row: 3
        column: 18
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 4
    column: 0
  end_location:
    row: 4
    column: 14
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 4
        column: 0
      end_location:
        row: 4
        column: 14
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 5
    column: 0
  end_location:
    row: 5
    column: 20
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 5
        column: 0
      end_location:
        row: 5
        column: 20
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 6
    column: 0
  end_location:
    row: 6
    column: 22
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 6
        column: 0
      end_location:
        row: 6
        column: 22
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 7
    column: 0
  end_location:
    row: 7
    column: 30
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 7
        column: 0
      end_location:
        row: 7
        column: 30
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 8
    column: 0
  end_location:
    row: 14
    column: 1
  fix:
    patch:
      content: "b\"\"\"\nLorem\n\nIpsum\n\"\"\""
      location:
        row: 8
        column: 0
      end_location:
        row: 14
        column: 1
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 26
    column: 0
  end_location:
    row: 26
    column: 27
  fix:
    patch:
      content: ""
      location:
        row: 26
        column: 19
      end_location:
        row: 26
        column: 26
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 44
    column: 0
  end_location:
    row: 44
    column: 31
  fix:
    patch:
      content: ""
      location:
        row: 44
        column: 23
      end_location:
        row: 44
        column: 30
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 46
    column: 0
  end_location:
    row: 46
    column: 39
  fix:
    patch:
      content: ""
      location:
        row: 46
        column: 23
      end_location:
        row: 46
        column: 38
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 48
    column: 0
  end_location:
    row: 48
    column: 23
  fix:
    patch:
      content: "br\"fo\\o\""
      location:
        row: 48
        column: 0
      end_location:
        row: 48
        column: 23
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 49
    column: 0
  end_location:
    row: 49
    column: 22
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 49
        column: 0
      end_location:
        row: 49
        column: 22
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 50
    column: 0
  end_location:
    row: 50
    column: 23
  fix:
    patch:
      content: "bR\"fo\\o\""
      location:
        row: 50
        column: 0
      end_location:
        row: 50
        column: 23
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 51
    column: 0
  end_location:
    row: 51
    column: 22
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 51
        column: 0
      end_location:
        row: 51
        column: 22
    applied: false
 - kind: UnnecessaryEncodeUTF8
  location:
    row: 52
    column: 6
  end_location:
    row: 52
    column: 20
  fix:
    patch:
      content: "b\"foo\""
      location:
        row: 52
        column: 6
      end_location:
        row: 52
        column: 20
    applied: false