[pylint] Detect pathlib.Path.open calls in unspecified-encoding (PLW1514) (#11288)

<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Resolves #11263

Detect `pathlib.Path.open` calls which do not specify a file encoding.

## Test Plan

Test cases added to fixture.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
This commit is contained in:
Auguste Lalande 2024-05-09 08:36:20 -04:00 committed by GitHub
parent c80c1712f0
commit dd42961dd9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 322 additions and 49 deletions

View file

@ -69,3 +69,28 @@ open(
(("test.txt")),
# comment
)
# pathlib
from pathlib import Path
# Errors.
Path("foo.txt").open()
Path("foo.txt").open("w")
text = Path("foo.txt").read_text()
Path("foo.txt").write_text(text)
# Non-errors.
Path("foo.txt").open(encoding="utf-8")
Path("foo.txt").open("wb")
Path("foo.txt").open(*args)
Path("foo.txt").open(**kwargs)
text = Path("foo.txt").read_text(encoding="utf-8")
text = Path("foo.txt").read_text(*args)
text = Path("foo.txt").read_text(**kwargs)
Path("foo.txt").write_text(text, encoding="utf-8")
Path("foo.txt").write_text(text, *args)
Path("foo.txt").write_text(text, **kwargs)
# Violation but not detectable
x = Path("foo.txt")
x.open()

View file

@ -1,11 +1,12 @@
use std::fmt::{Display, Formatter};
use anyhow::Result;
use ast::StringLiteralFlags;
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_python_ast::name::QualifiedName;
use ruff_python_ast::Expr;
use ruff_python_ast::{self as ast, Expr, StringLiteralFlags};
use ruff_python_semantic::SemanticModel;
use ruff_text_size::{Ranged, TextRange};
use crate::checkers::ast::Checker;
@ -43,7 +44,7 @@ use crate::settings::types::PythonVersion;
#[violation]
pub struct UnspecifiedEncoding {
function_name: String,
mode: Mode,
mode: ModeArgument,
}
impl AlwaysFixableViolation for UnspecifiedEncoding {
@ -55,10 +56,10 @@ impl AlwaysFixableViolation for UnspecifiedEncoding {
} = self;
match mode {
Mode::Supported => {
ModeArgument::Supported => {
format!("`{function_name}` in text mode without explicit `encoding` argument")
}
Mode::Unsupported => {
ModeArgument::Unsupported => {
format!("`{function_name}` without explicit `encoding` argument")
}
}
@ -71,11 +72,9 @@ impl AlwaysFixableViolation for UnspecifiedEncoding {
/// PLW1514
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
let Some((function_name, mode)) = checker
.semantic()
.resolve_qualified_name(&call.func)
.filter(|qualified_name| is_violation(call, qualified_name))
.map(|qualified_name| (qualified_name.to_string(), Mode::from(&qualified_name)))
let Some((function_name, mode)) = Callee::try_from_call_expression(call, checker.semantic())
.filter(|segments| is_violation(call, segments))
.map(|segments| (segments.to_string(), segments.mode_argument()))
else {
return;
};
@ -97,6 +96,68 @@ pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall)
checker.diagnostics.push(diagnostic);
}
/// Represents the path of the function or method being called.
enum Callee<'a> {
/// Fully-qualified symbol name of the callee.
Qualified(QualifiedName<'a>),
/// Attribute value for the `pathlib.Path(...)` call e.g., `open` in
/// `pathlib.Path(...).open(...)`.
Pathlib(&'a str),
}
impl<'a> Callee<'a> {
fn try_from_call_expression(
call: &'a ast::ExprCall,
semantic: &'a SemanticModel,
) -> Option<Self> {
if let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = call.func.as_ref() {
// Check for `pathlib.Path(...).open(...)` or equivalent
if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() {
if semantic
.resolve_qualified_name(func)
.is_some_and(|qualified_name| {
matches!(qualified_name.segments(), ["pathlib", "Path"])
})
{
return Some(Callee::Pathlib(attr));
}
}
}
if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) {
return Some(Callee::Qualified(qualified_name));
}
None
}
fn mode_argument(&self) -> ModeArgument {
match self {
Callee::Qualified(qualified_name) => match qualified_name.segments() {
["" | "codecs" | "_io", "open"] => ModeArgument::Supported,
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
ModeArgument::Supported
}
["io" | "_io", "TextIOWrapper"] => ModeArgument::Unsupported,
_ => ModeArgument::Unsupported,
},
Callee::Pathlib(attr) => match *attr {
"open" => ModeArgument::Supported,
_ => ModeArgument::Unsupported,
},
}
}
}
impl Display for Callee<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Callee::Qualified(qualified_name) => f.write_str(&qualified_name.to_string()),
Callee::Pathlib(attr) => f.write_str(&format!("pathlib.Path(...).{attr}")),
}
}
}
/// Generate an [`Edit`] for Python 3.10 and later.
fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix {
Fix::unsafe_edit(add_argument(
@ -146,7 +207,7 @@ fn is_binary_mode(expr: &Expr) -> Option<bool> {
}
/// Returns `true` if the given call lacks an explicit `encoding`.
fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool {
fn is_violation(call: &ast::ExprCall, qualified_name: &Callee) -> bool {
// If we have something like `*args`, which might contain the encoding argument, abort.
if call.arguments.args.iter().any(Expr::is_starred_expr) {
return false;
@ -160,54 +221,61 @@ fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool {
{
return false;
}
match qualified_name.segments() {
["" | "codecs" | "_io", "open"] => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
match qualified_name {
Callee::Qualified(qualified_name) => match qualified_name.segments() {
["" | "codecs" | "_io", "open"] => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
}
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 3).is_none()
}
["tempfile", tempfile_class @ ("TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile")] =>
{
let mode_pos = usize::from(*tempfile_class == "SpooledTemporaryFile");
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
} else {
// defaults to binary mode
return false;
}
call.arguments
.find_argument("encoding", mode_pos + 2)
.is_none()
}
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 3).is_none()
}
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
let mode_pos = usize::from(qualified_name.segments()[1] == "SpooledTemporaryFile");
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
["io" | "_io", "TextIOWrapper"] => {
call.arguments.find_argument("encoding", 1).is_none()
}
_ => false,
},
Callee::Pathlib(attr) => match *attr {
"open" => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 0) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
}
} else {
// defaults to binary mode
return false;
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 2).is_none()
}
call.arguments
.find_argument("encoding", mode_pos + 2)
.is_none()
}
["io" | "_io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
_ => false,
"read_text" => call.arguments.find_argument("encoding", 0).is_none(),
"write_text" => call.arguments.find_argument("encoding", 1).is_none(),
_ => false,
},
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
enum ModeArgument {
/// The call supports a `mode` argument.
Supported,
/// The call does not support a `mode` argument.
Unsupported,
}
impl From<&QualifiedName<'_>> for Mode {
fn from(value: &QualifiedName<'_>) -> Self {
match value.segments() {
["" | "codecs" | "_io", "open"] => Mode::Supported,
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
Mode::Supported
}
["io" | "_io", "TextIOWrapper"] => Mode::Unsupported,
_ => Mode::Unsupported,
}
}
}

View file

@ -352,5 +352,86 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit `
69 |+ (("test.txt")), encoding="locale",
70 70 | # comment
71 71 | )
72 72 |
unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
76 | # Errors.
77 | Path("foo.txt").open()
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
|
= help: Add explicit `encoding` argument
Unsafe fix
74 74 | from pathlib import Path
75 75 |
76 76 | # Errors.
77 |-Path("foo.txt").open()
77 |+Path("foo.txt").open(encoding="locale")
78 78 | Path("foo.txt").open("w")
79 79 | text = Path("foo.txt").read_text()
80 80 | Path("foo.txt").write_text(text)
unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
76 | # Errors.
77 | Path("foo.txt").open()
78 | Path("foo.txt").open("w")
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
79 | text = Path("foo.txt").read_text()
80 | Path("foo.txt").write_text(text)
|
= help: Add explicit `encoding` argument
Unsafe fix
75 75 |
76 76 | # Errors.
77 77 | Path("foo.txt").open()
78 |-Path("foo.txt").open("w")
78 |+Path("foo.txt").open("w", encoding="locale")
79 79 | text = Path("foo.txt").read_text()
80 80 | Path("foo.txt").write_text(text)
81 81 |
unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without explicit `encoding` argument
|
77 | Path("foo.txt").open()
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
| ^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
80 | Path("foo.txt").write_text(text)
|
= help: Add explicit `encoding` argument
Unsafe fix
76 76 | # Errors.
77 77 | Path("foo.txt").open()
78 78 | Path("foo.txt").open("w")
79 |-text = Path("foo.txt").read_text()
79 |+text = Path("foo.txt").read_text(encoding="locale")
80 80 | Path("foo.txt").write_text(text)
81 81 |
82 82 | # Non-errors.
unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without explicit `encoding` argument
|
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
80 | Path("foo.txt").write_text(text)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
81 |
82 | # Non-errors.
|
= help: Add explicit `encoding` argument
Unsafe fix
77 77 | Path("foo.txt").open()
78 78 | Path("foo.txt").open("w")
79 79 | text = Path("foo.txt").read_text()
80 |-Path("foo.txt").write_text(text)
80 |+Path("foo.txt").write_text(text, encoding="locale")
81 81 |
82 82 | # Non-errors.
83 83 | Path("foo.txt").open(encoding="utf-8")

View file

@ -473,5 +473,104 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit `
70 |+ (("test.txt")), encoding=locale.getpreferredencoding(False),
70 71 | # comment
71 72 | )
72 73 |
unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
76 | # Errors.
77 | Path("foo.txt").open()
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
|
= help: Add explicit `encoding` argument
Unsafe fix
72 72 |
73 73 | # pathlib
74 74 | from pathlib import Path
75 |+import locale
75 76 |
76 77 | # Errors.
77 |-Path("foo.txt").open()
78 |+Path("foo.txt").open(encoding=locale.getpreferredencoding(False))
78 79 | Path("foo.txt").open("w")
79 80 | text = Path("foo.txt").read_text()
80 81 | Path("foo.txt").write_text(text)
unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
76 | # Errors.
77 | Path("foo.txt").open()
78 | Path("foo.txt").open("w")
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
79 | text = Path("foo.txt").read_text()
80 | Path("foo.txt").write_text(text)
|
= help: Add explicit `encoding` argument
Unsafe fix
72 72 |
73 73 | # pathlib
74 74 | from pathlib import Path
75 |+import locale
75 76 |
76 77 | # Errors.
77 78 | Path("foo.txt").open()
78 |-Path("foo.txt").open("w")
79 |+Path("foo.txt").open("w", encoding=locale.getpreferredencoding(False))
79 80 | text = Path("foo.txt").read_text()
80 81 | Path("foo.txt").write_text(text)
81 82 |
unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without explicit `encoding` argument
|
77 | Path("foo.txt").open()
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
| ^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
80 | Path("foo.txt").write_text(text)
|
= help: Add explicit `encoding` argument
Unsafe fix
72 72 |
73 73 | # pathlib
74 74 | from pathlib import Path
75 |+import locale
75 76 |
76 77 | # Errors.
77 78 | Path("foo.txt").open()
78 79 | Path("foo.txt").open("w")
79 |-text = Path("foo.txt").read_text()
80 |+text = Path("foo.txt").read_text(encoding=locale.getpreferredencoding(False))
80 81 | Path("foo.txt").write_text(text)
81 82 |
82 83 | # Non-errors.
unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without explicit `encoding` argument
|
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
80 | Path("foo.txt").write_text(text)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
81 |
82 | # Non-errors.
|
= help: Add explicit `encoding` argument
Unsafe fix
72 72 |
73 73 | # pathlib
74 74 | from pathlib import Path
75 |+import locale
75 76 |
76 77 | # Errors.
77 78 | Path("foo.txt").open()
78 79 | Path("foo.txt").open("w")
79 80 | text = Path("foo.txt").read_text()
80 |-Path("foo.txt").write_text(text)
81 |+Path("foo.txt").write_text(text, encoding=locale.getpreferredencoding(False))
81 82 |
82 83 | # Non-errors.
83 84 | Path("foo.txt").open(encoding="utf-8")