mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-30 22:01:18 +00:00
[pylint
] Detect pathlib.Path.open
calls in unspecified-encoding
(PLW1514
) (#11288)
<!-- Thank you for contributing to Ruff! To help us out with reviewing, please consider the following: - Does this pull request include a summary of the change? (See below.) - Does this pull request include a descriptive title? - Does this pull request include references to any relevant issues? --> ## Summary Resolves #11263 Detect `pathlib.Path.open` calls which do not specify a file encoding. ## Test Plan Test cases added to fixture. --------- Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
This commit is contained in:
parent
c80c1712f0
commit
dd42961dd9
4 changed files with 322 additions and 49 deletions
|
@ -69,3 +69,28 @@ open(
|
|||
(("test.txt")),
|
||||
# comment
|
||||
)
|
||||
|
||||
# pathlib
|
||||
from pathlib import Path
|
||||
|
||||
# Errors.
|
||||
Path("foo.txt").open()
|
||||
Path("foo.txt").open("w")
|
||||
text = Path("foo.txt").read_text()
|
||||
Path("foo.txt").write_text(text)
|
||||
|
||||
# Non-errors.
|
||||
Path("foo.txt").open(encoding="utf-8")
|
||||
Path("foo.txt").open("wb")
|
||||
Path("foo.txt").open(*args)
|
||||
Path("foo.txt").open(**kwargs)
|
||||
text = Path("foo.txt").read_text(encoding="utf-8")
|
||||
text = Path("foo.txt").read_text(*args)
|
||||
text = Path("foo.txt").read_text(**kwargs)
|
||||
Path("foo.txt").write_text(text, encoding="utf-8")
|
||||
Path("foo.txt").write_text(text, *args)
|
||||
Path("foo.txt").write_text(text, **kwargs)
|
||||
|
||||
# Violation but not detectable
|
||||
x = Path("foo.txt")
|
||||
x.open()
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use ast::StringLiteralFlags;
|
||||
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast as ast;
|
||||
use ruff_python_ast::name::QualifiedName;
|
||||
use ruff_python_ast::Expr;
|
||||
use ruff_python_ast::{self as ast, Expr, StringLiteralFlags};
|
||||
use ruff_python_semantic::SemanticModel;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
|
@ -43,7 +44,7 @@ use crate::settings::types::PythonVersion;
|
|||
#[violation]
|
||||
pub struct UnspecifiedEncoding {
|
||||
function_name: String,
|
||||
mode: Mode,
|
||||
mode: ModeArgument,
|
||||
}
|
||||
|
||||
impl AlwaysFixableViolation for UnspecifiedEncoding {
|
||||
|
@ -55,10 +56,10 @@ impl AlwaysFixableViolation for UnspecifiedEncoding {
|
|||
} = self;
|
||||
|
||||
match mode {
|
||||
Mode::Supported => {
|
||||
ModeArgument::Supported => {
|
||||
format!("`{function_name}` in text mode without explicit `encoding` argument")
|
||||
}
|
||||
Mode::Unsupported => {
|
||||
ModeArgument::Unsupported => {
|
||||
format!("`{function_name}` without explicit `encoding` argument")
|
||||
}
|
||||
}
|
||||
|
@ -71,11 +72,9 @@ impl AlwaysFixableViolation for UnspecifiedEncoding {
|
|||
|
||||
/// PLW1514
|
||||
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
|
||||
let Some((function_name, mode)) = checker
|
||||
.semantic()
|
||||
.resolve_qualified_name(&call.func)
|
||||
.filter(|qualified_name| is_violation(call, qualified_name))
|
||||
.map(|qualified_name| (qualified_name.to_string(), Mode::from(&qualified_name)))
|
||||
let Some((function_name, mode)) = Callee::try_from_call_expression(call, checker.semantic())
|
||||
.filter(|segments| is_violation(call, segments))
|
||||
.map(|segments| (segments.to_string(), segments.mode_argument()))
|
||||
else {
|
||||
return;
|
||||
};
|
||||
|
@ -97,6 +96,68 @@ pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall)
|
|||
checker.diagnostics.push(diagnostic);
|
||||
}
|
||||
|
||||
/// Represents the path of the function or method being called.
|
||||
enum Callee<'a> {
|
||||
/// Fully-qualified symbol name of the callee.
|
||||
Qualified(QualifiedName<'a>),
|
||||
/// Attribute value for the `pathlib.Path(...)` call e.g., `open` in
|
||||
/// `pathlib.Path(...).open(...)`.
|
||||
Pathlib(&'a str),
|
||||
}
|
||||
|
||||
impl<'a> Callee<'a> {
|
||||
fn try_from_call_expression(
|
||||
call: &'a ast::ExprCall,
|
||||
semantic: &'a SemanticModel,
|
||||
) -> Option<Self> {
|
||||
if let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = call.func.as_ref() {
|
||||
// Check for `pathlib.Path(...).open(...)` or equivalent
|
||||
if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() {
|
||||
if semantic
|
||||
.resolve_qualified_name(func)
|
||||
.is_some_and(|qualified_name| {
|
||||
matches!(qualified_name.segments(), ["pathlib", "Path"])
|
||||
})
|
||||
{
|
||||
return Some(Callee::Pathlib(attr));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) {
|
||||
return Some(Callee::Qualified(qualified_name));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn mode_argument(&self) -> ModeArgument {
|
||||
match self {
|
||||
Callee::Qualified(qualified_name) => match qualified_name.segments() {
|
||||
["" | "codecs" | "_io", "open"] => ModeArgument::Supported,
|
||||
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
|
||||
ModeArgument::Supported
|
||||
}
|
||||
["io" | "_io", "TextIOWrapper"] => ModeArgument::Unsupported,
|
||||
_ => ModeArgument::Unsupported,
|
||||
},
|
||||
Callee::Pathlib(attr) => match *attr {
|
||||
"open" => ModeArgument::Supported,
|
||||
_ => ModeArgument::Unsupported,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Callee<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Callee::Qualified(qualified_name) => f.write_str(&qualified_name.to_string()),
|
||||
Callee::Pathlib(attr) => f.write_str(&format!("pathlib.Path(...).{attr}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate an [`Edit`] for Python 3.10 and later.
|
||||
fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix {
|
||||
Fix::unsafe_edit(add_argument(
|
||||
|
@ -146,7 +207,7 @@ fn is_binary_mode(expr: &Expr) -> Option<bool> {
|
|||
}
|
||||
|
||||
/// Returns `true` if the given call lacks an explicit `encoding`.
|
||||
fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool {
|
||||
fn is_violation(call: &ast::ExprCall, qualified_name: &Callee) -> bool {
|
||||
// If we have something like `*args`, which might contain the encoding argument, abort.
|
||||
if call.arguments.args.iter().any(Expr::is_starred_expr) {
|
||||
return false;
|
||||
|
@ -160,54 +221,61 @@ fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool {
|
|||
{
|
||||
return false;
|
||||
}
|
||||
match qualified_name.segments() {
|
||||
["" | "codecs" | "_io", "open"] => {
|
||||
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
|
||||
if is_binary_mode(mode_arg).unwrap_or(true) {
|
||||
// binary mode or unknown mode is no violation
|
||||
match qualified_name {
|
||||
Callee::Qualified(qualified_name) => match qualified_name.segments() {
|
||||
["" | "codecs" | "_io", "open"] => {
|
||||
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
|
||||
if is_binary_mode(mode_arg).unwrap_or(true) {
|
||||
// binary mode or unknown mode is no violation
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// else mode not specified, defaults to text mode
|
||||
call.arguments.find_argument("encoding", 3).is_none()
|
||||
}
|
||||
["tempfile", tempfile_class @ ("TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile")] =>
|
||||
{
|
||||
let mode_pos = usize::from(*tempfile_class == "SpooledTemporaryFile");
|
||||
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
|
||||
if is_binary_mode(mode_arg).unwrap_or(true) {
|
||||
// binary mode or unknown mode is no violation
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// defaults to binary mode
|
||||
return false;
|
||||
}
|
||||
call.arguments
|
||||
.find_argument("encoding", mode_pos + 2)
|
||||
.is_none()
|
||||
}
|
||||
// else mode not specified, defaults to text mode
|
||||
call.arguments.find_argument("encoding", 3).is_none()
|
||||
}
|
||||
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
|
||||
let mode_pos = usize::from(qualified_name.segments()[1] == "SpooledTemporaryFile");
|
||||
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
|
||||
if is_binary_mode(mode_arg).unwrap_or(true) {
|
||||
// binary mode or unknown mode is no violation
|
||||
return false;
|
||||
["io" | "_io", "TextIOWrapper"] => {
|
||||
call.arguments.find_argument("encoding", 1).is_none()
|
||||
}
|
||||
_ => false,
|
||||
},
|
||||
Callee::Pathlib(attr) => match *attr {
|
||||
"open" => {
|
||||
if let Some(mode_arg) = call.arguments.find_argument("mode", 0) {
|
||||
if is_binary_mode(mode_arg).unwrap_or(true) {
|
||||
// binary mode or unknown mode is no violation
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// defaults to binary mode
|
||||
return false;
|
||||
// else mode not specified, defaults to text mode
|
||||
call.arguments.find_argument("encoding", 2).is_none()
|
||||
}
|
||||
call.arguments
|
||||
.find_argument("encoding", mode_pos + 2)
|
||||
.is_none()
|
||||
}
|
||||
["io" | "_io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
|
||||
_ => false,
|
||||
"read_text" => call.arguments.find_argument("encoding", 0).is_none(),
|
||||
"write_text" => call.arguments.find_argument("encoding", 1).is_none(),
|
||||
_ => false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum Mode {
|
||||
enum ModeArgument {
|
||||
/// The call supports a `mode` argument.
|
||||
Supported,
|
||||
/// The call does not support a `mode` argument.
|
||||
Unsupported,
|
||||
}
|
||||
|
||||
impl From<&QualifiedName<'_>> for Mode {
|
||||
fn from(value: &QualifiedName<'_>) -> Self {
|
||||
match value.segments() {
|
||||
["" | "codecs" | "_io", "open"] => Mode::Supported,
|
||||
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
|
||||
Mode::Supported
|
||||
}
|
||||
["io" | "_io", "TextIOWrapper"] => Mode::Unsupported,
|
||||
_ => Mode::Unsupported,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -352,5 +352,86 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit `
|
|||
69 |+ (("test.txt")), encoding="locale",
|
||||
70 70 | # comment
|
||||
71 71 | )
|
||||
72 72 |
|
||||
|
||||
unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
||||
|
|
||||
76 | # Errors.
|
||||
77 | Path("foo.txt").open()
|
||||
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
|
||||
78 | Path("foo.txt").open("w")
|
||||
79 | text = Path("foo.txt").read_text()
|
||||
|
|
||||
= help: Add explicit `encoding` argument
|
||||
|
||||
ℹ Unsafe fix
|
||||
74 74 | from pathlib import Path
|
||||
75 75 |
|
||||
76 76 | # Errors.
|
||||
77 |-Path("foo.txt").open()
|
||||
77 |+Path("foo.txt").open(encoding="locale")
|
||||
78 78 | Path("foo.txt").open("w")
|
||||
79 79 | text = Path("foo.txt").read_text()
|
||||
80 80 | Path("foo.txt").write_text(text)
|
||||
|
||||
unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
||||
|
|
||||
76 | # Errors.
|
||||
77 | Path("foo.txt").open()
|
||||
78 | Path("foo.txt").open("w")
|
||||
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
|
||||
79 | text = Path("foo.txt").read_text()
|
||||
80 | Path("foo.txt").write_text(text)
|
||||
|
|
||||
= help: Add explicit `encoding` argument
|
||||
|
||||
ℹ Unsafe fix
|
||||
75 75 |
|
||||
76 76 | # Errors.
|
||||
77 77 | Path("foo.txt").open()
|
||||
78 |-Path("foo.txt").open("w")
|
||||
78 |+Path("foo.txt").open("w", encoding="locale")
|
||||
79 79 | text = Path("foo.txt").read_text()
|
||||
80 80 | Path("foo.txt").write_text(text)
|
||||
81 81 |
|
||||
|
||||
unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without explicit `encoding` argument
|
||||
|
|
||||
77 | Path("foo.txt").open()
|
||||
78 | Path("foo.txt").open("w")
|
||||
79 | text = Path("foo.txt").read_text()
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
|
||||
80 | Path("foo.txt").write_text(text)
|
||||
|
|
||||
= help: Add explicit `encoding` argument
|
||||
|
||||
ℹ Unsafe fix
|
||||
76 76 | # Errors.
|
||||
77 77 | Path("foo.txt").open()
|
||||
78 78 | Path("foo.txt").open("w")
|
||||
79 |-text = Path("foo.txt").read_text()
|
||||
79 |+text = Path("foo.txt").read_text(encoding="locale")
|
||||
80 80 | Path("foo.txt").write_text(text)
|
||||
81 81 |
|
||||
82 82 | # Non-errors.
|
||||
|
||||
unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without explicit `encoding` argument
|
||||
|
|
||||
78 | Path("foo.txt").open("w")
|
||||
79 | text = Path("foo.txt").read_text()
|
||||
80 | Path("foo.txt").write_text(text)
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
|
||||
81 |
|
||||
82 | # Non-errors.
|
||||
|
|
||||
= help: Add explicit `encoding` argument
|
||||
|
||||
ℹ Unsafe fix
|
||||
77 77 | Path("foo.txt").open()
|
||||
78 78 | Path("foo.txt").open("w")
|
||||
79 79 | text = Path("foo.txt").read_text()
|
||||
80 |-Path("foo.txt").write_text(text)
|
||||
80 |+Path("foo.txt").write_text(text, encoding="locale")
|
||||
81 81 |
|
||||
82 82 | # Non-errors.
|
||||
83 83 | Path("foo.txt").open(encoding="utf-8")
|
||||
|
|
|
@ -473,5 +473,104 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit `
|
|||
70 |+ (("test.txt")), encoding=locale.getpreferredencoding(False),
|
||||
70 71 | # comment
|
||||
71 72 | )
|
||||
72 73 |
|
||||
|
||||
unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
||||
|
|
||||
76 | # Errors.
|
||||
77 | Path("foo.txt").open()
|
||||
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
|
||||
78 | Path("foo.txt").open("w")
|
||||
79 | text = Path("foo.txt").read_text()
|
||||
|
|
||||
= help: Add explicit `encoding` argument
|
||||
|
||||
ℹ Unsafe fix
|
||||
72 72 |
|
||||
73 73 | # pathlib
|
||||
74 74 | from pathlib import Path
|
||||
75 |+import locale
|
||||
75 76 |
|
||||
76 77 | # Errors.
|
||||
77 |-Path("foo.txt").open()
|
||||
78 |+Path("foo.txt").open(encoding=locale.getpreferredencoding(False))
|
||||
78 79 | Path("foo.txt").open("w")
|
||||
79 80 | text = Path("foo.txt").read_text()
|
||||
80 81 | Path("foo.txt").write_text(text)
|
||||
|
||||
unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
||||
|
|
||||
76 | # Errors.
|
||||
77 | Path("foo.txt").open()
|
||||
78 | Path("foo.txt").open("w")
|
||||
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
|
||||
79 | text = Path("foo.txt").read_text()
|
||||
80 | Path("foo.txt").write_text(text)
|
||||
|
|
||||
= help: Add explicit `encoding` argument
|
||||
|
||||
ℹ Unsafe fix
|
||||
72 72 |
|
||||
73 73 | # pathlib
|
||||
74 74 | from pathlib import Path
|
||||
75 |+import locale
|
||||
75 76 |
|
||||
76 77 | # Errors.
|
||||
77 78 | Path("foo.txt").open()
|
||||
78 |-Path("foo.txt").open("w")
|
||||
79 |+Path("foo.txt").open("w", encoding=locale.getpreferredencoding(False))
|
||||
79 80 | text = Path("foo.txt").read_text()
|
||||
80 81 | Path("foo.txt").write_text(text)
|
||||
81 82 |
|
||||
|
||||
unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without explicit `encoding` argument
|
||||
|
|
||||
77 | Path("foo.txt").open()
|
||||
78 | Path("foo.txt").open("w")
|
||||
79 | text = Path("foo.txt").read_text()
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
|
||||
80 | Path("foo.txt").write_text(text)
|
||||
|
|
||||
= help: Add explicit `encoding` argument
|
||||
|
||||
ℹ Unsafe fix
|
||||
72 72 |
|
||||
73 73 | # pathlib
|
||||
74 74 | from pathlib import Path
|
||||
75 |+import locale
|
||||
75 76 |
|
||||
76 77 | # Errors.
|
||||
77 78 | Path("foo.txt").open()
|
||||
78 79 | Path("foo.txt").open("w")
|
||||
79 |-text = Path("foo.txt").read_text()
|
||||
80 |+text = Path("foo.txt").read_text(encoding=locale.getpreferredencoding(False))
|
||||
80 81 | Path("foo.txt").write_text(text)
|
||||
81 82 |
|
||||
82 83 | # Non-errors.
|
||||
|
||||
unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without explicit `encoding` argument
|
||||
|
|
||||
78 | Path("foo.txt").open("w")
|
||||
79 | text = Path("foo.txt").read_text()
|
||||
80 | Path("foo.txt").write_text(text)
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
|
||||
81 |
|
||||
82 | # Non-errors.
|
||||
|
|
||||
= help: Add explicit `encoding` argument
|
||||
|
||||
ℹ Unsafe fix
|
||||
72 72 |
|
||||
73 73 | # pathlib
|
||||
74 74 | from pathlib import Path
|
||||
75 |+import locale
|
||||
75 76 |
|
||||
76 77 | # Errors.
|
||||
77 78 | Path("foo.txt").open()
|
||||
78 79 | Path("foo.txt").open("w")
|
||||
79 80 | text = Path("foo.txt").read_text()
|
||||
80 |-Path("foo.txt").write_text(text)
|
||||
81 |+Path("foo.txt").write_text(text, encoding=locale.getpreferredencoding(False))
|
||||
81 82 |
|
||||
82 83 | # Non-errors.
|
||||
83 84 | Path("foo.txt").open(encoding="utf-8")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue