[refurb] Implement read-whole-file [FURB101] (#7682)

## Summary

This PR is part of a bigger effort of re-implementing `refurb` rules
#1348. It adds support for
[FURB101](https://github.com/dosisod/refurb/blob/master/refurb/checks/pathlib/read_text.py)

## Test Plan

I included a new test + checked that all other tests pass.
This commit is contained in:
Valeriy Savchenko 2023-10-20 17:22:38 +01:00 committed by GitHub
parent c8464c3a90
commit bc49492085
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 566 additions and 0 deletions

View file

@ -0,0 +1,126 @@
def foo():
...
def bar(x):
...
# Errors.
# FURB101
with open("file.txt") as f:
x = f.read()
# FURB101
with open("file.txt", "rb") as f:
x = f.read()
# FURB101
with open("file.txt", mode="rb") as f:
x = f.read()
# FURB101
with open("file.txt", encoding="utf8") as f:
x = f.read()
# FURB101
with open("file.txt", errors="ignore") as f:
x = f.read()
# FURB101
with open("file.txt", errors="ignore", mode="rb") as f:
x = f.read()
# FURB101
with open("file.txt", mode="r") as f: # noqa: FURB120
x = f.read()
# FURB101
with open(foo(), "rb") as f:
# The body of `with` is non-trivial, but the recommendation holds.
bar("pre")
bar(f.read())
bar("post")
print("Done")
# FURB101
with open("a.txt") as a, open("b.txt", "rb") as b:
x = a.read()
y = b.read()
# FURB101
with foo() as a, open("file.txt") as b, foo() as c:
# We have other things in here, multiple with items, but
# the user reads the whole file and that bit they can replace.
bar(a)
bar(bar(a + b.read()))
bar(c)
# Non-errors.
f2 = open("file2.txt")
with open("file.txt") as f:
x = f2.read()
with open("file.txt") as f:
# Path.read_text() does not support size, so ignore this
x = f.read(100)
# mode is dynamic
with open("file.txt", foo()) as f:
x = f.read()
# keyword mode is incorrect
with open("file.txt", mode="a+") as f:
x = f.read()
# enables line buffering, not supported in read_text()
with open("file.txt", buffering=1) as f:
x = f.read()
# force CRLF, not supported in read_text()
with open("file.txt", newline="\r\n") as f:
x = f.read()
# dont mistake "newline" for "mode"
with open("file.txt", newline="b") as f:
x = f.read()
# I guess we can possibly also report this case, but the question
# is why the user would put "r+" here in the first place.
with open("file.txt", "r+") as f:
x = f.read()
# Even though we read the whole file, we do other things.
with open("file.txt") as f:
x = f.read()
f.seek(0)
x += f.read(100)
# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open(*filename) as f:
x = f.read()
# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open(**kwargs) as f:
x = f.read()
# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open("file.txt", **kwargs) as f:
x = f.read()
# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
with open("file.txt", mode="r", **kwargs) as f:
x = f.read()
# This could error (but doesn't), since it can't contain unsupported arguments, like
# `buffering`.
with open(*filename, mode="r") as f:
x = f.read()
# This could error (but doesn't), since it can't contain unsupported arguments, like
# `buffering`.
with open(*filename, file="file.txt", mode="r") as f:
x = f.read()

View file

@ -1175,6 +1175,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
if checker.enabled(Rule::RedefinedLoopName) {
pylint::rules::redefined_loop_name(checker, stmt);
}
if checker.enabled(Rule::ReadWholeFile) {
refurb::rules::read_whole_file(checker, with_stmt);
}
}
Stmt::While(ast::StmtWhile { body, orelse, .. }) => {
if checker.enabled(Rule::FunctionUsesLoopVariable) {

View file

@ -921,6 +921,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Flake8Slots, "002") => (RuleGroup::Stable, rules::flake8_slots::rules::NoSlotsInNamedtupleSubclass),
// refurb
(Refurb, "101") => (RuleGroup::Preview, rules::refurb::rules::ReadWholeFile),
(Refurb, "105") => (RuleGroup::Preview, rules::refurb::rules::PrintEmptyString),
#[allow(deprecated)]
(Refurb, "113") => (RuleGroup::Nursery, rules::refurb::rules::RepeatedAppend),

View file

@ -14,6 +14,7 @@ mod tests {
use crate::test::test_path;
use crate::{assert_messages, settings};
#[test_case(Rule::ReadWholeFile, Path::new("FURB101.py"))]
#[test_case(Rule::RepeatedAppend, Path::new("FURB113.py"))]
#[test_case(Rule::DeleteFullSlice, Path::new("FURB131.py"))]
#[test_case(Rule::CheckAndRemoveFromSet, Path::new("FURB132.py"))]

View file

@ -2,6 +2,7 @@ pub(crate) use check_and_remove_from_set::*;
pub(crate) use delete_full_slice::*;
pub(crate) use implicit_cwd::*;
pub(crate) use print_empty_string::*;
pub(crate) use read_whole_file::*;
pub(crate) use reimplemented_starmap::*;
pub(crate) use repeated_append::*;
pub(crate) use single_item_membership_test::*;
@ -12,6 +13,7 @@ mod check_and_remove_from_set;
mod delete_full_slice;
mod implicit_cwd;
mod print_empty_string;
mod read_whole_file;
mod reimplemented_starmap;
mod repeated_append;
mod single_item_membership_test;

View file

@ -0,0 +1,336 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::visitor::{self, Visitor};
use ruff_python_ast::{self as ast, Expr};
use ruff_python_codegen::Generator;
use ruff_python_semantic::{BindingId, ResolvedReference, SemanticModel};
use ruff_text_size::{Ranged, TextRange};
use crate::checkers::ast::Checker;
use crate::fix::snippet::SourceCodeSnippet;
/// ## What it does
/// Checks for uses of `open` and `read` that can be replaced by `pathlib`
/// methods, like `Path.read_text` and `Path.read_bytes`.
///
/// ## Why is this bad?
/// When reading the entire contents of a file into a variable, it's simpler
/// and more concise to use `pathlib` methods like `Path.read_text` and
/// `Path.read_bytes` instead of `open` and `read` calls via `with` statements.
///
/// ## Example
/// ```python
/// with open(filename) as f:
/// contents = f.read()
/// ```
///
/// Use instead:
/// ```python
/// from pathlib import Path
///
/// contents = Path(filename).read_text()
/// ```
///
/// ## References
/// - [Python documentation: `Path.read_bytes`](https://docs.python.org/3/library/pathlib.html#pathlib.Path.read_bytes)
/// - [Python documentation: `Path.read_text`](https://docs.python.org/3/library/pathlib.html#pathlib.Path.read_text)
#[violation]
pub struct ReadWholeFile {
filename: SourceCodeSnippet,
suggestion: SourceCodeSnippet,
}
impl Violation for ReadWholeFile {
#[derive_message_formats]
fn message(&self) -> String {
let filename = self.filename.truncated_display();
let suggestion = self.suggestion.truncated_display();
format!("`open` and `read` should be replaced by `Path({filename}).{suggestion}`")
}
}
/// FURB101
pub(crate) fn read_whole_file(checker: &mut Checker, with: &ast::StmtWith) {
// `async` check here is more of a precaution.
if with.is_async || !checker.semantic().is_builtin("open") {
return;
}
// First we go through all the items in the statement and find all `open` operations.
let candidates = find_file_opens(with, checker.semantic());
if candidates.is_empty() {
return;
}
// Then we need to match each `open` operation with exactly one `read` call.
let matches = {
let mut matcher = ReadMatcher::new(candidates);
visitor::walk_body(&mut matcher, &with.body);
matcher.into_matches()
};
// All the matched operations should be reported.
let diagnostics: Vec<Diagnostic> = matches
.iter()
.map(|open| {
Diagnostic::new(
ReadWholeFile {
filename: SourceCodeSnippet::from_str(&checker.generator().expr(open.filename)),
suggestion: make_suggestion(open, checker.generator()),
},
open.item.range(),
)
})
.collect();
checker.diagnostics.extend(diagnostics);
}
#[derive(Debug)]
enum ReadMode {
/// "r" -> `read_text`
Text,
/// "rb" -> `read_bytes`
Bytes,
}
/// A grab bag struct that joins together every piece of information we need to track
/// about a file open operation.
#[derive(Debug)]
struct FileOpen<'a> {
/// With item where the open happens, we use it for the reporting range.
item: &'a ast::WithItem,
/// Filename expression used as the first argument in `open`, we use it in the diagnostic message.
filename: &'a Expr,
/// The type of read to choose `read_text` or `read_bytes`.
mode: ReadMode,
/// Keywords that can be used in the new read call.
keywords: Vec<&'a ast::Keyword>,
/// We only check `open` operations whose file handles are used exactly once.
reference: &'a ResolvedReference,
}
impl<'a> FileOpen<'a> {
/// Determine whether an expression is a reference to the file handle, by comparing
/// their ranges. If two expressions have the same range, they must be the same expression.
fn is_ref(&self, expr: &Expr) -> bool {
expr.range() == self.reference.range()
}
}
/// Find and return all `open` operations in the given `with` statement.
fn find_file_opens<'a>(
with: &'a ast::StmtWith,
semantic: &'a SemanticModel<'a>,
) -> Vec<FileOpen<'a>> {
with.items
.iter()
.filter_map(|item| find_file_open(item, with, semantic))
.collect()
}
/// Find `open` operation in the given `with` item.
fn find_file_open<'a>(
item: &'a ast::WithItem,
with: &'a ast::StmtWith,
semantic: &'a SemanticModel<'a>,
) -> Option<FileOpen<'a>> {
// We want to match `open(...) as var`.
let ast::ExprCall {
func,
arguments: ast::Arguments { args, keywords, .. },
..
} = item.context_expr.as_call_expr()?;
if func.as_name_expr()?.id != "open" {
return None;
}
let var = item.optional_vars.as_deref()?.as_name_expr()?;
// Ignore calls with `*args` and `**kwargs`. In the exact case of `open(*filename, mode="r")`,
// it could be a match; but in all other cases, the call _could_ contain unsupported keyword
// arguments, like `buffering`.
if args.iter().any(Expr::is_starred_expr)
|| keywords.iter().any(|keyword| keyword.arg.is_none())
{
return None;
}
// Match positional arguments, get filename and read mode.
let (filename, pos_mode) = match_open_args(args)?;
// Match keyword arguments, get keyword arguments to forward and possibly read mode.
let (keywords, kw_mode) = match_open_keywords(keywords)?;
// `pos_mode` could've been assigned default value corresponding to "r", while
// keyword mode should override that.
let mode = kw_mode.unwrap_or(pos_mode);
// Now we need to find what is this variable bound to...
let scope = semantic.current_scope();
let bindings: Vec<BindingId> = scope.get_all(var.id.as_str()).collect();
let binding = bindings
.iter()
.map(|x| semantic.binding(*x))
// We might have many bindings with the same name, but we only care
// for the one we are looking at right now.
.find(|binding| binding.range() == var.range())?;
// Since many references can share the same binding, we can limit our attention span
// exclusively to the body of the current `with` statement.
let references: Vec<&ResolvedReference> = binding
.references
.iter()
.map(|id| semantic.reference(*id))
.filter(|reference| with.range().contains_range(reference.range()))
.collect();
// And even with all these restrictions, if the file handle gets used not exactly once,
// it doesn't fit the bill.
let [reference] = references.as_slice() else {
return None;
};
Some(FileOpen {
item,
filename,
mode,
keywords,
reference,
})
}
/// Match positional arguments. Return expression for the file name and read mode.
fn match_open_args(args: &[Expr]) -> Option<(&Expr, ReadMode)> {
match args {
[filename] => Some((filename, ReadMode::Text)),
[filename, mode_literal] => match_open_mode(mode_literal).map(|mode| (filename, mode)),
// The third positional argument is `buffering` and `read_text` doesn't support it.
_ => None,
}
}
/// Match keyword arguments. Return keyword arguments to forward and read mode.
fn match_open_keywords(
keywords: &[ast::Keyword],
) -> Option<(Vec<&ast::Keyword>, Option<ReadMode>)> {
let mut result: Vec<&ast::Keyword> = vec![];
let mut mode: Option<ReadMode> = None;
for keyword in keywords {
match keyword.arg.as_ref()?.as_str() {
"encoding" | "errors" => result.push(keyword),
// This might look bizarre, - why do we re-wrap this optional?
//
// The answer is quite simple, in the result of the current function
// mode being `None` is a possible and correct option meaning that there
// was NO "mode" keyword argument.
//
// The result of `match_open_mode` on the other hand is None
// in the cases when the mode is not compatible with `read_text`/`read_bytes`.
//
// So, here we return None from this whole function if the mode
// is incompatible.
"mode" => mode = Some(match_open_mode(&keyword.value)?),
// All other keywords cannot be directly forwarded.
_ => return None,
};
}
Some((result, mode))
}
/// Match open mode to see if it is supported.
fn match_open_mode(mode: &Expr) -> Option<ReadMode> {
let ast::StringConstant {
value,
implicit_concatenated: false,
..
} = mode.as_constant_expr()?.value.as_str()?
else {
return None;
};
match value.as_str() {
"r" => Some(ReadMode::Text),
"rb" => Some(ReadMode::Bytes),
_ => None,
}
}
/// AST visitor that matches `open` operations with the corresponding `read` calls.
#[derive(Debug)]
struct ReadMatcher<'a> {
candidates: Vec<FileOpen<'a>>,
matches: Vec<FileOpen<'a>>,
}
impl<'a> ReadMatcher<'a> {
fn new(candidates: Vec<FileOpen<'a>>) -> Self {
Self {
candidates,
matches: vec![],
}
}
fn into_matches(self) -> Vec<FileOpen<'a>> {
self.matches
}
}
impl<'a> Visitor<'a> for ReadMatcher<'a> {
fn visit_expr(&mut self, expr: &'a Expr) {
if let Some(read_from) = match_read_call(expr) {
if let Some(open) = self
.candidates
.iter()
.position(|open| open.is_ref(read_from))
{
self.matches.push(self.candidates.remove(open));
}
return;
}
visitor::walk_expr(self, expr);
}
}
/// Match `x.read()` expression and return expression `x` on success.
fn match_read_call(expr: &Expr) -> Option<&Expr> {
let call = expr.as_call_expr()?;
let attr = call.func.as_attribute_expr()?;
let method_name = &attr.attr;
if method_name != "read"
|| !attr.value.is_name_expr()
|| !call.arguments.args.is_empty()
|| !call.arguments.keywords.is_empty()
{
return None;
}
Some(attr.value.as_ref())
}
/// Construct the replacement suggestion call.
fn make_suggestion(open: &FileOpen<'_>, generator: Generator) -> SourceCodeSnippet {
let method_name = match open.mode {
ReadMode::Text => "read_text",
ReadMode::Bytes => "read_bytes",
};
let name = ast::ExprName {
id: method_name.to_string(),
ctx: ast::ExprContext::Load,
range: TextRange::default(),
};
let call = ast::ExprCall {
func: Box::new(name.into()),
arguments: ast::Arguments {
args: vec![],
keywords: open.keywords.iter().copied().cloned().collect(),
range: TextRange::default(),
},
range: TextRange::default(),
};
SourceCodeSnippet::from_str(&generator.expr(&call.into()))
}

View file

@ -0,0 +1,96 @@
---
source: crates/ruff_linter/src/rules/refurb/mod.rs
---
FURB101.py:12:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text()`
|
11 | # FURB101
12 | with open("file.txt") as f:
| ^^^^^^^^^^^^^^^^^^^^^ FURB101
13 | x = f.read()
|
FURB101.py:16:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_bytes()`
|
15 | # FURB101
16 | with open("file.txt", "rb") as f:
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101
17 | x = f.read()
|
FURB101.py:20:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_bytes()`
|
19 | # FURB101
20 | with open("file.txt", mode="rb") as f:
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101
21 | x = f.read()
|
FURB101.py:24:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text(encoding="utf8")`
|
23 | # FURB101
24 | with open("file.txt", encoding="utf8") as f:
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101
25 | x = f.read()
|
FURB101.py:28:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text(errors="ignore")`
|
27 | # FURB101
28 | with open("file.txt", errors="ignore") as f:
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101
29 | x = f.read()
|
FURB101.py:32:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_bytes(errors="ignore")`
|
31 | # FURB101
32 | with open("file.txt", errors="ignore", mode="rb") as f:
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101
33 | x = f.read()
|
FURB101.py:36:6: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text()`
|
35 | # FURB101
36 | with open("file.txt", mode="r") as f: # noqa: FURB120
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB101
37 | x = f.read()
|
FURB101.py:40:6: FURB101 `open` and `read` should be replaced by `Path(foo()).read_bytes()`
|
39 | # FURB101
40 | with open(foo(), "rb") as f:
| ^^^^^^^^^^^^^^^^^^^^^^ FURB101
41 | # The body of `with` is non-trivial, but the recommendation holds.
42 | bar("pre")
|
FURB101.py:48:6: FURB101 `open` and `read` should be replaced by `Path("a.txt").read_text()`
|
47 | # FURB101
48 | with open("a.txt") as a, open("b.txt", "rb") as b:
| ^^^^^^^^^^^^^^^^^^ FURB101
49 | x = a.read()
50 | y = b.read()
|
FURB101.py:48:26: FURB101 `open` and `read` should be replaced by `Path("b.txt").read_bytes()`
|
47 | # FURB101
48 | with open("a.txt") as a, open("b.txt", "rb") as b:
| ^^^^^^^^^^^^^^^^^^^^^^^^ FURB101
49 | x = a.read()
50 | y = b.read()
|
FURB101.py:53:18: FURB101 `open` and `read` should be replaced by `Path("file.txt").read_text()`
|
52 | # FURB101
53 | with foo() as a, open("file.txt") as b, foo() as c:
| ^^^^^^^^^^^^^^^^^^^^^ FURB101
54 | # We have other things in here, multiple with items, but
55 | # the user reads the whole file and that bit they can replace.
|

1
ruff.schema.json generated
View file

@ -2780,6 +2780,7 @@
"FURB",
"FURB1",
"FURB10",
"FURB101",
"FURB105",
"FURB11",
"FURB113",