Handle io errors gracefully (#5611)

## Summary

It can happen that we can't read a file (a python file, a jupyter
notebook or pyproject.toml). This needs to be handled, and handled
consistently for all file types. Instead of using `Err` or `error!`, we
emit E902 with the io error as message and continue. This PR makes sure
we handle all three cases consistently and emit E902.

I'm not convinced that it should be possible to disable io errors, but
we now handle the regular case consistently and at least print a warning
consistently.

I went with `warn!`, but I can change them all to `error!`, too.

It also checks the error case when a pyproject.toml is not readable. The
error message is not very helpful, but it's now a bit clearer that
ruff itself failed, rather than this being a diagnostic.

## Examples

This is how an Err of `run` looks now:


![image](890f7ab2-2309-4b6f-a4b3-67161947cc83)

With an unreadable file and `IOError` disabled:


![image](fd3d6959-fa23-4ddf-b2e5-8d6022df54b1)

(We lint zero files, but we count files before linting rather than during it, so we exit
0.)

I'm not sure if it should (or if we should take a different path with
manual ExitStatus), but this currently also triggers when `files` is
empty:


![image](f7ede301-41b5-4743-97fd-49149f750337)

## Test Plan

Unix only: Create a temporary directory with files with permissions
`000` (not readable by the owner) and run on that directory. Since this
breaks the assumptions of most of the test code (single file, `ruff`
instead of `ruff_cli`), the test code is rather cumbersome and looks a
bit misplaced; I'm happy to take suggestions for fitting it in closer with
the other tests or streamlining it in other ways. I added another test for
when the entire directory is not readable.
This commit is contained in:
konsti 2023-07-20 11:30:14 +02:00 committed by GitHub
parent 029fe05a5f
commit 92f471a666
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 250 additions and 37 deletions

4
Cargo.lock generated
View file

@ -1005,6 +1005,7 @@ dependencies = [
"globset",
"lazy_static",
"linked-hash-map",
"regex",
"similar",
"walkdir",
"yaml-rust",
@ -1945,6 +1946,7 @@ dependencies = [
"smallvec",
"strum",
"strum_macros",
"tempfile",
"test-case",
"thiserror",
"toml",
@ -2003,6 +2005,7 @@ dependencies = [
"filetime",
"glob",
"ignore",
"insta",
"itertools",
"itoa",
"log",
@ -2025,6 +2028,7 @@ dependencies = [
"shellexpand",
"similar",
"strum",
"tempfile",
"tikv-jemallocator",
"ureq",
"walkdir",

View file

@ -21,7 +21,7 @@ filetime = { version = "0.2.20" }
glob = { version = "0.3.1" }
globset = { version = "0.4.10" }
ignore = { version = "0.4.20" }
insta = { version = "1.31.0" }
# The key must be `features` (plural); Cargo ignores an unknown `feature` key with an
# "unused manifest key" warning, which would leave `filters` and `glob` disabled.
insta = { version = "1.31.0", features = ["filters", "glob"] }
is-macro = { version = "0.2.2" }
itertools = { version = "0.10.5" }
log = { version = "0.4.17" }

View file

@ -86,6 +86,7 @@ pretty_assertions = "1.3.0"
test-case = { workspace = true }
# Disable colored output in tests
colored = { workspace = true, features = ["no-color"] }
tempfile = "3.6.0"
[features]
default = []

View file

@ -1,4 +1,5 @@
use anyhow::Result;
use colored::Colorize;
use log::warn;
use pyproject_toml::{BuildSystem, Project};
use ruff_text_size::{TextRange, TextSize};
use serde::{Deserialize, Serialize};
@ -22,34 +23,38 @@ struct PyProjectToml {
project: Option<Project>,
}
pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Result<Vec<Message>> {
let mut messages = vec![];
let err = match toml::from_str::<PyProjectToml>(source_file.source_text()) {
Ok(_) => return Ok(messages),
Err(err) => err,
pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Vec<Message> {
let Some(err) = toml::from_str::<PyProjectToml>(source_file.source_text()).err() else {
return Vec::default();
};
let mut messages = Vec::new();
let range = match err.span() {
// This is bad but sometimes toml and/or serde just don't give us spans
// TODO(konstin,micha): https://github.com/astral-sh/ruff/issues/4571
None => TextRange::default(),
Some(range) => {
let Ok(end) = TextSize::try_from(range.end) else {
let message = format!(
"{} is larger than 4GB, but ruff assumes all files to be smaller",
source_file.name(),
);
if settings.rules.enabled(Rule::IOError) {
let diagnostic = Diagnostic::new(
IOError {
message: "pyproject.toml is larger than 4GB".to_string(),
},
TextRange::default(),
);
let diagnostic = Diagnostic::new(IOError { message }, TextRange::default());
messages.push(Message::from_diagnostic(
diagnostic,
source_file,
TextSize::default(),
));
} else {
warn!(
"{}{}{} {message}",
"Failed to lint ".bold(),
source_file.name().bold(),
":".bold()
);
}
return Ok(messages);
return messages;
};
TextRange::new(
// start <= end, so if end < 4GB follows start < 4GB
@ -69,5 +74,5 @@ pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Resu
));
}
Ok(messages)
messages
}

View file

@ -212,7 +212,7 @@ mod tests {
let messages = lint_pyproject_toml(
source_file,
&settings::Settings::for_rule(Rule::InvalidPyprojectToml),
)?;
);
assert_messages!(snapshot, messages);
Ok(())
}

View file

@ -293,7 +293,6 @@ impl Settings {
})
}
#[cfg(test)]
pub fn for_rule(rule_code: Rule) -> Self {
Self {
rules: RuleTable::from_iter([rule_code]),
@ -301,7 +300,6 @@ impl Settings {
}
}
#[cfg(test)]
pub fn for_rules(rules: impl IntoIterator<Item = Rule>) -> Self {
Self {
rules: RuleTable::from_iter(rules),

View file

@ -64,6 +64,8 @@ wild = { version = "2" }
[dev-dependencies]
assert_cmd = { version = "2.0.8" }
insta = { workspace = true, features = ["filters"] }
tempfile = "3.6.0"
ureq = { version = "2.6.2", features = [] }
[target.'cfg(target_os = "windows")'.dependencies]

View file

@ -50,7 +50,15 @@ pub fn main() -> ExitCode {
Err(err) => {
#[allow(clippy::print_stderr)]
{
eprintln!("{}{} {err:?}", "error".red().bold(), ":".bold());
// This communicates that this isn't a linter error but ruff itself hard-errored for
// some reason (e.g. failed to resolve the configuration)
eprintln!("{}", "ruff failed".red().bold());
// Currently we generally only see one error, but e.g. with io errors when resolving
// the configuration it is helpful to chain errors ("resolving configuration failed" ->
// "failed to read file: subdir/pyproject.toml")
for cause in err.chain() {
eprintln!(" {} {cause}", "Cause:".bold());
}
}
ExitStatus::Error.into()
}

View file

@ -125,7 +125,7 @@ pub(crate) fn run(
(Some(path.to_owned()), {
let mut error = e.to_string();
for cause in e.chain() {
write!(&mut error, "\n Caused by: {cause}").unwrap();
write!(&mut error, "\n Cause: {cause}").unwrap();
}
error
})
@ -143,30 +143,30 @@ pub(crate) fn run(
}
.unwrap_or_else(|(path, message)| {
if let Some(path) = &path {
error!(
"{}{}{} {message}",
"Failed to lint ".bold(),
fs::relativize_path(path).bold(),
":".bold()
);
let settings = resolver.resolve(path, pyproject_config);
if settings.rules.enabled(Rule::IOError) {
let file =
let dummy =
SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish();
Diagnostics::new(
vec![Message::from_diagnostic(
Diagnostic::new(IOError { message }, TextRange::default()),
file,
dummy,
TextSize::default(),
)],
ImportMap::default(),
)
} else {
warn!(
"{}{}{} {message}",
"Failed to lint ".bold(),
fs::relativize_path(path).bold(),
":".bold()
);
Diagnostics::default()
}
} else {
error!("{} {message}", "Encountered error:".bold());
warn!("{} {message}", "Encountered error:".bold());
Diagnostics::default()
}
})
@ -226,3 +226,85 @@ with the relevant file contents, the `pyproject.toml` settings, and the followin
}
}
}
#[cfg(test)]
#[cfg(unix)]
mod test {
    use super::run;
    use crate::args::Overrides;
    use anyhow::Result;
    use ruff::message::{Emitter, EmitterContext, TextEmitter};
    use ruff::registry::Rule;
    use ruff::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy};
    use ruff::settings::{flags, AllSettings, CliSettings, Settings};
    use rustc_hash::FxHashMap;
    use std::fs;
    use std::os::unix::fs::OpenOptionsExt;
    use tempfile::TempDir;

    /// Lints a directory whose Python file, `pyproject.toml` and Jupyter notebook were all
    /// created with mode 000 and snapshots the rendered messages, so that io errors are
    /// reported the same way for every file type.
    #[test]
    fn unreadable_files() -> Result<()> {
        let rule = Rule::IOError;
        let fixture_name = "E902.py";
        let snapshot_name = format!("{}_{}", rule.noqa_code(), fixture_name);

        // Populate a scratch directory with empty files that have no permission bits set
        let scratch = TempDir::new()?;
        let root = scratch.path();
        let pyproject_path = root.join("pyproject.toml");
        let python_path = root.join("code.py");
        let notebook_path = root.join("notebook.ipynb");
        for target in [&pyproject_path, &python_path, &notebook_path] {
            fs::OpenOptions::new()
                .mode(0o000)
                .write(true)
                .create(true)
                .open(target)?;
        }

        // The invalid pyproject.toml rule is not active by default, so enable it explicitly
        let all_settings = AllSettings {
            cli: CliSettings::default(),
            lib: Settings::for_rules([rule, Rule::InvalidPyprojectToml]),
        };
        let pyproject_config =
            PyprojectConfig::new(PyprojectDiscoveryStrategy::Fixed, all_settings, None);

        // Notebooks are not included by default, so the notebook is passed explicitly
        let targets = [root.to_path_buf(), notebook_path];
        let diagnostics = run(
            &targets,
            &pyproject_config,
            &Overrides::default(),
            flags::Cache::Disabled,
            flags::Noqa::Disabled,
            flags::FixMode::Generate,
        )
        .unwrap();

        // Render the messages the same way the text output format does
        let notebooks = FxHashMap::default();
        let context = EmitterContext::new(&notebooks);
        let mut rendered = Vec::new();
        TextEmitter::default()
            .with_show_fix_status(true)
            .emit(&mut rendered, &diagnostics.messages, &context)
            .unwrap();
        let messages = String::from_utf8(rendered).unwrap();

        // The tempdir is always different (and platform dependent), so it is replaced by a
        // fixed path in the snapshot
        let tempdir_prefix = root.to_str().unwrap();
        insta::with_settings!({
            omit_expression => true,
            filters => vec![
                (tempdir_prefix, "/home/ferris/project"),
            ]
        }, {
            insta::assert_snapshot!(snapshot_name, messages);
        });
        Ok(())
    }
}

View file

@ -0,0 +1,7 @@
---
source: crates/ruff_cli/src/commands/run.rs
---
/home/ferris/project/code.py:1:1: E902 Permission denied (os error 13)
/home/ferris/project/notebook.ipynb:1:1: E902 Permission denied (os error 13)
/home/ferris/project/pyproject.toml:1:1: E902 Permission denied (os error 13)

View file

@ -8,19 +8,21 @@ use std::path::Path;
use anyhow::{anyhow, Result};
use colored::Colorize;
use log::{debug, error};
use ruff_text_size::TextSize;
use log::{debug, error, warn};
use ruff_text_size::{TextRange, TextSize};
use rustc_hash::FxHashMap;
use similar::TextDiff;
use ruff::fs;
use ruff::jupyter::Notebook;
use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
use ruff::logging::DisplayParseError;
use ruff::message::Message;
use ruff::pyproject_toml::lint_pyproject_toml;
use ruff::registry::Rule;
use ruff::settings::{flags, AllSettings, Settings};
use ruff::source_kind::SourceKind;
use ruff::{fs, IOError};
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder};
use ruff_python_stdlib::path::{is_jupyter_notebook, is_project_toml};
@ -127,6 +129,31 @@ pub(crate) fn lint_path(
debug!("Checking: {}", path.display());
// Turns a failed read of `path` into the result for that file: an `IOError` diagnostic when
// that rule is enabled, otherwise a logged warning and no diagnostics. Callers return the
// result right away instead of linting the file.
let io_error_diagnostics = |err: io::Error, path: &Path| -> Diagnostics {
    if !settings.lib.rules.enabled(Rule::IOError) {
        warn!(
            "{}{}{} {err}",
            "Failed to lint ".bold(),
            fs::relativize_path(path).bold(),
            ":".bold()
        );
        return Diagnostics::default();
    }

    let message = err.to_string();
    let diagnostic = Diagnostic::new(IOError { message }, TextRange::default());
    // The file could not be read, so the diagnostic is attached to an empty stand-in source
    let placeholder = SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish();
    Diagnostics::new(
        vec![Message::from_diagnostic(
            diagnostic,
            placeholder,
            TextSize::default(),
        )],
        ImportMap::default(),
    )
};
// We have to special case this here since the Python tokenizer doesn't work with TOML.
if is_project_toml(path) {
let messages = if settings
@ -135,9 +162,14 @@ pub(crate) fn lint_path(
.iter_enabled()
.any(|rule_code| rule_code.lint_source().is_pyproject_toml())
{
let contents = std::fs::read_to_string(path)?;
let contents = match std::fs::read_to_string(path) {
Ok(contents) => contents,
Err(err) => {
return Ok(io_error_diagnostics(err, path));
}
};
let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish();
lint_pyproject_toml(source_file, &settings.lib)?
lint_pyproject_toml(source_file, &settings.lib)
} else {
vec![]
};
@ -154,7 +186,14 @@ pub(crate) fn lint_path(
Err(diagnostic) => return Ok(*diagnostic),
}
} else {
SourceKind::Python(std::fs::read_to_string(path)?)
// This is tested by the ruff_cli test `unreadable_files` (in `commands/run.rs`)
let contents = match std::fs::read_to_string(path) {
Ok(contents) => contents,
Err(err) => {
return Ok(io_error_diagnostics(err, path));
}
};
SourceKind::Python(contents)
};
let contents = source_kind.content().to_string();

View file

@ -1,13 +1,28 @@
#![cfg(not(target_family = "wasm"))]
#[cfg(unix)]
use std::fs;
#[cfg(unix)]
use std::fs::Permissions;
#[cfg(unix)]
use std::os::unix::fs::{OpenOptionsExt, PermissionsExt};
#[cfg(unix)]
use std::path::Path;
use std::str;
#[cfg(unix)]
use anyhow::Context;
use anyhow::Result;
use assert_cmd::Command;
#[cfg(unix)]
use clap::Parser;
#[cfg(unix)]
use path_absolutize::path_dedot;
#[cfg(unix)]
use tempfile::TempDir;
use ruff_cli::args::Args;
use ruff_cli::run;
const BIN_NAME: &str = "ruff";
@ -278,3 +293,55 @@ Found 1 error.
Ok(())
}
/// With an unreadable pyproject.toml and without `--isolated`, ruff hard-errors while trying to
/// build up the configuration globs; the error chain is expected to consist of the io error only
#[cfg(unix)]
#[test]
fn unreadable_pyproject_toml() -> Result<()> {
    let project_dir = TempDir::new()?;
    let project_path = project_dir.path();

    // An empty pyproject.toml created with mode 000
    fs::OpenOptions::new()
        .mode(0o000)
        .write(true)
        .create(true)
        .open(project_path.join("pyproject.toml"))?;

    // `--isolated` is deliberately left out since the configuration discovery is where the
    // error happens
    let cli_args = ["", "check", "--no-cache", project_path.to_str().unwrap()];
    let failure = run(Args::parse_from(cli_args))
        .err()
        .context("Unexpected success")?;

    let causes: Vec<String> = failure.chain().map(ToString::to_string).collect();
    assert_eq!(
        causes,
        vec!["Permission denied (os error 13)".to_string()],
    );
    Ok(())
}
/// Check the output with an unreadable directory: ruff is expected to log a single warning
/// carrying the io error and (for now) still exit successfully.
///
/// The directory's permissions are restored once ruff has run. Otherwise the recursive removal
/// performed when the `TempDir` is dropped cannot open the directory, fails silently and leaves
/// the temporary directory behind.
#[cfg(unix)]
#[test]
fn unreadable_dir() -> Result<()> {
    // Create a directory with 000 (not iterable/readable) permissions
    let tempdir = TempDir::new()?;
    let unreadable_dir = tempdir.path().join("unreadable_dir");
    fs::create_dir(&unreadable_dir)?;
    fs::set_permissions(&unreadable_dir, Permissions::from_mode(0o000))?;

    // We (currently?) have to use a subprocess to check exit status (currently wrong) and logging
    // output
    let mut cmd = Command::cargo_bin(BIN_NAME)?;
    let assert = cmd
        .args(["--no-cache", "--isolated"])
        .arg(&unreadable_dir)
        .assert();

    // The subprocess has finished at this point; make the directory accessible again before any
    // assertion can fail so that the temporary directory is always cleaned up
    fs::set_permissions(&unreadable_dir, Permissions::from_mode(0o700))?;

    // TODO(konstin): This should be a failure, but we currently can't track that
    let output = assert.success();
    assert_eq!(
        str::from_utf8(&output.get_output().stderr)?,
        "warning: Encountered error: Permission denied (os error 13)\n"
    );
    Ok(())
}

View file

@ -429,7 +429,7 @@ fn main() -> ExitCode {
if let Err(e) = run() {
eprintln!("💥 Minimizer failed");
for cause in e.chain() {
eprintln!(" Caused by: {cause}");
eprintln!(" Cause: {cause}");
}
ExitCode::FAILURE
} else {