Handle io errors gracefully (#5611)

## Summary It can happen that we can't read a file (a python file, a jupyter notebook or pyproject.toml), which needs to be handled, and handled consistently, for all file types. Instead of using `Err` or `error!`, we emit E902 with the io error as the message and continue. This PR makes sure we handle all three cases consistently and emit E902. I'm not convinced that it should be possible to disable io errors, but we now handle the regular case consistently and at least print a warning consistently. I went with `warn!` but I can change them all to `error!`, too. It also checks the error case when a pyproject.toml is not readable. The error message is not very helpful, but it's now a bit clearer that ruff itself failed, rather than this being a diagnostic. ## Examples This is how an Err of `run` looks now: ![image](890f7ab2-2309-4b6f-a4b3-67161947cc83) With an unreadable file and `IOError` disabled: ![image](fd3d6959-fa23-4ddf-b2e5-8d6022df54b1) (we lint zero files, but we count files before linting rather than during it, so we exit 0) I'm not sure if it should (or if we should take a different path with a manual ExitStatus), but this currently also triggers when `files` is empty: ![image](f7ede301-41b5-4743-97fd-49149f750337) ## Test Plan Unix only: Create a temporary directory with files with permissions `000` (not readable by the owner) and run ruff on that directory. Since this breaks the assumptions of most of the test code (single file, `ruff` instead of `ruff_cli`), the test code is rather cumbersome and looks a bit misplaced; I'm happy about suggestions to fit it in closer with the other tests or streamline it in other ways. I added another test for when the entire directory is not readable.
2025-07-24 05:25:17 +00:00 · 2023-07-20 11:30:14 +02:00 · 2023-07-20 11:30:14 +02:00 · 92f471a666
commit 92f471a666
parent 029fe05a5f
13 changed files with 250 additions and 37 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1005,6 +1005,7 @@ dependencies = [
 "globset",
 "lazy_static",
 "linked-hash-map",
+ "regex",
 "similar",
 "walkdir",
 "yaml-rust",
@ -1945,6 +1946,7 @@ dependencies = [
 "smallvec",
 "strum",
 "strum_macros",
+ "tempfile",
 "test-case",
 "thiserror",
 "toml",
@ -2003,6 +2005,7 @@ dependencies = [
 "filetime",
 "glob",
 "ignore",
+ "insta",
 "itertools",
 "itoa",
 "log",
@ -2025,6 +2028,7 @@ dependencies = [
 "shellexpand",
 "similar",
 "strum",
+ "tempfile",
 "tikv-jemallocator",
 "ureq",
 "walkdir",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -21,7 +21,7 @@ filetime = { version = "0.2.20" }
 glob = { version = "0.3.1" }
 globset = { version = "0.4.10" }
 ignore = { version = "0.4.20" }
-insta = { version = "1.31.0" }
+insta = { version = "1.31.0", features = ["filters", "glob"] }
 is-macro = { version = "0.2.2" }
 itertools = { version = "0.10.5" }
 log = { version = "0.4.17" }
--- a/crates/ruff/Cargo.toml
+++ b/crates/ruff/Cargo.toml
@ -86,6 +86,7 @@ pretty_assertions = "1.3.0"
 test-case = { workspace = true }
 # Disable colored output in tests
 colored = { workspace = true, features = ["no-color"] }
+tempfile = "3.6.0"

 [features]
 default = []
--- a/crates/ruff/src/pyproject_toml.rs
+++ b/crates/ruff/src/pyproject_toml.rs
@ -1,4 +1,5 @@
-use anyhow::Result;
+use colored::Colorize;
+use log::warn;
 use pyproject_toml::{BuildSystem, Project};
 use ruff_text_size::{TextRange, TextSize};
 use serde::{Deserialize, Serialize};
@ -22,34 +23,38 @@ struct PyProjectToml {
    project: Option<Project>,
 }

-pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Result<Vec<Message>> {
-    let mut messages = vec![];
-
-    let err = match toml::from_str::<PyProjectToml>(source_file.source_text()) {
-        Ok(_) => return Ok(messages),
-        Err(err) => err,
+pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Vec<Message> {
+    let Some(err) = toml::from_str::<PyProjectToml>(source_file.source_text()).err() else {
+        return Vec::default();
    };

+    let mut messages = Vec::new();
    let range = match err.span() {
        // This is bad but sometimes toml and/or serde just don't give us spans
        // TODO(konstin,micha): https://github.com/astral-sh/ruff/issues/4571
        None => TextRange::default(),
        Some(range) => {
            let Ok(end) = TextSize::try_from(range.end) else {
+                let message = format!(
+                    "{} is larger than 4GB, but ruff assumes all files to be smaller",
+                    source_file.name(),
+                );
                if settings.rules.enabled(Rule::IOError) {
-                    let diagnostic = Diagnostic::new(
-                        IOError {
-                            message: "pyproject.toml is larger than 4GB".to_string(),
-                        },
-                        TextRange::default(),
-                    );
+                    let diagnostic = Diagnostic::new(IOError { message }, TextRange::default());
                    messages.push(Message::from_diagnostic(
                        diagnostic,
                        source_file,
                        TextSize::default(),
                    ));
+                } else {
+                    warn!(
+                        "{}{}{} {message}",
+                        "Failed to lint ".bold(),
+                        source_file.name().bold(),
+                        ":".bold()
+                    );
                }
-                return Ok(messages);
+                return messages;
            };
            TextRange::new(
                // start <= end, so if end < 4GB follows start < 4GB
@ -69,5 +74,5 @@ pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Resu
        ));
    }

-    Ok(messages)
+    messages
 }
--- a/crates/ruff/src/rules/ruff/mod.rs
+++ b/crates/ruff/src/rules/ruff/mod.rs
@ -212,7 +212,7 @@ mod tests {
        let messages = lint_pyproject_toml(
            source_file,
            &settings::Settings::for_rule(Rule::InvalidPyprojectToml),
-        )?;
+        );
        assert_messages!(snapshot, messages);
        Ok(())
    }
--- a/crates/ruff/src/settings/mod.rs
+++ b/crates/ruff/src/settings/mod.rs
@ -293,7 +293,6 @@ impl Settings {
        })
    }

-    #[cfg(test)]
    pub fn for_rule(rule_code: Rule) -> Self {
        Self {
            rules: RuleTable::from_iter([rule_code]),
@ -301,7 +300,6 @@ impl Settings {
        }
    }

-    #[cfg(test)]
    pub fn for_rules(rules: impl IntoIterator<Item = Rule>) -> Self {
        Self {
            rules: RuleTable::from_iter(rules),
--- a/crates/ruff_cli/Cargo.toml
+++ b/crates/ruff_cli/Cargo.toml
@ -64,6 +64,8 @@ wild = { version = "2" }

 [dev-dependencies]
 assert_cmd = { version = "2.0.8" }
+insta = { workspace = true, features = ["filters"] }
+tempfile = "3.6.0"
 ureq = { version = "2.6.2", features = [] }

 [target.'cfg(target_os = "windows")'.dependencies]
--- a/crates/ruff_cli/src/bin/ruff.rs
+++ b/crates/ruff_cli/src/bin/ruff.rs
@ -50,7 +50,15 @@ pub fn main() -> ExitCode {
        Err(err) => {
            #[allow(clippy::print_stderr)]
            {
-                eprintln!("{}{} {err:?}", "error".red().bold(), ":".bold());
+                // This communicates that this isn't a linter error but ruff itself hard-errored for
+                // some reason (e.g. failed to resolve the configuration)
+                eprintln!("{}", "ruff failed".red().bold());
+                // Currently we generally only see one error, but e.g. with io errors when resolving
+                // the configuration it helps to chain errors ("resolving configuration failed" ->
+                // "failed to read file: subdir/pyproject.toml")
+                for cause in err.chain() {
+                    eprintln!("  {} {cause}", "Cause:".bold());
+                }
            }
            ExitStatus::Error.into()
        }
--- a/crates/ruff_cli/src/commands/run.rs
+++ b/crates/ruff_cli/src/commands/run.rs
@ -125,7 +125,7 @@ pub(crate) fn run(
                        (Some(path.to_owned()), {
                            let mut error = e.to_string();
                            for cause in e.chain() {
-                                write!(&mut error, "\n  Caused by: {cause}").unwrap();
+                                write!(&mut error, "\n  Cause: {cause}").unwrap();
                            }
                            error
                        })
@ -143,30 +143,30 @@ pub(crate) fn run(
            }
            .unwrap_or_else(|(path, message)| {
                if let Some(path) = &path {
-                    error!(
-                        "{}{}{} {message}",
-                        "Failed to lint ".bold(),
-                        fs::relativize_path(path).bold(),
-                        ":".bold()
-                    );
                    let settings = resolver.resolve(path, pyproject_config);
                    if settings.rules.enabled(Rule::IOError) {
-                        let file =
+                        let dummy =
                            SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish();

                        Diagnostics::new(
                            vec![Message::from_diagnostic(
                                Diagnostic::new(IOError { message }, TextRange::default()),
-                                file,
+                                dummy,
                                TextSize::default(),
                            )],
                            ImportMap::default(),
                        )
                    } else {
+                        warn!(
+                            "{}{}{} {message}",
+                            "Failed to lint ".bold(),
+                            fs::relativize_path(path).bold(),
+                            ":".bold()
+                        );
                        Diagnostics::default()
                    }
                } else {
-                    error!("{} {message}", "Encountered error:".bold());
+                    warn!("{} {message}", "Encountered error:".bold());
                    Diagnostics::default()
                }
            })
@ -226,3 +226,85 @@ with the relevant file contents, the `pyproject.toml` settings, and the followin
        }
    }
 }
+
+#[cfg(test)]
+#[cfg(unix)]
+mod test {
+    use super::run;
+    use crate::args::Overrides;
+    use anyhow::Result;
+    use ruff::message::{Emitter, EmitterContext, TextEmitter};
+    use ruff::registry::Rule;
+    use ruff::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy};
+    use ruff::settings::{flags, AllSettings, CliSettings, Settings};
+    use rustc_hash::FxHashMap;
+    use std::fs;
+    use std::os::unix::fs::OpenOptionsExt;
+    use tempfile::TempDir;
+
+    /// We check that regular python files, pyproject.toml and jupyter notebooks all handle io
+    /// errors gracefully
+    #[test]
+    fn unreadable_files() -> Result<()> {
+        let path = "E902.py";
+        let rule_code = Rule::IOError;
+
+        // Create inaccessible files
+        let tempdir = TempDir::new()?;
+        let pyproject_toml = tempdir.path().join("pyproject.toml");
+        let python_file = tempdir.path().join("code.py");
+        let notebook = tempdir.path().join("notebook.ipynb");
+        for file in [&pyproject_toml, &python_file, &notebook] {
+            fs::OpenOptions::new()
+                .create(true)
+                .write(true)
+                .mode(0o000)
+                .open(file)?;
+        }
+
+        // Configure
+        let snapshot = format!("{}_{}", rule_code.noqa_code(), path);
+        let settings = AllSettings {
+            cli: CliSettings::default(),
+            // invalid pyproject.toml is not active by default
+            lib: Settings::for_rules(vec![rule_code, Rule::InvalidPyprojectToml]),
+        };
+        let pyproject_config =
+            PyprojectConfig::new(PyprojectDiscoveryStrategy::Fixed, settings, None);
+
+        // Run
+        let diagnostics = run(
+            // Notebooks are not included by default
+            &[tempdir.path().to_path_buf(), notebook],
+            &pyproject_config,
+            &Overrides::default(),
+            flags::Cache::Disabled,
+            flags::Noqa::Disabled,
+            flags::FixMode::Generate,
+        )
+        .unwrap();
+        let mut output = Vec::new();
+
+        TextEmitter::default()
+            .with_show_fix_status(true)
+            .emit(
+                &mut output,
+                &diagnostics.messages,
+                &EmitterContext::new(&FxHashMap::default()),
+            )
+            .unwrap();
+
+        let messages = String::from_utf8(output).unwrap();
+
+        insta::with_settings!({
+            omit_expression => true,
+            filters => vec![
+                // The tempdir is always different (and platform dependent)
+                (tempdir.path().to_str().unwrap(), "/home/ferris/project"),
+            ]
+        }, {
+            insta::assert_snapshot!(snapshot, messages);
+        });
+        Ok(())
+    }
+}
--- a/crates/ruff_cli/src/commands/snapshots/ruff_clicommandsruntestE902_E902.py.snap
+++ b/crates/ruff_cli/src/commands/snapshots/ruff_clicommandsruntestE902_E902.py.snap
@ -0,0 +1,7 @@
+---
+source: crates/ruff_cli/src/commands/run.rs
+---
+/home/ferris/project/code.py:1:1: E902 Permission denied (os error 13)
+/home/ferris/project/notebook.ipynb:1:1: E902 Permission denied (os error 13)
+/home/ferris/project/pyproject.toml:1:1: E902 Permission denied (os error 13)
+
--- a/crates/ruff_cli/src/diagnostics.rs
+++ b/crates/ruff_cli/src/diagnostics.rs
@ -8,19 +8,21 @@ use std::path::Path;

 use anyhow::{anyhow, Result};
 use colored::Colorize;
-use log::{debug, error};
-use ruff_text_size::TextSize;
+use log::{debug, error, warn};
+use ruff_text_size::{TextRange, TextSize};
 use rustc_hash::FxHashMap;
 use similar::TextDiff;

-use ruff::fs;
 use ruff::jupyter::Notebook;
 use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
 use ruff::logging::DisplayParseError;
 use ruff::message::Message;
 use ruff::pyproject_toml::lint_pyproject_toml;
+use ruff::registry::Rule;
 use ruff::settings::{flags, AllSettings, Settings};
 use ruff::source_kind::SourceKind;
+use ruff::{fs, IOError};
+use ruff_diagnostics::Diagnostic;
 use ruff_python_ast::imports::ImportMap;
 use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder};
 use ruff_python_stdlib::path::{is_jupyter_notebook, is_project_toml};
@ -127,6 +129,31 @@ pub(crate) fn lint_path(

    debug!("Checking: {}", path.display());

+    // In case of an io error we want to exit early
+    let io_error_diagnostics = |err: io::Error, path: &Path| -> Diagnostics {
+        if settings.lib.rules.enabled(Rule::IOError) {
+            let io_err = Diagnostic::new(
+                IOError {
+                    message: err.to_string(),
+                },
+                TextRange::default(),
+            );
+            let dummy = SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish();
+            Diagnostics::new(
+                vec![Message::from_diagnostic(io_err, dummy, TextSize::default())],
+                ImportMap::default(),
+            )
+        } else {
+            warn!(
+                "{}{}{} {err}",
+                "Failed to lint ".bold(),
+                fs::relativize_path(path).bold(),
+                ":".bold()
+            );
+            Diagnostics::default()
+        }
+    };
+
    // We have to special case this here since the Python tokenizer doesn't work with TOML.
    if is_project_toml(path) {
        let messages = if settings
@ -135,9 +162,14 @@ pub(crate) fn lint_path(
            .iter_enabled()
            .any(|rule_code| rule_code.lint_source().is_pyproject_toml())
        {
-            let contents = std::fs::read_to_string(path)?;
+            let contents = match std::fs::read_to_string(path) {
+                Ok(contents) => contents,
+                Err(err) => {
+                    return Ok(io_error_diagnostics(err, path));
+                }
+            };
            let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish();
-            lint_pyproject_toml(source_file, &settings.lib)?
+            lint_pyproject_toml(source_file, &settings.lib)
        } else {
            vec![]
        };
@ -154,7 +186,14 @@ pub(crate) fn lint_path(
            Err(diagnostic) => return Ok(*diagnostic),
        }
    } else {
-        SourceKind::Python(std::fs::read_to_string(path)?)
+        // This is tested by the ruff_cli test `unreadable_files` (in `commands/run.rs`)
+        let contents = match std::fs::read_to_string(path) {
+            Ok(contents) => contents,
+            Err(err) => {
+                return Ok(io_error_diagnostics(err, path));
+            }
+        };
+        SourceKind::Python(contents)
    };

    let contents = source_kind.content().to_string();
--- a/crates/ruff_cli/tests/integration_test.rs
+++ b/crates/ruff_cli/tests/integration_test.rs
@ -1,13 +1,28 @@
 #![cfg(not(target_family = "wasm"))]

+#[cfg(unix)]
+use std::fs;
+#[cfg(unix)]
+use std::fs::Permissions;
+#[cfg(unix)]
+use std::os::unix::fs::{OpenOptionsExt, PermissionsExt};
 #[cfg(unix)]
 use std::path::Path;
 use std::str;

+#[cfg(unix)]
+use anyhow::Context;
 use anyhow::Result;
 use assert_cmd::Command;
 #[cfg(unix)]
+use clap::Parser;
+#[cfg(unix)]
 use path_absolutize::path_dedot;
+#[cfg(unix)]
+use tempfile::TempDir;
+
+use ruff_cli::args::Args;
+use ruff_cli::run;

 const BIN_NAME: &str = "ruff";

@ -278,3 +293,55 @@ Found 1 error.

    Ok(())
 }
+
+/// An unreadable pyproject.toml in non-isolated mode causes ruff to hard-error trying to build up
+/// configuration globs
+#[cfg(unix)]
+#[test]
+fn unreadable_pyproject_toml() -> Result<()> {
+    let tempdir = TempDir::new()?;
+    let pyproject_toml = tempdir.path().join("pyproject.toml");
+    // Create an empty file with 000 permissions
+    fs::OpenOptions::new()
+        .create(true)
+        .write(true)
+        .mode(0o000)
+        .open(pyproject_toml)?;
+
+    // Don't `--isolated` since the configuration discovery is where the error happens
+    let args = Args::parse_from(["", "check", "--no-cache", tempdir.path().to_str().unwrap()]);
+    let err = run(args).err().context("Unexpected success")?;
+    assert_eq!(
+        err.chain()
+            .map(std::string::ToString::to_string)
+            .collect::<Vec<_>>(),
+        vec!["Permission denied (os error 13)".to_string()],
+    );
+    Ok(())
+}
+
+/// Check the output with an unreadable directory
+#[cfg(unix)]
+#[test]
+fn unreadable_dir() -> Result<()> {
+    // Create a directory with 000 (not iterable/readable) permissions
+    let tempdir = TempDir::new()?;
+    let unreadable_dir = tempdir.path().join("unreadable_dir");
+    fs::create_dir(&unreadable_dir)?;
+    fs::set_permissions(&unreadable_dir, Permissions::from_mode(0o000))?;
+
+    // We (currently?) have to use a subprocess to check exit status (currently wrong) and logging
+    // output
+    let mut cmd = Command::cargo_bin(BIN_NAME)?;
+    let output = cmd
+        .args(["--no-cache", "--isolated"])
+        .arg(&unreadable_dir)
+        .assert()
+        // TODO(konstin): This should be a failure, but we currently can't track that
+        .success();
+    assert_eq!(
+        str::from_utf8(&output.get_output().stderr)?,
+        "warning: Encountered error: Permission denied (os error 13)\n"
+    );
+    Ok(())
+}
--- a/crates/ruff_shrinking/src/main.rs
+++ b/crates/ruff_shrinking/src/main.rs
@ -429,7 +429,7 @@ fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("💥 Minimizer failed");
        for cause in e.chain() {
-            eprintln!("  Caused by: {cause}");
+            eprintln!("  Cause: {cause}");
        }
        ExitCode::FAILURE
    } else {