diff --git a/crates/ruff/src/jupyter/notebook.rs b/crates/ruff/src/jupyter/notebook.rs index a4289fd776..f355720600 100644 --- a/crates/ruff/src/jupyter/notebook.rs +++ b/crates/ruff/src/jupyter/notebook.rs @@ -33,7 +33,7 @@ pub fn round_trip(path: &Path) -> anyhow::Result { err ) })?; - let code = notebook.content().to_string(); + let code = notebook.source_code().to_string(); notebook.update_cell_content(&code); let mut writer = Vec::new(); notebook.write_inner(&mut writer)?; @@ -103,7 +103,7 @@ pub struct Notebook { /// separated by a newline and a trailing newline. The trailing newline /// is added to make sure that each cell ends with a newline which will /// be removed when updating the cell content. - content: String, + source_code: String, /// The index of the notebook. This is used to map between the concatenated /// source code and the original notebook. index: OnceCell, @@ -132,8 +132,8 @@ impl Notebook { } /// Read the Jupyter Notebook from its JSON string. - pub fn from_contents(contents: &str) -> Result> { - Self::from_reader(Cursor::new(contents)) + pub fn from_source_code(source_code: &str) -> Result> { + Self::from_reader(Cursor::new(source_code)) } /// Read a Jupyter Notebook from a [`Read`] implementor. @@ -268,7 +268,7 @@ impl Notebook { // The additional newline at the end is to maintain consistency for // all cells. These newlines will be removed before updating the // source code with the transformed content. Refer `update_cell_content`. - content: contents.join("\n") + "\n", + source_code: contents.join("\n") + "\n", cell_offsets, valid_code_cells, trailing_newline, @@ -404,8 +404,8 @@ impl Notebook { /// Return the notebook content. /// /// This is the concatenation of all Python code cells. - pub(crate) fn content(&self) -> &str { - &self.content + pub fn source_code(&self) -> &str { + &self.source_code } /// Return the Jupyter notebook index. @@ -429,7 +429,7 @@ impl Notebook { // it depends on the offsets to extract the cell content. self.update_cell_offsets(source_map); self.update_cell_content(transformed); - self.content = transformed.to_string(); + self.source_code = transformed.to_string(); } /// Return a slice of [`Cell`] in the Jupyter notebook. @@ -482,8 +482,8 @@ mod tests { /// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory. fn read_jupyter_cell(path: impl AsRef) -> Result { let path = test_resource_path("fixtures/jupyter/cell").join(path); - let contents = std::fs::read_to_string(path)?; - Ok(serde_json::from_str(&contents)?) + let source_code = std::fs::read_to_string(path)?; + Ok(serde_json::from_str(&source_code)?) } #[test] @@ -536,7 +536,7 @@ mod tests { fn test_concat_notebook() -> Result<()> { let notebook = read_jupyter_notebook(Path::new("valid.ipynb"))?; assert_eq!( - notebook.content, + notebook.source_code, r#"def unused_variable(): x = 1 y = 2 diff --git a/crates/ruff/src/source_kind.rs b/crates/ruff/src/source_kind.rs index ab63e89c28..a2c2a1e8b9 100644 --- a/crates/ruff/src/source_kind.rs +++ b/crates/ruff/src/source_kind.rs @@ -2,19 +2,11 @@ use crate::jupyter::Notebook; #[derive(Clone, Debug, PartialEq, is_macro::Is)] pub enum SourceKind { - Python(String), + Python, Jupyter(Notebook), } impl SourceKind { - /// Return the source content. - pub fn content(&self) -> &str { - match self { - SourceKind::Python(content) => content, - SourceKind::Jupyter(notebook) => notebook.content(), - } - } - /// Return the [`Notebook`] if the source kind is [`SourceKind::Jupyter`]. pub fn notebook(&self) -> Option<&Notebook> { if let Self::Jupyter(notebook) = self { diff --git a/crates/ruff/src/test.rs b/crates/ruff/src/test.rs index 07e9c15ac1..63a487294e 100644 --- a/crates/ruff/src/test.rs +++ b/crates/ruff/src/test.rs @@ -6,14 +6,13 @@ use std::path::Path; #[cfg(not(fuzzing))] use anyhow::Result; use itertools::Itertools; -use ruff_python_parser::lexer::LexResult; - use rustc_hash::FxHashMap; use ruff_diagnostics::{AutofixKind, Diagnostic}; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; +use ruff_python_parser::lexer::LexResult; use ruff_python_parser::AsMode; use ruff_python_trivia::textwrap::dedent; use ruff_source_file::{Locator, SourceFileBuilder}; @@ -53,7 +52,8 @@ pub(crate) fn test_path(path: impl AsRef, settings: &Settings) -> Result, settings: &Settings, ) -> Result<(Vec, SourceKind, SourceKind)> { - let mut source_kind = SourceKind::Jupyter(read_jupyter_notebook(path.as_ref())?); + let notebook = read_jupyter_notebook(path.as_ref())?; + let contents = notebook.source_code().to_string(); + let mut source_kind = SourceKind::Jupyter(notebook); let original_source_kind = source_kind.clone(); - let messages = test_contents(&mut source_kind, path.as_ref(), settings); + let messages = test_contents(&contents, &mut source_kind, path.as_ref(), settings); let expected_notebook = read_jupyter_notebook(expected.as_ref())?; if let SourceKind::Jupyter(notebook) = &source_kind { assert_eq!(notebook.cell_offsets(), expected_notebook.cell_offsets()); assert_eq!(notebook.index(), expected_notebook.index()); - assert_eq!(notebook.content(), expected_notebook.content()); + assert_eq!(notebook.source_code(), expected_notebook.source_code()); }; Ok((messages, original_source_kind, source_kind)) } @@ -81,11 +83,7 @@ pub(crate) fn test_notebook_path( pub fn test_snippet(contents: &str, settings: &Settings) -> Vec { let path = Path::new(""); let contents = dedent(contents); - test_contents( - &mut SourceKind::Python(contents.to_string()), - path, - settings, - ) + test_contents(&contents, &mut SourceKind::Python, path, settings) } thread_local! { @@ -102,11 +100,15 @@ pub(crate) fn max_iterations() -> usize { /// A convenient wrapper around [`check_path`], that additionally /// asserts that autofixes converge after a fixed number of iterations. -fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec { - let contents = source_kind.content().to_string(); +fn test_contents( + contents: &str, + source_kind: &mut SourceKind, + path: &Path, + settings: &Settings, +) -> Vec { let source_type = PySourceType::from(path); - let tokens: Vec = ruff_python_parser::tokenize(&contents, source_type.as_mode()); - let locator = Locator::new(&contents); + let tokens: Vec = ruff_python_parser::tokenize(contents, source_type.as_mode()); + let locator = Locator::new(contents); let stylist = Stylist::from_tokens(&tokens, &locator); let indexer = Indexer::from_tokens(&tokens, &locator); let directives = directives::extract_directives( @@ -225,7 +227,7 @@ Source with applied fixes: let source_code = SourceFileBuilder::new( path.file_name().unwrap().to_string_lossy().as_ref(), - contents.as_str(), + contents, ) .finish(); diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index f348468b1b..1ea4883729 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -1,5 +1,6 @@ #![cfg_attr(target_family = "wasm", allow(dead_code))] +use std::borrow::Cow; use std::fs::write; use std::io; use std::io::Write; @@ -75,6 +76,31 @@ impl Diagnostics { source_kind: FxHashMap::default(), } } + + /// Generate [`Diagnostics`] based on an [`io::Error`]. + pub(crate) fn from_io_error(err: &io::Error, path: &Path, settings: &Settings) -> Self { + if settings.rules.enabled(Rule::IOError) { + let io_err = Diagnostic::new( + IOError { + message: err.to_string(), + }, + TextRange::default(), + ); + let dummy = SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish(); + Self::new( + vec![Message::from_diagnostic(io_err, dummy, TextSize::default())], + ImportMap::default(), + ) + } else { + warn!( + "{}{}{} {err}", + "Failed to lint ".bold(), + fs::relativize_path(path).bold(), + ":".bold() + ); + Self::default() + } + } } impl AddAssign for Diagnostics { @@ -131,11 +157,11 @@ fn notebook_from_path(path: &Path) -> Result> { /// Parse a Jupyter Notebook from a JSON string. /// /// Returns either an indexed Python Jupyter Notebook or a diagnostic (which is empty if we skip). -fn notebook_from_contents( - contents: &str, +fn notebook_from_source_code( + source_code: &str, path: Option<&Path>, ) -> Result> { - let notebook = match Notebook::from_contents(contents) { + let notebook = match Notebook::from_source_code(source_code) { Ok(notebook) => { if !notebook.is_python_notebook() { // Not a python notebook, this could e.g. be an R notebook which we want to just skip. @@ -202,31 +228,6 @@ pub(crate) fn lint_path( debug!("Checking: {}", path.display()); - // In case of an io error we want to exit early - let io_error_diagnostics = |err: io::Error, path: &Path| -> Diagnostics { - if settings.lib.rules.enabled(Rule::IOError) { - let io_err = Diagnostic::new( - IOError { - message: err.to_string(), - }, - TextRange::default(), - ); - let dummy = SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish(); - Diagnostics::new( - vec![Message::from_diagnostic(io_err, dummy, TextSize::default())], - ImportMap::default(), - ) - } else { - warn!( - "{}{}{} {err}", - "Failed to lint ".bold(), - fs::relativize_path(path).bold(), - ":".bold() - ); - Diagnostics::default() - } - }; - // We have to special case this here since the Python tokenizer doesn't work with TOML. if is_project_toml(path) { let messages = if settings @@ -238,7 +239,7 @@ pub(crate) fn lint_path( let contents = match std::fs::read_to_string(path) { Ok(contents) => contents, Err(err) => { - return Ok(io_error_diagnostics(err, path)); + return Ok(Diagnostics::from_io_error(&err, path, &settings.lib)); } }; let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish(); @@ -252,27 +253,21 @@ pub(crate) fn lint_path( }); } - let source_type = PySourceType::from(path); - - // Read the file from disk - let mut source_kind = if source_type.is_jupyter() { - match notebook_from_path(path) { - Ok(notebook) => SourceKind::Jupyter(notebook), - Err(diagnostic) => return Ok(*diagnostic), + // Extract the sources from the file. + let LintSources { + source_type, + mut source_kind, + contents, + } = match LintSources::try_from_path(path) { + Ok(sources) => sources, + Err(SourceExtractionError::Io(err)) => { + return Ok(Diagnostics::from_io_error(&err, path, &settings.lib)); + } + Err(SourceExtractionError::Diagnostics(diagnostics)) => { + return Ok(*diagnostics); } - } else { - // This is tested by ruff_cli integration test `unreadable_file` - let contents = match std::fs::read_to_string(path) { - Ok(contents) => contents, - Err(err) => { - return Ok(io_error_diagnostics(err, path)); - } - }; - SourceKind::Python(contents) }; - let contents = source_kind.content().to_string(); - // Lint the file. let ( LinterResult { @@ -297,7 +292,7 @@ pub(crate) fn lint_path( if !fixed.is_empty() { match autofix { flags::FixMode::Apply => match &source_kind { - SourceKind::Python(_) => { + SourceKind::Python => { write(path, transformed.as_bytes())?; } SourceKind::Jupyter(notebook) => { @@ -306,9 +301,9 @@ pub(crate) fn lint_path( }, flags::FixMode::Diff => { match &source_kind { - SourceKind::Python(_) => { + SourceKind::Python => { let mut stdout = io::stdout().lock(); - TextDiff::from_lines(contents.as_str(), &transformed) + TextDiff::from_lines(&contents, &transformed) .unified_diff() .header(&fs::relativize_path(path), &fs::relativize_path(path)) .to_writer(&mut stdout)?; @@ -441,20 +436,22 @@ pub(crate) fn lint_stdin( noqa: flags::Noqa, autofix: flags::FixMode, ) -> Result { - let source_type = path.map(PySourceType::from).unwrap_or_default(); - - let mut source_kind = if source_type.is_jupyter() { - // SAFETY: Jupyter isn't the default type, so we must have a path. - match notebook_from_contents(contents, path) { - Ok(notebook) => SourceKind::Jupyter(notebook), - Err(diagnostic) => return Ok(*diagnostic), + // Extract the sources from the file. + let LintSources { + source_type, + mut source_kind, + contents, + } = match LintSources::try_from_source_code(contents, path) { + Ok(sources) => sources, + Err(SourceExtractionError::Io(err)) => { + // SAFETY: An `io::Error` can only occur if we're reading from a path. + return Ok(Diagnostics::from_io_error(&err, path.unwrap(), settings)); + } + Err(SourceExtractionError::Diagnostics(diagnostics)) => { + return Ok(*diagnostics); } - } else { - SourceKind::Python(contents.to_string()) }; - let contents = source_kind.content().to_string(); - // Lint the inputs. let ( LinterResult { @@ -484,7 +481,7 @@ pub(crate) fn lint_stdin( flags::FixMode::Diff => { // But only write a diff if it's non-empty. if !fixed.is_empty() { - let text_diff = TextDiff::from_lines(contents.as_str(), &transformed); + let text_diff = TextDiff::from_lines(&contents, &transformed); let mut unified_diff = text_diff.unified_diff(); if let Some(path) = path { unified_diff @@ -555,11 +552,84 @@ pub(crate) fn lint_stdin( }) } +#[derive(Debug)] +struct LintSources<'a> { + /// The "type" of source code, e.g. `.py`, `.pyi`, `.ipynb`, etc. + source_type: PySourceType, + /// The "kind" of source, e.g. Python file, Jupyter Notebook, etc. + source_kind: SourceKind, + /// The contents of the source code. + contents: Cow<'a, str>, +} + +impl<'a> LintSources<'a> { + /// Extract the lint [`LintSources`] from the given file path. + fn try_from_path(path: &Path) -> Result { + let source_type = PySourceType::from(path); + + // Read the file from disk. + if source_type.is_jupyter() { + let notebook = notebook_from_path(path).map_err(SourceExtractionError::Diagnostics)?; + let contents = notebook.source_code().to_string(); + let source_kind = SourceKind::Jupyter(notebook); + Ok(LintSources { + source_type, + source_kind, + contents: Cow::Owned(contents), + }) + } else { + // This is tested by ruff_cli integration test `unreadable_file` + let contents = std::fs::read_to_string(path).map_err(SourceExtractionError::Io)?; + Ok(LintSources { + source_type, + source_kind: SourceKind::Python, + contents: Cow::Owned(contents), + }) + } + } + + /// Extract the lint [`LintSources`] from the raw string contents, optionally accompanied by a + /// file path indicating the path to the file from which the contents were read. If provided, + /// the file path should be used for diagnostics, but not for reading the file from disk. + fn try_from_source_code( + source_code: &'a str, + path: Option<&Path>, + ) -> Result, SourceExtractionError> { + let source_type = path.map(PySourceType::from).unwrap_or_default(); + + if source_type.is_jupyter() { + let notebook = notebook_from_source_code(source_code, path) + .map_err(SourceExtractionError::Diagnostics)?; + let contents = notebook.source_code().to_string(); + let source_kind = SourceKind::Jupyter(notebook); + Ok(LintSources { + source_type, + source_kind, + contents: Cow::Owned(contents), + }) + } else { + Ok(LintSources { + source_type, + source_kind: SourceKind::Python, + contents: Cow::Borrowed(source_code), + }) + } + } +} + +#[derive(Debug)] +enum SourceExtractionError { + /// The extraction failed due to an [`io::Error`]. + Io(io::Error), + /// The extraction failed, and generated [`Diagnostics`] to report. + Diagnostics(Box), +} + #[cfg(test)] mod tests { use std::path::Path; - use crate::diagnostics::{notebook_from_contents, notebook_from_path, Diagnostics}; + use crate::diagnostics::{notebook_from_path, notebook_from_source_code, Diagnostics}; #[test] fn test_r() { @@ -573,7 +643,7 @@ mod tests { let contents = std::fs::read_to_string(path).unwrap(); // No diagnostics is used as skip signal. assert_eq!( - notebook_from_contents(&contents, Some(path)).unwrap_err(), + notebook_from_source_code(&contents, Some(path)).unwrap_err(), Box::::default() ); }