Ability to perform integration test on Jupyter notebooks (#5076)

## Summary Ability to perform integration test on Jupyter notebooks Part of #1218 ## Test Plan `cargo test`
2025-08-03 18:28:24 +00:00 · 2023-06-15 08:04:27 +05:30 · 2023-06-15 08:04:27 +05:30 · 097823b56d
commit 097823b56d
parent ed8113267c
7 changed files with 286 additions and 34 deletions
--- a/crates/ruff/resources/test/fixtures/jupyter/isort.ipynb
+++ b/crates/ruff/resources/test/fixtures/jupyter/isort.ipynb
@ -0,0 +1,51 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c7535f6-43cb-423f-bfe1-d263b8f55da0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import random\n",
+    "import math"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c066fa1a-5682-47af-8c17-5afec3cf4ad0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import Any\n",
+    "import collections\n",
+    "# Newline should be added here\n",
+    "def foo():\n",
+    "    pass"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (ruff)",
+   "language": "python",
+   "name": "ruff"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/crates/ruff/resources/test/fixtures/jupyter/isort_expected.ipynb
+++ b/crates/ruff/resources/test/fixtures/jupyter/isort_expected.ipynb
@ -0,0 +1,53 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "663ba955-baca-4f34-9ebb-840d2573ae3f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import random\n",
+    "from pathlib import Path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0adfe23-8aea-47e9-bf67-d856cfcb96ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import collections\n",
+    "from typing import Any\n",
+    "\n",
+    "\n",
+    "# Newline should be added here\n",
+    "def foo():\n",
+    "    pass"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (ruff)",
+   "language": "python",
+   "name": "ruff"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/crates/ruff/src/jupyter/notebook.rs
+++ b/crates/ruff/src/jupyter/notebook.rs
@ -75,7 +75,7 @@ impl Cell {
    }
 }

-#[derive(Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub struct Notebook {
    /// Python source code of the notebook.
    ///
@ -414,8 +414,9 @@ mod test {
    use crate::jupyter::is_jupyter_notebook;
    use crate::jupyter::schema::Cell;
    use crate::jupyter::Notebook;
-
-    use crate::test::test_resource_path;
+    use crate::registry::Rule;
+    use crate::test::{test_notebook_path, test_resource_path};
+    use crate::{assert_messages, settings};

    /// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
    fn read_jupyter_cell(path: impl AsRef<Path>) -> Result<Cell> {
@ -523,4 +524,16 @@ print("after empty cells")
            ]
        );
    }
+
+    #[test]
+    fn test_import_sorting() -> Result<()> {
+        let path = "isort.ipynb".to_string();
+        let (diagnostics, source_kind) = test_notebook_path(
+            &path,
+            Path::new("isort_expected.ipynb"),
+            &settings::Settings::for_rule(Rule::UnsortedImports),
+        )?;
+        assert_messages!(diagnostics, path, source_kind);
+        Ok(())
+    }
 }
--- a/crates/ruff/src/jupyter/schema.rs
+++ b/crates/ruff/src/jupyter/schema.rs
@ -26,7 +26,7 @@ use serde_with::skip_serializing_none;
 /// Generated by <https://app.quicktype.io/> from
 /// <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
 /// Jupyter Notebook v4.5 JSON schema.
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 pub struct RawNotebook {
    /// Array of cells of the current notebook.
    pub cells: Vec<Cell>,
@ -46,7 +46,7 @@ pub struct RawNotebook {
 ///
 /// Notebook code cell.
 #[skip_serializing_none]
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 #[serde(deny_unknown_fields)]
 pub struct Cell {
    pub attachments: Option<HashMap<String, HashMap<String, Value>>>,
@ -67,7 +67,7 @@ pub struct Cell {

 /// Cell-level metadata.
 #[skip_serializing_none]
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 pub struct CellMetadata {
    /// Raw cell metadata format for nbconvert.
    pub format: Option<String>,
@ -94,7 +94,7 @@ pub struct CellMetadata {
 /// Execution time for the code in the cell. This tracks time at which messages are received
 /// from iopub or shell channels
 #[skip_serializing_none]
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 #[serde(deny_unknown_fields)]
 pub struct Execution {
    /// header.date (in ISO 8601 format) of iopub channel's execute_input message. It indicates
@ -124,7 +124,7 @@ pub struct Execution {
 ///
 /// Output of an error that occurred during code cell execution.
 #[skip_serializing_none]
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 #[serde(deny_unknown_fields)]
 pub struct Output {
    pub data: Option<HashMap<String, Value>>,
@ -147,7 +147,7 @@ pub struct Output {

 /// Notebook root-level metadata.
 #[skip_serializing_none]
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 pub struct JupyterNotebookMetadata {
    /// The author(s) of the notebook document
    pub authors: Option<Vec<Option<Value>>>,
@ -166,7 +166,7 @@ pub struct JupyterNotebookMetadata {
 }

 /// Kernel information.
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 pub struct Kernelspec {
    /// Name to display in UI.
    pub display_name: String,
@ -179,7 +179,7 @@ pub struct Kernelspec {

 /// Kernel information.
 #[skip_serializing_none]
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 pub struct LanguageInfo {
    /// The codemirror mode to use for code in this language.
    pub codemirror_mode: Option<CodemirrorMode>,
@ -202,7 +202,7 @@ pub struct LanguageInfo {
 /// Contents of the cell, represented as an array of lines.
 ///
 /// The stream's text output, represented as an array of strings.
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 #[serde(untagged)]
 pub enum SourceValue {
    String(String),
@ -210,7 +210,7 @@ pub enum SourceValue {
 }

 /// Whether the cell's output is scrolled, unscrolled, or autoscrolled.
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 #[serde(untagged)]
 pub enum ScrolledUnion {
    Bool(bool),
@ -223,7 +223,7 @@ pub enum ScrolledUnion {
 /// Contents of the cell, represented as an array of lines.
 ///
 /// The stream's text output, represented as an array of strings.
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 #[serde(untagged)]
 pub enum TextUnion {
    String(String),
@ -231,7 +231,7 @@ pub enum TextUnion {
 }

 /// The codemirror mode to use for code in this language.
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 #[serde(untagged)]
 pub enum CodemirrorMode {
    AnythingMap(HashMap<String, Option<Value>>),
--- a/crates/ruff/src/jupyter/snapshots/ruffjupyternotebooktestimport_sorting.snap
+++ b/crates/ruff/src/jupyter/snapshots/ruffjupyternotebooktestimport_sorting.snap
@ -0,0 +1,52 @@
+---
+source: crates/ruff/src/jupyter/notebook.rs
+---
+isort.ipynb:cell 1:1:1: I001 [*] Import block is un-sorted or un-formatted
+  |
+1 | / from pathlib import Path
+2 | | import random
+3 | | import math
+4 | | from typing import Any
+  | |_^ I001
+5 |   import collections
+6 |   # Newline should be added here
+  |
+  = help: Organize imports
+
+ℹ Fix
+  1 |+import math
+  2 |+import random
+1 3 | from pathlib import Path
+2   |-import random
+3   |-import math
+4 4 | from typing import Any
+5 5 | import collections
+6 6 | # Newline should be added here
+
+isort.ipynb:cell 2:1:1: I001 [*] Import block is un-sorted or un-formatted
+  |
+2 |   import random
+3 |   import math
+4 | / from typing import Any
+5 | | import collections
+6 | | # Newline should be added here
+  | |_^ I001
+7 |   def foo():
+8 |       pass
+  |
+  = help: Organize imports
+
+ℹ Fix
+1 1 | from pathlib import Path
+2 2 | import random
+3 3 | import math
+  4 |+import collections
+4 5 | from typing import Any
+5   |-import collections
+  6 |+
+  7 |+
+6 8 | # Newline should be added here
+7 9 | def foo():
+8 10 |     pass
+
+
--- a/crates/ruff/src/source_kind.rs
+++ b/crates/ruff/src/source_kind.rs
@ -1,6 +1,6 @@
 use crate::jupyter::Notebook;

-#[derive(Debug, PartialEq, is_macro::Is)]
+#[derive(Clone, Debug, PartialEq, is_macro::Is)]
 pub enum SourceKind {
    Python(String),
    Jupyter(Notebook),
--- a/crates/ruff/src/test.rs
+++ b/crates/ruff/src/test.rs
@ -15,12 +15,25 @@ use ruff_python_ast::source_code::{Indexer, Locator, SourceFileBuilder, Stylist}

 use crate::autofix::{fix_file, FixResult};
 use crate::directives;
+use crate::jupyter::Notebook;
 use crate::linter::{check_path, LinterResult};
 use crate::message::{Emitter, EmitterContext, Message, TextEmitter};
 use crate::packaging::detect_package_root;
 use crate::registry::AsRule;
 use crate::rules::pycodestyle::rules::syntax_error;
 use crate::settings::{flags, Settings};
+use crate::source_kind::SourceKind;
+
+#[cfg(not(fuzzing))]
+fn read_jupyter_notebook(path: &Path) -> Result<Notebook> {
+    Notebook::read(path).map_err(|err| {
+        anyhow::anyhow!(
+            "Failed to read notebook file `{}`: {:?}",
+            path.display(),
+            err
+        )
+    })
+}

 #[cfg(not(fuzzing))]
 pub(crate) fn test_resource_path(path: impl AsRef<Path>) -> std::path::PathBuf {
@ -32,14 +45,41 @@ pub(crate) fn test_resource_path(path: impl AsRef<Path>) -> std::path::PathBuf {
 pub(crate) fn test_path(path: impl AsRef<Path>, settings: &Settings) -> Result<Vec<Message>> {
    let path = test_resource_path("fixtures").join(path);
    let contents = std::fs::read_to_string(&path)?;
-    Ok(test_contents(&contents, &path, settings))
+    Ok(test_contents(
+        &mut SourceKind::Python(contents),
+        &path,
+        settings,
+    ))
+}
+
+#[cfg(not(fuzzing))]
+pub(crate) fn test_notebook_path(
+    path: impl AsRef<Path>,
+    expected: impl AsRef<Path>,
+    settings: &Settings,
+) -> Result<(Vec<Message>, SourceKind)> {
+    let path = test_resource_path("fixtures/jupyter").join(path);
+    let mut source_kind = SourceKind::Jupyter(read_jupyter_notebook(&path)?);
+    let messages = test_contents(&mut source_kind, &path, settings);
+    let expected_notebook =
+        read_jupyter_notebook(&test_resource_path("fixtures/jupyter").join(expected))?;
+    if let SourceKind::Jupyter(notebook) = &source_kind {
+        assert_eq!(notebook.cell_offsets(), expected_notebook.cell_offsets());
+        assert_eq!(notebook.index(), expected_notebook.index());
+        assert_eq!(notebook.content(), expected_notebook.content());
+    };
+    Ok((messages, source_kind))
 }

 /// Run [`check_path`] on a snippet of Python code.
 pub fn test_snippet(contents: &str, settings: &Settings) -> Vec<Message> {
    let path = Path::new("<filename>");
    let contents = dedent(contents);
-    test_contents(&contents, path, settings)
+    test_contents(
+        &mut SourceKind::Python(contents.to_string()),
+        path,
+        settings,
+    )
 }

 thread_local! {
@ -56,9 +96,10 @@ pub(crate) fn max_iterations() -> usize {

 /// A convenient wrapper around [`check_path`], that additionally
 /// asserts that autofixes converge after a fixed number of iterations.
-fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Message> {
-    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
-    let locator = Locator::new(contents);
+fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
+    let contents = source_kind.content().to_string();
+    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
+    let locator = Locator::new(&contents);
    let stylist = Stylist::from_tokens(&tokens, &locator);
    let indexer = Indexer::from_tokens(&tokens, &locator);
    let directives = directives::extract_directives(
@ -81,7 +122,7 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag
        &directives,
        settings,
        flags::Noqa::Enabled,
-        None,
+        Some(source_kind),
    );

    let source_has_errors = error.is_some();
@ -98,13 +139,14 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag

        while let Some(FixResult {
            code: fixed_contents,
+            source_map,
            ..
        }) = fix_file(&diagnostics, &Locator::new(&contents))
        {
            if iterations < max_iterations() {
                iterations += 1;
            } else {
-                let output = print_diagnostics(diagnostics, path, &contents);
+                let output = print_diagnostics(diagnostics, path, &contents, source_kind);

                panic!(
                    "Failed to converge after {} iterations. This likely \
@ -114,6 +156,10 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag
                );
            }

+            if let Some(notebook) = source_kind.as_mut_jupyter() {
+                notebook.update(&source_map, &fixed_contents);
+            };
+
            let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&fixed_contents);
            let locator = Locator::new(&fixed_contents);
            let stylist = Stylist::from_tokens(&tokens, &locator);
@ -138,18 +184,18 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag
                &directives,
                settings,
                flags::Noqa::Enabled,
-                None,
+                Some(source_kind),
            );

            if let Some(fixed_error) = fixed_error {
                if !source_has_errors {
                    // Previous fix introduced a syntax error, abort
-                    let fixes = print_diagnostics(diagnostics, path, &contents);
+                    let fixes = print_diagnostics(diagnostics, path, &contents, source_kind);

                    let mut syntax_diagnostics = Vec::new();
                    syntax_error(&mut syntax_diagnostics, &fixed_error, &locator);
                    let syntax_errors =
-                        print_diagnostics(syntax_diagnostics, path, &fixed_contents);
+                        print_diagnostics(syntax_diagnostics, path, &fixed_contents, source_kind);

                    panic!(
                        r#"Fixed source has a syntax error where the source document does not. This is a bug in one of the generated fixes:
@ -169,7 +215,7 @@ Source with applied fixes:

    let source_code = SourceFileBuilder::new(
        path.file_name().unwrap().to_string_lossy().as_ref(),
-        contents,
+        contents.as_str(),
    )
    .finish();

@ -203,12 +249,14 @@ Source with applied fixes:
        .collect()
 }

-fn print_diagnostics(diagnostics: Vec<Diagnostic>, file_path: &Path, source: &str) -> String {
-    let source_file = SourceFileBuilder::new(
-        file_path.file_name().unwrap().to_string_lossy().as_ref(),
-        source,
-    )
-    .finish();
+fn print_diagnostics(
+    diagnostics: Vec<Diagnostic>,
+    file_path: &Path,
+    source: &str,
+    source_kind: &SourceKind,
+) -> String {
+    let filename = file_path.file_name().unwrap().to_string_lossy();
+    let source_file = SourceFileBuilder::new(filename.as_ref(), source).finish();

    let messages: Vec<_> = diagnostics
        .into_iter()
@ -219,7 +267,35 @@ fn print_diagnostics(diagnostics: Vec<Diagnostic>, file_path: &Path, source: &st
        })
        .collect();

-    print_messages(&messages)
+    if source_kind.is_jupyter() {
+        print_jupyter_messages(&messages, &filename, source_kind)
+    } else {
+        print_messages(&messages)
+    }
+}
+
+pub(crate) fn print_jupyter_messages(
+    messages: &[Message],
+    filename: &str,
+    source_kind: &SourceKind,
+) -> String {
+    let mut output = Vec::new();
+
+    TextEmitter::default()
+        .with_show_fix_status(true)
+        .with_show_fix_diff(true)
+        .with_show_source(true)
+        .emit(
+            &mut output,
+            messages,
+            &EmitterContext::new(&FxHashMap::from_iter([(
+                filename.to_string(),
+                source_kind.clone(),
+            )])),
+        )
+        .unwrap();
+
+    String::from_utf8(output).unwrap()
 }

 pub(crate) fn print_messages(messages: &[Message]) -> String {
@ -241,6 +317,13 @@ pub(crate) fn print_messages(messages: &[Message]) -> String {

 #[macro_export]
 macro_rules! assert_messages {
+    ($value:expr, $path:expr, $source_kind:expr) => {{
+        insta::with_settings!({ omit_expression => true }, {
+            insta::assert_snapshot!(
+                $crate::test::print_jupyter_messages(&$value, &$path, &$source_kind)
+            );
+        });
+    }};
    ($value:expr, @$snapshot:literal) => {{
        insta::with_settings!({ omit_expression => true }, {
            insta::assert_snapshot!($crate::test::print_messages(&$value), $snapshot);