ruff server: Support Jupyter Notebook (*.ipynb) files (#11206)

## Summary Closes https://github.com/astral-sh/ruff/issues/10858. `ruff server` now supports `*.ipynb` (aka Jupyter Notebook) files. Extensive internal changes have been made to facilitate this, which I've done some work to contextualize with documentation and an pre-review that highlights notable sections of the code. `*.ipynb` cells should behave similarly to `*.py` documents, with one major exception. The format command `ruff.applyFormat` will only apply to the currently selected notebook cell - if you want to format an entire notebook document, use `Format Notebook` from the VS Code context menu. ## Test Plan The VS Code extension does not yet have Jupyter Notebook support enabled, so you'll first need to enable it manually. To do this, checkout the `pre-release` branch and modify `src/common/server.ts` as follows: Before: ![Screenshot 2024-05-13 at 10 59 06 PM](c6a3c604-c405-4968-b8a2-5d670de89172) After: ![Screenshot 2024-05-13 at 10 58 24 PM](94ab2e3d-0609-448d-9c8c-cd07c69a513b) I recommend testing this PR with large, complicated notebook files. I used notebook files from [this popular repository](https://github.com/jakevdp/PythonDataScienceHandbook/tree/master/notebooks) in my preliminary testing. The main thing to test is ensuring that notebook cells behave the same as Python documents, besides the aforementioned issue with `ruff.applyFormat`. You should also test adding and deleting cells (in particular, deleting all the code cells and ensure that doesn't break anything), changing the kind of a cell (i.e. from markup -> code or vice versa), and creating a new notebook file from scratch. Finally, you should also test that source actions work as expected (and across the entire notebook). Note: `ruff.applyAutofix` and `ruff.applyOrganizeImports` are currently broken for notebook files, and I suspect it has something to do with https://github.com/astral-sh/ruff/issues/11248. Once this is fixed, I will update the test plan accordingly. --------- Co-authored-by: nolan <nolan.king90@gmail.com>
2025-09-30 13:51:37 +00:00 · 2024-05-21 15:29:30 -07:00 · 2024-05-21 15:29:30 -07:00 · b0731ef9cb
commit b0731ef9cb
parent 84531d1644
39 changed files with 1584 additions and 622 deletions
--- a/crates/ruff_server/src/lint.rs
+++ b/crates/ruff_server/src/lint.rs
@ -10,26 +10,32 @@ use ruff_linter::{
    settings::{flags, LinterSettings},
    source_kind::SourceKind,
 };
-use ruff_python_ast::PySourceType;
+use ruff_notebook::Notebook;
 use ruff_python_codegen::Stylist;
 use ruff_python_index::Indexer;
 use ruff_python_parser::AsMode;
-use ruff_source_file::Locator;
-use ruff_text_size::Ranged;
+use ruff_source_file::{LineIndex, Locator};
+use ruff_text_size::{Ranged, TextRange};
+use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

-use crate::{edit::ToRangeExt, PositionEncoding, DIAGNOSTIC_NAME};
+use crate::{
+    edit::{NotebookRange, ToRangeExt},
+    session::DocumentQuery,
+    PositionEncoding, DIAGNOSTIC_NAME,
+};

 /// This is serialized on the diagnostic `data` field.
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub(crate) struct AssociatedDiagnosticData {
    pub(crate) kind: DiagnosticKind,
-    /// A possible fix for the associated diagnostic.
-    pub(crate) fix: Option<Fix>,
+    /// Edits to fix the diagnostic. If this is empty, a fix
+    /// does not exist.
+    pub(crate) edits: Vec<lsp_types::TextEdit>,
    /// The NOQA code for the diagnostic.
    pub(crate) code: String,
    /// Possible edit to add a `noqa` comment which will disable this diagnostic.
-    pub(crate) noqa_edit: Option<ruff_diagnostics::Edit>,
+    pub(crate) noqa_edit: Option<lsp_types::TextEdit>,
 }

 /// Describes a fix for `fixed_diagnostic` that may have quick fix
@ -49,18 +55,16 @@ pub(crate) struct DiagnosticFix {
    pub(crate) noqa_edit: Option<lsp_types::TextEdit>,
 }

+/// A series of diagnostics across a single text document or an arbitrary number of notebook cells.
+pub(crate) type Diagnostics = FxHashMap<lsp_types::Url, Vec<lsp_types::Diagnostic>>;
+
 pub(crate) fn check(
-    document: &crate::edit::Document,
-    document_url: &lsp_types::Url,
+    query: &DocumentQuery,
    linter_settings: &LinterSettings,
    encoding: PositionEncoding,
-) -> Vec<lsp_types::Diagnostic> {
-    let contents = document.contents();
-    let index = document.index().clone();
-
-    let document_path = document_url
-        .to_file_path()
-        .expect("document URL should be a valid file path");
+) -> Diagnostics {
+    let document_path = query.file_path();
+    let source_kind = query.make_source_kind();

    let package = detect_package_root(
        document_path
@ -69,16 +73,15 @@ pub(crate) fn check(
        &linter_settings.namespace_packages,
    );

-    let source_type = PySourceType::default();
-
-    // TODO(jane): Support Jupyter Notebooks
-    let source_kind = SourceKind::Python(contents.to_string());
+    let source_type = query.source_type();

    // Tokenize once.
-    let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode());
+    let tokens = ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode());
+
+    let index = LineIndex::from_source_text(source_kind.source_code());

    // Map row and column locations to byte slices (lazily).
-    let locator = Locator::with_index(contents, index);
+    let locator = Locator::with_index(source_kind.source_code(), index.clone());

    // Detect the current code style (lazily).
    let stylist = Stylist::from_tokens(&tokens, &locator);
@ -90,10 +93,8 @@ pub(crate) fn check(
    let directives = extract_directives(&tokens, Flags::all(), &locator, &indexer);

    // Generate checks.
-    let LinterResult {
-        data: diagnostics, ..
-    } = check_path(
-        &document_path,
+    let LinterResult { data, .. } = check_path(
+        document_path,
        package,
        &locator,
        &stylist,
@ -107,8 +108,8 @@ pub(crate) fn check(
    );

    let noqa_edits = generate_noqa_edits(
-        &document_path,
-        diagnostics.as_slice(),
+        document_path,
+        data.as_slice(),
        &locator,
        indexer.comment_ranges(),
        &linter_settings.external,
@ -116,16 +117,45 @@ pub(crate) fn check(
        stylist.line_ending(),
    );

-    diagnostics
+    let mut diagnostics = Diagnostics::default();
+
+    // Populate all cell URLs with an empty diagnostic list.
+    // This ensures that cells without diagnostics still get updated.
+    if let Some(notebook) = query.as_notebook() {
+        for url in notebook.urls() {
+            diagnostics.entry(url.clone()).or_default();
+        }
+    }
+
+    let lsp_diagnostics = data
        .into_iter()
        .zip(noqa_edits)
-        .map(|(diagnostic, noqa_edit)| to_lsp_diagnostic(diagnostic, noqa_edit, document, encoding))
-        .collect()
+        .map(|(diagnostic, noqa_edit)| {
+            to_lsp_diagnostic(diagnostic, &noqa_edit, &source_kind, &index, encoding)
+        });
+
+    if let Some(notebook) = query.as_notebook() {
+        for (index, diagnostic) in lsp_diagnostics {
+            let Some(uri) = notebook.cell_uri_by_index(index) else {
+                tracing::warn!("Unable to find notebook cell at index {index}.");
+                continue;
+            };
+            diagnostics.entry(uri.clone()).or_default().push(diagnostic);
+        }
+    } else {
+        for (_, diagnostic) in lsp_diagnostics {
+            diagnostics
+                .entry(query.make_key().into_url())
+                .or_default()
+                .push(diagnostic);
+        }
+    }
+
+    diagnostics
 }

+/// Converts LSP diagnostics to a list of `DiagnosticFix`es by deserializing associated data on each diagnostic.
 pub(crate) fn fixes_for_diagnostics(
-    document: &crate::edit::Document,
-    encoding: PositionEncoding,
    diagnostics: Vec<lsp_types::Diagnostic>,
 ) -> crate::Result<Vec<DiagnosticFix>> {
    diagnostics
@ -139,36 +169,6 @@ pub(crate) fn fixes_for_diagnostics(
                serde_json::from_value(data).map_err(|err| {
                    anyhow::anyhow!("failed to deserialize diagnostic data: {err}")
                })?;
-            let edits = associated_data
-                .fix
-                .map(|fix| {
-                    fix.edits()
-                        .iter()
-                        .map(|edit| lsp_types::TextEdit {
-                            range: edit.range().to_range(
-                                document.contents(),
-                                document.index(),
-                                encoding,
-                            ),
-                            new_text: edit.content().unwrap_or_default().to_string(),
-                        })
-                        .collect()
-                })
-                .unwrap_or_default();
-
-            let noqa_edit =
-                associated_data
-                    .noqa_edit
-                    .as_ref()
-                    .map(|noqa_edit| lsp_types::TextEdit {
-                        range: noqa_edit.range().to_range(
-                            document.contents(),
-                            document.index(),
-                            encoding,
-                        ),
-                        new_text: noqa_edit.content().unwrap_or_default().to_string(),
-                    });
-
            Ok(Some(DiagnosticFix {
                fixed_diagnostic,
                code: associated_data.code,
@ -176,22 +176,28 @@ pub(crate) fn fixes_for_diagnostics(
                    .kind
                    .suggestion
                    .unwrap_or(associated_data.kind.name),
-                edits,
-                noqa_edit,
+                noqa_edit: associated_data.noqa_edit,
+                edits: associated_data.edits,
            }))
        })
        .filter_map(crate::Result::transpose)
        .collect()
 }

+/// Generates an LSP diagnostic with an associated cell index for the diagnostic to go in.
+/// If the source kind is a text document, the cell index will always be `0`.
 fn to_lsp_diagnostic(
    diagnostic: Diagnostic,
-    noqa_edit: Option<Edit>,
-    document: &crate::edit::Document,
+    noqa_edit: &Option<Edit>,
+    source_kind: &SourceKind,
+    index: &LineIndex,
    encoding: PositionEncoding,
-) -> lsp_types::Diagnostic {
+) -> (usize, lsp_types::Diagnostic) {
    let Diagnostic {
-        kind, range, fix, ..
+        kind,
+        range: diagnostic_range,
+        fix,
+        ..
    } = diagnostic;

    let rule = kind.rule();
@ -200,11 +206,24 @@ fn to_lsp_diagnostic(

    let data = (fix.is_some() || noqa_edit.is_some())
        .then(|| {
-            serde_json::to_value(&AssociatedDiagnosticData {
+            let edits = fix
+                .as_ref()
+                .into_iter()
+                .flat_map(Fix::edits)
+                .map(|edit| lsp_types::TextEdit {
+                    range: diagnostic_edit_range(edit.range(), source_kind, index, encoding),
+                    new_text: edit.content().unwrap_or_default().to_string(),
+                })
+                .collect();
+            let noqa_edit = noqa_edit.as_ref().map(|noqa_edit| lsp_types::TextEdit {
+                range: diagnostic_edit_range(noqa_edit.range(), source_kind, index, encoding),
+                new_text: noqa_edit.content().unwrap_or_default().to_string(),
+            });
+            serde_json::to_value(AssociatedDiagnosticData {
                kind: kind.clone(),
-                fix,
-                code: rule.noqa_code().to_string(),
                noqa_edit,
+                edits,
+                code: rule.noqa_code().to_string(),
            })
            .ok()
        })
@ -212,20 +231,53 @@ fn to_lsp_diagnostic(

    let code = rule.noqa_code().to_string();

-    lsp_types::Diagnostic {
-        range: range.to_range(document.contents(), document.index(), encoding),
-        severity: Some(severity(&code)),
-        tags: tags(&code),
-        code: Some(lsp_types::NumberOrString::String(code)),
-        code_description: rule.url().and_then(|url| {
-            Some(lsp_types::CodeDescription {
-                href: lsp_types::Url::parse(&url).ok()?,
-            })
-        }),
-        source: Some(DIAGNOSTIC_NAME.into()),
-        message: kind.body,
-        related_information: None,
-        data,
+    let range: lsp_types::Range;
+    let cell: usize;
+
+    if let Some(notebook_index) = source_kind.as_ipy_notebook().map(Notebook::index) {
+        NotebookRange { cell, range } = diagnostic_range.to_notebook_range(
+            source_kind.source_code(),
+            index,
+            notebook_index,
+            encoding,
+        );
+    } else {
+        cell = usize::default();
+        range = diagnostic_range.to_range(source_kind.source_code(), index, encoding);
+    }
+
+    (
+        cell,
+        lsp_types::Diagnostic {
+            range,
+            severity: Some(severity(&code)),
+            tags: tags(&code),
+            code: Some(lsp_types::NumberOrString::String(code)),
+            code_description: rule.url().and_then(|url| {
+                Some(lsp_types::CodeDescription {
+                    href: lsp_types::Url::parse(&url).ok()?,
+                })
+            }),
+            source: Some(DIAGNOSTIC_NAME.into()),
+            message: kind.body,
+            related_information: None,
+            data,
+        },
+    )
+}
+
+fn diagnostic_edit_range(
+    range: TextRange,
+    source_kind: &SourceKind,
+    index: &LineIndex,
+    encoding: PositionEncoding,
+) -> lsp_types::Range {
+    if let Some(notebook_index) = source_kind.as_ipy_notebook().map(Notebook::index) {
+        range
+            .to_notebook_range(source_kind.source_code(), index, notebook_index, encoding)
+            .range
+    } else {
+        range.to_range(source_kind.source_code(), index, encoding)
    }
 }