Add cell field to JSON output format (#7664)

## Summary

This PR adds a new `cell` field to the JSON output format which
indicates the Notebook cell this diagnostic (and fix) belongs to. It
also updates the location for the diagnostic and fixes as per the
`NotebookIndex`. It will be used in the VSCode extension to display the
diagnostic in the correct cell.

The diagnostic and edit start and end source locations are translated
for the notebook as per the `NotebookIndex`. The end source location for
an edit needs some special handling.

### Edit end location

To understand this, the following context is required:

1. Visible lines in Jupyter Notebook vs JSON array strings: The newline
is part of the string in the JSON format. This means that if there are 3
visible lines in a cell where the last line is empty then the JSON would
contain 2 strings in the source array, both ending with a newline:

**JSON format:**
```json
[
	"# first line\n",
	"# second line\n",
]
```

**Notebook view:**
```python
1 # first line
2 # second line
3
```

2. If an edit needs to remove an entire line including the newline, then
the end location would be the start of the next row.

To remove a statement in the following code:
```python
import os
```

The edit would be:
```
start: row 1, col 1
end: row 2, col 1
```

Now, here's where the problem lies. The notebook index doesn't have any
information for row 2 because it doesn't exist in the actual notebook.
The newline was added by Ruff to concatenate the source code and it's
removed before writing back. But, the edit is computed looking at that
newline.

This means that while translating the end location for an edit belonging
to a Notebook, we need to check whether the start and end locations
belong to the same cell. If they don't, the end location must be the
first character of the next row, so we translate it back to the last
character of the previous row. Taking the above example, the translated
location for the Notebook would be:
```
start: row 1, col 1
end: row 1, col 10
```

## Test Plan

Add test cases for notebook output in the JSON format and update
existing snapshots.
This commit is contained in:
Dhruv Manilawala 2023-10-13 06:36:02 +05:30 committed by GitHub
parent 1e184e69f3
commit 66179af4f1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 256 additions and 19 deletions

View file

@ -168,6 +168,7 @@ import os
----- stdout ----- ----- stdout -----
[ [
{ {
"cell": null,
"code": "F401", "code": "F401",
"end_location": { "end_location": {
"column": 10, "column": 10,

View file

@ -17,6 +17,7 @@ exit_code: 1
----- stdout ----- ----- stdout -----
[ [
{ {
"cell": null,
"code": "F401", "code": "F401",
"end_location": { "end_location": {
"column": 10, "column": 10,

View file

@ -5,7 +5,8 @@ use serde::{Serialize, Serializer};
use serde_json::{json, Value}; use serde_json::{json, Value};
use ruff_diagnostics::Edit; use ruff_diagnostics::Edit;
use ruff_source_file::SourceCode; use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceCode, SourceLocation};
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use crate::message::{Emitter, EmitterContext, Message}; use crate::message::{Emitter, EmitterContext, Message};
@ -19,9 +20,9 @@ impl Emitter for JsonEmitter {
&mut self, &mut self,
writer: &mut dyn Write, writer: &mut dyn Write,
messages: &[Message], messages: &[Message],
_context: &EmitterContext, context: &EmitterContext,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
serde_json::to_writer_pretty(writer, &ExpandedMessages { messages })?; serde_json::to_writer_pretty(writer, &ExpandedMessages { messages, context })?;
Ok(()) Ok(())
} }
@ -29,6 +30,7 @@ impl Emitter for JsonEmitter {
struct ExpandedMessages<'a> { struct ExpandedMessages<'a> {
messages: &'a [Message], messages: &'a [Message],
context: &'a EmitterContext<'a>,
} }
impl Serialize for ExpandedMessages<'_> { impl Serialize for ExpandedMessages<'_> {
@ -39,7 +41,7 @@ impl Serialize for ExpandedMessages<'_> {
let mut s = serializer.serialize_seq(Some(self.messages.len()))?; let mut s = serializer.serialize_seq(Some(self.messages.len()))?;
for message in self.messages { for message in self.messages {
let value = message_to_json_value(message); let value = message_to_json_value(message, self.context);
s.serialize_element(&value)?; s.serialize_element(&value)?;
} }
@ -47,26 +49,40 @@ impl Serialize for ExpandedMessages<'_> {
} }
} }
pub(crate) fn message_to_json_value(message: &Message) -> Value { pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext) -> Value {
let source_code = message.file.to_source_code(); let source_code = message.file.to_source_code();
let notebook_index = context.notebook_index(message.filename());
let fix = message.fix.as_ref().map(|fix| { let fix = message.fix.as_ref().map(|fix| {
json!({ json!({
"applicability": fix.applicability(), "applicability": fix.applicability(),
"message": message.kind.suggestion.as_deref(), "message": message.kind.suggestion.as_deref(),
"edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code }, "edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code, notebook_index },
}) })
}); });
let start_location = source_code.source_location(message.start()); let mut start_location = source_code.source_location(message.start());
let end_location = source_code.source_location(message.end()); let mut end_location = source_code.source_location(message.end());
let noqa_location = source_code.source_location(message.noqa_offset); let mut noqa_location = source_code.source_location(message.noqa_offset);
let mut notebook_cell_index = None;
if let Some(notebook_index) = notebook_index {
notebook_cell_index = Some(
notebook_index
.cell(start_location.row)
.unwrap_or(OneIndexed::MIN),
);
start_location = notebook_index.translate_location(&start_location);
end_location = notebook_index.translate_location(&end_location);
noqa_location = notebook_index.translate_location(&noqa_location);
}
json!({ json!({
"code": message.kind.rule().noqa_code().to_string(), "code": message.kind.rule().noqa_code().to_string(),
"url": message.kind.rule().url(), "url": message.kind.rule().url(),
"message": message.kind.body, "message": message.kind.body,
"fix": fix, "fix": fix,
"cell": notebook_cell_index,
"location": start_location, "location": start_location,
"end_location": end_location, "end_location": end_location,
"filename": message.filename(), "filename": message.filename(),
@ -77,6 +93,7 @@ pub(crate) fn message_to_json_value(message: &Message) -> Value {
struct ExpandedEdits<'a> { struct ExpandedEdits<'a> {
edits: &'a [Edit], edits: &'a [Edit],
source_code: &'a SourceCode<'a, 'a>, source_code: &'a SourceCode<'a, 'a>,
notebook_index: Option<&'a NotebookIndex>,
} }
impl Serialize for ExpandedEdits<'_> { impl Serialize for ExpandedEdits<'_> {
@ -87,10 +104,57 @@ impl Serialize for ExpandedEdits<'_> {
let mut s = serializer.serialize_seq(Some(self.edits.len()))?; let mut s = serializer.serialize_seq(Some(self.edits.len()))?;
for edit in self.edits { for edit in self.edits {
let mut location = self.source_code.source_location(edit.start());
let mut end_location = self.source_code.source_location(edit.end());
if let Some(notebook_index) = self.notebook_index {
// There exists a newline between each cell's source code in the
// concatenated source code in Ruff. This newline doesn't actually
// exist in the JSON source field.
//
// Now, certain edits may try to remove this newline, which means
// the edit will spill over to the first character of the next cell.
// If it does, we need to translate the end location to the last
// character of the previous cell.
match (
notebook_index.cell(location.row),
notebook_index.cell(end_location.row),
) {
(Some(start_cell), Some(end_cell)) if start_cell != end_cell => {
debug_assert_eq!(end_location.column.get(), 1);
let prev_row = end_location.row.saturating_sub(1);
end_location = SourceLocation {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self
.source_code
.source_location(self.source_code.line_end_exclusive(prev_row))
.column,
};
}
(Some(_), None) => {
debug_assert_eq!(end_location.column.get(), 1);
let prev_row = end_location.row.saturating_sub(1);
end_location = SourceLocation {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self
.source_code
.source_location(self.source_code.line_end_exclusive(prev_row))
.column,
};
}
_ => {
end_location = notebook_index.translate_location(&end_location);
}
}
location = notebook_index.translate_location(&location);
}
let value = json!({ let value = json!({
"content": edit.content().unwrap_or_default(), "content": edit.content().unwrap_or_default(),
"location": self.source_code.source_location(edit.start()), "location": location,
"end_location": self.source_code.source_location(edit.end()) "end_location": end_location
}); });
s.serialize_element(&value)?; s.serialize_element(&value)?;
@ -104,7 +168,10 @@ impl Serialize for ExpandedEdits<'_> {
mod tests { mod tests {
use insta::assert_snapshot; use insta::assert_snapshot;
use crate::message::tests::{capture_emitter_output, create_messages}; use crate::message::tests::{
capture_emitter_notebook_output, capture_emitter_output, create_messages,
create_notebook_messages,
};
use crate::message::JsonEmitter; use crate::message::JsonEmitter;
#[test] #[test]
@ -114,4 +181,13 @@ mod tests {
assert_snapshot!(content); assert_snapshot!(content);
} }
#[test]
fn notebook_output() {
let mut emitter = JsonEmitter;
let (messages, notebook_indexes) = create_notebook_messages();
let content = capture_emitter_notebook_output(&mut emitter, &messages, &notebook_indexes);
assert_snapshot!(content);
}
} }

View file

@ -11,11 +11,11 @@ impl Emitter for JsonLinesEmitter {
&mut self, &mut self,
writer: &mut dyn Write, writer: &mut dyn Write,
messages: &[Message], messages: &[Message],
_context: &EmitterContext, context: &EmitterContext,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let mut w = writer; let mut w = writer;
for message in messages { for message in messages {
serde_json::to_writer(&mut w, &message_to_json_value(message))?; serde_json::to_writer(&mut w, &message_to_json_value(message, context))?;
w.write_all(b"\n")?; w.write_all(b"\n")?;
} }
Ok(()) Ok(())
@ -27,7 +27,10 @@ mod tests {
use insta::assert_snapshot; use insta::assert_snapshot;
use crate::message::json_lines::JsonLinesEmitter; use crate::message::json_lines::JsonLinesEmitter;
use crate::message::tests::{capture_emitter_output, create_messages}; use crate::message::tests::{
capture_emitter_notebook_output, capture_emitter_output, create_messages,
create_notebook_messages,
};
#[test] #[test]
fn output() { fn output() {
@ -36,4 +39,13 @@ mod tests {
assert_snapshot!(content); assert_snapshot!(content);
} }
#[test]
fn notebook_output() {
let mut emitter = JsonLinesEmitter;
let (messages, notebook_indexes) = create_notebook_messages();
let content = capture_emitter_notebook_output(&mut emitter, &messages, &notebook_indexes);
assert_snapshot!(content);
}
} }

View file

@ -0,0 +1,105 @@
---
source: crates/ruff_linter/src/message/json.rs
expression: content
---
[
{
"cell": 1,
"code": "F401",
"end_location": {
"column": 10,
"row": 2
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "safe",
"edits": [
{
"content": "",
"end_location": {
"column": 10,
"row": 2
},
"location": {
"column": 1,
"row": 2
}
}
],
"message": "Remove unused import: `os`"
},
"location": {
"column": 8,
"row": 2
},
"message": "`os` imported but unused",
"noqa_row": 2,
"url": "https://docs.astral.sh/ruff/rules/unused-import"
},
{
"cell": 2,
"code": "F401",
"end_location": {
"column": 12,
"row": 2
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "safe",
"edits": [
{
"content": "",
"end_location": {
"column": 1,
"row": 3
},
"location": {
"column": 1,
"row": 2
}
}
],
"message": "Remove unused import: `math`"
},
"location": {
"column": 8,
"row": 2
},
"message": "`math` imported but unused",
"noqa_row": 2,
"url": "https://docs.astral.sh/ruff/rules/unused-import"
},
{
"cell": 3,
"code": "F841",
"end_location": {
"column": 6,
"row": 4
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "unsafe",
"edits": [
{
"content": "",
"end_location": {
"column": 10,
"row": 4
},
"location": {
"column": 1,
"row": 4
}
}
],
"message": "Remove assignment to unused variable `x`"
},
"location": {
"column": 5,
"row": 4
},
"message": "Local variable `x` is assigned to but never used",
"noqa_row": 4,
"url": "https://docs.astral.sh/ruff/rules/unused-variable"
}
]

View file

@ -4,6 +4,7 @@ expression: content
--- ---
[ [
{ {
"cell": null,
"code": "F401", "code": "F401",
"end_location": { "end_location": {
"column": 10, "column": 10,
@ -36,6 +37,7 @@ expression: content
"url": "https://docs.astral.sh/ruff/rules/unused-import" "url": "https://docs.astral.sh/ruff/rules/unused-import"
}, },
{ {
"cell": null,
"code": "F841", "code": "F841",
"end_location": { "end_location": {
"column": 6, "column": 6,
@ -68,6 +70,7 @@ expression: content
"url": "https://docs.astral.sh/ruff/rules/unused-variable" "url": "https://docs.astral.sh/ruff/rules/unused-variable"
}, },
{ {
"cell": null,
"code": "F821", "code": "F821",
"end_location": { "end_location": {
"column": 5, "column": 5,

View file

@ -0,0 +1,8 @@
---
source: crates/ruff_linter/src/message/json_lines.rs
expression: content
---
{"cell":1,"code":"F401","end_location":{"column":10,"row":2},"filename":"notebook.ipynb","fix":{"applicability":"safe","edits":[{"content":"","end_location":{"column":10,"row":2},"location":{"column":1,"row":2}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":2},"message":"`os` imported but unused","noqa_row":2,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"cell":2,"code":"F401","end_location":{"column":12,"row":2},"filename":"notebook.ipynb","fix":{"applicability":"safe","edits":[{"content":"","end_location":{"column":1,"row":3},"location":{"column":1,"row":2}}],"message":"Remove unused import: `math`"},"location":{"column":8,"row":2},"message":"`math` imported but unused","noqa_row":2,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"cell":3,"code":"F841","end_location":{"column":6,"row":4},"filename":"notebook.ipynb","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":4},"location":{"column":1,"row":4}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":4},"message":"Local variable `x` is assigned to but never used","noqa_row":4,"url":"https://docs.astral.sh/ruff/rules/unused-variable"}

View file

@ -2,7 +2,7 @@
source: crates/ruff_linter/src/message/json_lines.rs source: crates/ruff_linter/src/message/json_lines.rs
expression: content expression: content
--- ---
{"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/unused-import"} {"cell":null,"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6,"url":"https://docs.astral.sh/ruff/rules/unused-variable"} {"cell":null,"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6,"url":"https://docs.astral.sh/ruff/rules/unused-variable"}
{"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/undefined-name"} {"cell":null,"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/undefined-name"}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use ruff_source_file::OneIndexed; use ruff_source_file::{OneIndexed, SourceLocation};
/// Jupyter Notebook indexing table /// Jupyter Notebook indexing table
/// ///
@ -32,4 +32,17 @@ impl NotebookIndex {
pub fn cell_row(&self, row: OneIndexed) -> Option<OneIndexed> { pub fn cell_row(&self, row: OneIndexed) -> Option<OneIndexed> {
self.row_to_row_in_cell.get(row.to_zero_indexed()).copied() self.row_to_row_in_cell.get(row.to_zero_indexed()).copied()
} }
/// Translates the given source location based on the indexing table.
///
/// This will translate the row/column in the concatenated source code
/// to the row/column in the Jupyter Notebook.
pub fn translate_location(&self, source_location: &SourceLocation) -> SourceLocation {
SourceLocation {
row: self
.cell_row(source_location.row)
.unwrap_or(OneIndexed::MIN),
column: source_location.column,
}
}
} }

View file

@ -68,6 +68,10 @@ impl<'src, 'index> SourceCode<'src, 'index> {
self.index.line_end(line, self.text) self.index.line_end(line, self.text)
} }
pub fn line_end_exclusive(&self, line: OneIndexed) -> TextSize {
self.index.line_end_exclusive(line, self.text)
}
pub fn line_range(&self, line: OneIndexed) -> TextRange { pub fn line_range(&self, line: OneIndexed) -> TextRange {
self.index.line_range(line, self.text) self.index.line_range(line, self.text)
} }

View file

@ -184,6 +184,20 @@ impl LineIndex {
} }
} }
/// Returns the [byte offset](TextSize) of the `line`'s end.
/// The offset is the end of the line, excluding the newline character ending the line (if any).
pub fn line_end_exclusive(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
let starts = self.line_starts();
// If start-of-line position after last line
if row_index.saturating_add(1) >= starts.len() {
contents.text_len()
} else {
starts[row_index + 1] - TextSize::new(1)
}
}
/// Returns the [`TextRange`] of the `line` with the given index. /// Returns the [`TextRange`] of the `line` with the given index.
/// The start points to the first character's [byte offset](TextSize), the end up to, and including /// The start points to the first character's [byte offset](TextSize), the end up to, and including
/// the newline character ending the line (if any). /// the newline character ending the line (if any).