Add cell field to JSON output format (#7664)

## Summary

This PR adds a new `cell` field to the JSON output format which
indicates the Notebook cell this diagnostic (and fix) belongs to. It
also updates the location for the diagnostic and fixes as per the
`NotebookIndex`. It will be used in the VSCode extension to display the
diagnostic in the correct cell.

The diagnostic and edit start and end source locations are translated
for the notebook as per the `NotebookIndex`. The end source location for
an edit needs some special handling.

### Edit end location

To understand this, the following context is required:

1. Visible lines in Jupyter Notebook vs JSON array strings: The newline
is part of the string in the JSON format. This means that if there are 3
visible lines in a cell where the last line is empty then the JSON would
contain 2 strings in the source array, both ending with a newline:

**JSON format:**
```json
[
	"# first line\n",
	"# second line\n",
]
```

**Notebook view:**
```python
1 # first line
2 # second line
3
```

2. If an edit needs to remove an entire line including the newline, then
the end location would be the start of the next row.

To remove a statement in the following code:
```python
import os
```

The edit would be:
```
start: row 1, col 1
end: row 2, col 1
```

Now, here's where the problem lies. The notebook index doesn't have any
information for row 2 because it doesn't exist in the actual notebook.
The newline was added by Ruff to concatenate the source code and it's
removed before writing back. But, the edit is computed looking at that
newline.

This means that while translating the end location for an edit belonging
to a Notebook, we need to check whether the start and end locations
belong to the same cell. If they don't, the end location must be the
first character of the next row, so we translate it back to the last
character of the previous row. Taking the above example, the translated
location for the Notebook would be:
```
start: row 1, col 1
end: row 1, col 10
```

## Test Plan

Add test cases for notebook output in the JSON format and update
existing snapshots.
This commit is contained in:
Dhruv Manilawala 2023-10-13 06:36:02 +05:30 committed by GitHub
parent 1e184e69f3
commit 66179af4f1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 256 additions and 19 deletions

View file

@ -168,6 +168,7 @@ import os
----- stdout ----- ----- stdout -----
[ [
{ {
"cell": null,
"code": "F401", "code": "F401",
"end_location": { "end_location": {
"column": 10, "column": 10,

View file

@ -17,6 +17,7 @@ exit_code: 1
----- stdout ----- ----- stdout -----
[ [
{ {
"cell": null,
"code": "F401", "code": "F401",
"end_location": { "end_location": {
"column": 10, "column": 10,

View file

@ -5,7 +5,8 @@ use serde::{Serialize, Serializer};
use serde_json::{json, Value}; use serde_json::{json, Value};
use ruff_diagnostics::Edit; use ruff_diagnostics::Edit;
use ruff_source_file::SourceCode; use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceCode, SourceLocation};
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use crate::message::{Emitter, EmitterContext, Message}; use crate::message::{Emitter, EmitterContext, Message};
@ -19,9 +20,9 @@ impl Emitter for JsonEmitter {
&mut self, &mut self,
writer: &mut dyn Write, writer: &mut dyn Write,
messages: &[Message], messages: &[Message],
_context: &EmitterContext, context: &EmitterContext,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
serde_json::to_writer_pretty(writer, &ExpandedMessages { messages })?; serde_json::to_writer_pretty(writer, &ExpandedMessages { messages, context })?;
Ok(()) Ok(())
} }
@ -29,6 +30,7 @@ impl Emitter for JsonEmitter {
struct ExpandedMessages<'a> { struct ExpandedMessages<'a> {
messages: &'a [Message], messages: &'a [Message],
context: &'a EmitterContext<'a>,
} }
impl Serialize for ExpandedMessages<'_> { impl Serialize for ExpandedMessages<'_> {
@ -39,7 +41,7 @@ impl Serialize for ExpandedMessages<'_> {
let mut s = serializer.serialize_seq(Some(self.messages.len()))?; let mut s = serializer.serialize_seq(Some(self.messages.len()))?;
for message in self.messages { for message in self.messages {
let value = message_to_json_value(message); let value = message_to_json_value(message, self.context);
s.serialize_element(&value)?; s.serialize_element(&value)?;
} }
@ -47,26 +49,40 @@ impl Serialize for ExpandedMessages<'_> {
} }
} }
pub(crate) fn message_to_json_value(message: &Message) -> Value { pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext) -> Value {
let source_code = message.file.to_source_code(); let source_code = message.file.to_source_code();
let notebook_index = context.notebook_index(message.filename());
let fix = message.fix.as_ref().map(|fix| { let fix = message.fix.as_ref().map(|fix| {
json!({ json!({
"applicability": fix.applicability(), "applicability": fix.applicability(),
"message": message.kind.suggestion.as_deref(), "message": message.kind.suggestion.as_deref(),
"edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code }, "edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code, notebook_index },
}) })
}); });
let start_location = source_code.source_location(message.start()); let mut start_location = source_code.source_location(message.start());
let end_location = source_code.source_location(message.end()); let mut end_location = source_code.source_location(message.end());
let noqa_location = source_code.source_location(message.noqa_offset); let mut noqa_location = source_code.source_location(message.noqa_offset);
let mut notebook_cell_index = None;
if let Some(notebook_index) = notebook_index {
notebook_cell_index = Some(
notebook_index
.cell(start_location.row)
.unwrap_or(OneIndexed::MIN),
);
start_location = notebook_index.translate_location(&start_location);
end_location = notebook_index.translate_location(&end_location);
noqa_location = notebook_index.translate_location(&noqa_location);
}
json!({ json!({
"code": message.kind.rule().noqa_code().to_string(), "code": message.kind.rule().noqa_code().to_string(),
"url": message.kind.rule().url(), "url": message.kind.rule().url(),
"message": message.kind.body, "message": message.kind.body,
"fix": fix, "fix": fix,
"cell": notebook_cell_index,
"location": start_location, "location": start_location,
"end_location": end_location, "end_location": end_location,
"filename": message.filename(), "filename": message.filename(),
@ -77,6 +93,7 @@ pub(crate) fn message_to_json_value(message: &Message) -> Value {
struct ExpandedEdits<'a> { struct ExpandedEdits<'a> {
edits: &'a [Edit], edits: &'a [Edit],
source_code: &'a SourceCode<'a, 'a>, source_code: &'a SourceCode<'a, 'a>,
notebook_index: Option<&'a NotebookIndex>,
} }
impl Serialize for ExpandedEdits<'_> { impl Serialize for ExpandedEdits<'_> {
@ -87,10 +104,57 @@ impl Serialize for ExpandedEdits<'_> {
let mut s = serializer.serialize_seq(Some(self.edits.len()))?; let mut s = serializer.serialize_seq(Some(self.edits.len()))?;
for edit in self.edits { for edit in self.edits {
let mut location = self.source_code.source_location(edit.start());
let mut end_location = self.source_code.source_location(edit.end());
if let Some(notebook_index) = self.notebook_index {
// There exists a newline between each cell's source code in the
// concatenated source code in Ruff. This newline doesn't actually
// exist in the JSON source field.
//
// Now, certain edits may try to remove this newline, which means
// the edit will spill over to the first character of the next cell.
// If it does, we need to translate the end location to the last
// character of the previous cell.
match (
notebook_index.cell(location.row),
notebook_index.cell(end_location.row),
) {
(Some(start_cell), Some(end_cell)) if start_cell != end_cell => {
debug_assert_eq!(end_location.column.get(), 1);
let prev_row = end_location.row.saturating_sub(1);
end_location = SourceLocation {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self
.source_code
.source_location(self.source_code.line_end_exclusive(prev_row))
.column,
};
}
(Some(_), None) => {
debug_assert_eq!(end_location.column.get(), 1);
let prev_row = end_location.row.saturating_sub(1);
end_location = SourceLocation {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self
.source_code
.source_location(self.source_code.line_end_exclusive(prev_row))
.column,
};
}
_ => {
end_location = notebook_index.translate_location(&end_location);
}
}
location = notebook_index.translate_location(&location);
}
let value = json!({ let value = json!({
"content": edit.content().unwrap_or_default(), "content": edit.content().unwrap_or_default(),
"location": self.source_code.source_location(edit.start()), "location": location,
"end_location": self.source_code.source_location(edit.end()) "end_location": end_location
}); });
s.serialize_element(&value)?; s.serialize_element(&value)?;
@ -104,7 +168,10 @@ impl Serialize for ExpandedEdits<'_> {
mod tests { mod tests {
use insta::assert_snapshot; use insta::assert_snapshot;
use crate::message::tests::{capture_emitter_output, create_messages}; use crate::message::tests::{
capture_emitter_notebook_output, capture_emitter_output, create_messages,
create_notebook_messages,
};
use crate::message::JsonEmitter; use crate::message::JsonEmitter;
#[test] #[test]
@ -114,4 +181,13 @@ mod tests {
assert_snapshot!(content); assert_snapshot!(content);
} }
#[test]
fn notebook_output() {
let mut emitter = JsonEmitter;
let (messages, notebook_indexes) = create_notebook_messages();
let content = capture_emitter_notebook_output(&mut emitter, &messages, &notebook_indexes);
assert_snapshot!(content);
}
} }

View file

@ -11,11 +11,11 @@ impl Emitter for JsonLinesEmitter {
&mut self, &mut self,
writer: &mut dyn Write, writer: &mut dyn Write,
messages: &[Message], messages: &[Message],
_context: &EmitterContext, context: &EmitterContext,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let mut w = writer; let mut w = writer;
for message in messages { for message in messages {
serde_json::to_writer(&mut w, &message_to_json_value(message))?; serde_json::to_writer(&mut w, &message_to_json_value(message, context))?;
w.write_all(b"\n")?; w.write_all(b"\n")?;
} }
Ok(()) Ok(())
@ -27,7 +27,10 @@ mod tests {
use insta::assert_snapshot; use insta::assert_snapshot;
use crate::message::json_lines::JsonLinesEmitter; use crate::message::json_lines::JsonLinesEmitter;
use crate::message::tests::{capture_emitter_output, create_messages}; use crate::message::tests::{
capture_emitter_notebook_output, capture_emitter_output, create_messages,
create_notebook_messages,
};
#[test] #[test]
fn output() { fn output() {
@ -36,4 +39,13 @@ mod tests {
assert_snapshot!(content); assert_snapshot!(content);
} }
#[test]
fn notebook_output() {
let mut emitter = JsonLinesEmitter;
let (messages, notebook_indexes) = create_notebook_messages();
let content = capture_emitter_notebook_output(&mut emitter, &messages, &notebook_indexes);
assert_snapshot!(content);
}
} }

View file

@ -0,0 +1,105 @@
---
source: crates/ruff_linter/src/message/json.rs
expression: content
---
[
{
"cell": 1,
"code": "F401",
"end_location": {
"column": 10,
"row": 2
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "safe",
"edits": [
{
"content": "",
"end_location": {
"column": 10,
"row": 2
},
"location": {
"column": 1,
"row": 2
}
}
],
"message": "Remove unused import: `os`"
},
"location": {
"column": 8,
"row": 2
},
"message": "`os` imported but unused",
"noqa_row": 2,
"url": "https://docs.astral.sh/ruff/rules/unused-import"
},
{
"cell": 2,
"code": "F401",
"end_location": {
"column": 12,
"row": 2
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "safe",
"edits": [
{
"content": "",
"end_location": {
"column": 1,
"row": 3
},
"location": {
"column": 1,
"row": 2
}
}
],
"message": "Remove unused import: `math`"
},
"location": {
"column": 8,
"row": 2
},
"message": "`math` imported but unused",
"noqa_row": 2,
"url": "https://docs.astral.sh/ruff/rules/unused-import"
},
{
"cell": 3,
"code": "F841",
"end_location": {
"column": 6,
"row": 4
},
"filename": "notebook.ipynb",
"fix": {
"applicability": "unsafe",
"edits": [
{
"content": "",
"end_location": {
"column": 10,
"row": 4
},
"location": {
"column": 1,
"row": 4
}
}
],
"message": "Remove assignment to unused variable `x`"
},
"location": {
"column": 5,
"row": 4
},
"message": "Local variable `x` is assigned to but never used",
"noqa_row": 4,
"url": "https://docs.astral.sh/ruff/rules/unused-variable"
}
]

View file

@ -4,6 +4,7 @@ expression: content
--- ---
[ [
{ {
"cell": null,
"code": "F401", "code": "F401",
"end_location": { "end_location": {
"column": 10, "column": 10,
@ -36,6 +37,7 @@ expression: content
"url": "https://docs.astral.sh/ruff/rules/unused-import" "url": "https://docs.astral.sh/ruff/rules/unused-import"
}, },
{ {
"cell": null,
"code": "F841", "code": "F841",
"end_location": { "end_location": {
"column": 6, "column": 6,
@ -68,6 +70,7 @@ expression: content
"url": "https://docs.astral.sh/ruff/rules/unused-variable" "url": "https://docs.astral.sh/ruff/rules/unused-variable"
}, },
{ {
"cell": null,
"code": "F821", "code": "F821",
"end_location": { "end_location": {
"column": 5, "column": 5,

View file

@ -0,0 +1,8 @@
---
source: crates/ruff_linter/src/message/json_lines.rs
expression: content
---
{"cell":1,"code":"F401","end_location":{"column":10,"row":2},"filename":"notebook.ipynb","fix":{"applicability":"safe","edits":[{"content":"","end_location":{"column":10,"row":2},"location":{"column":1,"row":2}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":2},"message":"`os` imported but unused","noqa_row":2,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"cell":2,"code":"F401","end_location":{"column":12,"row":2},"filename":"notebook.ipynb","fix":{"applicability":"safe","edits":[{"content":"","end_location":{"column":1,"row":3},"location":{"column":1,"row":2}}],"message":"Remove unused import: `math`"},"location":{"column":8,"row":2},"message":"`math` imported but unused","noqa_row":2,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"cell":3,"code":"F841","end_location":{"column":6,"row":4},"filename":"notebook.ipynb","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":4},"location":{"column":1,"row":4}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":4},"message":"Local variable `x` is assigned to but never used","noqa_row":4,"url":"https://docs.astral.sh/ruff/rules/unused-variable"}

View file

@ -2,7 +2,7 @@
source: crates/ruff_linter/src/message/json_lines.rs source: crates/ruff_linter/src/message/json_lines.rs
expression: content expression: content
--- ---
{"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/unused-import"} {"cell":null,"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/unused-import"}
{"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6,"url":"https://docs.astral.sh/ruff/rules/unused-variable"} {"cell":null,"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6,"url":"https://docs.astral.sh/ruff/rules/unused-variable"}
{"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/undefined-name"} {"cell":null,"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/undefined-name"}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use ruff_source_file::OneIndexed; use ruff_source_file::{OneIndexed, SourceLocation};
/// Jupyter Notebook indexing table /// Jupyter Notebook indexing table
/// ///
@ -32,4 +32,17 @@ impl NotebookIndex {
pub fn cell_row(&self, row: OneIndexed) -> Option<OneIndexed> { pub fn cell_row(&self, row: OneIndexed) -> Option<OneIndexed> {
self.row_to_row_in_cell.get(row.to_zero_indexed()).copied() self.row_to_row_in_cell.get(row.to_zero_indexed()).copied()
} }
/// Translates the given source location based on the indexing table.
///
/// This will translate the row/column in the concatenated source code
/// to the row/column in the Jupyter Notebook.
pub fn translate_location(&self, source_location: &SourceLocation) -> SourceLocation {
SourceLocation {
row: self
.cell_row(source_location.row)
.unwrap_or(OneIndexed::MIN),
column: source_location.column,
}
}
} }

View file

@ -68,6 +68,10 @@ impl<'src, 'index> SourceCode<'src, 'index> {
self.index.line_end(line, self.text) self.index.line_end(line, self.text)
} }
pub fn line_end_exclusive(&self, line: OneIndexed) -> TextSize {
self.index.line_end_exclusive(line, self.text)
}
pub fn line_range(&self, line: OneIndexed) -> TextRange { pub fn line_range(&self, line: OneIndexed) -> TextRange {
self.index.line_range(line, self.text) self.index.line_range(line, self.text)
} }

View file

@ -184,6 +184,20 @@ impl LineIndex {
} }
} }
/// Returns the [byte offset](TextSize) of the `line`'s end.
/// The offset is the end of the line, excluding the newline character ending the line (if any).
pub fn line_end_exclusive(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
let starts = self.line_starts();
// If start-of-line position after last line
if row_index.saturating_add(1) >= starts.len() {
contents.text_len()
} else {
starts[row_index + 1] - TextSize::new(1)
}
}
/// Returns the [`TextRange`] of the `line` with the given index. /// Returns the [`TextRange`] of the `line` with the given index.
/// The start points to the first character's [byte offset](TextSize), the end up to, and including /// The start points to the first character's [byte offset](TextSize), the end up to, and including
/// the newline character ending the line (if any). /// the newline character ending the line (if any).