Mirror of https://github.com/astral-sh/ruff.git (synced 2025-10-07 17:10:31 +00:00)
Maintain consistency when deserializing to JSON (#5114)
## Summary

Maintain consistency while serializing a Jupyter notebook to JSON. The following changes were made:

1. Use a string array to store the source value, as that's the nbformat default (`nbformat/v4/nbjson.py` L56-L57 at `5781720423`).
2. Remove unused structs and enums.
3. Reorder the keys alphabetically, as that's the nbformat default (`nbformat/v4/nbjson.py` L51 at `5781720423`).

### Side effect

Removing the `preserve_order` feature means that the keys in JSON output (`--format json`) will be in alphabetical order. This is because the value is represented using `serde_json::Value`, which is internally a `BTreeMap` and is therefore sorted by its string keys. For posterity: if this turns out to be not ideal, we could define a struct representing the JSON object, and the order of the struct fields would then determine the order of the keys in the JSON string.

## Test Plan

Add a test case asserting the raw JSON string.
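The two serialization choices above can be illustrated with a small, self-contained sketch. This is not the code from this commit (the commit's own `SortAlphabetically` wrapper appears in the schema changes below); it only assumes `serde` with the `derive` feature and `serde_json` built *without* `preserve_order`, and the struct and field names are made up for illustration:

```rust
use serde::Serialize;

#[derive(Serialize)]
struct Example {
    z_field: i32,
    a_field: i32,
}

fn main() -> serde_json::Result<()> {
    let example = Example { z_field: 1, a_field: 2 };

    // Serializing the struct directly keeps the field declaration order.
    assert_eq!(serde_json::to_string(&example)?, r#"{"z_field":1,"a_field":2}"#);

    // Round-tripping through `serde_json::Value` first (the idea behind the
    // `SortAlphabetically` wrapper in this commit) yields alphabetical key
    // order, because `Value`'s object map is a `BTreeMap` when the
    // `preserve_order` feature is disabled.
    let value = serde_json::to_value(&example)?;
    assert_eq!(serde_json::to_string(&value)?, r#"{"a_field":2,"z_field":1}"#);

    // nbformat's default on-disk representation stores a cell's source as an
    // array of lines, each keeping its trailing newline; `split_inclusive`
    // approximates that split here.
    let source: Vec<&str> = "import math\n\nmath.pi".split_inclusive('\n').collect();
    assert_eq!(source, ["import math\n", "\n", "math.pi"]);

    Ok(())
}
```

The `Cargo.toml` change below drops the `preserve_order` feature, which is what makes the `BTreeMap`-backed ordering the behavior in effect.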
parent 94abf7f088
commit 48f4f2d63d
15 changed files with 346 additions and 370 deletions
Cargo.lock (generated, 1 line changed)

@@ -2368,7 +2368,6 @@ version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
@@ -43,7 +43,7 @@ rustpython-literal = { git = "https://github.com/astral-sh/RustPython-Parser.git
rustpython-parser = { git = "https://github.com/astral-sh/RustPython-Parser.git", rev = "0dc8fdf52d146698c5bcf0b842fddc9e398ad8db", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
schemars = { version = "0.8.12" }
serde = { version = "1.0.152", features = ["derive"] }
serde_json = { version = "1.0.93", features = ["preserve_order"] }
serde_json = { version = "1.0.93" }
shellexpand = { version = "3.0.0" }
similar = { version = "2.2.1", features = ["inline"] }
smallvec = { version = "1.10.0" }
crates/ruff/resources/test/fixtures/jupyter/after_fix.ipynb (vendored, new file, 37 lines)

@@ -0,0 +1,37 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
crates/ruff/resources/test/fixtures/jupyter/before_fix.ipynb (vendored, new file, 38 lines)

@@ -0,0 +1,38 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import os\n",
"\n",
"math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": ["def foo():\n", " pass\n", "\n", "%timeit foo()"]
}

@@ -1,5 +1,6 @@
{
"cell_type": "markdown",
"id": "1",
"metadata": {},
"source": ["This is a markdown cell\n", "Some more content"]
}

@@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": ["def foo():\n", " pass"]
}

@@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": "%timeit print('hello world')"
}
@@ -1,6 +1,6 @@
use std::cmp::Ordering;
use std::fs::File;
use std::io::{BufReader, BufWriter, Cursor, Write};
use std::io::{BufReader, BufWriter, Write};
use std::iter;
use std::path::Path;

@@ -10,12 +10,12 @@ use serde::Serialize;
use serde_json::error::Category;

use ruff_diagnostics::Diagnostic;
use ruff_python_whitespace::NewlineWithTrailingNewline;
use ruff_python_whitespace::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use ruff_text_size::{TextRange, TextSize};

use crate::autofix::source_map::{SourceMap, SourceMarker};
use crate::jupyter::index::JupyterIndex;
use crate::jupyter::{Cell, CellType, RawNotebook, SourceValue};
use crate::jupyter::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
use crate::rules::pycodestyle::rules::SyntaxError;
use crate::IOError;

@@ -34,9 +34,9 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> {
})?;
let code = notebook.content().to_string();
notebook.update_cell_content(&code);
let mut buffer = Cursor::new(Vec::new());
let mut buffer = BufWriter::new(Vec::new());
notebook.write_inner(&mut buffer)?;
Ok(String::from_utf8(buffer.into_inner())?)
Ok(String::from_utf8(buffer.into_inner()?)?)
}

/// Return `true` if the [`Path`] appears to be that of a jupyter notebook file (`.ipynb`).

@@ -49,18 +49,37 @@ pub fn is_jupyter_notebook(path: &Path) -> bool {
}

impl Cell {
/// Return the [`SourceValue`] of the cell.
fn source(&self) -> &SourceValue {
match self {
Cell::Code(cell) => &cell.source,
Cell::Markdown(cell) => &cell.source,
Cell::Raw(cell) => &cell.source,
}
}

/// Update the [`SourceValue`] of the cell.
fn set_source(&mut self, source: SourceValue) {
match self {
Cell::Code(cell) => cell.source = source,
Cell::Markdown(cell) => cell.source = source,
Cell::Raw(cell) => cell.source = source,
}
}

/// Return `true` if it's a valid code cell.
///
/// A valid code cell is a cell where the type is [`CellType::Code`] and the
/// A valid code cell is a cell where the cell type is [`Cell::Code`] and the
/// source doesn't contain a magic, shell or help command.
fn is_valid_code_cell(&self) -> bool {
if self.cell_type != CellType::Code {
return false;
}
let source = match self {
Cell::Code(cell) => &cell.source,
_ => return false,
};
// Ignore a cell if it contains a magic command. There could be valid
// Python code as well, but we'll ignore that for now.
// TODO(dhruvmanila): https://github.com/psf/black/blob/main/src/black/handle_ipynb_magics.py
!match &self.source {
!match source {
SourceValue::String(string) => string.lines().any(|line| {
MAGIC_PREFIX
.iter()
@@ -92,7 +111,7 @@ pub struct Notebook {
/// The offsets of each cell in the concatenated source code. This includes
/// the first and last character offsets as well.
cell_offsets: Vec<TextSize>,
/// The cell numbers of all valid code cells in the notebook.
/// The cell index of all valid code cells in the notebook.
valid_code_cells: Vec<u32>,
}

@@ -108,7 +127,7 @@ impl Notebook {
TextRange::default(),
)
})?);
let notebook: RawNotebook = match serde_json::from_reader(reader) {
let raw_notebook: RawNotebook = match serde_json::from_reader(reader) {
Ok(notebook) => notebook,
Err(err) => {
// Translate the error into a diagnostic

@@ -176,34 +195,34 @@ impl Notebook {
};

// v4 is what everybody uses
if notebook.nbformat != 4 {
if raw_notebook.nbformat != 4 {
// bail because we should have already failed at the json schema stage
return Err(Box::new(Diagnostic::new(
SyntaxError {
message: format!(
"Expected Jupyter Notebook format 4, found {}",
notebook.nbformat
raw_notebook.nbformat
),
},
TextRange::default(),
)));
}

let valid_code_cells = notebook
let valid_code_cells = raw_notebook
.cells
.iter()
.enumerate()
.filter(|(_, cell)| cell.is_valid_code_cell())
.map(|(pos, _)| u32::try_from(pos).unwrap())
.map(|(idx, _)| u32::try_from(idx).unwrap())
.collect::<Vec<_>>();

let mut contents = Vec::with_capacity(valid_code_cells.len());
let mut current_offset = TextSize::from(0);
let mut cell_offsets = Vec::with_capacity(notebook.cells.len());
let mut cell_offsets = Vec::with_capacity(valid_code_cells.len());
cell_offsets.push(TextSize::from(0));

for &pos in &valid_code_cells {
let cell_contents = match &notebook.cells[pos as usize].source {
for &idx in &valid_code_cells {
let cell_contents = match &raw_notebook.cells[idx as usize].source() {
SourceValue::String(string) => string.clone(),
SourceValue::StringArray(string_array) => string_array.join(""),
};

@@ -213,7 +232,7 @@ impl Notebook {
}

Ok(Self {
raw: notebook,
raw: raw_notebook,
index: OnceCell::new(),
// The additional newline at the end is to maintain consistency for
// all cells. These newlines will be removed before updating the

@@ -267,7 +286,7 @@ impl Notebook {
/// can happen only if the cell offsets were not updated before calling
/// this method or the offsets were updated incorrectly.
fn update_cell_content(&mut self, transformed: &str) {
for (&pos, (start, end)) in self
for (&idx, (start, end)) in self
.valid_code_cells
.iter()
.zip(self.cell_offsets.iter().tuple_windows::<(_, _)>())

@@ -275,22 +294,25 @@ impl Notebook {
let cell_content = transformed
.get(start.to_usize()..end.to_usize())
.unwrap_or_else(|| {
panic!("Transformed content out of bounds ({start:?}..{end:?}) for cell {pos}");
panic!(
"Transformed content out of bounds ({start:?}..{end:?}) for cell at {idx:?}"
);
});
self.raw.cells[pos as usize].source = SourceValue::String(
cell_content
self.raw.cells[idx as usize].set_source(SourceValue::StringArray(
UniversalNewlineIterator::from(
// We only need to strip the trailing newline which we added
// while concatenating the cell contents.
.strip_suffix('\n')
.unwrap_or(cell_content)
.to_string(),
);
cell_content.strip_suffix('\n').unwrap_or(cell_content),
)
.map(|line| line.as_full_str().to_string())
.collect::<Vec<_>>(),
));
}
}

/// Build and return the [`JupyterIndex`].
///
/// # Notes
/// ## Notes
///
/// Empty cells don't have any newlines, but there's a single visible line
/// in the UI. That single line needs to be accounted for.

@@ -317,8 +339,8 @@ impl Notebook {
let mut row_to_cell = vec![0];
let mut row_to_row_in_cell = vec![0];

for &pos in &self.valid_code_cells {
let line_count = match &self.raw.cells[pos as usize].source {
for &idx in &self.valid_code_cells {
let line_count = match &self.raw.cells[idx as usize].source() {
SourceValue::String(string) => {
if string.is_empty() {
1

@@ -336,7 +358,7 @@ impl Notebook {
}
}
};
row_to_cell.extend(iter::repeat(pos + 1).take(line_count as usize));
row_to_cell.extend(iter::repeat(idx + 1).take(line_count as usize));
row_to_row_in_cell.extend(1..=line_count);
}

@@ -390,7 +412,7 @@ impl Notebook {
// https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
let mut ser = serde_json::Serializer::with_formatter(writer, formatter);
self.raw.serialize(&mut ser)?;
SortAlphabetically(&self.raw).serialize(&mut ser)?;
Ok(())
}

@@ -404,6 +426,7 @@ impl Notebook {

#[cfg(test)]
mod test {
use std::io::BufWriter;
use std::path::Path;

use anyhow::Result;

@@ -536,4 +559,21 @@ print("after empty cells")
assert_messages!(diagnostics, path, source_kind);
Ok(())
}

#[test]
fn test_json_consistency() -> Result<()> {
let path = "before_fix.ipynb".to_string();
let (_, source_kind) = test_notebook_path(
path,
Path::new("after_fix.ipynb"),
&settings::Settings::for_rule(Rule::UnusedImport),
)?;
let mut writer = BufWriter::new(Vec::new());
source_kind.expect_jupyter().write_inner(&mut writer)?;
let actual = String::from_utf8(writer.into_inner()?)?;
let expected =
std::fs::read_to_string(test_resource_path("fixtures/jupyter/after_fix.ipynb"))?;
assert_eq!(actual, expected);
Ok(())
}
}
@@ -5,6 +5,7 @@
//! Jupyter Notebook v4.5 JSON schema.
//!
//! The following changes were made to the generated version:
//! * Only keep the required structs and enums.
//! * `Cell::id` is optional because it wasn't required <v4.5
//! * `#[serde(deny_unknown_fields)]` was added where the schema had
//! `"additionalProperties": false`

@@ -12,26 +13,84 @@
//! `"additionalProperties": true` as preparation for round-trip support.
//! * `#[serde(skip_serializing_none)]` was added to all structs where one or
//! more fields were optional to avoid serializing `null` values.
//! * `Output::data` & `Cell::attachments` were changed to `Value` because
//! the scheme had `patternProperties`.
//! * `Cell::execution_count` is a required property only for code cells, but
//! we serialize it for all cells. This is because we can't know if a cell is
//! a code cell or not without looking at the `cell_type` property, which
//! would require a custom serializer.

use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};
use serde_json::Value;
use serde_with::skip_serializing_none;

fn sort_alphabetically<T: Serialize, S: serde::Serializer>(
value: &T,
serializer: S,
) -> Result<S::Ok, S::Error> {
let value = serde_json::to_value(value).map_err(serde::ser::Error::custom)?;
value.serialize(serializer)
}

/// This is used to serialize any value implementing [`Serialize`] alphabetically.
///
/// The reason for this is to maintain consistency in the generated JSON string,
/// which is useful for diffing. The default serializer keeps the order of the
/// fields as they are defined in the struct, which will not be consistent when
/// there are `extra` fields.
///
/// # Example
///
/// ```
/// use std::collections::BTreeMap;
///
/// use serde::Serialize;
///
/// use ruff::jupyter::SortAlphabetically;
///
/// #[derive(Serialize)]
/// struct MyStruct {
/// a: String,
/// #[serde(flatten)]
/// extra: BTreeMap<String, String>,
/// b: String,
/// }
///
/// let my_struct = MyStruct {
/// a: "a".to_string(),
/// extra: BTreeMap::from([
/// ("d".to_string(), "d".to_string()),
/// ("c".to_string(), "c".to_string()),
/// ]),
/// b: "b".to_string(),
/// };
///
/// let serialized = serde_json::to_string_pretty(&SortAlphabetically(&my_struct)).unwrap();
/// assert_eq!(
/// serialized,
/// r#"{
/// "a": "a",
/// "b": "b",
/// "c": "c",
/// "d": "d"
/// }"#
/// );
/// ```
#[derive(Serialize)]
pub struct SortAlphabetically<T: Serialize>(#[serde(serialize_with = "sort_alphabetically")] pub T);

/// The root of the JSON of a Jupyter Notebook
///
/// Generated by <https://app.quicktype.io/> from
/// <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
/// Jupyter Notebook v4.5 JSON schema.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct RawNotebook {
/// Array of cells of the current notebook.
pub cells: Vec<Cell>,
/// Notebook root-level metadata.
pub metadata: JupyterNotebookMetadata,
pub metadata: RawNotebookMetadata,
/// Notebook format (major number). Incremented between backwards incompatible changes to the
/// notebook format.
pub nbformat: i64,
@@ -40,119 +99,73 @@ pub struct RawNotebook {
pub nbformat_minor: i64,
}

/// String identifying the type of cell.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "cell_type")]
pub enum Cell {
#[serde(rename = "code")]
Code(CodeCell),
#[serde(rename = "markdown")]
Markdown(MarkdownCell),
#[serde(rename = "raw")]
Raw(RawCell),
}

/// Notebook raw nbconvert cell.
///
/// Notebook markdown cell.
///
/// Notebook code cell.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Cell {
pub attachments: Option<HashMap<String, HashMap<String, Value>>>,
/// String identifying the type of cell.
pub cell_type: CellType,
pub struct RawCell {
pub attachments: Option<Value>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: CellMetadata,
pub metadata: Value,
pub source: SourceValue,
}

/// Notebook markdown cell.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MarkdownCell {
pub attachments: Option<Value>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
pub source: SourceValue,
}

/// Notebook code cell.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct CodeCell {
/// The code cell's prompt number. Will be null if the cell has not been run.
pub execution_count: Option<i64>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
/// Execution, display, or stream outputs.
pub outputs: Option<Vec<Output>>,
}

/// Cell-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct CellMetadata {
/// Raw cell metadata format for nbconvert.
pub format: Option<String>,
/// Official Jupyter Metadata for Raw Cells
///
/// Official Jupyter Metadata for Markdown Cells
///
/// Official Jupyter Metadata for Code Cells
pub jupyter: Option<HashMap<String, Option<Value>>>,
pub name: Option<String>,
pub tags: Option<Vec<String>>,
/// Whether the cell's output is collapsed/expanded.
pub collapsed: Option<bool>,
/// Execution time for the code in the cell. This tracks time at which messages are received
/// from iopub or shell channels
pub execution: Option<Execution>,
/// Whether the cell's output is scrolled, unscrolled, or autoscrolled.
pub scrolled: Option<ScrolledUnion>,
/// Custom added: round-trip support
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
}

/// Execution time for the code in the cell. This tracks time at which messages are received
/// from iopub or shell channels
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Execution {
/// header.date (in ISO 8601 format) of iopub channel's execute_input message. It indicates
/// the time at which the kernel broadcasts an execute_input message to connected frontends
#[serde(rename = "iopub.execute_input")]
pub iopub_execute_input: Option<String>,
/// header.date (in ISO 8601 format) of iopub channel's kernel status message when the status
/// is 'busy'
#[serde(rename = "iopub.status.busy")]
pub iopub_status_busy: Option<String>,
/// header.date (in ISO 8601 format) of iopub channel's kernel status message when the status
/// is 'idle'. It indicates the time at which kernel finished processing the associated
/// request
#[serde(rename = "iopub.status.idle")]
pub iopub_status_idle: Option<String>,
/// header.date (in ISO 8601 format) of the shell channel's execute_reply message. It
/// indicates the time at which the execute_reply message was created
#[serde(rename = "shell.execute_reply")]
pub shell_execute_reply: Option<String>,
}

/// Result of executing a code cell.
///
/// Data displayed as a result of code cell execution.
///
/// Stream output from a code cell.
///
/// Output of an error that occurred during code cell execution.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Output {
pub data: Option<HashMap<String, Value>>,
/// A result's prompt number.
pub execution_count: Option<i64>,
pub metadata: Option<HashMap<String, Option<Value>>>,
/// Type of cell output.
pub output_type: OutputType,
/// The name of the stream (stdout, stderr).
pub name: Option<String>,
/// The stream's text output, represented as an array of strings.
pub text: Option<TextUnion>,
/// The name of the error.
pub ename: Option<String>,
/// The value, or message, of the error.
pub evalue: Option<String>,
/// The error's traceback, represented as an array of strings.
pub traceback: Option<Vec<String>>,
pub outputs: Vec<Value>,
pub source: SourceValue,
}

/// Notebook root-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct JupyterNotebookMetadata {
pub struct RawNotebookMetadata {
/// The author(s) of the notebook document
pub authors: Option<Vec<Option<Value>>>,
pub authors: Option<Value>,
/// Kernel information.
pub kernelspec: Option<Kernelspec>,
pub kernelspec: Option<Value>,
/// Kernel information.
pub language_info: Option<LanguageInfo>,
/// Original notebook format (major number) before converting the notebook between versions.

@@ -160,21 +173,9 @@ pub struct JupyterNotebookMetadata {
pub orig_nbformat: Option<i64>,
/// The title of the notebook document
pub title: Option<String>,
/// Custom added: round-trip support
/// For additional properties.
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
}

/// Kernel information.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Kernelspec {
/// Name to display in UI.
pub display_name: String,
/// Name of the kernel specification.
pub name: String,
/// Custom added: round-trip support
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
pub extra: BTreeMap<String, Value>,
}

/// Kernel information.

@@ -182,7 +183,7 @@ pub struct Kernelspec {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LanguageInfo {
/// The codemirror mode to use for code in this language.
pub codemirror_mode: Option<CodemirrorMode>,
pub codemirror_mode: Option<Value>,
/// The file extension for files in this language.
pub file_extension: Option<String>,
/// The mimetype corresponding to files in this language.

@@ -191,9 +192,9 @@ pub struct LanguageInfo {
pub name: String,
/// The pygments lexer to use for code in this language.
pub pygments_lexer: Option<String>,
/// Custom added: round-trip support
/// For additional properties.
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
pub extra: BTreeMap<String, Value>,
}

/// mimetype output (e.g. text/plain), represented as either an array of strings or a

@@ -208,62 +209,3 @@ pub enum SourceValue {
String(String),
StringArray(Vec<String>),
}

/// Whether the cell's output is scrolled, unscrolled, or autoscrolled.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum ScrolledUnion {
Bool(bool),
Enum(ScrolledEnum),
}

/// mimetype output (e.g. text/plain), represented as either an array of strings or a
/// string.
///
/// Contents of the cell, represented as an array of lines.
///
/// The stream's text output, represented as an array of strings.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum TextUnion {
String(String),
StringArray(Vec<String>),
}

/// The codemirror mode to use for code in this language.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum CodemirrorMode {
AnythingMap(HashMap<String, Option<Value>>),
String(String),
}

/// String identifying the type of cell.
#[derive(Debug, Serialize, Deserialize, PartialEq, Copy, Clone)]
pub enum CellType {
#[serde(rename = "code")]
Code,
#[serde(rename = "markdown")]
Markdown,
#[serde(rename = "raw")]
Raw,
}

#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
pub enum ScrolledEnum {
#[serde(rename = "auto")]
Auto,
}

/// Type of cell output.
#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
pub enum OutputType {
#[serde(rename = "display_data")]
DisplayData,
#[serde(rename = "error")]
Error,
#[serde(rename = "execute_result")]
ExecuteResult,
#[serde(rename = "stream")]
Stream,
}
@@ -5,38 +5,38 @@ expression: redact_fingerprint(&content)
[
{
"description": "(F401) `os` imported but unused",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "fib.py",
"lines": {
"begin": 1,
"end": 1
}
}
},
"path": "fib.py"
},
"severity": "major"
},
{
"description": "(F841) Local variable `x` is assigned to but never used",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "fib.py",
"lines": {
"begin": 6,
"end": 6
}
}
},
"path": "fib.py"
},
"severity": "major"
},
{
"description": "(F821) Undefined name `a`",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "undef.py",
"lines": {
"begin": 1,
"end": 1
}
}
},
"path": "undef.py"
},
"severity": "major"
}
]
@@ -5,79 +5,79 @@ expression: content
[
{
"code": "F401",
"message": "`os` imported but unused",
"end_location": {
"column": 10,
"row": 1
},
"filename": "fib.py",
"fix": {
"applicability": "Suggested",
"message": "Remove unused import: `os`",
"edits": [
{
"content": "",
"location": {
"row": 1,
"column": 1
},
"end_location": {
"row": 2,
"column": 1
}
}
]
"column": 1,
"row": 2
},
"location": {
"row": 1,
"column": 8
"column": 1,
"row": 1
}
}
],
"message": "Remove unused import: `os`"
},
"end_location": {
"row": 1,
"column": 10
"location": {
"column": 8,
"row": 1
},
"filename": "fib.py",
"message": "`os` imported but unused",
"noqa_row": 1
},
{
"code": "F841",
"message": "Local variable `x` is assigned to but never used",
"end_location": {
"column": 6,
"row": 6
},
"filename": "fib.py",
"fix": {
"applicability": "Suggested",
"message": "Remove assignment to unused variable `x`",
"edits": [
{
"content": "",
"location": {
"row": 6,
"column": 5
},
"end_location": {
"row": 6,
"column": 10
}
}
]
"column": 10,
"row": 6
},
"location": {
"row": 6,
"column": 5
"column": 5,
"row": 6
}
}
],
"message": "Remove assignment to unused variable `x`"
},
"end_location": {
"row": 6,
"column": 6
"location": {
"column": 5,
"row": 6
},
"filename": "fib.py",
"message": "Local variable `x` is assigned to but never used",
"noqa_row": 6
},
{
"code": "F821",
"message": "Undefined name `a`",
"fix": null,
"location": {
"row": 1,
"column": 4
},
"end_location": {
"row": 1,
"column": 5
"column": 5,
"row": 1
},
"filename": "undef.py",
"fix": null,
"location": {
"column": 4,
"row": 1
},
"message": "Undefined name `a`",
"noqa_row": 1
}
]
@@ -1,8 +1,8 @@
---
source: crates/ruff/src/message/jsonlines.rs
source: crates/ruff/src/message/json_lines.rs
expression: content
---
{"code":"F401","message":"`os` imported but unused","fix":{"applicability":"Suggested","message":"Remove unused import: `os`","edits":[{"content":"","location":{"row":1,"column":1},"end_location":{"row":2,"column":1}}]},"location":{"row":1,"column":8},"end_location":{"row":1,"column":10},"filename":"fib.py","noqa_row":1}
{"code":"F841","message":"Local variable `x` is assigned to but never used","fix":{"applicability":"Suggested","message":"Remove assignment to unused variable `x`","edits":[{"content":"","location":{"row":6,"column":5},"end_location":{"row":6,"column":10}}]},"location":{"row":6,"column":5},"end_location":{"row":6,"column":6},"filename":"fib.py","noqa_row":6}
{"code":"F821","message":"Undefined name `a`","fix":null,"location":{"row":1,"column":4},"end_location":{"row":1,"column":5},"filename":"undef.py","noqa_row":1}
{"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"Suggested","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1}
{"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"Suggested","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6}
{"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1}
@@ -236,93 +236,3 @@ with the relevant file contents, the `pyproject.toml` settings, and the following:
}
}
}

#[cfg(test)]
#[cfg(feature = "jupyter_notebook")]
mod test {
use std::path::PathBuf;
use std::str::FromStr;

use anyhow::Result;
use path_absolutize::Absolutize;

use ruff::logging::LogLevel;
use ruff::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy};
use ruff::settings::configuration::{Configuration, RuleSelection};
use ruff::settings::flags::FixMode;
use ruff::settings::flags::{Cache, Noqa};
use ruff::settings::types::SerializationFormat;
use ruff::settings::AllSettings;
use ruff::RuleSelector;

use crate::args::Overrides;
use crate::printer::{Flags, Printer};

use super::run;

#[test]
fn test_jupyter_notebook_integration() -> Result<()> {
let overrides: Overrides = Overrides {
select: Some(vec![
RuleSelector::from_str("B")?,
RuleSelector::from_str("F")?,
]),
..Default::default()
};

let mut configuration = Configuration::default();
configuration.rule_selections.push(RuleSelection {
select: Some(vec![
RuleSelector::from_str("B")?,
RuleSelector::from_str("F")?,
]),
..Default::default()
});

let root_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("ruff")
.join("resources")
.join("test")
.join("fixtures")
.join("jupyter");

let diagnostics = run(
&[root_path.join("valid.ipynb")],
&PyprojectConfig::new(
PyprojectDiscoveryStrategy::Fixed,
AllSettings::from_configuration(configuration, &root_path)?,
None,
),
&overrides,
Cache::Disabled,
Noqa::Enabled,
FixMode::Generate,
)?;

let printer = Printer::new(
SerializationFormat::Text,
LogLevel::Default,
FixMode::Generate,
Flags::SHOW_VIOLATIONS,
);
let mut writer: Vec<u8> = Vec::new();
// Mute the terminal color codes.
colored::control::set_override(false);
printer.write_once(&diagnostics, &mut writer)?;
// TODO(konstin): Set jupyter notebooks as none-fixable for now
// TODO(konstin): Make jupyter notebooks fixable
let expected = format!(
"{valid_ipynb}:cell 1:2:5: F841 [*] Local variable `x` is assigned to but never used
{valid_ipynb}:cell 3:1:24: B006 Do not use mutable data structures for argument defaults
Found 2 errors.
[*] 1 potentially fixable with the --fix option.
",
valid_ipynb = root_path.join("valid.ipynb").absolutize()?.display()
);

assert_eq!(expected, String::from_utf8(writer)?);

Ok(())
}
}
@@ -91,33 +91,33 @@ fn stdin_json() -> Result<()> {
r#"[
{{
"code": "F401",
"message": "`os` imported but unused",
"end_location": {{
"column": 10,
"row": 1
}},
"filename": "{file_path}",
"fix": {{
"applicability": "Automatic",
"message": "Remove unused import: `os`",
"edits": [
{{
"content": "",
"location": {{
"row": 1,
"column": 1
}},
"end_location": {{
"row": 2,
"column": 1
}}
}}
]
"column": 1,
"row": 2
}},
"location": {{
"row": 1,
"column": 8
"column": 1,
"row": 1
}}
}}
],
"message": "Remove unused import: `os`"
}},
"end_location": {{
"row": 1,
"column": 10
"location": {{
"column": 8,
"row": 1
}},
"filename": "{file_path}",
"message": "`os` imported but unused",
"noqa_row": 1
}}
]"#