mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-28 15:33:50 +00:00
Create ruff_notebook
crate (#7039)
## Summary This PR moves `ruff/jupyter` into its own `ruff_notebook` crate. Beyond the move itself, there were a few challenges: 1. `ruff_notebook` relies on the source map abstraction. I've moved the source map into `ruff_diagnostics`, since it doesn't have any dependencies on its own and is used alongside diagnostics. 2. `ruff_notebook` has a couple tests for end-to-end linting and autofixing. I had to leave these tests in `ruff` itself. 3. We had code in `ruff/jupyter` that relied on Python lexing, in order to provide a more targeted error message in the event that a user saves a `.py` file with a `.ipynb` extension. I removed this in order to avoid a dependency on the parser, it felt like it wasn't worth retaining just for that dependency. ## Test Plan `cargo test`
This commit is contained in:
parent
08e246764f
commit
afcd00da56
48 changed files with 274 additions and 253 deletions
24
crates/ruff_notebook/src/index.rs
Normal file
24
crates/ruff_notebook/src/index.rs
Normal file
|
@ -0,0 +1,24 @@
|
|||
/// Jupyter Notebook indexing table
///
/// When we lint a jupyter notebook, we have to translate the row/column based on
/// [`ruff_text_size::TextSize`] to jupyter notebook cell/row/column.
///
/// Index 0 of both vectors is a sentinel entry so that 1-based rows can be
/// used directly as indices.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NotebookIndex {
    /// Enter a row (1-based), get back the cell (1-based)
    pub(super) row_to_cell: Vec<u32>,
    /// Enter a row (1-based), get back the row in cell (1-based)
    pub(super) row_to_row_in_cell: Vec<u32>,
}

impl NotebookIndex {
    /// Returns the cell number (1-based) for the given row (1-based).
    ///
    /// Returns `None` if the row is outside the indexed range.
    pub fn cell(&self, row: usize) -> Option<u32> {
        self.row_to_cell.get(row).copied()
    }

    /// Returns the row number (1-based) in the cell (1-based) for the
    /// given row (1-based).
    ///
    /// Returns `None` if the row is outside the indexed range.
    pub fn cell_row(&self, row: usize) -> Option<u32> {
        self.row_to_row_in_cell.get(row).copied()
    }
}
|
9
crates/ruff_notebook/src/lib.rs
Normal file
9
crates/ruff_notebook/src/lib.rs
Normal file
|
@ -0,0 +1,9 @@
|
|||
//! Utils for reading and writing jupyter notebooks

// Re-export the submodules' public items at the crate root.
pub use index::*;
pub use notebook::*;
pub use schema::*;

mod index;
mod notebook;
mod schema;
|
523
crates/ruff_notebook/src/notebook.rs
Normal file
523
crates/ruff_notebook/src/notebook.rs
Normal file
|
@ -0,0 +1,523 @@
|
|||
use std::cmp::Ordering;
|
||||
use std::fmt::Display;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
|
||||
use std::path::Path;
|
||||
use std::{io, iter};
|
||||
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use serde::Serialize;
|
||||
use serde_json::error::Category;
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use ruff_diagnostics::{SourceMap, SourceMarker};
|
||||
use ruff_source_file::{NewlineWithTrailingNewline, UniversalNewlineIterator};
|
||||
use ruff_text_size::TextSize;
|
||||
|
||||
use crate::index::NotebookIndex;
|
||||
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
|
||||
|
||||
/// Run round-trip source code generation on a given Jupyter notebook file path.
|
||||
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
|
||||
let mut notebook = Notebook::from_path(path).map_err(|err| {
|
||||
anyhow::anyhow!(
|
||||
"Failed to read notebook file `{}`: {:?}",
|
||||
path.display(),
|
||||
err
|
||||
)
|
||||
})?;
|
||||
let code = notebook.source_code().to_string();
|
||||
notebook.update_cell_content(&code);
|
||||
let mut writer = Vec::new();
|
||||
notebook.write(&mut writer)?;
|
||||
Ok(String::from_utf8(writer)?)
|
||||
}
|
||||
|
||||
impl Display for SourceValue {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
SourceValue::String(string) => f.write_str(string),
|
||||
SourceValue::StringArray(string_array) => {
|
||||
for string in string_array {
|
||||
f.write_str(string)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Cell {
|
||||
/// Return the [`SourceValue`] of the cell.
|
||||
fn source(&self) -> &SourceValue {
|
||||
match self {
|
||||
Cell::Code(cell) => &cell.source,
|
||||
Cell::Markdown(cell) => &cell.source,
|
||||
Cell::Raw(cell) => &cell.source,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the [`SourceValue`] of the cell.
|
||||
fn set_source(&mut self, source: SourceValue) {
|
||||
match self {
|
||||
Cell::Code(cell) => cell.source = source,
|
||||
Cell::Markdown(cell) => cell.source = source,
|
||||
Cell::Raw(cell) => cell.source = source,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return `true` if it's a valid code cell.
|
||||
///
|
||||
/// A valid code cell is a cell where the cell type is [`Cell::Code`] and the
|
||||
/// source doesn't contain a cell magic.
|
||||
fn is_valid_code_cell(&self) -> bool {
|
||||
let source = match self {
|
||||
Cell::Code(cell) => &cell.source,
|
||||
_ => return false,
|
||||
};
|
||||
// Ignore cells containing cell magic as they act on the entire cell
|
||||
// as compared to line magic which acts on a single line.
|
||||
!match source {
|
||||
SourceValue::String(string) => string
|
||||
.lines()
|
||||
.any(|line| line.trim_start().starts_with("%%")),
|
||||
SourceValue::StringArray(string_array) => string_array
|
||||
.iter()
|
||||
.any(|line| line.trim_start().starts_with("%%")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An error that can occur while deserializing a Jupyter Notebook.
#[derive(Error, Debug)]
pub enum NotebookError {
    /// An I/O error while reading the notebook (e.g. opening or seeking the file).
    #[error(transparent)]
    Io(#[from] io::Error),
    /// An I/O-category error surfaced by `serde_json` while reading the JSON stream.
    #[error(transparent)]
    Json(serde_json::Error),
    /// The file contents are not valid JSON at all (syntax error or premature EOF).
    #[error("Expected a Jupyter Notebook, which must be internally stored as JSON, but this file isn't valid JSON: {0}")]
    InvalidJson(serde_json::Error),
    /// The file is valid JSON but doesn't conform to the notebook schema.
    #[error("This file does not match the schema expected of Jupyter Notebooks: {0}")]
    InvalidSchema(serde_json::Error),
    /// The notebook's major format version (`nbformat`) is not 4.
    #[error("Expected Jupyter Notebook format 4, found: {0}")]
    InvalidFormat(i64),
}
|
||||
|
||||
/// A deserialized Jupyter Notebook, along with the bookkeeping required to
/// map the concatenated Python source back onto the original cells.
#[derive(Clone, Debug, PartialEq)]
pub struct Notebook {
    /// Python source code of the notebook.
    ///
    /// This is the concatenation of all valid code cells in the notebook
    /// separated by a newline and a trailing newline. The trailing newline
    /// is added to make sure that each cell ends with a newline which will
    /// be removed when updating the cell content.
    source_code: String,
    /// The index of the notebook. This is used to map between the concatenated
    /// source code and the original notebook.
    index: OnceCell<NotebookIndex>,
    /// The raw notebook i.e., the deserialized version of JSON string.
    raw: RawNotebook,
    /// The offsets of each cell in the concatenated source code. This includes
    /// the first and last character offsets as well.
    cell_offsets: Vec<TextSize>,
    /// The cell index of all valid code cells in the notebook.
    valid_code_cells: Vec<u32>,
    /// Flag to indicate if the JSON string of the notebook has a trailing newline.
    trailing_newline: bool,
}
|
||||
|
||||
impl Notebook {
    /// Read the Jupyter Notebook from the given [`Path`].
    pub fn from_path(path: &Path) -> Result<Self, NotebookError> {
        Self::from_reader(BufReader::new(File::open(path)?))
    }

    /// Read the Jupyter Notebook from its JSON string.
    pub fn from_source_code(source_code: &str) -> Result<Self, NotebookError> {
        Self::from_reader(Cursor::new(source_code))
    }

    /// Read a Jupyter Notebook from a [`Read`] implementor.
    ///
    /// See also the black implementation
    /// <https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#L1017-L1046>
    fn from_reader<R>(mut reader: R) -> Result<Self, NotebookError>
    where
        R: Read + Seek,
    {
        // Probe the final byte for a newline; the seek fails on an empty
        // reader, in which case `trailing_newline` defaults to `false`.
        let trailing_newline = reader.seek(SeekFrom::End(-1)).is_ok_and(|_| {
            let mut buf = [0; 1];
            reader.read_exact(&mut buf).is_ok_and(|_| buf[0] == b'\n')
        });
        reader.rewind()?;
        let mut raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) {
            Ok(notebook) => notebook,
            Err(err) => {
                // Translate the error into a diagnostic
                return Err(match err.classify() {
                    Category::Io => NotebookError::Json(err),
                    Category::Syntax | Category::Eof => NotebookError::InvalidJson(err),
                    Category::Data => {
                        // We could try to read the schema version here but if this fails it's
                        // a bug anyway.
                        NotebookError::InvalidSchema(err)
                    }
                });
            }
        };

        // v4 is what everybody uses
        if raw_notebook.nbformat != 4 {
            // bail because we should have already failed at the json schema stage
            return Err(NotebookError::InvalidFormat(raw_notebook.nbformat));
        }

        // Collect the (0-based) indices of the code cells that should be linted.
        let valid_code_cells = raw_notebook
            .cells
            .iter()
            .enumerate()
            .filter(|(_, cell)| cell.is_valid_code_cell())
            .map(|(idx, _)| u32::try_from(idx).unwrap())
            .collect::<Vec<_>>();

        let mut contents = Vec::with_capacity(valid_code_cells.len());
        let mut current_offset = TextSize::from(0);
        let mut cell_offsets = Vec::with_capacity(valid_code_cells.len());
        cell_offsets.push(TextSize::from(0));

        for &idx in &valid_code_cells {
            let cell_contents = match &raw_notebook.cells[idx as usize].source() {
                SourceValue::String(string) => string.clone(),
                SourceValue::StringArray(string_array) => string_array.join(""),
            };
            // `+ 1` accounts for the newline inserted between cells.
            current_offset += TextSize::of(&cell_contents) + TextSize::new(1);
            contents.push(cell_contents);
            cell_offsets.push(current_offset);
        }

        // Add cell ids to 4.5+ notebooks if they are missing
        // https://github.com/astral-sh/ruff/issues/6834
        // https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#required-field
        if raw_notebook.nbformat == 4 && raw_notebook.nbformat_minor >= 5 {
            for cell in &mut raw_notebook.cells {
                let id = match cell {
                    Cell::Code(cell) => &mut cell.id,
                    Cell::Markdown(cell) => &mut cell.id,
                    Cell::Raw(cell) => &mut cell.id,
                };
                if id.is_none() {
                    // https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#questions
                    *id = Some(Uuid::new_v4().to_string());
                }
            }
        }

        Ok(Self {
            raw: raw_notebook,
            index: OnceCell::new(),
            // The additional newline at the end is to maintain consistency for
            // all cells. These newlines will be removed before updating the
            // source code with the transformed content. Refer `update_cell_content`.
            source_code: contents.join("\n") + "\n",
            cell_offsets,
            valid_code_cells,
            trailing_newline,
        })
    }

    /// Update the cell offsets as per the given [`SourceMap`].
    fn update_cell_offsets(&mut self, source_map: &SourceMap) {
        // When there are multiple cells without any edits, the offsets of those
        // cells will be updated using the same marker. So, we can keep track of
        // the last marker used to update the offsets and check if it's still
        // the closest marker to the current offset.
        let mut last_marker: Option<&SourceMarker> = None;

        // The first offset is always going to be at 0, so skip it.
        // Iterating in reverse keeps markers applicable: edits earlier in the
        // source don't affect offsets that have already been processed.
        for offset in self.cell_offsets.iter_mut().skip(1).rev() {
            let closest_marker = match last_marker {
                Some(marker) if marker.source() <= *offset => marker,
                _ => {
                    let Some(marker) = source_map
                        .markers()
                        .iter()
                        .rev()
                        .find(|marker| marker.source() <= *offset)
                    else {
                        // There are no markers above the current offset, so we can
                        // stop here.
                        break;
                    };
                    last_marker = Some(marker);
                    marker
                }
            };

            // Shift the offset by the delta between the marker's source and
            // destination positions.
            match closest_marker.source().cmp(&closest_marker.dest()) {
                Ordering::Less => *offset += closest_marker.dest() - closest_marker.source(),
                Ordering::Greater => *offset -= closest_marker.source() - closest_marker.dest(),
                Ordering::Equal => (),
            }
        }
    }

    /// Update the cell contents with the transformed content.
    ///
    /// ## Panics
    ///
    /// Panics if the transformed content is out of bounds for any cell. This
    /// can happen only if the cell offsets were not updated before calling
    /// this method or the offsets were updated incorrectly.
    fn update_cell_content(&mut self, transformed: &str) {
        // Pair each valid code cell with its (start, end) offsets in the
        // transformed source via adjacent offset windows.
        for (&idx, (start, end)) in self
            .valid_code_cells
            .iter()
            .zip(self.cell_offsets.iter().tuple_windows::<(_, _)>())
        {
            let cell_content = transformed
                .get(start.to_usize()..end.to_usize())
                .unwrap_or_else(|| {
                    panic!(
                        "Transformed content out of bounds ({start:?}..{end:?}) for cell at {idx:?}"
                    );
                });
            self.raw.cells[idx as usize].set_source(SourceValue::StringArray(
                UniversalNewlineIterator::from(
                    // We only need to strip the trailing newline which we added
                    // while concatenating the cell contents.
                    cell_content.strip_suffix('\n').unwrap_or(cell_content),
                )
                .map(|line| line.as_full_str().to_string())
                .collect::<Vec<_>>(),
            ));
        }
    }

    /// Build and return the [`NotebookIndex`].
    ///
    /// ## Notes
    ///
    /// Empty cells don't have any newlines, but there's a single visible line
    /// in the UI. That single line needs to be accounted for.
    ///
    /// In case of [`SourceValue::StringArray`], newlines are part of the strings.
    /// So, to get the actual count of lines, we need to check for any trailing
    /// newline for the last line.
    ///
    /// For example, consider the following cell:
    /// ```python
    /// [
    ///     "import os\n",
    ///     "import sys\n",
    /// ]
    /// ```
    ///
    /// Here, the array suggests that there are two lines, but the actual number
    /// of lines visible in the UI is three. The same goes for [`SourceValue::String`]
    /// where we need to check for the trailing newline.
    ///
    /// The index building is expensive as it needs to go through the content of
    /// every valid code cell.
    fn build_index(&self) -> NotebookIndex {
        // Index 0 is a sentinel so that 1-based rows index directly.
        let mut row_to_cell = vec![0];
        let mut row_to_row_in_cell = vec![0];

        for &idx in &self.valid_code_cells {
            let line_count = match &self.raw.cells[idx as usize].source() {
                SourceValue::String(string) => {
                    if string.is_empty() {
                        // An empty cell still shows one visible line in the UI.
                        1
                    } else {
                        u32::try_from(NewlineWithTrailingNewline::from(string).count()).unwrap()
                    }
                }
                SourceValue::StringArray(string_array) => {
                    if string_array.is_empty() {
                        // An empty cell still shows one visible line in the UI.
                        1
                    } else {
                        // A trailing newline on the last element adds one more
                        // visible (empty) line.
                        let trailing_newline =
                            usize::from(string_array.last().is_some_and(|s| s.ends_with('\n')));
                        u32::try_from(string_array.len() + trailing_newline).unwrap()
                    }
                }
            };
            // Every row of this cell maps to the same (1-based) cell number…
            row_to_cell.extend(iter::repeat(idx + 1).take(line_count as usize));
            // …and to consecutive 1-based rows within the cell.
            row_to_row_in_cell.extend(1..=line_count);
        }

        NotebookIndex {
            row_to_cell,
            row_to_row_in_cell,
        }
    }

    /// Return the notebook content.
    ///
    /// This is the concatenation of all Python code cells.
    pub fn source_code(&self) -> &str {
        &self.source_code
    }

    /// Return the Jupyter notebook index.
    ///
    /// The index is built only once when required. This is only used to
    /// report diagnostics, so by that time all of the autofixes must have
    /// been applied if `--fix` was passed.
    pub fn index(&self) -> &NotebookIndex {
        self.index.get_or_init(|| self.build_index())
    }

    /// Return the cell offsets for the concatenated source code corresponding
    /// the Jupyter notebook.
    pub fn cell_offsets(&self) -> &[TextSize] {
        &self.cell_offsets
    }

    /// Return `true` if the notebook has a trailing newline, `false` otherwise.
    pub fn trailing_newline(&self) -> bool {
        self.trailing_newline
    }

    /// Update the notebook with the given sourcemap and transformed content.
    pub fn update(&mut self, source_map: &SourceMap, transformed: String) {
        // Cell offsets must be updated before updating the cell content as
        // it depends on the offsets to extract the cell content.
        // The cached index is invalidated since the content changed.
        self.index.take();
        self.update_cell_offsets(source_map);
        self.update_cell_content(&transformed);
        self.source_code = transformed;
    }

    /// Return a slice of [`Cell`] in the Jupyter notebook.
    pub fn cells(&self) -> &[Cell] {
        &self.raw.cells
    }

    /// Return `true` if the notebook is a Python notebook, `false` otherwise.
    ///
    /// Notebooks without `language_info` metadata are assumed to be Python.
    pub fn is_python_notebook(&self) -> bool {
        self.raw
            .metadata
            .language_info
            .as_ref()
            .map_or(true, |language| language.name == "python")
    }

    /// Write the notebook back to the given [`Write`] implementor.
    pub fn write(&self, writer: &mut dyn Write) -> anyhow::Result<()> {
        // https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041
        let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
        let mut serializer = serde_json::Serializer::with_formatter(writer, formatter);
        SortAlphabetically(&self.raw).serialize(&mut serializer)?;
        // Preserve the presence (or absence) of the original trailing newline.
        if self.trailing_newline {
            writeln!(serializer.into_inner())?;
        }
        Ok(())
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;

    use anyhow::Result;
    use test_case::test_case;

    use crate::{Cell, Notebook, NotebookError, NotebookIndex};

    /// Construct a path to a Jupyter notebook in the `resources/test/fixtures/jupyter` directory.
    fn notebook_path(path: impl AsRef<Path>) -> std::path::PathBuf {
        Path::new("./resources/test/fixtures/jupyter").join(path)
    }

    #[test]
    fn test_python() -> Result<(), NotebookError> {
        let notebook = Notebook::from_path(&notebook_path("valid.ipynb"))?;
        assert!(notebook.is_python_notebook());
        Ok(())
    }

    #[test]
    fn test_r() -> Result<(), NotebookError> {
        // Non-Python notebooks are detected via the `language_info` metadata.
        let notebook = Notebook::from_path(&notebook_path("R.ipynb"))?;
        assert!(!notebook.is_python_notebook());
        Ok(())
    }

    #[test]
    fn test_invalid() {
        // Non-JSON inputs surface as `InvalidJson`, schema mismatches as
        // `InvalidSchema`.
        assert!(matches!(
            Notebook::from_path(&notebook_path("invalid_extension.ipynb")),
            Err(NotebookError::InvalidJson(_))
        ));
        assert!(matches!(
            Notebook::from_path(&notebook_path("not_json.ipynb")),
            Err(NotebookError::InvalidJson(_))
        ));
        assert!(matches!(
            Notebook::from_path(&notebook_path("wrong_schema.ipynb")),
            Err(NotebookError::InvalidSchema(_))
        ));
    }

    #[test_case(Path::new("markdown.json"), false; "markdown")]
    #[test_case(Path::new("only_magic.json"), true; "only_magic")]
    #[test_case(Path::new("code_and_magic.json"), true; "code_and_magic")]
    #[test_case(Path::new("only_code.json"), true; "only_code")]
    #[test_case(Path::new("cell_magic.json"), false; "cell_magic")]
    fn test_is_valid_code_cell(path: &Path, expected: bool) -> Result<()> {
        /// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
        fn read_jupyter_cell(path: impl AsRef<Path>) -> Result<Cell> {
            let path = notebook_path("cell").join(path);
            let source_code = std::fs::read_to_string(path)?;
            Ok(serde_json::from_str(&source_code)?)
        }

        assert_eq!(read_jupyter_cell(path)?.is_valid_code_cell(), expected);
        Ok(())
    }

    #[test]
    fn test_concat_notebook() -> Result<(), NotebookError> {
        // End-to-end check of the concatenated source, the row index, and the
        // per-cell offsets for a notebook mixing code, markdown, and empty cells.
        let notebook = Notebook::from_path(&notebook_path("valid.ipynb"))?;
        assert_eq!(
            notebook.source_code,
            r#"def unused_variable():
    x = 1
    y = 2
    print(f"cell one: {y}")

unused_variable()
def mutable_argument(z=set()):
    print(f"cell two: {z}")

mutable_argument()




print("after empty cells")
"#
        );
        assert_eq!(
            notebook.index(),
            &NotebookIndex {
                row_to_cell: vec![0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 5, 7, 7, 8],
                row_to_row_in_cell: vec![0, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 1, 1, 2, 1],
            }
        );
        assert_eq!(
            notebook.cell_offsets(),
            &[
                0.into(),
                90.into(),
                168.into(),
                169.into(),
                171.into(),
                198.into()
            ]
        );
        Ok(())
    }
}
|
212
crates/ruff_notebook/src/schema.rs
Normal file
212
crates/ruff_notebook/src/schema.rs
Normal file
|
@ -0,0 +1,212 @@
|
|||
//! The JSON schema of a Jupyter Notebook, entrypoint is [`RawNotebook`]
|
||||
//!
|
||||
//! Generated by <https://app.quicktype.io/> from
|
||||
//! <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
|
||||
//! Jupyter Notebook v4.5 JSON schema.
|
||||
//!
|
||||
//! The following changes were made to the generated version:
|
||||
//! * Only keep the required structs and enums.
|
||||
//! * `Cell::id` is optional because it wasn't required <v4.5
|
||||
//! * `#[serde(deny_unknown_fields)]` was added where the schema had
|
||||
//! `"additionalProperties": false`
|
||||
//! * `#[serde(flatten)] pub other: BTreeMap<String, Value>` for
|
||||
//! `"additionalProperties": true` as preparation for round-trip support.
|
||||
//! * `#[serde(skip_serializing_none)]` was added to all structs where one or
|
||||
//! more fields were optional to avoid serializing `null` values.
|
||||
//! * `Cell::execution_count` is a required property only for code cells, but
|
||||
//! we serialize it for all cells. This is because we can't know if a cell is
|
||||
//! a code cell or not without looking at the `cell_type` property, which
|
||||
//! would require a custom serializer.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use serde_with::skip_serializing_none;
|
||||
|
||||
fn sort_alphabetically<T: Serialize, S: serde::Serializer>(
|
||||
value: &T,
|
||||
serializer: S,
|
||||
) -> Result<S::Ok, S::Error> {
|
||||
let value = serde_json::to_value(value).map_err(serde::ser::Error::custom)?;
|
||||
value.serialize(serializer)
|
||||
}
|
||||
|
||||
/// This is used to serialize any value implementing [`Serialize`] alphabetically.
///
/// The reason for this is to maintain consistency in the generated JSON string,
/// which is useful for diffing. The default serializer keeps the order of the
/// fields as they are defined in the struct, which will not be consistent when
/// there are `extra` fields.
///
/// The sorting itself is performed by the `sort_alphabetically` helper, which
/// round-trips the value through `serde_json::Value`.
///
/// # Example
///
/// ```
/// use std::collections::BTreeMap;
///
/// use serde::Serialize;
///
/// use ruff_notebook::SortAlphabetically;
///
/// #[derive(Serialize)]
/// struct MyStruct {
///     a: String,
///     #[serde(flatten)]
///     extra: BTreeMap<String, String>,
///     b: String,
/// }
///
/// let my_struct = MyStruct {
///     a: "a".to_string(),
///     extra: BTreeMap::from([
///         ("d".to_string(), "d".to_string()),
///         ("c".to_string(), "c".to_string()),
///     ]),
///     b: "b".to_string(),
/// };
///
/// let serialized = serde_json::to_string_pretty(&SortAlphabetically(&my_struct)).unwrap();
/// assert_eq!(
///     serialized,
///     r#"{
///   "a": "a",
///   "b": "b",
///   "c": "c",
///   "d": "d"
/// }"#
/// );
/// ```
#[derive(Serialize)]
pub struct SortAlphabetically<T: Serialize>(#[serde(serialize_with = "sort_alphabetically")] pub T);
|
||||
|
||||
/// The root of the JSON of a Jupyter Notebook
///
/// Generated by <https://app.quicktype.io/> from
/// <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
/// Jupyter Notebook v4.5 JSON schema.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct RawNotebook {
    /// Array of cells of the current notebook.
    pub cells: Vec<Cell>,
    /// Notebook root-level metadata.
    pub metadata: RawNotebookMetadata,
    /// Notebook format (major number). Incremented between backwards incompatible changes to the
    /// notebook format.
    pub nbformat: i64,
    /// Notebook format (minor number). Incremented for backward compatible changes to the
    /// notebook format.
    pub nbformat_minor: i64,
}
|
||||
|
||||
/// String identifying the type of cell.
///
/// The JSON `cell_type` field acts as the serde tag, so each variant maps to
/// one of the `"code"`, `"markdown"`, or `"raw"` cell types.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "cell_type")]
pub enum Cell {
    #[serde(rename = "code")]
    Code(CodeCell),
    #[serde(rename = "markdown")]
    Markdown(MarkdownCell),
    #[serde(rename = "raw")]
    Raw(RawCell),
}
|
||||
|
||||
/// Notebook raw nbconvert cell.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct RawCell {
    pub attachments: Option<Value>,
    /// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
    /// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
    /// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
    pub id: Option<String>,
    /// Cell-level metadata.
    pub metadata: Value,
    /// Contents of the cell, as a plain string or an array of lines.
    pub source: SourceValue,
}
|
||||
|
||||
/// Notebook markdown cell.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MarkdownCell {
    pub attachments: Option<Value>,
    /// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
    /// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
    /// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
    pub id: Option<String>,
    /// Cell-level metadata.
    pub metadata: Value,
    /// Contents of the cell, as a plain string or an array of lines.
    pub source: SourceValue,
}
|
||||
|
||||
/// Notebook code cell.
///
/// Unlike the other cell structs, this one is NOT annotated with
/// `#[skip_serializing_none]`: per the module docs, `execution_count` must be
/// serialized (as `null` when absent) for code cells, so only `id` opts out
/// of serialization when missing.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct CodeCell {
    /// The code cell's prompt number. Will be null if the cell has not been run.
    pub execution_count: Option<i64>,
    /// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
    /// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
    /// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Cell-level metadata.
    pub metadata: Value,
    /// Execution, display, or stream outputs.
    pub outputs: Vec<Value>,
    /// Contents of the cell, as a plain string or an array of lines.
    pub source: SourceValue,
}
|
||||
|
||||
/// Notebook root-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct RawNotebookMetadata {
    /// The author(s) of the notebook document
    pub authors: Option<Value>,
    /// Kernel information.
    pub kernelspec: Option<Value>,
    /// Kernel information.
    pub language_info: Option<LanguageInfo>,
    /// Original notebook format (major number) before converting the notebook between versions.
    /// This should never be written to a file.
    pub orig_nbformat: Option<i64>,
    /// The title of the notebook document
    pub title: Option<String>,
    /// For additional properties.
    ///
    /// Captures any keys not listed above, preserving them for round-tripping.
    #[serde(flatten)]
    pub extra: BTreeMap<String, Value>,
}
|
||||
|
||||
/// Kernel information.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LanguageInfo {
    /// The codemirror mode to use for code in this language.
    pub codemirror_mode: Option<Value>,
    /// The file extension for files in this language.
    pub file_extension: Option<String>,
    /// The mimetype corresponding to files in this language.
    pub mimetype: Option<String>,
    /// The programming language which this kernel runs.
    pub name: String,
    /// The pygments lexer to use for code in this language.
    pub pygments_lexer: Option<String>,
    /// For additional properties.
    ///
    /// Captures any keys not listed above, preserving them for round-tripping.
    #[serde(flatten)]
    pub extra: BTreeMap<String, Value>,
}
|
||||
|
||||
/// mimetype output (e.g. text/plain), represented as either an array of strings or a
/// string.
///
/// Contents of the cell, represented as an array of lines.
///
/// The stream's text output, represented as an array of strings.
///
/// `#[serde(untagged)]` tries each variant in order, so a JSON string
/// deserializes to `String` and a JSON array to `StringArray`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum SourceValue {
    String(String),
    /// Each element is one line, typically including its trailing newline.
    StringArray(Vec<String>),
}
|
Loading…
Add table
Add a link
Reference in a new issue